// Cached references to the page elements this script writes into. The order of
// the destructured names matches the order of the selectors below.
const [
  statusEl,
  pipelineEl,
  tokenSummaryEl,
  tokenSequenceEl,
  selectedTokenEl,
  detectionsEl,
] = [
  "#tokenizer-status",
  "#pipeline-steps",
  "#token-summary",
  "#token-sequence",
  "#selected-token",
  "#tokenizer-detections",
].map((selector) => document.querySelector(selector));

// `index` of the token shown in the detail panel; null until one has been picked.
let selectedTokenIndex = null;
/**
 * Format a shape array for display, e.g. [1, 3, 800] -> "[1, 3, 800]".
 *
 * @param {Array<number>|null|undefined} shape - Dimensions to format.
 * @returns {string} The bracketed, comma-separated shape, or "-" when the
 *   shape is missing or empty.
 */
function formatShape(shape) {
  return shape?.length ? `[${shape.join(", ")}]` : "-";
}
/**
 * Update the status badge: set its text and switch between the "online" and
 * "offline" CSS classes.
 *
 * @param {boolean} ready - Truthy adds "online" and removes "offline"; falsy does the reverse.
 * @param {string} text - Text shown in the badge.
 * @returns {void}
 */
function setStatus(ready, text) {
  statusEl.textContent = text;

  const classStates = [
    ["online", ready],
    ["offline", !ready],
  ];
  for (const [className, enabled] of classStates) {
    statusEl.classList.toggle(className, enabled);
  }
}
/**
 * Render the processing pipeline as a numbered sequence of steps inside
 * `pipelineEl`.
 *
 * Every step has a title and a summary value built from the state payload
 * (image size, tensor shapes, detection count). Missing fields are shown as
 * "-" (or 0 for the detection count).
 *
 * @param {object} data - Tokenizer state payload. Fields read: `image_size`,
 *   the various `*_shape` arrays, and `detections`.
 * @returns {void}
 */
function renderPipeline(data) {
  // Text is escaped because the result is assigned to innerHTML.
  const escapeHtml = (text) =>
    String(text).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

  // The first two steps report the same dimensions, so build the string once.
  const imageSize = `${data.image_size?.width ?? "-"} × ${data.image_size?.height ?? "-"}`;

  const steps = [
    ["OpenCV RGB 帧", imageSize],
    ["PIL Image", imageSize],
    ["DetrImageProcessor", `pixel_values ${formatShape(data.pixel_values_shape)} / pixel_mask ${formatShape(data.pixel_mask_shape)}`],
    ["ResNet-50 backbone", `feature map ${formatShape(data.feature_map_shape)}`],
    ["1×1 convolution", `projected ${formatShape(data.projected_feature_map_shape)}`],
    ["视觉 token embedding", `由 projected feature map flatten 得到 ${formatShape(data.visual_tokens_shape)}`],
    ["位置 embedding", `二维位置 embedding ${formatShape(data.position_encoding_shape)}`],
    ["Transformer Encoder", formatShape(data.encoder_last_hidden_state_shape)],
    ["Object query embedding + Decoder", `object query embedding 解码后 ${formatShape(data.decoder_last_hidden_state_shape)}`],
    ["类别 logits + boxes", `logits ${formatShape(data.logits_shape)} / boxes ${formatShape(data.pred_boxes_shape)}`],
    ["post_process_object_detection", `检测结果 ${data.detections?.length ?? 0} 个`],
  ];

  // The per-step template used to be empty, so nothing was displayed.
  // NOTE(review): the element/class names below are placeholders — align them
  // with the page stylesheet.
  pipelineEl.innerHTML = steps
    .map(([title, value], index) => `
<div class="pipeline-step">
  <span>${index + 1}</span>
  <strong>${escapeHtml(title)}</strong>
  <code>${escapeHtml(value)}</code>
</div>
`)
    .join("");
}
/**
 * Render the token summary line and the clickable token grid, and refresh the
 * detail panel for the selected token.
 *
 * Clicking a cell stores its index in `selectedTokenIndex` and re-renders.
 *
 * @param {object} data - Tokenizer state payload. Fields read: `frame_id`,
 *   `token_grid` ({rows, cols, total, shown}) and `token_sequence` (items with
 *   `index`, `row`, `col`).
 * @returns {void}
 */
function renderTokens(data) {
  // Text is escaped because the result is assigned to innerHTML.
  const escapeHtml = (text) =>
    String(text).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

  const grid = data.token_grid || {};
  tokenSummaryEl.textContent = `帧号 ${data.frame_id ?? "-"} · token 网格 ${grid.rows ?? "-"} × ${grid.cols ?? "-"},总数 ${grid.total ?? "-"},展示前 ${grid.shown ?? 0} 个 token,每个显示前 8 维采样。`;

  // Render the detail panel first: it settles `selectedTokenIndex` (falling
  // back to the first token), which the grid needs to mark the active cell.
  renderSelectedToken(data);

  // The per-token template used to be empty, so no `.token-cell` element was
  // ever created and the click handlers below never attached.
  // NOTE(review): the cell contents and the "selected" class are placeholders —
  // align them with the page stylesheet.
  tokenSequenceEl.innerHTML = (data.token_sequence || [])
    .map((token) => {
      const isSelected = token.index === selectedTokenIndex;
      return `
<button type="button" class="token-cell${isSelected ? " selected" : ""}" data-index="${escapeHtml(token.index)}" aria-pressed="${isSelected}" title="(${escapeHtml(token.row)}, ${escapeHtml(token.col)})">#${escapeHtml(token.index)}</button>
`;
    })
    .join("");

  tokenSequenceEl.querySelectorAll(".token-cell").forEach((button) => {
    button.addEventListener("click", () => {
      selectedTokenIndex = Number(button.dataset.index);
      // One re-render is enough: renderTokens refreshes the detail panel too.
      renderTokens(data);
    });
  });
}
/**
 * Show the details of the currently selected token in `selectedTokenEl`.
 *
 * The token whose `index` equals `selectedTokenIndex` is used; when none
 * matches, the first token is used instead. `selectedTokenIndex` is then
 * updated to the index of the token actually shown. With no tokens at all a
 * placeholder text is displayed and the selection is left untouched.
 *
 * @param {object} data - Tokenizer state payload. Field read: `token_sequence`
 *   (items with `index`, `row`, `col`, `magnitude`, `values`).
 * @returns {void}
 */
function renderSelectedToken(data) {
  // Text is escaped because the result is assigned to innerHTML.
  const escapeHtml = (text) =>
    String(text).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

  const tokens = data.token_sequence || [];
  const token = tokens.find((item) => item.index === selectedTokenIndex) || tokens[0];
  if (!token) {
    selectedTokenEl.textContent = "暂无 token。";
    return;
  }
  selectedTokenIndex = token.index;

  // A token without `values` renders an empty sample list instead of throwing.
  const samples = (token.values ?? [])
    .map((value) => Number(value).toFixed(4))
    .join(", ");
  const sampleText = samples ? `[${samples}, ...]` : "[...]";

  // NOTE(review): the wrapping elements are placeholders — the template had no
  // markup, so both lines collapsed into one run of text; align with the page
  // stylesheet.
  selectedTokenEl.innerHTML = `
<strong>Token #${escapeHtml(token.index)} · 网格位置 (${escapeHtml(token.row)}, ${escapeHtml(token.col)}) · L2 ${escapeHtml(token.magnitude)}</strong>
<code>${sampleText}</code>
`;
}
/**
 * Render the detection list into `detectionsEl`.
 *
 * With no detections the container gets the classes "detections empty" and a
 * placeholder text. Otherwise it gets the class "detections" and one entry per
 * detection showing its label, its score as a percentage with one decimal,
 * and its box values.
 *
 * @param {Array<object>|null|undefined} detections - Items with `label`,
 *   `score` and `box`; a missing list is treated as empty.
 * @returns {void}
 */
function renderDetections(detections) {
  // Text is escaped because the result is assigned to innerHTML.
  const escapeHtml = (text) =>
    String(text).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

  const items = detections ?? [];
  if (!items.length) {
    detectionsEl.className = "detections empty";
    detectionsEl.textContent = "暂无目标";
    return;
  }

  detectionsEl.className = "detections";
  // NOTE(review): the wrapping elements and the "detection" class are
  // placeholders — the template had no markup, so all detections ran together
  // as one block of text; align with the page stylesheet.
  detectionsEl.innerHTML = items
    .map((det) => `
<div class="detection">
  <strong>${escapeHtml(det.label)}</strong>
  <span>${(det.score * 100).toFixed(1)}%</span>
  <code>box: [${escapeHtml((det.box ?? []).join(", "))}]</code>
</div>
`)
    .join("");
}
/**
 * Fetch the tokenizer state once, update the page from it, and schedule the
 * next refresh.
 *
 * - Payload not `ready`: show its `error` (or a waiting message) in the status
 *   badge and the summary line; the other panels are left untouched.
 * - Payload `ready`: update the status badge from `connected` and re-render
 *   the pipeline, the tokens and the detections.
 * - Any thrown error (network, JSON parsing, rendering): show a failure
 *   message in the status badge and the summary line.
 *
 * Whatever the outcome, the function re-arms itself to run again in 1200 ms.
 *
 * @returns {Promise<void>}
 */
async function refreshTokenizer() {
  try {
    // `t` carries the current timestamp — presumably a cache-buster; the
    // server-side handler is not visible from this file.
    const reply = await fetch(`/tokenizer/state?t=${Date.now()}`);
    const state = await reply.json();

    if (state.ready) {
      const statusText = state.connected ? "动态更新中" : "未连接";
      setStatus(Boolean(state.connected), statusText);
      renderPipeline(state);
      renderTokens(state);
      renderDetections(state.detections || []);
    } else {
      // The two fallback messages intentionally differ between badge and summary.
      setStatus(false, state.error || "等待帧");
      tokenSummaryEl.textContent = state.error || "等待视频帧";
    }
  } catch (failure) {
    setStatus(false, "更新失败");
    tokenSummaryEl.textContent = `更新失败:${failure}`;
  } finally {
    // Keep polling regardless of how this round ended.
    setTimeout(refreshTokenizer, 1200);
  }
}
// Start the polling loop. The returned promise is deliberately not awaited:
// refreshTokenizer handles its own errors and re-schedules itself.
void refreshTokenizer();