/**
 * Tokenizer visualisation page script.
 *
 * Polls `/tokenizer/state` and renders four views from the returned JSON:
 * the processing pipeline with tensor shapes, the token sequence, the
 * currently selected token, and the detection list.
 *
 * Every value taken from the response is passed through `escapeHtml` before
 * it is interpolated into an `innerHTML` string.
 */

const statusEl = document.querySelector("#tokenizer-status");
const pipelineEl = document.querySelector("#pipeline-steps");
const tokenSummaryEl = document.querySelector("#token-summary");
const tokenSequenceEl = document.querySelector("#token-sequence");
const selectedTokenEl = document.querySelector("#selected-token");
const detectionsEl = document.querySelector("#tokenizer-detections");

/** `index` of the token shown in the detail pane; `null` until one is resolved. */
let selectedTokenIndex = null;

/** Delay between the end of one refresh and the start of the next. */
const REFRESH_INTERVAL_MS = 1200;

const HTML_ESCAPES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;",
};

/**
 * Escape a value for safe interpolation into HTML text or a quoted attribute.
 *
 * @param {unknown} value - Any value; it is stringified first.
 * @returns {string} The stringified value with `& < > " '` replaced by entities.
 */
function escapeHtml(value) {
  return String(value).replace(/[&<>"']/g, (char) => HTML_ESCAPES[char]);
}

/**
 * Format a shape array as `[a, b, c]`.
 *
 * @param {unknown} shape - Expected to be an array of dimensions.
 * @returns {string} The bracketed list, or "-" for a missing, empty or non-array value.
 */
function formatShape(shape) {
  if (!Array.isArray(shape) || shape.length === 0) {
    return "-";
  }
  return `[${shape.join(", ")}]`;
}

/**
 * Update the status badge text and its `online` / `offline` classes.
 *
 * @param {boolean} ready - `true` selects the `online` class, `false` the `offline` class.
 * @param {string} text - Text shown in the badge.
 */
function setStatus(ready, text) {
  statusEl.textContent = text;
  statusEl.classList.toggle("online", ready);
  statusEl.classList.toggle("offline", !ready);
}

/**
 * Render the numbered list of pipeline stages with the shapes reported in `data`.
 *
 * @param {object} data - State payload; missing fields are rendered as "-".
 */
function renderPipeline(data) {
  const imageSize = `${data.image_size?.width ?? "-"} × ${data.image_size?.height ?? "-"}`;
  const steps = [
    ["OpenCV RGB 帧", imageSize],
    ["PIL Image", imageSize],
    ["DetrImageProcessor", `pixel_values ${formatShape(data.pixel_values_shape)} / pixel_mask ${formatShape(data.pixel_mask_shape)}`],
    ["ResNet-50 backbone", `feature map ${formatShape(data.feature_map_shape)}`],
    ["1×1 convolution", `projected ${formatShape(data.projected_feature_map_shape)}`],
    ["视觉 token embedding", `由 projected feature map flatten 得到 ${formatShape(data.visual_tokens_shape)}`],
    ["位置 embedding", `二维位置 embedding ${formatShape(data.position_encoding_shape)}`],
    ["Transformer Encoder", formatShape(data.encoder_last_hidden_state_shape)],
    ["Object query embedding + Decoder", `object query embedding 解码后 ${formatShape(data.decoder_last_hidden_state_shape)}`],
    ["类别 logits + boxes", `logits ${formatShape(data.logits_shape)} / boxes ${formatShape(data.pred_boxes_shape)}`],
    ["post_process_object_detection", `检测结果 ${data.detections?.length ?? 0} 个`],
  ];

  pipelineEl.innerHTML = steps
    .map(([title, value], index) => `
${index + 1}
${escapeHtml(title)}
${escapeHtml(value)}
`)
    .join("");
}

/**
 * Render the token summary line, the selected-token pane and the clickable
 * token cells.
 *
 * @param {object} data - State payload; reads `token_grid`, `frame_id` and `token_sequence`.
 */
function renderTokens(data) {
  const grid = data.token_grid || {};
  const tokens = Array.isArray(data.token_sequence) ? data.token_sequence : [];

  tokenSummaryEl.textContent = `帧号 ${data.frame_id ?? "-"} · token 网格 ${grid.rows ?? "-"} × ${grid.cols ?? "-"},总数 ${grid.total ?? "-"},展示前 ${grid.shown ?? 0} 个 token,每个显示前 8 维采样。`;

  // Resolve the selection first so the cells below can flag the selected token.
  renderSelectedToken(data);

  // Each cell must be a `.token-cell` element carrying `data-index`: those are
  // the hooks the click handler reads.
  tokenSequenceEl.innerHTML = tokens
    .map((token) => {
      const index = escapeHtml(token.index);
      const isSelected = token.index === selectedTokenIndex;
      return `<button type="button" class="token-cell" data-index="${index}" aria-pressed="${isSelected}">#${index}</button>`;
    })
    .join("");

  tokenSequenceEl.querySelectorAll(".token-cell").forEach((button) => {
    button.addEventListener("click", () => {
      selectedTokenIndex = Number(button.dataset.index);
      // renderTokens refreshes the detail pane itself, so one call is enough.
      renderTokens(data);
    });
  });
}

/**
 * Render the detail pane for the selected token.
 *
 * Falls back to the first token when nothing matches `selectedTokenIndex`, and
 * stores the index of the token actually shown back into `selectedTokenIndex`.
 *
 * @param {object} data - State payload; reads `token_sequence`.
 */
function renderSelectedToken(data) {
  const tokens = Array.isArray(data.token_sequence) ? data.token_sequence : [];
  const token = tokens.find((item) => item.index === selectedTokenIndex) || tokens[0];
  if (!token) {
    selectedTokenEl.textContent = "暂无 token。";
    return;
  }

  selectedTokenIndex = token.index;
  // A token without a `values` array renders an empty list instead of throwing.
  const values = Array.isArray(token.values) ? token.values : [];
  const formattedValues = values.map((value) => Number(value).toFixed(4)).join(", ");

  selectedTokenEl.innerHTML = `
Token #${escapeHtml(token.index)} · 网格位置 (${escapeHtml(token.row)}, ${escapeHtml(token.col)}) · L2 ${escapeHtml(token.magnitude)}
[${formattedValues}, ...]
`;
}

/**
 * Render the detection list, or an empty-state message when there is nothing to show.
 *
 * @param {Array<{label: unknown, score: unknown, box: unknown}>} detections - Detections to list.
 */
function renderDetections(detections) {
  if (!Array.isArray(detections) || detections.length === 0) {
    detectionsEl.className = "detections empty";
    detectionsEl.textContent = "暂无目标";
    return;
  }

  detectionsEl.className = "detections";
  detectionsEl.innerHTML = detections
    .map((det) => {
      const score = Number(det.score);
      // A missing or non-numeric score, or a missing box, is shown as "-".
      const scoreText = Number.isFinite(score) ? `${(score * 100).toFixed(1)}%` : "-";
      const boxText = Array.isArray(det.box) ? det.box.join(", ") : "-";
      return `
${escapeHtml(det.label)} ${scoreText}
box: [${escapeHtml(boxText)}]
`;
    })
    .join("");
}

/**
 * Fetch the current tokenizer state, render it, and schedule the next refresh.
 *
 * The next poll is scheduled in `finally`, i.e. only once this one has
 * finished, so requests never overlap and a failure does not stop the loop.
 *
 * @returns {Promise<void>} Resolves when this refresh cycle is done; never rejects.
 */
async function refreshTokenizer() {
  try {
    const response = await fetch(`/tokenizer/state?t=${Date.now()}`);

    // The body is parsed even on a non-2xx status so a JSON `error` field from
    // the server can still be displayed; a non-JSON body reports the status.
    let data;
    try {
      data = await response.json();
    } catch (parseError) {
      throw new Error(`HTTP ${response.status}: ${parseError.message}`);
    }

    if (!data?.ready) {
      const message = data?.error;
      setStatus(false, message || "等待帧");
      tokenSummaryEl.textContent = message || "等待视频帧";
      return;
    }

    setStatus(Boolean(data.connected), data.connected ? "动态更新中" : "未连接");
    renderPipeline(data);
    renderTokens(data);
    renderDetections(data.detections || []);
  } catch (error) {
    console.error(error);
    setStatus(false, "更新失败");
    tokenSummaryEl.textContent = `更新失败:${error}`;
  } finally {
    setTimeout(refreshTokenizer, REFRESH_INTERVAL_MS);
  }
}

refreshTokenizer();