// JavaScript · 132 lines · 4.8 KiB
// Look up, once at load time, every element this script renders into.
// The order of the selectors must match the order of the destructured names.
const [
  statusEl,
  pipelineEl,
  tokenSummaryEl,
  tokenSequenceEl,
  selectedTokenEl,
  detectionsEl,
] = [
  "#tokenizer-status",
  "#pipeline-steps",
  "#token-summary",
  "#token-sequence",
  "#selected-token",
  "#tokenizer-detections",
].map((selector) => document.querySelector(selector));

// `index` of the token shown in the detail panel; null until one is resolved.
let selectedTokenIndex = null;
|
||
|
||
/**
 * Formats a shape (a list of dimensions) for display.
 *
 * @param {?Array<number|string>} shape - Dimension list, possibly missing.
 * @returns {string} The dimensions as "[a, b, c]", or "-" when `shape` is
 *   missing, empty, or not an array.
 */
function formatShape(shape) {
  // Array.isArray also rejects strings and other values that expose `length`
  // but have no `join`, which would otherwise throw below.
  if (!Array.isArray(shape) || shape.length === 0) {
    return "-";
  }
  return `[${shape.join(", ")}]`;
}
|
||
|
||
/**
 * Updates the status element's text and its online/offline classes.
 *
 * @param {boolean} ready - Truthy applies the "online" class, falsy "offline".
 * @param {string} text - Text to show in the status element.
 */
function setStatus(ready, text) {
  // classList.toggle() treats an `undefined` force argument as "not passed"
  // and flips the class instead of removing it, so coerce to a real boolean.
  const isOnline = Boolean(ready);
  statusEl.textContent = text;
  statusEl.classList.toggle("online", isOnline);
  statusEl.classList.toggle("offline", !isOnline);
}
|
||
|
||
/**
 * Renders the numbered list of processing stages into the pipeline panel.
 *
 * @param {object} data - State payload. Reads `image_size`, the `*_shape`
 *   fields and `detections`; missing fields render as "-" (or 0 for the
 *   detection count).
 */
function renderPipeline(data) {
  // The payload is external data, so escape every value before it is placed
  // in markup that gets assigned to innerHTML.
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (char) => `&#${char.charCodeAt(0)};`);

  // The first two stages report the same frame dimensions.
  const imageSize = `${data.image_size?.width ?? "-"} × ${data.image_size?.height ?? "-"}`;

  const steps = [
    ["OpenCV RGB 帧", imageSize],
    ["PIL Image", imageSize],
    ["DetrImageProcessor", `pixel_values ${formatShape(data.pixel_values_shape)} / pixel_mask ${formatShape(data.pixel_mask_shape)}`],
    ["ResNet-50 backbone", `feature map ${formatShape(data.feature_map_shape)}`],
    ["1×1 convolution", `projected ${formatShape(data.projected_feature_map_shape)}`],
    ["视觉 token embedding", `由 projected feature map flatten 得到 ${formatShape(data.visual_tokens_shape)}`],
    ["位置 embedding", `二维位置 embedding ${formatShape(data.position_encoding_shape)}`],
    ["Transformer Encoder", formatShape(data.encoder_last_hidden_state_shape)],
    ["Object query embedding + Decoder", `object query embedding 解码后 ${formatShape(data.decoder_last_hidden_state_shape)}`],
    ["类别 logits + boxes", `logits ${formatShape(data.logits_shape)} / boxes ${formatShape(data.pred_boxes_shape)}`],
    ["post_process_object_detection", `检测结果 ${data.detections?.length ?? 0} 个`],
  ];

  pipelineEl.innerHTML = steps
    .map(([title, value], index) => `
      <div class="pipeline-step">
        <div class="step-index">${index + 1}</div>
        <div>
          <div class="step-title">${escapeHtml(title)}</div>
          <div class="step-value">${escapeHtml(value)}</div>
        </div>
      </div>
    `)
    .join("");
}
|
||
|
||
/**
 * Renders the token summary line, the clickable token cells and (through
 * renderSelectedToken) the detail panel for the current selection.
 *
 * @param {object} data - State payload. Reads `frame_id`, `token_grid`
 *   ({rows, cols, total, shown}) and `token_sequence` (items with `index`,
 *   `row`, `col`).
 */
function renderTokens(data) {
  // Token fields are external data; escape them before they reach innerHTML.
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (char) => `&#${char.charCodeAt(0)};`);

  const grid = data.token_grid || {};
  tokenSummaryEl.textContent = `帧号 ${data.frame_id ?? "-"} · token 网格 ${grid.rows ?? "-"} × ${grid.cols ?? "-"},总数 ${grid.total ?? "-"},展示前 ${grid.shown ?? 0} 个 token,每个显示前 8 维采样。`;

  // Resolve the selection BEFORE drawing the cells: renderSelectedToken()
  // falls back to the first token and updates selectedTokenIndex, and the
  // highlighted cell must agree with the detail panel on the same render.
  renderSelectedToken(data);

  tokenSequenceEl.innerHTML = (data.token_sequence || [])
    .map((token) => {
      const selectedClass = token.index === selectedTokenIndex ? "selected" : "";
      return `
        <button class="token-cell ${selectedClass}" data-index="${escapeHtml(token.index)}">
          <span>#${escapeHtml(token.index)}</span>
          <small>(${escapeHtml(token.row)}, ${escapeHtml(token.col)})</small>
        </button>
      `;
    })
    .join("");

  tokenSequenceEl.querySelectorAll(".token-cell").forEach((button) => {
    button.addEventListener("click", () => {
      selectedTokenIndex = Number(button.dataset.index);
      // renderTokens() refreshes the detail panel itself, so a single call
      // redraws both the cells and the panel.
      renderTokens(data);
    });
  });
}
|
||
|
||
/**
 * Renders the detail panel for the selected token, falling back to the first
 * token when the current selection is not in the sequence. Updates
 * selectedTokenIndex to the token actually shown.
 *
 * @param {object} data - State payload. Reads `token_sequence` (items with
 *   `index`, `row`, `col`, `magnitude`, `values`).
 */
function renderSelectedToken(data) {
  // Token fields are external data; escape them before they reach innerHTML.
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (char) => `&#${char.charCodeAt(0)};`);

  const tokens = data.token_sequence || [];
  const token = tokens.find((item) => item.index === selectedTokenIndex) || tokens[0];
  if (!token) {
    selectedTokenEl.textContent = "暂无 token。";
    return;
  }
  selectedTokenIndex = token.index;

  // A token without a `values` array still renders (as "[...]") instead of
  // throwing. toFixed() output is digits or "NaN", so it needs no escaping.
  const samples = (Array.isArray(token.values) ? token.values : []).map((value) =>
    Number(value).toFixed(4),
  );
  const vector = [...samples, "..."].join(", ");

  selectedTokenEl.innerHTML = `
    <div class="token-detail-title">Token #${escapeHtml(token.index)} · 网格位置 (${escapeHtml(token.row)}, ${escapeHtml(token.col)}) · L2 ${escapeHtml(token.magnitude)}</div>
    <div class="token-vector">[${vector}]</div>
  `;
}
|
||
|
||
/**
 * Renders the detection list, or an empty-state message when there is
 * nothing to show.
 *
 * @param {?Array<{label: *, score: *, box: ?Array}>} detections - Detection
 *   records; a missing or non-array value is treated as an empty list.
 */
function renderDetections(detections) {
  // Labels and boxes are external data; escape them before they reach innerHTML.
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (char) => `&#${char.charCodeAt(0)};`);

  const items = Array.isArray(detections) ? detections : [];
  if (items.length === 0) {
    detectionsEl.className = "detections empty";
    detectionsEl.textContent = "暂无目标";
    return;
  }

  detectionsEl.className = "detections";
  detectionsEl.innerHTML = items
    .map((det) => {
      const score = Number(det.score);
      // Show a placeholder rather than "NaN%" when the score is unusable.
      const percent = Number.isFinite(score) ? `${(score * 100).toFixed(1)}%` : "-";
      // Tolerate a record without a box instead of throwing on `.join`.
      const box = Array.isArray(det.box) ? det.box.join(", ") : "";
      return `
        <div class="det-item">
          <div class="det-title">
            <span>${escapeHtml(det.label)}</span>
            <span>${percent}</span>
          </div>
          <div class="det-box">box: [${escapeHtml(box)}]</div>
        </div>
      `;
    })
    .join("");
}
|
||
|
||
/**
 * Fetches the current tokenizer state, updates every panel, and schedules the
 * next poll. Failures are shown in the status element and the summary line;
 * the next poll is scheduled whether or not this one succeeded.
 *
 * @returns {Promise<void>}
 */
async function refreshTokenizer() {
  const pollIntervalMs = 1200;
  try {
    // The timestamp query parameter makes each request URL unique.
    const response = await fetch(`/tokenizer/state?t=${Date.now()}`);
    // fetch() only rejects on network failure; surface HTTP errors explicitly
    // instead of letting them appear as an opaque JSON parse error.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const data = await response.json();

    if (!data.ready) {
      setStatus(false, data.error || "等待帧");
      tokenSummaryEl.textContent = data.error || "等待视频帧";
      return;
    }

    setStatus(Boolean(data.connected), data.connected ? "动态更新中" : "未连接");
    renderPipeline(data);
    renderTokens(data);
    renderDetections(data.detections || []);
  } catch (error) {
    setStatus(false, "更新失败");
    tokenSummaryEl.textContent = `更新失败:${error}`;
  } finally {
    // Runs after the early return above as well, so polling never stops.
    setTimeout(refreshTokenizer, pollIntervalMs);
  }
}
|
||
|
||
// Start polling; refreshTokenizer() reschedules itself after every attempt.
refreshTokenizer();
|