first commit

2026-06-03 11:00:50 +08:00
commit 322b72ac5b
18 changed files with 1821 additions and 0 deletions
--- a/app/init.py
+++ b/app/init.py
--- a/app/capacity_api.py
+++ b/app/capacity_api.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import hashlib
+import json
+import time
+import urllib.parse
+import urllib.request
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from typing import Any
+
+
+@dataclass
+class Token:
+    """Cached access token together with its absolute expiry time."""
+
+    access_token: str
+    # Epoch seconds (time.time() based) at which the token expires.
+    expires_at: float
+
+
+@dataclass
+class StreamUrlResult:
+    """Stream URL returned by the API together with per-step timings."""
+
+    url: str
+    # Step name -> elapsed milliseconds; may also carry the "token_cache" flag.
+    timings: dict[str, float]
+
+
+class CapacityApiClient:
+    """Small HTTP client for the capacity platform.
+
+    It caches the access token, signs business requests and resolves the
+    RTSP stream URL of a device.
+    """
+
+    def __init__(
+        self,
+        base_url: str,
+        app_id: str,
+        app_secret: str,
+        account: str,
+        method: str,
+        timeout: int = 20,
+    ):
+        """Store credentials and request settings; no network call is made here."""
+        self.base_url = base_url.rstrip("/")
+        self.app_id = app_id
+        self.app_secret = app_secret
+        self.account = account
+        self.method = method
+        self.timeout = timeout
+        self.token: Token | None = None
+
+    def get_stream_url(self, device_num: str) -> str:
+        """Return only the stream URL for *device_num*."""
+        return self.get_stream_url_details(device_num).url
+
+    def get_stream_url_details(self, device_num: str) -> StreamUrlResult:
+        """Resolve the stream URL for *device_num* and report step timings.
+
+        Raises:
+            RuntimeError: the API reported an error or returned no RTSP URL.
+        """
+        timings: dict[str, float] = {}
+        started = time.monotonic()
+        access_token = self._get_access_token(timings)
+        timings["token_ms"] = round((time.monotonic() - started) * 1000, 2)
+        started = time.monotonic()
+        business_params = {
+            "account": self.account,
+            "deviceNum": device_num,
+            "isSubStream": 0,
+            "networkType": 1,
+            "urlType": 1,
+        }
+        # The API spec requires the business parameters to be serialized into
+        # the single "params" JSON string before the request is signed.
+        params = {
+            "accessToken": access_token,
+            "appId": self.app_id,
+            "method": self.method,
+            "params": json.dumps(business_params, ensure_ascii=False, separators=(",", ":")),
+            "timestamp": self._timestamp(),
+            "v": "1.0.0",
+        }
+        params["sign"] = self._sign(params)
+        timings["sign_ms"] = round((time.monotonic() - started) * 1000, 2)
+        started = time.monotonic()
+        data = self._get_json(f"{self.base_url}/rest", params)
+        timings["stream_url_ms"] = round((time.monotonic() - started) * 1000, 2)
+
+        payload = self._unwrap(data, "播放地址接口返回错误")
+        stream_url = payload.get("rtspUrl") or payload.get("rtspUri")
+        if not stream_url:
+            raise RuntimeError("播放地址接口未返回 RTSP 地址")
+        return StreamUrlResult(stream_url, timings)
+
+    def _get_access_token(self, timings: dict[str, float] | None = None) -> str:
+        """Return a valid access token, fetching a new one when the cache is stale.
+
+        A cached token is reused until 300 seconds before its expiry; in that
+        case ``timings["token_cache"]`` is set to 1 when *timings* is given.
+
+        Raises:
+            RuntimeError: the token endpoint reported an error or no token.
+        """
+        if self.token and time.time() < self.token.expires_at - 300:
+            if timings is not None:
+                timings["token_cache"] = 1
+            return self.token.access_token
+
+        data = self._get_json(
+            f"{self.base_url}/oauth/token",
+            {
+                "grantType": "client_credential",
+                "appId": self.app_id,
+                "appSecret": self.app_secret,
+            },
+        )
+        payload = self._unwrap(data, "获取 accessToken 失败")
+        access_token = payload.get("accessToken")
+        if not access_token:
+            raise RuntimeError("token 接口未返回 accessToken")
+
+        self.token = Token(
+            access_token=access_token,
+            # Fall back to 604800 s (7 days) when the response omits expiresIn.
+            expires_at=time.time() + int(payload.get("expiresIn") or 604800),
+        )
+        return access_token
+
+    @staticmethod
+    def _unwrap(data: Any, failure_prefix: str) -> dict[str, Any]:
+        """Validate the response envelope and return its ``data`` object.
+
+        A body that is not a JSON object is treated as an error response
+        instead of failing later with AttributeError, and an integer
+        ``errorCode`` of 0 is accepted like the string "0".
+
+        Raises:
+            RuntimeError: ``errorCode`` is missing or not zero.
+        """
+        envelope = data if isinstance(data, dict) else {}
+        code = envelope.get("errorCode")
+        if str(code) != "0":
+            raise RuntimeError(envelope.get("errorMsg") or f"{failure_prefix} {code}")
+        payload = envelope.get("data")
+        return payload if isinstance(payload, dict) else {}
+
+    def _get_json(self, url: str, params: dict[str, Any]) -> Any:
+        """Issue a GET request with *params* as query string and decode the JSON body."""
+        query = urllib.parse.urlencode(params)
+        with urllib.request.urlopen(f"{url}?{query}", timeout=self.timeout) as response:
+            body = response.read().decode("utf-8")
+        return json.loads(body)
+
+    def _sign(self, params: dict[str, Any]) -> str:
+        """Return the upper-case MD5 signature of *params*."""
+        # Signing rule: appSecret + key/value pairs sorted by key + appSecret.
+        pairs = "".join(f"{key}{params[key]}" for key in sorted(params))
+        raw = f"{self.app_secret}{pairs}{self.app_secret}"
+        return hashlib.md5(raw.encode("utf-8")).hexdigest().upper()
+
+    @staticmethod
+    def _timestamp() -> str:
+        """Return the current time at UTC+8 formatted as ``YYYY-MM-DD HH:MM:SS``."""
+        return datetime.now(timezone(timedelta(hours=8))).strftime("%Y-%m-%d %H:%M:%S")
--- a/app/config.py
+++ b/app/config.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+@dataclass(frozen=True)
+class Settings:
+    """Immutable application settings, populated from environment variables by load_settings()."""
+
+    # Fixed stream URL (STREAM_URL); empty string when not configured.
+    stream_url: str
+    api_base_url: str
+    app_id: str
+    app_secret: str
+    # Path of the device list file (DEVICE_LIST_PATH).
+    device_list_path: str
+    device_account: str
+    # API method name used for the stream URL request (STREAM_METHOD).
+    stream_method: str
+    detr_model: str
+    # Detection score threshold (DETR_CONFIDENCE).
+    confidence: float
+    # FRAME_SKIP, forced to be at least 1.
+    frame_skip: int
+    # JPEG_QUALITY, clamped to 1..100.
+    jpeg_quality: int
+    # RESIZE_WIDTH; None when unset or not positive.
+    resize_width: int | None
+    # Label names kept by the detector (VEHICLE_LABELS, comma separated).
+    vehicle_labels: set[str]
+
+
+def _optional_int(name: str) -> int | None:
+    """Read environment variable *name* as a positive integer.
+
+    Returns None when the variable is unset, blank, zero or negative.
+    A non-numeric value raises ValueError.
+    """
+    raw = os.getenv(name, "").strip()
+    if raw:
+        number = int(raw)
+        if number > 0:
+            return number
+    return None
+
+
+def load_settings() -> Settings:
+    """Build Settings from environment variables, applying defaults and clamping.
+
+    Numeric variables that are present but blank (for example ``FRAME_SKIP=``
+    in a .env file) fall back to their default instead of raising ValueError.
+    """
+
+    def _numeric_env(name: str, default: str) -> str:
+        # A blank value reaches os.getenv as "", which int()/float() reject,
+        # so treat it the same as an unset variable.
+        return os.getenv(name, "").strip() or default
+
+    stream_url = os.getenv("STREAM_URL", "").strip()
+
+    vehicle_labels = {
+        item.strip()
+        for item in os.getenv("VEHICLE_LABELS", "car,motorcycle,bus,truck,bicycle").split(",")
+        if item.strip()
+    }
+
+    return Settings(
+        stream_url=stream_url,
+        api_base_url=os.getenv("API_BASE_URL", "https://apicapacity.51iwifi.com"),
+        app_id=os.getenv("APP_ID", ""),
+        app_secret=os.getenv("APP_SECRET", ""),
+        device_list_path=os.getenv("DEVICE_LIST_PATH", "devicelist.env"),
+        device_account=os.getenv("DEVICE_ACCOUNT", "21cn"),
+        stream_method=os.getenv("STREAM_METHOD", "capacity.geye.device.devUrl.get"),
+        detr_model=os.getenv("DETR_MODEL", "facebook/detr-resnet-50"),
+        confidence=float(_numeric_env("DETR_CONFIDENCE", "0.6")),
+        # At least every frame (1); never zero or negative.
+        frame_skip=max(1, int(_numeric_env("FRAME_SKIP", "3"))),
+        # Clamp to the 1..100 range.
+        jpeg_quality=min(100, max(1, int(_numeric_env("JPEG_QUALITY", "80")))),
+        resize_width=_optional_int("RESIZE_WIDTH"),
+        vehicle_labels=vehicle_labels,
+    )
+
+
+def mask_url(url: str) -> str:
+    """Hide the token in *url*.
+
+    Everything after the first ``token=`` is replaced by ``***``; a URL
+    without ``token=`` is returned unchanged.
+    """
+    head, marker, _ = url.partition("token=")
+    return f"{head}token=***" if marker else url
--- a/app/detector.py
+++ b/app/detector.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from typing import Any
+
+import torch
+from PIL import Image
+from transformers import DetrForObjectDetection, DetrImageProcessor
+
+
+class DetrVehicleDetector:
+    """DETR object detector that keeps only detections with configured labels."""
+
+    def __init__(self, model_name: str, confidence: float, vehicle_labels: set[str]):
+        """Load processor and model for *model_name* and move the model to the selected device."""
+        self.confidence = confidence
+        self.vehicle_labels = vehicle_labels
+        self.device = self._select_device()
+        self.processor = DetrImageProcessor.from_pretrained(model_name)
+        self.model = DetrForObjectDetection.from_pretrained(model_name)
+        self.model.to(self.device)
+        self.model.eval()
+
+    @staticmethod
+    def _select_device() -> torch.device:
+        """Pick CUDA when available, then Apple MPS, otherwise the CPU."""
+        if torch.cuda.is_available():
+            return torch.device("cuda")
+        # Older torch builds have no ``torch.backends.mps`` attribute at all.
+        mps = getattr(torch.backends, "mps", None)
+        if mps is not None and mps.is_available():
+            return torch.device("mps")
+        return torch.device("cpu")
+
+    @property
+    def device_name(self) -> str:
+        """Type string of the torch device in use."""
+        return self.device.type
+
+    @torch.no_grad()
+    def detect(self, frame_rgb: Any) -> list[dict[str, Any]]:
+        """Run detection on one frame.
+
+        Args:
+            frame_rgb: image array accepted by ``PIL.Image.fromarray``
+                (callers in this project pass an RGB frame).
+
+        Returns:
+            One dict per kept detection with ``label``, ``score`` (rounded to
+            4 decimals) and ``box`` as integer ``[x1, y1, x2, y2]``.
+        """
+        image = Image.fromarray(frame_rgb)
+        inputs = self.processor(images=image, return_tensors="pt")
+        inputs = {key: value.to(self.device) for key, value in inputs.items()}
+
+        outputs = self.model(**inputs)
+        # Post-processing needs the original size; PIL reports (width, height),
+        # so reverse it to (height, width).
+        target_sizes = torch.tensor([image.size[::-1]], device=self.device)
+        results = self.processor.post_process_object_detection(
+            outputs,
+            target_sizes=target_sizes,
+            threshold=self.confidence,
+        )[0]
+
+        # Move each result tensor to the host once instead of once per detection.
+        scores = results["scores"].detach().cpu().tolist()
+        label_ids = results["labels"].detach().cpu().tolist()
+        boxes = results["boxes"].detach().cpu().tolist()
+        id2label = self.model.config.id2label
+
+        detections = []
+        for score, label_id, box in zip(scores, label_ids, boxes):
+            label_name = id2label[label_id]
+            if label_name not in self.vehicle_labels:
+                continue
+            detections.append(
+                {
+                    "label": label_name,
+                    "score": round(score, 4),
+                    # int() truncates toward zero, like the previous astype(int).
+                    "box": [int(value) for value in box],
+                }
+            )
+
+        return detections
--- a/app/device_manager.py
+++ b/app/device_manager.py
@@ -0,0 +1,91 @@
+from __future__ import annotations
+
+import threading
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from app.capacity_api import CapacityApiClient
+
+
+@dataclass(frozen=True)
+class Device:
+    """One selectable camera: display name plus the device number sent to the API."""
+
+    name: str
+    device_num: str
+
+
+class DeviceManager:
+    """Tracks the selected device and resolves its stream URL through the API client."""
+
+    def __init__(self, path: str, api_client: CapacityApiClient, fallback_url: str = ""):
+        """Load the device list from *path* and select its first entry, if any."""
+        self.devices = self._load_devices(path)
+        self.api_client = api_client
+        self.fallback_url = fallback_url
+        self.lock = threading.Lock()
+        first_device_num = self.devices[0].device_num if self.devices else ""
+        self.current_device_num = first_device_num
+        self.current_url = fallback_url
+        self.timings: dict[str, float] = {}
+        self.updated_at = 0.0
+        self.version = 0
+
+    def set_current_device(self, device_num: str) -> int:
+        """Select *device_num*, clear the cached URL and return the new version.
+
+        Raises:
+            ValueError: *device_num* is not in the loaded device list.
+        """
+        is_known = any(device.device_num == device_num for device in self.devices)
+        if not is_known:
+            raise ValueError("设备不在 devicelist.env 中")
+        with self.lock:
+            self.version += 1
+            self.current_device_num = device_num
+            self.current_url = ""
+            self.timings = {}
+            self.updated_at = time.time()
+            return self.version
+
+    def resolve_stream_url(self) -> str:
+        """Return the stream URL for the selected device.
+
+        Without a selected device the fallback URL is returned.
+
+        Raises:
+            RuntimeError: no device is selected and no fallback URL exists.
+        """
+        with self.lock:
+            requested_version = self.version
+            requested_device = self.current_device_num
+        if not requested_device:
+            if not self.fallback_url:
+                raise RuntimeError("devicelist.env 中没有可用设备号")
+            return self.fallback_url
+
+        details = self.api_client.get_stream_url_details(requested_device)
+        with self.lock:
+            # A slow response for the previous camera must not overwrite the
+            # selection the user has just switched to.
+            unchanged = (
+                requested_version == self.version
+                and requested_device == self.current_device_num
+            )
+            if not unchanged:
+                return self.current_url
+            self.current_url = details.url
+            self.timings = dict(details.timings)
+            self.updated_at = time.time()
+        return details.url
+
+    def get_snapshot(self) -> dict[str, Any]:
+        """Return the device list and current source state, read under the lock."""
+        with self.lock:
+            snapshot = {
+                "devices": [vars(device) for device in self.devices],
+                "current_device_num": self.current_device_num,
+                "current_url": self.current_url,
+                "source_timings": dict(self.timings),
+                "source_updated_at": self.updated_at,
+            }
+        return snapshot
+
+    @staticmethod
+    def _load_devices(path: str) -> list[Device]:
+        """Parse the device list file at *path*; a missing file yields an empty list.
+
+        Blank lines and lines starting with ``#`` are skipped. A line is
+        either ``name=num[,num...]`` or a bare device number.
+        """
+        loaded: list[Device] = []
+        source = Path(path)
+        if not source.exists():
+            return loaded
+
+        for raw_line in source.read_text(encoding="utf-8").splitlines():
+            entry = raw_line.strip()
+            if not entry or entry.startswith("#"):
+                continue
+            label, separator, remainder = entry.partition("=")
+            if separator:
+                numbers = [part.strip() for part in remainder.split(",") if part.strip()]
+                display_name = label.strip()
+            else:
+                numbers = [entry]
+                display_name = "摄像头"
+            single = len(numbers) == 1
+            for device_num in numbers:
+                # Several numbers on one line get generated, numbered names.
+                name = display_name if single else f"摄像头 {len(loaded) + 1}"
+                loaded.append(Device(name, device_num))
+        return loaded
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+import asyncio
+
+from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
+from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+
+from app.capacity_api import CapacityApiClient
+from app.config import load_settings, mask_url
+from app.detector import DetrVehicleDetector
+from app.device_manager import DeviceManager
+from app.stream_worker import StreamWorker
+
+# Module-level wiring: everything below runs once when the module is imported.
+settings = load_settings()
+api_client = CapacityApiClient(
+    base_url=settings.api_base_url,
+    app_id=settings.app_id,
+    app_secret=settings.app_secret,
+    account=settings.device_account,
+    method=settings.stream_method,
+)
+# Constructing the detector loads the model (see DetrVehicleDetector.__init__).
+detector = DetrVehicleDetector(
+    model_name=settings.detr_model,
+    confidence=settings.confidence,
+    vehicle_labels=settings.vehicle_labels,
+)
+device_manager = DeviceManager(
+    path=settings.device_list_path,
+    api_client=api_client,
+    fallback_url=settings.stream_url,
+)
+def resolve_stream_url() -> str:
+    """Resolve the stream URL of the currently selected device."""
+    return device_manager.resolve_stream_url()
+
+
+# The worker receives the resolver callable rather than a fixed URL.
+worker = StreamWorker(
+    stream_url=resolve_stream_url,
+    detector=detector,
+    frame_skip=settings.frame_skip,
+    jpeg_quality=settings.jpeg_quality,
+    resize_width=settings.resize_width,
+)
+
+app = FastAPI(title="DETR 动态打标")
+app.mount("/static", StaticFiles(directory="app/static"), name="static")
+templates = Jinja2Templates(directory="app/templates")
+
+
+def display_model_name(model_name: str) -> str:
+    """Return the part of *model_name* after its last ``/`` (the whole name if there is none)."""
+    _, _, short_name = model_name.rpartition("/")
+    return short_name
+
+
+@app.on_event("startup")
+def startup() -> None:
+    """Start the background stream worker when the application starts."""
+    worker.start()
+
+
+@app.on_event("shutdown")
+def shutdown() -> None:
+    """Stop the background stream worker when the application shuts down."""
+    worker.stop()
+
+
+@app.get("/", response_class=HTMLResponse)
+def index(request: Request) -> HTMLResponse:
+    """Render the dashboard page with model, compute device and selected device number."""
+    current_device_num = device_manager.get_snapshot()["current_device_num"]
+    context = {
+        "request": request,
+        "model": display_model_name(settings.detr_model),
+        "device": detector.device_name,
+        "stream_url": f"设备号：{current_device_num}",
+    }
+    return templates.TemplateResponse("index.html", context)
+
+
+@app.get("/video")
+def video() -> StreamingResponse:
+    """Stream the worker's latest JPEG frames as a multipart/x-mixed-replace response."""
+
+    async def generate():
+        while True:
+            frame = worker.get_jpeg()
+            if frame is not None:
+                yield b"".join((b"--frame\r\nContent-Type: image/jpeg\r\n\r\n", frame, b"\r\n"))
+                pause = 0.03
+            else:
+                # No frame published yet: poll less often.
+                pause = 0.1
+            await asyncio.sleep(pause)
+
+    return StreamingResponse(
+        generate(),
+        media_type="multipart/x-mixed-replace; boundary=frame",
+    )
+
+
+@app.get("/detections")
+def detections() -> JSONResponse:
+    """Return frame id, update time and detections from the worker snapshot."""
+    snapshot = worker.get_snapshot()
+    exposed = ("frame_id", "updated_at", "detections")
+    return JSONResponse({field: snapshot[field] for field in exposed})
+
+
+@app.get("/status")
+def status() -> JSONResponse:
+    """Return worker state, device state and configuration as one JSON object."""
+    worker_state = worker.get_snapshot()
+    device_state = device_manager.get_snapshot()
+    current_url = device_state["current_url"]
+
+    body = {
+        field: worker_state[field]
+        for field in ("running", "connected", "frame_id", "updated_at", "fps", "error")
+    }
+    body["source"] = mask_url(current_url) if current_url else "等待获取播放地址"
+    body["model"] = display_model_name(settings.detr_model)
+    body["device"] = detector.device_name
+    body["frame_skip"] = settings.frame_skip
+    body["confidence"] = settings.confidence
+    body["devices"] = device_state["devices"]
+    body["current_device_num"] = device_state["current_device_num"]
+    # Merge URL-resolution timings with the worker's stream timings so the
+    # front end can render a single timings object; worker keys win.
+    body["timings"] = {**device_state["source_timings"], **worker_state["timings"]}
+    return JSONResponse(body)
+
+
+@app.post("/devices/{device_num}")
+def switch_device(device_num: str) -> JSONResponse:
+    """Select *device_num* and ask the worker to reconnect; unknown devices give 404."""
+    try:
+        device_manager.set_current_device(device_num)
+    except ValueError as unknown_device:
+        raise HTTPException(status_code=404, detail=str(unknown_device)) from unknown_device
+    else:
+        worker.reconnect()
+        return JSONResponse({"current_device_num": device_num})
+
+
+@app.websocket("/ws/detections")
+async def websocket_detections(websocket: WebSocket) -> None:
+    """Push the worker snapshot, enriched with device state, every 0.5 seconds."""
+    await websocket.accept()
+    try:
+        while True:
+            message = worker.get_snapshot()
+            device_state = device_manager.get_snapshot()
+            current_url = device_state["current_url"]
+            message["devices"] = device_state["devices"]
+            message["current_device_num"] = device_state["current_device_num"]
+            message["source"] = mask_url(current_url) if current_url else "等待获取播放地址"
+            # Worker timings override source timings on key clashes.
+            message["timings"] = {**device_state["source_timings"], **message["timings"]}
+            await websocket.send_json(message)
+            await asyncio.sleep(0.5)
+    except WebSocketDisconnect:
+        return
--- a/app/static/app.js
+++ b/app/static/app.js
@@ -0,0 +1,142 @@
+// DOM elements updated by this script; looked up once at load time.
+const connection = document.querySelector("#connection");
+const detectionsEl = document.querySelector("#detections");
+const frameIdEl = document.querySelector("#frame-id");
+const fpsEl = document.querySelector("#fps");
+const errorEl = document.querySelector("#error");
+const sourceEl = document.querySelector("#source");
+const deviceSelect = document.querySelector("#device-select");
+const timingTokenEl = document.querySelector("#timing-token");
+const timingSignEl = document.querySelector("#timing-sign");
+const timingUrlEl = document.querySelector("#timing-url");
+const timingOpenEl = document.querySelector("#timing-open");
+const timingFrameEl = document.querySelector("#timing-frame");
+
+// Device number currently shown as selected in the dropdown.
+let selectedDevice = "";
+// Device number of a switch that the server has not yet reported as current.
+let pendingDevice = "";
+// Signature of the last rendered option list; used to skip redundant re-renders.
+let devicesSignature = "";
+
+// Show `text` in the connection badge and mark it online or offline.
+function setConnection(online, text) {
+  const { classList } = connection;
+  connection.textContent = text;
+  classList.toggle("online", online);
+  classList.toggle("offline", !online);
+}
+
+// Format a millisecond value with two decimals; undefined, null and 0 render as "-".
+function formatMs(value) {
+  const missing = value === undefined || value === null || value === 0;
+  return missing ? "-" : `${Number(value).toFixed(2)} ms`;
+}
+
+// Rebuild the camera dropdown when the device list or the displayed selection changes.
+// Options are created through the DOM API so device names and numbers are
+// always treated as text, never as markup.
+function renderDevices(devices, currentDeviceNum) {
+  if (!devices.length) {
+    deviceSelect.innerHTML = '<option value="">未配置摄像头</option>';
+    deviceSelect.disabled = true;
+    // Forget the last signature so a later non-empty list is rendered again.
+    devicesSignature = "";
+    return;
+  }
+
+  // While a switch is pending, keep showing the device the user picked.
+  const displayDevice = pendingDevice || currentDeviceNum;
+  const nextSignature = `${displayDevice}|${devices.map((device) => device.device_num).join(",")}`;
+  if (nextSignature === devicesSignature) {
+    return;
+  }
+
+  selectedDevice = displayDevice;
+  devicesSignature = nextSignature;
+  const options = devices.map((device) => {
+    const option = document.createElement("option");
+    option.value = device.device_num;
+    option.textContent = `${device.name} · ${device.device_num}`;
+    option.selected = device.device_num === displayDevice;
+    return option;
+  });
+  deviceSelect.replaceChildren(...options);
+  // Undo the disabled state set by the "no devices" branch above.
+  deviceSelect.disabled = false;
+}
+
+// Fill the timing panel; the token row shows "缓存" when the token came from cache.
+function renderTimings(timings) {
+  const values = timings ?? {};
+  timingTokenEl.textContent = values.token_cache ? "缓存" : formatMs(values.token_ms);
+  const rows = [
+    [timingSignEl, "sign_ms"],
+    [timingUrlEl, "stream_url_ms"],
+    [timingOpenEl, "open_ms"],
+    [timingFrameEl, "first_frame_ms"],
+  ];
+  for (const [element, key] of rows) {
+    element.textContent = formatMs(values[key]);
+  }
+}
+
+// Render the detection list, or the empty placeholder when there is nothing to show.
+// Items are built through the DOM API so labels are inserted as text, not markup.
+function renderDetections(detections) {
+  if (!detections.length) {
+    detectionsEl.className = "detections empty";
+    detectionsEl.textContent = "暂无目标";
+    return;
+  }
+
+  detectionsEl.className = "detections";
+  const items = detections.map((det) => {
+    const label = document.createElement("span");
+    label.textContent = det.label;
+    const score = document.createElement("span");
+    score.textContent = `${(det.score * 100).toFixed(1)}%`;
+
+    const title = document.createElement("div");
+    title.className = "det-title";
+    title.append(label, score);
+
+    const box = document.createElement("div");
+    box.className = "det-box";
+    box.textContent = `box: [${det.box.join(", ")}]`;
+
+    const item = document.createElement("div");
+    item.className = "det-item";
+    item.append(title, box);
+    return item;
+  });
+  detectionsEl.replaceChildren(...items);
+}
+
+// Ask the server to switch cameras, then reload the video stream.
+// Rejects when the request fails; the caller resets the UI state.
+async function switchDevice(deviceNum) {
+  pendingDevice = deviceNum;
+  devicesSignature = "";
+  setConnection(false, "切换中");
+  const endpoint = `/devices/${encodeURIComponent(deviceNum)}`;
+  const { ok } = await fetch(endpoint, { method: "POST" });
+  if (!ok) {
+    throw new Error("切换摄像头失败");
+  }
+  // The timestamp query makes the browser open a fresh /video request.
+  document.querySelector("#video").src = `/video?t=${Date.now()}`;
+}
+
+// Open the detections WebSocket, render every message and reconnect after a close.
+function connectWebSocket() {
+  const scheme = window.location.protocol === "https:" ? "wss" : "ws";
+  const socket = new WebSocket(`${scheme}://${window.location.host}/ws/detections`);
+
+  const handleMessage = (event) => {
+    const state = JSON.parse(event.data);
+    frameIdEl.textContent = state.frame_id ?? "-";
+    fpsEl.textContent = state.fps ?? "-";
+    errorEl.textContent = state.error || (state.connected ? "正常" : "未连接");
+    sourceEl.textContent = state.source || "-";
+    setConnection(Boolean(state.connected), state.connected ? "已连接" : "重连中");
+    // The server now reports the device the user asked for: the switch is done.
+    if (pendingDevice && state.current_device_num === pendingDevice) {
+      pendingDevice = "";
+      deviceSelect.disabled = false;
+    }
+    renderDevices(state.devices || [], state.current_device_num || "");
+    renderTimings(state.timings || {});
+    renderDetections(state.detections || []);
+  };
+
+  const handleClose = () => {
+    setConnection(false, "已断开");
+    setTimeout(connectWebSocket, 1500);
+  };
+
+  const handleError = () => {
+    setConnection(false, "连接错误");
+    socket.close();
+  };
+
+  socket.addEventListener("open", () => setConnection(true, "已连接"));
+  socket.addEventListener("message", handleMessage);
+  socket.addEventListener("close", handleClose);
+  socket.addEventListener("error", handleError);
+}
+
+// Switch cameras when the user picks another entry; on failure clear the
+// pending state and signature so the next server message re-renders the dropdown.
+deviceSelect.addEventListener("change", (event) => {
+  selectedDevice = event.target.value;
+  switchDevice(event.target.value).catch(() => {
+    pendingDevice = "";
+    devicesSignature = "";
+    setConnection(false, "切换失败");
+    deviceSelect.disabled = false;
+  });
+});
+
+// Start the live updates as soon as the script loads.
+connectWebSocket();
--- a/app/static/style.css
+++ b/app/static/style.css
@@ -0,0 +1,209 @@
+/* Colour tokens shared by the rules below. */
+:root {
+  color-scheme: dark;
+  --bg: #0c1017;
+  --panel: #151b26;
+  --panel-2: #101722;
+  --text: #eef4ff;
+  --muted: #8f9db3;
+  --line: #273246;
+  --green: #2ee887;
+  --yellow: #f7c948;
+  --red: #ff6b6b;
+}
+
+* {
+  box-sizing: border-box;
+}
+
+/* Page background, text colour and font. */
+body {
+  margin: 0;
+  min-height: 100vh;
+  background: radial-gradient(circle at top left, #182235, var(--bg) 45%);
+  color: var(--text);
+  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+}
+
+/* Header bar with a translucent, blurred background. */
+.topbar {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 24px;
+  padding: 22px 28px;
+  border-bottom: 1px solid var(--line);
+  background: rgba(12, 16, 23, 0.82);
+  /* Prefixed form for Safari versions that lack unprefixed backdrop-filter. */
+  -webkit-backdrop-filter: blur(14px);
+  backdrop-filter: blur(14px);
+}
+
+/* Typography resets and sizes. */
+h1,
+h2,
+p {
+  margin: 0;
+}
+
+h1 {
+  font-size: 24px;
+  letter-spacing: 0.02em;
+}
+
+h2 {
+  margin-bottom: 14px;
+  font-size: 18px;
+}
+
+p {
+  margin-top: 8px;
+  color: var(--muted);
+}
+
+/* Connection badge; app.js toggles the .online / .offline classes. */
+.badge {
+  min-width: 86px;
+  padding: 8px 12px;
+  border: 1px solid var(--line);
+  border-radius: 999px;
+  color: var(--yellow);
+  text-align: center;
+  background: var(--panel);
+}
+
+.badge.online {
+  color: var(--green);
+}
+
+.badge.offline {
+  color: var(--red);
+}
+
+/* Two-column page grid: flexible video column plus a 360px side column. */
+.layout {
+  display: grid;
+  grid-template-columns: minmax(0, 1fr) 360px;
+  gap: 18px;
+  padding: 18px;
+}
+
+.video-card,
+.side-card {
+  border: 1px solid var(--line);
+  border-radius: 18px;
+  background: rgba(21, 27, 38, 0.9);
+  box-shadow: 0 18px 40px rgba(0, 0, 0, 0.28);
+}
+
+.video-card {
+  overflow: hidden;
+}
+
+/* Pipeline stage strip above the video; scrolls horizontally when too wide. */
+.pipeline {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  padding: 14px;
+  border-bottom: 1px solid var(--line);
+  overflow-x: auto;
+}
+
+.stage {
+  flex: 0 0 auto;
+  padding: 9px 12px;
+  border: 1px solid var(--line);
+  border-radius: 10px;
+  color: var(--muted);
+  background: var(--panel-2);
+}
+
+.stage.active {
+  border-color: rgba(46, 232, 135, 0.5);
+  color: var(--green);
+}
+
+.arrow {
+  color: var(--muted);
+}
+
+/* Video area: centred image on a dark backdrop. */
+.video-wrap {
+  display: grid;
+  place-items: center;
+  min-height: 420px;
+  background: #05070b;
+}
+
+#video {
+  display: block;
+  width: 100%;
+  height: auto;
+  max-height: calc(100vh - 190px);
+  object-fit: contain;
+}
+
+/* Side panel: stacked sections. */
+.side-card {
+  display: flex;
+  flex-direction: column;
+  gap: 22px;
+  padding: 18px;
+}
+
+/* Label/value grid used for the status and timing lists. */
+.status-grid {
+  display: grid;
+  grid-template-columns: 72px minmax(0, 1fr);
+  gap: 10px 12px;
+  margin: 0;
+}
+
+.status-grid dt {
+  color: var(--muted);
+}
+
+.status-grid dd {
+  margin: 0;
+  word-break: break-all;
+}
+
+.device-select {
+  width: 100%;
+  min-height: 34px;
+  border: 1px solid var(--line);
+  border-radius: 8px;
+  color: var(--text);
+  background: var(--panel-2);
+}
+
+/* Detection list; app.js adds the .empty class when there are no detections. */
+.detections {
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+}
+
+.detections.empty {
+  color: var(--muted);
+}
+
+.det-item {
+  padding: 12px;
+  border: 1px solid var(--line);
+  border-radius: 12px;
+  background: var(--panel-2);
+}
+
+.det-title {
+  display: flex;
+  justify-content: space-between;
+  margin-bottom: 8px;
+  color: var(--green);
+  font-weight: 700;
+}
+
+.det-box {
+  color: var(--muted);
+  font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
+  font-size: 12px;
+}
+
+/* Narrow screens: single column and a stacked header. */
+@media (max-width: 980px) {
+  .layout {
+    grid-template-columns: 1fr;
+  }
+
+  .topbar {
+    align-items: flex-start;
+    flex-direction: column;
+  }
+}
--- a/app/stream_worker.py
+++ b/app/stream_worker.py
@@ -0,0 +1,221 @@
+from __future__ import annotations
+
+import threading
+import time
+from typing import Any, Callable
+
+import cv2
+
+
+class StreamWorker:
+    """Background thread that reads a video stream, runs the detector and publishes frames.
+
+    The latest annotated JPEG, the detections and the connection state are
+    shared with other threads through ``self.lock``.
+    """
+
+    def __init__(
+        self,
+        stream_url: str | Callable[[], str],
+        detector: Any,
+        frame_skip: int = 3,
+        jpeg_quality: int = 80,
+        resize_width: int | None = None,
+        retry_delay: float = 2.0,
+    ):
+        """Store the configuration; the thread is created by start().
+
+        Args:
+            stream_url: a URL, or a callable returning the URL to open.
+            detector: object with ``detect(frame_rgb)`` returning dicts with
+                ``label``, ``score`` and ``box`` keys.
+            frame_skip: run detection on every N-th frame (at least 1).
+            jpeg_quality: quality passed to the JPEG encoder.
+            resize_width: downscale wider frames to this width; falsy disables.
+            retry_delay: seconds to wait before retrying after a failure.
+        """
+        self.stream_url = stream_url
+        self.detector = detector
+        self.frame_skip = max(1, frame_skip)
+        self.jpeg_quality = jpeg_quality
+        self.resize_width = resize_width
+        self.retry_delay = retry_delay
+
+        self.lock = threading.Lock()
+        self.latest_jpeg: bytes | None = None
+        self.latest_detections: list[dict[str, Any]] = []
+        self.frame_id = 0
+        self.updated_at = 0.0
+        self.running = False
+        self.connected = False
+        self.error = "尚未启动"
+        self.fps = 0.0
+        self.thread: threading.Thread | None = None
+        self.reconnect_requested = False
+        self.reconnect_version = 0
+        self.resolve_ms = 0.0
+        self.open_ms = 0.0
+        self.first_frame_ms = 0.0
+
+    def start(self) -> None:
+        """Start the worker thread; does nothing when already running."""
+        if self.running:
+            return
+
+        self.running = True
+        self.thread = threading.Thread(target=self._run, daemon=True)
+        self.thread.start()
+
+    def stop(self) -> None:
+        """Ask the loop to exit and wait up to 2 seconds for the thread."""
+        self.running = False
+        if self.thread and self.thread.is_alive():
+            self.thread.join(timeout=2)
+
+    def reconnect(self) -> None:
+        """Drop published state and request that the stream be reopened."""
+        with self.lock:
+            self.latest_jpeg = None
+            self.latest_detections = []
+            self.frame_id = 0
+            self.fps = 0.0
+            self.reconnect_requested = True
+            self.reconnect_version += 1
+            self.connected = False
+            self.error = "正在切换视频源"
+            self.resolve_ms = 0.0
+            self.open_ms = 0.0
+            self.first_frame_ms = 0.0
+
+    def get_jpeg(self) -> bytes | None:
+        """Return the latest encoded frame, or None when there is none."""
+        with self.lock:
+            return self.latest_jpeg
+
+    def get_snapshot(self) -> dict[str, Any]:
+        """Return a copy of the published state, read under the lock."""
+        with self.lock:
+            return {
+                "frame_id": self.frame_id,
+                "updated_at": self.updated_at,
+                "detections": list(self.latest_detections),
+                "running": self.running,
+                "connected": self.connected,
+                "error": self.error,
+                "fps": round(self.fps, 2),
+                "timings": {
+                    "resolve_ms": self.resolve_ms,
+                    "open_ms": self.open_ms,
+                    "first_frame_ms": self.first_frame_ms,
+                },
+            }
+
+    def _run(self) -> None:
+        """Thread body: connect, read, detect, annotate and publish until stopped."""
+        cap: cv2.VideoCapture | None = None
+        last_detections: list[dict[str, Any]] = []
+        fps_window_start = time.monotonic()
+        fps_frames = 0
+
+        while self.running:
+            with self.lock:
+                should_reconnect = self.reconnect_requested
+                run_version = self.reconnect_version
+                self.reconnect_requested = False
+            if should_reconnect and cap is not None:
+                # The old connection must be released when switching cameras,
+                # otherwise OpenCV keeps blocking on reads from the old stream.
+                cap.release()
+                cap = None
+
+            if cap is None or not cap.isOpened():
+                if cap is not None:
+                    # Release a capture that closed by itself before replacing it.
+                    cap.release()
+                cap = self._open_capture()
+                if cap is None:
+                    time.sleep(self.retry_delay)
+                    continue
+
+            started = time.monotonic()
+            ok, frame = cap.read()
+            read_ms = round((time.monotonic() - started) * 1000, 2)
+            with self.lock:
+                stale = self.reconnect_version != run_version
+            # Drop frames read from the old connection during a switch so the
+            # picture in the front end does not jump back.
+            if stale:
+                continue
+            if not ok:
+                self._set_connection_state(False, "读取视频帧失败，正在重连")
+                cap.release()
+                cap = None
+                time.sleep(self.retry_delay)
+                continue
+
+            frame = self._resize(frame)
+            with self.lock:
+                # reconnect() resets these fields under the same lock.
+                self.frame_id += 1
+                frame_id = self.frame_id
+                if self.first_frame_ms == 0.0:
+                    # Only a successful read counts as the first frame.
+                    self.first_frame_ms = read_ms
+
+            if frame_id % self.frame_skip == 0:
+                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                last_detections = self.detector.detect(frame_rgb)
+
+            annotated = self._draw(frame, last_detections)
+            ok, jpeg = cv2.imencode(
+                ".jpg",
+                annotated,
+                [cv2.IMWRITE_JPEG_QUALITY, self.jpeg_quality],
+            )
+            if not ok:
+                continue
+            encoded = jpeg.tobytes()
+
+            fps_frames += 1
+            now = time.monotonic()
+            measured_fps: float | None = None
+            if now - fps_window_start >= 1:
+                measured_fps = fps_frames / (now - fps_window_start)
+                fps_window_start = now
+                fps_frames = 0
+
+            with self.lock:
+                # Check and publish in one critical section so a reconnect
+                # cannot slip in between the two.
+                if self.reconnect_version != run_version:
+                    continue
+                self.latest_jpeg = encoded
+                self.latest_detections = list(last_detections)
+                self.updated_at = time.time()
+                self.connected = True
+                self.error = ""
+                if measured_fps is not None:
+                    self.fps = measured_fps
+
+        if cap is not None:
+            cap.release()
+        self._set_connection_state(False, "已停止")
+
+    def _open_capture(self) -> Any:
+        """Resolve the stream URL and open it; return None on any failure.
+
+        Failures are reported through the connection state instead of being
+        raised, so a resolver error does not end the worker thread.
+        """
+        started = time.monotonic()
+        try:
+            stream_url = self.stream_url() if callable(self.stream_url) else self.stream_url
+        except Exception as exc:  # thread boundary: report and let the loop retry
+            self._set_connection_state(False, f"获取播放地址失败：{exc}")
+            return None
+        resolve_ms = round((time.monotonic() - started) * 1000, 2)
+
+        started = time.monotonic()
+        cap = cv2.VideoCapture(stream_url)
+        open_ms = round((time.monotonic() - started) * 1000, 2)
+        with self.lock:
+            self.open_ms = open_ms
+            self.resolve_ms = resolve_ms
+            self.first_frame_ms = 0.0
+        if not cap.isOpened():
+            self._set_connection_state(False, "无法打开视频流，2 秒后重试")
+            cap.release()
+            return None
+        self._set_connection_state(True, "已连接")
+        return cap
+
+    def _resize(self, frame: Any) -> Any:
+        """Downscale *frame* to ``resize_width`` keeping the aspect ratio; never upscale."""
+        if not self.resize_width:
+            return frame
+
+        height, width = frame.shape[:2]
+        if width <= self.resize_width:
+            return frame
+
+        scale = self.resize_width / width
+        return cv2.resize(frame, (self.resize_width, int(height * scale)))
+
+    def _draw(self, frame: Any, detections: list[dict[str, Any]]) -> Any:
+        """Draw a box and a label for each detection onto *frame* in place and return it."""
+        for detection in detections:
+            x1, y1, x2, y2 = detection["box"]
+            label = detection["label"]
+            score = detection["score"]
+
+            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 220, 80), 2)
+            text = f"{label} {score:.2f}"
+            # Filled strip above the box used as the label background.
+            cv2.rectangle(frame, (x1, max(0, y1 - 26)), (x1 + 150, y1), (0, 220, 80), -1)
+            cv2.putText(
+                frame,
+                text,
+                (x1 + 5, max(18, y1 - 7)),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.55,
+                (0, 0, 0),
+                2,
+                cv2.LINE_AA,
+            )
+
+        return frame
+
+    def _set_connection_state(self, connected: bool, error: str) -> None:
+        """Publish the connection flag and status text under the lock."""
+        with self.lock:
+            self.connected = connected
+            self.error = error
+            self.updated_at = time.time()
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -0,0 +1,82 @@
+<!doctype html>
+<!-- Dashboard page; the element ids below are looked up by /static/app.js. -->
+<html lang="zh-CN">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>DETR 动态打标</title>
+    <link rel="stylesheet" href="/static/style.css" />
+  </head>
+  <body>
+    <header class="topbar">
+      <div>
+        <h1>DETR 动态打标</h1>
+        <p>使用 Python、OpenCV、PyTorch、Transformers DETR 和 FastAPI，Mac mini m2 上运行。</p>
+      </div>
+      <!-- Connection badge; text and online/offline class are set by app.js. -->
+      <div class="badge" id="connection">连接中</div>
+    </header>
+
+    <main class="layout">
+      <section class="video-card">
+        <!-- Static pipeline illustration. -->
+        <div class="pipeline">
+          <div class="stage active">源节点</div>
+          <div class="arrow">→</div>
+          <div class="stage active">DETR 推理</div>
+          <div class="arrow">→</div>
+          <div class="stage active">OSD 打标</div>
+          <div class="arrow">→</div>
+          <div class="stage active">FastAPI 输出</div>
+        </div>
+        <div class="video-wrap">
+          <!-- Frames come from the /video endpoint. -->
+          <img id="video" src="/video" alt="动态打标视频流" />
+        </div>
+      </section>
+
+      <aside class="side-card">
+        <section>
+          <h2>运行状态</h2>
+          <dl class="status-grid">
+            <dt>摄像头</dt>
+            <dd>
+              <!-- Options are filled in by app.js. -->
+              <select id="device-select" class="device-select"></select>
+            </dd>
+            <dt>视频源</dt>
+            <dd id="source">{{ stream_url }}</dd>
+            <dt>模型</dt>
+            <dd>{{ model }}</dd>
+            <dt>设备</dt>
+            <dd>{{ device }}</dd>
+            <dt>帧号</dt>
+            <dd id="frame-id">-</dd>
+            <dt>FPS</dt>
+            <dd id="fps">-</dd>
+            <dt>状态</dt>
+            <dd id="error">等待视频帧</dd>
+          </dl>
+        </section>
+
+        <section>
+          <h2>连接耗时</h2>
+          <!-- Timing values are filled in by app.js. -->
+          <dl class="status-grid">
+            <dt>Token</dt>
+            <dd id="timing-token">-</dd>
+            <dt>签名</dt>
+            <dd id="timing-sign">-</dd>
+            <dt>取流地址</dt>
+            <dd id="timing-url">-</dd>
+            <dt>打开流</dt>
+            <dd id="timing-open">-</dd>
+            <dt>首帧</dt>
+            <dd id="timing-frame">-</dd>
+          </dl>
+        </section>
+
+        <section>
+          <h2>检测结果</h2>
+          <div id="detections" class="detections empty">暂无目标</div>
+        </section>
+      </aside>
+    </main>
+
+    <script src="/static/app.js"></script>
+  </body>
+</html>