diff --git a/README_zh.md b/README_zh.md index 23fe2c6..97cd5ed 100644 --- a/README_zh.md +++ b/README_zh.md @@ -49,6 +49,22 @@ - 最大放置时间:`10800` 秒,也就是 3 小时 - 垃圾桶投放确认窗口:`120` 秒 +## 区域标定 + +项目内置一个本地 Web 标定工具,可以从 RTSP 拉取一帧截图,再用鼠标标定 8 个格口和垃圾桶区域: + +```bash +python3 tools/calibrator/server.py --host 127.0.0.1 --port 18090 +``` + +打开: + +```text +http://127.0.0.1:18090 +``` + +详细说明见 `tools/calibrator/README_zh.md`。 + ## 本地测试 ```bash diff --git a/docs/plans/2026-04-27-cold-display-guard-design.md b/docs/plans/2026-04-27-cold-display-guard-design.md index 0ca5e39..c69a663 100644 --- a/docs/plans/2026-04-27-cold-display-guard-design.md +++ b/docs/plans/2026-04-27-cold-display-guard-design.md @@ -111,3 +111,11 @@ The vision layer should output normalized observations: ``` Trash disposal confirmation should use motion/object evidence inside the trash ROI, not merely a person standing near the bin. + +## Calibration Tool + +The project includes a local RTSP snapshot calibration tool under `tools/calibrator`. + +The tool runs a small standard-library HTTP server. The browser submits an RTSP URL to `/api/capture`; the server calls `ffmpeg`, extracts one JPEG frame, and returns it to the browser. The page then lets the operator draw normalized polygons for `r1c1` through `r2c4` plus `trash`. + +This intentionally uses a single captured frame rather than a live preview. Calibration only needs a representative camera view, and a snapshot avoids browser RTSP limitations and live stream transcoding. diff --git a/progress.md b/progress.md index 8dfa22d..acffb6c 100644 --- a/progress.md +++ b/progress.md @@ -9,3 +9,4 @@ - First test run failed because `_end_batch()` set `ended_at` before calculating dwell seconds, causing ended batches to report `0` seconds. Fixed by calculating dwell before assigning `ended_at`. - Test suite now passes: `PYTHONPATH=src python3 -m unittest discover -s tests -v`. - Initialized git repository and created the initial project commit. +- Added RTSP single-frame calibration tool under `tools/calibrator`. diff --git a/tools/calibrator/README_zh.md b/tools/calibrator/README_zh.md new file mode 100644 index 0000000..ef9895b --- /dev/null +++ b/tools/calibrator/README_zh.md @@ -0,0 +1,52 @@ +# RTSP 单帧区域标定工具 + +这个工具用于从 RTSP 摄像头拉取一帧截图,然后在浏览器里标定展示柜格口和垃圾桶区域。 + +## 依赖 + +本机需要安装 `ffmpeg`,并且命令行可直接执行: + +```bash +ffmpeg -version +``` + +## 启动 + +在项目根目录执行: + +```bash +python3 tools/calibrator/server.py --host 127.0.0.1 --port 18090 +``` + +然后打开: + +```text +http://127.0.0.1:18090 +``` + +## 使用步骤 + +1. 输入 RTSP 地址。 +2. 点击“抓取一帧”。 +3. 选择 `r1c1` 到 `r2c4` 中的一个区域。 +4. 在截图上按顺时针或逆时针点击格口顶点。 +5. 每个格口建议标 4 个点;如果透视明显,可以标更多点。 +6. 标完 8 个格口后,选择 `trash` 并标定垃圾桶区域。 +7. 复制右侧生成的 TOML 配置。 +8. 把生成内容合入 `config/example.toml` 或实际部署配置。 + +## 坐标说明 + +导出的坐标是归一化坐标: + +- 左上角是 `[0.0, 0.0]` +- 右下角是 `[1.0, 1.0]` + +这样即使摄像头截图分辨率变化,标定结果也可以复用。 + +## 注意 + +- 标定截图应来自真实安装角度。 +- 标定时展示柜门最好保持日常运行状态。 +- 垃圾桶区域只框垃圾桶开口和投放可见区域,不要框太大。 +- RTSP 密码只会发给本地标定服务,不会保存到项目文件。 diff --git a/tools/calibrator/app.js b/tools/calibrator/app.js new file mode 100644 index 0000000..39fd7ed --- /dev/null +++ b/tools/calibrator/app.js @@ -0,0 +1,256 @@ +const zoneIds = ["r1c1", "r1c2", "r1c3", "r1c4", "r2c1", "r2c2", "r2c3", "r2c4", "trash"]; +const colors = { + r1c1: "#e11d48", + r1c2: "#f97316", + r1c3: "#ca8a04", + r1c4: "#16a34a", + r2c1: "#0891b2", + r2c2: "#2563eb", + r2c3: "#7c3aed", + r2c4: "#db2777", + trash: "#111827", +}; + +const state = { + activeZone: "r1c1", + polygons: Object.fromEntries(zoneIds.map((id) => [id, []])), + image: null, + imageUrl: null, +}; + +const canvas = document.getElementById("canvas"); +const ctx = canvas.getContext("2d"); +const zoneList = document.getElementById("zoneList"); +const statusEl = document.getElementById("status"); +const tomlOutput = document.getElementById("tomlOutput"); + +function init() { + for (const zoneId of zoneIds) { + const button = document.createElement("button"); + button.type = "button"; + button.className = "zone-button"; + button.textContent = zoneId; + button.dataset.zoneId = zoneId; + button.addEventListener("click", () => { + state.activeZone = zoneId; + render(); + }); + zoneList.appendChild(button); + } + + document.getElementById("captureFrame").addEventListener("click", captureFrame); + document.getElementById("undoPoint").addEventListener("click", undoPoint); + document.getElementById("clearZone").addEventListener("click", clearZone); + document.getElementById("clearAll").addEventListener("click", clearAll); + document.getElementById("copyToml").addEventListener("click", copyToml); + canvas.addEventListener("click", addPoint); + window.addEventListener("resize", render); + render(); +} + +async function captureFrame() { + const rtspUrl = document.getElementById("rtspUrl").value.trim(); + if (!rtspUrl) { + setStatus("请输入 RTSP 地址"); + return; + } + + setStatus("正在抓取一帧..."); + try { + const response = await fetch("/api/capture", { + method: "POST", + headers: {"Content-Type": "application/json"}, + body: JSON.stringify({rtsp_url: rtspUrl, timeout_seconds: 12}), + }); + if (!response.ok) { + const payload = await response.json(); + throw new Error(payload.error || `HTTP ${response.status}`); + } + + const blob = await response.blob(); + if (state.imageUrl) { + URL.revokeObjectURL(state.imageUrl); + } + state.imageUrl = URL.createObjectURL(blob); + const image = new Image(); + image.onload = () => { + state.image = image; + canvas.width = image.naturalWidth; + canvas.height = image.naturalHeight; + setStatus(`已抓取 ${image.naturalWidth}x${image.naturalHeight}`); + render(); + }; + image.src = state.imageUrl; + } catch (error) { + setStatus(`抓帧失败:${error.message}`); + } +} + +function addPoint(event) { + if (!state.image) { + setStatus("请先抓取一帧"); + return; + } + const rect = canvas.getBoundingClientRect(); + const scaleX = canvas.width / rect.width; + const scaleY = canvas.height / rect.height; + const point = { + x: clamp(((event.clientX - rect.left) * scaleX) / canvas.width), + y: clamp(((event.clientY - rect.top) * scaleY) / canvas.height), + }; + state.polygons[state.activeZone].push(point); + render(); +} + +function undoPoint() { + state.polygons[state.activeZone].pop(); + render(); +} + +function clearZone() { + state.polygons[state.activeZone] = []; + render(); +} + +function clearAll() { + for (const zoneId of zoneIds) { + state.polygons[zoneId] = []; + } + render(); +} + +async function copyToml() { + const text = tomlOutput.value; + if (!text.trim()) { + setStatus("没有可复制的 TOML"); + return; + } + await navigator.clipboard.writeText(text); + setStatus("TOML 已复制"); +} + +function render() { + renderZoneButtons(); + renderCanvas(); + tomlOutput.value = buildToml(); +} + +function renderZoneButtons() { + for (const button of zoneList.querySelectorAll("button")) { + const zoneId = button.dataset.zoneId; + button.classList.toggle("active", zoneId === state.activeZone); + button.classList.toggle("done", state.polygons[zoneId].length >= 3); + } +} + +function renderCanvas() { + ctx.clearRect(0, 0, canvas.width, canvas.height); + if (state.image) { + ctx.drawImage(state.image, 0, 0, canvas.width, canvas.height); + } else { + ctx.fillStyle = "#111820"; + ctx.fillRect(0, 0, canvas.width, canvas.height); + ctx.fillStyle = "#d9e0e7"; + ctx.font = "22px sans-serif"; + ctx.textAlign = "center"; + ctx.fillText("输入 RTSP 地址后抓取一帧", canvas.width / 2, canvas.height / 2); + } + + for (const zoneId of zoneIds) { + drawPolygon(zoneId, state.polygons[zoneId], zoneId === state.activeZone); + } +} + +function drawPolygon(zoneId, points, active) { + if (!points.length) { + return; + } + const color = colors[zoneId] || "#ffffff"; + ctx.save(); + ctx.strokeStyle = color; + ctx.fillStyle = color; + ctx.lineWidth = active ? 4 : 2; + ctx.globalAlpha = 0.22; + ctx.beginPath(); + points.forEach((point, index) => { + const x = point.x * canvas.width; + const y = point.y * canvas.height; + if (index === 0) { + ctx.moveTo(x, y); + } else { + ctx.lineTo(x, y); + } + }); + if (points.length >= 3) { + ctx.closePath(); + ctx.fill(); + } + ctx.globalAlpha = 1; + ctx.stroke(); + + for (const [index, point] of points.entries()) { + const x = point.x * canvas.width; + const y = point.y * canvas.height; + ctx.beginPath(); + ctx.arc(x, y, 5, 0, Math.PI * 2); + ctx.fill(); + ctx.fillStyle = "#ffffff"; + ctx.font = "12px sans-serif"; + ctx.textAlign = "center"; + ctx.fillText(String(index + 1), x, y - 9); + ctx.fillStyle = color; + } + + const first = points[0]; + ctx.fillStyle = color; + ctx.font = active ? "bold 18px sans-serif" : "14px sans-serif"; + ctx.textAlign = "left"; + ctx.fillText(zoneId, first.x * canvas.width + 8, first.y * canvas.height + 18); + ctx.restore(); +} + +function buildToml() { + const lines = [ + "[layout]", + "rows = 2", + "cols = 4", + `zone_ids = [${zoneIds.filter((id) => id !== "trash").map((id) => quote(id)).join(", ")}]`, + "", + ]; + + for (const zoneId of zoneIds.filter((id) => id !== "trash")) { + const points = state.polygons[zoneId]; + if (points.length < 3) { + continue; + } + lines.push("[[zones]]"); + lines.push(`id = ${quote(zoneId)}`); + lines.push(`polygon = ${formatPoints(points)}`); + lines.push(""); + } + + const trashPoints = state.polygons.trash; + if (trashPoints.length >= 3) { + lines.push("[trash]"); + lines.push(`roi = ${formatPoints(trashPoints)}`); + } + return lines.join("\n"); +} + +function formatPoints(points) { + return `[${points.map((point) => `[${point.x.toFixed(4)}, ${point.y.toFixed(4)}]`).join(", ")}]`; +} + +function quote(value) { + return `"${value}"`; +} + +function clamp(value) { + return Math.min(1, Math.max(0, value)); +} + +function setStatus(message) { + statusEl.textContent = message; +} + +init(); diff --git a/tools/calibrator/index.html b/tools/calibrator/index.html new file mode 100644 index 0000000..70f7670 --- /dev/null +++ b/tools/calibrator/index.html @@ -0,0 +1,52 @@ + + + + + + 冷藏展示柜区域标定 + + + +
+
+
+

冷藏展示柜区域标定

+

从 RTSP 拉取一帧截图,在图上标定 8 个格口和垃圾桶区域。

+
+ +
+ +
+ + + +
+ +
+ + +
+ +
+ + +
+
+ + + diff --git a/tools/calibrator/server.py b/tools/calibrator/server.py new file mode 100644 index 0000000..445c6ae --- /dev/null +++ b/tools/calibrator/server.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import argparse +import json +import mimetypes +import subprocess +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path +from urllib.parse import unquote + + +ROOT = Path(__file__).resolve().parent + + +class CalibratorHandler(BaseHTTPRequestHandler): + server_version = "ColdDisplayCalibrator/0.1" + + def do_GET(self) -> None: + path = "/" if self.path == "/" else unquote(self.path.split("?", 1)[0]) + if path == "/": + self._send_file(ROOT / "index.html") + return + target = (ROOT / path.lstrip("/")).resolve() + if ROOT not in target.parents or not target.is_file(): + self.send_error(404) + return + self._send_file(target) + + def do_POST(self) -> None: + if self.path != "/api/capture": + self.send_error(404) + return + + try: + payload = self._read_json() + rtsp_url = str(payload.get("rtsp_url", "")).strip() + timeout_seconds = float(payload.get("timeout_seconds", 10)) + if not rtsp_url.lower().startswith("rtsp://"): + self._send_json({"error": "rtsp_url must start with rtsp://"}, status=400) + return + image = capture_rtsp_frame(rtsp_url, timeout_seconds) + except CaptureError as exc: + self._send_json({"error": str(exc)}, status=502) + return + except (ValueError, json.JSONDecodeError) as exc: + self._send_json({"error": f"invalid request: {exc}"}, status=400) + return + + self.send_response(200) + self.send_header("Content-Type", "image/jpeg") + self.send_header("Content-Length", str(len(image))) + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(image) + + def log_message(self, format: str, *args: object) -> None: + print(f"{self.address_string()} - {format % args}") + + def _read_json(self) -> dict: + length = int(self.headers.get("Content-Length", "0")) + return json.loads(self.rfile.read(length).decode("utf-8")) + + def _send_file(self, path: Path) -> None: + data = path.read_bytes() + content_type = mimetypes.guess_type(path.name)[0] or "application/octet-stream" + self.send_response(200) + self.send_header("Content-Type", content_type) + self.send_header("Content-Length", str(len(data))) + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(data) + + def _send_json(self, payload: dict, status: int = 200) -> None: + data = json.dumps(payload, ensure_ascii=False).encode("utf-8") + self.send_response(status) + self.send_header("Content-Type", "application/json; charset=utf-8") + self.send_header("Content-Length", str(len(data))) + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(data) + + +class CaptureError(RuntimeError): + pass + + +def capture_rtsp_frame(rtsp_url: str, timeout_seconds: float) -> bytes: + command = [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-rtsp_transport", + "tcp", + "-i", + rtsp_url, + "-frames:v", + "1", + "-f", + "image2pipe", + "-vcodec", + "mjpeg", + "-", + ] + try: + result = subprocess.run( + command, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=max(1.0, timeout_seconds), + ) + except FileNotFoundError as exc: + raise CaptureError("ffmpeg not found; install ffmpeg first") from exc + except subprocess.TimeoutExpired as exc: + raise CaptureError(f"ffmpeg timed out after {timeout_seconds:g}s") from exc + + if result.returncode != 0: + message = result.stderr.decode("utf-8", errors="replace").strip() + raise CaptureError(message or f"ffmpeg exited with code {result.returncode}") + if not result.stdout: + raise CaptureError("ffmpeg returned no image data") + return result.stdout + + +def main() -> int: + parser = argparse.ArgumentParser(description="RTSP snapshot calibration web tool.") + parser.add_argument("--host", default="127.0.0.1") + parser.add_argument("--port", default=18090, type=int) + args = parser.parse_args() + + server = ThreadingHTTPServer((args.host, args.port), CalibratorHandler) + print(f"Calibration server: http://{args.host}:{args.port}") + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nStopping calibration server") + finally: + server.server_close() + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/calibrator/style.css b/tools/calibrator/style.css new file mode 100644 index 0000000..8e69b68 --- /dev/null +++ b/tools/calibrator/style.css @@ -0,0 +1,186 @@ +:root { + color-scheme: light; + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; + background: #f4f6f8; + color: #17202a; +} + +* { + box-sizing: border-box; +} + +body { + margin: 0; +} + +button, +input, +textarea { + font: inherit; +} + +button { + border: 1px solid #9aa7b3; + background: #ffffff; + color: #17202a; + border-radius: 6px; + padding: 8px 12px; + cursor: pointer; +} + +button:hover { + background: #edf2f7; +} + +.app { + min-height: 100vh; + display: flex; + flex-direction: column; +} + +.topbar { + display: flex; + justify-content: space-between; + gap: 16px; + align-items: center; + padding: 16px 20px; + background: #ffffff; + border-bottom: 1px solid #d9e0e7; +} + +h1, +h2, +p { + margin: 0; +} + +h1 { + font-size: 20px; +} + +h2 { + font-size: 15px; + margin-bottom: 10px; +} + +.topbar p, +.hint, +#status { + color: #5d6b78; + font-size: 13px; +} + +.capture { + display: grid; + grid-template-columns: minmax(280px, 1fr) auto minmax(180px, auto); + gap: 12px; + align-items: end; + padding: 14px 20px; + background: #ffffff; + border-bottom: 1px solid #d9e0e7; +} + +.capture label { + display: grid; + gap: 6px; + font-size: 13px; + color: #344454; +} + +.capture input { + width: 100%; + border: 1px solid #9aa7b3; + border-radius: 6px; + padding: 9px 10px; +} + +.workspace { + flex: 1; + min-height: 0; + display: grid; + grid-template-columns: 220px minmax(420px, 1fr) 340px; + gap: 12px; + padding: 12px; +} + +.panel { + background: #ffffff; + border: 1px solid #d9e0e7; + border-radius: 8px; + padding: 12px; + min-height: 0; +} + +.zone-list { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 8px; +} + +.zone-button { + min-height: 36px; +} + +.zone-button.active { + background: #1f6feb; + border-color: #1f6feb; + color: #ffffff; +} + +.zone-button.done::after { + content: " ✓"; +} + +.actions { + display: grid; + gap: 8px; + margin: 14px 0; +} + +.canvas-wrap { + min-width: 0; + min-height: 0; + display: flex; + align-items: center; + justify-content: center; + background: #202832; + border-radius: 8px; + overflow: hidden; +} + +canvas { + width: 100%; + height: 100%; + object-fit: contain; + background: #111820; + cursor: crosshair; +} + +.output-panel { + display: flex; + flex-direction: column; +} + +textarea { + flex: 1; + width: 100%; + min-height: 420px; + resize: none; + border: 1px solid #9aa7b3; + border-radius: 6px; + padding: 10px; + font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; + font-size: 12px; + line-height: 1.45; +} + +@media (max-width: 1100px) { + .workspace { + grid-template-columns: 190px minmax(360px, 1fr); + } + + .output-panel { + grid-column: 1 / -1; + min-height: 260px; + } +}