feat: add rtsp snapshot calibrator

This commit is contained in:
Yoilun
2026-04-27 11:12:00 +08:00
parent 36dc3548e6
commit b3672c564a
8 changed files with 715 additions and 0 deletions

View File

@@ -49,6 +49,22 @@
- 最大放置时间:`10800` 秒,也就是 3 小时
- 垃圾桶投放确认窗口:`120`
## 区域标定
项目内置一个本地 Web 标定工具,可以从 RTSP 拉取一帧截图,再用鼠标标定 8 个格口和垃圾桶区域:
```bash
python3 tools/calibrator/server.py --host 127.0.0.1 --port 18090
```
打开:
```text
http://127.0.0.1:18090
```
详细说明见 `tools/calibrator/README_zh.md`
## 本地测试
```bash

View File

@@ -111,3 +111,11 @@ The vision layer should output normalized observations:
```
Trash disposal confirmation should use motion/object evidence inside the trash ROI, not merely a person standing near the bin.
## Calibration Tool
The project includes a local RTSP snapshot calibration tool under `tools/calibrator`.
The tool runs a small standard-library HTTP server. The browser submits an RTSP URL to `/api/capture`; the server calls `ffmpeg`, extracts one JPEG frame, and returns it to the browser. The page then lets the operator draw normalized polygons for `r1c1` through `r2c4` plus `trash`.
This intentionally uses a single captured frame rather than a live preview. Calibration only needs a representative camera view, and a snapshot avoids browser RTSP limitations and live stream transcoding.

View File

@@ -9,3 +9,4 @@
- First test run failed because `_end_batch()` set `ended_at` before calculating dwell seconds, causing ended batches to report `0` seconds. Fixed by calculating dwell before assigning `ended_at`.
- Test suite now passes: `PYTHONPATH=src python3 -m unittest discover -s tests -v`.
- Initialized git repository and created the initial project commit.
- Added RTSP single-frame calibration tool under `tools/calibrator`.

View File

@@ -0,0 +1,52 @@
# RTSP 单帧区域标定工具
这个工具用于从 RTSP 摄像头拉取一帧截图,然后在浏览器里标定展示柜格口和垃圾桶区域。
## 依赖
本机需要安装 `ffmpeg`,并且命令行可直接执行:
```bash
ffmpeg -version
```
## 启动
在项目根目录执行:
```bash
python3 tools/calibrator/server.py --host 127.0.0.1 --port 18090
```
然后打开:
```text
http://127.0.0.1:18090
```
## 使用步骤
1. 输入 RTSP 地址。
2. 点击“抓取一帧”。
3. 选择 `r1c1` 到 `r2c4` 中的一个区域。
4. 在截图上按顺时针或逆时针点击格口顶点。
5. 每个格口建议标 4 个点;如果透视明显,可以标更多点。
6. 标完 8 个格口后,选择 `trash` 并标定垃圾桶区域。
7. 复制右侧生成的 TOML 配置。
8. 把生成内容合入 `config/example.toml` 或实际部署配置。
## 坐标说明
导出的坐标是归一化坐标:
- 左上角是 `[0.0, 0.0]`
- 右下角是 `[1.0, 1.0]`
这样即使摄像头截图分辨率变化,标定结果也可以复用。
## 注意
- 标定截图应来自真实安装角度。
- 标定时展示柜门最好保持日常运行状态。
- 垃圾桶区域只框垃圾桶开口和投放可见区域,不要框太大。
- RTSP 密码只会发给本地标定服务,不会保存到项目文件。

256
tools/calibrator/app.js Normal file
View File

@@ -0,0 +1,256 @@
// Zone identifiers: the 2x4 grid cells in row-major order, followed by the trash ROI.
const zoneIds = [
  ...[1, 2].flatMap((row) => [1, 2, 3, 4].map((col) => `r${row}c${col}`)),
  "trash",
];

// Color used to draw each zone's polygon, vertices and label on the canvas.
const colors = Object.fromEntries([
  ["r1c1", "#e11d48"],
  ["r1c2", "#f97316"],
  ["r1c3", "#ca8a04"],
  ["r1c4", "#16a34a"],
  ["r2c1", "#0891b2"],
  ["r2c2", "#2563eb"],
  ["r2c3", "#7c3aed"],
  ["r2c4", "#db2777"],
  ["trash", "#111827"],
]);

// Mutable UI state: the zone currently being edited, one point list per zone
// (normalized {x, y} coordinates), and the captured snapshot with its object URL.
const state = {
  activeZone: zoneIds[0],
  polygons: {},
  image: null,
  imageUrl: null,
};
for (const id of zoneIds) {
  state.polygons[id] = [];
}
// Look up the page elements this script drives, in one pass by element id.
const [canvas, zoneList, statusEl, tomlOutput] = ["canvas", "zoneList", "status", "tomlOutput"].map(
  (elementId) => document.getElementById(elementId),
);
// 2D drawing context used for the snapshot and the zone overlays.
const ctx = canvas.getContext("2d");
/**
 * Builds one selector button per zone, wires up every UI event handler,
 * and paints the initial (empty) view.
 */
function init() {
  zoneIds.forEach((zoneId) => {
    const button = Object.assign(document.createElement("button"), {
      type: "button",
      className: "zone-button",
      textContent: zoneId,
    });
    button.dataset.zoneId = zoneId;
    // Clicking a zone button makes that zone the target of subsequent canvas clicks.
    button.addEventListener("click", () => {
      state.activeZone = zoneId;
      render();
    });
    zoneList.appendChild(button);
  });

  // Each toolbar button's element id matches the name of its handler.
  const clickHandlers = {
    captureFrame,
    undoPoint,
    clearZone,
    clearAll,
    copyToml,
  };
  for (const [elementId, handler] of Object.entries(clickHandlers)) {
    document.getElementById(elementId).addEventListener("click", handler);
  }

  canvas.addEventListener("click", addPoint);
  window.addEventListener("resize", render);
  render();
}
/**
 * Asks the local server to grab one frame from the RTSP URL typed into the
 * page, then installs the returned image as the calibration background.
 *
 * Progress and failures are reported through setStatus(); the function itself
 * never rejects. A frame that fails to load or decode leaves the previously
 * captured image (if any) in place.
 *
 * @returns {Promise<void>}
 */
async function captureFrame() {
  const rtspUrl = document.getElementById("rtspUrl").value.trim();
  if (!rtspUrl) {
    setStatus("请输入 RTSP 地址");
    return;
  }
  setStatus("正在抓取一帧...");
  try {
    const response = await fetch("/api/capture", {
      method: "POST",
      headers: {"Content-Type": "application/json"},
      body: JSON.stringify({rtsp_url: rtspUrl, timeout_seconds: 12}),
    });
    if (!response.ok) {
      // The capture endpoint reports errors as JSON, but other failures
      // (e.g. a 404/500 page) may not be JSON at all. Fall back to
      // the status code instead of surfacing a confusing parse error.
      let detail = "";
      try {
        const payload = await response.json();
        detail = (payload && payload.error) || "";
      } catch (parseError) {
        detail = "";
      }
      throw new Error(detail || `HTTP ${response.status}`);
    }
    const blob = await response.blob();
    const nextUrl = URL.createObjectURL(blob);
    let image;
    try {
      // Wait for the browser to decode the frame so that a corrupt or empty
      // response is reported instead of leaving the status stuck on "capturing".
      image = await new Promise((resolve, reject) => {
        const candidate = new Image();
        candidate.onload = () => resolve(candidate);
        candidate.onerror = () => reject(new Error("无法解码返回的图像"));
        candidate.src = nextUrl;
      });
    } catch (decodeError) {
      URL.revokeObjectURL(nextUrl);
      throw decodeError;
    }
    // Only release the previous snapshot once the new one is usable.
    if (state.imageUrl) {
      URL.revokeObjectURL(state.imageUrl);
    }
    state.imageUrl = nextUrl;
    state.image = image;
    canvas.width = image.naturalWidth;
    canvas.height = image.naturalHeight;
    setStatus(`已抓取 ${image.naturalWidth}x${image.naturalHeight}`);
    render();
  } catch (error) {
    setStatus(`抓帧失败:${error.message}`);
  }
}
/**
 * Appends the clicked position to the active zone's polygon as a normalized
 * point (0..1 on both axes, relative to the snapshot).
 *
 * The stylesheet displays the canvas with `object-fit: contain`, so the bitmap
 * may be letterboxed inside the element box. The click is therefore mapped
 * against the area the bitmap actually occupies rather than the whole element
 * rectangle; clicks in the letterbox bars are clamped to the nearest edge.
 *
 * @param {MouseEvent} event - click event dispatched on the canvas.
 */
function addPoint(event) {
  if (!state.image) {
    setStatus("请先抓取一帧");
    return;
  }
  const rect = canvas.getBoundingClientRect();
  // A collapsed element or empty bitmap would divide by zero and store NaN points.
  if (!(rect.width > 0 && rect.height > 0 && canvas.width > 0 && canvas.height > 0)) {
    return;
  }
  let drawnWidth = rect.width;
  let drawnHeight = rect.height;
  if (window.getComputedStyle(canvas).objectFit === "contain") {
    const scale = Math.min(rect.width / canvas.width, rect.height / canvas.height);
    drawnWidth = canvas.width * scale;
    drawnHeight = canvas.height * scale;
  }
  // Assumes the default centered object-position; adjust if the CSS ever changes it.
  const originX = rect.left + (rect.width - drawnWidth) / 2;
  const originY = rect.top + (rect.height - drawnHeight) / 2;
  const point = {
    x: clamp((event.clientX - originX) / drawnWidth),
    y: clamp((event.clientY - originY) / drawnHeight),
  };
  state.polygons[state.activeZone].push(point);
  render();
}
/** Removes the most recently added point of the active zone, if any, and redraws. */
function undoPoint() {
  const activePoints = state.polygons[state.activeZone];
  activePoints.pop();
  render();
}
/** Discards every point of the active zone (by installing a fresh empty list) and redraws. */
function clearZone() {
  const { activeZone, polygons } = state;
  polygons[activeZone] = [];
  render();
}
/** Resets every zone's polygon to a fresh empty list and redraws. */
function clearAll() {
  zoneIds.forEach((zoneId) => {
    state.polygons[zoneId] = [];
  });
  render();
}
/**
 * Copies the generated TOML to the system clipboard and reports the outcome
 * in the status line.
 *
 * `navigator.clipboard` is unavailable in non-secure contexts and
 * `writeText` may reject (e.g. permission denied). In either case the text is
 * selected in the output box so the operator can copy it manually, instead of
 * the click failing silently with an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
async function copyToml() {
  const text = tomlOutput.value;
  if (!text.trim()) {
    setStatus("没有可复制的 TOML");
    return;
  }
  try {
    await navigator.clipboard.writeText(text);
    setStatus("TOML 已复制");
  } catch (error) {
    tomlOutput.select();
    setStatus("复制失败,请手动复制已选中的 TOML");
  }
}
/** Refreshes the zone buttons, the canvas, and the TOML export from the current state. */
function render() {
  renderZoneButtons();
  renderCanvas();
  const toml = buildToml();
  tomlOutput.value = toml;
}
/**
 * Updates each zone button's CSS classes: "active" for the zone being edited,
 * "done" once a zone has at least three points (enough to form a polygon).
 */
function renderZoneButtons() {
  const buttons = Array.from(zoneList.querySelectorAll("button"));
  buttons.forEach((button) => {
    const { zoneId } = button.dataset;
    const isActive = zoneId === state.activeZone;
    const isDone = state.polygons[zoneId].length >= 3;
    button.classList.toggle("active", isActive);
    button.classList.toggle("done", isDone);
  });
}
/**
 * Repaints the canvas: the captured snapshot (or a placeholder prompt when no
 * frame has been captured yet) with every zone polygon drawn on top.
 */
function renderCanvas() {
  const { width, height } = canvas;
  ctx.clearRect(0, 0, width, height);

  if (!state.image) {
    // No snapshot yet: dark background with a centered hint.
    ctx.fillStyle = "#111820";
    ctx.fillRect(0, 0, width, height);
    ctx.fillStyle = "#d9e0e7";
    ctx.font = "22px sans-serif";
    ctx.textAlign = "center";
    ctx.fillText("输入 RTSP 地址后抓取一帧", width / 2, height / 2);
  } else {
    ctx.drawImage(state.image, 0, 0, width, height);
  }

  zoneIds.forEach((zoneId) => {
    drawPolygon(zoneId, state.polygons[zoneId], zoneId === state.activeZone);
  });
}
/**
 * Draws one zone overlay: a translucent fill (once there are three or more
 * points), the outline, a numbered dot per vertex, and the zone id next to the
 * first vertex.
 *
 * @param {string} zoneId - zone identifier; also selects the overlay color.
 * @param {Array<{x: number, y: number}>} points - normalized vertices.
 * @param {boolean} active - whether this is the zone being edited (thicker line, bold label).
 */
function drawPolygon(zoneId, points, active) {
  if (points.length === 0) {
    return;
  }
  const color = colors[zoneId] || "#ffffff";
  // Normalized coordinates are scaled to the current canvas bitmap size.
  const toPixels = (point) => [point.x * canvas.width, point.y * canvas.height];

  ctx.save();
  ctx.strokeStyle = color;
  ctx.fillStyle = color;
  ctx.lineWidth = active ? 4 : 2;

  // Outline path; filled translucently only when it encloses an area.
  ctx.globalAlpha = 0.22;
  ctx.beginPath();
  for (let index = 0; index < points.length; index += 1) {
    const [x, y] = toPixels(points[index]);
    if (index > 0) {
      ctx.lineTo(x, y);
    } else {
      ctx.moveTo(x, y);
    }
  }
  if (points.length >= 3) {
    ctx.closePath();
    ctx.fill();
  }
  ctx.globalAlpha = 1;
  ctx.stroke();

  // Vertex markers with their 1-based order drawn in white above each dot.
  points.forEach((point, index) => {
    const [x, y] = toPixels(point);
    ctx.beginPath();
    ctx.arc(x, y, 5, 0, Math.PI * 2);
    ctx.fill();
    ctx.fillStyle = "#ffffff";
    ctx.font = "12px sans-serif";
    ctx.textAlign = "center";
    ctx.fillText(String(index + 1), x, y - 9);
    ctx.fillStyle = color;
  });

  // Zone label anchored just below and to the right of the first vertex.
  const [labelX, labelY] = toPixels(points[0]);
  ctx.fillStyle = color;
  ctx.font = active ? "bold 18px sans-serif" : "14px sans-serif";
  ctx.textAlign = "left";
  ctx.fillText(zoneId, labelX + 8, labelY + 18);
  ctx.restore();
}
/**
 * Serializes the current calibration as TOML: a fixed [layout] header, one
 * [[zones]] table per grid cell that has at least three points, and a [trash]
 * table when the trash ROI has at least three points.
 *
 * @returns {string} TOML text, lines joined with "\n".
 */
function buildToml() {
  const cellIds = zoneIds.filter((id) => id !== "trash");
  const quotedIds = cellIds.map((id) => quote(id)).join(", ");
  const header = ["[layout]", "rows = 2", "cols = 4", `zone_ids = [${quotedIds}]`, ""];

  // Cells with fewer than three points are not polygons yet and are left out.
  const zoneTables = cellIds
    .filter((zoneId) => state.polygons[zoneId].length >= 3)
    .flatMap((zoneId) => [
      "[[zones]]",
      `id = ${quote(zoneId)}`,
      `polygon = ${formatPoints(state.polygons[zoneId])}`,
      "",
    ]);

  const trashPoints = state.polygons.trash;
  const trashTable = trashPoints.length >= 3 ? ["[trash]", `roi = ${formatPoints(trashPoints)}`] : [];

  return [...header, ...zoneTables, ...trashTable].join("\n");
}
/**
 * Formats points as a TOML array of [x, y] pairs with four decimal places.
 *
 * @param {Array<{x: number, y: number}>} points
 * @returns {string} e.g. "[[0.1000, 0.2000], [0.3000, 0.4000]]"
 */
function formatPoints(points) {
  const pairs = points.map(({ x, y }) => `[${x.toFixed(4)}, ${y.toFixed(4)}]`);
  return `[${pairs.join(", ")}]`;
}
/**
 * Renders a value as a TOML basic string.
 *
 * Backslashes and double quotes are escaped so the output stays valid TOML
 * even if an identifier ever contains them; plain identifiers such as "r1c1"
 * are emitted unchanged.
 *
 * @param {*} value - value to quote; converted with String().
 * @returns {string} the value wrapped in double quotes.
 */
function quote(value) {
  // Escape backslashes first so the backslashes added for quotes are not doubled.
  const escaped = String(value).replace(/\\/g, "\\\\").replace(/"/g, '\\"');
  return `"${escaped}"`;
}
/**
 * Limits a number to the normalized range [0, 1].
 *
 * @param {number} value
 * @returns {number} 0 for values below 0, 1 for values above 1, otherwise the value.
 */
function clamp(value) {
  const lower = 0;
  const upper = 1;
  return Math.max(lower, Math.min(upper, value));
}
/**
 * Shows a message in the status line next to the capture button.
 *
 * @param {string} message - text to display; replaces the previous status.
 */
function setStatus(message) {
  const target = statusEl;
  target.textContent = message;
}
// Bootstrap the page: build the zone buttons, attach handlers, and draw the first frame.
init();

View File

@@ -0,0 +1,52 @@
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>冷藏展示柜区域标定</title>
<link rel="stylesheet" href="/style.css">
</head>
<body>
<main class="app">
<!-- Title bar with the "copy TOML" action (handled by copyToml in app.js). -->
<header class="topbar">
<div>
<h1>冷藏展示柜区域标定</h1>
<p>从 RTSP 拉取一帧截图,在图上标定 8 个格口和垃圾桶区域。</p>
</div>
<button id="copyToml" type="button">复制 TOML</button>
</header>
<!-- Capture bar: RTSP URL input, capture button, and the status line written by setStatus. -->
<section class="capture">
<label>
RTSP 地址
<input id="rtspUrl" type="text" placeholder="rtsp://user:password@camera-ip:554/stream">
</label>
<button id="captureFrame" type="button">抓取一帧</button>
<span id="status"></span>
</section>
<section class="workspace">
<!-- Left panel: zone selector buttons are generated into #zoneList by init() in app.js. -->
<aside class="panel">
<h2>区域</h2>
<div id="zoneList" class="zone-list"></div>
<div class="actions">
<button id="undoPoint" type="button">撤销点</button>
<button id="clearZone" type="button">清空当前</button>
<button id="clearAll" type="button">清空全部</button>
</div>
<p class="hint">选择区域后,在截图上按顺时针或逆时针点击顶点。每个区域建议 4 个点,透视明显时可以多点。</p>
</aside>
<!-- Drawing surface; app.js resizes the bitmap to the captured frame's native resolution. -->
<section class="canvas-wrap">
<canvas id="canvas" width="1280" height="720"></canvas>
</section>
<!-- Right panel: generated TOML, refreshed on every render. -->
<aside class="panel output-panel">
<h2>导出配置</h2>
<textarea id="tomlOutput" spellcheck="false"></textarea>
</aside>
</section>
</main>
<!-- Loaded at the end of body so every element above exists when app.js looks it up. -->
<script src="/app.js"></script>
</body>
</html>

144
tools/calibrator/server.py Normal file
View File

@@ -0,0 +1,144 @@
from __future__ import annotations
import argparse
import json
import mimetypes
import subprocess
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from urllib.parse import unquote
# Directory containing this script; index.html and the other static files are served from here.
ROOT = Path(__file__).resolve().parent
class CalibratorHandler(BaseHTTPRequestHandler):
    """HTTP handler for the calibration web tool.

    GET serves ``index.html`` and other files located under ``ROOT``.
    POST ``/api/capture`` takes a JSON object with ``rtsp_url`` and an optional
    ``timeout_seconds``, grabs one frame through ``capture_rtsp_frame`` and
    returns it as ``image/jpeg``. Errors are returned as JSON ``{"error": ...}``.
    """

    server_version = "ColdDisplayCalibrator/0.1"
    # Upper bound for the client-supplied capture timeout, so a single request
    # cannot keep an ffmpeg process and a server thread busy indefinitely.
    max_timeout_seconds = 60.0

    def do_GET(self) -> None:
        """Serve the index page or a static file below ROOT; anything else is a 404."""
        path = "/" if self.path == "/" else unquote(self.path.split("?", 1)[0])
        if path == "/":
            self._send_file(ROOT / "index.html")
            return
        target = (ROOT / path.lstrip("/")).resolve()
        # Resolving first and then checking the parents rejects "../" escapes.
        if ROOT not in target.parents or not target.is_file():
            self.send_error(404)
            return
        self._send_file(target)

    def do_POST(self) -> None:
        """Handle ``/api/capture``: validate the request and return one JPEG frame."""
        if self.path != "/api/capture":
            self.send_error(404)
            return
        try:
            payload = self._read_json()
            if not isinstance(payload, dict):
                raise ValueError("request body must be a JSON object")
            rtsp_url = str(payload.get("rtsp_url", "")).strip()
            timeout_seconds = float(payload.get("timeout_seconds", 10))
            if not rtsp_url.lower().startswith("rtsp://"):
                self._send_json({"error": "rtsp_url must start with rtsp://"}, status=400)
                return
            # Keep the timeout within [1, max_timeout_seconds]; NaN falls back to 1.
            timeout_seconds = min(max(1.0, timeout_seconds), self.max_timeout_seconds)
            image = capture_rtsp_frame(rtsp_url, timeout_seconds)
        except (ValueError, TypeError) as exc:
            # Covers malformed JSON and undecodable bodies (both ValueError
            # subclasses) as well as non-numeric values such as null (TypeError).
            self._send_json({"error": f"invalid request: {exc}"}, status=400)
            return
        except CaptureError as exc:
            self._send_json({"error": str(exc)}, status=502)
            return
        self.send_response(200)
        self.send_header("Content-Type", "image/jpeg")
        self.send_header("Content-Length", str(len(image)))
        self.send_header("Cache-Control", "no-store")
        self.end_headers()
        self.wfile.write(image)

    def log_message(self, format: str, *args: object) -> None:
        """Print access-log lines to stdout, prefixed with the client address."""
        print(f"{self.address_string()} - {format % args}")

    def _read_json(self):
        """Read and decode the JSON request body.

        Raises ValueError for an invalid Content-Length, undecodable bytes, or
        malformed JSON. The decoded value may be any JSON type.
        """
        length = int(self.headers.get("Content-Length", "0"))
        if length < 0:
            # rfile.read() with a negative size would block until the client disconnects.
            raise ValueError("Content-Length must not be negative")
        return json.loads(self.rfile.read(length).decode("utf-8"))

    def _send_file(self, path: Path) -> None:
        """Send a file with a Content-Type guessed from its name; caching is disabled."""
        data = path.read_bytes()
        content_type = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
        self.send_response(200)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(data)))
        self.send_header("Cache-Control", "no-store")
        self.end_headers()
        self.wfile.write(data)

    def _send_json(self, payload: dict, status: int = 200) -> None:
        """Send ``payload`` as a UTF-8 JSON response with the given status code."""
        data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(data)))
        self.send_header("Cache-Control", "no-store")
        self.end_headers()
        self.wfile.write(data)
class CaptureError(RuntimeError):
    """Raised when ffmpeg cannot deliver a snapshot frame (missing binary, timeout, failure, or empty output)."""
def capture_rtsp_frame(rtsp_url: str, timeout_seconds: float) -> bytes:
    """Grab a single frame from an RTSP stream and return it as JPEG bytes.

    Runs ``ffmpeg`` over TCP transport, writing one MJPEG-encoded frame to its
    standard output.

    Args:
        rtsp_url: Stream URL passed to ffmpeg as the input.
        timeout_seconds: Time allowed for ffmpeg to finish; values below one
            second are raised to one second.

    Returns:
        The bytes ffmpeg wrote to stdout.

    Raises:
        CaptureError: ffmpeg is not installed, timed out, exited with a
            non-zero code, or produced no output.
    """
    # Compute the effective timeout once so the error message matches what was enforced.
    effective_timeout = max(1.0, timeout_seconds)
    command = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-rtsp_transport",
        "tcp",
        "-i",
        rtsp_url,
        "-frames:v",
        "1",
        "-f",
        "image2pipe",
        "-vcodec",
        "mjpeg",
        "-",
    ]
    try:
        result = subprocess.run(
            command,
            check=False,
            # ffmpeg reads interactive commands from stdin by default; detach it so
            # it never competes with the terminal the server was started from.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=effective_timeout,
        )
    except FileNotFoundError as exc:
        raise CaptureError("ffmpeg not found; install ffmpeg first") from exc
    except subprocess.TimeoutExpired as exc:
        raise CaptureError(f"ffmpeg timed out after {effective_timeout:g}s") from exc
    if result.returncode != 0:
        message = result.stderr.decode("utf-8", errors="replace").strip()
        raise CaptureError(message or f"ffmpeg exited with code {result.returncode}")
    if not result.stdout:
        raise CaptureError("ffmpeg returned no image data")
    return result.stdout
def main() -> int:
    """Parse ``--host``/``--port``, run the calibration server until Ctrl-C, and return exit code 0."""
    parser = argparse.ArgumentParser(description="RTSP snapshot calibration web tool.")
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", default=18090, type=int)
    options = parser.parse_args()

    address = (options.host, options.port)
    # Leaving the with-block closes the listening socket (server_close()).
    with ThreadingHTTPServer(address, CalibratorHandler) as server:
        print(f"Calibration server: http://{options.host}:{options.port}")
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print("\nStopping calibration server")
    return 0
# Run the server when executed as a script; main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())

186
tools/calibrator/style.css Normal file
View File

@@ -0,0 +1,186 @@
/* Page-wide defaults: light color scheme, system font stack, base colors. */
:root {
color-scheme: light;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
background: #f4f6f8;
color: #17202a;
}
* {
box-sizing: border-box;
}
body {
margin: 0;
}
/* Form controls do not inherit fonts by default. */
button,
input,
textarea {
font: inherit;
}
/* Shared button look; .zone-button.active overrides the colors. */
button {
border: 1px solid #9aa7b3;
background: #ffffff;
color: #17202a;
border-radius: 6px;
padding: 8px 12px;
cursor: pointer;
}
button:hover {
background: #edf2f7;
}
/* Full-height column: top bar, capture bar, then the workspace fills the rest. */
.app {
min-height: 100vh;
display: flex;
flex-direction: column;
}
.topbar {
display: flex;
justify-content: space-between;
gap: 16px;
align-items: center;
padding: 16px 20px;
background: #ffffff;
border-bottom: 1px solid #d9e0e7;
}
/* Typography resets and sizes. */
h1,
h2,
p {
margin: 0;
}
h1 {
font-size: 20px;
}
h2 {
font-size: 15px;
margin-bottom: 10px;
}
/* Secondary text: subtitle, usage hint, and the status line. */
.topbar p,
.hint,
#status {
color: #5d6b78;
font-size: 13px;
}
/* Capture bar: URL field | capture button | status text. */
.capture {
display: grid;
grid-template-columns: minmax(280px, 1fr) auto minmax(180px, auto);
gap: 12px;
align-items: end;
padding: 14px 20px;
background: #ffffff;
border-bottom: 1px solid #d9e0e7;
}
.capture label {
display: grid;
gap: 6px;
font-size: 13px;
color: #344454;
}
.capture input {
width: 100%;
border: 1px solid #9aa7b3;
border-radius: 6px;
padding: 9px 10px;
}
/* Three columns: zone panel | canvas | TOML output. */
.workspace {
flex: 1;
min-height: 0;
display: grid;
grid-template-columns: 220px minmax(420px, 1fr) 340px;
gap: 12px;
padding: 12px;
}
.panel {
background: #ffffff;
border: 1px solid #d9e0e7;
border-radius: 8px;
padding: 12px;
min-height: 0;
}
/* Zone selector buttons in a two-column grid. */
.zone-list {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 8px;
}
.zone-button {
min-height: 36px;
}
/* "active" and "done" are toggled by renderZoneButtons() in app.js. */
.zone-button.active {
background: #1f6feb;
border-color: #1f6feb;
color: #ffffff;
}
.zone-button.done::after {
content: " ✓";
}
.actions {
display: grid;
gap: 8px;
margin: 14px 0;
}
.canvas-wrap {
min-width: 0;
min-height: 0;
display: flex;
align-items: center;
justify-content: center;
background: #202832;
border-radius: 8px;
overflow: hidden;
}
/* object-fit: contain keeps the snapshot's aspect ratio, so the bitmap may be
   letterboxed inside the element box; click mapping in app.js has to account for that. */
canvas {
width: 100%;
height: 100%;
object-fit: contain;
background: #111820;
cursor: crosshair;
}
/* Output panel stacks its heading above a textarea that takes the remaining height. */
.output-panel {
display: flex;
flex-direction: column;
}
textarea {
flex: 1;
width: 100%;
min-height: 420px;
resize: none;
border: 1px solid #9aa7b3;
border-radius: 6px;
padding: 10px;
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
font-size: 12px;
line-height: 1.45;
}
/* Narrow screens: two columns, with the output panel moved to a full-width row below. */
@media (max-width: 1100px) {
.workspace {
grid-template-columns: 190px minmax(360px, 1fr);
}
.output-panel {
grid-column: 1 / -1;
min-height: 260px;
}
}