fix: preserve handled display cabinet cases

This commit is contained in:
2026-06-15 16:08:12 +08:00
parent 7b9ec2e148
commit 0c3895c24c
5 changed files with 223 additions and 6 deletions

View File

@@ -114,7 +114,12 @@ def capture_alert_snapshot(
file_name = build_snapshot_file_name(alert_events[0], captured_at)
object_key_hint = build_object_key_hint(settings.object_key_prefix, alert_events[0], captured_at, file_name)
try:
annotated_frame = apply_calibration_overlay(frame, config)
annotated_frame = apply_calibration_overlay(
frame,
config,
zone_ids=alert_event_zone_ids(alert_events),
include_trash=False,
)
image_bytes = (jpeg_encoder or encode_frame_to_jpeg)(annotated_frame, settings.encode_timeout_seconds)
result = (uploader or upload_snapshot_bytes)(
image_bytes,
@@ -137,8 +142,14 @@ def capture_alert_snapshot(
}
def apply_calibration_overlay(frame: Frame, config: dict[str, Any]) -> Frame:
regions = load_calibration_overlay_regions(config)
def apply_calibration_overlay(
frame: Frame,
config: dict[str, Any],
*,
zone_ids: set[str] | None = None,
include_trash: bool = True,
) -> Frame:
regions = load_calibration_overlay_regions(config, zone_ids=zone_ids, include_trash=include_trash)
if not regions or frame.width <= 0 or frame.height <= 0:
return frame
@@ -170,11 +181,19 @@ def apply_calibration_overlay(frame: Frame, config: dict[str, Any]) -> Frame:
return Frame(width=frame.width, height=frame.height, rgb=bytes(rgb))
def load_calibration_overlay_regions(config: dict[str, Any]) -> list[CalibrationOverlayRegion]:
def load_calibration_overlay_regions(
config: dict[str, Any],
*,
zone_ids: set[str] | None = None,
include_trash: bool = True,
) -> list[CalibrationOverlayRegion]:
regions: list[CalibrationOverlayRegion] = []
for index, zone in enumerate(config.get("zones", [])):
if not isinstance(zone, dict):
continue
zone_id = str(zone.get("id", "")).strip()
if zone_ids is not None and zone_id not in zone_ids:
continue
polygon = normalize_overlay_polygon(zone.get("polygon", []))
if len(polygon) >= 3:
color = ZONE_OVERLAY_PALETTE[index % len(ZONE_OVERLAY_PALETTE)]
@@ -188,7 +207,7 @@ def load_calibration_overlay_regions(config: dict[str, Any]) -> list[Calibration
)
trash = config.get("trash", {})
if isinstance(trash, dict):
if include_trash and isinstance(trash, dict):
polygon = normalize_overlay_polygon(trash.get("roi", []))
if len(polygon) >= 3:
regions.append(
@@ -202,6 +221,10 @@ def load_calibration_overlay_regions(config: dict[str, Any]) -> list[Calibration
return regions
def alert_event_zone_ids(events: list[dict[str, object]]) -> set[str]:
    """Collect the distinct, non-blank zone IDs referenced by alert events.

    Each event's ``zone_id`` value (a missing key counts as an empty string)
    is converted with ``str`` and stripped of surrounding whitespace. Values
    that end up empty are left out of the result.
    """
    collected: set[str] = set()
    for event in events:
        normalized = str(event.get("zone_id", "")).strip()
        if normalized:
            collected.add(normalized)
    return collected
def overlay_region_label(payload: dict[str, Any], *, fallback: str) -> str:
for key in ("label", "name", "id"):
value = str(payload.get(key, "")).strip()

View File

@@ -171,6 +171,7 @@ def load_case_store(path: Path) -> CaseStore:
def persist_case_updates(case_store: CaseStore, path: Path, events: list[dict[str, object]]) -> list[dict[str, object]]:
    """Apply *events* to the case state stored at *path* and append the results.

    A store is loaded from *path* on every call and the events are applied to
    that freshly loaded store; the snapshots it produces are appended to the
    same file and returned.

    NOTE(review): the ``case_store`` argument is accepted for interface
    compatibility but is never read -- the caller's in-memory store is neither
    consulted nor updated here. Presumably this is what lets snapshots written
    to the file by another process take precedence; confirm before relying on
    the caller's store elsewhere.
    """
    reloaded_store = load_case_store(path)
    new_snapshots = reloaded_store.apply_batch_events(events)
    append_case_snapshots(path, new_snapshots)
    return new_snapshots

View File

@@ -1,5 +1,37 @@
# Task Todo
## Current Task: Runtime/API Case State Reopen Fix
**Goal:** When the management API marks a display-cabinet case as handled, the runtime process must not later append a newer `open` snapshot for the same case from stale in-memory state.
- [x] Add a failing regression test for API-written `handled` state being preserved when runtime persists later events.
- [x] Fix runtime case persistence to reconcile with the latest JSONL snapshots before applying new events.
- [x] Run targeted case/runtime tests.
- [x] Record remote chain verification and deployment status.
### Findings
- On `xiaozheng@10.8.0.23`, `case_batch_000911` was marked `handled` at `2026-06-15T07:27:12Z`; about 11 minutes later the runtime appended a newer `open` snapshot for the same case at `2026-06-15T15:38:03+08:00` (`2026-06-15T07:38:03Z`).
- The API and runtime are separate processes sharing `logs/cases.jsonl`; runtime keeps a long-lived `CaseStore` loaded at startup and did not see the API-written handled snapshot.
### Verification
- RED:
- `eval "$(/opt/homebrew/bin/pyenv init -)" && PYTHONPATH=src python -m unittest tests.test_main.RuntimeRestoreTests.test_persist_case_updates_preserves_api_handled_snapshot -v`
- Result before fix: failed because runtime appended a later `open` snapshot.
- Local targeted verification:
- `eval "$(/opt/homebrew/bin/pyenv init -)" && PYTHONPATH=src python -m unittest tests.test_main.RuntimeRestoreTests.test_persist_case_updates_preserves_api_handled_snapshot -v`
- `eval "$(/opt/homebrew/bin/pyenv init -)" && PYTHONPATH=src python -m unittest tests/test_cases.py -v`
- `eval "$(/opt/homebrew/bin/pyenv init -)" && PYTHONPATH=src python -m unittest tests/test_main.py -v`
- Result: all passed.
- Remote deployment:
- Synced only `src/cold_display_guard/main.py` to `xiaozheng@10.8.0.23:/home/xiaozheng/cold_display_guard/src/cold_display_guard/main.py`.
- Ran `docker compose --env-file deploy/cold-display-guard.env -f deploy/docker-compose.yml up -d --build cold-display-guard-runtime`.
- Compose recreated `cold-display-guard-api` and `cold-display-guard-runtime`; health check returned `status=ok`.
- Remote behavior check:
- Ran the same API-handled/runtime-later-event scenario inside `cold-display-guard-runtime` using a temp JSONL file.
- Result: `{"handled_source": "manual", "latest_status": "handled", "new_snapshots": 0}`.
- [x] Review the current project instructions and check for task-relevant lessons.
- [x] Inspect the OTA upload API document and current runtime/webhook capture path.
- [x] Create an isolated worktree for alarm snapshot upload implementation.
@@ -339,3 +371,51 @@
- `GET /api/manage/health` returned `status=ok` and `runtime_status=running`.
- Running container uses `fontsize=13`, `boxcolor=black@0.34`, and `boxborderw=2` for region labels.
- `cold-display-guard-runtime` logs show normal startup after restart.
## Current Task: Limit Alert Snapshot Overlay To Event Zones
**Goal:** Uploaded warning/alarm screenshots should only draw the cold-display region polygons and names for the zones that actually triggered the warning/alarm event. Other configured zones and the trash ROI should not be drawn on those uploaded screenshots.
**Plan:** Keep the full calibration overlay helper available for tests and general use, but pass alert event zone IDs from `capture_alert_snapshot` into the overlay loader and disable trash ROI drawing for alert uploads.
- [x] Add a regression test proving alert snapshot upload only annotates the triggering event zone.
- [x] Filter snapshot overlay regions by event `zone_id` during alert upload.
- [x] Preserve full overlay behavior when `apply_calibration_overlay` is called without filters.
- [x] Run full local Python verification.
- [x] Deploy `alarm_snapshots.py` to `xiaozheng@10.8.0.23`.
- [x] Verify remote API/runtime health and deployed filtered-overlay behavior.
### Review
- Local verification passed:
- `PYTHONPATH=src python3 -m unittest tests/test_alarm_snapshots.py -v`
- `PYTHONPATH=src python3 -m unittest discover -s tests -v` (`104` tests)
- Deployed only `src/cold_display_guard/alarm_snapshots.py` to `xiaozheng@10.8.0.23` after backing up the previous remote file; live config was not overwritten.
- Rebuilt `cold-display-guard:dev` and restarted `cold-display-guard-api` plus `cold-display-guard-runtime`.
- Remote verification passed:
- `GET /api/manage/health` returned `status=ok` and `runtime_status=running`.
- Container-side smoke test for a zone-1 alert returned `zone1_changed=True`, `zone2_unchanged=True`, and `trash_unchanged=True`.
- API/runtime logs show normal startup after restart.
## Current Task: Check Webhook Duplicate Delivery
**Goal:** Verify whether `cold_display_guard` is sending duplicate Webhook requests to `video-recognition` on `xiaozheng@10.8.0.23`.
**Investigation:** Compare the sending code path, remote webhook delivery audit, retry queue state, cold-display event/case logs, `video-recognition` HTTP logs, and the receiver-side JSONL payloads.
- [x] Inspect sender code path for direct event/case delivery and retry drain behavior.
- [x] Confirm remote Webhook config uses the same URL for `event_url` and `case_url`.
- [x] Check sender delivery audit for duplicate receiver `task_id` values.
- [x] Check retry queue for pending successful redelivery risk.
- [x] Check receiver-side cold-display JSONL for duplicate payloads and duplicate business keys.
- [x] Trace the only coarse duplicate-looking case around `batch_000898`.
### Review
- Current remote config sends both `batch_event` and `case_event` to `http://10.8.0.23:8080/api/webhook/cold-display-guard`, so one business transition can produce two HTTP POSTs to the same endpoint with different `kind` values.
- Sender audit `logs/webhook_delivery.jsonl` contains `3056` records in total; the recent valid deliveries consist of `321` direct `ok` records and `0` retry `ok` records.
- Receiver-returned `task_id` values are unique: `321` unique task IDs and `0` duplicate task IDs.
- Retry queue has `547` latest retry items, all `dead_letter`; there are no pending retries.
- Receiver-side `video-recognition` cold-display files for `2026-06-15` contain `181` business payloads; exact payload duplicates are `0`, and fine-grained business key duplicates are `0`.
- Sender `events.jsonl` contains `3325` events; duplicate `(batch_id, event, ts, zone_id)` keys are `0`.
- The only coarse duplicate-looking receiver entry was `batch_000898` at `13:20:26`: the same frame emitted `time_pre_warning` and `pre_warning_handled`, which produced separate `case_event` actions `created` and `handled`. This is not the same Webhook request repeated.

View File

@@ -252,6 +252,64 @@ class AlarmSnapshotTests(unittest.TestCase):
self.assertNotEqual(encoded_frames[0].rgb, source_frame.rgb)
self.assertNotEqual(encoded_frames[0].pixel(1, 1), (0, 0, 0))
def test_capture_alert_snapshot_only_draws_alert_event_zones(self) -> None:
    """Alert uploads must annotate only the zone named by the alert event."""
    frames_seen: list[Frame] = []

    def fake_encode(frame: Frame, timeout_seconds: float) -> bytes:
        # Record the frame handed to the encoder so its pixels can be inspected.
        frames_seen.append(frame)
        return b"jpeg-bytes"

    def fake_upload(
        image_bytes: bytes,
        *,
        file_name: str,
        object_key_hint: str,
        settings,
        post_json_request=None,
        post_multipart_request=None,
    ) -> dict[str, object]:
        return {"status": "uploaded", "object_key": "uploads/alarms/zone-only.jpg", "file_name": file_name}

    # All-black 30x20 frame: any pixel that is no longer black was drawn on.
    black_frame = Frame(width=30, height=20, rgb=b"\x00\x00\x00" * 600)
    zone_one_alarm = {
        "event": "time_alarm",
        "severity": "alarm",
        "batch_id": "batch_1",
        "camera_id": "cam_1",
        "zone_id": "1",
        "ts": "2026-06-09T09:00:00+00:00",
    }
    zone_one = {
        "id": "1",
        "label": "区域 1",
        "polygon": [[0.00, 0.00], [0.45, 0.00], [0.45, 1.00], [0.00, 1.00]],
    }
    zone_two = {
        "id": "2",
        "label": "区域 2",
        "polygon": [[0.55, 0.00], [1.00, 0.00], [1.00, 1.00], [0.55, 1.00]],
    }
    config = {
        "alarm_snapshot_upload": {"enabled": True},
        "zones": [zone_one, zone_two],
        "trash": {"roi": [[0.45, 0.50], [0.55, 0.50], [0.55, 1.00], [0.45, 1.00]]},
    }

    result = capture_alert_snapshot(
        black_frame,
        [zone_one_alarm],
        config,
        now=datetime(2026, 6, 9, 9, 0, tzinfo=UTC),
        jpeg_encoder=fake_encode,
        uploader=fake_upload,
    )

    self.assertEqual(result["status"], "uploaded")
    self.assertEqual(len(frames_seen), 1)

    annotated = frames_seen[0]
    untouched = (0, 0, 0)
    # Sample points presumably fall inside zone 1, zone 2 and the trash ROI
    # respectively (assuming polygon coordinates are fractions of frame size).
    self.assertNotEqual(annotated.pixel(5, 10), untouched)
    self.assertEqual(annotated.pixel(25, 10), untouched)
    self.assertEqual(annotated.pixel(15, 15), untouched)
if __name__ == "__main__":
unittest.main()

View File

@@ -6,7 +6,7 @@ import unittest
from datetime import datetime, timezone
from pathlib import Path
from cold_display_guard.cases import CaseStore
from cold_display_guard.cases import CaseStore, append_case_snapshots, load_case_snapshots
from cold_display_guard.main import (
case_sink_path,
capture_runtime_alarm_snapshot,
@@ -67,6 +67,61 @@ class RuntimeRestoreTests(unittest.TestCase):
self.assertEqual(written[0]["case_type"], "time_alarm")
self.assertEqual(written[0]["case_status"], "open")
def test_persist_case_updates_preserves_api_handled_snapshot(self) -> None:
    """A handled snapshot appended by a second store survives later runtime events."""

    def batch_event(event: str, minute: int, severity: str, state: str) -> dict[str, object]:
        # Both events in this scenario share the batch, camera and zone.
        return {
            "event": event,
            "ts": datetime(2026, 6, 9, 9, minute, tzinfo=UTC).isoformat(),
            "batch_id": "batch_000001",
            "camera_id": "cam_01",
            "zone_id": "1",
            "zone_label": "区域 1",
            "severity": severity,
            "state": state,
        }

    with tempfile.TemporaryDirectory() as tmpdir:
        cases_path = Path(tmpdir) / "cases.jsonl"
        runtime_store = CaseStore()

        # Step 1: the runtime side persists the alarm that creates the case.
        created_snapshots = persist_case_updates(
            runtime_store,
            cases_path,
            [batch_event("time_alarm", 0, "alarm", "alerted")],
        )
        created = created_snapshots[0]

        # Step 2: a separate store built from the file marks the case handled
        # and appends that snapshot; runtime_store is not told about it.
        api_store = CaseStore(load_case_snapshots(cases_path))
        handled_snapshot = api_store.mark_handled(
            str(created["case_id"]),
            handled_at=datetime(2026, 6, 9, 9, 5, tzinfo=UTC),
            handled_by="alice",
            handled_source="manual",
        )
        append_case_snapshots(cases_path, [handled_snapshot])

        # Step 3: a later event for the same batch goes through the runtime path.
        snapshots = persist_case_updates(
            runtime_store,
            cases_path,
            [batch_event("batch_pending_disposal", 6, "warning", "pending_disposal")],
        )
        latest = CaseStore(load_case_snapshots(cases_path)).latest_cases()[0]

        # No new snapshot may be produced, and the handled state must still be latest.
        self.assertEqual(snapshots, [])
        self.assertEqual(latest["case_status"], "handled")
        self.assertEqual(latest["handled_source"], "manual")
def test_deliver_runtime_webhooks_sends_event_and_case_payloads(self) -> None:
deliveries: list[tuple[str, dict[str, object]]] = []