fix: preserve handled display cabinet cases
This commit is contained in:
@@ -114,7 +114,12 @@ def capture_alert_snapshot(
|
||||
file_name = build_snapshot_file_name(alert_events[0], captured_at)
|
||||
object_key_hint = build_object_key_hint(settings.object_key_prefix, alert_events[0], captured_at, file_name)
|
||||
try:
|
||||
annotated_frame = apply_calibration_overlay(frame, config)
|
||||
annotated_frame = apply_calibration_overlay(
|
||||
frame,
|
||||
config,
|
||||
zone_ids=alert_event_zone_ids(alert_events),
|
||||
include_trash=False,
|
||||
)
|
||||
image_bytes = (jpeg_encoder or encode_frame_to_jpeg)(annotated_frame, settings.encode_timeout_seconds)
|
||||
result = (uploader or upload_snapshot_bytes)(
|
||||
image_bytes,
|
||||
@@ -137,8 +142,14 @@ def capture_alert_snapshot(
|
||||
}
|
||||
|
||||
|
||||
def apply_calibration_overlay(frame: Frame, config: dict[str, Any]) -> Frame:
|
||||
regions = load_calibration_overlay_regions(config)
|
||||
def apply_calibration_overlay(
|
||||
frame: Frame,
|
||||
config: dict[str, Any],
|
||||
*,
|
||||
zone_ids: set[str] | None = None,
|
||||
include_trash: bool = True,
|
||||
) -> Frame:
|
||||
regions = load_calibration_overlay_regions(config, zone_ids=zone_ids, include_trash=include_trash)
|
||||
if not regions or frame.width <= 0 or frame.height <= 0:
|
||||
return frame
|
||||
|
||||
@@ -170,11 +181,19 @@ def apply_calibration_overlay(frame: Frame, config: dict[str, Any]) -> Frame:
|
||||
return Frame(width=frame.width, height=frame.height, rgb=bytes(rgb))
|
||||
|
||||
|
||||
def load_calibration_overlay_regions(config: dict[str, Any]) -> list[CalibrationOverlayRegion]:
|
||||
def load_calibration_overlay_regions(
|
||||
config: dict[str, Any],
|
||||
*,
|
||||
zone_ids: set[str] | None = None,
|
||||
include_trash: bool = True,
|
||||
) -> list[CalibrationOverlayRegion]:
|
||||
regions: list[CalibrationOverlayRegion] = []
|
||||
for index, zone in enumerate(config.get("zones", [])):
|
||||
if not isinstance(zone, dict):
|
||||
continue
|
||||
zone_id = str(zone.get("id", "")).strip()
|
||||
if zone_ids is not None and zone_id not in zone_ids:
|
||||
continue
|
||||
polygon = normalize_overlay_polygon(zone.get("polygon", []))
|
||||
if len(polygon) >= 3:
|
||||
color = ZONE_OVERLAY_PALETTE[index % len(ZONE_OVERLAY_PALETTE)]
|
||||
@@ -188,7 +207,7 @@ def load_calibration_overlay_regions(config: dict[str, Any]) -> list[Calibration
|
||||
)
|
||||
|
||||
trash = config.get("trash", {})
|
||||
if isinstance(trash, dict):
|
||||
if include_trash and isinstance(trash, dict):
|
||||
polygon = normalize_overlay_polygon(trash.get("roi", []))
|
||||
if len(polygon) >= 3:
|
||||
regions.append(
|
||||
@@ -202,6 +221,10 @@ def load_calibration_overlay_regions(config: dict[str, Any]) -> list[Calibration
|
||||
return regions
|
||||
|
||||
|
||||
def alert_event_zone_ids(events: list[dict[str, object]]) -> set[str]:
    """Collect the distinct, non-empty zone IDs named by alert events.

    Each event's ``zone_id`` value is converted to ``str`` and stripped of
    surrounding whitespace. Events whose ``zone_id`` is missing, ``None``,
    or blank after stripping contribute nothing.

    Args:
        events: Alert event payloads; only the ``zone_id`` key is read.

    Returns:
        The set of normalized zone IDs (possibly empty).
    """
    zone_ids: set[str] = set()
    for event in events:
        raw_zone_id = event.get("zone_id")
        if raw_zone_id is None:
            # str(None) would produce the literal "None", which is not a
            # real zone ID; treat an explicit null like a missing key.
            continue
        zone_id = str(raw_zone_id).strip()
        if zone_id:
            zone_ids.add(zone_id)
    return zone_ids
|
||||
|
||||
|
||||
def overlay_region_label(payload: dict[str, Any], *, fallback: str) -> str:
|
||||
for key in ("label", "name", "id"):
|
||||
value = str(payload.get(key, "")).strip()
|
||||
|
||||
@@ -171,6 +171,7 @@ def load_case_store(path: Path) -> CaseStore:
|
||||
|
||||
|
||||
def persist_case_updates(case_store: CaseStore, path: Path, events: list[dict[str, object]]) -> list[dict[str, object]]:
    """Apply batch events to the on-disk case state and append the new snapshots.

    The case store is rebuilt from ``path`` on every call, so the
    ``case_store`` argument is not consulted; it is kept in the signature for
    existing callers. The events are applied to the freshly loaded store and
    the snapshots it produces are appended to ``path``.

    Args:
        case_store: Caller-held store; unused because state is reloaded.
        path: Case snapshot file that is read and then appended to.
        events: Batch events handed to ``apply_batch_events``.

    Returns:
        The snapshots returned by ``apply_batch_events``, after appending them.
    """
    # NOTE(review): load-then-append is done without any locking visible in
    # this function -- confirm whether concurrent writers need coordination.
    reloaded_store = load_case_store(path)
    new_snapshots = reloaded_store.apply_batch_events(events)
    append_case_snapshots(path, new_snapshots)
    return new_snapshots
|
||||
|
||||
@@ -1,5 +1,37 @@
|
||||
# Task Todo
|
||||
|
||||
## Current Task: Runtime/API Case State Reopen Fix
|
||||
|
||||
**Goal:** When the management API marks a display-cabinet case as handled, the runtime process must not later append a newer `open` snapshot for the same case from stale in-memory state.
|
||||
|
||||
- [x] Add a failing regression test for API-written `handled` state being preserved when runtime persists later events.
|
||||
- [x] Fix runtime case persistence to reconcile with the latest JSONL snapshots before applying new events.
|
||||
- [x] Run targeted case/runtime tests.
|
||||
- [x] Record remote chain verification and deployment status.
|
||||
|
||||
### Findings
|
||||
|
||||
- On `xiaozheng@10.8.0.23`, `case_batch_000911` was marked `handled` at `2026-06-15T07:27:12Z`, then runtime appended a newer `open` snapshot for the same case at `2026-06-15T15:38:03+08:00` (i.e. `07:38:03Z`, about 11 minutes after it was handled).
|
||||
- The API and runtime are separate processes sharing `logs/cases.jsonl`; runtime keeps a long-lived `CaseStore` loaded at startup and did not see the API-written handled snapshot.
|
||||
|
||||
### Verification
|
||||
|
||||
- RED:
|
||||
- `eval "$(/opt/homebrew/bin/pyenv init -)" && PYTHONPATH=src python -m unittest tests.test_main.RuntimeRestoreTests.test_persist_case_updates_preserves_api_handled_snapshot -v`
|
||||
- Result before fix: failed because runtime appended a later `open` snapshot.
|
||||
- Local targeted verification:
|
||||
- `eval "$(/opt/homebrew/bin/pyenv init -)" && PYTHONPATH=src python -m unittest tests.test_main.RuntimeRestoreTests.test_persist_case_updates_preserves_api_handled_snapshot -v`
|
||||
- `eval "$(/opt/homebrew/bin/pyenv init -)" && PYTHONPATH=src python -m unittest tests/test_cases.py -v`
|
||||
- `eval "$(/opt/homebrew/bin/pyenv init -)" && PYTHONPATH=src python -m unittest tests/test_main.py -v`
|
||||
- Result: all passed.
|
||||
- Remote deployment:
|
||||
- Synced only `src/cold_display_guard/main.py` to `xiaozheng@10.8.0.23:/home/xiaozheng/cold_display_guard/src/cold_display_guard/main.py`.
|
||||
- Ran `docker compose --env-file deploy/cold-display-guard.env -f deploy/docker-compose.yml up -d --build cold-display-guard-runtime`.
|
||||
- Compose recreated `cold-display-guard-api` and `cold-display-guard-runtime`; health check returned `status=ok`.
|
||||
- Remote behavior check:
|
||||
- Ran the same API-handled/runtime-later-event scenario inside `cold-display-guard-runtime` using a temp JSONL file.
|
||||
- Result: `{"handled_source": "manual", "latest_status": "handled", "new_snapshots": 0}`.
|
||||
|
||||
- [x] Review the current project instructions and check for task-relevant lessons.
|
||||
- [x] Inspect the OTA upload API document and current runtime/webhook capture path.
|
||||
- [x] Create an isolated worktree for alarm snapshot upload implementation.
|
||||
@@ -339,3 +371,51 @@
|
||||
- `GET /api/manage/health` returned `status=ok` and `runtime_status=running`.
|
||||
- Running container uses `fontsize=13`, `boxcolor=black@0.34`, and `boxborderw=2` for region labels.
|
||||
- `cold-display-guard-runtime` logs show normal startup after restart.
|
||||
|
||||
## Current Task: Limit Alert Snapshot Overlay To Event Zones
|
||||
|
||||
**Goal:** Uploaded warning/alarm screenshots should only draw the cold-display region polygons and names for the zones that actually triggered the warning/alarm event. Other configured zones and the trash ROI should not be drawn on those uploaded screenshots.
|
||||
|
||||
**Plan:** Keep the full calibration overlay helper available for tests and general use, but pass alert event zone IDs from `capture_alert_snapshot` into the overlay loader and disable trash ROI drawing for alert uploads.
|
||||
|
||||
- [x] Add a regression test proving alert snapshot upload only annotates the triggering event zone.
|
||||
- [x] Filter snapshot overlay regions by event `zone_id` during alert upload.
|
||||
- [x] Preserve full overlay behavior when `apply_calibration_overlay` is called without filters.
|
||||
- [x] Run full local Python verification.
|
||||
- [x] Deploy `alarm_snapshots.py` to `xiaozheng@10.8.0.23`.
|
||||
- [x] Verify remote API/runtime health and deployed filtered-overlay behavior.
|
||||
|
||||
### Review
|
||||
|
||||
- Local verification passed:
|
||||
- `PYTHONPATH=src python3 -m unittest tests/test_alarm_snapshots.py -v`
|
||||
- `PYTHONPATH=src python3 -m unittest discover -s tests -v` (`104` tests)
|
||||
- Deployed only `src/cold_display_guard/alarm_snapshots.py` to `xiaozheng@10.8.0.23` after backing up the previous remote file; live config was not overwritten.
|
||||
- Rebuilt `cold-display-guard:dev` and restarted `cold-display-guard-api` plus `cold-display-guard-runtime`.
|
||||
- Remote verification passed:
|
||||
- `GET /api/manage/health` returned `status=ok` and `runtime_status=running`.
|
||||
- Container-side smoke test for a zone-1 alert returned `zone1_changed=True`, `zone2_unchanged=True`, and `trash_unchanged=True`.
|
||||
- API/runtime logs show normal startup after restart.
|
||||
|
||||
## Current Task: Check Webhook Duplicate Delivery
|
||||
|
||||
**Goal:** Verify whether `cold_display_guard` is sending duplicate Webhook requests to `video-recognition` on `xiaozheng@10.8.0.23`.
|
||||
|
||||
**Investigation:** Compare the sending code path, remote webhook delivery audit, retry queue state, cold-display event/case logs, `video-recognition` HTTP logs, and the receiver-side JSONL payloads.
|
||||
|
||||
- [x] Inspect sender code path for direct event/case delivery and retry drain behavior.
|
||||
- [x] Confirm remote Webhook config uses the same URL for `event_url` and `case_url`.
|
||||
- [x] Check sender delivery audit for duplicate receiver `task_id` values.
|
||||
- [x] Check retry queue for pending successful redelivery risk.
|
||||
- [x] Check receiver-side cold-display JSONL for duplicate payloads and duplicate business keys.
|
||||
- [x] Trace the only coarse duplicate-looking case around `batch_000898`.
|
||||
|
||||
### Review
|
||||
|
||||
- Current remote config sends both `batch_event` and `case_event` to `http://10.8.0.23:8080/api/webhook/cold-display-guard`, so one business transition can produce two HTTP POSTs to the same endpoint with different `kind` values.
|
||||
- Sender audit `logs/webhook_delivery.jsonl` contains `3056` records in total; the recent valid deliveries consist of `321` direct `ok` records and `0` retry `ok` records.
|
||||
- Receiver-returned `task_id` values are unique: `321` unique task IDs and `0` duplicate task IDs.
|
||||
- Retry queue has `547` latest retry items, all `dead_letter`; there are no pending retries.
|
||||
- Receiver-side `video-recognition` cold-display files for `2026-06-15` contain `181` business payloads; exact payload duplicates are `0`, and fine-grained business key duplicates are `0`.
|
||||
- Sender `events.jsonl` contains `3325` events; duplicate `(batch_id, event, ts, zone_id)` keys are `0`.
|
||||
- The only coarse duplicate-looking receiver entry was `batch_000898` at `13:20:26`: the same frame emitted `time_pre_warning` and `pre_warning_handled`, which produced separate `case_event` actions `created` and `handled`. This is not the same Webhook request repeated.
|
||||
|
||||
@@ -252,6 +252,64 @@ class AlarmSnapshotTests(unittest.TestCase):
|
||||
self.assertNotEqual(encoded_frames[0].rgb, source_frame.rgb)
|
||||
self.assertNotEqual(encoded_frames[0].pixel(1, 1), (0, 0, 0))
|
||||
|
||||
def test_capture_alert_snapshot_only_draws_alert_event_zones(self) -> None:
    """An alert for zone "1" must leave zone "2" and the trash ROI undrawn.

    The frame starts fully black. After capture, a pixel inside zone "1"
    must have changed, while pixels inside zone "2" and inside the trash
    ROI must still be black (assuming the normalized polygon coordinates
    scale by the 30x20 frame size).
    """
    black = (0, 0, 0)
    frames_seen_by_encoder: list[Frame] = []

    def fake_encode(frame: Frame, timeout_seconds: float) -> bytes:
        # Keep the frame handed to the encoder so its pixels can be inspected.
        frames_seen_by_encoder.append(frame)
        return b"jpeg-bytes"

    def fake_upload(
        image_bytes: bytes,
        *,
        file_name: str,
        object_key_hint: str,
        settings,
        post_json_request=None,
        post_multipart_request=None,
    ) -> dict[str, object]:
        return {"status": "uploaded", "object_key": "uploads/alarms/zone-only.jpg", "file_name": file_name}

    width, height = 30, 20
    source_frame = Frame(width=width, height=height, rgb=b"\x00\x00\x00" * (width * height))
    zone_one_alarm = {
        "event": "time_alarm",
        "severity": "alarm",
        "batch_id": "batch_1",
        "camera_id": "cam_1",
        "zone_id": "1",
        "ts": "2026-06-09T09:00:00+00:00",
    }
    left_zone = {
        "id": "1",
        "label": "区域 1",
        "polygon": [[0.00, 0.00], [0.45, 0.00], [0.45, 1.00], [0.00, 1.00]],
    }
    right_zone = {
        "id": "2",
        "label": "区域 2",
        "polygon": [[0.55, 0.00], [1.00, 0.00], [1.00, 1.00], [0.55, 1.00]],
    }
    config = {
        "alarm_snapshot_upload": {"enabled": True},
        "zones": [left_zone, right_zone],
        "trash": {"roi": [[0.45, 0.50], [0.55, 0.50], [0.55, 1.00], [0.45, 1.00]]},
    }

    result = capture_alert_snapshot(
        source_frame,
        [zone_one_alarm],
        config,
        now=datetime(2026, 6, 9, 9, 0, tzinfo=UTC),
        jpeg_encoder=fake_encode,
        uploader=fake_upload,
    )

    self.assertEqual(result["status"], "uploaded")
    self.assertEqual(len(frames_seen_by_encoder), 1)
    annotated = frames_seen_by_encoder[0]
    # Inside zone "1": the overlay must have touched this pixel.
    self.assertNotEqual(annotated.pixel(5, 10), black)
    # Inside zone "2": not part of the alert, so it stays untouched.
    self.assertEqual(annotated.pixel(25, 10), black)
    # Inside the trash ROI: excluded from alert uploads.
    self.assertEqual(annotated.pixel(15, 15), black)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -6,7 +6,7 @@ import unittest
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from cold_display_guard.cases import CaseStore
|
||||
from cold_display_guard.cases import CaseStore, append_case_snapshots, load_case_snapshots
|
||||
from cold_display_guard.main import (
|
||||
case_sink_path,
|
||||
capture_runtime_alarm_snapshot,
|
||||
@@ -67,6 +67,61 @@ class RuntimeRestoreTests(unittest.TestCase):
|
||||
self.assertEqual(written[0]["case_type"], "time_alarm")
|
||||
self.assertEqual(written[0]["case_status"], "open")
|
||||
|
||||
def test_persist_case_updates_preserves_api_handled_snapshot(self) -> None:
    """A handled snapshot written by another store must survive later runtime events.

    Sequence: the runtime store creates a case, a second store loaded from
    the same file marks it handled and appends that snapshot, then the
    runtime persists a later event for the same batch. No new snapshot may
    be produced and the latest case on disk must still be the manual
    ``handled`` one.
    """
    alarm_event = {
        "event": "time_alarm",
        "ts": datetime(2026, 6, 9, 9, 0, tzinfo=UTC).isoformat(),
        "batch_id": "batch_000001",
        "camera_id": "cam_01",
        "zone_id": "1",
        "zone_label": "区域 1",
        "severity": "alarm",
        "state": "alerted",
    }
    later_event = {
        "event": "batch_pending_disposal",
        "ts": datetime(2026, 6, 9, 9, 6, tzinfo=UTC).isoformat(),
        "batch_id": "batch_000001",
        "camera_id": "cam_01",
        "zone_id": "1",
        "zone_label": "区域 1",
        "severity": "warning",
        "state": "pending_disposal",
    }

    with tempfile.TemporaryDirectory() as tmpdir:
        path = Path(tmpdir) / "cases.jsonl"
        runtime_store = CaseStore()

        # Step 1: the runtime opens a case for the alarm.
        created = persist_case_updates(runtime_store, path, [alarm_event])[0]

        # Step 2: a separate store (standing in for the API) marks it handled.
        api_store = CaseStore(load_case_snapshots(path))
        handled_snapshot = api_store.mark_handled(
            str(created["case_id"]),
            handled_at=datetime(2026, 6, 9, 9, 5, tzinfo=UTC),
            handled_by="alice",
            handled_source="manual",
        )
        append_case_snapshots(path, [handled_snapshot])

        # Step 3: the runtime, still holding its original store, persists a later event.
        snapshots = persist_case_updates(runtime_store, path, [later_event])
        latest = CaseStore(load_case_snapshots(path)).latest_cases()[0]

    self.assertEqual(snapshots, [])
    self.assertEqual(latest["case_status"], "handled")
    self.assertEqual(latest["handled_source"], "manual")
|
||||
|
||||
def test_deliver_runtime_webhooks_sends_event_and_case_payloads(self) -> None:
|
||||
deliveries: list[tuple[str, dict[str, object]]] = []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user