feat: add webhook retry queue

This commit is contained in:
2026-06-09 11:32:34 +08:00
parent 81f170924c
commit 8f516fdc01
12 changed files with 940 additions and 74 deletions

View File

@@ -9,7 +9,9 @@ from pathlib import Path
from cold_display_guard.webhooks import (
build_batch_event_payload,
build_case_event_payload,
drain_webhook_retries,
load_webhook_settings,
load_retry_snapshots,
send_batch_event_webhooks,
send_case_webhooks,
)
@@ -29,6 +31,10 @@ class WebhookTests(unittest.TestCase):
"callback_token": "secret",
"connect_timeout_seconds": 4,
"read_timeout_seconds": 6,
"retry_max_attempts": 4,
"retry_backoff_seconds": 15,
"retry_max_backoff_seconds": 90,
"retry_batch_limit": 8,
}
}
)
@@ -39,6 +45,10 @@ class WebhookTests(unittest.TestCase):
self.assertEqual(settings.callback_token, "secret")
self.assertEqual(settings.connect_timeout_seconds, 4)
self.assertEqual(settings.read_timeout_seconds, 6)
self.assertEqual(settings.retry_max_attempts, 4)
self.assertEqual(settings.retry_backoff_seconds, 15)
self.assertEqual(settings.retry_max_backoff_seconds, 90)
self.assertEqual(settings.retry_batch_limit, 8)
def test_build_batch_event_payload_wraps_runtime_event(self) -> None:
payload = build_batch_event_payload(
@@ -182,6 +192,139 @@ class WebhookTests(unittest.TestCase):
self.assertEqual(logged[0]["target"], "batch_event")
self.assertIn("network down", logged[0]["message"])
def test_non_2xx_delivery_is_enqueued_for_retry(self) -> None:
    # A 503 reply on the first delivery attempt must show up in the audit
    # log as an error and leave a pending retry snapshot for the same
    # target and URL.
    fired_at = datetime(2026, 6, 9, 9, 0, tzinfo=UTC)
    alarm_event = {
        "event": "time_alarm",
        "ts": fired_at.isoformat(),
        "batch_id": "batch_000001",
        "camera_id": "cam_01",
        "zone_id": "1",
        "zone_label": "区域 1",
        "severity": "alarm",
        "state": "alerted",
    }
    config = {
        "webhooks": {
            "enabled": True,
            "event_url": "https://example.com/events",
            "retry_max_attempts": 3,
            "retry_backoff_seconds": 30,
        }
    }

    # Stub transport: parameter names are kept as-is in case the sender
    # passes them by keyword.
    def reply_unavailable(url, payload, timeout):
        return (503, "service unavailable")

    with tempfile.TemporaryDirectory() as workdir:
        base = Path(workdir)
        audit_path = base / "webhook_delivery.jsonl"
        retry_path = base / "webhook_retry.jsonl"

        send_batch_event_webhooks(
            [alarm_event],
            config,
            audit_path,
            retry_path=retry_path,
            http_post=reply_unavailable,
            now=fired_at,
        )

        retries = load_retry_snapshots(retry_path)
        audit_lines = audit_path.read_text(encoding="utf-8").splitlines()
        logged = list(map(json.loads, audit_lines))

        # The audit trail records the failed attempt with its status code.
        first_entry = logged[0]
        self.assertEqual(first_entry["status"], "error")
        self.assertEqual(first_entry["status_code"], 503)

        # The newest retry snapshot describes the queued delivery.
        expected_snapshot = (
            ("status", "pending"),
            ("attempt_count", 1),
            ("target", "batch_event"),
            ("url", "https://example.com/events"),
        )
        for field, expected in expected_snapshot:
            self.assertEqual(retries[-1][field], expected)
def test_due_retry_is_marked_delivered_after_success(self) -> None:
    # After a failed first delivery, draining the retry queue one minute
    # later with a 200 reply must flip the newest snapshot to "delivered"
    # and count the second attempt.
    first_attempt_at = datetime(2026, 6, 9, 9, 0, tzinfo=UTC)
    drain_at = datetime(2026, 6, 9, 9, 1, tzinfo=UTC)
    alarm_event = {
        "event": "time_alarm",
        "ts": first_attempt_at.isoformat(),
        "batch_id": "batch_000001",
        "camera_id": "cam_01",
        "zone_id": "1",
        "zone_label": "区域 1",
        "severity": "alarm",
        "state": "alerted",
    }
    config = {
        "webhooks": {
            "enabled": True,
            "event_url": "https://example.com/events",
            "retry_max_attempts": 3,
            "retry_backoff_seconds": 30,
        }
    }

    # Stub transports: parameter names are kept as-is in case the sender
    # passes them by keyword.
    def reply_unavailable(url, payload, timeout):
        return (503, "service unavailable")

    def reply_ok(url, payload, timeout):
        return (200, "ok")

    with tempfile.TemporaryDirectory() as workdir:
        base = Path(workdir)
        audit_path = base / "webhook_delivery.jsonl"
        retry_path = base / "webhook_retry.jsonl"

        # Seed the retry queue with one failed delivery.
        send_batch_event_webhooks(
            [alarm_event],
            config,
            audit_path,
            retry_path=retry_path,
            http_post=reply_unavailable,
            now=first_attempt_at,
        )

        drained = drain_webhook_retries(
            config,
            retry_path,
            audit_path,
            http_post=reply_ok,
            now=drain_at,
        )
        retries = load_retry_snapshots(retry_path)

        self.assertEqual(len(drained), 1)
        expected_snapshot = (
            ("status", "delivered"),
            ("attempt_count", 2),
            ("last_status_code", 200),
        )
        for field, expected in expected_snapshot:
            self.assertEqual(retries[-1][field], expected)
def test_retry_reaches_dead_letter_after_attempt_limit(self) -> None:
    # With retry_max_attempts set to 2, a second consecutive 503 during
    # the drain must move the newest snapshot to "dead_letter".
    first_attempt_at = datetime(2026, 6, 9, 9, 0, tzinfo=UTC)
    drain_at = datetime(2026, 6, 9, 9, 1, tzinfo=UTC)
    alarm_event = {
        "event": "time_alarm",
        "ts": first_attempt_at.isoformat(),
        "batch_id": "batch_000001",
        "camera_id": "cam_01",
        "zone_id": "1",
        "zone_label": "区域 1",
        "severity": "alarm",
        "state": "alerted",
    }
    config = {
        "webhooks": {
            "enabled": True,
            "event_url": "https://example.com/events",
            "retry_max_attempts": 2,
            "retry_backoff_seconds": 30,
        }
    }

    # Stub transports: parameter names are kept as-is in case the sender
    # passes them by keyword.
    def reply_unavailable(url, payload, timeout):
        return (503, "service unavailable")

    def reply_still_down(url, payload, timeout):
        return (503, "still down")

    with tempfile.TemporaryDirectory() as workdir:
        base = Path(workdir)
        audit_path = base / "webhook_delivery.jsonl"
        retry_path = base / "webhook_retry.jsonl"

        # Seed the retry queue with one failed delivery (attempt 1 of 2).
        send_batch_event_webhooks(
            [alarm_event],
            config,
            audit_path,
            retry_path=retry_path,
            http_post=reply_unavailable,
            now=first_attempt_at,
        )

        drained = drain_webhook_retries(
            config,
            retry_path,
            audit_path,
            http_post=reply_still_down,
            now=drain_at,
        )
        retries = load_retry_snapshots(retry_path)

        self.assertEqual(len(drained), 1)
        expected_snapshot = (
            ("status", "dead_letter"),
            ("attempt_count", 2),
            ("last_status_code", 503),
        )
        for field, expected in expected_snapshot:
            self.assertEqual(retries[-1][field], expected)
# Run the test suite through unittest's command-line runner when this file
# is executed directly as a script.
if __name__ == "__main__":
    unittest.main()