Initial video AI analysis project

This commit is contained in:
yangyl
2026-06-17 11:33:54 +08:00
commit ef0047af6d
35 changed files with 8613 additions and 0 deletions

309
tests/test_aggregator.py Normal file
View File

@@ -0,0 +1,309 @@
import json
import tempfile
import unittest
from datetime import datetime, timedelta
from pathlib import Path
from video_ai_analysis_poc.aggregator import aggregate_outputs
class AggregatorTests(unittest.TestCase):
    """Tests for ``aggregate_outputs``.

    Every test writes ``video_manifest.jsonl``, ``clip_manifest.jsonl`` and
    ``clip_results.jsonl`` into a temporary output directory, runs the
    aggregator against it, and inspects the JSON files found afterwards.
    All file reads stay inside the ``TemporaryDirectory`` block because the
    directory is removed when that block exits.
    """

    def test_aggregates_video_results_folder_summary_and_merges_adjacent_events(self):
        """Check per-video results, the folder summary and event merging."""
        with tempfile.TemporaryDirectory() as tmp:
            output_dir = Path(tmp)
            video_a = {
                "video_id": "video-a",
                "path": "/videos/a.mp4",
                "status": "probed",
                "duration_seconds": 40.0,
                "codec_name": "h264",
                "width": 1920,
                "height": 1080,
            }
            video_b = {
                "video_id": "video-b",
                "path": "/videos/b.mp4",
                "status": "probe_failed",
                "last_error": "bad file",
            }
            self._write_jsonl(output_dir / "video_manifest.jsonl", [video_a, video_b])

            clips = [
                self._clip("video-a", "video-a_c000001", 0.0, 10.0),
                self._clip("video-a", "video-a_c000002", 12.0, 20.0),
                self._clip("video-a", "video-a_c000003", 21.0, 30.0),
                self._clip("video-b", "video-b_c000001", 0.0, 10.0),
            ]
            self._write_jsonl(output_dir / "clip_manifest.jsonl", clips)

            results = [
                self._result(
                    "video-a",
                    "video-a_c000001",
                    "/videos/a.mp4",
                    0.0,
                    10.0,
                    "09:00:01",
                    [self._event("queue_detected", 1.0, 10.0)],
                ),
                self._result(
                    "video-a",
                    "video-a_c000002",
                    "/videos/a.mp4",
                    12.0,
                    20.0,
                    "09:00:13",
                    [self._event("queue_detected", 12.0, 16.0)],
                ),
                self._result(
                    "video-a",
                    "video-a_c000003",
                    "/videos/a.mp4",
                    21.0,
                    30.0,
                    "09:00:22",
                    [self._event("staff_absent", 21.0, 25.0)],
                ),
                # Hand-written failed clip for video-b: no events, error set.
                {
                    "schema_version": "local-batch-v1",
                    "video_id": "video-b",
                    "video_path": "/videos/b.mp4",
                    "clip_id": "video-b_c000001",
                    "status": "inference_failed",
                    "monitoring_timeline": {
                        "video_start_time": None,
                        "clip_start_seconds": 0.0,
                        "clip_end_seconds": 10.0,
                        "frame_times": [],
                        "screen_time": "",
                    },
                    "events": [],
                    "raw_response": "",
                    "processing": {},
                    "error": "offline",
                },
            ]
            self._write_jsonl(output_dir / "clip_results.jsonl", results)

            aggregate_outputs(
                output_dir,
                {
                    "input": {"dir": "/videos"},
                    "schema": {"version": "local-batch-v1", "merge_gap_seconds": 3},
                    "runtime": {"timezone": "Asia/Shanghai"},
                },
            )

            video_result_path = output_dir / "videos" / "video-a" / "video_result.json"
            self.assertTrue(video_result_path.exists())
            video_result = self._read_json(video_result_path)
            self.assertEqual(video_result["schema_version"], "local-batch-v1")
            self.assertEqual(video_result["video_id"], "video-a")
            self.assertEqual(video_result["video_path"], "/videos/a.mp4")
            self.assertEqual(video_result["probe"]["codec_name"], "h264")
            self.assertIsNone(video_result["monitoring_timeline"]["video_start_time"])
            self.assertEqual(
                video_result["monitoring_timeline"]["video_duration_seconds"], 40.0
            )
            self.assertEqual(video_result["clip_count"], 3)
            self.assertEqual(video_result["failed_clip_count"], 0)
            self.assertEqual(
                video_result["event_counts"], {"queue_detected": 1, "staff_absent": 1}
            )
            self.assertEqual(len(video_result["events"]), 2)

            # The two queue_detected events (1-10 s and 12-16 s) are expected
            # to collapse into one event spanning 1-16 s.
            merged = video_result["events"][0]
            self.assertEqual(merged["event_type"], "queue_detected")
            self.assertEqual(merged["start_offset_seconds"], 1.0)
            self.assertEqual(merged["end_offset_seconds"], 16.0)
            self.assertEqual(merged["screen_times"], ["09:00:01", "09:00:13"])
            self.assertEqual(
                merged["evidence"]["clip_ids"], ["video-a_c000001", "video-a_c000002"]
            )
            self.assertEqual(
                [
                    clip["clip_start_beijing_time"]
                    for clip in merged["evidence"]["clips"]
                ],
                ["2026-06-15 07:00:00", "2026-06-15 07:00:12"],
            )
            self.assertEqual(
                [
                    clip["clip_end_beijing_time"]
                    for clip in merged["evidence"]["clips"]
                ],
                ["2026-06-15 07:00:10", "2026-06-15 07:00:20"],
            )
            self.assertEqual(
                video_result["outputs"]["clip_results_jsonl"], "clip_results.jsonl"
            )
            self.assertIn("started_at", video_result["processing"])
            self.assertIn("finished_at", video_result["processing"])

            failed_video_result = self._read_json(
                output_dir / "videos" / "video-b" / "video_result.json"
            )
            self.assertEqual(failed_video_result["clip_count"], 1)
            self.assertEqual(failed_video_result["failed_clip_count"], 1)
            self.assertEqual(failed_video_result["event_counts"], {})

            folder_summary = self._read_json(output_dir / "folder_summary.json")
            self.assertEqual(folder_summary["schema_version"], "local-batch-v1")
            self.assertEqual(folder_summary["input_dir"], "/videos")
            self.assertEqual(folder_summary["video_count"], 2)
            self.assertEqual(folder_summary["processed_video_count"], 1)
            self.assertEqual(folder_summary["failed_video_count"], 1)
            self.assertEqual(
                folder_summary["event_counts"], {"queue_detected": 1, "staff_absent": 1}
            )
            self.assertEqual(
                [video["video_id"] for video in folder_summary["videos"]],
                ["video-a", "video-b"],
            )
            self.assertIn("processing", folder_summary)

    def test_ffprobe_start_time_is_not_treated_as_monitoring_timeline_start(self):
        """A manifest ``start_time`` lands in ``probe`` but not in the timeline."""
        with tempfile.TemporaryDirectory() as tmp:
            output_dir = Path(tmp)
            self._write_jsonl(
                output_dir / "video_manifest.jsonl",
                [
                    {
                        "video_id": "video-local",
                        "path": "/videos/local.mp4",
                        "status": "probed",
                        "duration_seconds": 12.0,
                        "start_time": 0.0,
                    }
                ],
            )
            self._write_jsonl(
                output_dir / "clip_manifest.jsonl",
                [self._clip("video-local", "video-local_c000001", 0.0, 10.0)],
            )
            # No clip results at all: the video result must still be written.
            self._write_jsonl(output_dir / "clip_results.jsonl", [])

            aggregate_outputs(
                output_dir,
                {
                    "input": {"dir": "/videos"},
                    "schema": {"version": "local-batch-v1", "merge_gap_seconds": 3},
                },
            )

            video_result = self._read_json(
                output_dir / "videos" / "video-local" / "video_result.json"
            )
            self.assertEqual(video_result["probe"]["start_time"], 0.0)
            self.assertIsNone(video_result["monitoring_timeline"]["video_start_time"])

    def test_does_not_merge_different_event_types_videos_or_large_gaps(self):
        """Events stay separate across a large gap, a type change, or videos."""
        with tempfile.TemporaryDirectory() as tmp:
            output_dir = Path(tmp)
            self._write_jsonl(
                output_dir / "video_manifest.jsonl",
                [
                    {"video_id": "video-a", "path": "/videos/a.mp4", "status": "probed"},
                    {"video_id": "video-b", "path": "/videos/b.mp4", "status": "probed"},
                ],
            )
            self._write_jsonl(
                output_dir / "clip_manifest.jsonl",
                [
                    self._clip("video-a", "a1", 0.0, 10.0),
                    self._clip("video-a", "a2", 40.0, 50.0),
                    self._clip("video-a", "a3", 51.0, 60.0),
                    self._clip("video-b", "b1", 0.0, 10.0),
                ],
            )
            self._write_jsonl(
                output_dir / "clip_results.jsonl",
                [
                    self._result(
                        "video-a", "a1", "/videos/a.mp4", 0.0, 10.0, "",
                        [self._event("queue_detected", 1.0, 5.0)],
                    ),
                    # Same type as a1, but it starts 35 s after a1's event ends.
                    self._result(
                        "video-a", "a2", "/videos/a.mp4", 40.0, 50.0, "",
                        [self._event("queue_detected", 40.0, 45.0)],
                    ),
                    # Close in time to a2, but a different event type.
                    self._result(
                        "video-a", "a3", "/videos/a.mp4", 51.0, 60.0, "",
                        [self._event("staff_absent", 51.0, 55.0)],
                    ),
                    # Same type and offsets as a1, but another video.
                    self._result(
                        "video-b", "b1", "/videos/b.mp4", 0.0, 10.0, "",
                        [self._event("queue_detected", 1.0, 5.0)],
                    ),
                ],
            )

            aggregate_outputs(
                output_dir,
                {
                    "input": {"dir": "/videos"},
                    "schema": {"version": "local-batch-v1", "merge_gap_seconds": 3},
                },
            )

            video_a = self._read_json(
                output_dir / "videos" / "video-a" / "video_result.json"
            )
            video_b = self._read_json(
                output_dir / "videos" / "video-b" / "video_result.json"
            )
            self.assertEqual(len(video_a["events"]), 3)
            self.assertEqual(
                video_a["event_counts"], {"queue_detected": 2, "staff_absent": 1}
            )
            self.assertEqual(len(video_b["events"]), 1)
            self.assertEqual(video_b["event_counts"], {"queue_detected": 1})

    @staticmethod
    def _event(event_type, start, end):
        """Return an event dict with the given type and start/end offsets."""
        return {
            "event_type": event_type,
            "start_offset_seconds": start,
            "end_offset_seconds": end,
        }

    @staticmethod
    def _read_json(path):
        """Load and return the UTF-8 JSON document stored at ``path``."""
        return json.loads(path.read_text(encoding="utf-8"))

    def _clip(self, video_id, clip_id, start, end):
        """Return a pending clip-manifest record with a single frame at ``start``.

        The timecode strings are fixed placeholders; they do not vary with
        ``start`` or ``end``.
        """
        return {
            "video_id": video_id,
            "clip_id": clip_id,
            "clip_start_seconds": start,
            "clip_end_seconds": end,
            "clip_start_timecode": "00:00:00",
            "clip_end_timecode": "00:00:10",
            "frame_times": [
                {
                    "frame_path": f"frames/{video_id}/{clip_id}.jpg",
                    "offset_seconds": start,
                    "timecode": "00:00:00",
                }
            ],
            "status": "pending",
        }

    def _result(self, video_id, clip_id, video_path, start, end, screen_time, events):
        """Return a successful (``status == "ok"``) clip-result record.

        The ``*_beijing_time`` strings are derived by adding ``start`` / ``end``
        seconds to the fixed base time 2026-06-15 07:00:00.
        """
        base = datetime(2026, 6, 15, 7, 0, 0)
        clip_start_beijing_time = (base + timedelta(seconds=start)).strftime(
            "%Y-%m-%d %H:%M:%S"
        )
        clip_end_beijing_time = (base + timedelta(seconds=end)).strftime(
            "%Y-%m-%d %H:%M:%S"
        )
        return {
            "schema_version": "local-batch-v1",
            "video_id": video_id,
            "video_path": video_path,
            "clip_id": clip_id,
            "status": "ok",
            "monitoring_timeline": {
                "video_start_time": None,
                "clip_start_seconds": start,
                "clip_end_seconds": end,
                "clip_start_timecode": "00:00:00",
                "clip_end_timecode": "00:00:10",
                "clip_start_beijing_time": clip_start_beijing_time,
                "clip_end_beijing_time": clip_end_beijing_time,
                "frame_times": [
                    {
                        "frame_path": f"frames/{video_id}/{clip_id}.jpg",
                        "offset_seconds": start,
                        "timecode": "00:00:00",
                        "beijing_time": clip_start_beijing_time,
                    }
                ],
                "screen_time": screen_time,
            },
            "events": events,
            "raw_response": "{}",
            "processing": {},
            "error": None,
        }

    def _write_jsonl(self, path, records):
        """Write ``records`` to ``path`` as one sorted-key JSON object per line."""
        path.write_text(
            "".join(json.dumps(record, sort_keys=True) + "\n" for record in records),
            encoding="utf-8",
        )
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

1275
tests/test_cli.py Normal file

File diff suppressed because it is too large. (Load Diff)

167
tests/test_clips.py Normal file
View File

@@ -0,0 +1,167 @@
import json
import tempfile
import unittest
from pathlib import Path
from video_ai_analysis_poc.clips import build_clip_records, build_clip_records_from_manifest
class ClipTests(unittest.TestCase):
    """Tests for ``build_clip_records`` and ``build_clip_records_from_manifest``."""

    @staticmethod
    def _sampled_frames(count, *, with_beijing_time=False):
        """Return ``count`` sampled frame records for ``video-abc``, one per second.

        Frame ``index`` (0-based) has ``offset_seconds == float(index)``. When
        ``with_beijing_time`` is true each record also carries a
        ``beijing_time`` string starting at 2026-06-15 07:00:00.
        """
        frames = []
        for index in range(count):
            frame = {
                "video_id": "video-abc",
                "frame_id": f"video-abc_f{index + 1:06d}",
                "frame_path": f"frames/video-abc/{index + 1:06d}.jpg",
                "offset_seconds": float(index),
                "timecode": f"00:00:{index:02d}",
                "pts_time": float(index),
            }
            if with_beijing_time:
                frame["beijing_time"] = f"2026-06-15 07:00:{index:02d}"
            frame["status"] = "sampled"
            frames.append(frame)
        return frames

    def test_build_clip_records_uniformly_samples_frames_per_clip(self):
        """Ten 1-second frames yield one clip sampled at offsets 0, 3, 6 and 9."""
        clips = build_clip_records(
            self._sampled_frames(10),
            {
                "length_seconds": 10,
                "stride_seconds": 10,
                "frames_per_clip": 4,
                "min_frames_per_clip": 2,
            },
        )

        self.assertEqual(len(clips), 1)
        self.assertEqual(clips[0]["clip_id"], "video-abc_c000001")
        self.assertEqual(clips[0]["clip_start_seconds"], 0.0)
        self.assertEqual(clips[0]["clip_end_seconds"], 10.0)
        self.assertEqual(
            [frame["offset_seconds"] for frame in clips[0]["frame_times"]],
            [0.0, 3.0, 6.0, 9.0],
        )
        self.assertEqual(clips[0]["status"], "pending")
        self.assertEqual(clips[0]["retry_count"], 0)
        self.assertIsNone(clips[0]["last_error"])

    def test_tail_clip_end_is_truncated_to_last_frame_interval(self):
        """With 15 frames the second clip ends at 15 s rather than 20 s."""
        clips = build_clip_records(
            self._sampled_frames(15),
            {
                "length_seconds": 10,
                "stride_seconds": 10,
                "frames_per_clip": 8,
                "min_frames_per_clip": 4,
            },
        )

        self.assertEqual(len(clips), 2)
        self.assertEqual(clips[1]["clip_start_seconds"], 10.0)
        self.assertEqual(clips[1]["clip_end_seconds"], 15.0)
        self.assertEqual(clips[1]["clip_end_timecode"], "00:00:15")

    def test_build_clip_records_adds_beijing_time_range_and_frame_times(self):
        """Frame ``beijing_time`` values carry over to the clip and its frames."""
        clips = build_clip_records(
            self._sampled_frames(10, with_beijing_time=True),
            {
                "length_seconds": 10,
                "stride_seconds": 10,
                "frames_per_clip": 4,
                "min_frames_per_clip": 2,
            },
        )

        self.assertEqual(clips[0]["clip_start_beijing_time"], "2026-06-15 07:00:00")
        self.assertEqual(clips[0]["clip_end_beijing_time"], "2026-06-15 07:00:10")
        self.assertEqual(
            [frame["beijing_time"] for frame in clips[0]["frame_times"]],
            [
                "2026-06-15 07:00:00",
                "2026-06-15 07:00:03",
                "2026-06-15 07:00:06",
                "2026-06-15 07:00:09",
            ],
        )

    def test_build_clip_records_from_manifest_skips_failed_frames_and_writes_jsonl(self):
        """A ``sample_failed`` row is ignored and the clips are persisted as JSONL."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            frame_manifest = root / "frame_manifest.jsonl"
            clip_manifest = root / "clip_manifest.jsonl"

            records = self._sampled_frames(4)
            # One failure row with no frame data; it must not count as a frame.
            records.append(
                {
                    "video_id": "video-abc",
                    "frame_id": None,
                    "frame_path": None,
                    "offset_seconds": None,
                    "timecode": None,
                    "pts_time": None,
                    "status": "sample_failed",
                    "last_error": "bad decode",
                }
            )
            frame_manifest.write_text(
                "\n".join(json.dumps(record, sort_keys=True) for record in records) + "\n",
                encoding="utf-8",
            )

            clips = build_clip_records_from_manifest(
                frame_manifest,
                clip_manifest,
                {
                    "length_seconds": 10,
                    "stride_seconds": 10,
                    "frames_per_clip": 8,
                    "min_frames_per_clip": 4,
                },
            )

            self.assertEqual(len(clips), 1)
            self.assertEqual(len(clips[0]["frame_times"]), 4)
            persisted = [
                json.loads(line)
                for line in clip_manifest.read_text(encoding="utf-8").splitlines()
            ]
            self.assertEqual(persisted, clips)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

240
tests/test_config.py Normal file
View File

@@ -0,0 +1,240 @@
import tempfile
import unittest
from pathlib import Path
from video_ai_analysis_poc.config import load_config
class ConfigTests(unittest.TestCase):
    """Tests for ``load_config``.

    Each test writes a small YAML document into a temporary directory and
    loads it. The YAML fixtures use two-space nesting so that nested mappings
    and list items sit at distinct indentation levels.
    """

    @staticmethod
    def _write_config(root, lines):
        """Write ``lines`` joined by newlines to ``root/local_batch.yaml``.

        Returns the path of the written file.
        """
        config_path = root / "local_batch.yaml"
        config_path.write_text("\n".join(lines), encoding="utf-8")
        return config_path

    def test_loads_local_batch_yaml_and_applies_cli_overrides(self):
        """Keyword overrides replace the YAML input/output dirs; other keys load as-is."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "videos"
            output_dir = root / "out"
            override_input = root / "override-videos"
            override_output = root / "override-out"
            input_dir.mkdir()
            override_input.mkdir()
            config_path = self._write_config(
                root,
                [
                    "input:",
                    f"  dir: {input_dir}",
                    "  recursive: false",
                    '  extensions: [".mp4", ".mov"]',
                    "output:",
                    f"  dir: {output_dir}",
                    "  overwrite: false",
                    "ffprobe:",
                    "  timeout_seconds: 5",
                ],
            )

            config = load_config(
                config_path,
                input_dir=override_input,
                output_dir=override_output,
            )

            self.assertEqual(config["input"]["dir"], str(override_input.resolve()))
            self.assertEqual(config["output"]["dir"], str(override_output.resolve()))
            self.assertFalse(config["input"]["recursive"])
            self.assertEqual(config["input"]["extensions"], [".mp4", ".mov"])
            self.assertEqual(config["ffprobe"]["timeout_seconds"], 5)

    def test_rejects_output_dir_equal_to_input_dir(self):
        """Using the same directory for input and output raises ``ValueError``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "videos"
            input_dir.mkdir()
            config_path = self._write_config(
                root,
                [
                    "input:",
                    f"  dir: {input_dir}",
                    "output:",
                    f"  dir: {input_dir}",
                ],
            )

            with self.assertRaisesRegex(ValueError, "output dir must not equal input dir"):
                load_config(config_path)

    def test_rejects_output_dir_inside_reference_project(self):
        """An output dir under the forbidden reference project raises ``ValueError``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "videos"
            input_dir.mkdir()
            # NOTE(review): this absolute path is machine-specific; presumably
            # it must match a forbidden directory known to the config module.
            # Confirm, and consider importing that constant instead.
            forbidden_output = (
                Path("/Users/yoilun/AI-train/zhengxin-vlm-0413")
                / "outputs"
                / "local-batch"
            )
            config_path = self._write_config(
                root,
                [
                    "input:",
                    f"  dir: {input_dir}",
                    "output:",
                    f"  dir: {forbidden_output}",
                ],
            )

            with self.assertRaisesRegex(
                ValueError, "output dir must not be inside forbidden reference dir"
            ):
                load_config(config_path)

    def test_loads_nested_mapping_values(self):
        """A mapping nested two levels deep loads as a nested dict."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "videos"
            output_dir = root / "output"
            input_dir.mkdir()
            config_path = self._write_config(
                root,
                [
                    "input:",
                    f"  dir: {input_dir}",
                    "output:",
                    f"  dir: {output_dir}",
                    "ffmpeg:",
                    "  codec_decoders:",
                    "    h264: h264_cuvid",
                    "    hevc: hevc_cuvid",
                ],
            )

            config = load_config(config_path)

            self.assertEqual(
                config["ffmpeg"]["codec_decoders"],
                {"h264": "h264_cuvid", "hevc": "hevc_cuvid"},
            )

    def test_loads_prompt_block_scalar_values(self):
        """A ``>-`` block scalar and a single-quoted scalar both load as strings."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "videos"
            output_dir = root / "output"
            input_dir.mkdir()
            config_path = self._write_config(
                root,
                [
                    "input:",
                    f"  dir: {input_dir}",
                    "output:",
                    f"  dir: {output_dir}",
                    "prompt:",
                    "  system: >-",
                    "    First instruction.",
                    "    Second instruction.",
                    "",
                    "    Final instruction.",
                    "  user: 'Return strict JSON.'",
                ],
            )

            config = load_config(config_path)

            # NOTE(review): the expected value keeps every line break, whereas
            # a standard YAML folded scalar (``>``) would join adjacent lines
            # with a space. Confirm this is the loader's intended behavior.
            self.assertEqual(
                config["prompt"]["system"],
                "First instruction.\nSecond instruction.\n\nFinal instruction.",
            )
            self.assertEqual(config["prompt"]["user"], "Return strict JSON.")

    def test_defaults_source_mode_to_local_and_hik_cloud_section(self):
        """Without ``source``/``hik_cloud`` keys the loaded config still has them."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "videos"
            output_dir = root / "output"
            input_dir.mkdir()
            config_path = self._write_config(
                root,
                [
                    "input:",
                    f"  dir: {input_dir}",
                    "output:",
                    f"  dir: {output_dir}",
                ],
            )

            config = load_config(config_path)

            self.assertEqual(config["source"]["mode"], "local")
            self.assertIn("devices", config["hik_cloud"])
            self.assertIn("time_ranges", config["hik_cloud"])

    def test_loads_hik_cloud_devices_and_time_ranges_as_list_of_mappings(self):
        """``devices`` and ``time_ranges`` load as lists of dicts."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "videos"
            output_dir = root / "output"
            input_dir.mkdir()
            config_path = self._write_config(
                root,
                [
                    "input:",
                    f"  dir: {input_dir}",
                    "output:",
                    f"  dir: {output_dir}",
                    "source:",
                    "  mode: hik_cloud",
                    "hik_cloud:",
                    "  devices:",
                    "    - device_serial: EXAMPLE_DEVICE_SERIAL",
                    "      channel_no: 1",
                    "      name: front",
                    "  time_ranges:",
                    '    - begin: "2026-02-03 09:00:00"',
                    '      end: "2026-02-03 10:30:00"',
                ],
            )

            config = load_config(config_path)

            self.assertEqual(config["source"]["mode"], "hik_cloud")
            self.assertEqual(
                config["hik_cloud"]["devices"],
                [
                    {
                        "device_serial": "EXAMPLE_DEVICE_SERIAL",
                        "channel_no": 1,
                        "name": "front",
                    }
                ],
            )
            self.assertEqual(
                config["hik_cloud"]["time_ranges"],
                [
                    {
                        "begin": "2026-02-03 09:00:00",
                        "end": "2026-02-03 10:30:00",
                    }
                ],
            )
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

41
tests/test_discovery.py Normal file
View File

@@ -0,0 +1,41 @@
import tempfile
import unittest
from pathlib import Path
from video_ai_analysis_poc.discovery import discover_videos
class DiscoveryTests(unittest.TestCase):
    """Tests for ``discover_videos`` over a temporary directory tree."""

    def test_discovers_supported_extensions_without_recursion(self):
        """Only the top-level file with a supported extension is returned."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            nested = root / "nested"
            nested.mkdir()
            # Upper-case suffix: expected to match the lower-case ".mp4" entry.
            supported = root / "a.MP4"
            unsupported = root / "notes.txt"
            nested_video = nested / "b.mov"
            supported.write_text("not a real video", encoding="utf-8")
            unsupported.write_text("ignore me", encoding="utf-8")
            nested_video.write_text("not a real video", encoding="utf-8")

            videos = discover_videos(root, [".mp4", ".mov"], recursive=False)

            self.assertEqual(videos, [supported])

    def test_discovers_supported_extensions_recursively_sorted(self):
        """With recursion the nested video is included after the top-level one."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            nested = root / "nested"
            nested.mkdir()
            first = root / "a.mp4"
            second = nested / "b.mov"
            first.write_text("x", encoding="utf-8")
            second.write_text("x", encoding="utf-8")

            videos = discover_videos(root, [".mp4", ".mov"], recursive=True)

            self.assertEqual(videos, [first, second])
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,357 @@
import json
import subprocess
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from video_ai_analysis_poc.ffmpeg_sampler import (
build_sample_command,
sample_video_frames,
)
class FfmpegSamplerTests(unittest.TestCase):
    """Tests for ``build_sample_command`` and ``sample_video_frames``.

    ``subprocess.run`` is patched in every ``sample_video_frames`` test, so no
    real ffmpeg process is started; the fakes write placeholder JPEG files
    where the sampler is expected to look for frames.
    """

    @staticmethod
    def _sampler_config(codec_decoders, **extra):
        """Return the shared sampler settings plus any ``extra`` keys.

        ``codec_decoders`` maps codec names to decoder names and is copied
        into the returned dict.
        """
        config = {
            "prefer_nvdec": True,
            "allow_cpu_fallback": False,
            "hwaccel": "cuda",
            "codec_decoders": dict(codec_decoders),
            "frame_fps": 1,
            "frame_width": 640,
            "jpeg_quality": 4,
        }
        config.update(extra)
        return config

    @staticmethod
    def _fake_ffmpeg_run(frame_dir, frame_count, *, stderr="", captured=None):
        """Build a successful stand-in for ``subprocess.run``.

        The returned callable writes ``frame_count`` placeholder JPEGs named
        ``000001.jpg``, ``000002.jpg``, ... into ``frame_dir`` and returns a
        ``CompletedProcess`` with return code 0 and the given ``stderr``.
        When ``captured`` is a list, the command is appended to it.
        """

        def run(command, *args, **kwargs):
            if captured is not None:
                captured.extend(command)
            frame_dir.mkdir(parents=True, exist_ok=True)
            for index in range(1, frame_count + 1):
                (frame_dir / f"{index:06d}.jpg").write_bytes(b"jpg")
            return subprocess.CompletedProcess(
                args=command,
                returncode=0,
                stdout="",
                stderr=stderr,
            )

        return run

    @staticmethod
    def _read_manifest(manifest_path):
        """Return the JSON objects stored one per line in ``manifest_path``."""
        return [
            json.loads(line)
            for line in manifest_path.read_text(encoding="utf-8").splitlines()
        ]

    def test_build_sample_command_uses_nvdec_decoder_for_h264(self):
        """An h264 input gets the cuda hwaccel, ``h264_cuvid`` and the frame pattern."""
        with tempfile.TemporaryDirectory() as tmp:
            output_dir = Path(tmp) / "output"
            command = build_sample_command(
                Path("/tmp/input.mp4"),
                output_dir,
                "video-abc",
                self._sampler_config({"h264": "h264_cuvid", "hevc": "hevc_cuvid"}),
                codec_name="h264",
            )

            self.assertIn("-hwaccel", command)
            self.assertIn("cuda", command)
            self.assertIn("-c:v", command)
            self.assertIn("h264_cuvid", command)
            self.assertEqual(
                command[-1], str(output_dir / "frames" / "video-abc" / "%06d.jpg")
            )

    def test_build_sample_command_uses_nvdec_decoder_for_hevc(self):
        """An hevc input gets the cuda hwaccel and ``hevc_cuvid``."""
        with tempfile.TemporaryDirectory() as tmp:
            command = build_sample_command(
                Path("/tmp/input.mp4"),
                Path(tmp) / "output",
                "video-abc",
                self._sampler_config({"h264": "h264_cuvid", "hevc": "hevc_cuvid"}),
                codec_name="hevc",
            )

            self.assertIn("-hwaccel", command)
            self.assertIn("cuda", command)
            self.assertIn("-c:v", command)
            self.assertIn("hevc_cuvid", command)

    def test_build_sample_command_refuses_cpu_fallback_by_default(self):
        """A codec without a configured decoder raises when CPU fallback is off."""
        with tempfile.TemporaryDirectory() as tmp:
            with self.assertRaisesRegex(ValueError, "NVDEC decoder is required"):
                build_sample_command(
                    Path("/tmp/input.mp4"),
                    Path(tmp),
                    "video-abc",
                    {
                        "prefer_nvdec": True,
                        "allow_cpu_fallback": False,
                        "codec_decoders": {"h264": "h264_cuvid", "hevc": "hevc_cuvid"},
                    },
                    codec_name="vp9",
                )

    def test_sample_video_frames_writes_structured_failure_record(self):
        """A failing ffmpeg run yields one ``sample_failed`` record, also persisted."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            manifest_path = root / "frame_manifest.jsonl"
            failure = subprocess.CalledProcessError(
                returncode=1,
                cmd=["ffmpeg"],
                stderr="No decoder h264_cuvid",
            )

            with patch("subprocess.run", side_effect=failure):
                records = sample_video_frames(
                    {
                        "video_id": "video-abc",
                        "path": str(root / "input.mp4"),
                        "codec_name": "h264",
                    },
                    root,
                    self._sampler_config(
                        {"h264": "h264_cuvid"}, timeout_seconds_per_video=30
                    ),
                    manifest_path=manifest_path,
                )

            self.assertEqual(len(records), 1)
            self.assertEqual(records[0]["video_id"], "video-abc")
            self.assertEqual(records[0]["status"], "sample_failed")
            self.assertIn("h264_cuvid", records[0]["last_error"])
            self.assertEqual(self._read_manifest(manifest_path), records)

    def test_sample_video_frames_persists_success_nvdec_evidence(self):
        """A successful run records decoder, hwaccel, command and stderr summary."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            manifest_path = root / "frame_manifest.jsonl"
            video_id = "video-abc"
            frame_dir = root / "frames" / video_id
            run_success = self._fake_ffmpeg_run(
                frame_dir, 1, stderr="Using decoder h264_cuvid with hwaccel cuda"
            )

            with patch("subprocess.run", side_effect=run_success):
                records = sample_video_frames(
                    {
                        "video_id": video_id,
                        "path": str(root / "input.mp4"),
                        "codec_name": "h264",
                    },
                    root,
                    self._sampler_config(
                        {"h264": "h264_cuvid"}, timeout_seconds_per_video=30
                    ),
                    manifest_path=manifest_path,
                )

            self.assertEqual(records[0]["status"], "sampled")
            self.assertEqual(records[0]["decoder"], "h264_cuvid")
            self.assertEqual(records[0]["hwaccel"], "cuda")
            self.assertIn("h264_cuvid", records[0]["ffmpeg_command"])
            self.assertIn("Using decoder h264_cuvid", records[0]["stderr_summary"])
            self.assertEqual(self._read_manifest(manifest_path), records)

    def test_sample_video_frames_adds_beijing_time_from_hik_actual_begin(self):
        """``actual_begin`` plus the timezone produce per-frame ``beijing_time``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            video_id = "video-abc"
            frame_dir = root / "frames" / video_id
            run_success = self._fake_ffmpeg_run(frame_dir, 2)

            with patch("subprocess.run", side_effect=run_success):
                records = sample_video_frames(
                    {
                        "video_id": video_id,
                        "path": str(root / "input.mp4"),
                        "codec_name": "h264",
                        "actual_begin": 1781478000,
                        "actual_end": 1781478600,
                    },
                    root,
                    self._sampler_config(
                        {"h264": "h264_cuvid"},
                        timeout_seconds_per_video=30,
                        timezone="Asia/Shanghai",
                    ),
                )

            self.assertEqual(records[0]["beijing_time"], "2026-06-15 07:00:00")
            self.assertEqual(records[1]["beijing_time"], "2026-06-15 07:00:01")

    def test_sample_video_frames_caps_output_frames_to_requested_duration(self):
        """A 600-second requested window at 1 fps passes ``-frames:v 601``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            video_id = "video-abc"
            frame_dir = root / "frames" / video_id
            captured_command = []
            run_success = self._fake_ffmpeg_run(
                frame_dir, 1, captured=captured_command
            )

            with patch("subprocess.run", side_effect=run_success):
                sample_video_frames(
                    {
                        "video_id": video_id,
                        "path": str(root / "input.mp4"),
                        "codec_name": "hevc",
                        "requested_begin": 1000,
                        "requested_end": 1600,
                    },
                    root,
                    self._sampler_config(
                        {"hevc": "hevc_cuvid"}, timeout_seconds_per_video=30
                    ),
                )

            self.assertIn("-frames:v", captured_command)
            frames_flag_index = captured_command.index("-frames:v")
            self.assertEqual(captured_command[frames_flag_index + 1], "601")

    def test_sample_video_frames_limits_decode_window_to_requested_duration(self):
        """``-t 600`` sits between ``-i`` and ``-vf`` despite a far longer probe duration."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            video_id = "video-abc"
            frame_dir = root / "frames" / video_id
            captured_command = []
            run_success = self._fake_ffmpeg_run(
                frame_dir, 1, captured=captured_command
            )

            with patch("subprocess.run", side_effect=run_success):
                sample_video_frames(
                    {
                        "video_id": video_id,
                        "path": str(root / "input.mp4"),
                        "codec_name": "hevc",
                        "requested_begin": 1000,
                        "requested_end": 1600,
                        "duration_seconds": 104259.921,
                    },
                    root,
                    self._sampler_config(
                        {"hevc": "hevc_cuvid"}, timeout_seconds_per_video=30
                    ),
                )

            self.assertIn("-t", captured_command)
            input_index = captured_command.index("-i")
            t_flag_index = captured_command.index("-t")
            vf_index = captured_command.index("-vf")
            self.assertLess(input_index, t_flag_index)
            self.assertLess(t_flag_index, vf_index)
            self.assertEqual(captured_command[t_flag_index + 1], "600")

    def test_sample_video_frames_uses_complete_frames_when_ffmpeg_exits_nonzero(self):
        """All 601 frames are reported as sampled even though ffmpeg exits non-zero."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            video_id = "video-abc"
            frame_dir = root / "frames" / video_id
            manifest_path = root / "frame_manifest.jsonl"

            def run_with_nonzero_exit(command, *args, **kwargs):
                # Write every requested frame first, then fail like a decoder
                # that errors only after the useful output is complete.
                frame_dir.mkdir(parents=True, exist_ok=True)
                for index in range(1, 602):
                    (frame_dir / f"{index:06d}.jpg").write_bytes(b"jpg")
                raise subprocess.CalledProcessError(
                    returncode=1,
                    cmd=command,
                    stderr="trailing decoder error after requested frames",
                )

            with patch("subprocess.run", side_effect=run_with_nonzero_exit):
                records = sample_video_frames(
                    {
                        "video_id": video_id,
                        "path": str(root / "input.mp4"),
                        "codec_name": "hevc",
                        "requested_begin": 1000,
                        "requested_end": 1600,
                    },
                    root,
                    self._sampler_config(
                        {"hevc": "hevc_cuvid"}, timeout_seconds_per_video=30
                    ),
                    manifest_path=manifest_path,
                )

            self.assertEqual(len(records), 601)
            self.assertEqual({record["status"] for record in records}, {"sampled"})
            self.assertIn("-t", records[0]["ffmpeg_command"])
            self.assertIn("trailing decoder error", records[0]["stderr_summary"])
            self.assertEqual(self._read_manifest(manifest_path), records)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

61
tests/test_frames.py Normal file
View File

@@ -0,0 +1,61 @@
import tempfile
import unittest
from pathlib import Path
from video_ai_analysis_poc.frames import build_frame_records, seconds_to_timecode
class FrameTests(unittest.TestCase):
    """Tests for ``seconds_to_timecode`` and ``build_frame_records``."""

    def test_seconds_to_timecode_formats_relative_offsets(self):
        """Offsets format as ``HH:MM:SS``; 65.2 s is expected to give ``00:01:05``."""
        self.assertEqual(seconds_to_timecode(0), "00:00:00")
        self.assertEqual(seconds_to_timecode(65.2), "00:01:05")
        self.assertEqual(seconds_to_timecode(3661), "01:01:01")

    def test_build_frame_records_uses_stable_paths_and_offsets(self):
        """Records carry derived ids, relative paths and 1-second offsets at 1 fps."""
        with tempfile.TemporaryDirectory() as tmp:
            frame_dir = Path(tmp) / "frames" / "video-abc"
            frame_dir.mkdir(parents=True)
            first = frame_dir / "000001.jpg"
            second = frame_dir / "000002.jpg"
            first.write_bytes(b"jpg")
            second.write_bytes(b"jpg")

            records = build_frame_records(
                "video-abc",
                Path(tmp),
                [first, second],
                frame_fps=1,
            )

            self.assertEqual(records[0]["frame_id"], "video-abc_f000001")
            # The stored path is relative to the output root passed above.
            self.assertEqual(records[0]["frame_path"], "frames/video-abc/000001.jpg")
            self.assertEqual(records[0]["offset_seconds"], 0.0)
            self.assertEqual(records[0]["timecode"], "00:00:00")
            self.assertEqual(records[0]["pts_time"], 0.0)
            self.assertEqual(records[0]["status"], "sampled")
            self.assertEqual(records[1]["offset_seconds"], 1.0)

    def test_build_frame_records_adds_beijing_time_from_timeline_epoch(self):
        """A timeline epoch and timezone name add a ``beijing_time`` per frame."""
        with tempfile.TemporaryDirectory() as tmp:
            frame_dir = Path(tmp) / "frames" / "video-abc"
            frame_dir.mkdir(parents=True)
            first = frame_dir / "000001.jpg"
            second = frame_dir / "000002.jpg"
            first.write_bytes(b"jpg")
            second.write_bytes(b"jpg")

            records = build_frame_records(
                "video-abc",
                Path(tmp),
                [first, second],
                frame_fps=1,
                timeline_start_epoch=1781478000,
                timezone_name="Asia/Shanghai",
            )

            self.assertEqual(records[0]["beijing_time"], "2026-06-15 07:00:00")
            self.assertEqual(records[1]["beijing_time"], "2026-06-15 07:00:01")
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

554
tests/test_hik_cloud.py Normal file
View File

@@ -0,0 +1,554 @@
import os
import tempfile
import unittest
from datetime import datetime
from pathlib import Path
from unittest.mock import patch
from zoneinfo import ZoneInfo
from video_ai_analysis_poc import hik_cloud
from video_ai_analysis_poc.hik_cloud import (
build_download_chunks,
request_download_address,
resolve_access_token,
)
from video_ai_analysis_poc.manifest import read_jsonl, write_manifest
class HikCloudTests(unittest.TestCase):
def test_build_download_chunks_defaults_to_600_second_chunks(self):
config = {
"runtime": {"timezone": "Asia/Shanghai"},
"hik_cloud": {
"devices": [
{
"device_serial": "EXAMPLE_DEVICE_SERIAL",
"channel_no": 1,
"name": "front",
}
],
"time_ranges": [
{
"begin": "2026-02-03 09:00:00",
"end": "2026-02-03 10:30:00",
}
],
},
}
chunks = build_download_chunks(config)
requested_begin = int(
datetime(2026, 2, 3, 9, 0, 0, tzinfo=ZoneInfo("Asia/Shanghai")).timestamp()
)
requested_end = int(
datetime(2026, 2, 3, 10, 30, 0, tzinfo=ZoneInfo("Asia/Shanghai")).timestamp()
)
self.assertEqual(len(chunks), 9)
self.assertEqual(chunks[0]["time_begin"], requested_begin)
self.assertEqual(chunks[0]["time_end"], requested_begin + 600)
self.assertEqual(chunks[-1]["time_begin"], requested_begin + 4800)
self.assertEqual(chunks[-1]["time_end"], requested_end)
for chunk in chunks:
self.assertLessEqual(chunk["time_end"] - chunk["time_begin"], 600)
def test_build_download_chunks_allows_explicit_3600_second_chunks(self):
    """Expect ``chunk_seconds=3600`` to split a 90-minute range into two chunks."""
    hik_section = {
        "chunk_seconds": 3600,
        "devices": [{"device_serial": "EXAMPLE_DEVICE_SERIAL", "channel_no": 1}],
        "time_ranges": [
            {"begin": "2026-02-03 09:00:00", "end": "2026-02-03 10:30:00"},
        ],
    }
    config = {"runtime": {"timezone": "Asia/Shanghai"}, "hik_cloud": hik_section}

    chunks = build_download_chunks(config)

    shanghai = ZoneInfo("Asia/Shanghai")
    requested_begin = int(datetime(2026, 2, 3, 9, 0, 0, tzinfo=shanghai).timestamp())
    requested_end = int(datetime(2026, 2, 3, 10, 30, 0, tzinfo=shanghai).timestamp())

    self.assertEqual(len(chunks), 2)
    full_hour, remainder = chunks[0], chunks[1]
    self.assertEqual(full_hour["time_begin"], requested_begin)
    self.assertEqual(full_hour["time_end"], requested_begin + 3600)
    # The second chunk starts where the first ends and is cut at the requested end.
    self.assertEqual(remainder["time_begin"], requested_begin + 3600)
    self.assertEqual(remainder["time_end"], requested_end)
    for piece in chunks:
        span_seconds = piece["time_end"] - piece["time_begin"]
        self.assertLessEqual(span_seconds, 3600)
def test_build_download_chunks_accepts_epoch_time_ranges(self):
    """Expect numeric epoch bounds (an int and a float here) to be chunked like string bounds."""
    hik_section = {
        "devices": [{"device_serial": "EXAMPLE_DEVICE_SERIAL", "channel_no": 1}],
        # One hour expressed as epoch seconds; the end is deliberately a float.
        "time_ranges": [{"begin": 1770080400, "end": 1770084000.0}],
    }

    chunks = build_download_chunks({"hik_cloud": hik_section})

    self.assertEqual(len(chunks), 6)
    opening, closing = chunks[0], chunks[-1]
    self.assertEqual(opening["time_begin"], 1770080400)
    self.assertEqual(opening["time_end"], 1770081000)
    self.assertEqual(closing["time_begin"], 1770083400)
    self.assertEqual(closing["time_end"], 1770084000)
def test_build_download_chunks_rejects_end_before_begin(self):
    """Expect a ``ValueError`` when a time range ends before it begins."""
    reversed_range = {
        "begin": "2026-02-03 10:30:00",
        "end": "2026-02-03 09:00:00",
    }
    config = {
        "hik_cloud": {
            "devices": [{"device_serial": "EXAMPLE_DEVICE_SERIAL", "channel_no": 1}],
            "time_ranges": [reversed_range],
        },
    }

    self.assertRaisesRegex(
        ValueError,
        "end must be after begin",
        build_download_chunks,
        config,
    )
def test_build_download_chunks_rejects_chunk_seconds_over_3600(self):
    """Expect a ``ValueError`` when ``chunk_seconds`` is configured above 3600."""
    oversized_section = {
        "chunk_seconds": 7200,
        "devices": [{"device_serial": "EXAMPLE_DEVICE_SERIAL", "channel_no": 1}],
        "time_ranges": [
            {"begin": "2026-02-03 09:00:00", "end": "2026-02-03 11:30:00"},
        ],
    }
    config = {"hik_cloud": oversized_section}
    expected_message = "chunk_seconds must be less than or equal to 3600"

    with self.assertRaisesRegex(ValueError, expected_message):
        build_download_chunks(config)
def test_resolve_access_token_prefers_literal_token_over_environment(self):
    """Expect the literal ``access_token`` to win when the environment variable is also set."""
    hik_section = {
        "access_token": "DIRECT_TOKEN",
        "access_token_env": "HIK_CLOUD_ACCESS_TOKEN",
    }
    config = {"hik_cloud": hik_section}
    competing_env = {"HIK_CLOUD_ACCESS_TOKEN": "ENV_TOKEN"}

    with patch.dict(os.environ, competing_env):
        token = resolve_access_token(config)

    self.assertEqual(token, "DIRECT_TOKEN")
def test_resolve_access_token_reads_configured_environment_variable(self):
    """Expect the token to come from the variable named by ``access_token_env``."""
    hik_config = {"access_token_env": "HIK_CLOUD_ACCESS_TOKEN"}
    patched_env = {"HIK_CLOUD_ACCESS_TOKEN": "ENV_TOKEN"}

    with patch.dict(os.environ, patched_env):
        token = resolve_access_token(hik_config)

    self.assertEqual(token, "ENV_TOKEN")
def test_resolve_access_token_raises_without_leaking_secret_values(self):
    """Expect a ``ValueError`` that mentions ``access_token`` but no upper-case ``TOKEN`` text."""
    hik_config = {"access_token_env": "HIK_CLOUD_ACCESS_TOKEN"}

    # clear=True empties os.environ for the duration, so no token can be found.
    with patch.dict(os.environ, {}, clear=True), self.assertRaises(ValueError) as raised:
        resolve_access_token(hik_config)

    message = str(raised.exception)
    self.assertIn("access_token", message)
    # "TOKEN" is also part of the configured variable name, so this rules out
    # echoing that name as well as any token value.
    self.assertNotIn("TOKEN", message)
def test_request_download_address_posts_expected_request_and_returns_success(self):
    """Check the outgoing POST (URL, headers, body, timeout) and the mapped success result."""
    begin, end = 1764856787, 1764856978
    chunk = {
        "device_serial": "EXAMPLE_DEVICE_SERIAL",
        "channel_no": 1,
        "requested_begin": begin,
        "requested_end": end,
        "time_begin": begin,
        "time_end": end,
    }
    hik_config = {
        "api_base_url": "https://api2.hik-cloud.com/",
        "download_path": "/v1/carrier/cstorage/open/play/download",
        "access_token": "TOKEN",
        "timeout_seconds": 12,
    }
    recorded = []

    def recording_post(url, json_body, headers, timeout_seconds):
        # Capture every argument so the request can be inspected afterwards.
        recorded.append(
            dict(
                url=url,
                json_body=json_body,
                headers=headers,
                timeout_seconds=timeout_seconds,
            )
        )
        api_data = {
            "url": "https://download.example/video.mp4?sig=abc",
            "actualBeginTime": "1764856787",
            "actualEndTime": "1764856978",
        }
        return {"code": 0, "success": True, "data": api_data}

    result = request_download_address(chunk, hik_config, http_post=recording_post)

    self.assertEqual(len(recorded), 1)
    sent = recorded[0]
    # The base URL ends with "/" and the path starts with "/"; exactly one
    # slash is expected between them.
    self.assertEqual(
        sent["url"],
        "https://api2.hik-cloud.com/v1/carrier/cstorage/open/play/download",
    )
    sent_headers = sent["headers"]
    self.assertEqual(sent_headers["Authorization"], "bearer TOKEN")
    self.assertEqual(sent_headers["Content-Type"], "application/json")
    expected_body = {
        "deviceSerial": "EXAMPLE_DEVICE_SERIAL",
        "channelNo": 1,
        "timeBegin": 1764856787,
        "timeEnd": 1764856978,
    }
    self.assertEqual(sent["json_body"], expected_body)
    self.assertEqual(sent["timeout_seconds"], 12)

    self.assertEqual(result["status"], "address_ok")
    self.assertEqual(result["url"], "https://download.example/video.mp4?sig=abc")
    # The fake response carries the actual times as strings; ints are expected back.
    self.assertEqual(result["actual_begin"], 1764856787)
    self.assertEqual(result["actual_end"], 1764856978)
    self.assertEqual(result["device_serial"], "EXAMPLE_DEVICE_SERIAL")
    self.assertEqual(result["channel_no"], 1)
    self.assertEqual(result["requested_begin"], 1764856787)
    self.assertEqual(result["requested_end"], 1764856978)
def test_request_download_address_returns_no_recording_for_known_empty_code(self):
    """Expect response code 80438027 to be reported as ``no_recording`` with no URL."""
    begin, end = 1764856787, 1764856978
    chunk = {
        "device_serial": "EXAMPLE_DEVICE_SERIAL",
        "channel_no": 1,
        "requested_begin": begin,
        "requested_end": end,
        "time_begin": begin,
        "time_end": end,
    }
    hik_config = {
        "api_base_url": "https://api2.hik-cloud.com",
        "download_path": "/v1/carrier/cstorage/open/play/download",
        "access_token": "TOKEN",
    }

    def empty_recording_post(url, json_body, headers, timeout_seconds):
        return {"code": 80438027, "msg": "no recording"}

    result = request_download_address(
        chunk,
        hik_config,
        http_post=empty_recording_post,
    )

    self.assertEqual(result["status"], "no_recording")
    self.assertEqual(result["code"], 80438027)
    self.assertEqual(result["device_serial"], "EXAMPLE_DEVICE_SERIAL")
    self.assertNotIn("url", result)
def test_request_download_address_returns_sanitized_failure_for_other_codes(self):
    """Expect other codes to give ``address_failed`` with sensitive words scrubbed from the result."""
    begin, end = 1764856787, 1764856978
    chunk = {
        "device_serial": "EXAMPLE_DEVICE_SERIAL",
        "channel_no": 1,
        "requested_begin": begin,
        "requested_end": end,
        "time_begin": begin,
        "time_end": end,
    }
    hik_config = {
        "api_base_url": "https://api2.hik-cloud.com",
        "download_path": "/v1/carrier/cstorage/open/play/download",
        "access_token": "TOKEN",
    }

    def rejecting_post(url, json_body, headers, timeout_seconds):
        # The message deliberately contains the token value and the header name.
        return {"code": 80430002, "msg": "bad TOKEN Authorization request"}

    result = request_download_address(chunk, hik_config, http_post=rejecting_post)

    self.assertEqual(result["status"], "address_failed")
    self.assertEqual(result["code"], 80430002)
    self.assertIn("last_error", result)
    rendered = str(result)
    self.assertNotIn("TOKEN", rendered)
    self.assertNotIn("Authorization", rendered)
def test_download_hik_cloud_recordings_writes_file_records_and_manifest(self):
    """Happy path: one chunk is downloaded, returned as a record and written to the manifest."""
    signed_url = "https://download.example/video.mp4?sign=SECRET&sig=SECRET&TOKEN=SECRET"
    secret_markers = ("sign=", "sig=", "TOKEN")

    with tempfile.TemporaryDirectory() as tmp:
        output_dir = Path(tmp)
        config = _download_config()
        address_calls = []
        download_calls = []

        def answer_address(chunk, hik_config):
            address_calls.append((chunk, hik_config))
            answer = dict(chunk)
            # Actual bounds sit one second inside the requested ones so the two
            # can be told apart in the assertions below.
            answer.update(
                status="address_ok",
                url=signed_url,
                actual_begin=chunk["time_begin"] + 1,
                actual_end=chunk["time_end"] - 1,
            )
            return answer

        def serve_bytes(url, timeout_seconds=None):
            download_calls.append((url, timeout_seconds))
            return b"fake mp4 bytes"

        records = hik_cloud.download_hik_cloud_recordings(
            config,
            output_dir,
            address_client=answer_address,
            download_url=serve_bytes,
        )

        self.assertEqual(len(address_calls), 1)
        self.assertEqual(len(download_calls), 1)
        # 600 is the download_timeout_seconds value set by _download_config().
        self.assertEqual(download_calls[0][1], 600)

        expected_path = output_dir.joinpath(
            "downloads",
            "hik_cloud",
            "EXAMPLE_DEVICE_SERIAL",
            "ch1",
            "EXAMPLE_DEVICE_SERIAL_ch1_1764856787_1764856978.mp4",
        ).resolve(strict=False)
        self.assertEqual(expected_path.read_bytes(), b"fake mp4 bytes")

        self.assertEqual(len(records), 1)
        record = records[0]
        self.assertEqual(record["path"], str(expected_path))
        self.assertEqual(record["source"], "hik_cloud")
        self.assertEqual(
            record["source_path"],
            "hik_cloud://EXAMPLE_DEVICE_SERIAL/ch1/1764856787-1764856978",
        )
        self.assertEqual(record["device_serial"], "EXAMPLE_DEVICE_SERIAL")
        self.assertEqual(record["channel_no"], 1)
        self.assertEqual(record["requested_begin"], 1764856787)
        self.assertEqual(record["requested_end"], 1764856978)
        self.assertEqual(record["actual_begin"], 1764856788)
        self.assertEqual(record["actual_end"], 1764856977)
        self.assertEqual(record["status"], "downloaded")

        manifest = read_jsonl(output_dir / "hik_cloud_download_manifest.jsonl")
        self.assertEqual(len(manifest), 1)
        entry = manifest[0]
        self.assertEqual(entry["status"], "downloaded")
        self.assertIsNone(entry["last_error"])
        self.assertEqual(entry["download_url_host"], "download.example")
        self.assertEqual(entry["path"], str(expected_path))

        # Neither the file name nor the manifest may carry the signed query string.
        serialized_path = expected_path.name
        serialized_manifest = str(manifest)
        for marker in secret_markers:
            self.assertNotIn(marker, serialized_path)
        for marker in secret_markers:
            self.assertNotIn(marker, serialized_manifest)
def test_download_hik_cloud_recordings_can_plan_without_downloading(self):
    """With ``download=False`` the address is recorded in the manifest but nothing is fetched."""
    signed_url = "https://download.example/video.mp4?sign=SECRET&sig=SECRET&TOKEN=SECRET"

    with tempfile.TemporaryDirectory() as tmp:
        output_dir = Path(tmp)
        config = _download_config()
        download_calls = []

        def answer_address(chunk, hik_config):
            answer = dict(chunk)
            answer.update(
                status="address_ok",
                url=signed_url,
                actual_begin=chunk["time_begin"],
                actual_end=chunk["time_end"],
            )
            return answer

        def unexpected_download(url, timeout_seconds=None):
            # Should never run; calls are recorded so the test can prove it.
            download_calls.append(url)
            return b"unexpected"

        records = hik_cloud.download_hik_cloud_recordings(
            config,
            output_dir,
            address_client=answer_address,
            download_url=unexpected_download,
            download=False,
        )

        self.assertEqual(records, [])
        self.assertEqual(download_calls, [])

        manifest = read_jsonl(output_dir / "hik_cloud_download_manifest.jsonl")
        self.assertEqual(len(manifest), 1)
        entry = manifest[0]
        self.assertEqual(entry["status"], "address_ok")
        self.assertIsNone(entry["path"])
        self.assertEqual(entry["download_url_host"], "download.example")
        for marker in ("sign=", "sig=", "TOKEN"):
            self.assertNotIn(marker, str(manifest))
def test_download_hik_cloud_recordings_records_empty_and_address_failures(self):
    """Chunks answered with ``no_recording`` / ``address_failed`` are logged and never downloaded."""
    with tempfile.TemporaryDirectory() as tmp:
        output_dir = Path(tmp)
        two_ranges = [
            {"begin": 1764856787, "end": 1764856978},
            {"begin": 1764857000, "end": 1764857100},
        ]
        config = _download_config(time_ranges=two_ranges)
        # One scripted status per address lookup, consumed in order.
        scripted_statuses = ["no_recording", "address_failed"]
        download_calls = []

        def scripted_address(chunk, hik_config):
            status = scripted_statuses.pop(0)
            if status == "no_recording":
                error_text = None
            else:
                error_text = "api failed"
            outcome = dict(chunk)
            outcome.update(
                status=status,
                actual_begin=None,
                actual_end=None,
                last_error=error_text,
            )
            return outcome

        def unexpected_download(url, timeout_seconds=None):
            download_calls.append(url)
            return b"unexpected"

        records = hik_cloud.download_hik_cloud_recordings(
            config,
            output_dir,
            address_client=scripted_address,
            download_url=unexpected_download,
        )

        self.assertEqual(records, [])
        self.assertEqual(download_calls, [])
        manifest = read_jsonl(output_dir / "hik_cloud_download_manifest.jsonl")
        logged_statuses = [record["status"] for record in manifest]
        self.assertEqual(logged_statuses, ["no_recording", "address_failed"])
def test_download_hik_cloud_recordings_records_download_failure_and_continues(self):
    """A failed first download is logged (sanitized) and the second chunk still succeeds."""
    signed_url = "https://download.example/video.mp4?sign=SECRET&sig=SECRET&TOKEN=SECRET"

    with tempfile.TemporaryDirectory() as tmp:
        output_dir = Path(tmp)
        two_ranges = [
            {"begin": 1764856787, "end": 1764856978},
            {"begin": 1764857000, "end": 1764857100},
        ]
        config = _download_config(time_ranges=two_ranges)
        download_calls = []

        def answer_address(chunk, hik_config):
            answer = dict(chunk)
            answer.update(
                status="address_ok",
                url=signed_url,
                actual_begin=chunk["time_begin"],
                actual_end=chunk["time_end"],
            )
            return answer

        def flaky_download(url, timeout_seconds=None):
            download_calls.append(url)
            first_attempt = len(download_calls) == 1
            if first_attempt:
                # The error text embeds the signed query so the test can check
                # that it is not copied into the manifest.
                raise RuntimeError(
                    "download failed for query sign=SECRET&sig=SECRET&TOKEN=SECRET"
                )
            return b"second chunk"

        records = hik_cloud.download_hik_cloud_recordings(
            config,
            output_dir,
            address_client=answer_address,
            download_url=flaky_download,
        )

        self.assertEqual(len(download_calls), 2)
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0]["status"], "downloaded")

        manifest = read_jsonl(output_dir / "hik_cloud_download_manifest.jsonl")
        logged_statuses = [record["status"] for record in manifest]
        self.assertEqual(logged_statuses, ["download_failed", "downloaded"])
        self.assertIn("last_error", manifest[0])
        for marker in ("sign=", "sig=", "TOKEN", "SECRET"):
            self.assertNotIn(marker, str(manifest))
def test_download_hik_cloud_recordings_resume_skips_existing_downloaded_file(self):
    """With ``resume`` on, an already-downloaded chunk triggers no lookup and no download."""
    with tempfile.TemporaryDirectory() as tmp:
        output_dir = Path(tmp)
        manifest_path = output_dir / "hik_cloud_download_manifest.jsonl"
        config = _download_config(resume=True)

        # Seed the output directory with a finished download and its manifest entry.
        downloaded_path = output_dir.joinpath(
            "downloads",
            "hik_cloud",
            "EXAMPLE_DEVICE_SERIAL",
            "ch1",
            "EXAMPLE_DEVICE_SERIAL_ch1_1764856787_1764856978.mp4",
        )
        downloaded_path.parent.mkdir(parents=True, exist_ok=True)
        downloaded_path.write_bytes(b"existing")
        existing_record = {
            "source": "hik_cloud",
            "path": str(downloaded_path),
            "device_serial": "EXAMPLE_DEVICE_SERIAL",
            "channel_no": 1,
            "requested_begin": 1764856787,
            "requested_end": 1764856978,
            "actual_begin": 1764856787,
            "actual_end": 1764856978,
            "status": "downloaded",
            "retry_count": 0,
            "last_error": None,
        }
        write_manifest(manifest_path, [existing_record])

        def forbidden_address_client(chunk, hik_config):
            raise AssertionError("resume should skip address lookup")

        def forbidden_download_url(url, timeout_seconds=None):
            raise AssertionError("resume should skip download")

        records = hik_cloud.download_hik_cloud_recordings(
            config,
            output_dir,
            address_client=forbidden_address_client,
            download_url=forbidden_download_url,
        )

        # The returned record is the stored one plus a derived source_path.
        expected_video_record = dict(
            existing_record,
            source_path="hik_cloud://EXAMPLE_DEVICE_SERIAL/ch1/1764856787-1764856978",
        )
        self.assertEqual(records, [expected_video_record])
        # The manifest on disk is expected to be unchanged.
        manifest = read_jsonl(manifest_path)
        self.assertEqual(manifest, [existing_record])
def _download_config(
    *,
    time_ranges=None,
    resume: bool = False,
):
    """Build the minimal config dict shared by the download tests.

    Args:
        time_ranges: Optional list of ``{"begin": ..., "end": ...}`` dicts. A
            falsy value (``None`` or an empty list) selects one default range.
        resume: Value stored under ``output.resume``.

    Returns:
        A dict with ``output`` and ``hik_cloud`` sections for a single device.
    """
    default_ranges = [{"begin": 1764856787, "end": 1764856978}]
    hik_section = {
        "access_token": "TOKEN",
        "download_timeout_seconds": 600,
        "devices": [{"device_serial": "EXAMPLE_DEVICE_SERIAL", "channel_no": 1}],
        "time_ranges": time_ranges if time_ranges else default_ranges,
    }
    return {"output": {"resume": resume}, "hik_cloud": hik_section}
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

30
tests/test_manifest.py Normal file
View File

@@ -0,0 +1,30 @@
import json
import tempfile
import unittest
from pathlib import Path
from video_ai_analysis_poc.manifest import read_jsonl, write_manifest
class ManifestTests(unittest.TestCase):
    """Checks for writing and re-reading JSONL manifests."""

    def test_write_manifest_writes_status_retry_and_error_fields(self):
        """Written lines are expected to carry ``retry_count`` and ``last_error`` and to round-trip."""
        probed_record = {"path": "/tmp/a.mp4", "status": "probed"}
        failed_record = {
            "path": "/tmp/b.mp4",
            "status": "probe_failed",
            "last_error": "bad data",
        }

        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "video_manifest.jsonl"
            write_manifest(path, [probed_record, failed_record])

            raw_text = path.read_text(encoding="utf-8")
            decoded = [json.loads(line) for line in raw_text.splitlines()]

            # The first input record had neither field; both are expected in the output.
            self.assertEqual(decoded[0]["retry_count"], 0)
            self.assertIsNone(decoded[0]["last_error"])
            self.assertEqual(decoded[1]["status"], "probe_failed")
            self.assertEqual(decoded[1]["last_error"], "bad data")
            self.assertEqual(read_jsonl(path), decoded)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

51
tests/test_probe.py Normal file
View File

@@ -0,0 +1,51 @@
import subprocess
import unittest
from pathlib import Path
from unittest.mock import patch
from video_ai_analysis_poc.probe import probe_video
class ProbeTests(unittest.TestCase):
    """Checks for ``probe_video`` with ``subprocess.run`` replaced by a mock."""

    def test_probe_video_returns_structured_metadata(self):
        """A successful ffprobe-style JSON payload is expected to map to flat metadata fields."""
        streams_json = (
            '[{"codec_type":"video","codec_name":"h264",'
            '"width":1920,"height":1080,"avg_frame_rate":"30000/1001"}]'
        )
        format_json = (
            '{"duration":"12.5","format_name":"mov,mp4,m4a,3gp,3g2,mj2",'
            '"start_time":"0.000000"}'
        )
        payload = '{"streams":' + streams_json + ',"format":' + format_json + "}"
        completed = subprocess.CompletedProcess(
            args=["ffprobe"],
            returncode=0,
            stdout=payload,
            stderr="",
        )

        with patch("subprocess.run", return_value=completed):
            result = probe_video(Path("/tmp/video.mp4"), timeout_seconds=3)

        self.assertEqual(result["status"], "probed")
        self.assertEqual(result["codec_name"], "h264")
        self.assertEqual(result["width"], 1920)
        self.assertEqual(result["height"], 1080)
        # 30000/1001 is the frame rate given as a fraction in the payload.
        self.assertAlmostEqual(result["fps"], 29.97002997)
        self.assertEqual(result["duration_seconds"], 12.5)
        self.assertIsNone(result["last_error"])

    def test_probe_video_returns_structured_failure(self):
        """A ``CalledProcessError`` is expected to become a ``probe_failed`` record, not an exception."""
        failure = subprocess.CalledProcessError(
            returncode=1,
            cmd=["ffprobe"],
            stderr="Invalid data found when processing input",
        )

        with patch("subprocess.run", side_effect=failure):
            result = probe_video(Path("/tmp/bad.mp4"), timeout_seconds=3)

        self.assertEqual(result["status"], "probe_failed")
        self.assertEqual(result["retry_count"], 0)
        self.assertIn("Invalid data", result["last_error"])
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

135
tests/test_result_parser.py Normal file
View File

@@ -0,0 +1,135 @@
import unittest
from video_ai_analysis_poc.result_parser import build_clip_result, extract_json_payload
class ResultParserTests(unittest.TestCase):
    """Checks for JSON extraction and clip-result assembly in ``result_parser``."""

    def test_extract_json_payload_handles_markdown_and_prose(self):
        """A fenced JSON block preceded by prose is expected to decode to a plain dict."""
        model_text = 'analysis follows\n```json\n{"screen_time":"12:31:20","events":[]}\n```'

        payload = extract_json_payload(model_text)

        self.assertEqual(payload, {"screen_time": "12:31:20", "events": []})

    def test_build_clip_result_preserves_timeline_screen_time_and_events(self):
        """Clip timing, frame times, screen time and events are all expected in the result."""
        frame_entry = {
            "frame_path": "frames/video-abc/000120.jpg",
            "offset_seconds": 120.0,
            "timecode": "00:02:00",
            "beijing_time": "2026-06-15 07:02:00",
        }
        clip_record = {
            "video_id": "video-abc",
            "clip_id": "video-abc_c000001",
            "clip_start_seconds": 120.0,
            "clip_end_seconds": 130.0,
            "clip_start_timecode": "00:02:00",
            "clip_end_timecode": "00:02:10",
            "clip_start_beijing_time": "2026-06-15 07:02:00",
            "clip_end_beijing_time": "2026-06-15 07:02:10",
            "frame_times": [frame_entry],
        }
        # The screen time is supplied under a Chinese key (roughly "screen time").
        raw_response = (
            'Here is the result: {"画面时间":"2026-06-14 12:31:20",'
            '"events":[{"event_type":"queue_detected","confidence":0.86}]}'
        )
        video_record = {"path": "/videos/a.mp4"}
        config = {
            "schema": {"version": "local-batch-v1"},
            "runtime": {"timezone": "Asia/Shanghai"},
        }

        result = build_clip_result(
            raw_response,
            clip_record,
            video_record,
            config,
            processing={"latency_ms": 1800},
        )

        self.assertEqual(result["schema_version"], "local-batch-v1")
        self.assertEqual(result["video_id"], "video-abc")
        self.assertEqual(result["video_path"], "/videos/a.mp4")
        self.assertEqual(result["clip_id"], "video-abc_c000001")
        self.assertEqual(result["status"], "ok")

        timeline = result["monitoring_timeline"]
        self.assertEqual(timeline["timezone"], "Asia/Shanghai")
        self.assertIsNone(timeline["video_start_time"])
        self.assertEqual(timeline["clip_start_beijing_time"], "2026-06-15 07:02:00")
        self.assertEqual(timeline["clip_end_beijing_time"], "2026-06-15 07:02:10")
        self.assertEqual(timeline["frame_times"], clip_record["frame_times"])
        self.assertEqual(timeline["screen_time"], "2026-06-14 12:31:20")

        event = result["events"][0]
        self.assertEqual(event["event_type"], "queue_detected")
        # The raw event has no offsets; the clip's own bounds are expected instead.
        self.assertEqual(event["start_offset_seconds"], 120.0)
        self.assertEqual(event["end_offset_seconds"], 130.0)
        self.assertEqual(result["raw_response"], raw_response)
        self.assertEqual(result["processing"]["latency_ms"], 1800)
        self.assertIsNone(result["error"])

    def test_build_clip_result_reads_zhengxin_time_key(self):
        """The Chinese ``时间`` ("time") key is expected to populate ``screen_time``."""
        raw_response = (
            '{"Action":"Action_Idle","quality_status":"qualified",'
            '"error_type":"","安全隐患":"","人物位置":"","总结":"",'
            '"时间":"2026-06-14 12:31:20","employees":[],"guests":[]}'
        )
        clip_record = {
            "video_id": "video-abc",
            "clip_id": "video-abc_c000001",
            "clip_start_seconds": 0.0,
            "clip_end_seconds": 10.0,
            "clip_start_timecode": "00:00:00",
            "clip_end_timecode": "00:00:10",
            "frame_times": [],
        }
        video_record = {"path": "/videos/a.mp4"}
        config = {
            "schema": {"version": "local-batch-v1"},
            "runtime": {"timezone": "Asia/Shanghai"},
        }

        result = build_clip_result(
            raw_response,
            clip_record,
            video_record,
            config,
            processing={},
        )

        self.assertEqual(result["status"], "ok")
        self.assertEqual(
            result["monitoring_timeline"]["screen_time"],
            "2026-06-14 12:31:20",
        )

    def test_build_clip_result_records_parse_failure_without_crashing(self):
        """Non-JSON model output is expected to yield a ``parse_failed`` result, not an exception."""
        clip_record = {
            "video_id": "video-abc",
            "clip_id": "video-abc_c000001",
            "clip_start_seconds": 0.0,
            "clip_end_seconds": 10.0,
            "clip_start_timecode": "00:00:00",
            "clip_end_timecode": "00:00:10",
            "frame_times": [],
        }
        video_record = {"path": "/videos/a.mp4"}
        config = {
            "schema": {"version": "local-batch-v1"},
            "runtime": {"timezone": "Asia/Shanghai"},
        }

        result = build_clip_result(
            "not json",
            clip_record,
            video_record,
            config,
            processing={},
        )

        self.assertEqual(result["status"], "parse_failed")
        self.assertEqual(result["events"], [])
        self.assertEqual(result["monitoring_timeline"]["screen_time"], "")
        self.assertEqual(result["raw_response"], "not json")
        self.assertIn("JSON", result["error"])
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

85
tests/test_vlm_client.py Normal file
View File

@@ -0,0 +1,85 @@
import base64
import json
import tempfile
import unittest
from pathlib import Path
from video_ai_analysis_poc.vlm_client import infer_clip
class VlmClientTests(unittest.TestCase):
    """Checks for ``infer_clip`` using an injected fake HTTP POST function."""

    def test_infer_clip_uses_config_prompt_url_and_data_uri_images(self):
        """The request is expected to use the configured URL, prompts and a base64 data-URI frame."""
        with tempfile.TemporaryDirectory() as tmp:
            output_dir = Path(tmp)
            frame_path = output_dir / "frames" / "video-abc" / "000001.jpg"
            frame_path.parent.mkdir(parents=True)
            frame_path.write_bytes(b"jpg-bytes")
            captured = []

            def stub_post(url, payload, timeout_seconds):
                captured.append((url, payload, timeout_seconds))
                model_reply = json.dumps({"screen_time": "10:00:01", "events": []})
                body = {"choices": [{"message": {"content": model_reply}}]}
                return {"status": 200, "body": body}

            clip_record = {
                "clip_id": "video-abc_c000001",
                "frame_times": [
                    {
                        # Relative path, the same file as frame_path under output_dir.
                        "frame_path": "frames/video-abc/000001.jpg",
                        "offset_seconds": 0.0,
                        "timecode": "00:00:00",
                    }
                ],
            }
            vlm_settings = {
                "api_base_url": "http://localhost:8679/",
                "chat_completions_path": "/v1/chat/completions",
                "model": "memai-zhengxin-v3-20260413",
                "timeout_seconds": 17,
                "max_tokens": 256,
                "temperature": 0,
                "image_transport": "data_uri",
            }
            prompts = {
                "system": "system prompt from config",
                "user": "user prompt from config",
            }

            result = infer_clip(
                clip_record,
                output_dir,
                vlm_settings,
                prompts,
                http_post=stub_post,
            )

            self.assertEqual(
                result["raw_response"],
                '{"screen_time": "10:00:01", "events": []}',
            )
            self.assertEqual(len(captured), 1)
            url, payload, timeout_seconds = captured[0]
            # Trailing "/" on the base URL plus leading "/" on the path: one slash expected.
            self.assertEqual(url, "http://localhost:8679/v1/chat/completions")
            self.assertEqual(timeout_seconds, 17)
            self.assertEqual(payload["model"], "memai-zhengxin-v3-20260413")

            system_message = payload["messages"][0]
            self.assertEqual(system_message["role"], "system")
            self.assertEqual(system_message["content"], "system prompt from config")

            user_content = payload["messages"][1]["content"]
            self.assertEqual(
                user_content[0],
                {"type": "text", "text": "user prompt from config"},
            )
            image_part = user_content[1]
            self.assertEqual(image_part["type"], "image_url")
            expected_data = base64.b64encode(b"jpg-bytes").decode("ascii")
            self.assertEqual(
                image_part["image_url"]["url"],
                f"data:image/jpeg;base64,{expected_data}",
            )
            self.assertEqual(result["http_status"], 200)
            self.assertIsInstance(result["latency_ms"], int)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()