"""End-to-end tests for ``video_ai_analysis_poc.cli.main``.

Each test builds a throwaway config and output directory, patches the
expensive pipeline stages where needed, and inspects the JSONL manifests
that ``main`` leaves behind.
"""

import io
import json
import tempfile
import unittest
from contextlib import redirect_stderr
from pathlib import Path
from unittest.mock import ANY, patch

from video_ai_analysis_poc.cli import main
from video_ai_analysis_poc.paths import stable_video_id

# Fields that the manifests written by these tests carry on fresh records.
_RETRY_FIELDS = {"retry_count": 0, "last_error": None}

_FFPROBE_LINES = ["ffprobe:", " timeout_seconds: 3"]

# The decoder entry must be indented deeper than ``codec_decoders``; at the
# same indentation YAML parses ``codec_decoders`` as null and ``h264`` as a
# sibling key of ``ffmpeg``.
_CODEC_DECODER_LINES = [" codec_decoders:", "  h264: h264_cuvid"]

_NVDEC_LINES = [
    "ffmpeg:",
    " prefer_nvdec: true",
    " allow_cpu_fallback: false",
    *_CODEC_DECODER_LINES,
]


def _write_config(root: Path, lines: list) -> Path:
    """Write *lines* (newline-joined) to ``root/local_batch.yaml`` and return the path."""
    config_path = root / "local_batch.yaml"
    config_path.write_text("\n".join(lines), encoding="utf-8")
    return config_path


def _read_jsonl(path: Path) -> list:
    """Return the JSON document found on each line of *path*."""
    return [json.loads(line) for line in path.read_text(encoding="utf-8").splitlines()]


def _write_jsonl(path: Path, records: list) -> None:
    """Write *records* to *path*, one key-sorted JSON document per line."""
    path.write_text(
        "".join(json.dumps(record, sort_keys=True) + "\n" for record in records),
        encoding="utf-8",
    )


def _local_io_lines(input_dir: Path, output_dir: Path, *, resume: bool = False) -> list:
    """Return the ``input:``/``output:`` config lines for a local ``.mp4`` run."""
    lines = [
        "input:",
        f" dir: {input_dir}",
        " recursive: false",
        ' extensions: [".mp4"]',
        "output:",
        f" dir: {output_dir}",
    ]
    if resume:
        lines.append(" resume: true")
    return lines


def _clip_lines(min_frames_per_clip: int) -> list:
    """Return the ``clip:`` config section with 10 s clips of 4 frames."""
    return [
        "clip:",
        " length_seconds: 10",
        " stride_seconds: 10",
        " frames_per_clip: 4",
        f" min_frames_per_clip: {min_frames_per_clip}",
    ]


def _probed_record(path: Path, **extra) -> dict:
    """Return a ``status == "probed"`` record for *path*, extended by *extra*."""
    return {"path": str(path), "status": "probed", **_RETRY_FIELDS, **extra}


def _sampled_frame_records(video_id: str, count: int, **extra) -> list:
    """Return *count* one-per-second ``sampled`` frame records for *video_id*."""
    return [
        {
            "video_id": video_id,
            "frame_id": f"{video_id}_f{index + 1:06d}",
            "frame_path": f"frames/{video_id}/{index + 1:06d}.jpg",
            "offset_seconds": float(index),
            "timecode": f"00:00:{index:02d}",
            "pts_time": float(index),
            "status": "sampled",
            **extra,
        }
        for index in range(count)
    ]


def _pending_clip_record(video_id: str, index: int, frame_times: list, **extra) -> dict:
    """Return the ``pending`` record of the *index*-th 10-second clip of *video_id*."""
    start = index * 10
    end = start + 10
    return {
        "video_id": video_id,
        "clip_id": f"{video_id}_c{index + 1:06d}",
        "clip_start_seconds": float(start),
        "clip_end_seconds": float(end),
        "clip_start_timecode": f"00:00:{start:02d}",
        "clip_end_timecode": f"00:00:{end:02d}",
        "frame_times": frame_times,
        "status": "pending",
        **extra,
    }


class CliTests(unittest.TestCase):
    """Drive ``main`` with temporary configs and check its exit code and outputs."""

    def test_unknown_source_mode_exits_with_cli_error_without_traceback(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            output_dir = root / "output"
            config_path = _write_config(
                root,
                [
                    "source:",
                    " mode: bad_mode",
                    "output:",
                    f" dir: {output_dir}",
                ],
            )

            stderr = io.StringIO()
            with redirect_stderr(stderr), self.assertRaises(SystemExit):
                main(["--config", str(config_path), "--dry-run"])

            stderr_text = stderr.getvalue()
            self.assertIn("unsupported source.mode: bad_mode", stderr_text)
            self.assertNotIn("Traceback", stderr_text)

    def test_local_source_dry_run_uses_discovery_and_not_hik_cloud_downloader(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            config_path = _write_config(
                root,
                ["source:", " mode: local", *_local_io_lines(input_dir, output_dir)],
            )

            with (
                patch("video_ai_analysis_poc.cli.discover_videos") as discover_videos,
                patch(
                    "video_ai_analysis_poc.cli.download_hik_cloud_recordings"
                ) as download_hik_cloud,
            ):
                discover_videos.return_value = []
                exit_code = main(["--config", str(config_path), "--dry-run"])

            self.assertEqual(exit_code, 0)
            discover_videos.assert_called_once_with(
                str(input_dir.resolve(strict=False)),
                [".mp4"],
                recursive=False,
            )
            download_hik_cloud.assert_not_called()

    def test_hik_cloud_source_downloads_probes_and_preserves_cloud_metadata(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            output_dir = root / "output"
            downloaded_path = output_dir / "downloads" / "hik_cloud" / "clip.mp4"
            downloaded_path.parent.mkdir(parents=True)
            downloaded_path.write_bytes(b"mp4")
            config_path = _write_config(
                root,
                [
                    "source:",
                    " mode: hik_cloud",
                    "output:",
                    f" dir: {output_dir}",
                    "ffprobe:",
                    " timeout_seconds: 7",
                ],
            )
            downloaded_record = {
                "source": "hik_cloud",
                "path": str(downloaded_path),
                "source_path": "hik_cloud://DS-1/ch1/100-160",
                "device_serial": "DS-1",
                "channel_no": 1,
                "requested_begin": 100,
                "requested_end": 160,
                "actual_begin": 101,
                "actual_end": 159,
                "status": "downloaded",
                "retry_count": 0,
                "last_error": None,
            }

            with (
                patch(
                    "video_ai_analysis_poc.cli.download_hik_cloud_recordings"
                ) as download_hik_cloud,
                patch("video_ai_analysis_poc.cli.discover_videos") as discover_videos,
                patch("video_ai_analysis_poc.cli.probe_video") as probe_video,
                patch("video_ai_analysis_poc.cli.sample_video_frames") as sample_frames,
            ):
                download_hik_cloud.return_value = [downloaded_record]
                probe_video.return_value = _probed_record(
                    downloaded_path, duration_seconds=60.0, codec_name="h264"
                )
                sample_frames.return_value = []
                exit_code = main(["--config", str(config_path), "--until", "clips"])

            self.assertEqual(exit_code, 0)
            download_hik_cloud.assert_called_once_with(
                ANY, output_dir.resolve(strict=False), download=True
            )
            discover_videos.assert_not_called()
            probe_video.assert_called_once_with(str(downloaded_path), timeout_seconds=7)

            records = _read_jsonl(output_dir / "video_manifest.jsonl")
            self.assertEqual(len(records), 1)
            record = records[0]
            expected_fields = {
                "video_id": stable_video_id(downloaded_path),
                "status": "probed",
                "source": "hik_cloud",
                "source_path": "hik_cloud://DS-1/ch1/100-160",
                "device_serial": "DS-1",
                "channel_no": 1,
                "requested_begin": 100,
                "requested_end": 160,
                "actual_begin": 101,
                "actual_end": 159,
                "duration_seconds": 60.0,
                "codec_name": "h264",
            }
            for field, expected in expected_fields.items():
                with self.subTest(field=field):
                    self.assertEqual(record[field], expected)

    def test_hik_cloud_dry_run_requests_addresses_without_download_probe_or_analysis(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            output_dir = root / "output"
            config_path = _write_config(
                root,
                [
                    "source:",
                    " mode: hik_cloud",
                    "output:",
                    f" dir: {output_dir}",
                ],
            )

            with (
                patch(
                    "video_ai_analysis_poc.cli.download_hik_cloud_recordings"
                ) as download_hik_cloud,
                patch("video_ai_analysis_poc.cli.probe_video") as probe_video,
                patch("video_ai_analysis_poc.cli.sample_video_frames") as sample_frames,
                patch("video_ai_analysis_poc.cli.infer_clip") as infer_clip,
                patch("video_ai_analysis_poc.cli.aggregate_outputs") as aggregate_outputs,
            ):
                download_hik_cloud.return_value = []
                exit_code = main(["--config", str(config_path), "--dry-run"])

            self.assertEqual(exit_code, 0)
            download_hik_cloud.assert_called_once_with(
                ANY, output_dir.resolve(strict=False), download=False
            )
            probe_video.assert_not_called()
            self.assertTrue((output_dir / "video_manifest.jsonl").exists())
            sample_frames.assert_not_called()
            infer_clip.assert_not_called()
            aggregate_outputs.assert_not_called()

    def test_hik_cloud_until_clips_generates_video_frame_and_clip_manifests(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            output_dir = root / "output"
            downloaded_path = output_dir / "downloads" / "hik_cloud" / "clips.mp4"
            downloaded_path.parent.mkdir(parents=True)
            downloaded_path.write_bytes(b"mp4")
            video_id = stable_video_id(downloaded_path)
            config_path = _write_config(
                root,
                [
                    "source:",
                    " mode: hik_cloud",
                    "output:",
                    f" dir: {output_dir}",
                    "ffprobe:",
                    " timeout_seconds: 5",
                    "ffmpeg:",
                    " prefer_nvdec: true",
                    " allow_cpu_fallback: false",
                    " hwaccel: cuda",
                    *_CODEC_DECODER_LINES,
                    *_clip_lines(2),
                ],
            )
            downloaded_record = {
                "source": "hik_cloud",
                "path": str(downloaded_path),
                "source_path": "hik_cloud://DS-3/ch1/300-360",
                "device_serial": "DS-3",
                "channel_no": 1,
                "requested_begin": 300,
                "requested_end": 360,
                "actual_begin": 300,
                "actual_end": 360,
                "status": "downloaded",
            }
            frame_records = _sampled_frame_records(video_id, 4, **_RETRY_FIELDS)

            with (
                patch(
                    "video_ai_analysis_poc.cli.download_hik_cloud_recordings"
                ) as download_hik_cloud,
                patch("video_ai_analysis_poc.cli.probe_video") as probe_video,
                patch("video_ai_analysis_poc.cli.sample_video_frames") as sample_frames,
                patch("video_ai_analysis_poc.cli.infer_clip") as infer_clip,
                patch("video_ai_analysis_poc.cli.aggregate_outputs") as aggregate_outputs,
            ):
                download_hik_cloud.return_value = [downloaded_record]
                probe_video.return_value = _probed_record(
                    downloaded_path, duration_seconds=4.0, codec_name="h264"
                )
                sample_frames.return_value = frame_records
                exit_code = main(["--config", str(config_path), "--until", "clips"])

            self.assertEqual(exit_code, 0)
            download_hik_cloud.assert_called_once_with(
                ANY, output_dir.resolve(strict=False), download=True
            )
            sample_frames.assert_called_once()
            infer_clip.assert_not_called()
            aggregate_outputs.assert_not_called()

            video_records = _read_jsonl(output_dir / "video_manifest.jsonl")
            persisted_frames = _read_jsonl(output_dir / "frame_manifest.jsonl")
            clip_records = _read_jsonl(output_dir / "clip_manifest.jsonl")
            self.assertEqual(len(video_records), 1)
            self.assertEqual(video_records[0]["video_id"], video_id)
            self.assertEqual(video_records[0]["source"], "hik_cloud")
            self.assertEqual(persisted_frames, frame_records)
            self.assertEqual(len(clip_records), 1)
            self.assertEqual(clip_records[0]["video_id"], video_id)
            self.assertEqual(clip_records[0]["status"], "pending")

    def test_dry_run_discovers_probes_and_writes_manifest_without_crashing(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            (input_dir / "dummy.mp4").write_text("not a real video", encoding="utf-8")
            (input_dir / "ignore.txt").write_text("ignore", encoding="utf-8")
            config_path = _write_config(
                root, [*_local_io_lines(input_dir, output_dir), *_FFPROBE_LINES]
            )

            # probe_video is deliberately not patched: the dummy file is
            # expected to end up as a "probe_failed" record.
            exit_code = main(["--config", str(config_path), "--dry-run"])

            self.assertEqual(exit_code, 0)
            manifest_path = output_dir / "video_manifest.jsonl"
            self.assertTrue(manifest_path.exists())
            records = _read_jsonl(manifest_path)
            self.assertEqual(len(records), 1)
            self.assertEqual(records[0]["status"], "probe_failed")
            self.assertEqual(records[0]["retry_count"], 0)
            self.assertIn("dummy.mp4", records[0]["path"])

    def test_dry_run_resume_preserves_existing_completed_manifest_record(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            output_dir.mkdir()
            video_path = input_dir / "done.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            existing_record = _probed_record(
                video_path,
                video_id=stable_video_id(video_path),
                duration_seconds=12.5,
            )
            manifest_path = output_dir / "video_manifest.jsonl"
            _write_jsonl(manifest_path, [existing_record])
            config_path = _write_config(
                root,
                [*_local_io_lines(input_dir, output_dir, resume=True), *_FFPROBE_LINES],
            )

            with patch("video_ai_analysis_poc.cli.probe_video") as probe_video:
                exit_code = main(["--config", str(config_path), "--dry-run"])

            self.assertEqual(exit_code, 0)
            probe_video.assert_not_called()
            self.assertEqual(_read_jsonl(manifest_path), [existing_record])

    def test_until_clips_writes_video_frame_and_clip_manifests_without_vlm(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            video_path = input_dir / "clip.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            config_path = _write_config(
                root,
                [
                    *_local_io_lines(input_dir, output_dir),
                    *_FFPROBE_LINES,
                    "ffmpeg:",
                    " prefer_nvdec: true",
                    " allow_cpu_fallback: false",
                    " hwaccel: cuda",
                    *_CODEC_DECODER_LINES,
                    " frame_fps: 1",
                    " frame_width: 640",
                    " jpeg_quality: 4",
                    " timeout_seconds_per_video: 30",
                    *_clip_lines(2),
                ],
            )
            frame_records = _sampled_frame_records(stable_video_id(video_path), 4)

            with (
                patch("video_ai_analysis_poc.cli.probe_video") as probe_video,
                patch("video_ai_analysis_poc.cli.sample_video_frames") as sample_frames,
            ):
                probe_video.return_value = _probed_record(
                    video_path, duration_seconds=4.0, codec_name="h264"
                )
                sample_frames.return_value = frame_records
                exit_code = main(["--config", str(config_path), "--until", "clips"])

            self.assertEqual(exit_code, 0)
            self.assertTrue((output_dir / "video_manifest.jsonl").exists())
            self.assertTrue((output_dir / "frame_manifest.jsonl").exists())
            self.assertTrue((output_dir / "clip_manifest.jsonl").exists())
            clip_records = _read_jsonl(output_dir / "clip_manifest.jsonl")
            self.assertEqual(len(clip_records), 1)
            self.assertEqual(clip_records[0]["status"], "pending")
            self.assertEqual(len(clip_records[0]["frame_times"]), 4)

    def test_until_clips_resume_preserves_existing_frame_and_clip_manifests(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            output_dir.mkdir()
            video_path = input_dir / "done.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            video_id = stable_video_id(video_path)
            video_record = _probed_record(
                video_path,
                video_id=video_id,
                duration_seconds=4.0,
                codec_name="h264",
            )
            frame_record = _sampled_frame_records(video_id, 1, **_RETRY_FIELDS)[0]
            clip_record = {
                "video_id": video_id,
                "clip_id": f"{video_id}_c000001",
                "clip_start_seconds": 0.0,
                "clip_end_seconds": 4.0,
                "clip_start_timecode": "00:00:00",
                "clip_end_timecode": "00:00:04",
                "frame_times": [
                    {
                        "frame_id": frame_record["frame_id"],
                        "frame_path": frame_record["frame_path"],
                        "offset_seconds": 0.0,
                        "timecode": "00:00:00",
                        "pts_time": 0.0,
                    }
                ],
                "status": "pending",
                "retry_count": 0,
                "last_error": None,
            }
            _write_jsonl(output_dir / "video_manifest.jsonl", [video_record])
            _write_jsonl(output_dir / "frame_manifest.jsonl", [frame_record])
            _write_jsonl(output_dir / "clip_manifest.jsonl", [clip_record])
            config_path = _write_config(
                root,
                [
                    *_local_io_lines(input_dir, output_dir, resume=True),
                    *_FFPROBE_LINES,
                    *_NVDEC_LINES,
                    *_clip_lines(1),
                ],
            )

            with patch("video_ai_analysis_poc.cli.sample_video_frames") as sample_frames:
                exit_code = main(["--config", str(config_path), "--until", "clips"])

            self.assertEqual(exit_code, 0)
            sample_frames.assert_not_called()
            self.assertEqual(
                _read_jsonl(output_dir / "frame_manifest.jsonl"), [frame_record]
            )
            self.assertEqual(
                _read_jsonl(output_dir / "clip_manifest.jsonl"), [clip_record]
            )

    def test_resume_backfills_beijing_times_without_reinference(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            output_dir.mkdir()
            video_path = input_dir / "done.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            video_id = stable_video_id(video_path)
            video_record = _probed_record(
                video_path,
                video_id=video_id,
                duration_seconds=10.0,
                codec_name="h264",
                actual_begin=1781478000,
                actual_end=1781478010,
            )
            frame_records = _sampled_frame_records(video_id, 10, **_RETRY_FIELDS)
            old_clip_record = _pending_clip_record(
                video_id, 0, [frame_records[0]], **_RETRY_FIELDS
            )
            old_result_record = {
                "schema_version": "local-batch-v1",
                "video_id": video_id,
                "video_path": str(video_path),
                "clip_id": old_clip_record["clip_id"],
                "status": "ok",
                "monitoring_timeline": {
                    "timezone": "Asia/Shanghai",
                    "clip_start_seconds": 0.0,
                    "clip_end_seconds": 10.0,
                    "clip_start_timecode": "00:00:00",
                    "clip_end_timecode": "00:00:10",
                    "frame_times": [frame_records[0]],
                    "screen_time": "2026-06-15 07:00:00",
                },
                "events": [],
                "raw_response": "{}",
                "processing": {},
                "error": None,
            }
            _write_jsonl(output_dir / "video_manifest.jsonl", [video_record])
            _write_jsonl(output_dir / "frame_manifest.jsonl", frame_records)
            _write_jsonl(output_dir / "clip_manifest.jsonl", [old_clip_record])
            _write_jsonl(output_dir / "clip_results.jsonl", [old_result_record])
            config_path = _write_config(
                root,
                [
                    *_local_io_lines(input_dir, output_dir, resume=True),
                    *_FFPROBE_LINES,
                    *_NVDEC_LINES,
                    *_clip_lines(1),
                    "runtime:",
                    " timezone: Asia/Shanghai",
                ],
            )

            with (
                patch("video_ai_analysis_poc.cli.sample_video_frames") as sample_frames,
                patch("video_ai_analysis_poc.cli.infer_clip") as infer_clip,
            ):
                exit_code = main(["--config", str(config_path), "--until", "inference"])

            self.assertEqual(exit_code, 0)
            sample_frames.assert_not_called()
            infer_clip.assert_not_called()

            persisted_frames = _read_jsonl(output_dir / "frame_manifest.jsonl")
            persisted_clips = _read_jsonl(output_dir / "clip_manifest.jsonl")
            result_records = _read_jsonl(output_dir / "clip_results.jsonl")
            self.assertEqual(persisted_frames[0]["beijing_time"], "2026-06-15 07:00:00")
            self.assertEqual(persisted_frames[9]["beijing_time"], "2026-06-15 07:00:09")
            self.assertEqual(
                persisted_clips[0]["clip_start_beijing_time"],
                "2026-06-15 07:00:00",
            )
            self.assertEqual(
                persisted_clips[0]["clip_end_beijing_time"],
                "2026-06-15 07:00:10",
            )
            timeline = result_records[0]["monitoring_timeline"]
            self.assertEqual(timeline["clip_start_beijing_time"], "2026-06-15 07:00:00")
            self.assertEqual(
                timeline["frame_times"][0]["beijing_time"],
                "2026-06-15 07:00:00",
            )

    def test_until_inference_writes_clip_results_and_uses_limit(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            output_dir.mkdir()
            video_path = input_dir / "ready.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            video_id = stable_video_id(video_path)
            video_record = _probed_record(
                video_path,
                video_id=video_id,
                duration_seconds=20.0,
                codec_name="h264",
            )
            clip_records = [
                _pending_clip_record(
                    video_id,
                    index,
                    [
                        {
                            "frame_path": f"frames/{video_id}/00000{index + 1}.jpg",
                            "offset_seconds": float(index * 10),
                            "timecode": f"00:00:{index * 10:02d}",
                        }
                    ],
                    **_RETRY_FIELDS,
                )
                for index in range(2)
            ]
            for clip in clip_records:
                frame_path = output_dir / clip["frame_times"][0]["frame_path"]
                frame_path.parent.mkdir(parents=True, exist_ok=True)
                frame_path.write_bytes(b"jpg")
            _write_jsonl(output_dir / "video_manifest.jsonl", [video_record])
            _write_jsonl(output_dir / "clip_manifest.jsonl", clip_records)
            config_path = _write_config(
                root,
                [
                    *_local_io_lines(input_dir, output_dir, resume=True),
                    *_FFPROBE_LINES,
                    "vlm:",
                    " api_base_url: http://localhost:8679",
                    " chat_completions_path: /v1/chat/completions",
                    " model: memai-zhengxin-v3-20260413",
                    "prompt:",
                    " system: system from config",
                    " user: user from config",
                    "schema:",
                    " version: local-batch-v1",
                    "runtime:",
                    " timezone: Asia/Shanghai",
                ],
            )

            with patch("video_ai_analysis_poc.cli.infer_clip") as infer_clip:
                infer_clip.return_value = {
                    "raw_response": '{"screen_time":"10:00:01","events":[]}',
                    "latency_ms": 12,
                    "http_status": 200,
                }
                exit_code = main(
                    [
                        "--config",
                        str(config_path),
                        "--until",
                        "inference",
                        "--limit-clips",
                        "1",
                    ]
                )

            self.assertEqual(exit_code, 0)
            self.assertEqual(infer_clip.call_count, 1)
            result_records = _read_jsonl(output_dir / "clip_results.jsonl")
            self.assertEqual(len(result_records), 1)
            self.assertEqual(result_records[0]["status"], "ok")
            timeline = result_records[0]["monitoring_timeline"]
            self.assertEqual(timeline["frame_times"], clip_records[0]["frame_times"])
            self.assertEqual(timeline["screen_time"], "10:00:01")
            self.assertEqual(result_records[0]["events"], [])
            self.assertFalse((output_dir / "video_result.json").exists())
            # The full-run test finds the per-video result under
            # videos/<video_id>/, so that location must stay empty here too.
            self.assertFalse(
                (output_dir / "videos" / video_id / "video_result.json").exists()
            )
            self.assertFalse((output_dir / "folder_summary.json").exists())

    def test_until_inference_flushes_clip_results_after_each_clip(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            output_dir.mkdir()
            video_path = input_dir / "ready.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            video_id = stable_video_id(video_path)
            video_record = _probed_record(
                video_path,
                video_id=video_id,
                duration_seconds=20.0,
                codec_name="h264",
            )
            clip_records = [
                _pending_clip_record(video_id, index, [], **_RETRY_FIELDS)
                for index in range(2)
            ]
            _write_jsonl(output_dir / "video_manifest.jsonl", [video_record])
            _write_jsonl(output_dir / "clip_manifest.jsonl", clip_records)
            config_path = _write_config(
                root,
                [
                    "input:",
                    f" dir: {input_dir}",
                    "output:",
                    f" dir: {output_dir}",
                    " resume: true",
                    "schema:",
                    " version: local-batch-v1",
                ],
            )
            flush_observations = []

            def fake_infer_clip(clip_record, *args, **kwargs):
                # When the second clip is inferred, snapshot what is already
                # on disk so the test can verify the first result was flushed.
                if clip_record["clip_id"] == clip_records[1]["clip_id"]:
                    results_path = output_dir / "clip_results.jsonl"
                    flushed = []
                    if results_path.exists():
                        flushed = _read_jsonl(results_path)
                    flush_observations.append(
                        {
                            "exists": results_path.exists(),
                            "count": len(flushed),
                            "first_clip_id": flushed[0]["clip_id"] if flushed else None,
                            "first_status": flushed[0]["status"] if flushed else None,
                        }
                    )
                return {
                    "raw_response": '{"screen_time":"10:00:01","events":[]}',
                    "latency_ms": 12,
                    "http_status": 200,
                }

            with patch("video_ai_analysis_poc.cli.infer_clip", side_effect=fake_infer_clip):
                exit_code = main(["--config", str(config_path), "--until", "inference"])

            self.assertEqual(exit_code, 0)
            self.assertEqual(
                flush_observations,
                [
                    {
                        "exists": True,
                        "count": 1,
                        "first_clip_id": clip_records[0]["clip_id"],
                        "first_status": "ok",
                    }
                ],
            )
            result_records = _read_jsonl(output_dir / "clip_results.jsonl")
            self.assertEqual(len(result_records), 2)

    def test_until_inference_preserves_existing_ok_result_and_records_parse_failure(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            output_dir.mkdir()
            video_path = input_dir / "ready.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            video_id = stable_video_id(video_path)
            ok_clip_id = f"{video_id}_c000001"
            retry_clip_id = f"{video_id}_c000002"
            video_record = _probed_record(video_path, video_id=video_id)
            clip_records = [
                _pending_clip_record(video_id, index, []) for index in range(2)
            ]
            existing_ok = {
                "schema_version": "local-batch-v1",
                "video_id": video_id,
                "video_path": str(video_path),
                "clip_id": ok_clip_id,
                "status": "ok",
                "monitoring_timeline": {"frame_times": [], "screen_time": ""},
                "events": [],
                "raw_response": "{}",
                "processing": {},
                "error": None,
            }
            _write_jsonl(output_dir / "video_manifest.jsonl", [video_record])
            _write_jsonl(output_dir / "clip_manifest.jsonl", clip_records)
            _write_jsonl(output_dir / "clip_results.jsonl", [existing_ok])
            config_path = _write_config(
                root,
                [*_local_io_lines(input_dir, output_dir, resume=True), *_FFPROBE_LINES],
            )

            with patch("video_ai_analysis_poc.cli.infer_clip") as infer_clip:
                infer_clip.return_value = {
                    "raw_response": "not-json",
                    "latency_ms": 12,
                    "http_status": 200,
                }
                exit_code = main(["--config", str(config_path), "--until", "inference"])

            self.assertEqual(exit_code, 0)
            self.assertEqual(infer_clip.call_count, 2)
            result_records = _read_jsonl(output_dir / "clip_results.jsonl")
            self.assertEqual(result_records[0], existing_ok)
            self.assertEqual(result_records[1]["clip_id"], retry_clip_id)
            self.assertEqual(result_records[1]["status"], "parse_failed")
            self.assertIn("JSON", result_records[1]["error"])

    def test_until_inference_retries_once_after_parse_failure(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            output_dir.mkdir()
            video_path = input_dir / "ready.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            video_id = stable_video_id(video_path)
            _write_jsonl(
                output_dir / "video_manifest.jsonl",
                [_probed_record(video_path, video_id=video_id)],
            )
            _write_jsonl(
                output_dir / "clip_manifest.jsonl",
                [_pending_clip_record(video_id, 0, [])],
            )
            config_path = _write_config(
                root,
                [
                    *_local_io_lines(input_dir, output_dir, resume=True),
                    "schema:",
                    " version: local-batch-v1",
                    " parse_retry: 1",
                ],
            )

            with patch("video_ai_analysis_poc.cli.infer_clip") as infer_clip:
                # First response is unparseable, second one is valid JSON.
                infer_clip.side_effect = [
                    {
                        "raw_response": "not-json",
                        "latency_ms": 10,
                        "http_status": 200,
                    },
                    {
                        "raw_response": '{"screen_time":"10:00:02","events":[]}',
                        "latency_ms": 11,
                        "http_status": 200,
                    },
                ]
                exit_code = main(["--config", str(config_path), "--until", "inference"])

            self.assertEqual(exit_code, 0)
            self.assertEqual(infer_clip.call_count, 2)
            result_records = _read_jsonl(output_dir / "clip_results.jsonl")
            self.assertEqual(result_records[0]["status"], "ok")
            self.assertEqual(
                result_records[0]["monitoring_timeline"]["screen_time"], "10:00:02"
            )

    def test_default_full_run_writes_clip_video_and_folder_results(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            input_dir = root / "input"
            output_dir = root / "output"
            input_dir.mkdir()
            video_path = input_dir / "full.mp4"
            video_path.write_text("not a real video", encoding="utf-8")
            video_id = stable_video_id(video_path)
            frame_records = _sampled_frame_records(video_id, 4)
            for frame in frame_records:
                frame_path = output_dir / frame["frame_path"]
                frame_path.parent.mkdir(parents=True, exist_ok=True)
                frame_path.write_bytes(b"jpg")
            config_path = _write_config(
                root,
                [
                    *_local_io_lines(input_dir, output_dir, resume=True),
                    *_FFPROBE_LINES,
                    *_NVDEC_LINES,
                    *_clip_lines(1),
                    "schema:",
                    " version: local-batch-v1",
                    " merge_gap_seconds: 30",
                ],
            )

            with (
                patch("video_ai_analysis_poc.cli.probe_video") as probe_video,
                patch("video_ai_analysis_poc.cli.sample_video_frames") as sample_frames,
                patch("video_ai_analysis_poc.cli.infer_clip") as infer_clip,
            ):
                probe_video.return_value = _probed_record(
                    video_path, duration_seconds=4.0, codec_name="h264"
                )
                sample_frames.return_value = frame_records
                infer_clip.return_value = {
                    "raw_response": (
                        '{"screen_time":"10:00:02",'
                        '"events":[{"event_type":"queue_detected","confidence":0.8}]}'
                    ),
                    "latency_ms": 12,
                    "http_status": 200,
                }
                exit_code = main(["--config", str(config_path)])

            self.assertEqual(exit_code, 0)
            self.assertTrue((output_dir / "clip_results.jsonl").exists())
            self.assertTrue(
                (output_dir / "videos" / video_id / "video_result.json").exists()
            )
            self.assertTrue((output_dir / "folder_summary.json").exists())
            folder_summary = json.loads(
                (output_dir / "folder_summary.json").read_text(encoding="utf-8")
            )
            self.assertEqual(folder_summary["video_count"], 1)
            self.assertEqual(folder_summary["processed_video_count"], 1)
            self.assertEqual(folder_summary["failed_video_count"], 0)
            self.assertEqual(folder_summary["event_counts"], {"queue_detected": 1})


if __name__ == "__main__":
    unittest.main()