"""Command-line entry point for the local video batch analysis PoC.

The pipeline runs in stages: acquire and probe source videos, sample frames,
build clips, run VLM inference per clip, then aggregate the outputs. Each
stage persists a JSONL manifest under the configured output directory so a
later run can resume from it when ``output.resume`` is enabled.
"""

from __future__ import annotations

import argparse
import json
from pathlib import Path
from typing import Sequence

from .aggregator import aggregate_outputs
from .clips import build_clip_records
from .config import DEFAULT_CONFIG_PATH, load_config
from .discovery import discover_videos
from .ffmpeg_sampler import sample_video_frames
from .hik_cloud import download_hik_cloud_recordings
from .manifest import read_jsonl, write_manifest
from .paths import stable_video_id
from .probe import probe_video
from .result_parser import build_clip_result
from .timeline import DEFAULT_TIMEZONE, format_beijing_time, timeline_start_epoch
from .vlm_client import infer_clip


def main(argv: Sequence[str] | None = None) -> int:
    """Run the batch analysis pipeline and return a process exit code.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success. Invalid flag combinations and an unsupported
        ``source.mode`` are reported through ``parser.error``, which exits.
    """
    parser = argparse.ArgumentParser(
        description="Local video batch analysis PoC entrypoint."
    )
    parser.add_argument("--config", default=str(DEFAULT_CONFIG_PATH))
    parser.add_argument("--input-dir")
    parser.add_argument("--output-dir")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--until", choices=["clips", "inference"])
    parser.add_argument("--limit-clips", type=int)
    args = parser.parse_args(argv)

    # Reject bad flag combinations before loading config or touching disk.
    if args.dry_run and args.until:
        parser.error("--dry-run cannot be combined with --until")
    if args.limit_clips is not None and args.limit_clips < 0:
        parser.error("--limit-clips must be non-negative")

    config = load_config(
        args.config,
        input_dir=args.input_dir,
        output_dir=args.output_dir,
    )

    output_dir = Path(config["output"]["dir"])
    output_dir.mkdir(parents=True, exist_ok=True)
    video_manifest_path = output_dir / "video_manifest.jsonl"
    resume_enabled = bool(config.get("output", {}).get("resume", False))

    # --- Stage 1: acquire and probe source videos -------------------------
    records = _load_resume_records(
        video_manifest_path,
        resume=resume_enabled,
    )
    # Map each record key to its position so a re-probed video replaces its
    # old entry in place. For duplicate keys the last index wins.
    record_indexes: dict[str, int] = {}
    for index, record in enumerate(records):
        key = _record_key(record)
        if key is not None:
            record_indexes[key] = index

    try:
        _acquire_source_records(
            config,
            output_dir,
            records,
            record_indexes,
            download_source=not args.dry_run,
        )
    except ValueError as exc:
        parser.error(str(exc))
    write_manifest(video_manifest_path, records)
    if args.dry_run:
        return 0

    # --- Stage 2: sample frames -------------------------------------------
    clip_manifest_path = output_dir / "clip_manifest.jsonl"
    existing_clip_records = read_jsonl(clip_manifest_path) if resume_enabled else []
    existing_clip_video_ids = {
        str(record.get("video_id"))
        for record in existing_clip_records
        if record.get("video_id")
    }

    frame_manifest_path = output_dir / "frame_manifest.jsonl"
    frame_records = read_jsonl(frame_manifest_path) if resume_enabled else []
    timezone_name = str(config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE))
    backfilled_frame_video_ids = _backfill_frame_beijing_times(
        frame_records,
        records,
        timezone_name=timezone_name,
    )
    existing_sampled_video_ids = {
        str(record.get("video_id"))
        for record in frame_records
        if record.get("status") == "sampled" and record.get("video_id")
    }

    # Videos whose frames changed in this run (backfilled or re-sampled);
    # their clips are rebuilt below.
    changed_frame_video_ids: set[str] = set(backfilled_frame_video_ids)
    for record in records:
        if record.get("status") != "probed":
            continue
        video_id = str(record.get("video_id"))
        # With --until inference, videos that already have clips are reused
        # as-is and not re-sampled.
        if args.until == "inference" and video_id in existing_clip_video_ids:
            continue
        if video_id in existing_sampled_video_ids:
            continue
        # Drop any stale (non-sampled) frame rows before re-sampling.
        frame_records = _without_video_records(frame_records, video_id)
        ffmpeg_config = dict(config["ffmpeg"])
        ffmpeg_config["timezone"] = timezone_name
        frame_records.extend(
            sample_video_frames(
                record,
                output_dir,
                ffmpeg_config,
                manifest_path=None,
            )
        )
        changed_frame_video_ids.add(video_id)
    write_manifest(frame_manifest_path, frame_records)

    # --- Stage 3: build clips ---------------------------------------------
    sampled_video_ids = {
        str(record.get("video_id"))
        for record in frame_records
        if record.get("status") == "sampled" and record.get("video_id")
    }
    # Rebuild clips for changed videos and for sampled videos with no clips.
    clip_rebuild_video_ids = changed_frame_video_ids | (
        sampled_video_ids - existing_clip_video_ids
    )
    clip_records = [
        record
        for record in existing_clip_records
        if str(record.get("video_id")) not in clip_rebuild_video_ids
    ]
    frames_to_build = [
        record
        for record in frame_records
        if str(record.get("video_id")) in clip_rebuild_video_ids
    ]
    clip_records.extend(build_clip_records(frames_to_build, config["clip"]))
    write_manifest(clip_manifest_path, clip_records)
    if args.until == "clips":
        return 0

    # --- Stage 4: inference, then aggregation -----------------------------
    _run_inference(
        clip_records,
        records,
        output_dir,
        config,
        limit_clips=args.limit_clips,
        resume=resume_enabled,
    )
    if args.until == "inference":
        return 0

    aggregate_outputs(output_dir, config)
    return 0


def _load_resume_records(path: Path, *, resume: bool) -> list[dict[str, object]]:
    """Return the records stored at *path*, or an empty list when not resuming."""
    if not resume:
        return []
    return read_jsonl(path)


def _record_key(record: dict[str, object]) -> str | None:
    """Return the key identifying *record*, or ``None`` if it has none.

    A truthy ``video_id`` is used as-is; otherwise the key is derived from a
    truthy ``path`` via ``stable_video_id``.
    """
    video_id = record.get("video_id")
    if video_id:
        return str(video_id)
    path = record.get("path")
    if path:
        return stable_video_id(str(path))
    return None


def _acquire_source_records(
    config: dict[str, object],
    output_dir: Path,
    records: list[dict[str, object]],
    record_indexes: dict[str, int],
    *,
    download_source: bool = True,
) -> None:
    """Probe source videos and merge the results into *records* in place.

    Source records without a ``path`` are skipped, as are videos whose
    existing record already has status ``"probed"``. A new video is appended
    to *records*; a known one replaces its previous entry. *record_indexes*
    is updated to match.

    Raises:
        ValueError: Propagated from ``_source_video_records`` for an
            unsupported ``source.mode``.
    """
    for source_record in _source_video_records(
        config,
        output_dir,
        download_source=download_source,
    ):
        path = source_record.get("path")
        if not path:
            continue
        video_id = stable_video_id(str(path))
        existing_index = record_indexes.get(video_id)
        if (
            existing_index is not None
            and records[existing_index].get("status") == "probed"
        ):
            continue
        probe_record = probe_video(
            str(path),
            timeout_seconds=config["ffprobe"]["timeout_seconds"],
        )
        # Probe fields override source fields; video_id always wins.
        record = {**source_record, **probe_record, "video_id": video_id}
        if existing_index is None:
            record_indexes[video_id] = len(records)
            records.append(record)
        else:
            records[existing_index] = record


def _source_video_records(
    config: dict[str, object],
    output_dir: Path,
    *,
    download_source: bool = True,
) -> list[dict[str, object]]:
    """Return one record per source video for the configured source mode.

    ``source.mode`` defaults to ``"local"``, which discovers files under the
    configured input directory. ``"hik_cloud"`` keeps only the recordings
    whose status is ``"downloaded"``.

    Raises:
        ValueError: If ``source.mode`` is neither ``"local"`` nor
            ``"hik_cloud"``.
    """
    source_config = config.get("source", {})
    source_mode = "local"
    if isinstance(source_config, dict):
        source_mode = str(source_config.get("mode", "local"))

    if source_mode == "local":
        videos = discover_videos(
            config["input"]["dir"],
            config["input"]["extensions"],
            recursive=config["input"]["recursive"],
        )
        return [{"path": path} for path in videos]

    if source_mode == "hik_cloud":
        return [
            record
            for record in download_hik_cloud_recordings(
                config,
                output_dir,
                download=download_source,
            )
            if record.get("status") == "downloaded"
        ]

    raise ValueError(f"unsupported source.mode: {source_mode}")


def _without_video_records(
    records: list[dict[str, object]],
    video_id: str,
) -> list[dict[str, object]]:
    """Return a new list of *records* excluding those belonging to *video_id*."""
    return [record for record in records if str(record.get("video_id")) != video_id]


def _backfill_frame_beijing_times(
    frame_records: list[dict[str, object]],
    video_records: list[dict[str, object]],
    *,
    timezone_name: str,
) -> set[str]:
    """Fill in a missing ``beijing_time`` on sampled frame records, in place.

    Only frames with status ``"sampled"`` and no ``beijing_time`` are
    considered. A frame is left untouched when ``format_beijing_time``
    returns ``None``.

    Returns:
        The video ids of every frame record that was updated.
    """
    video_by_id = {
        str(record.get("video_id")): record
        for record in video_records
        if record.get("video_id")
    }
    changed_video_ids: set[str] = set()
    for frame_record in frame_records:
        if frame_record.get("status") != "sampled" or frame_record.get("beijing_time"):
            continue
        video_id = str(frame_record.get("video_id") or "")
        start_epoch = timeline_start_epoch(video_by_id.get(video_id, {}))
        beijing_time = format_beijing_time(
            start_epoch,
            offset_seconds=float(frame_record.get("offset_seconds") or 0),
            timezone_name=timezone_name,
        )
        if beijing_time is None:
            continue
        frame_record["beijing_time"] = beijing_time
        changed_video_ids.add(video_id)
    return changed_video_ids


def _run_inference(
    clip_records: list[dict[str, object]],
    video_records: list[dict[str, object]],
    output_dir: Path,
    config: dict[str, object],
    *,
    limit_clips: int | None,
    resume: bool,
) -> None:
    """Infer every clip lacking an ``"ok"`` result and persist the results.

    Results go to ``clip_results.jsonl`` in *output_dir*. When *resume* is
    true the existing results are loaded first and their timelines refreshed
    from the current clip records. At most *limit_clips* clips are processed
    when it is not ``None``.
    """
    results_path = output_dir / "clip_results.jsonl"
    result_records = read_jsonl(results_path) if resume else []
    clip_by_id = {
        str(record.get("clip_id")): record
        for record in clip_records
        if record.get("clip_id")
    }
    result_records = [
        _refresh_result_timeline(record, clip_by_id, config)
        for record in result_records
    ]
    ok_clip_ids = {
        str(record.get("clip_id"))
        for record in result_records
        if record.get("status") == "ok" and record.get("clip_id")
    }
    video_by_id = {
        str(record.get("video_id")): record
        for record in video_records
        if record.get("video_id")
    }

    processed = 0
    for clip_record in clip_records:
        clip_id = str(clip_record.get("clip_id"))
        if clip_id in ok_clip_ids:
            continue
        if limit_clips is not None and processed >= limit_clips:
            break
        # Drop any earlier non-ok result for this clip before retrying it.
        result_records = [
            record
            for record in result_records
            if str(record.get("clip_id")) != clip_id
        ]
        video_record = video_by_id.get(str(clip_record.get("video_id")), {})
        result = _infer_and_parse_clip(clip_record, video_record, output_dir, config)
        result_records.append(result)
        # Write after every clip so finished results are already on disk.
        _write_jsonl_exact(results_path, result_records)
        processed += 1

    # Final write covers the case where no clip was processed but existing
    # results were refreshed.
    _write_jsonl_exact(results_path, result_records)


def _refresh_result_timeline(
    result_record: dict[str, object],
    clip_by_id: dict[str, dict[str, object]],
    config: dict[str, object],
) -> dict[str, object]:
    """Return *result_record* with its timeline synced to the matching clip.

    The record is returned unchanged when no clip matches its ``clip_id`` or
    the clip carries no Beijing-time information. Otherwise a shallow copy is
    returned whose ``monitoring_timeline`` is updated from the clip record.
    """
    clip_record = clip_by_id.get(str(result_record.get("clip_id")))
    if not clip_record:
        return result_record
    if not _clip_has_beijing_timing(clip_record):
        return result_record

    timeline = dict(result_record.get("monitoring_timeline") or {})
    timeline.update(
        {
            "timezone": config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE),
            "clip_start_seconds": clip_record.get("clip_start_seconds"),
            "clip_end_seconds": clip_record.get("clip_end_seconds"),
            "clip_start_timecode": clip_record.get("clip_start_timecode"),
            "clip_end_timecode": clip_record.get("clip_end_timecode"),
            "clip_start_beijing_time": clip_record.get("clip_start_beijing_time"),
            "clip_end_beijing_time": clip_record.get("clip_end_beijing_time"),
            "frame_times": clip_record.get("frame_times", []),
        }
    )
    refreshed = dict(result_record)
    refreshed["monitoring_timeline"] = timeline
    return refreshed


def _clip_has_beijing_timing(clip_record: dict[str, object]) -> bool:
    """Return whether the clip or any of its frames has a Beijing time set."""
    if clip_record.get("clip_start_beijing_time") or clip_record.get(
        "clip_end_beijing_time"
    ):
        return True
    return any(
        isinstance(frame, dict) and frame.get("beijing_time")
        for frame in clip_record.get("frame_times", []) or []
    )


def _infer_and_parse_clip(
    clip_record: dict[str, object],
    video_record: dict[str, object],
    output_dir: Path,
    config: dict[str, object],
) -> dict[str, object]:
    """Run inference on one clip, retrying while the response fails to parse.

    The clip is attempted ``schema.parse_retry + 1`` times, and always at
    least once. An exception from ``infer_clip`` ends the attempts at once
    with an ``"inference_failed"`` result. If every attempt ends in
    ``"parse_failed"``, the last such result is returned.
    """
    schema_config = config.get("schema", {})
    parse_retry = 0
    if isinstance(schema_config, dict):
        # A null parse_retry counts as 0.
        parse_retry = int(schema_config.get("parse_retry") or 0)
    # Clamp so a negative retry count still yields one attempt instead of
    # skipping the loop and hitting the "unreachable" error below.
    attempts = max(parse_retry, 0) + 1

    result: dict[str, object] | None = None
    for attempt in range(attempts):
        try:
            inference = infer_clip(
                clip_record,
                output_dir,
                config["vlm"],
                config["prompt"],
            )
        except Exception as exc:
            # Best-effort boundary: record the failure on the clip result so
            # one bad clip does not abort the whole batch.
            return build_clip_result(
                "",
                clip_record,
                video_record,
                config,
                processing={},
                status="inference_failed",
                error=str(exc),
            )
        result = build_clip_result(
            str(inference.get("raw_response", "")),
            clip_record,
            video_record,
            config,
            processing={
                "latency_ms": inference.get("latency_ms"),
                "http_status": inference.get("http_status"),
                "attempt": attempt + 1,
            },
        )
        if result.get("status") != "parse_failed":
            return result

    if result is None:
        raise RuntimeError("unreachable inference state")
    return result


def _write_jsonl_exact(
    path: Path,
    records: list[dict[str, object]],
) -> None:
    """Overwrite *path* with *records*, one sorted-key JSON object per line.

    Parent directories are created as needed. Non-ASCII text is written
    unescaped as UTF-8.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        for record in records:
            handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")


if __name__ == "__main__":
    raise SystemExit(main())