Initial video AI analysis project
This commit is contained in:
424
video_ai_analysis_poc/cli.py
Normal file
424
video_ai_analysis_poc/cli.py
Normal file
@@ -0,0 +1,424 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from .aggregator import aggregate_outputs
|
||||
from .clips import build_clip_records
|
||||
from .config import DEFAULT_CONFIG_PATH, load_config
|
||||
from .discovery import discover_videos
|
||||
from .ffmpeg_sampler import sample_video_frames
|
||||
from .hik_cloud import download_hik_cloud_recordings
|
||||
from .manifest import read_jsonl, write_manifest
|
||||
from .paths import stable_video_id
|
||||
from .probe import probe_video
|
||||
from .result_parser import build_clip_result
|
||||
from .timeline import DEFAULT_TIMEZONE, format_beijing_time, timeline_start_epoch
|
||||
from .vlm_client import infer_clip
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Local video batch analysis PoC entrypoint."
|
||||
)
|
||||
parser.add_argument("--config", default=str(DEFAULT_CONFIG_PATH))
|
||||
parser.add_argument("--input-dir")
|
||||
parser.add_argument("--output-dir")
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument("--until", choices=["clips", "inference"])
|
||||
parser.add_argument("--limit-clips", type=int)
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
config = load_config(
|
||||
args.config,
|
||||
input_dir=args.input_dir,
|
||||
output_dir=args.output_dir,
|
||||
)
|
||||
if args.dry_run and args.until:
|
||||
parser.error("--dry-run cannot be combined with --until")
|
||||
if args.limit_clips is not None and args.limit_clips < 0:
|
||||
parser.error("--limit-clips must be non-negative")
|
||||
|
||||
output_dir = Path(config["output"]["dir"])
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
video_manifest_path = output_dir / "video_manifest.jsonl"
|
||||
resume_enabled = bool(config.get("output", {}).get("resume", False))
|
||||
records = _load_resume_records(
|
||||
video_manifest_path,
|
||||
resume=resume_enabled,
|
||||
)
|
||||
record_indexes = {
|
||||
_record_key(record): index
|
||||
for index, record in enumerate(records)
|
||||
if _record_key(record) is not None
|
||||
}
|
||||
|
||||
try:
|
||||
_acquire_source_records(
|
||||
config,
|
||||
output_dir,
|
||||
records,
|
||||
record_indexes,
|
||||
download_source=not args.dry_run,
|
||||
)
|
||||
except ValueError as exc:
|
||||
parser.error(str(exc))
|
||||
|
||||
write_manifest(video_manifest_path, records)
|
||||
if args.dry_run:
|
||||
return 0
|
||||
|
||||
clip_manifest_path = output_dir / "clip_manifest.jsonl"
|
||||
existing_clip_records = read_jsonl(clip_manifest_path) if resume_enabled else []
|
||||
existing_clip_video_ids = {
|
||||
str(record.get("video_id"))
|
||||
for record in existing_clip_records
|
||||
if record.get("video_id")
|
||||
}
|
||||
|
||||
frame_manifest_path = output_dir / "frame_manifest.jsonl"
|
||||
frame_records = read_jsonl(frame_manifest_path) if resume_enabled else []
|
||||
timezone_name = str(config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE))
|
||||
backfilled_frame_video_ids = _backfill_frame_beijing_times(
|
||||
frame_records,
|
||||
records,
|
||||
timezone_name=timezone_name,
|
||||
)
|
||||
existing_sampled_video_ids = {
|
||||
str(record.get("video_id"))
|
||||
for record in frame_records
|
||||
if record.get("status") == "sampled" and record.get("video_id")
|
||||
}
|
||||
changed_frame_video_ids: set[str] = set(backfilled_frame_video_ids)
|
||||
for record in records:
|
||||
if record.get("status") != "probed":
|
||||
continue
|
||||
video_id = str(record.get("video_id"))
|
||||
if args.until == "inference" and video_id in existing_clip_video_ids:
|
||||
continue
|
||||
if video_id in existing_sampled_video_ids:
|
||||
continue
|
||||
frame_records = _without_video_records(frame_records, video_id)
|
||||
ffmpeg_config = dict(config["ffmpeg"])
|
||||
ffmpeg_config["timezone"] = timezone_name
|
||||
frame_records.extend(
|
||||
sample_video_frames(
|
||||
record,
|
||||
output_dir,
|
||||
ffmpeg_config,
|
||||
manifest_path=None,
|
||||
)
|
||||
)
|
||||
changed_frame_video_ids.add(video_id)
|
||||
write_manifest(frame_manifest_path, frame_records)
|
||||
|
||||
sampled_video_ids = {
|
||||
str(record.get("video_id"))
|
||||
for record in frame_records
|
||||
if record.get("status") == "sampled" and record.get("video_id")
|
||||
}
|
||||
clip_rebuild_video_ids = changed_frame_video_ids | (
|
||||
sampled_video_ids - existing_clip_video_ids
|
||||
)
|
||||
clip_records = [
|
||||
record
|
||||
for record in existing_clip_records
|
||||
if str(record.get("video_id")) not in clip_rebuild_video_ids
|
||||
]
|
||||
frames_to_build = [
|
||||
record
|
||||
for record in frame_records
|
||||
if str(record.get("video_id")) in clip_rebuild_video_ids
|
||||
]
|
||||
clip_records.extend(build_clip_records(frames_to_build, config["clip"]))
|
||||
write_manifest(output_dir / "clip_manifest.jsonl", clip_records)
|
||||
if args.until == "clips":
|
||||
return 0
|
||||
|
||||
_run_inference(
|
||||
clip_records,
|
||||
records,
|
||||
output_dir,
|
||||
config,
|
||||
limit_clips=args.limit_clips,
|
||||
resume=resume_enabled,
|
||||
)
|
||||
if args.until == "inference":
|
||||
return 0
|
||||
aggregate_outputs(output_dir, config)
|
||||
return 0
|
||||
|
||||
|
||||
def _load_resume_records(path: Path, *, resume: bool) -> list[dict[str, object]]:
|
||||
if not resume:
|
||||
return []
|
||||
return read_jsonl(path)
|
||||
|
||||
|
||||
def _record_key(record: dict[str, object]) -> str | None:
|
||||
video_id = record.get("video_id")
|
||||
if video_id:
|
||||
return str(video_id)
|
||||
path = record.get("path")
|
||||
if path:
|
||||
return stable_video_id(str(path))
|
||||
return None
|
||||
|
||||
|
||||
def _acquire_source_records(
|
||||
config: dict[str, object],
|
||||
output_dir: Path,
|
||||
records: list[dict[str, object]],
|
||||
record_indexes: dict[str, int],
|
||||
*,
|
||||
download_source: bool = True,
|
||||
) -> None:
|
||||
for source_record in _source_video_records(
|
||||
config,
|
||||
output_dir,
|
||||
download_source=download_source,
|
||||
):
|
||||
path = source_record.get("path")
|
||||
if not path:
|
||||
continue
|
||||
video_id = stable_video_id(str(path))
|
||||
existing_index = record_indexes.get(video_id)
|
||||
if (
|
||||
existing_index is not None
|
||||
and records[existing_index].get("status") == "probed"
|
||||
):
|
||||
continue
|
||||
|
||||
probe_record = probe_video(
|
||||
str(path),
|
||||
timeout_seconds=config["ffprobe"]["timeout_seconds"],
|
||||
)
|
||||
record = {**source_record, **probe_record, "video_id": video_id}
|
||||
if existing_index is None:
|
||||
record_indexes[video_id] = len(records)
|
||||
records.append(record)
|
||||
else:
|
||||
records[existing_index] = record
|
||||
|
||||
|
||||
def _source_video_records(
|
||||
config: dict[str, object],
|
||||
output_dir: Path,
|
||||
*,
|
||||
download_source: bool = True,
|
||||
) -> list[dict[str, object]]:
|
||||
source_config = config.get("source", {})
|
||||
source_mode = "local"
|
||||
if isinstance(source_config, dict):
|
||||
source_mode = str(source_config.get("mode", "local"))
|
||||
|
||||
if source_mode == "local":
|
||||
videos = discover_videos(
|
||||
config["input"]["dir"],
|
||||
config["input"]["extensions"],
|
||||
recursive=config["input"]["recursive"],
|
||||
)
|
||||
return [{"path": path} for path in videos]
|
||||
|
||||
if source_mode == "hik_cloud":
|
||||
return [
|
||||
record
|
||||
for record in download_hik_cloud_recordings(
|
||||
config,
|
||||
output_dir,
|
||||
download=download_source,
|
||||
)
|
||||
if record.get("status") == "downloaded"
|
||||
]
|
||||
|
||||
raise ValueError(f"unsupported source.mode: {source_mode}")
|
||||
|
||||
|
||||
def _without_video_records(
|
||||
records: list[dict[str, object]],
|
||||
video_id: str,
|
||||
) -> list[dict[str, object]]:
|
||||
return [record for record in records if str(record.get("video_id")) != video_id]
|
||||
|
||||
|
||||
def _backfill_frame_beijing_times(
|
||||
frame_records: list[dict[str, object]],
|
||||
video_records: list[dict[str, object]],
|
||||
*,
|
||||
timezone_name: str,
|
||||
) -> set[str]:
|
||||
video_by_id = {
|
||||
str(record.get("video_id")): record
|
||||
for record in video_records
|
||||
if record.get("video_id")
|
||||
}
|
||||
changed_video_ids: set[str] = set()
|
||||
for frame_record in frame_records:
|
||||
if frame_record.get("status") != "sampled" or frame_record.get("beijing_time"):
|
||||
continue
|
||||
video_id = str(frame_record.get("video_id") or "")
|
||||
start_epoch = timeline_start_epoch(video_by_id.get(video_id, {}))
|
||||
beijing_time = format_beijing_time(
|
||||
start_epoch,
|
||||
offset_seconds=float(frame_record.get("offset_seconds") or 0),
|
||||
timezone_name=timezone_name,
|
||||
)
|
||||
if beijing_time is None:
|
||||
continue
|
||||
frame_record["beijing_time"] = beijing_time
|
||||
changed_video_ids.add(video_id)
|
||||
return changed_video_ids
|
||||
|
||||
|
||||
def _run_inference(
|
||||
clip_records: list[dict[str, object]],
|
||||
video_records: list[dict[str, object]],
|
||||
output_dir: Path,
|
||||
config: dict[str, object],
|
||||
*,
|
||||
limit_clips: int | None,
|
||||
resume: bool,
|
||||
) -> None:
|
||||
results_path = output_dir / "clip_results.jsonl"
|
||||
result_records = read_jsonl(results_path) if resume else []
|
||||
clip_by_id = {
|
||||
str(record.get("clip_id")): record
|
||||
for record in clip_records
|
||||
if record.get("clip_id")
|
||||
}
|
||||
result_records = [
|
||||
_refresh_result_timeline(record, clip_by_id, config)
|
||||
for record in result_records
|
||||
]
|
||||
ok_clip_ids = {
|
||||
str(record.get("clip_id"))
|
||||
for record in result_records
|
||||
if record.get("status") == "ok" and record.get("clip_id")
|
||||
}
|
||||
video_by_id = {
|
||||
str(record.get("video_id")): record
|
||||
for record in video_records
|
||||
if record.get("video_id")
|
||||
}
|
||||
processed = 0
|
||||
for clip_record in clip_records:
|
||||
clip_id = str(clip_record.get("clip_id"))
|
||||
if clip_id in ok_clip_ids:
|
||||
continue
|
||||
if limit_clips is not None and processed >= limit_clips:
|
||||
break
|
||||
|
||||
result_records = [
|
||||
record for record in result_records if str(record.get("clip_id")) != clip_id
|
||||
]
|
||||
video_record = video_by_id.get(str(clip_record.get("video_id")), {})
|
||||
result = _infer_and_parse_clip(clip_record, video_record, output_dir, config)
|
||||
result_records.append(result)
|
||||
_write_jsonl_exact(results_path, result_records)
|
||||
processed += 1
|
||||
|
||||
_write_jsonl_exact(results_path, result_records)
|
||||
|
||||
|
||||
def _refresh_result_timeline(
|
||||
result_record: dict[str, object],
|
||||
clip_by_id: dict[str, dict[str, object]],
|
||||
config: dict[str, object],
|
||||
) -> dict[str, object]:
|
||||
clip_record = clip_by_id.get(str(result_record.get("clip_id")))
|
||||
if not clip_record:
|
||||
return result_record
|
||||
if not _clip_has_beijing_timing(clip_record):
|
||||
return result_record
|
||||
timeline = dict(result_record.get("monitoring_timeline") or {})
|
||||
timeline.update(
|
||||
{
|
||||
"timezone": config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE),
|
||||
"clip_start_seconds": clip_record.get("clip_start_seconds"),
|
||||
"clip_end_seconds": clip_record.get("clip_end_seconds"),
|
||||
"clip_start_timecode": clip_record.get("clip_start_timecode"),
|
||||
"clip_end_timecode": clip_record.get("clip_end_timecode"),
|
||||
"clip_start_beijing_time": clip_record.get("clip_start_beijing_time"),
|
||||
"clip_end_beijing_time": clip_record.get("clip_end_beijing_time"),
|
||||
"frame_times": clip_record.get("frame_times", []),
|
||||
}
|
||||
)
|
||||
refreshed = dict(result_record)
|
||||
refreshed["monitoring_timeline"] = timeline
|
||||
return refreshed
|
||||
|
||||
|
||||
def _clip_has_beijing_timing(clip_record: dict[str, object]) -> bool:
|
||||
if clip_record.get("clip_start_beijing_time") or clip_record.get("clip_end_beijing_time"):
|
||||
return True
|
||||
for frame in clip_record.get("frame_times", []) or []:
|
||||
if isinstance(frame, dict) and frame.get("beijing_time"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _infer_and_parse_clip(
|
||||
clip_record: dict[str, object],
|
||||
video_record: dict[str, object],
|
||||
output_dir: Path,
|
||||
config: dict[str, object],
|
||||
) -> dict[str, object]:
|
||||
schema_config = config.get("schema", {})
|
||||
parse_retry = 0
|
||||
if isinstance(schema_config, dict):
|
||||
parse_retry = int(schema_config.get("parse_retry", 0))
|
||||
|
||||
attempts = parse_retry + 1
|
||||
result: dict[str, object] | None = None
|
||||
for attempt in range(attempts):
|
||||
try:
|
||||
inference = infer_clip(
|
||||
clip_record,
|
||||
output_dir,
|
||||
config["vlm"],
|
||||
config["prompt"],
|
||||
)
|
||||
except Exception as exc:
|
||||
return build_clip_result(
|
||||
"",
|
||||
clip_record,
|
||||
video_record,
|
||||
config,
|
||||
processing={},
|
||||
status="inference_failed",
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
result = build_clip_result(
|
||||
str(inference.get("raw_response", "")),
|
||||
clip_record,
|
||||
video_record,
|
||||
config,
|
||||
processing={
|
||||
"latency_ms": inference.get("latency_ms"),
|
||||
"http_status": inference.get("http_status"),
|
||||
"attempt": attempt + 1,
|
||||
},
|
||||
)
|
||||
if result.get("status") != "parse_failed":
|
||||
return result
|
||||
if result is None:
|
||||
raise RuntimeError("unreachable inference state")
|
||||
return result
|
||||
|
||||
|
||||
def _write_jsonl_exact(
|
||||
path: Path,
|
||||
records: list[dict[str, object]],
|
||||
) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with path.open("w", encoding="utf-8") as handle:
|
||||
for record in records:
|
||||
handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user