Initial video AI analysis project

This commit is contained in:
yangyl
2026-06-17 11:33:54 +08:00
commit ef0047af6d
35 changed files with 8613 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
"""Local video batch analysis PoC."""
# Names exported by a star-import of this package.
__all__ = [
    "config",
    "discovery",
    "manifest",
    "paths",
    "probe",
]

View File

@@ -0,0 +1,403 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from .manifest import read_jsonl
def aggregate_outputs(
    output_dir: str | Path,
    config: dict[str, Any],
) -> dict[str, Any]:
    """Roll clip-level results up into per-video files and a folder summary.

    Reads ``video_manifest.jsonl``, ``clip_manifest.jsonl`` and
    ``clip_results.jsonl`` from *output_dir*, writes
    ``videos/<video_id>/video_result.json`` for every manifest entry that
    carries a video id, then writes ``folder_summary.json``.

    Args:
        output_dir: Directory holding the manifests; also the write target.
        config: Loaded configuration; ``schema.version``,
            ``schema.merge_gap_seconds`` and ``input.dir`` are read.

    Returns:
        The folder summary that was written to disk.
    """
    base = Path(output_dir).expanduser().resolve(strict=False)
    run_started = _now_iso()
    manifest_videos = read_jsonl(base / "video_manifest.jsonl")
    manifest_clips = read_jsonl(base / "clip_manifest.jsonl")
    stored_results = read_jsonl(base / "clip_results.jsonl")

    schema_version = str(config.get("schema", {}).get("version", "local-batch-v1"))
    merge_gap_seconds = float(config.get("schema", {}).get("merge_gap_seconds", 30))

    clips_index = _group_by_video(manifest_clips)
    results_index = _group_by_video(stored_results)

    summaries = []
    totals: dict[str, int] = {}
    ok_videos = 0
    bad_videos = 0

    for entry in manifest_videos:
        vid = str(entry.get("video_id") or "")
        if vid == "":
            # Entries without an id cannot be written anywhere; they still
            # count towards "video_count" below.
            continue

        per_video = _build_video_result(
            entry,
            clips_index.get(vid, []),
            results_index.get(vid, []),
            schema_version=schema_version,
            merge_gap_seconds=merge_gap_seconds,
            started_at=run_started,
        )
        _write_json(base / "videos" / vid / "video_result.json", per_video)

        failed_clips = int(per_video["failed_clip_count"])
        # Failed means: never probed successfully, or at least one clip failed.
        is_failed = not (entry.get("status") == "probed" and failed_clips <= 0)
        if is_failed:
            bad_videos += 1
        else:
            ok_videos += 1

        for kind, amount in per_video["event_counts"].items():
            totals[kind] = totals.get(kind, 0) + int(amount)

        summaries.append(
            {
                "video_id": vid,
                "video_path": per_video["video_path"],
                "status": "failed" if is_failed else "processed",
                "clip_count": per_video["clip_count"],
                "failed_clip_count": failed_clips,
                "failed_clip_counts": per_video["failed_clip_counts"],
                "event_counts": per_video["event_counts"],
                "outputs": {"video_result_json": f"videos/{vid}/video_result.json"},
                "error": entry.get("last_error"),
            }
        )

    summary = {
        "schema_version": schema_version,
        "input_dir": str(config.get("input", {}).get("dir")),
        "video_count": len(manifest_videos),
        "processed_video_count": ok_videos,
        "failed_video_count": bad_videos,
        "event_counts": dict(sorted(totals.items())),
        "videos": summaries,
        "processing": {
            "started_at": run_started,
            "finished_at": _now_iso(),
        },
    }
    _write_json(base / "folder_summary.json", summary)
    return summary
def _build_video_result(
    video_record: dict[str, Any],
    clip_records: list[dict[str, Any]],
    clip_results: list[dict[str, Any]],
    *,
    schema_version: str,
    merge_gap_seconds: float,
    started_at: str,
) -> dict[str, Any]:
    """Assemble the ``video_result.json`` payload for a single video.

    Args:
        video_record: Manifest entry of the video.
        clip_records: Clip manifest entries of the video (only counted).
        clip_results: Clip result entries of the video.
        schema_version: Value stored under ``schema_version``.
        merge_gap_seconds: Gap threshold handed to the event merger.
        started_at: Timestamp stored as ``processing.started_at``.

    Returns:
        A dict with probe data, timeline, clip/failure counts and merged events.
    """
    failures = _failed_clip_counts(clip_results)
    events = _merge_events(_event_records(clip_results), merge_gap_seconds)
    timeline = {
        "video_start_time": _video_start_time(video_record, clip_results),
        "video_duration_seconds": _first_present(
            video_record,
            ("duration_seconds", "video_duration_seconds", "duration"),
        ),
    }
    payload: dict[str, Any] = {
        "schema_version": schema_version,
        "video_id": str(video_record.get("video_id")),
        "video_path": _video_path(video_record, clip_results),
        "probe": _probe(video_record),
        "monitoring_timeline": timeline,
        "clip_count": len(clip_records),
        "failed_clip_count": sum(failures.values()),
        "failed_clip_counts": failures,
        "event_counts": _event_counts(events),
        "events": events,
        "outputs": {"clip_results_jsonl": "clip_results.jsonl"},
    }
    payload["processing"] = {
        "started_at": started_at,
        "finished_at": _now_iso(),
    }
    return payload
def _event_records(clip_results: list[dict[str, Any]]) -> list[dict[str, Any]]:
records = []
for result in clip_results:
if result.get("status") != "ok":
continue
timeline = result.get("monitoring_timeline") or {}
if not isinstance(timeline, dict):
timeline = {}
for event in result.get("events") or []:
if not isinstance(event, dict):
continue
event_record = _normalize_event(event, result, timeline)
records.append(event_record)
return sorted(
records,
key=lambda event: (
str(event.get("video_id")),
str(event.get("event_type")),
float(event.get("start_offset_seconds") or 0),
float(event.get("end_offset_seconds") or 0),
),
)
def _normalize_event(
    event: dict[str, Any],
    result: dict[str, Any],
    timeline: dict[str, Any],
) -> dict[str, Any]:
    """Convert one raw clip event into the aggregated event shape.

    Args:
        event: Event dict taken from a clip result's ``events`` list.
        result: The clip result that produced the event (``clip_id`` and
            ``video_id`` are read).
        timeline: The clip result's ``monitoring_timeline`` dict (may be empty).

    Returns:
        A dict with event type, times, float offsets, confidence, severity,
        attributes, screen times and an ``evidence`` section that references
        the source clip and its frames.
    """
    clip_id = str(result.get("clip_id"))
    # ``or []`` tolerates an explicit null ``frame_times`` entry.
    frame_times = [
        dict(frame)
        for frame in (timeline.get("frame_times") or [])
        if isinstance(frame, dict)
    ]
    frame_paths = [
        str(frame.get("frame_path"))
        for frame in frame_times
        if frame.get("frame_path") is not None
    ]

    # Fall back to the clip bounds whenever the event offset is unusable,
    # i.e. absent, explicitly null, or not convertible to a float.
    start = _float_or_none(event.get("start_offset_seconds"))
    if start is None:
        start = _float_or_none(timeline.get("clip_start_seconds"))
    end = _float_or_none(event.get("end_offset_seconds"))
    if end is None:
        end = _float_or_none(timeline.get("clip_end_seconds"))

    screen_time = str(timeline.get("screen_time") or "")
    attributes = event.get("attributes")
    normalized = {
        "video_id": str(result.get("video_id")),
        "event_type": str(event.get("event_type") or "unknown"),
        "start_time": event.get("start_time"),
        "end_time": event.get("end_time"),
        "start_offset_seconds": start,
        "end_offset_seconds": end,
        "confidence": event.get("confidence"),
        "severity": event.get("severity"),
        "attributes": attributes if isinstance(attributes, dict) else {},
        "screen_times": [screen_time] if screen_time else [],
        "evidence": {
            "clip_ids": [clip_id],
            "frame_paths": frame_paths,
            "frame_times": frame_times,
            "clips": [
                {
                    "clip_id": clip_id,
                    "clip_start_seconds": timeline.get("clip_start_seconds"),
                    "clip_end_seconds": timeline.get("clip_end_seconds"),
                    "clip_start_timecode": timeline.get("clip_start_timecode"),
                    "clip_end_timecode": timeline.get("clip_end_timecode"),
                    "clip_start_beijing_time": timeline.get("clip_start_beijing_time"),
                    "clip_end_beijing_time": timeline.get("clip_end_beijing_time"),
                    "screen_time": screen_time,
                }
            ],
        },
        "source_event_count": 1,
    }

    # Evidence reported by the event itself is added to, not substituted for,
    # the evidence derived from the clip timeline.
    original_evidence = event.get("evidence")
    if isinstance(original_evidence, dict):
        original_clip_id = original_evidence.get("clip_id")
        if original_clip_id:
            normalized["evidence"]["clip_ids"] = _unique(
                [*normalized["evidence"]["clip_ids"], str(original_clip_id)]
            )
        original_frame_paths = original_evidence.get("frame_paths")
        if isinstance(original_frame_paths, list):
            normalized["evidence"]["frame_paths"] = _unique(
                [*normalized["evidence"]["frame_paths"], *map(str, original_frame_paths)]
            )
    return normalized
def _merge_events(
events: list[dict[str, Any]],
merge_gap_seconds: float,
) -> list[dict[str, Any]]:
merged: list[dict[str, Any]] = []
for event in events:
if not merged or not _can_merge(merged[-1], event, merge_gap_seconds):
merged.append(_copy_event(event))
continue
_merge_into(merged[-1], event)
for event in merged:
event.pop("video_id", None)
return merged
def _can_merge(
previous: dict[str, Any],
current: dict[str, Any],
merge_gap_seconds: float,
) -> bool:
if previous.get("video_id") != current.get("video_id"):
return False
if previous.get("event_type") != current.get("event_type"):
return False
previous_end = _float_or_none(previous.get("end_offset_seconds"))
current_start = _float_or_none(current.get("start_offset_seconds"))
if previous_end is None or current_start is None:
return False
return current_start - previous_end <= merge_gap_seconds
def _merge_into(target: dict[str, Any], event: dict[str, Any]) -> None:
    """Fold *event* into *target* in place.

    The offset span is widened to cover both events, screen times, clip ids
    and frame paths are de-duplicated, frame times and clip descriptors are
    appended, source counts are summed and the higher confidence wins.

    Args:
        target: Already-emitted merged event; mutated.
        event: Event being absorbed; must carry an ``evidence`` dict.
    """
    target_start = _float_or_none(target.get("start_offset_seconds"))
    target_end = _float_or_none(target.get("end_offset_seconds"))
    event_start = _float_or_none(event.get("start_offset_seconds"))
    event_end = _float_or_none(event.get("end_offset_seconds"))

    # Keep the reported start/end times in step with the offsets: whenever the
    # absorbed event widens the span, its time label replaces the old one.
    if (
        event.get("start_time") is not None
        and event_start is not None
        and (target_start is None or event_start < target_start)
    ):
        target["start_time"] = event["start_time"]
    if (
        event.get("end_time") is not None
        and event_end is not None
        and (target_end is None or event_end > target_end)
    ):
        target["end_time"] = event["end_time"]

    target["start_offset_seconds"] = _min_number(target_start, event_start)
    target["end_offset_seconds"] = _max_number(target_end, event_end)
    target["screen_times"] = _unique(
        [*target.get("screen_times", []), *event.get("screen_times", [])]
    )
    target["source_event_count"] = int(target.get("source_event_count", 1)) + int(
        event.get("source_event_count", 1)
    )

    target_evidence = target["evidence"]
    event_evidence = event["evidence"]
    target_evidence["clip_ids"] = _unique(
        [*target_evidence.get("clip_ids", []), *event_evidence.get("clip_ids", [])]
    )
    target_evidence["frame_paths"] = _unique(
        [
            *target_evidence.get("frame_paths", []),
            *event_evidence.get("frame_paths", []),
        ]
    )
    target_evidence["frame_times"].extend(event_evidence.get("frame_times", []))
    target_evidence["clips"].extend(event_evidence.get("clips", []))

    target_confidence = target.get("confidence")
    event_confidence = event.get("confidence")
    if target_confidence is None:
        target["confidence"] = event_confidence
    elif event_confidence is not None:
        # Confidence values are not guaranteed to be numeric here; compare
        # only when both parse, otherwise keep the target's value instead of
        # raising.
        left = _float_or_none(target_confidence)
        right = _float_or_none(event_confidence)
        if left is not None and right is not None:
            target["confidence"] = max(left, right)
def _copy_event(event: dict[str, Any]) -> dict[str, Any]:
copied = dict(event)
copied["screen_times"] = list(event.get("screen_times", []))
copied["attributes"] = dict(event.get("attributes", {}))
copied["evidence"] = {
"clip_ids": list(event["evidence"].get("clip_ids", [])),
"frame_paths": list(event["evidence"].get("frame_paths", [])),
"frame_times": [dict(frame) for frame in event["evidence"].get("frame_times", [])],
"clips": [dict(clip) for clip in event["evidence"].get("clips", [])],
}
return copied
def _group_by_video(records: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
grouped: dict[str, list[dict[str, Any]]] = {}
for record in records:
video_id = record.get("video_id")
if video_id:
grouped.setdefault(str(video_id), []).append(record)
return grouped
def _failed_clip_counts(clip_results: list[dict[str, Any]]) -> dict[str, int]:
counts = {"parse_failed": 0, "inference_failed": 0}
for result in clip_results:
status = result.get("status")
if status in counts:
counts[str(status)] += 1
return counts
def _event_counts(events: list[dict[str, Any]]) -> dict[str, int]:
counts: dict[str, int] = {}
for event in events:
event_type = str(event.get("event_type") or "unknown")
counts[event_type] = counts.get(event_type, 0) + 1
return dict(sorted(counts.items()))
def _probe(video_record: dict[str, Any]) -> dict[str, Any]:
excluded = {"video_id", "path", "source_path", "status", "retry_count", "last_error"}
probe = {
key: value
for key, value in video_record.items()
if key not in excluded
}
probe["status"] = video_record.get("status")
if video_record.get("last_error") is not None:
probe["last_error"] = video_record.get("last_error")
return probe
def _video_path(
video_record: dict[str, Any],
clip_results: list[dict[str, Any]],
) -> str | None:
path = video_record.get("path") or video_record.get("source_path")
if path is not None:
return str(path)
for result in clip_results:
if result.get("video_path") is not None:
return str(result["video_path"])
return None
def _video_start_time(
video_record: dict[str, Any],
clip_results: list[dict[str, Any]],
) -> Any:
if video_record.get("video_start_time") is not None:
return video_record.get("video_start_time")
for result in clip_results:
timeline = result.get("monitoring_timeline")
if isinstance(timeline, dict) and timeline.get("video_start_time") is not None:
return timeline.get("video_start_time")
return None
def _first_present(record: dict[str, Any], keys: tuple[str, ...]) -> Any:
for key in keys:
if record.get(key) is not None:
return record.get(key)
return None
def _float_or_none(value: Any) -> float | None:
if value is None:
return None
try:
return float(value)
except (TypeError, ValueError):
return None
def _min_number(left: Any, right: Any) -> float | None:
    """Return the smaller of the two values that convert to float, else ``None``."""
    usable = []
    for raw in (left, right):
        number = _float_or_none(raw)
        if number is not None:
            usable.append(number)
    if not usable:
        return None
    return min(usable)
def _max_number(left: Any, right: Any) -> float | None:
    """Return the larger of the two values that convert to float, else ``None``."""
    usable = []
    for raw in (left, right):
        number = _float_or_none(raw)
        if number is not None:
            usable.append(number)
    if not usable:
        return None
    return max(usable)
def _unique(values: list[Any]) -> list[Any]:
seen = set()
unique_values = []
for value in values:
marker = json.dumps(value, sort_keys=True) if isinstance(value, dict) else value
if marker in seen:
continue
seen.add(marker)
unique_values.append(value)
return unique_values
def _write_json(path: Path, payload: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(
json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
encoding="utf-8",
)
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()

View File

@@ -0,0 +1,424 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Sequence
from .aggregator import aggregate_outputs
from .clips import build_clip_records
from .config import DEFAULT_CONFIG_PATH, load_config
from .discovery import discover_videos
from .ffmpeg_sampler import sample_video_frames
from .hik_cloud import download_hik_cloud_recordings
from .manifest import read_jsonl, write_manifest
from .paths import stable_video_id
from .probe import probe_video
from .result_parser import build_clip_result
from .timeline import DEFAULT_TIMEZONE, format_beijing_time, timeline_start_epoch
from .vlm_client import infer_clip
def main(argv: Sequence[str] | None = None) -> int:
    """Run the batch pipeline from the command line.

    Stages, in order: acquire and probe source videos, sample frames, build
    clips, run inference, aggregate. ``--dry-run`` stops after the video
    manifest is written; ``--until clips`` stops after the clip manifest;
    ``--until inference`` stops after inference.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when the requested stages complete. Invalid flag combinations,
        and any ``ValueError`` raised while acquiring source videos, end the
        process through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description="Local video batch analysis PoC entrypoint."
    )
    parser.add_argument("--config", default=str(DEFAULT_CONFIG_PATH))
    parser.add_argument("--input-dir")
    parser.add_argument("--output-dir")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--until", choices=["clips", "inference"])
    parser.add_argument("--limit-clips", type=int)
    args = parser.parse_args(argv)

    # Validate the flags before loading the config so a flag conflict is
    # reported even when the config file is missing or invalid.
    if args.dry_run and args.until:
        parser.error("--dry-run cannot be combined with --until")
    if args.limit_clips is not None and args.limit_clips < 0:
        parser.error("--limit-clips must be non-negative")

    config = load_config(
        args.config,
        input_dir=args.input_dir,
        output_dir=args.output_dir,
    )
    output_dir = Path(config["output"]["dir"])
    output_dir.mkdir(parents=True, exist_ok=True)

    # --- Stage 1: video manifest -------------------------------------------
    video_manifest_path = output_dir / "video_manifest.jsonl"
    resume_enabled = bool(config.get("output", {}).get("resume", False))
    records = _load_resume_records(
        video_manifest_path,
        resume=resume_enabled,
    )
    record_indexes = {
        _record_key(record): index
        for index, record in enumerate(records)
        if _record_key(record) is not None
    }
    try:
        _acquire_source_records(
            config,
            output_dir,
            records,
            record_indexes,
            download_source=not args.dry_run,
        )
    except ValueError as exc:
        parser.error(str(exc))
    write_manifest(video_manifest_path, records)
    if args.dry_run:
        return 0

    # --- Stage 2: frame sampling --------------------------------------------
    clip_manifest_path = output_dir / "clip_manifest.jsonl"
    existing_clip_records = read_jsonl(clip_manifest_path) if resume_enabled else []
    existing_clip_video_ids = {
        str(record.get("video_id"))
        for record in existing_clip_records
        if record.get("video_id")
    }
    frame_manifest_path = output_dir / "frame_manifest.jsonl"
    frame_records = read_jsonl(frame_manifest_path) if resume_enabled else []
    timezone_name = str(config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE))
    backfilled_frame_video_ids = _backfill_frame_beijing_times(
        frame_records,
        records,
        timezone_name=timezone_name,
    )
    existing_sampled_video_ids = {
        str(record.get("video_id"))
        for record in frame_records
        if record.get("status") == "sampled" and record.get("video_id")
    }
    # Videos whose frames changed (backfilled or re-sampled) need new clips.
    changed_frame_video_ids: set[str] = set(backfilled_frame_video_ids)
    for record in records:
        if record.get("status") != "probed":
            continue
        video_id = str(record.get("video_id"))
        # With --until inference, a video that already has clips is not
        # re-sampled.
        if args.until == "inference" and video_id in existing_clip_video_ids:
            continue
        if video_id in existing_sampled_video_ids:
            continue
        # Drop stale (non-sampled) frame rows of this video before re-sampling.
        frame_records = _without_video_records(frame_records, video_id)
        # A fresh copy per video keeps the sampler from seeing state left by
        # an earlier call.
        ffmpeg_config = dict(config["ffmpeg"])
        ffmpeg_config["timezone"] = timezone_name
        frame_records.extend(
            sample_video_frames(
                record,
                output_dir,
                ffmpeg_config,
                manifest_path=None,
            )
        )
        changed_frame_video_ids.add(video_id)
    write_manifest(frame_manifest_path, frame_records)

    # --- Stage 3: clip manifest ---------------------------------------------
    sampled_video_ids = {
        str(record.get("video_id"))
        for record in frame_records
        if record.get("status") == "sampled" and record.get("video_id")
    }
    clip_rebuild_video_ids = changed_frame_video_ids | (
        sampled_video_ids - existing_clip_video_ids
    )
    clip_records = [
        record
        for record in existing_clip_records
        if str(record.get("video_id")) not in clip_rebuild_video_ids
    ]
    frames_to_build = [
        record
        for record in frame_records
        if str(record.get("video_id")) in clip_rebuild_video_ids
    ]
    clip_records.extend(build_clip_records(frames_to_build, config["clip"]))
    write_manifest(clip_manifest_path, clip_records)
    if args.until == "clips":
        return 0

    # --- Stage 4: inference -------------------------------------------------
    _run_inference(
        clip_records,
        records,
        output_dir,
        config,
        limit_clips=args.limit_clips,
        resume=resume_enabled,
    )
    if args.until == "inference":
        return 0

    # --- Stage 5: aggregation -----------------------------------------------
    aggregate_outputs(output_dir, config)
    return 0
def _load_resume_records(path: Path, *, resume: bool) -> list[dict[str, object]]:
if not resume:
return []
return read_jsonl(path)
def _record_key(record: dict[str, object]) -> str | None:
video_id = record.get("video_id")
if video_id:
return str(video_id)
path = record.get("path")
if path:
return stable_video_id(str(path))
return None
def _acquire_source_records(
    config: dict[str, object],
    output_dir: Path,
    records: list[dict[str, object]],
    record_indexes: dict[str, int],
    *,
    download_source: bool = True,
) -> None:
    """Probe newly found source videos and merge them into *records* in place.

    A source whose id already maps to a record with status ``probed`` is left
    untouched. Any other source is probed; the result replaces the existing
    record at its index, or is appended and registered in *record_indexes*.

    Args:
        config: Loaded configuration (``ffprobe.timeout_seconds`` is read).
        output_dir: Forwarded to the source lookup.
        records: Video manifest records; mutated.
        record_indexes: Maps video id to its position in *records*; mutated.
        download_source: Forwarded to the source lookup.
    """
    candidates = _source_video_records(
        config,
        output_dir,
        download_source=download_source,
    )
    for candidate in candidates:
        location = candidate.get("path")
        if not location:
            continue
        vid = stable_video_id(str(location))
        slot = record_indexes.get(vid)
        already_probed = slot is not None and records[slot].get("status") == "probed"
        if already_probed:
            continue
        probed = probe_video(
            str(location),
            timeout_seconds=config["ffprobe"]["timeout_seconds"],
        )
        # Probe fields win over source fields; the id is always the stable one.
        merged = {**candidate, **probed, "video_id": vid}
        if slot is not None:
            records[slot] = merged
            continue
        record_indexes[vid] = len(records)
        records.append(merged)
def _source_video_records(
config: dict[str, object],
output_dir: Path,
*,
download_source: bool = True,
) -> list[dict[str, object]]:
source_config = config.get("source", {})
source_mode = "local"
if isinstance(source_config, dict):
source_mode = str(source_config.get("mode", "local"))
if source_mode == "local":
videos = discover_videos(
config["input"]["dir"],
config["input"]["extensions"],
recursive=config["input"]["recursive"],
)
return [{"path": path} for path in videos]
if source_mode == "hik_cloud":
return [
record
for record in download_hik_cloud_recordings(
config,
output_dir,
download=download_source,
)
if record.get("status") == "downloaded"
]
raise ValueError(f"unsupported source.mode: {source_mode}")
def _without_video_records(
records: list[dict[str, object]],
video_id: str,
) -> list[dict[str, object]]:
return [record for record in records if str(record.get("video_id")) != video_id]
def _backfill_frame_beijing_times(
frame_records: list[dict[str, object]],
video_records: list[dict[str, object]],
*,
timezone_name: str,
) -> set[str]:
video_by_id = {
str(record.get("video_id")): record
for record in video_records
if record.get("video_id")
}
changed_video_ids: set[str] = set()
for frame_record in frame_records:
if frame_record.get("status") != "sampled" or frame_record.get("beijing_time"):
continue
video_id = str(frame_record.get("video_id") or "")
start_epoch = timeline_start_epoch(video_by_id.get(video_id, {}))
beijing_time = format_beijing_time(
start_epoch,
offset_seconds=float(frame_record.get("offset_seconds") or 0),
timezone_name=timezone_name,
)
if beijing_time is None:
continue
frame_record["beijing_time"] = beijing_time
changed_video_ids.add(video_id)
return changed_video_ids
def _run_inference(
    clip_records: list[dict[str, object]],
    video_records: list[dict[str, object]],
    output_dir: Path,
    config: dict[str, object],
    *,
    limit_clips: int | None,
    resume: bool,
) -> None:
    """Infer every clip that has no ``ok`` result yet and persist the results.

    When resuming, stored results are loaded and their timelines refreshed
    from the current clip manifest. A clip's previous non-ok result is
    replaced by the new one. ``clip_results.jsonl`` is rewritten after every
    processed clip and once more at the end.

    Args:
        clip_records: Clips to consider, in processing order.
        video_records: Video manifest records, looked up by ``video_id``.
        output_dir: Directory that holds ``clip_results.jsonl``.
        config: Loaded configuration, forwarded to inference and parsing.
        limit_clips: Maximum number of clips to process in this run, or
            ``None`` for no limit.
        resume: Whether to start from the stored results.
    """
    results_path = output_dir / "clip_results.jsonl"
    stored = read_jsonl(results_path) if resume else []
    clips_index = {
        str(clip.get("clip_id")): clip
        for clip in clip_records
        if clip.get("clip_id")
    }
    results = [
        _refresh_result_timeline(entry, clips_index, config)
        for entry in stored
    ]
    finished_ids = {
        str(entry.get("clip_id"))
        for entry in results
        if entry.get("status") == "ok" and entry.get("clip_id")
    }
    videos_index = {
        str(video.get("video_id")): video
        for video in video_records
        if video.get("video_id")
    }

    attempted = 0
    for clip in clip_records:
        current_id = str(clip.get("clip_id"))
        if current_id in finished_ids:
            continue
        if limit_clips is not None and attempted >= limit_clips:
            break
        # Remove any earlier (failed) result of this clip before re-running it.
        results = [
            entry for entry in results if str(entry.get("clip_id")) != current_id
        ]
        owner = videos_index.get(str(clip.get("video_id")), {})
        results.append(_infer_and_parse_clip(clip, owner, output_dir, config))
        # Persist after each clip so finished work is already on disk.
        _write_jsonl_exact(results_path, results)
        attempted += 1
    _write_jsonl_exact(results_path, results)
def _refresh_result_timeline(
result_record: dict[str, object],
clip_by_id: dict[str, dict[str, object]],
config: dict[str, object],
) -> dict[str, object]:
clip_record = clip_by_id.get(str(result_record.get("clip_id")))
if not clip_record:
return result_record
if not _clip_has_beijing_timing(clip_record):
return result_record
timeline = dict(result_record.get("monitoring_timeline") or {})
timeline.update(
{
"timezone": config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE),
"clip_start_seconds": clip_record.get("clip_start_seconds"),
"clip_end_seconds": clip_record.get("clip_end_seconds"),
"clip_start_timecode": clip_record.get("clip_start_timecode"),
"clip_end_timecode": clip_record.get("clip_end_timecode"),
"clip_start_beijing_time": clip_record.get("clip_start_beijing_time"),
"clip_end_beijing_time": clip_record.get("clip_end_beijing_time"),
"frame_times": clip_record.get("frame_times", []),
}
)
refreshed = dict(result_record)
refreshed["monitoring_timeline"] = timeline
return refreshed
def _clip_has_beijing_timing(clip_record: dict[str, object]) -> bool:
if clip_record.get("clip_start_beijing_time") or clip_record.get("clip_end_beijing_time"):
return True
for frame in clip_record.get("frame_times", []) or []:
if isinstance(frame, dict) and frame.get("beijing_time"):
return True
return False
def _infer_and_parse_clip(
    clip_record: dict[str, object],
    video_record: dict[str, object],
    output_dir: Path,
    config: dict[str, object],
) -> dict[str, object]:
    """Run inference for one clip and parse the response into a clip result.

    The inference-plus-parse cycle is repeated up to ``schema.parse_retry``
    extra times while parsing yields ``parse_failed``. An exception during
    inference is not retried: it is turned into an ``inference_failed``
    result immediately.

    Args:
        clip_record: The clip to analyse.
        video_record: Manifest record of the owning video.
        output_dir: Forwarded to the inference client.
        config: Loaded configuration (``schema``, ``vlm``, ``prompt``).

    Returns:
        The first result that is not ``parse_failed``, else the last
        ``parse_failed`` result, else an ``inference_failed`` result.
    """
    schema_config = config.get("schema", {})
    parse_retry = 0
    if isinstance(schema_config, dict):
        parse_retry = int(schema_config.get("parse_retry", 0))
    # Clamp so a negative retry setting still makes one attempt instead of
    # skipping the loop and hitting the RuntimeError below.
    attempts = max(parse_retry, 0) + 1
    result: dict[str, object] | None = None
    for attempt in range(attempts):
        try:
            inference = infer_clip(
                clip_record,
                output_dir,
                config["vlm"],
                config["prompt"],
            )
        except Exception as exc:
            # Deliberate catch-all: any inference failure is recorded as a
            # result so the batch can continue with the next clip.
            return build_clip_result(
                "",
                clip_record,
                video_record,
                config,
                processing={},
                status="inference_failed",
                error=str(exc),
            )
        result = build_clip_result(
            str(inference.get("raw_response", "")),
            clip_record,
            video_record,
            config,
            processing={
                "latency_ms": inference.get("latency_ms"),
                "http_status": inference.get("http_status"),
                "attempt": attempt + 1,
            },
        )
        if result.get("status") != "parse_failed":
            return result
    if result is None:
        # Cannot happen with attempts >= 1; kept as a type-narrowing guard.
        raise RuntimeError("unreachable inference state")
    return result
def _write_jsonl_exact(
path: Path,
records: list[dict[str, object]],
) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("w", encoding="utf-8") as handle:
for record in records:
handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,158 @@
from __future__ import annotations
from pathlib import Path
from typing import Any
from .frames import seconds_to_timecode
from .manifest import read_jsonl, write_manifest
from .timeline import derive_time_from_reference
def build_clip_records(
    frame_records: list[dict[str, Any]],
    clip_config: dict[str, Any],
) -> list[dict[str, Any]]:
    """Build clip records from frame manifest records.

    Only frames with status ``sampled`` are used. Frames are grouped by video
    and the videos are processed in sorted id order.

    Args:
        frame_records: Frame manifest records; sampled ones need ``video_id``.
        clip_config: The ``clip`` section of the configuration.

    Returns:
        The clip records of all videos, concatenated.
    """
    frames_by_video: dict[str, list[dict[str, Any]]] = {}
    for record in frame_records:
        if record.get("status") != "sampled":
            continue
        frames_by_video.setdefault(str(record["video_id"]), []).append(record)

    built: list[dict[str, Any]] = []
    for vid in sorted(frames_by_video):
        built.extend(_build_video_clips(vid, frames_by_video[vid], clip_config))
    return built
def build_clip_records_from_manifest(
    frame_manifest_path: str | Path,
    clip_manifest_path: str | Path,
    clip_config: dict[str, Any],
) -> list[dict[str, Any]]:
    """Read a frame manifest, build its clips, write the clip manifest, return the clips."""
    frame_records = read_jsonl(frame_manifest_path)
    clip_records = build_clip_records(frame_records, clip_config)
    write_manifest(clip_manifest_path, clip_records)
    return clip_records
def _build_video_clips(
video_id: str,
frames: list[dict[str, Any]],
clip_config: dict[str, Any],
) -> list[dict[str, Any]]:
sorted_frames = sorted(frames, key=lambda frame: float(frame["offset_seconds"]))
if not sorted_frames:
return []
length_seconds = float(clip_config.get("length_seconds", 10))
stride_seconds = float(clip_config.get("stride_seconds", length_seconds))
frames_per_clip = int(clip_config.get("frames_per_clip", 8))
min_frames_per_clip = int(clip_config.get("min_frames_per_clip", 4))
max_offset = max(float(frame["offset_seconds"]) for frame in sorted_frames)
timeline_end = _estimated_timeline_end(sorted_frames)
clips = []
clip_index = 1
start = 0.0
while start <= max_offset:
end = min(start + length_seconds, timeline_end)
in_window = [
frame
for frame in sorted_frames
if start <= float(frame["offset_seconds"]) < end
]
if len(in_window) >= min_frames_per_clip:
selected_frames = _uniform_sample(in_window, frames_per_clip)
start_beijing_time, end_beijing_time = _clip_beijing_time_range(
in_window,
start,
end,
)
clip = {
"video_id": video_id,
"clip_id": f"{video_id}_c{clip_index:06d}",
"clip_start_seconds": round(start, 6),
"clip_end_seconds": round(end, 6),
"clip_start_timecode": seconds_to_timecode(start),
"clip_end_timecode": seconds_to_timecode(end),
"frame_times": [_frame_time(frame) for frame in selected_frames],
"status": "pending",
"retry_count": 0,
"last_error": None,
}
if start_beijing_time is not None:
clip["clip_start_beijing_time"] = start_beijing_time
if end_beijing_time is not None:
clip["clip_end_beijing_time"] = end_beijing_time
clips.append(clip)
clip_index += 1
start += stride_seconds
return clips
def _estimated_timeline_end(frames: list[dict[str, Any]]) -> float:
offsets = [float(frame["offset_seconds"]) for frame in frames]
if len(offsets) < 2:
return offsets[-1]
intervals = [
current - previous
for previous, current in zip(offsets, offsets[1:])
if current > previous
]
if not intervals:
return offsets[-1]
return offsets[-1] + min(intervals)
def _uniform_sample(
frames: list[dict[str, Any]],
frames_per_clip: int,
) -> list[dict[str, Any]]:
if len(frames) <= frames_per_clip:
return frames
if frames_per_clip <= 1:
return [frames[0]]
last_index = len(frames) - 1
indexes = [
round(position * last_index / (frames_per_clip - 1))
for position in range(frames_per_clip)
]
return [frames[index] for index in indexes]
def _frame_time(frame: dict[str, Any]) -> dict[str, Any]:
record = {
"frame_id": frame.get("frame_id"),
"frame_path": frame.get("frame_path"),
"offset_seconds": frame.get("offset_seconds"),
"timecode": frame.get("timecode"),
"pts_time": frame.get("pts_time"),
}
if frame.get("beijing_time") is not None:
record["beijing_time"] = frame.get("beijing_time")
return record
def _clip_beijing_time_range(
frames: list[dict[str, Any]],
start: float,
end: float,
) -> tuple[str | None, str | None]:
for frame in frames:
reference_time = frame.get("beijing_time")
if not reference_time:
continue
reference_offset = frame.get("offset_seconds")
return (
derive_time_from_reference(
str(reference_time),
reference_offset_seconds=reference_offset,
target_offset_seconds=start,
),
derive_time_from_reference(
str(reference_time),
reference_offset_seconds=reference_offset,
target_offset_seconds=end,
),
)
return None, None

View File

@@ -0,0 +1,278 @@
from __future__ import annotations
import ast
from pathlib import Path
from typing import Any
from .paths import resolve_path, validate_output_dir
DEFAULT_CONFIG_PATH = Path(__file__).resolve().parent.parent / "config" / "local_batch.yaml"
def load_config(
    config_path: str | Path = DEFAULT_CONFIG_PATH,
    *,
    input_dir: str | Path | None = None,
    output_dir: str | Path | None = None,
) -> dict[str, Any]:
    """Load the config file, apply defaults and overrides, and normalize it.

    Relative input/output directories are resolved against the directory that
    contains the config file, or against its parent when the file sits in a
    folder named ``config``.

    Args:
        config_path: Location of the YAML config file.
        input_dir: Replaces ``input.dir`` when given.
        output_dir: Replaces ``output.dir`` when given.

    Returns:
        The merged configuration with resolved directories, normalized
        extensions, a boolean ``input.recursive`` and an integer
        ``ffprobe.timeout_seconds``.
    """
    config_file = Path(config_path).expanduser().resolve(strict=False)
    config = _with_defaults(_parse_simple_yaml(config_file))

    config_parent = config_file.parent
    base_dir = config_parent.parent if config_parent.name == "config" else config_parent

    input_section = config["input"]
    output_section = config["output"]
    if input_dir is not None:
        input_section["dir"] = str(input_dir)
    if output_dir is not None:
        output_section["dir"] = str(output_dir)

    input_section["dir"] = str(resolve_path(input_section["dir"], base_dir=base_dir))
    output_section["dir"] = str(resolve_path(output_section["dir"], base_dir=base_dir))
    validate_output_dir(input_section["dir"], output_section["dir"])

    input_section["extensions"] = _normalize_extensions(
        input_section.get("extensions", [])
    )
    input_section["recursive"] = bool(input_section.get("recursive", True))

    ffprobe_section = config.setdefault("ffprobe", {})
    ffprobe_section["timeout_seconds"] = int(ffprobe_section.get("timeout_seconds", 30))
    return config
def _with_defaults(config: dict[str, Any]) -> dict[str, Any]:
merged: dict[str, Any] = {
"input": {
"dir": "./videos",
"recursive": True,
"extensions": [".mp4", ".mov", ".mkv", ".avi", ".flv", ".ts", ".m4v"],
},
"output": {
"dir": "./outputs/local-batch",
"overwrite": False,
"resume": True,
"keep_frames": True,
},
"source": {"mode": "local"},
"hik_cloud": {
"api_base_url": "https://api2.hik-cloud.com",
"download_path": "/v1/carrier/cstorage/open/play/download",
"access_token": None,
"access_token_env": "HIK_CLOUD_ACCESS_TOKEN",
"devices": [],
"time_ranges": [],
"chunk_seconds": 600,
"timeout_seconds": 60,
"download_timeout_seconds": 600,
},
"ffprobe": {"timeout_seconds": 30},
"ffmpeg": {
"prefer_nvdec": True,
"allow_cpu_fallback": False,
"hwaccel": "cuda",
"codec_decoders": {"h264": "h264_cuvid", "hevc": "hevc_cuvid"},
"frame_fps": 1,
"frame_width": 640,
"jpeg_quality": 4,
"timeout_seconds_per_video": 3600,
},
"clip": {
"length_seconds": 10,
"stride_seconds": 10,
"frames_per_clip": 8,
"min_frames_per_clip": 4,
},
"vlm": {
"api_base_url": "http://localhost:8679",
"chat_completions_path": "/v1/chat/completions",
"model": "memai-zhengxin-v3-20260413",
"timeout_seconds": 120,
"max_tokens": 512,
"temperature": 0,
"batch_size": 1,
"image_transport": "data_uri",
"retries": 1,
},
"prompt": {
"system": "You are a store video analysis assistant. Return strict JSON only.",
"user": "Analyze this clip. Return events and screen_time. If no event, return events: [].",
},
"schema": {
"version": "local-batch-v1",
"event_types": [
"customer_enter",
"customer_leave",
"queue_detected",
"staff_absent",
"staff_present",
"area_crowded",
"abnormal_behavior",
"unknown",
],
"require_strict_json": True,
"parse_retry": 1,
"merge_gap_seconds": 30,
},
"runtime": {"timezone": "Asia/Shanghai", "log_level": "INFO"},
}
for section, values in config.items():
if isinstance(values, dict) and isinstance(merged.get(section), dict):
merged[section].update(values)
else:
merged[section] = values
return merged
def _normalize_extensions(extensions: list[str]) -> list[str]:
normalized = []
for extension in extensions:
value = str(extension).lower()
if not value.startswith("."):
value = f".{value}"
normalized.append(value)
return normalized
def _parse_simple_yaml(path: Path) -> dict[str, Any]:
    """Parse a small, indentation-based subset of YAML into nested dicts/lists.

    Supported constructs, as implemented below:
      * ``key: value`` mappings, with scalars converted by ``_parse_scalar``;
      * ``key:`` followed by an indented mapping or a ``- `` list;
      * ``- item`` list entries, including ``- key: value`` mapping entries;
      * block scalars introduced by ``>``, ``>-``, ``|`` or ``|-``;
      * blank lines and full-line ``#`` comments (skipped).

    Args:
        path: Location of the config file.

    Returns:
        The parsed top-level mapping.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: For a list item whose parent is not a list, a mapping
            entry whose parent is a list, or a line without ``:``.

    NOTE(review): only leading spaces count as indentation (tabs count as 0),
    inline ``# comments`` after a value are not stripped, and any list item
    containing ``:`` is treated as a mapping entry (so ``- "10:00"`` is not a
    scalar). List items must be indented deeper than their parent key.
    """
    if not path.exists():
        raise FileNotFoundError(f"config file not found: {path}")
    root: dict[str, Any] = {}
    # Stack of (indent, container); the root sits at indent -1 so it is never popped.
    stack: list[tuple[int, dict[str, Any] | list[Any]]] = [(-1, root)]
    lines = path.read_text(encoding="utf-8").splitlines()
    index = 0
    while index < len(lines):
        raw_line = lines[index].rstrip()
        stripped = raw_line.strip()
        if not stripped or raw_line.lstrip().startswith("#"):
            index += 1
            continue
        indent = len(raw_line) - len(raw_line.lstrip(" "))
        # Close every container opened at this indent or deeper.
        while indent <= stack[-1][0]:
            stack.pop()
        parent = stack[-1][1]
        if stripped.startswith("- "):
            if not isinstance(parent, list):
                raise ValueError(f"list item without list parent: {raw_line}")
            item = stripped[2:].strip()
            if ":" in item:
                # "- key: value" starts a new mapping element in the list.
                key, value = item.split(":", 1)
                mapping: dict[str, Any] = {}
                parent.append(mapping)
                key = key.strip()
                value = value.strip()
                if not value:
                    # "- key:" with a nested container; peek ahead to pick list vs dict.
                    next_stripped = _next_stripped(lines, index)
                    child: dict[str, Any] | list[Any]
                    child = [] if next_stripped and next_stripped.startswith("- ") else {}
                    mapping[key] = child
                    stack.append((indent, mapping))
                    # The nested container is registered two columns deeper than the dash.
                    stack.append((indent + 2, child))
                else:
                    mapping[key] = _parse_scalar(value)
                    # Later keys of the same element are indented deeper than the dash.
                    stack.append((indent, mapping))
            else:
                parent.append(_parse_scalar(item))
            index += 1
            continue
        if not isinstance(parent, dict):
            raise ValueError(f"mapping entry inside list is not supported: {raw_line}")
        if ":" not in stripped:
            raise ValueError(f"unsupported config line: {raw_line}")
        key, value = stripped.split(":", 1)
        key = key.strip()
        value = value.strip()
        if _is_block_scalar(value):
            # The helper returns the index of the first line after the block.
            parent[key], index = _parse_block_scalar(lines, index, indent, value)
            continue
        if not value:
            # "key:" opens a nested list or mapping, decided by the next content line.
            next_stripped = _next_stripped(lines, index)
            child: dict[str, Any] | list[Any]
            child = [] if next_stripped and next_stripped.startswith("- ") else {}
            parent[key] = child
            stack.append((indent, child))
        else:
            parent[key] = _parse_scalar(value)
        index += 1
    return root
def _next_stripped(lines: list[str], current_index: int) -> str | None:
for raw_line in lines[current_index + 1 :]:
stripped = raw_line.strip()
if stripped and not raw_line.lstrip().startswith("#"):
return stripped
return None
def _is_block_scalar(value: str) -> bool:
return value in {">", ">-", "|", "|-"}
def _parse_block_scalar(
lines: list[str],
start_index: int,
parent_indent: int,
marker: str,
) -> tuple[str, int]:
content_lines: list[str] = []
content_indent: int | None = None
index = start_index + 1
while index < len(lines):
raw_line = lines[index].rstrip()
stripped = raw_line.strip()
if not stripped:
content_lines.append("")
index += 1
continue
indent = len(raw_line) - len(raw_line.lstrip(" "))
if indent <= parent_indent:
break
if content_indent is None:
content_indent = indent
content_lines.append(raw_line[content_indent:])
index += 1
if marker.endswith("-"):
while content_lines and content_lines[-1] == "":
content_lines.pop()
return "\n".join(content_lines), index
def _parse_scalar(value: str) -> Any:
lower = value.lower()
if lower == "true":
return True
if lower == "false":
return False
if lower in {"null", "none"}:
return None
if value.startswith("[") and value.endswith("]"):
parsed = ast.literal_eval(value)
if not isinstance(parsed, list):
raise ValueError(f"expected list value: {value}")
return parsed
if (value.startswith('"') and value.endswith('"')) or (
value.startswith("'") and value.endswith("'")
):
return ast.literal_eval(value)
try:
return int(value)
except ValueError:
pass
try:
return float(value)
except ValueError:
return value

View File

@@ -0,0 +1,27 @@
from __future__ import annotations
from pathlib import Path
def discover_videos(
    input_dir: str | Path,
    extensions: list[str],
    *,
    recursive: bool,
) -> list[Path]:
    """List the files under *input_dir* whose suffix matches *extensions*.

    Args:
        input_dir: Directory to scan (``~`` is expanded).
        extensions: Accepted suffixes, with or without a leading dot; matching
            is case-insensitive.
        recursive: Scan sub-directories too when true, otherwise only the
            directory's direct children.

    Returns:
        The matching file paths in sorted order.

    Raises:
        FileNotFoundError: If *input_dir* does not exist.
        NotADirectoryError: If *input_dir* exists but is not a directory.
    """
    root = Path(input_dir).expanduser()
    if not root.exists():
        raise FileNotFoundError(f"input dir not found: {root}")
    if not root.is_dir():
        raise NotADirectoryError(f"input path is not a directory: {root}")
    allowed: set[str] = set()
    for extension in extensions:
        lowered = extension.lower()
        allowed.add(lowered if extension.startswith(".") else f".{lowered}")
    candidates = root.rglob("*") if recursive else root.iterdir()
    matches = [
        entry
        for entry in candidates
        if entry.is_file() and entry.suffix.lower() in allowed
    ]
    matches.sort()
    return matches

View File

@@ -0,0 +1,243 @@
from __future__ import annotations
import math
import subprocess
from pathlib import Path
from typing import Any
from .frames import build_frame_records
from .manifest import read_jsonl, write_manifest
from .timeline import DEFAULT_TIMEZONE, timeline_start_epoch
# Codec names for which build_sample_command may select a hardware decoder;
# any other codec is decoded without -hwaccel or is rejected when CPU fallback is off.
NVDEC_CODECS = {"h264", "hevc"}
def build_sample_command(
    video_path: str | Path,
    output_dir: str | Path,
    video_id: str,
    ffmpeg_config: dict[str, Any],
    *,
    codec_name: str | None,
    max_frames: int | None = None,
    max_duration_seconds: float | None = None,
) -> list[str]:
    """Build the ffmpeg argument list that samples JPEG frames from one video.

    Frames are written to ``<output_dir>/frames/<video_id>/%06d.jpg``. The
    hardware decoder flags are added only when ``prefer_nvdec`` is enabled,
    the codec is listed in ``NVDEC_CODECS`` and ``codec_decoders`` names a
    decoder for it.

    Args:
        video_path: Input video file.
        output_dir: Root output directory.
        video_id: Identifier used as the frame sub-directory name.
        ffmpeg_config: The ``ffmpeg`` configuration section.
        codec_name: Codec of the input video, if known.
        max_frames: Optional cap passed as ``-frames:v`` (ignored unless > 0).
        max_duration_seconds: Optional cap passed as ``-t`` (ignored unless > 0).

    Raises:
        ValueError: If no hardware decoder applies and ``allow_cpu_fallback``
            is disabled.
    """
    pattern = Path(output_dir).expanduser() / "frames" / video_id / "%06d.jpg"
    codec = (codec_name or "").lower()
    nvdec_wanted = bool(ffmpeg_config.get("prefer_nvdec", True))
    cpu_allowed = bool(ffmpeg_config.get("allow_cpu_fallback", False))
    decoder_map = ffmpeg_config.get("codec_decoders", {})
    decoder = decoder_map.get(codec) if isinstance(decoder_map, dict) else None
    use_nvdec = bool(nvdec_wanted and codec in NVDEC_CODECS and decoder)

    if not use_nvdec and not cpu_allowed:
        raise ValueError(
            f"NVDEC decoder is required for codec {codec_name!r}; CPU fallback is disabled"
        )

    argv = ["ffmpeg", "-hide_banner", "-y"]
    if use_nvdec:
        argv += ["-hwaccel", str(ffmpeg_config.get("hwaccel", "cuda"))]
        argv += ["-c:v", str(decoder)]

    fps = ffmpeg_config.get("frame_fps", 1)
    width = ffmpeg_config.get("frame_width", 640)
    quality = ffmpeg_config.get("jpeg_quality", 4)

    argv += ["-i", str(Path(video_path).expanduser())]
    # -t is placed after -i, so it limits the output duration.
    if max_duration_seconds is not None and max_duration_seconds > 0:
        argv += ["-t", f"{max_duration_seconds:g}"]
    argv += ["-vf", f"fps={fps},scale={width}:-2", "-q:v", str(quality)]
    if max_frames is not None and max_frames > 0:
        argv += ["-frames:v", str(max_frames)]
    argv.append(str(pattern))
    return argv
def sample_video_frames(
    video_record: dict[str, Any],
    output_dir: str | Path,
    ffmpeg_config: dict[str, Any],
    *,
    manifest_path: str | Path | None = None,
) -> list[dict[str, Any]]:
    """Run ffmpeg for one video and return one record per sampled frame.

    Frames are written to ``<output_dir>/frames/<video_id>/``. On failure a
    single ``sample_failed`` record is returned instead of frame records.

    Args:
        video_record: Video manifest entry; must contain ``video_id`` and
            either ``path`` or ``source_path``.
        output_dir: Root output directory.
        ffmpeg_config: The ``ffmpeg`` configuration section.
        manifest_path: Optional frame manifest; when given, this video's
            entries in it are replaced with the new records.

    Returns:
        The frame records, or a one-element list holding the failure record.

    NOTE(review): the frame directory is not cleared in this function, so
    ``*.jpg`` files already present there are also turned into records.
    Only ``CalledProcessError``, ``TimeoutExpired`` and ``ValueError`` are
    handled; other errors (for example an ``OSError`` when the ffmpeg
    executable cannot be started) propagate to the caller.
    """
    video_id = str(video_record["video_id"])
    output_root = Path(output_dir).expanduser().resolve(strict=False)
    frame_dir = output_root / "frames" / video_id
    frame_dir.mkdir(parents=True, exist_ok=True)
    try:
        # These names are bound before subprocess.run so that the
        # CalledProcessError handler below can rely on them.
        max_frames = _max_output_frames(video_record, ffmpeg_config)
        # NOTE(review): the timezone is looked up in the ffmpeg section — confirm
        # this is intended rather than a runtime-level setting.
        timezone_name = str(ffmpeg_config.get("timezone", DEFAULT_TIMEZONE))
        start_epoch = timeline_start_epoch(video_record)
        command = build_sample_command(
            video_record.get("path") or video_record.get("source_path"),
            output_root,
            video_id,
            ffmpeg_config,
            codec_name=video_record.get("codec_name"),
            max_frames=max_frames,
            max_duration_seconds=_record_duration_seconds(video_record),
        )
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            timeout=int(ffmpeg_config.get("timeout_seconds_per_video", 3600)),
        )
        records = build_frame_records(
            video_id,
            output_root,
            frame_dir.glob("*.jpg"),
            frame_fps=float(ffmpeg_config.get("frame_fps", 1)),
            timeline_start_epoch=start_epoch,
            timezone_name=timezone_name,
        )
        _attach_success_evidence(
            records,
            command,
            stderr=completed.stderr,
        )
    except subprocess.CalledProcessError as exc:
        # ffmpeg exited non-zero; inspect the frames that are on disk anyway.
        records = build_frame_records(
            video_id,
            output_root,
            frame_dir.glob("*.jpg"),
            frame_fps=float(ffmpeg_config.get("frame_fps", 1)),
            timeline_start_epoch=start_epoch,
            timeline_name=timezone_name,
        ) if False else build_frame_records(
            video_id,
            output_root,
            frame_dir.glob("*.jpg"),
            frame_fps=float(ffmpeg_config.get("frame_fps", 1)),
            timeline_start_epoch=start_epoch,
            timezone_name=timezone_name,
        )
        if records and (max_frames is None or len(records) >= max_frames):
            _attach_success_evidence(
                records,
                command,
                stderr=exc.stderr,
            )
        else:
            records = [_failure_record(video_id, exc)]
    except (subprocess.TimeoutExpired, ValueError) as exc:
        records = [_failure_record(video_id, exc)]
    if manifest_path is not None:
        _replace_video_records(Path(manifest_path), video_id, records)
    return records
def _replace_video_records(
    manifest_path: Path,
    video_id: str,
    new_records: list[dict[str, Any]],
) -> None:
    """Rewrite the manifest with *video_id*'s entries replaced by *new_records*.

    Entries of other videos keep their order; the new records are appended
    after them.
    """
    kept: list[dict[str, Any]] = []
    for entry in read_jsonl(manifest_path):
        if str(entry.get("video_id")) == video_id:
            continue
        kept.append(entry)
    write_manifest(manifest_path, kept + list(new_records))
def _failure_record(video_id: str, exc: BaseException) -> dict[str, Any]:
    """Build the single manifest record that marks a video as ``sample_failed``.

    All frame-specific fields are ``None``; ``last_error`` carries the text
    derived from *exc*.
    """
    record: dict[str, Any] = {"video_id": video_id}
    for frame_field in ("frame_id", "frame_path", "offset_seconds", "timecode", "pts_time"):
        record[frame_field] = None
    record["status"] = "sample_failed"
    record["retry_count"] = 0
    record["last_error"] = _error_text(exc)
    return record
def _attach_success_evidence(
    records: list[dict[str, Any]],
    command: list[str],
    *,
    stderr: str | None,
) -> None:
    """Annotate every record in place with details of the ffmpeg invocation.

    Adds the command itself, the decoder and hwaccel values taken from it
    (``None`` when the flag is absent) and a summary of ffmpeg's stderr.
    """
    decoder = _command_value_after(command, "-c:v")
    hwaccel = _command_value_after(command, "-hwaccel")
    summary = _stderr_summary(stderr)
    for record in records:
        record["ffmpeg_command"] = command
        record["decoder"] = decoder
        record["hwaccel"] = hwaccel
        record["stderr_summary"] = summary
def _command_value_after(command: list[str], flag: str) -> str | None:
try:
index = command.index(flag)
except ValueError:
return None
if index + 1 >= len(command):
return None
return command[index + 1]
def _stderr_summary(stderr: str | None, *, limit: int = 2000) -> str:
if not stderr:
return ""
text = stderr.strip()
if len(text) <= limit:
return text
return text[:limit]
def _error_text(exc: BaseException) -> str:
if isinstance(exc, subprocess.CalledProcessError):
return str(exc.stderr or exc.stdout or exc)
if isinstance(exc, subprocess.TimeoutExpired):
return f"ffmpeg timed out after {exc.timeout}s"
return str(exc)
def _max_output_frames(
    video_record: dict[str, Any],
    ffmpeg_config: dict[str, Any],
) -> int | None:
    """Return the largest number of frames sampling should produce, if known.

    The cap is ``ceil(duration * frame_fps) + 1`` and never less than 1.
    ``None`` is returned when the sampling rate or the duration is missing or
    not positive.
    """
    sample_rate = _optional_float(ffmpeg_config.get("frame_fps", 1))
    if sample_rate is None or sample_rate <= 0:
        return None
    length = _record_duration_seconds(video_record)
    if length is None or length <= 0:
        return None
    expected = math.ceil(length * sample_rate) + 1
    return max(1, expected)
def _record_duration_seconds(video_record: dict[str, Any]) -> float | None:
    """Return the duration recorded for a video, in seconds.

    The first usable begin/end pair wins, trying ``actual_*`` before
    ``requested_*``; a pair is usable when both values are present and the end
    is after the begin. Otherwise ``duration_seconds`` is used.
    """
    windows = (
        ("actual_begin", "actual_end"),
        ("requested_begin", "requested_end"),
    )
    for begin_key, end_key in windows:
        starts_at = _optional_float(video_record.get(begin_key))
        ends_at = _optional_float(video_record.get(end_key))
        if starts_at is None or ends_at is None:
            continue
        if ends_at > starts_at:
            return ends_at - starts_at
    return _optional_float(video_record.get("duration_seconds"))
def _optional_float(value: Any) -> float | None:
if value is None or value == "":
return None
return float(value)

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Iterable
from .timeline import DEFAULT_TIMEZONE, format_beijing_time
def seconds_to_timecode(seconds: float | int | None) -> str | None:
    """Format an offset in seconds as ``HH:MM:SS``; ``None`` stays ``None``.

    The fractional part is discarded before formatting.
    """
    if seconds is None:
        return None
    whole = int(float(seconds))
    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
def build_frame_records(
    video_id: str,
    output_dir: str | Path,
    frame_paths: Iterable[str | Path],
    *,
    frame_fps: float,
    timeline_start_epoch: float | int | str | None = None,
    timezone_name: str = DEFAULT_TIMEZONE,
) -> list[dict[str, Any]]:
    """Create one ``sampled`` manifest record per frame file.

    Frames are ordered by sorted path. The n-th frame (starting at 0) gets the
    offset ``n / frame_fps`` rounded to six decimals, used for both
    ``offset_seconds`` and ``pts_time``. Frame ids are numbered from 1.

    Args:
        video_id: Identifier of the source video.
        output_dir: Root output directory; frame paths are stored relative to
            it when they lie inside it.
        frame_paths: Paths of the extracted frame images.
        frame_fps: Sampling rate the frames were extracted at.
        timeline_start_epoch: Passed to ``format_beijing_time`` together with
            each frame's offset; ``beijing_time`` is added to a record only
            when that call returns a value.
        timezone_name: Time zone passed to ``format_beijing_time``.
    """
    base_dir = Path(output_dir).expanduser().resolve(strict=False)
    ordered = sorted(Path(candidate) for candidate in frame_paths)
    records: list[dict[str, Any]] = []
    for position, frame_path in enumerate(ordered):
        offset = round(position / frame_fps, 6)
        record: dict[str, Any] = {
            "video_id": video_id,
            "frame_id": f"{video_id}_f{position + 1:06d}",
            "frame_path": _relative_frame_path(frame_path, base_dir),
            "offset_seconds": offset,
            "timecode": seconds_to_timecode(offset),
            "pts_time": offset,
            "status": "sampled",
            "retry_count": 0,
            "last_error": None,
        }
        wall_clock = format_beijing_time(
            timeline_start_epoch,
            offset_seconds=offset,
            timezone_name=timezone_name,
        )
        if wall_clock is not None:
            record["beijing_time"] = wall_clock
        records.append(record)
    return records
def _relative_frame_path(frame_path: Path, base_dir: Path) -> str:
resolved = frame_path.expanduser().resolve(strict=False)
try:
return resolved.relative_to(base_dir).as_posix()
except ValueError:
return resolved.as_posix()

View File

@@ -0,0 +1,450 @@
from __future__ import annotations
import json
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Any
from urllib.parse import urlparse, urlunparse
import urllib.request
from zoneinfo import ZoneInfo
from .manifest import read_jsonl, write_manifest
from .paths import hik_cloud_download_path
# Time zone used to interpret time strings when the config does not name one.
DEFAULT_TIMEZONE = "Asia/Shanghai"
# Chunk length used by build_download_chunks when hik_cloud.chunk_seconds is absent.
DEFAULT_CHUNK_SECONDS = 600
# Largest chunk_seconds value that build_download_chunks accepts.
MAX_CHUNK_SECONDS = 3600
# Fallbacks used when the hik_cloud config omits the corresponding setting.
DEFAULT_API_BASE_URL = "https://api2.hik-cloud.com"
DEFAULT_DOWNLOAD_PATH = "/v1/carrier/cstorage/open/play/download"
DEFAULT_TIMEOUT_SECONDS = 60
DEFAULT_DOWNLOAD_TIMEOUT_SECONDS = 600
# File name of the JSONL download manifest kept inside the output directory.
DOWNLOAD_MANIFEST_NAME = "hik_cloud_download_manifest.jsonl"
# API response code that request_download_address reports as status "no_recording".
NO_RECORDING_CODE = 80438027
# strptime format that parse_hik_time accepts for string timestamps.
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
def parse_hik_time(value: str | int | float, timezone: str = DEFAULT_TIMEZONE) -> int:
    """Convert a configured time value to whole epoch seconds.

    Numbers are truncated with ``int()``. Strings must match ``TIME_FORMAT``
    and are interpreted as wall-clock time in *timezone*.

    Raises:
        ValueError: For booleans, unsupported types, or strings that do not
            match ``TIME_FORMAT``.
    """
    if isinstance(value, str):
        naive = datetime.strptime(value, TIME_FORMAT)
        aware = naive.replace(tzinfo=ZoneInfo(timezone))
        return int(aware.timestamp())
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return int(value)
    raise ValueError(f"unsupported time value: {value!r}")
def build_download_chunks(config: dict[str, Any]) -> list[dict[str, Any]]:
    """Split every configured device/time-range pair into download chunks.

    Each chunk covers at most ``hik_cloud.chunk_seconds`` seconds. Chunks are
    listed device by device, range by range, in time order. ``requested_begin``
    and ``requested_end`` hold the bounds of the whole configured range;
    ``time_begin`` and ``time_end`` hold the chunk's own bounds.

    Raises:
        ValueError: If ``chunk_seconds`` is not within 1..``MAX_CHUNK_SECONDS``
            or a time range does not end after it begins.
    """
    hik_config = config.get("hik_cloud", {})
    timezone = config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE)
    chunk_seconds = int(hik_config.get("chunk_seconds", DEFAULT_CHUNK_SECONDS))
    if chunk_seconds <= 0:
        raise ValueError("chunk_seconds must be greater than 0")
    if chunk_seconds > MAX_CHUNK_SECONDS:
        raise ValueError("chunk_seconds must be less than or equal to 3600")

    devices = hik_config.get("devices", [])
    time_ranges = hik_config.get("time_ranges", [])
    chunks: list[dict[str, Any]] = []
    for device in devices:
        for time_range in time_ranges:
            range_begin = parse_hik_time(time_range["begin"], timezone)
            range_end = parse_hik_time(time_range["end"], timezone)
            if range_end <= range_begin:
                raise ValueError("time range end must be after begin")
            for chunk_begin in range(range_begin, range_end, chunk_seconds):
                chunks.append(
                    {
                        "device_serial": device["device_serial"],
                        "channel_no": device["channel_no"],
                        "requested_begin": range_begin,
                        "requested_end": range_end,
                        "time_begin": chunk_begin,
                        "time_end": min(chunk_begin + chunk_seconds, range_end),
                    }
                )
    return chunks
def resolve_access_token(config_or_hik_config: dict[str, Any]) -> str:
    """Return the Hik cloud access token from the config or the environment.

    A non-empty ``access_token`` setting wins; otherwise the environment
    variable named by ``access_token_env`` is read.

    Raises:
        ValueError: If neither source yields a non-empty token.
    """
    hik_config = _hik_config(config_or_hik_config)
    configured = hik_config.get("access_token")
    if configured:
        return str(configured)
    env_name = hik_config.get("access_token_env")
    from_env = os.environ.get(str(env_name)) if env_name else None
    if from_env:
        return from_env
    raise ValueError(
        "missing hik_cloud access_token; configure access_token or access_token_env"
    )
def request_download_address(
    chunk: dict[str, Any],
    hik_config: dict[str, Any],
    *,
    http_post: Any | None = None,
) -> dict[str, Any]:
    """Ask the Hik cloud API for the download address of one chunk.

    Args:
        chunk: Chunk description as produced by ``build_download_chunks``.
        hik_config: The ``hik_cloud`` section (or the full config).
        http_post: Optional replacement for the HTTP call, invoked as
            ``http_post(url, json_body, headers, timeout_seconds)``.

    Returns:
        The chunk metadata plus ``status`` and ``code``. ``address_ok``
        results carry ``url``, ``actual_begin`` and ``actual_end``;
        ``no_recording`` and ``address_failed`` results carry ``last_error``.

    Raises:
        ValueError: If no access token can be resolved.
    """
    token = resolve_access_token(hik_config)
    base_url = str(hik_config.get("api_base_url") or DEFAULT_API_BASE_URL)
    path = str(hik_config.get("download_path") or DEFAULT_DOWNLOAD_PATH)
    endpoint = base_url.rstrip("/") + path
    request_headers = {
        "Authorization": f"bearer {token}",
        "Content-Type": "application/json",
    }
    request_body = {
        "deviceSerial": chunk["device_serial"],
        "channelNo": chunk["channel_no"],
        "timeBegin": chunk["time_begin"],
        "timeEnd": chunk["time_end"],
    }
    timeout = int(hik_config.get("timeout_seconds", DEFAULT_TIMEOUT_SECONDS))
    send = http_post if http_post else _post_json
    try:
        reply = send(endpoint, request_body, request_headers, timeout)
    except Exception as exc:  # pragma: no cover - exact urllib failures vary.
        failure = dict(_chunk_metadata(chunk))
        failure["status"] = "address_failed"
        failure["code"] = None
        failure["last_error"] = _sanitize_error(exc, token)
        return failure

    code = _optional_int(reply.get("code"))
    if code != 0:
        rejected = dict(_chunk_metadata(chunk))
        rejected["status"] = "no_recording" if code == NO_RECORDING_CODE else "address_failed"
        rejected["code"] = code
        rejected["last_error"] = _api_error_message(reply, token)
        return rejected

    data = reply.get("data") or {}
    accepted = dict(_chunk_metadata(chunk))
    accepted["status"] = "address_ok"
    accepted["code"] = code
    accepted["url"] = data.get("url")
    accepted["actual_begin"] = _optional_int(data.get("actualBeginTime"))
    accepted["actual_end"] = _optional_int(data.get("actualEndTime"))
    return accepted
def download_hik_cloud_recordings(
    config: dict[str, Any],
    output_dir: str | Path,
    *,
    address_client: Any | None = None,
    download_url: Any | None = None,
    download: bool = True,
) -> list[dict[str, Any]]:
    """Download every configured Hik cloud chunk and record the outcome.

    For each chunk a download address is requested, the file is fetched into
    the path given by ``hik_cloud_download_path`` and the result is recorded
    in the download manifest inside *output_dir*.

    Args:
        config: Full configuration mapping.
        output_dir: Root output directory.
        address_client: Optional replacement for ``request_download_address``,
            called as ``address_client(chunk, hik_config)``.
        download_url: Optional replacement for the fetch function, called as
            ``download_url(url, timeout_seconds=...)`` and expected to return
            the file content as bytes.
        download: When false, addresses are requested and recorded but nothing
            is fetched.

    Returns:
        Video records for the chunks that are available locally: freshly
        downloaded ones and, when resuming, previously downloaded ones. The
        list is empty when *download* is false.

    NOTE(review): the manifest is written once, after all chunks have been
    processed, so an uncaught error part-way through leaves it unchanged.
    When ``output.resume`` is false the previous manifest content is not
    loaded and is therefore replaced.
    """
    output_path = Path(output_dir).expanduser().resolve(strict=False)
    manifest_path = output_path / DOWNLOAD_MANIFEST_NAME
    hik_config = _hik_config(config)
    chunks = build_download_chunks(config)
    resume = bool(config.get("output", {}).get("resume", False))
    manifest_records = read_jsonl(manifest_path) if resume else []
    # Only entries marked "downloaded" whose file still exists can be reused.
    existing_downloads = {
        _manifest_key(record): record
        for record in manifest_records
        if _is_resumable_download(record)
    }
    get_address = address_client or request_download_address
    fetch = download_url or _download_url
    download_timeout_seconds = int(
        hik_config.get("download_timeout_seconds", DEFAULT_DOWNLOAD_TIMEOUT_SECONDS)
    )
    # Token value used only to scrub error messages before they are stored.
    token = _redaction_token(hik_config)
    video_records: list[dict[str, Any]] = []
    for chunk in chunks:
        key = _chunk_key(chunk)
        existing_record = existing_downloads.get(key)
        if download and existing_record is not None:
            # Already on disk: reuse it without asking for a new address.
            video_records.append(_video_record_from_manifest(existing_record))
            continue
        address_result = get_address(chunk, hik_config)
        status = address_result.get("status")
        if status != "address_ok":
            # Record the failure (or "no_recording") and move on to the next chunk.
            _upsert_manifest_record(
                manifest_records,
                _manifest_record(
                    chunk,
                    address_result,
                    status=str(status or "address_failed"),
                    token=token,
                ),
            )
            continue
        if not download:
            # Address-only mode: note that an address was obtained, fetch nothing.
            _upsert_manifest_record(
                manifest_records,
                _manifest_record(
                    chunk,
                    address_result,
                    status="address_ok",
                    token=token,
                ),
            )
            continue
        url = str(address_result.get("url") or "")
        target_path = hik_cloud_download_path(
            output_path,
            str(chunk["device_serial"]),
            chunk["channel_no"],
            int(chunk["time_begin"]),
            int(chunk["time_end"]),
        )
        try:
            # The whole payload is held in memory before it is written out.
            payload = fetch(url, timeout_seconds=download_timeout_seconds)
            target_path.parent.mkdir(parents=True, exist_ok=True)
            target_path.write_bytes(payload)
        except Exception as exc:  # pragma: no cover - concrete network failures vary.
            _upsert_manifest_record(
                manifest_records,
                _manifest_record(
                    chunk,
                    address_result,
                    status="download_failed",
                    path=target_path,
                    last_error=_sanitize_error(exc, token),
                    token=token,
                ),
            )
            continue
        record = _downloaded_video_record(chunk, address_result, target_path)
        video_records.append(record)
        _upsert_manifest_record(
            manifest_records,
            _manifest_record(
                chunk,
                address_result,
                status="downloaded",
                path=target_path,
                token=token,
            ),
        )
    write_manifest(manifest_path, manifest_records)
    return video_records
def _post_json(
    url: str,
    json_body: dict[str, Any],
    headers: dict[str, str],
    timeout_seconds: int,
) -> dict[str, Any]:
    """POST *json_body* as UTF-8 JSON to *url* and return the decoded JSON reply."""
    encoded_body = json.dumps(json_body).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=encoded_body,
        headers=headers,
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
        raw_reply = response.read()
    return json.loads(raw_reply.decode("utf-8"))
def _download_url(url: str, *, timeout_seconds: int | None = None) -> bytes:
    """Fetch *url* and return the complete response body as bytes.

    *timeout_seconds* is handed to ``urlopen`` unchanged, including ``None``.
    """
    with urllib.request.urlopen(url, timeout=timeout_seconds) as stream:
        content = stream.read()
    return content
def _hik_config(config_or_hik_config: dict[str, Any]) -> dict[str, Any]:
hik_config = config_or_hik_config.get("hik_cloud")
if isinstance(hik_config, dict):
return hik_config
return config_or_hik_config
def _chunk_metadata(chunk: dict[str, Any]) -> dict[str, Any]:
return {
"device_serial": chunk["device_serial"],
"channel_no": chunk["channel_no"],
"requested_begin": chunk.get("requested_begin"),
"requested_end": chunk.get("requested_end"),
"time_begin": chunk["time_begin"],
"time_end": chunk["time_end"],
}
def _optional_int(value: Any) -> int | None:
if value is None or value == "":
return None
return int(value)
def _api_error_message(response: dict[str, Any], token: str) -> str:
    """Describe an API error response as sanitized text.

    The message is taken from ``msg``, then ``message``, falling back to a
    generic text, and is prefixed with the response code.
    """
    detail = response.get("msg") or response.get("message") or "hik api error"
    api_code = response.get("code")
    return _sanitize_error(f"hik api code {api_code}: {detail}", token)
def _sanitize_error(value: Any, token: str = "") -> str | None:
if value is None:
return None
message = str(value)
for raw_url in re.findall(r"https?://[^\s'\"<>]+", message):
parsed = urlparse(raw_url)
sanitized_url = urlunparse(
(parsed.scheme, parsed.netloc, parsed.path, "", "", "")
)
message = message.replace(raw_url, sanitized_url)
message = re.sub(
r"\b(?:sign|sig|token|access_token)=[^&\s'\"<>]+",
"[redacted-query]",
message,
flags=re.IGNORECASE,
)
if token:
message = message.replace(token, "[redacted]")
message = message.replace("Authorization", "[redacted-header]")
return message
def _downloaded_video_record(
    chunk: dict[str, Any],
    address_result: dict[str, Any],
    path: Path,
) -> dict[str, Any]:
    """Build the video record for a chunk that has just been downloaded.

    ``requested_begin``/``requested_end`` hold the chunk's own bounds
    (``time_begin``/``time_end``); the actual bounds come from the address
    result and may be ``None``.
    """
    record: dict[str, Any] = {"source": "hik_cloud", "path": str(path)}
    record["source_path"] = _source_path(chunk)
    record["device_serial"] = chunk["device_serial"]
    record["channel_no"] = chunk["channel_no"]
    record["requested_begin"] = chunk["time_begin"]
    record["requested_end"] = chunk["time_end"]
    record["actual_begin"] = address_result.get("actual_begin")
    record["actual_end"] = address_result.get("actual_end")
    record["status"] = "downloaded"
    record["retry_count"] = 0
    record["last_error"] = None
    return record
def _manifest_record(
    chunk: dict[str, Any],
    address_result: dict[str, Any],
    *,
    status: str,
    token: str,
    path: Path | None = None,
    last_error: str | None = None,
) -> dict[str, Any]:
    """Build the download-manifest entry for one chunk.

    The download URL itself is never stored, only its host. ``last_error``
    falls back to the address result's error and is sanitized with *token*.
    ``code`` is copied when the address result has one, and ``source_path``
    is added for ``downloaded`` entries.
    """
    error_source = last_error or address_result.get("last_error")
    download_url = address_result.get("url")
    entry: dict[str, Any] = {
        "source": "hik_cloud",
        "device_serial": chunk["device_serial"],
        "channel_no": chunk["channel_no"],
        "requested_begin": chunk["time_begin"],
        "requested_end": chunk["time_end"],
        "actual_begin": address_result.get("actual_begin"),
        "actual_end": address_result.get("actual_end"),
        "path": None if path is None else str(path),
        "status": status,
        "retry_count": 0,
        "last_error": _sanitize_error(error_source, token),
    }
    if download_url:
        entry["download_url_host"] = urlparse(str(download_url)).netloc
    if "code" in address_result:
        entry["code"] = address_result.get("code")
    if status == "downloaded":
        entry["source_path"] = _source_path(chunk)
    return entry
def _source_path(chunk: dict[str, Any]) -> str:
time_begin = chunk.get("time_begin", chunk.get("requested_begin"))
time_end = chunk.get("time_end", chunk.get("requested_end"))
return (
f"hik_cloud://{chunk['device_serial']}/ch{chunk['channel_no']}/"
f"{int(time_begin)}-{int(time_end)}"
)
def _is_resumable_download(record: dict[str, Any]) -> bool:
path = record.get("path")
return (
record.get("status") == "downloaded"
and isinstance(path, str)
and Path(path).exists()
)
def _video_record_from_manifest(record: dict[str, Any]) -> dict[str, Any]:
    """Rebuild a video record from a previously written manifest entry.

    ``source_path`` is recomputed from the entry when it was not stored. The
    status is always reported as ``downloaded``.
    """
    source_path = record.get("source_path") or _source_path(record)
    rebuilt: dict[str, Any] = {
        "source": "hik_cloud",
        "path": record["path"],
        "source_path": source_path,
    }
    for required_field in ("device_serial", "channel_no", "requested_begin", "requested_end"):
        rebuilt[required_field] = record[required_field]
    rebuilt["actual_begin"] = record.get("actual_begin")
    rebuilt["actual_end"] = record.get("actual_end")
    rebuilt["status"] = "downloaded"
    rebuilt["retry_count"] = record.get("retry_count", 0)
    rebuilt["last_error"] = record.get("last_error")
    return rebuilt
def _upsert_manifest_record(
    records: list[dict[str, Any]],
    new_record: dict[str, Any],
) -> None:
    """Replace the first entry with the same manifest key, or append.

    *records* is modified in place.
    """
    wanted = _manifest_key(new_record)
    position = next(
        (
            offset
            for offset, existing in enumerate(records)
            if _manifest_key(existing) == wanted
        ),
        None,
    )
    if position is None:
        records.append(new_record)
    else:
        records[position] = new_record
def _chunk_key(chunk: dict[str, Any]) -> tuple[Any, Any, Any, Any]:
return (
chunk.get("device_serial"),
chunk.get("channel_no"),
chunk.get("time_begin"),
chunk.get("time_end"),
)
def _manifest_key(record: dict[str, Any]) -> tuple[Any, Any, Any, Any]:
return (
record.get("device_serial"),
record.get("channel_no"),
record.get("requested_begin"),
record.get("requested_end"),
)
def _redaction_token(hik_config: dict[str, Any]) -> str:
token = hik_config.get("access_token")
if token:
return str(token)
token_env = hik_config.get("access_token_env")
if token_env:
return os.environ.get(str(token_env), "")
return ""

View File

@@ -0,0 +1,35 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Iterable
def write_manifest(path: str | Path, records: Iterable[dict[str, Any]]) -> None:
    """Write *records* to *path* as JSON Lines, replacing any existing file.

    Parent directories are created as needed. Each record is normalized first
    and serialized on one line with sorted keys and non-ASCII text kept as-is.
    """
    target = Path(path).expanduser().resolve(strict=False)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        for record in records:
            serialized = json.dumps(
                _normalize_record(record),
                ensure_ascii=False,
                sort_keys=True,
            )
            handle.write(f"{serialized}\n")
def read_jsonl(path: str | Path) -> list[dict[str, Any]]:
    """Load a JSON Lines file; a missing file yields an empty list.

    Blank lines are skipped. A line that is not valid JSON raises
    ``json.JSONDecodeError``.
    """
    source = Path(path).expanduser().resolve(strict=False)
    if not source.exists():
        return []
    content = source.read_text(encoding="utf-8")
    return [json.loads(row) for row in content.splitlines() if row.strip()]
def _normalize_record(record: dict[str, Any]) -> dict[str, Any]:
normalized = dict(record)
normalized.setdefault("status", "pending")
normalized.setdefault("retry_count", 0)
normalized.setdefault("last_error", None)
return normalized

View File

@@ -0,0 +1,71 @@
from __future__ import annotations
import hashlib
from pathlib import Path
# Default forbidden_root of validate_output_dir: output directories at or below
# this location are rejected.
# NOTE(review): this is a machine-specific absolute path — confirm it is meant
# to apply on hosts other than the original author's.
FORBIDDEN_REFERENCE_ROOT = Path("/Users/yoilun/AI-train/zhengxin-vlm-0413")
def resolve_path(path: str | Path, *, base_dir: Path | None = None) -> Path:
    """Return *path* as a resolved absolute path.

    ``~`` is expanded. A relative path is anchored at *base_dir* when one is
    given, otherwise at the current working directory. The path does not have
    to exist.
    """
    expanded = Path(path).expanduser()
    anchored = expanded
    if base_dir is not None and not expanded.is_absolute():
        anchored = base_dir / expanded
    return anchored.resolve(strict=False)
def _is_relative_to(path: Path, parent: Path) -> bool:
try:
path.relative_to(parent)
return True
except ValueError:
return False
def validate_output_dir(
    input_dir: str | Path,
    output_dir: str | Path,
    *,
    forbidden_root: Path = FORBIDDEN_REFERENCE_ROOT,
) -> Path:
    """Check that *output_dir* is an acceptable place to write results.

    Returns:
        The resolved output directory.

    Raises:
        ValueError: If the output directory resolves to the input directory,
            or to *forbidden_root* or anything below it.
    """
    source_dir = resolve_path(input_dir)
    target_dir = resolve_path(output_dir)
    blocked_dir = resolve_path(forbidden_root)
    if target_dir == source_dir:
        raise ValueError("output dir must not equal input dir")
    inside_blocked = _is_relative_to(target_dir, blocked_dir)
    if inside_blocked:
        raise ValueError(
            f"output dir must not be inside forbidden reference dir: {blocked_dir}"
        )
    return target_dir
def stable_video_id(path: str | Path) -> str:
    """Derive a video id from the resolved path of *path*.

    The id is ``video-`` followed by the first 16 hex digits of the SHA-1 of
    the resolved path text, so the same location always maps to the same id.
    """
    canonical = str(resolve_path(path)).encode("utf-8")
    return "video-" + hashlib.sha1(canonical).hexdigest()[:16]
def hik_cloud_download_path(
    output_dir: str | Path,
    device_serial: str,
    channel_no: int | str,
    time_begin: int,
    time_end: int,
) -> Path:
    """Return where the recording of one chunk is stored.

    The layout is
    ``<output_dir>/downloads/hik_cloud/<device>/ch<channel>/<device>_ch<channel>_<begin>_<end>.mp4``
    with the device and channel passed through ``_safe_path_component``.
    """
    device = _safe_path_component(device_serial)
    channel = _safe_path_component(str(channel_no))
    begin, end = int(time_begin), int(time_end)
    channel_dir = resolve_path(output_dir).joinpath(
        "downloads", "hik_cloud", device, f"ch{channel}"
    )
    return channel_dir / f"{device}_ch{channel}_{begin}_{end}.mp4"
def _safe_path_component(value: str) -> str:
return "".join(char if char.isalnum() or char in "._-" else "_" for char in value)

View File

@@ -0,0 +1,99 @@
from __future__ import annotations
import json
import subprocess
from pathlib import Path
from typing import Any
def probe_video(path: str | Path, *, timeout_seconds: int = 30) -> dict[str, Any]:
    """Inspect a video with ffprobe and return a manifest record.

    Args:
        path: Video file to inspect.
        timeout_seconds: Limit for the ffprobe call.

    Returns:
        A record with ``status`` set to ``probed`` plus duration, codec,
        dimensions, frame rate, container format and start time on success.
        When ffprobe times out, exits non-zero or produces output that cannot
        be interpreted, the record has ``status`` ``probe_failed`` and
        ``last_error`` describes the problem.
    """
    video_path = Path(path).expanduser().resolve(strict=False)
    record: dict[str, Any] = {
        "path": str(video_path),
        "status": "probe_failed",
        "retry_count": 0,
        "last_error": None,
    }
    argv = [
        "ffprobe",
        "-v",
        "error",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        str(video_path),
    ]
    try:
        completed = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            check=True,
            timeout=timeout_seconds,
        )
        payload = json.loads(completed.stdout or "{}")
        stream = _first_video_stream(payload)
        container = payload.get("format", {})
        # All values are computed before the record is touched, so a
        # conversion error leaves it in its "probe_failed" state.
        record.update(
            status="probed",
            duration_seconds=_optional_float(container.get("duration")),
            codec_name=stream.get("codec_name"),
            width=_optional_int(stream.get("width")),
            height=_optional_int(stream.get("height")),
            fps=_parse_frame_rate(
                stream.get("avg_frame_rate") or stream.get("r_frame_rate")
            ),
            format_name=container.get("format_name"),
            start_time=_optional_float(container.get("start_time")),
        )
    except subprocess.TimeoutExpired as exc:
        message = f"ffprobe timed out after {timeout_seconds}s"
        if exc.stderr:
            message += f": {exc.stderr}"
        record["last_error"] = message
    except subprocess.CalledProcessError as exc:
        record["last_error"] = _error_text(exc.stderr or exc.stdout or str(exc))
    except (json.JSONDecodeError, ValueError) as exc:
        record["last_error"] = f"ffprobe parse failed: {exc}"
    return record
def _first_video_stream(payload: dict[str, Any]) -> dict[str, Any]:
for stream in payload.get("streams", []):
if stream.get("codec_type") == "video":
return stream
raise ValueError("ffprobe output did not contain a video stream")
def _parse_frame_rate(value: str | None) -> float | None:
if not value or value == "0/0":
return None
if "/" in value:
numerator, denominator = value.split("/", 1)
denominator_value = float(denominator)
if denominator_value == 0:
return None
return float(numerator) / denominator_value
return float(value)
def _optional_float(value: Any) -> float | None:
if value is None or value == "":
return None
return float(value)
def _optional_int(value: Any) -> int | None:
if value is None or value == "":
return None
return int(value)
def _error_text(value: Any) -> str:
if isinstance(value, bytes):
return value.decode("utf-8", errors="replace").strip()
return str(value).strip()

View File

@@ -0,0 +1,138 @@
from __future__ import annotations
import json
from typing import Any
def extract_json_payload(raw_response: str) -> dict[str, Any]:
    """Extract the first JSON object from a model response.

    The whole (stripped) response is tried first. If it is not a JSON object,
    each ``{`` is tried in turn as the start of an embedded object, which
    copes with surrounding prose or code fences.

    Raises:
        ValueError: If the response is empty or contains no JSON object.
    """
    text = raw_response.strip()
    if not text:
        raise ValueError("JSON payload is empty")
    try:
        whole = json.loads(text)
    except json.JSONDecodeError:
        whole = None
    if isinstance(whole, dict):
        return whole

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    raise ValueError("JSON object not found in model response")
def build_clip_result(
    raw_response: str,
    clip_record: dict[str, Any],
    video_record: dict[str, Any] | None,
    config: dict[str, Any],
    *,
    processing: dict[str, Any] | None = None,
    status: str | None = None,
    error: str | None = None,
) -> dict[str, Any]:
    """Assemble the stored result for one clip from the model's raw response.

    Args:
        raw_response: Text returned by the model; stored unchanged.
        clip_record: Clip manifest entry supplying ids and timeline fields.
        video_record: Video manifest entry, used only for the video path.
        config: Full configuration mapping.
        processing: Optional processing metadata, copied into the result.
        status: When given, the response is not parsed and this status (with
            *error*) is recorded as-is.
        error: Error text recorded together with an explicit *status*.

    Returns:
        The clip result. Without an explicit *status* it is ``ok`` when a JSON
        object could be extracted and ``parse_failed`` otherwise. Events are
        included only for ``ok`` results.
    """
    processing_record = dict(processing or {})
    payload: dict[str, Any] = {}
    if status is not None:
        result_status, result_error = status, error
    else:
        try:
            payload = extract_json_payload(raw_response)
        except ValueError as exc:
            result_status, result_error = "parse_failed", str(exc)
        else:
            result_status, result_error = "ok", None

    timeline = _timeline(clip_record, config, payload)
    result: dict[str, Any] = {
        "schema_version": config.get("schema", {}).get("version", "local-batch-v1"),
        "video_id": str(clip_record.get("video_id")),
        "video_path": _video_path(video_record),
        "clip_id": str(clip_record.get("clip_id")),
        "status": result_status,
        "monitoring_timeline": timeline,
        "events": _events(payload, clip_record) if result_status == "ok" else [],
    }
    result["raw_response"] = raw_response
    result["processing"] = processing_record
    result["error"] = result_error
    return result
def _timeline(
clip_record: dict[str, Any],
config: dict[str, Any],
payload: dict[str, Any],
) -> dict[str, Any]:
return {
"timezone": config.get("runtime", {}).get("timezone", "Asia/Shanghai"),
"video_start_time": clip_record.get("video_start_time"),
"clip_start_seconds": clip_record.get("clip_start_seconds"),
"clip_end_seconds": clip_record.get("clip_end_seconds"),
"clip_start_timecode": clip_record.get("clip_start_timecode"),
"clip_end_timecode": clip_record.get("clip_end_timecode"),
"clip_start_beijing_time": clip_record.get("clip_start_beijing_time"),
"clip_end_beijing_time": clip_record.get("clip_end_beijing_time"),
"frame_times": clip_record.get("frame_times", []),
"screen_time": str(
payload.get("screen_time") or payload.get("画面时间") or payload.get("时间") or ""
),
}
def _events(
    payload: dict[str, Any],
    clip_record: dict[str, Any],
) -> list[dict[str, Any]]:
    """Return the normalized events listed under ``payload["events"]``.

    A missing, falsy or non-list ``events`` value yields an empty list, and
    entries that are not dictionaries are dropped.
    """
    normalized: list[dict[str, Any]] = []
    listed = payload.get("events") or []
    if not isinstance(listed, list):
        return normalized
    for entry in listed:
        if isinstance(entry, dict):
            normalized.append(_event(entry, clip_record))
    return normalized
def _event(
    event: dict[str, Any],
    clip_record: dict[str, Any],
) -> dict[str, Any]:
    """Return a copy of ``event`` with every standard field present.

    Keys already in ``event`` are kept untouched (even when their value is
    ``None``); missing ones are appended with defaults derived from
    ``clip_record``. The input dictionary is not modified.
    """
    fallbacks: dict[str, Any] = {
        "event_type": "unknown",
        "start_time": None,
        "end_time": None,
        # Without model-provided offsets the event spans the whole clip.
        "start_offset_seconds": clip_record.get("clip_start_seconds"),
        "end_offset_seconds": clip_record.get("clip_end_seconds"),
        "confidence": None,
        "severity": None,
        "attributes": {},
        "evidence": {
            "clip_id": clip_record.get("clip_id"),
            "frame_paths": [
                path
                for path in (
                    frame.get("frame_path")
                    for frame in clip_record.get("frame_times", [])
                )
                if path
            ],
        },
    }
    missing = {key: value for key, value in fallbacks.items() if key not in event}
    return {**event, **missing}
def _video_path(video_record: dict[str, Any] | None) -> str | None:
    """Return the video's path as a string, or ``None`` when unavailable.

    ``path`` is preferred; ``source_path`` is used when ``path`` is missing
    or falsy.
    """
    if video_record:
        candidate = video_record.get("path")
        if not candidate:
            candidate = video_record.get("source_path")
        if candidate is not None:
            return str(candidate)
    return None

View File

@@ -0,0 +1,67 @@
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from typing import Any
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
# strftime/strptime pattern shared by the time helpers in this module.
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# Zone name used as the default for formatting and as the fallback when the
# requested zone is not found.
DEFAULT_TIMEZONE = "Asia/Shanghai"
def format_beijing_time(
    epoch_seconds: float | int | str | None,
    *,
    offset_seconds: float | int = 0,
    timezone_name: str = DEFAULT_TIMEZONE,
) -> str | None:
    """Format an epoch timestamp, shifted by an offset, as local wall-clock text.

    Despite the name, the zone is whatever ``timezone_name`` resolves to via
    ``_zone``.

    Args:
        epoch_seconds: Seconds since the Unix epoch.
        offset_seconds: Seconds added to ``epoch_seconds`` before formatting.
        timezone_name: Name of the zone to render the time in.

    Returns:
        The time formatted with ``TIME_FORMAT``, or ``None`` when
        ``_optional_float`` yields no value for ``epoch_seconds`` (for
        example ``None`` or an empty string).
    """
    base_epoch = _optional_float(epoch_seconds)
    if base_epoch is None:
        return None

    target_zone = _zone(timezone_name)
    shifted_epoch = base_epoch + float(offset_seconds)
    # Interpret the epoch in UTC first, then convert to the requested zone.
    moment_utc = datetime.fromtimestamp(shifted_epoch, tz=timezone.utc)
    return moment_utc.astimezone(target_zone).strftime(TIME_FORMAT)
def derive_time_from_reference(
    reference_time: str | None,
    *,
    reference_offset_seconds: float | int | None,
    target_offset_seconds: float | int | None,
) -> str | None:
    """Shift a formatted reference time by the gap between two offsets.

    Args:
        reference_time: Time text in ``TIME_FORMAT`` observed at
            ``reference_offset_seconds``.
        reference_offset_seconds: Offset at which ``reference_time`` applies.
        target_offset_seconds: Offset whose time should be derived.

    Returns:
        The derived time in ``TIME_FORMAT``, or ``None`` when the reference
        time is empty, either offset is unavailable, or the reference time
        does not match ``TIME_FORMAT``.
    """
    if not reference_time:
        return None

    from_offset = _optional_float(reference_offset_seconds)
    to_offset = _optional_float(target_offset_seconds)
    if from_offset is None or to_offset is None:
        return None

    try:
        anchor = datetime.strptime(reference_time, TIME_FORMAT)
    except ValueError:
        return None

    shift = timedelta(seconds=to_offset - from_offset)
    return (anchor + shift).strftime(TIME_FORMAT)
def timeline_start_epoch(record: dict[str, Any]) -> float | None:
    """Return the record's start epoch, preferring ``actual_begin``.

    ``requested_begin`` is consulted only when ``actual_begin`` yields no
    value; ``None`` is returned when neither does.
    """
    # The generator is lazy, so the second key is only read when needed.
    candidates = (
        _optional_float(record.get(key))
        for key in ("actual_begin", "requested_begin")
    )
    return next((value for value in candidates if value is not None), None)
def _zone(timezone_name: str) -> ZoneInfo:
    """Resolve ``timezone_name`` to a zone, falling back to ``DEFAULT_TIMEZONE``.

    The fallback applies whenever the name cannot be used: it is empty or not
    a string, it is unknown to the time zone database, or it is a malformed
    key (for example an absolute path or one containing ``..``).

    Args:
        timezone_name: Time zone key such as ``"Asia/Shanghai"``.

    Returns:
        The requested zone, or the default zone when the request is unusable.
    """
    if isinstance(timezone_name, str) and timezone_name:
        try:
            return ZoneInfo(timezone_name)
        except (ZoneInfoNotFoundError, ValueError, OSError):
            # ValueError: ZoneInfo rejects non-normalized or absolute keys.
            # OSError: NOTE(review) some interpreter versions appear to raise
            # a filesystem error for keys that name a directory ("Asia").
            pass
    return ZoneInfo(DEFAULT_TIMEZONE)
def _optional_float(value: Any) -> float | None:
    """Coerce ``value`` to a finite float, or return ``None`` when impossible.

    ``None``, the empty string, values ``float()`` cannot convert and
    non-finite results (NaN, infinities) all map to ``None``, so callers can
    treat every unusable input as "missing" and move on to a fallback instead
    of crashing on malformed manifest data.

    Args:
        value: Number, numeric string, or anything else.

    Returns:
        The finite float value, or ``None``.
    """
    import math  # local import keeps the module's import block untouched

    if value is None or value == "":
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    # NaN and infinities cannot be turned into timestamps or time deltas.
    return number if math.isfinite(number) else None

View File

@@ -0,0 +1,134 @@
from __future__ import annotations
import base64
import json
import time
import urllib.request
from pathlib import Path
from typing import Any, Callable
# Signature of the pluggable HTTP transport: (url, JSON payload, timeout in
# seconds) -> dict; infer_clip reads its "status" and "body" keys.
HttpPost = Callable[[str, dict[str, Any], int], dict[str, Any]]
def infer_clip(
    clip_record: dict[str, Any],
    output_dir: str | Path,
    vlm_config: dict[str, Any],
    prompt_config: dict[str, Any],
    *,
    http_post: HttpPost | None = None,
) -> dict[str, Any]:
    """Send one clip to the chat-completions endpoint and return its reply.

    Args:
        clip_record: Manifest entry of the clip, including its frames.
        output_dir: Directory used to resolve relative frame paths.
        vlm_config: Endpoint, model and request settings.
        prompt_config: System and user prompt texts.
        http_post: Optional transport replacing the built-in ``_post_json``.

    Returns:
        A dict with ``raw_response`` (message text), ``http_status`` and
        ``latency_ms``. The latency is measured from before the request is
        built until the transport returns.
    """
    started = time.monotonic()

    send = http_post if http_post else _post_json
    endpoint = build_chat_url(vlm_config)
    request_body = build_payload(clip_record, output_dir, vlm_config, prompt_config)
    timeout = int(vlm_config.get("timeout_seconds", 120))
    reply = send(endpoint, request_body, timeout)

    elapsed_ms = int((time.monotonic() - started) * 1000)
    return {
        "raw_response": _extract_message_content(reply.get("body")),
        "http_status": reply.get("status"),
        "latency_ms": elapsed_ms,
    }
def build_chat_url(vlm_config: dict[str, Any]) -> str:
    """Join the configured base URL and chat-completions path.

    Exactly one ``/`` separates the two parts whether or not the base ends
    with a slash or the path starts with one, so a path configured as
    ``v1/chat/completions`` no longer gets glued onto the host name. An empty
    path returns the base URL without a trailing slash.

    Args:
        vlm_config: Must contain ``api_base_url`` and ``chat_completions_path``.

    Returns:
        The full endpoint URL.

    Raises:
        KeyError: If either configuration key is missing.
    """
    base = str(vlm_config["api_base_url"]).rstrip("/")
    path = str(vlm_config["chat_completions_path"])
    if path and not path.startswith("/"):
        path = "/" + path
    return base + path
def build_payload(
    clip_record: dict[str, Any],
    output_dir: str | Path,
    vlm_config: dict[str, Any],
    prompt_config: dict[str, Any],
) -> dict[str, Any]:
    """Build the chat-completions request body for one clip.

    The user message holds the user prompt text followed by one ``image_url``
    part per frame that has a ``frame_path``; frames without one are skipped.

    Args:
        clip_record: Manifest entry whose ``frame_times`` list the frames.
        output_dir: Directory used to resolve relative frame paths.
        vlm_config: Supplies ``model``, ``temperature``, ``max_tokens`` and
            ``image_transport``.
        prompt_config: Supplies the ``system`` and ``user`` prompt texts.

    Returns:
        The request payload dictionary.
    """
    text_part: dict[str, Any] = {
        "type": "text",
        "text": str(prompt_config.get("user", "")),
    }
    frame_paths = (
        frame.get("frame_path") for frame in clip_record.get("frame_times", [])
    )
    image_parts: list[dict[str, Any]] = [
        {
            "type": "image_url",
            "image_url": {
                "url": _image_url(
                    frame_path,
                    output_dir,
                    str(vlm_config.get("image_transport", "data_uri")),
                )
            },
        }
        for frame_path in frame_paths
        if frame_path
    ]

    system_message = {"role": "system", "content": str(prompt_config.get("system", ""))}
    user_message = {"role": "user", "content": [text_part, *image_parts]}
    return {
        "model": vlm_config.get("model"),
        "messages": [system_message, user_message],
        "temperature": vlm_config.get("temperature", 0),
        "max_tokens": vlm_config.get("max_tokens", 512),
    }
def _image_url(
    frame_path: str | Path,
    output_dir: str | Path,
    image_transport: str,
) -> str:
    """Return the URL value used to reference a frame in the request.

    With the ``"data_uri"`` transport the frame file is read and embedded as
    a base64 data URI; relative paths are resolved against ``output_dir``.
    Any other transport returns ``frame_path`` unchanged as a string.
    """
    if image_transport == "data_uri":
        resolved = Path(frame_path).expanduser()
        if not resolved.is_absolute():
            resolved = Path(output_dir).expanduser() / resolved
        encoded = base64.b64encode(resolved.read_bytes()).decode("ascii")
        return f"data:{_mime_type(resolved)};base64,{encoded}"
    return str(frame_path)
def _mime_type(path: Path) -> str:
    """Map a frame file's extension to its MIME type (case-insensitive).

    Unrecognized extensions yield ``application/octet-stream``.
    """
    known_types = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".webp": "image/webp",
    }
    return known_types.get(path.suffix.lower(), "application/octet-stream")
def _post_json(
    url: str,
    payload: dict[str, Any],
    timeout_seconds: int,
) -> dict[str, Any]:
    """POST ``payload`` as JSON and return the response status and body.

    Args:
        url: Endpoint to post to.
        payload: JSON-serializable request body.
        timeout_seconds: Timeout handed to ``urlopen``.

    Returns:
        ``{"status": <response status>, "body": <decoded JSON>}``; an empty
        response body is reported as ``{}``.

    Raises:
        Errors from ``urlopen`` and from decoding the body are not caught
        here and propagate to the caller.
    """
    encoded = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=encoded,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
        text = response.read().decode("utf-8")
        status = response.status
    parsed = json.loads(text) if text else {}
    return {"status": status, "body": parsed}
def _extract_message_content(body: Any) -> str:
    """Return the text of the first choice's message in a chat response.

    Malformed or incomplete responses never raise; they yield ``""``. This
    covers a non-dict body, missing/empty/non-list ``choices``, a first
    choice or ``message`` that is not a dict, and absent or ``null`` content
    (previously ``null`` was returned as the literal string ``"null"``).

    Content given as a list of parts is reduced to the concatenation of its
    text parts, so downstream parsing sees the model's text rather than the
    part wrappers. Any other non-string content is JSON-encoded.

    Args:
        body: Decoded JSON body of the chat-completions response.

    Returns:
        The message text, possibly empty.
    """
    if not isinstance(body, dict):
        return ""
    choices = body.get("choices")
    if not isinstance(choices, (list, tuple)) or not choices:
        return ""
    first_choice = choices[0]
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    if not isinstance(message, dict):
        return ""

    content = message.get("content")
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Parts are either plain strings or dicts carrying a "text" string.
        texts = [
            part if isinstance(part, str) else part["text"]
            for part in content
            if isinstance(part, str)
            or (isinstance(part, dict) and isinstance(part.get("text"), str))
        ]
        if texts:
            return "".join(texts)
    return json.dumps(content, ensure_ascii=False)