Initial video AI analysis project
This commit is contained in:
9
video_ai_analysis_poc/__init__.py
Normal file
9
video_ai_analysis_poc/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Local video batch analysis PoC."""

# Names exported by ``from video_ai_analysis_poc import *``.
__all__ = ["config", "discovery", "manifest", "paths", "probe"]
|
||||
403
video_ai_analysis_poc/aggregator.py
Normal file
403
video_ai_analysis_poc/aggregator.py
Normal file
@@ -0,0 +1,403 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .manifest import read_jsonl
|
||||
|
||||
|
||||
def aggregate_outputs(
    output_dir: str | Path,
    config: dict[str, Any],
) -> dict[str, Any]:
    """Build per-video result files and the folder summary for a run.

    Reads ``video_manifest.jsonl``, ``clip_manifest.jsonl`` and
    ``clip_results.jsonl`` from *output_dir*, writes
    ``videos/<video_id>/video_result.json`` for every video record that has a
    ``video_id``, then writes ``folder_summary.json`` and returns its payload.

    A video is reported as ``"failed"`` when its manifest status is not
    ``"probed"`` or when at least one of its clips failed.
    """
    base_dir = Path(output_dir).expanduser().resolve(strict=False)
    run_started = _now_iso()
    video_records = read_jsonl(base_dir / "video_manifest.jsonl")
    all_clips = read_jsonl(base_dir / "clip_manifest.jsonl")
    all_results = read_jsonl(base_dir / "clip_results.jsonl")

    schema_settings = config.get("schema", {})
    schema_version = str(schema_settings.get("version", "local-batch-v1"))
    merge_gap_seconds = float(schema_settings.get("merge_gap_seconds", 30))
    clips_by_video = _group_by_video(all_clips)
    results_by_video = _group_by_video(all_results)

    per_video_entries = []
    folder_event_counts: dict[str, int] = {}
    ok_videos = 0
    failed_videos = 0

    for video_record in video_records:
        video_id = str(video_record.get("video_id") or "")
        if video_id == "":
            # Records without an id cannot be mapped to an output folder.
            continue
        video_result = _build_video_result(
            video_record,
            clips_by_video.get(video_id, []),
            results_by_video.get(video_id, []),
            schema_version=schema_version,
            merge_gap_seconds=merge_gap_seconds,
            started_at=run_started,
        )
        _write_json(base_dir / "videos" / video_id / "video_result.json", video_result)

        failed_clip_count = int(video_result["failed_clip_count"])
        video_failed = failed_clip_count > 0 or video_record.get("status") != "probed"
        if video_failed:
            failed_videos += 1
            summary_status = "failed"
        else:
            ok_videos += 1
            summary_status = "processed"
        for event_type, count in video_result["event_counts"].items():
            folder_event_counts[event_type] = folder_event_counts.get(event_type, 0) + int(count)
        per_video_entries.append(
            {
                "video_id": video_id,
                "video_path": video_result["video_path"],
                "status": summary_status,
                "clip_count": video_result["clip_count"],
                "failed_clip_count": failed_clip_count,
                "failed_clip_counts": video_result["failed_clip_counts"],
                "event_counts": video_result["event_counts"],
                "outputs": {"video_result_json": f"videos/{video_id}/video_result.json"},
                "error": video_record.get("last_error"),
            }
        )

    folder_summary = {
        "schema_version": schema_version,
        "input_dir": str(config.get("input", {}).get("dir")),
        "video_count": len(video_records),
        "processed_video_count": ok_videos,
        "failed_video_count": failed_videos,
        "event_counts": dict(sorted(folder_event_counts.items())),
        "videos": per_video_entries,
        "processing": {
            "started_at": run_started,
            "finished_at": _now_iso(),
        },
    }
    _write_json(base_dir / "folder_summary.json", folder_summary)
    return folder_summary
|
||||
|
||||
|
||||
def _build_video_result(
    video_record: dict[str, Any],
    clip_records: list[dict[str, Any]],
    clip_results: list[dict[str, Any]],
    *,
    schema_version: str,
    merge_gap_seconds: float,
    started_at: str,
) -> dict[str, Any]:
    """Assemble the ``video_result.json`` payload for a single video.

    Events from the video's successful clip results are normalized and merged
    (same type, gap of at most *merge_gap_seconds*); failed clips are counted
    per failure status. *started_at* is copied into the ``processing`` section.
    """
    failure_tally = _failed_clip_counts(clip_results)
    events = _merge_events(_event_records(clip_results), merge_gap_seconds)
    counts_by_type = _event_counts(events)
    # The duration may be stored under any of these keys; first non-null wins.
    duration_keys = ("duration_seconds", "video_duration_seconds", "duration")
    duration = _first_present(video_record, duration_keys)
    start_time = _video_start_time(video_record, clip_results)

    timeline = {
        "video_start_time": start_time,
        "video_duration_seconds": duration,
    }
    return {
        "schema_version": schema_version,
        "video_id": str(video_record.get("video_id")),
        "video_path": _video_path(video_record, clip_results),
        "probe": _probe(video_record),
        "monitoring_timeline": timeline,
        "clip_count": len(clip_records),
        "failed_clip_count": sum(failure_tally.values()),
        "failed_clip_counts": failure_tally,
        "event_counts": counts_by_type,
        "events": events,
        "outputs": {"clip_results_jsonl": "clip_results.jsonl"},
        "processing": {
            "started_at": started_at,
            "finished_at": _now_iso(),
        },
    }
|
||||
|
||||
|
||||
def _event_records(clip_results: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
records = []
|
||||
for result in clip_results:
|
||||
if result.get("status") != "ok":
|
||||
continue
|
||||
timeline = result.get("monitoring_timeline") or {}
|
||||
if not isinstance(timeline, dict):
|
||||
timeline = {}
|
||||
for event in result.get("events") or []:
|
||||
if not isinstance(event, dict):
|
||||
continue
|
||||
event_record = _normalize_event(event, result, timeline)
|
||||
records.append(event_record)
|
||||
return sorted(
|
||||
records,
|
||||
key=lambda event: (
|
||||
str(event.get("video_id")),
|
||||
str(event.get("event_type")),
|
||||
float(event.get("start_offset_seconds") or 0),
|
||||
float(event.get("end_offset_seconds") or 0),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _normalize_event(
    event: dict[str, Any],
    result: dict[str, Any],
    timeline: dict[str, Any],
) -> dict[str, Any]:
    """Convert one raw clip-level event into the flat record used for merging.

    Args:
        event: Event dict taken from a clip result's ``events`` list.
        result: The clip result the event belongs to (supplies ``clip_id`` and
            ``video_id``).
        timeline: The clip result's ``monitoring_timeline`` dict (may be empty).

    Returns:
        A dict with the event's type, times and offsets, plus an ``evidence``
        section listing the clip ids, frame paths, frame times and clip timing
        it came from. Offsets missing from the event default to the clip's
        start/end seconds.
    """
    clip_id = str(result.get("clip_id"))
    # ``frame_times`` can be present but null in stored results; ``or []`` keeps
    # that from raising, matching how the events list is read elsewhere.
    frame_times = [
        dict(frame)
        for frame in timeline.get("frame_times") or []
        if isinstance(frame, dict)
    ]
    frame_paths = [
        str(frame.get("frame_path"))
        for frame in frame_times
        if frame.get("frame_path") is not None
    ]
    start = event.get("start_offset_seconds", timeline.get("clip_start_seconds"))
    end = event.get("end_offset_seconds", timeline.get("clip_end_seconds"))
    screen_time = str(timeline.get("screen_time") or "")
    normalized = {
        "video_id": str(result.get("video_id")),
        "event_type": str(event.get("event_type") or "unknown"),
        "start_time": event.get("start_time"),
        "end_time": event.get("end_time"),
        "start_offset_seconds": _float_or_none(start),
        "end_offset_seconds": _float_or_none(end),
        "confidence": event.get("confidence"),
        "severity": event.get("severity"),
        "attributes": event.get("attributes") if isinstance(event.get("attributes"), dict) else {},
        "screen_times": [screen_time] if screen_time else [],
        "evidence": {
            "clip_ids": [clip_id],
            "frame_paths": frame_paths,
            "frame_times": frame_times,
            "clips": [
                {
                    "clip_id": clip_id,
                    "clip_start_seconds": timeline.get("clip_start_seconds"),
                    "clip_end_seconds": timeline.get("clip_end_seconds"),
                    "clip_start_timecode": timeline.get("clip_start_timecode"),
                    "clip_end_timecode": timeline.get("clip_end_timecode"),
                    "clip_start_beijing_time": timeline.get("clip_start_beijing_time"),
                    "clip_end_beijing_time": timeline.get("clip_end_beijing_time"),
                    "screen_time": screen_time,
                }
            ],
        },
        "source_event_count": 1,
    }
    # Fold in any evidence the event itself already carried.
    original_evidence = event.get("evidence")
    if isinstance(original_evidence, dict):
        original_clip_id = original_evidence.get("clip_id")
        if original_clip_id:
            normalized["evidence"]["clip_ids"] = _unique(
                [*normalized["evidence"]["clip_ids"], str(original_clip_id)]
            )
        original_frame_paths = original_evidence.get("frame_paths")
        if isinstance(original_frame_paths, list):
            normalized["evidence"]["frame_paths"] = _unique(
                [*normalized["evidence"]["frame_paths"], *map(str, original_frame_paths)]
            )
    return normalized
|
||||
|
||||
|
||||
def _merge_events(
|
||||
events: list[dict[str, Any]],
|
||||
merge_gap_seconds: float,
|
||||
) -> list[dict[str, Any]]:
|
||||
merged: list[dict[str, Any]] = []
|
||||
for event in events:
|
||||
if not merged or not _can_merge(merged[-1], event, merge_gap_seconds):
|
||||
merged.append(_copy_event(event))
|
||||
continue
|
||||
_merge_into(merged[-1], event)
|
||||
for event in merged:
|
||||
event.pop("video_id", None)
|
||||
return merged
|
||||
|
||||
|
||||
def _can_merge(
|
||||
previous: dict[str, Any],
|
||||
current: dict[str, Any],
|
||||
merge_gap_seconds: float,
|
||||
) -> bool:
|
||||
if previous.get("video_id") != current.get("video_id"):
|
||||
return False
|
||||
if previous.get("event_type") != current.get("event_type"):
|
||||
return False
|
||||
previous_end = _float_or_none(previous.get("end_offset_seconds"))
|
||||
current_start = _float_or_none(current.get("start_offset_seconds"))
|
||||
if previous_end is None or current_start is None:
|
||||
return False
|
||||
return current_start - previous_end <= merge_gap_seconds
|
||||
|
||||
|
||||
def _merge_into(target: dict[str, Any], event: dict[str, Any]) -> None:
    """Merge *event* into *target* in place.

    The merged record spans the earliest start and latest end offset,
    accumulates screen times, clip ids and frame paths without duplicates,
    appends frame times and clip descriptors, sums ``source_event_count`` and
    keeps the highest numeric confidence.
    """
    target["start_offset_seconds"] = _min_number(
        target.get("start_offset_seconds"),
        event.get("start_offset_seconds"),
    )
    target["end_offset_seconds"] = _max_number(
        target.get("end_offset_seconds"),
        event.get("end_offset_seconds"),
    )
    target["screen_times"] = _unique(
        [*target.get("screen_times", []), *event.get("screen_times", [])]
    )
    target["source_event_count"] = int(target.get("source_event_count", 1)) + int(
        event.get("source_event_count", 1)
    )
    target["evidence"]["clip_ids"] = _unique(
        [*target["evidence"].get("clip_ids", []), *event["evidence"].get("clip_ids", [])]
    )
    target["evidence"]["frame_paths"] = _unique(
        [
            *target["evidence"].get("frame_paths", []),
            *event["evidence"].get("frame_paths", []),
        ]
    )
    target["evidence"]["frame_times"].extend(event["evidence"].get("frame_times", []))
    target["evidence"]["clips"].extend(event["evidence"].get("clips", []))

    # Confidence is copied from the source event unvalidated, so it may be a
    # non-numeric value. Parse it tolerantly so one bad value cannot abort the
    # whole aggregation.
    event_confidence = _float_or_none(event.get("confidence"))
    if target.get("confidence") is None:
        target["confidence"] = event.get("confidence")
    elif event_confidence is not None:
        target_confidence = _float_or_none(target.get("confidence"))
        if target_confidence is None:
            # The existing value was unparseable; prefer the numeric one.
            target["confidence"] = event_confidence
        else:
            target["confidence"] = max(target_confidence, event_confidence)
|
||||
|
||||
|
||||
def _copy_event(event: dict[str, Any]) -> dict[str, Any]:
|
||||
copied = dict(event)
|
||||
copied["screen_times"] = list(event.get("screen_times", []))
|
||||
copied["attributes"] = dict(event.get("attributes", {}))
|
||||
copied["evidence"] = {
|
||||
"clip_ids": list(event["evidence"].get("clip_ids", [])),
|
||||
"frame_paths": list(event["evidence"].get("frame_paths", [])),
|
||||
"frame_times": [dict(frame) for frame in event["evidence"].get("frame_times", [])],
|
||||
"clips": [dict(clip) for clip in event["evidence"].get("clips", [])],
|
||||
}
|
||||
return copied
|
||||
|
||||
|
||||
def _group_by_video(records: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
|
||||
grouped: dict[str, list[dict[str, Any]]] = {}
|
||||
for record in records:
|
||||
video_id = record.get("video_id")
|
||||
if video_id:
|
||||
grouped.setdefault(str(video_id), []).append(record)
|
||||
return grouped
|
||||
|
||||
|
||||
def _failed_clip_counts(clip_results: list[dict[str, Any]]) -> dict[str, int]:
|
||||
counts = {"parse_failed": 0, "inference_failed": 0}
|
||||
for result in clip_results:
|
||||
status = result.get("status")
|
||||
if status in counts:
|
||||
counts[str(status)] += 1
|
||||
return counts
|
||||
|
||||
|
||||
def _event_counts(events: list[dict[str, Any]]) -> dict[str, int]:
|
||||
counts: dict[str, int] = {}
|
||||
for event in events:
|
||||
event_type = str(event.get("event_type") or "unknown")
|
||||
counts[event_type] = counts.get(event_type, 0) + 1
|
||||
return dict(sorted(counts.items()))
|
||||
|
||||
|
||||
def _probe(video_record: dict[str, Any]) -> dict[str, Any]:
|
||||
excluded = {"video_id", "path", "source_path", "status", "retry_count", "last_error"}
|
||||
probe = {
|
||||
key: value
|
||||
for key, value in video_record.items()
|
||||
if key not in excluded
|
||||
}
|
||||
probe["status"] = video_record.get("status")
|
||||
if video_record.get("last_error") is not None:
|
||||
probe["last_error"] = video_record.get("last_error")
|
||||
return probe
|
||||
|
||||
|
||||
def _video_path(
|
||||
video_record: dict[str, Any],
|
||||
clip_results: list[dict[str, Any]],
|
||||
) -> str | None:
|
||||
path = video_record.get("path") or video_record.get("source_path")
|
||||
if path is not None:
|
||||
return str(path)
|
||||
for result in clip_results:
|
||||
if result.get("video_path") is not None:
|
||||
return str(result["video_path"])
|
||||
return None
|
||||
|
||||
|
||||
def _video_start_time(
|
||||
video_record: dict[str, Any],
|
||||
clip_results: list[dict[str, Any]],
|
||||
) -> Any:
|
||||
if video_record.get("video_start_time") is not None:
|
||||
return video_record.get("video_start_time")
|
||||
for result in clip_results:
|
||||
timeline = result.get("monitoring_timeline")
|
||||
if isinstance(timeline, dict) and timeline.get("video_start_time") is not None:
|
||||
return timeline.get("video_start_time")
|
||||
return None
|
||||
|
||||
|
||||
def _first_present(record: dict[str, Any], keys: tuple[str, ...]) -> Any:
|
||||
for key in keys:
|
||||
if record.get(key) is not None:
|
||||
return record.get(key)
|
||||
return None
|
||||
|
||||
|
||||
def _float_or_none(value: Any) -> float | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _min_number(left: Any, right: Any) -> float | None:
    """Return the smaller of the two values that convert to float.

    Values that cannot be converted are ignored; ``None`` is returned when
    neither converts.
    """
    candidates = []
    for raw in (left, right):
        number = _float_or_none(raw)
        if number is not None:
            candidates.append(number)
    if not candidates:
        return None
    return min(candidates)
|
||||
|
||||
|
||||
def _max_number(left: Any, right: Any) -> float | None:
    """Return the larger of the two values that convert to float.

    Values that cannot be converted are ignored; ``None`` is returned when
    neither converts.
    """
    candidates = []
    for raw in (left, right):
        number = _float_or_none(raw)
        if number is not None:
            candidates.append(number)
    if not candidates:
        return None
    return max(candidates)
|
||||
|
||||
|
||||
def _unique(values: list[Any]) -> list[Any]:
|
||||
seen = set()
|
||||
unique_values = []
|
||||
for value in values:
|
||||
marker = json.dumps(value, sort_keys=True) if isinstance(value, dict) else value
|
||||
if marker in seen:
|
||||
continue
|
||||
seen.add(marker)
|
||||
unique_values.append(value)
|
||||
return unique_values
|
||||
|
||||
|
||||
def _write_json(path: Path, payload: dict[str, Any]) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(
|
||||
json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
424
video_ai_analysis_poc/cli.py
Normal file
424
video_ai_analysis_poc/cli.py
Normal file
@@ -0,0 +1,424 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from .aggregator import aggregate_outputs
|
||||
from .clips import build_clip_records
|
||||
from .config import DEFAULT_CONFIG_PATH, load_config
|
||||
from .discovery import discover_videos
|
||||
from .ffmpeg_sampler import sample_video_frames
|
||||
from .hik_cloud import download_hik_cloud_recordings
|
||||
from .manifest import read_jsonl, write_manifest
|
||||
from .paths import stable_video_id
|
||||
from .probe import probe_video
|
||||
from .result_parser import build_clip_result
|
||||
from .timeline import DEFAULT_TIMEZONE, format_beijing_time, timeline_start_epoch
|
||||
from .vlm_client import infer_clip
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point for the batch pipeline.

    Stages run in order: source acquisition and probing, frame sampling, clip
    building, VLM inference, aggregation. ``--dry-run`` stops once the video
    manifest is written; ``--until clips`` / ``--until inference`` stop after
    the named stage. When ``output.resume`` is enabled in the config, the
    manifests already present in the output directory are reused.

    Returns 0 on completion; invalid arguments exit through ``parser.error``.
    """

    def _sampled_ids(frames: list[dict[str, object]]) -> set[str]:
        # Ids of videos that have at least one frame with status "sampled".
        return {
            str(frame.get("video_id"))
            for frame in frames
            if frame.get("status") == "sampled" and frame.get("video_id")
        }

    parser = argparse.ArgumentParser(
        description="Local video batch analysis PoC entrypoint."
    )
    parser.add_argument("--config", default=str(DEFAULT_CONFIG_PATH))
    parser.add_argument("--input-dir")
    parser.add_argument("--output-dir")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--until", choices=["clips", "inference"])
    parser.add_argument("--limit-clips", type=int)
    args = parser.parse_args(argv)

    config = load_config(
        args.config,
        input_dir=args.input_dir,
        output_dir=args.output_dir,
    )
    if args.dry_run and args.until:
        parser.error("--dry-run cannot be combined with --until")
    if args.limit_clips is not None and args.limit_clips < 0:
        parser.error("--limit-clips must be non-negative")

    output_dir = Path(config["output"]["dir"])
    output_dir.mkdir(parents=True, exist_ok=True)

    # --- Stage 1: discover / download sources and probe them -------------
    video_manifest_path = output_dir / "video_manifest.jsonl"
    resume_enabled = bool(config.get("output", {}).get("resume", False))
    records = _load_resume_records(video_manifest_path, resume=resume_enabled)
    record_indexes = {
        _record_key(record): index
        for index, record in enumerate(records)
        if _record_key(record) is not None
    }

    try:
        _acquire_source_records(
            config,
            output_dir,
            records,
            record_indexes,
            download_source=not args.dry_run,
        )
    except ValueError as exc:
        parser.error(str(exc))

    write_manifest(video_manifest_path, records)
    if args.dry_run:
        return 0

    # --- Stage 2: frame sampling -------------------------------------------
    clip_manifest_path = output_dir / "clip_manifest.jsonl"
    existing_clip_records = read_jsonl(clip_manifest_path) if resume_enabled else []
    existing_clip_video_ids = {
        str(record.get("video_id"))
        for record in existing_clip_records
        if record.get("video_id")
    }

    frame_manifest_path = output_dir / "frame_manifest.jsonl"
    frame_records = read_jsonl(frame_manifest_path) if resume_enabled else []
    timezone_name = str(config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE))
    backfilled_frame_video_ids = _backfill_frame_beijing_times(
        frame_records,
        records,
        timezone_name=timezone_name,
    )
    existing_sampled_video_ids = _sampled_ids(frame_records)
    changed_frame_video_ids: set[str] = set(backfilled_frame_video_ids)
    for record in records:
        if record.get("status") != "probed":
            continue
        video_id = str(record.get("video_id"))
        # With --until inference, videos that already have clips are not re-sampled.
        if args.until == "inference" and video_id in existing_clip_video_ids:
            continue
        if video_id in existing_sampled_video_ids:
            continue
        # Drop stale frame records for this video before sampling it again.
        frame_records = _without_video_records(frame_records, video_id)
        ffmpeg_config = dict(config["ffmpeg"])
        ffmpeg_config["timezone"] = timezone_name
        new_frames = sample_video_frames(
            record,
            output_dir,
            ffmpeg_config,
            manifest_path=None,
        )
        frame_records.extend(new_frames)
        changed_frame_video_ids.add(video_id)
    write_manifest(frame_manifest_path, frame_records)

    # --- Stage 3: clip building --------------------------------------------
    sampled_video_ids = _sampled_ids(frame_records)
    # Rebuild clips for videos whose frames changed or that have no clips yet.
    clip_rebuild_video_ids = changed_frame_video_ids | (
        sampled_video_ids - existing_clip_video_ids
    )
    clip_records = [
        record
        for record in existing_clip_records
        if str(record.get("video_id")) not in clip_rebuild_video_ids
    ]
    frames_to_build = [
        record
        for record in frame_records
        if str(record.get("video_id")) in clip_rebuild_video_ids
    ]
    clip_records.extend(build_clip_records(frames_to_build, config["clip"]))
    write_manifest(clip_manifest_path, clip_records)
    if args.until == "clips":
        return 0

    # --- Stage 4: inference, then Stage 5: aggregation ---------------------
    _run_inference(
        clip_records,
        records,
        output_dir,
        config,
        limit_clips=args.limit_clips,
        resume=resume_enabled,
    )
    if args.until == "inference":
        return 0
    aggregate_outputs(output_dir, config)
    return 0
|
||||
|
||||
|
||||
def _load_resume_records(path: Path, *, resume: bool) -> list[dict[str, object]]:
|
||||
if not resume:
|
||||
return []
|
||||
return read_jsonl(path)
|
||||
|
||||
|
||||
def _record_key(record: dict[str, object]) -> str | None:
|
||||
video_id = record.get("video_id")
|
||||
if video_id:
|
||||
return str(video_id)
|
||||
path = record.get("path")
|
||||
if path:
|
||||
return stable_video_id(str(path))
|
||||
return None
|
||||
|
||||
|
||||
def _acquire_source_records(
    config: dict[str, object],
    output_dir: Path,
    records: list[dict[str, object]],
    record_indexes: dict[str, int],
    *,
    download_source: bool = True,
) -> None:
    """Probe every source video and merge the results into *records* in place.

    Sources come from ``_source_video_records``. A source whose existing record
    already has status ``"probed"`` is skipped; otherwise it is probed and its
    record is replaced, or appended with *record_indexes* updated to match.
    """
    discovered = _source_video_records(
        config,
        output_dir,
        download_source=download_source,
    )
    for source_record in discovered:
        path = source_record.get("path")
        if not path:
            continue
        video_id = stable_video_id(str(path))
        existing_index = record_indexes.get(video_id)
        already_probed = (
            existing_index is not None
            and records[existing_index].get("status") == "probed"
        )
        if already_probed:
            continue

        probe_record = probe_video(
            str(path),
            timeout_seconds=config["ffprobe"]["timeout_seconds"],
        )
        # Probe fields override source fields; video_id is always the derived id.
        record = {**source_record, **probe_record, "video_id": video_id}
        if existing_index is not None:
            records[existing_index] = record
            continue
        record_indexes[video_id] = len(records)
        records.append(record)
|
||||
|
||||
|
||||
def _source_video_records(
|
||||
config: dict[str, object],
|
||||
output_dir: Path,
|
||||
*,
|
||||
download_source: bool = True,
|
||||
) -> list[dict[str, object]]:
|
||||
source_config = config.get("source", {})
|
||||
source_mode = "local"
|
||||
if isinstance(source_config, dict):
|
||||
source_mode = str(source_config.get("mode", "local"))
|
||||
|
||||
if source_mode == "local":
|
||||
videos = discover_videos(
|
||||
config["input"]["dir"],
|
||||
config["input"]["extensions"],
|
||||
recursive=config["input"]["recursive"],
|
||||
)
|
||||
return [{"path": path} for path in videos]
|
||||
|
||||
if source_mode == "hik_cloud":
|
||||
return [
|
||||
record
|
||||
for record in download_hik_cloud_recordings(
|
||||
config,
|
||||
output_dir,
|
||||
download=download_source,
|
||||
)
|
||||
if record.get("status") == "downloaded"
|
||||
]
|
||||
|
||||
raise ValueError(f"unsupported source.mode: {source_mode}")
|
||||
|
||||
|
||||
def _without_video_records(
|
||||
records: list[dict[str, object]],
|
||||
video_id: str,
|
||||
) -> list[dict[str, object]]:
|
||||
return [record for record in records if str(record.get("video_id")) != video_id]
|
||||
|
||||
|
||||
def _backfill_frame_beijing_times(
    frame_records: list[dict[str, object]],
    video_records: list[dict[str, object]],
    *,
    timezone_name: str,
) -> set[str]:
    """Fill in ``beijing_time`` on sampled frame records that lack it.

    Frame records are updated in place. Returns the ids of the videos for
    which at least one frame record was changed.
    """
    video_by_id: dict[str, dict[str, object]] = {}
    for video in video_records:
        if video.get("video_id"):
            video_by_id[str(video.get("video_id"))] = video

    touched: set[str] = set()
    for frame_record in frame_records:
        needs_backfill = (
            frame_record.get("status") == "sampled"
            and not frame_record.get("beijing_time")
        )
        if not needs_backfill:
            continue
        video_id = str(frame_record.get("video_id") or "")
        start_epoch = timeline_start_epoch(video_by_id.get(video_id, {}))
        beijing_time = format_beijing_time(
            start_epoch,
            offset_seconds=float(frame_record.get("offset_seconds") or 0),
            timezone_name=timezone_name,
        )
        if beijing_time is None:
            # No time could be derived for this frame; leave it untouched.
            continue
        frame_record["beijing_time"] = beijing_time
        touched.add(video_id)
    return touched
|
||||
|
||||
|
||||
def _run_inference(
    clip_records: list[dict[str, object]],
    video_records: list[dict[str, object]],
    output_dir: Path,
    config: dict[str, object],
    *,
    limit_clips: int | None,
    resume: bool,
) -> None:
    """Run inference for every clip that has no ``"ok"`` result yet.

    When *resume* is true, results already in ``clip_results.jsonl`` are
    loaded and their timelines refreshed from the current clip records. At
    most *limit_clips* clips are processed (no limit when ``None``). The
    results file is rewritten after every processed clip and once more at the
    end.
    """
    results_path = output_dir / "clip_results.jsonl"
    stored_results = read_jsonl(results_path) if resume else []
    clip_by_id = {
        str(clip.get("clip_id")): clip
        for clip in clip_records
        if clip.get("clip_id")
    }
    result_records = [
        _refresh_result_timeline(stored, clip_by_id, config)
        for stored in stored_results
    ]
    ok_clip_ids = {
        str(stored.get("clip_id"))
        for stored in result_records
        if stored.get("status") == "ok" and stored.get("clip_id")
    }
    video_by_id = {
        str(video.get("video_id")): video
        for video in video_records
        if video.get("video_id")
    }

    processed = 0
    for clip_record in clip_records:
        clip_id = str(clip_record.get("clip_id"))
        if clip_id in ok_clip_ids:
            continue
        if limit_clips is not None and processed >= limit_clips:
            break

        # Replace any earlier (failed) result for this clip with the new one.
        result_records = [
            stored for stored in result_records if str(stored.get("clip_id")) != clip_id
        ]
        video_record = video_by_id.get(str(clip_record.get("video_id")), {})
        result_records.append(
            _infer_and_parse_clip(clip_record, video_record, output_dir, config)
        )
        _write_jsonl_exact(results_path, result_records)
        processed += 1

    _write_jsonl_exact(results_path, result_records)
|
||||
|
||||
|
||||
def _refresh_result_timeline(
|
||||
result_record: dict[str, object],
|
||||
clip_by_id: dict[str, dict[str, object]],
|
||||
config: dict[str, object],
|
||||
) -> dict[str, object]:
|
||||
clip_record = clip_by_id.get(str(result_record.get("clip_id")))
|
||||
if not clip_record:
|
||||
return result_record
|
||||
if not _clip_has_beijing_timing(clip_record):
|
||||
return result_record
|
||||
timeline = dict(result_record.get("monitoring_timeline") or {})
|
||||
timeline.update(
|
||||
{
|
||||
"timezone": config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE),
|
||||
"clip_start_seconds": clip_record.get("clip_start_seconds"),
|
||||
"clip_end_seconds": clip_record.get("clip_end_seconds"),
|
||||
"clip_start_timecode": clip_record.get("clip_start_timecode"),
|
||||
"clip_end_timecode": clip_record.get("clip_end_timecode"),
|
||||
"clip_start_beijing_time": clip_record.get("clip_start_beijing_time"),
|
||||
"clip_end_beijing_time": clip_record.get("clip_end_beijing_time"),
|
||||
"frame_times": clip_record.get("frame_times", []),
|
||||
}
|
||||
)
|
||||
refreshed = dict(result_record)
|
||||
refreshed["monitoring_timeline"] = timeline
|
||||
return refreshed
|
||||
|
||||
|
||||
def _clip_has_beijing_timing(clip_record: dict[str, object]) -> bool:
|
||||
if clip_record.get("clip_start_beijing_time") or clip_record.get("clip_end_beijing_time"):
|
||||
return True
|
||||
for frame in clip_record.get("frame_times", []) or []:
|
||||
if isinstance(frame, dict) and frame.get("beijing_time"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _infer_and_parse_clip(
    clip_record: dict[str, object],
    video_record: dict[str, object],
    output_dir: Path,
    config: dict[str, object],
) -> dict[str, object]:
    """Run inference for one clip and turn the response into a clip result.

    Inference is attempted once plus ``schema.parse_retry`` additional times
    while the response parses as ``"parse_failed"``. Any exception raised by
    the inference call ends the attempts immediately and is reported as an
    ``"inference_failed"`` result rather than propagated.

    Returns:
        The first result whose status is not ``"parse_failed"``, otherwise the
        last ``"parse_failed"`` result.
    """
    schema_config = config.get("schema", {})
    parse_retry = 0
    if isinstance(schema_config, dict):
        parse_retry = int(schema_config.get("parse_retry", 0))

    # A negative retry count would otherwise mean zero attempts and trip the
    # sanity check below; always try at least once.
    attempts = max(parse_retry, 0) + 1
    result: dict[str, object] | None = None
    for attempt in range(attempts):
        try:
            inference = infer_clip(
                clip_record,
                output_dir,
                config["vlm"],
                config["prompt"],
            )
        except Exception as exc:
            # Deliberately broad: any inference failure becomes a recorded
            # per-clip failure so the remaining clips can still be processed.
            return build_clip_result(
                "",
                clip_record,
                video_record,
                config,
                processing={},
                status="inference_failed",
                error=str(exc),
            )

        result = build_clip_result(
            str(inference.get("raw_response", "")),
            clip_record,
            video_record,
            config,
            processing={
                "latency_ms": inference.get("latency_ms"),
                "http_status": inference.get("http_status"),
                "attempt": attempt + 1,
            },
        )
        if result.get("status") != "parse_failed":
            return result
    if result is None:
        raise RuntimeError("unreachable inference state")
    return result
|
||||
|
||||
|
||||
def _write_jsonl_exact(
|
||||
path: Path,
|
||||
records: list[dict[str, object]],
|
||||
) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with path.open("w", encoding="utf-8") as handle:
|
||||
for record in records:
|
||||
handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")
|
||||
|
||||
|
||||
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
158
video_ai_analysis_poc/clips.py
Normal file
158
video_ai_analysis_poc/clips.py
Normal file
@@ -0,0 +1,158 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .frames import seconds_to_timecode
|
||||
from .manifest import read_jsonl, write_manifest
|
||||
from .timeline import derive_time_from_reference
|
||||
|
||||
|
||||
def build_clip_records(
    frame_records: list[dict[str, Any]],
    clip_config: dict[str, Any],
) -> list[dict[str, Any]]:
    """Build clip records from the frame records whose status is ``"sampled"``.

    Frames are grouped by ``video_id`` and each video's clips are built
    separately; videos are processed in sorted id order.
    """
    sampled = [frame for frame in frame_records if frame.get("status") == "sampled"]
    frames_by_video: dict[str, list[dict[str, Any]]] = {}
    for frame in sampled:
        key = str(frame["video_id"])
        if key not in frames_by_video:
            frames_by_video[key] = []
        frames_by_video[key].append(frame)

    clips: list[dict[str, Any]] = []
    for video_id in sorted(frames_by_video):
        clips.extend(_build_video_clips(video_id, frames_by_video[video_id], clip_config))
    return clips
|
||||
|
||||
|
||||
def build_clip_records_from_manifest(
    frame_manifest_path: str | Path,
    clip_manifest_path: str | Path,
    clip_config: dict[str, Any],
) -> list[dict[str, Any]]:
    """Build clips from a frame manifest file and write them to a clip manifest.

    Returns the clip records that were written.
    """
    frame_records = read_jsonl(frame_manifest_path)
    clips = build_clip_records(frame_records, clip_config)
    write_manifest(clip_manifest_path, clips)
    return clips
|
||||
|
||||
|
||||
def _build_video_clips(
|
||||
video_id: str,
|
||||
frames: list[dict[str, Any]],
|
||||
clip_config: dict[str, Any],
|
||||
) -> list[dict[str, Any]]:
|
||||
sorted_frames = sorted(frames, key=lambda frame: float(frame["offset_seconds"]))
|
||||
if not sorted_frames:
|
||||
return []
|
||||
|
||||
length_seconds = float(clip_config.get("length_seconds", 10))
|
||||
stride_seconds = float(clip_config.get("stride_seconds", length_seconds))
|
||||
frames_per_clip = int(clip_config.get("frames_per_clip", 8))
|
||||
min_frames_per_clip = int(clip_config.get("min_frames_per_clip", 4))
|
||||
max_offset = max(float(frame["offset_seconds"]) for frame in sorted_frames)
|
||||
timeline_end = _estimated_timeline_end(sorted_frames)
|
||||
|
||||
clips = []
|
||||
clip_index = 1
|
||||
start = 0.0
|
||||
while start <= max_offset:
|
||||
end = min(start + length_seconds, timeline_end)
|
||||
in_window = [
|
||||
frame
|
||||
for frame in sorted_frames
|
||||
if start <= float(frame["offset_seconds"]) < end
|
||||
]
|
||||
if len(in_window) >= min_frames_per_clip:
|
||||
selected_frames = _uniform_sample(in_window, frames_per_clip)
|
||||
start_beijing_time, end_beijing_time = _clip_beijing_time_range(
|
||||
in_window,
|
||||
start,
|
||||
end,
|
||||
)
|
||||
clip = {
|
||||
"video_id": video_id,
|
||||
"clip_id": f"{video_id}_c{clip_index:06d}",
|
||||
"clip_start_seconds": round(start, 6),
|
||||
"clip_end_seconds": round(end, 6),
|
||||
"clip_start_timecode": seconds_to_timecode(start),
|
||||
"clip_end_timecode": seconds_to_timecode(end),
|
||||
"frame_times": [_frame_time(frame) for frame in selected_frames],
|
||||
"status": "pending",
|
||||
"retry_count": 0,
|
||||
"last_error": None,
|
||||
}
|
||||
if start_beijing_time is not None:
|
||||
clip["clip_start_beijing_time"] = start_beijing_time
|
||||
if end_beijing_time is not None:
|
||||
clip["clip_end_beijing_time"] = end_beijing_time
|
||||
clips.append(clip)
|
||||
clip_index += 1
|
||||
start += stride_seconds
|
||||
return clips
|
||||
|
||||
|
||||
def _estimated_timeline_end(frames: list[dict[str, Any]]) -> float:
|
||||
offsets = [float(frame["offset_seconds"]) for frame in frames]
|
||||
if len(offsets) < 2:
|
||||
return offsets[-1]
|
||||
intervals = [
|
||||
current - previous
|
||||
for previous, current in zip(offsets, offsets[1:])
|
||||
if current > previous
|
||||
]
|
||||
if not intervals:
|
||||
return offsets[-1]
|
||||
return offsets[-1] + min(intervals)
|
||||
|
||||
|
||||
def _uniform_sample(
|
||||
frames: list[dict[str, Any]],
|
||||
frames_per_clip: int,
|
||||
) -> list[dict[str, Any]]:
|
||||
if len(frames) <= frames_per_clip:
|
||||
return frames
|
||||
if frames_per_clip <= 1:
|
||||
return [frames[0]]
|
||||
last_index = len(frames) - 1
|
||||
indexes = [
|
||||
round(position * last_index / (frames_per_clip - 1))
|
||||
for position in range(frames_per_clip)
|
||||
]
|
||||
return [frames[index] for index in indexes]
|
||||
|
||||
|
||||
def _frame_time(frame: dict[str, Any]) -> dict[str, Any]:
|
||||
record = {
|
||||
"frame_id": frame.get("frame_id"),
|
||||
"frame_path": frame.get("frame_path"),
|
||||
"offset_seconds": frame.get("offset_seconds"),
|
||||
"timecode": frame.get("timecode"),
|
||||
"pts_time": frame.get("pts_time"),
|
||||
}
|
||||
if frame.get("beijing_time") is not None:
|
||||
record["beijing_time"] = frame.get("beijing_time")
|
||||
return record
|
||||
|
||||
|
||||
def _clip_beijing_time_range(
|
||||
frames: list[dict[str, Any]],
|
||||
start: float,
|
||||
end: float,
|
||||
) -> tuple[str | None, str | None]:
|
||||
for frame in frames:
|
||||
reference_time = frame.get("beijing_time")
|
||||
if not reference_time:
|
||||
continue
|
||||
reference_offset = frame.get("offset_seconds")
|
||||
return (
|
||||
derive_time_from_reference(
|
||||
str(reference_time),
|
||||
reference_offset_seconds=reference_offset,
|
||||
target_offset_seconds=start,
|
||||
),
|
||||
derive_time_from_reference(
|
||||
str(reference_time),
|
||||
reference_offset_seconds=reference_offset,
|
||||
target_offset_seconds=end,
|
||||
),
|
||||
)
|
||||
return None, None
|
||||
278
video_ai_analysis_poc/config.py
Normal file
278
video_ai_analysis_poc/config.py
Normal file
@@ -0,0 +1,278 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .paths import resolve_path, validate_output_dir
|
||||
|
||||
|
||||
DEFAULT_CONFIG_PATH = Path(__file__).resolve().parent.parent / "config" / "local_batch.yaml"
|
||||
|
||||
|
||||
def load_config(
    config_path: str | Path = DEFAULT_CONFIG_PATH,
    *,
    input_dir: str | Path | None = None,
    output_dir: str | Path | None = None,
) -> dict[str, Any]:
    """Load the batch configuration and normalize its path-related fields.

    The file is parsed, merged over the built-in defaults, and the input and
    output directories are resolved with ``resolve_path`` against a base
    directory: the parent of a folder named ``config`` when the file lives in
    one, otherwise the file's own folder.

    Args:
        config_path: Configuration file to read.
        input_dir: Optional override for ``input.dir``.
        output_dir: Optional override for ``output.dir``.

    Returns:
        The merged configuration with resolved directories, normalized
        extensions, a boolean ``input.recursive`` and an integer
        ``ffprobe.timeout_seconds``.

    Raises:
        FileNotFoundError: If the configuration file does not exist.
    """
    config_file = Path(config_path).expanduser().resolve(strict=False)
    settings = _with_defaults(_parse_simple_yaml(config_file))
    source, target = settings["input"], settings["output"]

    config_folder = config_file.parent
    anchor_dir = config_folder.parent if config_folder.name == "config" else config_folder

    # Explicit arguments win over whatever the file says.
    if input_dir is not None:
        source["dir"] = str(input_dir)
    if output_dir is not None:
        target["dir"] = str(output_dir)

    source["dir"] = str(resolve_path(source["dir"], base_dir=anchor_dir))
    target["dir"] = str(resolve_path(target["dir"], base_dir=anchor_dir))
    validate_output_dir(source["dir"], target["dir"])

    source["extensions"] = _normalize_extensions(source.get("extensions", []))
    source["recursive"] = bool(source.get("recursive", True))
    probe = settings.setdefault("ffprobe", {})
    probe["timeout_seconds"] = int(probe.get("timeout_seconds", 30))
    return settings
|
||||
|
||||
|
||||
def _with_defaults(config: dict[str, Any]) -> dict[str, Any]:
|
||||
merged: dict[str, Any] = {
|
||||
"input": {
|
||||
"dir": "./videos",
|
||||
"recursive": True,
|
||||
"extensions": [".mp4", ".mov", ".mkv", ".avi", ".flv", ".ts", ".m4v"],
|
||||
},
|
||||
"output": {
|
||||
"dir": "./outputs/local-batch",
|
||||
"overwrite": False,
|
||||
"resume": True,
|
||||
"keep_frames": True,
|
||||
},
|
||||
"source": {"mode": "local"},
|
||||
"hik_cloud": {
|
||||
"api_base_url": "https://api2.hik-cloud.com",
|
||||
"download_path": "/v1/carrier/cstorage/open/play/download",
|
||||
"access_token": None,
|
||||
"access_token_env": "HIK_CLOUD_ACCESS_TOKEN",
|
||||
"devices": [],
|
||||
"time_ranges": [],
|
||||
"chunk_seconds": 600,
|
||||
"timeout_seconds": 60,
|
||||
"download_timeout_seconds": 600,
|
||||
},
|
||||
"ffprobe": {"timeout_seconds": 30},
|
||||
"ffmpeg": {
|
||||
"prefer_nvdec": True,
|
||||
"allow_cpu_fallback": False,
|
||||
"hwaccel": "cuda",
|
||||
"codec_decoders": {"h264": "h264_cuvid", "hevc": "hevc_cuvid"},
|
||||
"frame_fps": 1,
|
||||
"frame_width": 640,
|
||||
"jpeg_quality": 4,
|
||||
"timeout_seconds_per_video": 3600,
|
||||
},
|
||||
"clip": {
|
||||
"length_seconds": 10,
|
||||
"stride_seconds": 10,
|
||||
"frames_per_clip": 8,
|
||||
"min_frames_per_clip": 4,
|
||||
},
|
||||
"vlm": {
|
||||
"api_base_url": "http://localhost:8679",
|
||||
"chat_completions_path": "/v1/chat/completions",
|
||||
"model": "memai-zhengxin-v3-20260413",
|
||||
"timeout_seconds": 120,
|
||||
"max_tokens": 512,
|
||||
"temperature": 0,
|
||||
"batch_size": 1,
|
||||
"image_transport": "data_uri",
|
||||
"retries": 1,
|
||||
},
|
||||
"prompt": {
|
||||
"system": "You are a store video analysis assistant. Return strict JSON only.",
|
||||
"user": "Analyze this clip. Return events and screen_time. If no event, return events: [].",
|
||||
},
|
||||
"schema": {
|
||||
"version": "local-batch-v1",
|
||||
"event_types": [
|
||||
"customer_enter",
|
||||
"customer_leave",
|
||||
"queue_detected",
|
||||
"staff_absent",
|
||||
"staff_present",
|
||||
"area_crowded",
|
||||
"abnormal_behavior",
|
||||
"unknown",
|
||||
],
|
||||
"require_strict_json": True,
|
||||
"parse_retry": 1,
|
||||
"merge_gap_seconds": 30,
|
||||
},
|
||||
"runtime": {"timezone": "Asia/Shanghai", "log_level": "INFO"},
|
||||
}
|
||||
for section, values in config.items():
|
||||
if isinstance(values, dict) and isinstance(merged.get(section), dict):
|
||||
merged[section].update(values)
|
||||
else:
|
||||
merged[section] = values
|
||||
return merged
|
||||
|
||||
|
||||
def _normalize_extensions(extensions: list[str]) -> list[str]:
|
||||
normalized = []
|
||||
for extension in extensions:
|
||||
value = str(extension).lower()
|
||||
if not value.startswith("."):
|
||||
value = f".{value}"
|
||||
normalized.append(value)
|
||||
return normalized
|
||||
|
||||
|
||||
def _parse_simple_yaml(path: Path) -> dict[str, Any]:
    """Parse the restricted YAML subset used by the configuration file.

    Handled constructs: nested mappings, block lists (``- item`` and
    ``- key: value``), block scalars whose marker ``_is_block_scalar``
    accepts, and lines that are entirely a ``#`` comment. Nesting is decided
    by the number of leading spaces; scalar text goes through
    ``_parse_scalar``.

    Args:
        path: Configuration file to read (UTF-8).

    Returns:
        The parsed document as a nested dict.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: For a list item outside a list, a mapping entry directly
            inside a list, or a line without a ``:`` separator.
    """
    if not path.exists():
        raise FileNotFoundError(f"config file not found: {path}")

    document: dict[str, Any] = {}
    # Each entry pairs a container with the indent of the line that opened it.
    open_containers: list[tuple[int, dict[str, Any] | list[Any]]] = [(-1, document)]
    lines = path.read_text(encoding="utf-8").splitlines()

    def new_child(position: int) -> dict[str, Any] | list[Any]:
        # A key with no value opens a list when the next meaningful line is
        # a list item, and a mapping otherwise.
        upcoming = _next_stripped(lines, position)
        return [] if upcoming and upcoming.startswith("- ") else {}

    cursor = 0
    while cursor < len(lines):
        line = lines[cursor].rstrip()
        text = line.strip()
        if not text or text.startswith("#"):
            cursor += 1
            continue

        depth = len(line) - len(line.lstrip(" "))
        # Close every container opened at this indent or deeper.
        while depth <= open_containers[-1][0]:
            open_containers.pop()
        container = open_containers[-1][1]

        if text.startswith("- "):
            if not isinstance(container, list):
                raise ValueError(f"list item without list parent: {line}")
            entry = text[2:].strip()
            if ":" not in entry:
                container.append(_parse_scalar(entry))
            else:
                name, _, remainder = entry.partition(":")
                name, remainder = name.strip(), remainder.strip()
                item_mapping: dict[str, Any] = {}
                container.append(item_mapping)
                # Later keys of the same item are indented past the dash.
                open_containers.append((depth, item_mapping))
                if remainder:
                    item_mapping[name] = _parse_scalar(remainder)
                else:
                    nested = new_child(cursor)
                    item_mapping[name] = nested
                    open_containers.append((depth + 2, nested))
            cursor += 1
            continue

        if not isinstance(container, dict):
            raise ValueError(f"mapping entry inside list is not supported: {line}")

        if ":" not in text:
            raise ValueError(f"unsupported config line: {line}")

        name, _, remainder = text.partition(":")
        name, remainder = name.strip(), remainder.strip()
        if _is_block_scalar(remainder):
            # The block parser returns the index of the first line after it.
            container[name], cursor = _parse_block_scalar(lines, cursor, depth, remainder)
            continue
        if remainder:
            container[name] = _parse_scalar(remainder)
        else:
            nested = new_child(cursor)
            container[name] = nested
            open_containers.append((depth, nested))
        cursor += 1

    return document
|
||||
|
||||
|
||||
def _next_stripped(lines: list[str], current_index: int) -> str | None:
|
||||
for raw_line in lines[current_index + 1 :]:
|
||||
stripped = raw_line.strip()
|
||||
if stripped and not raw_line.lstrip().startswith("#"):
|
||||
return stripped
|
||||
return None
|
||||
|
||||
|
||||
def _is_block_scalar(value: str) -> bool:
|
||||
return value in {">", ">-", "|", "|-"}
|
||||
|
||||
|
||||
def _parse_block_scalar(
|
||||
lines: list[str],
|
||||
start_index: int,
|
||||
parent_indent: int,
|
||||
marker: str,
|
||||
) -> tuple[str, int]:
|
||||
content_lines: list[str] = []
|
||||
content_indent: int | None = None
|
||||
index = start_index + 1
|
||||
|
||||
while index < len(lines):
|
||||
raw_line = lines[index].rstrip()
|
||||
stripped = raw_line.strip()
|
||||
if not stripped:
|
||||
content_lines.append("")
|
||||
index += 1
|
||||
continue
|
||||
|
||||
indent = len(raw_line) - len(raw_line.lstrip(" "))
|
||||
if indent <= parent_indent:
|
||||
break
|
||||
if content_indent is None:
|
||||
content_indent = indent
|
||||
content_lines.append(raw_line[content_indent:])
|
||||
index += 1
|
||||
|
||||
if marker.endswith("-"):
|
||||
while content_lines and content_lines[-1] == "":
|
||||
content_lines.pop()
|
||||
return "\n".join(content_lines), index
|
||||
|
||||
|
||||
def _parse_scalar(value: str) -> Any:
|
||||
lower = value.lower()
|
||||
if lower == "true":
|
||||
return True
|
||||
if lower == "false":
|
||||
return False
|
||||
if lower in {"null", "none"}:
|
||||
return None
|
||||
if value.startswith("[") and value.endswith("]"):
|
||||
parsed = ast.literal_eval(value)
|
||||
if not isinstance(parsed, list):
|
||||
raise ValueError(f"expected list value: {value}")
|
||||
return parsed
|
||||
if (value.startswith('"') and value.endswith('"')) or (
|
||||
value.startswith("'") and value.endswith("'")
|
||||
):
|
||||
return ast.literal_eval(value)
|
||||
try:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
return float(value)
|
||||
except ValueError:
|
||||
return value
|
||||
27
video_ai_analysis_poc/discovery.py
Normal file
27
video_ai_analysis_poc/discovery.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def discover_videos(
    input_dir: str | Path,
    extensions: list[str],
    *,
    recursive: bool,
) -> list[Path]:
    """List the video files under ``input_dir`` in sorted order.

    Args:
        input_dir: Directory to scan (``~`` is expanded).
        extensions: Accepted suffixes, with or without the leading dot;
            matching is case-insensitive.
        recursive: Scan sub-directories too when true, otherwise only the
            directory's direct children.

    Returns:
        Sorted paths of the regular files whose suffix is accepted.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist.
        NotADirectoryError: If ``input_dir`` is not a directory.
    """
    base = Path(input_dir).expanduser()
    if not base.exists():
        raise FileNotFoundError(f"input dir not found: {base}")
    if not base.is_dir():
        raise NotADirectoryError(f"input path is not a directory: {base}")

    wanted: set[str] = set()
    for raw in extensions:
        lowered = raw.lower()
        wanted.add(lowered if raw.startswith(".") else f".{lowered}")

    candidates = base.rglob("*") if recursive else base.iterdir()
    matches = [
        entry
        for entry in candidates
        if entry.is_file() and entry.suffix.lower() in wanted
    ]
    matches.sort()
    return matches
|
||||
243
video_ai_analysis_poc/ffmpeg_sampler.py
Normal file
243
video_ai_analysis_poc/ffmpeg_sampler.py
Normal file
@@ -0,0 +1,243 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .frames import build_frame_records
|
||||
from .manifest import read_jsonl, write_manifest
|
||||
from .timeline import DEFAULT_TIMEZONE, timeline_start_epoch
|
||||
|
||||
|
||||
NVDEC_CODECS = {"h264", "hevc"}
|
||||
|
||||
|
||||
def build_sample_command(
    video_path: str | Path,
    output_dir: str | Path,
    video_id: str,
    ffmpeg_config: dict[str, Any],
    *,
    codec_name: str | None,
    max_frames: int | None = None,
    max_duration_seconds: float | None = None,
) -> list[str]:
    """Assemble the ffmpeg argument list that samples JPEG frames from a video.

    Frames are written to ``<output_dir>/frames/<video_id>/%06d.jpg``. The
    hardware decoder options are added when ``prefer_nvdec`` is on, the codec
    is in ``NVDEC_CODECS`` and ``codec_decoders`` names a decoder for it.

    Args:
        video_path: Source video (``~`` is expanded).
        output_dir: Root output directory (``~`` is expanded).
        video_id: Name of the per-video frame folder.
        ffmpeg_config: Reads ``prefer_nvdec``, ``allow_cpu_fallback``,
            ``codec_decoders``, ``hwaccel``, ``frame_fps``, ``frame_width``
            and ``jpeg_quality``.
        codec_name: Codec of the video stream; matched case-insensitively.
        max_frames: When positive, adds ``-frames:v``.
        max_duration_seconds: When positive, adds ``-t``.

    Returns:
        The command as a list of strings, ready for ``subprocess.run``.

    Raises:
        ValueError: If the hardware decoder cannot be used and
            ``allow_cpu_fallback`` is off.
    """
    frame_pattern = Path(output_dir).expanduser() / "frames" / video_id / "%06d.jpg"
    command = ["ffmpeg", "-hide_banner", "-y"]

    codec = (codec_name or "").lower()
    prefer_nvdec = bool(ffmpeg_config.get("prefer_nvdec", True))
    allow_cpu_fallback = bool(ffmpeg_config.get("allow_cpu_fallback", False))
    decoders = ffmpeg_config.get("codec_decoders", {})
    decoder = decoders.get(codec) if isinstance(decoders, dict) else None

    if prefer_nvdec and codec in NVDEC_CODECS and decoder:
        command += [
            "-hwaccel",
            str(ffmpeg_config.get("hwaccel", "cuda")),
            "-c:v",
            str(decoder),
        ]
    elif not allow_cpu_fallback:
        raise ValueError(
            f"NVDEC decoder is required for codec {codec_name!r}; CPU fallback is disabled"
        )

    frame_fps = ffmpeg_config.get("frame_fps", 1)
    frame_width = ffmpeg_config.get("frame_width", 640)
    jpeg_quality = ffmpeg_config.get("jpeg_quality", 4)

    command += ["-i", str(Path(video_path).expanduser())]
    if max_duration_seconds is not None and max_duration_seconds > 0:
        # Fixed-point text: the "g" format keeps only six significant digits
        # and switches to exponent notation for large values, which is not a
        # valid ffmpeg duration.
        duration_text = f"{max_duration_seconds:.6f}".rstrip("0").rstrip(".")
        command += ["-t", duration_text]
    command += [
        "-vf",
        f"fps={frame_fps},scale={frame_width}:-2",
        "-q:v",
        str(jpeg_quality),
    ]
    if max_frames is not None and max_frames > 0:
        command += ["-frames:v", str(max_frames)]
    command.append(str(frame_pattern))
    return command
|
||||
|
||||
|
||||
def sample_video_frames(
    video_record: dict[str, Any],
    output_dir: str | Path,
    ffmpeg_config: dict[str, Any],
    *,
    manifest_path: str | Path | None = None,
) -> list[dict[str, Any]]:
    """Run ffmpeg for one video and return its frame records.

    Frames are written to ``<output_dir>/frames/<video_id>/``. Failures are
    reported as a single ``sample_failed`` record instead of an exception:
    a command that cannot be built, a missing video path, a timeout, an
    ffmpeg binary that cannot be started, or a non-zero exit that left fewer
    frames than expected. A non-zero exit that still produced the expected
    number of frames (or any frames when no expectation exists) counts as
    success.

    Args:
        video_record: Needs ``video_id``; the file is taken from ``path`` or
            ``source_path``; ``codec_name`` and the duration fields are
            optional.
        output_dir: Root output directory.
        ffmpeg_config: ffmpeg settings; ``timezone`` is also read from here,
            falling back to ``DEFAULT_TIMEZONE``.
        manifest_path: When given, this video's records in that manifest are
            replaced with the new ones.

    Returns:
        The frame records, or a one-element list holding the failure record.
    """
    video_id = str(video_record["video_id"])
    output_root = Path(output_dir).expanduser().resolve(strict=False)
    frame_dir = output_root / "frames" / video_id
    frame_dir.mkdir(parents=True, exist_ok=True)

    def collect_records(
        start_epoch: Any,
        timezone_name: str,
    ) -> list[dict[str, Any]]:
        # Build records from whatever JPEG files are in the frame folder.
        return build_frame_records(
            video_id,
            output_root,
            frame_dir.glob("*.jpg"),
            frame_fps=float(ffmpeg_config.get("frame_fps", 1)),
            timeline_start_epoch=start_epoch,
            timezone_name=timezone_name,
        )

    try:
        max_frames = _max_output_frames(video_record, ffmpeg_config)
        timezone_name = str(ffmpeg_config.get("timezone", DEFAULT_TIMEZONE))
        start_epoch = timeline_start_epoch(video_record)
        video_path = video_record.get("path") or video_record.get("source_path")
        if not video_path:
            # Without this check Path(None) raises an uncaught TypeError.
            raise ValueError(f"video record {video_id!r} has no path or source_path")
        command = build_sample_command(
            video_path,
            output_root,
            video_id,
            ffmpeg_config,
            codec_name=video_record.get("codec_name"),
            max_frames=max_frames,
            max_duration_seconds=_record_duration_seconds(video_record),
        )
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            timeout=int(ffmpeg_config.get("timeout_seconds_per_video", 3600)),
        )
        records = collect_records(start_epoch, timezone_name)
        _attach_success_evidence(
            records,
            command,
            stderr=completed.stderr,
        )
    except subprocess.CalledProcessError as exc:
        # ffmpeg may exit non-zero after writing every expected frame.
        records = collect_records(start_epoch, timezone_name)
        if records and (max_frames is None or len(records) >= max_frames):
            _attach_success_evidence(
                records,
                command,
                stderr=exc.stderr,
            )
        else:
            records = [_failure_record(video_id, exc)]
    except (subprocess.TimeoutExpired, ValueError, OSError) as exc:
        # OSError covers an ffmpeg executable that is missing or cannot run.
        records = [_failure_record(video_id, exc)]

    if manifest_path is not None:
        _replace_video_records(Path(manifest_path), video_id, records)
    return records
|
||||
|
||||
|
||||
def _replace_video_records(
    manifest_path: Path,
    video_id: str,
    new_records: list[dict[str, Any]],
) -> None:
    """Swap one video's records inside a manifest file.

    Records of other videos keep their order; the new records are appended
    after them and the manifest is rewritten.

    Args:
        manifest_path: Manifest to read and rewrite.
        video_id: Video whose existing records are dropped.
        new_records: Records that take their place.
    """
    kept = [
        row
        for row in read_jsonl(manifest_path)
        if str(row.get("video_id")) != video_id
    ]
    kept.extend(new_records)
    write_manifest(manifest_path, kept)
|
||||
|
||||
|
||||
def _failure_record(video_id: str, exc: BaseException) -> dict[str, Any]:
    """Build the placeholder record that marks a failed sampling run.

    Args:
        video_id: Video that could not be sampled.
        exc: The error; its text is stored in ``last_error``.

    Returns:
        A frame-shaped record with status ``"sample_failed"`` and every
        frame-specific field set to ``None``.
    """
    failure: dict[str, Any] = {"video_id": video_id}
    # No frame exists, so the per-frame fields stay empty.
    failure.update(
        dict.fromkeys(
            ("frame_id", "frame_path", "offset_seconds", "timecode", "pts_time")
        )
    )
    failure.update(
        status="sample_failed",
        retry_count=0,
        last_error=_error_text(exc),
    )
    return failure
|
||||
|
||||
|
||||
def _attach_success_evidence(
    records: list[dict[str, Any]],
    command: list[str],
    *,
    stderr: str | None,
) -> None:
    """Annotate each record, in place, with how its frames were produced.

    Adds ``ffmpeg_command``, ``decoder`` (the value after ``-c:v``),
    ``hwaccel`` (the value after ``-hwaccel``) and ``stderr_summary``.

    Args:
        records: Frame records to update.
        command: The ffmpeg argument list that was run.
        stderr: Captured stderr of the run, if any.
    """
    decoder = _command_value_after(command, "-c:v")
    hwaccel = _command_value_after(command, "-hwaccel")
    summary = _stderr_summary(stderr)
    for entry in records:
        entry["ffmpeg_command"] = command
        entry["decoder"] = decoder
        entry["hwaccel"] = hwaccel
        entry["stderr_summary"] = summary
|
||||
|
||||
|
||||
def _command_value_after(command: list[str], flag: str) -> str | None:
|
||||
try:
|
||||
index = command.index(flag)
|
||||
except ValueError:
|
||||
return None
|
||||
if index + 1 >= len(command):
|
||||
return None
|
||||
return command[index + 1]
|
||||
|
||||
|
||||
def _stderr_summary(stderr: str | None, *, limit: int = 2000) -> str:
|
||||
if not stderr:
|
||||
return ""
|
||||
text = stderr.strip()
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return text[:limit]
|
||||
|
||||
|
||||
def _error_text(exc: BaseException) -> str:
    """Turn a sampling error into the text stored in ``last_error``.

    Args:
        exc: The raised exception.

    Returns:
        For a failed process, its stderr, else its stdout, else the
        exception text; for a timeout, a fixed message with the timeout
        value; otherwise ``str(exc)``.
    """
    if isinstance(exc, subprocess.TimeoutExpired):
        return f"ffmpeg timed out after {exc.timeout}s"
    if isinstance(exc, subprocess.CalledProcessError):
        detail = exc.stderr or exc.stdout or exc
        return str(detail)
    return str(exc)
|
||||
|
||||
|
||||
def _max_output_frames(
    video_record: dict[str, Any],
    ffmpeg_config: dict[str, Any],
) -> int | None:
    """Compute the upper bound on frames ffmpeg should write for a video.

    Args:
        video_record: Source of the duration (see ``_record_duration_seconds``).
        ffmpeg_config: Source of ``frame_fps`` (default 1).

    Returns:
        ``ceil(duration * fps) + 1``, never less than 1, or ``None`` when
        the frame rate or the duration is missing or not positive.
    """
    rate = _optional_float(ffmpeg_config.get("frame_fps", 1))
    if rate is None or rate <= 0:
        return None
    span = _record_duration_seconds(video_record)
    if span is None or span <= 0:
        return None
    # One extra frame of slack on top of the rounded-up frame count.
    expected = math.ceil(span * rate) + 1
    return expected if expected > 1 else 1
|
||||
|
||||
|
||||
def _record_duration_seconds(video_record: dict[str, Any]) -> float | None:
    """Work out a video's duration from its record.

    The ``actual_begin``/``actual_end`` pair is tried first, then
    ``requested_begin``/``requested_end``; a pair is used only when both
    values exist and the end is later than the begin. Failing both, the
    ``duration_seconds`` field is used.

    Args:
        video_record: A video manifest record.

    Returns:
        The duration in seconds, or ``None`` when nothing usable is present.
    """
    key_pairs = (
        ("actual_begin", "actual_end"),
        ("requested_begin", "requested_end"),
    )
    for start_key, stop_key in key_pairs:
        started = _optional_float(video_record.get(start_key))
        stopped = _optional_float(video_record.get(stop_key))
        if started is None or stopped is None:
            continue
        if stopped > started:
            return stopped - started
    return _optional_float(video_record.get("duration_seconds"))
|
||||
|
||||
|
||||
def _optional_float(value: Any) -> float | None:
|
||||
if value is None or value == "":
|
||||
return None
|
||||
return float(value)
|
||||
59
video_ai_analysis_poc/frames.py
Normal file
59
video_ai_analysis_poc/frames.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
from .timeline import DEFAULT_TIMEZONE, format_beijing_time
|
||||
|
||||
|
||||
def seconds_to_timecode(seconds: float | int | None) -> str | None:
    """Format an offset in seconds as ``HH:MM:SS``.

    The fractional part is dropped. Hours are not capped at two digits.

    Args:
        seconds: Offset in seconds, or ``None``.

    Returns:
        The timecode, or ``None`` when ``seconds`` is ``None``.
    """
    if seconds is None:
        return None
    whole = int(float(seconds))
    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
|
||||
|
||||
|
||||
def build_frame_records(
    video_id: str,
    output_dir: str | Path,
    frame_paths: Iterable[str | Path],
    *,
    frame_fps: float,
    timeline_start_epoch: float | int | str | None = None,
    timezone_name: str = DEFAULT_TIMEZONE,
) -> list[dict[str, Any]]:
    """Create one manifest record per sampled frame file.

    Frames are ordered by sorted path. The n-th frame (counting from zero)
    gets the offset ``n / frame_fps`` rounded to six decimals, which is also
    stored as ``pts_time``.

    Args:
        video_id: Video the frames belong to.
        output_dir: Root directory that frame paths are stored relative to.
        frame_paths: Paths of the frame image files.
        frame_fps: Sampling rate used to derive offsets.
        timeline_start_epoch: Passed to ``format_beijing_time`` as the
            timeline start.
        timezone_name: Passed to ``format_beijing_time``.

    Returns:
        Records with status ``"sampled"``; ``beijing_time`` is included only
        when ``format_beijing_time`` returns a value.
    """
    root = Path(output_dir).expanduser().resolve(strict=False)
    ordered_paths = sorted(Path(item) for item in frame_paths)
    collected = []
    for position, image_path in enumerate(ordered_paths):
        elapsed = round(position / frame_fps, 6)
        ordinal = position + 1  # frame ids are 1-based
        entry = {
            "video_id": video_id,
            "frame_id": f"{video_id}_f{ordinal:06d}",
            "frame_path": _relative_frame_path(image_path, root),
            "offset_seconds": elapsed,
            "timecode": seconds_to_timecode(elapsed),
            "pts_time": elapsed,
            "status": "sampled",
            "retry_count": 0,
            "last_error": None,
        }
        wall_clock = format_beijing_time(
            timeline_start_epoch,
            offset_seconds=elapsed,
            timezone_name=timezone_name,
        )
        if wall_clock is not None:
            entry["beijing_time"] = wall_clock
        collected.append(entry)
    return collected
|
||||
|
||||
|
||||
def _relative_frame_path(frame_path: Path, base_dir: Path) -> str:
|
||||
resolved = frame_path.expanduser().resolve(strict=False)
|
||||
try:
|
||||
return resolved.relative_to(base_dir).as_posix()
|
||||
except ValueError:
|
||||
return resolved.as_posix()
|
||||
450
video_ai_analysis_poc/hik_cloud.py
Normal file
450
video_ai_analysis_poc/hik_cloud.py
Normal file
@@ -0,0 +1,450 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
import urllib.request
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from .manifest import read_jsonl, write_manifest
|
||||
from .paths import hik_cloud_download_path
|
||||
|
||||
|
||||
DEFAULT_TIMEZONE = "Asia/Shanghai"
|
||||
DEFAULT_CHUNK_SECONDS = 600
|
||||
MAX_CHUNK_SECONDS = 3600
|
||||
DEFAULT_API_BASE_URL = "https://api2.hik-cloud.com"
|
||||
DEFAULT_DOWNLOAD_PATH = "/v1/carrier/cstorage/open/play/download"
|
||||
DEFAULT_TIMEOUT_SECONDS = 60
|
||||
DEFAULT_DOWNLOAD_TIMEOUT_SECONDS = 600
|
||||
DOWNLOAD_MANIFEST_NAME = "hik_cloud_download_manifest.jsonl"
|
||||
NO_RECORDING_CODE = 80438027
|
||||
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
|
||||
def parse_hik_time(value: str | int | float, timezone: str = DEFAULT_TIMEZONE) -> int:
    """Convert a configured time to whole epoch seconds.

    Args:
        value: A number, taken as epoch seconds and truncated to ``int``,
            or a string in ``TIME_FORMAT`` read as local time in
            ``timezone``.
        timezone: IANA zone name applied to string values.

    Returns:
        Epoch seconds as an integer.

    Raises:
        ValueError: For booleans, other unsupported types, or a string that
            does not match ``TIME_FORMAT``.
    """
    if isinstance(value, str):
        naive = datetime.strptime(value, TIME_FORMAT)
        aware = naive.replace(tzinfo=ZoneInfo(timezone))
        return int(aware.timestamp())
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return int(value)
    raise ValueError(f"unsupported time value: {value!r}")
|
||||
|
||||
|
||||
def build_download_chunks(config: dict[str, Any]) -> list[dict[str, Any]]:
    """Split every configured device/time-range pair into download chunks.

    Args:
        config: Full configuration; reads ``hik_cloud.devices``,
            ``hik_cloud.time_ranges``, ``hik_cloud.chunk_seconds`` and
            ``runtime.timezone``.

    Returns:
        One dict per chunk with the device identity, the requested range and
        the chunk's own ``time_begin``/``time_end``, ordered by device, then
        time range, then time.

    Raises:
        ValueError: If ``chunk_seconds`` is not within 1..``MAX_CHUNK_SECONDS``
            or a time range does not end after it begins.
    """
    hik_settings = config.get("hik_cloud", {})
    zone = config.get("runtime", {}).get("timezone", DEFAULT_TIMEZONE)
    step = int(hik_settings.get("chunk_seconds", DEFAULT_CHUNK_SECONDS))
    if step <= 0:
        raise ValueError("chunk_seconds must be greater than 0")
    if step > MAX_CHUNK_SECONDS:
        raise ValueError("chunk_seconds must be less than or equal to 3600")

    planned: list[dict[str, Any]] = []
    device_list = hik_settings.get("devices", [])
    range_list = hik_settings.get("time_ranges", [])
    for device in device_list:
        for window in range_list:
            range_begin = parse_hik_time(window["begin"], zone)
            range_end = parse_hik_time(window["end"], zone)
            if range_end <= range_begin:
                raise ValueError("time range end must be after begin")

            for chunk_begin in range(range_begin, range_end, step):
                planned.append(
                    {
                        "device_serial": device["device_serial"],
                        "channel_no": device["channel_no"],
                        "requested_begin": range_begin,
                        "requested_end": range_end,
                        "time_begin": chunk_begin,
                        # The last chunk is shortened to the range end.
                        "time_end": min(chunk_begin + step, range_end),
                    }
                )
    return planned
|
||||
|
||||
|
||||
def resolve_access_token(config_or_hik_config: dict[str, Any]) -> str:
    """Find the Hik cloud access token.

    A non-empty ``access_token`` wins; otherwise the environment variable
    named by ``access_token_env`` is consulted.

    Args:
        config_or_hik_config: The full configuration or just its
            ``hik_cloud`` section.

    Returns:
        The access token.

    Raises:
        ValueError: If neither source yields a non-empty token.
    """
    settings = _hik_config(config_or_hik_config)
    direct = settings.get("access_token")
    if direct:
        return str(direct)

    variable = settings.get("access_token_env")
    from_env = os.environ.get(str(variable)) if variable else None
    if from_env:
        return from_env

    raise ValueError(
        "missing hik_cloud access_token; configure access_token or access_token_env"
    )
|
||||
|
||||
|
||||
def request_download_address(
    chunk: dict[str, Any],
    hik_config: dict[str, Any],
    *,
    http_post: Any | None = None,
) -> dict[str, Any]:
    """Ask the Hik cloud API for the download address of one chunk.

    Args:
        chunk: Chunk with ``device_serial``, ``channel_no``, ``time_begin``
            and ``time_end``.
        hik_config: Hik settings; reads ``api_base_url``, ``download_path``
            and ``timeout_seconds``, plus the token settings.
        http_post: Optional replacement for the HTTP call, invoked as
            ``http_post(url, json_body, headers, timeout_seconds)`` and
            expected to return the decoded JSON response.

    Returns:
        The chunk metadata plus ``status`` and ``code``. Status is
        ``"address_ok"`` (with ``url``, ``actual_begin``, ``actual_end``)
        when the API code is 0, ``"no_recording"`` for ``NO_RECORDING_CODE``,
        and ``"address_failed"`` otherwise; failures carry ``last_error``.

    Raises:
        ValueError: If no access token is configured.
    """
    token = resolve_access_token(hik_config)
    base_url = str(hik_config.get("api_base_url") or DEFAULT_API_BASE_URL)
    path = str(hik_config.get("download_path") or DEFAULT_DOWNLOAD_PATH)
    endpoint = base_url.rstrip("/") + path
    request_headers = {
        "Authorization": f"bearer {token}",
        "Content-Type": "application/json",
    }
    payload = {
        "deviceSerial": chunk["device_serial"],
        "channelNo": chunk["channel_no"],
        "timeBegin": chunk["time_begin"],
        "timeEnd": chunk["time_end"],
    }
    timeout = int(hik_config.get("timeout_seconds", DEFAULT_TIMEOUT_SECONDS))
    send = http_post or _post_json

    def outcome(**fields: Any) -> dict[str, Any]:
        # Every result starts with the chunk's identifying metadata.
        return {**_chunk_metadata(chunk), **fields}

    try:
        reply = send(endpoint, payload, request_headers, timeout)
    except Exception as exc:  # pragma: no cover - exact urllib failures vary.
        return outcome(
            status="address_failed",
            code=None,
            last_error=_sanitize_error(exc, token),
        )

    code = _optional_int(reply.get("code"))
    if code != 0:
        status = "no_recording" if code == NO_RECORDING_CODE else "address_failed"
        return outcome(
            status=status,
            code=code,
            last_error=_api_error_message(reply, token),
        )

    data = reply.get("data") or {}
    return outcome(
        status="address_ok",
        code=code,
        url=data.get("url"),
        actual_begin=_optional_int(data.get("actualBeginTime")),
        actual_end=_optional_int(data.get("actualEndTime")),
    )
|
||||
|
||||
|
||||
def download_hik_cloud_recordings(
    config: dict[str, Any],
    output_dir: str | Path,
    *,
    address_client: Any | None = None,
    download_url: Any | None = None,
    download: bool = True,
) -> list[dict[str, Any]]:
    """Download every configured recording chunk and record the outcome.

    For each chunk the download address is requested and, unless
    ``download`` is off, the file is fetched and written under
    ``output_dir``. Each outcome is stored in the download manifest. The
    manifest is written even when the run is cut short by an exception, so a
    later run with ``output.resume`` enabled can reuse what was downloaded.

    Args:
        config: Full configuration; reads ``hik_cloud`` and ``output.resume``.
        output_dir: Directory that receives the files and the manifest.
        address_client: Optional replacement for ``request_download_address``,
            called as ``address_client(chunk, hik_config)``.
        download_url: Optional replacement for the file fetch, called as
            ``download_url(url, timeout_seconds=...)`` and returning bytes.
        download: When false, only addresses are requested and recorded.

    Returns:
        Video records for chunks downloaded now or reused from the manifest.
    """
    output_path = Path(output_dir).expanduser().resolve(strict=False)
    manifest_path = output_path / DOWNLOAD_MANIFEST_NAME
    hik_config = _hik_config(config)
    chunks = build_download_chunks(config)
    resume = bool(config.get("output", {}).get("resume", False))
    manifest_records = read_jsonl(manifest_path) if resume else []
    existing_downloads = {
        _manifest_key(record): record
        for record in manifest_records
        if _is_resumable_download(record)
    }
    get_address = address_client or request_download_address
    fetch = download_url or _download_url
    download_timeout_seconds = int(
        hik_config.get("download_timeout_seconds", DEFAULT_DOWNLOAD_TIMEOUT_SECONDS)
    )
    token = _redaction_token(hik_config)

    video_records: list[dict[str, Any]] = []
    try:
        for chunk in chunks:
            existing_record = existing_downloads.get(_chunk_key(chunk))
            if download and existing_record is not None:
                # Already downloaded by an earlier run; reuse it.
                video_records.append(_video_record_from_manifest(existing_record))
                continue

            address_result = get_address(chunk, hik_config)

            def remember(status: str, **extra: Any) -> None:
                # Store this chunk's outcome in the in-memory manifest.
                _upsert_manifest_record(
                    manifest_records,
                    _manifest_record(
                        chunk,
                        address_result,
                        status=status,
                        token=token,
                        **extra,
                    ),
                )

            status = address_result.get("status")
            if status != "address_ok":
                remember(str(status or "address_failed"))
                continue

            if not download:
                remember("address_ok")
                continue

            url = str(address_result.get("url") or "")
            target_path = hik_cloud_download_path(
                output_path,
                str(chunk["device_serial"]),
                chunk["channel_no"],
                int(chunk["time_begin"]),
                int(chunk["time_end"]),
            )
            try:
                payload = fetch(url, timeout_seconds=download_timeout_seconds)
                target_path.parent.mkdir(parents=True, exist_ok=True)
                target_path.write_bytes(payload)
            except Exception as exc:  # pragma: no cover - concrete network failures vary.
                remember(
                    "download_failed",
                    path=target_path,
                    last_error=_sanitize_error(exc, token),
                )
                continue

            video_records.append(
                _downloaded_video_record(chunk, address_result, target_path)
            )
            remember("downloaded", path=target_path)
    finally:
        # Persist progress even on an unexpected error or interrupt;
        # otherwise every finished download would be invisible to resume.
        write_manifest(manifest_path, manifest_records)
    return video_records
|
||||
|
||||
|
||||
def _post_json(
    url: str,
    json_body: dict[str, Any],
    headers: dict[str, str],
    timeout_seconds: int,
) -> dict[str, Any]:
    """POST ``json_body`` to ``url`` as UTF-8 JSON and return the decoded reply.

    Args:
        url: Endpoint to call.
        json_body: Mapping serialized with ``json.dumps`` as the request body.
        headers: HTTP headers sent with the request.
        timeout_seconds: Timeout handed to ``urllib.request.urlopen``.

    Returns:
        The response body parsed as JSON.

    Errors raised by ``urllib`` or by JSON decoding propagate to the caller.
    """
    encoded_body = json.dumps(json_body).encode("utf-8")
    post_request = urllib.request.Request(
        url,
        data=encoded_body,
        headers=headers,
        method="POST",
    )
    with urllib.request.urlopen(post_request, timeout=timeout_seconds) as reply:
        reply_text = reply.read().decode("utf-8")
    return json.loads(reply_text)
|
||||
|
||||
|
||||
def _download_url(url: str, *, timeout_seconds: int | None = None) -> bytes:
    """Fetch ``url`` and return the complete response body as bytes.

    The whole body is read into memory in a single ``read()`` call.
    """
    with urllib.request.urlopen(url, timeout=timeout_seconds) as reply:
        body = reply.read()
    return body
|
||||
|
||||
|
||||
def _hik_config(config_or_hik_config: dict[str, Any]) -> dict[str, Any]:
|
||||
hik_config = config_or_hik_config.get("hik_cloud")
|
||||
if isinstance(hik_config, dict):
|
||||
return hik_config
|
||||
return config_or_hik_config
|
||||
|
||||
|
||||
def _chunk_metadata(chunk: dict[str, Any]) -> dict[str, Any]:
|
||||
return {
|
||||
"device_serial": chunk["device_serial"],
|
||||
"channel_no": chunk["channel_no"],
|
||||
"requested_begin": chunk.get("requested_begin"),
|
||||
"requested_end": chunk.get("requested_end"),
|
||||
"time_begin": chunk["time_begin"],
|
||||
"time_end": chunk["time_end"],
|
||||
}
|
||||
|
||||
|
||||
def _optional_int(value: Any) -> int | None:
|
||||
if value is None or value == "":
|
||||
return None
|
||||
return int(value)
|
||||
|
||||
|
||||
def _api_error_message(response: dict[str, Any], token: str) -> str:
    """Build a sanitized one-line description of a failed API response.

    The text comes from ``msg`` or ``message`` (falling back to
    ``"hik api error"``) and is prefixed with the response ``code``; the
    result is passed through ``_sanitize_error`` together with ``token``.
    """
    detail = response.get("msg") or response.get("message") or "hik api error"
    api_code = response.get("code")
    return _sanitize_error(f"hik api code {api_code}: {detail}", token)
|
||||
|
||||
|
||||
def _sanitize_error(value: Any, token: str = "") -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
message = str(value)
|
||||
for raw_url in re.findall(r"https?://[^\s'\"<>]+", message):
|
||||
parsed = urlparse(raw_url)
|
||||
sanitized_url = urlunparse(
|
||||
(parsed.scheme, parsed.netloc, parsed.path, "", "", "")
|
||||
)
|
||||
message = message.replace(raw_url, sanitized_url)
|
||||
message = re.sub(
|
||||
r"\b(?:sign|sig|token|access_token)=[^&\s'\"<>]+",
|
||||
"[redacted-query]",
|
||||
message,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
if token:
|
||||
message = message.replace(token, "[redacted]")
|
||||
message = message.replace("Authorization", "[redacted-header]")
|
||||
return message
|
||||
|
||||
|
||||
def _downloaded_video_record(
    chunk: dict[str, Any],
    address_result: dict[str, Any],
    path: Path,
) -> dict[str, Any]:
    """Describe a chunk that was just saved to ``path`` as a video record.

    The requested range is taken from the chunk's ``time_begin`` and
    ``time_end``; the actual range comes from ``address_result``. The record
    is always marked ``downloaded`` with no retries and no error.
    """
    record: dict[str, Any] = {"source": "hik_cloud", "path": str(path)}
    record["source_path"] = _source_path(chunk)
    record["device_serial"] = chunk["device_serial"]
    record["channel_no"] = chunk["channel_no"]
    record["requested_begin"] = chunk["time_begin"]
    record["requested_end"] = chunk["time_end"]
    record["actual_begin"] = address_result.get("actual_begin")
    record["actual_end"] = address_result.get("actual_end")
    record.update(status="downloaded", retry_count=0, last_error=None)
    return record
|
||||
|
||||
|
||||
def _manifest_record(
    chunk: dict[str, Any],
    address_result: dict[str, Any],
    *,
    status: str,
    token: str,
    path: Path | None = None,
    last_error: str | None = None,
) -> dict[str, Any]:
    """Build the manifest entry for one chunk in the given ``status``.

    Args:
        chunk: Chunk description; supplies device, channel and time range.
        address_result: Address lookup result; supplies the actual range and,
            when present, ``url``, ``code`` and ``last_error``.
        status: Status stored in the record.
        token: Secret removed from the stored error text.
        path: Local file path, stored as a string when given.
        last_error: Error text; falls back to ``address_result["last_error"]``.

    Returns:
        The manifest record. Only the host of the download URL is stored
        (``download_url_host``); ``source_path`` is added for downloaded
        chunks only.
    """
    raw_error = last_error or address_result.get("last_error")
    record: dict[str, Any] = {
        "source": "hik_cloud",
        "device_serial": chunk["device_serial"],
        "channel_no": chunk["channel_no"],
        "requested_begin": chunk["time_begin"],
        "requested_end": chunk["time_end"],
        "actual_begin": address_result.get("actual_begin"),
        "actual_end": address_result.get("actual_end"),
        "path": None if path is None else str(path),
        "status": status,
        "retry_count": 0,
        "last_error": _sanitize_error(raw_error, token),
    }

    download_url = address_result.get("url")
    if download_url:
        record["download_url_host"] = urlparse(str(download_url)).netloc
    if "code" in address_result:
        record["code"] = address_result.get("code")
    if status == "downloaded":
        record["source_path"] = _source_path(chunk)
    return record
|
||||
|
||||
|
||||
def _source_path(chunk: dict[str, Any]) -> str:
|
||||
time_begin = chunk.get("time_begin", chunk.get("requested_begin"))
|
||||
time_end = chunk.get("time_end", chunk.get("requested_end"))
|
||||
return (
|
||||
f"hik_cloud://{chunk['device_serial']}/ch{chunk['channel_no']}/"
|
||||
f"{int(time_begin)}-{int(time_end)}"
|
||||
)
|
||||
|
||||
|
||||
def _is_resumable_download(record: dict[str, Any]) -> bool:
|
||||
path = record.get("path")
|
||||
return (
|
||||
record.get("status") == "downloaded"
|
||||
and isinstance(path, str)
|
||||
and Path(path).exists()
|
||||
)
|
||||
|
||||
|
||||
def _video_record_from_manifest(record: dict[str, Any]) -> dict[str, Any]:
|
||||
return {
|
||||
"source": "hik_cloud",
|
||||
"path": record["path"],
|
||||
"source_path": record.get("source_path") or _source_path(record),
|
||||
"device_serial": record["device_serial"],
|
||||
"channel_no": record["channel_no"],
|
||||
"requested_begin": record["requested_begin"],
|
||||
"requested_end": record["requested_end"],
|
||||
"actual_begin": record.get("actual_begin"),
|
||||
"actual_end": record.get("actual_end"),
|
||||
"status": "downloaded",
|
||||
"retry_count": record.get("retry_count", 0),
|
||||
"last_error": record.get("last_error"),
|
||||
}
|
||||
|
||||
|
||||
def _upsert_manifest_record(
    records: list[dict[str, Any]],
    new_record: dict[str, Any],
) -> None:
    """Insert ``new_record`` into ``records`` in place.

    The first existing entry with the same ``_manifest_key`` is replaced;
    when there is none the record is appended.
    """
    target_key = _manifest_key(new_record)
    position = next(
        (
            index
            for index, existing in enumerate(records)
            if _manifest_key(existing) == target_key
        ),
        None,
    )
    if position is None:
        records.append(new_record)
    else:
        records[position] = new_record
|
||||
|
||||
|
||||
def _chunk_key(chunk: dict[str, Any]) -> tuple[Any, Any, Any, Any]:
|
||||
return (
|
||||
chunk.get("device_serial"),
|
||||
chunk.get("channel_no"),
|
||||
chunk.get("time_begin"),
|
||||
chunk.get("time_end"),
|
||||
)
|
||||
|
||||
|
||||
def _manifest_key(record: dict[str, Any]) -> tuple[Any, Any, Any, Any]:
|
||||
return (
|
||||
record.get("device_serial"),
|
||||
record.get("channel_no"),
|
||||
record.get("requested_begin"),
|
||||
record.get("requested_end"),
|
||||
)
|
||||
|
||||
|
||||
def _redaction_token(hik_config: dict[str, Any]) -> str:
|
||||
token = hik_config.get("access_token")
|
||||
if token:
|
||||
return str(token)
|
||||
token_env = hik_config.get("access_token_env")
|
||||
if token_env:
|
||||
return os.environ.get(str(token_env), "")
|
||||
return ""
|
||||
35
video_ai_analysis_poc/manifest.py
Normal file
35
video_ai_analysis_poc/manifest.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
def write_manifest(path: str | Path, records: Iterable[dict[str, Any]]) -> None:
    """Write ``records`` to ``path`` as JSON Lines, replacing any existing file.

    Parent directories are created as needed. Each record is passed through
    ``_normalize_record`` and serialized on its own line with sorted keys and
    non-ASCII characters left unescaped.
    """
    target = Path(path).expanduser().resolve(strict=False)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(_normalize_record(record), ensure_ascii=False, sort_keys=True)
            + "\n"
            for record in records
        )
|
||||
|
||||
|
||||
def read_jsonl(path: str | Path) -> list[dict[str, Any]]:
    """Load every non-blank line of a JSON Lines file.

    Args:
        path: File to read; ``~`` is expanded.

    Returns:
        The decoded records in file order, or ``[]`` when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    jsonl_path = Path(path).expanduser().resolve(strict=False)
    if not jsonl_path.exists():
        return []
    # Iterate the file instead of using str.splitlines(): splitlines() also
    # breaks on U+2028, U+2029 and U+0085, which json.dumps(ensure_ascii=False)
    # writes unescaped, so a record containing one would be cut in half.
    with jsonl_path.open("r", encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]
|
||||
|
||||
|
||||
def _normalize_record(record: dict[str, Any]) -> dict[str, Any]:
|
||||
normalized = dict(record)
|
||||
normalized.setdefault("status", "pending")
|
||||
normalized.setdefault("retry_count", 0)
|
||||
normalized.setdefault("last_error", None)
|
||||
return normalized
|
||||
71
video_ai_analysis_poc/paths.py
Normal file
71
video_ai_analysis_poc/paths.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Default directory that validate_output_dir() rejects as an output location,
# together with anything below it. NOTE: this is a machine-specific absolute path.
FORBIDDEN_REFERENCE_ROOT = Path("/Users/yoilun/AI-train/zhengxin-vlm-0413")
|
||||
|
||||
|
||||
def resolve_path(path: str | Path, *, base_dir: Path | None = None) -> Path:
    """Expand ``~`` and resolve ``path`` without requiring it to exist.

    A relative ``path`` is anchored at ``base_dir`` when one is given;
    otherwise resolution falls back to ``Path.resolve`` semantics.
    """
    candidate = Path(path).expanduser()
    needs_base = base_dir is not None and not candidate.is_absolute()
    anchored = base_dir / candidate if needs_base else candidate
    return anchored.resolve(strict=False)
|
||||
|
||||
|
||||
def _is_relative_to(path: Path, parent: Path) -> bool:
|
||||
try:
|
||||
path.relative_to(parent)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def validate_output_dir(
    input_dir: str | Path,
    output_dir: str | Path,
    *,
    forbidden_root: Path = FORBIDDEN_REFERENCE_ROOT,
) -> Path:
    """Check that ``output_dir`` is an acceptable place to write results.

    Args:
        input_dir: Directory the videos are read from.
        output_dir: Directory results will be written to.
        forbidden_root: Directory tree that must never receive output.

    Returns:
        The resolved output directory.

    Raises:
        ValueError: If the output directory equals the input directory, or is
            ``forbidden_root`` or located inside it.
    """
    source_dir = resolve_path(input_dir)
    target_dir = resolve_path(output_dir)
    banned_dir = resolve_path(forbidden_root)

    if target_dir == source_dir:
        raise ValueError("output dir must not equal input dir")
    if _is_relative_to(target_dir, banned_dir):
        message = f"output dir must not be inside forbidden reference dir: {banned_dir}"
        raise ValueError(message)
    return target_dir
|
||||
|
||||
|
||||
def stable_video_id(path: str | Path) -> str:
    """Derive a deterministic ``video-<16 hex chars>`` id from a file path.

    The id is the first 16 hex digits of the SHA-1 of the resolved path, so
    the same resolved path always maps to the same id.
    """
    canonical = str(resolve_path(path)).encode("utf-8")
    return "video-" + hashlib.sha1(canonical).hexdigest()[:16]
|
||||
|
||||
|
||||
def hik_cloud_download_path(
    output_dir: str | Path,
    device_serial: str,
    channel_no: int | str,
    time_begin: int,
    time_end: int,
) -> Path:
    """Return the local file path for one downloaded chunk.

    Layout:
    ``<output_dir>/downloads/hik_cloud/<device>/ch<channel>/<device>_ch<channel>_<begin>_<end>.mp4``
    where device and channel are passed through ``_safe_path_component``.
    """
    device_part = _safe_path_component(device_serial)
    channel_dir = f"ch{_safe_path_component(str(channel_no))}"
    file_name = f"{device_part}_{channel_dir}_{int(time_begin)}_{int(time_end)}.mp4"
    return resolve_path(output_dir).joinpath(
        "downloads",
        "hik_cloud",
        device_part,
        channel_dir,
        file_name,
    )
|
||||
|
||||
|
||||
def _safe_path_component(value: str) -> str:
|
||||
return "".join(char if char.isalnum() or char in "._-" else "_" for char in value)
|
||||
99
video_ai_analysis_poc/probe.py
Normal file
99
video_ai_analysis_poc/probe.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def probe_video(path: str | Path, *, timeout_seconds: int = 30) -> dict[str, Any]:
    """Run ``ffprobe`` on ``path`` and summarize its first video stream.

    Args:
        path: Video file to inspect; ``~`` is expanded.
        timeout_seconds: Maximum time ``ffprobe`` may run.

    Returns:
        A record that always holds ``path``, ``status``, ``retry_count`` and
        ``last_error``. On success ``status`` is ``"probed"`` and the record
        also carries ``duration_seconds``, ``codec_name``, ``width``,
        ``height``, ``fps``, ``format_name`` and ``start_time``. On a handled
        failure ``status`` stays ``"probe_failed"`` and ``last_error``
        explains why.
    """
    video_path = Path(path).expanduser().resolve(strict=False)
    base_record: dict[str, Any] = {
        "path": str(video_path),
        "status": "probe_failed",
        "retry_count": 0,
        "last_error": None,
    }
    command = [
        "ffprobe",
        "-v",
        "error",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        str(video_path),
    ]

    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            timeout=timeout_seconds,
        )
        payload = json.loads(completed.stdout or "{}")
        video_stream = _first_video_stream(payload)
        format_info = payload.get("format", {})
        return {
            **base_record,
            "status": "probed",
            "duration_seconds": _optional_float(format_info.get("duration")),
            "codec_name": video_stream.get("codec_name"),
            "width": _optional_int(video_stream.get("width")),
            "height": _optional_int(video_stream.get("height")),
            "fps": _parse_frame_rate(
                video_stream.get("avg_frame_rate") or video_stream.get("r_frame_rate")
            ),
            "format_name": format_info.get("format_name"),
            "start_time": _optional_float(format_info.get("start_time")),
        }
    except subprocess.TimeoutExpired as exc:
        base_record["last_error"] = f"ffprobe timed out after {timeout_seconds}s"
        # TimeoutExpired.stderr is bytes even with text=True, so decode it
        # through _error_text instead of formatting the bytes repr.
        stderr_text = _error_text(exc.stderr) if exc.stderr else ""
        if stderr_text:
            base_record["last_error"] += f": {stderr_text}"
    except subprocess.CalledProcessError as exc:
        base_record["last_error"] = _error_text(exc.stderr or exc.stdout or str(exc))
    except OSError as exc:
        # Raised when the ffprobe executable is missing or cannot be started;
        # report it like any other probe failure instead of crashing.
        base_record["last_error"] = f"ffprobe could not be started: {exc}"
    except (json.JSONDecodeError, ValueError) as exc:
        base_record["last_error"] = f"ffprobe parse failed: {exc}"
    return base_record
|
||||
|
||||
|
||||
def _first_video_stream(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
for stream in payload.get("streams", []):
|
||||
if stream.get("codec_type") == "video":
|
||||
return stream
|
||||
raise ValueError("ffprobe output did not contain a video stream")
|
||||
|
||||
|
||||
def _parse_frame_rate(value: str | None) -> float | None:
|
||||
if not value or value == "0/0":
|
||||
return None
|
||||
if "/" in value:
|
||||
numerator, denominator = value.split("/", 1)
|
||||
denominator_value = float(denominator)
|
||||
if denominator_value == 0:
|
||||
return None
|
||||
return float(numerator) / denominator_value
|
||||
return float(value)
|
||||
|
||||
|
||||
def _optional_float(value: Any) -> float | None:
|
||||
if value is None or value == "":
|
||||
return None
|
||||
return float(value)
|
||||
|
||||
|
||||
def _optional_int(value: Any) -> int | None:
|
||||
if value is None or value == "":
|
||||
return None
|
||||
return int(value)
|
||||
|
||||
|
||||
def _error_text(value: Any) -> str:
|
||||
if isinstance(value, bytes):
|
||||
return value.decode("utf-8", errors="replace").strip()
|
||||
return str(value).strip()
|
||||
138
video_ai_analysis_poc/result_parser.py
Normal file
138
video_ai_analysis_poc/result_parser.py
Normal file
@@ -0,0 +1,138 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
|
||||
def extract_json_payload(raw_response: str) -> dict[str, Any]:
    """Extract the first JSON object from a model response.

    The stripped response is first parsed as a whole. If that fails, or the
    result is not an object, every ``{`` is tried as the start of an embedded
    object and the first one that decodes is returned. This copes with
    answers wrapped in prose or code fences.

    Args:
        raw_response: Text returned by the model.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the response is blank or contains no decodable object.
    """
    text = raw_response.strip()
    if not text:
        raise ValueError("JSON payload is empty")

    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        payload = None
    if isinstance(payload, dict):
        return payload

    decoder = json.JSONDecoder()
    # Decode in place from each "{" via raw_decode's index argument rather
    # than slicing, so no copy of the remaining text is made per candidate.
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    raise ValueError("JSON object not found in model response")
|
||||
|
||||
|
||||
def build_clip_result(
    raw_response: str,
    clip_record: dict[str, Any],
    video_record: dict[str, Any] | None,
    config: dict[str, Any],
    *,
    processing: dict[str, Any] | None = None,
    status: str | None = None,
    error: str | None = None,
) -> dict[str, Any]:
    """Assemble the result record for one analysed clip.

    Args:
        raw_response: Model output, stored verbatim in the result.
        clip_record: Clip manifest entry (ids, offsets, frame list).
        video_record: Owning video record, or ``None``.
        config: Configuration; ``schema.version`` and ``runtime.timezone``
            are read.
        processing: Optional processing metadata, copied into the result.
        status: When given, the response is not parsed and this status and
            ``error`` are stored as-is.
        error: Error text stored together with an explicit ``status``.

    Returns:
        The clip result. ``events`` is only populated when the status is
        ``"ok"``; an unparsable response gives ``"parse_failed"``.
    """
    processing_record = dict(processing or {})
    payload: dict[str, Any] = {}
    if status is not None:
        result_status, result_error = status, error
    else:
        try:
            payload = extract_json_payload(raw_response)
        except ValueError as exc:
            result_status, result_error = "parse_failed", str(exc)
        else:
            result_status, result_error = "ok", None

    timeline = _timeline(clip_record, config, payload)
    result: dict[str, Any] = {
        "schema_version": config.get("schema", {}).get("version", "local-batch-v1"),
        "video_id": str(clip_record.get("video_id")),
        "video_path": _video_path(video_record),
        "clip_id": str(clip_record.get("clip_id")),
        "status": result_status,
        "monitoring_timeline": timeline,
    }
    if result_status == "ok":
        result["events"] = _events(payload, clip_record)
    else:
        result["events"] = []
    result["raw_response"] = raw_response
    result["processing"] = processing_record
    result["error"] = result_error
    return result
|
||||
|
||||
|
||||
def _timeline(
|
||||
clip_record: dict[str, Any],
|
||||
config: dict[str, Any],
|
||||
payload: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"timezone": config.get("runtime", {}).get("timezone", "Asia/Shanghai"),
|
||||
"video_start_time": clip_record.get("video_start_time"),
|
||||
"clip_start_seconds": clip_record.get("clip_start_seconds"),
|
||||
"clip_end_seconds": clip_record.get("clip_end_seconds"),
|
||||
"clip_start_timecode": clip_record.get("clip_start_timecode"),
|
||||
"clip_end_timecode": clip_record.get("clip_end_timecode"),
|
||||
"clip_start_beijing_time": clip_record.get("clip_start_beijing_time"),
|
||||
"clip_end_beijing_time": clip_record.get("clip_end_beijing_time"),
|
||||
"frame_times": clip_record.get("frame_times", []),
|
||||
"screen_time": str(
|
||||
payload.get("screen_time") or payload.get("画面时间") or payload.get("时间") or ""
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _events(
|
||||
payload: dict[str, Any],
|
||||
clip_record: dict[str, Any],
|
||||
) -> list[dict[str, Any]]:
|
||||
raw_events = payload.get("events") or []
|
||||
if not isinstance(raw_events, list):
|
||||
return []
|
||||
return [
|
||||
_event(event, clip_record)
|
||||
for event in raw_events
|
||||
if isinstance(event, dict)
|
||||
]
|
||||
|
||||
|
||||
def _event(
|
||||
event: dict[str, Any],
|
||||
clip_record: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
normalized = dict(event)
|
||||
normalized.setdefault("event_type", "unknown")
|
||||
normalized.setdefault("start_time", None)
|
||||
normalized.setdefault("end_time", None)
|
||||
normalized.setdefault("start_offset_seconds", clip_record.get("clip_start_seconds"))
|
||||
normalized.setdefault("end_offset_seconds", clip_record.get("clip_end_seconds"))
|
||||
normalized.setdefault("confidence", None)
|
||||
normalized.setdefault("severity", None)
|
||||
normalized.setdefault("attributes", {})
|
||||
normalized.setdefault(
|
||||
"evidence",
|
||||
{
|
||||
"clip_id": clip_record.get("clip_id"),
|
||||
"frame_paths": [
|
||||
frame.get("frame_path")
|
||||
for frame in clip_record.get("frame_times", [])
|
||||
if frame.get("frame_path")
|
||||
],
|
||||
},
|
||||
)
|
||||
return normalized
|
||||
|
||||
|
||||
def _video_path(video_record: dict[str, Any] | None) -> str | None:
|
||||
if not video_record:
|
||||
return None
|
||||
value = video_record.get("path") or video_record.get("source_path")
|
||||
return str(value) if value is not None else None
|
||||
67
video_ai_analysis_poc/timeline.py
Normal file
67
video_ai_analysis_poc/timeline.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
|
||||
# strftime/strptime pattern shared by every timestamp string this module
# formats or parses.
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
|
||||
# Zone name used when the caller does not pass one, and the fallback in
# _zone() when the requested zone cannot be found.
DEFAULT_TIMEZONE = "Asia/Shanghai"
|
||||
|
||||
|
||||
def format_beijing_time(
    epoch_seconds: float | int | str | None,
    *,
    offset_seconds: float | int = 0,
    timezone_name: str = DEFAULT_TIMEZONE,
) -> str | None:
    """Format ``epoch_seconds + offset_seconds`` as local wall-clock time.

    Args:
        epoch_seconds: Unix timestamp; ``None`` or ``""`` yields ``None``.
        offset_seconds: Seconds added to the timestamp before formatting.
        timezone_name: Zone the result is expressed in.

    Returns:
        The time formatted with ``TIME_FORMAT``, or ``None`` without a
        timestamp.
    """
    base_epoch = _optional_float(epoch_seconds)
    if base_epoch is None:
        return None
    target_zone = _zone(timezone_name)
    moment_utc = datetime.fromtimestamp(
        base_epoch + float(offset_seconds),
        tz=timezone.utc,
    )
    return moment_utc.astimezone(target_zone).strftime(TIME_FORMAT)
|
||||
|
||||
|
||||
def derive_time_from_reference(
    reference_time: str | None,
    *,
    reference_offset_seconds: float | int | None,
    target_offset_seconds: float | int | None,
) -> str | None:
    """Shift a known wall-clock time to another offset in the same video.

    Args:
        reference_time: Time string in ``TIME_FORMAT`` observed at
            ``reference_offset_seconds``.
        reference_offset_seconds: Offset at which ``reference_time`` applies.
        target_offset_seconds: Offset whose wall-clock time is wanted.

    Returns:
        The shifted time in ``TIME_FORMAT``, or ``None`` when the reference
        is missing or unparsable or either offset is missing.
    """
    if not reference_time:
        return None
    from_offset = _optional_float(reference_offset_seconds)
    to_offset = _optional_float(target_offset_seconds)
    if from_offset is None or to_offset is None:
        return None
    try:
        anchor = datetime.strptime(reference_time, TIME_FORMAT)
    except ValueError:
        return None
    shifted = anchor + timedelta(seconds=to_offset - from_offset)
    return shifted.strftime(TIME_FORMAT)
|
||||
|
||||
|
||||
def timeline_start_epoch(record: dict[str, Any]) -> float | None:
    """Return the start timestamp of a record as a float.

    ``actual_begin`` is preferred over ``requested_begin``; ``None`` is
    returned when neither holds a value.
    """
    candidates = (
        _optional_float(record.get(key))
        for key in ("actual_begin", "requested_begin")
    )
    return next((value for value in candidates if value is not None), None)
|
||||
|
||||
|
||||
def _zone(timezone_name: str) -> ZoneInfo:
    """Return the ``ZoneInfo`` for ``timezone_name``, else the default zone.

    Falls back to ``DEFAULT_TIMEZONE`` when the name is unknown and also when
    it is not a well-formed key. ``ZoneInfo`` raises ``ValueError`` rather
    than ``ZoneInfoNotFoundError`` for keys such as ``""`` or an absolute
    path, and a bad configuration value should not abort time formatting.
    """
    try:
        return ZoneInfo(timezone_name)
    except (ZoneInfoNotFoundError, ValueError):
        return ZoneInfo(DEFAULT_TIMEZONE)
|
||||
|
||||
|
||||
def _optional_float(value: Any) -> float | None:
|
||||
if value is None or value == "":
|
||||
return None
|
||||
return float(value)
|
||||
134
video_ai_analysis_poc/vlm_client.py
Normal file
134
video_ai_analysis_poc/vlm_client.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import time
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
|
||||
# Signature of the injectable HTTP client used by infer_clip():
# (url, payload, timeout_seconds) -> response dict. infer_clip() reads the
# "body" and "status" keys of the returned dict.
HttpPost = Callable[[str, dict[str, Any], int], dict[str, Any]]
|
||||
|
||||
|
||||
def infer_clip(
    clip_record: dict[str, Any],
    output_dir: str | Path,
    vlm_config: dict[str, Any],
    prompt_config: dict[str, Any],
    *,
    http_post: HttpPost | None = None,
) -> dict[str, Any]:
    """Send one clip to the chat-completions endpoint and time the call.

    Args:
        clip_record: Clip manifest entry whose frames are sent.
        output_dir: Base directory for relative frame paths.
        vlm_config: Endpoint settings; ``timeout_seconds`` defaults to 120.
        prompt_config: System and user prompt texts.
        http_post: Optional replacement for the built-in HTTP client.

    Returns:
        A dict with ``raw_response`` (message text of the first choice),
        ``http_status`` and ``latency_ms``. The latency covers payload
        construction as well as the request.
    """
    started = time.monotonic()
    send = http_post or _post_json
    endpoint = build_chat_url(vlm_config)
    request_body = build_payload(clip_record, output_dir, vlm_config, prompt_config)
    timeout = int(vlm_config.get("timeout_seconds", 120))
    reply = send(endpoint, request_body, timeout)
    elapsed_ms = int((time.monotonic() - started) * 1000)
    return {
        "raw_response": _extract_message_content(reply.get("body")),
        "http_status": reply.get("status"),
        "latency_ms": elapsed_ms,
    }
|
||||
|
||||
|
||||
def build_chat_url(vlm_config: dict[str, Any]) -> str:
    """Join ``api_base_url`` and ``chat_completions_path`` into one URL.

    Trailing slashes on the base are dropped, and a non-empty path that lacks
    a leading slash gets one, so exactly one ``/`` separates the two parts
    however they are written in the configuration.

    Raises:
        KeyError: If either configuration key is missing.
    """
    base = str(vlm_config["api_base_url"]).rstrip("/")
    path = str(vlm_config["chat_completions_path"])
    # Without this, "http://host:8000" + "v1/chat" would become
    # "http://host:8000v1/chat".
    if path and not path.startswith("/"):
        path = "/" + path
    return base + path
|
||||
|
||||
|
||||
def build_payload(
    clip_record: dict[str, Any],
    output_dir: str | Path,
    vlm_config: dict[str, Any],
    prompt_config: dict[str, Any],
) -> dict[str, Any]:
    """Build the chat-completions request body for one clip.

    The user message holds the user prompt followed by one ``image_url`` part
    per frame that has a non-empty ``frame_path``. ``temperature`` defaults
    to 0 and ``max_tokens`` to 512.
    """
    transport = str(vlm_config.get("image_transport", "data_uri"))
    user_content: list[dict[str, Any]] = [
        {"type": "text", "text": str(prompt_config.get("user", ""))}
    ]
    frame_paths = (
        frame.get("frame_path") for frame in clip_record.get("frame_times", [])
    )
    user_content.extend(
        {
            "type": "image_url",
            "image_url": {"url": _image_url(frame_path, output_dir, transport)},
        }
        for frame_path in frame_paths
        if frame_path
    )

    system_message = {"role": "system", "content": str(prompt_config.get("system", ""))}
    user_message = {"role": "user", "content": user_content}
    return {
        "model": vlm_config.get("model"),
        "messages": [system_message, user_message],
        "temperature": vlm_config.get("temperature", 0),
        "max_tokens": vlm_config.get("max_tokens", 512),
    }
|
||||
|
||||
|
||||
def _image_url(
|
||||
frame_path: str | Path,
|
||||
output_dir: str | Path,
|
||||
image_transport: str,
|
||||
) -> str:
|
||||
if image_transport != "data_uri":
|
||||
return str(frame_path)
|
||||
path = Path(frame_path).expanduser()
|
||||
if not path.is_absolute():
|
||||
path = Path(output_dir).expanduser() / path
|
||||
data = base64.b64encode(path.read_bytes()).decode("ascii")
|
||||
return f"data:{_mime_type(path)};base64,{data}"
|
||||
|
||||
|
||||
def _mime_type(path: Path) -> str:
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in {".jpg", ".jpeg"}:
|
||||
return "image/jpeg"
|
||||
if suffix == ".png":
|
||||
return "image/png"
|
||||
if suffix == ".webp":
|
||||
return "image/webp"
|
||||
return "application/octet-stream"
|
||||
|
||||
|
||||
def _post_json(
    url: str,
    payload: dict[str, Any],
    timeout_seconds: int,
) -> dict[str, Any]:
    """POST ``payload`` as JSON and return the status with the decoded body.

    Returns:
        ``{"status": <HTTP status>, "body": <parsed JSON>}``; an empty
        response body is reported as ``{}``.

    Errors raised by ``urllib`` or by JSON decoding propagate to the caller.
    """
    encoded = json.dumps(payload).encode("utf-8")
    post_request = urllib.request.Request(
        url,
        data=encoded,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(post_request, timeout=timeout_seconds) as reply:
        reply_text = reply.read().decode("utf-8")
        status_code = reply.status
    parsed_body = json.loads(reply_text) if reply_text else {}
    return {"status": status_code, "body": parsed_body}
|
||||
|
||||
|
||||
def _extract_message_content(body: Any) -> str:
|
||||
if not isinstance(body, dict):
|
||||
return ""
|
||||
choices = body.get("choices")
|
||||
if not choices:
|
||||
return ""
|
||||
message = choices[0].get("message", {}) if isinstance(choices[0], dict) else {}
|
||||
content = message.get("content", "")
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
return json.dumps(content, ensure_ascii=False)
|
||||
Reference in New Issue
Block a user