Add QSC prompt and phase timings
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from contextlib import contextmanager
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
from typing import Callable, Iterator, Sequence, TypeVar
|
||||
|
||||
from .aggregator import aggregate_outputs
|
||||
from .clips import build_clip_records
|
||||
@@ -18,6 +21,64 @@ from .result_parser import build_clip_result
|
||||
from .timeline import DEFAULT_TIMEZONE, format_beijing_time, timeline_start_epoch
|
||||
from .vlm_client import infer_clip
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def _new_phase_timings() -> dict[str, object]:
    """Create an empty phase-timings document for a fresh run.

    Returns:
        A dict holding the schema tag ``"phase-timings-v1"``, the
        ``started_at`` and ``updated_at`` timestamps produced by
        ``_utc_now_iso()``, and an empty ``phases`` mapping.
    """
    # Read the clock once so both timestamps are identical at creation. Two
    # separate reads make ``updated_at`` differ from ``started_at`` by a few
    # microseconds even though nothing has been updated yet.
    created_at = _utc_now_iso()
    return {
        "schema_version": "phase-timings-v1",
        "started_at": created_at,
        "updated_at": created_at,
        "phases": {},
    }
|
||||
|
||||
|
||||
def _write_phase_timings(
    output_dir: Path,
    phase_timings: dict[str, object],
) -> None:
    """Stamp ``phase_timings`` with the current time and save it as JSON.

    The ``updated_at`` entry of ``phase_timings`` is overwritten in place.
    The whole mapping is then serialized (sorted keys, two-space indent,
    non-ASCII characters kept verbatim, trailing newline) and written as
    UTF-8 to ``phase_timings.json`` inside ``output_dir``.

    Args:
        output_dir: Directory that receives ``phase_timings.json``.
        phase_timings: Timings document to refresh and persist.
    """
    phase_timings["updated_at"] = _utc_now_iso()
    target = output_dir / "phase_timings.json"
    serialized = json.dumps(
        phase_timings,
        ensure_ascii=False,
        sort_keys=True,
        indent=2,
    )
    target.write_text(serialized + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def _measure_phase(
    phase_timings: dict[str, object] | None,
    phase_name: str,
    func: Callable[[], T],
) -> T:
    """Run ``func`` inside ``_timed_phase`` and hand back its result.

    Args:
        phase_timings: Timings document passed through to ``_timed_phase``;
            may be ``None``.
        phase_name: Key passed through to ``_timed_phase``.
        func: Zero-argument callable to execute.

    Returns:
        Whatever ``func`` returns.
    """
    with _timed_phase(phase_timings, phase_name):
        outcome = func()
    return outcome
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _timed_phase(
|
||||
phase_timings: dict[str, object] | None,
|
||||
phase_name: str,
|
||||
) -> Iterator[None]:
|
||||
started = time.perf_counter()
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if phase_timings is not None:
|
||||
phases = phase_timings.get("phases")
|
||||
if not isinstance(phases, dict):
|
||||
phases = {}
|
||||
phase_timings["phases"] = phases
|
||||
previous = phases.get(phase_name, 0)
|
||||
if not isinstance(previous, (int, float)):
|
||||
previous = 0
|
||||
phases[phase_name] = round(
|
||||
float(previous) + time.perf_counter() - started,
|
||||
6,
|
||||
)
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
|
||||
parser = argparse.ArgumentParser(
|
||||
@@ -43,6 +104,7 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
|
||||
output_dir = Path(config["output"]["dir"])
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
phase_timings = _new_phase_timings()
|
||||
|
||||
video_manifest_path = output_dir / "video_manifest.jsonl"
|
||||
resume_enabled = bool(config.get("output", {}).get("resume", False))
|
||||
@@ -63,11 +125,13 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
records,
|
||||
record_indexes,
|
||||
download_source=not args.dry_run,
|
||||
phase_timings=phase_timings,
|
||||
)
|
||||
except ValueError as exc:
|
||||
parser.error(str(exc))
|
||||
|
||||
write_manifest(video_manifest_path, records)
|
||||
_write_phase_timings(output_dir, phase_timings)
|
||||
if args.dry_run:
|
||||
return 0
|
||||
|
||||
@@ -93,27 +157,29 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
if record.get("status") == "sampled" and record.get("video_id")
|
||||
}
|
||||
changed_frame_video_ids: set[str] = set(backfilled_frame_video_ids)
|
||||
for record in records:
|
||||
if record.get("status") != "probed":
|
||||
continue
|
||||
video_id = str(record.get("video_id"))
|
||||
if args.until == "inference" and video_id in existing_clip_video_ids:
|
||||
continue
|
||||
if video_id in existing_sampled_video_ids:
|
||||
continue
|
||||
frame_records = _without_video_records(frame_records, video_id)
|
||||
ffmpeg_config = dict(config["ffmpeg"])
|
||||
ffmpeg_config["timezone"] = timezone_name
|
||||
frame_records.extend(
|
||||
sample_video_frames(
|
||||
record,
|
||||
output_dir,
|
||||
ffmpeg_config,
|
||||
manifest_path=None,
|
||||
with _timed_phase(phase_timings, "frame_sampling_seconds"):
|
||||
for record in records:
|
||||
if record.get("status") != "probed":
|
||||
continue
|
||||
video_id = str(record.get("video_id"))
|
||||
if args.until == "inference" and video_id in existing_clip_video_ids:
|
||||
continue
|
||||
if video_id in existing_sampled_video_ids:
|
||||
continue
|
||||
frame_records = _without_video_records(frame_records, video_id)
|
||||
ffmpeg_config = dict(config["ffmpeg"])
|
||||
ffmpeg_config["timezone"] = timezone_name
|
||||
frame_records.extend(
|
||||
sample_video_frames(
|
||||
record,
|
||||
output_dir,
|
||||
ffmpeg_config,
|
||||
manifest_path=None,
|
||||
)
|
||||
)
|
||||
)
|
||||
changed_frame_video_ids.add(video_id)
|
||||
changed_frame_video_ids.add(video_id)
|
||||
write_manifest(frame_manifest_path, frame_records)
|
||||
_write_phase_timings(output_dir, phase_timings)
|
||||
|
||||
sampled_video_ids = {
|
||||
str(record.get("video_id"))
|
||||
@@ -133,22 +199,28 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
for record in frame_records
|
||||
if str(record.get("video_id")) in clip_rebuild_video_ids
|
||||
]
|
||||
clip_records.extend(build_clip_records(frames_to_build, config["clip"]))
|
||||
with _timed_phase(phase_timings, "clip_generation_seconds"):
|
||||
clip_records.extend(build_clip_records(frames_to_build, config["clip"]))
|
||||
write_manifest(output_dir / "clip_manifest.jsonl", clip_records)
|
||||
_write_phase_timings(output_dir, phase_timings)
|
||||
if args.until == "clips":
|
||||
return 0
|
||||
|
||||
_run_inference(
|
||||
clip_records,
|
||||
records,
|
||||
output_dir,
|
||||
config,
|
||||
limit_clips=args.limit_clips,
|
||||
resume=resume_enabled,
|
||||
)
|
||||
with _timed_phase(phase_timings, "inference_seconds"):
|
||||
_run_inference(
|
||||
clip_records,
|
||||
records,
|
||||
output_dir,
|
||||
config,
|
||||
limit_clips=args.limit_clips,
|
||||
resume=resume_enabled,
|
||||
)
|
||||
_write_phase_timings(output_dir, phase_timings)
|
||||
if args.until == "inference":
|
||||
return 0
|
||||
aggregate_outputs(output_dir, config)
|
||||
with _timed_phase(phase_timings, "aggregation_seconds"):
|
||||
aggregate_outputs(output_dir, config)
|
||||
_write_phase_timings(output_dir, phase_timings)
|
||||
return 0
|
||||
|
||||
|
||||
@@ -175,33 +247,40 @@ def _acquire_source_records(
|
||||
record_indexes: dict[str, int],
|
||||
*,
|
||||
download_source: bool = True,
|
||||
phase_timings: dict[str, object] | None = None,
|
||||
) -> None:
|
||||
for source_record in _source_video_records(
|
||||
config,
|
||||
output_dir,
|
||||
download_source=download_source,
|
||||
):
|
||||
path = source_record.get("path")
|
||||
if not path:
|
||||
continue
|
||||
video_id = stable_video_id(str(path))
|
||||
existing_index = record_indexes.get(video_id)
|
||||
if (
|
||||
existing_index is not None
|
||||
and records[existing_index].get("status") == "probed"
|
||||
):
|
||||
continue
|
||||
|
||||
probe_record = probe_video(
|
||||
str(path),
|
||||
timeout_seconds=config["ffprobe"]["timeout_seconds"],
|
||||
source_records = _measure_phase(
|
||||
phase_timings,
|
||||
"source_acquisition_seconds",
|
||||
lambda: _source_video_records(
|
||||
config,
|
||||
output_dir,
|
||||
download_source=download_source,
|
||||
)
|
||||
record = {**source_record, **probe_record, "video_id": video_id}
|
||||
if existing_index is None:
|
||||
record_indexes[video_id] = len(records)
|
||||
records.append(record)
|
||||
else:
|
||||
records[existing_index] = record
|
||||
)
|
||||
with _timed_phase(phase_timings, "video_probe_seconds"):
|
||||
for source_record in source_records:
|
||||
path = source_record.get("path")
|
||||
if not path:
|
||||
continue
|
||||
video_id = stable_video_id(str(path))
|
||||
existing_index = record_indexes.get(video_id)
|
||||
if (
|
||||
existing_index is not None
|
||||
and records[existing_index].get("status") == "probed"
|
||||
):
|
||||
continue
|
||||
|
||||
probe_record = probe_video(
|
||||
str(path),
|
||||
timeout_seconds=config["ffprobe"]["timeout_seconds"],
|
||||
)
|
||||
record = {**source_record, **probe_record, "video_id": video_id}
|
||||
if existing_index is None:
|
||||
record_indexes[video_id] = len(records)
|
||||
records.append(record)
|
||||
else:
|
||||
records[existing_index] = record
|
||||
|
||||
|
||||
def _source_video_records(
|
||||
|
||||
@@ -63,6 +63,7 @@ def build_clip_result(
|
||||
"status": result_status,
|
||||
"monitoring_timeline": timeline,
|
||||
"events": _events(payload, clip_record) if result_status == "ok" else [],
|
||||
"qsc_events": _qsc_events(payload) if result_status == "ok" else [],
|
||||
"raw_response": raw_response,
|
||||
"processing": processing_record,
|
||||
"error": result_error,
|
||||
@@ -131,6 +132,17 @@ def _event(
|
||||
return normalized
|
||||
|
||||
|
||||
def _qsc_events(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
raw_events = payload.get("qsc_events") or []
|
||||
if not isinstance(raw_events, list):
|
||||
return []
|
||||
return [
|
||||
dict(event)
|
||||
for event in raw_events
|
||||
if isinstance(event, dict)
|
||||
]
|
||||
|
||||
|
||||
def _video_path(video_record: dict[str, Any] | None) -> str | None:
|
||||
if not video_record:
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user