Initial video AI analysis project
This commit is contained in:
134
video_ai_analysis_poc/vlm_client.py
Normal file
134
video_ai_analysis_poc/vlm_client.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import time
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
|
||||
# Signature of an injectable HTTP POST transport. It is invoked as
# (url, json_payload, timeout_seconds) and must return a dict; infer_clip reads
# the "status" and "body" keys from that dict.
HttpPost = Callable[[str, dict[str, Any], int], dict[str, Any]]
|
||||
|
||||
|
||||
def infer_clip(
    clip_record: dict[str, Any],
    output_dir: str | Path,
    vlm_config: dict[str, Any],
    prompt_config: dict[str, Any],
    *,
    http_post: HttpPost | None = None,
) -> dict[str, Any]:
    """Send one clip to the VLM chat endpoint and summarize the reply.

    Args:
        clip_record: Clip description, forwarded to ``build_payload``.
        output_dir: Directory forwarded to ``build_payload``.
        vlm_config: Endpoint and model settings. ``timeout_seconds``
            (default 120) is read here and handed to the transport as an int.
        prompt_config: Prompt settings, forwarded to ``build_payload``.
        http_post: Optional transport used instead of ``_post_json``.

    Returns:
        A dict with ``raw_response`` (message text extracted from the
        response body), ``http_status`` and ``latency_ms``.
    """
    # The clock starts before the payload is built, so the reported latency
    # covers request construction as well as the HTTP round trip.
    started_at = time.monotonic()
    send = http_post if http_post else _post_json

    endpoint = build_chat_url(vlm_config)
    request_payload = build_payload(
        clip_record, output_dir, vlm_config, prompt_config
    )
    timeout_seconds = int(vlm_config.get("timeout_seconds", 120))
    reply = send(endpoint, request_payload, timeout_seconds)

    elapsed_ms = int((time.monotonic() - started_at) * 1000)

    summary: dict[str, Any] = {}
    summary["raw_response"] = _extract_message_content(reply.get("body"))
    summary["http_status"] = reply.get("status")
    summary["latency_ms"] = elapsed_ms
    return summary
|
||||
|
||||
|
||||
def build_chat_url(vlm_config: dict[str, Any]) -> str:
    """Join the configured API base URL and chat-completions path.

    Trailing slashes are stripped from ``api_base_url``. A non-empty
    ``chat_completions_path`` that lacks a leading slash gets one, so the
    path is never glued directly onto the host. An empty path yields the
    bare base URL.

    Args:
        vlm_config: Must contain ``api_base_url`` and
            ``chat_completions_path``.

    Returns:
        The full endpoint URL as a string.

    Raises:
        KeyError: If either required key is missing.
    """
    base = str(vlm_config["api_base_url"]).rstrip("/")
    path = str(vlm_config["chat_completions_path"])
    # Paths that already start with "/" are passed through untouched so
    # existing configurations produce exactly the same URL as before.
    if path and not path.startswith("/"):
        path = "/" + path
    return base + path
|
||||
|
||||
|
||||
def build_payload(
    clip_record: dict[str, Any],
    output_dir: str | Path,
    vlm_config: dict[str, Any],
    prompt_config: dict[str, Any],
) -> dict[str, Any]:
    """Build the chat-completions request body for one clip.

    The user message holds the ``user`` prompt text followed by one
    ``image_url`` part per entry of ``clip_record["frame_times"]`` that has
    a non-empty ``frame_path``. Entries without a path are skipped.

    Args:
        clip_record: Clip description; ``frame_times`` is read as an
            iterable of dicts. A missing or ``None`` value means no frames.
        output_dir: Passed to ``_image_url`` together with each frame path.
        vlm_config: Supplies ``model``, ``temperature`` (default 0),
            ``max_tokens`` (default 512) and ``image_transport``
            (default ``"data_uri"``).
        prompt_config: Supplies the ``system`` and ``user`` prompt strings
            (each defaults to an empty string).

    Returns:
        A dict with ``model``, ``messages``, ``temperature`` and
        ``max_tokens`` keys.
    """
    # The transport mode is identical for every frame; resolve it once.
    image_transport = str(vlm_config.get("image_transport", "data_uri"))

    content: list[dict[str, Any]] = [
        {"type": "text", "text": str(prompt_config.get("user", ""))}
    ]
    # ``or []`` also covers an explicit ``"frame_times": None``, which would
    # otherwise fail with TypeError when iterated.
    for frame in clip_record.get("frame_times") or []:
        frame_path = frame.get("frame_path")
        if not frame_path:
            continue
        content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": _image_url(frame_path, output_dir, image_transport)
                },
            }
        )

    return {
        "model": vlm_config.get("model"),
        "messages": [
            {"role": "system", "content": str(prompt_config.get("system", ""))},
            {"role": "user", "content": content},
        ],
        "temperature": vlm_config.get("temperature", 0),
        "max_tokens": vlm_config.get("max_tokens", 512),
    }
|
||||
|
||||
|
||||
def _image_url(
    frame_path: str | Path,
    output_dir: str | Path,
    image_transport: str,
) -> str:
    """Return the value to place in an ``image_url`` part for one frame.

    With ``image_transport == "data_uri"`` the frame file is read and
    returned as a base64 ``data:`` URI; a relative ``frame_path`` is
    resolved against ``output_dir``. For any other transport the frame
    path is returned unchanged as a string and no file is read.
    """
    if image_transport == "data_uri":
        image_file = Path(frame_path).expanduser()
        if not image_file.is_absolute():
            image_file = Path(output_dir).expanduser() / image_file
        encoded = base64.b64encode(image_file.read_bytes()).decode("ascii")
        return f"data:{_mime_type(image_file)};base64,{encoded}"

    return str(frame_path)
|
||||
|
||||
|
||||
def _mime_type(path: Path) -> str:
    """Map a file's suffix (case-insensitive) to an image MIME type.

    Recognizes ``.jpg``/``.jpeg``, ``.png`` and ``.webp``; any other suffix
    yields ``application/octet-stream``.
    """
    known_types = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".webp": "image/webp",
    }
    return known_types.get(path.suffix.lower(), "application/octet-stream")
|
||||
|
||||
|
||||
def _post_json(
    url: str,
    payload: dict[str, Any],
    timeout_seconds: int,
) -> dict[str, Any]:
    """POST ``payload`` as UTF-8 JSON to ``url`` and decode the JSON reply.

    Args:
        url: Target URL.
        payload: Object serialized with ``json.dumps`` as the request body.
        timeout_seconds: Timeout passed to ``urllib.request.urlopen``.

    Returns:
        A dict with ``status`` (the response's ``status`` attribute) and
        ``body`` (the parsed JSON, or ``{}`` when the response body is empty).

    Exceptions raised by ``urlopen``, UTF-8 decoding or ``json.loads`` are
    not caught here.
    """
    request = urllib.request.Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        method="POST",
    )
    request.add_header("Content-Type", "application/json")

    with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
        text = response.read().decode("utf-8")
        status = response.status

    parsed = json.loads(text) if text else {}
    return {"status": status, "body": parsed}
|
||||
|
||||
|
||||
def _extract_message_content(body: Any) -> str:
    """Pull the first choice's message content out of a chat response body.

    Args:
        body: Decoded response body; anything other than a dict yields ``""``.

    Returns:
        The content string of ``body["choices"][0]["message"]``. Structured
        (non-string) content is returned JSON-encoded. An empty string is
        returned whenever the expected structure is absent: ``choices``
        missing, empty or not a sequence, a first choice or ``message`` that
        is not a dict, or ``content`` missing or ``None``.
    """
    if not isinstance(body, dict):
        return ""

    choices = body.get("choices")
    # Require a real sequence: indexing a dict or scalar with [0] would raise.
    if not isinstance(choices, (list, tuple)) or not choices:
        return ""

    first_choice = choices[0]
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    if not isinstance(message, dict):
        return ""

    content = message.get("content")
    # A JSON null content must not be serialized into the literal string
    # "null"; treat it as no text.
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    return json.dumps(content, ensure_ascii=False)
|
||||
Reference in New Issue
Block a user