feat: add deployment configuration and scripts for managed-portal, including Dockerfiles and environment settings

This commit is contained in:
2026-05-13 16:49:21 +08:00
parent 330373b8f1
commit f8a6d9803d
13 changed files with 563 additions and 71 deletions

View File

@@ -0,0 +1,4 @@
# Runtime overlay image: layers extra files on top of an existing base image.
# BASE_IMAGE has no default here, so it must be supplied with --build-arg.
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
# Copy the build context's rootfs/ tree over the image's root filesystem.
COPY rootfs/ /

View File

@@ -0,0 +1,75 @@
# Compose project for an OTA release of the managed-portal stack.
# Every image reference is mandatory (`:?`) and is expected to come from the
# environment or the --env-file passed to `docker compose`.
name: managed-portal

services:
  # Portal backend; started after the two managed services.
  managed-portal:
    image: ${MANAGED_PORTAL_IMAGE:?MANAGED_PORTAL_IMAGE is required}
    container_name: managed-portal
    restart: unless-stopped
    depends_on:
      - store-dwell-alert
      - people-flow-project
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      MANAGED_PORTAL_HTTP_ADDR: ":9080"
      MANAGED_PORTAL_REGISTRY_PATH: "/app/managed_services.yaml"
    volumes:
      # NOTE(review): the host Docker socket is mounted read-write, which lets
      # this container control the Docker daemon - confirm this is required.
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - managed-portal

  store-dwell-alert:
    image: ${STORE_DWELL_ALERT_IMAGE:?STORE_DWELL_ALERT_IMAGE is required}
    container_name: store-dwell-alert
    restart: unless-stopped
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      CAMERA_ID: ${MANAGED_STORE_DWELL_CAMERA_ID:-store_cam_01}
      RTSP_URL: ${MANAGED_STORE_DWELL_RTSP_URL:-}
      EVENT_SINK_PATH: ${MANAGED_STORE_DWELL_EVENT_SINK_PATH:-logs/events.jsonl}
      # Literal env values are quoted so YAML never retypes them.
      API_HOST: "0.0.0.0"
      API_PORT: "18081"
      CONFIG_PATH: /app/config/local.yaml
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      # Relative host paths are resolved against this compose file's directory.
      - ${MANAGED_STORE_DWELL_CONFIG_DIR:-../managed/store_dwell_alert/config}:/app/config
      - ${MANAGED_STORE_DWELL_DATA_DIR:-../managed/store_dwell_alert/data}:/app/data
    networks:
      - managed-portal

  people-flow-project:
    image: ${PEOPLE_FLOW_PROJECT_IMAGE:?PEOPLE_FLOW_PROJECT_IMAGE is required}
    container_name: people-flow-project
    restart: unless-stopped
    # NOTE(review): the service-level `gpus` key needs a recent Docker Compose
    # and a working GPU container runtime on the host - confirm on targets.
    gpus: all
    shm_size: "1gb"
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      CONFIG_PATH: /opt/people-flow/config/local.yaml
      RTSP_URL: ${MANAGED_PEOPLE_FLOW_RTSP_URL:-}
      OUTPUT_DIR: /opt/people-flow/outputs
      API_HOST: "0.0.0.0"
      API_PORT: "18082"
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - ${MANAGED_PEOPLE_FLOW_CONFIG_DIR:-../managed/people_flow_project/config}:/opt/people-flow/config
      - ${MANAGED_PEOPLE_FLOW_OUTPUT_DIR:-../managed/people_flow_project/outputs}:/opt/people-flow/outputs
      - ${MANAGED_PEOPLE_FLOW_WEIGHTS_DIR:-../managed/people_flow_project/weights}:/opt/people-flow/weights
    networks:
      - managed-portal

  # Web front end; the only service that publishes a host port.
  managed-portal-web:
    image: ${MANAGED_PORTAL_WEB_IMAGE:?MANAGED_PORTAL_WEB_IMAGE is required}
    container_name: managed-portal-web
    restart: unless-stopped
    depends_on:
      - managed-portal
    ports:
      - "${MANAGED_PORTAL_WEB_PORT:-13000}:80"
    networks:
      - managed-portal

networks:
  managed-portal:
    driver: bridge

View File

@@ -66,6 +66,7 @@ services:
volumes: volumes:
- ${MANAGED_PEOPLE_FLOW_CONFIG_DIR:-../managed/people_flow_project/config}:/opt/people-flow/config - ${MANAGED_PEOPLE_FLOW_CONFIG_DIR:-../managed/people_flow_project/config}:/opt/people-flow/config
- ${MANAGED_PEOPLE_FLOW_OUTPUT_DIR:-../managed/people_flow_project/outputs}:/opt/people-flow/outputs - ${MANAGED_PEOPLE_FLOW_OUTPUT_DIR:-../managed/people_flow_project/outputs}:/opt/people-flow/outputs
- ${MANAGED_PEOPLE_FLOW_WEIGHTS_DIR:-../managed/people_flow_project/weights}:/opt/people-flow/weights
networks: networks:
- managed-portal - managed-portal

View File

@@ -0,0 +1,111 @@
#!/usr/bin/env sh
# Installer for a managed-portal release bundle: downloads the ZIP, unpacks it,
# pulls the release images, optionally builds runtime overlay images, and
# starts the stack with Docker Compose.
# -e: abort on the first failing command; -u: treat unset variables as errors.
set -eu
# Every setting below can be overridden from the caller's environment.
# Release identifier used in the bundle name, target directory and overlay tags.
RELEASE_VERSION="${RELEASE_VERSION:-20260513-330373b-11}"
# HTTP location the bundle is downloaded from (a trailing slash is tolerated).
BASE_URL="${BASE_URL:-http://10.8.0.1/ai_deploy}"
# File name of the ZIP bundle under BASE_URL.
BUNDLE_NAME="${BUNDLE_NAME:-managed-portal-${RELEASE_VERSION}.zip}"
# Directory the bundle is unpacked into.
INSTALL_ROOT="${INSTALL_ROOT:-/opt/managed-portal-releases}"
# Directory the unpacked release is expected to occupy; it is removed and
# recreated on every run.
TARGET_DIR="${TARGET_DIR:-${INSTALL_ROOT}/managed-portal-${RELEASE_VERSION}}"
# require_command NAME
# Exit the script with status 1 (and a message on stderr) unless NAME can be
# resolved by `command -v`.
require_command() {
  command -v "$1" >/dev/null 2>&1 && return 0
  echo "missing required command: $1" >&2
  exit 1
}
# run_compose ARGS...
# Run Docker Compose with ARGS and return its exit status.
# The `docker compose` plugin is preferred: the release compose file uses
# newer Compose-spec keys (top-level `name`, service-level `gpus`) that the
# legacy standalone `docker-compose` may not understand. The standalone binary
# is kept only as a fallback for hosts without the plugin.
run_compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
    return
  fi
  if command -v docker-compose >/dev/null 2>&1; then
    docker-compose "$@"
    return
  fi
  echo "missing required command: docker compose (plugin) or docker-compose" >&2
  return 1
}
# download_bundle DIR
# Fetch $BUNDLE_NAME from $BASE_URL into DIR and print the local file path on
# stdout. Progress goes to stderr so that callers can capture stdout.
# Variable names are shared with the caller's scope (POSIX sh has no locals),
# so this is meant to be invoked inside a command substitution.
download_bundle() {
  tmp_dir="$1"
  # Strip one trailing slash from BASE_URL to avoid a double slash in the URL.
  bundle_url="${BASE_URL%/}/$BUNDLE_NAME"
  bundle_zip="$tmp_dir/$BUNDLE_NAME"
  echo "downloading $bundle_url" >&2
  # -f: fail on HTTP errors; -L: follow redirects.
  curl -fL "$bundle_url" -o "$bundle_zip"
  echo "$bundle_zip"
}
# build_overlay_image NAME BASE_IMAGE OVERLAY_ROOT OVERLAY_IMAGE
# Print the image reference the caller should run:
#   - BASE_IMAGE when OVERLAY_ROOT is missing or contains no entries;
#   - OVERLAY_IMAGE after building it from BASE_IMAGE plus the overlay files.
# Only the resulting image reference is written to stdout; build output is
# discarded and the progress message goes to stderr.
build_overlay_image() {
  overlay_name="$1"
  base_image="$2"
  overlay_root="$3"
  overlay_image="$4"
  # The Dockerfile copies "rootfs/", so the build context is the parent dir.
  overlay_context="$(dirname "$overlay_root")"

  # Nothing to overlay: directory absent, or present but empty.
  if [ ! -d "$overlay_root" ] ||
    [ -z "$(find "$overlay_root" -mindepth 1 -print -quit)" ]; then
    printf '%s\n' "$base_image"
    return 0
  fi

  echo "building runtime overlay for $overlay_name" >&2
  docker build \
    -f "$TARGET_DIR/deploy/Dockerfile.runtime-overlay" \
    --build-arg "BASE_IMAGE=$base_image" \
    -t "$overlay_image" \
    "$overlay_context" >/dev/null
  printf '%s\n' "$overlay_image"
}
# ---- main flow ---------------------------------------------------------------
require_command curl
require_command unzip
require_command docker

# TARGET_DIR is removed recursively below; refuse obviously destructive values
# that could arrive through an environment override.
case "$TARGET_DIR" in
  "" | /)
    echo "refusing to use TARGET_DIR='$TARGET_DIR'" >&2
    exit 1
    ;;
esac

tmp_dir="$(mktemp -d)"
# Cleanup happens in the EXIT trap only. A POSIX shell resumes the script after
# a signal trap action returns, so INT/TERM must exit explicitly; that exit
# then triggers the EXIT trap.
trap 'rm -rf "$tmp_dir"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

mkdir -p "$INSTALL_ROOT"
bundle_zip="$(download_bundle "$tmp_dir")"

# Replace any previous unpack of the same release with the fresh bundle.
rm -rf "$TARGET_DIR"
unzip -oq "$bundle_zip" -d "$INSTALL_ROOT"
if [ ! -f "$TARGET_DIR/release-manifest.env" ]; then
  echo "release-manifest.env not found in $TARGET_DIR" >&2
  exit 1
fi

# Load the manifest; `set -a` exports every variable it assigns.
set -a
. "$TARGET_DIR/release-manifest.env"
set +a

# Fail with a clear message if the manifest omits (or blanks) an image ref.
: "${MANAGED_PORTAL_IMAGE:?release-manifest.env must define MANAGED_PORTAL_IMAGE}"
: "${MANAGED_PORTAL_WEB_IMAGE:?release-manifest.env must define MANAGED_PORTAL_WEB_IMAGE}"
: "${PEOPLE_FLOW_PROJECT_IMAGE:?release-manifest.env must define PEOPLE_FLOW_PROJECT_IMAGE}"
: "${STORE_DWELL_ALERT_IMAGE:?release-manifest.env must define STORE_DWELL_ALERT_IMAGE}"

echo "pulling release images"
docker pull "$MANAGED_PORTAL_IMAGE"
docker pull "$MANAGED_PORTAL_WEB_IMAGE"
docker pull "$PEOPLE_FLOW_PROJECT_IMAGE"
docker pull "$STORE_DWELL_ALERT_IMAGE"

# Swap in locally built overlay images when the bundle ships overlay files;
# otherwise build_overlay_image echoes the pulled image back unchanged.
PEOPLE_FLOW_PROJECT_IMAGE="$(build_overlay_image \
  people-flow-project \
  "$PEOPLE_FLOW_PROJECT_IMAGE" \
  "$TARGET_DIR/runtime-overlays/people-flow-project/rootfs" \
  "managed-portal-runtime/people-flow-project:${RELEASE_VERSION}")"
STORE_DWELL_ALERT_IMAGE="$(build_overlay_image \
  store-dwell-alert \
  "$STORE_DWELL_ALERT_IMAGE" \
  "$TARGET_DIR/runtime-overlays/store-dwell-alert/rootfs" \
  "managed-portal-runtime/store-dwell-alert:${RELEASE_VERSION}")"

# Make the final image references visible to Compose variable substitution.
export MANAGED_PORTAL_IMAGE
export MANAGED_PORTAL_WEB_IMAGE
export PEOPLE_FLOW_PROJECT_IMAGE
export STORE_DWELL_ALERT_IMAGE

# Run from deploy/ so the compose and env file names below resolve.
cd "$TARGET_DIR/deploy"
run_compose \
  --env-file managed-portal.release.env \
  -f docker-compose.ota-release.yml \
  up -d
echo "release installed under $TARGET_DIR"

View File

@@ -0,0 +1,14 @@
# Environment values for the managed-portal compose stack.
# NOTE(review): IMAGE_VERSION is not referenced by the release compose file
# shown in this change - confirm which compose file consumes it.
IMAGE_VERSION=dev
TZ=Asia/Shanghai
# Host port published for the web front end.
MANAGED_PORTAL_WEB_PORT=13000
MANAGED_STORE_DWELL_CAMERA_ID=cam_192_168_1_10
# NOTE(review): this URL embeds camera credentials in plain text. Anything
# committed or bundled here is readable by everyone with access to the repo or
# the release ZIP - prefer injecting it at deploy time, and rotate the password
# if this file has already been published.
MANAGED_STORE_DWELL_RTSP_URL=rtsp://admin:Zxjp2026@192.168.1.10:554/Streaming/Channels/101
MANAGED_STORE_DWELL_EVENT_SINK_PATH=logs/events.jsonl
MANAGED_STORE_DWELL_CONFIG_DIR=../managed/store_dwell_alert/config
MANAGED_STORE_DWELL_DATA_DIR=../managed/store_dwell_alert/data
# NOTE(review): same plaintext-credential concern as the store-dwell URL above.
MANAGED_PEOPLE_FLOW_RTSP_URL=rtsp://admin:Zxjp2026@192.168.1.10:554/Streaming/Channels/101
MANAGED_PEOPLE_FLOW_CONFIG_DIR=../managed/people_flow_project/config
MANAGED_PEOPLE_FLOW_OUTPUT_DIR=../managed/people_flow_project/outputs
# NOTE(review): absolute, host-specific path, unlike the relative *_DIR values
# above - confirm it exists on every host this file is used on.
MANAGED_PEOPLE_FLOW_WEIGHTS_DIR=/home/xiaozheng/people_flow_project/weights

View File

@@ -3,7 +3,7 @@ FROM swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/python:3.12-slim
ENV PYTHONDONTWRITEBYTECODE=1 \ ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \ PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=1 \ PIP_NO_CACHE_DIR=1 \
PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \ PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/ \
DEEPFACE_HOME=/root/.deepface \ DEEPFACE_HOME=/root/.deepface \
TF_CPP_MIN_LOG_LEVEL=2 TF_CPP_MIN_LOG_LEVEL=2
@@ -19,8 +19,7 @@ RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g; s
COPY requirements-docker.txt ./requirements-docker.txt COPY requirements-docker.txt ./requirements-docker.txt
RUN python -m pip install --upgrade pip setuptools wheel && \ RUN pip install "numpy<2"
pip install "numpy<2"
RUN pip install --extra-index-url https://download.pytorch.org/whl/cpu \ RUN pip install --extra-index-url https://download.pytorch.org/whl/cpu \
"torch==2.6.0+cpu" "torchvision==0.21.0+cpu" "torch==2.6.0+cpu" "torchvision==0.21.0+cpu"
@@ -32,17 +31,12 @@ RUN pip install -r requirements-docker.txt
COPY . . COPY . .
COPY scripts/docker-entrypoint.sh /opt/people-flow/scripts/docker-entrypoint.sh COPY scripts/docker-entrypoint.sh /opt/people-flow/scripts/docker-entrypoint.sh
RUN test -f /opt/people-flow/weights/yolo11n.pt && \ RUN mkdir -p /root/.deepface/weights /opt/people-flow/outputs && \
test -f /opt/people-flow/weights/deepface/age_model_weights.h5 && \
test -f /opt/people-flow/weights/deepface/gender_model_weights.h5 && \
test -f /opt/people-flow/weights/deepface/retinaface.h5 && \
mkdir -p /root/.deepface/weights /opt/people-flow/outputs && \
cp /opt/people-flow/weights/deepface/*.h5 /root/.deepface/weights/ && \
chmod +x /opt/people-flow/scripts/docker-entrypoint.sh chmod +x /opt/people-flow/scripts/docker-entrypoint.sh
EXPOSE 18082 EXPOSE 18082
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \ HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:18082/api/manage/health', timeout=3).read()" || exit 1 CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:18082/api/manage/health', timeout=3).read()" || exit 1
ENTRYPOINT ["/opt/people-flow/scripts/docker-entrypoint.sh"] ENTRYPOINT ["/opt/people-flow/scripts/docker-entrypoint.sh"]

View File

@@ -8,8 +8,15 @@ OUTPUT_DIR="${OUTPUT_DIR:-${PROJECT_DIR}/outputs}"
RTSP_URL="${RTSP_URL:-}" RTSP_URL="${RTSP_URL:-}"
API_HOST="${API_HOST:-0.0.0.0}" API_HOST="${API_HOST:-0.0.0.0}"
API_PORT="${API_PORT:-18082}" API_PORT="${API_PORT:-18082}"
RTSP_STALL_TIMEOUT_SECONDS="${RTSP_STALL_TIMEOUT_SECONDS:-180}"
DEEPFACE_CACHE_DIR="/root/.deepface/weights"
DEEPFACE_SOURCE_DIR="${PROJECT_DIR}/weights/deepface"
mkdir -p "${OUTPUT_DIR}" "$(dirname "${CONFIG_PATH}")" mkdir -p "${OUTPUT_DIR}" "$(dirname "${CONFIG_PATH}")" "${DEEPFACE_CACHE_DIR}"
if [ -d "${DEEPFACE_SOURCE_DIR}" ]; then
find "${DEEPFACE_SOURCE_DIR}" -maxdepth 1 -name '*.h5' -exec cp {} "${DEEPFACE_CACHE_DIR}/" \;
fi
if [ ! -f "${CONFIG_PATH}" ]; then if [ ! -f "${CONFIG_PATH}" ]; then
cp "${CONFIG_TEMPLATE}" "${CONFIG_PATH}" cp "${CONFIG_TEMPLATE}" "${CONFIG_PATH}"
@@ -37,13 +44,31 @@ config_path.write_text(
) )
PY PY
exec python - "$CONFIG_PATH" "$API_HOST" "$API_PORT" <<'PY' RTSP_OUTPUT_SUBDIR="$(python - "$CONFIG_PATH" <<'PY'
from pathlib import Path
import sys
import yaml
config_path = Path(sys.argv[1])
raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
rtsp = raw.get("rtsp") or {}
print(rtsp.get("output_subdir", "rtsp_stream"))
PY
)"
RTSP_STATUS_PATH="${OUTPUT_DIR}/${RTSP_OUTPUT_SUBDIR}/worker_status.json"
exec python - "$CONFIG_PATH" "$API_HOST" "$API_PORT" "$RTSP_STATUS_PATH" "$RTSP_STALL_TIMEOUT_SECONDS" <<'PY'
from pathlib import Path
import signal import signal
import subprocess import subprocess
import sys import sys
import time import time
config_path, api_host, api_port = sys.argv[1:4] from src.people_flow.worker_status import worker_status_stall_reason
config_path, api_host, api_port, status_path_raw, stall_timeout_raw = sys.argv[1:6]
status_path = Path(status_path_raw)
stall_timeout_seconds = max(float(stall_timeout_raw), 30.0)
commands = [ commands = [
[sys.executable, "main.py", "--config", config_path, "rtsp"], [sys.executable, "main.py", "--config", config_path, "rtsp"],
[ [
@@ -59,21 +84,38 @@ commands = [
], ],
] ]
processes = [subprocess.Popen(command) for command in commands] processes = [subprocess.Popen(command) for command in commands]
supervisor_started_at = time.time()
def terminate_all(signum, _frame): def stop_all(excluded_index=None):
for process in processes: for index, process in enumerate(processes):
if process.poll() is None: if index == excluded_index or process.poll() is not None:
process.terminate() continue
process.terminate()
deadline = time.time() + 10 deadline = time.time() + 10
for process in processes: for index, process in enumerate(processes):
if process.poll() is not None: if index == excluded_index or process.poll() is not None:
continue continue
timeout = max(0, deadline - time.time()) timeout = max(0, deadline - time.time())
try: try:
process.wait(timeout=timeout) process.wait(timeout=timeout)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
process.kill() process.kill()
def stale_reason():
if processes[0].poll() is not None:
return None
return worker_status_stall_reason(
status_path,
started_at=supervisor_started_at,
max_age_seconds=stall_timeout_seconds,
)
def terminate_all(signum, _frame):
stop_all()
raise SystemExit(128 + signum) raise SystemExit(128 + signum)
@@ -85,19 +127,12 @@ while True:
return_code = process.poll() return_code = process.poll()
if return_code is None: if return_code is None:
continue continue
for other_index, other_process in enumerate(processes): stop_all(excluded_index=index)
if other_index == index or other_process.poll() is not None:
continue
other_process.terminate()
deadline = time.time() + 10
for other_index, other_process in enumerate(processes):
if other_index == index or other_process.poll() is not None:
continue
timeout = max(0, deadline - time.time())
try:
other_process.wait(timeout=timeout)
except subprocess.TimeoutExpired:
other_process.kill()
raise SystemExit(return_code) raise SystemExit(return_code)
reason = stale_reason()
if reason is not None:
print(reason, flush=True)
stop_all()
raise SystemExit(1)
time.sleep(0.5) time.sleep(0.5)
PY PY

View File

@@ -24,6 +24,7 @@ from .queue_analytics import QueueWindowTracker
from .tracking import extract_person_tracks from .tracking import extract_person_tracks
from .window_identity import WindowIdentityResolver from .window_identity import WindowIdentityResolver
from .webhook import dispatch_json_event from .webhook import dispatch_json_event
from .worker_status import write_worker_status
SUPPORTED_EXTENSIONS = {".mp4", ".mov", ".mkv", ".avi"} SUPPORTED_EXTENSIONS = {".mp4", ".mov", ".mkv", ".avi"}
@@ -157,6 +158,7 @@ class PeopleFlowPipeline:
def process_rtsp(self, source: str) -> dict: def process_rtsp(self, source: str) -> dict:
rtsp_paths = self.get_rtsp_output_paths() rtsp_paths = self.get_rtsp_output_paths()
status_path = rtsp_paths["root"] / "worker_status.json"
sample_interval = max(float(self.config.rtsp.sample_interval_seconds), 0.01) sample_interval = max(float(self.config.rtsp.sample_interval_seconds), 0.01)
window_seconds = max(int(self.config.rtsp.window_seconds), 1) window_seconds = max(int(self.config.rtsp.window_seconds), 1)
reconnect_delay = max(float(self.config.rtsp.reconnect_delay_seconds), 0.1) reconnect_delay = max(float(self.config.rtsp.reconnect_delay_seconds), 0.1)
@@ -189,8 +191,39 @@ class PeopleFlowPipeline:
if not Path(self.config.webhook.event_log_path).is_absolute() if not Path(self.config.webhook.event_log_path).is_absolute()
else Path(self.config.webhook.event_log_path) else Path(self.config.webhook.event_log_path)
) )
last_status_phase: str | None = None
last_status_written_at = 0.0
def update_status(
phase: str,
*,
force: bool = False,
note: str | None = None,
) -> None:
nonlocal last_status_phase, last_status_written_at
current_time = time.monotonic()
if (
not force
and phase == last_status_phase
and current_time - last_status_written_at < 5.0
):
return
write_worker_status(
status_path,
phase,
source=source,
window_index=window_index,
frame_index=frame_index,
last_processed_at=last_processed_wall_time,
note=note,
)
last_status_phase = phase
last_status_written_at = current_time
try: try:
update_status("starting", force=True)
while True: while True:
now = datetime.now().astimezone() now = datetime.now().astimezone()
while now >= window_end: while now >= window_end:
@@ -215,6 +248,7 @@ class PeopleFlowPipeline:
webhook_url=self.config.webhook.url, webhook_url=self.config.webhook.url,
timeout_seconds=self.config.webhook.timeout_seconds, timeout_seconds=self.config.webhook.timeout_seconds,
) )
update_status("window_flushed", force=True)
print(f"window_json={json_path}", flush=True) print(f"window_json={json_path}", flush=True)
print(f"window_total_people={payload['total_people']}", flush=True) print(f"window_total_people={payload['total_people']}", flush=True)
window_index += 1 window_index += 1
@@ -229,13 +263,25 @@ class PeopleFlowPipeline:
now = datetime.now().astimezone() now = datetime.now().astimezone()
if capture is None or not capture.isOpened(): if capture is None or not capture.isOpened():
update_status("opening_stream")
capture = self._open_rtsp_capture(source, open_timeout_seconds) capture = self._open_rtsp_capture(source, open_timeout_seconds)
if capture is None: if capture is None:
update_status(
"waiting_to_reconnect",
force=True,
note="open_failed",
)
time.sleep(reconnect_delay) time.sleep(reconnect_delay)
continue continue
update_status("reading_frame")
ok, frame = capture.read() ok, frame = capture.read()
if not ok or frame is None: if not ok or frame is None:
update_status(
"waiting_to_reconnect",
force=True,
note="read_failed",
)
capture.release() capture.release()
capture = None capture = None
time.sleep(reconnect_delay) time.sleep(reconnect_delay)
@@ -251,6 +297,7 @@ class PeopleFlowPipeline:
self.config.queue, self.config.queue,
self.config.queue.to_pixel_area(width=width, height=height), self.config.queue.to_pixel_area(width=width, height=height),
) )
update_status("capture_ready", force=True)
current_time = time.monotonic() current_time = time.monotonic()
if current_time - last_processed_at < sample_interval: if current_time - last_processed_at < sample_interval:
@@ -258,6 +305,7 @@ class PeopleFlowPipeline:
time.sleep(idle_sleep) time.sleep(idle_sleep)
continue continue
update_status("tracking_frame")
last_processed_at = current_time last_processed_at = current_time
observations = self._track_frame(frame) observations = self._track_frame(frame)
person_keys = identity_resolver.resolve(frame, observations) person_keys = identity_resolver.resolve(frame, observations)
@@ -281,9 +329,11 @@ class PeopleFlowPipeline:
next_heartbeat_at = current_time + 60.0 next_heartbeat_at = current_time + 60.0
last_processed_wall_time = now last_processed_wall_time = now
frame_index += 1 frame_index += 1
update_status("processed_frame", force=True)
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
finally: finally:
update_status("stopped", force=True)
if capture is not None: if capture is not None:
capture.release() capture.release()

View File

@@ -0,0 +1,84 @@
from __future__ import annotations
import json
from datetime import datetime
from pathlib import Path
def write_worker_status(
    path: Path,
    phase: str,
    *,
    source: str,
    window_index: int,
    frame_index: int,
    last_processed_at: datetime | None,
    note: str | None = None,
) -> dict:
    """Persist a progress snapshot as JSON at ``path`` and return it.

    The parent directory is created when missing. The snapshot is written to
    a sibling ``<name>.tmp`` file first and then moved over ``path``, so the
    target is replaced in a single step rather than rewritten in place.

    Args:
        path: Destination of the status file.
        phase: Label of the step being reported.
        source: Stream source being processed.
        window_index: Index of the current window.
        frame_index: Index of the current frame.
        last_processed_at: Time of the last processed frame, or ``None``.
        note: Optional detail; stored only when truthy.

    Returns:
        The dictionary that was serialized to disk.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    processed_iso = None
    if last_processed_at is not None:
        processed_iso = last_processed_at.isoformat(timespec="seconds")

    payload = {
        "phase": phase,
        "updated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
        "source": source,
        "window_index": window_index,
        "frame_index": frame_index,
        "last_processed_at": processed_iso,
    }
    if note:
        payload["note"] = note

    staging_path = path.with_suffix(path.suffix + ".tmp")
    serialized = json.dumps(payload, ensure_ascii=True, indent=2)
    staging_path.write_text(serialized, encoding="utf-8")
    staging_path.replace(path)
    return payload
def load_worker_status(path: Path) -> dict | None:
    """Read the status file at ``path`` on a best-effort basis.

    Returns:
        The decoded JSON object, or ``None`` when the file cannot be read,
        is not valid UTF-8 / JSON, or does not contain a JSON object.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (corrupt bytes). Callers
        # use this for diagnostics, so an unusable file must not raise.
        return None
    return payload if isinstance(payload, dict) else None
def worker_status_age_seconds(path: Path, now: float | None = None) -> float | None:
    """Return how many seconds ago ``path`` was last modified.

    Args:
        path: Status file to inspect.
        now: Reference timestamp in epoch seconds; defaults to the current time.

    Returns:
        The non-negative age based on the file's mtime, or ``None`` when the
        file does not exist.
    """
    try:
        modified_at = path.stat().st_mtime
    except FileNotFoundError:
        return None
    reference = now if now is not None else datetime.now().timestamp()
    # Clamp so an mtime ahead of the reference never yields a negative age.
    return max(0.0, reference - modified_at)
def worker_status_stall_reason(
    path: Path,
    *,
    started_at: float,
    max_age_seconds: float,
    now: float | None = None,
) -> str | None:
    """Explain why the worker looks stalled, or return ``None`` if it does not.

    The worker is given ``max_age_seconds`` after ``started_at`` to produce
    its first status update. That grace period applies both when the status
    file is missing and when the only file present was last modified before
    ``started_at`` (a leftover from an earlier run): an old leftover must not
    be reported as a stall before the current run had time to write anything.

    Args:
        path: Status file written by the worker.
        started_at: Epoch seconds at which the current run was started.
        max_age_seconds: Longest tolerated gap without a status update.
        now: Reference timestamp in epoch seconds; defaults to the current time.

    Returns:
        A human-readable reason string, or ``None`` when no stall is detected.
    """
    current_time = datetime.now().timestamp() if now is None else now
    seconds_since_start = current_time - started_at

    try:
        modified_at = path.stat().st_mtime
    except FileNotFoundError:
        if seconds_since_start < max_age_seconds:
            return None
        return f"rtsp worker status missing path={path}"

    age_seconds = max(0.0, current_time - modified_at)
    # Silence attributable to this run is bounded by the time since it started.
    if min(age_seconds, seconds_since_start) <= max_age_seconds:
        return None

    payload = load_worker_status(path) or {}
    phase = payload.get("phase", "unknown")
    updated_at = payload.get("updated_at", "unknown")
    return (
        f"rtsp worker stalled path={path} phase={phase} "
        f"updated_at={updated_at} age_seconds={age_seconds:.1f}"
    )

View File

@@ -0,0 +1,101 @@
from __future__ import annotations
import os
from datetime import datetime
from pathlib import Path
from src.people_flow.worker_status import (
load_worker_status,
worker_status_age_seconds,
worker_status_stall_reason,
write_worker_status,
)
def test_write_worker_status_persists_progress(tmp_path: Path):
    """A written status can be loaded back with every reported field intact."""
    status_path = tmp_path / "outputs" / "rtsp_stream" / "worker_status.json"
    last_processed_at = datetime(2026, 5, 13, 16, 30, 0).astimezone()

    write_worker_status(
        status_path,
        "processed_frame",
        source="rtsp://camera/stream",
        window_index=3,
        frame_index=42,
        last_processed_at=last_processed_at,
        note="healthy",
    )

    payload = load_worker_status(status_path)
    assert payload is not None
    expected_fields = {
        "phase": "processed_frame",
        "source": "rtsp://camera/stream",
        "window_index": 3,
        "frame_index": 42,
        "last_processed_at": last_processed_at.isoformat(timespec="seconds"),
        "note": "healthy",
    }
    for field, expected in expected_fields.items():
        assert payload[field] == expected
    assert "updated_at" in payload
def test_worker_status_age_seconds_uses_file_mtime(tmp_path: Path):
    """Age is derived from the file's mtime; a missing file has no age."""
    status_path = tmp_path / "worker_status.json"
    write_worker_status(
        status_path,
        "tracking_frame",
        source="rtsp://camera/stream",
        window_index=0,
        frame_index=0,
        last_processed_at=None,
    )
    # Pin atime and mtime so the expected age is deterministic.
    pinned_times = (100.0, 100.0)
    os.utime(status_path, pinned_times)

    existing_age = worker_status_age_seconds(status_path, now=280.0)
    missing_age = worker_status_age_seconds(tmp_path / "missing.json", now=280.0)

    assert existing_age == 180.0
    assert missing_age is None
def test_worker_status_stall_reason_reports_missing_and_stale_status(tmp_path: Path):
    """Cover the three outcomes: within grace, missing after grace, and stale."""
    missing_path = tmp_path / "missing.json"

    # Missing file, but the run is younger than max_age_seconds: no stall yet.
    assert (
        worker_status_stall_reason(
            missing_path,
            started_at=150.0,
            max_age_seconds=180.0,
            now=300.0,
        )
        is None
    )
    # Missing file after the grace period has elapsed: reported as missing.
    assert "status missing" in worker_status_stall_reason(
        missing_path,
        started_at=0.0,
        max_age_seconds=180.0,
        now=300.0,
    )

    status_path = tmp_path / "worker_status.json"
    write_worker_status(
        status_path,
        "tracking_frame",
        source="rtsp://camera/stream",
        window_index=0,
        frame_index=2,
        last_processed_at=None,
    )
    # Pin the mtime so the file is exactly 200 seconds old at now=300.
    os.utime(status_path, (100.0, 100.0))

    reason = worker_status_stall_reason(
        status_path,
        started_at=0.0,
        max_age_seconds=180.0,
        now=300.0,
    )
    assert reason is not None
    # Must match the wording of the real "missing" message ("status missing");
    # the previous "status=missing" could never appear, so the check was vacuous.
    assert "status missing" not in reason
    assert "phase=tracking_frame" in reason
    assert "age_seconds=200.0" in reason

View File

@@ -3,23 +3,22 @@ FROM swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/python:3.12-slim
ENV PYTHONDONTWRITEBYTECODE=1 \ ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \ PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=1 \ PIP_NO_CACHE_DIR=1 \
PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/
WORKDIR /app WORKDIR /app
RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g; s|http://deb.debian.org/debian-security|http://mirrors.aliyun.com/debian-security|g' /etc/apt/sources.list.d/debian.sources \ RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g; s|http://deb.debian.org/debian-security|http://mirrors.aliyun.com/debian-security|g' /etc/apt/sources.list.d/debian.sources \
&& apt-get update \ && apt-get update \
&& apt-get install -y --no-install-recommends \ && apt-get install -y --no-install-recommends \
ffmpeg \ ffmpeg \
libgl1 \ libgl1 \
libglib2.0-0 \ libglib2.0-0 \
libgomp1 \ libgomp1 \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
COPY requirements.txt /app/requirements.txt COPY requirements.txt /app/requirements.txt
RUN python -m pip install --upgrade pip setuptools wheel \ RUN python -m pip install --extra-index-url https://download.pytorch.org/whl/cpu \
&& python -m pip install --extra-index-url https://download.pytorch.org/whl/cpu \ "torch==2.6.0+cpu" "torchvision==0.21.0+cpu" \
"torch==2.6.0+cpu" "torchvision==0.21.0+cpu" \
&& python -m pip install -r /app/requirements.txt && python -m pip install -r /app/requirements.txt
COPY app /app/app COPY app /app/app
@@ -35,6 +34,6 @@ RUN test -f /app/weights/yolo11n.pt \
EXPOSE 18081 EXPOSE 18081
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:18081/api/manage/health', timeout=3).read()" || exit 1 CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:18081/api/manage/health', timeout=3).read()" || exit 1
ENTRYPOINT ["/app/scripts/docker-entrypoint.sh"] ENTRYPOINT ["/app/scripts/docker-entrypoint.sh"]

View File

@@ -13,3 +13,7 @@
- Trigger: the user clarified that this repository is meant to run in mainland China environments. - Trigger: the user clarified that this repository is meant to run in mainland China environments.
- Rule: future code, build, deployment, and integration changes must consider mainland China network accessibility and should prefer China-friendly defaults where practical. - Rule: future code, build, deployment, and integration changes must consider mainland China network accessibility and should prefer China-friendly defaults where practical.
- Preventive action: when adding dependencies, mirrors, external endpoints, or download flows, explicitly check whether the default path works reliably in mainland China and add configuration or fallback when needed. - Preventive action: when adding dependencies, mirrors, external endpoints, or download flows, explicitly check whether the default path works reliably in mainland China and add configuration or fallback when needed.
- Trigger: the user required deployment to use `docker compose` only and explicitly disallowed host environment changes.
- Rule: for remote rollout tasks in this repo, prefer repository-contained `docker compose` changes and do not install packages, edit host configs, or mutate global environment state unless the user explicitly approves it.
- Preventive action: when a deployment is blocked, first fix Dockerfiles, compose files, env files, and mounted paths inside the repo before considering any host-level workaround.

View File

@@ -2,44 +2,64 @@
## Checklist ## Checklist
- [x] Confirm the changed `people_flow_project` slice is locally validated before deploy. - [x] Audit the current `.11` deployment state, image tags, and runtime container diffs.
- [x] Verify the plan covers remote sync, service rebuild, health verification, and post-deploy output inspection. - [x] Identify the minimal release payload: pushed images, compose/env/config assets, weights, and runtime-added files not present in the base images.
- [x] Sync the updated `people_flow_project` runtime files to `10.8.0.11` and verify remote hashes. - [x] Push the `.11` images to `ota.zhengxinshipin.com:5443` with stable release tags.
- [x] Rebuild and restart only the `people-flow-project` service on the remote host. - [x] Build a ZIP bundle containing compose files and all required non-image runtime assets.
- [x] Verify the remote container is healthy after deployment. - [x] Publish the ZIP bundle and an install script under `/var/www/html/ai_deploy` on `10.8.0.1`.
- [x] Print the actual new output structure from the deployed remote code path and note any limitation versus waiting for the next live half-hour webhook. - [x] Verify the published artifacts are downloadable and the install flow is internally consistent.
- [x] Record deployment and verification evidence in the Review section.
## Scope And Risks ## Scope And Risks
- Scope: deploy the `people_flow_project` output-label changes to `10.8.0.11` and inspect the newly available output structure from the remote deployed code. - Scope: publish the current managed-portal deployment that is running on `10.8.0.11` by pushing its images to `ota.zhengxinshipin.com:5443`, generating a downloadable install script on `10.8.0.1`, and uploading a ZIP bundle with compose/runtime assets required for the stack to run correctly elsewhere.
- Expected touch points: `managed/people_flow_project/src/people_flow/queue_analytics.py`, `managed/people_flow_project/src/people_flow/manage_api.py`, remote deployment under `/home/xiaozheng/managed-portal`, and the `people-flow-project` docker compose service. - Expected touch points: remote Docker images on `.11`, runtime asset directories under `managed/`, deployment compose/env files under `deploy/`, and installer artifacts on `/var/www/html/ai_deploy` on `10.8.0.1`.
- Risk: the currently saved live webhook/window JSON files on the remote host will not gain the new label fields until the next real half-hour window is emitted after restart, so immediate inspection may need to use a direct code-path sample or manage API response rather than a freshly emitted live webhook file. - Risk: the running `.11` containers use local `:dev` images and also contain runtime-added files such as `lap` inside `people-flow-project`; pushing only the local images will not fully reproduce the running state unless those extras are separately bundled or the install path reapplies them.
- Risk: restarting `people-flow-project` resets the current rolling half-hour window boundary; that is acceptable for deployment but should be stated explicitly. - Risk: required assets may live outside the image as mounted files, especially configs, outputs, weights, and managed data. Missing any of these will produce an install that starts but does not behave like `.11`.
- Risk: registry push may require credentials that are not currently cached for user `xiaozheng`; confirm push access before finalizing the artifact layout.
## Validation Intent ## Validation Intent
- Verify remote file parity before rebuilding. - Prove the exact `.11` images were retagged and pushed to `ota.zhengxinshipin.com:5443`.
- Check container health and startup logs after deployment. - Prove the ZIP bundle includes compose/env/config/runtime assets needed by the current `.11` deployment.
- Print an actual structure from the deployed remote code path immediately, and distinguish it from the next live webhook file that will only appear after the next rollover. - Prove the install script on `10.8.0.1` references the published URLs, downloads the ZIP, unpacks it, and pulls the registry images expected by the compose file.
## Review ## Review
- Status: completed. - Status: completed.
- Result: the updated `people_flow_project` code is deployed on `10.8.0.11`, the rebuilt `people-flow-project` container is healthy, and the deployed remote code path now exposes the new human-readable queue level and change labels. The currently saved live window/webhook files were generated before the next post-restart half-hour rollover, so the most immediate proof comes from the deployed manage API response and a direct runtime-code simulation inside the container. - Result: published the current `.11` managed-portal stack as release `20260513-330373b-11`, including pushed registry images, a runtime-asset ZIP, and an install script under `/var/www/html/ai_deploy` on `10.8.0.1`.
- Release payload:
- Registry images pushed to `ota.zhengxinshipin.com:5443`:
- `managed-portal:20260513-330373b-11`
- `managed-portal-web:20260513-330373b-11`
- `people-flow-project:20260513-330373b-11`
- `store-dwell-alert:20260513-330373b-11`
- ZIP bundle: `/var/www/html/ai_deploy/managed-portal-20260513-330373b-11.zip`
- Installer script: `/var/www/html/ai_deploy/install-managed-portal-20260513-330373b-11.sh`
- Latest symlinks:
- `/var/www/html/ai_deploy/managed-portal-latest.zip`
- `/var/www/html/ai_deploy/install-managed-portal-latest.sh`
- ZIP contents include:
- `deploy/docker-compose.yml`
- `deploy/docker-compose.ota-release.yml`
- `deploy/managed-portal.release.env`
- `deploy/Dockerfile.runtime-overlay`
- `managed_services.yaml`
- mounted runtime assets from `.11`: people-flow config/outputs/weights and store-dwell config/data
- runtime overlays extracted from running containers for `lap` in both Python services and `/app/logs/events.jsonl` from `store-dwell-alert`
- Verification: - Verification:
- synced `managed/people_flow_project/src/people_flow/queue_analytics.py` and `managed/people_flow_project/src/people_flow/manage_api.py` to `/home/xiaozheng/managed-portal/managed/people_flow_project/src/people_flow/` on `10.8.0.11` and verified SHA256 parity with local files: - Registry push succeeded for all four images. Observed repo digests:
- `queue_analytics.py`: `dd12c0a7af2d7c1bf68e3496560fe2ea0fb5c1d582bea7c4dada0caf105711c8` - `managed-portal@sha256:589f699edce8271c80516030eae81abed95d8e62804976955eb86bf211d98f4e`
- `manage_api.py`: `c723fd570a29b43cd055dfaca4a5fc9ce1459b55754d2dbd0b8edcdef7da4cf1` - `managed-portal-web@sha256:f2e99c4745a3c16118a74084585f0a455e4f5295d9eb4cbabf2689b841966d9b`
- rebuilt and restarted only `people-flow-project` with `docker compose --env-file managed-portal.10.8.0.11.env up -d --build people-flow-project` on the remote host; - `people-flow-project@sha256:963ecd41ee8a3f986c581b5330ce7163614571427711d524b936f05c3e84ec96`
- confirmed remote status after deploy: `people-flow-project` is `Up` and `healthy`; - `store-dwell-alert@sha256:d324cb2653ef25f6984a12b0cfa92064bf2c86b2946462001d14d254818d243d`
- queried the deployed manage API summary endpoint inside the container and observed these actual metrics keys/values from the live response: `{ "queue_level": "normal", "queue_level_label": "人数正常", "previous_queue_level": "few", "previous_queue_level_label": "人少", "status_change": "queue_normalized", "status_change_label": "人数变正常" }`; - Source and published ZIP sizes match exactly: `1261636056` bytes on `.11` and `.1`.
- executed a direct simulation inside the deployed container using the updated `QueueWindowTracker` code path and printed the actual new `queue_metrics` JSON: - HTTP validation succeeded:
- `queue_level`: `crowded` - `http://10.8.0.1/ai_deploy/managed-portal-20260513-330373b-11.zip` => `200 OK`, `Content-Length: 1261636056`
- `queue_level_label`: `人多` - `http://10.8.0.1/ai_deploy/install-managed-portal-20260513-330373b-11.sh` => `200 OK`
- `previous_queue_level`: `null` - ZIP content validation succeeded both at the source and after upload, including `release-manifest.env`, `deploy/docker-compose.ota-release.yml`, and runtime overlay files under `runtime-overlays/.../lap/...`.
- `previous_queue_level_label`: `""` - Local release asset validation passed:
- `status_change`: `initial` - `sh -n deploy/install-managed-portal-ota.sh`
- `status_change_label`: `初始` - compose config expansion for `deploy/docker-compose.ota-release.yml` with the `.11` env file and placeholder image refs
- plus the existing `queue_time_threshold_seconds`, `over_threshold_count`, `under_threshold_count`, and `people[]` fields; - Residual risk:
- noted deployment side effect: restarting `people-flow-project` resets the current rolling 1800-second window, so the next real live `half_hour_report` file/webhook emitted after this restart will be the first persisted artifact that contains the new label fields. - The published installer was validated for syntax and asset consistency, but it was not executed end-to-end on a fresh target host in this task.
- The bundle intentionally excludes ephemeral `/tmp`, `/run`, and NVIDIA runtime-injected host libraries; reproducing GPU runtime behavior still depends on the target host having a working NVIDIA container runtime when `gpus: all` is used.