feat: add deployment configuration and scripts for managed-portal, including Dockerfiles and environment settings
This commit is contained in:
4
deploy/Dockerfile.runtime-overlay
Normal file
4
deploy/Dockerfile.runtime-overlay
Normal file
@@ -0,0 +1,4 @@
|
||||
# Runtime overlay image: layers the files found under rootfs/ in the build
# context on top of an existing base image.
#
# BASE_IMAGE has no default and must be supplied with --build-arg.
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

# Copy the overlay tree onto the image root, keeping relative paths.
COPY rootfs/ /
|
||||
75
deploy/docker-compose.ota-release.yml
Normal file
75
deploy/docker-compose.ota-release.yml
Normal file
@@ -0,0 +1,75 @@
|
||||
# Compose stack for an OTA release of managed-portal.
# Every image reference is mandatory (the `:?` form aborts when it is unset);
# the installer script exports these variables before invoking compose.
name: managed-portal

services:
  managed-portal:
    image: ${MANAGED_PORTAL_IMAGE:?MANAGED_PORTAL_IMAGE is required}
    container_name: managed-portal
    restart: unless-stopped
    depends_on:
      - store-dwell-alert
      - people-flow-project
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      MANAGED_PORTAL_HTTP_ADDR: ":9080"
      MANAGED_PORTAL_REGISTRY_PATH: "/app/managed_services.yaml"
    volumes:
      # NOTE(review): this hands the container the host Docker daemon socket.
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - managed-portal

  store-dwell-alert:
    image: ${STORE_DWELL_ALERT_IMAGE:?STORE_DWELL_ALERT_IMAGE is required}
    container_name: store-dwell-alert
    restart: unless-stopped
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      CAMERA_ID: ${MANAGED_STORE_DWELL_CAMERA_ID:-store_cam_01}
      RTSP_URL: ${MANAGED_STORE_DWELL_RTSP_URL:-}
      EVENT_SINK_PATH: ${MANAGED_STORE_DWELL_EVENT_SINK_PATH:-logs/events.jsonl}
      API_HOST: 0.0.0.0
      API_PORT: 18081
      CONFIG_PATH: /app/config/local.yaml
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      # Relative defaults resolve against the directory of this compose file.
      - ${MANAGED_STORE_DWELL_CONFIG_DIR:-../managed/store_dwell_alert/config}:/app/config
      - ${MANAGED_STORE_DWELL_DATA_DIR:-../managed/store_dwell_alert/data}:/app/data
    networks:
      - managed-portal

  people-flow-project:
    image: ${PEOPLE_FLOW_PROJECT_IMAGE:?PEOPLE_FLOW_PROJECT_IMAGE is required}
    container_name: people-flow-project
    restart: unless-stopped
    # Requests every GPU on the host; needs a working GPU container runtime.
    gpus: all
    shm_size: "1gb"
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      CONFIG_PATH: /opt/people-flow/config/local.yaml
      RTSP_URL: ${MANAGED_PEOPLE_FLOW_RTSP_URL:-}
      OUTPUT_DIR: /opt/people-flow/outputs
      API_HOST: 0.0.0.0
      API_PORT: 18082
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - ${MANAGED_PEOPLE_FLOW_CONFIG_DIR:-../managed/people_flow_project/config}:/opt/people-flow/config
      - ${MANAGED_PEOPLE_FLOW_OUTPUT_DIR:-../managed/people_flow_project/outputs}:/opt/people-flow/outputs
      - ${MANAGED_PEOPLE_FLOW_WEIGHTS_DIR:-../managed/people_flow_project/weights}:/opt/people-flow/weights
    networks:
      - managed-portal

  managed-portal-web:
    image: ${MANAGED_PORTAL_WEB_IMAGE:?MANAGED_PORTAL_WEB_IMAGE is required}
    container_name: managed-portal-web
    restart: unless-stopped
    depends_on:
      - managed-portal
    ports:
      # Only the web front end is published on the host.
      - "${MANAGED_PORTAL_WEB_PORT:-13000}:80"
    networks:
      - managed-portal

networks:
  managed-portal:
    driver: bridge
|
||||
@@ -66,6 +66,7 @@ services:
|
||||
volumes:
|
||||
- ${MANAGED_PEOPLE_FLOW_CONFIG_DIR:-../managed/people_flow_project/config}:/opt/people-flow/config
|
||||
- ${MANAGED_PEOPLE_FLOW_OUTPUT_DIR:-../managed/people_flow_project/outputs}:/opt/people-flow/outputs
|
||||
- ${MANAGED_PEOPLE_FLOW_WEIGHTS_DIR:-../managed/people_flow_project/weights}:/opt/people-flow/weights
|
||||
networks:
|
||||
- managed-portal
|
||||
|
||||
|
||||
111
deploy/install-managed-portal-ota.sh
Normal file
111
deploy/install-managed-portal-ota.sh
Normal file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env sh
#
# Installs a managed-portal OTA release: downloads the release ZIP, unpacks it
# under INSTALL_ROOT, pulls the release images, optionally builds runtime
# overlay images, and starts the stack with compose.
#
# Every setting below can be overridden from the environment:
#   RELEASE_VERSION  release tag, used in the bundle name and overlay tags
#   BASE_URL         HTTP location that serves the bundle
#   BUNDLE_NAME      file name of the ZIP under BASE_URL
#   INSTALL_ROOT     directory the ZIP is unpacked into
#   TARGET_DIR       directory the unpacked release is expected to occupy
set -eu

RELEASE_VERSION="${RELEASE_VERSION:-20260513-330373b-11}"
BASE_URL="${BASE_URL:-http://10.8.0.1/ai_deploy}"
BUNDLE_NAME="${BUNDLE_NAME:-managed-portal-${RELEASE_VERSION}.zip}"
INSTALL_ROOT="${INSTALL_ROOT:-/opt/managed-portal-releases}"
TARGET_DIR="${TARGET_DIR:-${INSTALL_ROOT}/managed-portal-${RELEASE_VERSION}}"
|
||||
|
||||
# Abort the script unless the named command can be resolved.
# Arguments: $1 - command name to look up
# Outputs:   an error line on stderr when the command is missing
# Exits:     1 when the command is missing
require_command() {
  if ! command -v "$1" >/dev/null 2>&1; then
    # printf keeps the name literal regardless of the shell's echo flavour.
    printf 'missing required command: %s\n' "$1" >&2
    exit 1
  fi
}
|
||||
|
||||
# Run a compose command with whichever compose front end is installed.
# The `docker compose` plugin is tried first because the release compose file
# relies on the top-level `name:` and service-level `gpus:` attributes; the
# standalone `docker-compose` binary is only a fallback.
# Arguments: "$@" - passed through unchanged to compose
# Returns:   the exit status of compose, or 1 when no front end is available
run_compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
    return
  fi

  if command -v docker-compose >/dev/null 2>&1; then
    docker-compose "$@"
    return
  fi

  echo "missing required command: docker compose (or docker-compose)" >&2
  return 1
}
|
||||
|
||||
# Download the release bundle into a directory.
# Globals:   BASE_URL, BUNDLE_NAME (read)
# Arguments: $1 - existing directory that receives the ZIP
# Outputs:   stdout - path of the downloaded ZIP (the only stdout output, so
#                     callers can capture it); progress goes to stderr
# Returns:   non-zero when the download fails
download_bundle() {
  # Function-specific names: plain sh has no `local`, and the caller keeps its
  # own tmp_dir / bundle_zip variables.
  dl_dir="$1"
  dl_zip="$dl_dir/$BUNDLE_NAME"
  # Strip one trailing slash so the joined URL never contains "//".
  dl_url="${BASE_URL%/}/$BUNDLE_NAME"

  printf '%s\n' "downloading $dl_url" >&2
  # -f turns HTTP errors into a failing exit status; --retry covers transient
  # network errors. Fail explicitly instead of relying on inherited `set -e`.
  curl -fL --retry 3 "$dl_url" -o "$dl_zip" || return 1
  printf '%s\n' "$dl_zip"
}
|
||||
|
||||
# Build a runtime overlay image on top of a base image when overlay files exist.
# Globals:   TARGET_DIR (read) - release directory holding
#            deploy/Dockerfile.runtime-overlay
# Arguments: $1 - service name, used only in the log line
#            $2 - base image reference
#            $3 - overlay rootfs directory; its parent is the build context
#            $4 - tag to give the overlay image
# Outputs:   stdout - the image to run: $4 when an overlay was built, otherwise
#                     $2 unchanged; the log line goes to stderr
# Returns:   non-zero when `docker build` fails
build_overlay_image() {
  overlay_name="$1"
  base_image="$2"
  overlay_root="$3"
  overlay_image="$4"
  # The Dockerfile copies "rootfs/", so the context is the directory above it.
  overlay_context="$(dirname "$overlay_root")"

  # No overlay directory, or an empty one: run the base image as released.
  if [ ! -d "$overlay_root" ] ||
    [ -z "$(find "$overlay_root" -mindepth 1 -print -quit)" ]; then
    printf '%s\n' "$base_image"
    return 0
  fi

  echo "building runtime overlay for $overlay_name" >&2
  # Build output is discarded so stdout carries only the resulting image name.
  # Fail explicitly: this function runs inside a command substitution, where
  # errexit is not guaranteed to be inherited by every shell.
  docker build \
    -f "$TARGET_DIR/deploy/Dockerfile.runtime-overlay" \
    --build-arg "BASE_IMAGE=$base_image" \
    -t "$overlay_image" \
    "$overlay_context" >/dev/null || return 1
  printf '%s\n' "$overlay_image"
}
|
||||
|
||||
# --- install flow -----------------------------------------------------------
require_command curl
require_command unzip
require_command docker

tmp_dir="$(mktemp -d)"
# Cleanup lives on EXIT only. INT/TERM must exit explicitly: in POSIX sh the
# script resumes after a signal handler returns, so a cleanup-only handler
# would delete the temp dir and then keep installing.
trap 'rm -rf "$tmp_dir"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

mkdir -p "$INSTALL_ROOT"
bundle_zip="$(download_bundle "$tmp_dir")"

# Drop any earlier extraction of this release only after the download
# succeeded. The :? guard refuses to run rm -rf on an empty path.
rm -rf -- "${TARGET_DIR:?TARGET_DIR must not be empty}"
unzip -oq "$bundle_zip" -d "$INSTALL_ROOT"

if [ ! -f "$TARGET_DIR/release-manifest.env" ]; then
  echo "release-manifest.env not found in $TARGET_DIR" >&2
  exit 1
fi

# Auto-export everything the manifest assigns so compose sees it later.
set -a
. "$TARGET_DIR/release-manifest.env"
set +a

# Fail with a clear message before any pull if an image reference is absent.
: "${MANAGED_PORTAL_IMAGE:?MANAGED_PORTAL_IMAGE is not set after loading release-manifest.env}"
: "${MANAGED_PORTAL_WEB_IMAGE:?MANAGED_PORTAL_WEB_IMAGE is not set after loading release-manifest.env}"
: "${PEOPLE_FLOW_PROJECT_IMAGE:?PEOPLE_FLOW_PROJECT_IMAGE is not set after loading release-manifest.env}"
: "${STORE_DWELL_ALERT_IMAGE:?STORE_DWELL_ALERT_IMAGE is not set after loading release-manifest.env}"

echo "pulling release images"
docker pull "$MANAGED_PORTAL_IMAGE"
docker pull "$MANAGED_PORTAL_WEB_IMAGE"
docker pull "$PEOPLE_FLOW_PROJECT_IMAGE"
docker pull "$STORE_DWELL_ALERT_IMAGE"

# Swap each service image for its overlay build when the bundle ships overlay
# files for it; otherwise the pulled image name is kept as is.
PEOPLE_FLOW_PROJECT_IMAGE="$(build_overlay_image \
  people-flow-project \
  "$PEOPLE_FLOW_PROJECT_IMAGE" \
  "$TARGET_DIR/runtime-overlays/people-flow-project/rootfs" \
  "managed-portal-runtime/people-flow-project:${RELEASE_VERSION}")"

STORE_DWELL_ALERT_IMAGE="$(build_overlay_image \
  store-dwell-alert \
  "$STORE_DWELL_ALERT_IMAGE" \
  "$TARGET_DIR/runtime-overlays/store-dwell-alert/rootfs" \
  "managed-portal-runtime/store-dwell-alert:${RELEASE_VERSION}")"

export MANAGED_PORTAL_IMAGE
export MANAGED_PORTAL_WEB_IMAGE
export PEOPLE_FLOW_PROJECT_IMAGE
export STORE_DWELL_ALERT_IMAGE

# Compose is run from deploy/ so the relative env-file and compose-file names
# below resolve inside the unpacked release.
cd "$TARGET_DIR/deploy"
run_compose \
  --env-file managed-portal.release.env \
  -f docker-compose.ota-release.yml \
  up -d

echo "release installed under $TARGET_DIR"
|
||||
14
deploy/managed-portal.10.8.0.12.env
Normal file
14
deploy/managed-portal.10.8.0.12.env
Normal file
@@ -0,0 +1,14 @@
|
||||
IMAGE_VERSION=dev
|
||||
TZ=Asia/Shanghai
|
||||
MANAGED_PORTAL_WEB_PORT=13000
|
||||
|
||||
MANAGED_STORE_DWELL_CAMERA_ID=cam_192_168_1_10
|
||||
# NOTE(review): this URL embeds the camera login (user:password) in plain text,
# and MANAGED_PEOPLE_FLOW_RTSP_URL further down repeats it. Consider keeping
# real credentials out of version control and rotating this password.
MANAGED_STORE_DWELL_RTSP_URL=rtsp://admin:Zxjp2026@192.168.1.10:554/Streaming/Channels/101
|
||||
MANAGED_STORE_DWELL_EVENT_SINK_PATH=logs/events.jsonl
|
||||
MANAGED_STORE_DWELL_CONFIG_DIR=../managed/store_dwell_alert/config
|
||||
MANAGED_STORE_DWELL_DATA_DIR=../managed/store_dwell_alert/data
|
||||
|
||||
MANAGED_PEOPLE_FLOW_RTSP_URL=rtsp://admin:Zxjp2026@192.168.1.10:554/Streaming/Channels/101
|
||||
MANAGED_PEOPLE_FLOW_CONFIG_DIR=../managed/people_flow_project/config
|
||||
MANAGED_PEOPLE_FLOW_OUTPUT_DIR=../managed/people_flow_project/outputs
|
||||
MANAGED_PEOPLE_FLOW_WEIGHTS_DIR=/home/xiaozheng/people_flow_project/weights
|
||||
@@ -3,7 +3,7 @@ FROM swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/python:3.12-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \
|
||||
PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/ \
|
||||
DEEPFACE_HOME=/root/.deepface \
|
||||
TF_CPP_MIN_LOG_LEVEL=2
|
||||
|
||||
@@ -19,8 +19,7 @@ RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g; s
|
||||
|
||||
COPY requirements-docker.txt ./requirements-docker.txt
|
||||
|
||||
RUN python -m pip install --upgrade pip setuptools wheel && \
|
||||
pip install "numpy<2"
|
||||
RUN pip install "numpy<2"
|
||||
|
||||
RUN pip install --extra-index-url https://download.pytorch.org/whl/cpu \
|
||||
"torch==2.6.0+cpu" "torchvision==0.21.0+cpu"
|
||||
@@ -32,12 +31,7 @@ RUN pip install -r requirements-docker.txt
|
||||
COPY . .
|
||||
COPY scripts/docker-entrypoint.sh /opt/people-flow/scripts/docker-entrypoint.sh
|
||||
|
||||
RUN test -f /opt/people-flow/weights/yolo11n.pt && \
|
||||
test -f /opt/people-flow/weights/deepface/age_model_weights.h5 && \
|
||||
test -f /opt/people-flow/weights/deepface/gender_model_weights.h5 && \
|
||||
test -f /opt/people-flow/weights/deepface/retinaface.h5 && \
|
||||
mkdir -p /root/.deepface/weights /opt/people-flow/outputs && \
|
||||
cp /opt/people-flow/weights/deepface/*.h5 /root/.deepface/weights/ && \
|
||||
RUN mkdir -p /root/.deepface/weights /opt/people-flow/outputs && \
|
||||
chmod +x /opt/people-flow/scripts/docker-entrypoint.sh
|
||||
|
||||
EXPOSE 18082
|
||||
|
||||
@@ -8,8 +8,15 @@ OUTPUT_DIR="${OUTPUT_DIR:-${PROJECT_DIR}/outputs}"
|
||||
RTSP_URL="${RTSP_URL:-}"
|
||||
API_HOST="${API_HOST:-0.0.0.0}"
|
||||
API_PORT="${API_PORT:-18082}"
|
||||
RTSP_STALL_TIMEOUT_SECONDS="${RTSP_STALL_TIMEOUT_SECONDS:-180}"
|
||||
DEEPFACE_CACHE_DIR="/root/.deepface/weights"
|
||||
DEEPFACE_SOURCE_DIR="${PROJECT_DIR}/weights/deepface"
|
||||
|
||||
mkdir -p "${OUTPUT_DIR}" "$(dirname "${CONFIG_PATH}")"
|
||||
mkdir -p "${OUTPUT_DIR}" "$(dirname "${CONFIG_PATH}")" "${DEEPFACE_CACHE_DIR}"
|
||||
|
||||
if [ -d "${DEEPFACE_SOURCE_DIR}" ]; then
|
||||
find "${DEEPFACE_SOURCE_DIR}" -maxdepth 1 -name '*.h5' -exec cp {} "${DEEPFACE_CACHE_DIR}/" \;
|
||||
fi
|
||||
|
||||
if [ ! -f "${CONFIG_PATH}" ]; then
|
||||
cp "${CONFIG_TEMPLATE}" "${CONFIG_PATH}"
|
||||
@@ -37,13 +44,31 @@ config_path.write_text(
|
||||
)
|
||||
PY
|
||||
|
||||
exec python - "$CONFIG_PATH" "$API_HOST" "$API_PORT" <<'PY'
|
||||
RTSP_OUTPUT_SUBDIR="$(python - "$CONFIG_PATH" <<'PY'
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
config_path = Path(sys.argv[1])
|
||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
|
||||
rtsp = raw.get("rtsp") or {}
|
||||
print(rtsp.get("output_subdir", "rtsp_stream"))
|
||||
PY
|
||||
)"
|
||||
RTSP_STATUS_PATH="${OUTPUT_DIR}/${RTSP_OUTPUT_SUBDIR}/worker_status.json"
|
||||
|
||||
exec python - "$CONFIG_PATH" "$API_HOST" "$API_PORT" "$RTSP_STATUS_PATH" "$RTSP_STALL_TIMEOUT_SECONDS" <<'PY'
|
||||
from pathlib import Path
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
config_path, api_host, api_port = sys.argv[1:4]
|
||||
from src.people_flow.worker_status import worker_status_stall_reason
|
||||
|
||||
config_path, api_host, api_port, status_path_raw, stall_timeout_raw = sys.argv[1:6]
|
||||
status_path = Path(status_path_raw)
|
||||
stall_timeout_seconds = max(float(stall_timeout_raw), 30.0)
|
||||
commands = [
|
||||
[sys.executable, "main.py", "--config", config_path, "rtsp"],
|
||||
[
|
||||
@@ -59,21 +84,38 @@ commands = [
|
||||
],
|
||||
]
|
||||
processes = [subprocess.Popen(command) for command in commands]
|
||||
supervisor_started_at = time.time()
|
||||
|
||||
|
||||
def terminate_all(signum, _frame):
|
||||
for process in processes:
|
||||
if process.poll() is None:
|
||||
def stop_all(excluded_index=None):
|
||||
for index, process in enumerate(processes):
|
||||
if index == excluded_index or process.poll() is not None:
|
||||
continue
|
||||
process.terminate()
|
||||
deadline = time.time() + 10
|
||||
for process in processes:
|
||||
if process.poll() is not None:
|
||||
for index, process in enumerate(processes):
|
||||
if index == excluded_index or process.poll() is not None:
|
||||
continue
|
||||
timeout = max(0, deadline - time.time())
|
||||
try:
|
||||
process.wait(timeout=timeout)
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
|
||||
|
||||
def stale_reason():
|
||||
if processes[0].poll() is not None:
|
||||
return None
|
||||
|
||||
return worker_status_stall_reason(
|
||||
status_path,
|
||||
started_at=supervisor_started_at,
|
||||
max_age_seconds=stall_timeout_seconds,
|
||||
)
|
||||
|
||||
|
||||
def terminate_all(signum, _frame):
|
||||
stop_all()
|
||||
raise SystemExit(128 + signum)
|
||||
|
||||
|
||||
@@ -85,19 +127,12 @@ while True:
|
||||
return_code = process.poll()
|
||||
if return_code is None:
|
||||
continue
|
||||
for other_index, other_process in enumerate(processes):
|
||||
if other_index == index or other_process.poll() is not None:
|
||||
continue
|
||||
other_process.terminate()
|
||||
deadline = time.time() + 10
|
||||
for other_index, other_process in enumerate(processes):
|
||||
if other_index == index or other_process.poll() is not None:
|
||||
continue
|
||||
timeout = max(0, deadline - time.time())
|
||||
try:
|
||||
other_process.wait(timeout=timeout)
|
||||
except subprocess.TimeoutExpired:
|
||||
other_process.kill()
|
||||
stop_all(excluded_index=index)
|
||||
raise SystemExit(return_code)
|
||||
reason = stale_reason()
|
||||
if reason is not None:
|
||||
print(reason, flush=True)
|
||||
stop_all()
|
||||
raise SystemExit(1)
|
||||
time.sleep(0.5)
|
||||
PY
|
||||
|
||||
@@ -24,6 +24,7 @@ from .queue_analytics import QueueWindowTracker
|
||||
from .tracking import extract_person_tracks
|
||||
from .window_identity import WindowIdentityResolver
|
||||
from .webhook import dispatch_json_event
|
||||
from .worker_status import write_worker_status
|
||||
|
||||
SUPPORTED_EXTENSIONS = {".mp4", ".mov", ".mkv", ".avi"}
|
||||
|
||||
@@ -157,6 +158,7 @@ class PeopleFlowPipeline:
|
||||
|
||||
def process_rtsp(self, source: str) -> dict:
|
||||
rtsp_paths = self.get_rtsp_output_paths()
|
||||
status_path = rtsp_paths["root"] / "worker_status.json"
|
||||
sample_interval = max(float(self.config.rtsp.sample_interval_seconds), 0.01)
|
||||
window_seconds = max(int(self.config.rtsp.window_seconds), 1)
|
||||
reconnect_delay = max(float(self.config.rtsp.reconnect_delay_seconds), 0.1)
|
||||
@@ -189,8 +191,39 @@ class PeopleFlowPipeline:
|
||||
if not Path(self.config.webhook.event_log_path).is_absolute()
|
||||
else Path(self.config.webhook.event_log_path)
|
||||
)
|
||||
last_status_phase: str | None = None
|
||||
last_status_written_at = 0.0
|
||||
|
||||
def update_status(
|
||||
phase: str,
|
||||
*,
|
||||
force: bool = False,
|
||||
note: str | None = None,
|
||||
) -> None:
|
||||
nonlocal last_status_phase, last_status_written_at
|
||||
|
||||
current_time = time.monotonic()
|
||||
if (
|
||||
not force
|
||||
and phase == last_status_phase
|
||||
and current_time - last_status_written_at < 5.0
|
||||
):
|
||||
return
|
||||
|
||||
write_worker_status(
|
||||
status_path,
|
||||
phase,
|
||||
source=source,
|
||||
window_index=window_index,
|
||||
frame_index=frame_index,
|
||||
last_processed_at=last_processed_wall_time,
|
||||
note=note,
|
||||
)
|
||||
last_status_phase = phase
|
||||
last_status_written_at = current_time
|
||||
|
||||
try:
|
||||
update_status("starting", force=True)
|
||||
while True:
|
||||
now = datetime.now().astimezone()
|
||||
while now >= window_end:
|
||||
@@ -215,6 +248,7 @@ class PeopleFlowPipeline:
|
||||
webhook_url=self.config.webhook.url,
|
||||
timeout_seconds=self.config.webhook.timeout_seconds,
|
||||
)
|
||||
update_status("window_flushed", force=True)
|
||||
print(f"window_json={json_path}", flush=True)
|
||||
print(f"window_total_people={payload['total_people']}", flush=True)
|
||||
window_index += 1
|
||||
@@ -229,13 +263,25 @@ class PeopleFlowPipeline:
|
||||
now = datetime.now().astimezone()
|
||||
|
||||
if capture is None or not capture.isOpened():
|
||||
update_status("opening_stream")
|
||||
capture = self._open_rtsp_capture(source, open_timeout_seconds)
|
||||
if capture is None:
|
||||
update_status(
|
||||
"waiting_to_reconnect",
|
||||
force=True,
|
||||
note="open_failed",
|
||||
)
|
||||
time.sleep(reconnect_delay)
|
||||
continue
|
||||
|
||||
update_status("reading_frame")
|
||||
ok, frame = capture.read()
|
||||
if not ok or frame is None:
|
||||
update_status(
|
||||
"waiting_to_reconnect",
|
||||
force=True,
|
||||
note="read_failed",
|
||||
)
|
||||
capture.release()
|
||||
capture = None
|
||||
time.sleep(reconnect_delay)
|
||||
@@ -251,6 +297,7 @@ class PeopleFlowPipeline:
|
||||
self.config.queue,
|
||||
self.config.queue.to_pixel_area(width=width, height=height),
|
||||
)
|
||||
update_status("capture_ready", force=True)
|
||||
|
||||
current_time = time.monotonic()
|
||||
if current_time - last_processed_at < sample_interval:
|
||||
@@ -258,6 +305,7 @@ class PeopleFlowPipeline:
|
||||
time.sleep(idle_sleep)
|
||||
continue
|
||||
|
||||
update_status("tracking_frame")
|
||||
last_processed_at = current_time
|
||||
observations = self._track_frame(frame)
|
||||
person_keys = identity_resolver.resolve(frame, observations)
|
||||
@@ -281,9 +329,11 @@ class PeopleFlowPipeline:
|
||||
next_heartbeat_at = current_time + 60.0
|
||||
last_processed_wall_time = now
|
||||
frame_index += 1
|
||||
update_status("processed_frame", force=True)
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
finally:
|
||||
update_status("stopped", force=True)
|
||||
if capture is not None:
|
||||
capture.release()
|
||||
|
||||
|
||||
84
managed/people_flow_project/src/people_flow/worker_status.py
Normal file
84
managed/people_flow_project/src/people_flow/worker_status.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def write_worker_status(
    path: Path,
    phase: str,
    *,
    source: str,
    window_index: int,
    frame_index: int,
    last_processed_at: datetime | None,
    note: str | None = None,
) -> dict:
    """Write the worker's progress snapshot to ``path`` as JSON.

    The payload is first written to a sibling ``<name>.tmp`` file and then
    moved over ``path`` with ``Path.replace``, so a reader never opens a
    partially written status file.

    Args:
        path: Destination of the status file; missing parent directories are
            created.
        phase: Label of the step the worker is currently in.
        source: Stream source the worker is reading.
        window_index: Index of the current reporting window.
        frame_index: Number of frames processed so far.
        last_processed_at: Time the last frame was processed, or ``None`` when
            no frame has been processed yet.
        note: Optional free-form detail; the ``note`` key is omitted when this
            is empty or ``None``.

    Returns:
        The payload dictionary that was written.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    processed_at_text = (
        last_processed_at.isoformat(timespec="seconds")
        if last_processed_at is not None
        else None
    )
    payload = {
        "phase": phase,
        "updated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
        "source": source,
        "window_index": window_index,
        "frame_index": frame_index,
        "last_processed_at": processed_at_text,
    }
    if note:
        payload["note"] = note

    temp_path = path.with_suffix(path.suffix + ".tmp")
    temp_path.write_text(
        json.dumps(payload, ensure_ascii=True, indent=2),
        encoding="utf-8",
    )
    temp_path.replace(path)
    return payload
|
||||
|
||||
|
||||
def load_worker_status(path: Path) -> dict | None:
    """Read a worker status file, returning ``None`` when it is unusable.

    Callers use the result only to enrich diagnostics, so every failure to
    read or decode the file is treated the same way as a missing file.

    Args:
        path: Location of the JSON status file.

    Returns:
        The decoded payload when it is a JSON object, otherwise ``None``.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing, unreadable or non-file path; ValueError
        # covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def worker_status_age_seconds(path: Path, now: float | None = None) -> float | None:
    """Return how long ago the status file was last modified.

    Args:
        path: Location of the status file.
        now: Reference time as a POSIX timestamp; defaults to the current time.

    Returns:
        Seconds between the file's modification time and ``now``, never
        negative, or ``None`` when the file does not exist.
    """
    try:
        modified_at = path.stat().st_mtime
    except FileNotFoundError:
        return None

    current_time = datetime.now().timestamp() if now is None else now
    # Clamp at zero so a modification time ahead of `now` is not reported as a
    # negative age.
    return max(0.0, current_time - modified_at)
|
||||
|
||||
|
||||
def worker_status_stall_reason(
    path: Path,
    *,
    started_at: float,
    max_age_seconds: float,
    now: float | None = None,
) -> str | None:
    """Explain why the worker looks stalled, or return ``None`` if it does not.

    The worker is given ``max_age_seconds`` counted from ``started_at`` before
    anything is reported. This holds whether the status file is missing or is a
    leftover that was last modified before ``started_at``. Without that rule a
    stale file surviving from a previous run is reported on the very first
    check, before the new worker has had any chance to refresh it.

    Args:
        path: Location of the status file.
        started_at: POSIX timestamp at which the monitoring run began.
        max_age_seconds: Longest tolerated time without a status update.
        now: Reference time as a POSIX timestamp; defaults to the current time.

    Returns:
        A one-line description when the status is missing or stale beyond the
        limit, otherwise ``None``.
    """
    current_time = datetime.now().timestamp() if now is None else now
    run_seconds = current_time - started_at

    # The raw modification time is needed here, not just the age, to tell a
    # leftover file apart from one written during this run.
    try:
        modified_at = path.stat().st_mtime
    except FileNotFoundError:
        modified_at = None

    if modified_at is None:
        if run_seconds < max_age_seconds:
            return None
        return f"rtsp worker status missing path={path}"

    age_seconds = max(0.0, current_time - modified_at)
    # Never count silence from before this run started against the worker.
    silent_seconds = min(age_seconds, run_seconds)
    if silent_seconds <= max_age_seconds:
        return None

    payload = load_worker_status(path) or {}
    phase = payload.get("phase", "unknown")
    updated_at = payload.get("updated_at", "unknown")
    return (
        f"rtsp worker stalled path={path} phase={phase} "
        f"updated_at={updated_at} age_seconds={age_seconds:.1f}"
    )
|
||||
101
managed/people_flow_project/tests/test_worker_status.py
Normal file
101
managed/people_flow_project/tests/test_worker_status.py
Normal file
@@ -0,0 +1,101 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from src.people_flow.worker_status import (
|
||||
load_worker_status,
|
||||
worker_status_age_seconds,
|
||||
worker_status_stall_reason,
|
||||
write_worker_status,
|
||||
)
|
||||
|
||||
|
||||
def test_write_worker_status_persists_progress(tmp_path: Path):
    """A written status can be read back with every field intact."""
    # Nested path: the writer is expected to create the missing directories.
    status_path = tmp_path / "outputs" / "rtsp_stream" / "worker_status.json"
    last_processed_at = datetime(2026, 5, 13, 16, 30, 0).astimezone()

    write_worker_status(
        status_path,
        "processed_frame",
        source="rtsp://camera/stream",
        window_index=3,
        frame_index=42,
        last_processed_at=last_processed_at,
        note="healthy",
    )

    payload = load_worker_status(status_path)

    assert payload is not None
    assert payload["phase"] == "processed_frame"
    assert payload["source"] == "rtsp://camera/stream"
    assert payload["window_index"] == 3
    assert payload["frame_index"] == 42
    assert payload["last_processed_at"] == last_processed_at.isoformat(
        timespec="seconds"
    )
    assert payload["note"] == "healthy"
    assert "updated_at" in payload
|
||||
|
||||
|
||||
def test_worker_status_age_seconds_uses_file_mtime(tmp_path: Path):
    """Age is measured from the file's mtime; a missing file has no age."""
    status_path = tmp_path / "worker_status.json"

    write_worker_status(
        status_path,
        "tracking_frame",
        source="rtsp://camera/stream",
        window_index=0,
        frame_index=0,
        last_processed_at=None,
    )

    # Pin the mtime so the expected age is exact: 280 - 100 = 180.
    os.utime(status_path, (100.0, 100.0))

    assert worker_status_age_seconds(status_path, now=280.0) == 180.0
    assert worker_status_age_seconds(tmp_path / "missing.json", now=280.0) is None
|
||||
|
||||
|
||||
def test_worker_status_stall_reason_reports_missing_and_stale_status(tmp_path: Path):
    """Missing status is tolerated during the grace period; stale status is not."""
    missing_path = tmp_path / "missing.json"

    # 150 s into the run with a 180 s limit: still within the grace period.
    assert (
        worker_status_stall_reason(
            missing_path,
            started_at=150.0,
            max_age_seconds=180.0,
            now=300.0,
        )
        is None
    )
    # 300 s into the run: the missing file is now reported.
    assert "status missing" in worker_status_stall_reason(
        missing_path,
        started_at=0.0,
        max_age_seconds=180.0,
        now=300.0,
    )

    status_path = tmp_path / "worker_status.json"
    write_worker_status(
        status_path,
        "tracking_frame",
        source="rtsp://camera/stream",
        window_index=0,
        frame_index=2,
        last_processed_at=None,
    )
    os.utime(status_path, (100.0, 100.0))

    reason = worker_status_stall_reason(
        status_path,
        started_at=0.0,
        max_age_seconds=180.0,
        now=300.0,
    )

    assert reason is not None
    # Must match the wording of the missing-file message ("status missing") so
    # this check can actually fail if the wrong branch is taken.
    assert "status missing" not in reason
    assert "phase=tracking_frame" in reason
    assert "age_seconds=200.0" in reason
|
||||
@@ -3,7 +3,7 @@ FROM swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/python:3.12-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@@ -17,8 +17,7 @@ RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g; s
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
RUN python -m pip install --upgrade pip setuptools wheel \
|
||||
&& python -m pip install --extra-index-url https://download.pytorch.org/whl/cpu \
|
||||
RUN python -m pip install --extra-index-url https://download.pytorch.org/whl/cpu \
|
||||
"torch==2.6.0+cpu" "torchvision==0.21.0+cpu" \
|
||||
&& python -m pip install -r /app/requirements.txt
|
||||
|
||||
|
||||
@@ -13,3 +13,7 @@
|
||||
- Trigger: the user clarified that this repository is meant to run in mainland China environments.
|
||||
- Rule: future code, build, deployment, and integration changes must consider mainland China network accessibility and should prefer China-friendly defaults where practical.
|
||||
- Preventive action: when adding dependencies, mirrors, external endpoints, or download flows, explicitly check whether the default path works reliably in mainland China and add configuration or fallback when needed.
|
||||
|
||||
- Trigger: the user required deployment to use `docker compose` only and explicitly disallowed host environment changes.
|
||||
- Rule: for remote rollout tasks in this repo, prefer repository-contained `docker compose` changes and do not install packages, edit host configs, or mutate global environment state unless the user explicitly approves it.
|
||||
- Preventive action: when a deployment is blocked, first fix Dockerfiles, compose files, env files, and mounted paths inside the repo before considering any host-level workaround.
|
||||
|
||||
@@ -2,44 +2,64 @@
|
||||
|
||||
## Checklist
|
||||
|
||||
- [x] Confirm the changed `people_flow_project` slice is locally validated before deploy.
|
||||
- [x] Verify the plan covers remote sync, service rebuild, health verification, and post-deploy output inspection.
|
||||
- [x] Sync the updated `people_flow_project` runtime files to `10.8.0.11` and verify remote hashes.
|
||||
- [x] Rebuild and restart only the `people-flow-project` service on the remote host.
|
||||
- [x] Verify the remote container is healthy after deployment.
|
||||
- [x] Print the actual new output structure from the deployed remote code path and note any limitation versus waiting for the next live half-hour webhook.
|
||||
- [x] Record deployment and verification evidence in the Review section.
|
||||
- [x] Audit the current `.11` deployment state, image tags, and runtime container diffs.
|
||||
- [x] Identify the minimal release payload: pushed images, compose/env/config assets, weights, and runtime-added files not present in the base images.
|
||||
- [x] Push the `.11` images to `ota.zhengxinshipin.com:5443` with stable release tags.
|
||||
- [x] Build a ZIP bundle containing compose files and all required non-image runtime assets.
|
||||
- [x] Publish the ZIP bundle and an install script under `/var/www/html/ai_deploy` on `10.8.0.1`.
|
||||
- [x] Verify the published artifacts are downloadable and the install flow is internally consistent.
|
||||
|
||||
## Scope And Risks
|
||||
|
||||
- Scope: deploy the `people_flow_project` output-label changes to `10.8.0.11` and inspect the newly available output structure from the remote deployed code.
|
||||
- Expected touch points: `managed/people_flow_project/src/people_flow/queue_analytics.py`, `managed/people_flow_project/src/people_flow/manage_api.py`, remote deployment under `/home/xiaozheng/managed-portal`, and the `people-flow-project` docker compose service.
|
||||
- Risk: the currently saved live webhook/window JSON files on the remote host will not gain the new label fields until the next real half-hour window is emitted after restart, so immediate inspection may need to use a direct code-path sample or manage API response rather than a freshly emitted live webhook file.
|
||||
- Risk: restarting `people-flow-project` resets the current rolling half-hour window boundary; that is acceptable for deployment but should be stated explicitly.
|
||||
- Scope: publish the current managed-portal deployment that is running on `10.8.0.11` by pushing its images to `ota.zhengxinshipin.com:5443`, generating a downloadable install script on `10.8.0.1`, and uploading a ZIP bundle with compose/runtime assets required for the stack to run correctly elsewhere.
|
||||
- Expected touch points: remote Docker images on `.11`, runtime asset directories under `managed/`, deployment compose/env files under `deploy/`, and installer artifacts on `/var/www/html/ai_deploy` on `10.8.0.1`.
|
||||
- Risk: the running `.11` containers use local `:dev` images and also contain runtime-added files such as `lap` inside `people-flow-project`; pushing only the local images will not fully reproduce the running state unless those extras are separately bundled or the install path reapplies them.
|
||||
- Risk: required assets may live outside the image as mounted files, especially configs, outputs, weights, and managed data. Missing any of these will produce an install that starts but does not behave like `.11`.
|
||||
- Risk: registry push may require credentials that are not currently cached for user `xiaozheng`; confirm push access before finalizing the artifact layout.
|
||||
|
||||
## Validation Intent
|
||||
|
||||
- Verify remote file parity before rebuilding.
|
||||
- Check container health and startup logs after deployment.
|
||||
- Print an actual structure from the deployed remote code path immediately, and distinguish it from the next live webhook file that will only appear after the next rollover.
|
||||
- Prove the exact `.11` images were retagged and pushed to `ota.zhengxinshipin.com:5443`.
|
||||
- Prove the ZIP bundle includes compose/env/config/runtime assets needed by the current `.11` deployment.
|
||||
- Prove the install script on `10.8.0.1` references the published URLs, downloads the ZIP, unpacks it, and pulls the registry images expected by the compose file.
|
||||
|
||||
## Review
|
||||
|
||||
- Status: completed.
|
||||
- Result: the updated `people_flow_project` code is deployed on `10.8.0.11`, the rebuilt `people-flow-project` container is healthy, and the deployed remote code path now exposes the new human-readable queue level and change labels. The currently saved live window/webhook files were generated before the next post-restart half-hour rollover, so the most immediate proof comes from the deployed manage API response and a direct runtime-code simulation inside the container.
|
||||
- Result: published the current `.11` managed-portal stack as release `20260513-330373b-11`, including pushed registry images, a runtime-asset ZIP, and an install script under `/var/www/html/ai_deploy` on `10.8.0.1`.
|
||||
- Release payload:
|
||||
- Registry images pushed to `ota.zhengxinshipin.com:5443`:
|
||||
- `managed-portal:20260513-330373b-11`
|
||||
- `managed-portal-web:20260513-330373b-11`
|
||||
- `people-flow-project:20260513-330373b-11`
|
||||
- `store-dwell-alert:20260513-330373b-11`
|
||||
- ZIP bundle: `/var/www/html/ai_deploy/managed-portal-20260513-330373b-11.zip`
|
||||
- Installer script: `/var/www/html/ai_deploy/install-managed-portal-20260513-330373b-11.sh`
|
||||
- Latest symlinks:
|
||||
- `/var/www/html/ai_deploy/managed-portal-latest.zip`
|
||||
- `/var/www/html/ai_deploy/install-managed-portal-latest.sh`
|
||||
- ZIP contents include:
|
||||
- `deploy/docker-compose.yml`
|
||||
- `deploy/docker-compose.ota-release.yml`
|
||||
- `deploy/managed-portal.release.env`
|
||||
- `deploy/Dockerfile.runtime-overlay`
|
||||
- `managed_services.yaml`
|
||||
- mounted runtime assets from `.11`: people-flow config/outputs/weights and store-dwell config/data
|
||||
- runtime overlays extracted from running containers for `lap` in both Python services and `/app/logs/events.jsonl` from `store-dwell-alert`
|
||||
- Verification:
|
||||
- synced `managed/people_flow_project/src/people_flow/queue_analytics.py` and `managed/people_flow_project/src/people_flow/manage_api.py` to `/home/xiaozheng/managed-portal/managed/people_flow_project/src/people_flow/` on `10.8.0.11` and verified SHA256 parity with local files:
|
||||
- `queue_analytics.py`: `dd12c0a7af2d7c1bf68e3496560fe2ea0fb5c1d582bea7c4dada0caf105711c8`
|
||||
- `manage_api.py`: `c723fd570a29b43cd055dfaca4a5fc9ce1459b55754d2dbd0b8edcdef7da4cf1`
|
||||
- rebuilt and restarted only `people-flow-project` with `docker compose --env-file managed-portal.10.8.0.11.env up -d --build people-flow-project` on the remote host;
|
||||
- confirmed remote status after deploy: `people-flow-project` is `Up` and `healthy`;
|
||||
- queried the deployed manage API summary endpoint inside the container and observed these actual metrics keys/values from the live response: `{ "queue_level": "normal", "queue_level_label": "人数正常", "previous_queue_level": "few", "previous_queue_level_label": "人少", "status_change": "queue_normalized", "status_change_label": "人数变正常" }`;
|
||||
- executed a direct simulation inside the deployed container using the updated `QueueWindowTracker` code path and printed the actual new `queue_metrics` JSON:
|
||||
- `queue_level`: `crowded`
|
||||
- `queue_level_label`: `人多`
|
||||
- `previous_queue_level`: `null`
|
||||
- `previous_queue_level_label`: `""`
|
||||
- `status_change`: `initial`
|
||||
- `status_change_label`: `初始`
|
||||
- plus the existing `queue_time_threshold_seconds`, `over_threshold_count`, `under_threshold_count`, and `people[]` fields;
|
||||
- noted deployment side effect: restarting `people-flow-project` resets the current rolling 1800-second window, so the next real live `half_hour_report` file/webhook emitted after this restart will be the first persisted artifact that contains the new label fields.
|
||||
- Registry push succeeded for all four images. Observed repo digests:
|
||||
- `managed-portal@sha256:589f699edce8271c80516030eae81abed95d8e62804976955eb86bf211d98f4e`
|
||||
- `managed-portal-web@sha256:f2e99c4745a3c16118a74084585f0a455e4f5295d9eb4cbabf2689b841966d9b`
|
||||
- `people-flow-project@sha256:963ecd41ee8a3f986c581b5330ce7163614571427711d524b936f05c3e84ec96`
|
||||
- `store-dwell-alert@sha256:d324cb2653ef25f6984a12b0cfa92064bf2c86b2946462001d14d254818d243d`
|
||||
- Source and published ZIP sizes match exactly: `1261636056` bytes on `.11` and `.1`.
|
||||
- HTTP validation succeeded:
|
||||
- `http://10.8.0.1/ai_deploy/managed-portal-20260513-330373b-11.zip` => `200 OK`, `Content-Length: 1261636056`
|
||||
- `http://10.8.0.1/ai_deploy/install-managed-portal-20260513-330373b-11.sh` => `200 OK`
|
||||
- ZIP content validation succeeded both at the source and after upload, including `release-manifest.env`, `deploy/docker-compose.ota-release.yml`, and runtime overlay files under `runtime-overlays/.../lap/...`.
|
||||
- Local release asset validation passed:
|
||||
- `sh -n deploy/install-managed-portal-ota.sh`
|
||||
- compose config expansion for `deploy/docker-compose.ota-release.yml` with the `.11` env file and placeholder image refs
|
||||
- Residual risk:
|
||||
- The published installer was validated for syntax and asset consistency, but it was not executed end-to-end on a fresh target host in this task.
|
||||
- The bundle intentionally excludes ephemeral `/tmp`, `/run`, and NVIDIA runtime-injected host libraries; reproducing GPU runtime behavior still depends on the target host having a working NVIDIA container runtime when `gpus: all` is used.
|
||||
|
||||
Reference in New Issue
Block a user