From d1c4b77974f7ad18eee3aaa1769c6327da4d43d5 Mon Sep 17 00:00:00 2001 From: "skye.yue" Date: Tue, 19 May 2026 16:20:13 +0800 Subject: [PATCH] Split OTA weights for Ubuntu installs --- .gitignore | 5 ++ README.md | 8 ++- deploy/install-managed-portal-ota.sh | 86 ++++++++++++++++++++++++++-- deploy/package-managed-portal-ota.sh | 59 +++++++++++++++++-- tasks/lessons.md | 10 ++++ tasks/todo.md | 29 ++++------ 6 files changed, 167 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 02b01e3..15d2cfd 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,8 @@ dist/ coverage/ *.log managed-portal +api_response.json +output.txt +release-manifest.env +sim_workspace/ +test_output/ diff --git a/README.md b/README.md index 2dab2d7..5da3664 100644 --- a/README.md +++ b/README.md @@ -104,19 +104,21 @@ RELEASE_ENV_SOURCE=deploy/managed-portal.10.8.0.12.env \ sh deploy/package-managed-portal-ota.sh ``` -默认情况下,主 ZIP 不包含 `managed/people_flow_project/weights/`。OTA installer 会优先复用主机上的共享权重目录,避免每次只改安装脚本或配置时都重复打包、上传大体积权重。 +默认情况下,主 ZIP 不包含 `managed/people_flow_project/weights/`。打包脚本会额外生成一个独立的 `people-flow-weights-<RELEASE_VERSION>.tar.gz`,用于 Ubuntu 新机器首次安装;已有机器升级时,OTA installer 会优先复用主机上的共享权重目录,避免每次只改安装脚本或配置时都重复打包、上传大体积权重。 -只有两种场景才建议把权重重新打进 ZIP: +只有两种场景才建议重新发布这个独立权重包: - 首次在一台没有预置权重的新主机上安装 - `people_flow_project` 的权重文件本身发生变更 -这两种场景可以临时打开: +只有在你明确想把权重重新并回主 ZIP 时,才需要临时打开: ```bash INCLUDE_WEIGHTS=1 sh deploy/package-managed-portal-ota.sh ``` +Ubuntu 新机器首次安装时,如果系统没有 `unzip`,OTA installer 会自动用 `apt-get` 安装;然后在共享权重目录不存在时自动下载 `people-flow-weights-<RELEASE_VERSION>.tar.gz`。 + ## 模型权重 子服务镜像构建前需要以下权重文件: diff --git a/deploy/install-managed-portal-ota.sh b/deploy/install-managed-portal-ota.sh index cf6baab..5a99cae 100644 --- a/deploy/install-managed-portal-ota.sh +++ b/deploy/install-managed-portal-ota.sh @@ -4,6 +4,7 @@ set -eu RELEASE_VERSION="${RELEASE_VERSION:-20260518-7b32b21-11}" BASE_URL="${BASE_URL:-http://10.8.0.1/ai_deploy}" BUNDLE_NAME="${BUNDLE_NAME:-managed-portal-${RELEASE_VERSION}.zip}"
+WEIGHTS_ARCHIVE_NAME="${WEIGHTS_ARCHIVE_NAME:-people-flow-weights-${RELEASE_VERSION}.tar.gz}" INSTALL_ROOT="${INSTALL_ROOT:-/opt/managed-portal-releases}" TARGET_DIR="${TARGET_DIR:-${INSTALL_ROOT}/managed-portal-${RELEASE_VERSION}}" SHARED_ROOT="${SHARED_ROOT:-${INSTALL_ROOT}/shared}" @@ -16,6 +17,46 @@ require_command() { fi } +ensure_ubuntu_package() { + package_name="$1" + command_name="$2" + + if command -v "$command_name" >/dev/null 2>&1; then + return 0 + fi + + if [ ! -r /etc/os-release ]; then + echo "missing required command: $command_name; unable to detect OS for automatic installation" >&2 + exit 1 + fi + + os_id="$(. /etc/os-release && printf '%s' "${ID:-}")" + os_like="$(. /etc/os-release && printf '%s' "${ID_LIKE:-}")" + + case "$os_id:$os_like" in + ubuntu:*|*:ubuntu*|debian:*|*:debian*) + ;; + *) + echo "missing required command: $command_name; automatic installation is only supported on Ubuntu/Debian hosts" >&2 + exit 1 + ;; + esac + + if [ "$(id -u)" -ne 0 ]; then + echo "missing required command: $command_name; rerun installer as root on Ubuntu to auto-install $package_name" >&2 + exit 1 + fi + + export DEBIAN_FRONTEND=noninteractive + apt-get update + apt-get install -y "$package_name" + + if ! command -v "$command_name" >/dev/null 2>&1; then + echo "failed to install required command: $command_name" >&2 + exit 1 + fi +} + pull_or_use_local() { image="$1" @@ -46,6 +87,11 @@ dir_has_files() { [ -d "$directory" ] && [ -n "$(find "$directory" -mindepth 1 -print -quit 2>/dev/null)" ] } +dir_has_payload_files() { + directory="$1" + [ -d "$directory" ] && [ -n "$(find "$directory" -type f ! 
-name '.gitkeep' -print -quit 2>/dev/null)" ] +} + copy_dir_contents() { source_dir="$1" target_dir="$2" @@ -64,6 +110,16 @@ download_bundle() { echo "$bundle_zip" } +download_weights_archive() { + tmp_dir="$1" + weights_archive="$tmp_dir/$WEIGHTS_ARCHIVE_NAME" + weights_url="${BASE_URL%/}/$WEIGHTS_ARCHIVE_NAME" + + echo "downloading $weights_url" >&2 + curl -fL "$weights_url" -o "$weights_archive" + echo "$weights_archive" +} + build_overlay_image() { overlay_name="$1" base_image="$2" @@ -126,17 +182,36 @@ find_existing_people_flow_weights() { return 1 } +extract_people_flow_weights_archive() { + tmp_dir="$1" + bundle_weights_dir="$TARGET_DIR/managed/people_flow_project/weights" + weights_archive="$(download_weights_archive "$tmp_dir")" + + mkdir -p "$TARGET_DIR/managed" + tar -xzf "$weights_archive" -C "$TARGET_DIR/managed" + + if ! dir_has_payload_files "$bundle_weights_dir"; then + echo "downloaded weights archive did not populate $bundle_weights_dir" >&2 + exit 1 + fi + + printf '%s\n' "$bundle_weights_dir" +} + prepare_people_flow_weights() { + tmp_dir="$1" bundle_weights_dir="$TARGET_DIR/managed/people_flow_project/weights" source_weights_dir="" - if dir_has_files "$bundle_weights_dir"; then + if dir_has_payload_files "$bundle_weights_dir"; then source_weights_dir="$bundle_weights_dir" echo "seeding shared people-flow weights from bundle" >&2 elif source_weights_dir="$(find_existing_people_flow_weights 2>/dev/null)"; then echo "reusing existing people-flow weights from $source_weights_dir" >&2 + elif source_weights_dir="$(extract_people_flow_weights_archive "$tmp_dir" 2>/dev/null)"; then + echo "seeding shared people-flow weights from downloaded archive" >&2 else - echo "people-flow weights not found; seed $MANAGED_PEOPLE_FLOW_WEIGHTS_DIR or include managed/people_flow_project/weights in the release zip" >&2 + echo "people-flow weights not found; seed $MANAGED_PEOPLE_FLOW_WEIGHTS_DIR, publish $WEIGHTS_ARCHIVE_NAME, or include 
managed/people_flow_project/weights in the release zip" >&2 exit 1 fi @@ -151,7 +226,8 @@ prepare_people_flow_weights() { } require_command curl -require_command unzip +ensure_ubuntu_package unzip unzip +require_command tar require_command docker tmp_dir="$(mktemp -d)" @@ -174,7 +250,7 @@ set +a MANAGED_PEOPLE_FLOW_WEIGHTS_DIR="${MANAGED_PEOPLE_FLOW_WEIGHTS_DIR:-$DEFAULT_PEOPLE_FLOW_WEIGHTS_DIR}" ensure_runtime_directories -prepare_people_flow_weights +prepare_people_flow_weights "$tmp_dir" clear_stale_runtime_state @@ -208,4 +284,4 @@ run_compose \ -f docker-compose.ota-release.yml \ up -d -echo "release installed under $TARGET_DIR" \ No newline at end of file +echo "release installed under $TARGET_DIR" diff --git a/deploy/package-managed-portal-ota.sh b/deploy/package-managed-portal-ota.sh index 9badff1..2bff84a 100644 --- a/deploy/package-managed-portal-ota.sh +++ b/deploy/package-managed-portal-ota.sh @@ -12,7 +12,9 @@ STAGE_DIR="${OUTPUT_DIR}/managed-portal-${RELEASE_VERSION}" BUNDLE_PATH="${OUTPUT_DIR}/managed-portal-${RELEASE_VERSION}.zip" INSTALLER_PATH="${OUTPUT_DIR}/install-managed-portal-${RELEASE_VERSION}.sh" INCLUDE_WEIGHTS="${INCLUDE_WEIGHTS:-0}" +GENERATE_WEIGHTS_ARCHIVE="${GENERATE_WEIGHTS_ARCHIVE:-1}" PEOPLE_FLOW_WEIGHTS_SOURCE="${PEOPLE_FLOW_WEIGHTS_SOURCE:-$REPO_ROOT/managed/people_flow_project/weights}" +WEIGHTS_ARCHIVE_PATH="${OUTPUT_DIR}/people-flow-weights-${RELEASE_VERSION}.tar.gz" require_path() { target="$1" @@ -28,6 +30,11 @@ dir_has_files() { [ -d "$directory" ] && [ -n "$(find "$directory" -mindepth 1 -print -quit 2>/dev/null)" ] } +dir_has_payload_files() { + directory="$1" + [ -d "$directory" ] && [ -n "$(find "$directory" -type f ! 
-name '.gitkeep' -print -quit 2>/dev/null)" ] +} + copy_dir() { source_dir="$1" target_dir="$2" @@ -56,7 +63,7 @@ copy_dir "$REPO_ROOT/managed/store_dwell_alert/config" "$STAGE_DIR/managed/store copy_dir "$REPO_ROOT/managed/people_flow_project/config" "$STAGE_DIR/managed/people_flow_project/config" if [ "$INCLUDE_WEIGHTS" = "1" ]; then - if ! dir_has_files "$PEOPLE_FLOW_WEIGHTS_SOURCE"; then + if ! dir_has_payload_files "$PEOPLE_FLOW_WEIGHTS_SOURCE"; then echo "people-flow weights requested but missing under $PEOPLE_FLOW_WEIGHTS_SOURCE" >&2 exit 1 fi @@ -86,8 +93,47 @@ with zipfile.ZipFile(bundle_path, "w", compression=zipfile.ZIP_DEFLATED) as arch archive.write(path, arcname.as_posix()) PY -cp "$SCRIPT_DIR/install-managed-portal-ota.sh" "$INSTALLER_PATH" -chmod +x "$INSTALLER_PATH" +python3 - "$SCRIPT_DIR/install-managed-portal-ota.sh" "$INSTALLER_PATH" "$RELEASE_VERSION" <<'PY' +from pathlib import Path +import re +import sys + +source_path = Path(sys.argv[1]) +target_path = Path(sys.argv[2]) +release_version = sys.argv[3] + +content = source_path.read_text(encoding="utf-8") +content = re.sub( + r'^RELEASE_VERSION="\$\{RELEASE_VERSION:-[^"]+\}"$', + f'RELEASE_VERSION="${{RELEASE_VERSION:-{release_version}}}"', + content, + count=1, + flags=re.MULTILINE, +) +target_path.write_text(content, encoding="utf-8") +target_path.chmod(0o755) +PY + +if [ "$GENERATE_WEIGHTS_ARCHIVE" = "1" ] && dir_has_payload_files "$PEOPLE_FLOW_WEIGHTS_SOURCE"; then + python3 - "$PEOPLE_FLOW_WEIGHTS_SOURCE" "$WEIGHTS_ARCHIVE_PATH" <<'PY' +from pathlib import Path +import sys +import tarfile + +source_dir = Path(sys.argv[1]) +archive_path = Path(sys.argv[2]) + +if archive_path.exists(): + archive_path.unlink() + +with tarfile.open(archive_path, "w:gz") as archive: + for path in sorted(source_dir.rglob("*")): + if path.name == ".gitkeep": + continue + arcname = Path("people_flow_project") / "weights" / path.relative_to(source_dir) + archive.add(path, arcname=arcname.as_posix(), 
recursive=False) +PY +fi echo "OTA bundle created: $BUNDLE_PATH" echo "Versioned installer created: $INSTALLER_PATH" @@ -95,4 +141,9 @@ if [ "$INCLUDE_WEIGHTS" = "1" ]; then echo "Bundle includes managed/people_flow_project/weights" else echo "Bundle excludes managed/people_flow_project/weights; the installer will reuse the shared host weights directory if available" -fi \ No newline at end of file +fi +if [ "$GENERATE_WEIGHTS_ARCHIVE" = "1" ] && dir_has_payload_files "$PEOPLE_FLOW_WEIGHTS_SOURCE"; then + echo "Separate weights archive created: $WEIGHTS_ARCHIVE_PATH" +else + echo "Separate weights archive skipped; no people-flow weights payload found under $PEOPLE_FLOW_WEIGHTS_SOURCE" +fi diff --git a/tasks/lessons.md b/tasks/lessons.md index 35b5eab..d417bed 100644 --- a/tasks/lessons.md +++ b/tasks/lessons.md @@ -29,3 +29,13 @@ - Trigger: the user clarified that OTA installer updates should not keep repackaging and uploading the whole repository tree or fixed `people_flow_project` weights. - Rule: managed-portal OTA releases should ship a minimal ZIP with deploy metadata and managed config only; `people_flow_project` weights should be reused from a stable host location unless the weights themselves changed or the host is new. - Preventive action: when preparing OTA artifacts, use the minimal packaging script, exclude `managed/people_flow_project/weights` by default, and only publish a weights-bearing bundle for first-time installs or actual weight updates. + +## 2026-05-19 + +- Trigger: the user corrected the OTA publication login for `10.8.0.1`. +- Rule: the OTA web host `10.8.0.1` must be published with `root`, not `xiaozheng`. +- Preventive action: for future managed-portal OTA rollouts, verify publication access against `root@10.8.0.1:/var/www/html/ai_deploy` before treating upload as blocked. 
+ +- Trigger: the user clarified that all new installation targets are Ubuntu machines and asked for missing `unzip` to be handled automatically, with weights delivered separately. +- Rule: the managed-portal OTA installer should treat Ubuntu as the first-install baseline, auto-install `unzip` via `apt-get` when needed, and use a separate people-flow weights archive instead of forcing weights into the main ZIP. +- Preventive action: keep the main OTA ZIP minimal, publish `people-flow-weights-<RELEASE_VERSION>.tar.gz` alongside each release when weights are available, and validate that the installer still reuses shared weights on upgrades. diff --git a/tasks/todo.md b/tasks/todo.md index 8e50114..8f7f684 100644 --- a/tasks/todo.md +++ b/tasks/todo.md @@ -2,31 +2,24 @@ ## Checklist -- [x] Classify the remaining modified and untracked files into non-config code changes to commit versus local config/artifact files to exclude. -- [x] Run targeted validation for the code and script changes that will be committed. -- [x] Create a scoped git commit containing the non-config code changes only. -- [ ] Push the scoped commit to `origin/main`. +- [ ] Reuse the already-published `managed-portal-20260519-f3f40b5-11.zip` main bundle and cut updated installer/weights artifacts for the same tag. +- [ ] Publish the updated installer and separate weights archive to `10.8.0.1` and verify the HTTP endpoints. +- [ ] Commit and push the repository changes for the split-weights Ubuntu installer flow. ## Scope And Risks -- Scope: commit and push the remaining local non-config code changes in this repository while excluding local configuration files and generated artifacts. -- Expected touch points: currently modified code/docs/scripts such as `README.md`, `deploy/install-managed-portal-ota.sh`, `deploy/package-managed-portal-ota.sh`, `managed/people_flow_project/src/people_flow/{models.py,pipeline.py}`, `managed/people_flow_project/tests/test_pipeline.py`, and `tasks/lessons.md`.
-- Risk: the worktree also contains local artifacts and config-adjacent outputs such as `test_output/`, `sim_workspace/`, `release-manifest.env`, `api_response.json`, and `output.txt`; these must not be swept into the commit by accident. -- Risk: some modified files, especially docs and deployment scripts, are related to earlier OTA work and may need only narrow validation rather than full end-to-end execution. +- Scope: keep the existing OTA application ZIP for `20260519-f3f40b5-11`, generate a refreshed installer plus separate people-flow weights archive for that same release tag, publish them to `10.8.0.1`, and push the supporting repository changes to Git. +- Expected touch points: `.11` release artifacts, `/var/www/html/ai_deploy` on `10.8.0.1`, `deploy/package-managed-portal-ota.sh`, `deploy/install-managed-portal-ota.sh`, `README.md`, `.gitignore`, and task tracking files. +- Risk: reusing the existing main ZIP means the installer and weights archive must remain compatible with the already-published `managed-portal-20260519-f3f40b5-11.zip`. +- Risk: the current local repository does not contain real weights payload files, so the separate weights archive may need to be generated from the `.11` host release workspace or a stable host weights directory instead of local source control. +- Risk: the commit must exclude local artifact files and only capture the intended repo changes. ## Validation Intent -- Prove the selected commit excludes local config files and generated artifacts. -- Run the narrowest meaningful checks for the people-flow pipeline/test changes and at least a syntax-level check for the OTA scripts. +- Prove the refreshed installer and separate weights archive exist for tag `20260519-f3f40b5-11`. +- Prove both artifacts are downloadable from `10.8.0.1/ai_deploy`. +- Prove the Git commit/push contains only the intended repository changes. ## Review - Status: in progress. 
-- Commit set selected: - - Included: `README.md`, `deploy/install-managed-portal-ota.sh`, `deploy/package-managed-portal-ota.sh`, `managed/people_flow_project/src/people_flow/models.py`, `managed/people_flow_project/src/people_flow/pipeline.py`, `managed/people_flow_project/tests/test_pipeline.py`, `tasks/lessons.md`, `tasks/todo.md`. - - Excluded as local config or generated artifacts: `test_output/`, `sim_workspace/`, `release-manifest.env`, `api_response.json`, `output.txt`. -- Validation completed: - - `PYTHONPATH=. pytest tests/test_pipeline.py tests/test_queue_analytics.py` in `managed/people_flow_project` - - Result: `9 passed in 0.66s` - - `sh -n deploy/install-managed-portal-ota.sh` - - `sh -n deploy/package-managed-portal-ota.sh`