diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..3fefff1 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +.git +.DS_Store +managed +web/node_modules +web/dist +/managed-portal diff --git a/Dockerfile b/Dockerfile index 75f4b76..3641d06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ FROM swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/golang:1.25.4-alpine AS builder -ENV TZ=Asia/Shanghai +ENV TZ=Asia/Shanghai \ + GOPROXY=https://goproxy.cn,direct RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories RUN apk add --no-cache ca-certificates tzdata diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index 6dfeffc..c04dae8 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -1,3 +1,5 @@ +name: managed-portal + services: managed-portal: build: diff --git a/findings.md b/findings.md new file mode 100644 index 0000000..c47fdf8 --- /dev/null +++ b/findings.md @@ -0,0 +1,32 @@ +# Managed Portal Deployment Findings + +## Local +- Project root: `/Users/yoilun/code/managed-portal`. +- Existing Docker files: root `Dockerfile`, `deploy/docker-compose.yml`, managed service Dockerfiles. +- Git worktree has uncommitted changes; deployment should use the current filesystem state. +- Docker Compose services: `managed-portal`, `store-dwell-alert`, `people-flow-project`, `managed-portal-web`. +- Compose exposes web UI on `${MANAGED_PORTAL_WEB_PORT:-13000}` and uses `restart: unless-stopped` for all services. +- Model weights/assets found: + - `managed/people_flow_project/weights` is about 1.1G and includes YOLO plus DeepFace `.h5` files. + - `managed/store_dwell_alert/weights` is about 5.4M and includes `yolo11n.pt`. +- Root `.dockerignore` excludes `managed`, but the managed service images build from their own contexts under `managed/`, so their weights remain part of those build contexts. + +## Remote +- Host `192.168.5.185` is Ubuntu Linux x86_64, kernel `6.8.0-111-generic`. +- User `xiaozheng` has sudo group membership. +- Disk space: `/` has about 824G available. +- Docker is installed: `Docker version 29.4.2`. +- Docker Compose is installed: `Docker Compose version v5.1.3`. +- `systemctl` is available. +- Remote deploy path: `/home/xiaozheng/code/managed-portal`. +- Remote GPU: NVIDIA GeForce RTX 3080, with Docker `nvidia` runtime available. +- Remote weight files verified by path and byte size. +- Docker service is enabled on boot. +- Running containers: + - `managed-portal` + - `managed-portal-web` + - `people-flow-project` + - `store-dwell-alert` +- `people-flow-project` and `store-dwell-alert` report healthy in Compose. +- Web UI is exposed on `http://192.168.5.185:13000/`. +- Managed services API is available through the web proxy at `/api/managed-services`. diff --git a/internal/managed/remote_client.go b/internal/managed/remote_client.go index 35e5bca..58178db 100644 --- a/internal/managed/remote_client.go +++ b/internal/managed/remote_client.go @@ -18,13 +18,19 @@ type HTTPDoer interface { type RemoteClient struct { httpClient HTTPDoer + attempts int + retryDelay time.Duration } func NewRemoteClient(client HTTPDoer) *RemoteClient { if client == nil { client = &http.Client{Timeout: 5 * time.Second} } - return &RemoteClient{httpClient: client} + return &RemoteClient{ + httpClient: client, + attempts: 5, + retryDelay: 200 * time.Millisecond, + } } func (c *RemoteClient) GetConfig(ctx context.Context, service Service) (map[string]any, error) { @@ -36,17 +42,18 @@ func (c *RemoteClient) GetConfig(ctx context.Context, service Service) (map[stri } func (c *RemoteClient) UpdateRTSP(ctx context.Context, service Service, rtsp string) (map[string]any, error) { - body := strings.NewReader(fmt.Sprintf(`{"rtsp_url":%q}`, rtsp)) - req, err := c.newRequest(ctx, http.MethodPut, service, "/api/manage/config", body) + resp, err := c.doRequest(ctx, func() (*http.Request, error) { + body := strings.NewReader(fmt.Sprintf(`{"rtsp_url":%q}`, rtsp)) + req, err := c.newRequest(ctx, http.MethodPut, service, "/api/manage/config", body) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + return req, nil + }) if err != nil { return nil, err } - req.Header.Set("Content-Type", "application/json") - - resp, err := c.httpClient.Do(req) - if err != nil { - return nil, fmt.Errorf("request %s %s: %w", req.Method, req.URL.String(), err) - } defer resp.Body.Close() if resp.StatusCode >= 400 { @@ -55,7 +62,7 @@ func (c *RemoteClient) UpdateRTSP(ctx context.Context, service Service, rtsp str var payload map[string]any if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { - return nil, fmt.Errorf("decode response %s: %w", req.URL.String(), err) + return nil, fmt.Errorf("decode response %s: %w", responseURL(resp, service.APIBaseURL+"/api/manage/config"), err) } return payload, nil } @@ -93,14 +100,13 @@ func (c *RemoteClient) PreviewFile(ctx context.Context, service Service, path st func (c *RemoteClient) Download(ctx context.Context, service Service, path string) (*http.Response, error) { query := url.Values{} query.Set("path", path) - req, err := c.newRequest(ctx, http.MethodGet, service, "/api/manage/files/download?"+query.Encode(), nil) + + resp, err := c.doRequest(ctx, func() (*http.Request, error) { + return c.newRequest(ctx, http.MethodGet, service, "/api/manage/files/download?"+query.Encode(), nil) + }) if err != nil { return nil, err } - resp, err := c.httpClient.Do(req) - if err != nil { - return nil, fmt.Errorf("request %s %s: %w", req.Method, req.URL.String(), err) - } if resp.StatusCode >= 400 { defer resp.Body.Close() return nil, decodeAPIError(resp) @@ -109,26 +115,79 @@ func (c *RemoteClient) Download(ctx context.Context, service Service, path strin } func (c *RemoteClient) getJSON(ctx context.Context, service Service, endpoint string, target any) error { - req, err := c.newRequest(ctx, http.MethodGet, service, endpoint, nil) + resp, err := c.doRequest(ctx, func() (*http.Request, error) { + return c.newRequest(ctx, http.MethodGet, service, endpoint, nil) + }) if err != nil { return err } - - resp, err := c.httpClient.Do(req) - if err != nil { - return fmt.Errorf("request %s %s: %w", req.Method, req.URL.String(), err) - } defer resp.Body.Close() if resp.StatusCode >= 400 { return decodeAPIError(resp) } if err := json.NewDecoder(resp.Body).Decode(target); err != nil { - return fmt.Errorf("decode response %s: %w", req.URL.String(), err) + return fmt.Errorf("decode response %s: %w", responseURL(resp, strings.TrimRight(service.APIBaseURL, "/")+endpoint), err) } return nil } +func responseURL(resp *http.Response, fallback string) string { + if resp != nil && resp.Request != nil && resp.Request.URL != nil { + return resp.Request.URL.String() + } + return fallback +} + +func (c *RemoteClient) doRequest(ctx context.Context, newReq func() (*http.Request, error)) (*http.Response, error) { + attempts := c.attempts + if attempts <= 0 { + attempts = 1 + } + delay := c.retryDelay + if delay <= 0 { + delay = 100 * time.Millisecond + } + + var lastReq *http.Request + var lastErr error + for attempt := 1; attempt <= attempts; attempt++ { + req, err := newReq() + if err != nil { + return nil, err + } + lastReq = req + resp, err := c.httpClient.Do(req) + if err == nil { + return resp, nil + } + lastErr = err + if attempt == attempts { + break + } + if err := sleepWithContext(ctx, delay); err != nil { + return nil, err + } + delay *= 2 + } + + if lastReq != nil { + return nil, fmt.Errorf("request %s %s: %w", lastReq.Method, lastReq.URL.String(), lastErr) + } + return nil, lastErr +} + +func sleepWithContext(ctx context.Context, delay time.Duration) error { + timer := time.NewTimer(delay) + defer timer.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } +} + func (c *RemoteClient) newRequest(ctx context.Context, method string, service Service, endpoint string, body io.Reader) (*http.Request, error) { base := strings.TrimRight(service.APIBaseURL, "/") req, err := http.NewRequestWithContext(ctx, method, base+endpoint, body) diff --git a/internal/managed/remote_client_test.go b/internal/managed/remote_client_test.go index 636124d..79b2e5c 100644 --- a/internal/managed/remote_client_test.go +++ b/internal/managed/remote_client_test.go @@ -4,10 +4,12 @@ import ( "bytes" "context" "encoding/json" + "errors" "io" "net/http" "strings" "testing" + "time" ) type roundTripFunc func(req *http.Request) (*http.Response, error) @@ -139,3 +141,37 @@ func TestRemoteClientRoundTrip(t *testing.T) { t.Fatalf("Content-Disposition = %q", resp.Header.Get("Content-Disposition")) } } + +func TestRemoteClientRetriesTransientRequestErrors(t *testing.T) { + t.Parallel() + + attempts := 0 + client := NewRemoteClient(&http.Client{Transport: roundTripFunc(func(r *http.Request) (*http.Response, error) { + attempts++ + if attempts < 3 { + return nil, errors.New("connect: connection refused") + } + return &http.Response{ + StatusCode: http.StatusOK, + Header: make(http.Header), + Body: io.NopCloser(strings.NewReader(`{"config_path":"/srv/store/config/local.yaml"}`)), + }, nil + })}) + client.retryDelay = time.Millisecond + + service := Service{ + ID: "store_dwell_alert", + APIBaseURL: "http://managed.invalid/store", + } + + configPayload, err := client.GetConfig(context.Background(), service) + if err != nil { + t.Fatalf("GetConfig() error = %v", err) + } + if attempts != 3 { + t.Fatalf("attempts = %d", attempts) + } + if got := configPayload["config_path"]; got != "/srv/store/config/local.yaml" { + t.Fatalf("config_path = %#v", got) + } +} diff --git a/internal/webdevice/service.go b/internal/webdevice/service.go index 1f32ed9..8a71197 100644 --- a/internal/webdevice/service.go +++ b/internal/webdevice/service.go @@ -46,6 +46,14 @@ type TCPScanner func(ip, netmask string, port int, excludeIPs map[string]bool) ( type ForwarderFactory func(ip string, port int, listenAddress, targetAddress string) (*webDeviceForwarder, error) type ProxyTargetResolver func(ip string) string +const ( + webDeviceScanConcurrency = 128 + webDeviceScanTimeout = 1500 * time.Millisecond + webDeviceScanAttempts = 2 + webDeviceScanRetryDelay = 100 * time.Millisecond + maxWebDeviceScanAddrs = 256 +) + type Service struct { mu sync.RWMutex allowed map[string]time.Time @@ -74,6 +82,8 @@ func (s *Service) Scan(r *http.Request) (*ScanResult, error) { if err != nil { return nil, err } + scheme, host := requestBase(r) + interfaces = appendRequestHostInterface(interfaces, host) if len(interfaces) == 0 { return &ScanResult{ @@ -94,7 +104,6 @@ func (s *Service) Scan(r *http.Request) (*ScanResult, error) { Errors: []string{}, } - scheme, host := requestBase(r) for _, iface := range interfaces { devices, scanErr := s.tcpScanner(iface.IP, iface.Netmask, 80, excludeIPs) if scanErr != nil { @@ -135,6 +144,22 @@ func (s *Service) Scan(r *http.Request) (*ScanResult, error) { return result, nil } +func appendRequestHostInterface(interfaces []InterfaceInfo, host string) []InterfaceInfo { + if !IsPrivateIPv4Literal(host) { + return interfaces + } + for _, iface := range interfaces { + if iface.IP == host { + return interfaces + } + } + return append(interfaces, InterfaceInfo{ + Name: "request-host", + IP: host, + Netmask: "255.255.255.0", + }) +} + func (s *Service) allowIP(ip string) { s.mu.Lock() defer s.mu.Unlock() @@ -390,8 +415,7 @@ func scanTCP(ip string, netmask string, port int, excludeIPs map[string]bool) ([ var devices []TCPDevice var mu sync.Mutex var wg sync.WaitGroup - semaphore := make(chan struct{}, 20) - timeout := 2 * time.Second + semaphore := make(chan struct{}, webDeviceScanConcurrency) current := make(net.IP, len(ipRange.Start)) copy(current, ipRange.Start) @@ -414,7 +438,7 @@ func scanTCP(ip string, netmask string, port int, excludeIPs map[string]bool) ([ defer wg.Done() defer func() { <-semaphore }() - if scanTCPPort(targetIP, port, timeout) { + if scanTCPPortWithRetry(targetIP, port) { mu.Lock() devices = append(devices, TCPDevice{IP: targetIP, Port: port}) mu.Unlock() @@ -457,7 +481,31 @@ func calculateIPRange(ip string, netmask string) (*ipRange, error) { broadcast[i] |= ^mask[i] } - return &ipRange{Start: network.IP.To4(), End: broadcast}, nil + result := &ipRange{Start: network.IP.To4(), End: broadcast} + if ipToUint32(result.End)-ipToUint32(result.Start)+1 <= maxWebDeviceScanAddrs { + return result, nil + } + + local24Mask := net.CIDRMask(24, 32) + local24Network := parseIP.To4().Mask(local24Mask) + local24Broadcast := make(net.IP, len(local24Network)) + copy(local24Broadcast, local24Network) + for i := 0; i < len(local24Mask); i++ { + local24Broadcast[i] |= ^local24Mask[i] + } + return &ipRange{Start: local24Network, End: local24Broadcast}, nil +} + +func scanTCPPortWithRetry(ip string, port int) bool { + for attempt := 0; attempt < webDeviceScanAttempts; attempt++ { + if scanTCPPort(ip, port, webDeviceScanTimeout) { + return true + } + if attempt < webDeviceScanAttempts-1 { + time.Sleep(webDeviceScanRetryDelay) + } + } + return false } func scanTCPPort(ip string, port int, timeout time.Duration) bool { @@ -487,6 +535,10 @@ func ipv4ToUint32(value string) uint32 { if parsed == nil { return 0 } + return ipToUint32(parsed) +} + +func ipToUint32(parsed net.IP) uint32 { ip := parsed.To4() if ip == nil { return 0 diff --git a/internal/webdevice/service_test.go b/internal/webdevice/service_test.go index bc05b43..6e58c49 100644 --- a/internal/webdevice/service_test.go +++ b/internal/webdevice/service_test.go @@ -1,6 +1,7 @@ package webdevice import ( + "net/http" "net/http/httptest" "testing" ) @@ -71,3 +72,73 @@ func TestScanBuildsDirectURLAndAllowList(t *testing.T) { t.Fatalf("DirectURL = %q", result.Devices[0].DirectURL) } } + +func TestScanIncludesPrivateRequestHostSubnet(t *testing.T) { + t.Parallel() + + svc := NewService() + svc.interfaceGetter = func() ([]InterfaceInfo, error) { + return []InterfaceInfo{{ + Name: "eth0", + IP: "172.18.0.2", + Netmask: "255.255.0.0", + }}, nil + } + svc.tcpScanner = func(ip, netmask string, port int, excludeIPs map[string]bool) ([]TCPDevice, error) { + if ip == "192.168.5.189" { + if netmask != "255.255.255.0" { + t.Fatalf("request host netmask = %q, want 255.255.255.0", netmask) + } + if !excludeIPs["192.168.5.189"] { + t.Fatal("expected request host IP to be excluded") + } + return []TCPDevice{{IP: "192.168.5.124", Port: 80}}, nil + } + return nil, nil + } + svc.newForwarder = func(ip string, port int, listenAddress, targetAddress string) (*webDeviceForwarder, error) { + return &webDeviceForwarder{ip: ip, port: port, targetAddress: targetAddress}, nil + } + + req := httptest.NewRequest(http.MethodGet, "http://192.168.5.189:13000/api/web-devices/scan", nil) + result, err := svc.Scan(req) + if err != nil { + t.Fatalf("Scan() error = %v", err) + } + if result.Count != 1 { + t.Fatalf("result.Count = %d, want 1", result.Count) + } + if result.Devices[0].Interface != "request-host" { + t.Fatalf("Interface = %q, want request-host", result.Devices[0].Interface) + } +} + +func TestCalculateIPRangeCapsLargeSubnetToLocal24(t *testing.T) { + t.Parallel() + + ipRange, err := calculateIPRange("192.168.5.189", "255.255.0.0") + if err != nil { + t.Fatalf("calculateIPRange() error = %v", err) + } + if got := ipRange.Start.String(); got != "192.168.5.0" { + t.Fatalf("Start = %q, want 192.168.5.0", got) + } + if got := ipRange.End.String(); got != "192.168.5.255" { + t.Fatalf("End = %q, want 192.168.5.255", got) + } +} + +func TestCalculateIPRangeKeepsSmallSubnet(t *testing.T) { + t.Parallel() + + ipRange, err := calculateIPRange("192.168.5.189", "255.255.255.240") + if err != nil { + t.Fatalf("calculateIPRange() error = %v", err) + } + if got := ipRange.Start.String(); got != "192.168.5.176" { + t.Fatalf("Start = %q, want 192.168.5.176", got) + } + if got := ipRange.End.String(); got != "192.168.5.191" { + t.Fatalf("End = %q, want 192.168.5.191", got) + } +} diff --git a/managed/people_flow_project/Dockerfile b/managed/people_flow_project/Dockerfile index d6d0bec..f330fcd 100644 --- a/managed/people_flow_project/Dockerfile +++ b/managed/people_flow_project/Dockerfile @@ -9,7 +9,8 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ WORKDIR /opt/people-flow -RUN apt-get update && apt-get install -y --no-install-recommends \ +RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g; s|http://deb.debian.org/debian-security|http://mirrors.aliyun.com/debian-security|g' /etc/apt/sources.list.d/debian.sources && \ + apt-get update && apt-get install -y --no-install-recommends \ ca-certificates \ libglib2.0-0 \ libgl1 \ @@ -19,11 +20,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ COPY requirements-docker.txt ./requirements-docker.txt RUN python -m pip install --upgrade pip setuptools wheel && \ - pip install "numpy<2" && \ - pip install --extra-index-url https://download.pytorch.org/whl/cpu \ - "torch==2.6.0+cpu" "torchvision==0.21.0+cpu" && \ - pip install "tensorflow==2.16.1" "tf-keras==2.16.0" && \ - pip install -r requirements-docker.txt + pip install "numpy<2" + +RUN pip install --extra-index-url https://download.pytorch.org/whl/cpu \ + "torch==2.6.0+cpu" "torchvision==0.21.0+cpu" + +RUN pip install "tensorflow==2.16.1" "tf-keras==2.16.0" + +RUN pip install -r requirements-docker.txt COPY . . COPY scripts/docker-entrypoint.sh /opt/people-flow/scripts/docker-entrypoint.sh diff --git a/managed/people_flow_project/requirements-docker.txt b/managed/people_flow_project/requirements-docker.txt index 0c3b9a0..b862e96 100644 --- a/managed/people_flow_project/requirements-docker.txt +++ b/managed/people_flow_project/requirements-docker.txt @@ -1,6 +1,7 @@ flask>=3.1.0 ultralytics>=8.3.0 -opencv-python-headless>=4.10.0 +opencv-python==4.11.0.86 +opencv-python-headless==4.11.0.86 deepface>=0.0.93 pyyaml>=6.0.2 pandas>=2.2.3 diff --git a/managed/store_dwell_alert/Dockerfile b/managed/store_dwell_alert/Dockerfile index 433a617..671534d 100644 --- a/managed/store_dwell_alert/Dockerfile +++ b/managed/store_dwell_alert/Dockerfile @@ -7,7 +7,8 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ WORKDIR /app -RUN apt-get update \ +RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g; s|http://deb.debian.org/debian-security|http://mirrors.aliyun.com/debian-security|g' /etc/apt/sources.list.d/debian.sources \ + && apt-get update \ && apt-get install -y --no-install-recommends \ ffmpeg \ libgl1 \ diff --git a/progress.md b/progress.md new file mode 100644 index 0000000..4450dcc --- /dev/null +++ b/progress.md @@ -0,0 +1,20 @@ +# Managed Portal Deployment Progress + +## 2026-05-07 +- Started deployment planning. +- Confirmed local project path and initial file inventory. +- Read Docker Compose and service Dockerfiles. +- Located required model weights and approximate transfer size. +- First remote SSH inspection attempt did not return usable output; retrying with a simpler command wrapper. +- Verified remote host prerequisites using interactive SSH. +- Remote deploy parent `/home/xiaozheng/code` exists and target project directory was empty. +- First rsync attempt failed because local rsync lacks `--info=progress2`; switching to portable progress flags. +- Synced 1.36GB of project files to `/home/xiaozheng/code/managed-portal`. +- Verified remote weight files and Docker GPU runtime. +- `docker compose config --quiet` completed successfully on the remote host. +- First remote build was interrupted after pip resolver spent too long backtracking over OpenCV wheels. +- Pinned `people-flow-project` OpenCV dependencies and split the Dockerfile pip install into cacheable layers. +- Rebuilt successfully on the remote host and started all four Compose containers. +- Verified `http://127.0.0.1:13000/api/managed-services` on the remote host returns both managed services with `running` status. +- Verified Compose shows `managed-portal`, `managed-portal-web`, `people-flow-project`, and `store-dwell-alert` running. +- Verified boot autostart path: Docker service is enabled and all Compose containers use `restart: unless-stopped`. diff --git a/task_plan.md b/task_plan.md new file mode 100644 index 0000000..22c0408 --- /dev/null +++ b/task_plan.md @@ -0,0 +1,23 @@ +# Managed Portal Deployment Plan + +## Goal +Deploy `/Users/yoilun/code/managed-portal` to `xiaozheng@192.168.5.185`, including local model weights/assets, run it with Docker, and configure startup on boot. + +## Phases +- [complete] Inspect local project layout, Docker files, and weight/artifact locations. +- [complete] Inspect target host prerequisites and choose deploy path. +- [complete] Sync project contents to target host without losing required assets. +- [complete] Build/start Docker services on target host. +- [complete] Configure boot autostart and verify service health. + +## Decisions +- Preserve existing local uncommitted changes. +- Prefer Docker Compose restart policies for boot autostart when Docker service is enabled. + +## Errors Encountered +| Error | Attempt | Resolution | +|-------|---------|------------| +| First SSH inspect command returned no remote output after password prompt | Attempt 1 | Retry with cleaner `expect` argument grouping for the remote `sh -lc` command. | +| macOS system `rsync` does not support `--info=progress2` | Sync attempt 1 | Retry with portable `--progress --stats`. | +| `people-flow-project` pip resolver downloaded multiple OpenCV wheels during Docker build | Build attempt 1 | Interrupted build, pinned OpenCV packages to `4.11.0.86`, and split pip install into separate Docker cache layers. | +| Initial API verification used `/api/managed/services` | Verification attempt 1 | Checked local routes and verified the correct endpoint `/api/managed-services`. | diff --git a/web/.dockerignore b/web/.dockerignore new file mode 100644 index 0000000..0ca39c0 --- /dev/null +++ b/web/.dockerignore @@ -0,0 +1,3 @@ +node_modules +dist +.DS_Store diff --git a/web/src/views/WebDevices.vue b/web/src/views/WebDevices.vue index 595d4ac..a4bf223 100644 --- a/web/src/views/WebDevices.vue +++ b/web/src/views/WebDevices.vue @@ -37,8 +37,8 @@ show-overflow-tooltip /> @@ -99,7 +99,7 @@ async function handleScan() { } function openDevice(row) { - const url = row?.direct_url || row?.proxy_url; + const url = row?.proxy_url || row?.direct_url; if (!url) { ElMessage.error("设备打开地址无效"); return;