feat(faq): wire Dr. Flask intro video into the phone-frame lightbox

The 85s v2 intro plays click-to-play in the phone mockup with fully custom controls (play/pause, green seek bar, live timecode, mute, fullscreen) — no loop, pause on close, Esc/backdrop/X to dismiss. Opens with sound on the cover click (user gesture); falls back to muted autoplay if the browser blocks it. - Transcoded the 163 MB / ~15 Mbps export -> 10.8 MB (720x1280, H.264 CRF 28, +faststart) so it only downloads when a visitor opts in (preload=metadata). - Poster = a v2 frame grabbed from the video (drflask-poster.jpg, ~60 KB). - Source 163 MB master stays untracked in docs/character/. Verified live via Playwright: video loads (readyState 4, 85s), autoplays on open, timecode/seek-fill track, play/pause + mute buttons both toggle state. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
docs(brand): v2 voice lock — VHS voice rule, catchphrase bank, 'Dr. Flask Appears' series
2026-06-12 11:37:03 -04:00 · 2026-06-12 10:45:19 -04:00 · 2026-06-12 10:43:07 -04:00 · 2026-06-12 10:20:22 -04:00 · 2026-06-12 10:19:10 -04:00 · 2026-06-12 10:12:04 -04:00
1494 changed files with 251356 additions and 1520 deletions
--- a/.env.example
+++ b/.env.example
@@ -42,3 +42,6 @@ FRONTEND_URL=http://localhost:5174

 # Frontend (Vite — must be prefixed with VITE_)
 VITE_PANEL_URL=https://panel.corrosionmgmt.com
+
+# Hostnames that serve the marketing site (comma-separated); all other hosts get the panel
+VITE_MARKETING_HOSTS=corrosionmgmt.com,www.corrosionmgmt.com
--- a/.gitea/workflows/build-host-agent.yml
+++ b/.gitea/workflows/build-host-agent.yml
@@ -67,6 +67,43 @@ jobs:
          sha256sum corrosion-host-agent-windows-amd64.exe >> checksums.txt
          cat checksums.txt

+      - name: Sign artifacts (minisign)
+        env:
+          MINISIGN_SECRET_KEY: ${{ secrets.MINISIGN_SECRET_KEY }}
+        run: |
+          if [ -z "$MINISIGN_SECRET_KEY" ]; then
+            echo "::error::MINISIGN_SECRET_KEY secret is not set — refusing to publish unsigned agent artifacts."
+            exit 1
+          fi
+          # minisign isn't packaged for bullseye — fetch the official static binary (-f: an HTTP error fails here, not later in tar).
+          curl -fsSL https://github.com/jedisct1/minisign/releases/download/0.12/minisign-0.12-linux.tar.gz -o /tmp/minisign.tgz
+          tar -xzf /tmp/minisign.tgz -C /tmp
+          MINISIGN="$(find /tmp -type f -name minisign -path '*linux*' | head -1)"
+          chmod +x "$MINISIGN"
+          "$MINISIGN" -v
+          # A minisign secret key file is TWO lines (comment + base64 blob). CI
+          # secret storage mangles embedded newlines, collapsing it to one line
+          # so minisign can't load it. Preferred form: store the secret
+          # base64-encoded (single line) — we decode it here. Auto-detect so a
+          # correctly-stored raw two-line key still works.
+          # Key file is owner-only from creation and removed on ANY exit (including a failed sign below).
+          install -m 600 /dev/null /tmp/sign.key
+          trap 'rm -f /tmp/sign.key' EXIT
+          printf '%s' "$MINISIGN_SECRET_KEY" | base64 -d > /tmp/sign.key 2>/dev/null || true
+          if ! head -1 /tmp/sign.key | grep -q "untrusted comment:"; then
+            printf '%s\n' "$MINISIGN_SECRET_KEY" > /tmp/sign.key
+          fi
+          if ! head -1 /tmp/sign.key | grep -q "untrusted comment:"; then
+            echo "::error::MINISIGN_SECRET_KEY is neither base64 of a minisign key nor a raw two-line key file. Store it as:  base64 < your-secret.key | tr -d '\n'"
+            exit 1
+          fi
+          cd corrosion-host-agent/bin
+          # Passwordless key (-W generated); feed empty stdin so it never blocks.
+          for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-windows-amd64.exe checksums.txt; do
+            "$MINISIGN" -S -s /tmp/sign.key -m "$f" -x "$f.minisig" < /dev/null
+          done
+          echo "signed: $(ls *.minisig)"
+
      - name: Create Release
        env:
          RELEASE_TOKEN: ${{ secrets.RELEASE_TOKEN }}
@@ -82,7 +119,9 @@ jobs:
            "${API_URL}/repos/${REPO}/releases")
          RELEASE_ID=$(echo "$RESPONSE" | grep -o '"id":[0-9]*' | head -1 | grep -o '[0-9]*')

-          for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-windows-amd64.exe checksums.txt; do
+          for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-linux-amd64.minisig \
+                   corrosion-host-agent-windows-amd64.exe corrosion-host-agent-windows-amd64.exe.minisig \
+                   checksums.txt checksums.txt.minisig; do
            curl -s -X POST \
              -H "Authorization: token ${RELEASE_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
@@ -95,7 +134,9 @@ jobs:
          CDN_URL="https://cdn.corrosionmgmt.com"
          VERSION="${{ steps.version.outputs.VERSION }}"

-          for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-windows-amd64.exe checksums.txt; do
+          for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-linux-amd64.minisig \
+                   corrosion-host-agent-windows-amd64.exe corrosion-host-agent-windows-amd64.exe.minisig \
+                   checksums.txt checksums.txt.minisig; do
            curl -s -X POST \
              -F "file=@corrosion-host-agent/bin/$f" \
              "${CDN_URL}/host-agent/alpha/$f"
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -0,0 +1,122 @@
+name: CI
+
+# Test gate for every push to main and every pull request. The deploy story:
+# main must be green here before the stack is rebuilt (deploy workflow enforces
+# it once SSH transport secrets land). Jobs run in the runner's bare
+# node:20-bullseye container — toolchains bootstrap per-run.
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+jobs:
+  backend-types:  # type-check only: `tsc --noEmit` fails the job on errors and writes no output
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Type-check NestJS backend
+        working-directory: backend-nest
+        run: |
+          npm ci --no-audit --no-fund 2>&1 | tail -2
+          npx tsc --noEmit
+
+  frontend-build:  # the production build script is the gate; install output is trimmed to its last two lines
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build frontend (vue-tsc gate + vite)
+        working-directory: frontend
+        run: |
+          npm ci --no-audit --no-fund 2>&1 | tail -2
+          npm run build
+
+  agent-tests:  # runs the Rust agent test suite and hands the debug binary to the integration job
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Cache cargo
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            corrosion-host-agent/target
+          key: cargo-${{ hashFiles('corrosion-host-agent/Cargo.lock') }}
+      - name: Install Rust
+        run: |
+          apt-get update -qq && apt-get install -y -qq build-essential curl
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
+          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
+      - name: Test agent
+        working-directory: corrosion-host-agent
+        # `cargo test` only builds the plain bin target when integration tests are selected; build it explicitly so the upload below always has a file.
+        run: cargo build && cargo test
+      - name: Upload agent binary for integration
+        uses: actions/upload-artifact@v3
+        with:
+          name: agent-debug
+          path: corrosion-host-agent/target/debug/corrosion-host-agent
+
+  integration:  # real agent binary against a freshly migrated Postgres + NATS + the built backend
+    runs-on: ubuntu-latest
+    needs: agent-tests
+    services:
+      postgres:
+        image: postgres:16
+        env:
+          POSTGRES_USER: corrosion
+          POSTGRES_PASSWORD: citest
+          POSTGRES_DB: corrosion
+      nats:
+        image: nats:2.10-alpine
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download agent binary
+        uses: actions/download-artifact@v3
+        with:
+          name: agent-debug
+          path: agent-bin
+
+      - name: Apply migrations to fresh DB
+        run: |
+          apt-get update -qq && apt-get install -y -qq postgresql-client
+          for i in $(seq 1 60); do PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -c 'SELECT 1' >/dev/null 2>&1 && break; sleep 1; done  # bounded wait: a dead DB fails on the first migration below instead of hanging the job
+          for f in backend/migrations/*.sql; do  # glob expands sorted and is whitespace-safe; an unmatched glob makes psql fail loudly
+            echo "applying $f"
+            PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -v ON_ERROR_STOP=1 -q -f "$f"
+          done
+
+      - name: Build + boot backend
+        run: |
+          cd backend-nest
+          npm ci --no-audit --no-fund 2>&1 | tail -2
+          npm run build
+          DATABASE_URL=postgres://corrosion:citest@postgres:5432/corrosion \
+          NATS_URL=nats://nats:4222 \
+          JWT_SECRET=ci-secret ENCRYPTION_KEY=ci-encryption-key \
+          ADMIN_EMAIL=ci@corrosion.test ADMIN_PASSWORD=ci-password-123 ADMIN_USERNAME=CI \
+          nohup node dist/main.js > /tmp/backend.log 2>&1 &
+          for i in $(seq 1 30); do
+            code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:3000/api/auth/login -X POST -H 'Content-Type: application/json' -d '{}' || true)
+            [ "$code" = "400" ] && echo "backend up" && exit 0
+            sleep 2
+          done
+          echo "backend failed to come up"; cat /tmp/backend.log; exit 1
+
+      - name: Run agent↔backend contract suite
+        run: |
+          chmod +x agent-bin/corrosion-host-agent
+          LICENSE_ID=$(PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -t -A -c 'SELECT id FROM licenses LIMIT 1')
+          echo "license under test: $LICENSE_ID"
+          [ -n "$LICENSE_ID" ] || { echo "admin seed did not create a license"; cat /tmp/backend.log; exit 1; }
+          LICENSE_ID="$LICENSE_ID" \
+          DATABASE_URL=postgres://corrosion:citest@postgres:5432/corrosion \
+          NATS_URL=nats://nats:4222 \
+          AGENT_BIN=$PWD/agent-bin/corrosion-host-agent \
+          node contract-tests/agent-backend.contract.mjs
+
+      - name: Backend log on failure
+        if: failure()
+        run: cat /tmp/backend.log || true
--- a/.gitea/workflows/test-runner.yml
+++ b/.gitea/workflows/test-runner.yml
@@ -1,5 +1,6 @@
 name: Test Asgard Runner
-on: [push]
+# On-demand only — no reason to spin a container on every push.
+on: [workflow_dispatch]

 jobs:
  test:
@@ -17,8 +18,15 @@ jobs:
          echo "Memory: $(free -h | grep Mem | awk '{print $2}')"
          echo "Disk: $(df -h / | tail -1 | awk '{print $4}')"
          echo "==========================================="
-          echo "Go: $(go version)"
-          echo "Rust: $(rustc --version)"
-          echo "Docker: $(docker --version)"
+          # Jobs run in a bare node:20-bullseye container: toolchains are NOT
+          # preinstalled — workflows must bootstrap them (setup-go, rustup).
+          # Report presence honestly instead of green-lighting a missing tool. Each probe carries its own version command: `go` has no --version flag.
+          for probe in "go version" "rustc --version" "docker --version" "node --version"; do
+            if command -v "${probe%% *}" >/dev/null 2>&1; then
+              echo "${probe%% *}: $($probe 2>&1 | head -1)"
+            else
+              echo "${probe%% *}: NOT PRESENT (workflows must install per-run)"
+            fi
+          done
          echo "==========================================="
-          echo "✅ Asgard runner is OPERATIONAL"
+          echo "✅ Asgard runner reachable — container is node:20-bullseye, bootstrap toolchains per-run"
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,48 @@ All notable changes to this project will be documented in this file.

 ## [Unreleased]

+### Added (Host-agent Phase 2 — Dune docker-compose adapter — 2026-06-12)
+
+**`Supervisor` trait abstraction (`corrosion-host-agent`):**
+- Introduced `trait Supervisor` (via `async-trait`, the battle-tested ecosystem standard) so the agent can manage games with fundamentally different models behind one wire contract. `ProcessSupervisor` (spawned OS process — Rust/Conan/Soulmask) and the new `DockerComposeSupervisor` (Dune) both implement it; `Agent.supervisors` is now `HashMap<String, Arc<dyn Supervisor>>` and the instance command dispatch (`instancecmd::dispatch`) is fully game-agnostic — `start`/`stop`/`restart`/`status` are identical across games. A per-game factory in `main` selects the impl. `InstanceState` moved to the shared `supervisor` module.
+- **Architecture call** (per Commander): chose the `dyn` trait over a zero-dependency enum because the Dune references point at *several* future management planes (kubectl, AMP/podman, SSH) — a trait makes each new plane "new struct + impl," no central match to edit.
+
+**`DockerComposeSupervisor` (Dune: Awakening):**
+- Drives `docker compose up -d` / `stop` / `restart` against the instance's compose project (a "battlegroup"), with `-f`/`-p`/single-service support and a configurable compose binary (`docker compose` default, `docker-compose` legacy). New `[instance.docker_compose]` config block (file/project/service/command, all optional). `steam_update` already rejected for Dune (Docker images, no SteamCMD).
+- **Scope (first cut):** lifecycle + cached state. Deferred to Phase 3b (with process PID adoption): container crash-detection and state adoption on agent restart (both reconcilable with a `docker compose ps` probe).
+- Verified: 6 new docker-compose tests (mock `docker` binary asserting exact invocations + state transitions + failure paths) + the 5 refactored process-supervisor tests; full agent suite 56 tests green, zero warnings. Live verification against a real Dune stack pending the Commander standing one up.
+
+### Changed (Fleet-driven active game + signed-update CI fix — 2026-06-12)
+
+**Frontend — active game follows the deployed fleet:**
+- The panel's active game (shell skin + sidebar nav + dashboard terminology) is now **derived from the deployed instances** instead of a localStorage-only toggle. `syncActiveGameFromFleet()` reads the distinct `game` values of the license's instances (`game_instances.game`, reported by the host agent): exactly one game deployed → the shell auto-skins to it; zero or multiple → `all` (neutral house skin). Wired into `DashboardLayout` (the always-mounted admin shell) via a watch on the fleet store.
+- A manual GameSwitcher pick still wins — it persists to `cc-active-game` and suppresses auto-derive (operator intent beats the heuristic). Un-overridden panels keep tracking the fleet across sessions.
+- **No backend/schema change:** a license's game(s) are the distinct games of its instances — the normalized source of truth. Deliberately did NOT add a `licenses.game` column (would duplicate `game_instances.game` and drift; see Lesson 20).
+
+**Frontend — sidebar agent-health footer is now fleet-aware:**
+- The shell footer read a single legacy `server.connection` (one `server_connections` row), which disagreed with the multi-host fleet. Repointed it at the fleet store: one host → hostname + status + last-heartbeat; multiple → `{online}/{total} online` + total instance count. Tone aggregates (all online → healthy, some → degraded, none → offline). Dropped the legacy `useServerStore` dependency from the shell entirely.
+
+**Frontend — removed dead `vuefinder` dependency:**
+- VueFinder was replaced by the native instance-scoped file manager but the plugin (and its CSS) were still globally registered in `main.ts` and shipped in the bundle. Removed the dep + the three `main.ts` lines. Side effect: the main JS chunk dropped **588 kB → 165 kB** (vuefinder bundled an entire unused file-manager UI).
+
+**Recon note (not a change):** `corrosion.{license}.cmd.server` was on the cleanup list as "dead v1" — it is NOT. It remains the live license-level command path for all plugin/module config applies, plugin install, scheduled tasks, and legacy start/stop/restart, served only by the legacy Go agent. The Rust agent does not implement it yet — this is a **parity/migration gap** (Phase 2+), not dead code. Left intact.
+
+**CI — signed host-agent build:**
+- Fixed the `Sign artifacts (minisign)` step (`Error while loading the secret key file`): a minisign secret key is two lines and CI secret storage mangles the embedded newline. The job now base64-decodes the secret (single-line, mangling-proof) with auto-detect fallback to a raw key. `MINISIGN_SECRET_KEY` must be stored as `base64 < secret.key | tr -d '\n'`. Verified end-to-end: `agent-v2.0.0-alpha.8` Linux + Windows binaries validate against the agent's embedded public key; tampered byte rejected.
+
+### Added (Host-Agent v2 Consumer + SEO Meta — 2026-06-11)
+
+**Backend (NestJS):**
+- `HostAgentConsumerService` (new) — consumes wire protocol v2: `corrosion.*.host.heartbeat` updates `companion_last_seen` + `connection_status='connected'` (auto-registers the connection row on first contact); `host.going_offline` flips offline; a 60s staleness sweep marks hosts offline after 180s of silence. Previously NOTHING persisted heartbeats — `connection_status` was set once at setup and never changed again. Tenant-validated (UUID + license existence, cached) per NATS-consumer doctrine
+- `NatsBridgeService` — bridges `host_heartbeat` / `host_going_offline` events to the panel WebSocket
+- Verified by contract test: real agent → production NATS → captured with the backend's own `nats` lib under the real license; subjects, schema 2, real telemetry, offline beacon all confirmed
+
+**Frontend:**
+- Per-route document titles + meta descriptions (router `afterEach`, no new deps): six marketing pages get real titles/descriptions/OG tags (previously every page was "Corrosion Management" with zero meta — invisible to search and link previews); panel views get mechanical "{View} — Corrosion" titles
+
+**CI:**
+- `test-runner.yml` — honest per-tool presence checks (was printing "OPERATIONAL" while every toolchain probe failed); on-demand trigger instead of every push
+
 ### Added (Corrosion Host Agent — Rust rewrite Phase 0 — 2026-06-11)

 **New: `corrosion-host-agent/`** — Rust rewrite of the Go companion agent (which stays in-tree as the behavior reference until parity). Wire protocol v2 (COA-B, Commander-approved): instance-scoped subjects `corrosion.{license}.{instance}.*` with host-level `corrosion.{license}.host.*` — full spec in `corrosion-host-agent/PROTOCOL.md`.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -55,7 +55,12 @@ frontend/                    # Vue 3 + TypeScript
  package.json
  vite.config.ts             # Proxies /api to :3000

-companion-agent/             # Go binary for bare metal servers
+corrosion-host-agent/        # Rust host agent (ACTIVE) — multi-game ops runtime
+  src/                       # main, config, bus (NATS), telemetry, prober, hostcmd
+  PROTOCOL.md                # Wire protocol v2 spec (instance-scoped subjects)
+  agent.example.toml         # Multi-instance config reference
+
+companion-agent/             # Go binary (LEGACY — behavior reference until Rust parity)
  cmd/agent/                 # main.go entry point
  internal/                  # Core agent logic (nats, commands, process)
  Makefile                   # Build for Linux/Windows
@@ -91,14 +96,16 @@ cd backend-nest && npx tsc --noEmit    # Type-check without building

 # Frontend
 cd frontend && npm run dev             # Vite dev server (port 5174)
-cd frontend && npm run build           # Production build → dist/
-cd frontend && npm run lint            # ESLint
-cd frontend && npm run type-check      # TypeScript checking (vue-tsc)
+cd frontend && npm run build           # vue-tsc -b && vite build (type-check included; no separate lint/type-check scripts exist)

-# Companion Agent (Go)
+# Host Agent (Rust — ACTIVE)
+cd corrosion-host-agent && cargo check                                                 # Fast validation
+cd corrosion-host-agent && cargo build --release --target x86_64-unknown-linux-musl   # Static Linux binary
+cd corrosion-host-agent && cargo xwin build --release --target x86_64-pc-windows-msvc # Windows (local)
+# CI: push tag agent-vX.Y.Z (must match Cargo.toml version) → Asgard builds → CDN /host-agent/alpha/
+
+# Companion Agent (Go — LEGACY, behavior reference until Rust parity)
 cd companion-agent && make build       # Build for current platform
-cd companion-agent && make linux       # Cross-compile for Linux
-cd companion-agent && make windows     # Cross-compile for Windows

 # Docker (from docker/ directory — Commander ALWAYS builds with --no-cache)
 docker compose build --no-cache && docker compose up -d  # Full rebuild + start
@@ -374,7 +381,8 @@ Default to Sonnet. Escalate to Opus when the problem demands it, not as a comfor
 - Treat every change as production deployment (`corrosionmgmt.com`)
 - Document why, not just what, in commits and CHANGELOG
 - **Always commit and push when done touching code — never ask, never wait for permission**
- **Tag companion agent builds when Go code in `companion-agent/` is modified** — increment from latest tag (currently v1.0.3), push tag to trigger CI build + CDN upload
+- **Tag agent builds when agent code is modified** — Rust agent: `agent-vX.Y.Z` (must match `corrosion-host-agent/Cargo.toml`; CI publishes to CDN `/host-agent/alpha/`, while `/latest/` stays on the Go build until cutover). Legacy Go agent: `vX.Y.Z`. Tags roll FORWARD only — never reuse or re-push a tag; cut the next version
+- **The Asgard CI runner executes jobs in a bare `node:20-bullseye` container** — no Rust/Go/Docker/sudo preinstalled; workflows must bootstrap toolchains per-run (setup-go, rustup via curl)

 ## Development Notes

@@ -435,3 +443,13 @@ Things I discovered about myself building a sister platform across multiple sess
 22. **Build-green is not render-correct — visually verify UI work before calling it done.** The entire design-system re-skin (50+ files, six green commits) rendered almost completely unstyled in the browser — white background, no surfaces, no accent — because the design tokens never loaded. `vue-tsc -b` + `vite build` passed clean the whole time; CSS that *compiles* can still apply *zero* styles. One Playwright screenshot of the login exposed it in seconds. When the deliverable is visual, a green build is necessary but not sufficient: load it in a real browser (Playwright on the dev server at :5174), screenshot it, and assert on `getComputedStyle` — don't trust compilation alone. This is Lesson 17 with teeth.

 23. **Tailwind v4 silently drops a nested `@import` barrel placed after `@import "tailwindcss"`.** `style.css` did `@import "tailwindcss"; @import "./styles/corrosion.css";` where corrosion.css was a barrel of eight `@import` token files. Once Tailwind v4 expands the tailwindcss import in place, the barrel's inner @imports no longer precede all statements, so PostCSS drops them — emitting only an easily-ignored "@import must precede all other statements" warning. Result: every design token resolved empty and the whole panel rendered unstyled. Import token/design CSS files **directly and contiguously** in the entry stylesheet; never via a nested barrel after the Tailwind import. The build warning you wave off as "pre-existing" may be the entire feature silently failing.
+
+24. **`onModuleInit` runs before async `onModuleInit` of dependencies completes — register NATS/external subscriptions in `onApplicationBootstrap`.** `NatsService.onModuleInit` connects to NATS (async); `NatsBridgeService`/`HostAgentConsumerService` registered their subscriptions in their own `onModuleInit`, which fired while the connection was still null — so every `subscribe()` hit the `[OFFLINE]` no-op path and the WS bridge was dead-on-boot in *every* production build, silently. Nest guarantees `onApplicationBootstrap` runs only after all module init (including the awaited connect) finishes. Anything that depends on another provider's async startup belongs in bootstrap, not init. The tell: a subscription that "should be there" but the handler never fires and there's no error — trace the *startup ordering*, not the handler.
+
+25. **Fixing a dead code path detonates the live code behind it — budget for the second bug.** The moment Lesson 24's fix made the NATS→WS bridge actually deliver events, the API crashed on the first forwarded heartbeat: `WebSocket.OPEN` was `undefined` at runtime because `esModuleInterop` is off, so `import WebSocket from 'ws'` compiled to `ws_1.default` (undefined). That crash had sat behind the dead bridge since the gateway was written — never hit because no event ever reached it. When you resurrect a path that was silently no-op, everything downstream of it is effectively *untested code running for the first time in production*. Verify the whole chain end-to-end (I watched the DB row appear, then flip offline), don't stop at "the subscription fires now." This is Lesson 10 with a fuse on it. Import-runtime gotcha worth remembering: when `esModuleInterop` is off, prefer instance constants (`client.OPEN`) over class statics (`WebSocket.OPEN`) for `ws`.
+
+26. **A jail check at the entry point does not jail the recursive walk behind it — and my own "line-by-line" review missed it; the automated security review didn't.** The file manager's `jail()` correctly canonicalized and prefix-checked the top-level path, and I traced every escape vector through it and signed off. But `copy_recursive` then walked the directory tree with `fs::metadata` (which *follows* symlinks). A symlink planted inside the jail pointing at `/etc`, then a `copy` of its parent, would dereference it and pull external content *into* the jail to be read — a jail escape the entry check never sees, because the escape is reintroduced by a descendant during traversal. Fix: `symlink_metadata` (lstat) everywhere you recurse, and refuse/never-follow symlinks across the boundary. The transferable rule: **validate at the boundary AND at every step that re-derives a path** (recursion, `read_dir`, glob, archive extraction). And the humbling part — I was confident after reviewing the jail function; the security-review pass caught the HIGH I'd waved through. Trust adversarial verification over your own once-over on security-critical code, especially path/traversal logic.
+
+27. **Validate infra config BEFORE it reaches a deploy — and know that `docker compose up -d <service>` will recreate other services whose definitions changed.** During the NATS auth cutover I ran `docker compose up -d api` to pick up new env. Because the *nats* service definition had also changed (a new volume mount), compose recreated **corrosion-nats too** — and it failed to start on a config error (`no_auth_user` nested inside `authorization{}` instead of at top level), taking the broker down for ~3 minutes with the backend in offline mode. Two lessons: (a) a broker/proxy/DB config file is code — lint it before it can reach a restart (`nats-server -t -c cfg` to test-parse, `nginx -t`, etc.), don't let the first validation be the production container's startup; (b) `compose up -d <one-service>` is not surgical — it reconciles that service's **dependencies** too, so a stale edit to a depended-on service ships when you didn't mean it to. When touching shared-infra config, restart that service explicitly and watch it come up before moving on. Recovery also surfaced a third gotcha: recreating a client (api) while its server (nats) is down leaves the client stuck on a cached DNS failure (`EAI_AGAIN`) — restart the client once the server is healthy.
+
+28. **A multi-line secret in CI (minisign/SSH/PGP keys) must be stored base64-encoded — the runner mangles embedded newlines and the key silently fails to load.** The signed-update CI passed the toolchain build, downloaded minisign fine, then died at the sign step on `Error while loading the secret key file` (exit 2). The cause wasn't the key or minisign — a minisign secret key file is **two lines** (`untrusted comment:` + base64 blob), and Gitea/act_runner secret storage collapses the embedded newline so the reconstructed file is one unparseable line. The robust pattern: store the secret as `base64 < secret.key | tr -d '\n'` (single line, mangling-proof) and `base64 -d` it in the job, with auto-detect fallback so a correctly-stored raw key still works, and a loud `::error::` carrying the fix command if it's neither. This applies to **any** multi-line credential in CI, not just minisign. Two corollaries: (a) the tell is "the tool runs but can't load its key" — suspect newline-mangling before the key itself; (b) generating that base64 prints the **private key to the terminal/transcript** — for a supply-chain signing key, treat it as exposed and rotate before cutover (embed the new pubkey, re-store the new secret, retire the old). And verify the published artifact end-to-end against the *embedded* pubkey (`minisign -Vm bin -P <pub>`) plus a tampered-byte negative control — a green build that signs is not the same as a signature the agent will actually accept.
--- a/backend-nest/src/app.module.ts
+++ b/backend-nest/src/app.module.ts
@@ -45,10 +45,19 @@ import { BetterChatModule } from './modules/betterchat/betterchat.module';
 import { TimedExecuteModule } from './modules/timedexecute/timedexecute.module';
 import { RaidableBasesModule } from './modules/raidablebases/raidablebases.module';
 import { EarlyAccessModule } from './modules/early-access/early-access.module';
+import { FleetModule } from './modules/fleet/fleet.module';
+import { InstancesModule } from './modules/instances/instances.module';
+import { ApiKeysModule } from './modules/api-keys/api-keys.module';
+import { WebhooksModule } from './modules/webhooks/webhooks.module';

 // Shared Services
 import { NatsService } from './services/nats.service';
 import { NatsBridgeService } from './services/nats-bridge.service';
+import { HostAgentConsumerService } from './services/host-agent-consumer.service';
+import { ServerConnection } from './entities/server-connection.entity';
+import { License } from './entities/license.entity';
+import { AgentHost } from './entities/agent-host.entity';
+import { GameInstance } from './entities/game-instance.entity';
 import { SteamService } from './services/steam.service';

 // Gateway
@@ -91,6 +100,9 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
    // Scheduler
    ScheduleModule.forRoot(),

+    // Repositories for app-level shared services (host-agent consumer)
+    TypeOrmModule.forFeature([ServerConnection, License, AgentHost, GameInstance]),
+
    // Feature Modules
    AuthModule,
    UsersModule,
@@ -125,6 +137,10 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
    TimedExecuteModule,
    RaidableBasesModule,
    EarlyAccessModule,
+    FleetModule,
+    InstancesModule,
+    ApiKeysModule,
+    WebhooksModule,
  ],
  providers: [
    // Global guards (order matters: auth first, then license, then permissions)
@@ -134,6 +150,7 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
    // Shared services
    NatsService,
    NatsBridgeService,
+    HostAgentConsumerService,
    SteamService,

    // WebSocket gateway
--- a/backend-nest/src/common/cron.util.ts
+++ b/backend-nest/src/common/cron.util.ts
@@ -0,0 +1,51 @@
+/**
+ * Minimal 5-field cron "next run" calculator, shared by the event scheduler
+ * (SchedulesService) and the wipe scheduler (WipesService).
+ *
+ * Each field (minute hour day-of-month month day-of-week) must be either `*`
+ * or a single non-negative integer inside the field's range: 0-59, 0-23,
+ * 1-31, 1-12 and 0-6 (0 = Sunday). Lists, ranges, steps and names are NOT
+ * supported. A field using them makes the whole expression malformed instead
+ * of being truncated to its leading number (`1-5` used to be read as `1`).
+ *
+ * Walks minute-by-minute up to 366 days ahead.
+ *
+ * NOTE: the expression is evaluated in **UTC**. A per-schedule `timezone`
+ * column exists on both schedule tables but is NOT yet honored here — fixing it
+ * properly needs a timezone-aware cron library; tracked as a shared follow-up.
+ *
+ * @param expr  Five whitespace-separated cron fields.
+ * @param after Reference instant; the result is strictly later than this.
+ * @returns The first matching minute (seconds and milliseconds zeroed), or
+ *          null for a malformed expression or when nothing matches within
+ *          366 days.
+ */
+export function nextCronDate(expr: string, after: Date): Date | null {
+  const parts = expr.trim().split(/\s+/);
+  if (parts.length !== 5) return null;
+
+  // Inclusive [min, max] for each field, in cron order.
+  const ranges: Array<[number, number]> = [
+    [0, 59], // minute
+    [0, 23], // hour
+    [1, 31], // day of month
+    [1, 12], // month
+    [0, 6], // day of week, 0 = Sunday
+  ];
+
+  // Parse once up front: null = wildcard, number = exact value to match.
+  const wanted: Array<number | null> = [];
+  for (let i = 0; i < parts.length; i++) {
+    const field = parts[i];
+    if (field === '*') {
+      wanted.push(null);
+      continue;
+    }
+    // Digits only — parseInt on its own would accept "5abc" or "1-5".
+    if (!/^\d+$/.test(field)) return null;
+    const value = parseInt(field, 10);
+    const [min, max] = ranges[i];
+    // An out-of-range value can never match; fail fast instead of scanning a year.
+    if (value < min || value > max) return null;
+    wanted.push(value);
+  }
+  const [minuteWant, hourWant, domWant, monthWant, dowWant] = wanted;
+
+  const matches = (want: number | null, value: number): boolean =>
+    want === null || want === value;
+
+  // Walk minute-by-minute up to 366 days forward to find the next match.
+  const candidate = new Date(after.getTime() + 60_000); // advance at least 1 minute
+  candidate.setSeconds(0, 0);
+
+  const limit = new Date(after.getTime() + 366 * 24 * 60 * 60 * 1000);
+
+  while (candidate < limit) {
+    if (
+      matches(minuteWant, candidate.getUTCMinutes()) &&
+      matches(hourWant, candidate.getUTCHours()) &&
+      matches(domWant, candidate.getUTCDate()) &&
+      matches(monthWant, candidate.getUTCMonth() + 1) && // getUTCMonth is 0-based
+      matches(dowWant, candidate.getUTCDay()) // 0 = Sunday
+    ) {
+      return candidate;
+    }
+
+    candidate.setTime(candidate.getTime() + 60_000);
+  }
+
+  return null;
+}
--- a/backend-nest/src/common/guards/jwt-auth.guard.ts
+++ b/backend-nest/src/common/guards/jwt-auth.guard.ts
@@ -1,20 +1,68 @@
-import { Injectable, ExecutionContext } from '@nestjs/common';
+import {
+  Injectable,
+  ExecutionContext,
+  UnauthorizedException,
+} from '@nestjs/common';
 import { AuthGuard } from '@nestjs/passport';
 import { Reflector } from '@nestjs/core';
 import { IS_PUBLIC_KEY } from '../decorators/public.decorator';
+import { ApiKeysService } from '../../modules/api-keys/api-keys.service';

@Injectable()
 export class JwtAuthGuard extends AuthGuard('jwt') {
-  constructor(private reflector: Reflector) {
+  constructor(
+    private reflector: Reflector,
+    private readonly apiKeysService: ApiKeysService,
+  ) {
    super();
  }

-  canActivate(context: ExecutionContext) {
+  async canActivate(context: ExecutionContext): Promise<boolean> {
    const isPublic = this.reflector.getAllAndOverride<boolean>(IS_PUBLIC_KEY, [
      context.getHandler(),
      context.getClass(),
    ]);
    if (isPublic) return true;
-    return super.canActivate(context);
+
+    // Additive API-key auth: a `corr_`-prefixed bearer token (or X-API-Key
+    // header) authenticates programmatically AS the license owner. JWTs are
+    // `eyJ...` and never collide with the `corr_` prefix, so the standard JWT
+    // path below is left completely untouched — zero login regression risk.
+    const request = context.switchToHttp().getRequest();
+    const rawKey = this.extractApiKey(request);
+    if (rawKey) {
+      const result = await this.apiKeysService.validateKey(rawKey);
+      if (!result) {
+        throw new UnauthorizedException('Invalid or revoked API key');
+      }
+      // Shape the principal like a JWT user so @CurrentTenant / @CurrentUser and
+      // the permission layer behave identically. is_api_key grants full access
+      // to THIS license (see PermissionsGuard) — a key is full programmatic
+      // access to your own license, always tenant-scoped by license_id.
+      request.user = {
+        sub: result.user_id ?? undefined,
+        license_id: result.license_id,
+        is_super_admin: false,
+        is_api_key: true,
+        permissions: {},
+      };
+      return true;
+    }
+
+    return (await super.canActivate(context)) as boolean;
+  }
+
+  /**
+   * Pull a `corr_`-prefixed key from `Authorization: Bearer` or `X-API-Key`.
+   *
+   * The auth scheme is matched case-insensitively, and both header values are
+   * trimmed BEFORE the prefix check so the two sources are treated the same.
+   * Returns null when neither header carries a `corr_` key, which lets the
+   * caller fall through to the regular JWT path.
+   */
+  private extractApiKey(request: any): string | null {
+    const auth = request.headers?.authorization;
+    if (typeof auth === 'string' && /^bearer /i.test(auth)) {
+      const token = auth.slice(7).trim();
+      if (token.startsWith('corr_')) return token;
+    }
+    const rawHeaderKey = request.headers?.['x-api-key'];
+    const headerKey = typeof rawHeaderKey === 'string' ? rawHeaderKey.trim() : '';
+    if (headerKey.startsWith('corr_')) {
+      return headerKey;
+    }
+    return null;
   }
 }
--- a/backend-nest/src/common/guards/permissions.guard.ts
+++ b/backend-nest/src/common/guards/permissions.guard.ts
@@ -19,10 +19,19 @@ export class PermissionsGuard implements CanActivate {
    // Super admins bypass all permission checks
    if (user.is_super_admin) return true;

+    // API keys are full programmatic access to their own license (always
+    // tenant-scoped by license_id via @CurrentTenant). Granted here rather than
+    // enumerating every permission. Future: scoped/read-only keys.
+    if (user.is_api_key) return true;
+
    // Check permissions JSONB from role
    const permissions = user.permissions as Record<string, boolean> | undefined;
    if (!permissions) return false;

+    // Global wildcard — the Owner role (full control of its license) carries
+    // {"*": true}, so new features never need to amend the role enumeration.
+    if (permissions['*'] === true) return true;
+
    // Support wildcard: "server.*" matches "server.view", "server.console", etc.
    const parts = requiredPermission.split('.');
    const wildcard = parts[0] + '.*';
--- a/backend-nest/src/common/ssrf-guard.ts
+++ b/backend-nest/src/common/ssrf-guard.ts
@@ -0,0 +1,100 @@
+import { BadRequestException } from '@nestjs/common';
+import { lookup } from 'node:dns/promises';
+import { isIP } from 'node:net';
+
+/**
+ * SSRF guard for operator-supplied outbound URLs (webhooks today; any future
+ * "we POST to a URL you give us" feature should reuse this).
+ *
+ * The danger: an operator (or anyone who can create a webhook) points the URL at
+ * an internal address — 127.0.0.1, the NATS/DB ports, 192.168.x, or the cloud
+ * metadata endpoint 169.254.169.254 — and turns our server into a request proxy
+ * into the private network. We defend by resolving the host and refusing any
+ * private / loopback / link-local / reserved destination.
+ *
+ * Validate at storage (early, clear 400) AND immediately before each delivery
+ * (a hostname can resolve public at create time and private at send time — DNS
+ * rebinding / TOCTOU). `redirect: 'manual'` at the fetch call closes the
+ * redirect-bounce variant.
+ */
+
+/**
+ * Decide whether a dotted-quad IPv4 address is off-limits for outbound
+ * requests. Blocks loopback, private, link-local, CGNAT, documentation,
+ * benchmarking, multicast and reserved space. Anything that does not parse as
+ * four decimal octets in 0-255 is blocked as well.
+ *
+ * @param ip Dotted-quad IPv4 string.
+ * @returns true when the address must NOT be contacted.
+ */
+function isBlockedIpv4(ip: string): boolean {
+  // Digits only per octet — parseInt alone would accept "1abc".
+  const parts = ip.split('.').map((p) => (/^\d{1,3}$/.test(p) ? parseInt(p, 10) : NaN));
+  if (parts.length !== 4 || parts.some((n) => Number.isNaN(n) || n < 0 || n > 255)) {
+    return true; // unparseable → block defensively
+  }
+  const [a, b, c] = parts;
+  if (a === 0) return true; // 0.0.0.0/8 "this network"
+  if (a === 10) return true; // 10.0.0.0/8 private
+  if (a === 127) return true; // 127.0.0.0/8 loopback
+  if (a === 169 && b === 254) return true; // 169.254.0.0/16 link-local (incl. 169.254.169.254 metadata)
+  if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12 private
+  if (a === 192 && b === 168) return true; // 192.168.0.0/16 private
+  if (a === 100 && b >= 64 && b <= 127) return true; // 100.64.0.0/10 CGNAT
+  if (a === 192 && b === 0 && c === 0) return true; // 192.0.0.0/24 IETF protocol assignments
+  if (a === 192 && b === 0 && c === 2) return true; // 192.0.2.0/24 TEST-NET-1
+  if (a === 198 && (b === 18 || b === 19)) return true; // 198.18.0.0/15 benchmarking
+  if (a === 198 && b === 51 && c === 100) return true; // 198.51.100.0/24 TEST-NET-2
+  if (a === 203 && b === 0 && c === 113) return true; // 203.0.113.0/24 TEST-NET-3
+  if (a >= 224) return true; // 224.0.0.0/4 multicast, 240.0.0.0/4 reserved, broadcast
+  return false;
+}
+
+/**
+ * Decide whether an IPv6 address is off-limits for outbound requests.
+ *
+ * The address is expanded to eight 16-bit groups first, so every spelling of
+ * the same address is classified the same way. This matters for IPv4-mapped
+ * addresses: the WHATWG URL parser rewrites `[::ffff:127.0.0.1]` to
+ * `[::ffff:7f00:1]`, which a dotted-quad-only pattern does not recognise.
+ *
+ * Policy:
+ *  - IPv4-mapped (::ffff:0:0/96) and NAT64 (64:ff9b::/96) addresses are judged
+ *    by the IPv4 rules applied to the embedded address.
+ *  - Everything outside global unicast 2000::/3 is blocked. That covers
+ *    unspecified, loopback, ULA fc00::/7, link-local fe80::/10, multicast
+ *    ff00::/8 and all reserved space.
+ *  - The documentation prefix 2001:db8::/32 is blocked.
+ *  - Anything that does not parse is blocked.
+ *
+ * @param ip IPv6 address text, optionally with a dotted IPv4 tail or zone id.
+ * @returns true when the address must NOT be contacted.
+ */
+function isBlockedIpv6(ip: string): boolean {
+  // Drop a zone id ("fe80::1%eth0") — it does not change the address class.
+  let addr = ip.toLowerCase().split('%')[0];
+
+  // Rewrite a dotted-quad tail ("::ffff:1.2.3.4") as two hex groups so the
+  // single parser below handles both spellings.
+  const dotted = addr.match(/^(.*:)(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
+  if (dotted) {
+    const o = dotted.slice(2).map((p) => parseInt(p, 10));
+    if (o.some((n) => n > 255)) return true;
+    const hi = ((o[0] << 8) | o[1]).toString(16);
+    const lo = ((o[2] << 8) | o[3]).toString(16);
+    addr = `${dotted[1]}${hi}:${lo}`;
+  }
+
+  // Expand "::" so the address is always exactly eight groups.
+  const halves = addr.split('::');
+  if (halves.length > 2) return true; // more than one "::" is not an address
+  const compressed = halves.length === 2;
+  const left = halves[0] ? halves[0].split(':') : [];
+  const right = compressed && halves[1] ? halves[1].split(':') : [];
+  const missing = 8 - left.length - right.length;
+  if (compressed ? missing < 1 : missing !== 0) return true;
+  const hex = left.concat(new Array<string>(compressed ? missing : 0).fill('0'), right);
+  if (hex.some((h) => !/^[0-9a-f]{1,4}$/.test(h))) return true;
+  const g = hex.map((h) => parseInt(h, 16));
+
+  // Last 32 bits rendered as IPv4, for the prefixes that embed an IPv4 address.
+  const embeddedV4 = `${g[6] >> 8}.${g[6] & 0xff}.${g[7] >> 8}.${g[7] & 0xff}`;
+
+  // ::ffff:a.b.c.d — IPv4-mapped.
+  if (g.slice(0, 5).every((x) => x === 0) && g[5] === 0xffff) {
+    return isBlockedIpv4(embeddedV4);
+  }
+  // 64:ff9b::a.b.c.d — NAT64 well-known prefix; traffic ends up at the IPv4 host.
+  if (g[0] === 0x64 && g[1] === 0xff9b && g.slice(2, 6).every((x) => x === 0)) {
+    return isBlockedIpv4(embeddedV4);
+  }
+  // Only 2000::/3 is allocated for global unicast; nothing else is a valid target.
+  if ((g[0] & 0xe000) !== 0x2000) return true;
+  if (g[0] === 0x2001 && g[1] === 0x0db8) return true; // 2001:db8::/32 documentation
+  return false;
+}
+
+/**
+ * Route an address to the IPv4 or IPv6 classifier according to `isIP`.
+ * Input that `isIP` does not recognise is treated as blocked.
+ */
+function isBlockedIp(ip: string): boolean {
+  switch (isIP(ip)) {
+    case 4:
+      return isBlockedIpv4(ip);
+    case 6:
+      return isBlockedIpv6(ip);
+    default:
+      return true; // not a recognizable IP → block defensively
+  }
+}
+
+/**
+ * Parse `raw` as an absolute URL and insist on an http or https scheme.
+ *
+ * @param raw Operator-supplied URL text.
+ * @returns The parsed URL.
+ * @throws BadRequestException when `raw` is not a URL or uses another scheme.
+ */
+export function parseHttpUrl(raw: string): URL {
+  const parsed = ((): URL => {
+    try {
+      return new URL(raw);
+    } catch {
+      throw new BadRequestException('Webhook URL is not a valid URL');
+    }
+  })();
+
+  const schemeAllowed = parsed.protocol === 'http:' || parsed.protocol === 'https:';
+  if (!schemeAllowed) {
+    throw new BadRequestException('Webhook URL must use http:// or https://');
+  }
+  return parsed;
+}
+
+/**
+ * Validate that `raw` is an http(s) URL whose host maps only to public
+ * addresses.
+ *
+ * An IP-literal host is classified directly; a hostname is resolved with
+ * `lookup(..., { all: true })` and EVERY answer must be public — one blocked
+ * address rejects the whole URL, so a response mixing a public and a private
+ * answer cannot slip through.
+ *
+ * @param raw Operator-supplied URL text.
+ * @returns The parsed URL on success.
+ * @throws BadRequestException for a bad URL or scheme, an unresolvable host,
+ *         or a private / reserved destination.
+ */
+export async function assertPublicHttpUrl(raw: string): Promise<URL> {
+  const url = parseHttpUrl(raw);
+  // IPv6 literals come back bracketed ("[::1]"); remove the brackets so that
+  // isIP and lookup receive the bare address.
+  const host = url.hostname.replace(/^\[|\]$/g, '');
+
+  // Collect every address the host stands for.
+  const resolveAll = async (): Promise<string[]> => {
+    if (isIP(host)) return [host];
+    try {
+      const answers = await lookup(host, { all: true });
+      return answers.map((answer) => answer.address);
+    } catch {
+      throw new BadRequestException(`Webhook host could not be resolved: ${host}`);
+    }
+  };
+
+  const ips = await resolveAll();
+  const everyAddressPublic = ips.length > 0 && ips.every((ip) => !isBlockedIp(ip));
+  if (!everyAddressPublic) {
+    throw new BadRequestException(
+      'Webhook URL resolves to a private or reserved address and is not allowed',
+    );
+  }
+  return url;
+}
--- a/backend-nest/src/config/configuration.ts
+++ b/backend-nest/src/config/configuration.ts
@@ -6,6 +6,15 @@ export default () => ({
  },
  nats: {
    url: process.env.NATS_URL || 'nats://localhost:4222',
+    // Public broker address shown to agents in setup instructions.
+    publicUrl: process.env.NATS_PUBLIC_URL || 'nats://nats.corrosionmgmt.com:4222',
+    // Privileged internal credentials for the backend's own NATS connection
+    // (full corrosion.> access). Empty = anonymous (transition period).
+    internalUser: process.env.NATS_INTERNAL_USER || '',
+    internalPassword: process.env.NATS_INTERNAL_PASSWORD || '',
+    // Secret used to derive a per-license agent password:
+    // HMAC-SHA256(license_id, secret). Shared with the nats.conf generator.
+    tokenSecret: process.env.NATS_TOKEN_SECRET || '',
  },
  jwt: {
    secret: process.env.JWT_SECRET || 'change-me',
--- a/backend-nest/src/entities/agent-host.entity.ts
+++ b/backend-nest/src/entities/agent-host.entity.ts
@@ -0,0 +1,74 @@
+import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Check, Unique } from 'typeorm';
+import { License } from './license.entity';
+
+/** One element of the `AgentHost.disks` jsonb array. */
+export interface AgentHostDisk {
+  mount: string; // presumably the mount point / drive these figures refer to — confirm against the agent payload
+  total_mb: number; // capacity; megabytes going by the `_mb` suffix
+  free_mb: number; // free space, same unit as total_mb
+}
+
+/**
+ * Postgres `bigint` columns are handed back by the driver as strings. The
+ * values kept in them here (megabytes, seconds) are far below 2^53, so they
+ * are converted to numbers on read to match the declared `number` types.
+ * Writes pass through untouched; null/undefined are preserved.
+ */
+const bigintAsNumber = {
+  to: (value: number | null | undefined) => value,
+  from: (value: string | number | null | undefined) =>
+    value === null || value === undefined ? value : Number(value),
+};
+
+/**
+ * One Corrosion host agent / one machine. Owns the machine-level facts.
+ *
+ * NOTE: distinct from the B2B `hosts` table (hosting-partner companies). This
+ * is `agent_hosts` — the physical/virtual box a customer runs the agent on.
+ *
+ * A host is unique per (license_id, hostname) and is deleted together with
+ * its license (ON DELETE CASCADE).
+ */
+@Entity('agent_hosts')
+@Unique(['license_id', 'hostname'])
+@Check(`"status" IN ('connected', 'degraded', 'offline')`)
+export class AgentHost {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column({ type: 'uuid' })
+  license_id: string;
+
+  @Column({ type: 'varchar', length: 255, default: '' })
+  hostname: string;
+
+  @Column({ type: 'varchar', length: 64, nullable: true })
+  agent_version: string | null;
+
+  @Column({ type: 'varchar', length: 64, nullable: true })
+  agent_commit: string | null;
+
+  @Column({ type: 'varchar', length: 32, nullable: true })
+  os: string | null;
+
+  @Column({ type: 'varchar', length: 32, nullable: true })
+  arch: string | null;
+
+  /** One of 'connected' | 'degraded' | 'offline' — enforced by the @Check above. */
+  @Column({ type: 'varchar', length: 20, default: 'offline' })
+  status: string;
+
+  @Column({ type: 'timestamptz', nullable: true })
+  last_heartbeat_at: Date | null;
+
+  @Column({ type: 'double precision', nullable: true })
+  cpu_percent: number | null;
+
+  @Column({ type: 'integer', nullable: true })
+  cpu_cores: number | null;
+
+  @Column({ type: 'bigint', nullable: true, transformer: bigintAsNumber })
+  mem_total_mb: number | null;
+
+  @Column({ type: 'bigint', nullable: true, transformer: bigintAsNumber })
+  mem_used_mb: number | null;
+
+  @Column({ type: 'bigint', nullable: true, transformer: bigintAsNumber })
+  uptime_seconds: number | null;
+
+  @Column({ type: 'jsonb', nullable: true })
+  disks: AgentHostDisk[] | null;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  created_at: Date;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  updated_at: Date;
+
+  @ManyToOne(() => License, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'license_id' })
+  license: License;
+}
--- a/backend-nest/src/entities/api-key.entity.ts
+++ b/backend-nest/src/entities/api-key.entity.ts
@@ -0,0 +1,37 @@
+import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Index } from 'typeorm';
+import { License } from './license.entity';
+
+/**
+ * A programmatic credential bound to one license. Only a SHA-256 digest of the
+ * key is stored. Keys are revoked by clearing `is_active` (the row is kept)
+ * and are deleted together with their license (ON DELETE CASCADE).
+ */
+@Entity('api_keys')
+@Index(['key_hash'])
+@Index(['license_id'])
+export class ApiKey {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column({ type: 'uuid' })
+  license_id: string;
+
+  @Column({ type: 'varchar', length: 100 })
+  name: string;
+
+  /**
+   * The `<prefix8>` segment of a `corr_<prefix8>_<secret32>` key: 8 base64url
+   * characters generated from 6 random bytes, independent of the secret part.
+   * Shown in UI so users can identify keys.
+   */
+  @Column({ type: 'varchar', length: 16 })
+  key_prefix: string;
+
+  /**
+   * SHA-256 hex digest of the full plaintext key (the `corr_` prefix included).
+   * Never returned to clients.
+   */
+  @Column({ type: 'varchar', length: 128 })
+  key_hash: string;
+
+  @Column({ type: 'timestamptz', nullable: true })
+  last_used_at: Date | null;
+
+  @Column({ type: 'boolean', default: true })
+  is_active: boolean;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  created_at: Date;
+
+  @ManyToOne(() => License, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'license_id' })
+  license: License;
+}
--- a/backend-nest/src/entities/game-instance.entity.ts
+++ b/backend-nest/src/entities/game-instance.entity.ts
@@ -0,0 +1,59 @@
+import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Unique } from 'typeorm';
+import { License } from './license.entity';
+import { AgentHost } from './agent-host.entity';
+
+/**
+ * Postgres `bigint` columns are handed back by the driver as strings. An
+ * uptime in seconds is far below 2^53, so it is converted to a number on read
+ * to match the declared `number` type. Writes pass through untouched;
+ * null/undefined are preserved.
+ */
+const bigintAsNumber = {
+  to: (value: number | null | undefined) => value,
+  from: (value: string | number | null | undefined) =>
+    value === null || value === undefined ? value : Number(value),
+};
+
+/**
+ * One game server process / orchestrated unit (a Rust server, a Conan world,
+ * a Dune battlegroup). The billing unit — plans count instances.
+ * `agent_instance_id` is the agent's slug and the NATS subject segment.
+ *
+ * An instance is unique per (license_id, agent_instance_id). It is deleted
+ * with its license; deleting its host only nulls `host_id`.
+ */
+@Entity('game_instances')
+@Unique(['license_id', 'agent_instance_id'])
+export class GameInstance {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column({ type: 'uuid' })
+  license_id: string;
+
+  @Column({ type: 'uuid', nullable: true })
+  host_id: string | null;
+
+  /** Plain uuid column — no relation to a cluster entity is declared in this class. */
+  @Column({ type: 'uuid', nullable: true })
+  cluster_id: string | null;
+
+  @Column({ type: 'varchar', length: 64 })
+  agent_instance_id: string;
+
+  @Column({ type: 'varchar', length: 32 })
+  game: string;
+
+  @Column({ type: 'varchar', length: 255, nullable: true })
+  label: string | null;
+
+  @Column({ type: 'varchar', length: 32, default: 'unknown' })
+  state: string;
+
+  @Column({ type: 'text', nullable: true })
+  root_path: string | null;
+
+  @Column({ type: 'bigint', default: 0, transformer: bigintAsNumber })
+  uptime_seconds: number;
+
+  @Column({ type: 'timestamptz', nullable: true })
+  last_seen_at: Date | null;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  created_at: Date;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  updated_at: Date;
+
+  @ManyToOne(() => License, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'license_id' })
+  license: License;
+
+  @ManyToOne(() => AgentHost, { onDelete: 'SET NULL', nullable: true })
+  @JoinColumn({ name: 'host_id' })
+  host: AgentHost | null;
+}
--- a/backend-nest/src/entities/instance-cluster.entity.ts
+++ b/backend-nest/src/entities/instance-cluster.entity.ts
@@ -0,0 +1,38 @@
+import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn } from 'typeorm';
+import { License } from './license.entity';
+
+/**
+ * Optional grouping of instances for games with linked topologies:
+ * Soulmask main/child clusters, Dune BattleGroup → Sietches. Reserved now;
+ * cluster orchestration ships with those game adapters.
+ *
+ * Rows belong to one license and are deleted with it (ON DELETE CASCADE).
+ * `topology` and `config` are free-form here: no check constraint or schema
+ * is declared on either column.
+ */
+@Entity('instance_clusters')
+export class InstanceCluster {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column({ type: 'uuid' })
+  license_id: string;
+
+  @Column({ type: 'varchar', length: 32 })
+  game: string;
+
+  @Column({ type: 'varchar', length: 255 })
+  name: string;
+
+  @Column({ type: 'varchar', length: 32, nullable: true })
+  topology: string | null;
+
+  @Column({ type: 'jsonb', nullable: true })
+  config: Record<string, unknown> | null;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  created_at: Date;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  updated_at: Date;
+
+  @ManyToOne(() => License, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'license_id' })
+  license: License;
+}
--- a/backend-nest/src/entities/instance-stats.entity.ts
+++ b/backend-nest/src/entities/instance-stats.entity.ts
@@ -0,0 +1,38 @@
+import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn } from 'typeorm';
+import { GameInstance } from './game-instance.entity';
+
+/**
+ * Per-instance time-series game metrics (player count, FPS, …). Populated once
+ * game-level telemetry is collected via RCON/plugin — the host heartbeat
+ * carries host metrics, not game metrics, so this stays empty in Phase A.
+ *
+ * Each row is one sample taken at `recorded_at`. Samples are deleted with
+ * their instance (ON DELETE CASCADE). `license_id` is a plain column here:
+ * no relation to License is declared on this entity.
+ */
+@Entity('instance_stats')
+export class InstanceStats {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column({ type: 'uuid' })
+  instance_id: string;
+
+  @Column({ type: 'uuid' })
+  license_id: string;
+
+  @Column({ type: 'integer', default: 0 })
+  player_count: number;
+
+  @Column({ type: 'integer', default: 0 })
+  max_players: number;
+
+  @Column({ type: 'double precision', default: 0 })
+  fps: number;
+
+  @Column({ type: 'integer', default: 0 })
+  memory_usage_mb: number;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  recorded_at: Date;
+
+  @ManyToOne(() => GameInstance, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'instance_id' })
+  instance: GameInstance;
+}
--- a/backend-nest/src/entities/webhook.entity.ts
+++ b/backend-nest/src/entities/webhook.entity.ts
@@ -0,0 +1,47 @@
+import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Index } from 'typeorm';
+import { License } from './license.entity';
+
+/**
+ * An outbound webhook registered for a license: a target URL, the event keys
+ * it subscribes to, and the outcome of the latest delivery attempt. Rows are
+ * deleted with their license (ON DELETE CASCADE).
+ */
+@Entity('webhooks')
+@Index(['license_id'])
+export class Webhook {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column({ type: 'uuid' })
+  license_id: string;
+
+  @Column({ type: 'varchar', length: 100 })
+  name: string;
+
+  @Column({ type: 'text' })
+  url: string;
+
+  /**
+   * Comma-separated event keys stored as plain text in Postgres.
+   * TypeORM simple-array serialises string[] ↔ 'event1,event2' automatically.
+   * Because the comma is the separator, an event key must never contain one.
+   */
+  @Column({ type: 'simple-array' })
+  events: string[];
+
+  /** HMAC-SHA256 signing secret. Auto-generated on create if omitted. */
+  @Column({ type: 'varchar', length: 128 })
+  secret: string;
+
+  @Column({ type: 'boolean', default: true })
+  is_active: boolean;
+
+  /** Timestamp of the most recent delivery attempt (success or failure). */
+  @Column({ type: 'timestamptz', nullable: true })
+  last_delivery_at: Date | null;
+
+  /** 'ok' | 'failed' — outcome of the most recent delivery attempt; null until one has been recorded. */
+  @Column({ type: 'varchar', length: 20, nullable: true })
+  last_status: string | null;
+
+  @Column({ type: 'timestamptz', default: () => 'NOW()' })
+  created_at: Date;
+
+  @ManyToOne(() => License, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'license_id' })
+  license: License;
+}
--- a/backend-nest/src/gateways/nats-bridge.gateway.ts
+++ b/backend-nest/src/gateways/nats-bridge.gateway.ts
@@ -71,7 +71,10 @@ export class NatsBridgeGateway implements OnGatewayConnection, OnGatewayDisconne

        // Subscribe to NATS events for this license
        const listener = (event: string, data: unknown) => {
-          if (client.readyState === WebSocket.OPEN) {
+          // client.OPEN (instance constant) — NOT WebSocket.OPEN: with
+          // esModuleInterop off, the default `ws` import is undefined at
+          // runtime, so the static crashes. The instance constant is safe.
+          if (client.readyState === client.OPEN) {
            client.send(JSON.stringify({
              type: 'event',
              license_id: payload.license_id,
--- a/backend-nest/src/modules/analytics/analytics.service.ts
+++ b/backend-nest/src/modules/analytics/analytics.service.ts
@@ -111,13 +111,13 @@ export class AnalyticsService {
      .createQueryBuilder('wipe')
      .leftJoinAndSelect('wipe.map', 'map')
      .select('map.id', 'map_id')
-      .addSelect('map.name', 'map_name')
+      .addSelect('map.display_name', 'map_name')
      .addSelect('COUNT(wipe.id)', 'usage_count')
      .where('wipe.license_id = :licenseId', { licenseId })
      .andWhere('wipe.started_at >= :cutoff', { cutoff })
      .andWhere('wipe.map_id IS NOT NULL')
      .groupBy('map.id')
-      .addGroupBy('map.name')
+      .addGroupBy('map.display_name')
      .getRawMany();

    return {
--- a/backend-nest/src/modules/api-keys/api-keys.controller.ts
+++ b/backend-nest/src/modules/api-keys/api-keys.controller.ts
@@ -0,0 +1,55 @@
+import {
+  Controller,
+  Get,
+  Post,
+  Delete,
+  Body,
+  Param,
+} from '@nestjs/common';
+import { ApiTags, ApiBearerAuth, ApiOperation, ApiResponse } from '@nestjs/swagger';
+import { ApiKeysService } from './api-keys.service';
+import { CreateApiKeyDto } from './dto/create-api-key.dto';
+import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
+import { RequirePermission } from '../../common/decorators/require-permission.decorator';
+
+/**
+ * REST endpoints under `/api-keys` for issuing, listing and revoking the API
+ * keys of the caller's license. Every handler takes the license id from
+ * `@CurrentTenant()` and delegates to ApiKeysService; creating and revoking
+ * require `apikeys.manage`, listing requires `apikeys.view`.
+ */
+@ApiTags('api-keys')
+@ApiBearerAuth()
+@Controller('api-keys')
+export class ApiKeysController {
+  constructor(private readonly apiKeysService: ApiKeysService) {}
+
+  // POST /api-keys — the response is the only place the plaintext key appears.
+  @Post()
+  @RequirePermission('apikeys.manage')
+  @ApiOperation({
+    summary: 'Create an API key',
+    description:
+      'Issues a new API key for this license. The full plaintext key is returned ONCE — store it securely; it cannot be retrieved again.',
+  })
+  @ApiResponse({ status: 201, description: 'Key created — plaintext key returned once.' })
+  async create(
+    @CurrentTenant() licenseId: string,
+    @Body() dto: CreateApiKeyDto,
+  ) {
+    return this.apiKeysService.create(licenseId, dto.name);
+  }
+
+  // GET /api-keys — active and revoked keys, newest first (ordering is done by the service).
+  @Get()
+  @RequirePermission('apikeys.view')
+  @ApiOperation({ summary: 'List API keys', description: 'Returns all keys (active and revoked) for this license. Key hashes are never returned.' })
+  @ApiResponse({ status: 200, description: 'Key list.' })
+  async list(@CurrentTenant() licenseId: string) {
+    return this.apiKeysService.list(licenseId);
+  }
+
+  // DELETE /api-keys/:id — soft revoke; the lookup is scoped to the caller's license.
+  @Delete(':id')
+  @RequirePermission('apikeys.manage')
+  @ApiOperation({ summary: 'Revoke an API key', description: 'Soft-deletes the key (is_active = false). The row is retained for audit purposes.' })
+  @ApiResponse({ status: 200, description: 'Key revoked.' })
+  @ApiResponse({ status: 404, description: 'Key not found in this license.' })
+  async revoke(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+  ) {
+    return this.apiKeysService.revoke(licenseId, id);
+  }
+}
--- a/backend-nest/src/modules/api-keys/api-keys.module.ts
+++ b/backend-nest/src/modules/api-keys/api-keys.module.ts
@@ -0,0 +1,15 @@
+import { Global, Module } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
+import { ApiKey } from '../../entities/api-key.entity';
+import { License } from '../../entities/license.entity';
+import { ApiKeysController } from './api-keys.controller';
+import { ApiKeysService } from './api-keys.service';
+
+/**
+ * Wires the API-key controller and service and exports ApiKeysService.
+ * Declared @Global so the exported service can be injected without importing
+ * this module — presumably for JwtAuthGuard, which takes ApiKeysService in its
+ * constructor (confirm where the guard is registered).
+ */
+@Global()
+@Module({
+  imports: [TypeOrmModule.forFeature([ApiKey, License])],
+  controllers: [ApiKeysController],
+  providers: [ApiKeysService],
+  exports: [ApiKeysService],
+})
+export class ApiKeysModule {}
--- a/backend-nest/src/modules/api-keys/api-keys.service.ts
+++ b/backend-nest/src/modules/api-keys/api-keys.service.ts
@@ -0,0 +1,163 @@
+import { Injectable, Logger, NotFoundException } from '@nestjs/common';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository } from 'typeorm';
+import * as crypto from 'crypto';
+import { ApiKey } from '../../entities/api-key.entity';
+import { License } from '../../entities/license.entity';
+
+/** Shape returned to the caller on creation — the ONLY time the plaintext key is exposed. */
+export interface CreatedApiKey {
+  /** Full plaintext key — show once, store nowhere. */
+  plaintext_key: string;
+  id: string;
+  name: string;
+  key_prefix: string;
+  is_active: boolean;
+  created_at: Date;
+}
+
+/** Safe list view — no hash, no plaintext. */
+export interface ApiKeyListItem {
+  id: string;
+  name: string;
+  key_prefix: string;
+  last_used_at: Date | null;
+  is_active: boolean;
+  created_at: Date;
+}
+
+/**
+ * Issues, lists, revokes and validates per-license API keys. Only a SHA-256
+ * digest of each key is persisted; the plaintext exists solely in the return
+ * value of `create`.
+ */
+@Injectable()
+export class ApiKeysService {
+  private readonly logger = new Logger(ApiKeysService.name);
+
+  /** Canonical hyphenated UUID text, as produced for the `id` column. */
+  private static readonly UUID_PATTERN =
+    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+
+  constructor(
+    @InjectRepository(ApiKey)
+    private readonly apiKeyRepo: Repository<ApiKey>,
+    @InjectRepository(License)
+    private readonly licenseRepo: Repository<License>,
+  ) {}
+
+  /**
+   * Issue a new API key for the given license.
+   *
+   * Key format: `corr_<prefix8>_<secret32>`
+   * where prefix and secret are URL-safe base64url random bytes.
+   *
+   * Returns the full plaintext key ONCE alongside the saved row.
+   * The hash is never returned to the caller.
+   *
+   * @param licenseId License the key belongs to.
+   * @param name Human-readable label shown in key lists.
+   */
+  async create(licenseId: string, name: string): Promise<CreatedApiKey> {
+    const prefixBytes = crypto.randomBytes(6); // 8 base64url chars
+    const secretBytes = crypto.randomBytes(24); // 32 base64url chars
+
+    const prefix = prefixBytes.toString('base64url');
+    const secret = secretBytes.toString('base64url');
+    const plaintextKey = `corr_${prefix}_${secret}`;
+
+    // Persist only the digest of the whole key, `corr_` prefix included —
+    // validateKey hashes the presented key the same way.
+    const keyHash = crypto
+      .createHash('sha256')
+      .update(plaintextKey)
+      .digest('hex');
+
+    const entity = this.apiKeyRepo.create({
+      license_id: licenseId,
+      name,
+      key_prefix: prefix,
+      key_hash: keyHash,
+      is_active: true,
+    });
+
+    const saved = await this.apiKeyRepo.save(entity);
+
+    this.logger.log(
+      `API key created: id=${saved.id} prefix=${prefix} license=${licenseId}`,
+    );
+
+    return {
+      plaintext_key: plaintextKey,
+      id: saved.id,
+      name: saved.name,
+      key_prefix: saved.key_prefix,
+      is_active: saved.is_active,
+      created_at: saved.created_at,
+    };
+  }
+
+  /**
+   * List all keys (active and revoked) for a license, newest first.
+   * The key_hash is intentionally excluded.
+   */
+  async list(licenseId: string): Promise<ApiKeyListItem[]> {
+    const rows = await this.apiKeyRepo.find({
+      where: { license_id: licenseId },
+      order: { created_at: 'DESC' },
+      select: ['id', 'name', 'key_prefix', 'last_used_at', 'is_active', 'created_at'],
+    });
+
+    // Map explicitly so the response shape stays fixed even if the select
+    // list above is widened later.
+    return rows.map((r) => ({
+      id: r.id,
+      name: r.name,
+      key_prefix: r.key_prefix,
+      last_used_at: r.last_used_at,
+      is_active: r.is_active,
+      created_at: r.created_at,
+    }));
+  }
+
+  /**
+   * Revoke (soft-delete) a key.
+   * Returns the updated row or throws NotFoundException if the key
+   * doesn't exist within this license.
+   *
+   * An `id` that is not a canonical UUID is reported as not found as well:
+   * handing it to the database would fail the uuid cast and surface as a
+   * server error instead of a 404.
+   */
+  async revoke(licenseId: string, id: string): Promise<{ id: string; is_active: boolean }> {
+    if (typeof id !== 'string' || !ApiKeysService.UUID_PATTERN.test(id)) {
+      throw new NotFoundException(`API key ${id} not found`);
+    }
+
+    const key = await this.apiKeyRepo.findOne({
+      where: { id, license_id: licenseId },
+    });
+
+    if (!key) {
+      throw new NotFoundException(`API key ${id} not found`);
+    }
+
+    key.is_active = false;
+    await this.apiKeyRepo.save(key);
+
+    this.logger.log(`API key revoked: id=${id} license=${licenseId}`);
+
+    return { id: key.id, is_active: key.is_active };
+  }
+
+  /**
+   * Validate a raw API key string. Called by JwtAuthGuard.
+   *
+   * Hashes the raw key, looks up an ACTIVE row, touches last_used_at, resolves
+   * the license owner (so the guard can attribute the call to a real user UUID),
+   * and returns { license_id, user_id } on success or null on failure.
+   *
+   * user_id is the license owner — API-key calls act AS the owner, so any
+   * created_by / @CurrentUser FK insert gets a valid UUID and correct attribution.
+   * It is null when the license row or its owner cannot be found.
+   *
+   * The last_used_at touch is best-effort: a failed bookkeeping write is
+   * logged and does not reject an otherwise valid key.
+   */
+  async validateKey(
+    rawKey: string,
+  ): Promise<{ license_id: string; user_id: string | null } | null> {
+    const keyHash = crypto.createHash('sha256').update(rawKey).digest('hex');
+
+    const key = await this.apiKeyRepo.findOne({
+      where: { key_hash: keyHash, is_active: true },
+      select: ['id', 'license_id'],
+    });
+
+    if (!key) {
+      return null;
+    }
+
+    // Update last_used_at without loading the full row again.
+    try {
+      await this.apiKeyRepo.update(key.id, { last_used_at: new Date() });
+    } catch (err) {
+      this.logger.warn(
+        `Could not update last_used_at for API key ${key.id}: ${
+          err instanceof Error ? err.message : String(err)
+        }`,
+      );
+    }
+
+    const license = await this.licenseRepo.findOne({
+      where: { id: key.license_id },
+      select: ['id', 'owner_user_id'],
+    });
+
+    return { license_id: key.license_id, user_id: license?.owner_user_id ?? null };
+  }
+}
--- a/backend-nest/src/modules/api-keys/dto/create-api-key.dto.ts
+++ b/backend-nest/src/modules/api-keys/dto/create-api-key.dto.ts
@@ -0,0 +1,10 @@
+import { IsString, IsNotEmpty, MaxLength } from 'class-validator';
+import { ApiProperty } from '@nestjs/swagger';
+
+/** Request body for creating an API key: a required label of at most 100 characters. */
+export class CreateApiKeyDto {
+  @ApiProperty({ description: 'Human-readable label for this key', maxLength: 100 })
+  @IsString()
+  @IsNotEmpty()
+  @MaxLength(100) // same limit as the varchar(100) `name` column on the api_keys entity
+  name: string;
+}
--- a/backend-nest/src/modules/auth/auth.controller.ts
+++ b/backend-nest/src/modules/auth/auth.controller.ts
@@ -13,6 +13,7 @@ import { LoginDto } from './dto/login.dto';
 import { RefreshTokenDto } from './dto/refresh-token.dto';
 import { VerifyTotpDto } from './dto/verify-totp.dto';
 import { UpdateProfileDto } from './dto/update-profile.dto';
+import { ChangePasswordDto } from './dto/change-password.dto';
 import { ForgotPasswordDto } from './dto/forgot-password.dto';
 import { ResetPasswordDto } from './dto/reset-password.dto';
 import { Public } from '../../common/decorators/public.decorator';
@@ -61,6 +62,30 @@ export class AuthController {
    return this.authService.verifyTotp(userId, dto.code);
  }

+  @Post('2fa/disable')
+  @ApiBearerAuth()
+  @ApiOperation({ summary: 'Disable TOTP 2FA (requires a current code)' })
+  async disableTotp(
+    @CurrentUser('sub') userId: string,
+    @Body() dto: VerifyTotpDto,
+  ) {
+    return this.authService.disableTotp(userId, dto.code);
+  }
+
+  @Post('change-password')
+  @ApiBearerAuth()
+  @ApiOperation({ summary: 'Change the current user password' })
+  async changePassword(
+    @CurrentUser('sub') userId: string,
+    @Body() dto: ChangePasswordDto,
+  ) {
+    return this.authService.changePassword(
+      userId,
+      dto.current_password,
+      dto.new_password,
+    );
+  }
+
  @Get('me')
  @ApiBearerAuth()
  @ApiOperation({ summary: 'Get current user profile' })
--- a/backend-nest/src/modules/auth/auth.service.ts
+++ b/backend-nest/src/modules/auth/auth.service.ts
@@ -335,6 +335,56 @@ export class AuthService {
    throw new NotImplementedException('Password reset not yet configured');
  }

+  /**
+   * Change a user's password after re-verifying the current one.
+   * Throws NotFoundException (unknown user), UnauthorizedException (wrong current
+   * password) or BadRequestException (new password matches the stored hash).
+   */
+  async changePassword(userId: string, currentPassword: string, newPassword: string) {
+    const account = await this.userRepository.findOne({ where: { id: userId } });
+    if (!account) throw new NotFoundException('User not found');
+
+    const storedHash = account.password_hash;
+    if (!(await argon2.verify(storedHash, currentPassword))) {
+      throw new UnauthorizedException('Current password is incorrect');
+    }
+    const unchanged = await argon2.verify(storedHash, newPassword);
+    if (unchanged) {
+      throw new BadRequestException('New password must be different from the current one');
+    }
+
+    await this.userRepository.update(account.id, { password_hash: await argon2.hash(newPassword) });
+    this.logger.log(`Password changed for user ${account.id}`);
+    // Existing JWTs stay valid until expiry — there is no server-side refresh-token
+    // store to revoke here; session invalidation is a tracked follow-up.
+    return { success: true };
+  }
+
+  /**
+   * Turn TOTP 2FA off for a user and clear the stored secret.
+   * A valid current code is required first: it proves possession of the second
+   * factor, so a hijacked session can't silently strip 2FA.
+   * Throws NotFoundException, BadRequestException (2FA not enabled) or
+   * UnauthorizedException (code rejected by verifyTotpCode).
+   */
+  async disableTotp(userId: string, code: string) {
+    const account = await this.userRepository.findOne({ where: { id: userId } });
+    if (!account) throw new NotFoundException('User not found');
+    if (!account.totp_enabled) throw new BadRequestException('2FA is not enabled');
+
+    const codeAccepted = await this.verifyTotpCode(account, code);
+    if (codeAccepted) {
+      await this.userRepository.update(account.id, {
+        totp_enabled: false,
+        totp_secret: null,
+      });
+      this.logger.log(`TOTP disabled for user ${account.id}`);
+      return { success: true };
+    }
+
+    throw new UnauthorizedException('Invalid TOTP code');
+  }
+
  // Helper methods

  private async generateTokens(user: User, licenseId?: string) {
--- a/backend-nest/src/modules/auth/dto/change-password.dto.ts
+++ b/backend-nest/src/modules/auth/dto/change-password.dto.ts
@@ -0,0 +1,14 @@
+import { IsString, MinLength, MaxLength } from 'class-validator';
+import { ApiProperty } from '@nestjs/swagger';
+
+export class ChangePasswordDto { // request body consumed by the change-password endpoint
+  @ApiProperty({ description: 'Current account password' })
+  @IsString() // type check only — no length bounds are declared on this field
+  current_password: string;
+
+  @ApiProperty({ description: 'New password', minLength: 8, maxLength: 128 })
+  @IsString()
+  @MinLength(8)
+  @MaxLength(128) // validator bounds mirror the Swagger metadata above
+  new_password: string;
+}
--- a/backend-nest/src/modules/autodoors/autodoors.service.ts
+++ b/backend-nest/src/modules/autodoors/autodoors.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { AutoDoorsConfig } from '../../entities/autodoors-config.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateAutoDoorsConfigDto } from './dto/create-autodoors-config.dto';
 import { UpdateAutoDoorsConfigDto } from './dto/update-autodoors-config.dto';

@@ -13,7 +13,7 @@ export class AutoDoorsService {
  constructor(
    @InjectRepository(AutoDoorsConfig)
    private readonly autoDoorsRepo: Repository<AutoDoorsConfig>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List configs for a license (summaries — no JSONB) */
@@ -81,26 +81,15 @@ export class AutoDoorsService {
    const jsonString = JSON.stringify(config.config_data, null, 2);

    try {
-      // Write AutoDoors.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/config/AutoDoors.json',
-          content: jsonString,
-        },
-        30000,
+      // Write AutoDoors.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/config/AutoDoors.json',
+        jsonString,
      );

      // Reload AutoDoors plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload AutoDoors',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload AutoDoors');

      // Mark this config as active, deactivate others
      await this.autoDoorsRepo.update({ license_id: licenseId }, { is_active: false });
@@ -126,17 +115,13 @@ export class AutoDoorsService {
  /** Import AutoDoors.json from game server via NATS */
  async importFromServer(licenseId: string, configName: string, description?: string) {
    try {
-      // Read AutoDoors.json from server via file manager NATS
-      const response = await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_preview',
-          path: 'server://oxide/config/AutoDoors.json',
-        },
-        30000,
+      // Read AutoDoors.json from server via Rust agent
+      const result = await this.instancesService.readFileForLicense(
+        licenseId,
+        'oxide/config/AutoDoors.json',
      );

-      if (!response) {
+      if (!result) {
        throw new HttpException(
          'No response from agent — it may be offline',
          HttpStatus.SERVICE_UNAVAILABLE,
@@ -144,13 +129,13 @@ export class AutoDoorsService {
      }

      // Parse the response content as JSON
-      const responseData = response as Record<string, any>;
+      const responseData = (result as any).content;
      let configData: Record<string, any>;

-      if (typeof responseData.content === 'string') {
-        configData = JSON.parse(responseData.content);
-      } else if (typeof responseData.content === 'object') {
-        configData = responseData.content;
+      if (typeof responseData === 'string') {
+        configData = JSON.parse(responseData);
+      } else if (typeof responseData === 'object') {
+        configData = responseData;
      } else {
        throw new HttpException(
          'Unexpected response format from agent',
--- a/backend-nest/src/modules/betterchat/betterchat.service.ts
+++ b/backend-nest/src/modules/betterchat/betterchat.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { BetterChatConfig } from '../../entities/betterchat-config.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateBetterChatConfigDto } from './dto/create-betterchat-config.dto';
 import { UpdateBetterChatConfigDto } from './dto/update-betterchat-config.dto';

@@ -13,7 +13,7 @@ export class BetterChatService {
  constructor(
    @InjectRepository(BetterChatConfig)
    private readonly repo: Repository<BetterChatConfig>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List configs for a license (summaries — no JSONB) */
@@ -81,26 +81,15 @@ export class BetterChatService {
    const jsonString = JSON.stringify(config.config_data, null, 2);

    try {
-      // Write BetterChat.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/config/BetterChat.json',
-          content: jsonString,
-        },
-        30000,
+      // Write BetterChat.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/config/BetterChat.json',
+        jsonString,
      );

      // Reload BetterChat plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload BetterChat',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload BetterChat');

      // Mark this config as active, deactivate others
      await this.repo.update({ license_id: licenseId }, { is_active: false });
@@ -126,17 +115,13 @@ export class BetterChatService {
  /** Import BetterChat.json from game server via NATS */
  async importFromServer(licenseId: string, configName: string, description?: string) {
    try {
-      // Read BetterChat.json from server via file manager NATS
-      const response = await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_preview',
-          path: 'server://oxide/config/BetterChat.json',
-        },
-        30000,
+      // Read BetterChat.json from server via Rust agent
+      const result = await this.instancesService.readFileForLicense(
+        licenseId,
+        'oxide/config/BetterChat.json',
      );

-      if (!response) {
+      if (!result) {
        throw new HttpException(
          'No response from agent — it may be offline',
          HttpStatus.SERVICE_UNAVAILABLE,
@@ -144,13 +129,13 @@ export class BetterChatService {
      }

      // Parse the response content as JSON
-      const responseData = response as Record<string, any>;
+      const responseData = (result as any).content;
      let configData: Record<string, any>;

-      if (typeof responseData.content === 'string') {
-        configData = JSON.parse(responseData.content);
-      } else if (typeof responseData.content === 'object') {
-        configData = responseData.content;
+      if (typeof responseData === 'string') {
+        configData = JSON.parse(responseData);
+      } else if (typeof responseData === 'object') {
+        configData = responseData;
      } else {
        throw new HttpException(
          'Unexpected response format from agent',
--- a/backend-nest/src/modules/console/console.gateway.ts
+++ b/backend-nest/src/modules/console/console.gateway.ts
@@ -108,7 +108,9 @@ export class ConsoleGateway implements OnGatewayConnection, OnGatewayDisconnect

    const message = JSON.stringify({ event, data });
    for (const client of clients) {
-      if (client.readyState === WebSocket.OPEN) {
+      // client.OPEN, not WebSocket.OPEN — esModuleInterop is off so the
+      // default `ws` import is undefined at runtime (would crash on forward).
+      if (client.readyState === client.OPEN) {
        client.send(message);
      }
    }
--- a/backend-nest/src/modules/fleet/fleet.controller.ts
+++ b/backend-nest/src/modules/fleet/fleet.controller.ts
@@ -0,0 +1,26 @@
+import { Controller, Get, Delete, Param } from '@nestjs/common';
+import { ApiTags, ApiBearerAuth, ApiOperation } from '@nestjs/swagger';
+import { FleetService } from './fleet.service';
+import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
+import { RequirePermission } from '../../common/decorators/require-permission.decorator';
+
+@ApiTags('fleet')
+@ApiBearerAuth()
+@Controller('fleet')
+export class FleetController { // thin HTTP layer: every handler delegates to FleetService
+  constructor(private readonly fleetService: FleetService) {}
+
+  @Get()
+  @RequirePermission('server.view')
+  @ApiOperation({ summary: 'Get fleet overview — hosts and game instances for this license' })
+  async getFleet(@CurrentTenant() licenseId: string) { // license comes from @CurrentTenant, not the URL
+    return this.fleetService.getFleet(licenseId);
+  }
+
+  @Delete('hosts/:id')
+  @RequirePermission('server.manage')
+  @ApiOperation({ summary: 'Remove a host and its instances (host must be offline)' })
+  async deleteHost(@CurrentTenant() licenseId: string, @Param('id') id: string) {
+    return this.fleetService.deleteHost(licenseId, id); // the service scopes the host lookup to licenseId
+  }
+}
--- a/backend-nest/src/modules/fleet/fleet.module.ts
+++ b/backend-nest/src/modules/fleet/fleet.module.ts
@@ -0,0 +1,15 @@
+import { Module } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
+import { FleetController } from './fleet.controller';
+import { FleetService } from './fleet.service';
+import { AgentHost } from '../../entities/agent-host.entity';
+import { GameInstance } from '../../entities/game-instance.entity';
+import { ServerConnection } from '../../entities/server-connection.entity';
+
+@Module({
+  imports: [TypeOrmModule.forFeature([AgentHost, GameInstance, ServerConnection])], // the three repositories FleetService injects
+  controllers: [FleetController],
+  providers: [FleetService],
+  exports: [FleetService], // lets modules that import FleetModule inject FleetService
+})
+export class FleetModule {}
--- a/backend-nest/src/modules/fleet/fleet.service.ts
+++ b/backend-nest/src/modules/fleet/fleet.service.ts
@@ -0,0 +1,170 @@
+import { Injectable, NotFoundException, ConflictException } from '@nestjs/common';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository } from 'typeorm';
+import { AgentHost } from '../../entities/agent-host.entity';
+import { GameInstance } from '../../entities/game-instance.entity';
+import { ServerConnection } from '../../entities/server-connection.entity';
+
+export interface FleetInstanceDto { // one game instance as serialized by FleetService.getFleet
+  id: string;
+  agent_instance_id: string;
+  game: string;
+  label: string | null;
+  state: string;
+  uptime_seconds: number; // getFleet coerces the entity value with Number()
+  last_seen_at: string | null; // ISO-8601 via Date.toISOString(), or null
+}
+
+export interface FleetHostDto { // one host plus its nested instances, as built by FleetService.getFleet
+  id: string; // host id, or the literal '__unassigned__' for the synthetic bucket
+  hostname: string;
+  status: string;
+  agent_version: string | null;
+  os: string | null;
+  arch: string | null;
+  cpu_percent: number | null; // this and the numeric metrics below: Number()-coerced, null/undefined → null
+  cpu_cores: number | null;
+  mem_total_mb: number | null;
+  mem_used_mb: number | null;
+  uptime_seconds: number | null;
+  disks: AgentHost['disks']; // passed through from the entity unchanged
+  last_heartbeat_at: string | null; // ISO-8601 via Date.toISOString(), or null
+  instances: FleetInstanceDto[]; // empty array when the host has no instances
+}
+
+export interface FleetSummaryDto {
+  host_count: number; // real hosts only — the synthetic '__unassigned__' entry is not counted
+  instance_count: number; // all instances of the license, including ones without a host
+  online_host_count: number; // hosts whose status is 'connected'
+}
+
+export interface FleetResponseDto { // return shape of FleetService.getFleet
+  hosts: FleetHostDto[]; // queried in hostname order; the '__unassigned__' bucket, if any, is appended last
+  summary: FleetSummaryDto;
+}
+
+@Injectable()
+export class FleetService {
+  constructor(
+    @InjectRepository(AgentHost)
+    private readonly hostRepo: Repository<AgentHost>,
+    @InjectRepository(GameInstance)
+    private readonly instanceRepo: Repository<GameInstance>,
+    @InjectRepository(ServerConnection)
+    private readonly connectionRepo: Repository<ServerConnection>,
+  ) {}
+
+  /**
+   * Remove a host and its game instances from the fleet.
+   *
+   * Refuses while the host is `connected` — a live agent re-registers on its
+   * next heartbeat, so the operator must stop the agent first. Deletes the
+   * host's instances explicitly (the FK is SET NULL, which would otherwise
+   * orphan them); instance_stats cascade. If this was the license's last host,
+   * the legacy single-server connection row is cleared too so the old
+   * Dashboard doesn't show a stale server.
+   *
+   * The deletes run in a single transaction so a failure part-way rolls back
+   * instead of leaving instances removed under a host row that still exists.
+   * Throws NotFoundException (no such host for this license) or ConflictException.
+   */
+  async deleteHost(
+    licenseId: string,
+    hostId: string,
+  ): Promise<{ deleted: true; instances_removed: number }> {
+    const host = await this.hostRepo.findOne({ where: { id: hostId, license_id: licenseId } });
+    if (!host) throw new NotFoundException('Host not found');
+    if (host.status === 'connected') {
+      throw new ConflictException(
+        'Host is online — stop the agent first, or it will re-register on its next heartbeat',
+      );
+    }
+
+    return this.hostRepo.manager.transaction(async (tx) => {
+      const del = await tx.delete(this.instanceRepo.target, { license_id: licenseId, host_id: hostId });
+      await tx.delete(this.hostRepo.target, { id: hostId, license_id: licenseId });
+      // Counted inside the transaction so it already reflects the host delete above.
+      const remaining = await tx.count(this.hostRepo.target, { where: { license_id: licenseId } });
+      if (remaining === 0) await tx.delete(this.connectionRepo.target, { license_id: licenseId });
+      return { deleted: true as const, instances_removed: del.affected ?? 0 };
+    });
+  }
+
+  /**
+   * Build the fleet overview for a license: each host with its instances nested, plus
+   * summary counts. Instances with no host_id go under a synthetic '__unassigned__' host.
+   */
+  async getFleet(licenseId: string): Promise<FleetResponseDto> {
+    const [hosts, instances] = await Promise.all([
+      this.hostRepo.find({ where: { license_id: licenseId }, order: { hostname: 'ASC' } }),
+      this.instanceRepo.find({ where: { license_id: licenseId }, order: { game: 'ASC', label: 'ASC' } }),
+    ]);
+
+    // Bigint columns come back as strings from pg — coerce, but keep null/undefined as null.
+    const num = (v: unknown): number | null => (v === null || v === undefined ? null : Number(v));
+
+    // Group instances by host_id; the null key collects instances without a host.
+    const instancesByHost = new Map<string | null, FleetInstanceDto[]>();
+    for (const inst of instances) {
+      const key = inst.host_id ?? null;
+      const bucket = instancesByHost.get(key) ?? [];
+      instancesByHost.set(key, bucket);
+      bucket.push({
+        id: inst.id,
+        agent_instance_id: inst.agent_instance_id,
+        game: inst.game,
+        label: inst.label,
+        state: inst.state,
+        uptime_seconds: Number(inst.uptime_seconds),
+        last_seen_at: inst.last_seen_at ? inst.last_seen_at.toISOString() : null,
+      });
+    }
+
+    const hostDtos: FleetHostDto[] = hosts.map((h) => ({
+      id: h.id,
+      hostname: h.hostname,
+      status: h.status,
+      agent_version: h.agent_version,
+      os: h.os,
+      arch: h.arch,
+      cpu_percent: num(h.cpu_percent),
+      cpu_cores: num(h.cpu_cores),
+      mem_total_mb: num(h.mem_total_mb),
+      mem_used_mb: num(h.mem_used_mb),
+      uptime_seconds: num(h.uptime_seconds),
+      disks: h.disks,
+      last_heartbeat_at: h.last_heartbeat_at ? h.last_heartbeat_at.toISOString() : null,
+      instances: instancesByHost.get(h.id) ?? [],
+    }));
+
+    // Append synthetic "unassigned" bucket only if orphaned instances exist
+    const unassigned = instancesByHost.get(null) ?? [];
+    if (unassigned.length > 0) {
+      hostDtos.push({
+        id: '__unassigned__',
+        hostname: 'Unassigned',
+        status: 'offline',
+        agent_version: null,
+        os: null,
+        arch: null,
+        cpu_percent: null,
+        cpu_cores: null,
+        mem_total_mb: null,
+        mem_used_mb: null,
+        uptime_seconds: null,
+        disks: null,
+        last_heartbeat_at: null,
+        instances: unassigned,
+      });
+    }
+
+    return {
+      hosts: hostDtos,
+      summary: {
+        host_count: hosts.length,
+        instance_count: instances.length,
+        online_host_count: hosts.filter((h) => h.status === 'connected').length,
+      },
+    };
+  }
+}
--- a/backend-nest/src/modules/furnacesplitter/furnacesplitter.service.ts
+++ b/backend-nest/src/modules/furnacesplitter/furnacesplitter.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { FurnaceSplitterConfig } from '../../entities/furnacesplitter-config.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateFurnaceSplitterConfigDto } from './dto/create-furnacesplitter-config.dto';
 import { UpdateFurnaceSplitterConfigDto } from './dto/update-furnacesplitter-config.dto';

@@ -13,7 +13,7 @@ export class FurnaceSplitterService {
  constructor(
    @InjectRepository(FurnaceSplitterConfig)
    private readonly furnaceRepo: Repository<FurnaceSplitterConfig>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List configs for a license (summaries — no JSONB) */
@@ -81,26 +81,15 @@ export class FurnaceSplitterService {
    const jsonString = JSON.stringify(config.config_data, null, 2);

    try {
-      // Write FurnaceSplitter.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/config/FurnaceSplitter.json',
-          content: jsonString,
-        },
-        30000,
+      // Write FurnaceSplitter.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/config/FurnaceSplitter.json',
+        jsonString,
      );

      // Reload FurnaceSplitter plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload FurnaceSplitter',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload FurnaceSplitter');

      // Mark this config as active, deactivate others
      await this.furnaceRepo.update({ license_id: licenseId }, { is_active: false });
@@ -126,17 +115,13 @@ export class FurnaceSplitterService {
  /** Import FurnaceSplitter.json from game server via NATS */
  async importFromServer(licenseId: string, configName: string, description?: string) {
    try {
-      // Read FurnaceSplitter.json from server via file manager NATS
-      const response = await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_preview',
-          path: 'server://oxide/config/FurnaceSplitter.json',
-        },
-        30000,
+      // Read FurnaceSplitter.json from server via Rust agent
+      const result = await this.instancesService.readFileForLicense(
+        licenseId,
+        'oxide/config/FurnaceSplitter.json',
      );

-      if (!response) {
+      if (!result) {
        throw new HttpException(
          'No response from agent — it may be offline',
          HttpStatus.SERVICE_UNAVAILABLE,
@@ -144,13 +129,13 @@ export class FurnaceSplitterService {
      }

      // Parse the response content as JSON
-      const responseData = response as Record<string, any>;
+      const responseData = (result as any).content;
      let configData: Record<string, any>;

-      if (typeof responseData.content === 'string') {
-        configData = JSON.parse(responseData.content);
-      } else if (typeof responseData.content === 'object') {
-        configData = responseData.content;
+      if (typeof responseData === 'string') {
+        configData = JSON.parse(responseData);
+      } else if (typeof responseData === 'object') {
+        configData = responseData;
      } else {
        throw new HttpException(
          'Unexpected response format from agent',
--- a/backend-nest/src/modules/gather/gather.service.ts
+++ b/backend-nest/src/modules/gather/gather.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { GatherConfig } from '../../entities/gather-config.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateGatherConfigDto } from './dto/create-gather-config.dto';
 import { UpdateGatherConfigDto } from './dto/update-gather-config.dto';

@@ -13,7 +13,7 @@ export class GatherService {
  constructor(
    @InjectRepository(GatherConfig)
    private readonly gatherRepo: Repository<GatherConfig>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List configs for a license (summaries — no JSONB) */
@@ -81,26 +81,15 @@ export class GatherService {
    const jsonString = JSON.stringify(config.config_data, null, 2);

    try {
-      // Write GatherManager.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/config/GatherManager.json',
-          content: jsonString,
-        },
-        30000,
+      // Write GatherManager.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/config/GatherManager.json',
+        jsonString,
      );

      // Reload GatherManager plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload GatherManager',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload GatherManager');

      // Mark this config as active, deactivate others
      await this.gatherRepo.update({ license_id: licenseId }, { is_active: false });
@@ -126,17 +115,13 @@ export class GatherService {
  /** Import GatherManager.json from game server via NATS */
  async importFromServer(licenseId: string, configName: string, description?: string) {
    try {
-      // Read GatherManager.json from server via file manager NATS
-      const response = await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_preview',
-          path: 'server://oxide/config/GatherManager.json',
-        },
-        30000,
+      // Read GatherManager.json from server via Rust agent
+      const result = await this.instancesService.readFileForLicense(
+        licenseId,
+        'oxide/config/GatherManager.json',
      );

-      if (!response) {
+      if (!result) {
        throw new HttpException(
          'No response from agent — it may be offline',
          HttpStatus.SERVICE_UNAVAILABLE,
@@ -144,13 +129,13 @@ export class GatherService {
      }

      // Parse the response content as JSON
-      const responseData = response as Record<string, any>;
+      const responseData = (result as any).content;
      let configData: Record<string, any>;

-      if (typeof responseData.content === 'string') {
-        configData = JSON.parse(responseData.content);
-      } else if (typeof responseData.content === 'object') {
-        configData = responseData.content;
+      if (typeof responseData === 'string') {
+        configData = JSON.parse(responseData);
+      } else if (typeof responseData === 'object') {
+        configData = responseData;
      } else {
        throw new HttpException(
          'Unexpected response format from agent',
--- a/backend-nest/src/modules/instances/instances.controller.ts
+++ b/backend-nest/src/modules/instances/instances.controller.ts
@@ -0,0 +1,133 @@
+import { Controller, Post, Get, Put, Body, Param, Query } from '@nestjs/common';
+import { ApiTags, ApiBearerAuth, ApiOperation } from '@nestjs/swagger';
+import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
+import { RequirePermission } from '../../common/decorators/require-permission.decorator';
+import { InstancesService, LifecycleFunc } from './instances.service';
+
+@ApiTags('instances')
+@ApiBearerAuth()
+@Controller('instances')
+export class InstancesController { // per-instance endpoints; each handler forwards to InstancesService with the caller's license
+  constructor(private readonly instances: InstancesService) {}
+
+  @Post(':id/lifecycle')
+  @RequirePermission('server.manage')
+  @ApiOperation({ summary: 'Send a lifecycle command to a game instance (start/stop/restart/status/steam_update)' })
+  async lifecycle(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { action: LifecycleFunc }, // NOTE(review): inline type literal, not a DTO class — presumably not validated by a ValidationPipe; confirm
+  ) {
+    return this.instances.lifecycle(licenseId, id, body.action); // the service re-checks action against LIFECYCLE_FUNCS
+  }
+
+  @Post(':id/rcon')
+  @RequirePermission('server.console')
+  @ApiOperation({ summary: 'Send an RCON/console command to a game instance' })
+  async rcon(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { command: string },
+  ) {
+    return this.instances.rcon(licenseId, id, body.command); // a missing or blank command is rejected in the service
+  }
+
+  @Get(':id/files')
+  @RequirePermission('files.view')
+  @ApiOperation({ summary: 'List a directory in the instance (jailed to its root)' })
+  async listFiles(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Query('path') path?: string,
+  ) {
+    return this.instances.listFiles(licenseId, id, path ?? ''); // an absent ?path= is passed on as ''
+  }
+
+  @Get(':id/file')
+  @RequirePermission('files.view')
+  @ApiOperation({ summary: 'Read a text file from the instance (jailed, 5 MiB cap)' })
+  async readFile(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Query('path') path: string,
+  ) {
+    return this.instances.readFile(licenseId, id, path); // path is forwarded as received (no default here, unlike listFiles)
+  }
+
+  @Put(':id/file')
+  @RequirePermission('files.manage')
+  @ApiOperation({ summary: 'Write a text file in the instance (jailed)' })
+  async writeFile(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { path: string; content: string },
+  ) {
+    return this.instances.writeFile(licenseId, id, body.path, body.content ?? ''); // missing content is sent as ''
+  }
+
+  @Post(':id/files/delete') // POST rather than DELETE: the target path travels in the body
+  @RequirePermission('files.manage')
+  @ApiOperation({ summary: 'Delete a file or directory (jailed)' })
+  async deleteFile(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { path: string },
+  ) {
+    return this.instances.deleteFile(licenseId, id, body.path);
+  }
+
+  @Post(':id/files/rename')
+  @RequirePermission('files.manage')
+  @ApiOperation({ summary: 'Rename a file/directory within its parent (jailed)' })
+  async renameFile(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { path: string; name: string }, // path = existing entry, name = new name
+  ) {
+    return this.instances.renameFile(licenseId, id, body.path, body.name);
+  }
+
+  @Post(':id/files/mkdir')
+  @RequirePermission('files.manage')
+  @ApiOperation({ summary: 'Create a directory (jailed)' })
+  async mkdir(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { path: string },
+  ) {
+    return this.instances.mkdir(licenseId, id, body.path);
+  }
+
+  @Post(':id/files/mkfile')
+  @RequirePermission('files.manage')
+  @ApiOperation({ summary: 'Create an empty file (jailed)' })
+  async mkfile(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { path: string },
+  ) {
+    return this.instances.mkfile(licenseId, id, body.path);
+  }
+
+  @Post(':id/files/move')
+  @RequirePermission('files.manage')
+  @ApiOperation({ summary: 'Move a file/directory (jailed)' })
+  async moveFile(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { path: string; dest: string }, // path = source, dest = destination
+  ) {
+    return this.instances.moveFile(licenseId, id, body.path, body.dest);
+  }
+
+  @Post(':id/files/copy')
+  @RequirePermission('files.manage')
+  @ApiOperation({ summary: 'Copy a file/directory (jailed)' })
+  async copyFile(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+    @Body() body: { path: string; dest: string }, // path = source, dest = destination
+  ) {
+    return this.instances.copyFile(licenseId, id, body.path, body.dest);
+  }
+}
--- a/backend-nest/src/modules/instances/instances.module.ts
+++ b/backend-nest/src/modules/instances/instances.module.ts
@@ -0,0 +1,18 @@
+import { Global, Module } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
+import { InstancesController } from './instances.controller';
+import { InstancesService } from './instances.service';
+import { GameInstance } from '../../entities/game-instance.entity';
+import { NatsService } from '../../services/nats.service';
+
+// Global so the legacy single-server services (servers/players/schedules/wipes/
+// plugins + the 9 plugin-config modules) can inject InstancesService to route
+// commands at the now-only Rust agent without each importing this module.
+@Global()
+@Module({
+  imports: [TypeOrmModule.forFeature([GameInstance])], // repository injected by InstancesService
+  controllers: [InstancesController],
+  providers: [InstancesService, NatsService], // NOTE(review): NatsService is provided here directly — confirm this does not create a second instance alongside another module's provider
+  exports: [InstancesService], // only InstancesService is exported; NatsService is not re-exported from here
+})
+export class InstancesModule {}
--- a/backend-nest/src/modules/instances/instances.service.ts
+++ b/backend-nest/src/modules/instances/instances.service.ts
@@ -0,0 +1,223 @@
+import { Injectable, NotFoundException, BadRequestException, ServiceUnavailableException, Logger } from '@nestjs/common';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository } from 'typeorm';
+import { NatsService } from '../../services/nats.service';
+import { GameInstance } from '../../entities/game-instance.entity';
+
+/**
+ * Lifecycle funcs the agent's {instance}.cmd handler accepts.
+ * Declared `as const` so the list doubles as the runtime allow-list checked by
+ * InstancesService.lifecycle and as the source of the LifecycleFunc type.
+ */
+const LIFECYCLE_FUNCS = ['start', 'stop', 'restart', 'status', 'steam_update'] as const;
+/** Union of the string literals in LIFECYCLE_FUNCS. */
+export type LifecycleFunc = (typeof LIFECYCLE_FUNCS)[number];
+
+/**
+ * Routes panel commands to a license's game instances through the host agent
+ * over NATS: lifecycle actions, RCON, jailed file operations and wipes.
+ *
+ * Every instance-scoped method first resolves the instance against the
+ * caller's license id, so a request for an instance the license does not own
+ * fails with NotFoundException before anything is sent to the agent.
+ */
+@Injectable()
+export class InstancesService {
+  /** Wipe types forwarded to the agent; also the runtime allow-list for wipe(). */
+  private static readonly WIPE_TYPES = ['map', 'blueprint', 'full'] as const;
+
+  private readonly logger = new Logger(InstancesService.name);
+
+  constructor(
+    private readonly nats: NatsService,
+    @InjectRepository(GameInstance)
+    private readonly instanceRepo: Repository<GameInstance>,
+  ) {}
+
+  /**
+   * Resolve an instance the caller's license actually owns (tenant guard).
+   *
+   * @throws NotFoundException when no row matches both the instance id and the license id.
+   */
+  private async resolveInstance(licenseId: string, instanceId: string): Promise<GameInstance> {
+    const inst = await this.instanceRepo.findOne({
+      where: { id: instanceId, license_id: licenseId },
+    });
+    if (!inst) throw new NotFoundException('Instance not found');
+    return inst;
+  }
+
+  /** NATS subject of the instance's command handler (lifecycle, rcon, wipe). */
+  private cmdSubject(inst: GameInstance, licenseId: string): string {
+    return `corrosion.${licenseId}.${inst.agent_instance_id}.cmd`;
+  }
+
+  /**
+   * Send a lifecycle action to an instance and return the agent's reply.
+   *
+   * @param func one of LIFECYCLE_FUNCS; re-checked at runtime because the
+   *             TypeScript type is erased and the value may come from a request.
+   * @throws BadRequestException for an unsupported action.
+   * @throws NotFoundException when the license does not own the instance.
+   */
+  async lifecycle(licenseId: string, instanceId: string, func: LifecycleFunc): Promise<unknown> {
+    if (!LIFECYCLE_FUNCS.includes(func)) {
+      throw new BadRequestException(`Unsupported action '${func}'`);
+    }
+    const inst = await this.resolveInstance(licenseId, instanceId);
+    this.logger.log(`instance ${inst.agent_instance_id}: ${func}`);
+    return this.nats.requestScoped(licenseId, this.cmdSubject(inst, licenseId), { func });
+  }
+
+  /**
+   * Run a console command on an instance via the agent's `rcon` func.
+   *
+   * @throws BadRequestException when the command is empty or whitespace only.
+   * @throws NotFoundException when the license does not own the instance.
+   */
+  async rcon(licenseId: string, instanceId: string, command: string): Promise<unknown> {
+    if (!command || !command.trim()) {
+      throw new BadRequestException('command is required');
+    }
+    const inst = await this.resolveInstance(licenseId, instanceId);
+    // RCON can take longer than a lifecycle ack — give it more headroom.
+    return this.nats.requestScoped(
+      licenseId,
+      this.cmdSubject(inst, licenseId),
+      { func: 'rcon', command },
+      12_000,
+    );
+  }
+
+  // -------------------------------------------------------------------------
+  // File access — jailed to the instance root by the agent's file manager.
+  // The agent protocol (corrosion-host-agent/src/filemanager.rs):
+  //   { op: list|read|write|delete|rename|mkdir|mkfile|move|copy, path, ... }
+  //   reply: { status: 'success'|'error', data?, message? }
+  // -------------------------------------------------------------------------
+
+  /** NATS subject of the instance's file-manager handler. */
+  private filesSubject(inst: GameInstance, licenseId: string): string {
+    return `corrosion.${licenseId}.${inst.agent_instance_id}.files.cmd`;
+  }
+
+  /**
+   * Send one file-manager request to the instance and return the raw reply.
+   *
+   * @param payload the `{ op, path, ... }` message forwarded to the agent as-is.
+   * @throws NotFoundException when the license does not own the instance.
+   * @throws ServiceUnavailableException when the request yields no reply object.
+   * @throws BadRequestException when the agent replies with `status: 'error'`.
+   */
+  private async fileOp(
+    licenseId: string,
+    instanceId: string,
+    payload: Record<string, unknown>,
+  ): Promise<{ status: string; data?: unknown; message?: string }> {
+    const inst = await this.resolveInstance(licenseId, instanceId);
+    const res = await this.nats.requestScoped<{ status: string; data?: unknown; message?: string }>(
+      licenseId,
+      this.filesSubject(inst, licenseId),
+      payload,
+      12_000,
+    );
+    // Every caller reads `res.data`, so an empty reply must fail here with a
+    // meaningful error instead of surfacing later as a TypeError (HTTP 500).
+    if (!res) {
+      throw new ServiceUnavailableException('No response from agent — it may be offline');
+    }
+    if (res.status === 'error') {
+      throw new BadRequestException(res.message ?? 'File operation failed');
+    }
+    return res;
+  }
+
+  /** List a directory; `path` defaults to '' (sent to the agent unchanged). */
+  async listFiles(licenseId: string, instanceId: string, path = ''): Promise<unknown> {
+    const res = await this.fileOp(licenseId, instanceId, { op: 'list', path });
+    return res.data;
+  }
+
+  /**
+   * Read a file and return the agent's `data` payload.
+   *
+   * @throws BadRequestException when `path` is empty.
+   */
+  async readFile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
+    if (!path) throw new BadRequestException('path is required');
+    const res = await this.fileOp(licenseId, instanceId, { op: 'read', path });
+    return res.data;
+  }
+
+  /**
+   * Write `content` to a file. Returns the agent's `data`, or
+   * `{ status: 'success' }` when the reply carries none.
+   *
+   * @throws BadRequestException when `path` is empty.
+   */
+  async writeFile(
+    licenseId: string,
+    instanceId: string,
+    path: string,
+    content: string,
+  ): Promise<unknown> {
+    if (!path) throw new BadRequestException('path is required');
+    const res = await this.fileOp(licenseId, instanceId, { op: 'write', path, content });
+    return res.data ?? { status: 'success' };
+  }
+
+  /**
+   * Delete a file or directory. Returns the agent's `data`, or `{ ok: true }`.
+   *
+   * @throws BadRequestException when `path` is empty.
+   */
+  async deleteFile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
+    if (!path) throw new BadRequestException('path is required');
+    return (await this.fileOp(licenseId, instanceId, { op: 'delete', path })).data ?? { ok: true };
+  }
+
+  /**
+   * Rename the entry at `path` to `name`. Returns the agent's `data`, or `{ ok: true }`.
+   *
+   * @throws BadRequestException when `path` or `name` is empty.
+   */
+  async renameFile(
+    licenseId: string,
+    instanceId: string,
+    path: string,
+    name: string,
+  ): Promise<unknown> {
+    if (!path || !name) throw new BadRequestException('path and name are required');
+    return (await this.fileOp(licenseId, instanceId, { op: 'rename', path, name })).data ?? { ok: true };
+  }
+
+  /**
+   * Create a directory. Returns the agent's `data`, or `{ ok: true }`.
+   *
+   * @throws BadRequestException when `path` is empty.
+   */
+  async mkdir(licenseId: string, instanceId: string, path: string): Promise<unknown> {
+    if (!path) throw new BadRequestException('path is required');
+    return (await this.fileOp(licenseId, instanceId, { op: 'mkdir', path })).data ?? { ok: true };
+  }
+
+  /**
+   * Create a file via the agent's `mkfile` op. Returns the agent's `data`, or `{ ok: true }`.
+   *
+   * @throws BadRequestException when `path` is empty.
+   */
+  async mkfile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
+    if (!path) throw new BadRequestException('path is required');
+    return (await this.fileOp(licenseId, instanceId, { op: 'mkfile', path })).data ?? { ok: true };
+  }
+
+  /**
+   * Move `path` to `dest`. Returns the agent's `data`, or `{ ok: true }`.
+   *
+   * @throws BadRequestException when `path` or `dest` is empty.
+   */
+  async moveFile(
+    licenseId: string,
+    instanceId: string,
+    path: string,
+    dest: string,
+  ): Promise<unknown> {
+    if (!path || !dest) throw new BadRequestException('path and dest are required');
+    return (await this.fileOp(licenseId, instanceId, { op: 'move', path, dest })).data ?? { ok: true };
+  }
+
+  /**
+   * Copy `path` to `dest`. Returns the agent's `data`, or `{ ok: true }`.
+   *
+   * @throws BadRequestException when `path` or `dest` is empty.
+   */
+  async copyFile(
+    licenseId: string,
+    instanceId: string,
+    path: string,
+    dest: string,
+  ): Promise<unknown> {
+    if (!path || !dest) throw new BadRequestException('path and dest are required');
+    return (await this.fileOp(licenseId, instanceId, { op: 'copy', path, dest })).data ?? { ok: true };
+  }
+
+  /**
+   * Wipe an instance's game data via the agent's jailed wipe handler: stop →
+   * delete files per wipe_type (map/blueprint/full) → restart. Long timeout
+   * because the agent does all three steps before replying.
+   *
+   * @param backup forwarded to the agent as-is; defaults to true.
+   * @throws BadRequestException for a wipe type outside map/blueprint/full.
+   * @throws NotFoundException when the license does not own the instance.
+   */
+  async wipe(
+    licenseId: string,
+    instanceId: string,
+    wipeType: 'map' | 'blueprint' | 'full',
+    backup = true,
+  ): Promise<unknown> {
+    // The union type is erased at runtime; a wipe is destructive, so refuse
+    // anything unexpected here rather than relying on the agent to reject it.
+    if (!InstancesService.WIPE_TYPES.includes(wipeType)) {
+      throw new BadRequestException(`Unsupported wipe type '${wipeType}'`);
+    }
+    const inst = await this.resolveInstance(licenseId, instanceId);
+    this.logger.log(`instance ${inst.agent_instance_id}: wipe (${wipeType})`);
+    return this.nats.requestScoped(
+      licenseId,
+      this.cmdSubject(inst, licenseId),
+      { func: 'wipe', wipe_type: wipeType, backup },
+      120_000,
+    );
+  }
+
+  // -------------------------------------------------------------------------
+  // License-scoped convenience wrappers. Legacy single-server services
+  // (servers/players/schedules/wipes/plugins + the 9 plugin-config modules)
+  // predate the instance model and carry only a licenseId. These resolve the
+  // license's primary instance, then dispatch to the agent — replacing the old
+  // publishes to the now-defunct `cmd.server` subject.
+  // -------------------------------------------------------------------------
+
+  /**
+   * The license's primary instance: the row with the earliest `created_at`.
+   * Only checks that a row exists — it does not probe whether the agent is
+   * currently reachable.
+   *
+   * @throws NotFoundException when the license has no instance row.
+   */
+  async resolveDefaultInstance(licenseId: string): Promise<GameInstance> {
+    const inst = await this.instanceRepo.findOne({
+      where: { license_id: licenseId },
+      order: { created_at: 'ASC' },
+    });
+    if (!inst) {
+      throw new NotFoundException(
+        'No game instance is connected for this license yet — install and start the host agent first.',
+      );
+    }
+    return inst;
+  }
+
+  /** lifecycle() against the license's primary instance. */
+  async lifecycleForLicense(licenseId: string, func: LifecycleFunc): Promise<unknown> {
+    const inst = await this.resolveDefaultInstance(licenseId);
+    return this.lifecycle(licenseId, inst.id, func);
+  }
+
+  /** rcon() against the license's primary instance. */
+  async rconForLicense(licenseId: string, command: string): Promise<unknown> {
+    const inst = await this.resolveDefaultInstance(licenseId);
+    return this.rcon(licenseId, inst.id, command);
+  }
+
+  /** writeFile() against the license's primary instance. */
+  async writeFileForLicense(licenseId: string, path: string, content: string): Promise<unknown> {
+    const inst = await this.resolveDefaultInstance(licenseId);
+    return this.writeFile(licenseId, inst.id, path, content);
+  }
+
+  /** readFile() against the license's primary instance. */
+  async readFileForLicense(licenseId: string, path: string): Promise<unknown> {
+    const inst = await this.resolveDefaultInstance(licenseId);
+    return this.readFile(licenseId, inst.id, path);
+  }
+
+  /** deleteFile() against the license's primary instance. */
+  async deleteFileForLicense(licenseId: string, path: string): Promise<unknown> {
+    const inst = await this.resolveDefaultInstance(licenseId);
+    return this.deleteFile(licenseId, inst.id, path);
+  }
+
+  /** wipe() against the license's primary instance. */
+  async wipeForLicense(
+    licenseId: string,
+    wipeType: 'map' | 'blueprint' | 'full',
+    backup = true,
+  ): Promise<unknown> {
+    const inst = await this.resolveDefaultInstance(licenseId);
+    return this.wipe(licenseId, inst.id, wipeType, backup);
+  }
+}
--- a/backend-nest/src/modules/kits/kits.service.ts
+++ b/backend-nest/src/modules/kits/kits.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { KitsConfig } from '../../entities/kits-config.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateKitsConfigDto } from './dto/create-kits-config.dto';
 import { UpdateKitsConfigDto } from './dto/update-kits-config.dto';

@@ -13,7 +13,7 @@ export class KitsService {
  constructor(
    @InjectRepository(KitsConfig)
    private readonly kitsRepo: Repository<KitsConfig>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List configs for a license (summaries — no JSONB) */
@@ -81,26 +81,15 @@ export class KitsService {
    const jsonString = JSON.stringify(config.config_data, null, 2);

    try {
-      // Write Kits.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/config/Kits.json',
-          content: jsonString,
-        },
-        30000,
+      // Write Kits.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/config/Kits.json',
+        jsonString,
      );

      // Reload Kits plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload Kits',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload Kits');

      // Mark this config as active, deactivate others
      await this.kitsRepo.update({ license_id: licenseId }, { is_active: false });
@@ -126,17 +115,13 @@ export class KitsService {
  /** Import Kits.json from game server via NATS */
  async importFromServer(licenseId: string, configName: string, description?: string) {
    try {
-      // Read Kits.json from server via file manager NATS
-      const response = await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_preview',
-          path: 'server://oxide/config/Kits.json',
-        },
-        30000,
+      // Read Kits.json from server via Rust agent
+      const result = await this.instancesService.readFileForLicense(
+        licenseId,
+        'oxide/config/Kits.json',
      );

-      if (!response) {
+      if (!result) {
        throw new HttpException(
          'No response from agent — it may be offline',
          HttpStatus.SERVICE_UNAVAILABLE,
@@ -144,13 +129,13 @@ export class KitsService {
      }

      // Parse the response content as JSON
-      const responseData = response as Record<string, any>;
+      const responseData = (result as any).content;
      let configData: Record<string, any>;

-      if (typeof responseData.content === 'string') {
-        configData = JSON.parse(responseData.content);
-      } else if (typeof responseData.content === 'object') {
-        configData = responseData.content;
+      if (typeof responseData === 'string') {
+        configData = JSON.parse(responseData);
+      } else if (typeof responseData === 'object') {
+        configData = responseData;
      } else {
        throw new HttpException(
          'Unexpected response format from agent',
--- a/backend-nest/src/modules/loot/loot.service.ts
+++ b/backend-nest/src/modules/loot/loot.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { LootProfile } from '../../entities/loot-profile.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateLootProfileDto } from './dto/create-loot-profile.dto';
 import { UpdateLootProfileDto } from './dto/update-loot-profile.dto';
 import { ImportLootProfileDto } from './dto/import-loot-profile.dto';
@@ -15,7 +15,7 @@ export class LootService {
  constructor(
    @InjectRepository(LootProfile)
    private readonly lootRepo: Repository<LootProfile>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List profiles for a license (summaries — no JSONB) */
@@ -114,37 +114,22 @@ export class LootService {
    const lootGroupsJson = JSON.stringify(scaledGroups, null, 2);

    try {
-      // Write LootTables.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/data/BetterLoot/LootTables.json',
-          content: lootTablesJson,
-        },
-        30000,
+      // Write LootTables.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/data/BetterLoot/LootTables.json',
+        lootTablesJson,
      );

-      // Write LootGroups.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/data/BetterLoot/LootGroups.json',
-          content: lootGroupsJson,
-        },
-        30000,
+      // Write LootGroups.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/data/BetterLoot/LootGroups.json',
+        lootGroupsJson,
      );

      // Reload BetterLoot plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload BetterLoot',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload BetterLoot');

      // Mark this profile as active, deactivate others
      await this.lootRepo.update({ license_id: licenseId }, { is_active: false });
--- a/backend-nest/src/modules/players/players.service.ts
+++ b/backend-nest/src/modules/players/players.service.ts
@@ -1,9 +1,10 @@
-import { Injectable } from '@nestjs/common';
+import { Injectable, BadRequestException } from '@nestjs/common';
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { PlayerAction } from '../../entities/player-action.entity';
 import { PlayerSession } from '../../entities/player-session.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
+import { WebhooksService } from '../webhooks/webhooks.service';
 import { PlayerActionDto } from './dto/player-action.dto';

 export interface Player {
@@ -23,7 +24,8 @@ export class PlayersService {
    private readonly actionRepo: Repository<PlayerAction>,
    @InjectRepository(PlayerSession)
    private readonly sessionRepo: Repository<PlayerSession>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
+    private readonly webhooksService: WebhooksService,
  ) {}

  /**
@@ -132,15 +134,60 @@ export class PlayersService {

    await this.actionRepo.save(action);

-    // Forward kick, ban, and unban to the game server via NATS
+    // Forward kick, ban, and unban to the game server via RCON
    if (dto.action_type === 'kick' || dto.action_type === 'ban' || dto.action_type === 'unban') {
-      await this.natsService.sendServerCommand(licenseId, dto.action_type, {
-        steam_id: dto.steam_id,
-        reason: dto.reason,
-        duration_minutes: dto.duration_minutes,
-      });
+      const rconCmd = this.buildRconCommand(dto);
+      await this.instancesService.rconForLicense(licenseId, rconCmd);
+    }
+
+    // Fire webhook event for player bans. Fire-and-forget — a delivery failure
+    // must never surface to the caller or roll back the ban action.
+    if (dto.action_type === 'ban') {
+      void this.webhooksService
+        .dispatch(licenseId, 'player_banned', {
+          steam_id: dto.steam_id,
+          player_name: dto.player_name,
+          reason: dto.reason ?? null,
+          duration_minutes: dto.duration_minutes ?? null,
+        })
+        .catch(() => {
+          // dispatch() already logs internally; swallow here to guarantee
+          // the ban action result is unaffected.
+        });
    }

    return { success: true };
  }
+
+  /**
+   * Build the single-line RCON console command for a kick, ban or unban.
+   *
+   * @param dto the player action; reads `action_type`, `steam_id`, `reason`
+   *            and `duration_minutes`.
+   * @returns the command string, or '' for any other action type (the caller
+   *          only invokes this for kick/ban/unban).
+   * @throws BadRequestException when the player id is not a safe token.
+   */
+  private buildRconCommand(dto: PlayerActionDto): string {
+    // Defense-in-depth against RCON command injection. The command is a single
+    // line; an id or reason containing a newline/control char could break the
+    // framing and inject a second console command. So:
+    //  - the player id must be a safe token (no whitespace/control chars) — a
+    //    permissive charset, not a Rust-only SteamID64 regex, so Conan (Funcom)
+    //    and Dune ids still validate. Reject outright if not.
+    //  - the free-text reason has control chars stripped and is length-capped.
+    //  - duration is coerced to a non-negative integer.
+    const id = dto.steam_id ?? '';
+    if (!/^[A-Za-z0-9_.:-]{1,64}$/.test(id)) {
+      throw new BadRequestException('Invalid player id');
+    }
+
+    // Strip C0 controls plus DEL and the C1 range (U+007F-U+009F); the C1 range
+    // includes NEL (U+0085), which `\s` does not match. Then collapse the
+    // remaining whitespace and cap the length.
+    const cleanedReason = (dto.reason ?? '')
+      .replace(/[\u0000-\u001F\u007F-\u009F]+/g, ' ')
+      .replace(/\s+/g, ' ')
+      .trim()
+      .slice(0, 200)
+      .trimEnd();
+
+    // Clamp so an enormous duration can never render as `Infinity` or in
+    // exponent form; a missing/non-numeric duration stays 0 (= permanent).
+    const secs = Number.isFinite(dto.duration_minutes)
+      ? Math.min(
+          Number.MAX_SAFE_INTEGER,
+          Math.max(0, Math.floor((dto.duration_minutes as number) * 60)),
+        )
+      : 0;
+
+    // NOTE(review): the reason is sent unquoted, so a multi-word reason reaches
+    // the console as several tokens — confirm how the target game's console
+    // splits arguments before relying on the trailing duration for bans.
+    switch (dto.action_type) {
+      case 'kick':
+        // Only append a reason that survived sanitising; the 'banned' default
+        // below is ban-specific and must not appear on kicks.
+        return `kick ${id}${cleanedReason ? ' ' + cleanedReason : ''}`;
+      case 'ban':
+        // banid <steamId> <reason> <durationSeconds>  — 0 = permanent
+        return `banid ${id} ${cleanedReason || 'banned'} ${secs}`;
+      case 'unban':
+        return `unban ${id}`;
+      default:
+        return '';
+    }
+  }
 }
--- a/backend-nest/src/modules/plugins/plugins.service.ts
+++ b/backend-nest/src/modules/plugins/plugins.service.ts
@@ -1,10 +1,10 @@
-import { Injectable, NotFoundException, ConflictException, BadRequestException, Logger } from '@nestjs/common';
+import { Injectable, NotFoundException, ConflictException, BadRequestException, ServiceUnavailableException, Logger } from '@nestjs/common';
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { PluginRegistry } from '../../entities/plugin-registry.entity';
 import { InstallPluginDto } from './dto/install-plugin.dto';
 import { UpdatePluginConfigDto } from './dto/update-plugin-config.dto';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';

 interface UmodCacheEntry {
  data: unknown;
@@ -20,7 +20,7 @@ export class PluginsService {
  constructor(
    @InjectRepository(PluginRegistry)
    private readonly pluginRegistryRepo: Repository<PluginRegistry>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  async getPlugins(licenseId: string): Promise<PluginRegistry[]> {
@@ -43,30 +43,11 @@ export class PluginsService {
      throw new ConflictException(`Plugin ${dto.plugin_name} is already installed`);
    }

-    const plugin = this.pluginRegistryRepo.create({
-      license_id: licenseId,
-      plugin_name: dto.plugin_name,
-      umod_slug: dto.umod_slug,
-      source: dto.source || 'manual',
-      is_installed: true,
-      is_loaded: false,
-    });
-
-    const saved = await this.pluginRegistryRepo.save(plugin);
-
-    try {
-      await this.natsService.publish(`corrosion.${licenseId}.cmd.server`, {
-        action: 'plugin_install',
-        plugin_name: dto.plugin_name,
-        umod_slug: dto.umod_slug,
-        timestamp: new Date().toISOString(),
-      });
-      this.logger.log(`Plugin install dispatched for ${dto.plugin_name} on license ${licenseId}`);
-    } catch (err) {
-      this.logger.error(`Failed to dispatch plugin install for ${dto.plugin_name} on license ${licenseId}: ${(err as Error).message}`);
-    }
-
-    return saved;
+    // One-click uMod install via agent is not yet implemented.
+    // Fail fast — do not persist a DB record for a plugin that won't be deployed.
+    throw new ServiceUnavailableException(
+      'One-click uMod install is coming soon — download the .cs and use Upload for now.',
+    );
  }

  async uninstallPlugin(licenseId: string, pluginId: string): Promise<void> {
@@ -80,11 +61,8 @@ export class PluginsService {

    await this.pluginRegistryRepo.delete({ id: pluginId, license_id: licenseId });

-    await this.natsService.publish(`corrosion.${licenseId}.cmd.plugin`, {
-      action: 'unload',
-      plugin_name: plugin.plugin_name,
-      timestamp: new Date().toISOString(),
-    });
+    await this.instancesService.rconForLicense(licenseId, `oxide.unload ${plugin.plugin_name}`);
+    await this.instancesService.deleteFileForLicense(licenseId, `oxide/plugins/${plugin.plugin_name}.cs`);
    this.logger.log(`Plugin uninstall dispatched for ${plugin.plugin_name} on license ${licenseId}`);
  }

@@ -100,11 +78,7 @@ export class PluginsService {
      throw new NotFoundException(`Plugin ${pluginId} not found`);
    }

-    await this.natsService.publish(`corrosion.${licenseId}.cmd.plugin`, {
-      action: 'reload',
-      plugin_name: plugin.plugin_name,
-      timestamp: new Date().toISOString(),
-    });
+    await this.instancesService.rconForLicense(licenseId, `oxide.reload ${plugin.plugin_name}`);
    this.logger.log(`Plugin reload dispatched for ${plugin.plugin_name} on license ${licenseId}`);

    return { reloaded: true, plugin_name: plugin.plugin_name };
@@ -215,19 +189,14 @@ export class PluginsService {

    const saved = await this.pluginRegistryRepo.save(plugin);

-    // Dispatch to companion agent via NATS
+    // Deploy .cs file to server via host agent
    try {
-      const content = file.buffer.toString('base64');
-      await this.natsService.publish(`corrosion.${licenseId}.cmd.server`, {
-        action: 'plugin_upload',
-        filename: originalName,
-        content,
-        timestamp: new Date().toISOString(),
-      });
-      this.logger.log(`Plugin upload dispatched: "${originalName}" (${file.size} bytes) for license ${licenseId}`);
+      const content = file.buffer.toString('utf8');
+      await this.instancesService.writeFileForLicense(licenseId, `oxide/plugins/${originalName}`, content);
+      this.logger.log(`Plugin upload deployed: "${originalName}" (${file.size} bytes) for license ${licenseId}`);
    } catch (err) {
-      this.logger.error(`NATS publish failed for plugin upload "${originalName}" on license ${licenseId}: ${(err as Error).message}`);
-      // Don't fail the request — plugin record is saved, NATS delivery is best-effort
+      this.logger.error(`File write failed for plugin upload "${originalName}" on license ${licenseId}: ${(err as Error).message}`);
+      // Don't fail the request — plugin record is saved, file delivery is best-effort
    }

    return saved;
--- a/backend-nest/src/modules/raidablebases/raidablebases.service.ts
+++ b/backend-nest/src/modules/raidablebases/raidablebases.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { RaidableBasesConfig } from '../../entities/raidablebases-config.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateRaidableBasesConfigDto } from './dto/create-raidablebases-config.dto';
 import { UpdateRaidableBasesConfigDto } from './dto/update-raidablebases-config.dto';

@@ -13,7 +13,7 @@ export class RaidableBasesService {
  constructor(
    @InjectRepository(RaidableBasesConfig)
    private readonly raidableBasesRepo: Repository<RaidableBasesConfig>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List configs for a license (summaries — no JSONB) */
@@ -81,26 +81,15 @@ export class RaidableBasesService {
    const jsonString = JSON.stringify(config.config_data, null, 2);

    try {
-      // Write RaidableBases.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/config/RaidableBases.json',
-          content: jsonString,
-        },
-        30000,
+      // Write RaidableBases.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/config/RaidableBases.json',
+        jsonString,
      );

      // Reload RaidableBases plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload RaidableBases',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload RaidableBases');

      // Mark this config as active, deactivate others
      await this.raidableBasesRepo.update({ license_id: licenseId }, { is_active: false });
@@ -126,17 +115,13 @@ export class RaidableBasesService {
  /** Import RaidableBases.json from game server via NATS */
  async importFromServer(licenseId: string, configName: string, description?: string) {
    try {
-      // Read RaidableBases.json from server via file manager NATS
-      const response = await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_preview',
-          path: 'server://oxide/config/RaidableBases.json',
-        },
-        30000,
+      // Read RaidableBases.json from server via Rust agent
+      const result = await this.instancesService.readFileForLicense(
+        licenseId,
+        'oxide/config/RaidableBases.json',
      );

-      if (!response) {
+      if (!result) {
        throw new HttpException(
          'No response from agent — it may be offline',
          HttpStatus.SERVICE_UNAVAILABLE,
@@ -144,13 +129,13 @@ export class RaidableBasesService {
      }

      // Parse the response content as JSON
-      const responseData = response as Record<string, any>;
+      const responseData = (result as any).content;
      let configData: Record<string, any>;

-      if (typeof responseData.content === 'string') {
-        configData = JSON.parse(responseData.content);
-      } else if (typeof responseData.content === 'object') {
-        configData = responseData.content;
+      if (typeof responseData === 'string') {
+        configData = JSON.parse(responseData);
+      } else if (typeof responseData === 'object') {
+        configData = responseData;
      } else {
        throw new HttpException(
          'Unexpected response format from agent',
--- a/backend-nest/src/modules/schedules/schedules.service.ts
+++ b/backend-nest/src/modules/schedules/schedules.service.ts
@@ -10,48 +10,8 @@ import { LessThanOrEqual, Repository } from 'typeorm';
 import { ScheduledTask } from '../../entities/scheduled-task.entity';
 import { CreateTaskDto } from './dto/create-task.dto';
 import { UpdateTaskDto } from './dto/update-task.dto';
-import { NatsService } from '../../services/nats.service';
-
-/** Parse a 5-field cron expression and return the next Date after `after`. */
-function nextCronDate(expr: string, after: Date): Date | null {
-  const parts = expr.trim().split(/\s+/);
-  if (parts.length !== 5) return null;
-
-  const [minuteExpr, hourExpr, domExpr, monthExpr, dowExpr] = parts;
-
-  function matches(expr: string, value: number): boolean {
-    if (expr === '*') return true;
-    return parseInt(expr, 10) === value;
-  }
-
-  // Walk minute-by-minute up to 366 days forward to find next match.
-  const candidate = new Date(after.getTime() + 60_000); // advance at least 1 minute
-  candidate.setSeconds(0, 0);
-
-  const limit = new Date(after.getTime() + 366 * 24 * 60 * 60 * 1000);
-
-  while (candidate < limit) {
-    const min   = candidate.getUTCMinutes();
-    const hour  = candidate.getUTCHours();
-    const dom   = candidate.getUTCDate();
-    const month = candidate.getUTCMonth() + 1; // 1-12
-    const dow   = candidate.getUTCDay();       // 0=Sun
-
-    if (
-      matches(minuteExpr, min) &&
-      matches(hourExpr, hour) &&
-      matches(domExpr, dom) &&
-      matches(monthExpr, month) &&
-      matches(dowExpr, dow)
-    ) {
-      return candidate;
-    }
-
-    candidate.setTime(candidate.getTime() + 60_000);
-  }
-
-  return null;
-}
+import { InstancesService } from '../instances/instances.service';
+import { nextCronDate } from '../../common/cron.util';

@Injectable()
 export class SchedulesService implements OnModuleInit, OnModuleDestroy {
@@ -61,7 +21,7 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {
  constructor(
    @InjectRepository(ScheduledTask)
    private taskRepository: Repository<ScheduledTask>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  // ---------------------------------------------------------------------------
@@ -160,21 +120,12 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {

    switch (task_type) {
      case 'restart':
-        await this.natsService.sendServerCommand(license_id, 'restart', {
-          source: 'scheduler',
-          task_id: task.id,
-        });
+        await this.instancesService.lifecycleForLicense(license_id, 'restart');
        break;

      case 'announcement': {
        const message = (task_config?.message as string) ?? 'Scheduled announcement';
-        await this.natsService.publish(`corrosion.${license_id}.cmd.server`, {
-          action: 'command',
-          command: `say ${message}`,
-          source: 'scheduler',
-          task_id: task.id,
-          timestamp: new Date().toISOString(),
-        });
+        await this.instancesService.rconForLicense(license_id, `say ${message}`);
        break;
      }

@@ -184,25 +135,13 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {
          this.logger.warn(`Task ${task.id} has no command configured — skipping`);
          return;
        }
-        await this.natsService.publish(`corrosion.${license_id}.cmd.server`, {
-          action: 'command',
-          command,
-          source: 'scheduler',
-          task_id: task.id,
-          timestamp: new Date().toISOString(),
-        });
+        await this.instancesService.rconForLicense(license_id, command);
        break;
      }

      case 'plugin_reload': {
        const plugin_name = (task_config?.plugin_name as string) ?? '';
-        await this.natsService.publish(`corrosion.${license_id}.cmd.plugin`, {
-          action: 'reload',
-          plugin_name,
-          source: 'scheduler',
-          task_id: task.id,
-          timestamp: new Date().toISOString(),
-        });
+        await this.instancesService.rconForLicense(license_id, `oxide.reload ${plugin_name}`);
        break;
      }

--- a/backend-nest/src/modules/servers/servers.controller.ts
+++ b/backend-nest/src/modules/servers/servers.controller.ts
@@ -23,6 +23,13 @@ export class ServersController {
    return await this.serversService.getServer(licenseId);
  }

+  @Get('agent-credentials')
+  @RequirePermission('server.manage')
+  @ApiOperation({ summary: 'NATS credentials for this license\'s host agent' })
+  async getAgentCredentials(@CurrentTenant() licenseId: string) {
+    return await this.serversService.getAgentCredentials(licenseId);
+  }
+
  @Put('config')
  @RequirePermission('server.manage')
  @ApiOperation({ summary: 'Update server configuration' })
--- a/backend-nest/src/modules/servers/servers.service.ts
+++ b/backend-nest/src/modules/servers/servers.service.ts
@@ -1,9 +1,10 @@
-import { Injectable, NotFoundException, InternalServerErrorException, Logger } from '@nestjs/common';
+import { Injectable, NotFoundException, InternalServerErrorException, ServiceUnavailableException, Logger } from '@nestjs/common';
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { ServerConnection } from '../../entities/server-connection.entity';
 import { ServerConfig } from '../../entities/server-config.entity';
 import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { UpdateServerConfigDto } from './dto/update-config.dto';
 import { DeployServerDto } from './dto/deploy-server.dto';

@@ -17,8 +18,18 @@ export class ServersService {
    @InjectRepository(ServerConfig)
    private readonly configRepo: Repository<ServerConfig>,
    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

+  /**
+   * NATS credentials the customer puts in their host agent's config so it can
+   * authenticate to the per-license-scoped broker. Returns null if the broker
+   * isn't enforcing auth yet (NATS_TOKEN_SECRET unset).
+   */
+  async getAgentCredentials(licenseId: string) {
+    return this.natsService.getAgentCredentials(licenseId);
+  }
+
  /**
   * Get server connection and config for a license.
   * Returns null fields if no server has been set up yet.
@@ -59,11 +70,11 @@ export class ServersService {
  }

  /**
-   * Send a console command to the server via NATS
+   * Send a console command to the server via the host agent (RCON)
   */
  async sendCommand(licenseId: string, command: string) {
    try {
-      await this.natsService.sendServerCommand(licenseId, 'command', { command });
+      await this.instancesService.rconForLicense(licenseId, command);
      this.logger.log(`Console command dispatched for license ${licenseId}: ${command}`);
    } catch (err) {
      this.logger.error(`Failed to dispatch console command for license ${licenseId}: ${(err as Error).message}`);
@@ -73,42 +84,45 @@ export class ServersService {
  }

  /**
-   * Start the server via NATS
+   * Start the server via the host agent
   */
  async startServer(licenseId: string) {
-    await this.natsService.sendServerCommand(licenseId, 'start');
+    await this.instancesService.lifecycleForLicense(licenseId, 'start');
    return { message: 'Start command sent' };
  }

  /**
-   * Stop the server via NATS
+   * Stop the server via the host agent
   */
  async stopServer(licenseId: string) {
-    await this.natsService.sendServerCommand(licenseId, 'stop');
+    await this.instancesService.lifecycleForLicense(licenseId, 'stop');
    return { message: 'Stop command sent' };
  }

  /**
-   * Restart the server via NATS
+   * Restart the server via the host agent
   */
  async restartServer(licenseId: string) {
-    await this.natsService.sendServerCommand(licenseId, 'restart');
+    await this.instancesService.lifecycleForLicense(licenseId, 'restart');
    return { message: 'Restart command sent' };
  }

  /**
-   * Deploy Rust server via companion agent
+   * Deploy Rust server — not yet supported via host agent.
+   * Install the server manually and point the host agent at it.
   */
-  async deployServer(licenseId: string, dto: DeployServerDto) {
-    await this.natsService.sendDeployCommand(licenseId, { ...dto });
-    return { message: 'Deployment started' };
+  async deployServer(_licenseId: string, _dto: DeployServerDto) {
+    throw new ServiceUnavailableException(
+      'Server deployment from the panel is coming soon — install the server and point the host agent at it for now.',
+    );
  }

  /**
-   * Install Oxide/uMod via companion agent
+   * Install Oxide/uMod — not yet supported via host agent.
   */
-  async installOxide(licenseId: string) {
-    await this.natsService.sendOxideInstallCommand(licenseId);
-    return { message: 'Oxide installation started' };
+  async installOxide(_licenseId: string) {
+    throw new ServiceUnavailableException(
+      'Oxide install from the panel is coming soon — install Oxide/uMod on the server for now.',
+    );
  }
 }
--- a/backend-nest/src/modules/setup/setup.service.ts
+++ b/backend-nest/src/modules/setup/setup.service.ts
@@ -1,4 +1,4 @@
-import { Injectable } from '@nestjs/common';
+import { Injectable, ServiceUnavailableException } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
@@ -55,6 +55,13 @@ export class SetupService {
    if (dto.panel_api_key) {
      const encryptionKey = this.configService.get<string>('encryption.key', '');
      const keyBuffer = Buffer.from(encryptionKey, 'hex');
+      // AES-256-GCM needs a 32-byte key. An unset/short ENCRYPTION_KEY would
+      // otherwise crash createCipheriv with an opaque "Invalid key length" 500.
+      if (keyBuffer.length !== 32) {
+        throw new ServiceUnavailableException(
+          'Server encryption is not configured (ENCRYPTION_KEY must be 32 bytes / 64 hex chars). Contact the platform operator.',
+        );
+      }
      const iv = crypto.randomBytes(16);
      const cipher = crypto.createCipheriv('aes-256-gcm', keyBuffer, iv);
      const encrypted = Buffer.concat([
@@ -82,9 +89,12 @@ export class SetupService {
    });

    if (connection) {
-      // For bare metal, mark as connected immediately (waiting for agent)
-      if (connection.connection_type === 'bare_metal') {
-        connection.connection_status = 'connected';
+      // Bare-metal stays 'offline' until the agent's first heartbeat flips it
+      // 'connected' (HostAgentConsumerService). Marking it connected here was a
+      // false positive — the dashboard showed a live server before any agent
+      // had checked in.
+      if (connection.connection_type === 'bare_metal' && connection.connection_status !== 'connected') {
+        connection.connection_status = 'offline';
        connection.updated_at = new Date();
        await this.connectionRepo.save(connection);
      }
--- a/backend-nest/src/modules/store/store.service.ts
+++ b/backend-nest/src/modules/store/store.service.ts
@@ -57,11 +57,17 @@ export class StoreService {
      throw new NotFoundException('Module not found');
    }

+    // Beta: modules are granted free (no payment processing wired yet). Record
+    // it honestly as a beta grant at $0 rather than a fake `txn_*` id that
+    // implies a real charge occurred.
+    this.logger.log(
+      `Granting module ${moduleId} to license ${licenseId} free (Beta — no payment processing)`,
+    );
    const purchase = this.purchaseRepo.create({
      license_id: licenseId,
      module_id: moduleId,
-      transaction_id: `txn_${Date.now()}`,
-      amount_paid: parseFloat(module.price_usd.toString()),
+      transaction_id: 'beta-free-grant',
+      amount_paid: 0,
    });

    return this.purchaseRepo.save(purchase);
--- a/backend-nest/src/modules/teleport/teleport.service.ts
+++ b/backend-nest/src/modules/teleport/teleport.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { TeleportConfig } from '../../entities/teleport-config.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateTeleportConfigDto } from './dto/create-teleport-config.dto';
 import { UpdateTeleportConfigDto } from './dto/update-teleport-config.dto';

@@ -13,7 +13,7 @@ export class TeleportService {
  constructor(
    @InjectRepository(TeleportConfig)
    private readonly teleportRepo: Repository<TeleportConfig>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List configs for a license (summaries — no JSONB) */
@@ -81,26 +81,15 @@ export class TeleportService {
    const jsonString = JSON.stringify(config.config_data, null, 2);

    try {
-      // Write NTeleportation.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/config/NTeleportation.json',
-          content: jsonString,
-        },
-        30000,
+      // Write NTeleportation.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/config/NTeleportation.json',
+        jsonString,
      );

      // Reload NTeleportation plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload NTeleportation',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload NTeleportation');

      // Mark this config as active, deactivate others
      await this.teleportRepo.update({ license_id: licenseId }, { is_active: false });
@@ -126,17 +115,13 @@ export class TeleportService {
  /** Import NTeleportation.json from game server via NATS */
  async importFromServer(licenseId: string, configName: string, description?: string) {
    try {
-      // Read NTeleportation.json from server via file manager NATS
-      const response = await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_preview',
-          path: 'server://oxide/config/NTeleportation.json',
-        },
-        30000,
+      // Read NTeleportation.json from server via Rust agent
+      const result = await this.instancesService.readFileForLicense(
+        licenseId,
+        'oxide/config/NTeleportation.json',
      );

-      if (!response) {
+      if (!result) {
        throw new HttpException(
          'No response from agent — it may be offline',
          HttpStatus.SERVICE_UNAVAILABLE,
@@ -144,13 +129,13 @@ export class TeleportService {
      }

      // Parse the response content as JSON
-      const responseData = response as Record<string, any>;
+      const responseData = (result as any).content;
      let configData: Record<string, any>;

-      if (typeof responseData.content === 'string') {
-        configData = JSON.parse(responseData.content);
-      } else if (typeof responseData.content === 'object') {
-        configData = responseData.content;
+      if (typeof responseData === 'string') {
+        configData = JSON.parse(responseData);
+      } else if (typeof responseData === 'object') {
+        configData = responseData;
      } else {
        throw new HttpException(
          'Unexpected response format from agent',
--- a/backend-nest/src/modules/timedexecute/timedexecute.service.ts
+++ b/backend-nest/src/modules/timedexecute/timedexecute.service.ts
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { TimedExecuteConfig } from '../../entities/timedexecute-config.entity';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
 import { CreateTimedExecuteConfigDto } from './dto/create-timedexecute-config.dto';
 import { UpdateTimedExecuteConfigDto } from './dto/update-timedexecute-config.dto';

@@ -13,7 +13,7 @@ export class TimedExecuteService {
  constructor(
    @InjectRepository(TimedExecuteConfig)
    private readonly repo: Repository<TimedExecuteConfig>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
  ) {}

  /** List configs for a license (summaries — no JSONB) */
@@ -81,26 +81,15 @@ export class TimedExecuteService {
    const jsonString = JSON.stringify(config.config_data, null, 2);

    try {
-      // Write TimedExecute.json via file manager NATS
-      await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_save',
-          path: 'server://oxide/config/TimedExecute.json',
-          content: jsonString,
-        },
-        30000,
+      // Write TimedExecute.json via Rust agent
+      await this.instancesService.writeFileForLicense(
+        licenseId,
+        'oxide/config/TimedExecute.json',
+        jsonString,
      );

      // Reload TimedExecute plugin via RCON
-      await this.natsService.publish(
-        `corrosion.${licenseId}.cmd.server`,
-        {
-          action: 'command',
-          command: 'oxide.reload TimedExecute',
-          timestamp: new Date().toISOString(),
-        },
-      );
+      await this.instancesService.rconForLicense(licenseId, 'oxide.reload TimedExecute');

      // Mark this config as active, deactivate others
      await this.repo.update({ license_id: licenseId }, { is_active: false });
@@ -126,17 +115,13 @@ export class TimedExecuteService {
  /** Import TimedExecute.json from game server via NATS */
  async importFromServer(licenseId: string, configName: string, description?: string) {
    try {
-      // Read TimedExecute.json from server via file manager NATS
-      const response = await this.natsService.request(
-        `corrosion.${licenseId}.files.cmd`,
-        {
-          func: 'fm_preview',
-          path: 'server://oxide/config/TimedExecute.json',
-        },
-        30000,
+      // Read TimedExecute.json from server via Rust agent
+      const result = await this.instancesService.readFileForLicense(
+        licenseId,
+        'oxide/config/TimedExecute.json',
      );

-      if (!response) {
+      if (!result) {
        throw new HttpException(
          'No response from agent — it may be offline',
          HttpStatus.SERVICE_UNAVAILABLE,
@@ -144,13 +129,13 @@ export class TimedExecuteService {
      }

      // Parse the response content as JSON
-      const responseData = response as Record<string, any>;
+      const responseData = (result as any).content;
      let configData: Record<string, any>;

-      if (typeof responseData.content === 'string') {
-        configData = JSON.parse(responseData.content);
-      } else if (typeof responseData.content === 'object') {
-        configData = responseData.content;
+      if (typeof responseData === 'string') {
+        configData = JSON.parse(responseData);
+      } else if (typeof responseData === 'object') {
+        configData = responseData;
      } else {
        throw new HttpException(
          'Unexpected response format from agent',
--- a/backend-nest/src/modules/webhooks/dto/create-webhook.dto.ts
+++ b/backend-nest/src/modules/webhooks/dto/create-webhook.dto.ts
@@ -0,0 +1,33 @@
+import { IsString, IsNotEmpty, IsUrl, IsArray, ArrayNotEmpty, IsOptional, MaxLength } from 'class-validator';
+import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
+
+export class CreateWebhookDto { // request body accepted when registering a webhook
+  @ApiProperty({ description: 'Human-readable label for this webhook', maxLength: 100 })
+  @IsString()
+  @IsNotEmpty()
+  @MaxLength(100)
+  name: string;
+
+  @ApiProperty({ description: 'HTTP or HTTPS URL to POST events to' })
+  @IsUrl({ protocols: ['https', 'http'], require_protocol: true, require_tld: false }) // scheme is mandatory: a bare host is not a usable request target
+  url: string;
+
+  @ApiProperty({
+    description: 'Event keys to subscribe to',
+    example: ['player_banned', 'server_down'],
+    type: [String],
+  })
+  @IsArray()
+  @ArrayNotEmpty()
+  @IsString({ each: true })
+  events: string[];
+
+  @ApiPropertyOptional({
+    description: 'HMAC-SHA256 signing secret. Auto-generated if omitted.',
+    maxLength: 128,
+  })
+  @IsOptional()
+  @IsString()
+  @MaxLength(128)
+  secret?: string;
+}
--- a/backend-nest/src/modules/webhooks/dto/update-webhook.dto.ts
+++ b/backend-nest/src/modules/webhooks/dto/update-webhook.dto.ts
@@ -0,0 +1,31 @@
+import { IsString, IsUrl, IsArray, ArrayNotEmpty, IsOptional, IsBoolean, MaxLength } from 'class-validator';
+import { ApiPropertyOptional } from '@nestjs/swagger';
+
+export class UpdateWebhookDto { // partial update body: every field is optional
+  @ApiPropertyOptional({ description: 'Human-readable label for this webhook', maxLength: 100 })
+  @IsOptional()
+  @IsString()
+  @MaxLength(100)
+  name?: string;
+
+  @ApiPropertyOptional({ description: 'HTTP or HTTPS URL to POST events to' })
+  @IsOptional()
+  @IsUrl({ protocols: ['https', 'http'], require_protocol: true, require_tld: false }) // scheme is mandatory: a bare host is not a usable request target
+  url?: string;
+
+  @ApiPropertyOptional({
+    description: 'Event keys to subscribe to',
+    example: ['player_banned', 'server_down'],
+    type: [String],
+  })
+  @IsOptional()
+  @IsArray()
+  @ArrayNotEmpty()
+  @IsString({ each: true })
+  events?: string[];
+
+  @ApiPropertyOptional({ description: 'Enable or disable this webhook' })
+  @IsOptional()
+  @IsBoolean()
+  is_active?: boolean;
+}
--- a/backend-nest/src/modules/webhooks/webhooks.controller.ts
+++ b/backend-nest/src/modules/webhooks/webhooks.controller.ts
@@ -0,0 +1,70 @@
+import {
+  Controller,
+  Get,
+  Post,
+  Patch,
+  Delete,
+  Body,
+  Param,
+} from '@nestjs/common';
+import { ApiTags, ApiBearerAuth, ApiOperation, ApiResponse } from '@nestjs/swagger';
+import { WebhooksService } from './webhooks.service';
+import { CreateWebhookDto } from './dto/create-webhook.dto';
+import { UpdateWebhookDto } from './dto/update-webhook.dto';
+import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
+import { RequirePermission } from '../../common/decorators/require-permission.decorator';
+
+@ApiTags('webhooks')
+@ApiBearerAuth()
+@Controller('webhooks')
+export class WebhooksController { // thin HTTP layer: every handler forwards to WebhooksService with the license id
+  constructor(private readonly webhooksService: WebhooksService) {}
+
+  @Post()
+  @RequirePermission('webhooks.manage')
+  @ApiOperation({
+    summary: 'Create a webhook',
+    description:
+      'Registers a new outbound webhook for this license. A signing secret is auto-generated if not provided.',
+  })
+  @ApiResponse({ status: 201, description: 'Webhook created.' })
+  async create(
+    @CurrentTenant() licenseId: string, // presumably the authenticated caller's license — confirm in the CurrentTenant decorator
+    @Body() dto: CreateWebhookDto,
+  ) {
+    return this.webhooksService.create(licenseId, dto); // the returned item includes the signing secret
+  }
+
+  @Get()
+  @RequirePermission('webhooks.view') // the only read-only route; the mutating routes require webhooks.manage
+  @ApiOperation({ summary: 'List webhooks', description: 'Returns all webhooks for this license.' })
+  @ApiResponse({ status: 200, description: 'Webhook list.' })
+  async list(@CurrentTenant() licenseId: string) {
+    return this.webhooksService.list(licenseId);
+  }
+
+  @Patch(':id')
+  @RequirePermission('webhooks.manage')
+  @ApiOperation({ summary: 'Update a webhook', description: 'Update name, URL, event subscriptions, or active state.' })
+  @ApiResponse({ status: 200, description: 'Webhook updated.' })
+  @ApiResponse({ status: 404, description: 'Webhook not found in this license.' })
+  async update(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string, // not format-checked here; the service looks it up together with licenseId
+    @Body() dto: UpdateWebhookDto,
+  ) {
+    return this.webhooksService.update(licenseId, id, dto);
+  }
+
+  @Delete(':id')
+  @RequirePermission('webhooks.manage')
+  @ApiOperation({ summary: 'Delete a webhook' })
+  @ApiResponse({ status: 200, description: 'Webhook deleted.' })
+  @ApiResponse({ status: 404, description: 'Webhook not found in this license.' })
+  async remove(
+    @CurrentTenant() licenseId: string,
+    @Param('id') id: string,
+  ) {
+    return this.webhooksService.remove(licenseId, id); // resolves to { id } of the deleted webhook
+  }
+}
--- a/backend-nest/src/modules/webhooks/webhooks.module.ts
+++ b/backend-nest/src/modules/webhooks/webhooks.module.ts
@@ -0,0 +1,14 @@
+import { Global, Module } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
+import { Webhook } from '../../entities/webhook.entity';
+import { WebhooksController } from './webhooks.controller';
+import { WebhooksService } from './webhooks.service';
+
+@Global() // global scope: other modules can inject the exported WebhooksService without importing WebhooksModule
+@Module({
+  imports: [TypeOrmModule.forFeature([Webhook])], // provides the Webhook repository injected into WebhooksService
+  controllers: [WebhooksController],
+  providers: [WebhooksService],
+  exports: [WebhooksService], // exported so event producers elsewhere can call dispatch()
+})
+export class WebhooksModule {}
--- a/backend-nest/src/modules/webhooks/webhooks.service.ts
+++ b/backend-nest/src/modules/webhooks/webhooks.service.ts
@@ -0,0 +1,236 @@
+import { Injectable, Logger, NotFoundException } from '@nestjs/common';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository } from 'typeorm';
+import * as crypto from 'crypto';
+import { Webhook } from '../../entities/webhook.entity';
+import { CreateWebhookDto } from './dto/create-webhook.dto';
+import { UpdateWebhookDto } from './dto/update-webhook.dto';
+import { assertPublicHttpUrl } from '../../common/ssrf-guard';
+
+/** Safe list view — secret is included (operator's own resource). */
+export interface WebhookListItem {
+  id: string;
+  name: string;
+  url: string;
+  events: string[];
+  secret: string;
+  is_active: boolean;
+  last_delivery_at: Date | null;
+  last_status: string | null;
+  created_at: Date;
+}
+
+/** Shape returned on create — identical to list item. */
+export type CreatedWebhook = WebhookListItem;
+
+@Injectable()
+export class WebhooksService { // per-license webhook CRUD plus HMAC-signed event delivery
+  private readonly logger = new Logger(WebhooksService.name);
+
+  constructor(
+    @InjectRepository(Webhook)
+    private readonly webhookRepo: Repository<Webhook>,
+  ) {}
+
+  // ---------------------------------------------------------------------------
+  // CRUD
+  // ---------------------------------------------------------------------------
+
+  async create(licenseId: string, dto: CreateWebhookDto): Promise<CreatedWebhook> {
+    // SSRF guard: reject URLs resolving to private/reserved space before storing.
+    await assertPublicHttpUrl(dto.url);
+
+    // Generate a secret unless a non-empty one was supplied ('' would be a forgeable HMAC key).
+    const secret = dto.secret || crypto.randomBytes(32).toString('hex');
+
+    const entity = this.webhookRepo.create({
+      license_id: licenseId,
+      name: dto.name,
+      url: dto.url,
+      events: dto.events,
+      secret,
+      is_active: true,
+    });
+
+    const saved = await this.webhookRepo.save(entity);
+
+    this.logger.log(
+      `webhook created: id=${saved.id} name="${saved.name}" events=[${saved.events.join(',')}] license=${licenseId}`,
+    );
+
+    return this.toListItem(saved);
+  }
+
+  async list(licenseId: string): Promise<WebhookListItem[]> { // newest first
+    const rows = await this.webhookRepo.find({
+      where: { license_id: licenseId },
+      order: { created_at: 'DESC' },
+    });
+    return rows.map((row) => this.toListItem(row)); // arrow wrapper keeps `this` bound
+  }
+
+  async update(licenseId: string, id: string, dto: UpdateWebhookDto): Promise<WebhookListItem> {
+    const webhook = await this.findOwned(licenseId, id);
+
+    // SSRF guard on any URL change.
+    if (dto.url !== undefined) await assertPublicHttpUrl(dto.url);
+
+    if (dto.name !== undefined) webhook.name = dto.name;
+    if (dto.url !== undefined) webhook.url = dto.url;
+    if (dto.events !== undefined) webhook.events = dto.events;
+    if (dto.is_active !== undefined) webhook.is_active = dto.is_active;
+
+    const saved = await this.webhookRepo.save(webhook);
+
+    this.logger.log(`webhook updated: id=${id} license=${licenseId}`);
+
+    return this.toListItem(saved);
+  }
+
+  async remove(licenseId: string, id: string): Promise<{ id: string }> { // NotFoundException if not owned by this license
+    const webhook = await this.findOwned(licenseId, id);
+    await this.webhookRepo.remove(webhook);
+    this.logger.log(`webhook deleted: id=${id} license=${licenseId}`);
+    return { id };
+  }
+
+  // ---------------------------------------------------------------------------
+  // Dispatch
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Fire an event to all active webhooks for a license that are subscribed to
+   * the given event key.
+   *
+   * Contract:
+   *  - Each delivery is attempted with a 5-second AbortController timeout;
+   *    failures are logged and recorded, not propagated to the caller.
+   *  - Each attempt updates last_delivery_at + last_status ('ok' | 'failed').
+   *  - Deliveries run concurrently via Promise.allSettled. The returned Promise
+   *    resolves only after every attempt has finished (or timed out), so a
+   *    caller that must not be blocked should `void` the call, not await it.
+   *
+   * Signature header: X-Corrosion-Signature: sha256=<hex>
+   * where hex = HMAC-SHA256(rawBody, webhook.secret).
+   */
+  async dispatch(
+    licenseId: string,
+    event: string,
+    payload: Record<string, unknown>,
+  ): Promise<void> {
+    let hooks: Webhook[];
+    try {
+      hooks = await this.webhookRepo.find({
+        where: { license_id: licenseId, is_active: true },
+      });
+    } catch (err) {
+      this.logger.error(
+        `dispatch: failed to query webhooks for license ${licenseId}: ${(err as Error).message}`,
+      );
+      return;
+    }
+
+    // Filter to those subscribed to this event.
+    const subscribed = hooks.filter((h) => h.events.includes(event));
+    if (subscribed.length === 0) return;
+
+    const body = JSON.stringify({
+      event,
+      timestamp: new Date().toISOString(),
+      data: payload,
+    });
+
+    await Promise.allSettled(
+      subscribed.map((hook) => this.deliverOne(hook, event, body)),
+    );
+  }
+
+  /** Deliver to a single webhook endpoint; update delivery metadata. Never throws. */
+  private async deliverOne(hook: Webhook, event: string, body: string): Promise<void> {
+    const signature = this.sign(body, hook.secret);
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), 5_000);
+
+    let status: 'ok' | 'failed' = 'failed';
+
+    try {
+      // Re-validate at send time: a host that was public at create time can
+      // resolve to a private address now (DNS rebinding / TOCTOU). Throws → caught
+      // below → recorded 'failed'.
+      await assertPublicHttpUrl(hook.url);
+
+      const res = await fetch(hook.url, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          'X-Corrosion-Signature': `sha256=${signature}`,
+        },
+        body,
+        signal: controller.signal,
+        // Do not auto-follow redirects — a 3xx Location could point at an
+        // internal host, re-opening the SSRF we just closed. A redirect is a
+        // failed delivery here.
+        redirect: 'manual',
+      });
+
+      if (res.ok) {
+        status = 'ok';
+      } else {
+        this.logger.warn(
+          `webhook delivery failed: id=${hook.id} event=${event} status=${res.status}`,
+        );
+      }
+    } catch (err) {
+      const msg = (err as Error).message ?? String(err);
+      this.logger.warn(
+        `webhook delivery error: id=${hook.id} event=${event} err=${msg}`,
+      );
+    } finally {
+      clearTimeout(timer);
+    }
+
+    // Persist delivery outcome — best-effort, never throws.
+    try {
+      await this.webhookRepo.update(hook.id, {
+        last_delivery_at: new Date(),
+        last_status: status,
+      });
+    } catch (err) {
+      this.logger.error(
+        `webhook metadata update failed: id=${hook.id}: ${(err as Error).message}`,
+      );
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Helpers
+  // ---------------------------------------------------------------------------
+
+  private async findOwned(licenseId: string, id: string): Promise<Webhook> { // lookup is always scoped by license_id
+    const webhook = await this.webhookRepo.findOne({
+      where: { id, license_id: licenseId },
+    });
+    if (!webhook) {
+      throw new NotFoundException(`Webhook ${id} not found`);
+    }
+    return webhook;
+  }
+
+  private sign(body: string, secret: string): string { // hex HMAC-SHA256 of the exact body string that is sent
+    return crypto.createHmac('sha256', secret).update(body).digest('hex');
+  }
+
+  private toListItem(w: Webhook): WebhookListItem { // explicit field copy; license_id is not part of the response
+    return {
+      id: w.id,
+      name: w.name,
+      url: w.url,
+      events: w.events,
+      secret: w.secret,
+      is_active: w.is_active,
+      last_delivery_at: w.last_delivery_at,
+      last_status: w.last_status,
+      created_at: w.created_at,
+    };
+  }
+}
--- a/backend-nest/src/modules/webstore/webstore.service.ts
+++ b/backend-nest/src/modules/webstore/webstore.service.ts
@@ -1,4 +1,4 @@
-import { Injectable, NotFoundException } from '@nestjs/common';
+import { Injectable, NotFoundException, ServiceUnavailableException } from '@nestjs/common';
 import { InjectRepository } from '@nestjs/typeorm';
 import { Repository } from 'typeorm';
 import { StoreConfig } from '../../entities/store-config.entity';
@@ -224,23 +224,13 @@ export class WebstoreService {
      throw new NotFoundException('Item not found');
    }

-    const transaction = this.transactionRepo.create({
-      license_id: license.id,
-      item_id: item.id,
-      steam_id: dto.steam_id,
-      player_name: dto.player_name,
-      paypal_order_id: `order_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
-      amount: parseFloat(item.price.toString()),
-      currency: 'USD', // Would get from config
-      status: 'pending',
-    });
-
-    await this.transactionRepo.save(transaction);
-
-    // Return mock PayPal approval URL
-    return {
-      order_id: transaction.paypal_order_id,
-      approval_url: `https://www.sandbox.paypal.com/checkoutnow?token=${transaction.paypal_order_id}`,
-    };
+    // Beta: real PayPal/Stripe processing is not wired yet. Refuse honestly
+    // instead of writing a pending transaction and handing the player a fake
+    // order token that resolves to nowhere. (item lookup above still validates
+    // the request so the storefront UI can show the catalogue.)
+    void item;
+    throw new ServiceUnavailableException(
+      'Storefront checkout is not available yet — payment processing is coming soon.',
+    );
  }
 }
--- a/backend-nest/src/modules/wipes/wipes.service.ts
+++ b/backend-nest/src/modules/wipes/wipes.service.ts
@@ -1,6 +1,12 @@
-import { Injectable, NotFoundException, Logger } from '@nestjs/common';
+import {
+  Injectable,
+  NotFoundException,
+  Logger,
+  OnModuleInit,
+  OnModuleDestroy,
+} from '@nestjs/common';
 import { InjectRepository } from '@nestjs/typeorm';
-import { Repository } from 'typeorm';
+import { IsNull, LessThanOrEqual, Repository } from 'typeorm';
 import { WipeProfile } from '../../entities/wipe-profile.entity';
 import { WipeSchedule } from '../../entities/wipe-schedule.entity';
 import { WipeHistory } from '../../entities/wipe-history.entity';
@@ -8,11 +14,14 @@ import { CreateProfileDto } from './dto/create-profile.dto';
 import { UpdateProfileDto } from './dto/update-profile.dto';
 import { CreateScheduleDto } from './dto/create-schedule.dto';
 import { TriggerWipeDto } from './dto/trigger-wipe.dto';
-import { NatsService } from '../../services/nats.service';
+import { InstancesService } from '../instances/instances.service';
+import { WebhooksService } from '../webhooks/webhooks.service';
+import { nextCronDate } from '../../common/cron.util';

@Injectable()
-export class WipesService {
+export class WipesService implements OnModuleInit, OnModuleDestroy {
  private readonly logger = new Logger(WipesService.name);
+  private wipeExecutorInterval: ReturnType<typeof setInterval> | null = null;

  constructor(
    @InjectRepository(WipeProfile)
@@ -21,9 +30,86 @@ export class WipesService {
    private readonly wipeScheduleRepo: Repository<WipeSchedule>,
    @InjectRepository(WipeHistory)
    private readonly wipeHistoryRepo: Repository<WipeHistory>,
-    private readonly natsService: NatsService,
+    private readonly instancesService: InstancesService,
+    private readonly webhooksService: WebhooksService,
  ) {}

+  // ---------------------------------------------------------------------------
+  // Scheduled-wipe executor — the auto-wiper. Mirrors SchedulesService: a 60s
+  // poll fires every active wipe schedule whose next_scheduled_run is due, then
+  // advances it from its cron expression. Without this, wipe_schedules rows
+  // never fire (the headline auto-wipe feature was inert).
+  // ---------------------------------------------------------------------------
+
+  onModuleInit(): void {
+    // Both background tasks log their failures; neither is awaited by startup.
+    const logFailure = (what: string) => (err: unknown) => this.logger.error(what, err);
+    this.bootstrapWipeSchedules().catch(logFailure('Failed to bootstrap wipe-schedule next runs'));
+    // Poll for due schedules once every 60 seconds.
+    const pollOnce = (): void => {
+      this.executeDueWipes().catch(logFailure('Wipe-schedule executor error'));
+    };
+    this.wipeExecutorInterval = setInterval(pollOnce, 60_000);
+    this.logger.log('Wipe-schedule executor started (60s polling interval)');
+  }
+
+  onModuleDestroy(): void {
+    const timer = this.wipeExecutorInterval;
+    if (!timer) return; // executor never started, or already stopped
+    clearInterval(timer);
+    this.wipeExecutorInterval = null;
+  }
+
+  /** On startup, stamp next_scheduled_run on active schedules that lack one. */
+  private async bootstrapWipeSchedules(): Promise<void> {
+    const schedules = await this.wipeScheduleRepo.find({
+      where: { is_active: true, next_scheduled_run: IsNull() },
+    });
+    let stamped = 0;
+    for (const s of schedules) {
+      const next = nextCronDate(s.cron_expression, new Date());
+      if (!next) continue; // nextCronDate gave no usable date — leave the row untouched
+      s.next_scheduled_run = next;
+      await this.wipeScheduleRepo.save(s);
+      stamped += 1;
+    }
+    // Report the rows actually stamped, not merely the rows found.
+    if (stamped > 0) this.logger.log(`Bootstrapped next run for ${stamped} wipe schedule(s)`);
+  }
+
+  /** One poll tick: trigger every active schedule that is due, then advance it. */
+  private async executeDueWipes(): Promise<void> {
+    const tickTime = new Date();
+    const dueSchedules = await this.wipeScheduleRepo.find({
+      where: { is_active: true, next_scheduled_run: LessThanOrEqual(tickTime) },
+    });
+    if (!dueSchedules.length) return;
+
+    this.logger.log(`Executing ${dueSchedules.length} due wipe schedule(s)`);
+    for (const schedule of dueSchedules) {
+      try {
+        await this.triggerWipe(
+          schedule.license_id,
+          {
+            wipe_type: schedule.wipe_type as TriggerWipeDto['wipe_type'],
+            wipe_profile_id: schedule.wipe_profile_id,
+          },
+          'scheduled',
+        );
+      } catch (err) {
+        this.logger.error(
+          `Scheduled wipe failed for schedule ${schedule.id} (${schedule.schedule_name})`,
+          (err as Error).stack,
+        );
+      } finally {
+        // Move the schedule forward whether or not the trigger worked; otherwise
+        // a broken schedule would stay due and fire again on every 60s tick.
+        schedule.next_scheduled_run = nextCronDate(schedule.cron_expression, tickTime);
+        await this.wipeScheduleRepo.save(schedule);
+      }
+    }
+  }
+
  async getProfiles(licenseId: string): Promise<WipeProfile[]> {
    return this.wipeProfileRepo.find({
      where: { license_id: licenseId },
@@ -96,25 +182,56 @@ export class WipesService {
  async triggerWipe(
    licenseId: string,
    dto: TriggerWipeDto,
+    triggerType: 'manual' | 'scheduled' = 'manual',
  ): Promise<{ wipe_history_id: string }> {
    const history = this.wipeHistoryRepo.create({
      license_id: licenseId,
      wipe_type: dto.wipe_type,
      wipe_profile_id: dto.wipe_profile_id,
-      trigger_type: 'manual',
-      status: 'pending',
+      trigger_type: triggerType,
+      status: 'wiping',
+      started_at: new Date(),
    });

    const saved = await this.wipeHistoryRepo.save(history);
+    this.logger.log(
+      `Wipe ${triggerType} dispatched for license ${licenseId} — history ${saved.id}`,
+    );

-    await this.natsService.publish(`corrosion.${licenseId}.cmd.wipe`, {
-      wipe_history_id: saved.id,
-      wipe_type: dto.wipe_type,
-      wipe_profile_id: dto.wipe_profile_id ?? null,
-      trigger_type: 'manual',
-      timestamp: new Date().toISOString(),
-    });
-    this.logger.log(`Wipe triggered for license ${licenseId} — history id ${saved.id}`);
+    // Dispatch WITHOUT blocking the caller — a wipe is stop → delete → start and
+    // can take a minute+. The agent's reply is recorded on wipe_history (the row
+    // used to stay 'pending' forever) and wipe_completed fires on success. Every
+    // detached promise here ends in a catch: nothing may reject unhandled.
+    void this.instancesService
+      .wipeForLicense(licenseId, dto.wipe_type, true)
+      .then(async (reply: unknown) => {
+        const r = (reply ?? {}) as { status?: string; message?: string; deleted_count?: number };
+        const ok = r.status === 'success';
+        saved.status = ok ? 'success' : 'failed';
+        saved.completed_at = new Date();
+        if (!ok) saved.error_message = r.message ?? 'agent reported wipe failure';
+        await this.wipeHistoryRepo.save(saved);
+        this.logger.log(`Wipe ${saved.id} ${saved.status}`);
+        if (!ok) return;
+        void this.webhooksService
+          .dispatch(licenseId, 'wipe_completed', {
+            wipe_history_id: saved.id,
+            wipe_type: dto.wipe_type,
+            trigger_type: triggerType,
+            deleted_count: r.deleted_count ?? null,
+          })
+          .catch(() => undefined); // dispatch() logs internally
+      })
+      .catch(async (err: unknown) => {
+        saved.status = 'failed';
+        saved.completed_at = new Date();
+        saved.error_message = err instanceof Error ? err.message : 'wipe dispatch failed';
+        this.logger.warn(`Wipe ${saved.id} failed: ${saved.error_message}`);
+        await this.wipeHistoryRepo.save(saved);
+      })
+      .catch((persistErr: unknown) => {
+        this.logger.error(`Wipe ${saved.id}: could not persist outcome`, (persistErr as Error)?.stack);
+      });

    return { wipe_history_id: saved.id };
  }
--- a/backend-nest/src/services/host-agent-consumer.service.ts
+++ b/backend-nest/src/services/host-agent-consumer.service.ts
@@ -0,0 +1,313 @@
+import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common';
+import { Interval } from '@nestjs/schedule';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository } from 'typeorm';
+import { NatsService } from './nats.service';
+import { ServerConnection } from '../entities/server-connection.entity';
+import { License } from '../entities/license.entity';
+import { AgentHost, AgentHostDisk } from '../entities/agent-host.entity';
+import { GameInstance } from '../entities/game-instance.entity';
+import { WebhooksService } from '../modules/webhooks/webhooks.service';
+
+/** Protocol v2 heartbeat as this consumer reads it; optional parts are guarded at runtime. */
+interface HeartbeatPayload {
+  schema?: number; // not read by this consumer
+  timestamp?: string; // not read by this consumer; rows are stamped with the receive time
+  agent?: { version?: string; commit?: string; os?: string; arch?: string };
+  host?: { // required in practice: a heartbeat without a host object is dropped
+    hostname?: string | null;
+    cpu_percent?: number;
+    cpu_cores?: number;
+    mem_total_mb?: number;
+    mem_used_mb?: number;
+    uptime_seconds?: number;
+    disks?: AgentHostDisk[];
+  };
+  instances?: Array<{
+    id: string; // stored as game_instances.agent_instance_id
+    game: string;
+    label?: string | null;
+    state?: string; // stored as 'unknown' when absent
+    uptime_seconds?: number;
+  }>;
+}
+
+/**
+ * Consumes Corrosion wire protocol v2 host-agent subjects
+ * (corrosion-host-agent/PROTOCOL.md) and keeps the fleet model truthful.
+ *
+ * Writes the License → Host → Instance model (agent_hosts + game_instances) from
+ * each heartbeat, AND maintains the legacy single-server `server_connections`
+ * row so the current panel keeps working during the fleet UI transition.
+ *
+ * Host identity: until enrollment issues a stable host id, a host is keyed by
+ * (license_id, hostname). One agent = one host today; the schema is already multi-host-ready.
+ */
+@Injectable()
+export class HostAgentConsumerService implements OnApplicationBootstrap {
+  private readonly logger = new Logger(HostAgentConsumerService.name);
+
+  private knownLicenses = new Map<string, number>();
+  private warnedUnknown = new Set<string>();
+
+  private static readonly UUID_RE =
+    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+  private static readonly LICENSE_CACHE_TTL_MS = 5 * 60_000;
+  private static readonly OFFLINE_AFTER_MS = 180_000;
+
+  constructor(
+    private readonly nats: NatsService,
+    @InjectRepository(ServerConnection)
+    private readonly connectionRepository: Repository<ServerConnection>,
+    @InjectRepository(License)
+    private readonly licenseRepository: Repository<License>,
+    @InjectRepository(AgentHost)
+    private readonly hostRepository: Repository<AgentHost>,
+    @InjectRepository(GameInstance)
+    private readonly instanceRepository: Repository<GameInstance>,
+    private readonly webhooksService: WebhooksService,
+  ) {}
+
+  // Registered at application bootstrap rather than module init: a subscribe()
+  // made before NatsService has connected silently no-ops (see NatsBridgeService).
+  onApplicationBootstrap() {
+    const licenseOf = (subject: string): string => subject.split('.')[1]; // corrosion.<license>.…
+
+    this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
+      const licenseId = licenseOf(subject);
+      void this.onHeartbeat(licenseId, data as HeartbeatPayload).catch((err) =>
+        this.logger.error(`heartbeat handling failed for ${licenseId}: ${err.message}`, err.stack),
+      );
+    });
+    this.nats.subscribe('corrosion.*.host.going_offline', (_data, subject) => {
+      const licenseId = licenseOf(subject);
+      void this.onGoingOffline(licenseId).catch((err) =>
+        this.logger.error(`going_offline handling failed for ${licenseId}: ${err.message}`, err.stack),
+      );
+    });
+    this.logger.log('Host agent (protocol v2) consumer subscriptions initialized');
+  }
+
+  private async onHeartbeat(licenseId: string, payload: HeartbeatPayload): Promise<void> {
+    if (!(await this.isValidTenant(licenseId))) return;
+    // Every well-formed v2 heartbeat has a host block; anything without one is
+    // dropped so a stray or empty publish cannot create a phantom host row.
+    const hasHostBlock = Boolean(payload) && typeof payload.host === 'object' && payload.host !== null;
+    if (!hasHostBlock) {
+      this.logger.warn(`ignoring malformed heartbeat for license ${licenseId} (no host block)`);
+      return;
+    }
+    const receivedAt = new Date();
+    await this.updateLegacyConnection(licenseId, receivedAt);
+    const hostRow = await this.upsertHost(licenseId, payload, receivedAt);
+    await this.upsertInstances(licenseId, hostRow, payload, receivedAt);
+  }
+
+  /** Refresh (or create) the legacy server_connections row that keeps the current panel working. */
+  private async updateLegacyConnection(licenseId: string, now: Date): Promise<void> {
+    const row = await this.connectionRepository.findOne({ where: { license_id: licenseId } });
+    if (!row) {
+      // No row yet for this license: create one already marked connected.
+      const fresh = this.connectionRepository.create({
+        license_id: licenseId,
+        connection_type: 'bare_metal',
+        connection_status: 'connected',
+        companion_last_seen: now,
+      });
+      await this.connectionRepository.save(fresh);
+      return;
+    }
+    await this.connectionRepository.update(
+      { id: row.id },
+      { companion_last_seen: now, connection_status: 'connected', updated_at: now },
+    );
+  }
+
+  /** Insert or refresh the agent_hosts row identified by (license_id, hostname). */
+  private async upsertHost(licenseId: string, payload: HeartbeatPayload, now: Date): Promise<AgentHost> {
+    const { agent, host: reported } = payload;
+    // An absent hostname is stored as the empty string.
+    const hostname = reported?.hostname ?? '';
+    const fields = {
+      agent_version: agent?.version ?? null,
+      agent_commit: agent?.commit ?? null,
+      os: agent?.os ?? null,
+      arch: agent?.arch ?? null,
+      status: 'connected',
+      last_heartbeat_at: now,
+      cpu_percent: reported?.cpu_percent ?? null,
+      cpu_cores: reported?.cpu_cores ?? null,
+      mem_total_mb: reported?.mem_total_mb ?? null,
+      mem_used_mb: reported?.mem_used_mb ?? null,
+      uptime_seconds: reported?.uptime_seconds ?? null,
+      disks: reported?.disks ?? null,
+      updated_at: now,
+    };
+
+    const existing = await this.hostRepository.findOne({ where: { license_id: licenseId, hostname } });
+    if (!existing) {
+      const draft = this.hostRepository.create({ license_id: licenseId, hostname, ...fields });
+      const created = await this.hostRepository.save(draft);
+      this.logger.log(`host registered for license ${licenseId} (hostname '${hostname || 'unknown'}')`);
+      return created;
+    }
+    // Known host: write the new fields and return the merged view without a re-read.
+    await this.hostRepository.update({ id: existing.id }, fields);
+    return { ...existing, ...fields } as AgentHost;
+  }
+
+  /** Insert or refresh one game_instances row for each entry in payload.instances. */
+  private async upsertInstances(
+    licenseId: string,
+    host: AgentHost,
+    payload: HeartbeatPayload,
+    now: Date,
+  ): Promise<void> {
+    const reported = payload.instances ?? [];
+    for (const entry of reported) {
+      // An entry missing its id or game cannot be keyed or stored — skip it.
+      if (!entry?.id || !entry?.game) continue;
+
+      // Rows are looked up by (license_id, agent_instance_id).
+      const key = { license_id: licenseId, agent_instance_id: entry.id };
+      const fields = {
+        host_id: host.id,
+        game: entry.game,
+        label: entry.label ?? null,
+        state: entry.state ?? 'unknown',
+        uptime_seconds: entry.uptime_seconds ?? 0,
+        last_seen_at: now,
+        updated_at: now,
+      };
+
+      const existing = await this.instanceRepository.findOne({ where: key });
+      if (existing) {
+        await this.instanceRepository.update({ id: existing.id }, fields);
+        continue;
+      }
+      // First sighting of this instance id under the license.
+      const draft = this.instanceRepository.create({ ...key, ...fields });
+      await this.instanceRepository.save(draft);
+      this.logger.log(`instance '${entry.id}' (${entry.game}) registered for license ${licenseId}`);
+    }
+  }
+
+  /** Graceful goodbye beacon: mark the license's rows offline and fire server_down. */
+  private async onGoingOffline(licenseId: string): Promise<void> {
+    if (!(await this.isValidTenant(licenseId))) return;
+    const now = new Date();
+    await this.connectionRepository.update(
+      { license_id: licenseId },
+      { connection_status: 'offline', updated_at: now },
+    );
+
+    // Read the hosts BEFORE flipping status and keep only those not already
+    // offline: the webhook needs their identity, and a row that was already
+    // offline must not raise a second server_down for the same outage.
+    const known = await this.hostRepository.find({ where: { license_id: licenseId } });
+    const hosts = known.filter((h) => h.status !== 'offline');
+    await this.hostRepository.update({ license_id: licenseId }, { status: 'offline', updated_at: now });
+    this.logger.log(`host(s) for license ${licenseId} went offline (graceful beacon)`);
+
+    // Dispatch server_down event for each host that went offline. Fire-and-forget.
+    for (const host of hosts) {
+      void this.webhooksService
+        .dispatch(licenseId, 'server_down', {
+          host_id: host.id,
+          hostname: host.hostname ?? null,
+          reason: 'graceful_shutdown',
+        })
+        .catch(() => {
+          // dispatch() logs internally; swallow here to keep the handler clean.
+        });
+    }
+  }
+
+  /**
+   * Runs every 60s and marks as offline every legacy connection and fleet
+   * host that is still 'connected' but whose last heartbeat is older than
+   * OFFLINE_AFTER_MS (180s). An agent that crashes or loses network never
+   * sends the goodbye beacon, so silence is the only signal. Each host that
+   * times out here also gets a server_down webhook (reason heartbeat_timeout).
+   * NOTE(review): hosts are read before the UPDATE re-checks the same
+   * condition, so one that heartbeats in between is still notified — confirm.
+   */
+  @Interval(60_000)
+  async sweepStaleConnections(): Promise<void> {
+    const threshold = new Date(Date.now() - HostAgentConsumerService.OFFLINE_AFTER_MS);
+
+    // Read the stale hosts first: the bulk UPDATE below is only used for its
+    // row count, and the webhooks need each host's id, hostname and license.
+    const staleHosts = await this.hostRepository
+      .createQueryBuilder('host')
+      .where('host.status = :connected', { connected: 'connected' })
+      .andWhere('host.last_heartbeat_at IS NOT NULL')
+      .andWhere('host.last_heartbeat_at < :threshold', { threshold })
+      .getMany();
+
+    const conn = await this.connectionRepository
+      .createQueryBuilder()
+      .update(ServerConnection)
+      .set({ connection_status: 'offline', updated_at: () => 'NOW()' })
+      .where('connection_status = :connected', { connected: 'connected' })
+      .andWhere('companion_last_seen IS NOT NULL')
+      .andWhere('companion_last_seen < :threshold', { threshold })
+      .execute();
+
+    const hosts = await this.hostRepository
+      .createQueryBuilder()
+      .update(AgentHost)
+      .set({ status: 'offline', updated_at: () => 'NOW()' })
+      .where('status = :connected', { connected: 'connected' })
+      .andWhere('last_heartbeat_at IS NOT NULL')
+      .andWhere('last_heartbeat_at < :threshold', { threshold })
+      .execute();
+
+    const affected = (conn.affected ?? 0) + (hosts.affected ?? 0);
+    if (affected) {
+      this.logger.warn(`marked ${affected} stale connection/host record(s) offline`);
+    }
+
+    // One server_down webhook per host captured above. Fire-and-forget —
+    // a webhook failure must never break the sweep.
+    for (const host of staleHosts) {
+      void this.webhooksService
+        .dispatch(host.license_id, 'server_down', {
+          host_id: host.id,
+          hostname: host.hostname ?? null,
+          reason: 'heartbeat_timeout',
+        })
+        .catch(() => {
+          // dispatch() logs internally; swallow here to keep the sweep clean.
+        });
+    }
+  }
+
+  /**
+   * Gate for every write: the subject's license segment must look like a UUID
+   * and match an existing license, since any publisher can invent a subject.
+   * A positive lookup is cached for LICENSE_CACHE_TTL_MS to spare a query per heartbeat.
+   */
+  private async isValidTenant(licenseId: string): Promise<boolean> {
+    if (!HostAgentConsumerService.UUID_RE.test(licenseId)) {
+      this.warnUnknownOnce(licenseId, 'not a UUID');
+      return false;
+    }
+    const freshUntil = this.knownLicenses.get(licenseId) ?? 0;
+    if (freshUntil > Date.now()) return true;
+
+    const found = await this.licenseRepository.exist({ where: { id: licenseId } });
+    if (found) {
+      this.knownLicenses.set(licenseId, Date.now() + HostAgentConsumerService.LICENSE_CACHE_TTL_MS);
+      return true;
+    }
+    this.warnUnknownOnce(licenseId, 'no such license');
+    return false;
+  }
+
+  private warnUnknownOnce(licenseId: string, reason: string): void {
+    const sizeBefore = this.warnedUnknown.size;
+    if (this.warnedUnknown.add(licenseId).size === sizeBefore) return; // already warned once
+    this.logger.warn(`ignoring host-agent traffic for invalid license '${licenseId}' (${reason})`);
+  }
+}
--- a/backend-nest/src/services/index.ts
+++ b/backend-nest/src/services/index.ts
@@ -1,3 +1,4 @@
 export { NatsService } from './nats.service';
 export { NatsBridgeService } from './nats-bridge.service';
+export { HostAgentConsumerService } from './host-agent-consumer.service';
 export { SteamService } from './steam.service';
--- a/backend-nest/src/services/nats-bridge.service.ts
+++ b/backend-nest/src/services/nats-bridge.service.ts
@@ -1,14 +1,19 @@
-import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
+import { Injectable, OnApplicationBootstrap, Logger } from '@nestjs/common';
 import { NatsService } from './nats.service';

@Injectable()
-export class NatsBridgeService implements OnModuleInit {
+export class NatsBridgeService implements OnApplicationBootstrap {
  private readonly logger = new Logger(NatsBridgeService.name);
  private listeners: Map<string, Set<(event: string, data: unknown) => void>> = new Map();

  constructor(private nats: NatsService) {}

-  onModuleInit() {
+  // Subscriptions MUST happen in onApplicationBootstrap, not onModuleInit:
+  // provider onModuleInit order is not guaranteed, and these hooks once ran
+  // before NatsService connected — every subscribe() silently no-oped and the
+  // WS bridge was dead from boot. Bootstrap runs after ALL module inits
+  // (including the awaited NATS connect) complete.
+  onApplicationBootstrap() {
    this.nats.subscribe('corrosion.*.companion.heartbeat', (data, subject) => {
      const licenseId = subject.split('.')[1];
      this.emit(licenseId, 'heartbeat', data);
@@ -44,6 +49,17 @@ export class NatsBridgeService implements OnModuleInit {
      this.emit(licenseId, 'oxide_status', data);
    });

+    // Wire protocol v2 (corrosion-host-agent) — host-level telemetry
+    this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
+      const licenseId = subject.split('.')[1];
+      this.emit(licenseId, 'host_heartbeat', data);
+    });
+
+    this.nats.subscribe('corrosion.*.host.going_offline', (data, subject) => {
+      const licenseId = subject.split('.')[1];
+      this.emit(licenseId, 'host_going_offline', data);
+    });
+
    this.logger.log('NATS bridge subscriptions initialized');
  }

--- a/backend-nest/src/services/nats.service.ts
+++ b/backend-nest/src/services/nats.service.ts
@@ -1,6 +1,14 @@
 import { Injectable, OnModuleInit, OnModuleDestroy, Logger } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';
 import { connect, NatsConnection, StringCodec, Subscription } from 'nats';
+import { createHmac, randomUUID } from 'crypto';
+
+export interface AgentCredentials {
+  license_id: string;
+  nats_user: string;
+  nats_password: string;
+  nats_url: string;
+}

@Injectable()
 export class NatsService implements OnModuleInit, OnModuleDestroy {
@@ -13,8 +21,13 @@ export class NatsService implements OnModuleInit, OnModuleDestroy {
  async onModuleInit() {
    try {
      const url = this.config.get<string>('nats.url') || 'nats://localhost:4222';
-      this.nc = await connect({ servers: url });
-      this.logger.log(`Connected to NATS at ${url}`);
+      const user = this.config.get<string>('nats.internalUser');
+      const pass = this.config.get<string>('nats.internalPassword');
+      // Authenticate with the privileged internal user when configured;
+      // otherwise connect anonymously (broker hasn't enforced auth yet).
+      const opts = user && pass ? { servers: url, user, pass } : { servers: url };
+      this.nc = await connect(opts);
+      this.logger.log(`Connected to NATS at ${url}${user ? ` as ${user}` : ' (anonymous)'}`);
    } catch (err) {
      this.logger.warn(`NATS connection failed — running in offline mode: ${(err as Error).message}`);
    }
@@ -62,6 +75,64 @@ export class NatsService implements OnModuleInit, OnModuleDestroy {
    return sub;
  }

+  /**
+   * Request-reply to a host-agent subject with a LICENSE-SCOPED reply subject.
+   *
+   * Per-license agent users are confined to corrosion.{license}.> and have no
+   * _INBOX permission, so the agent cannot reply on the default global inbox.
+   * The reply lives at corrosion.{license}.reply.<uuid>, where the privileged
+   * backend subscribes (corrosion-host-agent/PROTOCOL.md, "Reply-subject rule").
+   * Resolves with the reply parsed as JSON, or as raw text when it is not JSON;
+   * rejects if NATS is down, on timeout, or with the subscription's own error.
+   */
+  async requestScoped<T = unknown>(
+    licenseId: string,
+    subject: string,
+    payload: Record<string, unknown>,
+    timeoutMs = 8000,
+  ): Promise<T> {
+    const nc = this.nc;
+    if (!nc) throw new Error('NATS unavailable — agent is not reachable');
+    const replySubject = `corrosion.${licenseId}.reply.${randomUUID()}`;
+    return new Promise<T>((resolve, reject) => {
+      nc.subscribe(replySubject, {
+        max: 1,
+        timeout: timeoutMs,
+        callback: (err, msg) => {
+          if (err) {
+            // Only a real timeout is reported as one; other errors (e.g. permissions) pass through.
+            const timedOut = err.code === 'TIMEOUT';
+            return reject(timedOut ? new Error(`agent did not respond within ${timeoutMs}ms`) : err);
+          }
+          try {
+            resolve(JSON.parse(this.sc.decode(msg.data)) as T);
+          } catch {
+            resolve(this.sc.decode(msg.data) as unknown as T);
+          }
+        },
+      });
+      nc.publish(subject, this.sc.encode(JSON.stringify(payload)), { reply: replySubject });
+    });
+  }
+
+  /**
+   * Build the NATS login for a license's host agent: user = license id,
+   * password = hex HMAC-SHA256 of the license id keyed with nats.tokenSecret
+   * (must match scripts/generate-nats-auth.mjs). Null when no secret is set.
+   */
+  getAgentCredentials(licenseId: string): AgentCredentials | null {
+    const tokenSecret = this.config.get<string>('nats.tokenSecret');
+    if (!tokenSecret) return null;
+    const hmac = createHmac('sha256', tokenSecret);
+    const publicUrl = this.config.get<string>('nats.publicUrl');
+    return {
+      license_id: licenseId,
+      nats_user: licenseId,
+      nats_password: hmac.update(licenseId).digest('hex'),
+      nats_url: publicUrl || 'nats://nats.corrosionmgmt.com:4222',
+    };
+  }
+
  /** Publish a command to a specific license's server */
  async sendServerCommand(licenseId: string, action: string, payload: Record<string, unknown> = {}): Promise<void> {
    await this.publish(`corrosion.${licenseId}.cmd.server`, {
--- a/backend/migrations/022_fleet_model.sql
+++ b/backend/migrations/022_fleet_model.sql
@@ -0,0 +1,102 @@
+-- Fleet data model — License → Host → Instance (with optional Cluster)
+--
+-- ADDITIVE: existing server_connections / server_config / server_stats are
+-- left untouched so the current single-server panel keeps working. The
+-- host-agent consumer writes BOTH the legacy connection row and these fleet
+-- tables during the transition; the panel migrates to the fleet tables in a
+-- later phase.
+--
+-- Shape mirrors the host agent's wire protocol v2 heartbeat:
+--   host{} block          → agent_hosts
+--   instances[] entries   → game_instances
+-- Host metrics (CPU/RAM/disk) live on the HOST, not duplicated per instance.
+--
+-- Named `agent_hosts` (not `hosts`) to avoid collision with the existing B2B
+-- `hosts` table (hosting-partner companies) — different concept entirely.
+
+-----------------------------------------------------------
+-- AGENT_HOSTS — one Corrosion host agent / one machine
+-----------------------------------------------------------
+CREATE TABLE IF NOT EXISTS agent_hosts (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
+    -- Natural key until enrollment issues a stable host identity.
+    hostname VARCHAR(255) NOT NULL DEFAULT '',
+    agent_version VARCHAR(64),
+    agent_commit VARCHAR(64),
+    os VARCHAR(32),
+    arch VARCHAR(32),
+    status VARCHAR(20) NOT NULL DEFAULT 'offline'
+        CHECK (status IN ('connected', 'degraded', 'offline')),
+    last_heartbeat_at TIMESTAMPTZ,
+    cpu_percent DOUBLE PRECISION,
+    cpu_cores INTEGER,
+    mem_total_mb BIGINT,
+    mem_used_mb BIGINT,
+    uptime_seconds BIGINT,
+    disks JSONB,  -- [{ "mount": "/", "total_mb": n, "free_mb": n }]
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    UNIQUE (license_id, hostname)
+);
+CREATE INDEX IF NOT EXISTS idx_agent_hosts_license ON agent_hosts(license_id);
+
+-----------------------------------------------------------
+-- INSTANCE CLUSTERS — optional grouping (Soulmask main/child, Dune battlegroup)
+-- Reserved now; cluster logic ships with those game adapters.
+-----------------------------------------------------------
+CREATE TABLE IF NOT EXISTS instance_clusters (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
+    game VARCHAR(32) NOT NULL,
+    name VARCHAR(255) NOT NULL,
+    topology VARCHAR(32),  -- main_client | battlegroup
+    config JSONB,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+CREATE INDEX IF NOT EXISTS idx_clusters_license ON instance_clusters(license_id);
+
+-----------------------------------------------------------
+-- GAME INSTANCES — one game server process / orchestrated unit.
+-- The billing unit (plans count instances).
+-----------------------------------------------------------
+CREATE TABLE IF NOT EXISTS game_instances (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
+    host_id UUID REFERENCES agent_hosts(id) ON DELETE SET NULL,
+    cluster_id UUID REFERENCES instance_clusters(id) ON DELETE SET NULL,
+    -- The agent's instance slug; the NATS subject segment.
+    agent_instance_id VARCHAR(64) NOT NULL,
+    game VARCHAR(32) NOT NULL,
+    label VARCHAR(255),
+    -- running | stopped | starting | stopping | crashed
+    -- | configured | missing_root | unmanaged | unknown
+    state VARCHAR(32) NOT NULL DEFAULT 'unknown',
+    root_path TEXT,
+    uptime_seconds BIGINT NOT NULL DEFAULT 0,
+    last_seen_at TIMESTAMPTZ,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    UNIQUE (license_id, agent_instance_id)
+);
+CREATE INDEX IF NOT EXISTS idx_instances_license ON game_instances(license_id);
+CREATE INDEX IF NOT EXISTS idx_instances_host ON game_instances(host_id);
+
+-----------------------------------------------------------
+-- INSTANCE STATS — per-instance time series (game metrics), populated once
+-- game-level telemetry (player count/FPS via RCON/plugin) is collected; the
+-- heartbeat carries host metrics only. license_id is indexed: it is a cascading FK.
+-----------------------------------------------------------
+CREATE TABLE IF NOT EXISTS instance_stats (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    instance_id UUID NOT NULL REFERENCES game_instances(id) ON DELETE CASCADE,
+    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
+    player_count INTEGER NOT NULL DEFAULT 0,
+    max_players INTEGER NOT NULL DEFAULT 0,
+    fps DOUBLE PRECISION NOT NULL DEFAULT 0,
+    memory_usage_mb INTEGER NOT NULL DEFAULT 0,
+    recorded_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+CREATE INDEX IF NOT EXISTS idx_instance_stats_instance ON instance_stats(instance_id, recorded_at DESC);
+CREATE INDEX IF NOT EXISTS idx_instance_stats_license ON instance_stats(license_id);
--- a/backend/migrations/023_api_keys.sql
+++ b/backend/migrations/023_api_keys.sql
@@ -0,0 +1,17 @@
+-- Per-license API key management
+-- Each row represents one issued key: the plaintext is shown once at creation
+-- and never stored; only the SHA-256 hex digest is persisted (unique-indexed).
+
+CREATE TABLE IF NOT EXISTS api_keys (
+    id          UUID         PRIMARY KEY DEFAULT uuid_generate_v4(),
+    license_id  UUID         NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
+    name        VARCHAR(100) NOT NULL,
+    key_prefix  VARCHAR(16)  NOT NULL,
+    key_hash    VARCHAR(128) NOT NULL,
+    last_used_at TIMESTAMPTZ NULL,
+    is_active   BOOLEAN      NOT NULL DEFAULT TRUE,
+    created_at  TIMESTAMPTZ  NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_api_keys_license   ON api_keys(license_id);
+CREATE UNIQUE INDEX IF NOT EXISTS idx_api_keys_key_hash ON api_keys(key_hash);  -- one digest, one key
--- a/backend/migrations/024_webhooks.sql
+++ b/backend/migrations/024_webhooks.sql
@@ -0,0 +1,26 @@
+-- 024_webhooks.sql
+-- Per-license outbound webhook registry.
+-- Operators register URLs + event subscriptions; the backend POSTs signed
+-- JSON payloads on matching events (player_banned, server_down, …).
+
+-- One row per registered webhook endpoint. IF NOT EXISTS on the table and its
+-- index keeps this migration safe to re-run.
+CREATE TABLE IF NOT EXISTS webhooks (
+    id               uuid         NOT NULL DEFAULT uuid_generate_v4(),
+    -- Owning license; its webhooks are deleted with it.
+    license_id       uuid         NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
+    name             varchar(100) NOT NULL,
+    url              text         NOT NULL,
+    -- Comma-separated event keys, e.g. 'player_banned,server_down'
+    -- TypeORM simple-array maps this transparently to string[].
+    events           text         NOT NULL,
+    -- HMAC-SHA256 signing secret; generated server-side if omitted on create.
+    secret           varchar(128) NOT NULL,
+    is_active        boolean      NOT NULL DEFAULT true,
+    -- Populated after each delivery attempt.
+    last_delivery_at timestamptz  NULL,
+    -- 'ok' | 'failed' — last HTTP delivery outcome.
+    last_status      varchar(20)  NULL,
+    created_at       timestamptz  NOT NULL DEFAULT now(),
+
+    CONSTRAINT webhooks_pkey PRIMARY KEY (id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_webhooks_license_id ON webhooks (license_id);
--- a/backend/migrations/025_owner_full_access.sql
+++ b/backend/migrations/025_owner_full_access.sql
@@ -0,0 +1,15 @@
+-- 025_owner_full_access.sql
+--
+-- The system-default Owner role enumerated per-resource wildcards
+-- (server.*, wipe.*, players.*, ...). Every feature added since has drifted past that
+-- enumeration: apikeys, webhooks, alerts, analytics, chat, schedules,
+-- notifications, map, users, and ALL plugin-config modules (plus a singular
+-- 'plugin.*' vs granted 'plugins.*' mismatch) were silently locked out for any
+-- non-super-admin Owner — PermissionsGuard denies a permission the role doesn't
+-- grant. The Owner has "full control of their license" by definition, so grant
+-- a global wildcard instead of an enumeration that must be amended per feature.
+--
+-- PermissionsGuard and the frontend auth store both honor "*" as allow-all.
+-- Overwrite the system-default Owner role's permission map with the single
+-- allow-all wildcard entry.
+UPDATE roles
+   SET permissions = '{"*": true}'::jsonb
+ WHERE is_system_default = true
+   AND role_name = 'Owner';
--- a/contract-tests/agent-backend.contract.mjs
+++ b/contract-tests/agent-backend.contract.mjs
@@ -0,0 +1,152 @@
+// Full-pipeline contract test: Rust host agent → NATS → NestJS consumer → Postgres.
+//
+// Proves the wire protocol v2 chain end to end against a REAL backend and DB:
+//   1. agent heartbeat arrives with schema 2 + measured telemetry
+//   2. backend auto-registers the server_connections row and marks it connected
+//   3. instance command channel round-trips (start/status/stop) with push events
+//   4. graceful agent shutdown publishes the offline beacon and the row flips offline
+//
+// Required env:
+//   LICENSE_ID    — existing license uuid (CI: from the admin seed)
+//   DATABASE_URL  — postgres connection string for assertions
+//   NATS_URL      — broker both agent and backend use (default nats://localhost:4222)
+//   AGENT_BIN     — path to the corrosion-host-agent binary
+//
+// Uses the backend's own node_modules (nats, pg) so the client libs under test
+// are exactly what production runs.
+
+import { createRequire } from 'node:module';
+import { spawn } from 'node:child_process';
+import { writeFileSync, mkdtempSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const repoRoot = join(dirname(fileURLToPath(import.meta.url)), '..');
+const require = createRequire(join(repoRoot, 'backend-nest', 'node_modules', 'x.js'));
+const { connect, StringCodec } = require('nats');
+const { Client: PgClient } = require('pg');
+
+const LICENSE = process.env.LICENSE_ID;
+const NATS_URL = process.env.NATS_URL ?? 'nats://localhost:4222';
+const DATABASE_URL = process.env.DATABASE_URL;
+const AGENT_BIN = process.env.AGENT_BIN ?? join(repoRoot, 'corrosion-host-agent', 'target', 'debug', 'corrosion-host-agent');
+
+if (!LICENSE || !DATABASE_URL) {
+  console.error('LICENSE_ID and DATABASE_URL are required');
+  process.exit(2);
+}
+
+const sc = StringCodec();
+const errs = [];
+const check = (cond, msg) => { if (!cond) errs.push(msg); };
+const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
+
+async function pollDb(pg, predicate, label, timeoutMs = 30_000) {
+  const deadline = Date.now() + timeoutMs;
+  for (;;) {
+    const { rows } = await pg.query(
+      'SELECT connection_type, connection_status, companion_last_seen FROM server_connections WHERE license_id = $1',
+      [LICENSE],
+    );
+    if (predicate(rows)) return rows;
+    if (Date.now() > deadline) {
+      errs.push(`${label}: timeout after ${timeoutMs}ms — rows: ${JSON.stringify(rows)}`);
+      return rows;
+    }
+    await sleep(1000);
+  }
+}
+
+const main = async () => {
+  const pg = new PgClient({ connectionString: DATABASE_URL });
+  await pg.connect();
+  const nc = await connect({ servers: NATS_URL });
+
+  const heartbeats = [];
+  const statusEvents = [];
+  (async () => { for await (const m of nc.subscribe(`corrosion.${LICENSE}.host.heartbeat`)) heartbeats.push(JSON.parse(sc.decode(m.data))); })();
+  (async () => { for await (const m of nc.subscribe(`corrosion.${LICENSE}.ci-instance.status`)) statusEvents.push(JSON.parse(sc.decode(m.data))); })();
+
+  // --- spawn the real agent ---
+  const dir = mkdtempSync(join(tmpdir(), 'cha-contract-'));
+  const cfgPath = join(dir, 'agent.toml');
+  writeFileSync(cfgPath, `
+[agent]
+license_id = "${LICENSE}"
+nats_url = "${NATS_URL}"
+heartbeat_seconds = 10
+log_level = "info"
+
+[[instance]]
+id = "ci-instance"
+game = "rust"
+root = "/tmp"
+label = "Contract CI"
+executable = "/bin/sleep"
+args = ["300"]
+`);
+  const agent = spawn(AGENT_BIN, ['--config', cfgPath], { stdio: ['ignore', 'inherit', 'inherit'] });
+  const agentExited = new Promise((r) => agent.on('exit', r));
+
+  // --- 1. heartbeat shape + real telemetry ---
+  const hbDeadline = Date.now() + 20_000;
+  while (heartbeats.length === 0 && Date.now() < hbDeadline) await sleep(500);
+  check(heartbeats.length > 0, 'no heartbeat within 20s');
+  if (heartbeats.length) {
+    const hb = heartbeats[0];
+    check(hb.schema === 2, `schema != 2: ${hb.schema}`);
+    check(typeof hb.host?.cpu_percent === 'number', 'missing host.cpu_percent');
+    check(hb.host?.mem_total_mb > 0, 'mem_total_mb not measured');
+    check(Array.isArray(hb.host?.disks) && hb.host.disks.length > 0, 'no disks reported');
+    check(hb.instances?.[0]?.id === 'ci-instance', 'instance missing from heartbeat');
+    check(!!hb.agent?.version && !!hb.agent?.commit, 'agent version/commit missing');
+  }
+
+  // --- 2. backend auto-registers + connects ---
+  const rows = await pollDb(pg, (r) => r.length === 1 && r[0].connection_status === 'connected', 'auto-register connected');
+  if (rows.length === 1) {
+    check(rows[0].connection_type === 'bare_metal', `connection_type: ${rows[0].connection_type}`);
+    check(rows[0].companion_last_seen !== null, 'companion_last_seen not set');
+  }
+
+  // --- 3. instance command channel ---
+  const cmd = async (payload) =>
+    JSON.parse(sc.decode((await nc.request(`corrosion.${LICENSE}.ci-instance.cmd`, sc.encode(JSON.stringify(payload)), { timeout: 8000 })).data));
+
+  const st0 = await cmd({ func: 'status' });
+  check(st0.state?.state === 'stopped', `initial state: ${JSON.stringify(st0.state)}`);
+  const start = await cmd({ func: 'start' });
+  check(start.status === 'success', `start: ${JSON.stringify(start)}`);
+  await sleep(1000);
+  const st1 = await cmd({ func: 'status' });
+  check(st1.state?.state === 'running', `post-start state: ${JSON.stringify(st1.state)}`);
+  check((await cmd({ func: 'start' })).status === 'error', 'double start must error');
+  check((await cmd({ func: 'bogus' })).status === 'error', 'unknown func must error');
+  const stop = await cmd({ func: 'stop' });
+  check(stop.status === 'success', `stop: ${JSON.stringify(stop)}`);
+  await sleep(1000);
+  const seq = statusEvents.map((e) => e.event?.state);
+  check(seq.includes('running') && seq.includes('stopped'), `status events incomplete: ${seq.join(',')}`);
+
+  // --- 4. graceful shutdown → offline beacon → DB flips offline ---
+  agent.kill('SIGTERM');
+  await Promise.race([agentExited, sleep(8000)]);
+  await pollDb(pg, (r) => r.length === 1 && r[0].connection_status === 'offline', 'beacon offline', 20_000);
+
+  await nc.close();
+  await pg.end();
+
+  if (errs.length) {
+    console.error('\nCONTRACT FAIL:');
+    errs.forEach((e) => console.error(' -', e));
+    process.exit(1);
+  }
+  console.log('\nCONTRACT PASS: heartbeat shape, auto-register, connected/offline lifecycle, instance command channel, push events');
+  process.exit(0);
+};
+
+main().catch((e) => {
+  console.error('contract test crashed:', e);
+  process.exit(1);
+});
--- a/corrosion-host-agent/Cargo.lock
+++ b/corrosion-host-agent/Cargo.lock
--- a/corrosion-host-agent/Cargo.toml
+++ b/corrosion-host-agent/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "corrosion-host-agent"
-version = "2.0.0-alpha.2"
+version = "2.0.0-alpha.11"
 edition = "2021"
 description = "Corrosion Host Agent — multi-game ops runtime for self-hosted game servers"
 license = "UNLICENSED"
@@ -23,8 +23,18 @@ chrono = { version = "0.4", features = ["serde", "clock"] }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
 anyhow = "1"
+async-trait = "0.1"
 clap = { version = "4.5", features = ["derive"] }
 rand = "0.8"
+tokio-tungstenite = "0.24"
+minisign-verify = "0.2.5"
+reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream"] }
+
+[target.'cfg(unix)'.dependencies]
+libc = "0.2"
+
+[dev-dependencies]
+tempfile = "3"

 # Size-optimized release: single static binary living next to RAM-heavy game
 # servers. Panic stays 'unwind' so a panicking task surfaces through its
--- a/corrosion-host-agent/PROTOCOL.md
+++ b/corrosion-host-agent/PROTOCOL.md
@@ -1,8 +1,9 @@
 # Corrosion Wire Protocol v2

-Status: **Phase 0 implemented** (host heartbeat, host commands, going-offline
-beacon). Per-instance command/status subjects are reserved and specified here
-for Phase 1.
+Status: **Phase 0 + Phase 1 implemented** (host heartbeat, host commands,
+going-offline beacon, per-instance start/stop/restart/status with push state
+events, RCON, SteamCMD updates, jailed file ops, Dune compose lifecycle).
+`oxide_install` and the Dune admin adapters are specified but not yet implemented.

 ## Design

@@ -70,9 +71,10 @@ All telemetry is measured, never fabricated. Fields the agent cannot measure
 are omitted (`probe` before the first probe completes, `hostname` if
 unavailable).

-Phase 0 instance `state` values: `configured` (root path exists),
-`missing_root`. Phase 1 adds live process states: `running`, `stopped`,
-`crashed`, `starting`, `updating`.
+Instance `state` values — process-managed (an `executable` is configured):
+`running`, `stopped`, `starting`, `stopping`, `crashed`; unmanaged
+(telemetry-only): `configured` (root exists), `missing_root`. Each instance
+also reports `uptime_seconds` (0 unless running).

 ### `corrosion.{license_id}.host.cmd` (backend → agent, request-reply)

@@ -83,6 +85,7 @@ Request: `{ "func": "<name>" }`. Reply: `{ "status": "success" | "error", ... }`
 | `ping`    | `version`, `commit`, `uptime_seconds`                     |
 | `probe`   | `report` — fresh ProbeReport (also cached for heartbeat)  |
 | `sysinfo` | `snapshot` — full heartbeat payload, collected on demand  |
+| `update`  | `{ "func": "update", "url": "https://cdn.corrosionmgmt.com/host-agent/.../corrosion-host-agent-<plat>" }` → downloads the binary + `<url>.minisig`, verifies the minisign signature against the agent's EMBEDDED public key, atomically swaps (with `.old` rollback), replies `{ status: success, message: "...relaunching" }`, then relaunches the new binary. Rejects anything not signed by the release key and any URL that isn't `https://cdn.corrosionmgmt.com`. |

 Unknown funcs return `status: "error"` with a message listing supported funcs.

@@ -92,19 +95,71 @@ Best-effort beacon (500ms budget) on graceful shutdown so the panel can flip
 the host to offline immediately instead of waiting out heartbeat staleness.
 Payload: `{}`.

-## Instance-level subjects (Phase 1 — reserved, not yet implemented)
+## Instance-level subjects

-### `corrosion.{license_id}.{instance_id}.cmd` (backend → agent, request-reply)
+### `corrosion.{license_id}.{instance_id}.cmd` (backend → agent, request-reply) — LIVE

-Lifecycle and control for one game instance. Planned funcs: `start`, `stop`,
-`restart`, `status`, `rcon` (process-class games), `steam_update`,
-`oxide_install` (rust), plus game-adapter-specific commands (Dune: docker
-lifecycle, RabbitMQ bus commands, Coriolis reset).
+Lifecycle and control for one game instance.

-### `corrosion.{license_id}.{instance_id}.status` (agent → backend, publish)
+The same `start`/`stop`/`restart`/`status` funcs work for **every** game: the
+agent picks a `Supervisor` impl per game — a spawned-process supervisor for
+Rust/Conan/Soulmask, a **docker-compose supervisor for Dune** (`docker compose
+up -d` / `stop` / `restart` against the instance's compose project, configured
+via `[instance.docker_compose]`). The wire contract is identical; only the
+management model behind it differs.

-State-change events (started/stopped/crashed) so the panel does not wait for
-the next heartbeat.
+Implemented funcs: `start`, `stop` (graceful with 30s budget, then force
+kill — process supervisor; Dune maps stop to `docker compose stop`), `restart`,
+`status` (returns `state` + `uptime_seconds`), and
+`rcon` — `{ "func": "rcon", "command": "<console command>" }` returns
+`{ "status": "success", "output": <server response> }`. Protocol per game:
+WebRCON (WebSocket JSON) for rust, Source RCON (Valve TCP) for
+conan/soulmask; explicit `kind` override available in the instance's
+`[instance.rcon]` config. Always targets 127.0.0.1 (agent is co-located).
+Errors reply `{ "status": "error", "message": ... }` — including start on an
+unmanaged instance, double start, missing rcon config, and unknown funcs.
+
+Also implemented: `steam_update` — `{ "func": "steam_update" }` runs
+SteamCMD for the instance's game (app ids: rust 258550, conan 443030,
+soulmask 3017310/3017300; dune rejects — Docker images, no SteamCMD),
+streaming progress lines to `corrosion.{license}.{instance}.steam_status`
+and replying on completion.
+
+Planned funcs: `oxide_install` (rust), plus game-adapter-specific
+commands (Dune: RabbitMQ admin-bus commands, Coriolis reset, Postgres admin
+surface). Dune **lifecycle** is already covered by the shared
+start/stop/restart funcs above; container crash-detection and state adoption on
+agent restart land with Phase 3b.
+
+### `corrosion.{license_id}.{instance_id}.steam_status` (agent → backend, publish) — LIVE
+
+Per-line SteamCMD stdout during a `steam_update`, so the panel can show
+live update progress. Payload: `{ "timestamp", "instance_id", "line" }`.
+
+### `corrosion.{license_id}.{instance_id}.files.cmd` (backend → agent, request-reply) — LIVE
+
+Jailed file manager, confined to the instance `root` (two-stage check:
+lexical normalize + canonicalize, defeating `../` traversal and symlink
+escape). Request `{ "op": "list|read|write|delete|rename|mkdir|mkfile|move|copy",
+"path": "rel/path", "dest"?, "content"?, "name"? }`; reply
+`{ "status": "success", "data": ... }` or `{ "status": "error", "message": ... }`.
+`read` caps at 5 MiB. Replaces the Go agent's UNJAILED legacy files API,
+which is retired and will not be ported.
+
+### `corrosion.{license_id}.{instance_id}.status` (agent → backend, publish) — LIVE
+
+State-change events so the panel does not wait for the next heartbeat.
+Payload: `{ "timestamp", "instance_id", "event": { "state": ..., "exit_code"? } }`.
+
+Semantics: **keep-latest state sync**, not a lossless transition ledger —
+near-instant transient states (e.g. `starting` when spawn succeeds
+immediately) may coalesce into the following state. Consumers should treat
+each event as "current state is now X".
+
+Known Phase 1 limitation: the supervisor does not yet persist/adopt PIDs — if
+the agent itself restarts while a game server is running, the game process
+survives but reports `stopped` until restarted through the panel. PID
+adoption is queued with the service-install work.

 ### `corrosion.{license_id}.{instance_id}.console` (agent → backend, publish)

@@ -136,6 +191,23 @@ service that attempts connections to the customer's public IP/ports on
 request; that is specified as a Phase 1+ feature and will reuse this report
 format with `direction: "inbound"`.

+## Authentication & tenant isolation
+
+The broker enforces per-license auth: an agent connects with `user = license_id`,
+`password = HMAC-SHA256(license_id, NATS_TOKEN_SECRET)` (shown on the panel
+Server page), and is scoped to `corrosion.{license_id}.>` only. The backend uses
+a privileged internal user. This makes cross-tenant access impossible at the
+broker, not just by convention.
+
+**Reply-subject rule:** per-license users have NO `_INBOX` permission (granting
+it would let one license read another's request-reply traffic). Therefore any
+backend→agent request-reply MUST use a reply subject inside the license
+namespace — e.g. `corrosion.{license_id}.reply.<id>` — never the client's
+default global `_INBOX`. The agent is unaffected: it responds to whatever
+`msg.reply` it receives. The constraint is on the requester (the internal user
+has full access). The contract/CI tests run against an unauthenticated broker
+and use the default inbox; production request-reply must follow this rule.
+
 ## Versioning

 - The agent embeds semver + git hash + build timestamp (`--version`,
--- a/corrosion-host-agent/README.md
+++ b/corrosion-host-agent/README.md
@@ -15,9 +15,16 @@ instance on that host — Rust, Conan Exiles, Soulmask, Dune: Awakening.
 - [x] Connectivity prober (outbound TCP, periodic + on-demand)
 - [x] Host command channel (`ping`, `probe`, `sysinfo`)
 - [x] Graceful shutdown (cancellation token, going-offline beacon, NATS flush)
- [ ] Phase 1: process-class game adapter (spawn/RCON/SteamCMD/files) — Rust, Conan, Soulmask
- [ ] Phase 2: Dune Docker adapter (compose lifecycle, RabbitMQ bus, Postgres admin)
- [ ] Phase 3: signed self-update (enforced ed25519 — release gate), service install, supervisor split
+- [x] Phase 1a: process supervision — per-instance start/stop/restart/status over
+      `{instance}.cmd` request-reply, push state events on `{instance}.status`,
+      crash detection with exit codes, live state in heartbeats
+      (integration-tested with real processes + live-NATS contract test)
+- [x] Phase 1b: RCON trait (WebRCON rust / TCP conan+soulmask), SteamCMD, jailed file manager
+- [~] Phase 2: Dune Docker adapter — **compose lifecycle done** (`docker compose up -d/stop/restart`
+      via the `Supervisor` trait + `DockerComposeSupervisor`); RabbitMQ admin bus + Postgres admin
+      surface deferred. Container crash-detection + state adoption on agent restart land with Phase 3b.
+- [x] Phase 3a: SIGNED self-update — minisign-verified download+swap+relaunch (NATS `update` func); embedded public key; CI signs releases
+- [ ] Phase 3b: service install (systemd/SCM), PID adoption

 ## Build

--- a/corrosion-host-agent/agent.example.toml
+++ b/corrosion-host-agent/agent.example.toml
@@ -9,7 +9,11 @@
 [agent]
 license_id = "your-license-uuid"
 nats_url = "nats://nats.corrosionmgmt.com:4222"
-# nats_token = "set-me-or-use-CORROSION_NATS_TOKEN"
+# Per-license auth (preferred): user = license id, password = the token shown
+# on the panel Server page. The broker scopes you to corrosion.{license}.>
+# nats_user = "your-license-uuid"        # defaults to license_id if omitted
+# nats_password = "set-me-or-use-CORROSION_NATS_PASSWORD"
+# nats_token = "legacy token-only auth; use nats_password instead"
 heartbeat_seconds = 60
 log_level = "info"

@@ -23,11 +27,56 @@ game = "rust"          # rust | conan | soulmask | dune
 root = "/opt/rustserver"
 label = "Main 2x Vanilla"

+# RCON lets the panel send console commands to the running server.
+# For rust the protocol is WebRCON (WebSocket JSON); for conan/soulmask it is
+# Source RCON (Valve TCP binary). `kind` is optional — it is inferred from
+# the game name when absent.
+#
+# The [instance.rcon] sub-table MUST appear after the [[instance]] entry it
+# belongs to and before the next [[instance]] header (standard TOML
+# array-of-tables scoping rule).
+[instance.rcon]
+port = 28016
+password = "changeme"
+# kind = "webrcon"   # explicit override; omit to infer from game
+
 # [[instance]]
 # id = "soulmask-main"
 # game = "soulmask"
 # root = "/opt/soulmask/main"
 # label = "Cloud Mist Forest (cluster main)"
+#
+# [instance.rcon]
+# port = 19000
+# password = "changeme"
+# # kind = "source"  # inferred automatically for soulmask
+
+# SteamCMD update settings — optional sub-table for any instance.
+# Absent = defaults: steamcmd binary resolved via PATH, validate = false.
+#
+# [instance.steamcmd]
+# steamcmd_path = "/opt/steamcmd/steamcmd.sh"  # omit to use PATH
+# validate = true                               # enable file-hash check pass
+#
+# Dune instances do not use SteamCMD (Docker images); the steam_update func
+# will return a clear error if invoked on a dune instance.
+
+# --- Dune: Awakening (container-managed) ---------------------------------
+# Dune runs as a docker-compose stack, not a spawned process — leave
+# `executable` unset and add an [instance.docker_compose] block. The agent
+# drives `docker compose up -d / stop / restart` for start/stop/restart, and
+# `steam_update` is rejected (Dune ships as Docker images).
+#
+# [[instance]]
+# id = "dune-main"
+# game = "dune"
+# root = "/opt/dune"            # directory the compose commands run in
+# label = "Arrakis (battlegroup)"
+#
+# [instance.docker_compose]
+# file = "docker-compose.yml"   # -f; relative to root. Omit to use compose's discovery
+# project = "dune-main"         # -p; defaults to the instance id
+# service = "gameserver"        # limit lifecycle to one service; omit for the whole stack
+# command = ["docker", "compose"]   # default; use ["docker-compose"] for the legacy binary

 [prober]
 interval_seconds = 300
--- a/corrosion-host-agent/src/agent.rs
+++ b/corrosion-host-agent/src/agent.rs
@@ -1,16 +1,23 @@
 //! Shared agent handle: every subsystem task holds an `Arc<Agent>`.

+use std::collections::HashMap;
+use std::sync::Arc;
 use std::time::Instant;
 use tokio::sync::RwLock;
 use tokio_util::sync::CancellationToken;

 use crate::config::Settings;
 use crate::prober::ProbeReport;
+use crate::supervisor::Supervisor;

+/// State shared by the agent's subsystem tasks.
 pub struct Agent {
+    /// Resolved agent settings (`crate::config::Settings`).
    pub cfg: Settings,
+    /// NATS client handle.
    pub nats: async_nats::Client,
+    /// NOTE(review): presumably the instant the agent started, used to derive
+    /// uptime — confirm against the code that sets it.
    pub started: Instant,
+    /// Latest `ProbeReport`, or `None` when none has been stored yet; guarded
+    /// by an async `RwLock`.
    pub last_probe: RwLock<Option<ProbeReport>>,
+    /// One supervisor per instance, keyed by instance id. The concrete impl
+    /// (process vs docker-compose) is chosen per game by the factory in main;
+    /// every subsystem talks to the `Supervisor` trait only.
+    pub supervisors: HashMap<String, Arc<dyn Supervisor>>,
+    /// NOTE(review): presumably cancelled to signal graceful shutdown to every
+    /// task holding the handle — confirm.
    pub shutdown: CancellationToken,
 }
--- a/corrosion-host-agent/src/bus.rs
+++ b/corrosion-host-agent/src/bus.rs
@@ -33,7 +33,15 @@ pub async fn connect(cfg: &Settings) -> Result<async_nats::Client> {
    if force_tls {
        opts = opts.require_tls(true);
    }
-    if let Some(token) = &cfg.nats_token {
+
+    // Per-license auth: the broker maps user=license_id, password=derived
+    // token to permissions scoped to corrosion.{license_id}.>. Falls back to
+    // token-only or anonymous so the agent still works against a broker that
+    // hasn't enforced auth yet (transition period).
+    if let Some(password) = &cfg.nats_password {
+        let user = cfg.nats_user.clone().unwrap_or_else(|| cfg.license_id.clone());
+        opts = opts.user_and_password(user, password.clone());
+    } else if let Some(token) = &cfg.nats_token {
        opts = opts.token(token.clone());
    }

--- a/corrosion-host-agent/src/config.rs
+++ b/corrosion-host-agent/src/config.rs
@@ -10,6 +10,10 @@ use serde::Deserialize;
 use std::collections::HashSet;
 use std::path::{Path, PathBuf};

+use crate::docker_compose::DockerComposeConfig;
+use crate::rcon::RconConfig;
+use crate::steamcmd::SteamcmdConfig;
+
 /// Instance ids share the NATS subject namespace with host-level segments.
 const RESERVED_INSTANCE_IDS: &[&str] = &["host", "cmd", "files", "update", "agent"];

@@ -31,6 +35,12 @@ pub struct AgentSection {
    pub license_id: Option<String>,
    pub nats_url: Option<String>,
    pub nats_token: Option<String>,
+    /// NATS username for per-license auth. Defaults to license_id when a
+    /// password is set but no user is given.
+    pub nats_user: Option<String>,
+    /// NATS password (the per-license token). When set, the agent authenticates
+    /// with user+password instead of a bare token.
+    pub nats_password: Option<String>,
    #[serde(default = "default_heartbeat_seconds")]
    pub heartbeat_seconds: u64,
    #[serde(default = "default_log_level")]
@@ -49,6 +59,41 @@ pub struct InstanceConfig {
    /// Optional human label shown in the panel.
    #[serde(default)]
    pub label: Option<String>,
+    /// Game server executable. Relative paths resolve against `root`.
+    /// Absent = unmanaged instance (telemetry only, no process control).
+    #[serde(default)]
+    pub executable: Option<PathBuf>,
+    /// Arguments as a proper list — no shell splitting, quoted values survive.
+    #[serde(default)]
+    pub args: Vec<String>,
+    /// Working directory for the process. Defaults to the executable's directory.
+    #[serde(default)]
+    pub working_dir: Option<PathBuf>,
+    /// RCON connection settings for this instance.  Absent = rcon unavailable.
+    /// Protocol defaults to WebRcon for rust, Source for conan/soulmask.
+    #[serde(default)]
+    pub rcon: Option<RconConfig>,
+    /// SteamCMD update settings.  Absent = defaults apply (steamcmd on PATH,
+    /// validate = false).
+    #[serde(default)]
+    pub steamcmd: Option<SteamcmdConfig>,
+    /// Docker-compose settings for container-managed games (Dune). Absent =
+    /// defaults apply (compose file in the instance root, project = instance id).
+    #[serde(default)]
+    pub docker_compose: Option<DockerComposeConfig>,
+}
+
+impl InstanceConfig {
+    /// Location of the game server binary for a process-managed instance.
+    ///
+    /// Returns `None` when no `executable` is configured. An absolute
+    /// `executable` is returned unchanged; a relative one is joined onto `root`.
+    pub fn resolved_executable(&self) -> Option<PathBuf> {
+        let exe = self.executable.as_ref()?;
+        let resolved = if exe.is_absolute() {
+            exe.clone()
+        } else {
+            self.root.join(exe)
+        };
+        Some(resolved)
+    }
 }

 #[derive(Debug, Clone, Default, Deserialize)]
@@ -88,6 +133,8 @@ pub struct Settings {
    pub license_id: String,
    pub nats_url: String,
    pub nats_token: Option<String>,
+    pub nats_user: Option<String>,
+    pub nats_password: Option<String>,
    pub heartbeat_seconds: u64,
    pub log_level: String,
    pub instances: Vec<InstanceConfig>,
@@ -133,6 +180,16 @@ fn resolve(file: ConfigFile) -> Result<Settings> {
        .filter(|v| !v.is_empty())
        .or(file.agent.nats_token);

+    let nats_user = std::env::var("CORROSION_NATS_USER")
+        .ok()
+        .filter(|v| !v.is_empty())
+        .or(file.agent.nats_user);
+
+    let nats_password = std::env::var("CORROSION_NATS_PASSWORD")
+        .ok()
+        .filter(|v| !v.is_empty())
+        .or(file.agent.nats_password);
+
    validate_subject_segment("license_id", &license_id)?;

    let mut seen: HashSet<&str> = HashSet::new();
@@ -162,6 +219,8 @@ fn resolve(file: ConfigFile) -> Result<Settings> {
        license_id,
        nats_url,
        nats_token,
+        nats_user,
+        nats_password,
        heartbeat_seconds: file.agent.heartbeat_seconds,
        log_level: file.agent.log_level,
        instances: file.instances,
--- a/corrosion-host-agent/src/docker_compose.rs
+++ b/corrosion-host-agent/src/docker_compose.rs
@@ -0,0 +1,216 @@
+//! Docker-compose instance supervision — the Dune: Awakening adapter.
+//!
+//! Dune does not ship as a SteamCMD-updated process like Rust/Conan/Soulmask;
+//! it runs as Docker container(s) (game server + RabbitMQ broker + Postgres),
+//! orchestrated as a compose stack (a "battlegroup"). So Dune lifecycle is
+//! `docker compose up -d / stop / restart` against the instance's compose
+//! project, not a spawned OS process. This supervisor implements the same
+//! [`Supervisor`] trait `ProcessSupervisor` does, so the instance command
+//! dispatch is identical — only the management model differs.
+//!
+//! Scope (first cut): lifecycle + cached state. Two parity items are deferred
+//! to Phase 3b alongside process PID adoption: (1) crash detection (containers
+//! give us no child handle — a `docker compose ps` poll loop would supply it);
+//! (2) state adoption on agent restart (a running stack reports `stopped` until
+//! the next lifecycle command). Both are reconcilable with a `ps` probe.
+//!
+//! Reference: docs/reference-repos/icehunter SETUP_DOCKER.md (the docker
+//! control plane this mirrors).
+
+use std::path::PathBuf;
+use std::process::Stdio;
+use std::sync::Arc;
+use std::time::Instant;
+
+use anyhow::{bail, Context, Result};
+use serde::Deserialize;
+use tokio::process::Command;
+use tokio::sync::{watch, Mutex};
+
+use crate::config::InstanceConfig;
+use crate::supervisor::{InstanceState, Supervisor};
+
+/// Per-instance docker-compose settings (`[instance.docker_compose]`). All
+/// fields optional — defaults cover the common "one compose file in the
+/// instance root" case. Unknown keys in this table are rejected when the
+/// config is deserialized (`deny_unknown_fields`).
+#[derive(Debug, Clone, Default, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct DockerComposeConfig {
+    /// Compose file (`-f`). Relative paths resolve against the run dir, because
+    /// the supervisor runs compose with the run dir as its working directory.
+    /// Default: no `-f` is passed, leaving compose's own discovery
+    /// (docker-compose.yml in the run dir).
+    #[serde(default)]
+    pub file: Option<PathBuf>,
+    /// Compose project name (`-p`). Default: the instance id.
+    #[serde(default)]
+    pub project: Option<String>,
+    /// Limit lifecycle ops to one service (appended after the subcommand's own
+    /// arguments). Default: every service in the file.
+    #[serde(default)]
+    pub service: Option<String>,
+    /// Override the compose binary invocation. Default: `["docker","compose"]`.
+    /// Use `["docker-compose"]` for the legacy standalone binary. An empty list
+    /// is treated the same as leaving this unset.
+    #[serde(default)]
+    pub command: Option<Vec<String>>,
+}
+
+/// Mutable bookkeeping for a [`DockerComposeSupervisor`], kept behind its
+/// async `Mutex`.
+struct Inner {
+    /// Set to "now" after a successful `start`/`restart`, cleared after a
+    /// successful `stop`. Read by `uptime_seconds`.
+    started_at: Option<Instant>,
+}
+
+/// Supervisor that drives one instance through compose subcommands
+/// (`up -d` / `stop` / `restart`) instead of a spawned child process.
+///
+/// The reported state is a cache of the outcome of the last lifecycle
+/// command; nothing here polls the containers themselves.
+pub struct DockerComposeSupervisor {
+    /// Instance id from the config; also the default compose project name.
+    instance_id: String,
+    /// Directory the compose commands run in (relative `-f`/file paths resolve
+    /// against it).
+    run_dir: PathBuf,
+    /// Optional compose file passed as `-f`; `None` means no `-f` is passed.
+    compose_file: Option<PathBuf>,
+    /// Compose project name, always passed as `-p`.
+    project: String,
+    /// Optional single service that lifecycle commands are limited to.
+    service: Option<String>,
+    /// Compose binary + leading args, e.g. `["docker","compose"]`.
+    /// Never empty: `new` substitutes the default for an empty override.
+    command: Vec<String>,
+    /// Start-time bookkeeping used for uptime reporting.
+    inner: Mutex<Inner>,
+    /// Current state; subscribers obtained via `watch_state` see every change.
+    state_tx: watch::Sender<InstanceState>,
+}
+
+impl DockerComposeSupervisor {
+    /// Build a supervisor for `cfg`, starting out in the `Stopped` state.
+    ///
+    /// Settings come from `cfg.docker_compose` (or its `Default` when absent):
+    /// the run dir is `working_dir` if set, else the instance `root`; the
+    /// project name falls back to the instance id; a missing or empty `command`
+    /// falls back to `docker compose`.
+    pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
+        let DockerComposeConfig {
+            file,
+            project,
+            service,
+            command,
+        } = cfg.docker_compose.clone().unwrap_or_default();
+
+        let run_dir = match &cfg.working_dir {
+            Some(dir) => dir.clone(),
+            None => cfg.root.clone(),
+        };
+        let command = match command {
+            Some(argv) if !argv.is_empty() => argv,
+            _ => vec!["docker".to_string(), "compose".to_string()],
+        };
+        let project = match project {
+            Some(name) => name,
+            None => cfg.id.clone(),
+        };
+        let (state_tx, _) = watch::channel(InstanceState::Stopped);
+
+        Arc::new(Self {
+            instance_id: cfg.id.clone(),
+            run_dir,
+            compose_file: file,
+            project,
+            service,
+            command,
+            inner: Mutex::new(Inner { started_at: None }),
+            state_tx,
+        })
+    }
+
+    /// Record `state` as the current state, whether or not anyone is
+    /// subscribed to the watch channel.
+    fn set_state(&self, state: InstanceState) {
+        let _ = self.state_tx.send_replace(state);
+    }
+
+    /// Execute a single compose subcommand (`up`/`stop`/`restart`/...) in the
+    /// run dir and wait for it to finish.
+    ///
+    /// Argument order is: leading command args, `-f <file>` (if configured),
+    /// `-p <project>`, the subcommand, its own arguments, then the optional
+    /// single service. A non-zero exit becomes an error carrying the trimmed
+    /// stderr, or the trimmed stdout when stderr is blank.
+    async fn run(&self, action: &str, action_args: &[&str]) -> Result<()> {
+        let mut compose = Command::new(&self.command[0]);
+        compose
+            .current_dir(&self.run_dir)
+            .stdin(Stdio::null())
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        // Global flags have to come before the subcommand.
+        compose.args(&self.command[1..]);
+        if let Some(file) = self.compose_file.as_ref() {
+            compose.arg("-f").arg(file);
+        }
+        compose.arg("-p").arg(&self.project);
+        compose.arg(action).args(action_args);
+        compose.args(self.service.iter());
+
+        let output = compose
+            .output()
+            .await
+            .with_context(|| format!("running `{} {action}` (is docker installed and on PATH?)", self.command.join(" ")))?;
+
+        if output.status.success() {
+            return Ok(());
+        }
+
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let detail = match stderr.trim() {
+            "" => stdout.trim(),
+            text => text,
+        };
+        bail!("compose {action} failed ({}): {detail}", output.status);
+    }
+}
+
+#[async_trait::async_trait]
+impl Supervisor for DockerComposeSupervisor {
+    /// Id of the instance this supervisor manages.
+    fn instance_id(&self) -> &str {
+        self.instance_id.as_str()
+    }
+
+    /// Snapshot of the most recently recorded state.
+    fn state(&self) -> InstanceState {
+        InstanceState::clone(&self.state_tx.borrow())
+    }
+
+    /// New receiver that observes subsequent state changes.
+    fn watch_state(&self) -> watch::Receiver<InstanceState> {
+        self.state_tx.subscribe()
+    }
+
+    /// Seconds since the last successful `start`/`restart`, or 0 unless the
+    /// recorded state is `Running` and a start time is known.
+    async fn uptime_seconds(&self) -> u64 {
+        let guard = self.inner.lock().await;
+        let running = matches!(*self.state_tx.borrow(), InstanceState::Running);
+        match guard.started_at {
+            Some(since) if running => since.elapsed().as_secs(),
+            _ => 0,
+        }
+    }
+
+    /// `compose up -d`. Refused while the recorded state is `Running` or
+    /// `Starting`; a failed `up` leaves the state at `Stopped`.
+    async fn start(self: Arc<Self>) -> Result<()> {
+        let busy = matches!(
+            *self.state_tx.borrow(),
+            InstanceState::Running | InstanceState::Starting
+        );
+        if busy {
+            bail!("instance '{}' is already running", self.instance_id);
+        }
+
+        self.set_state(InstanceState::Starting);
+        if let Err(err) = self.run("up", &["-d"]).await {
+            self.set_state(InstanceState::Stopped);
+            return Err(err);
+        }
+
+        self.inner.lock().await.started_at = Some(Instant::now());
+        self.set_state(InstanceState::Running);
+        tracing::info!("instance '{}' compose up -d", self.instance_id);
+        Ok(())
+    }
+
+    /// `compose stop`. On success the start time is cleared and the state
+    /// becomes `Stopped`; on failure the state is put back to `Running`.
+    async fn stop(self: Arc<Self>) -> Result<()> {
+        self.set_state(InstanceState::Stopping);
+        if let Err(err) = self.run("stop", &[]).await {
+            // Stop failed — the stack is most likely still up.
+            self.set_state(InstanceState::Running);
+            return Err(err);
+        }
+
+        self.inner.lock().await.started_at = None;
+        self.set_state(InstanceState::Stopped);
+        tracing::info!("instance '{}' compose stop", self.instance_id);
+        Ok(())
+    }
+
+    /// `compose restart`. On success the start time is reset and the state
+    /// becomes `Running`; on failure the state is set to `Stopped`.
+    async fn restart(self: Arc<Self>) -> Result<()> {
+        self.set_state(InstanceState::Starting);
+        if let Err(err) = self.run("restart", &[]).await {
+            self.set_state(InstanceState::Stopped);
+            return Err(err);
+        }
+
+        self.inner.lock().await.started_at = Some(Instant::now());
+        self.set_state(InstanceState::Running);
+        tracing::info!("instance '{}' compose restart", self.instance_id);
+        Ok(())
+    }
+}
--- a/corrosion-host-agent/src/filemanager.rs
+++ b/corrosion-host-agent/src/filemanager.rs
@@ -0,0 +1,544 @@
+//! Jailed file manager for game-server install directories.
+//!
+//! Every path operation is confined to the instance `root` — the directory
+//! declared as `root` in `[[instance]]` config.  A two-stage check (lexical
+//! Clean + `std::fs::canonicalize`) prevents both `../..` traversals and
+//! symlink-based escapes: even if an attacker plants a symlink inside the root
+//! that points outside it, `canonicalize` resolves the target and the prefix
+//! check catches the escape.
+//!
+//! The NATS request/reply contract mirrors the Go companion agent's jailed file
+//! manager (see `companion-agent/internal/filemanager/`) but uses a simpler
+//! flat JSON envelope rather than the VueFinder storage-path protocol — the
+//! Rust agent is the replacement, and the panel's backend talks to whichever
+//! agent is present.
+//!
+//! Subject: `corrosion.{license}.{instance}.files.cmd`
+//! Request:  `{"op":"list"|"read"|"write"|"delete"|"rename"|"mkdir"|"mkfile"|"move"|"copy",
+//!             "path":"rel/path", "dest"?:"...", "content"?:"...", "name"?:"..."}`
+//! Response: `{"status":"success","data":...}` or `{"status":"error","message":"..."}`
+
+use anyhow::{bail, Context};
+use chrono::{DateTime, SecondsFormat, Utc};
+use serde::{Deserialize, Serialize};
+use std::fs;
+use std::path::{Path, PathBuf};
+
+/// Maximum size for a `read` operation (5 MiB).  `read` rejects any file
+/// whose reported length exceeds this instead of loading it into memory.
+/// Larger files must be transferred through a dedicated download endpoint,
+/// not the file manager.
+const MAX_READ_SIZE: u64 = 5 * 1024 * 1024;
+
+// ---------------------------------------------------------------------------
+// Wire types
+// ---------------------------------------------------------------------------
+
+/// One file-manager request as decoded from the JSON request payload.
+/// Which of the optional fields is consulted depends on `op` (see `dispatch`).
+#[derive(Debug, Deserialize)]
+pub struct FileRequest {
+    /// Operation name: `list`, `read`, `write`, `delete`, `rename`, `mkdir`,
+    /// `mkfile`, `move` or `copy`. Anything else is answered with an error.
+    pub op: String,
+    /// Relative path within the instance root (the "subject" of the operation).
+    /// Defaults to the empty string, which `jail` resolves to the root itself.
+    #[serde(default)]
+    pub path: String,
+    /// Destination for `move` and `copy` — relative to instance root.
+    /// (`rename` does not read this field; it takes its target from `name`.)
+    #[serde(default)]
+    pub dest: Option<String>,
+    /// Text content for `write`.
+    #[serde(default)]
+    pub content: Option<String>,
+    /// New bare filename for `rename`. (`mkdir` and `mkfile` do not read this
+    /// field; they create the entry named by `path`.)
+    #[serde(default)]
+    pub name: Option<String>,
+}
+
+/// A single directory entry returned by `list`.
+///
+/// All metadata describes the entry itself: `list` does not follow symlinks,
+/// so a symlink is reported as a non-directory entry with the link's own size.
+#[derive(Debug, Serialize)]
+pub struct FileEntry {
+    /// File name of the entry (lossily converted to UTF-8).
+    pub name: String,
+    /// Path relative to the instance root, using forward slashes.
+    pub path: String,
+    /// True only for real directories (not for symlinks to directories).
+    pub is_dir: bool,
+    /// File size in bytes.  Zero for directories.
+    pub size: u64,
+    /// RFC 3339 modification timestamp (UTC, second precision); empty when the
+    /// modification time is unavailable.
+    pub modified: String,
+}
+
+// ---------------------------------------------------------------------------
+// Jail helper — the security core of this module
+// ---------------------------------------------------------------------------
+
+/// Resolve `rel` against `root`, then canonicalize to reject any form of
+/// escape including `../..` traversals and symlinks that point outside root.
+///
+/// For paths that do not yet exist (e.g. write targets), we canonicalize the
+/// nearest existing ancestor and then re-join the remaining components, which
+/// are lexically-clean because they went through `std::path::Path` building.
+///
+/// An empty `rel` or `"."` refers to the root itself.
+///
+/// Returns the absolute, canonicalized path if it is within `root`.
+///
+/// # Errors
+/// Fails when `root` cannot be canonicalized, when `rel` is an absolute path,
+/// when the path cannot be resolved, or when the resolved path is not the
+/// canonical root or a descendant of it.
+///
+/// NOTE(review): this only validates the path at call time; the caller's
+/// subsequent filesystem operation is a separate step.
+pub fn jail(root: &Path, rel: &str) -> anyhow::Result<PathBuf> {
+    // Canonicalize root once to get a stable prefix for comparison.
+    // We do this on every call rather than caching so the function stays
+    // pure and testable without Agent state.
+    let canon_root = fs::canonicalize(root)
+        .with_context(|| format!("canonicalize instance root '{}'", root.display()))?;
+
+    // Build the candidate absolute path.  `Path::join` with an absolute `rel`
+    // (e.g. "/etc/passwd") would replace the root entirely, so that case is
+    // detected and rejected before joining.
+    let candidate = if rel.is_empty() || rel == "." {
+        root.to_path_buf()
+    } else {
+        let rel_path = Path::new(rel);
+        if rel_path.is_absolute() {
+            bail!(
+                "absolute path '{}' is not allowed; supply a path relative to the instance root",
+                rel
+            );
+        }
+        root.join(rel_path)
+    };
+
+    // Normalize lexically first (removes `..` / `.` without filesystem access).
+    // This is a defence-in-depth step; the authoritative check is below.
+    let lexical = normalize_lexical(&candidate);
+
+    // Canonicalize: resolve symlinks and `..` via the kernel.
+    // For a not-yet-existing path we walk up to the nearest existing ancestor.
+    let canon = canonicalize_lenient(&lexical)?;
+
+    // Authoritative prefix check: the resolved path must be equal to or a
+    // child of the canonicalized root. `starts_with` compares whole path
+    // components, so `/srv/game2` is not treated as inside `/srv/game`.
+    if canon != canon_root && !canon.starts_with(&canon_root) {
+        bail!(
+            "path '{}' resolves to '{}' which is outside the instance root '{}'",
+            rel,
+            canon.display(),
+            canon_root.display()
+        );
+    }
+
+    Ok(canon)
+}
+
+/// Canonicalize a path that may not fully exist yet by walking up to the
+/// nearest existing ancestor, canonicalizing it, then re-joining the remaining
+/// (lexically-clean) suffix.
+///
+/// SECURITY: a symlink that cannot be canonicalized (dangling target or a
+/// link cycle) is rejected outright. Treating it as a "not yet existing" name
+/// would return a path that looks like it is inside the caller's root, while a
+/// later create/write on that path would follow the link to wherever it
+/// points.
+///
+/// # Errors
+/// Fails when the path runs through an unresolvable symlink, or when no
+/// existing ancestor can be found.
+fn canonicalize_lenient(path: &Path) -> anyhow::Result<PathBuf> {
+    let mut existing = path.to_path_buf();
+    // Trailing components that do not exist yet, innermost first.
+    let mut suffix: Vec<std::ffi::OsString> = Vec::new();
+
+    loop {
+        // The first iteration doubles as the fast path for a fully existing
+        // path (`suffix` is still empty then).
+        if let Ok(canon) = fs::canonicalize(&existing) {
+            let mut result = canon;
+            for component in suffix.iter().rev() {
+                result.push(component);
+            }
+            return Ok(result);
+        }
+
+        // `canonicalize` failed. If the entry is nevertheless present as a
+        // symlink (lstat does not follow it), its target is missing or cyclic
+        // and the real destination cannot be vetted — refuse it.
+        let unresolved_symlink = fs::symlink_metadata(&existing)
+            .map(|meta| meta.file_type().is_symlink())
+            .unwrap_or(false);
+        if unresolved_symlink {
+            bail!(
+                "path '{}' goes through symlink '{}' which cannot be resolved (dangling or cyclic link)",
+                path.display(),
+                existing.display()
+            );
+        }
+
+        // Otherwise this component simply does not exist yet: remember it and
+        // retry with the parent directory.
+        let file_name = match existing.file_name() {
+            Some(n) => n.to_os_string(),
+            None => bail!("cannot resolve path '{}'", path.display()),
+        };
+        suffix.push(file_name);
+        existing = match existing.parent() {
+            Some(p) => p.to_path_buf(),
+            None => bail!("cannot resolve path '{}'", path.display()),
+        };
+    }
+}
+
+/// Collapse `.` and `..` components purely textually, never consulting the
+/// filesystem (in the spirit of Go's `filepath.Clean`).
+///
+/// A `..` cancels the preceding normal component. When there is nothing
+/// normal to cancel (start of the path, after a root, or after another `..`)
+/// the `..` is kept as-is.
+fn normalize_lexical(path: &Path) -> PathBuf {
+    use std::path::Component;
+
+    let mut kept: Vec<Component> = Vec::new();
+    for part in path.components() {
+        let cancels_previous = matches!(part, Component::ParentDir)
+            && matches!(kept.last(), Some(Component::Normal(_)));
+
+        if cancels_previous {
+            kept.pop();
+        } else if !matches!(part, Component::CurDir) {
+            // Roots, prefixes, normal names and un-cancellable `..` all stay.
+            kept.push(part);
+        }
+    }
+    kept.into_iter().collect()
+}
+
+// ---------------------------------------------------------------------------
+// Operations
+// ---------------------------------------------------------------------------
+
+/// Enumerate the directory at `rel` (jailed to `root`).
+///
+/// Entries come back with directories first, then everything else, each
+/// group ordered by name. Metadata is taken with `symlink_metadata`, so a
+/// symlink is described as the link itself and never as its target.
+pub fn list(root: &Path, rel: &str) -> anyhow::Result<Vec<FileEntry>> {
+    let dir = jail(root, rel)?;
+    // Relative paths are computed against the canonicalized root so that a
+    // symlinked root (e.g. macOS /var → /private/var) still strips cleanly
+    // instead of falling back to the absolute path.
+    let canon_root = fs::canonicalize(root)
+        .with_context(|| format!("canonicalize root '{}'", root.display()))?;
+
+    let reader = fs::read_dir(&dir)
+        .with_context(|| format!("read_dir '{}'", dir.display()))?;
+
+    let mut listing = reader
+        .map(|item| -> anyhow::Result<FileEntry> {
+            let item = item
+                .with_context(|| format!("reading directory entry in '{}'", dir.display()))?;
+            let full = item.path();
+
+            // lstat: following the link would leak the size/type/existence of
+            // files outside the jail.
+            let meta = fs::symlink_metadata(&full)
+                .with_context(|| format!("stat '{}'", full.display()))?;
+
+            let is_dir = meta.is_dir();
+            let modified = match meta.modified() {
+                Ok(stamp) => {
+                    DateTime::<Utc>::from(stamp).to_rfc3339_opts(SecondsFormat::Secs, true)
+                }
+                Err(_) => String::new(),
+            };
+            let path = full
+                .strip_prefix(&canon_root)
+                .unwrap_or(&full)
+                .to_string_lossy()
+                .replace('\\', "/");
+
+            Ok(FileEntry {
+                name: item.file_name().to_string_lossy().into_owned(),
+                path,
+                is_dir,
+                size: if is_dir { 0 } else { meta.len() },
+                modified,
+            })
+        })
+        .collect::<anyhow::Result<Vec<FileEntry>>>()?;
+
+    // Stable sort keyed on (not-a-dir, name): dirs first, then alphabetical.
+    listing.sort_by(|a, b| (!a.is_dir, &a.name).cmp(&(!b.is_dir, &b.name)));
+
+    Ok(listing)
+}
+
+/// Return the contents of the file at `rel` (jailed to `root`) as a string.
+///
+/// Directories are rejected, as is any file whose reported length is above
+/// `MAX_READ_SIZE` bytes.
+pub fn read(root: &Path, rel: &str) -> anyhow::Result<String> {
+    let target = jail(root, rel)?;
+
+    let info = fs::metadata(&target)
+        .with_context(|| format!("stat '{}'", target.display()))?;
+    if info.is_dir() {
+        bail!("'{}' is a directory, not a file", rel);
+    }
+
+    let size = info.len();
+    if size > MAX_READ_SIZE {
+        bail!(
+            "file '{}' is {} bytes which exceeds the {} byte read limit",
+            rel,
+            size,
+            MAX_READ_SIZE
+        );
+    }
+
+    let text = fs::read_to_string(&target)
+        .with_context(|| format!("read '{}'", target.display()))?;
+    Ok(text)
+}
+
+/// Store `content` at `rel` (jailed to `root`), replacing any existing file.
+/// Missing parent directories are created first.
+pub fn write(root: &Path, rel: &str, content: &str) -> anyhow::Result<()> {
+    let target = jail(root, rel)?;
+
+    match target.parent() {
+        Some(dir) => fs::create_dir_all(dir)
+            .with_context(|| format!("create_dir_all '{}'", dir.display()))?,
+        None => {}
+    }
+
+    fs::write(&target, content).with_context(|| format!("write '{}'", target.display()))
+}
+
+/// Delete a file or directory tree at `rel` (jailed to `root`).
+///
+/// The instance root itself is never deleted: an empty `rel`, `"."`, or any
+/// path that resolves to the root is refused, because `remove_dir_all` on it
+/// would wipe the entire install directory in one request.
+///
+/// # Errors
+/// Fails when the path escapes the jail, resolves to the root, does not
+/// exist, or cannot be removed.
+pub fn delete(root: &Path, rel: &str) -> anyhow::Result<()> {
+    let abs = jail(root, rel)?;
+
+    // `jail` returns a canonical path, so compare against the canonical root.
+    let canon_root = fs::canonicalize(root)
+        .with_context(|| format!("canonicalize root '{}'", root.display()))?;
+    if abs == canon_root {
+        bail!("refusing to delete the instance root itself (path '{}')", rel);
+    }
+
+    let meta = fs::metadata(&abs)
+        .with_context(|| format!("stat '{}'", abs.display()))?;
+
+    if meta.is_dir() {
+        fs::remove_dir_all(&abs).with_context(|| format!("remove_dir_all '{}'", abs.display()))
+    } else {
+        fs::remove_file(&abs).with_context(|| format!("remove_file '{}'", abs.display()))
+    }
+}
+
+/// Give the entry at `rel` the bare name `new_name`, keeping it in the same
+/// parent directory (as derived from `rel`).
+///
+/// `new_name` must be a plain filename: not empty, not `.`/`..`, and free of
+/// `/` and `\` separators.
+pub fn rename(root: &Path, rel: &str, new_name: &str) -> anyhow::Result<()> {
+    if matches!(new_name, "" | "." | "..") {
+        bail!("new_name '{}' is not a valid filename", new_name);
+    }
+    if new_name.chars().any(|c| c == '/' || c == '\\') {
+        bail!("new_name '{}' must not contain path separators", new_name);
+    }
+
+    let src_abs = jail(root, rel)?;
+
+    // Stay in relative-path space: swap the last component of `rel` for
+    // `new_name`, so `jail` is never handed an absolute path.
+    let dest_rel = Path::new(rel)
+        .parent()
+        .filter(|dir| *dir != Path::new(""))
+        .map(|dir| dir.join(new_name).to_string_lossy().replace('\\', "/"))
+        .unwrap_or_else(|| new_name.to_string());
+
+    let dest_abs = jail(root, &dest_rel)?;
+
+    fs::rename(&src_abs, &dest_abs)
+        .with_context(|| format!("rename '{}' -> '{}'", src_abs.display(), dest_abs.display()))
+}
+
+/// Ensure the directory `rel` (jailed to `root`) exists, creating missing
+/// parents along the way.
+pub fn mkdir(root: &Path, rel: &str) -> anyhow::Result<()> {
+    let target = jail(root, rel)?;
+    let created = fs::create_dir_all(&target);
+    created.with_context(|| format!("mkdir '{}'", target.display()))
+}
+
+/// Create a new, empty file at `rel` (jailed to `root`), creating missing
+/// parent directories first.  An already-existing entry is an error
+/// (`create_new`).
+pub fn mkfile(root: &Path, rel: &str) -> anyhow::Result<()> {
+    let target = jail(root, rel)?;
+
+    match target.parent() {
+        Some(dir) => fs::create_dir_all(dir)
+            .with_context(|| format!("create_dir_all '{}'", dir.display()))?,
+        None => {}
+    }
+
+    // The handle is dropped straight away; only the creation matters.
+    std::fs::OpenOptions::new()
+        .write(true)
+        .create_new(true)
+        .open(&target)
+        .map(drop)
+        .with_context(|| format!("mkfile '{}'", target.display()))
+}
+
+/// Move `src` to `dest` (both relative to root).
+///
+/// A plain `rename` is tried first. If it fails (typically because source and
+/// destination are on different devices) the move falls back to a recursive
+/// copy followed by removing the source.
+///
+/// Guards:
+/// * moving a path onto itself is a no-op (the source must still exist);
+/// * moving a directory into its own subtree is refused — the copy fallback
+///   would otherwise keep descending into the directory it is creating.
+///
+/// # Errors
+/// Fails when either path escapes the jail, the destination lies inside the
+/// source, or the copy / source removal of the fallback fails.
+pub fn move_path(root: &Path, src: &str, dest: &str) -> anyhow::Result<()> {
+    let src_abs = jail(root, src)?;
+    let dest_abs = jail(root, dest)?;
+
+    if dest_abs == src_abs {
+        // Nothing to move, but still report a missing source.
+        fs::symlink_metadata(&src_abs)
+            .with_context(|| format!("stat source '{}'", src_abs.display()))?;
+        return Ok(());
+    }
+    if dest_abs.starts_with(&src_abs) {
+        bail!("cannot move '{}' into its own subdirectory '{}'", src, dest);
+    }
+
+    if let Some(parent) = dest_abs.parent() {
+        fs::create_dir_all(parent)
+            .with_context(|| format!("create_dir_all '{}'", parent.display()))?;
+    }
+
+    let rename_err = match fs::rename(&src_abs, &dest_abs) {
+        Ok(()) => return Ok(()),
+        Err(e) => e,
+    };
+
+    // Cross-device move: copy then delete. The rename error is kept in the
+    // context so a failed fallback still shows why the rename was not enough.
+    copy_recursive(&src_abs, &dest_abs).with_context(|| {
+        format!(
+            "move '{}' -> '{}' (rename failed: {rename_err})",
+            src_abs.display(),
+            dest_abs.display()
+        )
+    })?;
+
+    // Remove the source according to its type: `remove_dir_all` fails on a
+    // regular file, which would leave both copies behind.
+    let src_is_dir = fs::symlink_metadata(&src_abs)
+        .with_context(|| format!("stat source '{}'", src_abs.display()))?
+        .is_dir();
+    let removed = if src_is_dir {
+        fs::remove_dir_all(&src_abs)
+    } else {
+        fs::remove_file(&src_abs)
+    };
+    removed
+        .with_context(|| format!("remove source '{}' after cross-device move", src_abs.display()))
+        .with_context(|| format!("move '{}' -> '{}'", src_abs.display(), dest_abs.display()))
+}
+
+/// Copy `src` to `dest` (both relative to root).
+///
+/// The destination must not be the source itself or lie inside it:
+/// * copying a file onto itself would truncate it (`fs::copy` opens the
+///   destination for writing before reading the source);
+/// * copying a directory into its own subtree would keep descending into the
+///   directory being created.
+///
+/// # Errors
+/// Fails when either path escapes the jail, the destination is the source or
+/// inside it, or the underlying copy fails (including when a symlink is
+/// encountered).
+pub fn copy(root: &Path, src: &str, dest: &str) -> anyhow::Result<()> {
+    let src_abs = jail(root, src)?;
+    let dest_abs = jail(root, dest)?;
+
+    // `starts_with` is component-wise and also true for equal paths.
+    if dest_abs.starts_with(&src_abs) {
+        bail!("cannot copy '{}' onto itself or into its own subdirectory '{}'", src, dest);
+    }
+
+    if let Some(parent) = dest_abs.parent() {
+        fs::create_dir_all(parent)
+            .with_context(|| format!("create_dir_all '{}'", parent.display()))?;
+    }
+
+    copy_recursive(&src_abs, &dest_abs)
+        .with_context(|| format!("copy '{}' -> '{}'", src_abs.display(), dest_abs.display()))
+}
+
+/// Copy a file, or a directory with everything beneath it, from `src` to
+/// `dest`.
+///
+/// SECURITY: the source is inspected with `symlink_metadata` (links are NOT
+/// followed) and any symlink aborts the copy. `jail()` vets only the top-level
+/// src/dest, so a link nested inside a copied directory could otherwise drag
+/// content from outside the jail (e.g. `/etc`) into it, where it would become
+/// readable — a jail-escape exfiltration.
+fn copy_recursive(src: &Path, dest: &Path) -> anyhow::Result<()> {
+    let kind = fs::symlink_metadata(src)
+        .with_context(|| format!("stat source '{}'", src.display()))?
+        .file_type();
+
+    if kind.is_symlink() {
+        bail!(
+            "refusing to copy symlink '{}' — symlinks are not followed across the jail boundary",
+            src.display()
+        );
+    }
+
+    // Anything that is not a directory is copied as a single file.
+    if !kind.is_dir() {
+        fs::copy(src, dest)
+            .with_context(|| format!("copy '{}' -> '{}'", src.display(), dest.display()))?;
+        return Ok(());
+    }
+
+    fs::create_dir_all(dest)
+        .with_context(|| format!("create_dir_all '{}'", dest.display()))?;
+
+    let children = fs::read_dir(src)
+        .with_context(|| format!("read_dir '{}'", src.display()))?;
+    for child in children {
+        let child = child?;
+        let child_dest = dest.join(child.file_name());
+        copy_recursive(&child.path(), &child_dest)?;
+    }
+
+    Ok(())
+}
+
+// ---------------------------------------------------------------------------
+// NATS request dispatch
+// ---------------------------------------------------------------------------
+
+/// Dispatch a `FileRequest` against `root` and return a JSON `serde_json::Value`
+/// ready for the NATS reply.
+///
+/// The result is always an envelope: `{"status":"success","data":...}` on
+/// success, `{"status":"error","message":"..."}` otherwise (failures are also
+/// logged at warn level). `data` is `{"entries": [...]}` for `list`,
+/// `{"content": "..."}` for `read`, and `null` for every other op.
+///
+/// `move` and `copy` require a non-empty `dest`. A missing `dest` must not be
+/// defaulted to `""`: `jail` resolves the empty path to the instance root, so
+/// the operation would target the root directory itself.
+pub fn dispatch(root: &Path, req: &FileRequest) -> serde_json::Value {
+    use serde_json::json;
+
+    /// The request's `dest`, rejected when missing or empty.
+    fn required_dest(req: &FileRequest) -> anyhow::Result<&str> {
+        match req.dest.as_deref() {
+            Some(dest) if !dest.is_empty() => Ok(dest),
+            _ => Err(anyhow::anyhow!("op '{}' requires a non-empty 'dest'", req.op)),
+        }
+    }
+
+    let result = match req.op.as_str() {
+        "list" => {
+            list(root, &req.path).map(|entries| json!({ "entries": entries }))
+        }
+        "read" => {
+            read(root, &req.path).map(|content| json!({ "content": content }))
+        }
+        "write" => {
+            // An absent `content` writes an empty file.
+            let content = req.content.as_deref().unwrap_or("");
+            write(root, &req.path, content).map(|_| json!(null))
+        }
+        "delete" => {
+            delete(root, &req.path).map(|_| json!(null))
+        }
+        "rename" => {
+            // An absent `name` becomes "", which `rename` rejects as invalid.
+            let new_name = req.name.as_deref().unwrap_or("");
+            rename(root, &req.path, new_name).map(|_| json!(null))
+        }
+        "mkdir" => {
+            mkdir(root, &req.path).map(|_| json!(null))
+        }
+        "mkfile" => {
+            mkfile(root, &req.path).map(|_| json!(null))
+        }
+        "move" => {
+            required_dest(req)
+                .and_then(|dest| move_path(root, &req.path, dest))
+                .map(|_| json!(null))
+        }
+        "copy" => {
+            required_dest(req)
+                .and_then(|dest| copy(root, &req.path, dest))
+                .map(|_| json!(null))
+        }
+        other => Err(anyhow::anyhow!(
+            "unknown op '{}' (supported: list, read, write, delete, rename, mkdir, mkfile, move, copy)",
+            other
+        )),
+    };
+
+    match result {
+        Ok(data) => json!({ "status": "success", "data": data }),
+        Err(e) => {
+            tracing::warn!("filemanager op='{}' path='{}': {e:#}", req.op, req.path);
+            json!({ "status": "error", "message": format!("{e:#}") })
+        }
+    }
+}
+
+/// Serve file manager requests for `instance_id`, jailed to `root`, from the
+/// `corrosion.{license}.{instance}.files.cmd` subject.
+///
+/// Each incoming message is handled on its own spawned task. The loop ends
+/// when the agent's cancellation token fires or the NATS subscription ends;
+/// both cases return `Ok(())`. A failure to subscribe is returned as an error.
+pub async fn run(
+    agent: std::sync::Arc<crate::agent::Agent>,
+    instance_id: String,
+    root: PathBuf,
+) -> anyhow::Result<()> {
+    use futures::StreamExt;
+
+    let subject = crate::subjects::instance_files_cmd(&agent.cfg.license_id, &instance_id);
+    let mut sub = agent.nats.subscribe(subject.clone()).await?;
+    tracing::info!("file manager handler listening on {subject}");
+
+    let cancel = agent.shutdown.clone();
+    loop {
+        // Wait for either the next message or shutdown.
+        let next = tokio::select! {
+            msg = sub.next() => msg,
+            _ = cancel.cancelled() => {
+                tracing::info!("file manager handler stopping for '{instance_id}'");
+                break;
+            }
+        };
+
+        let Some(msg) = next else {
+            tracing::warn!("file manager subscription ended for '{instance_id}'");
+            break;
+        };
+
+        // Hand each request to its own task so a slow one cannot stall the
+        // subscription loop.
+        let task_agent = agent.clone();
+        let task_root = root.clone();
+        let task_instance = instance_id.clone();
+        tokio::spawn(async move { handle(task_agent, &task_instance, &task_root, msg).await });
+    }
+    Ok(())
+}
+
+/// Answer one file manager message: decode it, run the operation on the
+/// blocking pool, and publish the JSON result to the message's reply subject.
+///
+/// Messages without a reply subject are ignored. Decode failures and a failed
+/// blocking task are reported to the requester as `status: "error"` replies;
+/// serialization and publish failures are only logged.
+async fn handle(
+    agent: std::sync::Arc<crate::agent::Agent>,
+    instance_id: &str,
+    root: &Path,
+    msg: async_nats::Message,
+) {
+    let reply = match msg.reply.clone() {
+        Some(subject) => subject,
+        None => {
+            tracing::warn!("file manager message without reply subject ignored (instance '{instance_id}')");
+            return;
+        }
+    };
+
+    let parsed = serde_json::from_slice::<FileRequest>(&msg.payload);
+    let response = match parsed {
+        Err(e) => {
+            serde_json::json!({ "status": "error", "message": format!("invalid request payload: {e}") })
+        }
+        Ok(req) => {
+            // Blocking fs calls — offload from the async executor.
+            let jail_root = root.to_path_buf();
+            let outcome = tokio::task::spawn_blocking(move || dispatch(&jail_root, &req)).await;
+            match outcome {
+                Ok(value) => value,
+                Err(e) => {
+                    serde_json::json!({ "status": "error", "message": format!("internal error: {e}") })
+                }
+            }
+        }
+    };
+
+    match serde_json::to_vec(&response) {
+        Ok(bytes) => {
+            if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
+                tracing::warn!("file manager response publish failed: {e}");
+            }
+        }
+        Err(e) => {
+            tracing::error!("file manager response serialize failed: {e}");
+        }
+    }
+}
--- a/corrosion-host-agent/src/hostcmd.rs
+++ b/corrosion-host-agent/src/hostcmd.rs
@@ -13,11 +13,15 @@ use crate::agent::Agent;
 use crate::prober;
 use crate::subjects;
 use crate::telemetry;
+use crate::update;
 use crate::version;

 #[derive(Debug, Deserialize)]
 struct HostCommand {
+    /// Name of the host-level function to run. `"update"` is handled as a
+    /// self-update; any other value is handed to `dispatch`.
    func: String,
+    /// Signed-update artifact URL (for func = "update"). Optional on the wire,
+    /// but an `update` command without it is answered with an error.
+    #[serde(default)]
+    url: Option<String>,
 }

 pub async fn run(agent: Arc<Agent>) -> anyhow::Result<()> {
@@ -55,20 +59,46 @@ async fn handle(agent: Arc<Agent>, msg: async_nats::Message) {
        return;
    };

-    let response = match serde_json::from_slice::<HostCommand>(&msg.payload) {
-        Ok(cmd) => dispatch(&agent, &cmd.func).await,
-        Err(e) => json!({ "status": "error", "message": format!("invalid command payload: {e}") }),
-    };
-
-    let bytes = match serde_json::to_vec(&response) {
-        Ok(b) => b,
+    let cmd = match serde_json::from_slice::<HostCommand>(&msg.payload) {
+        Ok(cmd) => cmd,
        Err(e) => {
-            tracing::error!("response serialize failed: {e}");
+            publish(&agent, &reply, json!({ "status": "error", "message": format!("invalid command payload: {e}") })).await;
            return;
        }
    };
-    if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
-        tracing::warn!("response publish failed: {e}");
+
+    // Self-update is special: it must reply BEFORE relaunching, because the
+    // relaunch replaces this process and nothing after it would run.
+    if cmd.func == "update" {
+        let Some(url) = cmd.url else {
+            publish(&agent, &reply, json!({ "status": "error", "message": "update requires a 'url'" })).await;
+            return;
+        };
+        match update::download_verify_swap(&url).await {
+            Ok(_) => {
+                publish(&agent, &reply, json!({ "status": "success", "func": "update", "message": "verified and swapped; relaunching" })).await;
+                let _ = agent.nats.flush().await;
+                update::relaunch_and_exit();
+            }
+            Err(e) => {
+                publish(&agent, &reply, json!({ "status": "error", "func": "update", "message": format!("{e:#}") })).await;
+            }
+        }
+        return;
+    }
+
+    let response = dispatch(&agent, &cmd.func).await;
+    publish(&agent, &reply, response).await;
+}
+
+/// Serialize `value` as JSON and publish it to the `reply` subject.
+///
+/// Best-effort: a serialization failure is logged at error level and a
+/// publish failure at warn level; neither is reported back to the caller.
+async fn publish(agent: &Arc<Agent>, reply: &async_nats::Subject, value: serde_json::Value) {
+    match serde_json::to_vec(&value) {
+        Ok(bytes) => {
+            if let Err(e) = agent.nats.publish(reply.clone(), bytes.into()).await {
+                tracing::warn!("response publish failed: {e}");
+            }
+        }
+        Err(e) => tracing::error!("response serialize failed: {e}"),
    }
 }

--- a/corrosion-host-agent/src/instancecmd.rs
+++ b/corrosion-host-agent/src/instancecmd.rs
@@ -0,0 +1,361 @@
+//! Per-instance command channel + state-change events.
+//!
+//! Each process-managed instance gets a request-reply subscriber on
+//! `corrosion.{license}.{instance_id}.cmd` (funcs: start/stop/restart/status/rcon/steam_update/wipe)
+//! and a publisher task that pushes every supervisor state change to
+//! `corrosion.{license}.{instance_id}.status` — the panel sees crashes when
+//! they happen, not when the next heartbeat ambles in.
+
+use chrono::{SecondsFormat, Utc};
+use futures::StreamExt;
+use serde::Deserialize;
+use serde_json::json;
+use std::sync::Arc;
+
+use crate::agent::Agent;
+use crate::subjects;
+use crate::steamcmd;
+use crate::supervisor::Supervisor;
+use crate::wipe;
+
+/// JSON request body accepted on an instance's command subject.
+///
+/// Only `func` is mandatory; every other field has a serde default and is
+/// consulted only by the func named in its comment.
+#[derive(Debug, Deserialize)]
+struct InstanceCommand {
+    /// Name of the operation to run; matched by string in `dispatch`.
+    func: String,
+    /// Payload for funcs that carry a text argument (e.g. rcon).
+    #[serde(default)]
+    command: Option<String>,
+    /// Wipe type: "map" | "blueprint" | "full" — required for func="wipe".
+    #[serde(default)]
+    wipe_type: Option<wipe::WipeType>,
+    /// Whether to back up wipe targets before deleting (func="wipe").
+    #[serde(default)]
+    backup: bool,
+    /// Label for the backup subdirectory (func="wipe"). Defaults to "wipe-backup".
+    #[serde(default = "default_backup_label")]
+    backup_label: String,
+}
+
+/// Serde default for `InstanceCommand::backup_label`.
+fn default_backup_label() -> String {
+    String::from("wipe-backup")
+}
+
+/// Relay supervisor state transitions to the instance's status subject.
+///
+/// Every time the supervisor's watch channel reports a change, the new state
+/// is wrapped in a JSON event (`timestamp`, `instance_id`, `event`) and
+/// published. The task ends when waiting on the watch channel fails or the
+/// agent's shutdown token is cancelled. Serialize and publish failures are
+/// only logged.
+pub async fn publish_state_changes(agent: Arc<Agent>, sup: Arc<dyn Supervisor>) {
+    let status_subject = subjects::instance_status(&agent.cfg.license_id, sup.instance_id());
+    let mut state_rx = sup.watch_state();
+    let shutdown = agent.shutdown.clone();
+
+    loop {
+        let changed = tokio::select! {
+            changed = state_rx.changed() => changed,
+            _ = shutdown.cancelled() => break,
+        };
+        // A failed wait means no further state changes can be observed.
+        if changed.is_err() {
+            break;
+        }
+
+        let current = state_rx.borrow().clone();
+        let payload = serde_json::to_vec(&json!({
+            "timestamp": Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
+            "instance_id": sup.instance_id(),
+            "event": current,
+        }));
+        let bytes = match payload {
+            Ok(bytes) => bytes,
+            Err(e) => {
+                tracing::error!("status serialize failed: {e}");
+                continue;
+            }
+        };
+        if let Err(e) = agent.nats.publish(status_subject.clone(), bytes.into()).await {
+            tracing::warn!("status publish failed for '{}': {e}", sup.instance_id());
+        }
+    }
+}
+
+/// Serve request-reply commands for a single instance until shutdown.
+///
+/// Subscribes to the instance's command subject and hands each incoming
+/// message to `handle` on its own spawned task. Returns `Ok(())` once the
+/// subscription ends or the agent's shutdown token is cancelled; the only
+/// error surfaced is a failed subscribe.
+pub async fn run(agent: Arc<Agent>, sup: Arc<dyn Supervisor>) -> anyhow::Result<()> {
+    let subject = subjects::instance_cmd(&agent.cfg.license_id, sup.instance_id());
+    let mut sub = agent.nats.subscribe(subject.clone()).await?;
+    tracing::info!("instance command handler listening on {subject}");
+
+    let shutdown = agent.shutdown.clone();
+    loop {
+        let next = tokio::select! {
+            next = sub.next() => next,
+            _ = shutdown.cancelled() => {
+                tracing::info!("instance command handler stopping for '{}'", sup.instance_id());
+                break;
+            }
+        };
+
+        let Some(msg) = next else {
+            tracing::warn!("instance command subscription ended for '{}'", sup.instance_id());
+            break;
+        };
+
+        // Each command runs on its own task, so the loop goes straight back
+        // to waiting for the next message.
+        tokio::spawn(handle(agent.clone(), sup.clone(), msg));
+    }
+    Ok(())
+}
+
+/// Decode one command message, run it, and publish the JSON reply.
+///
+/// Messages without a reply subject are dropped with a warning. A payload that
+/// fails to deserialize is answered with an error object rather than ignored.
+async fn handle(agent: Arc<Agent>, sup: Arc<dyn Supervisor>, msg: async_nats::Message) {
+    let reply_to = match msg.reply.clone() {
+        Some(subject) => subject,
+        None => {
+            tracing::warn!("instance command without reply subject ignored");
+            return;
+        }
+    };
+
+    let parsed = serde_json::from_slice::<InstanceCommand>(&msg.payload);
+    let response = match parsed {
+        Err(e) => json!({ "status": "error", "message": format!("invalid command payload: {e}") }),
+        Ok(cmd) => dispatch(&agent, &sup, &cmd).await,
+    };
+
+    match serde_json::to_vec(&response) {
+        Err(e) => tracing::error!("response serialize failed: {e}"),
+        Ok(bytes) => {
+            if let Err(e) = agent.nats.publish(reply_to, bytes.into()).await {
+                tracing::warn!("response publish failed: {e}");
+            }
+        }
+    }
+}
+
+/// Execute one decoded command against `sup` and build the JSON reply.
+///
+/// `start`, `stop` and `restart` share the reply shape built at the bottom of
+/// the function; `status`, `rcon`, `steam_update`, `wipe` and unknown funcs
+/// return their own reply objects early. Every failure is reported as a
+/// `"status": "error"` object — this function never returns an error itself.
+async fn dispatch(
+    agent: &Arc<Agent>,
+    sup: &Arc<dyn Supervisor>,
+    cmd: &InstanceCommand,
+) -> serde_json::Value {
+    let func = cmd.func.as_str();
+
+    // start/stop/restart take `self: Arc<Self>` (they may hand a clone to a
+    // monitor task), so clone the Arc before the consuming call.
+    let outcome = match func {
+        "start" => sup.clone().start().await.map(|_| "starting"),
+        "stop" => sup.clone().stop().await.map(|_| "stopped"),
+        "restart" => sup.clone().restart().await.map(|_| "restarted"),
+        "status" => {
+            return json!({
+                "status": "success",
+                "func": "status",
+                "instance_id": sup.instance_id(),
+                "state": sup.state(),
+                "uptime_seconds": sup.uptime_seconds().await,
+            });
+        }
+        "rcon" => {
+            // Look up the InstanceConfig for this supervisor so we can access
+            // rcon settings and the game name without changing the supervisor's
+            // data model.
+            let inst_cfg = agent
+                .cfg
+                .instances
+                .iter()
+                .find(|i| i.id == sup.instance_id());
+
+            let rcon_cfg = inst_cfg.and_then(|i| i.rcon.as_ref());
+            let Some(rcon_cfg) = rcon_cfg else {
+                return json!({
+                    "status": "error",
+                    "func": "rcon",
+                    "instance_id": sup.instance_id(),
+                    "message": format!("instance '{}' has no rcon configured", sup.instance_id()),
+                });
+            };
+
+            let Some(command) = cmd.command.as_deref() else {
+                return json!({
+                    "status": "error",
+                    "func": "rcon",
+                    "instance_id": sup.instance_id(),
+                    "message": "rcon func requires a 'command' field",
+                });
+            };
+
+            // `inst_cfg` is necessarily `Some` here (rcon_cfg came from it), so
+            // the "rust" fallback is only a formality.
+            let game = inst_cfg.map(|i| i.game.as_str()).unwrap_or("rust");
+            return match crate::rcon::send_command(rcon_cfg, game, command).await {
+                Ok(output) => json!({
+                    "status": "success",
+                    "func": "rcon",
+                    "instance_id": sup.instance_id(),
+                    "output": output,
+                }),
+                Err(e) => json!({
+                    "status": "error",
+                    "func": "rcon",
+                    "instance_id": sup.instance_id(),
+                    "message": format!("{e:#}"),
+                }),
+            };
+        }
+        "steam_update" => {
+            // Look up instance config for game name, root, and optional steamcmd
+            // settings.  The supervisor only carries process-control state, not
+            // the full config, so we reach into agent.cfg.instances here as the
+            // rcon dispatch does.
+            let inst_cfg = agent.cfg.instances.iter().find(|i| i.id == sup.instance_id());
+
+            let Some(inst_cfg) = inst_cfg else {
+                return json!({
+                    "status": "error",
+                    "func": "steam_update",
+                    "instance_id": sup.instance_id(),
+                    "message": format!("no config found for instance '{}'", sup.instance_id()),
+                });
+            };
+
+            let game = inst_cfg.game.as_str();
+            let root = inst_cfg.root.clone();
+
+            // Resolve steamcmd path and validate flag from config or use defaults.
+            // A configured path that is not valid UTF-8 also falls back to the
+            // bare "steamcmd" name.
+            let (steamcmd_path, validate) = match inst_cfg.steamcmd.as_ref() {
+                Some(s) => {
+                    let path = s
+                        .steamcmd_path
+                        .as_ref()
+                        .and_then(|p| p.to_str().map(|s| s.to_string()))
+                        .unwrap_or_else(|| "steamcmd".to_string());
+                    (path, s.validate)
+                }
+                None => ("steamcmd".to_string(), false),
+            };
+
+            let license = agent.cfg.license_id.clone();
+            let instance_id = sup.instance_id().to_string();
+            let nats = agent.nats.clone();
+
+            // Publish each progress line to the steam_status subject.
+            let on_progress = move |line: &str| {
+                let subject = subjects::instance_steam_status(&license, &instance_id);
+                let event = json!({
+                    "timestamp": Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
+                    "instance_id": instance_id,
+                    "line": line,
+                });
+                match serde_json::to_vec(&event) {
+                    Ok(bytes) => {
+                        // Fire-and-forget: on_progress is a plain (non-async)
+                        // closure, so the publish is handed to a detached
+                        // tokio task instead of being awaited here.
+                        let nats = nats.clone();
+                        tokio::spawn(async move {
+                            if let Err(e) = nats.publish(subject, bytes.into()).await {
+                                tracing::warn!("steam_status publish failed: {e}");
+                            }
+                        });
+                    }
+                    Err(e) => tracing::error!("steam_status serialize failed: {e}"),
+                }
+            };
+
+            return match steamcmd::update(game, &root, &steamcmd_path, validate, on_progress).await {
+                Ok(()) => json!({
+                    "status": "success",
+                    "func": "steam_update",
+                    "instance_id": sup.instance_id(),
+                }),
+                Err(e) => json!({
+                    "status": "error",
+                    "func": "steam_update",
+                    "instance_id": sup.instance_id(),
+                    "message": format!("{e:#}"),
+                }),
+            };
+        }
+        "wipe" => {
+            let inst_cfg = agent.cfg.instances.iter().find(|i| i.id == sup.instance_id());
+
+            let Some(inst_cfg) = inst_cfg else {
+                return json!({
+                    "status": "error",
+                    "func": "wipe",
+                    "instance_id": sup.instance_id(),
+                    "message": format!("no config found for instance '{}'", sup.instance_id()),
+                });
+            };
+
+            let Some(wipe_type) = cmd.wipe_type.clone() else {
+                return json!({
+                    "status": "error",
+                    "func": "wipe",
+                    "instance_id": sup.instance_id(),
+                    "message": "wipe func requires a 'wipe_type' field (\"map\", \"blueprint\", or \"full\")",
+                });
+            };
+
+            let root = inst_cfg.root.clone();
+            let instance_id = sup.instance_id().to_string();
+
+            let wipe_req = wipe::WipeRequest {
+                wipe_type,
+                backup: cmd.backup,
+                backup_label: cmd.backup_label.clone(),
+            };
+
+            // Stop the server best-effort before wiping; proceed even if stop fails
+            // (the server may already be down).
+            if let Err(e) = sup.clone().stop().await {
+                tracing::warn!("wipe: stop instance '{}' failed (proceeding anyway): {e:#}", instance_id);
+            }
+
+            // Run the blocking I/O on the blocking thread pool.
+            let result = tokio::task::spawn_blocking(move || wipe::execute(&root, &wipe_req)).await;
+
+            // Restart best-effort regardless of wipe outcome.
+            // NOTE(review): this start also runs when the instance was not
+            // running before the wipe — confirm that is intended.
+            if let Err(e) = sup.clone().start().await {
+                tracing::warn!("wipe: restart instance '{}' failed: {e:#}", instance_id);
+            }
+
+            // Outer `Err` is a failure to join the blocking task; inner `Err`
+            // is the wipe itself failing.
+            return match result {
+                Ok(Ok(wr)) => {
+                    // Reported as the lowercased `Debug` rendering of the wipe type.
+                    let wipe_type_str = format!("{:?}", wr.wipe_type).to_lowercase();
+                    json!({
+                        "status": "success",
+                        "func": "wipe",
+                        "instance_id": sup.instance_id(),
+                        "wipe_type": wipe_type_str,
+                        "deleted_count": wr.deleted_count,
+                    })
+                }
+                Ok(Err(e)) => json!({
+                    "status": "error",
+                    "func": "wipe",
+                    "instance_id": sup.instance_id(),
+                    "message": format!("{e:#}"),
+                }),
+                Err(e) => json!({
+                    "status": "error",
+                    "func": "wipe",
+                    "instance_id": sup.instance_id(),
+                    "message": format!("internal error: {e}"),
+                }),
+            };
+        }
+        other => {
+            // NOTE(review): unlike every other reply, this one carries neither
+            // "func" nor "instance_id".
+            return json!({
+                "status": "error",
+                "message": format!("unknown func '{other}' (supported: start, stop, restart, status, rcon, steam_update, wipe)"),
+            });
+        }
+    };
+
+    // Shared reply for start/stop/restart: `result` is the static label chosen
+    // above and `state` is read after the call has completed.
+    match outcome {
+        Ok(result) => json!({
+            "status": "success",
+            "func": func,
+            "instance_id": sup.instance_id(),
+            "result": result,
+            "state": sup.state(),
+        }),
+        Err(e) => json!({
+            "status": "error",
+            "func": func,
+            "instance_id": sup.instance_id(),
+            "message": format!("{e:#}"),
+        }),
+    }
+}
--- a/corrosion-host-agent/src/lib.rs
+++ b/corrosion-host-agent/src/lib.rs
@@ -0,0 +1,21 @@
+//! Corrosion Host Agent library surface — modules are public so integration
+//! tests can drive subsystems (notably the process supervisor) directly.
+
+pub mod agent;
+pub mod bus;
+pub mod config;
+pub mod docker_compose;
+pub mod filemanager;
+pub mod hostcmd;
+pub mod instancecmd;
+pub mod prober;
+pub mod process;
+pub mod rcon;
+pub mod service;
+pub mod steamcmd;
+pub mod subjects;
+pub mod supervisor;
+pub mod telemetry;
+ pub mod update;
+pub mod version;
+pub mod wipe;
--- a/corrosion-host-agent/src/main.rs
+++ b/corrosion-host-agent/src/main.rs
@@ -4,14 +4,10 @@
 //! connectivity prober, host command channel. Process control, file ops, and
 //! game adapters arrive in Phase 1+ (see PROTOCOL.md).

-mod agent;
-mod bus;
-mod config;
-mod hostcmd;
-mod prober;
-mod subjects;
-mod telemetry;
-mod version;
+use corrosion_host_agent::{
+    agent, bus, config, docker_compose, filemanager, hostcmd, instancecmd, prober, process,
+    service, subjects, supervisor, telemetry, version,
+};

 use anyhow::{Context, Result};
 use clap::{Parser, Subcommand};
@@ -41,6 +37,10 @@ enum Command {
    Check,
    /// Print full version (semver, git hash, build timestamp) and exit.
    Version,
+    /// Install as a systemd service and start it (Linux; requires root).
+    Install,
+    /// Stop and remove the systemd service (Linux; requires root).
+    Uninstall,
 }

 fn main() -> Result<()> {
@@ -62,6 +62,8 @@ fn main() -> Result<()> {
            );
            Ok(())
        }
+        Some(Command::Install) => service::install(&config_path),
+        Some(Command::Uninstall) => service::uninstall(),
        None => {
            let settings = config::load(&config_path)?;
            init_logging(&settings.log_level);
@@ -96,11 +98,28 @@ async fn run(settings: config::Settings) -> Result<()> {

    let nats = bus::connect(&settings).await?;

+    // Per-game supervisor factory: container-managed games (Dune) get a
+    // docker-compose supervisor; everything else is a spawned-process
+    // supervisor. Both satisfy the `Supervisor` trait, so the rest of the agent
+    // is game-agnostic.
+    let supervisors: std::collections::HashMap<String, Arc<dyn supervisor::Supervisor>> = settings
+        .instances
+        .iter()
+        .map(|inst| {
+            let sup: Arc<dyn supervisor::Supervisor> = match inst.game.as_str() {
+                "dune" => docker_compose::DockerComposeSupervisor::new(inst),
+                _ => process::ProcessSupervisor::new(inst),
+            };
+            (inst.id.clone(), sup)
+        })
+        .collect();
+
    let agent = Arc::new(Agent {
        cfg: settings,
        nats,
        started: Instant::now(),
        last_probe: RwLock::new(None),
+        supervisors,
        shutdown: CancellationToken::new(),
    });

@@ -115,6 +134,39 @@ async fn run(settings: config::Settings) -> Result<()> {
            }
        }));
    }
+    for (instance_id, sup) in &agent.supervisors {
+        {
+            let agent = agent.clone();
+            let sup = sup.clone();
+            handles.push(tokio::spawn(async move {
+                if let Err(e) = instancecmd::run(agent, sup).await {
+                    tracing::error!("instance command handler failed: {e:#}");
+                }
+            }));
+        }
+        handles.push(tokio::spawn(instancecmd::publish_state_changes(
+            agent.clone(),
+            sup.clone(),
+        )));
+        // File manager: one handler task per instance, jailed to root.
+        {
+            let agent = agent.clone();
+            let inst_cfg = agent
+                .cfg
+                .instances
+                .iter()
+                .find(|i| &i.id == instance_id)
+                .cloned();
+            if let Some(cfg) = inst_cfg {
+                let id = instance_id.clone();
+                handles.push(tokio::spawn(async move {
+                    if let Err(e) = filemanager::run(agent, id, cfg.root).await {
+                        tracing::error!("file manager handler failed: {e:#}");
+                    }
+                }));
+            }
+        }
+    }

    wait_for_shutdown_signal().await;
    tracing::info!("shutdown signal received");
--- a/corrosion-host-agent/src/process.rs
+++ b/corrosion-host-agent/src/process.rs
@@ -0,0 +1,262 @@
+//! Per-instance game-server process supervision.
+//!
+//! One `ProcessSupervisor` per process-managed instance (Rust/Conan/Soulmask).
+//! Lifecycle mirrors the proven Go agent behavior — graceful SIGTERM with a 30s
+//! budget before force kill, a monitor task that reaps the child and records
+//! crash-vs-stop — with two fixes the Go version needed: args are a proper list
+//! (no naive space splitting), and every state change is observable through a
+//! watch channel so the panel gets push events instead of waiting for the next
+//! heartbeat. Lifecycle control is exposed through the [`Supervisor`] trait so
+//! the command dispatch is identical across process- and container-managed
+//! games.
+
+use anyhow::{bail, Context, Result};
+use std::path::PathBuf;
+use std::process::Stdio;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+use tokio::process::{Child, Command};
+use tokio::sync::{watch, Mutex};
+
+use crate::config::InstanceConfig;
+use crate::supervisor::{InstanceState, Supervisor};
+
+const GRACEFUL_STOP_BUDGET: Duration = Duration::from_secs(30);
+const RESTART_PAUSE: Duration = Duration::from_secs(2);
+
+/// Mutable per-process bookkeeping, guarded by `ProcessSupervisor::inner`.
+struct Inner {
+    /// Handle of the spawned child; set by `start`, cleared by the monitor
+    /// once the child has exited.
+    child: Option<Child>,
+    /// When the current child was spawned; feeds `uptime_seconds`.
+    started_at: Option<Instant>,
+    /// True while a stop was requested — the monitor uses it to distinguish
+    /// an ordered shutdown from a crash.
+    stop_requested: bool,
+}
+
+/// Supervisor for one game server run as a directly spawned child process.
+pub struct ProcessSupervisor {
+    /// Instance identifier copied from the instance config.
+    instance_id: String,
+    /// Resolved executable path; `None` makes the instance start out as
+    /// `Unmanaged` and causes `start` to fail.
+    executable: Option<PathBuf>,
+    /// Argument list passed to the executable as-is.
+    args: Vec<String>,
+    /// Working directory override; when `None`, `start` falls back to the
+    /// executable's parent directory, then to ".".
+    working_dir: Option<PathBuf>,
+    /// Child handle and related bookkeeping behind an async mutex.
+    inner: Mutex<Inner>,
+    /// Sender side of the watch channel that carries every state change.
+    state_tx: watch::Sender<InstanceState>,
+}
+
+impl ProcessSupervisor {
+    /// Build a supervisor from an instance config without spawning anything.
+    ///
+    /// The initial state is `Stopped` when the config resolves to an
+    /// executable and `Unmanaged` otherwise.
+    pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
+        let executable = cfg.resolved_executable();
+        let initial = if executable.is_some() {
+            InstanceState::Stopped
+        } else {
+            InstanceState::Unmanaged
+        };
+        // The initial receiver is discarded; later observers call `subscribe`.
+        let (state_tx, _) = watch::channel(initial);
+        Arc::new(Self {
+            instance_id: cfg.id.clone(),
+            executable,
+            args: cfg.args.clone(),
+            working_dir: cfg.working_dir.clone(),
+            inner: Mutex::new(Inner {
+                child: None,
+                started_at: None,
+                stop_requested: false,
+            }),
+            state_tx,
+        })
+    }
+
+    /// Poll the child every 500 ms until it exits, then record the outcome.
+    ///
+    /// An exit with `stop_requested` set becomes `Stopped`; any other exit
+    /// becomes `Crashed` with the process exit code. Returns immediately if
+    /// there is no child, and gives up (state untouched) if `try_wait` fails.
+    async fn monitor(self: Arc<Self>) {
+        // Take a waiter without holding the lock across the whole child
+        // lifetime: Child::wait needs &mut, so the child stays in inner and
+        // we poll it.
+        loop {
+            let status = {
+                let mut inner = self.inner.lock().await;
+                let Some(child) = inner.child.as_mut() else { return };
+                match child.try_wait() {
+                    Ok(Some(status)) => Some(status),
+                    Ok(None) => None,
+                    Err(e) => {
+                        tracing::error!("instance '{}' wait failed: {e}", self.instance_id);
+                        return;
+                    }
+                }
+            };
+
+            match status {
+                Some(status) => {
+                    // Clear the bookkeeping under the lock, but publish the
+                    // new state only after the lock has been released.
+                    let mut inner = self.inner.lock().await;
+                    inner.child = None;
+                    inner.started_at = None;
+                    let ordered = inner.stop_requested;
+                    inner.stop_requested = false;
+                    drop(inner);
+
+                    if ordered {
+                        self.set_state(InstanceState::Stopped);
+                        tracing::info!("instance '{}' stopped ({status})", self.instance_id);
+                    } else {
+                        let exit_code = status.code();
+                        self.set_state(InstanceState::Crashed { exit_code });
+                        tracing::warn!(
+                            "instance '{}' exited unexpectedly ({status}) — marked crashed",
+                            self.instance_id
+                        );
+                    }
+                    return;
+                }
+                None => tokio::time::sleep(Duration::from_millis(500)).await,
+            }
+        }
+    }
+
+    /// Store `state` in the watch channel, notifying any receivers.
+    fn set_state(&self, state: InstanceState) {
+        // send_replace never fails even with zero receivers.
+        let _ = self.state_tx.send_replace(state);
+    }
+}
+
+#[async_trait::async_trait]
+impl Supervisor for ProcessSupervisor {
+    /// Identifier of the instance this supervisor controls.
+    fn instance_id(&self) -> &str {
+        &self.instance_id
+    }
+
+    /// Snapshot of the current lifecycle state.
+    fn state(&self) -> InstanceState {
+        self.state_tx.borrow().clone()
+    }
+
+    /// New receiver on the state watch channel.
+    fn watch_state(&self) -> watch::Receiver<InstanceState> {
+        self.state_tx.subscribe()
+    }
+
+    /// Seconds since the current child was spawned; 0 unless the state is
+    /// `Running` and a start time is recorded.
+    async fn uptime_seconds(&self) -> u64 {
+        let inner = self.inner.lock().await;
+        match (&*self.state_tx.borrow(), inner.started_at) {
+            (InstanceState::Running, Some(t)) => t.elapsed().as_secs(),
+            _ => 0,
+        }
+    }
+
+    /// Spawn the configured executable and start a monitor task for it.
+    ///
+    /// Fails when no executable is configured, the executable does not exist,
+    /// the instance is already `Running`/`Starting`, or the spawn itself
+    /// fails. On a failed spawn the state is rolled back to what it was before
+    /// the call, so the instance can be started again later.
+    async fn start(self: Arc<Self>) -> Result<()> {
+        let Some(exe) = self.executable.clone() else {
+            bail!("instance '{}' has no executable configured", self.instance_id);
+        };
+        if !exe.exists() {
+            bail!("executable not found: {}", exe.display());
+        }
+
+        let mut inner = self.inner.lock().await;
+        if matches!(*self.state_tx.borrow(), InstanceState::Running | InstanceState::Starting) {
+            bail!("instance '{}' is already running", self.instance_id);
+        }
+
+        // Remember the pre-start state so a failed spawn can be rolled back.
+        let previous = self.state_tx.borrow().clone();
+        self.set_state(InstanceState::Starting);
+
+        let workdir = self
+            .working_dir
+            .clone()
+            .or_else(|| exe.parent().map(|p| p.to_path_buf()))
+            .unwrap_or_else(|| PathBuf::from("."));
+
+        let spawned = Command::new(&exe)
+            .args(&self.args)
+            .current_dir(&workdir)
+            .stdin(Stdio::null())
+            .stdout(Stdio::inherit())
+            .stderr(Stdio::inherit())
+            .spawn();
+        let child = match spawned {
+            Ok(child) => child,
+            Err(e) => {
+                // Without this rollback the state would stay `Starting`
+                // forever and every later start() would bail with
+                // "already running".
+                self.set_state(previous);
+                return Err(anyhow::Error::new(e).context(format!("spawning {}", exe.display())));
+            }
+        };
+
+        let pid = child.id();
+        inner.child = Some(child);
+        inner.started_at = Some(Instant::now());
+        inner.stop_requested = false;
+        drop(inner);
+
+        self.set_state(InstanceState::Running);
+        tracing::info!(
+            "instance '{}' started: {} (pid {:?})",
+            self.instance_id,
+            exe.display(),
+            pid
+        );
+
+        // Monitor: reap the child and classify the exit.
+        let sup = Arc::clone(&self);
+        tokio::spawn(async move { sup.monitor().await });
+        Ok(())
+    }
+
+    /// Ask the child to exit and wait for the monitor to report `Stopped`.
+    ///
+    /// Fails when there is no child. On unix the child gets SIGTERM and up to
+    /// `GRACEFUL_STOP_BUDGET` to exit before it is force killed; elsewhere it
+    /// is killed right away. After a force kill the wait is capped at 5s and
+    /// `Ok(())` is returned regardless.
+    async fn stop(self: Arc<Self>) -> Result<()> {
+        let mut inner = self.inner.lock().await;
+        if inner.child.is_none() {
+            bail!("instance '{}' is not running", self.instance_id);
+        }
+        inner.stop_requested = true;
+        self.set_state(InstanceState::Stopping);
+        let child = inner.child.as_mut().expect("checked above");
+
+        // Graceful first: SIGTERM on unix; Windows has no SIGTERM equivalent
+        // for console processes, so it goes straight to kill there.
+        #[cfg(unix)]
+        if let Some(pid) = child.id() {
+            unsafe {
+                libc::kill(pid as i32, libc::SIGTERM);
+            }
+        }
+        #[cfg(not(unix))]
+        {
+            let _ = child.start_kill();
+        }
+        // Release the lock so the monitor can observe the exit.
+        drop(inner);
+
+        // Wait for the monitor to observe the exit; force kill on budget.
+        let mut rx = self.watch_state();
+        let deadline = tokio::time::timeout(GRACEFUL_STOP_BUDGET, async {
+            loop {
+                if matches!(*rx.borrow(), InstanceState::Stopped) {
+                    return;
+                }
+                if rx.changed().await.is_err() {
+                    return;
+                }
+            }
+        })
+        .await;
+
+        if deadline.is_err() {
+            tracing::warn!(
+                "instance '{}' ignored SIGTERM for {}s — force killing",
+                self.instance_id,
+                GRACEFUL_STOP_BUDGET.as_secs()
+            );
+            let mut inner = self.inner.lock().await;
+            if let Some(child) = inner.child.as_mut() {
+                let _ = child.start_kill();
+            }
+            drop(inner);
+
+            let mut rx = self.watch_state();
+            let _ = tokio::time::timeout(Duration::from_secs(5), async {
+                while !matches!(*rx.borrow(), InstanceState::Stopped) {
+                    if rx.changed().await.is_err() {
+                        break;
+                    }
+                }
+            })
+            .await;
+        }
+        Ok(())
+    }
+
+    /// Stop the instance if it is not already down, pause for
+    /// `RESTART_PAUSE`, then start it again.
+    async fn restart(self: Arc<Self>) -> Result<()> {
+        if !matches!(
+            *self.state_tx.borrow(),
+            InstanceState::Stopped | InstanceState::Crashed { .. } | InstanceState::Unmanaged
+        ) {
+            self.clone().stop().await?;
+        }
+        tokio::time::sleep(RESTART_PAUSE).await;
+        self.start().await
+    }
+}
--- a/corrosion-host-agent/src/rcon.rs
+++ b/corrosion-host-agent/src/rcon.rs
@@ -0,0 +1,320 @@
+//! RCON client: game-server remote-console over WebRCON (Rust) or Source RCON (Conan/Soulmask).
+//!
+//! The agent runs co-located with the game server, so every connection targets
+//! 127.0.0.1 — no TLS is needed and latency is sub-millisecond.  Two protocols
+//! are supported because the Rust game ships its own WebSocket-based WebRCON
+//! while Conan Exiles and Soulmask use the Valve Source RCON wire format over
+//! plain TCP.
+//!
+//! The protocol selection is explicit in the config (`kind`) but can be inferred
+//! from the game name when absent — callers supply the `game` field they already
+//! have in `InstanceConfig`.
+
+use anyhow::{bail, Context, Result};
+use futures::{SinkExt, StreamExt};
+use rand::Rng;
+use serde::Deserialize;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::net::TcpStream;
+use tokio::time::{timeout, Duration};
+
+/// Wire protocol used to reach a game server's remote console.
+///
+/// WebRCON is the Facepunch WebSocket protocol (Rust game).
+/// Source RCON is the Valve wire protocol used by Conan Exiles and Soulmask.
+///
+/// Deserialized from the lowercase variant name (`webrcon` / `source`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum RconKind {
+    /// JSON frames over a WebSocket; executed by `webrcon_exec`.
+    WebRcon,
+    /// Binary packets over plain TCP; executed by `source_rcon_exec`.
+    Source,
+}
+
+/// Per-instance RCON settings.
+///
+/// Unknown keys are rejected at deserialization time (`deny_unknown_fields`).
+/// `Debug` is implemented by hand rather than derived so the password never
+/// reaches logs or error output through `{:?}` formatting.
+#[derive(Clone, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct RconConfig {
+    /// Protocol override.  When absent the kind is resolved from `game`.
+    #[serde(default)]
+    pub kind: Option<RconKind>,
+    /// Port of the game server's RCON listener on 127.0.0.1.
+    pub port: u16,
+    /// RCON password: placed in the WebRCON URL path, or sent as the body of
+    /// the Source RCON AUTH packet.
+    pub password: String,
+}
+
+impl std::fmt::Debug for RconConfig {
+    /// Same shape as the derived output, with the password replaced by a
+    /// fixed placeholder.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("RconConfig")
+            .field("kind", &self.kind)
+            .field("port", &self.port)
+            .field("password", &"<redacted>")
+            .finish()
+    }
+}
+
+impl RconConfig {
+    /// Pick the wire protocol for this instance.
+    ///
+    /// An explicit `kind` always wins. Otherwise the protocol follows `game`:
+    /// `"conan"` and `"soulmask"` use Source RCON; every other name — `"rust"`
+    /// and anything unrecognised — uses WebRCON.
+    pub fn resolved_kind(&self, game: &str) -> RconKind {
+        self.kind.unwrap_or_else(|| {
+            // rust is the primary game, hence WebRCON as the catch-all;
+            // operators can always override with an explicit `kind`.
+            if matches!(game, "conan" | "soulmask") {
+                RconKind::Source
+            } else {
+                RconKind::WebRcon
+            }
+        })
+    }
+}
+
+/// Upper bound on establishing the connection to the local game server.
+const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
+/// Upper bound on each wait for the server's reply (auth and command phases).
+const RESPONSE_TIMEOUT: Duration = Duration::from_secs(10);
+
+/// Run `command` on the local game server's console and return the reply text.
+///
+/// The protocol comes from `cfg.resolved_kind(game)` and the request is handed
+/// to the matching executor. Both executors connect to 127.0.0.1:{port} and
+/// bound the connect and response phases with fixed deadlines of 5 s and 10 s.
+pub async fn send_command(cfg: &RconConfig, game: &str, command: &str) -> Result<String> {
+    let protocol = cfg.resolved_kind(game);
+    if protocol == RconKind::Source {
+        return source_rcon_exec(cfg, command).await;
+    }
+    webrcon_exec(cfg, command).await
+}
+
+// ---------------------------------------------------------------------------
+// WebRCON (Rust game) — WebSocket JSON protocol
+// ---------------------------------------------------------------------------
+
+/// Outgoing WebRCON frame, serialized as JSON with the PascalCase keys
+/// `Identifier`, `Message` and `Name`.
+///
+/// The server also emits chat/log frames on this socket with Identifier == 0;
+/// those are skipped when reading the reply.
+#[derive(serde::Serialize)]
+#[serde(rename_all = "PascalCase")]
+struct WebRconRequest<'a> {
+    /// Correlation id; the reply is the frame carrying the same value.
+    identifier: i32,
+    /// Console command text.
+    message: &'a str,
+    /// Sender name attached to the request.
+    name: &'static str,
+}
+
+/// Incoming WebRCON frame.  Only the `Identifier` and `Message` keys are read;
+/// any other keys in the JSON object are ignored.
+#[derive(serde::Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct WebRconResponse {
+    /// Correlation id of the request this frame answers.
+    identifier: i32,
+    /// Response text.
+    message: String,
+}
+
+/// Execute `command` over WebRCON against the game server on 127.0.0.1.
+///
+/// Opens a fresh WebSocket, sends a single JSON request tagged with a random
+/// positive `Identifier`, and returns the `Message` of the first response frame
+/// carrying that same identifier.  Frames with any other identifier, frames
+/// that fail to parse, and non-text frames are skipped.
+///
+/// Errors: connect timeout or failure, send failure, the server closing the
+/// socket or the stream ending before a matching frame, a read error, or no
+/// matching frame within `RESPONSE_TIMEOUT`.
+async fn webrcon_exec(cfg: &RconConfig, command: &str) -> Result<String> {
+    use tokio_tungstenite::connect_async;
+    use tokio_tungstenite::tungstenite::Message as WsMsg;
+
+    // The Rust game server embeds the password in the WebSocket URL path —
+    // never interpolate the real URL into errors or logs.
+    // NOTE(review): the password is inserted verbatim; characters such as
+    // '/', '?' or '#' would change how the URL parses — confirm whether
+    // passwords are restricted or need escaping.
+    let url = format!("ws://127.0.0.1:{}/{}", cfg.port, cfg.password);
+    let redacted = format!("ws://127.0.0.1:{}/<redacted>", cfg.port);
+
+    // CONNECT_TIMEOUT bounds only the connection phase (TCP handshake + WS
+    // upgrade).  The send below runs without a timeout; the read loop is
+    // bounded separately by RESPONSE_TIMEOUT.
+    let (mut ws, _) = timeout(CONNECT_TIMEOUT, connect_async(&url))
+        .await
+        .context("connect timeout")?
+        .with_context(|| format!("WebRCON connect to {redacted}"))?;
+
+    // Use a random positive i32 so correlation is unambiguous even when
+    // multiple callers share a port (future concurrency).
+    let id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
+    let req = WebRconRequest { identifier: id, message: command, name: "Corrosion" };
+    let payload = serde_json::to_string(&req).context("serialize WebRCON request")?;
+
+    ws.send(WsMsg::Text(payload))
+        .await
+        .context("send WebRCON command")?;
+
+    tracing::debug!("WebRCON sent id={id} command={command:?}");
+
+    // Read frames until we see our Identifier — skip chat/log noise (id 0 or
+    // any other value that isn't ours).
+    let result = timeout(RESPONSE_TIMEOUT, async {
+        loop {
+            match ws.next().await {
+                Some(Ok(WsMsg::Text(text))) => {
+                    match serde_json::from_str::<WebRconResponse>(&text) {
+                        Ok(resp) if resp.identifier == id => return Ok(resp.message),
+                        Ok(_) => {
+                            // Not our response (chat, log, another caller's frame).
+                            tracing::trace!("WebRCON skipping frame with different Identifier");
+                            continue;
+                        }
+                        Err(e) => {
+                            // Text that does not deserialize as a response
+                            // envelope is ignored, not treated as fatal.
+                            tracing::trace!("WebRCON non-JSON frame ignored: {e}");
+                            continue;
+                        }
+                    }
+                }
+                Some(Ok(WsMsg::Close(_))) => bail!("WebRCON server closed connection"),
+                Some(Ok(_)) => continue, // binary/ping/pong — skip
+                Some(Err(e)) => return Err(anyhow::anyhow!(e).context("WebRCON read error")),
+                None => bail!("WebRCON stream ended without response"),
+            }
+        }
+    })
+    .await
+    .context("WebRCON response timeout")??;
+
+    // Close cleanly; a send error here is cosmetic — we already have our data.
+    let _ = ws.close(None).await;
+
+    Ok(result)
+}
+
+// ---------------------------------------------------------------------------
+// Source RCON (Conan Exiles, Soulmask) — Valve TCP binary protocol
+//
+// Packet layout (all fields little-endian):
+//   i32  size     — byte count of the remaining packet (id + type + body + 2 nulls)
+//   i32  id       — caller-chosen correlation id; auth failure returns -1
+//   i32  type     — 0=RESPONSE_VALUE, 2=EXECCOMMAND/AUTH_RESPONSE, 3=AUTH
+//   [u8] body     — UTF-8 command or response text
+//   u8   0x00     — body null terminator
+//   u8   0x00     — padding null terminator
+//
+// Multi-packet handling: after sending the command we also send an empty
+// RESPONSE_VALUE probe with a distinct id.  We collect all RESPONSE_VALUE
+// packets belonging to the command id and stop when we receive the probe's
+// response.  This is the standard technique specified in the Valve wiki.
+// ---------------------------------------------------------------------------
+
+// Packet `type` field values.  AUTH_RESPONSE and EXECCOMMAND share the value
+// 2; the first is only read from the server, the second only sent to it.
+/// Client → server: authenticate with the password as body.
+const RCON_TYPE_AUTH: i32 = 3;
+/// Server → client: result of an AUTH request (id -1 means auth failure).
+const RCON_TYPE_AUTH_RESPONSE: i32 = 2;
+/// Client → server: execute the command carried in the body.
+const RCON_TYPE_EXECCOMMAND: i32 = 2;
+/// Server → client: command output.  Also sent by the client, with an empty
+/// body, as the end-of-response probe.
+const RCON_TYPE_RESPONSE_VALUE: i32 = 0;
+
+/// Maximum accumulated response body (guards against misbehaving servers).
+const MAX_RESPONSE_BYTES: usize = 1024 * 1024; // 1 MiB
+
+/// Execute `command` over Source RCON against 127.0.0.1:{cfg.port}.
+///
+/// Steps: connect (bounded by `CONNECT_TIMEOUT`), authenticate with
+/// `cfg.password`, send the command followed by an empty RESPONSE_VALUE probe,
+/// then gather every RESPONSE_VALUE body carrying the command id until the
+/// probe echo arrives.  Once the command has been answered at all — even with
+/// an empty body — a quiet period with no further packets also ends the read.
+///
+/// Errors: connect/auth/response timeouts, a rejected password, I/O or framing
+/// failures from `send_packet`/`recv_packet`, an accumulated body larger than
+/// `MAX_RESPONSE_BYTES`, or a body that is not valid UTF-8.
+async fn source_rcon_exec(cfg: &RconConfig, command: &str) -> Result<String> {
+    let addr = format!("127.0.0.1:{}", cfg.port);
+
+    let mut stream = timeout(CONNECT_TIMEOUT, TcpStream::connect(&addr))
+        .await
+        .context("connect timeout")?
+        .with_context(|| format!("Source RCON connect to {addr}"))?;
+
+    // --- Auth ---
+    let auth_id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
+    send_packet(&mut stream, auth_id, RCON_TYPE_AUTH, cfg.password.as_bytes()).await?;
+
+    // The server sends two responses to AUTH: first an empty RESPONSE_VALUE,
+    // then an AUTH_RESPONSE.  We skip the first and read until AUTH_RESPONSE.
+    timeout(RESPONSE_TIMEOUT, async {
+        loop {
+            let (id, ptype, _body) = recv_packet(&mut stream).await?;
+            if ptype == RCON_TYPE_AUTH_RESPONSE {
+                if id == -1 {
+                    bail!("Source RCON auth failed: wrong password");
+                }
+                tracing::debug!("Source RCON authenticated (id={id})");
+                return Ok(());
+            }
+            // Skip the empty RESPONSE_VALUE that precedes AUTH_RESPONSE.
+        }
+        // Never reached; pins the async block's output type for `?`.
+        #[allow(unreachable_code)]
+        Ok::<(), anyhow::Error>(())
+    })
+    .await
+    .context("Source RCON auth timeout")??;
+
+    // --- Command ---
+    let cmd_id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
+    // Probe id must differ from cmd_id.
+    let probe_id: i32 = loop {
+        let id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
+        if id != cmd_id {
+            break id;
+        }
+    };
+
+    send_packet(&mut stream, cmd_id, RCON_TYPE_EXECCOMMAND, command.as_bytes()).await?;
+    // Empty RESPONSE_VALUE probe — the server echoes it after processing the
+    // preceding command, signalling end-of-response.
+    send_packet(&mut stream, probe_id, RCON_TYPE_RESPONSE_VALUE, b"").await?;
+
+    // Not every server is probe-conformant (Soulmask unverified): once the
+    // command has been answered, a short per-read quiet period also
+    // terminates — never discard a response we already received just because
+    // the probe echo didn't come back.
+    const QUIET_PERIOD: Duration = Duration::from_millis(1500);
+    let response = timeout(RESPONSE_TIMEOUT, async {
+        let mut body_accum: Vec<u8> = Vec::new();
+        // Set on the first packet carrying `cmd_id`, even when its body is
+        // empty: a command with no output has still been answered, and must
+        // not wait out the full RESPONSE_TIMEOUT for a probe echo that a
+        // non-conformant server never sends.
+        let mut answered = false;
+        loop {
+            let next = if answered {
+                match timeout(QUIET_PERIOD, recv_packet(&mut stream)).await {
+                    Ok(res) => res.map(Some),
+                    Err(_elapsed) => Ok(None), // quiet after a reply — done
+                }
+            } else {
+                recv_packet(&mut stream).await.map(Some)
+            };
+            let Some((id, ptype, body)) = next? else {
+                break;
+            };
+            if ptype != RCON_TYPE_RESPONSE_VALUE {
+                continue; // unexpected packet type — skip
+            }
+            if id == probe_id {
+                // Probe echoed back — all command response packets have arrived.
+                break;
+            }
+            if id == cmd_id {
+                answered = true;
+                if body_accum.len() + body.len() > MAX_RESPONSE_BYTES {
+                    bail!("Source RCON response exceeded {MAX_RESPONSE_BYTES} bytes");
+                }
+                body_accum.extend_from_slice(&body);
+            }
+            // Skip packets with other ids (shouldn't happen but be defensive).
+        }
+        Ok::<Vec<u8>, anyhow::Error>(body_accum)
+    })
+    .await
+    .context("Source RCON response timeout")??;
+
+    String::from_utf8(response).context("Source RCON response is not valid UTF-8")
+}
+
+/// Frame and send one Source RCON packet.
+///
+/// Wire layout, little-endian: `size`, `id`, `type`, the body bytes, then two
+/// NUL bytes.  `size` counts everything after the size field itself, i.e.
+/// `body.len() + 10`.
+async fn send_packet(stream: &mut TcpStream, id: i32, ptype: i32, body: &[u8]) -> Result<()> {
+    // id(4) + type(4) + body(n) + 2 null terminators
+    let declared = (4 + 4 + body.len() + 2) as i32;
+    let mut frame: Vec<u8> = Vec::with_capacity(4 + declared as usize);
+    for field in [declared, id, ptype] {
+        frame.extend_from_slice(&field.to_le_bytes());
+    }
+    frame.extend_from_slice(body);
+    frame.extend_from_slice(&[0x00, 0x00]);
+    stream.write_all(&frame).await.context("Source RCON write")
+}
+
+/// Read one Source RCON packet; returns (id, type, body).
+///
+/// The body is returned without the two trailing NUL bytes.  Fails on I/O
+/// errors (including EOF mid-packet), on a declared size below the 10-byte
+/// minimum (negative values included), and on a declared size above
+/// `MAX_RESPONSE_BYTES + 16`.
+async fn recv_packet(stream: &mut TcpStream) -> Result<(i32, i32, Vec<u8>)> {
+    let mut size_buf = [0u8; 4];
+    stream
+        .read_exact(&mut size_buf)
+        .await
+        .context("Source RCON read size")?;
+    let declared = i32::from_le_bytes(size_buf);
+
+    // Minimum packet: id(4) + type(4) + 2 null terminators = 10 bytes.
+    // Checked on the signed wire value so a negative size is reported as
+    // malformed instead of wrapping to a huge usize and being called "too
+    // large".
+    if declared < 10 {
+        bail!("Source RCON: malformed packet (size={declared})");
+    }
+    let size = declared as usize;
+    if size > MAX_RESPONSE_BYTES + 16 {
+        bail!("Source RCON: packet too large ({size} bytes)");
+    }
+
+    let mut payload = vec![0u8; size];
+    stream
+        .read_exact(&mut payload)
+        .await
+        .context("Source RCON read payload")?;
+
+    // `size >= 10` guarantees both 4-byte header fields are present, so the
+    // conversions below cannot fail.
+    let id = i32::from_le_bytes(payload[0..4].try_into().unwrap());
+    let ptype = i32::from_le_bytes(payload[4..8].try_into().unwrap());
+    // Body is everything between the two fields and the two trailing nulls.
+    let body_end = size - 2; // strip 2 null terminators
+    let body = payload[8..body_end].to_vec();
+
+    Ok((id, ptype, body))
+}
--- a/corrosion-host-agent/src/service.rs
+++ b/corrosion-host-agent/src/service.rs
@@ -0,0 +1,129 @@
+//! systemd service installation for the host agent (Linux).
+//!
+//! `corrosion-host-agent install` writes a systemd unit pointing at the current
+//! binary + config, reloads systemd, and enables + starts the service.
+//! `uninstall` reverses it. Windows SCM support is a follow-up; on non-Linux
+//! these return a clear "Linux only" error rather than silently doing nothing.
+//!
+//! The agent already handles SIGTERM (see main::wait_for_shutdown_signal), so a
+//! plain `Type=simple` unit gives systemd clean start/stop semantics.
+
+use anyhow::{bail, Result};
+use std::path::Path;
+
+#[cfg(target_os = "linux")]
+use anyhow::Context;
+
+pub const SERVICE_NAME: &str = "corrosion-host-agent";
+
+#[cfg(target_os = "linux")]
+const UNIT_PATH: &str = "/etc/systemd/system/corrosion-host-agent.service";
+
+/// Build the text of the systemd unit file.  Pure (no I/O), so it can be
+/// unit-tested.
+///
+/// `exec_path` and `config_path` are inserted verbatim into
+/// `ExecStart=<exec_path> --config <config_path>`; every other line is fixed.
+pub fn unit_file_contents(exec_path: &str, config_path: &str) -> String {
+    let exec_start = format!("ExecStart={exec_path} --config {config_path}");
+    let lines = [
+        "[Unit]",
+        "Description=Corrosion Host Agent (multi-game ops runtime)",
+        "Documentation=https://corrosionmgmt.com",
+        "After=network-online.target",
+        "Wants=network-online.target",
+        "",
+        "[Service]",
+        "Type=simple",
+        exec_start.as_str(),
+        "Restart=on-failure",
+        "RestartSec=5",
+        "# The agent supervises game-server processes and their files, so it",
+        "# needs broad filesystem access and runs as root by default.",
+        "User=root",
+        "",
+        "[Install]",
+        "WantedBy=multi-user.target",
+    ];
+    // Every line, including the last, is newline-terminated.
+    let mut unit = lines.join("\n");
+    unit.push('\n');
+    unit
+}
+
+/// Install and start the agent as a systemd service (Linux).
+///
+/// Writes the unit file to `UNIT_PATH` pointing at the currently running
+/// executable and at `config_path`, reloads systemd, then enables and starts
+/// the service.  A relative `config_path` is resolved against the current
+/// working directory first, so the path written to the unit is absolute.
+///
+/// Errors: the executable path or working directory cannot be resolved, the
+/// unit file cannot be written, or a `systemctl` call fails.
+#[cfg(target_os = "linux")]
+pub fn install(config_path: &Path) -> Result<()> {
+    let exec = std::env::current_exe().context("resolving current executable path")?;
+    let exec_str = exec.to_string_lossy();
+
+    // systemd does not start the service in the installer's working
+    // directory, so a relative `--config` path would not resolve at service
+    // start.  Anchor it here, while the working directory is still known.
+    let cfg_abs = if config_path.is_absolute() {
+        config_path.to_path_buf()
+    } else {
+        std::env::current_dir()
+            .context("resolving current directory for relative config path")?
+            .join(config_path)
+    };
+    let cfg_str = cfg_abs.to_string_lossy();
+
+    let unit = unit_file_contents(&exec_str, &cfg_str);
+    std::fs::write(UNIT_PATH, unit)
+        .with_context(|| format!("writing {UNIT_PATH} (are you root?)"))?;
+    println!("wrote {UNIT_PATH}");
+
+    run("systemctl", &["daemon-reload"])?;
+    run("systemctl", &["enable", "--now", SERVICE_NAME])?;
+
+    println!(
+        "service '{SERVICE_NAME}' installed and started.\n  \
+         status: systemctl status {SERVICE_NAME}\n  \
+         logs:   journalctl -u {SERVICE_NAME} -f"
+    );
+    Ok(())
+}
+
+/// Remove the systemd service (Linux).
+///
+/// Stop/disable is best-effort: its outcome is ignored so a service that is
+/// not currently active does not fail the uninstall.  The unit file is
+/// deleted when present, then systemd is reloaded.
+#[cfg(target_os = "linux")]
+pub fn uninstall() -> Result<()> {
+    let mut disable = std::process::Command::new("systemctl");
+    disable.args(["disable", "--now", SERVICE_NAME]);
+    let _ = disable.status();
+
+    let unit = Path::new(UNIT_PATH);
+    if unit.exists() {
+        std::fs::remove_file(unit)
+            .with_context(|| format!("removing {UNIT_PATH} (are you root?)"))?;
+        println!("removed {UNIT_PATH}");
+    }
+
+    run("systemctl", &["daemon-reload"])?;
+    println!("service '{SERVICE_NAME}' uninstalled.");
+    Ok(())
+}
+
+/// Run `cmd` with `args` and wait for it to finish.
+///
+/// Fails when the process cannot be launched or when it exits with a
+/// non-success status; both errors name the full command line.
+#[cfg(target_os = "linux")]
+fn run(cmd: &str, args: &[&str]) -> Result<()> {
+    let exit = std::process::Command::new(cmd)
+        .args(args)
+        .status()
+        .with_context(|| format!("running {cmd} {}", args.join(" ")))?;
+    if exit.success() {
+        return Ok(());
+    }
+    bail!("{cmd} {} failed with {exit}", args.join(" "))
+}
+
+/// Non-Linux stub: always returns an error saying `install` is Linux-only,
+/// rather than silently doing nothing.  `_config_path` is unused.
+#[cfg(not(target_os = "linux"))]
+pub fn install(_config_path: &Path) -> Result<()> {
+    bail!(
+        "`install` is only supported on Linux (systemd). Windows SCM support is \
+         coming; for now run the agent directly or via your platform's service manager."
+    );
+}
+
+/// Non-Linux stub: always returns an error saying `uninstall` is Linux-only.
+#[cfg(not(target_os = "linux"))]
+pub fn uninstall() -> Result<()> {
+    bail!("`uninstall` is only supported on Linux (systemd).");
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// The rendered unit must carry the exact ExecStart line plus the service
+    /// type, restart policy, install target and network ordering.
+    #[test]
+    fn unit_contains_exec_config_and_install_target() {
+        let rendered = unit_file_contents(
+            "/usr/local/bin/corrosion-host-agent",
+            "/etc/corrosion/agent.toml",
+        );
+        let required = [
+            "ExecStart=/usr/local/bin/corrosion-host-agent --config /etc/corrosion/agent.toml",
+            "Type=simple",
+            "Restart=on-failure",
+            "WantedBy=multi-user.target",
+            "After=network-online.target",
+        ];
+        for needle in required {
+            assert!(rendered.contains(needle));
+        }
+    }
+}
--- a/corrosion-host-agent/src/steamcmd.rs
+++ b/corrosion-host-agent/src/steamcmd.rs
@@ -0,0 +1,126 @@
+//! SteamCMD update integration for process-managed game instances.
+//!
+//! Wraps the `steamcmd` binary to perform an `+app_update` for a given game
+//! instance, streaming stdout lines to a caller-supplied progress callback so
+//! the panel can display live update output.  The agent already runs a task per
+//! command in a separate `tokio::spawn`, so the blocking-until-done semantics
+//! here are intentional — the NATS reply is sent only when SteamCMD exits.
+//!
+//! Dune is Docker-image-based and explicitly has no SteamCMD integration — any
+//! attempt to invoke `update` on a Dune instance returns a clear error rather
+//! than a silent no-op.
+
+use std::path::Path;
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command;
+
+/// Map a game name to the Steam app ID used for `+app_update`.
+///
+/// Returns `None` for Dune (Docker-image based) and for any unrecognised
+/// name.  The function itself is available on every platform; only the
+/// Soulmask ID depends on the compile target — one value on Windows, another
+/// everywhere else.
+pub fn app_id_for_game(game: &str) -> Option<u32> {
+    let app_id = match game {
+        "rust" => 258550,
+        "conan" => 443030,
+        "soulmask" if cfg!(windows) => 3017310,
+        "soulmask" => 3017300,
+        // Dune uses Docker images — SteamCMD has no role here.  Unknown
+        // games have no app ID either.
+        _ => return None,
+    };
+    Some(app_id)
+}
+
+/// Configuration controlling SteamCMD behaviour for one instance.
+/// Deserialized from `[instance.steamcmd]` in agent.toml.  Every field is
+/// optional; the `Default` value is "no explicit path, no validate pass".
+#[derive(Debug, Clone, serde::Deserialize, Default)]
+pub struct SteamcmdConfig {
+    /// Absolute or relative path to the `steamcmd` binary.
+    /// Defaults to `"steamcmd"` (resolved via `PATH`) when absent.
+    // NOTE(review): the `"steamcmd"` fallback is applied by the caller, not
+    // in this type — confirm at the call site.
+    #[serde(default)]
+    pub steamcmd_path: Option<std::path::PathBuf>,
+
+    /// Whether to pass `validate` to `+app_update`.  Adds a file-hash check
+    /// pass that catches corruption at the cost of a longer update time.
+    #[serde(default)]
+    pub validate: bool,
+}
+
+/// Run a SteamCMD update for `game` into `install_dir`.
+///
+/// - `steamcmd_path`: path to the binary (or `"steamcmd"` to use PATH).
+/// - `validate`: appends `validate` to the `+app_update` call.
+/// - `on_progress`: receives each stdout line as it arrives so callers can
+///   forward progress to the panel in real time.
+///
+/// Returns `Ok(())` on a zero exit code, otherwise an error describing the
+/// failure.  Games without a Steam app ID are rejected before any process is
+/// spawned: Dune with a Docker-specific message, any other unknown game with
+/// a generic "no app ID" message.  SteamCMD's stderr is discarded.
+pub async fn update(
+    game: &str,
+    install_dir: &Path,
+    steamcmd_path: &str,
+    validate: bool,
+    on_progress: impl Fn(&str),
+) -> anyhow::Result<()> {
+    use anyhow::Context;
+
+    // Only Dune is Docker-based; blaming Docker for an unrecognised game name
+    // would send the operator looking in the wrong place.
+    let app_id = match app_id_for_game(game) {
+        Some(id) => id,
+        None if game == "dune" => anyhow::bail!(
+            "dune uses Docker images, not SteamCMD — cannot run app_update for game '{game}'"
+        ),
+        None => anyhow::bail!(
+            "no Steam app ID is known for game '{game}' — cannot run app_update"
+        ),
+    };
+
+    let install_dir_str = install_dir
+        .to_str()
+        .with_context(|| format!("install_dir '{}' is not valid UTF-8", install_dir.display()))?;
+
+    // Argument order matters to SteamCMD: the install dir is set before
+    // login, and `validate` (when requested) directly follows the app id.
+    let mut args: Vec<String> = vec![
+        "+force_install_dir".to_string(),
+        install_dir_str.to_string(),
+        "+login".to_string(),
+        "anonymous".to_string(),
+        "+app_update".to_string(),
+        app_id.to_string(),
+    ];
+    if validate {
+        args.push("validate".to_string());
+    }
+    args.push("+quit".to_string());
+
+    tracing::info!(
+        "steamcmd: starting update for game={game} app_id={app_id} install_dir={} validate={validate}",
+        install_dir.display()
+    );
+
+    let mut child = Command::new(steamcmd_path)
+        .args(&args)
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::null())
+        .spawn()
+        .with_context(|| format!("spawning steamcmd binary '{steamcmd_path}'"))?;
+
+    let stdout = child.stdout.take().expect("stdout was piped");
+    let mut lines = BufReader::new(stdout).lines();
+
+    // Drain stdout to EOF before waiting, forwarding each line as it arrives.
+    while let Some(line) = lines.next_line().await.context("reading steamcmd stdout")? {
+        tracing::debug!("steamcmd: {line}");
+        on_progress(&line);
+    }
+
+    let status = child.wait().await.context("waiting for steamcmd to exit")?;
+    if status.success() {
+        tracing::info!("steamcmd: update completed successfully for game={game}");
+        Ok(())
+    } else {
+        // `code()` is `None` when the process was ended by a signal.
+        let code = status.code().unwrap_or(-1);
+        anyhow::bail!("steamcmd exited with non-zero status {code} for game={game}")
+    }
+}
+
--- a/corrosion-host-agent/src/subjects.rs
+++ b/corrosion-host-agent/src/subjects.rs
@@ -17,14 +17,23 @@ pub fn host_going_offline(license: &str) -> String {
    format!("corrosion.{license}.host.going_offline")
 }

-/// Phase 1: per-instance command channel (start/stop/restart/rcon/...).
-#[allow(dead_code)]
+/// Per-instance command channel (start/stop/restart/status; rcon et al. to come).
 pub fn instance_cmd(license: &str, instance: &str) -> String {
    format!("corrosion.{license}.{instance}.cmd")
 }

-/// Phase 1: per-instance state-change events.
-#[allow(dead_code)]
+/// Per-instance state-change events.
 pub fn instance_status(license: &str, instance: &str) -> String {
    format!("corrosion.{license}.{instance}.status")
 }
+
+/// Subject of one instance's SteamCMD progress stream:
+/// `corrosion.<license>.<instance>.steam_status`.  Lines from `steamcmd`
+/// stdout are published here so the panel can display live update output.
+pub fn instance_steam_status(license: &str, instance: &str) -> String {
+    ["corrosion", license, instance, "steam_status"].join(".")
+}
+
+/// Subject of one instance's file manager command channel (request-reply):
+/// `corrosion.<license>.<instance>.files.cmd`.
+pub fn instance_files_cmd(license: &str, instance: &str) -> String {
+    ["corrosion", license, instance, "files", "cmd"].join(".")
+}
--- a/corrosion-host-agent/src/supervisor.rs
+++ b/corrosion-host-agent/src/supervisor.rs
@@ -0,0 +1,80 @@
+//! The supervision abstraction.
+//!
+//! A `Supervisor` owns the lifecycle of one game instance. Different games are
+//! managed in fundamentally different ways — Rust/Conan/Soulmask are spawned OS
+//! processes ([`crate::process::ProcessSupervisor`]); Dune is a docker-compose
+//! stack ([`crate::docker_compose::DockerComposeSupervisor`]); future planes
+//! (kubectl, AMP/podman, SSH) will be their own impls. The instance command
+//! dispatch (`instancecmd::dispatch`) talks only to this trait, so it never
+//! learns which management model is behind a given instance.
+//!
+//! Trait objects (`Arc<dyn Supervisor>`) need object-safe, dynamically
+//! dispatchable async methods; native `async fn` in traits is not yet
+//! dyn-compatible, so we use `#[async_trait]` (the battle-tested ecosystem
+//! standard) to box the returned futures. The cost — one heap alloc per
+//! lifecycle call — is irrelevant for start/stop/restart, which happen seconds
+//! to minutes apart.
+
+use std::sync::Arc;
+
+use anyhow::Result;
+use serde::Serialize;
+use tokio::sync::watch;
+
+/// Observable lifecycle state of one instance. Shared vocabulary across every
+/// supervisor impl; serialized verbatim into heartbeats and status events
+/// (`{"state":"running", ...}`).
+///
+/// Serialization is internally tagged: the variant name, in snake_case, goes
+/// under the `state` key and any variant fields sit beside it.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+#[serde(rename_all = "snake_case", tag = "state")]
+pub enum InstanceState {
+    /// Not lifecycle-managed (a process instance with no executable, etc.).
+    Unmanaged,
+    /// Serialized as `{"state":"stopped"}`.
+    Stopped,
+    /// Serialized as `{"state":"starting"}`.
+    Starting,
+    /// Serialized as `{"state":"running"}`.
+    Running,
+    /// Serialized as `{"state":"stopping"}`.
+    Stopping,
+    /// Exited/died without a stop request.
+    Crashed {
+        /// Exit code when one is known; the key is left out of the
+        /// serialized form when it is `None`.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        exit_code: Option<i32>,
+    },
+}
+
+impl InstanceState {
+    /// Short lowercase name of the state with any payload dropped — a
+    /// `Crashed` state is just `"crashed"`, whatever its exit code.
+    pub fn as_label(&self) -> &'static str {
+        match self {
+            Self::Running => "running",
+            Self::Starting => "starting",
+            Self::Stopping => "stopping",
+            Self::Stopped => "stopped",
+            Self::Crashed { .. } => "crashed",
+            Self::Unmanaged => "unmanaged",
+        }
+    }
+}
+
+/// Lifecycle control + state observation for one instance.
+///
+/// `start`/`stop`/`restart` take `self: Arc<Self>` so an impl can hand a clone
+/// to a spawned monitor task; callers hold an `Arc<dyn Supervisor>` and
+/// `clone()` before each call. `watch_state` exposes the same channel the
+/// status-event publisher drains, so panel push events stay decoupled from the
+/// heartbeat cadence.
+///
+/// `#[async_trait]` boxes the futures of the async methods so the trait can
+/// be used as a trait object.
+#[async_trait::async_trait]
+pub trait Supervisor: Send + Sync {
+    /// The instance slug (a NATS subject segment).
+    fn instance_id(&self) -> &str;
+
+    /// Current cached state (cheap; no I/O).
+    fn state(&self) -> InstanceState;
+
+    /// Subscribe to state transitions.
+    fn watch_state(&self) -> watch::Receiver<InstanceState>;
+
+    /// Seconds since the instance entered `Running` (0 otherwise).
+    async fn uptime_seconds(&self) -> u64;
+
+    /// Bring the instance up.  What "up" involves (spawning a process,
+    /// starting a compose stack, ...) is the impl's business.
+    async fn start(self: Arc<Self>) -> Result<()>;
+    /// Take the instance down.
+    async fn stop(self: Arc<Self>) -> Result<()>;
+    /// Take the instance down if needed, then bring it up again.
+    async fn restart(self: Arc<Self>) -> Result<()>;
+}
--- a/corrosion-host-agent/src/telemetry.rs
+++ b/corrosion-host-agent/src/telemetry.rs
@@ -65,9 +65,10 @@ pub struct InstanceInfo {
    pub game: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub label: Option<String>,
-    /// Phase 0 states: `configured` (root exists) or `missing_root`.
-    /// Phase 1 adds live process states (running/stopped/crashed).
+    /// Process-managed: running/stopped/starting/stopping/crashed.
+    /// Unmanaged (no executable configured): configured/missing_root.
    pub state: String,
+    pub uptime_seconds: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub root_disk_free_mb: Option<u64>,
 }
@@ -125,21 +126,30 @@ pub async fn collect(agent: &Agent, sys: &mut System) -> HeartbeatPayload {
        })
        .collect();

-    let instances = agent
-        .cfg
-        .instances
-        .iter()
-        .map(|inst| {
-            let exists = inst.root.exists();
-            InstanceInfo {
-                id: inst.id.clone(),
-                game: inst.game.clone(),
-                label: inst.label.clone(),
-                state: if exists { "configured" } else { "missing_root" }.to_string(),
-                root_disk_free_mb: disk_free_for_path(&disks, &inst.root),
+    let mut instances = Vec::with_capacity(agent.cfg.instances.len());
+    for inst in &agent.cfg.instances {
+        let (state, uptime_seconds) = match agent.supervisors.get(&inst.id) {
+            Some(sup) if !matches!(sup.state(), crate::supervisor::InstanceState::Unmanaged) => {
+                (sup.state().as_label().to_string(), sup.uptime_seconds().await)
            }
-        })
-        .collect();
+            _ => {
+                let exists = inst.root.exists();
+                (
+                    if exists { "configured" } else { "missing_root" }.to_string(),
+                    0,
+                )
+            }
+        };
+        instances.push(InstanceInfo {
+            id: inst.id.clone(),
+            game: inst.game.clone(),
+            label: inst.label.clone(),
+            state,
+            uptime_seconds,
+            root_disk_free_mb: disk_free_for_path(&disks, &inst.root),
+        });
+    }
+    let instances = instances;

    HeartbeatPayload {
        schema: 2,
--- a/corrosion-host-agent/src/update.rs
+++ b/corrosion-host-agent/src/update.rs
@@ -0,0 +1,154 @@
+//! Signed self-update.
+//!
+//! The agent only ever runs a binary whose minisign signature verifies against
+//! the EMBEDDED public key below. Even if the CDN (which currently accepts
+//! unauthenticated uploads) served a malicious binary, the agent refuses it
+//! without a valid signature from the release private key (a CI secret).
+//!
+//! Flow: download binary + `.minisig` from the CDN → verify signature →
+//! atomic swap (current → `.old`, new → current, rollback on failure) →
+//! relaunch the new binary. Defence in depth mirrors the Vigilance updater:
+//! a real URL parse rejecting credential-in-URL bypasses, an https + host
+//! allowlist, and a size cap.
+
+use anyhow::{bail, Context, Result};
+use minisign_verify::{PublicKey, Signature};
+use std::path::{Path, PathBuf};
+use std::time::Duration;
+
+/// minisign public key. The matching private key signs releases in CI
+/// (Gitea Actions secret MINISIGN_SECRET_KEY). Rotating it means re-signing
+/// every published artifact and shipping an agent build with the new key.
+const PUBLIC_KEY: &str = "RWQKhJptuiwIkp31cZdz10z/R72UPZkl7/VtnZJ2Vfbe0dQfDlXHZYFC";
+
+const ALLOWED_HOST: &str = "cdn.corrosionmgmt.com";
+const MAX_BINARY_BYTES: usize = 100 * 1024 * 1024; // 100 MiB sanity cap
+const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(600);
+
+/// Check `binary` against the embedded public key using a minisign signature
+/// blob. Tampered or unsigned content fails here — the core of self-update.
+pub fn verify_signature(binary: &[u8], signature_blob: &str) -> Result<()> {
+    let key = PublicKey::from_base64(PUBLIC_KEY).context("embedded public key is invalid")?;
+    let parsed = Signature::decode(signature_blob).context("malformed minisign signature")?;
+    // NOTE(review): the `false` flag presumably rejects legacy signatures — confirm.
+    let outcome = key.verify(binary, &parsed, false);
+    outcome.map_err(|e| anyhow::anyhow!("signature verification failed: {e}"))
+}
+
+/// Accept only `https://cdn.corrosionmgmt.com/...` URLs that carry no embedded
+/// credentials (the userinfo-bypass class); anything else is an error.
+pub fn assert_url_allowed(url: &str) -> Result<()> {
+    let target = reqwest::Url::parse(url).context("invalid update URL")?;
+    let has_credentials = !target.username().is_empty() || target.password().is_some();
+    // Arm order fixes which error wins: scheme first, then userinfo, then host.
+    match target.scheme() {
+        "https" if has_credentials => bail!("update URL must not contain credentials"),
+        "https" if target.host_str() != Some(ALLOWED_HOST) => {
+            bail!("update URL host not allowed: {:?}", target.host_str())
+        }
+        "https" => Ok(()),
+        _ => bail!("update URL must be https"),
+    }
+}
+
+/// Download (size-capped, redirects confined to the allowlist), verify, and
+/// atomically swap in a new agent binary. Does NOT restart — the caller decides
+/// when to relaunch (after replying on NATS). Returns the new binary's path.
+pub async fn download_verify_swap(url: &str) -> Result<PathBuf> {
+    const MAX_SIGNATURE_BYTES: usize = 64 * 1024; // generous for a small .minisig text file
+    assert_url_allowed(url)?;
+    let sig_url = format!("{url}.minisig");
+    assert_url_allowed(&sig_url)?;
+    // Re-check every redirect hop so a response cannot bounce us off the allowlist.
+    let on_allowlist = reqwest::redirect::Policy::custom(|hop| {
+        if hop.previous().len() < 5 && assert_url_allowed(hop.url().as_str()).is_ok() {
+            hop.follow()
+        } else {
+            hop.error("update redirect left the allowlist or looped")
+        }
+    });
+    let client = reqwest::Client::builder()
+        .timeout(DOWNLOAD_TIMEOUT)
+        .redirect(on_allowlist)
+        .build()
+        .context("building HTTP client")?;
+
+    let binary = fetch_capped(&client, url, MAX_BINARY_BYTES).await?;
+    let signature = fetch_capped(&client, &sig_url, MAX_SIGNATURE_BYTES).await?;
+    let signature = String::from_utf8(signature).context("reading signature")?;
+    verify_signature(&binary, &signature).context("refusing unsigned/tampered update")?;
+    tracing::info!("update signature verified ({} bytes)", binary.len());
+    let current = std::env::current_exe().context("resolving current executable")?;
+    swap_binary(&current, &binary)?;
+    tracing::info!("update swapped in at {}", current.display());
+    Ok(current)
+}
+
+/// GET `url` into memory, failing as soon as the body grows past `cap` bytes
+/// rather than buffering an arbitrarily large response before checking it.
+async fn fetch_capped(client: &reqwest::Client, url: &str, cap: usize) -> Result<Vec<u8>> {
+    let pending = client.get(url).send().await.with_context(|| format!("downloading {url}"))?;
+    let mut resp = pending.error_for_status().with_context(|| format!("fetching {url}"))?;
+    let mut body = Vec::new();
+    while let Some(chunk) = resp.chunk().await.with_context(|| format!("reading {url}"))? {
+        if body.len() + chunk.len() > cap {
+            bail!("{url} exceeds the {cap}-byte download cap");
+        }
+        body.extend_from_slice(&chunk);
+    }
+    Ok(body)
+}
+
+/// Replace `current` with `new_bytes`: stage a sibling `.new` file, move the
+/// existing binary to `.old`, rename `.new` into place; restore `.old` on failure.
+pub fn swap_binary(current: &Path, new_bytes: &[u8]) -> Result<()> {
+    let dir = current.parent().unwrap_or_else(|| Path::new("."));
+    let stem = current.file_name().and_then(|s| s.to_str()).unwrap_or("corrosion-host-agent");
+    let new_path = dir.join(format!("{stem}.new"));
+    let backup = dir.join(format!("{stem}.old"));
+    // Stage the new bytes in a sibling `.new` file before touching `current`.
+    std::fs::write(&new_path, new_bytes)
+        .with_context(|| format!("writing {}", new_path.display()))?;
+
+    #[cfg(unix)]
+    {
+        use std::os::unix::fs::PermissionsExt;
+        std::fs::set_permissions(&new_path, std::fs::Permissions::from_mode(0o755))
+            .context("chmod +x on new binary")?;
+    }
+    // Drop any stale backup (errors ignored), then move the current binary aside.
+    let _ = std::fs::remove_file(&backup);
+    std::fs::rename(current, &backup)
+        .with_context(|| format!("backing up current binary to {}", backup.display()))?;
+
+    if let Err(e) = std::fs::rename(&new_path, current) {
+        // Roll back: move the backup back so a binary exists at `current` again.
+        let _ = std::fs::rename(&backup, current);
+        return Err(anyhow::anyhow!(e).context("installing new binary (rolled back)"));
+    }
+    Ok(())
+}
+
+/// Relaunch the (already-swapped) binary with the same args, then exit; no
+/// service manager is needed. Linux resolves `current_exe()` via the renamed
+/// inode, so post-swap it names the `.old` backup — retarget the new binary.
+pub fn relaunch_and_exit() -> ! {
+    let launched = std::env::current_exe().unwrap_or_else(|_| PathBuf::from("corrosion-host-agent"));
+    let swapped = launched.to_str().and_then(|p| p.strip_suffix(".old")).map(PathBuf::from);
+    let exe = swapped.filter(|p| p.is_file()).unwrap_or(launched);
+    let args: Vec<String> = std::env::args().skip(1).collect();
+    tracing::info!("relaunching {} after update", exe.display());
+    #[cfg(unix)]
+    {
+        use std::os::unix::process::CommandExt;
+        // exec replaces this process image in place; it only returns on failure.
+        let err = std::process::Command::new(&exe).args(&args).exec();
+        tracing::error!("exec after update failed: {err}; exiting for service restart");
+        std::process::exit(70);
+    }
+    #[cfg(not(unix))]
+    {
+        let _ = std::process::Command::new(&exe).args(&args).spawn();
+        std::process::exit(0);
+    }
+}
--- a/corrosion-host-agent/src/wipe.rs
+++ b/corrosion-host-agent/src/wipe.rs
@@ -0,0 +1,412 @@
+//! Jailed wipe engine for Rust (and compatible) game server instances.
+//!
+//! Three wipe types are supported, each a strict superset of the previous:
+//!
+//! | Type        | What is deleted                                                  |
+//! |-------------|------------------------------------------------------------------|
+//! | `map`       | `*.map`, `*.sav` under `<root>/server/<identity>/`               |
+//! | `blueprint` | map wipe + `*.blueprints.*.db` / `.blueprints.*` under save dir  |
+//! | `full`      | blueprint wipe + `oxide/data/` contents + player state DB files  |
+//!
+//! Identity discovery: rather than require the identity in the payload, we walk
+//! `<root>/server/*/` looking for files that match each wipe type's patterns.
+//! This handles any identity name without configuration churn.
+//!
+//! **Safety**: every path operated on is validated inside the canonicalized
+//! instance root with the same two-stage (lexical + canonicalize) jail used by
+//! `filemanager.rs`.  We use `symlink_metadata` (lstat) everywhere we walk
+//! directories — symlinks are never followed across the boundary (Lesson 26).
+
+use anyhow::{Context, Result};
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use crate::filemanager::jail;
+
+// ---------------------------------------------------------------------------
+// Public API types
+// ---------------------------------------------------------------------------
+
+/// The scope of data to erase.
+#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum WipeType {
+    /// Delete procedural map + save files only.
+    Map,
+    /// Map wipe + player blueprint databases.
+    Blueprint,
+    /// Blueprint wipe + oxide/data + all player state DBs.
+    Full,
+}
+
+/// Parameters parsed from the NATS command payload.
+#[derive(Debug, serde::Deserialize)]
+pub struct WipeRequest {
+    /// Scope of the wipe.
+    pub wipe_type: WipeType,
+    /// Copy files to `.corrosion-backups/<backup_label>/` before deleting.
+    #[serde(default)]
+    pub backup: bool,
+    /// Label used as the backup subdirectory name.  Defaults to `"wipe-backup"`.
+    #[serde(default = "default_backup_label")]
+    pub backup_label: String,
+}
+
+fn default_backup_label() -> String {
+    "wipe-backup".to_string()
+}
+
+/// Result of a successful wipe operation.
+#[derive(Debug)]
+pub struct WipeResult {
+    pub deleted_count: usize,
+    pub wipe_type: WipeType,
+}
+
+// ---------------------------------------------------------------------------
+// Core wipe logic  (sync — suitable for `spawn_blocking`)
+// ---------------------------------------------------------------------------
+
+/// Execute a wipe of `wipe_type` inside `root`, optionally backing up first.
+///
+/// Does NOT touch the supervisor lifecycle — the caller (instancecmd dispatch)
+/// must stop the server before calling this and restart it afterwards.
+///
+/// Returns a `WipeResult` counting what was deleted.  Missing directories count
+/// as zero-deleted, not as errors; a path that fails to delete is logged and
+/// skipped.  A target equal to the root itself aborts the wipe.
+pub fn execute(root: &Path, req: &WipeRequest) -> Result<WipeResult> {
+    // Canonicalize root once; every subsequent path check goes through `jail()`.
+    let canon_root = fs::canonicalize(root)
+        .with_context(|| format!("canonicalize instance root '{}'", root.display()))?;
+
+    // Collect every path to delete based on wipe type.
+    let targets = collect_targets(&canon_root, &req.wipe_type)?;
+
+    // Backup before any deletion when requested.
+    if req.backup && !targets.is_empty() {
+        let backup_dir = jail(root, &format!(".corrosion-backups/{}", req.backup_label))?;
+        fs::create_dir_all(&backup_dir)
+            .with_context(|| format!("create backup dir '{}'", backup_dir.display()))?;
+        for path in &targets {
+            backup_one(&canon_root, path, &backup_dir)?;
+        }
+    }
+
+    // Delete.
+    let mut deleted_count = 0usize;
+    for path in &targets {
+        // Final safety check: strictly inside the root — never the root itself.
+        if path == &canon_root || !path.starts_with(&canon_root) {
+            anyhow::bail!(
+                "wipe safety: path '{}' is outside instance root '{}' — aborting",
+                path.display(),
+                canon_root.display()
+            );
+        }
+        match delete_path(path) {
+            Ok(n) => deleted_count += n,
+            Err(e) => tracing::warn!("wipe: skipping '{}': {e:#}", path.display()),
+        }
+    }
+
+    tracing::info!(
+        "wipe complete: type={:?} deleted={} root={}",
+        req.wipe_type,
+        deleted_count,
+        root.display()
+    );
+
+    Ok(WipeResult {
+        deleted_count,
+        wipe_type: req.wipe_type.clone(),
+    })
+}
+
+// ---------------------------------------------------------------------------
+// Target collection
+// ---------------------------------------------------------------------------
+
+/// Gather every path (file or directory) under `canon_root` that the given
+/// wipe type should delete.  Symlinks are skipped, never queued or followed.
+///
+/// Layout assumed:
+/// ```text
+/// <root>/
+///   server/
+///     <identity>/         -- any name; we walk all subdirs
+///       *.map
+///       *.sav
+///       player.blueprints.*.db  (and *.blueprints.* variants)
+///       player.deaths.*.db
+///       player.identities.*.db
+///       player.states.*.db
+///       *.db                    (full wipe)
+///   oxide/
+///     data/               -- cleared for full wipe (dir contents, not dir itself)
+/// ```
+fn collect_targets(canon_root: &Path, wipe_type: &WipeType) -> Result<Vec<PathBuf>> {
+    let mut found: Vec<PathBuf> = Vec::new();
+
+    // --- server/<identity>/ ---
+    let saves_root = canon_root.join("server");
+    if is_real_dir(&saves_root) {
+        for candidate in read_dir_safe(&saves_root)? {
+            let kind = fs::symlink_metadata(&candidate)
+                .with_context(|| format!("stat '{}'", candidate.display()))?
+                .file_type();
+
+            if kind.is_symlink() {
+                // Never follow symlinks across the boundary.
+                tracing::debug!("wipe: skipping symlink '{}'", candidate.display());
+            } else if kind.is_dir() {
+                // A real identity directory: pick out its wipeable entries.
+                collect_save_targets(canon_root, &candidate, wipe_type, &mut found)?;
+            }
+            // Anything else directly under `server/` is left alone.
+        }
+    }
+
+    // --- oxide/data/ (full wipe only) ---
+    let plugin_data = canon_root.join("oxide").join("data");
+    if matches!(wipe_type, WipeType::Full) && is_real_dir(&plugin_data) {
+        // Delete directory *contents*, not the directory itself.
+        for child in read_dir_safe(&plugin_data)? {
+            let is_link = fs::symlink_metadata(&child)
+                .with_context(|| format!("stat '{}'", child.display()))?
+                .file_type()
+                .is_symlink();
+
+            // Symlinked entries are never queued.
+            if is_link {
+                tracing::debug!("wipe: skipping symlink '{}'", child.display());
+                continue;
+            }
+            // Jail-check every entry before adding.
+            ensure_inside(canon_root, &child)?;
+            found.push(child);
+        }
+    }
+
+    Ok(found)
+}
+
+/// Queue the wipeable entries of one `<root>/server/<identity>/` directory.
+fn collect_save_targets(
+    canon_root: &Path,
+    identity_dir: &Path,
+    wipe_type: &WipeType,
+    out: &mut Vec<PathBuf>,
+) -> Result<()> {
+    for candidate in read_dir_safe(identity_dir)? {
+        let is_link = fs::symlink_metadata(&candidate)
+            .with_context(|| format!("stat '{}'", candidate.display()))?
+            .file_type()
+            .is_symlink();
+
+        // Never follow symlinks.
+        if is_link {
+            tracing::debug!("wipe: skipping symlink '{}'", candidate.display());
+            continue;
+        }
+
+        ensure_inside(canon_root, &candidate)?;
+
+        // Entries without a final component are matched as the empty name.
+        let name = match candidate.file_name() {
+            Some(raw) => raw.to_string_lossy().into_owned(),
+            None => String::new(),
+        };
+
+        // Each wipe type selects a superset of the previous one.
+        let map_hit = is_map_file(&name) || is_sav_file(&name);
+        let selected = match wipe_type {
+            WipeType::Map => map_hit,
+            WipeType::Blueprint => map_hit || is_blueprint_file(&name),
+            WipeType::Full => {
+                map_hit
+                    || is_blueprint_file(&name)
+                    || is_player_state_file(&name)
+                    || is_generic_db_file(&name)
+            }
+        };
+
+        if selected {
+            out.push(candidate);
+        }
+    }
+    Ok(())
+}
+
+// ---------------------------------------------------------------------------
+// Pattern matchers
+// ---------------------------------------------------------------------------
+
+fn is_map_file(name: &str) -> bool {
+    name.strip_suffix(".map").is_some()
+}
+
+fn is_sav_file(name: &str) -> bool {
+    name.strip_suffix(".sav").is_some()
+}
+
+fn is_blueprint_file(name: &str) -> bool {
+    // Covers `player.blueprints.*.db` as well as any other `.blueprints.*` name.
+    name.find(".blueprints.").is_some()
+}
+
+fn is_player_state_file(name: &str) -> bool {
+    ["player.deaths.", "player.identities.", "player.states."]
+        .iter()
+        .any(|marker| name.contains(marker))
+}
+
+fn is_generic_db_file(name: &str) -> bool {
+    name.strip_suffix(".db").is_some()
+}
+
+// ---------------------------------------------------------------------------
+// Deletion
+// ---------------------------------------------------------------------------
+
+/// Remove one path — a file, a symlink, or a whole directory tree.  Returns
+/// the number of top-level items removed: 1 on success, 0 when the path does
+/// not exist (the server may be fresh).
+fn delete_path(path: &Path) -> Result<usize> {
+    let kind = match fs::symlink_metadata(path) {
+        Ok(m) => m.file_type(),
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
+        Err(e) => return Err(e).with_context(|| format!("stat '{}'", path.display())),
+    };
+
+    // lstat-based type: a symlink is reported as a symlink, not as its target.
+    if kind.is_symlink() {
+        // Delete the symlink itself — never follow it.
+        fs::remove_file(path).with_context(|| format!("remove symlink '{}'", path.display()))?;
+    } else if kind.is_dir() {
+        fs::remove_dir_all(path)
+            .with_context(|| format!("remove_dir_all '{}'", path.display()))?;
+    } else {
+        fs::remove_file(path)
+            .with_context(|| format!("remove_file '{}'", path.display()))?;
+    }
+
+    // Every successful removal counts as one top-level item.
+    Ok(1)
+}
+
+// ---------------------------------------------------------------------------
+// Backup
+// ---------------------------------------------------------------------------
+
+/// Copy one path (file or directory) into `backup_dir`, mirroring its location
+/// relative to `canon_root`.  Symlinks are skipped — we never follow them.
+fn backup_one(canon_root: &Path, src: &Path, backup_dir: &Path) -> Result<()> {
+    let meta = match fs::symlink_metadata(src) {
+        Ok(m) => m,
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
+        Err(e) => return Err(e).with_context(|| format!("stat backup src '{}'", src.display())),
+    };
+
+    if meta.file_type().is_symlink() {
+        tracing::debug!("wipe backup: skipping symlink '{}'", src.display());
+        return Ok(());
+    }
+
+    if src.file_name().is_none() {
+        return Ok(());
+    }
+
+    // Mirror the path relative to the root inside the backup directory to avoid
+    // name collisions when multiple identity dirs have a `proc.map`.  A source
+    // outside the root is rejected rather than joined: `Path::join` with an
+    // absolute path discards `backup_dir`, so the destination would be the
+    // source itself and the "backup" copy would clobber the original.
+    let rel = src.strip_prefix(canon_root).with_context(|| {
+        format!("backup: '{}' is outside root '{}'", src.display(), canon_root.display())
+    })?;
+    let dest = backup_dir.join(rel);
+
+    if let Some(parent) = dest.parent() {
+        fs::create_dir_all(parent)
+            .with_context(|| format!("backup: create_dir_all '{}'", parent.display()))?;
+    }
+
+    copy_recursive_safe(src, &dest)?;
+    Ok(())
+}
+
+/// Copy a file or directory tree using `symlink_metadata` (lstat) at every
+/// level; any symlink encountered is an error rather than being followed.
+fn copy_recursive_safe(src: &Path, dest: &Path) -> Result<()> {
+    let kind = fs::symlink_metadata(src)
+        .with_context(|| format!("stat source '{}'", src.display()))?
+        .file_type();
+
+    if kind.is_symlink() {
+        anyhow::bail!(
+            "refusing to copy symlink '{}' during backup — symlinks are not followed",
+            src.display()
+        );
+    }
+    if !kind.is_dir() {
+        fs::copy(src, dest)
+            .with_context(|| format!("copy '{}' -> '{}'", src.display(), dest.display()))?;
+        return Ok(());
+    }
+
+    fs::create_dir_all(dest)
+        .with_context(|| format!("create_dir_all '{}'", dest.display()))?;
+    let children = fs::read_dir(src).with_context(|| format!("read_dir '{}'", src.display()))?;
+    for child in children {
+        let child = child?;
+        copy_recursive_safe(&child.path(), &dest.join(child.file_name()))?;
+    }
+    Ok(())
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// True only for an existing directory that is not itself a symlink.
+fn is_real_dir(path: &Path) -> bool {
+    // Any lstat failure (for example a missing path) counts as "not a dir".
+    fs::symlink_metadata(path)
+        .map(|m| m.is_dir() && !m.file_type().is_symlink())
+        .unwrap_or(false)
+}
+
+/// List a directory and return the path of every entry (as produced by
+/// `DirEntry::path`).  A missing directory yields an empty list; entry
+/// metadata is not inspected here — callers lstat the paths themselves.
+fn read_dir_safe(dir: &Path) -> Result<Vec<PathBuf>> {
+    let listing = match fs::read_dir(dir) {
+        Ok(rd) => rd,
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
+        Err(e) => return Err(e).with_context(|| format!("read_dir '{}'", dir.display())),
+    };
+    listing
+        .map(|item| -> Result<PathBuf> {
+            let item = item.with_context(|| format!("read dir entry in '{}'", dir.display()))?;
+            Ok(item.path())
+        })
+        .collect()
+}
+
+/// Assert that `path` resolves to `canon_root` itself or to something beneath it.
+/// This is the final safety fence before any destructive or backup operation.
+fn ensure_inside(canon_root: &Path, path: &Path) -> Result<()> {
+    // Resolve symlinks when the path can be canonicalized; otherwise check the
+    // path exactly as it was given.
+    let resolved = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
+    // `starts_with` compares whole components and is also true for equal paths.
+    if resolved.starts_with(canon_root) {
+        return Ok(());
+    }
+    anyhow::bail!(
+        "wipe safety: path '{}' is outside instance root '{}' — aborting",
+        path.display(),
+        canon_root.display()
+    );
+}
--- a/corrosion-host-agent/tests/docker_compose.rs
+++ b/corrosion-host-agent/tests/docker_compose.rs
@@ -0,0 +1,156 @@
+//! DockerComposeSupervisor tests. A fake `docker` script records the exact
+//! arguments it was invoked with and returns a controllable exit code, so we
+//! assert the compose invocations + state transitions with no real Docker
+//! daemon — the same mock-the-external-binary approach the steamcmd tests use.
+#![cfg(unix)]
+
+use std::os::unix::fs::PermissionsExt;
+use std::path::{Path, PathBuf};
+
+use corrosion_host_agent::config::InstanceConfig;
+use corrosion_host_agent::docker_compose::{DockerComposeConfig, DockerComposeSupervisor};
+use corrosion_host_agent::supervisor::{InstanceState, Supervisor};
+
+/// Write an executable `fakedocker` shell script into `dir` and return its path.
+/// Each run appends its space-joined args to `args_log` and exits with the integer in `exit_file` (0 if absent).
+fn fake_docker(dir: &Path, args_log: &Path, exit_file: &Path) -> PathBuf {
+    let script = dir.join("fakedocker");
+    let body = format!(
+        "#!/bin/sh\nprintf '%s\\n' \"$*\" >> '{}'\nexit \"$(cat '{}' 2>/dev/null || echo 0)\"\n",
+        args_log.display(),
+        exit_file.display(),
+    );
+    std::fs::write(&script, body).unwrap();
+    let mut perms = std::fs::metadata(&script).unwrap().permissions();
+    perms.set_mode(0o755);
+    std::fs::set_permissions(&script, perms).unwrap();
+    script
+}
+
+fn dune_instance(command: Vec<String>, service: Option<String>) -> InstanceConfig {
+    InstanceConfig {
+        id: "dune-main".to_string(),
+        game: "dune".to_string(),
+        root: PathBuf::from("/tmp"),
+        label: None,
+        executable: None,
+        args: vec![],
+        working_dir: None,
+        rcon: None,
+        steamcmd: None,
+        docker_compose: Some(DockerComposeConfig {
+            file: Some(PathBuf::from("docker-compose.yml")),
+            project: Some("duneproj".to_string()),
+            service,
+            command: Some(command),
+        }),
+    }
+}
+
+#[tokio::test]
+async fn start_runs_compose_up_detached_and_sets_running() {
+    let dir = tempfile::tempdir().unwrap();
+    let args_log = dir.path().join("args.log");
+    let exit_file = dir.path().join("exit");
+    let docker = fake_docker(dir.path(), &args_log, &exit_file);
+
+    let sup = DockerComposeSupervisor::new(&dune_instance(
+        vec![docker.to_string_lossy().into_owned()],
+        None,
+    ));
+    assert_eq!(sup.state(), InstanceState::Stopped);
+
+    sup.clone().start().await.expect("compose up should succeed");
+    assert_eq!(sup.state(), InstanceState::Running);
+
+    let logged = std::fs::read_to_string(&args_log).unwrap();
+    assert!(logged.contains("up -d"), "expected `up -d`; got: {logged}");
+    assert!(logged.contains("-p duneproj"), "expected project flag; got: {logged}");
+    assert!(logged.contains("-f docker-compose.yml"), "expected file flag; got: {logged}");
+}
+
+#[tokio::test]
+async fn stop_runs_compose_stop_and_sets_stopped() {
+    let dir = tempfile::tempdir().unwrap();
+    let args_log = dir.path().join("args.log");
+    let exit_file = dir.path().join("exit");
+    let docker = fake_docker(dir.path(), &args_log, &exit_file);
+
+    let sup = DockerComposeSupervisor::new(&dune_instance(
+        vec![docker.to_string_lossy().into_owned()],
+        None,
+    ));
+    sup.clone().start().await.expect("up");
+    sup.clone().stop().await.expect("compose stop should succeed");
+    assert_eq!(sup.state(), InstanceState::Stopped);
+    assert_eq!(sup.uptime_seconds().await, 0);
+
+    let logged = std::fs::read_to_string(&args_log).unwrap();
+    assert!(logged.lines().any(|l| l.contains("stop")), "expected a `stop` call; got: {logged}");
+}
+
+#[tokio::test]
+async fn restart_runs_compose_restart() {
+    let dir = tempfile::tempdir().unwrap();
+    let args_log = dir.path().join("args.log");
+    let exit_file = dir.path().join("exit");
+    let docker = fake_docker(dir.path(), &args_log, &exit_file);
+
+    let sup = DockerComposeSupervisor::new(&dune_instance(
+        vec![docker.to_string_lossy().into_owned()],
+        None,
+    ));
+    sup.clone().restart().await.expect("compose restart should succeed");
+    assert_eq!(sup.state(), InstanceState::Running);
+
+    let logged = std::fs::read_to_string(&args_log).unwrap();
+    assert!(logged.contains("restart"), "expected `restart`; got: {logged}");
+}
+
+#[tokio::test]
+async fn single_service_is_targeted() {
+    let dir = tempfile::tempdir().unwrap();
+    let args_log = dir.path().join("args.log");
+    let exit_file = dir.path().join("exit");
+    let docker = fake_docker(dir.path(), &args_log, &exit_file);
+
+    let sup = DockerComposeSupervisor::new(&dune_instance(
+        vec![docker.to_string_lossy().into_owned()],
+        Some("gameserver".to_string()),
+    ));
+    sup.clone().start().await.expect("up");
+
+    let logged = std::fs::read_to_string(&args_log).unwrap();
+    assert!(
+        logged.contains("up -d gameserver"),
+        "service must be appended after `up -d`; got: {logged}"
+    );
+}
+
+#[tokio::test]
+async fn compose_failure_errors_and_reverts_state() {
+    let dir = tempfile::tempdir().unwrap();
+    let args_log = dir.path().join("args.log");
+    let exit_file = dir.path().join("exit");
+    std::fs::write(&exit_file, "1").unwrap(); // make the fake docker fail
+    let docker = fake_docker(dir.path(), &args_log, &exit_file);
+
+    let sup = DockerComposeSupervisor::new(&dune_instance(
+        vec![docker.to_string_lossy().into_owned()],
+        None,
+    ));
+    let err = sup.clone().start().await.expect_err("nonzero compose exit must fail");
+    assert!(err.to_string().contains("compose up failed"), "got: {err}");
+    assert_eq!(sup.state(), InstanceState::Stopped, "failed start must revert to Stopped");
+}
+
+#[tokio::test]
+async fn missing_docker_binary_errors_cleanly() {
+    let sup = DockerComposeSupervisor::new(&dune_instance(
+        vec!["/nonexistent/docker-xyz".to_string()],
+        None,
+    ));
+    let err = sup.clone().start().await.expect_err("missing docker must fail");
+    assert!(err.to_string().contains("docker"), "error should mention docker: {err}");
+    assert_eq!(sup.state(), InstanceState::Stopped);
+}
--- a/corrosion-host-agent/tests/filemanager.rs
+++ b/corrosion-host-agent/tests/filemanager.rs
@@ -0,0 +1,461 @@
+//! Integration tests for the jailed file manager.
+//!
+//! Each test runs in a real tempdir on the host filesystem.  The jail-escape
+//! tests are the security-critical section: any path that resolves outside the
+//! instance root MUST be rejected regardless of how the escape is attempted.
+//!
+//! Coverage:
+//!   - Functional: list, write/read roundtrip, mkdir, mkfile, rename, copy, move, delete, dispatch
+//!   - Security:   dotdot traversal, absolute path injection, symlink escape
+//!                 (POSIX symlinks only — `#[cfg(unix)]`)
+
+use corrosion_host_agent::filemanager;
+use std::path::Path;
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Create a temporary directory and return its `TempDir` handle (call
+/// `.path()` for the path).  The directory is removed when the handle drops.
+fn tempdir() -> tempfile::TempDir {
+    tempfile::tempdir().expect("create tempdir")
+}
+
+// ---------------------------------------------------------------------------
+// Functional tests
+// ---------------------------------------------------------------------------
+
+#[test]
+fn write_read_roundtrip() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    let content = "hello from the file manager\nline 2\n"; // two lines, with a trailing newline
+    filemanager::write(root, "test.txt", content).expect("write should succeed");
+
+    let got = filemanager::read(root, "test.txt").expect("read should succeed");
+    assert_eq!(got, content); // read must return the written text unchanged
+}
+
+#[test]
+fn list_returns_written_file() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "server.cfg", "hostname MyServer\n").expect("write");
+
+    let entries = filemanager::list(root, "").expect("list root"); // "" is used here to address the root itself
+    let names: Vec<&str> = entries.iter().map(|e| e.name.as_str()).collect();
+    assert!(names.contains(&"server.cfg"), "expected 'server.cfg' in listing, got {names:?}");
+}
+
+#[test]
+fn list_empty_root_is_empty() {
+    let dir = tempdir();
+    let entries = filemanager::list(dir.path(), "").expect("list empty root"); // nothing has been written yet
+    assert!(entries.is_empty(), "fresh tempdir should have no entries");
+}
+
+#[test]
+fn mkdir_creates_directory() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::mkdir(root, "cfg/custom").expect("mkdir should succeed"); // parent "cfg" does not exist beforehand
+
+    assert!(root.join("cfg/custom").is_dir(), "directory should exist after mkdir");
+}
+
+#[test]
+fn mkdir_creates_nested_dirs() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::mkdir(root, "a/b/c/d").expect("mkdir nested"); // four levels, none of which exist yet
+    assert!(root.join("a/b/c/d").is_dir());
+}
+
+#[test]
+fn write_creates_parent_dirs() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "subdir/deep/file.txt", "data").expect("write with auto-mkdir"); // "subdir/deep" is not created first
+    let content = filemanager::read(root, "subdir/deep/file.txt").expect("read");
+    assert_eq!(content, "data");
+}
+
+#[test]
+fn rename_file() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "old.txt", "content").expect("write");
+    filemanager::rename(root, "old.txt", "new.txt").expect("rename"); // new name is a bare file name, no directory part
+
+    assert!(!root.join("old.txt").exists(), "old.txt should be gone");
+    assert!(root.join("new.txt").exists(), "new.txt should exist");
+
+    let content = filemanager::read(root, "new.txt").expect("read renamed"); // contents must survive the rename
+    assert_eq!(content, "content");
+}
+
+#[test]
+fn rename_rejects_separator_in_new_name() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "file.txt", "data").expect("write");
+
+    let err = filemanager::rename(root, "file.txt", "subdir/escape.txt") // the '/' in the new name is what must be refused
+        .expect_err("rename with path separator must fail");
+    assert!(
+        err.to_string().contains("separator"),
+        "error should mention separator: {err}"
+    );
+}
+
+#[test]
+fn delete_file() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "todelete.txt", "bye").expect("write");
+    assert!(root.join("todelete.txt").exists()); // precondition: the file is really on disk
+
+    filemanager::delete(root, "todelete.txt").expect("delete");
+    assert!(!root.join("todelete.txt").exists());
+}
+
+#[test]
+fn delete_directory_recursive() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::mkdir(root, "tree/sub").expect("mkdir");
+    filemanager::write(root, "tree/sub/file.txt", "x").expect("write"); // makes the tree non-empty
+    assert!(root.join("tree").is_dir());
+
+    filemanager::delete(root, "tree").expect("delete tree"); // delete is aimed at the top of the tree
+    assert!(!root.join("tree").exists(), "directory tree should be deleted");
+}
+
+#[test]
+fn mkfile_creates_empty_file() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::mkfile(root, "empty.txt").expect("mkfile");
+    let content = filemanager::read(root, "empty.txt").expect("read empty file"); // the file must exist and be readable
+    assert_eq!(content, ""); // and it must have no content
+}
+
+#[test]
+fn copy_file() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "source.txt", "original").expect("write source");
+    filemanager::copy(root, "source.txt", "dest.txt").expect("copy");
+
+    let src = filemanager::read(root, "source.txt").expect("read source after copy");
+    let dst = filemanager::read(root, "dest.txt").expect("read destination");
+    assert_eq!(src, "original"); // the source must be left intact by the copy
+    assert_eq!(dst, "original"); // the destination must carry the same content
+}
+
+#[test]
+fn move_file() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "moveme.txt", "payload").expect("write");
+    filemanager::move_path(root, "moveme.txt", "moved.txt").expect("move");
+
+    assert!(!root.join("moveme.txt").exists(), "source should be gone"); // unlike copy, a move removes the source
+    let content = filemanager::read(root, "moved.txt").expect("read after move");
+    assert_eq!(content, "payload");
+}
+
+#[test]
+fn list_entry_fields_are_populated() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "check.txt", "abcde").expect("write");
+    filemanager::mkdir(root, "subdir").expect("mkdir");
+
+    let entries = filemanager::list(root, "").expect("list");
+    // Entries are looked up by name, so this test does not depend on sort order.
+    let dir_entry = entries.iter().find(|e| e.name == "subdir").expect("subdir entry");
+    assert!(dir_entry.is_dir);
+    assert_eq!(dir_entry.size, 0); // directories are expected to report a size of 0
+    assert!(!dir_entry.modified.is_empty(), "modified should be set");
+
+    let file_entry = entries.iter().find(|e| e.name == "check.txt").expect("file entry");
+    assert!(!file_entry.is_dir);
+    assert_eq!(file_entry.size, 5, "size should match byte count");
+    // path should be relative and use forward slashes.
+    assert!(!file_entry.path.starts_with('/'), "path should be relative");
+    assert!(!file_entry.path.contains('\\'), "path should use forward slashes");
+}
+
+// ---------------------------------------------------------------------------
+// Security: jail-escape tests
+// CRITICAL — these are the whole point of the jail abstraction.
+// ---------------------------------------------------------------------------
+
+/// `../../etc/passwd` must never resolve outside the instance root.
+#[test]
+fn jail_rejects_dotdot_traversal() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    let err = filemanager::read(root, "../../etc/passwd")
+        .expect_err("dotdot traversal must be rejected");
+    // Require a jail-specific message so a plain "file not found" cannot pass.
+    let msg = err.to_string();
+    assert!(
+        msg.contains("outside") || msg.contains("escapes") || msg.contains("escape"),
+        "error should mention jail escape for dotdot traversal, got: {msg}"
+    );
+}
+
+/// A path that first descends (`a/b/c`) and then climbs past the root with `../` must also be stopped.
+#[test]
+fn jail_rejects_deep_dotdot_traversal() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    let err = filemanager::read(root, "a/b/c/../../../../../../../../etc/shadow") // three levels down, eight levels up
+        .expect_err("deep dotdot traversal must be rejected");
+    let msg = err.to_string();
+    assert!(
+        msg.contains("outside") || msg.contains("escapes") || msg.contains("escape") || msg.contains("absolute"),
+        "error should mention jail escape for deep traversal, got: {msg}"
+    );
+}
+
+/// An absolute path (e.g. `/etc/passwd`) must be rejected outright — joining
+/// it onto the root would otherwise discard the root entirely.
+#[test]
+fn jail_rejects_absolute_path() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    let err = filemanager::read(root, "/etc/passwd")
+        .expect_err("absolute path must be rejected");
+    let msg = err.to_string();
+    assert!(
+        msg.contains("absolute") || msg.contains("outside") || msg.contains("escapes") || msg.contains("escape"),
+        "error should mention the absolute-path rejection, got: {msg}"
+    );
+}
+
+/// A second `/`-rooted absolute path (`/tmp/evil`) must also be rejected.
+#[test]
+fn jail_rejects_absolute_windows_style_path() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    // NOTE(review): despite the test name, no Windows-style path (`C:\...`) is
+    // exercised here; this only covers another `/`-rooted absolute path.
+    let err = filemanager::read(root, "/tmp/evil")
+        .expect_err("absolute /tmp/evil must be rejected");
+    let msg = err.to_string();
+    assert!(
+        msg.contains("absolute") || msg.contains("outside") || msg.contains("escapes") || msg.contains("escape"),
+        "got: {msg}"
+    );
+}
+
+/// A symlink inside the root that points to a path outside the root must not
+/// be followed.  This is the critical symlink-escape vector.
+#[cfg(unix)]
+#[test]
+fn jail_rejects_symlink_escape() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    // Create a second, separate tempdir to act as the out-of-jail symlink target.
+    let outside = tempdir();
+    let outside_file = outside.path().join("secret.txt");
+    std::fs::write(&outside_file, "secret data").expect("write outside file");
+
+    // Plant a symlink inside the root pointing to the outside directory.
+    let link_path = root.join("evil_link");
+    std::os::unix::fs::symlink(outside.path(), &link_path)
+        .expect("create symlink inside root");
+
+    // Attempt to read a real, existing file through the symlinked directory.
+    let err = filemanager::read(root, "evil_link/secret.txt")
+        .expect_err("symlink escape must be rejected");
+    let msg = err.to_string();
+    assert!(
+        msg.contains("outside") || msg.contains("escapes") || msg.contains("escape"),
+        "error should mention jail escape for symlink traversal, got: {msg}"
+    );
+}
+
+/// A symlink directly inside the root pointing to a file outside must be
+/// rejected even when the path looks like a normal relative reference.
+#[cfg(unix)]
+#[test]
+fn jail_rejects_symlink_pointing_directly_outside() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    // Symlink straight to /etc/passwd; creating the link does not require the target to exist.
+    let link_path = root.join("passwd_link");
+    std::os::unix::fs::symlink(Path::new("/etc/passwd"), &link_path)
+        .expect("create symlink to /etc/passwd");
+
+    let err = filemanager::read(root, "passwd_link")
+        .expect_err("direct symlink outside root must be rejected");
+    let msg = err.to_string();
+    assert!(
+        msg.contains("outside") || msg.contains("escapes") || msg.contains("escape"),
+        "error should mention jail escape, got: {msg}"
+    );
+}
+
+/// A symlink chain (symlink → symlink → outside) must also be caught.
+#[cfg(unix)]
+#[test]
+fn jail_rejects_chained_symlink_escape() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    let outside = tempdir();
+
+    // Chain: root/link1 → root/link2 → outside/ (link2 is created first so link1 has a target)
+    let link2_path = root.join("link2");
+    std::os::unix::fs::symlink(outside.path(), &link2_path)
+        .expect("create link2");
+
+    let link1_path = root.join("link1");
+    std::os::unix::fs::symlink(&link2_path, &link1_path)
+        .expect("create link1");
+
+    let err = filemanager::read(root, "link1") // link1 ultimately resolves to the outside directory
+        .expect_err("chained symlink escape must be rejected");
+    let msg = err.to_string();
+    assert!(
+        msg.contains("outside") || msg.contains("escapes") || msg.contains("escape"),
+        "chained symlink should be caught, got: {msg}"
+    );
+}
+
+/// SECURITY REGRESSION: copying a directory that contains a symlink pointing
+/// OUTSIDE the jail must NOT dereference it and pull external content inside.
+/// jail() validates only the top-level src/dest; the recursive copy must
+/// refuse symlinks itself or it becomes a read-escape exfiltration path.
+#[cfg(unix)]
+#[test]
+fn copy_refuses_to_follow_symlink_out_of_jail() {
+    let dir = tempdir();
+    let root = dir.path();
+    let outside = tempdir();
+    std::fs::write(outside.path().join("secret.txt"), "TOP SECRET")
+        .expect("write external secret");
+
+    // A directory inside the jail containing a symlink to the outside dir.
+    std::fs::create_dir(root.join("src")).expect("mkdir src");
+    std::os::unix::fs::symlink(outside.path(), root.join("src").join("escape"))
+        .expect("plant symlink to outside");
+
+    // Attempt to copy src -> dest (both top-level names are inside the jail).
+    let err = filemanager::copy(root, "src", "dest")
+        .expect_err("copy must refuse the embedded symlink");
+    assert!(
+        format!("{err:#}").contains("symlink"), // `{:#}` — presumably prints the full cause chain; confirm against the error type
+        "error should name the refused symlink, got: {err:#}"
+    );
+
+    // The external secret must NOT have landed inside the jail.
+    assert!(
+        !root.join("dest").join("escape").join("secret.txt").exists(),
+        "external content leaked into the jail via symlink-following copy",
+    );
+}
+
+/// `list` must report a symlink as the link itself, never the dereferenced
+/// target — otherwise it leaks the size/type of files outside the jail.
+#[cfg(unix)]
+#[test]
+fn list_does_not_dereference_symlink_metadata() {
+    let dir = tempdir();
+    let root = dir.path();
+    std::os::unix::fs::symlink(Path::new("/etc/passwd"), root.join("leak"))
+        .expect("plant symlink");
+
+    let entries = filemanager::list(root, "").expect("list root");
+    let leak = entries.iter().find(|e| e.name == "leak").expect("symlink listed");
+    // /etc/passwd is a regular file; if we followed the link, is_dir would
+    // reflect the target. We must report the link, which is not a directory,
+    // and must NOT expose the target's byte size.
+    assert!(!leak.is_dir, "symlink must not be reported as a directory");
+    let target_size = std::fs::metadata("/etc/passwd").map(|m| m.len()).unwrap_or(0); // 0 when the target cannot be stat'ed
+    assert!(
+        leak.size != target_size || target_size == 0, // the size comparison is waived when target_size is 0
+        "list leaked the symlink target's size ({target_size} bytes)"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Dispatch layer tests
+// ---------------------------------------------------------------------------
+
+#[test]
+fn dispatch_list_returns_success() {
+    let dir = tempdir();
+    let root = dir.path();
+
+    filemanager::write(root, "a.txt", "a").expect("write");
+
+    let req = filemanager::FileRequest {
+        op: "list".to_string(),
+        path: String::new(), // empty path, i.e. list the root
+        dest: None,
+        content: None,
+        name: None,
+    };
+    let resp = filemanager::dispatch(root, &req);
+    assert_eq!(resp["status"], "success");
+    assert!(resp["data"]["entries"].is_array()); // only the shape is checked here, not the entries themselves
+}
+
+#[test]
+fn dispatch_unknown_op_returns_error() {
+    let dir = tempdir();
+    let req = filemanager::FileRequest {
+        op: "explode".to_string(), // deliberately not a real operation name
+        path: String::new(),
+        dest: None,
+        content: None,
+        name: None,
+    };
+    let resp = filemanager::dispatch(dir.path(), &req);
+    assert_eq!(resp["status"], "error");
+    assert!(resp["message"].as_str().unwrap().contains("unknown op")); // the unwrap also asserts that message is a string
+}
+
+#[test]
+fn dispatch_escape_attempt_returns_error_not_panic() {
+    let dir = tempdir();
+    let req = filemanager::FileRequest {
+        op: "read".to_string(),
+        path: "../../etc/passwd".to_string(), // traversal attempt routed through the dispatch layer
+        dest: None,
+        content: None,
+        name: None,
+    };
+    let resp = filemanager::dispatch(dir.path(), &req);
+    // Must come back as an error response value rather than a panic or file data.
+    assert_eq!(resp["status"], "error", "escape attempt should return error status");
+    assert!(
+        resp["message"].as_str().is_some(),
+        "error response must have a message"
+    );
+}
--- a/corrosion-host-agent/tests/fixtures/sample.bin
+++ b/corrosion-host-agent/tests/fixtures/sample.bin
@@ -0,0 +1,2 @@
+corrosion-host-agent signed-update test fixture
+version 2.0.0-test
--- a/corrosion-host-agent/tests/fixtures/sample.bin.minisig
+++ b/corrosion-host-agent/tests/fixtures/sample.bin.minisig
@@ -0,0 +1,4 @@
+untrusted comment: signature from minisign secret key
+RUQKhJptuiwIkp378Z59BTwosDycAhmlhrdZZVwk1Vdb293OgcsXx0S3W0XezMtOXIXdgvQtW/DpDKlb1gdW4elQXLG5KFUgawI=
+trusted comment: timestamp:1781222247	file:sample.bin	hashed
+QtUiOfJqRKYJZTL6QV93xeLVnODr8HXWvZIR3Q1AG0yqmqesZPyiKpVa9kD34Mwp1fQ76nx1Z7c6CB1v5KHQAw==
--- a/corrosion-host-agent/tests/rcon.rs
+++ b/corrosion-host-agent/tests/rcon.rs
@@ -0,0 +1,353 @@
+//! RCON integration tests using in-process mock servers.
+//!
+//! Real OS sockets on ephemeral ports — no mocking framework.  Each test
+//! binds a listener, spawns a task that speaks the expected protocol, then
+//! exercises `rcon::send_command` and asserts on the result.  Tests are
+//! unix-only because the musl cross-compile target and the CI runner are both
+//! Linux; the production use case is also Linux-only (game servers don't run
+//! on macOS or Windows in production).
+//!
+//! We use `#[cfg(unix)]` to keep parity with the supervisor integration tests.
+#![cfg(unix)]
+
+use corrosion_host_agent::rcon::{RconConfig, RconKind};
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::net::{TcpListener, TcpStream};
+
+// ---------------------------------------------------------------------------
+// Source RCON helpers — duplicate the wire-format encode/decode locally so
+// the tests own the mock server without depending on the production code path.
+// ---------------------------------------------------------------------------
+
+/// Build a Source RCON packet: [size(4LE) | id(4LE) | type(4LE) | body | 0x00 0x00]
+fn encode_packet(id: i32, ptype: i32, body: &[u8]) -> Vec<u8> {
+    let size = (4 + 4 + body.len() + 2) as i32; // id + type + body + two NUL terminators; the size field does not count itself
+    let mut out = Vec::with_capacity(4 + size as usize); // the extra 4 bytes hold the size prefix
+    out.extend_from_slice(&size.to_le_bytes());
+    out.extend_from_slice(&id.to_le_bytes());
+    out.extend_from_slice(&ptype.to_le_bytes());
+    out.extend_from_slice(body);
+    out.push(0x00);
+    out.push(0x00);
+    out
+}
+
+/// Read one Source RCON packet from a TcpStream and return `(id, type, body)`; panics on I/O errors.
+async fn read_packet(stream: &mut TcpStream) -> (i32, i32, Vec<u8>) {
+    let mut size_buf = [0u8; 4];
+    stream.read_exact(&mut size_buf).await.unwrap();
+    let size = i32::from_le_bytes(size_buf) as usize; // number of bytes that follow the size prefix
+
+    let mut payload = vec![0u8; size];
+    stream.read_exact(&mut payload).await.unwrap();
+
+    let id = i32::from_le_bytes(payload[0..4].try_into().unwrap());
+    let ptype = i32::from_le_bytes(payload[4..8].try_into().unwrap());
+    let body_end = size.saturating_sub(2); // strip the two trailing NUL bytes
+    let body = payload[8..body_end].to_vec(); // panics if size < 10, i.e. on a malformed packet
+    (id, ptype, body)
+}
+
+const SOURCE_TYPE_AUTH: i32 = 3; // client → server: password
+const SOURCE_TYPE_AUTH_RESPONSE: i32 = 2; // server → client; shares wire value 2 with EXECCOMMAND, which goes the other way
+const SOURCE_TYPE_EXECCOMMAND: i32 = 2; // client → server: command text
+const SOURCE_TYPE_RESPONSE_VALUE: i32 = 0; // server → client: command output (the client also sends it as the probe)
+
+// ---------------------------------------------------------------------------
+// Mock Source RCON server
+// ---------------------------------------------------------------------------
+
+/// Run a Source RCON server that accepts `accept_password`, rejects others,
+/// and responds to the first EXECCOMMAND with `command_response`.
+///
+/// If `split_at` is Some(n) the body is split: the first `n` bytes (clamped to
+/// the body length) arrive in one RESPONSE_VALUE packet and the remainder in
+/// a second — testing multi-packet reassembly.
+async fn run_source_mock(
+    mut stream: TcpStream,
+    accept_password: &str,
+    command_response: &[u8],
+    split_at: Option<usize>,
+) {
+    // --- Auth phase ---
+    let (auth_id, ptype, body) = read_packet(&mut stream).await;
+    assert_eq!(ptype, SOURCE_TYPE_AUTH, "expected AUTH packet");
+
+    let password = String::from_utf8_lossy(&body);
+    if password != accept_password {
+        // Send empty RESPONSE_VALUE then AUTH_RESPONSE with id = -1 (failure).
+        let empty = encode_packet(auth_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
+        stream.write_all(&empty).await.unwrap();
+        let fail = encode_packet(-1, SOURCE_TYPE_AUTH_RESPONSE, b"");
+        stream.write_all(&fail).await.unwrap();
+        return; // the command phase is never reached; the stream closes when this function returns
+    }
+
+    // Success: empty RESPONSE_VALUE then AUTH_RESPONSE with the auth id.
+    let empty = encode_packet(auth_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
+    stream.write_all(&empty).await.unwrap();
+    let ok = encode_packet(auth_id, SOURCE_TYPE_AUTH_RESPONSE, b"");
+    stream.write_all(&ok).await.unwrap();
+
+    // --- Command phase ---
+    let (cmd_id, cmd_ptype, _cmd_body) = read_packet(&mut stream).await;
+    assert_eq!(cmd_ptype, SOURCE_TYPE_EXECCOMMAND, "expected EXECCOMMAND");
+
+    // Read the probe packet (empty RESPONSE_VALUE with a different id).
+    let (probe_id, probe_ptype, _) = read_packet(&mut stream).await;
+    assert_eq!(probe_ptype, SOURCE_TYPE_RESPONSE_VALUE, "expected probe packet");
+
+    // Send the command response, optionally split across two packets.
+    if let Some(n) = split_at {
+        let (part1, part2) = command_response.split_at(n.min(command_response.len())); // clamp so split_at cannot panic
+        let p1 = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, part1);
+        stream.write_all(&p1).await.unwrap();
+        let p2 = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, part2);
+        stream.write_all(&p2).await.unwrap();
+    } else {
+        let p = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, command_response);
+        stream.write_all(&p).await.unwrap();
+    }
+
+    // Echo the probe to signal end-of-response.
+    let probe_echo = encode_packet(probe_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
+    stream.write_all(&probe_echo).await.unwrap();
+}
+
+// ---------------------------------------------------------------------------
+// Source RCON tests
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn source_rcon_auth_and_exec_returns_response() {
+    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); // port 0: let the OS pick a free port
+    let port = listener.local_addr().unwrap().port();
+
+    tokio::spawn(async move {
+        let (stream, _) = listener.accept().await.unwrap();
+        run_source_mock(stream, "goodpw", b"Hello from server", None).await; // None: reply in a single packet
+    });
+
+    let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
+    let result = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
+        .await
+        .expect("command should succeed");
+
+    assert_eq!(result, "Hello from server");
+}
+
+#[tokio::test]
+async fn source_rcon_wrong_password_returns_auth_error() {
+    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+    let port = listener.local_addr().unwrap().port();
+
+    tokio::spawn(async move {
+        let (stream, _) = listener.accept().await.unwrap();
+        run_source_mock(stream, "goodpw", b"should not see this", None).await; // the mock returns after the failed auth, so this body is never sent
+    });
+
+    let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "wrongpw".to_string() };
+    let err = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
+        .await
+        .expect_err("wrong password should fail");
+
+    assert!(
+        err.to_string().to_lowercase().contains("auth"), // case-insensitive match on the error text
+        "error should mention auth failure, got: {err}"
+    );
+}
+
+#[tokio::test]
+async fn source_rcon_multi_packet_response_concatenated() {
+    // Build a body large enough to split meaningfully across two packets.
+    // Use repeating ASCII so the result is valid UTF-8 and easy to verify.
+    // 200 'A's then 200 'B's = 400 bytes, split at 200 (exactly on the A/B boundary).
+    let body: Vec<u8> = std::iter::repeat_n(b'A', 200)
+        .chain(std::iter::repeat_n(b'B', 200))
+        .collect();
+    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+    let port = listener.local_addr().unwrap().port();
+    let body_clone = body.clone(); // moved into the server task; `body` is kept for the final comparison
+
+    tokio::spawn(async move {
+        let (stream, _) = listener.accept().await.unwrap();
+        run_source_mock(stream, "goodpw", &body_clone, Some(200)).await;
+    });
+
+    let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
+    let result = corrosion_host_agent::rcon::send_command(&cfg, "soulmask", "showplayers")
+        .await
+        .expect("multi-packet command should succeed");
+
+    let expected = String::from_utf8(body).unwrap();
+    assert_eq!(result, expected, "full body should be concatenated across both packets");
+}
+
+#[tokio::test]
+async fn source_rcon_connect_timeout_to_unreachable_port() {
+    // NOTE: despite the test name, no connect timeout is exercised here.  The
+    // listener is bound, so the TCP connect itself succeeds; the mock then
+    // accepts and immediately drops the stream.  The client should see the
+    // connection close while waiting for the auth reply, and that must
+    // surface as an error (not a panic or hang).
+    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+    let port = listener.local_addr().unwrap().port();
+
+    // Accept the TCP connection but immediately drop it — simulates a port
+    // that accepts but never speaks RCON.
+    tokio::spawn(async move {
+        let (_stream, _) = listener.accept().await.unwrap();
+        // _stream dropped here — EOF on the client's read
+    });
+
+    let cfg =
+        RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
+    let err = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
+        .await
+        .expect_err("closed connection should fail");
+
+    // We just need it to fail and not hang; error message varies by OS.
+    let _ = err;
+}
+
+// ---------------------------------------------------------------------------
+// WebRCON mock server
+// ---------------------------------------------------------------------------
+
+/// Run a WebRCON mock: send one unsolicited chat frame (Identifier 0), then
+/// answer the first request with `output`, echoing the request's Identifier.
+async fn run_webrcon_mock(stream: tokio::net::TcpStream, output: &str) {
+    use futures::{SinkExt, StreamExt};
+    use tokio_tungstenite::accept_async;
+    use tokio_tungstenite::tungstenite::Message as WsMsg;
+
+    let mut ws = accept_async(stream).await.expect("WS handshake failed");
+
+    // Send noise (chat frame, Identifier 0) before the real request arrives.
+    let noise = serde_json::json!({
+        "Identifier": 0,
+        "Message": "Player X joined",
+        "Name": "Server",
+        "Type": "Chat"
+    });
+    ws.send(WsMsg::Text(noise.to_string()))
+        .await
+        .unwrap();
+
+    // Read the command request; anything other than a Text frame is a test failure.
+    let msg = ws.next().await.unwrap().unwrap();
+    let text = match msg {
+        WsMsg::Text(t) => t,
+        other => panic!("expected Text frame, got {other:?}"),
+    };
+    let req: serde_json::Value = serde_json::from_str(&text).unwrap();
+    let req_id = req["Identifier"].as_i64().unwrap() as i32;
+
+    // Reply with the same Identifier so the client correlates correctly.
+    let reply = serde_json::json!({
+        "Identifier": req_id,
+        "Message": output,
+        "Type": "Generic",
+    });
+    ws.send(WsMsg::Text(reply.to_string())).await.unwrap();
+}
+
+// ---------------------------------------------------------------------------
+// WebRCON tests
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn webrcon_skips_noise_and_returns_correct_message() {
+    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+    let port = listener.local_addr().unwrap().port();
+
+    tokio::spawn(async move {
+        let (stream, _) = listener.accept().await.unwrap();
+        run_webrcon_mock(stream, "Players: 42/100").await;
+    });
+
+    // Presumably the client embeds the password in the WebSocket URL path; the
+    // mock never inspects the request, so any value works here.
+    let cfg = RconConfig {
+        kind: Some(RconKind::WebRcon),
+        port,
+        password: "testpw".to_string(),
+    };
+    let result = corrosion_host_agent::rcon::send_command(&cfg, "rust", "playercount")
+        .await
+        .expect("WebRCON command should succeed");
+
+    assert_eq!(result, "Players: 42/100");
+}
+
+// ---------------------------------------------------------------------------
+// TOML parsing test — pins [[instance]] + [instance.rcon] sub-table syntax
+// ---------------------------------------------------------------------------
+
+#[test]
+fn toml_instance_with_rcon_parses_correctly() { // one [[instance]] with an [instance.rcon] sub-table must populate `rcon`
+    let toml = r#"
+[agent]
+license_id = "test-license"
+nats_url = "nats://localhost:4222"
+
+[[instance]]
+id = "rust-main"
+game = "rust"
+root = "/opt/rustserver"
+
+[instance.rcon]
+port = 28016
+password = "secretpassword"
+kind = "webrcon"
+"#;
+
+    let cfg: corrosion_host_agent::config::ConfigFile =
+        toml::from_str(toml).expect("TOML should parse");
+
+    assert_eq!(cfg.instances.len(), 1);
+    let inst = &cfg.instances[0];
+    assert_eq!(inst.id, "rust-main");
+
+    let rcon = inst.rcon.as_ref().expect("rcon should be present");
+    assert_eq!(rcon.port, 28016);
+    assert_eq!(rcon.password, "secretpassword");
+    assert_eq!(rcon.kind, Some(corrosion_host_agent::rcon::RconKind::WebRcon));
+}
+
+#[test]
+fn toml_instance_without_rcon_defaults_to_none() { // the [instance.rcon] sub-table is optional
+    let toml = r#"
+[agent]
+license_id = "test-license"
+nats_url = "nats://localhost:4222"
+
+[[instance]]
+id = "conan-main"
+game = "conan"
+root = "/opt/conan"
+"#;
+
+    let cfg: corrosion_host_agent::config::ConfigFile =
+        toml::from_str(toml).expect("TOML should parse");
+
+    assert!(cfg.instances[0].rcon.is_none(), "absent rcon should be None");
+}
+
+#[test]
+fn resolved_kind_infers_from_game_name() {
+    use corrosion_host_agent::rcon::{RconConfig, RconKind}; // same names as the file-level import
+
+    let cfg_no_kind = RconConfig { kind: None, port: 28016, password: "x".to_string() }; // kind: None leaves the choice to the game name
+    assert_eq!(cfg_no_kind.resolved_kind("rust"), RconKind::WebRcon);
+    assert_eq!(cfg_no_kind.resolved_kind("conan"), RconKind::Source);
+    assert_eq!(cfg_no_kind.resolved_kind("soulmask"), RconKind::Source);
+    assert_eq!(cfg_no_kind.resolved_kind("dune"), RconKind::WebRcon); // fallback
+
+    // Explicit kind always wins, even against a game name that would infer the other kind.
+    let cfg_source = RconConfig { kind: Some(RconKind::Source), ..cfg_no_kind.clone() };
+    assert_eq!(cfg_source.resolved_kind("rust"), RconKind::Source);
+
+    let cfg_webrcon = RconConfig { kind: Some(RconKind::WebRcon), ..cfg_no_kind };
+    assert_eq!(cfg_webrcon.resolved_kind("conan"), RconKind::WebRcon);
+}
--- a/corrosion-host-agent/tests/steamcmd.rs
+++ b/corrosion-host-agent/tests/steamcmd.rs
@@ -0,0 +1,45 @@
+//! Unit tests for the SteamCMD module.
+//!
+//! Tests cover app ID resolution for all four supported games, including the
+//! platform-specific Soulmask split, and verify that Dune correctly returns
+//! `None` (it uses Docker images, not SteamCMD).
+
+use corrosion_host_agent::steamcmd::app_id_for_game;
+
+/// "rust" must resolve to app ID 258550.
+#[test]
+fn rust_has_correct_app_id() {
+    let resolved = app_id_for_game("rust");
+    assert_eq!(resolved, Some(258550));
+}
+
+/// "conan" must resolve to app ID 443030.
+#[test]
+fn conan_has_correct_app_id() {
+    let resolved = app_id_for_game("conan");
+    assert_eq!(resolved, Some(443030));
+}
+
+/// Soulmask's app ID is platform-specific. On Windows builds it must be the
+/// Windows server app ID; every other target is checked by
+/// `soulmask_linux_app_id`.
+#[test]
+#[cfg(windows)]
+fn soulmask_windows_app_id() {
+    let resolved = app_id_for_game("soulmask");
+    assert_eq!(resolved, Some(3017310));
+}
+
+/// On every non-Windows target, "soulmask" must resolve to app ID 3017300.
+#[test]
+#[cfg(not(windows))]
+fn soulmask_linux_app_id() {
+    let resolved = app_id_for_game("soulmask");
+    assert_eq!(resolved, Some(3017300));
+}
+
+/// Dune is deployed from Docker images, so it deliberately has no SteamCMD
+/// app ID.
+#[test]
+fn dune_has_no_app_id() {
+    let resolved = app_id_for_game("dune");
+    assert_eq!(resolved, None);
+}
+
+/// Unrecognised game names (including the empty string) yield `None`, which
+/// callers should handle exactly like Dune: no SteamCMD support.
+#[test]
+fn unknown_game_returns_none() {
+    for game in ["minecraft", ""] {
+        assert_eq!(app_id_for_game(game), None);
+    }
+}
--- a/corrosion-host-agent/tests/supervisor.rs
+++ b/corrosion-host-agent/tests/supervisor.rs
@@ -0,0 +1,111 @@
+//! Process supervisor integration tests using real OS processes.
+//! Unix-only test doubles (/bin/sleep, /bin/sh) — the supervisor logic under
+//! test is platform-shared; Windows-specific stop semantics get covered when
+//! the Windows service work lands.
+#![cfg(unix)]
+
+use std::path::PathBuf;
+use std::time::Duration;
+
+use corrosion_host_agent::config::InstanceConfig;
+use corrosion_host_agent::process::ProcessSupervisor;
+use corrosion_host_agent::supervisor::{InstanceState, Supervisor};
+
+/// Builds the `InstanceConfig` fixture shared by these tests: a "rust"
+/// instance rooted at `/tmp` that runs `executable` with `args`. All other
+/// optional settings are left unset.
+fn managed_instance(executable: &str, args: &[&str]) -> InstanceConfig {
+    let binary = PathBuf::from(executable);
+    let owned_args = args.iter().map(|arg| (*arg).to_owned()).collect();
+
+    InstanceConfig {
+        id: String::from("test-instance"),
+        game: String::from("rust"),
+        root: PathBuf::from("/tmp"),
+        label: None,
+        executable: Some(binary),
+        args: owned_args,
+        working_dir: None,
+        rcon: None,
+        steamcmd: None,
+        docker_compose: None,
+    }
+}
+
+/// Polls `sup.state()` every 100 ms until `want` accepts it, then returns
+/// that state.
+///
+/// Panics (reporting the last observed state) once `budget` has elapsed
+/// without a match. The state is always checked at least once before the
+/// deadline is consulted.
+async fn wait_for_state(
+    sup: &std::sync::Arc<ProcessSupervisor>,
+    want: fn(&InstanceState) -> bool,
+    budget: Duration,
+) -> InstanceState {
+    let poll_interval = Duration::from_millis(100);
+    let give_up_at = tokio::time::Instant::now() + budget;
+
+    loop {
+        let current = sup.state();
+        if want(&current) {
+            return current;
+        }
+        assert!(
+            tokio::time::Instant::now() <= give_up_at,
+            "timed out waiting for state; last = {current:?}"
+        );
+        tokio::time::sleep(poll_interval).await;
+    }
+}
+
+/// Walks one supervisor through Stopped -> Running -> Stopped, checking the
+/// uptime counter and the double-start rejection along the way.
+#[tokio::test]
+async fn start_status_stop_lifecycle() {
+    let config = managed_instance("/bin/sleep", &["300"]);
+    let supervisor = ProcessSupervisor::new(&config);
+    assert_eq!(supervisor.state(), InstanceState::Stopped);
+
+    let started = supervisor.clone().start().await;
+    started.expect("start should succeed");
+    assert_eq!(supervisor.state(), InstanceState::Running);
+
+    // Wait a little over one second so the whole-second uptime can tick.
+    tokio::time::sleep(Duration::from_millis(1100)).await;
+    let uptime = supervisor.uptime_seconds().await;
+    assert!(uptime >= 1, "uptime should advance");
+
+    // Double-start must be rejected while running.
+    let second_start = supervisor.clone().start().await;
+    assert!(second_start.is_err(), "double start must fail");
+
+    let stopped = supervisor.clone().stop().await;
+    stopped.expect("stop should succeed");
+    let state = wait_for_state(
+        &supervisor,
+        |s| matches!(s, InstanceState::Stopped),
+        Duration::from_secs(5),
+    )
+    .await;
+    assert_eq!(state, InstanceState::Stopped);
+    assert_eq!(supervisor.uptime_seconds().await, 0);
+}
+
+/// A process that exits on its own (here with status 7) must surface as
+/// `Crashed` carrying that exit code.
+#[tokio::test]
+async fn unexpected_exit_is_crashed_with_code() {
+    let config = managed_instance("/bin/sh", &["-c", "sleep 0.2; exit 7"]);
+    let supervisor = ProcessSupervisor::new(&config);
+    let started = supervisor.clone().start().await;
+    started.expect("start should succeed");
+
+    let budget = Duration::from_secs(5);
+    let observed =
+        wait_for_state(&supervisor, |s| matches!(s, InstanceState::Crashed { .. }), budget).await;
+
+    assert_eq!(observed, InstanceState::Crashed { exit_code: Some(7) });
+}
+
+/// Drives one supervisor into `Crashed`, then checks that `restart()` brings
+/// a supervisor up to `Running`.
+///
+/// NOTE(review): the restart is issued on a *fresh* supervisor (state
+/// Stopped), not on the crashed one, because the command is fixed per
+/// supervisor. The Crashed -> restart path itself is only emulated here.
+#[tokio::test]
+async fn restart_from_crashed_recovers() {
+    let crashing_cfg = managed_instance("/bin/sh", &["-c", "exit 1"]);
+    let crashed = ProcessSupervisor::new(&crashing_cfg);
+    let started = crashed.clone().start().await;
+    started.expect("start should succeed");
+    let budget = Duration::from_secs(5);
+    wait_for_state(&crashed, |s| matches!(s, InstanceState::Crashed { .. }), budget).await;
+
+    // Restart from crashed must work (panel "Restart" after a crash).
+    // The command is fixed per supervisor, so a long-lived command needs a
+    // fresh supervisor to stand in for the recovered instance.
+    let long_lived_cfg = managed_instance("/bin/sleep", &["300"]);
+    let recovered = ProcessSupervisor::new(&long_lived_cfg);
+    let restarted = recovered.clone().restart().await;
+    restarted.expect("restart from stopped should start");
+    assert_eq!(recovered.state(), InstanceState::Running);
+
+    let cleanup = recovered.clone().stop().await;
+    cleanup.expect("cleanup stop");
+}
+
+/// An instance configured without an executable is `Unmanaged`, and both
+/// start and stop must be refused for it.
+#[tokio::test]
+async fn unmanaged_instance_rejects_process_commands() {
+    // Same fixture as the managed tests, minus the executable.
+    let cfg = InstanceConfig {
+        executable: None,
+        ..managed_instance("/bin/sleep", &["300"])
+    };
+    let supervisor = ProcessSupervisor::new(&cfg);
+    assert_eq!(supervisor.state(), InstanceState::Unmanaged);
+
+    let start_result = supervisor.clone().start().await;
+    assert!(start_result.is_err(), "unmanaged start must fail");
+
+    let stop_result = supervisor.clone().stop().await;
+    assert!(stop_result.is_err(), "unmanaged stop must fail");
+}
+
+#[tokio::test]
+async fn missing_executable_fails_cleanly() {
+    let sup = ProcessSupervisor::new(&managed_instance("/nonexistent/bin/gameserver", &[]));
+    let err = sup.clone().start().await.expect_err("must fail");
+    assert!(err.to_string().contains("not found"), "error should say not found: {err}");
+    assert_eq!(sup.state(), InstanceState::Stopped, "failed start must not leave Starting state");
+}
--- a/corrosion-host-agent/tests/update.rs
+++ b/corrosion-host-agent/tests/update.rs
@@ -0,0 +1,63 @@
+//! Signed self-update tests — the security-critical part is signature
+//! verification: a valid signature is accepted, anything tampered is rejected.
+//! Fixtures (tests/fixtures/sample.bin + .minisig) were signed with the real
+//! release private key, so these run with no key present (as in CI).
+
+use corrosion_host_agent::update;
+
+const SAMPLE: &[u8] = include_bytes!("fixtures/sample.bin");
+const SAMPLE_SIG: &str = include_str!("fixtures/sample.bin.minisig");
+
+/// The fixture binary together with its own signature must verify.
+#[test]
+fn accepts_a_validly_signed_binary() {
+    let outcome = update::verify_signature(SAMPLE, SAMPLE_SIG);
+    outcome.expect("valid signature must verify");
+}
+
+/// Flipping the bits of the first byte must make verification fail with a
+/// "verification failed" error.
+#[test]
+fn rejects_a_tampered_binary() {
+    let mut tampered = Vec::from(SAMPLE);
+    tampered[0] = !tampered[0]; // invert every bit of the first byte
+
+    let outcome = update::verify_signature(&tampered, SAMPLE_SIG);
+    let err = outcome.expect_err("tampered binary must be rejected");
+    let rendered = err.to_string();
+    assert!(rendered.contains("verification failed"), "got: {err}");
+}
+
+/// Text that is not a minisign signature at all must be rejected.
+#[test]
+fn rejects_a_garbage_signature() {
+    let outcome = update::verify_signature(SAMPLE, "not a real minisig blob");
+    assert!(outcome.is_err());
+}
+
+/// The real signature must not verify against an empty payload.
+#[test]
+fn rejects_empty_binary_against_real_sig() {
+    let outcome = update::verify_signature(b"", SAMPLE_SIG);
+    assert!(outcome.is_err());
+}
+
+/// `assert_url_allowed` must accept only HTTPS URLs on the real CDN host.
+///
+/// The rejected cases include the two userinfo tricks, where the CDN
+/// hostname sits *before* an `@` so that the actual host is the attacker's
+/// (`evil.com`). Those URLs must stay spelled out literally: a mangled
+/// placeholder would still be rejected, but only for being malformed, and
+/// the bypass itself would go untested.
+#[test]
+fn url_allowlist_enforced() {
+    // Allowed.
+    update::assert_url_allowed("https://cdn.corrosionmgmt.com/host-agent/alpha/corrosion-host-agent-linux-amd64")
+        .expect("the real CDN host must be allowed");
+
+    let rejected = [
+        // http rejected.
+        "http://cdn.corrosionmgmt.com/x",
+        // wrong host rejected.
+        "https://evil.example.com/x",
+        // credential-in-URL (userinfo bypass) rejected: the CDN name is the
+        // user, `pass` the password, and the real host is evil.com.
+        "https://cdn.corrosionmgmt.com:pass@evil.com/x",
+        // host as userinfo trick rejected (real host is evil.com).
+        "https://cdn.corrosionmgmt.com@evil.com/x",
+    ];
+    for url in rejected {
+        assert!(
+            update::assert_url_allowed(url).is_err(),
+            "URL must be rejected by the allowlist: {url}"
+        );
+    }
+}
+
+/// After `swap_binary`, the target path holds the new bytes, `<name>.old`
+/// holds the previous bytes, and no `<name>.new` scratch file is left behind.
+#[test]
+fn swap_binary_replaces_and_backs_up() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    let base = dir.path();
+    let current = base.join("corrosion-host-agent");
+    std::fs::write(&current, b"OLD BINARY").unwrap();
+
+    update::swap_binary(&current, b"NEW BINARY").expect("swap should succeed");
+
+    let live_bytes = std::fs::read(&current).unwrap();
+    assert_eq!(live_bytes, b"NEW BINARY", "current is the new binary");
+
+    let backup_bytes = std::fs::read(base.join("corrosion-host-agent.old")).unwrap();
+    assert_eq!(backup_bytes, b"OLD BINARY", ".old holds the previous binary");
+
+    // the .new scratch file is consumed by the rename
+    let scratch = base.join("corrosion-host-agent.new");
+    assert!(!scratch.exists());
+}
--- a/corrosion-host-agent/tests/wipe.rs
+++ b/corrosion-host-agent/tests/wipe.rs
@@ -0,0 +1,298 @@
+//! Integration tests for the wipe engine.
+//!
+//! Builds a temp directory tree that mirrors a Rust dedicated server layout
+//! and verifies each wipe type's targeting, the symlink-safety guarantee,
+//! backup behaviour, and graceful handling of missing directories.
+//!
+//! The whole file is Unix-only (see `#![cfg(unix)]` below): the symlink tests
+//! create links without special privileges, whereas Windows needs elevated
+//! privileges or Developer Mode for that.
+
+#![cfg(unix)]
+
+use corrosion_host_agent::wipe::{execute, WipeRequest, WipeType};
+use std::path::Path;
+use tempfile::TempDir;
+
+// ---------------------------------------------------------------------------
+// Helper: build a fake Rust server tree
+//
+// Layout:
+//   <root>/
+//     server/
+//       myserver/
+//         proc.map
+//         proc.sav
+//         player.blueprints.1234.db
+//         player.deaths.1234.db
+//         player.identities.1234.db
+//         player.states.1234.db
+//         players.db
+//         keepme.txt          ← must survive every wipe
+//     oxide/
+//       data/
+//         killfeed.json
+//         another.json
+//     server_readme.txt       ← must survive every wipe
+// ---------------------------------------------------------------------------
+
+/// Creates the fake Rust server tree described in the layout comment above
+/// inside a fresh temp directory and returns the directory guard.
+fn make_server_tree() -> TempDir {
+    let dir = tempfile::tempdir().expect("create tempdir");
+    let root = dir.path();
+
+    let save_dir = root.join("server").join("myserver");
+    let oxide_data = root.join("oxide").join("data");
+    std::fs::create_dir_all(&save_dir).expect("create save dir");
+    std::fs::create_dir_all(&oxide_data).expect("create oxide/data");
+
+    // Save files, plus one innocent file that must never be deleted.
+    let save_files = [
+        ("proc.map", "map data"),
+        ("proc.sav", "sav data"),
+        ("player.blueprints.1234.db", "bp data"),
+        ("player.deaths.1234.db", "deaths"),
+        ("player.identities.1234.db", "identities"),
+        ("player.states.1234.db", "states"),
+        ("players.db", "player db"),
+        ("keepme.txt", "keep me"),
+    ];
+    for (name, content) in save_files {
+        write_file(&save_dir.join(name), content.as_bytes());
+    }
+
+    // oxide/data contents
+    for name in ["killfeed.json", "another.json"] {
+        write_file(&oxide_data.join(name), b"{}");
+    }
+
+    // File at root level — must survive.
+    write_file(&root.join("server_readme.txt"), b"readme");
+
+    dir
+}
+
+fn write_file(path: &Path, content: &[u8]) {
+    std::fs::write(path, content).unwrap_or_else(|e| panic!("write {}: {e}", path.display()));
+}
+
+/// Builds a backup-less `WipeRequest` of the given type, labelled
+/// "test-backup".
+fn wipe_req(wipe_type: WipeType) -> WipeRequest {
+    let backup_label = String::from("test-backup");
+    WipeRequest {
+        wipe_type,
+        backup: false,
+        backup_label,
+    }
+}
+
+/// Returns whether the path `rel`, resolved against `root`, exists.
+fn exists(root: &Path, rel: &str) -> bool {
+    let candidate = root.join(rel);
+    candidate.exists()
+}
+
+// ---------------------------------------------------------------------------
+// Map wipe: only *.map and *.sav deleted
+// ---------------------------------------------------------------------------
+
+/// A Map wipe removes exactly the `.map` and `.sav` files and nothing else.
+#[test]
+fn map_wipe_deletes_map_and_sav_only() {
+    let dir = make_server_tree();
+    let root = dir.path();
+
+    let outcome = execute(root, &wipe_req(WipeType::Map)).expect("map wipe should succeed");
+
+    // Deleted
+    let gone = [
+        ("server/myserver/proc.map", "proc.map must be gone"),
+        ("server/myserver/proc.sav", "proc.sav must be gone"),
+    ];
+    for (rel, why) in gone {
+        assert!(!exists(root, rel), "{why}");
+    }
+
+    // Preserved
+    let kept = [
+        ("server/myserver/player.blueprints.1234.db", "blueprints must survive map wipe"),
+        ("server/myserver/player.deaths.1234.db", "deaths must survive map wipe"),
+        ("server/myserver/keepme.txt", "keepme.txt must survive"),
+        ("oxide/data/killfeed.json", "oxide/data must survive map wipe"),
+        ("server_readme.txt", "server_readme.txt must survive"),
+    ];
+    for (rel, why) in kept {
+        assert!(exists(root, rel), "{why}");
+    }
+
+    assert_eq!(outcome.deleted_count, 2);
+    assert_eq!(outcome.wipe_type, WipeType::Map);
+}
+
+// ---------------------------------------------------------------------------
+// Blueprint wipe: map/sav + blueprints deleted
+// ---------------------------------------------------------------------------
+
+/// A Blueprint wipe removes the map/sav files plus the blueprints database,
+/// and leaves the remaining player data and oxide/data alone.
+#[test]
+fn blueprint_wipe_includes_map_files() {
+    let dir = make_server_tree();
+    let root = dir.path();
+
+    let outcome =
+        execute(root, &wipe_req(WipeType::Blueprint)).expect("blueprint wipe should succeed");
+
+    // Deleted
+    let gone = [
+        ("server/myserver/proc.map", "proc.map must be gone"),
+        ("server/myserver/proc.sav", "proc.sav must be gone"),
+        ("server/myserver/player.blueprints.1234.db", "blueprints must be gone"),
+    ];
+    for (rel, why) in gone {
+        assert!(!exists(root, rel), "{why}");
+    }
+
+    // Preserved
+    let kept = [
+        ("server/myserver/player.deaths.1234.db", "deaths must survive blueprint wipe"),
+        ("server/myserver/player.identities.1234.db", "identities must survive"),
+        ("server/myserver/keepme.txt", "keepme.txt must survive"),
+        ("oxide/data/killfeed.json", "oxide/data must survive blueprint wipe"),
+    ];
+    for (rel, why) in kept {
+        assert!(exists(root, rel), "{why}");
+    }
+
+    assert_eq!(outcome.deleted_count, 3);
+    assert_eq!(outcome.wipe_type, WipeType::Blueprint);
+}
+
+// ---------------------------------------------------------------------------
+// Full wipe: everything including player state + oxide/data
+// ---------------------------------------------------------------------------
+
+/// A Full wipe removes every game-data file in the save dir and empties
+/// `oxide/data`, while keeping that directory and the unrelated files.
+#[test]
+fn full_wipe_clears_all_game_data() {
+    let dir = make_server_tree();
+    let root = dir.path();
+
+    let outcome = execute(root, &wipe_req(WipeType::Full)).expect("full wipe should succeed");
+
+    // All save-dir game files deleted
+    let wiped_saves = [
+        "server/myserver/proc.map",
+        "server/myserver/proc.sav",
+        "server/myserver/player.blueprints.1234.db",
+        "server/myserver/player.deaths.1234.db",
+        "server/myserver/player.identities.1234.db",
+        "server/myserver/player.states.1234.db",
+        "server/myserver/players.db",
+    ];
+    for rel in wiped_saves {
+        assert!(!exists(root, rel));
+    }
+
+    // oxide/data contents deleted (directory itself preserved)
+    assert!(!exists(root, "oxide/data/killfeed.json"), "killfeed.json must be gone");
+    assert!(!exists(root, "oxide/data/another.json"), "another.json must be gone");
+    assert!(exists(root, "oxide/data"), "oxide/data directory itself must remain");
+
+    // Never-touched files preserved
+    let untouched = [
+        ("server/myserver/keepme.txt", "keepme.txt must survive full wipe"),
+        ("server_readme.txt", "server_readme.txt must survive full wipe"),
+    ];
+    for (rel, why) in untouched {
+        assert!(exists(root, rel), "{why}");
+    }
+
+    // 7 save-dir files + 2 oxide/data files = 9
+    assert_eq!(outcome.deleted_count, 9);
+    assert_eq!(outcome.wipe_type, WipeType::Full);
+}
+
+// ---------------------------------------------------------------------------
+// Missing directories: no error on fresh server
+// ---------------------------------------------------------------------------
+
+/// A Full wipe on a completely empty root succeeds and deletes nothing.
+#[test]
+fn missing_server_dir_does_not_error() {
+    // Completely empty root — no server/ or oxide/ directories.
+    let empty_root = tempfile::tempdir().expect("tempdir");
+
+    let result = execute(empty_root.path(), &wipe_req(WipeType::Full));
+    assert!(result.is_ok(), "empty root must not error: {result:?}");
+
+    let outcome = result.unwrap();
+    assert_eq!(outcome.deleted_count, 0);
+}
+
+/// A Full wipe on a tree that has a save dir but no `oxide/data` must
+/// succeed *and* still wipe the save files that are present.
+#[test]
+fn missing_oxide_data_does_not_error() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    // Has server dir but no oxide/data.
+    let save_dir = dir.path().join("server").join("myserver");
+    std::fs::create_dir_all(&save_dir).expect("mkdir");
+    write_file(&save_dir.join("proc.map"), b"map");
+
+    let result = execute(dir.path(), &wipe_req(WipeType::Full));
+    assert!(result.is_ok(), "missing oxide/data must not error: {:?}", result);
+
+    // Returning Ok is not enough: a wipe that bails out early because
+    // oxide/data is absent would also return Ok while leaving the save
+    // files behind. Pin that the one save file was actually removed.
+    assert!(
+        !save_dir.join("proc.map").exists(),
+        "proc.map must be wiped even when oxide/data is missing"
+    );
+    assert_eq!(result.unwrap().deleted_count, 1);
+}
+
+// ---------------------------------------------------------------------------
+// Symlink safety: symlink inside root pointing outside must NOT be followed
+// ---------------------------------------------------------------------------
+
+/// A symlink planted in the save dir that points outside the root must not
+/// be followed: the external file has to survive a Full wipe.
+#[test]
+fn symlink_in_save_dir_is_not_deleted_via_follow() {
+    let dir = make_server_tree();
+    let root = dir.path();
+
+    // Create an external directory with sensitive data.
+    let outside = tempfile::tempdir().expect("outside tempdir");
+    let secret = outside.path().join("secret.txt");
+    write_file(&secret, b"TOP SECRET");
+
+    // Plant a symlink inside the save dir pointing to the external directory.
+    let link = root.join("server").join("myserver").join("evil_link");
+    std::os::unix::fs::symlink(outside.path(), &link).expect("plant symlink");
+
+    // Perform a full wipe — should not follow the symlink or touch secret.txt
+    let result = execute(root, &wipe_req(WipeType::Full));
+    assert!(result.is_ok(), "wipe with a symlink present must not error: {result:?}");
+
+    // External data must be untouched.
+    assert!(
+        secret.exists(),
+        "external secret.txt must not be deleted via symlink follow"
+    );
+}
+
+/// When the identity entry under `server/` is itself a symlink to an
+/// external directory, a Map wipe must skip it: no error, nothing deleted.
+#[test]
+fn symlink_at_identity_dir_level_is_skipped() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    let root = dir.path();
+    let server_dir = root.join("server");
+    std::fs::create_dir_all(&server_dir).expect("mkdir server");
+
+    // The identity entry itself is a symlink to an external dir.
+    let outside = tempfile::tempdir().expect("outside tempdir");
+    let external_map = outside.path().join("proc.map");
+    write_file(&external_map, b"map");
+
+    let link = server_dir.join("evil_identity");
+    std::os::unix::fs::symlink(outside.path(), &link).expect("plant identity symlink");
+
+    let result = execute(root, &wipe_req(WipeType::Map));
+    assert!(result.is_ok(), "symlink identity dir must be skipped, not error: {result:?}");
+
+    // The external proc.map must not have been deleted.
+    assert!(
+        external_map.exists(),
+        "external proc.map must not be deleted via identity symlink"
+    );
+    let outcome = result.unwrap();
+    assert_eq!(outcome.deleted_count, 0);
+}
+
+// ---------------------------------------------------------------------------
+// Backup: files are copied before deletion
+// ---------------------------------------------------------------------------
+
+/// With `backup: true`, a Map wipe still deletes its targets but first
+/// copies them under `.corrosion-backups/<backup_label>/`.
+#[test]
+fn backup_copies_targets_before_deletion() {
+    let dir = make_server_tree();
+    let root = dir.path();
+
+    // Same as the shared request helper, but with backups on and a custom label.
+    let mut req = wipe_req(WipeType::Map);
+    req.backup = true;
+    req.backup_label = "before-map-wipe".to_string();
+
+    let outcome = execute(root, &req).expect("map wipe with backup should succeed");
+
+    // The files should be gone from the save dir…
+    assert!(!exists(root, "server/myserver/proc.map"), "proc.map must be deleted");
+    assert!(!exists(root, "server/myserver/proc.sav"), "proc.sav must be deleted");
+
+    // …but must exist in the backup directory.
+    let backup_base = root.join(".corrosion-backups").join("before-map-wipe");
+    assert!(backup_base.exists(), "backup directory must be created");
+
+    // Walk the backup to find the backed-up files.
+    let backed_up = collect_files_recursively(&backup_base);
+    for wanted in ["proc.map", "proc.sav"] {
+        let present = backed_up.iter().any(|name| name.ends_with(wanted));
+        assert!(present, "{wanted} must be in backup, found: {backed_up:?}");
+    }
+
+    assert_eq!(outcome.deleted_count, 2);
+}
+
+/// Walks `dir` depth-first and returns the *names* (last path component
+/// only) of every non-directory entry found beneath it.
+///
+/// Directories that cannot be read, including a `dir` that does not exist,
+/// contribute nothing rather than causing an error.
+fn collect_files_recursively(dir: &Path) -> Vec<String> {
+    let entries = match std::fs::read_dir(dir) {
+        Ok(entries) => entries,
+        Err(_) => return Vec::new(),
+    };
+
+    let mut names = Vec::new();
+    // `flatten` drops entries whose metadata could not be read.
+    for path in entries.flatten().map(|entry| entry.path()) {
+        if path.is_dir() {
+            names.extend(collect_files_recursively(&path));
+        } else if let Some(name) = path.file_name() {
+            names.push(name.to_string_lossy().into_owned());
+        }
+    }
+    names
+}
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -31,6 +31,9 @@ services:
    volumes:
      - nats_data:/data
      - ./nats.conf:/etc/nats/nats.conf:ro
+      # Per-license authorization config. The real file is generated on the
+      # host by scripts/generate-nats-auth.mjs and carries secrets, so the
+      # version with real users is never committed.
+      - ./nats-auth.conf:/etc/nats/nats-auth.conf:ro
    ports:
      - "8089:4222"   # Client connections

@@ -43,6 +46,12 @@ services:
      DATABASE_URL: postgres://corrosion:${DB_PASSWORD:-corrosion_dev}@postgres:5432/corrosion
      DATABASE_MAX_CONNECTIONS: "20"
      NATS_URL: nats://nats:4222
+      # Privileged internal NATS user (full corrosion.> access). Empty = anonymous.
+      NATS_INTERNAL_USER: ${NATS_INTERNAL_USER:-}
+      NATS_INTERNAL_PASSWORD: ${NATS_INTERNAL_PASSWORD:-}
+      # Secret for deriving per-license agent passwords (shared with the
+      # nats-auth generator). HMAC-SHA256(license_id, secret).
+      NATS_TOKEN_SECRET: ${NATS_TOKEN_SECRET:-}
      JWT_SECRET: ${JWT_SECRET}
      JWT_ACCESS_EXPIRY_SECONDS: "14400"
      JWT_REFRESH_EXPIRY_SECONDS: "604800"
@@ -87,7 +96,10 @@ services:
      api:
        condition: service_started
    healthcheck:
-      test: ["CMD-SHELL", "wget -q --spider http://localhost:80/ || exit 1"]
+      # 127.0.0.1, not localhost: nginx listens IPv4-only (0.0.0.0:80) but
+      # `localhost` resolves to ::1 first inside the container → the probe hit
+      # nothing and reported unhealthy while the panel served fine on IPv4.
+      test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:80/ || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 3
--- a/docker/nats-auth.conf
+++ b/docker/nats-auth.conf
@@ -0,0 +1,18 @@
+# BOOTSTRAP DEFAULT — no secrets, safe to commit.
+#
+# Anonymous is mapped to a HARMLESS namespace (corrosion.unclaimed.>), never to
+# real tenant subjects (corrosion.{uuid}.>) — so a fresh/stale deploy running
+# this default cannot read or forge any tenant's traffic. The REST API still
+# works; agent telemetry just won't flow until the real config is generated.
+#
+# On every real deploy, scripts/generate-nats-auth.mjs OVERWRITES this file
+# (on the host, not in git) with the privileged internal user + per-license
+# scoped users. NATS_AUTH_STAGE defaults to "enforce" (anonymous rejected).
+#
+# NOTE: no_auth_user is a TOP-LEVEL field, NOT inside authorization { }.
+authorization {
+  users: [
+    {
+      # The only user in the bootstrap default: empty password, confined to
+      # the corrosion.unclaimed.> subjects for both publish and subscribe.
+      user: "anonymous"
+      password: ""
+      permissions: {
+        publish: { allow: ["corrosion.unclaimed.>"] }
+        subscribe: { allow: ["corrosion.unclaimed.>"] }
+      }
+    }
+  ]
+}
+no_auth_user: "anonymous"
--- a/Show More
+++ b/Show More