Compare commits
44 Commits
agent-v2.0
...
agent-v2.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
440474290b | ||
|
|
6f783bfac8 | ||
|
|
f2ea415840 | ||
|
|
d13f2cb8b1 | ||
|
|
651a35d4be | ||
|
|
0715492ddf | ||
|
|
4ef5db5b0d | ||
|
|
bb71763714 | ||
|
|
f18b45e3f2 | ||
|
|
702de24e28 | ||
|
|
6b3e805ac2 | ||
|
|
7c84912ff5 | ||
|
|
355a53f6e3 | ||
|
|
589516a021 | ||
|
|
f60e6abd33 | ||
|
|
877fadcb6c | ||
|
|
e897a4802f | ||
|
|
c0b20f2f78 | ||
|
|
06e832fca1 | ||
|
|
009ceb86ad | ||
|
|
6f31c41dc3 | ||
|
|
99433a09d1 | ||
|
|
b442ef4102 | ||
|
|
856106174a | ||
|
|
463908b18e | ||
|
|
00cff51ce5 | ||
|
|
7a07d600e7 | ||
|
|
4a4ae7a5d4 | ||
|
|
930f655bf5 | ||
|
|
700dc2254d | ||
|
|
7fdca2cd4f | ||
|
|
18f978dde1 | ||
|
|
9e5e828c8d | ||
|
|
fccd5c61c5 | ||
|
|
c72a280361 | ||
|
|
a3b4b5cc7d | ||
|
|
4e184ca571 | ||
|
|
fde0926d52 | ||
|
|
4d99c9d99d | ||
|
|
b8f0ccba3c | ||
|
|
068a476f39 | ||
|
|
f706c3c47e | ||
|
|
4c9c322c29 | ||
|
|
47fa72763c |
@@ -42,3 +42,6 @@ FRONTEND_URL=http://localhost:5174
|
||||
|
||||
# Frontend (Vite — must be prefixed with VITE_)
|
||||
VITE_PANEL_URL=https://panel.corrosionmgmt.com
|
||||
|
||||
# Hostnames that serve the marketing site (comma-separated); all other hosts get the panel
|
||||
VITE_MARKETING_HOSTS=corrosionmgmt.com,www.corrosionmgmt.com
|
||||
|
||||
@@ -67,6 +67,43 @@ jobs:
|
||||
sha256sum corrosion-host-agent-windows-amd64.exe >> checksums.txt
|
||||
cat checksums.txt
|
||||
|
||||
- name: Sign artifacts (minisign)
|
||||
env:
|
||||
MINISIGN_SECRET_KEY: ${{ secrets.MINISIGN_SECRET_KEY }}
|
||||
run: |
|
||||
if [ -z "$MINISIGN_SECRET_KEY" ]; then
|
||||
echo "::error::MINISIGN_SECRET_KEY secret is not set — refusing to publish unsigned agent artifacts."
|
||||
exit 1
|
||||
fi
|
||||
# minisign isn't packaged for bullseye — fetch the official static binary.
|
||||
curl -sSL https://github.com/jedisct1/minisign/releases/download/0.12/minisign-0.12-linux.tar.gz -o /tmp/minisign.tgz
|
||||
tar -xzf /tmp/minisign.tgz -C /tmp
|
||||
MINISIGN="$(find /tmp -type f -name minisign -path '*linux*' | head -1)"
|
||||
chmod +x "$MINISIGN"
|
||||
"$MINISIGN" -v
|
||||
# A minisign secret key file is TWO lines (comment + base64 blob). CI
|
||||
# secret storage mangles embedded newlines, collapsing it to one line
|
||||
# so minisign can't load it. Preferred form: store the secret
|
||||
# base64-encoded (single line) — we decode it here. Auto-detect so a
|
||||
# correctly-stored raw two-line key still works.
|
||||
if printf '%s' "$MINISIGN_SECRET_KEY" | base64 -d 2>/dev/null | head -1 | grep -q "untrusted comment:"; then
|
||||
printf '%s' "$MINISIGN_SECRET_KEY" | base64 -d > /tmp/sign.key
|
||||
else
|
||||
printf '%s\n' "$MINISIGN_SECRET_KEY" > /tmp/sign.key
|
||||
fi
|
||||
if ! head -1 /tmp/sign.key | grep -q "untrusted comment:"; then
|
||||
echo "::error::MINISIGN_SECRET_KEY is neither base64 of a minisign key nor a raw two-line key file. Store it as: base64 < your-secret.key | tr -d '\n'"
|
||||
rm -f /tmp/sign.key
|
||||
exit 1
|
||||
fi
|
||||
cd corrosion-host-agent/bin
|
||||
# Passwordless key (-W generated); feed empty stdin so it never blocks.
|
||||
for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-windows-amd64.exe checksums.txt; do
|
||||
"$MINISIGN" -S -s /tmp/sign.key -m "$f" -x "$f.minisig" < /dev/null
|
||||
done
|
||||
rm -f /tmp/sign.key
|
||||
echo "signed: $(ls *.minisig)"
|
||||
|
||||
- name: Create Release
|
||||
env:
|
||||
RELEASE_TOKEN: ${{ secrets.RELEASE_TOKEN }}
|
||||
@@ -82,7 +119,9 @@ jobs:
|
||||
"${API_URL}/repos/${REPO}/releases")
|
||||
RELEASE_ID=$(echo "$RESPONSE" | grep -o '"id":[0-9]*' | head -1 | grep -o '[0-9]*')
|
||||
|
||||
for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-windows-amd64.exe checksums.txt; do
|
||||
for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-linux-amd64.minisig \
|
||||
corrosion-host-agent-windows-amd64.exe corrosion-host-agent-windows-amd64.exe.minisig \
|
||||
checksums.txt checksums.txt.minisig; do
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token ${RELEASE_TOKEN}" \
|
||||
-H "Content-Type: application/octet-stream" \
|
||||
@@ -95,7 +134,9 @@ jobs:
|
||||
CDN_URL="https://cdn.corrosionmgmt.com"
|
||||
VERSION="${{ steps.version.outputs.VERSION }}"
|
||||
|
||||
for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-windows-amd64.exe checksums.txt; do
|
||||
for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-linux-amd64.minisig \
|
||||
corrosion-host-agent-windows-amd64.exe corrosion-host-agent-windows-amd64.exe.minisig \
|
||||
checksums.txt checksums.txt.minisig; do
|
||||
curl -s -X POST \
|
||||
-F "file=@corrosion-host-agent/bin/$f" \
|
||||
"${CDN_URL}/host-agent/alpha/$f"
|
||||
|
||||
122
.gitea/workflows/ci.yml
Normal file
122
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,122 @@
|
||||
name: CI
|
||||
|
||||
# Test gate for every push to main and every pull request. The deploy story: main must be green here
|
||||
# before the stack is rebuilt (deploy workflow enforces it once SSH transport
|
||||
# secrets land). Jobs run in the runner's bare node:20-bullseye container —
|
||||
# toolchains bootstrap per-run.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
backend-types:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Type-check NestJS backend
|
||||
run: |
|
||||
cd backend-nest
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npx tsc --noEmit
|
||||
|
||||
frontend-build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build frontend (vue-tsc gate + vite)
|
||||
run: |
|
||||
cd frontend
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npm run build
|
||||
|
||||
agent-tests:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Cache cargo
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
corrosion-host-agent/target
|
||||
key: cargo-${{ hashFiles('corrosion-host-agent/Cargo.lock') }}
|
||||
- name: Install Rust
|
||||
run: |
|
||||
apt-get update -qq && apt-get install -y -qq build-essential curl
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
- name: Test agent
|
||||
run: |
|
||||
cd corrosion-host-agent
|
||||
cargo test
|
||||
- name: Upload agent binary for integration
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: agent-debug
|
||||
path: corrosion-host-agent/target/debug/corrosion-host-agent
|
||||
|
||||
integration:
|
||||
runs-on: ubuntu-latest
|
||||
needs: agent-tests
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:16
|
||||
env:
|
||||
POSTGRES_USER: corrosion
|
||||
POSTGRES_PASSWORD: citest
|
||||
POSTGRES_DB: corrosion
|
||||
nats:
|
||||
image: nats:2.10-alpine
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download agent binary
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: agent-debug
|
||||
path: agent-bin
|
||||
|
||||
- name: Apply migrations to fresh DB
|
||||
run: |
|
||||
apt-get update -qq && apt-get install -y -qq postgresql-client
|
||||
until PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -c 'SELECT 1' >/dev/null 2>&1; do sleep 1; done
|
||||
for f in $(ls backend/migrations/*.sql | sort); do
|
||||
echo "applying $f"
|
||||
PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -v ON_ERROR_STOP=1 -q -f "$f"
|
||||
done
|
||||
|
||||
- name: Build + boot backend
|
||||
run: |
|
||||
cd backend-nest
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npm run build
|
||||
DATABASE_URL=postgres://corrosion:citest@postgres:5432/corrosion \
|
||||
NATS_URL=nats://nats:4222 \
|
||||
JWT_SECRET=ci-secret ENCRYPTION_KEY=ci-encryption-key \
|
||||
ADMIN_EMAIL=ci@corrosion.test ADMIN_PASSWORD=ci-password-123 ADMIN_USERNAME=CI \
|
||||
nohup node dist/main.js > /tmp/backend.log 2>&1 &
|
||||
for i in $(seq 1 30); do
|
||||
code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:3000/api/auth/login -X POST -H 'Content-Type: application/json' -d '{}' || true)
|
||||
[ "$code" = "400" ] && echo "backend up" && exit 0
|
||||
sleep 2
|
||||
done
|
||||
echo "backend failed to come up"; cat /tmp/backend.log; exit 1
|
||||
|
||||
- name: Run agent↔backend contract suite
|
||||
run: |
|
||||
chmod +x agent-bin/corrosion-host-agent
|
||||
LICENSE_ID=$(PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -t -A -c 'SELECT id FROM licenses LIMIT 1')
|
||||
echo "license under test: $LICENSE_ID"
|
||||
[ -n "$LICENSE_ID" ] || { echo "admin seed did not create a license"; cat /tmp/backend.log; exit 1; }
|
||||
LICENSE_ID="$LICENSE_ID" \
|
||||
DATABASE_URL=postgres://corrosion:citest@postgres:5432/corrosion \
|
||||
NATS_URL=nats://nats:4222 \
|
||||
AGENT_BIN=$PWD/agent-bin/corrosion-host-agent \
|
||||
node contract-tests/agent-backend.contract.mjs
|
||||
|
||||
- name: Backend log on failure
|
||||
if: failure()
|
||||
run: cat /tmp/backend.log || true
|
||||
@@ -1,5 +1,6 @@
|
||||
name: Test Asgard Runner
|
||||
on: [push]
|
||||
# On-demand only — no reason to spin a container on every push.
|
||||
on: [workflow_dispatch]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
@@ -17,8 +18,15 @@ jobs:
|
||||
echo "Memory: $(free -h | grep Mem | awk '{print $2}')"
|
||||
echo "Disk: $(df -h / | tail -1 | awk '{print $4}')"
|
||||
echo "==========================================="
|
||||
echo "Go: $(go version)"
|
||||
echo "Rust: $(rustc --version)"
|
||||
echo "Docker: $(docker --version)"
|
||||
# Jobs run in a bare node:20-bullseye container: toolchains are NOT
|
||||
# preinstalled — workflows must bootstrap them (setup-go, rustup).
|
||||
# Report presence honestly instead of green-lighting a missing tool.
|
||||
for tool in go rustc docker node; do
|
||||
if command -v "$tool" >/dev/null 2>&1; then
|
||||
echo "$tool: $($tool --version 2>&1 | head -1)"
|
||||
else
|
||||
echo "$tool: NOT PRESENT (workflows must install per-run)"
|
||||
fi
|
||||
done
|
||||
echo "==========================================="
|
||||
echo "✅ Asgard runner is OPERATIONAL"
|
||||
echo "✅ Asgard runner reachable — container is node:20-bullseye, bootstrap toolchains per-run"
|
||||
|
||||
42
CHANGELOG.md
42
CHANGELOG.md
@@ -4,6 +4,48 @@ All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added (Host-agent Phase 2 — Dune docker-compose adapter — 2026-06-12)
|
||||
|
||||
**`Supervisor` trait abstraction (`corrosion-host-agent`):**
|
||||
- Introduced `trait Supervisor` (via `async-trait`, the battle-tested ecosystem standard) so the agent can manage games with fundamentally different models behind one wire contract. `ProcessSupervisor` (spawned OS process — Rust/Conan/Soulmask) and the new `DockerComposeSupervisor` (Dune) both implement it; `Agent.supervisors` is now `HashMap<String, Arc<dyn Supervisor>>` and the instance command dispatch (`instancecmd::dispatch`) is fully game-agnostic — `start`/`stop`/`restart`/`status` are identical across games. A per-game factory in `main` selects the impl. `InstanceState` moved to the shared `supervisor` module.
|
||||
- **Architecture call** (per Commander): chose the `dyn` trait over a zero-dependency enum because the Dune references point at *several* future management planes (kubectl, AMP/podman, SSH) — a trait makes each new plane "new struct + impl," no central match to edit.
|
||||
|
||||
**`DockerComposeSupervisor` (Dune: Awakening):**
|
||||
- Drives `docker compose up -d` / `stop` / `restart` against the instance's compose project (a "battlegroup"), with `-f`/`-p`/single-service support and a configurable compose binary (`docker compose` default, `docker-compose` legacy). New `[instance.docker_compose]` config block (file/project/service/command, all optional). `steam_update` already rejected for Dune (Docker images, no SteamCMD).
|
||||
- **Scope (first cut):** lifecycle + cached state. Deferred to Phase 3b (with process PID adoption): container crash-detection and state adoption on agent restart (both reconcilable with a `docker compose ps` probe).
|
||||
- Verified: 6 new docker-compose tests (mock `docker` binary asserting exact invocations + state transitions + failure paths) + the 5 refactored process-supervisor tests; full agent suite 56 tests green, zero warnings. Live verification against a real Dune stack pending the Commander standing one up.
|
||||
|
||||
### Changed (Fleet-driven active game + signed-update CI fix — 2026-06-12)
|
||||
|
||||
**Frontend — active game follows the deployed fleet:**
|
||||
- The panel's active game (shell skin + sidebar nav + dashboard terminology) is now **derived from the deployed instances** instead of a localStorage-only toggle. `syncActiveGameFromFleet()` reads the distinct `game` values of the license's instances (`game_instances.game`, reported by the host agent): exactly one game deployed → the shell auto-skins to it; zero or multiple → `all` (neutral house skin). Wired into `DashboardLayout` (the always-mounted admin shell) via a watch on the fleet store.
|
||||
- A manual GameSwitcher pick still wins — it persists to `cc-active-game` and suppresses auto-derive (operator intent beats the heuristic). Un-overridden panels keep tracking the fleet across sessions.
|
||||
- **No backend/schema change:** a license's game(s) are the distinct games of its instances — the normalized source of truth. Deliberately did NOT add a `licenses.game` column (would duplicate `game_instances.game` and drift; see Lesson 20).
|
||||
|
||||
**Frontend — sidebar agent-health footer is now fleet-aware:**
|
||||
- The shell footer read a single legacy `server.connection` (one `server_connections` row), which disagreed with the multi-host fleet. Repointed it at the fleet store: one host → hostname + status + last-heartbeat; multiple → `{online}/{total} online` + total instance count. Tone aggregates (all online → healthy, some → degraded, none → offline). Dropped the legacy `useServerStore` dependency from the shell entirely.
|
||||
|
||||
**Frontend — removed dead `vuefinder` dependency:**
|
||||
- VueFinder was replaced by the native instance-scoped file manager but the plugin (and its CSS) were still globally registered in `main.ts` and shipped in the bundle. Removed the dep + the three `main.ts` lines. Side effect: the main JS chunk dropped **588 kB → 165 kB** (vuefinder bundled an entire unused file-manager UI).
|
||||
|
||||
**Recon note (not a change):** `corrosion.{license}.cmd.server` was on the cleanup list as "dead v1" — it is NOT. It remains the live license-level command path for all plugin/module config applies, plugin install, scheduled tasks, and legacy start/stop/restart, served only by the legacy Go agent. The Rust agent does not implement it yet — this is a **parity/migration gap** (Phase 2+), not dead code. Left intact.
|
||||
|
||||
**CI — signed host-agent build:**
|
||||
- Fixed the `Sign artifacts (minisign)` step (`Error while loading the secret key file`): a minisign secret key is two lines and CI secret storage mangles the embedded newline. The job now base64-decodes the secret (single-line, mangling-proof) with auto-detect fallback to a raw key. `MINISIGN_SECRET_KEY` must be stored as `base64 < secret.key | tr -d '\n'`. Verified end-to-end: `agent-v2.0.0-alpha.8` Linux + Windows binaries validate against the agent's embedded public key; tampered byte rejected.
|
||||
|
||||
### Added (Host-Agent v2 Consumer + SEO Meta — 2026-06-11)
|
||||
|
||||
**Backend (NestJS):**
|
||||
- `HostAgentConsumerService` (new) — consumes wire protocol v2: `corrosion.*.host.heartbeat` updates `companion_last_seen` + `connection_status='connected'` (auto-registers the connection row on first contact); `host.going_offline` flips offline; a 60s staleness sweep marks hosts offline after 180s of silence. Previously NOTHING persisted heartbeats — `connection_status` was set once at setup and never changed again. Tenant-validated (UUID + license existence, cached) per NATS-consumer doctrine
|
||||
- `NatsBridgeService` — bridges `host_heartbeat` / `host_going_offline` events to the panel WebSocket
|
||||
- Verified by contract test: real agent → production NATS → captured with the backend's own `nats` lib under the real license; subjects, schema 2, real telemetry, offline beacon all confirmed
|
||||
|
||||
**Frontend:**
|
||||
- Per-route document titles + meta descriptions (router `afterEach`, no new deps): six marketing pages get real titles/descriptions/OG tags (previously every page was "Corrosion Management" with zero meta — invisible to search and link previews); panel views get mechanical "{View} — Corrosion" titles
|
||||
|
||||
**CI:**
|
||||
- `test-runner.yml` — honest per-tool presence checks (was printing "OPERATIONAL" while every toolchain probe failed); on-demand trigger instead of every push
|
||||
|
||||
### Added (Corrosion Host Agent — Rust rewrite Phase 0 — 2026-06-11)
|
||||
|
||||
**New: `corrosion-host-agent/`** — Rust rewrite of the Go companion agent (which stays in-tree as the behavior reference until parity). Wire protocol v2 (COA-B, Commander-approved): instance-scoped subjects `corrosion.{license}.{instance}.*` with host-level `corrosion.{license}.host.*` — full spec in `corrosion-host-agent/PROTOCOL.md`.
|
||||
|
||||
34
CLAUDE.md
34
CLAUDE.md
@@ -55,7 +55,12 @@ frontend/ # Vue 3 + TypeScript
|
||||
package.json
|
||||
vite.config.ts # Proxies /api to :3000
|
||||
|
||||
companion-agent/ # Go binary for bare metal servers
|
||||
corrosion-host-agent/ # Rust host agent (ACTIVE) — multi-game ops runtime
|
||||
src/ # main, config, bus (NATS), telemetry, prober, hostcmd
|
||||
PROTOCOL.md # Wire protocol v2 spec (instance-scoped subjects)
|
||||
agent.example.toml # Multi-instance config reference
|
||||
|
||||
companion-agent/ # Go binary (LEGACY — behavior reference until Rust parity)
|
||||
cmd/agent/ # main.go entry point
|
||||
internal/ # Core agent logic (nats, commands, process)
|
||||
Makefile # Build for Linux/Windows
|
||||
@@ -91,14 +96,16 @@ cd backend-nest && npx tsc --noEmit # Type-check without building
|
||||
|
||||
# Frontend
|
||||
cd frontend && npm run dev # Vite dev server (port 5174)
|
||||
cd frontend && npm run build # Production build → dist/
|
||||
cd frontend && npm run lint # ESLint
|
||||
cd frontend && npm run type-check # TypeScript checking (vue-tsc)
|
||||
cd frontend && npm run build # vue-tsc -b && vite build (type-check included; no separate lint/type-check scripts exist)
|
||||
|
||||
# Companion Agent (Go)
|
||||
# Host Agent (Rust — ACTIVE)
|
||||
cd corrosion-host-agent && cargo check # Fast validation
|
||||
cd corrosion-host-agent && cargo build --release --target x86_64-unknown-linux-musl # Static Linux binary
|
||||
cd corrosion-host-agent && cargo xwin build --release --target x86_64-pc-windows-msvc # Windows (local)
|
||||
# CI: push tag agent-vX.Y.Z (must match Cargo.toml version) → Asgard builds → CDN /host-agent/alpha/
|
||||
|
||||
# Companion Agent (Go — LEGACY, behavior reference until Rust parity)
|
||||
cd companion-agent && make build # Build for current platform
|
||||
cd companion-agent && make linux # Cross-compile for Linux
|
||||
cd companion-agent && make windows # Cross-compile for Windows
|
||||
|
||||
# Docker (from docker/ directory — Commander ALWAYS builds with --no-cache)
|
||||
docker compose build --no-cache && docker compose up -d # Full rebuild + start
|
||||
@@ -374,7 +381,8 @@ Default to Sonnet. Escalate to Opus when the problem demands it, not as a comfor
|
||||
- Treat every change as production deployment (`corrosionmgmt.com`)
|
||||
- Document why, not just what, in commits and CHANGELOG
|
||||
- **Always commit and push when done touching code — never ask, never wait for permission**
|
||||
- **Tag companion agent builds when Go code in `companion-agent/` is modified** — increment from latest tag (currently v1.0.3), push tag to trigger CI build + CDN upload
|
||||
- **Tag agent builds when agent code is modified** — Rust agent: `agent-vX.Y.Z` (must match `corrosion-host-agent/Cargo.toml`; CI publishes to CDN `/host-agent/alpha/`, while `/latest/` stays on the Go build until cutover). Legacy Go agent: `vX.Y.Z`. Tags roll FORWARD only — never reuse or re-push a tag; cut the next version
|
||||
- **The Asgard CI runner executes jobs in a bare `node:20-bullseye` container** — no Rust/Go/Docker/sudo preinstalled; workflows must bootstrap toolchains per-run (setup-go, rustup via curl)
|
||||
|
||||
## Development Notes
|
||||
|
||||
@@ -435,3 +443,13 @@ Things I discovered about myself building a sister platform across multiple sess
|
||||
22. **Build-green is not render-correct — visually verify UI work before calling it done.** The entire design-system re-skin (50+ files, six green commits) rendered almost completely unstyled in the browser — white background, no surfaces, no accent — because the design tokens never loaded. `vue-tsc -b` + `vite build` passed clean the whole time; CSS that *compiles* can still apply *zero* styles. One Playwright screenshot of the login exposed it in seconds. When the deliverable is visual, a green build is necessary but not sufficient: load it in a real browser (Playwright on the dev server at :5174), screenshot it, and assert on `getComputedStyle` — don't trust compilation alone. This is Lesson 17 with teeth.
|
||||
|
||||
23. **Tailwind v4 silently drops a nested `@import` barrel placed after `@import "tailwindcss"`.** `style.css` did `@import "tailwindcss"; @import "./styles/corrosion.css";` where corrosion.css was a barrel of eight `@import` token files. Once Tailwind v4 expands the tailwindcss import in place, the barrel's inner @imports no longer precede all statements, so PostCSS drops them — emitting only an easily-ignored "@import must precede all other statements" warning. Result: every design token resolved empty and the whole panel rendered unstyled. Import token/design CSS files **directly and contiguously** in the entry stylesheet; never via a nested barrel after the Tailwind import. The build warning you wave off as "pre-existing" may be the entire feature silently failing.
|
||||
|
||||
24. **A provider's `onModuleInit` can run before a dependency's async `onModuleInit` has completed — register NATS/external subscriptions in `onApplicationBootstrap`.** `NatsService.onModuleInit` connects to NATS (async); `NatsBridgeService`/`HostAgentConsumerService` registered their subscriptions in their own `onModuleInit`, which fired while the connection was still null — so every `subscribe()` hit the `[OFFLINE]` no-op path and the WS bridge was dead-on-boot in *every* production build, silently. Nest guarantees `onApplicationBootstrap` runs only after all module init (including the awaited connect) finishes. Anything that depends on another provider's async startup belongs in bootstrap, not init. The tell: a subscription that "should be there" but the handler never fires and there's no error — trace the *startup ordering*, not the handler.
|
||||
|
||||
25. **Fixing a dead code path detonates the live code behind it — budget for the second bug.** The moment Lesson 24's fix made the NATS→WS bridge actually deliver events, the API crashed on the first forwarded heartbeat: `WebSocket.OPEN` was `undefined` at runtime because `esModuleInterop` is off, so `import WebSocket from 'ws'` compiled to `ws_1.default` (undefined). That crash had sat behind the dead bridge since the gateway was written — never hit because no event ever reached it. When you resurrect a path that was silently no-op, everything downstream of it is effectively *untested code running for the first time in production*. Verify the whole chain end-to-end (I watched the DB row appear, then flip offline), don't stop at "the subscription fires now." This is Lesson 10 with a fuse on it. Import-runtime gotcha worth remembering: when `esModuleInterop` is off, prefer instance constants (`client.OPEN`) over class statics (`WebSocket.OPEN`) for `ws`.
|
||||
|
||||
26. **A jail check at the entry point does not jail the recursive walk behind it — and my own "line-by-line" review missed it; the automated security review didn't.** The file manager's `jail()` correctly canonicalized and prefix-checked the top-level path, and I traced every escape vector through it and signed off. But `copy_recursive` then walked the directory tree with `fs::metadata` (which *follows* symlinks). A symlink planted inside the jail pointing at `/etc`, then a `copy` of its parent, would dereference it and pull external content *into* the jail to be read — a jail escape the entry check never sees, because the escape is reintroduced by a descendant during traversal. Fix: `symlink_metadata` (lstat) everywhere you recurse, and refuse/never-follow symlinks across the boundary. The transferable rule: **validate at the boundary AND at every step that re-derives a path** (recursion, `read_dir`, glob, archive extraction). And the humbling part — I was confident after reviewing the jail function; the security-review pass caught the HIGH I'd waved through. Trust adversarial verification over your own once-over on security-critical code, especially path/traversal logic.
|
||||
|
||||
27. **Validate infra config BEFORE it reaches a deploy — and know that `docker compose up -d <service>` will recreate other services whose definitions changed.** During the NATS auth cutover I ran `docker compose up -d api` to pick up new env. Because the *nats* service definition had also changed (a new volume mount), compose recreated **corrosion-nats too** — and it failed to start on a config error (`no_auth_user` nested inside `authorization{}` instead of at top level), taking the broker down for ~3 minutes with the backend in offline mode. Two lessons: (a) a broker/proxy/DB config file is code — lint it before it can reach a restart (`nats-server -t -c cfg` to test-parse, `nginx -t`, etc.), don't let the first validation be the production container's startup; (b) `compose up -d <one-service>` is not surgical — it reconciles that service's **dependencies** too, so a stale edit to a depended-on service ships when you didn't mean it to. When touching shared-infra config, restart that service explicitly and watch it come up before moving on. Recovery also surfaced a third gotcha: recreating a client (api) while its server (nats) is down leaves the client stuck on a cached DNS failure (`EAI_AGAIN`) — restart the client once the server is healthy.
|
||||
|
||||
28. **A multi-line secret in CI (minisign/SSH/PGP keys) must be stored base64-encoded — the runner mangles embedded newlines and the key silently fails to load.** The signed-update CI passed the toolchain build, downloaded minisign fine, then died at the sign step on `Error while loading the secret key file` (exit 2). The cause wasn't the key or minisign — a minisign secret key file is **two lines** (`untrusted comment:` + base64 blob), and Gitea/act_runner secret storage collapses the embedded newline so the reconstructed file is one unparseable line. The robust pattern: store the secret as `base64 < secret.key | tr -d '\n'` (single line, mangling-proof) and `base64 -d` it in the job, with auto-detect fallback so a correctly-stored raw key still works, and a loud `::error::` carrying the fix command if it's neither. This applies to **any** multi-line credential in CI, not just minisign. Two corollaries: (a) the tell is "the tool runs but can't load its key" — suspect newline-mangling before the key itself; (b) generating that base64 prints the **private key to the terminal/transcript** — for a supply-chain signing key, treat it as exposed and rotate before cutover (embed the new pubkey, re-store the new secret, retire the old). And verify the published artifact end-to-end against the *embedded* pubkey (`minisign -Vm bin -P <pub>`) plus a tampered-byte negative control — a green build that signs is not the same as a signature the agent will actually accept.
|
||||
|
||||
@@ -45,10 +45,17 @@ import { BetterChatModule } from './modules/betterchat/betterchat.module';
|
||||
import { TimedExecuteModule } from './modules/timedexecute/timedexecute.module';
|
||||
import { RaidableBasesModule } from './modules/raidablebases/raidablebases.module';
|
||||
import { EarlyAccessModule } from './modules/early-access/early-access.module';
|
||||
import { FleetModule } from './modules/fleet/fleet.module';
|
||||
import { InstancesModule } from './modules/instances/instances.module';
|
||||
|
||||
// Shared Services
|
||||
import { NatsService } from './services/nats.service';
|
||||
import { NatsBridgeService } from './services/nats-bridge.service';
|
||||
import { HostAgentConsumerService } from './services/host-agent-consumer.service';
|
||||
import { ServerConnection } from './entities/server-connection.entity';
|
||||
import { License } from './entities/license.entity';
|
||||
import { AgentHost } from './entities/agent-host.entity';
|
||||
import { GameInstance } from './entities/game-instance.entity';
|
||||
import { SteamService } from './services/steam.service';
|
||||
|
||||
// Gateway
|
||||
@@ -91,6 +98,9 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
// Scheduler
|
||||
ScheduleModule.forRoot(),
|
||||
|
||||
// Repositories for app-level shared services (host-agent consumer)
|
||||
TypeOrmModule.forFeature([ServerConnection, License, AgentHost, GameInstance]),
|
||||
|
||||
// Feature Modules
|
||||
AuthModule,
|
||||
UsersModule,
|
||||
@@ -125,6 +135,8 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
TimedExecuteModule,
|
||||
RaidableBasesModule,
|
||||
EarlyAccessModule,
|
||||
FleetModule,
|
||||
InstancesModule,
|
||||
],
|
||||
providers: [
|
||||
// Global guards (order matters: auth first, then license, then permissions)
|
||||
@@ -134,6 +146,7 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
// Shared services
|
||||
NatsService,
|
||||
NatsBridgeService,
|
||||
HostAgentConsumerService,
|
||||
SteamService,
|
||||
|
||||
// WebSocket gateway
|
||||
|
||||
@@ -6,6 +6,15 @@ export default () => ({
|
||||
},
|
||||
nats: {
|
||||
url: process.env.NATS_URL || 'nats://localhost:4222',
|
||||
// Public broker address shown to agents in setup instructions.
|
||||
publicUrl: process.env.NATS_PUBLIC_URL || 'nats://nats.corrosionmgmt.com:4222',
|
||||
// Privileged internal credentials for the backend's own NATS connection
|
||||
// (full corrosion.> access). Empty = anonymous (transition period).
|
||||
internalUser: process.env.NATS_INTERNAL_USER || '',
|
||||
internalPassword: process.env.NATS_INTERNAL_PASSWORD || '',
|
||||
// Secret used to derive a per-license agent password:
|
||||
// HMAC-SHA256(license_id, secret). Shared with the nats.conf generator.
|
||||
tokenSecret: process.env.NATS_TOKEN_SECRET || '',
|
||||
},
|
||||
jwt: {
|
||||
secret: process.env.JWT_SECRET || 'change-me',
|
||||
|
||||
74
backend-nest/src/entities/agent-host.entity.ts
Normal file
74
backend-nest/src/entities/agent-host.entity.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Check, Unique } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
|
||||
export interface AgentHostDisk {
|
||||
mount: string;
|
||||
total_mb: number;
|
||||
free_mb: number;
|
||||
}
|
||||
|
||||
/**
 * Column transformer for Postgres `bigint` columns.
 *
 * The pg driver returns int8 values as strings by default, which contradicts
 * the `number | null` typing of the entity fields below. The values stored in
 * these columns (megabytes, seconds) fit comfortably in a JS number, so they
 * are converted on read. Writes pass through untouched.
 */
const agentHostBigint = {
  to: (value: number | null | undefined) => value,
  from: (value: string | number | null | undefined): number | null =>
    value === null || value === undefined ? null : Number(value),
};

/**
 * One Corrosion host agent / one machine. Owns the machine-level facts.
 *
 * NOTE: distinct from the B2B `hosts` table (hosting-partner companies). This
 * is `agent_hosts` — the physical/virtual box a customer runs the agent on.
 *
 * Constraints declared here:
 *  - `(license_id, hostname)` is unique, so a license has at most one row per
 *    hostname.
 *  - `status` is restricted by a CHECK constraint to
 *    `connected` / `degraded` / `offline`.
 */
@Entity('agent_hosts')
@Unique(['license_id', 'hostname'])
@Check(`"status" IN ('connected', 'degraded', 'offline')`)
export class AgentHost {
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** Owning license; FK column backing the `license` relation below. */
  @Column({ type: 'uuid' })
  license_id: string;

  @Column({ type: 'varchar', length: 255, default: '' })
  hostname: string;

  @Column({ type: 'varchar', length: 64, nullable: true })
  agent_version: string | null;

  @Column({ type: 'varchar', length: 64, nullable: true })
  agent_commit: string | null;

  @Column({ type: 'varchar', length: 32, nullable: true })
  os: string | null;

  @Column({ type: 'varchar', length: 32, nullable: true })
  arch: string | null;

  /** One of the values allowed by the class-level CHECK; defaults to `offline`. */
  @Column({ type: 'varchar', length: 20, default: 'offline' })
  status: string;

  @Column({ type: 'timestamptz', nullable: true })
  last_heartbeat_at: Date | null;

  @Column({ type: 'double precision', nullable: true })
  cpu_percent: number | null;

  @Column({ type: 'integer', nullable: true })
  cpu_cores: number | null;

  // bigint columns: converted to JS numbers on read (see agentHostBigint).
  @Column({ type: 'bigint', nullable: true, transformer: agentHostBigint })
  mem_total_mb: number | null;

  @Column({ type: 'bigint', nullable: true, transformer: agentHostBigint })
  mem_used_mb: number | null;

  @Column({ type: 'bigint', nullable: true, transformer: agentHostBigint })
  uptime_seconds: number | null;

  /** Per-volume capacity snapshots, stored as JSONB. */
  @Column({ type: 'jsonb', nullable: true })
  disks: AgentHostDisk[] | null;

  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  created_at: Date;

  // NOTE(review): only a DB default of NOW() is declared; nothing here bumps
  // this on update, so writers must set it themselves — confirm they do.
  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  updated_at: Date;

  /** Deleting the license removes its hosts (ON DELETE CASCADE). */
  @ManyToOne(() => License, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'license_id' })
  license: License;
}
|
||||
59
backend-nest/src/entities/game-instance.entity.ts
Normal file
59
backend-nest/src/entities/game-instance.entity.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Unique } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
import { AgentHost } from './agent-host.entity';
|
||||
|
||||
/**
 * Column transformer for Postgres `bigint` columns.
 *
 * The pg driver returns int8 values as strings by default, which contradicts
 * the `number` typing of `uptime_seconds` below. Values are converted to JS
 * numbers on read; writes pass through untouched.
 */
const gameInstanceBigint = {
  to: (value: number | null | undefined) => value,
  from: (value: string | number | null | undefined): number | null =>
    value === null || value === undefined ? null : Number(value),
};

/**
 * One game server process / orchestrated unit (a Rust server, a Conan world,
 * a Dune battlegroup). The billing unit — plans count instances.
 * `agent_instance_id` is the agent's slug and the NATS subject segment.
 *
 * `(license_id, agent_instance_id)` is unique, so a slug identifies at most
 * one instance within a license.
 */
@Entity('game_instances')
@Unique(['license_id', 'agent_instance_id'])
export class GameInstance {
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** Owning license; FK column backing the `license` relation below. */
  @Column({ type: 'uuid' })
  license_id: string;

  /** FK column backing the `host` relation; null once the host row is gone. */
  @Column({ type: 'uuid', nullable: true })
  host_id: string | null;

  // Plain uuid column: no relation to a cluster entity is declared here.
  @Column({ type: 'uuid', nullable: true })
  cluster_id: string | null;

  @Column({ type: 'varchar', length: 64 })
  agent_instance_id: string;

  @Column({ type: 'varchar', length: 32 })
  game: string;

  @Column({ type: 'varchar', length: 255, nullable: true })
  label: string | null;

  @Column({ type: 'varchar', length: 32, default: 'unknown' })
  state: string;

  @Column({ type: 'text', nullable: true })
  root_path: string | null;

  // bigint column: converted to a JS number on read (see gameInstanceBigint).
  @Column({ type: 'bigint', default: 0, transformer: gameInstanceBigint })
  uptime_seconds: number;

  @Column({ type: 'timestamptz', nullable: true })
  last_seen_at: Date | null;

  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  created_at: Date;

  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  updated_at: Date;

  /** Deleting the license removes its instances (ON DELETE CASCADE). */
  @ManyToOne(() => License, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'license_id' })
  license: License;

  /** Deleting the host keeps the instance and nulls `host_id` (ON DELETE SET NULL). */
  @ManyToOne(() => AgentHost, { onDelete: 'SET NULL', nullable: true })
  @JoinColumn({ name: 'host_id' })
  host: AgentHost | null;
}
|
||||
38
backend-nest/src/entities/instance-cluster.entity.ts
Normal file
38
backend-nest/src/entities/instance-cluster.entity.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
|
||||
/**
 * Optional grouping of instances for games with linked topologies:
 * Soulmask main/child clusters, Dune BattleGroup → Sietches. Reserved now;
 * cluster orchestration ships with those game adapters.
 */
@Entity('instance_clusters')
export class InstanceCluster {
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** Owning license; FK column backing the `license` relation below. */
  @Column({
    type: 'uuid',
  })
  license_id: string;

  @Column({
    type: 'varchar',
    length: 32,
  })
  game: string;

  @Column({
    type: 'varchar',
    length: 255,
  })
  name: string;

  @Column({
    type: 'varchar',
    length: 32,
    nullable: true,
  })
  topology: string | null;

  /** Free-form JSONB payload; its schema is not defined in this file. */
  @Column({
    type: 'jsonb',
    nullable: true,
  })
  config: Record<string, unknown> | null;

  @Column({
    type: 'timestamptz',
    default: () => 'NOW()',
  })
  created_at: Date;

  @Column({
    type: 'timestamptz',
    default: () => 'NOW()',
  })
  updated_at: Date;

  /** Deleting the license removes its clusters (ON DELETE CASCADE). */
  @ManyToOne(() => License, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'license_id' })
  license: License;
}
|
||||
38
backend-nest/src/entities/instance-stats.entity.ts
Normal file
38
backend-nest/src/entities/instance-stats.entity.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn } from 'typeorm';
|
||||
import { GameInstance } from './game-instance.entity';
|
||||
|
||||
/**
 * Per-instance time-series game metrics (player count, FPS, …). Populated once
 * game-level telemetry is collected via RCON/plugin — the host heartbeat
 * carries host metrics, not game metrics, so this stays empty in Phase A.
 */
@Entity('instance_stats')
export class InstanceStats {
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** FK column backing the `instance` relation below. */
  @Column({
    type: 'uuid',
  })
  instance_id: string;

  // Plain uuid column: no relation to the license entity is declared here.
  @Column({
    type: 'uuid',
  })
  license_id: string;

  @Column({
    type: 'integer',
    default: 0,
  })
  player_count: number;

  @Column({
    type: 'integer',
    default: 0,
  })
  max_players: number;

  @Column({
    type: 'double precision',
    default: 0,
  })
  fps: number;

  @Column({
    type: 'integer',
    default: 0,
  })
  memory_usage_mb: number;

  /** Sample timestamp; the database fills it with NOW() when omitted. */
  @Column({
    type: 'timestamptz',
    default: () => 'NOW()',
  })
  recorded_at: Date;

  /** Deleting the game instance removes its stats rows (ON DELETE CASCADE). */
  @ManyToOne(() => GameInstance, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'instance_id' })
  instance: GameInstance;
}
|
||||
@@ -71,7 +71,10 @@ export class NatsBridgeGateway implements OnGatewayConnection, OnGatewayDisconne
|
||||
|
||||
// Subscribe to NATS events for this license
|
||||
const listener = (event: string, data: unknown) => {
|
||||
if (client.readyState === WebSocket.OPEN) {
|
||||
// client.OPEN (instance constant) — NOT WebSocket.OPEN: with
|
||||
// esModuleInterop off, the default `ws` import is undefined at
|
||||
// runtime, so the static crashes. The instance constant is safe.
|
||||
if (client.readyState === client.OPEN) {
|
||||
client.send(JSON.stringify({
|
||||
type: 'event',
|
||||
license_id: payload.license_id,
|
||||
|
||||
@@ -111,13 +111,13 @@ export class AnalyticsService {
|
||||
.createQueryBuilder('wipe')
|
||||
.leftJoinAndSelect('wipe.map', 'map')
|
||||
.select('map.id', 'map_id')
|
||||
.addSelect('map.name', 'map_name')
|
||||
.addSelect('map.display_name', 'map_name')
|
||||
.addSelect('COUNT(wipe.id)', 'usage_count')
|
||||
.where('wipe.license_id = :licenseId', { licenseId })
|
||||
.andWhere('wipe.started_at >= :cutoff', { cutoff })
|
||||
.andWhere('wipe.map_id IS NOT NULL')
|
||||
.groupBy('map.id')
|
||||
.addGroupBy('map.name')
|
||||
.addGroupBy('map.display_name')
|
||||
.getRawMany();
|
||||
|
||||
return {
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { AutoDoorsConfig } from '../../entities/autodoors-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateAutoDoorsConfigDto } from './dto/create-autodoors-config.dto';
|
||||
import { UpdateAutoDoorsConfigDto } from './dto/update-autodoors-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class AutoDoorsService {
|
||||
constructor(
|
||||
@InjectRepository(AutoDoorsConfig)
|
||||
private readonly autoDoorsRepo: Repository<AutoDoorsConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class AutoDoorsService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write AutoDoors.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/AutoDoors.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write AutoDoors.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/AutoDoors.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload AutoDoors plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload AutoDoors',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload AutoDoors');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.autoDoorsRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class AutoDoorsService {
|
||||
/** Import AutoDoors.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read AutoDoors.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/AutoDoors.json',
|
||||
},
|
||||
30000,
|
||||
// Read AutoDoors.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/AutoDoors.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class AutoDoorsService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { BetterChatConfig } from '../../entities/betterchat-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateBetterChatConfigDto } from './dto/create-betterchat-config.dto';
|
||||
import { UpdateBetterChatConfigDto } from './dto/update-betterchat-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class BetterChatService {
|
||||
constructor(
|
||||
@InjectRepository(BetterChatConfig)
|
||||
private readonly repo: Repository<BetterChatConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class BetterChatService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write BetterChat.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/BetterChat.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write BetterChat.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/BetterChat.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload BetterChat plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload BetterChat',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload BetterChat');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.repo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class BetterChatService {
|
||||
/** Import BetterChat.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read BetterChat.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/BetterChat.json',
|
||||
},
|
||||
30000,
|
||||
// Read BetterChat.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/BetterChat.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class BetterChatService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -108,7 +108,9 @@ export class ConsoleGateway implements OnGatewayConnection, OnGatewayDisconnect
|
||||
|
||||
const message = JSON.stringify({ event, data });
|
||||
for (const client of clients) {
|
||||
if (client.readyState === WebSocket.OPEN) {
|
||||
// client.OPEN, not WebSocket.OPEN — esModuleInterop is off so the
|
||||
// default `ws` import is undefined at runtime (would crash on forward).
|
||||
if (client.readyState === client.OPEN) {
|
||||
client.send(message);
|
||||
}
|
||||
}
|
||||
|
||||
26
backend-nest/src/modules/fleet/fleet.controller.ts
Normal file
26
backend-nest/src/modules/fleet/fleet.controller.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import { Controller, Get, Delete, Param, ParseUUIDPipe } from '@nestjs/common';
|
||||
import { ApiTags, ApiBearerAuth, ApiOperation } from '@nestjs/swagger';
|
||||
import { FleetService } from './fleet.service';
|
||||
import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
|
||||
import { RequirePermission } from '../../common/decorators/require-permission.decorator';
|
||||
|
||||
/**
 * HTTP endpoints for the fleet view of the current tenant's license.
 *
 * Both routes take the license id from `@CurrentTenant()` and delegate to
 * {@link FleetService}; access is gated per route by `@RequirePermission`.
 */
@ApiTags('fleet')
@ApiBearerAuth()
@Controller('fleet')
export class FleetController {
  constructor(private readonly fleetService: FleetService) {}

  /** GET /fleet — hosts, their game instances, and summary counts. */
  @Get()
  @RequirePermission('server.view')
  @ApiOperation({ summary: 'Get fleet overview — hosts and game instances for this license' })
  async getFleet(@CurrentTenant() licenseId: string) {
    return this.fleetService.getFleet(licenseId);
  }

  /**
   * DELETE /fleet/hosts/:id — remove a host and its instances.
   *
   * `:id` is validated as a UUID before it reaches the service. Host ids are
   * stored in a `uuid` column, so a malformed value (for example the synthetic
   * `__unassigned__` id that `getFleet` emits) would otherwise surface as a
   * database error rather than a client error.
   */
  @Delete('hosts/:id')
  @RequirePermission('server.manage')
  @ApiOperation({ summary: 'Remove a host and its instances (host must be offline)' })
  async deleteHost(
    @CurrentTenant() licenseId: string,
    @Param('id', ParseUUIDPipe) id: string,
  ) {
    return this.fleetService.deleteHost(licenseId, id);
  }
}
|
||||
15
backend-nest/src/modules/fleet/fleet.module.ts
Normal file
15
backend-nest/src/modules/fleet/fleet.module.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { FleetController } from './fleet.controller';
|
||||
import { FleetService } from './fleet.service';
|
||||
import { AgentHost } from '../../entities/agent-host.entity';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
import { ServerConnection } from '../../entities/server-connection.entity';
|
||||
|
||||
/**
 * Wires the fleet feature: registers the repositories FleetService injects,
 * exposes FleetController, and exports FleetService for other modules.
 */
@Module({
  imports: [
    TypeOrmModule.forFeature([
      AgentHost,
      GameInstance,
      ServerConnection,
    ]),
  ],
  controllers: [FleetController],
  providers: [FleetService],
  exports: [FleetService],
})
export class FleetModule {}
|
||||
170
backend-nest/src/modules/fleet/fleet.service.ts
Normal file
170
backend-nest/src/modules/fleet/fleet.service.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
import { Injectable, NotFoundException, ConflictException } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { AgentHost } from '../../entities/agent-host.entity';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
import { ServerConnection } from '../../entities/server-connection.entity';
|
||||
|
||||
/** API shape of one game instance inside a fleet host entry. */
export interface FleetInstanceDto {
  id: string;
  agent_instance_id: string;
  game: string;
  label: string | null;
  state: string;

  /** Always a JS number in the response, even though the column is bigint. */
  uptime_seconds: number;

  /** ISO-8601 timestamp string, or null when the instance was never seen. */
  last_seen_at: string | null;
}
|
||||
|
||||
/**
 * API shape of one host in the fleet overview, with its instances nested.
 *
 * `getFleet` also uses this shape for a synthetic "Unassigned" entry
 * (id `__unassigned__`) that collects instances without a host; every
 * machine-level field on that entry is null.
 */
export interface FleetHostDto {
  id: string;
  hostname: string;
  status: string;
  agent_version: string | null;
  os: string | null;
  arch: string | null;

  // Machine metrics — null when the stored value is missing.
  cpu_percent: number | null;
  cpu_cores: number | null;
  mem_total_mb: number | null;
  mem_used_mb: number | null;
  uptime_seconds: number | null;
  disks: AgentHost['disks'];

  /** ISO-8601 timestamp string, or null when no heartbeat is recorded. */
  last_heartbeat_at: string | null;

  instances: FleetInstanceDto[];
}
|
||||
|
||||
/** Aggregate counters returned alongside the host list. */
export interface FleetSummaryDto {
  /** Number of real host rows (the synthetic "Unassigned" entry is not counted). */
  host_count: number;

  /** Number of instances for the license, including ones without a host. */
  instance_count: number;

  /** Number of hosts whose status is `connected`. */
  online_host_count: number;
}
|
||||
|
||||
/** Full payload of the fleet overview: the host entries plus summary counts. */
export interface FleetResponseDto {
  hosts: FleetHostDto[];
  summary: FleetSummaryDto;
}
|
||||
|
||||
/**
 * Fleet queries and host removal for a single license: the agent hosts it
 * owns and the game instances attached to them.
 */
@Injectable()
export class FleetService {
  constructor(
    @InjectRepository(AgentHost)
    private readonly hostRepo: Repository<AgentHost>,
    @InjectRepository(GameInstance)
    private readonly instanceRepo: Repository<GameInstance>,
    @InjectRepository(ServerConnection)
    private readonly connectionRepo: Repository<ServerConnection>,
  ) {}

  /**
   * Remove a host and its game instances from the fleet.
   *
   * Refuses while the host is `connected` — a live agent re-registers on its
   * next heartbeat, so the operator must stop the agent first. Deletes the
   * host's instances explicitly (the FK is SET NULL, which would otherwise
   * orphan them); instance_stats cascade. If this was the license's last host,
   * the legacy single-server connection row is cleared too so the old
   * Dashboard doesn't show a stale server.
   *
   * All deletes run in one transaction, so a failure part-way through cannot
   * leave the instances removed while the host (or the stale connection row)
   * is still present.
   *
   * NOTE(review): only `connected` blocks deletion; a `degraded` host can be
   * removed even though the API summary says "host must be offline" — confirm
   * that is intended.
   *
   * @param licenseId License the host must belong to.
   * @param hostId    Primary key of the host to remove.
   * @returns `deleted: true` plus the number of instance rows removed.
   * @throws NotFoundException when no such host exists for this license.
   * @throws ConflictException when the host status is `connected`.
   */
  async deleteHost(
    licenseId: string,
    hostId: string,
  ): Promise<{ deleted: true; instances_removed: number }> {
    const host = await this.hostRepo.findOne({ where: { id: hostId, license_id: licenseId } });
    if (!host) throw new NotFoundException('Host not found');
    if (host.status === 'connected') {
      throw new ConflictException(
        'Host is online — stop the agent first, or it will re-register on its next heartbeat',
      );
    }

    // Uses the entity manager behind hostRepo; the three repositories are
    // registered together in FleetModule and are assumed to share one
    // connection.
    const instancesRemoved = await this.hostRepo.manager.transaction(async (tx) => {
      const removed = await tx.delete(this.instanceRepo.target, {
        license_id: licenseId,
        host_id: hostId,
      });
      await tx.delete(this.hostRepo.target, { id: hostId, license_id: licenseId });

      // Counted inside the transaction so it reflects the delete above.
      const remaining = await tx.count(this.hostRepo.target, {
        where: { license_id: licenseId },
      });
      if (remaining === 0) {
        await tx.delete(this.connectionRepo.target, { license_id: licenseId });
      }

      return removed.affected ?? 0;
    });

    return { deleted: true, instances_removed: instancesRemoved };
  }

  /**
   * Build the fleet overview for a license.
   *
   * Loads hosts (ordered by hostname) and instances (ordered by game, then
   * label) in parallel, nests each instance under its host, and appends a
   * synthetic "Unassigned" host entry when some instances have no `host_id`.
   *
   * @param licenseId License whose fleet is returned.
   * @returns Host entries with nested instances, plus summary counts.
   */
  async getFleet(licenseId: string): Promise<FleetResponseDto> {
    const [hosts, instances] = await Promise.all([
      this.hostRepo.find({
        where: { license_id: licenseId },
        order: { hostname: 'ASC' },
      }),
      this.instanceRepo.find({
        where: { license_id: licenseId },
        order: { game: 'ASC', label: 'ASC' },
      }),
    ]);

    // Group instances by host_id. Bigint columns come back as strings from pg — coerce.
    const instancesByHost = new Map<string | null, FleetInstanceDto[]>();
    for (const inst of instances) {
      const key = inst.host_id ?? null;
      let bucket = instancesByHost.get(key);
      if (!bucket) {
        bucket = [];
        instancesByHost.set(key, bucket);
      }
      bucket.push({
        id: inst.id,
        agent_instance_id: inst.agent_instance_id,
        game: inst.game,
        label: inst.label,
        state: inst.state,
        uptime_seconds: Number(inst.uptime_seconds),
        last_seen_at: inst.last_seen_at ? inst.last_seen_at.toISOString() : null,
      });
    }

    const hostDtos: FleetHostDto[] = hosts.map((h) => ({
      id: h.id,
      hostname: h.hostname,
      status: h.status,
      agent_version: h.agent_version,
      os: h.os,
      arch: h.arch,
      cpu_percent: this.numberOrNull(h.cpu_percent),
      cpu_cores: this.numberOrNull(h.cpu_cores),
      mem_total_mb: this.numberOrNull(h.mem_total_mb),
      mem_used_mb: this.numberOrNull(h.mem_used_mb),
      uptime_seconds: this.numberOrNull(h.uptime_seconds),
      disks: h.disks,
      last_heartbeat_at: h.last_heartbeat_at ? h.last_heartbeat_at.toISOString() : null,
      instances: instancesByHost.get(h.id) ?? [],
    }));

    // Append synthetic "unassigned" bucket only if orphaned instances exist
    const unassigned = instancesByHost.get(null) ?? [];
    if (unassigned.length > 0) {
      hostDtos.push({
        id: '__unassigned__',
        hostname: 'Unassigned',
        status: 'offline',
        agent_version: null,
        os: null,
        arch: null,
        cpu_percent: null,
        cpu_cores: null,
        mem_total_mb: null,
        mem_used_mb: null,
        uptime_seconds: null,
        disks: null,
        last_heartbeat_at: null,
        instances: unassigned,
      });
    }

    return {
      hosts: hostDtos,
      summary: {
        // Real host rows only; the synthetic bucket is not counted.
        host_count: hosts.length,
        instance_count: instances.length,
        online_host_count: hosts.filter((h) => h.status === 'connected').length,
      },
    };
  }

  /** Coerce a possibly-stringified numeric column to a number, keeping null/undefined as null. */
  private numberOrNull(value: number | string | null | undefined): number | null {
    return value === null || value === undefined ? null : Number(value);
  }
}
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { FurnaceSplitterConfig } from '../../entities/furnacesplitter-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateFurnaceSplitterConfigDto } from './dto/create-furnacesplitter-config.dto';
|
||||
import { UpdateFurnaceSplitterConfigDto } from './dto/update-furnacesplitter-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class FurnaceSplitterService {
|
||||
constructor(
|
||||
@InjectRepository(FurnaceSplitterConfig)
|
||||
private readonly furnaceRepo: Repository<FurnaceSplitterConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class FurnaceSplitterService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write FurnaceSplitter.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/FurnaceSplitter.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write FurnaceSplitter.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/FurnaceSplitter.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload FurnaceSplitter plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload FurnaceSplitter',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload FurnaceSplitter');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.furnaceRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class FurnaceSplitterService {
|
||||
/** Import FurnaceSplitter.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read FurnaceSplitter.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/FurnaceSplitter.json',
|
||||
},
|
||||
30000,
|
||||
// Read FurnaceSplitter.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/FurnaceSplitter.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class FurnaceSplitterService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { GatherConfig } from '../../entities/gather-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateGatherConfigDto } from './dto/create-gather-config.dto';
|
||||
import { UpdateGatherConfigDto } from './dto/update-gather-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class GatherService {
|
||||
constructor(
|
||||
@InjectRepository(GatherConfig)
|
||||
private readonly gatherRepo: Repository<GatherConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class GatherService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write GatherManager.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/GatherManager.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write GatherManager.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/GatherManager.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload GatherManager plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload GatherManager',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload GatherManager');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.gatherRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class GatherService {
|
||||
/** Import GatherManager.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read GatherManager.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/GatherManager.json',
|
||||
},
|
||||
30000,
|
||||
// Read GatherManager.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/GatherManager.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class GatherService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
133
backend-nest/src/modules/instances/instances.controller.ts
Normal file
133
backend-nest/src/modules/instances/instances.controller.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
import { Controller, Post, Get, Put, Body, Param, Query } from '@nestjs/common';
|
||||
import { ApiTags, ApiBearerAuth, ApiOperation } from '@nestjs/swagger';
|
||||
import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
|
||||
import { RequirePermission } from '../../common/decorators/require-permission.decorator';
|
||||
import { InstancesService, LifecycleFunc } from './instances.service';
|
||||
|
||||
/**
 * HTTP surface for per-instance operations (lifecycle, RCON, file manager).
 *
 * Every route is scoped to the caller's license via `@CurrentTenant()` and
 * delegates to `InstancesService`, which performs the ownership check and the
 * argument validation. Body fields are read with optional chaining so that a
 * request sent without a JSON body reaches the service's own "is required"
 * checks (HTTP 400) instead of failing with a TypeError (HTTP 500) on an
 * undefined `body`.
 */
@ApiTags('instances')
@ApiBearerAuth()
@Controller('instances')
export class InstancesController {
  constructor(private readonly instances: InstancesService) {}

  /** Forward a lifecycle action to the instance; the service rejects unknown actions. */
  @Post(':id/lifecycle')
  @RequirePermission('server.manage')
  @ApiOperation({ summary: 'Send a lifecycle command to a game instance (start/stop/restart/status/steam_update)' })
  async lifecycle(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { action: LifecycleFunc },
  ) {
    return this.instances.lifecycle(licenseId, id, body?.action);
  }

  /** Run a console command on the instance; the service rejects an empty command. */
  @Post(':id/rcon')
  @RequirePermission('server.console')
  @ApiOperation({ summary: 'Send an RCON/console command to a game instance' })
  async rcon(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { command: string },
  ) {
    return this.instances.rcon(licenseId, id, body?.command);
  }

  /** List a directory; a missing `path` query parameter is sent as the empty string. */
  @Get(':id/files')
  @RequirePermission('files.view')
  @ApiOperation({ summary: 'List a directory in the instance (jailed to its root)' })
  async listFiles(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Query('path') path?: string,
  ) {
    return this.instances.listFiles(licenseId, id, path ?? '');
  }

  /** Read one file; the service rejects a missing `path`. */
  @Get(':id/file')
  @RequirePermission('files.view')
  @ApiOperation({ summary: 'Read a text file from the instance (jailed, 5 MiB cap)' })
  async readFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Query('path') path: string,
  ) {
    return this.instances.readFile(licenseId, id, path);
  }

  /** Write one file; a missing `content` is written as an empty file. */
  @Put(':id/file')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Write a text file in the instance (jailed)' })
  async writeFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string; content: string },
  ) {
    return this.instances.writeFile(licenseId, id, body?.path, body?.content ?? '');
  }

  /** Delete the file or directory at `body.path`. */
  @Post(':id/files/delete')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Delete a file or directory (jailed)' })
  async deleteFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string },
  ) {
    return this.instances.deleteFile(licenseId, id, body?.path);
  }

  /** Rename `body.path` to `body.name`. */
  @Post(':id/files/rename')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Rename a file/directory within its parent (jailed)' })
  async renameFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string; name: string },
  ) {
    return this.instances.renameFile(licenseId, id, body?.path, body?.name);
  }

  /** Create the directory `body.path`. */
  @Post(':id/files/mkdir')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Create a directory (jailed)' })
  async mkdir(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string },
  ) {
    return this.instances.mkdir(licenseId, id, body?.path);
  }

  /** Create an empty file at `body.path`. */
  @Post(':id/files/mkfile')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Create an empty file (jailed)' })
  async mkfile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string },
  ) {
    return this.instances.mkfile(licenseId, id, body?.path);
  }

  /** Move `body.path` to `body.dest`. */
  @Post(':id/files/move')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Move a file/directory (jailed)' })
  async moveFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string; dest: string },
  ) {
    return this.instances.moveFile(licenseId, id, body?.path, body?.dest);
  }

  /** Copy `body.path` to `body.dest`. */
  @Post(':id/files/copy')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Copy a file/directory (jailed)' })
  async copyFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string; dest: string },
  ) {
    return this.instances.copyFile(licenseId, id, body?.path, body?.dest);
  }
}
|
||||
18
backend-nest/src/modules/instances/instances.module.ts
Normal file
18
backend-nest/src/modules/instances/instances.module.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import { Global, Module } from '@nestjs/common';
|
||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { InstancesController } from './instances.controller';
|
||||
import { InstancesService } from './instances.service';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
|
||||
// Repository registration for the entity InstancesService queries.
const instanceEntities = TypeOrmModule.forFeature([GameInstance]);

/**
 * Declared @Global() so that the legacy single-server services
 * (servers/players/schedules/wipes/plugins and the 9 plugin-config modules)
 * can inject InstancesService — and thereby route their commands to the Rust
 * agent — without each of them having to import this module.
 */
@Global()
@Module({
  exports: [InstancesService],
  providers: [InstancesService, NatsService],
  controllers: [InstancesController],
  imports: [instanceEntities],
})
export class InstancesModule {}
|
||||
223
backend-nest/src/modules/instances/instances.service.ts
Normal file
223
backend-nest/src/modules/instances/instances.service.ts
Normal file
@@ -0,0 +1,223 @@
|
||||
import { Injectable, NotFoundException, BadRequestException, ServiceUnavailableException, Logger } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
|
||||
/** Lifecycle funcs the agent's {instance}.cmd handler accepts. */
const LIFECYCLE_FUNCS = ['start', 'stop', 'restart', 'status', 'steam_update'] as const;
export type LifecycleFunc = (typeof LIFECYCLE_FUNCS)[number];

/** Wipe scopes `wipe()` forwards to the agent; checked at runtime because a wipe is destructive. */
const WIPE_TYPES = ['map', 'blueprint', 'full'] as const;

/** Reply envelope of the agent's file manager: `{ status, data?, message? }`. */
type FileOpReply = { status: string; data?: unknown; message?: string };

/**
 * Dispatches lifecycle, RCON, file-manager and wipe requests to the host agent
 * over NATS, after checking that the target instance belongs to the caller's
 * license.
 */
@Injectable()
export class InstancesService {
  private readonly logger = new Logger(InstancesService.name);

  constructor(
    private readonly nats: NatsService,
    @InjectRepository(GameInstance)
    private readonly instanceRepo: Repository<GameInstance>,
  ) {}

  /**
   * Resolve an instance the caller's license actually owns (tenant guard).
   *
   * @throws NotFoundException when no instance matches both the id and the license.
   */
  private async resolveInstance(licenseId: string, instanceId: string): Promise<GameInstance> {
    const inst = await this.instanceRepo.findOne({
      where: { id: instanceId, license_id: licenseId },
    });
    if (!inst) throw new NotFoundException('Instance not found');
    return inst;
  }

  /** NATS subject of the instance's command handler (lifecycle, rcon, wipe). */
  private cmdSubject(inst: GameInstance, licenseId: string): string {
    return `corrosion.${licenseId}.${inst.agent_instance_id}.cmd`;
  }

  /**
   * Send a lifecycle func to the instance and return the agent's reply.
   *
   * @throws BadRequestException when `func` is not one of LIFECYCLE_FUNCS
   *         (the TypeScript type is not enforced on request bodies at runtime).
   * @throws NotFoundException when the instance is not owned by the license.
   */
  async lifecycle(licenseId: string, instanceId: string, func: LifecycleFunc): Promise<unknown> {
    if (!LIFECYCLE_FUNCS.includes(func)) {
      throw new BadRequestException(`Unsupported action '${func}'`);
    }
    const inst = await this.resolveInstance(licenseId, instanceId);
    this.logger.log(`instance ${inst.agent_instance_id}: ${func}`);
    return this.nats.requestScoped(licenseId, this.cmdSubject(inst, licenseId), { func });
  }

  /**
   * Send an RCON/console command to the instance and return the agent's reply.
   *
   * @throws BadRequestException when `command` is missing or only whitespace.
   * @throws NotFoundException when the instance is not owned by the license.
   */
  async rcon(licenseId: string, instanceId: string, command: string): Promise<unknown> {
    if (!command || !command.trim()) {
      throw new BadRequestException('command is required');
    }
    const inst = await this.resolveInstance(licenseId, instanceId);
    // RCON can take longer than a lifecycle ack — give it more headroom.
    return this.nats.requestScoped(
      licenseId,
      this.cmdSubject(inst, licenseId),
      { func: 'rcon', command },
      12_000,
    );
  }

  // -------------------------------------------------------------------------
  // File access — jailed to the instance root by the agent's file manager.
  // The agent protocol (corrosion-host-agent/src/filemanager.rs):
  //   { op: list|read|write|delete|rename|mkdir|mkfile|move|copy, path, ... }
  //   reply: { status: 'success'|'error', data?, message? }
  // -------------------------------------------------------------------------

  /** NATS subject of the instance's file-manager handler. */
  private filesSubject(inst: GameInstance, licenseId: string): string {
    return `corrosion.${licenseId}.${inst.agent_instance_id}.files.cmd`;
  }

  /**
   * Send one file-manager request and return the agent's reply envelope.
   *
   * @throws NotFoundException when the instance is not owned by the license.
   * @throws ServiceUnavailableException when the request resolves without a
   *         reply. Every caller dereferences the reply, so returning an empty
   *         value here would surface as a TypeError instead of a usable error.
   * @throws BadRequestException when the agent replies with `status: 'error'`.
   */
  private async fileOp(
    licenseId: string,
    instanceId: string,
    payload: Record<string, unknown>,
  ): Promise<FileOpReply> {
    const inst = await this.resolveInstance(licenseId, instanceId);
    const res = await this.nats.requestScoped<FileOpReply>(
      licenseId,
      this.filesSubject(inst, licenseId),
      payload,
      12_000,
    );
    if (!res) {
      throw new ServiceUnavailableException('No response from agent — it may be offline');
    }
    if (res.status === 'error') {
      throw new BadRequestException(res.message ?? 'File operation failed');
    }
    return res;
  }

  /** List the directory at `path` (the empty string is sent when omitted). */
  async listFiles(licenseId: string, instanceId: string, path = ''): Promise<unknown> {
    const res = await this.fileOp(licenseId, instanceId, { op: 'list', path });
    return res.data;
  }

  /** Read the file at `path` and return the `data` field of the agent's reply. */
  async readFile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
    if (!path) throw new BadRequestException('path is required');
    const res = await this.fileOp(licenseId, instanceId, { op: 'read', path });
    return res.data;
  }

  /** Write `content` to `path`; returns the reply's `data`, or `{ status: 'success' }` if it has none. */
  async writeFile(
    licenseId: string,
    instanceId: string,
    path: string,
    content: string,
  ): Promise<unknown> {
    if (!path) throw new BadRequestException('path is required');
    const res = await this.fileOp(licenseId, instanceId, { op: 'write', path, content });
    return res.data ?? { status: 'success' };
  }

  /** Delete `path`; returns the reply's `data`, or `{ ok: true }` if it has none. */
  async deleteFile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
    if (!path) throw new BadRequestException('path is required');
    return (await this.fileOp(licenseId, instanceId, { op: 'delete', path })).data ?? { ok: true };
  }

  /** Rename `path` to `name`; returns the reply's `data`, or `{ ok: true }` if it has none. */
  async renameFile(
    licenseId: string,
    instanceId: string,
    path: string,
    name: string,
  ): Promise<unknown> {
    if (!path || !name) throw new BadRequestException('path and name are required');
    return (await this.fileOp(licenseId, instanceId, { op: 'rename', path, name })).data ?? { ok: true };
  }

  /** Create the directory `path`; returns the reply's `data`, or `{ ok: true }` if it has none. */
  async mkdir(licenseId: string, instanceId: string, path: string): Promise<unknown> {
    if (!path) throw new BadRequestException('path is required');
    return (await this.fileOp(licenseId, instanceId, { op: 'mkdir', path })).data ?? { ok: true };
  }

  /** Create an empty file at `path`; returns the reply's `data`, or `{ ok: true }` if it has none. */
  async mkfile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
    if (!path) throw new BadRequestException('path is required');
    return (await this.fileOp(licenseId, instanceId, { op: 'mkfile', path })).data ?? { ok: true };
  }

  /** Move `path` to `dest`; returns the reply's `data`, or `{ ok: true }` if it has none. */
  async moveFile(
    licenseId: string,
    instanceId: string,
    path: string,
    dest: string,
  ): Promise<unknown> {
    if (!path || !dest) throw new BadRequestException('path and dest are required');
    return (await this.fileOp(licenseId, instanceId, { op: 'move', path, dest })).data ?? { ok: true };
  }

  /** Copy `path` to `dest`; returns the reply's `data`, or `{ ok: true }` if it has none. */
  async copyFile(
    licenseId: string,
    instanceId: string,
    path: string,
    dest: string,
  ): Promise<unknown> {
    if (!path || !dest) throw new BadRequestException('path and dest are required');
    return (await this.fileOp(licenseId, instanceId, { op: 'copy', path, dest })).data ?? { ok: true };
  }

  /**
   * Wipe an instance's game data via the agent's jailed wipe handler: stop →
   * delete files per wipe_type (map/blueprint/full) → restart. Long timeout
   * because the agent does all three steps before replying.
   *
   * @throws BadRequestException when `wipeType` is not one of WIPE_TYPES; the
   *         union type is not enforced at runtime and this operation deletes data.
   * @throws NotFoundException when the instance is not owned by the license.
   */
  async wipe(
    licenseId: string,
    instanceId: string,
    wipeType: 'map' | 'blueprint' | 'full',
    backup = true,
  ): Promise<unknown> {
    if (!WIPE_TYPES.includes(wipeType)) {
      throw new BadRequestException(`Unsupported wipe type '${wipeType}'`);
    }
    const inst = await this.resolveInstance(licenseId, instanceId);
    this.logger.log(`instance ${inst.agent_instance_id}: wipe (${wipeType})`);
    return this.nats.requestScoped(
      licenseId,
      this.cmdSubject(inst, licenseId),
      { func: 'wipe', wipe_type: wipeType, backup },
      120_000,
    );
  }

  // -------------------------------------------------------------------------
  // License-scoped convenience wrappers. Legacy single-server services
  // (servers/players/schedules/wipes/plugins + the 9 plugin-config modules)
  // predate the instance model and carry only a licenseId. These resolve the
  // license's primary instance, then dispatch to the agent — replacing the old
  // publishes to the now-defunct `cmd.server` subject.
  // -------------------------------------------------------------------------

  /**
   * The license's primary instance: the row with the earliest `created_at`.
   * Only the existence of a row is checked here, not whether its agent is
   * currently online.
   *
   * @throws NotFoundException when the license has no instance rows.
   */
  async resolveDefaultInstance(licenseId: string): Promise<GameInstance> {
    const inst = await this.instanceRepo.findOne({
      where: { license_id: licenseId },
      order: { created_at: 'ASC' },
    });
    if (!inst) {
      throw new NotFoundException(
        'No game instance is connected for this license yet — install and start the host agent first.',
      );
    }
    return inst;
  }

  /** `lifecycle()` against the license's primary instance. */
  async lifecycleForLicense(licenseId: string, func: LifecycleFunc): Promise<unknown> {
    const inst = await this.resolveDefaultInstance(licenseId);
    return this.lifecycle(licenseId, inst.id, func);
  }

  /** `rcon()` against the license's primary instance. */
  async rconForLicense(licenseId: string, command: string): Promise<unknown> {
    const inst = await this.resolveDefaultInstance(licenseId);
    return this.rcon(licenseId, inst.id, command);
  }

  /** `writeFile()` against the license's primary instance. */
  async writeFileForLicense(licenseId: string, path: string, content: string): Promise<unknown> {
    const inst = await this.resolveDefaultInstance(licenseId);
    return this.writeFile(licenseId, inst.id, path, content);
  }

  /** `readFile()` against the license's primary instance. */
  async readFileForLicense(licenseId: string, path: string): Promise<unknown> {
    const inst = await this.resolveDefaultInstance(licenseId);
    return this.readFile(licenseId, inst.id, path);
  }

  /** `deleteFile()` against the license's primary instance. */
  async deleteFileForLicense(licenseId: string, path: string): Promise<unknown> {
    const inst = await this.resolveDefaultInstance(licenseId);
    return this.deleteFile(licenseId, inst.id, path);
  }

  /** `wipe()` against the license's primary instance. */
  async wipeForLicense(
    licenseId: string,
    wipeType: 'map' | 'blueprint' | 'full',
    backup = true,
  ): Promise<unknown> {
    const inst = await this.resolveDefaultInstance(licenseId);
    return this.wipe(licenseId, inst.id, wipeType, backup);
  }
}
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { KitsConfig } from '../../entities/kits-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateKitsConfigDto } from './dto/create-kits-config.dto';
|
||||
import { UpdateKitsConfigDto } from './dto/update-kits-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class KitsService {
|
||||
constructor(
|
||||
@InjectRepository(KitsConfig)
|
||||
private readonly kitsRepo: Repository<KitsConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class KitsService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write Kits.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/Kits.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write Kits.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/Kits.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload Kits plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload Kits',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload Kits');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.kitsRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class KitsService {
|
||||
/** Import Kits.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read Kits.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/Kits.json',
|
||||
},
|
||||
30000,
|
||||
// Read Kits.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/Kits.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class KitsService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { LootProfile } from '../../entities/loot-profile.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateLootProfileDto } from './dto/create-loot-profile.dto';
|
||||
import { UpdateLootProfileDto } from './dto/update-loot-profile.dto';
|
||||
import { ImportLootProfileDto } from './dto/import-loot-profile.dto';
|
||||
@@ -15,7 +15,7 @@ export class LootService {
|
||||
constructor(
|
||||
@InjectRepository(LootProfile)
|
||||
private readonly lootRepo: Repository<LootProfile>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List profiles for a license (summaries — no JSONB) */
|
||||
@@ -114,37 +114,22 @@ export class LootService {
|
||||
const lootGroupsJson = JSON.stringify(scaledGroups, null, 2);
|
||||
|
||||
try {
|
||||
// Write LootTables.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/data/BetterLoot/LootTables.json',
|
||||
content: lootTablesJson,
|
||||
},
|
||||
30000,
|
||||
// Write LootTables.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/data/BetterLoot/LootTables.json',
|
||||
lootTablesJson,
|
||||
);
|
||||
|
||||
// Write LootGroups.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/data/BetterLoot/LootGroups.json',
|
||||
content: lootGroupsJson,
|
||||
},
|
||||
30000,
|
||||
// Write LootGroups.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/data/BetterLoot/LootGroups.json',
|
||||
lootGroupsJson,
|
||||
);
|
||||
|
||||
// Reload BetterLoot plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload BetterLoot',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload BetterLoot');
|
||||
|
||||
// Mark this profile as active, deactivate others
|
||||
await this.lootRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
|
||||
@@ -3,7 +3,7 @@ import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { PlayerAction } from '../../entities/player-action.entity';
|
||||
import { PlayerSession } from '../../entities/player-session.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { PlayerActionDto } from './dto/player-action.dto';
|
||||
|
||||
export interface Player {
|
||||
@@ -23,7 +23,7 @@ export class PlayersService {
|
||||
private readonly actionRepo: Repository<PlayerAction>,
|
||||
@InjectRepository(PlayerSession)
|
||||
private readonly sessionRepo: Repository<PlayerSession>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/**
|
||||
@@ -132,15 +132,26 @@ export class PlayersService {
|
||||
|
||||
await this.actionRepo.save(action);
|
||||
|
||||
// Forward kick, ban, and unban to the game server via NATS
|
||||
// Forward kick, ban, and unban to the game server via RCON
|
||||
if (dto.action_type === 'kick' || dto.action_type === 'ban' || dto.action_type === 'unban') {
|
||||
await this.natsService.sendServerCommand(licenseId, dto.action_type, {
|
||||
steam_id: dto.steam_id,
|
||||
reason: dto.reason,
|
||||
duration_minutes: dto.duration_minutes,
|
||||
});
|
||||
const rconCmd = this.buildRconCommand(dto);
|
||||
await this.instancesService.rconForLicense(licenseId, rconCmd);
|
||||
}
|
||||
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
/**
 * Build the console command string for a kick / ban / unban action.
 *
 * `dto.reason` is free text, so it is sanitised and wrapped in double quotes
 * before being placed in the command: control characters become spaces and
 * `"` / `\` are removed so the text cannot close the quoted argument. Without
 * the quoting, a multi-word reason is split into several console arguments,
 * and for `banid` the duration then no longer sits in its expected position.
 *
 * NOTE(review): `dto.steam_id` is interpolated as-is; this assumes the DTO
 * validation restricts it to a SteamID — confirm against PlayerActionDto.
 *
 * @returns the command string, or '' for an action type that has no command.
 */
private buildRconCommand(dto: PlayerActionDto): string {
  const reason = (dto.reason ?? '')
    .replace(/[\u0000-\u001f\u007f]+/g, ' ')
    .replace(/["\\]/g, '')
    .trim();

  switch (dto.action_type) {
    case 'kick':
      return `kick ${dto.steam_id}${reason ? ` "${reason}"` : ''}`;
    case 'ban': {
      // banid <steamId> <reason> <durationSeconds> — 0 = permanent
      const durationSeconds = dto.duration_minutes ? dto.duration_minutes * 60 : 0;
      // A blank reason falls back to 'banned' so the duration never shifts
      // into the reason slot.
      return `banid ${dto.steam_id} "${reason || 'banned'}" ${durationSeconds}`;
    }
    case 'unban':
      return `unban ${dto.steam_id}`;
    default:
      return '';
  }
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import { Injectable, NotFoundException, ConflictException, BadRequestException, Logger } from '@nestjs/common';
|
||||
import { Injectable, NotFoundException, ConflictException, BadRequestException, ServiceUnavailableException, Logger } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { PluginRegistry } from '../../entities/plugin-registry.entity';
|
||||
import { InstallPluginDto } from './dto/install-plugin.dto';
|
||||
import { UpdatePluginConfigDto } from './dto/update-plugin-config.dto';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
|
||||
interface UmodCacheEntry {
|
||||
data: unknown;
|
||||
@@ -20,7 +20,7 @@ export class PluginsService {
|
||||
constructor(
|
||||
@InjectRepository(PluginRegistry)
|
||||
private readonly pluginRegistryRepo: Repository<PluginRegistry>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
async getPlugins(licenseId: string): Promise<PluginRegistry[]> {
|
||||
@@ -43,30 +43,11 @@ export class PluginsService {
|
||||
throw new ConflictException(`Plugin ${dto.plugin_name} is already installed`);
|
||||
}
|
||||
|
||||
const plugin = this.pluginRegistryRepo.create({
|
||||
license_id: licenseId,
|
||||
plugin_name: dto.plugin_name,
|
||||
umod_slug: dto.umod_slug,
|
||||
source: dto.source || 'manual',
|
||||
is_installed: true,
|
||||
is_loaded: false,
|
||||
});
|
||||
|
||||
const saved = await this.pluginRegistryRepo.save(plugin);
|
||||
|
||||
try {
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.server`, {
|
||||
action: 'plugin_install',
|
||||
plugin_name: dto.plugin_name,
|
||||
umod_slug: dto.umod_slug,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
this.logger.log(`Plugin install dispatched for ${dto.plugin_name} on license ${licenseId}`);
|
||||
} catch (err) {
|
||||
this.logger.error(`Failed to dispatch plugin install for ${dto.plugin_name} on license ${licenseId}: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
return saved;
|
||||
// One-click uMod install via agent is not yet implemented.
|
||||
// Fail fast — do not persist a DB record for a plugin that won't be deployed.
|
||||
throw new ServiceUnavailableException(
|
||||
'One-click uMod install is coming soon — download the .cs and use Upload for now.',
|
||||
);
|
||||
}
|
||||
|
||||
async uninstallPlugin(licenseId: string, pluginId: string): Promise<void> {
|
||||
@@ -80,11 +61,8 @@ export class PluginsService {
|
||||
|
||||
await this.pluginRegistryRepo.delete({ id: pluginId, license_id: licenseId });
|
||||
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.plugin`, {
|
||||
action: 'unload',
|
||||
plugin_name: plugin.plugin_name,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(licenseId, `oxide.unload ${plugin.plugin_name}`);
|
||||
await this.instancesService.deleteFileForLicense(licenseId, `oxide/plugins/${plugin.plugin_name}.cs`);
|
||||
this.logger.log(`Plugin uninstall dispatched for ${plugin.plugin_name} on license ${licenseId}`);
|
||||
}
|
||||
|
||||
@@ -100,11 +78,7 @@ export class PluginsService {
|
||||
throw new NotFoundException(`Plugin ${pluginId} not found`);
|
||||
}
|
||||
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.plugin`, {
|
||||
action: 'reload',
|
||||
plugin_name: plugin.plugin_name,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(licenseId, `oxide.reload ${plugin.plugin_name}`);
|
||||
this.logger.log(`Plugin reload dispatched for ${plugin.plugin_name} on license ${licenseId}`);
|
||||
|
||||
return { reloaded: true, plugin_name: plugin.plugin_name };
|
||||
@@ -215,19 +189,14 @@ export class PluginsService {
|
||||
|
||||
const saved = await this.pluginRegistryRepo.save(plugin);
|
||||
|
||||
// Dispatch to companion agent via NATS
|
||||
// Deploy .cs file to server via host agent
|
||||
try {
|
||||
const content = file.buffer.toString('base64');
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.server`, {
|
||||
action: 'plugin_upload',
|
||||
filename: originalName,
|
||||
content,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
this.logger.log(`Plugin upload dispatched: "${originalName}" (${file.size} bytes) for license ${licenseId}`);
|
||||
const content = file.buffer.toString('utf8');
|
||||
await this.instancesService.writeFileForLicense(licenseId, `oxide/plugins/${originalName}`, content);
|
||||
this.logger.log(`Plugin upload deployed: "${originalName}" (${file.size} bytes) for license ${licenseId}`);
|
||||
} catch (err) {
|
||||
this.logger.error(`NATS publish failed for plugin upload "${originalName}" on license ${licenseId}: ${(err as Error).message}`);
|
||||
// Don't fail the request — plugin record is saved, NATS delivery is best-effort
|
||||
this.logger.error(`File write failed for plugin upload "${originalName}" on license ${licenseId}: ${(err as Error).message}`);
|
||||
// Don't fail the request — plugin record is saved, file delivery is best-effort
|
||||
}
|
||||
|
||||
return saved;
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { RaidableBasesConfig } from '../../entities/raidablebases-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateRaidableBasesConfigDto } from './dto/create-raidablebases-config.dto';
|
||||
import { UpdateRaidableBasesConfigDto } from './dto/update-raidablebases-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class RaidableBasesService {
|
||||
constructor(
|
||||
@InjectRepository(RaidableBasesConfig)
|
||||
private readonly raidableBasesRepo: Repository<RaidableBasesConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class RaidableBasesService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write RaidableBases.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/RaidableBases.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write RaidableBases.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/RaidableBases.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload RaidableBases plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload RaidableBases',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload RaidableBases');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.raidableBasesRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class RaidableBasesService {
|
||||
/** Import RaidableBases.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read RaidableBases.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/RaidableBases.json',
|
||||
},
|
||||
30000,
|
||||
// Read RaidableBases.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/RaidableBases.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class RaidableBasesService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -10,7 +10,7 @@ import { LessThanOrEqual, Repository } from 'typeorm';
|
||||
import { ScheduledTask } from '../../entities/scheduled-task.entity';
|
||||
import { CreateTaskDto } from './dto/create-task.dto';
|
||||
import { UpdateTaskDto } from './dto/update-task.dto';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
|
||||
/** Parse a 5-field cron expression and return the next Date after `after`. */
|
||||
function nextCronDate(expr: string, after: Date): Date | null {
|
||||
@@ -61,7 +61,7 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {
|
||||
constructor(
|
||||
@InjectRepository(ScheduledTask)
|
||||
private taskRepository: Repository<ScheduledTask>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -160,21 +160,12 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {
|
||||
|
||||
switch (task_type) {
|
||||
case 'restart':
|
||||
await this.natsService.sendServerCommand(license_id, 'restart', {
|
||||
source: 'scheduler',
|
||||
task_id: task.id,
|
||||
});
|
||||
await this.instancesService.lifecycleForLicense(license_id, 'restart');
|
||||
break;
|
||||
|
||||
case 'announcement': {
|
||||
const message = (task_config?.message as string) ?? 'Scheduled announcement';
|
||||
await this.natsService.publish(`corrosion.${license_id}.cmd.server`, {
|
||||
action: 'command',
|
||||
command: `say ${message}`,
|
||||
source: 'scheduler',
|
||||
task_id: task.id,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(license_id, `say ${message}`);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -184,25 +175,13 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {
|
||||
this.logger.warn(`Task ${task.id} has no command configured — skipping`);
|
||||
return;
|
||||
}
|
||||
await this.natsService.publish(`corrosion.${license_id}.cmd.server`, {
|
||||
action: 'command',
|
||||
command,
|
||||
source: 'scheduler',
|
||||
task_id: task.id,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(license_id, command);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'plugin_reload': {
|
||||
const plugin_name = (task_config?.plugin_name as string) ?? '';
|
||||
await this.natsService.publish(`corrosion.${license_id}.cmd.plugin`, {
|
||||
action: 'reload',
|
||||
plugin_name,
|
||||
source: 'scheduler',
|
||||
task_id: task.id,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(license_id, `oxide.reload ${plugin_name}`);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,13 @@ export class ServersController {
|
||||
return await this.serversService.getServer(licenseId);
|
||||
}
|
||||
|
||||
@Get('agent-credentials')
|
||||
@RequirePermission('server.manage')
|
||||
@ApiOperation({ summary: 'NATS credentials for this license\'s host agent' })
|
||||
async getAgentCredentials(@CurrentTenant() licenseId: string) {
|
||||
return await this.serversService.getAgentCredentials(licenseId);
|
||||
}
|
||||
|
||||
@Put('config')
|
||||
@RequirePermission('server.manage')
|
||||
@ApiOperation({ summary: 'Update server configuration' })
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import { Injectable, NotFoundException, InternalServerErrorException, Logger } from '@nestjs/common';
|
||||
import { Injectable, NotFoundException, InternalServerErrorException, ServiceUnavailableException, Logger } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { ServerConnection } from '../../entities/server-connection.entity';
|
||||
import { ServerConfig } from '../../entities/server-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { UpdateServerConfigDto } from './dto/update-config.dto';
|
||||
import { DeployServerDto } from './dto/deploy-server.dto';
|
||||
|
||||
@@ -17,8 +18,18 @@ export class ServersService {
|
||||
@InjectRepository(ServerConfig)
|
||||
private readonly configRepo: Repository<ServerConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* NATS credentials the customer puts in their host agent's config so it can
|
||||
* authenticate to the per-license-scoped broker. Returns null if the broker
|
||||
* isn't enforcing auth yet (NATS_TOKEN_SECRET unset).
|
||||
*/
|
||||
async getAgentCredentials(licenseId: string) {
|
||||
return this.natsService.getAgentCredentials(licenseId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get server connection and config for a license.
|
||||
* Returns null fields if no server has been set up yet.
|
||||
@@ -59,11 +70,11 @@ export class ServersService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a console command to the server via NATS
|
||||
* Send a console command to the server via the host agent (RCON)
|
||||
*/
|
||||
async sendCommand(licenseId: string, command: string) {
|
||||
try {
|
||||
await this.natsService.sendServerCommand(licenseId, 'command', { command });
|
||||
await this.instancesService.rconForLicense(licenseId, command);
|
||||
this.logger.log(`Console command dispatched for license ${licenseId}: ${command}`);
|
||||
} catch (err) {
|
||||
this.logger.error(`Failed to dispatch console command for license ${licenseId}: ${(err as Error).message}`);
|
||||
@@ -73,42 +84,45 @@ export class ServersService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the server via NATS
|
||||
* Start the server via the host agent
|
||||
*/
|
||||
async startServer(licenseId: string) {
|
||||
await this.natsService.sendServerCommand(licenseId, 'start');
|
||||
await this.instancesService.lifecycleForLicense(licenseId, 'start');
|
||||
return { message: 'Start command sent' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the server via NATS
|
||||
* Stop the server via the host agent
|
||||
*/
|
||||
async stopServer(licenseId: string) {
|
||||
await this.natsService.sendServerCommand(licenseId, 'stop');
|
||||
await this.instancesService.lifecycleForLicense(licenseId, 'stop');
|
||||
return { message: 'Stop command sent' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Restart the server via NATS
|
||||
* Restart the server via the host agent
|
||||
*/
|
||||
async restartServer(licenseId: string) {
|
||||
await this.natsService.sendServerCommand(licenseId, 'restart');
|
||||
await this.instancesService.lifecycleForLicense(licenseId, 'restart');
|
||||
return { message: 'Restart command sent' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Deploy Rust server via companion agent
|
||||
* Deploy Rust server — not yet supported via host agent.
|
||||
* Install the server manually and point the host agent at it.
|
||||
*/
|
||||
async deployServer(licenseId: string, dto: DeployServerDto) {
|
||||
await this.natsService.sendDeployCommand(licenseId, { ...dto });
|
||||
return { message: 'Deployment started' };
|
||||
async deployServer(_licenseId: string, _dto: DeployServerDto) {
|
||||
throw new ServiceUnavailableException(
|
||||
'Server deployment from the panel is coming soon — install the server and point the host agent at it for now.',
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Install Oxide/uMod via companion agent
|
||||
* Install Oxide/uMod — not yet supported via host agent.
|
||||
*/
|
||||
async installOxide(licenseId: string) {
|
||||
await this.natsService.sendOxideInstallCommand(licenseId);
|
||||
return { message: 'Oxide installation started' };
|
||||
async installOxide(_licenseId: string) {
|
||||
throw new ServiceUnavailableException(
|
||||
'Oxide install from the panel is coming soon — install Oxide/uMod on the server for now.',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Injectable, ServiceUnavailableException } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
@@ -55,6 +55,13 @@ export class SetupService {
|
||||
if (dto.panel_api_key) {
|
||||
const encryptionKey = this.configService.get<string>('encryption.key', '');
|
||||
const keyBuffer = Buffer.from(encryptionKey, 'hex');
|
||||
// AES-256-GCM needs a 32-byte key. An unset/short ENCRYPTION_KEY would
|
||||
// otherwise crash createCipheriv with an opaque "Invalid key length" 500.
|
||||
if (keyBuffer.length !== 32) {
|
||||
throw new ServiceUnavailableException(
|
||||
'Server encryption is not configured (ENCRYPTION_KEY must be 32 bytes / 64 hex chars). Contact the platform operator.',
|
||||
);
|
||||
}
|
||||
const iv = crypto.randomBytes(16);
|
||||
const cipher = crypto.createCipheriv('aes-256-gcm', keyBuffer, iv);
|
||||
const encrypted = Buffer.concat([
|
||||
@@ -82,9 +89,12 @@ export class SetupService {
|
||||
});
|
||||
|
||||
if (connection) {
|
||||
// For bare metal, mark as connected immediately (waiting for agent)
|
||||
if (connection.connection_type === 'bare_metal') {
|
||||
connection.connection_status = 'connected';
|
||||
// Bare-metal stays 'offline' until the agent's first heartbeat flips it
|
||||
// 'connected' (HostAgentConsumerService). Marking it connected here was a
|
||||
// false positive — the dashboard showed a live server before any agent
|
||||
// had checked in.
|
||||
if (connection.connection_type === 'bare_metal' && connection.connection_status !== 'connected') {
|
||||
connection.connection_status = 'offline';
|
||||
connection.updated_at = new Date();
|
||||
await this.connectionRepo.save(connection);
|
||||
}
|
||||
|
||||
@@ -57,11 +57,17 @@ export class StoreService {
|
||||
throw new NotFoundException('Module not found');
|
||||
}
|
||||
|
||||
// Beta: modules are granted free (no payment processing wired yet). Record
|
||||
// it honestly as a beta grant at $0 rather than a fake `txn_*` id that
|
||||
// implies a real charge occurred.
|
||||
this.logger.log(
|
||||
`Granting module ${moduleId} to license ${licenseId} free (Beta — no payment processing)`,
|
||||
);
|
||||
const purchase = this.purchaseRepo.create({
|
||||
license_id: licenseId,
|
||||
module_id: moduleId,
|
||||
transaction_id: `txn_${Date.now()}`,
|
||||
amount_paid: parseFloat(module.price_usd.toString()),
|
||||
transaction_id: 'beta-free-grant',
|
||||
amount_paid: 0,
|
||||
});
|
||||
|
||||
return this.purchaseRepo.save(purchase);
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { TeleportConfig } from '../../entities/teleport-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateTeleportConfigDto } from './dto/create-teleport-config.dto';
|
||||
import { UpdateTeleportConfigDto } from './dto/update-teleport-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class TeleportService {
|
||||
constructor(
|
||||
@InjectRepository(TeleportConfig)
|
||||
private readonly teleportRepo: Repository<TeleportConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class TeleportService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write NTeleportation.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/NTeleportation.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write NTeleportation.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/NTeleportation.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload NTeleportation plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload NTeleportation',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload NTeleportation');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.teleportRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class TeleportService {
|
||||
/** Import NTeleportation.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read NTeleportation.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/NTeleportation.json',
|
||||
},
|
||||
30000,
|
||||
// Read NTeleportation.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/NTeleportation.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class TeleportService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { TimedExecuteConfig } from '../../entities/timedexecute-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateTimedExecuteConfigDto } from './dto/create-timedexecute-config.dto';
|
||||
import { UpdateTimedExecuteConfigDto } from './dto/update-timedexecute-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class TimedExecuteService {
|
||||
constructor(
|
||||
@InjectRepository(TimedExecuteConfig)
|
||||
private readonly repo: Repository<TimedExecuteConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class TimedExecuteService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write TimedExecute.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/TimedExecute.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write TimedExecute.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/TimedExecute.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload TimedExecute plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload TimedExecute',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload TimedExecute');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.repo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class TimedExecuteService {
|
||||
/** Import TimedExecute.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read TimedExecute.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/TimedExecute.json',
|
||||
},
|
||||
30000,
|
||||
// Read TimedExecute.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/TimedExecute.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class TimedExecuteService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Injectable, NotFoundException } from '@nestjs/common';
|
||||
import { Injectable, NotFoundException, ServiceUnavailableException } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { StoreConfig } from '../../entities/store-config.entity';
|
||||
@@ -224,23 +224,13 @@ export class WebstoreService {
|
||||
throw new NotFoundException('Item not found');
|
||||
}
|
||||
|
||||
const transaction = this.transactionRepo.create({
|
||||
license_id: license.id,
|
||||
item_id: item.id,
|
||||
steam_id: dto.steam_id,
|
||||
player_name: dto.player_name,
|
||||
paypal_order_id: `order_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
||||
amount: parseFloat(item.price.toString()),
|
||||
currency: 'USD', // Would get from config
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
await this.transactionRepo.save(transaction);
|
||||
|
||||
// Return mock PayPal approval URL
|
||||
return {
|
||||
order_id: transaction.paypal_order_id,
|
||||
approval_url: `https://www.sandbox.paypal.com/checkoutnow?token=${transaction.paypal_order_id}`,
|
||||
};
|
||||
// Beta: real PayPal/Stripe processing is not wired yet. Refuse honestly
|
||||
// instead of writing a pending transaction and handing the player a fake
|
||||
// order token that resolves to nowhere. (item lookup above still validates
|
||||
// the request so the storefront UI can show the catalogue.)
|
||||
void item;
|
||||
throw new ServiceUnavailableException(
|
||||
'Storefront checkout is not available yet — payment processing is coming soon.',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import { CreateProfileDto } from './dto/create-profile.dto';
|
||||
import { UpdateProfileDto } from './dto/update-profile.dto';
|
||||
import { CreateScheduleDto } from './dto/create-schedule.dto';
|
||||
import { TriggerWipeDto } from './dto/trigger-wipe.dto';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
|
||||
@Injectable()
|
||||
export class WipesService {
|
||||
@@ -21,7 +21,7 @@ export class WipesService {
|
||||
private readonly wipeScheduleRepo: Repository<WipeSchedule>,
|
||||
@InjectRepository(WipeHistory)
|
||||
private readonly wipeHistoryRepo: Repository<WipeHistory>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
async getProfiles(licenseId: string): Promise<WipeProfile[]> {
|
||||
@@ -107,13 +107,7 @@ export class WipesService {
|
||||
|
||||
const saved = await this.wipeHistoryRepo.save(history);
|
||||
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.wipe`, {
|
||||
wipe_history_id: saved.id,
|
||||
wipe_type: dto.wipe_type,
|
||||
wipe_profile_id: dto.wipe_profile_id ?? null,
|
||||
trigger_type: 'manual',
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.wipeForLicense(licenseId, dto.wipe_type, true);
|
||||
this.logger.log(`Wipe triggered for license ${licenseId} — history id ${saved.id}`);
|
||||
|
||||
return { wipe_history_id: saved.id };
|
||||
|
||||
267
backend-nest/src/services/host-agent-consumer.service.ts
Normal file
267
backend-nest/src/services/host-agent-consumer.service.ts
Normal file
@@ -0,0 +1,267 @@
|
||||
import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common';
|
||||
import { Interval } from '@nestjs/schedule';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { NatsService } from './nats.service';
|
||||
import { ServerConnection } from '../entities/server-connection.entity';
|
||||
import { License } from '../entities/license.entity';
|
||||
import { AgentHost, AgentHostDisk } from '../entities/agent-host.entity';
|
||||
import { GameInstance } from '../entities/game-instance.entity';
|
||||
|
||||
/**
|
||||
* Consumes Corrosion wire protocol v2 host-agent subjects
|
||||
* (corrosion-host-agent/PROTOCOL.md) and keeps the fleet model truthful.
|
||||
*
|
||||
* Writes the License → Host → Instance model (hosts + game_instances) from
|
||||
* each heartbeat, AND maintains the legacy single-server `server_connections`
|
||||
* row so the current panel keeps working during the fleet UI transition.
|
||||
*
|
||||
* Host identity: until enrollment issues a stable host id, a host is keyed by
|
||||
* (license_id, hostname). One agent = one host today; the schema is already
|
||||
* multi-host-ready.
|
||||
*/
|
||||
/**
 * Shape of a host-agent heartbeat message as this service reads it.
 * Every top-level section is optional; only `id` and `game` of an
 * instance entry are declared as required.
 */
interface HeartbeatPayload {
  schema?: number;
  timestamp?: string;
  // Build/platform details reported by the agent itself.
  agent?: {
    version?: string;
    commit?: string;
    os?: string;
    arch?: string;
  };
  // Machine-level metrics; `hostname` may be explicitly null.
  host?: {
    hostname?: string | null;
    cpu_percent?: number;
    cpu_cores?: number;
    mem_total_mb?: number;
    mem_used_mb?: number;
    uptime_seconds?: number;
    disks?: AgentHostDisk[];
  };
  // One entry per game instance the agent reports.
  instances?: {
    id: string;
    game: string;
    label?: string | null;
    state?: string;
    uptime_seconds?: number;
  }[];
}
|
||||
|
||||
@Injectable()
|
||||
export class HostAgentConsumerService implements OnApplicationBootstrap {
|
||||
private readonly logger: Logger = new Logger(HostAgentConsumerService.name);

// NOTE(review): both caches are consumed outside this excerpt — presumably by
// the tenant validation helper (license id -> timestamp, and ids already
// warned about). Confirm against isValidTenant.
private knownLicenses: Map<string, number> = new Map();
private warnedUnknown: Set<string> = new Set();

// Canonical 8-4-4-4-12 hex UUID, case-insensitive.
private static readonly UUID_RE =
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
// 5 minutes, in milliseconds.
private static readonly LICENSE_CACHE_TTL_MS = 5 * 60_000;
// 3 minutes, in milliseconds; the stale-connection sweep subtracts this from "now".
private static readonly OFFLINE_AFTER_MS = 180_000;
|
||||
|
||||
/**
 * Injects the NATS client plus the four repositories this consumer writes to:
 * the legacy connection row, licenses, fleet hosts and game instances.
 */
constructor(
  private readonly nats: NatsService,
  @InjectRepository(ServerConnection) private readonly connectionRepository: Repository<ServerConnection>,
  @InjectRepository(License) private readonly licenseRepository: Repository<License>,
  @InjectRepository(AgentHost) private readonly hostRepository: Repository<AgentHost>,
  @InjectRepository(GameInstance) private readonly instanceRepository: Repository<GameInstance>,
) {}
|
||||
|
||||
/**
 * Registers the host-agent subscriptions (heartbeat and going-offline beacon).
 *
 * Runs in the application-bootstrap hook rather than module-init: a
 * subscription registered before NatsService has finished connecting
 * silently no-ops (see NatsBridgeService note).
 */
onApplicationBootstrap() {
  // Subjects look like `corrosion.<licenseId>.host.<event>`; the license id
  // is the second dot-separated token.
  const licenseOf = (subject: string): string => subject.split('.')[1];

  this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
    const licenseId = licenseOf(subject);
    // Fire-and-forget: failures are logged, never propagated to the NATS callback.
    void this.onHeartbeat(licenseId, data as HeartbeatPayload).catch((err) => {
      this.logger.error(`heartbeat handling failed for ${licenseId}: ${err.message}`, err.stack);
    });
  });

  this.nats.subscribe('corrosion.*.host.going_offline', (_data, subject) => {
    const licenseId = licenseOf(subject);
    void this.onGoingOffline(licenseId).catch((err) => {
      this.logger.error(`going_offline handling failed for ${licenseId}: ${err.message}`, err.stack);
    });
  });

  this.logger.log('Host agent (protocol v2) consumer subscriptions initialized');
}
|
||||
|
||||
/**
 * Handles one heartbeat: validates the tenant and the payload, then refreshes
 * the legacy connection row, the host row and the instance rows, in that order.
 */
private async onHeartbeat(licenseId: string, payload: HeartbeatPayload): Promise<void> {
  const tenantOk = await this.isValidTenant(licenseId);
  if (!tenantOk) {
    return;
  }

  // A well-formed v2 heartbeat always carries a host block. Anything else is
  // dropped so a stray/empty publish cannot create a phantom host row.
  const hasHostBlock =
    Boolean(payload) && typeof payload.host === 'object' && payload.host !== null;
  if (!hasHostBlock) {
    this.logger.warn(`ignoring malformed heartbeat for license ${licenseId} (no host block)`);
    return;
  }

  // One timestamp is shared by every row touched for this heartbeat.
  const receivedAt = new Date();

  await this.updateLegacyConnection(licenseId, receivedAt);
  const hostRow = await this.upsertHost(licenseId, payload, receivedAt);
  await this.upsertInstances(licenseId, hostRow, payload, receivedAt);
}
|
||||
|
||||
/**
 * Refreshes the legacy single-server `server_connections` row so the current
 * panel keeps working: marks it connected and stamps `companion_last_seen`.
 * Creates a `bare_metal` row when the license has none yet.
 */
private async updateLegacyConnection(licenseId: string, now: Date): Promise<void> {
  const row = await this.connectionRepository.findOne({ where: { license_id: licenseId } });

  if (!row) {
    // First heartbeat for this license — create the row already connected.
    const fresh = this.connectionRepository.create({
      license_id: licenseId,
      connection_type: 'bare_metal',
      connection_status: 'connected',
      companion_last_seen: now,
    });
    await this.connectionRepository.save(fresh);
    return;
  }

  await this.connectionRepository.update(
    { id: row.id },
    { companion_last_seen: now, connection_status: 'connected', updated_at: now },
  );
}
|
||||
|
||||
/**
 * Inserts or updates the fleet host row identified by (license_id, hostname).
 * A missing or null hostname is stored as the empty string. Metrics absent
 * from the payload are written as null.
 *
 * @returns the saved row on insert, or the previous row merged with the new
 *          field values on update.
 */
private async upsertHost(licenseId: string, payload: HeartbeatPayload, now: Date): Promise<AgentHost> {
  const { agent, host } = payload;
  const hostname = host?.hostname ?? '';

  const fields = {
    agent_version: agent?.version ?? null,
    agent_commit: agent?.commit ?? null,
    os: agent?.os ?? null,
    arch: agent?.arch ?? null,
    status: 'connected',
    last_heartbeat_at: now,
    cpu_percent: host?.cpu_percent ?? null,
    cpu_cores: host?.cpu_cores ?? null,
    mem_total_mb: host?.mem_total_mb ?? null,
    mem_used_mb: host?.mem_used_mb ?? null,
    uptime_seconds: host?.uptime_seconds ?? null,
    disks: host?.disks ?? null,
    updated_at: now,
  };

  const current = await this.hostRepository.findOne({
    where: { license_id: licenseId, hostname },
  });

  if (!current) {
    const draft = this.hostRepository.create({ license_id: licenseId, hostname, ...fields });
    const inserted = await this.hostRepository.save(draft);
    this.logger.log(`host registered for license ${licenseId} (hostname '${hostname || 'unknown'}')`);
    return inserted;
  }

  await this.hostRepository.update({ id: current.id }, fields);
  // Return the in-memory merge instead of re-reading the row.
  return { ...current, ...fields } as AgentHost;
}
|
||||
|
||||
/**
 * Upserts one `game_instances` row per instance entry in the heartbeat,
 * keyed by (license_id, agent_instance_id).
 *
 * Entries without an `id` or `game` are skipped. A payload whose `instances`
 * field is present but not an array is ignored with a warning; without that
 * guard, iterating a non-iterable value would throw after the host row had
 * already been written.
 *
 * @param licenseId license the heartbeat was published under
 * @param host      host row the instances are attached to (`host.id` is stored)
 * @param payload   heartbeat payload; only `instances` is read here
 * @param now       timestamp written to `last_seen_at` / `updated_at`
 */
private async upsertInstances(
  licenseId: string,
  host: AgentHost,
  payload: HeartbeatPayload,
  now: Date,
): Promise<void> {
  const reported = payload.instances ?? [];
  if (!Array.isArray(reported)) {
    this.logger.warn(`ignoring malformed heartbeat instances for license ${licenseId} (not an array)`);
    return;
  }

  for (const inst of reported) {
    if (!inst?.id || !inst?.game) continue;

    const fields = {
      host_id: host.id,
      game: inst.game,
      label: inst.label ?? null,
      state: inst.state ?? 'unknown',
      uptime_seconds: inst.uptime_seconds ?? 0,
      last_seen_at: now,
      updated_at: now,
    };

    const existing = await this.instanceRepository.findOne({
      where: { license_id: licenseId, agent_instance_id: inst.id },
    });

    if (existing) {
      await this.instanceRepository.update({ id: existing.id }, fields);
      continue;
    }

    await this.instanceRepository.save(
      this.instanceRepository.create({
        license_id: licenseId,
        agent_instance_id: inst.id,
        ...fields,
      }),
    );
    this.logger.log(`instance '${inst.id}' (${inst.game}) registered for license ${licenseId}`);
  }
}
|
||||
|
||||
/**
 * Handle a graceful going-offline beacon: flip the legacy connection row and
 * every fleet host row of the license to `offline`. Beacons for subjects that
 * fail tenant validation are ignored.
 */
private async onGoingOffline(licenseId: string): Promise<void> {
  const tenantOk = await this.isValidTenant(licenseId);
  if (!tenantOk) {
    return;
  }

  const stamp = new Date();
  const byLicense = { license_id: licenseId };

  await this.connectionRepository.update(byLicense, {
    connection_status: 'offline',
    updated_at: stamp,
  });
  await this.hostRepository.update(byLicense, {
    status: 'offline',
    updated_at: stamp,
  });

  this.logger.log(`host(s) for license ${licenseId} went offline (graceful beacon)`);
}
|
||||
|
||||
/**
 * Periodic staleness sweep (every 60s).
 *
 * An agent that crashes or drops off the network never sends the goodbye
 * beacon, so silence has to be detected here: any legacy connection or fleet
 * host still marked `connected` whose last heartbeat is older than
 * OFFLINE_AFTER_MS is flipped to `offline`.
 */
@Interval(60_000)
async sweepStaleConnections(): Promise<void> {
  const cutoff = new Date(Date.now() - HostAgentConsumerService.OFFLINE_AFTER_MS);

  // Legacy single-server connection rows.
  const legacySweep = await this.connectionRepository
    .createQueryBuilder()
    .update(ServerConnection)
    .set({ connection_status: 'offline', updated_at: () => 'NOW()' })
    .where('connection_status = :connected', { connected: 'connected' })
    .andWhere('companion_last_seen IS NOT NULL')
    .andWhere('companion_last_seen < :threshold', { threshold: cutoff })
    .execute();

  // Fleet host rows.
  const fleetSweep = await this.hostRepository
    .createQueryBuilder()
    .update(AgentHost)
    .set({ status: 'offline', updated_at: () => 'NOW()' })
    .where('status = :connected', { connected: 'connected' })
    .andWhere('last_heartbeat_at IS NOT NULL')
    .andWhere('last_heartbeat_at < :threshold', { threshold: cutoff })
    .execute();

  const total = (legacySweep.affected ?? 0) + (fleetSweep.affected ?? 0);
  if (!total) {
    return;
  }
  this.logger.warn(`marked ${total} stale connection/host record(s) offline`);
}
|
||||
|
||||
/**
 * Tenant validation for a NATS subject segment.
 *
 * The segment must look like a UUID and match an existing license, so the
 * consumer never writes rows for subjects an arbitrary publisher made up.
 * Positive lookups are cached for LICENSE_CACHE_TTL_MS to avoid one query per
 * heartbeat; negative results are not cached.
 *
 * @returns true when the id belongs to a real license
 */
private async isValidTenant(licenseId: string): Promise<boolean> {
  const looksLikeUuid = HostAgentConsumerService.UUID_RE.test(licenseId);
  if (!looksLikeUuid) {
    this.warnUnknownOnce(licenseId, 'not a UUID');
    return false;
  }

  // Cache hit: the stored value is the expiry timestamp in epoch ms.
  const expiresAt = this.knownLicenses.get(licenseId);
  if (expiresAt && expiresAt > Date.now()) {
    return true;
  }

  const found = await this.licenseRepository.exist({ where: { id: licenseId } });
  if (found) {
    this.knownLicenses.set(licenseId, Date.now() + HostAgentConsumerService.LICENSE_CACHE_TTL_MS);
    return true;
  }

  this.warnUnknownOnce(licenseId, 'no such license');
  return false;
}
|
||||
|
||||
/**
 * Log a warning the first time traffic arrives for an invalid license id, so
 * a misconfigured or hostile publisher cannot flood the log with one line per
 * message.
 *
 * @param licenseId the rejected subject segment
 * @param reason    short explanation included in the log line
 */
private warnUnknownOnce(licenseId: string, reason: string): void {
  if (this.warnedUnknown.has(licenseId)) return;

  // The ids tracked here are publisher-chosen subject segments, so the set
  // must not grow without bound. Once the cap is hit, start over: this costs
  // at most a few repeated warnings and keeps memory bounded.
  const MAX_TRACKED = 10_000;
  if (this.warnedUnknown.size >= MAX_TRACKED) {
    this.warnedUnknown.clear();
  }

  this.warnedUnknown.add(licenseId);
  this.logger.warn(`ignoring host-agent traffic for invalid license '${licenseId}' (${reason})`);
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
export { NatsService } from './nats.service';
|
||||
export { NatsBridgeService } from './nats-bridge.service';
|
||||
export { HostAgentConsumerService } from './host-agent-consumer.service';
|
||||
export { SteamService } from './steam.service';
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
|
||||
import { Injectable, OnApplicationBootstrap, Logger } from '@nestjs/common';
|
||||
import { NatsService } from './nats.service';
|
||||
|
||||
@Injectable()
|
||||
export class NatsBridgeService implements OnModuleInit {
|
||||
export class NatsBridgeService implements OnApplicationBootstrap {
|
||||
private readonly logger = new Logger(NatsBridgeService.name);
|
||||
private listeners: Map<string, Set<(event: string, data: unknown) => void>> = new Map();
|
||||
|
||||
constructor(private nats: NatsService) {}
|
||||
|
||||
onModuleInit() {
|
||||
// Subscriptions MUST happen in onApplicationBootstrap, not onModuleInit:
|
||||
// provider onModuleInit order is not guaranteed, and these hooks once ran
|
||||
// before NatsService connected — every subscribe() silently no-oped and the
|
||||
// WS bridge was dead from boot. Bootstrap runs after ALL module inits
|
||||
// (including the awaited NATS connect) complete.
|
||||
onApplicationBootstrap() {
|
||||
this.nats.subscribe('corrosion.*.companion.heartbeat', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'heartbeat', data);
|
||||
@@ -44,6 +49,17 @@ export class NatsBridgeService implements OnModuleInit {
|
||||
this.emit(licenseId, 'oxide_status', data);
|
||||
});
|
||||
|
||||
// Wire protocol v2 (corrosion-host-agent) — host-level telemetry
|
||||
this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'host_heartbeat', data);
|
||||
});
|
||||
|
||||
this.nats.subscribe('corrosion.*.host.going_offline', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'host_going_offline', data);
|
||||
});
|
||||
|
||||
this.logger.log('NATS bridge subscriptions initialized');
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
import { Injectable, OnModuleInit, OnModuleDestroy, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { connect, NatsConnection, StringCodec, Subscription } from 'nats';
|
||||
import { createHmac, randomUUID } from 'crypto';
|
||||
|
||||
/**
 * NATS connection details handed to a license's host agent, in the shape
 * returned by NatsService.getAgentCredentials().
 */
export interface AgentCredentials {
|
||||
// License the credentials belong to.
license_id: string;
|
||||
// NATS username; getAgentCredentials() sets it to the license id.
nats_user: string;
|
||||
// NATS password; getAgentCredentials() derives it as a hex HMAC-SHA256 of the license id.
nats_password: string;
|
||||
// Broker URL the agent should connect to.
nats_url: string;
|
||||
}
|
||||
|
||||
@Injectable()
|
||||
export class NatsService implements OnModuleInit, OnModuleDestroy {
|
||||
@@ -13,8 +21,13 @@ export class NatsService implements OnModuleInit, OnModuleDestroy {
|
||||
async onModuleInit() {
|
||||
try {
|
||||
const url = this.config.get<string>('nats.url') || 'nats://localhost:4222';
|
||||
this.nc = await connect({ servers: url });
|
||||
this.logger.log(`Connected to NATS at ${url}`);
|
||||
const user = this.config.get<string>('nats.internalUser');
|
||||
const pass = this.config.get<string>('nats.internalPassword');
|
||||
// Authenticate with the privileged internal user when configured;
|
||||
// otherwise connect anonymously (broker hasn't enforced auth yet).
|
||||
const opts = user && pass ? { servers: url, user, pass } : { servers: url };
|
||||
this.nc = await connect(opts);
|
||||
this.logger.log(`Connected to NATS at ${url}${user ? ` as ${user}` : ' (anonymous)'}`);
|
||||
} catch (err) {
|
||||
this.logger.warn(`NATS connection failed — running in offline mode: ${(err as Error).message}`);
|
||||
}
|
||||
@@ -62,6 +75,64 @@ export class NatsService implements OnModuleInit, OnModuleDestroy {
|
||||
return sub;
|
||||
}
|
||||
|
||||
/**
 * Request-reply against a host-agent subject using a LICENSE-SCOPED reply
 * subject.
 *
 * Per-license agent users may only touch corrosion.{license}.> and have no
 * _INBOX permission, so an agent cannot answer on the client's default global
 * inbox. The reply subject is therefore minted inside the license namespace
 * (corrosion.{license}.reply.<uuid>) and the privileged backend listens there.
 * See corrosion-host-agent/PROTOCOL.md ("Reply-subject rule").
 *
 * @param licenseId license whose namespace hosts the reply subject
 * @param subject   subject the request is published to
 * @param payload   request body, sent as JSON
 * @param timeoutMs how long to wait for the single reply
 * @returns the reply parsed as JSON, or the raw reply text if it is not JSON
 * @throws Error when NATS is not connected, or when the reply subscription
 *         reports an error (surfaced as a "did not respond" timeout message)
 */
async requestScoped<T = unknown>(
  licenseId: string,
  subject: string,
  payload: Record<string, unknown>,
  timeoutMs = 8000,
): Promise<T> {
  const conn = this.nc;
  if (!conn) {
    throw new Error('NATS unavailable — agent is not reachable');
  }

  const inbox = `corrosion.${licenseId}.reply.${randomUUID()}`;
  const body = JSON.stringify(payload);

  return new Promise<T>((resolve, reject) => {
    // Listen first so the reply cannot arrive before the subscription exists.
    conn.subscribe(inbox, {
      max: 1,
      timeout: timeoutMs,
      callback: (err, msg) => {
        if (err) {
          reject(new Error(`agent did not respond within ${timeoutMs}ms`));
          return;
        }
        const text = this.sc.decode(msg.data);
        let reply: unknown;
        try {
          reply = JSON.parse(text);
        } catch {
          // Non-JSON replies are handed back verbatim.
          reply = text;
        }
        resolve(reply as T);
      },
    });
    conn.publish(subject, this.sc.encode(body), { reply: inbox });
  });
}
|
||||
|
||||
/**
 * Build the NATS credentials a license's host agent connects with.
 *
 * The password is HMAC-SHA256(license_id, NATS_TOKEN_SECRET) as a hex string
 * and has to match the broker config produced by
 * scripts/generate-nats-auth.mjs.
 *
 * @returns the credentials, or null when no token secret is configured
 *          (broker not yet enforcing auth)
 */
getAgentCredentials(licenseId: string): AgentCredentials | null {
  const tokenSecret = this.config.get<string>('nats.tokenSecret');
  if (!tokenSecret) {
    return null;
  }

  const hmac = createHmac('sha256', tokenSecret);
  hmac.update(licenseId);
  const derivedPassword = hmac.digest('hex');

  const publicUrl = this.config.get<string>('nats.publicUrl');
  const credentials: AgentCredentials = {
    license_id: licenseId,
    nats_user: licenseId,
    nats_password: derivedPassword,
    nats_url: publicUrl || 'nats://nats.corrosionmgmt.com:4222',
  };
  return credentials;
}
|
||||
|
||||
/** Publish a command to a specific license's server */
|
||||
async sendServerCommand(licenseId: string, action: string, payload: Record<string, unknown> = {}): Promise<void> {
|
||||
await this.publish(`corrosion.${licenseId}.cmd.server`, {
|
||||
|
||||
102
backend/migrations/022_fleet_model.sql
Normal file
102
backend/migrations/022_fleet_model.sql
Normal file
@@ -0,0 +1,102 @@
|
||||
-- Fleet data model — License → Host → Instance (with optional Cluster)
|
||||
--
|
||||
-- ADDITIVE: existing server_connections / server_config / server_stats are
|
||||
-- left untouched so the current single-server panel keeps working. The
|
||||
-- host-agent consumer writes BOTH the legacy connection row and these fleet
|
||||
-- tables during the transition; the panel migrates to the fleet tables in a
|
||||
-- later phase.
|
||||
--
|
||||
-- Shape mirrors the host agent's wire protocol v2 heartbeat:
|
||||
-- host{} block → agent_hosts
|
||||
-- instances[] entries → game_instances
|
||||
-- Host metrics (CPU/RAM/disk) live on the HOST, not duplicated per instance.
|
||||
--
|
||||
-- Named `agent_hosts` (not `hosts`) to avoid collision with the existing B2B
|
||||
-- `hosts` table (hosting-partner companies) — different concept entirely.
|
||||
|
||||
-- =========================================================
-- agent_hosts: one row per Corrosion host agent (one machine).
-- Carries agent build info, liveness and the latest host-level
-- telemetry from the heartbeat's host{} block.
-- =========================================================
CREATE TABLE IF NOT EXISTS agent_hosts (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
    -- Natural key for now; replaced once enrollment issues a stable host identity.
    hostname VARCHAR(255) NOT NULL DEFAULT '',
    agent_version VARCHAR(64),
    agent_commit VARCHAR(64),
    os VARCHAR(32),
    arch VARCHAR(32),
    status VARCHAR(20) NOT NULL DEFAULT 'offline'
        CHECK (status IN ('connected', 'degraded', 'offline')),
    last_heartbeat_at TIMESTAMPTZ,
    cpu_percent DOUBLE PRECISION,
    cpu_cores INTEGER,
    mem_total_mb BIGINT,
    mem_used_mb BIGINT,
    uptime_seconds BIGINT,
    -- Example shape: [{ "mount": "/", "total_mb": n, "free_mb": n }]
    disks JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (license_id, hostname)
);

CREATE INDEX IF NOT EXISTS idx_agent_hosts_license ON agent_hosts(license_id);
|
||||
|
||||
-- =========================================================
-- instance_clusters: optional grouping of game instances
-- (Soulmask main/child, Dune battlegroup). The table is reserved
-- now; cluster logic ships with those game adapters.
-- =========================================================
CREATE TABLE IF NOT EXISTS instance_clusters (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
    game VARCHAR(32) NOT NULL,
    name VARCHAR(255) NOT NULL,
    -- main_client | battlegroup
    topology VARCHAR(32),
    config JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_clusters_license ON instance_clusters(license_id);
|
||||
|
||||
-- =========================================================
-- game_instances: one game server process / orchestrated unit.
-- This is the billing unit (plans count instances).
-- =========================================================
CREATE TABLE IF NOT EXISTS game_instances (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
    -- Deleting a host or cluster detaches the instance instead of deleting it.
    host_id UUID REFERENCES agent_hosts(id) ON DELETE SET NULL,
    cluster_id UUID REFERENCES instance_clusters(id) ON DELETE SET NULL,
    -- The agent's instance slug; also the NATS subject segment.
    agent_instance_id VARCHAR(64) NOT NULL,
    game VARCHAR(32) NOT NULL,
    label VARCHAR(255),
    -- running | stopped | starting | stopping | crashed
    --   | configured | missing_root | unmanaged | unknown
    state VARCHAR(32) NOT NULL DEFAULT 'unknown',
    root_path TEXT,
    uptime_seconds BIGINT NOT NULL DEFAULT 0,
    last_seen_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (license_id, agent_instance_id)
);

CREATE INDEX IF NOT EXISTS idx_instances_license ON game_instances(license_id);
CREATE INDEX IF NOT EXISTS idx_instances_host ON game_instances(host_id);
|
||||
|
||||
-- =========================================================
-- instance_stats: per-instance time series of GAME metrics.
-- Filled once game-level telemetry (player count / FPS via RCON or
-- plugin) is collected; the host heartbeat only carries host metrics.
-- =========================================================
CREATE TABLE IF NOT EXISTS instance_stats (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    instance_id UUID NOT NULL REFERENCES game_instances(id) ON DELETE CASCADE,
    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
    player_count INTEGER NOT NULL DEFAULT 0,
    max_players INTEGER NOT NULL DEFAULT 0,
    fps DOUBLE PRECISION NOT NULL DEFAULT 0,
    memory_usage_mb INTEGER NOT NULL DEFAULT 0,
    recorded_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Serves "latest samples for an instance" reads.
CREATE INDEX IF NOT EXISTS idx_instance_stats_instance
    ON instance_stats(instance_id, recorded_at DESC);
|
||||
152
contract-tests/agent-backend.contract.mjs
Normal file
152
contract-tests/agent-backend.contract.mjs
Normal file
@@ -0,0 +1,152 @@
|
||||
// Full-pipeline contract test: Rust host agent → NATS → NestJS consumer → Postgres.
|
||||
//
|
||||
// Proves the wire protocol v2 chain end to end against a REAL backend and DB:
|
||||
// 1. agent heartbeat arrives with schema 2 + measured telemetry
|
||||
// 2. backend auto-registers the server_connections row and marks it connected
|
||||
// 3. instance command channel round-trips (start/status/stop) with push events
|
||||
// 4. graceful agent shutdown publishes the offline beacon and the row flips offline
|
||||
//
|
||||
// Required env:
|
||||
// LICENSE_ID — existing license uuid (CI: from the admin seed)
|
||||
// DATABASE_URL — postgres connection string for assertions
|
||||
// NATS_URL — broker both agent and backend use (default nats://localhost:4222)
|
||||
// AGENT_BIN — path to the corrosion-host-agent binary
|
||||
//
|
||||
// Uses the backend's own node_modules (nats, pg) so the client libs under test
|
||||
// are exactly what production runs.
|
||||
|
||||
import { createRequire } from 'node:module';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { writeFileSync, mkdtempSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const repoRoot = join(dirname(fileURLToPath(import.meta.url)), '..');
|
||||
const require = createRequire(join(repoRoot, 'backend-nest', 'node_modules', 'x.js'));
|
||||
const { connect, StringCodec } = require('nats');
|
||||
const { Client: PgClient } = require('pg');
|
||||
|
||||
const LICENSE = process.env.LICENSE_ID;
|
||||
const NATS_URL = process.env.NATS_URL ?? 'nats://localhost:4222';
|
||||
const DATABASE_URL = process.env.DATABASE_URL;
|
||||
const AGENT_BIN = process.env.AGENT_BIN ?? join(repoRoot, 'corrosion-host-agent', 'target', 'debug', 'corrosion-host-agent');
|
||||
|
||||
if (!LICENSE || !DATABASE_URL) {
|
||||
console.error('LICENSE_ID and DATABASE_URL are required');
|
||||
process.exit(2);
|
||||
}
|
||||
|
||||
const sc = StringCodec();
|
||||
const errs = [];
|
||||
const check = (cond, msg) => { if (!cond) errs.push(msg); };
|
||||
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
||||
|
||||
/**
 * Poll the license's server_connections rows once per second until
 * `predicate(rows)` is truthy or `timeoutMs` has elapsed.
 *
 * A timeout does not throw: it records a failure in `errs` (prefixed with
 * `label`) and returns the last rows seen, so the remaining checks still run.
 */
async function pollDb(pg, predicate, label, timeoutMs = 30_000) {
  const sql =
    'SELECT connection_type, connection_status, companion_last_seen FROM server_connections WHERE license_id = $1';
  const giveUpAt = Date.now() + timeoutMs;

  while (true) {
    const result = await pg.query(sql, [LICENSE]);
    const rows = result.rows;
    if (predicate(rows)) {
      return rows;
    }
    if (Date.now() > giveUpAt) {
      errs.push(`${label}: timeout after ${timeoutMs}ms — rows: ${JSON.stringify(rows)}`);
      return rows;
    }
    await sleep(1000);
  }
}
|
||||
|
||||
/**
 * Run the full contract scenario against a live broker, backend and database:
 *   1. heartbeat shape + measured telemetry
 *   2. backend auto-registers the connection row and marks it connected
 *   3. instance command channel round-trip with push status events
 *   4. graceful shutdown -> offline beacon -> row flips offline
 *
 * Failed expectations are collected in `errs`; the process exits 0 on a clean
 * pass and 1 otherwise. The spawned agent is always reaped, even when a step
 * throws, so a failing run does not leave a stray agent process behind.
 */
const main = async () => {
  const pg = new PgClient({ connectionString: DATABASE_URL });
  await pg.connect();
  const nc = await connect({ servers: NATS_URL });

  // Collect everything the agent publishes while the scenario runs.
  const heartbeats = [];
  const statusEvents = [];
  (async () => { for await (const m of nc.subscribe(`corrosion.${LICENSE}.host.heartbeat`)) heartbeats.push(JSON.parse(sc.decode(m.data))); })();
  (async () => { for await (const m of nc.subscribe(`corrosion.${LICENSE}.ci-instance.status`)) statusEvents.push(JSON.parse(sc.decode(m.data))); })();

  // --- spawn the real agent ---
  const dir = mkdtempSync(join(tmpdir(), 'cha-contract-'));
  const cfgPath = join(dir, 'agent.toml');
  writeFileSync(cfgPath, `
[agent]
license_id = "${LICENSE}"
nats_url = "${NATS_URL}"
heartbeat_seconds = 10
log_level = "info"

[[instance]]
id = "ci-instance"
game = "rust"
root = "/tmp"
label = "Contract CI"
executable = "/bin/sleep"
args = ["300"]
`);
  const agent = spawn(AGENT_BIN, ['--config', cfgPath], { stdio: ['ignore', 'inherit', 'inherit'] });
  // Without a listener a failed spawn (e.g. wrong AGENT_BIN) is an unhandled
  // 'error' event; record it as a contract failure instead.
  agent.on('error', (e) => errs.push(`agent failed to start (${AGENT_BIN}): ${e.message}`));
  const agentExited = new Promise((r) => agent.on('exit', r));

  try {
    // --- 1. heartbeat shape + real telemetry ---
    const hbDeadline = Date.now() + 20_000;
    while (heartbeats.length === 0 && Date.now() < hbDeadline) await sleep(500);
    check(heartbeats.length > 0, 'no heartbeat within 20s');
    if (heartbeats.length) {
      const hb = heartbeats[0];
      check(hb.schema === 2, `schema != 2: ${hb.schema}`);
      check(typeof hb.host?.cpu_percent === 'number', 'missing host.cpu_percent');
      check(hb.host?.mem_total_mb > 0, 'mem_total_mb not measured');
      check(Array.isArray(hb.host?.disks) && hb.host.disks.length > 0, 'no disks reported');
      check(hb.instances?.[0]?.id === 'ci-instance', 'instance missing from heartbeat');
      check(!!hb.agent?.version && !!hb.agent?.commit, 'agent version/commit missing');
    }

    // --- 2. backend auto-registers + connects ---
    const rows = await pollDb(pg, (r) => r.length === 1 && r[0].connection_status === 'connected', 'auto-register connected');
    if (rows.length === 1) {
      check(rows[0].connection_type === 'bare_metal', `connection_type: ${rows[0].connection_type}`);
      check(rows[0].companion_last_seen !== null, 'companion_last_seen not set');
    }

    // --- 3. instance command channel ---
    const cmd = async (payload) =>
      JSON.parse(sc.decode((await nc.request(`corrosion.${LICENSE}.ci-instance.cmd`, sc.encode(JSON.stringify(payload)), { timeout: 8000 })).data));

    const st0 = await cmd({ func: 'status' });
    check(st0.state?.state === 'stopped', `initial state: ${JSON.stringify(st0.state)}`);
    const start = await cmd({ func: 'start' });
    check(start.status === 'success', `start: ${JSON.stringify(start)}`);
    await sleep(1000);
    const st1 = await cmd({ func: 'status' });
    check(st1.state?.state === 'running', `post-start state: ${JSON.stringify(st1.state)}`);
    check((await cmd({ func: 'start' })).status === 'error', 'double start must error');
    check((await cmd({ func: 'bogus' })).status === 'error', 'unknown func must error');
    const stop = await cmd({ func: 'stop' });
    check(stop.status === 'success', `stop: ${JSON.stringify(stop)}`);
    await sleep(1000);
    const seq = statusEvents.map((e) => e.event?.state);
    check(seq.includes('running') && seq.includes('stopped'), `status events incomplete: ${seq.join(',')}`);

    // --- 4. graceful shutdown → offline beacon → DB flips offline ---
    agent.kill('SIGTERM');
    await Promise.race([agentExited, sleep(8000)]);
    await pollDb(pg, (r) => r.length === 1 && r[0].connection_status === 'offline', 'beacon offline', 20_000);
  } finally {
    // Reap the agent if a step threw before the SIGTERM, or if it ignored the
    // SIGTERM; otherwise it would outlive this test run.
    if (agent.exitCode === null && agent.signalCode === null) {
      agent.kill('SIGKILL');
    }
  }

  await nc.close();
  await pg.end();

  if (errs.length) {
    console.error('\nCONTRACT FAIL:');
    errs.forEach((e) => console.error(' -', e));
    process.exit(1);
  }
  console.log('\nCONTRACT PASS: heartbeat shape, auto-register, connected/offline lifecycle, instance command channel, push events');
  process.exit(0);
};
|
||||
|
||||
main().catch((e) => {
|
||||
console.error('contract test crashed:', e);
|
||||
process.exit(1);
|
||||
});
|
||||
806
corrosion-host-agent/Cargo.lock
generated
806
corrosion-host-agent/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "corrosion-host-agent"
|
||||
version = "2.0.0-alpha.2"
|
||||
version = "2.0.0-alpha.10"
|
||||
edition = "2021"
|
||||
description = "Corrosion Host Agent — multi-game ops runtime for self-hosted game servers"
|
||||
license = "UNLICENSED"
|
||||
@@ -23,8 +23,18 @@ chrono = { version = "0.4", features = ["serde", "clock"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||
anyhow = "1"
|
||||
async-trait = "0.1"
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
rand = "0.8"
|
||||
tokio-tungstenite = "0.24"
|
||||
minisign-verify = "0.2.5"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream"] }
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
libc = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
||||
# Size-optimized release: single static binary living next to RAM-heavy game
|
||||
# servers. Panic stays 'unwind' so a panicking task surfaces through its
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
# Corrosion Wire Protocol v2
|
||||
|
||||
Status: **Phase 0 implemented** (host heartbeat, host commands, going-offline
|
||||
beacon). Per-instance command/status subjects are reserved and specified here
|
||||
for Phase 1.
|
||||
Status: **Phase 0 + Phase 1 implemented** (host heartbeat, host commands,
going-offline beacon, per-instance start/stop/restart/status with push state
events, RCON, SteamCMD updates, and the jailed file manager). Game-adapter-specific
commands are specified but not yet implemented.
|
||||
|
||||
## Design
|
||||
|
||||
@@ -70,9 +71,10 @@ All telemetry is measured, never fabricated. Fields the agent cannot measure
|
||||
are omitted (`probe` before the first probe completes, `hostname` if
|
||||
unavailable).
|
||||
|
||||
Phase 0 instance `state` values: `configured` (root path exists),
|
||||
`missing_root`. Phase 1 adds live process states: `running`, `stopped`,
|
||||
`crashed`, `starting`, `updating`.
|
||||
Instance `state` values — process-managed (an `executable` is configured):
|
||||
`running`, `stopped`, `starting`, `stopping`, `crashed`; unmanaged
|
||||
(telemetry-only): `configured` (root exists), `missing_root`. Each instance
|
||||
also reports `uptime_seconds` (0 unless running).
|
||||
|
||||
### `corrosion.{license_id}.host.cmd` (backend → agent, request-reply)
|
||||
|
||||
@@ -83,6 +85,7 @@ Request: `{ "func": "<name>" }`. Reply: `{ "status": "success" | "error", ... }`
|
||||
| `ping` | `version`, `commit`, `uptime_seconds` |
|
||||
| `probe` | `report` — fresh ProbeReport (also cached for heartbeat) |
|
||||
| `sysinfo` | `snapshot` — full heartbeat payload, collected on demand |
|
||||
| `update` | `{ "func": "update", "url": "https://cdn.corrosionmgmt.com/host-agent/.../corrosion-host-agent-<plat>" }` → downloads the binary + `<url>.minisig`, verifies the minisign signature against the agent's EMBEDDED public key, atomically swaps (with `.old` rollback), replies `{ status: success, message: "...relaunching" }`, then relaunches the new binary. Rejects anything not signed by the release key and any URL that isn't `https://cdn.corrosionmgmt.com`. |
|
||||
|
||||
Unknown funcs return `status: "error"` with a message listing supported funcs.
|
||||
|
||||
@@ -92,19 +95,71 @@ Best-effort beacon (500ms budget) on graceful shutdown so the panel can flip
|
||||
the host to offline immediately instead of waiting out heartbeat staleness.
|
||||
Payload: `{}`.
|
||||
|
||||
## Instance-level subjects (Phase 1 — reserved, not yet implemented)
|
||||
## Instance-level subjects
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.cmd` (backend → agent, request-reply)
|
||||
### `corrosion.{license_id}.{instance_id}.cmd` (backend → agent, request-reply) — LIVE
|
||||
|
||||
Lifecycle and control for one game instance. Planned funcs: `start`, `stop`,
|
||||
`restart`, `status`, `rcon` (process-class games), `steam_update`,
|
||||
`oxide_install` (rust), plus game-adapter-specific commands (Dune: docker
|
||||
lifecycle, RabbitMQ bus commands, Coriolis reset).
|
||||
Lifecycle and control for one game instance.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.status` (agent → backend, publish)
|
||||
The same `start`/`stop`/`restart`/`status` funcs work for **every** game: the
|
||||
agent picks a `Supervisor` impl per game — a spawned-process supervisor for
|
||||
Rust/Conan/Soulmask, a **docker-compose supervisor for Dune** (`docker compose
|
||||
up -d` / `stop` / `restart` against the instance's compose project, configured
|
||||
via `[instance.docker_compose]`). The wire contract is identical; only the
|
||||
management model behind it differs.
|
||||
|
||||
State-change events (started/stopped/crashed) so the panel does not wait for
|
||||
the next heartbeat.
|
||||
Implemented funcs: `start`, `stop` (graceful with 30s budget, then force
|
||||
kill — process supervisor; Dune maps stop to `docker compose stop`), `restart`,
|
||||
`status` (returns `state` + `uptime_seconds`), and
|
||||
`rcon` — `{ "func": "rcon", "command": "<console command>" }` returns
|
||||
`{ "status": "success", "output": <server response> }`. Protocol per game:
|
||||
WebRCON (WebSocket JSON) for rust, Source RCON (Valve TCP) for
|
||||
conan/soulmask; explicit `kind` override available in the instance's
|
||||
`[instance.rcon]` config. Always targets 127.0.0.1 (agent is co-located).
|
||||
Errors reply `{ "status": "error", "message": ... }` — including start on an
|
||||
unmanaged instance, double start, missing rcon config, and unknown funcs.
|
||||
|
||||
Also implemented: `steam_update` — `{ "func": "steam_update" }` runs
|
||||
SteamCMD for the instance's game (app ids: rust 258550, conan 443030,
|
||||
soulmask 3017310/3017300; dune rejects — Docker images, no SteamCMD),
|
||||
streaming progress lines to `corrosion.{license}.{instance}.steam_status`
|
||||
and replying on completion.
|
||||
|
||||
Planned funcs: `oxide_install` (rust), plus game-adapter-specific
|
||||
commands (Dune: RabbitMQ admin-bus commands, Coriolis reset, Postgres admin
|
||||
surface). Dune **lifecycle** is already covered by the shared
|
||||
start/stop/restart funcs above; container crash-detection and state adoption on
|
||||
agent restart land with Phase 3b.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.steam_status` (agent → backend, publish) — LIVE
|
||||
|
||||
Per-line SteamCMD stdout during a `steam_update`, so the panel can show
|
||||
live update progress. Payload: `{ "timestamp", "instance_id", "line" }`.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.files.cmd` (backend → agent, request-reply) — LIVE
|
||||
|
||||
Jailed file manager, confined to the instance `root` (two-stage check:
|
||||
lexical normalize + canonicalize, defeating `../` traversal and symlink
|
||||
escape). Request `{ "op": "list|read|write|delete|rename|mkdir|mkfile|move|copy",
|
||||
"path": "rel/path", "dest"?, "content"?, "name"? }`; reply
|
||||
`{ "status": "success", "data": ... }` or `{ "status": "error", "message": ... }`.
|
||||
`read` caps at 5 MiB. Replaces the Go agent's UNJAILED legacy files API,
|
||||
which is retired and will not be ported.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.status` (agent → backend, publish) — LIVE
|
||||
|
||||
State-change events so the panel does not wait for the next heartbeat.
|
||||
Payload: `{ "timestamp", "instance_id", "event": { "state": ..., "exit_code"? } }`.
|
||||
|
||||
Semantics: **keep-latest state sync**, not a lossless transition ledger —
|
||||
near-instant transient states (e.g. `starting` when spawn succeeds
|
||||
immediately) may coalesce into the following state. Consumers should treat
|
||||
each event as "current state is now X".
|
||||
|
||||
Known Phase 1 limitation: the supervisor does not yet persist/adopt PIDs — if
|
||||
the agent itself restarts while a game server is running, the game process
|
||||
survives but reports `stopped` until restarted through the panel. PID
|
||||
adoption is queued with the service-install work.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.console` (agent → backend, publish)
|
||||
|
||||
@@ -136,6 +191,23 @@ service that attempts connections to the customer's public IP/ports on
|
||||
request; that is specified as a Phase 1+ feature and will reuse this report
|
||||
format with `direction: "inbound"`.
|
||||
|
||||
## Authentication & tenant isolation
|
||||
|
||||
The broker enforces per-license auth: an agent connects with `user = license_id`,
|
||||
`password = HMAC-SHA256(license_id, NATS_TOKEN_SECRET)` (shown on the panel
|
||||
Server page), and is scoped to `corrosion.{license_id}.>` only. The backend uses
|
||||
a privileged internal user. This makes cross-tenant access impossible at the
|
||||
broker, not just by convention.
|
||||
|
||||
**Reply-subject rule:** per-license users have NO `_INBOX` permission (granting
|
||||
it would let one license read another's request-reply traffic). Therefore any
|
||||
backend→agent request-reply MUST use a reply subject inside the license
|
||||
namespace — e.g. `corrosion.{license_id}.reply.<id>` — never the client's
|
||||
default global `_INBOX`. The agent is unaffected: it responds to whatever
|
||||
`msg.reply` it receives. The constraint is on the requester (the internal user
|
||||
has full access). The contract/CI tests run against an unauthenticated broker
|
||||
and use the default inbox; production request-reply must follow this rule.
|
||||
|
||||
## Versioning
|
||||
|
||||
- The agent embeds semver + git hash + build timestamp (`--version`,
|
||||
|
||||
@@ -15,9 +15,16 @@ instance on that host — Rust, Conan Exiles, Soulmask, Dune: Awakening.
|
||||
- [x] Connectivity prober (outbound TCP, periodic + on-demand)
|
||||
- [x] Host command channel (`ping`, `probe`, `sysinfo`)
|
||||
- [x] Graceful shutdown (cancellation token, going-offline beacon, NATS flush)
|
||||
- [ ] Phase 1: process-class game adapter (spawn/RCON/SteamCMD/files) — Rust, Conan, Soulmask
|
||||
- [ ] Phase 2: Dune Docker adapter (compose lifecycle, RabbitMQ bus, Postgres admin)
|
||||
- [ ] Phase 3: signed self-update (enforced ed25519 — release gate), service install, supervisor split
|
||||
- [x] Phase 1a: process supervision — per-instance start/stop/restart/status over
|
||||
`{instance}.cmd` request-reply, push state events on `{instance}.status`,
|
||||
crash detection with exit codes, live state in heartbeats
|
||||
(integration-tested with real processes + live-NATS contract test)
|
||||
- [ ] Phase 1b: RCON trait (WebRCON rust / TCP conan+soulmask), SteamCMD, jailed file manager
|
||||
- [~] Phase 2: Dune Docker adapter — **compose lifecycle done** (`docker compose up -d/stop/restart`
|
||||
via the `Supervisor` trait + `DockerComposeSupervisor`); RabbitMQ admin bus + Postgres admin
|
||||
surface deferred. Container crash-detection + state adoption on agent restart land with Phase 3b.
|
||||
- [x] Phase 3a: SIGNED self-update — minisign-verified download+swap+relaunch (NATS `update` func); embedded public key; CI signs releases
|
||||
- [ ] Phase 3b: service install (systemd/SCM), PID adoption
|
||||
|
||||
## Build
|
||||
|
||||
|
||||
@@ -9,7 +9,11 @@
|
||||
[agent]
|
||||
license_id = "your-license-uuid"
|
||||
nats_url = "nats://nats.corrosionmgmt.com:4222"
|
||||
# nats_token = "set-me-or-use-CORROSION_NATS_TOKEN"
|
||||
# Per-license auth (preferred): user = license id, password = the token shown
|
||||
# on the panel Server page. The broker scopes you to corrosion.{license}.>
|
||||
# nats_user = "your-license-uuid" # defaults to license_id if omitted
|
||||
# nats_password = "set-me-or-use-CORROSION_NATS_PASSWORD"
|
||||
# nats_token = "legacy token-only auth; use nats_password instead"
|
||||
heartbeat_seconds = 60
|
||||
log_level = "info"
|
||||
|
||||
@@ -23,11 +27,56 @@ game = "rust" # rust | conan | soulmask | dune
|
||||
root = "/opt/rustserver"
|
||||
label = "Main 2x Vanilla"
|
||||
|
||||
# RCON lets the panel send console commands to the running server.
|
||||
# For rust the protocol is WebRCON (WebSocket JSON); for conan/soulmask it is
|
||||
# Source RCON (Valve TCP binary). `kind` is optional — it is inferred from
|
||||
# the game name when absent.
|
||||
#
|
||||
# The [instance.rcon] sub-table MUST immediately follow the [[instance]] entry
|
||||
# it belongs to (standard TOML array-of-tables scoping rule).
|
||||
[instance.rcon]
|
||||
port = 28016
|
||||
password = "changeme"
|
||||
# kind = "webrcon" # explicit override; omit to infer from game
|
||||
|
||||
# [[instance]]
|
||||
# id = "soulmask-main"
|
||||
# game = "soulmask"
|
||||
# root = "/opt/soulmask/main"
|
||||
# label = "Cloud Mist Forest (cluster main)"
|
||||
#
|
||||
# [instance.rcon]
|
||||
# port = 19000
|
||||
# password = "changeme"
|
||||
# # kind = "source" # inferred automatically for soulmask
|
||||
|
||||
# SteamCMD update settings — optional sub-table for any instance.
|
||||
# Absent = defaults: steamcmd binary resolved via PATH, validate = false.
|
||||
#
|
||||
# [instance.steamcmd]
|
||||
# steamcmd_path = "/opt/steamcmd/steamcmd.sh" # omit to use PATH
|
||||
# validate = true # enable file-hash check pass
|
||||
#
|
||||
# Dune instances do not use SteamCMD (Docker images); the steam_update func
|
||||
# will return a clear error if invoked on a dune instance.
|
||||
|
||||
# --- Dune: Awakening (container-managed) ---------------------------------
|
||||
# Dune runs as a docker-compose stack, not a spawned process — leave
|
||||
# `executable` unset and add an [instance.docker_compose] block. The agent
|
||||
# drives `docker compose up -d / stop / restart` for start/stop/restart, and
|
||||
# `steam_update` is rejected (Dune ships as Docker images).
|
||||
#
|
||||
# [[instance]]
|
||||
# id = "dune-main"
|
||||
# game = "dune"
|
||||
# root = "/opt/dune" # directory the compose commands run in
|
||||
# label = "Arrakis (battlegroup)"
|
||||
#
|
||||
# [instance.docker_compose]
|
||||
# file = "docker-compose.yml" # -f; relative to root (or to working_dir when set). Omit to use compose's discovery
|
||||
# project = "dune-main" # -p; defaults to the instance id
|
||||
# service = "gameserver" # limit lifecycle to one service; omit for the whole stack
|
||||
# command = ["docker", "compose"] # default; use ["docker-compose"] for the legacy binary
|
||||
|
||||
[prober]
|
||||
interval_seconds = 300
|
||||
|
||||
@@ -1,16 +1,23 @@
|
||||
//! Shared agent handle: every subsystem task holds an `Arc<Agent>`.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::config::Settings;
|
||||
use crate::prober::ProbeReport;
|
||||
use crate::supervisor::Supervisor;
|
||||
|
||||
pub struct Agent {
|
||||
pub cfg: Settings,
|
||||
pub nats: async_nats::Client,
|
||||
pub started: Instant,
|
||||
pub last_probe: RwLock<Option<ProbeReport>>,
|
||||
/// One supervisor per instance, keyed by instance id. The concrete impl
|
||||
/// (process vs docker-compose) is chosen per game by the factory in main;
|
||||
/// every subsystem talks to the `Supervisor` trait only.
|
||||
pub supervisors: HashMap<String, Arc<dyn Supervisor>>,
|
||||
pub shutdown: CancellationToken,
|
||||
}
|
||||
|
||||
@@ -33,7 +33,15 @@ pub async fn connect(cfg: &Settings) -> Result<async_nats::Client> {
|
||||
if force_tls {
|
||||
opts = opts.require_tls(true);
|
||||
}
|
||||
if let Some(token) = &cfg.nats_token {
|
||||
|
||||
// Per-license auth: the broker maps user=license_id, password=derived
|
||||
// token to permissions scoped to corrosion.{license_id}.>. Falls back to
|
||||
// token-only or anonymous so the agent still works against a broker that
|
||||
// hasn't enforced auth yet (transition period).
|
||||
if let Some(password) = &cfg.nats_password {
|
||||
let user = cfg.nats_user.clone().unwrap_or_else(|| cfg.license_id.clone());
|
||||
opts = opts.user_and_password(user, password.clone());
|
||||
} else if let Some(token) = &cfg.nats_token {
|
||||
opts = opts.token(token.clone());
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,10 @@ use serde::Deserialize;
|
||||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::docker_compose::DockerComposeConfig;
|
||||
use crate::rcon::RconConfig;
|
||||
use crate::steamcmd::SteamcmdConfig;
|
||||
|
||||
/// Instance ids share the NATS subject namespace with host-level segments.
|
||||
const RESERVED_INSTANCE_IDS: &[&str] = &["host", "cmd", "files", "update", "agent"];
|
||||
|
||||
@@ -31,6 +35,12 @@ pub struct AgentSection {
|
||||
pub license_id: Option<String>,
|
||||
pub nats_url: Option<String>,
|
||||
pub nats_token: Option<String>,
|
||||
/// NATS username for per-license auth. Defaults to license_id when a
|
||||
/// password is set but no user is given.
|
||||
pub nats_user: Option<String>,
|
||||
/// NATS password (the per-license token). When set, the agent authenticates
|
||||
/// with user+password instead of a bare token.
|
||||
pub nats_password: Option<String>,
|
||||
#[serde(default = "default_heartbeat_seconds")]
|
||||
pub heartbeat_seconds: u64,
|
||||
#[serde(default = "default_log_level")]
|
||||
@@ -49,6 +59,41 @@ pub struct InstanceConfig {
|
||||
/// Optional human label shown in the panel.
|
||||
#[serde(default)]
|
||||
pub label: Option<String>,
|
||||
/// Game server executable. Relative paths resolve against `root`.
|
||||
/// Absent = unmanaged instance (telemetry only, no process control).
|
||||
#[serde(default)]
|
||||
pub executable: Option<PathBuf>,
|
||||
/// Arguments as a proper list — no shell splitting, quoted values survive.
|
||||
#[serde(default)]
|
||||
pub args: Vec<String>,
|
||||
/// Working directory for the process. Defaults to the executable's directory.
|
||||
#[serde(default)]
|
||||
pub working_dir: Option<PathBuf>,
|
||||
/// RCON connection settings for this instance. Absent = rcon unavailable.
|
||||
/// Protocol defaults to WebRcon for rust, Source for conan/soulmask.
|
||||
#[serde(default)]
|
||||
pub rcon: Option<RconConfig>,
|
||||
/// SteamCMD update settings. Absent = defaults apply (steamcmd on PATH,
|
||||
/// validate = false).
|
||||
#[serde(default)]
|
||||
pub steamcmd: Option<SteamcmdConfig>,
|
||||
/// Docker-compose settings for container-managed games (Dune). Absent =
|
||||
/// defaults apply (compose file in the instance root, project = instance id).
|
||||
#[serde(default)]
|
||||
pub docker_compose: Option<DockerComposeConfig>,
|
||||
}
|
||||
|
||||
impl InstanceConfig {
|
||||
/// Absolute executable path, if this instance is process-managed.
|
||||
pub fn resolved_executable(&self) -> Option<PathBuf> {
|
||||
self.executable.as_ref().map(|exe| {
|
||||
if exe.is_absolute() {
|
||||
exe.clone()
|
||||
} else {
|
||||
self.root.join(exe)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Deserialize)]
|
||||
@@ -88,6 +133,8 @@ pub struct Settings {
|
||||
pub license_id: String,
|
||||
pub nats_url: String,
|
||||
pub nats_token: Option<String>,
|
||||
pub nats_user: Option<String>,
|
||||
pub nats_password: Option<String>,
|
||||
pub heartbeat_seconds: u64,
|
||||
pub log_level: String,
|
||||
pub instances: Vec<InstanceConfig>,
|
||||
@@ -133,6 +180,16 @@ fn resolve(file: ConfigFile) -> Result<Settings> {
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_token);
|
||||
|
||||
let nats_user = std::env::var("CORROSION_NATS_USER")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_user);
|
||||
|
||||
let nats_password = std::env::var("CORROSION_NATS_PASSWORD")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_password);
|
||||
|
||||
validate_subject_segment("license_id", &license_id)?;
|
||||
|
||||
let mut seen: HashSet<&str> = HashSet::new();
|
||||
@@ -162,6 +219,8 @@ fn resolve(file: ConfigFile) -> Result<Settings> {
|
||||
license_id,
|
||||
nats_url,
|
||||
nats_token,
|
||||
nats_user,
|
||||
nats_password,
|
||||
heartbeat_seconds: file.agent.heartbeat_seconds,
|
||||
log_level: file.agent.log_level,
|
||||
instances: file.instances,
|
||||
|
||||
216
corrosion-host-agent/src/docker_compose.rs
Normal file
216
corrosion-host-agent/src/docker_compose.rs
Normal file
@@ -0,0 +1,216 @@
|
||||
//! Docker-compose instance supervision — the Dune: Awakening adapter.
|
||||
//!
|
||||
//! Dune does not ship as a SteamCMD-updated process like Rust/Conan/Soulmask;
|
||||
//! it runs as Docker container(s) (game server + RabbitMQ broker + Postgres),
|
||||
//! orchestrated as a compose stack (a "battlegroup"). So Dune lifecycle is
|
||||
//! `docker compose up -d / stop / restart` against the instance's compose
|
||||
//! project, not a spawned OS process. This supervisor implements the same
|
||||
//! [`Supervisor`] trait `ProcessSupervisor` does, so the instance command
|
||||
//! dispatch is identical — only the management model differs.
|
||||
//!
|
||||
//! Scope (first cut): lifecycle + cached state. Two parity items are deferred
|
||||
//! to Phase 3b alongside process PID adoption: (1) crash detection (containers
|
||||
//! give us no child handle — a `docker compose ps` poll loop would supply it);
|
||||
//! (2) state adoption on agent restart (a running stack reports `stopped` until
|
||||
//! the next lifecycle command). Both are reconcilable with a `ps` probe.
|
||||
//!
|
||||
//! Reference: docs/reference-repos/icehunter SETUP_DOCKER.md (the docker
|
||||
//! control plane this mirrors).
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use serde::Deserialize;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::{watch, Mutex};
|
||||
|
||||
use crate::config::InstanceConfig;
|
||||
use crate::supervisor::{InstanceState, Supervisor};
|
||||
|
||||
/// Per-instance docker-compose settings (`[instance.docker_compose]`). All
|
||||
/// fields optional — defaults cover the common "one compose file in the
|
||||
/// instance root" case.
|
||||
#[derive(Debug, Clone, Default, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct DockerComposeConfig {
|
||||
/// Compose file (`-f`). Relative paths resolve against the run dir. Default:
|
||||
/// compose's own discovery (docker-compose.yml in the run dir).
|
||||
#[serde(default)]
|
||||
pub file: Option<PathBuf>,
|
||||
/// Compose project name (`-p`). Default: the instance id.
|
||||
#[serde(default)]
|
||||
pub project: Option<String>,
|
||||
/// Limit lifecycle ops to one service. Default: every service in the file.
|
||||
#[serde(default)]
|
||||
pub service: Option<String>,
|
||||
/// Override the compose binary invocation. Default: `["docker","compose"]`.
|
||||
/// Use `["docker-compose"]` for the legacy standalone binary.
|
||||
#[serde(default)]
|
||||
pub command: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// Mutable bookkeeping guarded by the supervisor's mutex.
struct Inner {
    /// Set when `start`/`restart` last succeeded, cleared by a successful
    /// `stop`; used to compute uptime.
    started_at: Option<Instant>,
}
|
||||
|
||||
pub struct DockerComposeSupervisor {
|
||||
instance_id: String,
|
||||
/// Directory the compose commands run in (relative `-f`/file paths resolve
|
||||
/// against it).
|
||||
run_dir: PathBuf,
|
||||
compose_file: Option<PathBuf>,
|
||||
project: String,
|
||||
service: Option<String>,
|
||||
/// Compose binary + leading args, e.g. `["docker","compose"]`.
|
||||
command: Vec<String>,
|
||||
inner: Mutex<Inner>,
|
||||
state_tx: watch::Sender<InstanceState>,
|
||||
}
|
||||
|
||||
impl DockerComposeSupervisor {
|
||||
pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
|
||||
let dc = cfg.docker_compose.clone().unwrap_or_default();
|
||||
let run_dir = cfg
|
||||
.working_dir
|
||||
.clone()
|
||||
.unwrap_or_else(|| cfg.root.clone());
|
||||
let command = dc
|
||||
.command
|
||||
.filter(|c| !c.is_empty())
|
||||
.unwrap_or_else(|| vec!["docker".to_string(), "compose".to_string()]);
|
||||
let (state_tx, _) = watch::channel(InstanceState::Stopped);
|
||||
Arc::new(Self {
|
||||
instance_id: cfg.id.clone(),
|
||||
run_dir,
|
||||
compose_file: dc.file,
|
||||
project: dc.project.unwrap_or_else(|| cfg.id.clone()),
|
||||
service: dc.service,
|
||||
command,
|
||||
inner: Mutex::new(Inner { started_at: None }),
|
||||
state_tx,
|
||||
})
|
||||
}
|
||||
|
||||
fn set_state(&self, state: InstanceState) {
|
||||
let _ = self.state_tx.send_replace(state);
|
||||
}
|
||||
|
||||
/// Run one compose subcommand (`up`/`stop`/`restart`/...), bailing with the
|
||||
/// captured stderr on non-zero exit. Global flags (`-f`, `-p`) precede the
|
||||
/// subcommand; the optional single service is appended last.
|
||||
async fn run(&self, action: &str, action_args: &[&str]) -> Result<()> {
|
||||
let mut cmd = Command::new(&self.command[0]);
|
||||
cmd.args(&self.command[1..]);
|
||||
if let Some(file) = &self.compose_file {
|
||||
cmd.arg("-f").arg(file);
|
||||
}
|
||||
cmd.arg("-p").arg(&self.project);
|
||||
cmd.arg(action);
|
||||
cmd.args(action_args);
|
||||
if let Some(service) = &self.service {
|
||||
cmd.arg(service);
|
||||
}
|
||||
cmd.current_dir(&self.run_dir)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped());
|
||||
|
||||
let output = cmd
|
||||
.output()
|
||||
.await
|
||||
.with_context(|| format!("running `{} {action}` (is docker installed and on PATH?)", self.command.join(" ")))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let detail = if !stderr.trim().is_empty() {
|
||||
stderr.trim()
|
||||
} else {
|
||||
stdout.trim()
|
||||
};
|
||||
bail!("compose {action} failed ({}): {detail}", output.status);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Supervisor for DockerComposeSupervisor {
|
||||
fn instance_id(&self) -> &str {
|
||||
&self.instance_id
|
||||
}
|
||||
|
||||
fn state(&self) -> InstanceState {
|
||||
self.state_tx.borrow().clone()
|
||||
}
|
||||
|
||||
fn watch_state(&self) -> watch::Receiver<InstanceState> {
|
||||
self.state_tx.subscribe()
|
||||
}
|
||||
|
||||
async fn uptime_seconds(&self) -> u64 {
|
||||
let inner = self.inner.lock().await;
|
||||
match (&*self.state_tx.borrow(), inner.started_at) {
|
||||
(InstanceState::Running, Some(t)) => t.elapsed().as_secs(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
async fn start(self: Arc<Self>) -> Result<()> {
|
||||
if matches!(
|
||||
*self.state_tx.borrow(),
|
||||
InstanceState::Running | InstanceState::Starting
|
||||
) {
|
||||
bail!("instance '{}' is already running", self.instance_id);
|
||||
}
|
||||
self.set_state(InstanceState::Starting);
|
||||
match self.run("up", &["-d"]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = Some(Instant::now());
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!("instance '{}' compose up -d", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn stop(self: Arc<Self>) -> Result<()> {
|
||||
self.set_state(InstanceState::Stopping);
|
||||
match self.run("stop", &[]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = None;
|
||||
self.set_state(InstanceState::Stopped);
|
||||
tracing::info!("instance '{}' compose stop", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
// Stop failed — the stack is most likely still up.
|
||||
self.set_state(InstanceState::Running);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn restart(self: Arc<Self>) -> Result<()> {
|
||||
self.set_state(InstanceState::Starting);
|
||||
match self.run("restart", &[]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = Some(Instant::now());
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!("instance '{}' compose restart", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
544
corrosion-host-agent/src/filemanager.rs
Normal file
544
corrosion-host-agent/src/filemanager.rs
Normal file
@@ -0,0 +1,544 @@
|
||||
//! Jailed file manager for game-server install directories.
|
||||
//!
|
||||
//! Every path operation is confined to the instance `root` — the directory
|
||||
//! declared as `root` in `[[instance]]` config. A two-stage check (lexical
|
||||
//! Clean + `std::fs::canonicalize`) prevents both `../..` traversals and
|
||||
//! symlink-based escapes: even if an attacker plants a symlink inside the root
|
||||
//! that points outside it, `canonicalize` resolves the target and the prefix
|
||||
//! check catches the escape.
|
||||
//!
|
||||
//! The NATS request/reply contract mirrors the Go companion agent's jailed file
|
||||
//! manager (see `companion-agent/internal/filemanager/`) but uses a simpler
|
||||
//! flat JSON envelope rather than the VueFinder storage-path protocol — the
|
||||
//! Rust agent is the replacement, and the panel's backend talks to whichever
|
||||
//! agent is present.
|
||||
//!
|
||||
//! Subject: `corrosion.{license}.{instance}.files.cmd`
|
||||
//! Request: `{"op":"list"|"read"|"write"|"delete"|"rename"|"mkdir"|"mkfile"|"move"|"copy",
|
||||
//! "path":"rel/path", "dest"?:"...", "content"?:"...", "name"?:"..."}`
|
||||
//! Response: `{"status":"success","data":...}` or `{"status":"error","message":"..."}`
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use chrono::{DateTime, SecondsFormat, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Maximum size for a `read` operation (5 MiB). Larger files must be
|
||||
/// transferred through a dedicated download endpoint, not the file manager.
|
||||
const MAX_READ_SIZE: u64 = 5 * 1024 * 1024;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Wire types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct FileRequest {
|
||||
pub op: String,
|
||||
/// Relative path within the instance root (the "subject" of the operation).
|
||||
#[serde(default)]
|
||||
pub path: String,
|
||||
/// Destination for `rename`, `move`, `copy` — relative to instance root.
|
||||
#[serde(default)]
|
||||
pub dest: Option<String>,
|
||||
/// Text content for `write`.
|
||||
#[serde(default)]
|
||||
pub content: Option<String>,
|
||||
/// Bare filename for `mkdir` and `mkfile`.
|
||||
#[serde(default)]
|
||||
pub name: Option<String>,
|
||||
}
|
||||
|
||||
/// A single directory entry returned by `list`.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct FileEntry {
|
||||
pub name: String,
|
||||
/// Path relative to the instance root, using forward slashes.
|
||||
pub path: String,
|
||||
pub is_dir: bool,
|
||||
/// File size in bytes. Zero for directories.
|
||||
pub size: u64,
|
||||
/// RFC 3339 modification timestamp.
|
||||
pub modified: String,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Jail helper — the security core of this module
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Resolve `rel` against `root`, then canonicalize to reject any form of
|
||||
/// escape including `../..` traversals and symlinks that point outside root.
|
||||
///
|
||||
/// For paths that do not yet exist (e.g. write targets), we canonicalize the
|
||||
/// nearest existing ancestor and then re-join the remaining components, which
|
||||
/// are lexically-clean because they went through `std::path::Path` building.
|
||||
///
|
||||
/// Returns the absolute, canonicalized path if it is within `root`.
|
||||
pub fn jail(root: &Path, rel: &str) -> anyhow::Result<PathBuf> {
|
||||
// Canonicalize root once to get a stable prefix for comparison.
|
||||
// We do this on every call rather than caching so the function stays
|
||||
// pure and testable without Agent state.
|
||||
let canon_root = fs::canonicalize(root)
|
||||
.with_context(|| format!("canonicalize instance root '{}'", root.display()))?;
|
||||
|
||||
// Build the candidate absolute path. We use Path joining so that an
|
||||
// absolute `rel` (e.g. "/etc/passwd") replaces the root entirely — we
|
||||
// detect and reject that case immediately.
|
||||
let candidate = if rel.is_empty() || rel == "." {
|
||||
root.to_path_buf()
|
||||
} else {
|
||||
let rel_path = Path::new(rel);
|
||||
if rel_path.is_absolute() {
|
||||
bail!(
|
||||
"absolute path '{}' is not allowed; supply a path relative to the instance root",
|
||||
rel
|
||||
);
|
||||
}
|
||||
root.join(rel_path)
|
||||
};
|
||||
|
||||
// Normalize lexically first (removes `..` / `.` without filesystem access).
|
||||
// This is a defence-in-depth step; the authoritative check is below.
|
||||
let lexical = normalize_lexical(&candidate);
|
||||
|
||||
// Canonicalize: resolve symlinks and `..` via the kernel.
|
||||
// For a not-yet-existing path we walk up to the nearest existing ancestor.
|
||||
let canon = canonicalize_lenient(&lexical)?;
|
||||
|
||||
// Authoritative prefix check: the resolved path must be equal to or a
|
||||
// child of the canonicalized root.
|
||||
if canon != canon_root && !canon.starts_with(&canon_root) {
|
||||
bail!(
|
||||
"path '{}' resolves to '{}' which is outside the instance root '{}'",
|
||||
rel,
|
||||
canon.display(),
|
||||
canon_root.display()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(canon)
|
||||
}
|
||||
|
||||
/// Canonicalize a path that may not fully exist yet by walking up to the
|
||||
/// nearest existing ancestor, canonicalizing it, then re-joining the remaining
|
||||
/// (lexically-clean) suffix.
|
||||
fn canonicalize_lenient(path: &Path) -> anyhow::Result<PathBuf> {
|
||||
// Fast path: path already exists.
|
||||
if let Ok(c) = fs::canonicalize(path) {
|
||||
return Ok(c);
|
||||
}
|
||||
|
||||
// Walk up until we find an ancestor that exists.
|
||||
let mut existing = path.to_path_buf();
|
||||
let mut suffix: Vec<std::ffi::OsString> = Vec::new();
|
||||
|
||||
loop {
|
||||
match fs::canonicalize(&existing) {
|
||||
Ok(canon) => {
|
||||
// Re-attach the non-existing suffix.
|
||||
let mut result = canon;
|
||||
for component in suffix.iter().rev() {
|
||||
result = result.join(component);
|
||||
}
|
||||
return Ok(result);
|
||||
}
|
||||
Err(_) => {
|
||||
let file_name = match existing.file_name() {
|
||||
Some(n) => n.to_os_string(),
|
||||
None => bail!("cannot resolve path '{}'", path.display()),
|
||||
};
|
||||
suffix.push(file_name);
|
||||
existing = match existing.parent() {
|
||||
Some(p) => p.to_path_buf(),
|
||||
None => bail!("cannot resolve path '{}'", path.display()),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Collapse `.` and `..` components purely by inspecting the path text; the
/// filesystem is never consulted (comparable to Go's `filepath.Clean`).
///
/// A `..` cancels the preceding normal component. When there is nothing
/// normal to cancel (start of a relative path, directly after a root/prefix,
/// or after another retained `..`) the `..` is kept.
fn normalize_lexical(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut kept: Vec<Component> = Vec::new();
    for part in path.components() {
        let cancels_previous = matches!(part, Component::ParentDir)
            && matches!(kept.last(), Some(Component::Normal(_)));
        if cancels_previous {
            kept.pop();
        } else if !matches!(part, Component::CurDir) {
            kept.push(part);
        }
    }
    kept.into_iter().collect()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Operations
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// List the contents of a directory. Returns an entry per item, sorted
|
||||
/// (directories first, then files, both alphabetical).
|
||||
pub fn list(root: &Path, rel: &str) -> anyhow::Result<Vec<FileEntry>> {
|
||||
let abs = jail(root, rel)?;
|
||||
// Use the canonicalized root as the prefix for relative path computation so
|
||||
// that symlinked root paths (e.g. macOS /var → /private/var) don't cause
|
||||
// strip_prefix to fail and fall back to leaking the absolute path.
|
||||
let canon_root = fs::canonicalize(root)
|
||||
.with_context(|| format!("canonicalize root '{}'", root.display()))?;
|
||||
|
||||
let rd = fs::read_dir(&abs)
|
||||
.with_context(|| format!("read_dir '{}'", abs.display()))?;
|
||||
|
||||
let mut entries: Vec<FileEntry> = Vec::new();
|
||||
for item in rd {
|
||||
let item = item.with_context(|| format!("reading directory entry in '{}'", abs.display()))?;
|
||||
// symlink_metadata (lstat): report the link itself, never the target —
|
||||
// following it would leak the size/type/existence of files outside the
|
||||
// jail. A symlink lists as a zero-ish-size non-dir entry.
|
||||
let meta = fs::symlink_metadata(item.path())
|
||||
.with_context(|| format!("stat '{}'", item.path().display()))?;
|
||||
|
||||
let name = item.file_name().to_string_lossy().into_owned();
|
||||
let is_dir = meta.is_dir();
|
||||
let size = if is_dir { 0 } else { meta.len() };
|
||||
|
||||
// Build the relative path from the canonicalized root.
|
||||
let entry_abs = item.path();
|
||||
let entry_rel = entry_abs
|
||||
.strip_prefix(&canon_root)
|
||||
.unwrap_or(&entry_abs)
|
||||
.to_string_lossy()
|
||||
.replace('\\', "/");
|
||||
|
||||
let modified = meta
|
||||
.modified()
|
||||
.ok()
|
||||
.map(|t| {
|
||||
let dt: DateTime<Utc> = t.into();
|
||||
dt.to_rfc3339_opts(SecondsFormat::Secs, true)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
entries.push(FileEntry { name, path: entry_rel, is_dir, size, modified });
|
||||
}
|
||||
|
||||
// Stable sort: dirs first, then alphabetical within each group.
|
||||
entries.sort_by(|a, b| {
|
||||
b.is_dir.cmp(&a.is_dir).then_with(|| a.name.cmp(&b.name))
|
||||
});
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Read a text file. Capped at `MAX_READ_SIZE` bytes.
|
||||
pub fn read(root: &Path, rel: &str) -> anyhow::Result<String> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
let meta = fs::metadata(&abs)
|
||||
.with_context(|| format!("stat '{}'", abs.display()))?;
|
||||
|
||||
if meta.is_dir() {
|
||||
bail!("'{}' is a directory, not a file", rel);
|
||||
}
|
||||
if meta.len() > MAX_READ_SIZE {
|
||||
bail!(
|
||||
"file '{}' is {} bytes which exceeds the {} byte read limit",
|
||||
rel,
|
||||
meta.len(),
|
||||
MAX_READ_SIZE
|
||||
);
|
||||
}
|
||||
|
||||
fs::read_to_string(&abs).with_context(|| format!("read '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Write (create or overwrite) a file. Parent directories are created as
|
||||
/// needed.
|
||||
pub fn write(root: &Path, rel: &str, content: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
if let Some(parent) = abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
fs::write(&abs, content.as_bytes())
|
||||
.with_context(|| format!("write '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Delete a file or directory tree.
|
||||
pub fn delete(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
let meta = fs::metadata(&abs)
|
||||
.with_context(|| format!("stat '{}'", abs.display()))?;
|
||||
|
||||
if meta.is_dir() {
|
||||
fs::remove_dir_all(&abs).with_context(|| format!("remove_dir_all '{}'", abs.display()))
|
||||
} else {
|
||||
fs::remove_file(&abs).with_context(|| format!("remove_file '{}'", abs.display()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Rename/move `rel` to a new bare name (`new_name`) within the same parent.
|
||||
/// `new_name` must not contain path separators.
|
||||
pub fn rename(root: &Path, rel: &str, new_name: &str) -> anyhow::Result<()> {
|
||||
if new_name.is_empty() || new_name == "." || new_name == ".." {
|
||||
bail!("new_name '{}' is not a valid filename", new_name);
|
||||
}
|
||||
if new_name.contains('/') || new_name.contains('\\') {
|
||||
bail!("new_name '{}' must not contain path separators", new_name);
|
||||
}
|
||||
|
||||
let src_abs = jail(root, rel)?;
|
||||
|
||||
// Construct the destination relative path by replacing the filename part
|
||||
// of `rel` with `new_name`. This keeps everything in relative-path space
|
||||
// so we never hand an absolute path to `jail`.
|
||||
let src_rel = Path::new(rel);
|
||||
let dest_rel = match src_rel.parent() {
|
||||
Some(parent) if parent != Path::new("") => {
|
||||
parent.join(new_name).to_string_lossy().replace('\\', "/")
|
||||
}
|
||||
_ => new_name.to_string(),
|
||||
};
|
||||
|
||||
let dest_abs = jail(root, &dest_rel)?;
|
||||
|
||||
fs::rename(&src_abs, &dest_abs)
|
||||
.with_context(|| format!("rename '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Create a directory (and any missing parents) at `rel`.
|
||||
pub fn mkdir(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
fs::create_dir_all(&abs).with_context(|| format!("mkdir '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Create an empty file at `rel`. Fails if it already exists.
|
||||
pub fn mkfile(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
if let Some(parent) = abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
let _ = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(&abs)
|
||||
.with_context(|| format!("mkfile '{}'", abs.display()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Move `src` to `dest` (both relative to root).
|
||||
pub fn move_path(root: &Path, src: &str, dest: &str) -> anyhow::Result<()> {
|
||||
let src_abs = jail(root, src)?;
|
||||
let dest_abs = jail(root, dest)?;
|
||||
|
||||
if let Some(parent) = dest_abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
fs::rename(&src_abs, &dest_abs).or_else(|_| {
|
||||
// Cross-device move: copy then delete.
|
||||
copy_recursive(&src_abs, &dest_abs)?;
|
||||
fs::remove_dir_all(&src_abs)
|
||||
.with_context(|| format!("remove source '{}' after cross-device move", src_abs.display()))
|
||||
}).with_context(|| format!("move '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Copy `src` to `dest` (both relative to root).
|
||||
pub fn copy(root: &Path, src: &str, dest: &str) -> anyhow::Result<()> {
|
||||
let src_abs = jail(root, src)?;
|
||||
let dest_abs = jail(root, dest)?;
|
||||
|
||||
if let Some(parent) = dest_abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
copy_recursive(&src_abs, &dest_abs)
|
||||
.with_context(|| format!("copy '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Recursive copy helper.
|
||||
///
|
||||
/// SECURITY: uses `symlink_metadata` (does NOT follow symlinks) and refuses to
|
||||
/// copy any symlink. `jail()` only validates the top-level src/dest; a symlink
|
||||
/// *inside* a copied directory that points outside the jail would, if followed,
|
||||
/// pull external content (e.g. `/etc`) into the jail where it could then be
|
||||
/// read — a jail-escape exfiltration. Refusing symlinks closes that path.
|
||||
fn copy_recursive(src: &Path, dest: &Path) -> anyhow::Result<()> {
|
||||
let meta = fs::symlink_metadata(src)
|
||||
.with_context(|| format!("stat source '{}'", src.display()))?;
|
||||
|
||||
if meta.file_type().is_symlink() {
|
||||
bail!(
|
||||
"refusing to copy symlink '{}' — symlinks are not followed across the jail boundary",
|
||||
src.display()
|
||||
);
|
||||
}
|
||||
|
||||
if meta.is_dir() {
|
||||
fs::create_dir_all(dest)
|
||||
.with_context(|| format!("create_dir_all '{}'", dest.display()))?;
|
||||
|
||||
for entry in fs::read_dir(src)
|
||||
.with_context(|| format!("read_dir '{}'", src.display()))?
|
||||
{
|
||||
let entry = entry?;
|
||||
copy_recursive(&entry.path(), &dest.join(entry.file_name()))?;
|
||||
}
|
||||
} else {
|
||||
fs::copy(src, dest)
|
||||
.with_context(|| format!("copy '{}' -> '{}'", src.display(), dest.display()))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// NATS request dispatch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Dispatch a `FileRequest` against `root` and return a JSON `serde_json::Value`
|
||||
/// ready for the NATS reply.
|
||||
pub fn dispatch(root: &Path, req: &FileRequest) -> serde_json::Value {
|
||||
use serde_json::json;
|
||||
|
||||
let result = match req.op.as_str() {
|
||||
"list" => {
|
||||
list(root, &req.path).map(|entries| json!({ "entries": entries }))
|
||||
}
|
||||
"read" => {
|
||||
read(root, &req.path).map(|content| json!({ "content": content }))
|
||||
}
|
||||
"write" => {
|
||||
let content = req.content.as_deref().unwrap_or("");
|
||||
write(root, &req.path, content).map(|_| json!(null))
|
||||
}
|
||||
"delete" => {
|
||||
delete(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"rename" => {
|
||||
let new_name = req.name.as_deref().unwrap_or("");
|
||||
rename(root, &req.path, new_name).map(|_| json!(null))
|
||||
}
|
||||
"mkdir" => {
|
||||
mkdir(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"mkfile" => {
|
||||
mkfile(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"move" => {
|
||||
let dest = req.dest.as_deref().unwrap_or("");
|
||||
move_path(root, &req.path, dest).map(|_| json!(null))
|
||||
}
|
||||
"copy" => {
|
||||
let dest = req.dest.as_deref().unwrap_or("");
|
||||
copy(root, &req.path, dest).map(|_| json!(null))
|
||||
}
|
||||
other => Err(anyhow::anyhow!(
|
||||
"unknown op '{}' (supported: list, read, write, delete, rename, mkdir, mkfile, move, copy)",
|
||||
other
|
||||
)),
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(data) => json!({ "status": "success", "data": data }),
|
||||
Err(e) => {
|
||||
tracing::warn!("filemanager op='{}' path='{}': {e:#}", req.op, req.path);
|
||||
json!({ "status": "error", "message": format!("{e:#}") })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Subscribe to `corrosion.{license}.{instance}.files.cmd` and serve file
|
||||
/// manager requests for `instance_id` jailed to `root`.
|
||||
///
|
||||
/// This function runs until the agent's cancellation token fires or the NATS
|
||||
/// subscription ends. It is spawned once per instance in `main.rs`.
|
||||
pub async fn run(
|
||||
agent: std::sync::Arc<crate::agent::Agent>,
|
||||
instance_id: String,
|
||||
root: PathBuf,
|
||||
) -> anyhow::Result<()> {
|
||||
use futures::StreamExt;
|
||||
|
||||
let subject = crate::subjects::instance_files_cmd(&agent.cfg.license_id, &instance_id);
|
||||
let mut sub = agent.nats.subscribe(subject.clone()).await?;
|
||||
tracing::info!("file manager handler listening on {subject}");
|
||||
|
||||
let cancel = agent.shutdown.clone();
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = sub.next() => {
|
||||
match msg {
|
||||
Some(msg) => {
|
||||
let agent = agent.clone();
|
||||
let root = root.clone();
|
||||
let instance_id = instance_id.clone();
|
||||
tokio::spawn(async move { handle(agent, &instance_id, &root, msg).await });
|
||||
}
|
||||
None => {
|
||||
tracing::warn!("file manager subscription ended for '{instance_id}'");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("file manager handler stopping for '{instance_id}'");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle(
|
||||
agent: std::sync::Arc<crate::agent::Agent>,
|
||||
instance_id: &str,
|
||||
root: &Path,
|
||||
msg: async_nats::Message,
|
||||
) {
|
||||
let Some(reply) = msg.reply.clone() else {
|
||||
tracing::warn!("file manager message without reply subject ignored (instance '{instance_id}')");
|
||||
return;
|
||||
};
|
||||
|
||||
let response = match serde_json::from_slice::<FileRequest>(&msg.payload) {
|
||||
Ok(req) => {
|
||||
// Blocking fs calls — offload from the async executor.
|
||||
let root = root.to_path_buf();
|
||||
tokio::task::spawn_blocking(move || dispatch(&root, &req))
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
serde_json::json!({ "status": "error", "message": format!("internal error: {e}") })
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
serde_json::json!({ "status": "error", "message": format!("invalid request payload: {e}") })
|
||||
}
|
||||
};
|
||||
|
||||
let bytes = match serde_json::to_vec(&response) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::error!("file manager response serialize failed: {e}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
|
||||
tracing::warn!("file manager response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
@@ -13,11 +13,15 @@ use crate::agent::Agent;
|
||||
use crate::prober;
|
||||
use crate::subjects;
|
||||
use crate::telemetry;
|
||||
use crate::update;
|
||||
use crate::version;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct HostCommand {
|
||||
func: String,
|
||||
/// Signed-update artifact URL (for func = "update").
|
||||
#[serde(default)]
|
||||
url: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn run(agent: Arc<Agent>) -> anyhow::Result<()> {
|
||||
@@ -55,20 +59,46 @@ async fn handle(agent: Arc<Agent>, msg: async_nats::Message) {
|
||||
return;
|
||||
};
|
||||
|
||||
let response = match serde_json::from_slice::<HostCommand>(&msg.payload) {
|
||||
Ok(cmd) => dispatch(&agent, &cmd.func).await,
|
||||
Err(e) => json!({ "status": "error", "message": format!("invalid command payload: {e}") }),
|
||||
};
|
||||
|
||||
let bytes = match serde_json::to_vec(&response) {
|
||||
Ok(b) => b,
|
||||
let cmd = match serde_json::from_slice::<HostCommand>(&msg.payload) {
|
||||
Ok(cmd) => cmd,
|
||||
Err(e) => {
|
||||
tracing::error!("response serialize failed: {e}");
|
||||
publish(&agent, &reply, json!({ "status": "error", "message": format!("invalid command payload: {e}") })).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
|
||||
tracing::warn!("response publish failed: {e}");
|
||||
|
||||
// Self-update is special: it must reply BEFORE relaunching, because the
|
||||
// relaunch replaces this process and nothing after it would run.
|
||||
if cmd.func == "update" {
|
||||
let Some(url) = cmd.url else {
|
||||
publish(&agent, &reply, json!({ "status": "error", "message": "update requires a 'url'" })).await;
|
||||
return;
|
||||
};
|
||||
match update::download_verify_swap(&url).await {
|
||||
Ok(_) => {
|
||||
publish(&agent, &reply, json!({ "status": "success", "func": "update", "message": "verified and swapped; relaunching" })).await;
|
||||
let _ = agent.nats.flush().await;
|
||||
update::relaunch_and_exit();
|
||||
}
|
||||
Err(e) => {
|
||||
publish(&agent, &reply, json!({ "status": "error", "func": "update", "message": format!("{e:#}") })).await;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let response = dispatch(&agent, &cmd.func).await;
|
||||
publish(&agent, &reply, response).await;
|
||||
}
|
||||
|
||||
async fn publish(agent: &Arc<Agent>, reply: &async_nats::Subject, value: serde_json::Value) {
|
||||
match serde_json::to_vec(&value) {
|
||||
Ok(bytes) => {
|
||||
if let Err(e) = agent.nats.publish(reply.clone(), bytes.into()).await {
|
||||
tracing::warn!("response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::error!("response serialize failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
361
corrosion-host-agent/src/instancecmd.rs
Normal file
361
corrosion-host-agent/src/instancecmd.rs
Normal file
@@ -0,0 +1,361 @@
|
||||
//! Per-instance command channel + state-change events.
|
||||
//!
|
||||
//! Each process-managed instance gets a request-reply subscriber on
|
||||
//! `corrosion.{license}.{instance_id}.cmd` (funcs: start/stop/restart/status/rcon)
|
||||
//! and a publisher task that pushes every supervisor state change to
|
||||
//! `corrosion.{license}.{instance_id}.status` — the panel sees crashes when
|
||||
//! they happen, not when the next heartbeat ambles in.
|
||||
|
||||
use chrono::{SecondsFormat, Utc};
|
||||
use futures::StreamExt;
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::agent::Agent;
|
||||
use crate::subjects;
|
||||
use crate::steamcmd;
|
||||
use crate::supervisor::Supervisor;
|
||||
use crate::wipe;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct InstanceCommand {
|
||||
func: String,
|
||||
/// Payload for funcs that carry a text argument (e.g. rcon).
|
||||
#[serde(default)]
|
||||
command: Option<String>,
|
||||
/// Wipe type: "map" | "blueprint" | "full" — required for func="wipe".
|
||||
#[serde(default)]
|
||||
wipe_type: Option<wipe::WipeType>,
|
||||
/// Whether to back up wipe targets before deleting (func="wipe").
|
||||
#[serde(default)]
|
||||
backup: bool,
|
||||
/// Label for the backup subdirectory (func="wipe"). Defaults to "wipe-backup".
|
||||
#[serde(default = "default_backup_label")]
|
||||
backup_label: String,
|
||||
}
|
||||
|
||||
/// Serde default for `InstanceCommand::backup_label`.
fn default_backup_label() -> String {
    String::from("wipe-backup")
}
|
||||
|
||||
/// Forward every supervisor state change as a status event.
|
||||
pub async fn publish_state_changes(agent: Arc<Agent>, sup: Arc<dyn Supervisor>) {
|
||||
let subject = subjects::instance_status(&agent.cfg.license_id, sup.instance_id());
|
||||
let mut rx = sup.watch_state();
|
||||
let cancel = agent.shutdown.clone();
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
changed = rx.changed() => {
|
||||
if changed.is_err() {
|
||||
break;
|
||||
}
|
||||
let state = rx.borrow().clone();
|
||||
let event = json!({
|
||||
"timestamp": Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
"instance_id": sup.instance_id(),
|
||||
"event": state,
|
||||
});
|
||||
match serde_json::to_vec(&event) {
|
||||
Ok(bytes) => {
|
||||
if let Err(e) = agent.nats.publish(subject.clone(), bytes.into()).await {
|
||||
tracing::warn!("status publish failed for '{}': {e}", sup.instance_id());
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::error!("status serialize failed: {e}"),
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Request-reply command handler for one instance.
|
||||
pub async fn run(agent: Arc<Agent>, sup: Arc<dyn Supervisor>) -> anyhow::Result<()> {
|
||||
let subject = subjects::instance_cmd(&agent.cfg.license_id, sup.instance_id());
|
||||
let mut sub = agent.nats.subscribe(subject.clone()).await?;
|
||||
tracing::info!("instance command handler listening on {subject}");
|
||||
|
||||
let cancel = agent.shutdown.clone();
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = sub.next() => {
|
||||
match msg {
|
||||
Some(msg) => {
|
||||
let agent = agent.clone();
|
||||
let sup = sup.clone();
|
||||
tokio::spawn(async move { handle(agent, sup, msg).await });
|
||||
}
|
||||
None => {
|
||||
tracing::warn!("instance command subscription ended for '{}'", sup.instance_id());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("instance command handler stopping for '{}'", sup.instance_id());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle(agent: Arc<Agent>, sup: Arc<dyn Supervisor>, msg: async_nats::Message) {
|
||||
let Some(reply) = msg.reply.clone() else {
|
||||
tracing::warn!("instance command without reply subject ignored");
|
||||
return;
|
||||
};
|
||||
|
||||
let response = match serde_json::from_slice::<InstanceCommand>(&msg.payload) {
|
||||
Ok(cmd) => dispatch(&agent, &sup, &cmd).await,
|
||||
Err(e) => json!({ "status": "error", "message": format!("invalid command payload: {e}") }),
|
||||
};
|
||||
|
||||
let bytes = match serde_json::to_vec(&response) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::error!("response serialize failed: {e}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
|
||||
tracing::warn!("response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn dispatch(
|
||||
agent: &Arc<Agent>,
|
||||
sup: &Arc<dyn Supervisor>,
|
||||
cmd: &InstanceCommand,
|
||||
) -> serde_json::Value {
|
||||
let func = cmd.func.as_str();
|
||||
|
||||
// start/stop/restart take `self: Arc<Self>` (they may hand a clone to a
|
||||
// monitor task), so clone the Arc before the consuming call.
|
||||
let outcome = match func {
|
||||
"start" => sup.clone().start().await.map(|_| "starting"),
|
||||
"stop" => sup.clone().stop().await.map(|_| "stopped"),
|
||||
"restart" => sup.clone().restart().await.map(|_| "restarted"),
|
||||
"status" => {
|
||||
return json!({
|
||||
"status": "success",
|
||||
"func": "status",
|
||||
"instance_id": sup.instance_id(),
|
||||
"state": sup.state(),
|
||||
"uptime_seconds": sup.uptime_seconds().await,
|
||||
});
|
||||
}
|
||||
"rcon" => {
|
||||
// Look up the InstanceConfig for this supervisor so we can access
|
||||
// rcon settings and the game name without changing the supervisor's
|
||||
// data model.
|
||||
let inst_cfg = agent
|
||||
.cfg
|
||||
.instances
|
||||
.iter()
|
||||
.find(|i| i.id == sup.instance_id());
|
||||
|
||||
let rcon_cfg = inst_cfg.and_then(|i| i.rcon.as_ref());
|
||||
let Some(rcon_cfg) = rcon_cfg else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("instance '{}' has no rcon configured", sup.instance_id()),
|
||||
});
|
||||
};
|
||||
|
||||
let Some(command) = cmd.command.as_deref() else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": "rcon func requires a 'command' field",
|
||||
});
|
||||
};
|
||||
|
||||
let game = inst_cfg.map(|i| i.game.as_str()).unwrap_or("rust");
|
||||
return match crate::rcon::send_command(rcon_cfg, game, command).await {
|
||||
Ok(output) => json!({
|
||||
"status": "success",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id(),
|
||||
"output": output,
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
};
|
||||
}
|
||||
"steam_update" => {
|
||||
// Look up instance config for game name, root, and optional steamcmd
|
||||
// settings. The supervisor only carries process-control state, not
|
||||
// the full config, so we reach into agent.cfg.instances here as the
|
||||
// rcon dispatch does.
|
||||
let inst_cfg = agent.cfg.instances.iter().find(|i| i.id == sup.instance_id());
|
||||
|
||||
let Some(inst_cfg) = inst_cfg else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("no config found for instance '{}'", sup.instance_id()),
|
||||
});
|
||||
};
|
||||
|
||||
let game = inst_cfg.game.as_str();
|
||||
let root = inst_cfg.root.clone();
|
||||
|
||||
// Resolve steamcmd path and validate flag from config or use defaults.
|
||||
let (steamcmd_path, validate) = match inst_cfg.steamcmd.as_ref() {
|
||||
Some(s) => {
|
||||
let path = s
|
||||
.steamcmd_path
|
||||
.as_ref()
|
||||
.and_then(|p| p.to_str().map(|s| s.to_string()))
|
||||
.unwrap_or_else(|| "steamcmd".to_string());
|
||||
(path, s.validate)
|
||||
}
|
||||
None => ("steamcmd".to_string(), false),
|
||||
};
|
||||
|
||||
let license = agent.cfg.license_id.clone();
|
||||
let instance_id = sup.instance_id().to_string();
|
||||
let nats = agent.nats.clone();
|
||||
|
||||
// Publish each progress line to the steam_status subject.
|
||||
let on_progress = move |line: &str| {
|
||||
let subject = subjects::instance_steam_status(&license, &instance_id);
|
||||
let event = json!({
|
||||
"timestamp": Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
"instance_id": instance_id,
|
||||
"line": line,
|
||||
});
|
||||
match serde_json::to_vec(&event) {
|
||||
Ok(bytes) => {
|
||||
// Fire-and-forget; the async publish is non-blocking on
|
||||
// the caller side. We create a mini-runtime task via
|
||||
// a oneshot since on_progress is Fn (not async).
|
||||
let nats = nats.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = nats.publish(subject, bytes.into()).await {
|
||||
tracing::warn!("steam_status publish failed: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(e) => tracing::error!("steam_status serialize failed: {e}"),
|
||||
}
|
||||
};
|
||||
|
||||
return match steamcmd::update(game, &root, &steamcmd_path, validate, on_progress).await {
|
||||
Ok(()) => json!({
|
||||
"status": "success",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id(),
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
};
|
||||
}
|
||||
"wipe" => {
|
||||
let inst_cfg = agent.cfg.instances.iter().find(|i| i.id == sup.instance_id());
|
||||
|
||||
let Some(inst_cfg) = inst_cfg else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("no config found for instance '{}'", sup.instance_id()),
|
||||
});
|
||||
};
|
||||
|
||||
let Some(wipe_type) = cmd.wipe_type.clone() else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": "wipe func requires a 'wipe_type' field (\"map\", \"blueprint\", or \"full\")",
|
||||
});
|
||||
};
|
||||
|
||||
let root = inst_cfg.root.clone();
|
||||
let instance_id = sup.instance_id().to_string();
|
||||
|
||||
let wipe_req = wipe::WipeRequest {
|
||||
wipe_type,
|
||||
backup: cmd.backup,
|
||||
backup_label: cmd.backup_label.clone(),
|
||||
};
|
||||
|
||||
// Stop the server best-effort before wiping; proceed even if stop fails
|
||||
// (the server may already be down).
|
||||
if let Err(e) = sup.clone().stop().await {
|
||||
tracing::warn!("wipe: stop instance '{}' failed (proceeding anyway): {e:#}", instance_id);
|
||||
}
|
||||
|
||||
// Run the blocking I/O on the blocking thread pool.
|
||||
let result = tokio::task::spawn_blocking(move || wipe::execute(&root, &wipe_req)).await;
|
||||
|
||||
// Restart best-effort regardless of wipe outcome.
|
||||
if let Err(e) = sup.clone().start().await {
|
||||
tracing::warn!("wipe: restart instance '{}' failed: {e:#}", instance_id);
|
||||
}
|
||||
|
||||
return match result {
|
||||
Ok(Ok(wr)) => {
|
||||
let wipe_type_str = format!("{:?}", wr.wipe_type).to_lowercase();
|
||||
json!({
|
||||
"status": "success",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"wipe_type": wipe_type_str,
|
||||
"deleted_count": wr.deleted_count,
|
||||
})
|
||||
}
|
||||
Ok(Err(e)) => json!({
|
||||
"status": "error",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("internal error: {e}"),
|
||||
}),
|
||||
};
|
||||
}
|
||||
other => {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"message": format!("unknown func '{other}' (supported: start, stop, restart, status, rcon, steam_update, wipe)"),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
match outcome {
|
||||
Ok(result) => json!({
|
||||
"status": "success",
|
||||
"func": func,
|
||||
"instance_id": sup.instance_id(),
|
||||
"result": result,
|
||||
"state": sup.state(),
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": func,
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
}
|
||||
}
|
||||
20
corrosion-host-agent/src/lib.rs
Normal file
20
corrosion-host-agent/src/lib.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
//! Corrosion Host Agent library surface — modules are public so integration
|
||||
//! tests can drive subsystems (notably the process supervisor) directly.
|
||||
|
||||
pub mod agent;
|
||||
pub mod bus;
|
||||
pub mod config;
|
||||
pub mod docker_compose;
|
||||
pub mod filemanager;
|
||||
pub mod hostcmd;
|
||||
pub mod instancecmd;
|
||||
pub mod prober;
|
||||
pub mod process;
|
||||
pub mod rcon;
|
||||
pub mod steamcmd;
|
||||
pub mod subjects;
|
||||
pub mod supervisor;
|
||||
pub mod telemetry;
|
||||
pub mod update;
|
||||
pub mod version;
|
||||
pub mod wipe;
|
||||
@@ -4,14 +4,10 @@
|
||||
//! connectivity prober, host command channel. Process control, file ops, and
|
||||
//! game adapters arrive in Phase 1+ (see PROTOCOL.md).
|
||||
|
||||
mod agent;
|
||||
mod bus;
|
||||
mod config;
|
||||
mod hostcmd;
|
||||
mod prober;
|
||||
mod subjects;
|
||||
mod telemetry;
|
||||
mod version;
|
||||
use corrosion_host_agent::{
|
||||
agent, bus, config, docker_compose, filemanager, hostcmd, instancecmd, prober, process,
|
||||
subjects, supervisor, telemetry, version,
|
||||
};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{Parser, Subcommand};
|
||||
@@ -96,11 +92,28 @@ async fn run(settings: config::Settings) -> Result<()> {
|
||||
|
||||
let nats = bus::connect(&settings).await?;
|
||||
|
||||
// Per-game supervisor factory: container-managed games (Dune) get a
|
||||
// docker-compose supervisor; everything else is a spawned-process
|
||||
// supervisor. Both satisfy the `Supervisor` trait, so the rest of the agent
|
||||
// is game-agnostic.
|
||||
let supervisors: std::collections::HashMap<String, Arc<dyn supervisor::Supervisor>> = settings
|
||||
.instances
|
||||
.iter()
|
||||
.map(|inst| {
|
||||
let sup: Arc<dyn supervisor::Supervisor> = match inst.game.as_str() {
|
||||
"dune" => docker_compose::DockerComposeSupervisor::new(inst),
|
||||
_ => process::ProcessSupervisor::new(inst),
|
||||
};
|
||||
(inst.id.clone(), sup)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let agent = Arc::new(Agent {
|
||||
cfg: settings,
|
||||
nats,
|
||||
started: Instant::now(),
|
||||
last_probe: RwLock::new(None),
|
||||
supervisors,
|
||||
shutdown: CancellationToken::new(),
|
||||
});
|
||||
|
||||
@@ -115,6 +128,39 @@ async fn run(settings: config::Settings) -> Result<()> {
|
||||
}
|
||||
}));
|
||||
}
|
||||
for (instance_id, sup) in &agent.supervisors {
|
||||
{
|
||||
let agent = agent.clone();
|
||||
let sup = sup.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
if let Err(e) = instancecmd::run(agent, sup).await {
|
||||
tracing::error!("instance command handler failed: {e:#}");
|
||||
}
|
||||
}));
|
||||
}
|
||||
handles.push(tokio::spawn(instancecmd::publish_state_changes(
|
||||
agent.clone(),
|
||||
sup.clone(),
|
||||
)));
|
||||
// File manager: one handler task per instance, jailed to root.
|
||||
{
|
||||
let agent = agent.clone();
|
||||
let inst_cfg = agent
|
||||
.cfg
|
||||
.instances
|
||||
.iter()
|
||||
.find(|i| &i.id == instance_id)
|
||||
.cloned();
|
||||
if let Some(cfg) = inst_cfg {
|
||||
let id = instance_id.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
if let Err(e) = filemanager::run(agent, id, cfg.root).await {
|
||||
tracing::error!("file manager handler failed: {e:#}");
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wait_for_shutdown_signal().await;
|
||||
tracing::info!("shutdown signal received");
|
||||
|
||||
262
corrosion-host-agent/src/process.rs
Normal file
262
corrosion-host-agent/src/process.rs
Normal file
@@ -0,0 +1,262 @@
|
||||
//! Per-instance game-server process supervision.
|
||||
//!
|
||||
//! One `ProcessSupervisor` per process-managed instance (Rust/Conan/Soulmask).
|
||||
//! Lifecycle mirrors the proven Go agent behavior — graceful SIGTERM with a 30s
|
||||
//! budget before force kill, a monitor task that reaps the child and records
|
||||
//! crash-vs-stop — with two fixes the Go version needed: args are a proper list
|
||||
//! (no naive space splitting), and every state change is observable through a
|
||||
//! watch channel so the panel gets push events instead of waiting for the next
|
||||
//! heartbeat. Lifecycle control is exposed through the [`Supervisor`] trait so
|
||||
//! the command dispatch is identical across process- and container-managed
|
||||
//! games.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::process::{Child, Command};
|
||||
use tokio::sync::{watch, Mutex};
|
||||
|
||||
use crate::config::InstanceConfig;
|
||||
use crate::supervisor::{InstanceState, Supervisor};
|
||||
|
||||
const GRACEFUL_STOP_BUDGET: Duration = Duration::from_secs(30);
|
||||
const RESTART_PAUSE: Duration = Duration::from_secs(2);
|
||||
|
||||
struct Inner {
|
||||
child: Option<Child>,
|
||||
started_at: Option<Instant>,
|
||||
/// True while a stop was requested — the monitor uses it to distinguish
|
||||
/// an ordered shutdown from a crash.
|
||||
stop_requested: bool,
|
||||
}
|
||||
|
||||
pub struct ProcessSupervisor {
|
||||
instance_id: String,
|
||||
executable: Option<PathBuf>,
|
||||
args: Vec<String>,
|
||||
working_dir: Option<PathBuf>,
|
||||
inner: Mutex<Inner>,
|
||||
state_tx: watch::Sender<InstanceState>,
|
||||
}
|
||||
|
||||
impl ProcessSupervisor {
|
||||
pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
|
||||
let executable = cfg.resolved_executable();
|
||||
let initial = if executable.is_some() {
|
||||
InstanceState::Stopped
|
||||
} else {
|
||||
InstanceState::Unmanaged
|
||||
};
|
||||
let (state_tx, _) = watch::channel(initial);
|
||||
Arc::new(Self {
|
||||
instance_id: cfg.id.clone(),
|
||||
executable,
|
||||
args: cfg.args.clone(),
|
||||
working_dir: cfg.working_dir.clone(),
|
||||
inner: Mutex::new(Inner {
|
||||
child: None,
|
||||
started_at: None,
|
||||
stop_requested: false,
|
||||
}),
|
||||
state_tx,
|
||||
})
|
||||
}
|
||||
|
||||
async fn monitor(self: Arc<Self>) {
|
||||
// Take a waiter without holding the lock across the whole child
|
||||
// lifetime: Child::wait needs &mut, so the child stays in inner and
|
||||
// we poll it.
|
||||
loop {
|
||||
let status = {
|
||||
let mut inner = self.inner.lock().await;
|
||||
let Some(child) = inner.child.as_mut() else { return };
|
||||
match child.try_wait() {
|
||||
Ok(Some(status)) => Some(status),
|
||||
Ok(None) => None,
|
||||
Err(e) => {
|
||||
tracing::error!("instance '{}' wait failed: {e}", self.instance_id);
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match status {
|
||||
Some(status) => {
|
||||
let mut inner = self.inner.lock().await;
|
||||
inner.child = None;
|
||||
inner.started_at = None;
|
||||
let ordered = inner.stop_requested;
|
||||
inner.stop_requested = false;
|
||||
drop(inner);
|
||||
|
||||
if ordered {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
tracing::info!("instance '{}' stopped ({status})", self.instance_id);
|
||||
} else {
|
||||
let exit_code = status.code();
|
||||
self.set_state(InstanceState::Crashed { exit_code });
|
||||
tracing::warn!(
|
||||
"instance '{}' exited unexpectedly ({status}) — marked crashed",
|
||||
self.instance_id
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
None => tokio::time::sleep(Duration::from_millis(500)).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn set_state(&self, state: InstanceState) {
|
||||
// send_replace never fails even with zero receivers.
|
||||
let _ = self.state_tx.send_replace(state);
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Supervisor for ProcessSupervisor {
|
||||
fn instance_id(&self) -> &str {
|
||||
&self.instance_id
|
||||
}
|
||||
|
||||
fn state(&self) -> InstanceState {
|
||||
self.state_tx.borrow().clone()
|
||||
}
|
||||
|
||||
/// New receiver on the state watch channel.
fn watch_state(&self) -> watch::Receiver<InstanceState> {
    let sender = &self.state_tx;
    sender.subscribe()
}
|
||||
|
||||
async fn uptime_seconds(&self) -> u64 {
|
||||
let inner = self.inner.lock().await;
|
||||
match (&*self.state_tx.borrow(), inner.started_at) {
|
||||
(InstanceState::Running, Some(t)) => t.elapsed().as_secs(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
async fn start(self: Arc<Self>) -> Result<()> {
|
||||
let Some(exe) = self.executable.clone() else {
|
||||
bail!("instance '{}' has no executable configured", self.instance_id);
|
||||
};
|
||||
if !exe.exists() {
|
||||
bail!("executable not found: {}", exe.display());
|
||||
}
|
||||
|
||||
let mut inner = self.inner.lock().await;
|
||||
if matches!(*self.state_tx.borrow(), InstanceState::Running | InstanceState::Starting) {
|
||||
bail!("instance '{}' is already running", self.instance_id);
|
||||
}
|
||||
|
||||
self.set_state(InstanceState::Starting);
|
||||
|
||||
let workdir = self
|
||||
.working_dir
|
||||
.clone()
|
||||
.or_else(|| exe.parent().map(|p| p.to_path_buf()))
|
||||
.unwrap_or_else(|| PathBuf::from("."));
|
||||
|
||||
let child = Command::new(&exe)
|
||||
.args(&self.args)
|
||||
.current_dir(&workdir)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::inherit())
|
||||
.stderr(Stdio::inherit())
|
||||
.spawn()
|
||||
.with_context(|| format!("spawning {}", exe.display()))?;
|
||||
|
||||
let pid = child.id();
|
||||
inner.child = Some(child);
|
||||
inner.started_at = Some(Instant::now());
|
||||
inner.stop_requested = false;
|
||||
drop(inner);
|
||||
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!(
|
||||
"instance '{}' started: {} (pid {:?})",
|
||||
self.instance_id,
|
||||
exe.display(),
|
||||
pid
|
||||
);
|
||||
|
||||
// Monitor: reap the child and classify the exit.
|
||||
let sup = Arc::clone(&self);
|
||||
tokio::spawn(async move { sup.monitor().await });
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn stop(self: Arc<Self>) -> Result<()> {
|
||||
let mut inner = self.inner.lock().await;
|
||||
if inner.child.is_none() {
|
||||
bail!("instance '{}' is not running", self.instance_id);
|
||||
}
|
||||
inner.stop_requested = true;
|
||||
self.set_state(InstanceState::Stopping);
|
||||
let child = inner.child.as_mut().expect("checked above");
|
||||
|
||||
// Graceful first: SIGTERM on unix; Windows has no SIGTERM equivalent
|
||||
// for console processes, so it goes straight to kill there.
|
||||
#[cfg(unix)]
|
||||
if let Some(pid) = child.id() {
|
||||
unsafe {
|
||||
libc::kill(pid as i32, libc::SIGTERM);
|
||||
}
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = child.start_kill();
|
||||
}
|
||||
drop(inner);
|
||||
|
||||
// Wait for the monitor to observe the exit; force kill on budget.
|
||||
let mut rx = self.watch_state();
|
||||
let deadline = tokio::time::timeout(GRACEFUL_STOP_BUDGET, async {
|
||||
loop {
|
||||
if matches!(*rx.borrow(), InstanceState::Stopped) {
|
||||
return;
|
||||
}
|
||||
if rx.changed().await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
if deadline.is_err() {
|
||||
tracing::warn!(
|
||||
"instance '{}' ignored SIGTERM for {}s — force killing",
|
||||
self.instance_id,
|
||||
GRACEFUL_STOP_BUDGET.as_secs()
|
||||
);
|
||||
let mut inner = self.inner.lock().await;
|
||||
if let Some(child) = inner.child.as_mut() {
|
||||
let _ = child.start_kill();
|
||||
}
|
||||
drop(inner);
|
||||
|
||||
let mut rx = self.watch_state();
|
||||
let _ = tokio::time::timeout(Duration::from_secs(5), async {
|
||||
while !matches!(*rx.borrow(), InstanceState::Stopped) {
|
||||
if rx.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn restart(self: Arc<Self>) -> Result<()> {
|
||||
if !matches!(
|
||||
*self.state_tx.borrow(),
|
||||
InstanceState::Stopped | InstanceState::Crashed { .. } | InstanceState::Unmanaged
|
||||
) {
|
||||
self.clone().stop().await?;
|
||||
}
|
||||
tokio::time::sleep(RESTART_PAUSE).await;
|
||||
self.start().await
|
||||
}
|
||||
}
|
||||
320
corrosion-host-agent/src/rcon.rs
Normal file
320
corrosion-host-agent/src/rcon.rs
Normal file
@@ -0,0 +1,320 @@
|
||||
//! RCON client: game-server remote-console over WebRCON (Rust) or Source RCON (Conan/Soulmask).
|
||||
//!
|
||||
//! The agent runs co-located with the game server, so every connection targets
|
||||
//! 127.0.0.1 — no TLS is needed and latency is sub-millisecond. Two protocols
|
||||
//! are supported because the Rust game ships its own WebSocket-based WebRCON
|
||||
//! while Conan Exiles and Soulmask use the Valve Source RCON wire format over
|
||||
//! plain TCP.
|
||||
//!
|
||||
//! The protocol selection is explicit in the config (`kind`) but can be inferred
|
||||
//! from the game name when absent — callers supply the `game` field they already
|
||||
//! have in `InstanceConfig`.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use rand::Rng;
|
||||
use serde::Deserialize;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::{timeout, Duration};
|
||||
|
||||
/// WebRCON is the Facepunch WebSocket protocol (Rust game).
|
||||
/// Source RCON is the Valve wire protocol used by Conan Exiles and Soulmask.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum RconKind {
|
||||
WebRcon,
|
||||
Source,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct RconConfig {
|
||||
/// Protocol override. When absent the kind is resolved from `game`.
|
||||
#[serde(default)]
|
||||
pub kind: Option<RconKind>,
|
||||
pub port: u16,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
impl RconConfig {
|
||||
/// Resolve the concrete protocol, falling back to a per-game default when
|
||||
/// `kind` is not set. rust → WebRcon; conan + soulmask → Source.
|
||||
pub fn resolved_kind(&self, game: &str) -> RconKind {
|
||||
if let Some(k) = self.kind {
|
||||
return k;
|
||||
}
|
||||
match game {
|
||||
"conan" | "soulmask" => RconKind::Source,
|
||||
// rust is the primary game; anything unknown defaults to WebRcon
|
||||
// — operators can always override with an explicit `kind`.
|
||||
_ => RconKind::WebRcon,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
const RESPONSE_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Send `command` to the game server and return its text response.
|
||||
///
|
||||
/// The agent runs on the same host as the game server, so the target address
|
||||
/// is always 127.0.0.1:{port}. Connection and response deadlines are fixed at
|
||||
/// 5 s and 10 s respectively — enough headroom for a loaded server while still
|
||||
/// catching hung connections quickly.
|
||||
pub async fn send_command(cfg: &RconConfig, game: &str, command: &str) -> Result<String> {
|
||||
match cfg.resolved_kind(game) {
|
||||
RconKind::WebRcon => webrcon_exec(cfg, command).await,
|
||||
RconKind::Source => source_rcon_exec(cfg, command).await,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRCON (Rust game) — WebSocket JSON protocol
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// WebRCON request/response envelope. The server also emits chat/log frames
|
||||
/// on this socket with Identifier == 0; those are skipped.
|
||||
#[derive(serde::Serialize)]
|
||||
struct WebRconRequest<'a> {
|
||||
#[serde(rename = "Identifier")]
|
||||
identifier: i32,
|
||||
#[serde(rename = "Message")]
|
||||
message: &'a str,
|
||||
#[serde(rename = "Name")]
|
||||
name: &'static str,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct WebRconResponse {
|
||||
#[serde(rename = "Identifier")]
|
||||
identifier: i32,
|
||||
#[serde(rename = "Message")]
|
||||
message: String,
|
||||
}
|
||||
|
||||
async fn webrcon_exec(cfg: &RconConfig, command: &str) -> Result<String> {
|
||||
use tokio_tungstenite::connect_async;
|
||||
use tokio_tungstenite::tungstenite::Message as WsMsg;
|
||||
|
||||
// The Rust game server embeds the password in the WebSocket URL path —
|
||||
// never interpolate the real URL into errors or logs.
|
||||
let url = format!("ws://127.0.0.1:{}/{}", cfg.port, cfg.password);
|
||||
let redacted = format!("ws://127.0.0.1:{}/<redacted>", cfg.port);
|
||||
|
||||
// Wrap the entire connection + exchange in the connect timeout — we want
|
||||
// the timeout to cover TCP handshake + WS upgrade, not just the send.
|
||||
let (mut ws, _) = timeout(CONNECT_TIMEOUT, connect_async(&url))
|
||||
.await
|
||||
.context("connect timeout")?
|
||||
.with_context(|| format!("WebRCON connect to {redacted}"))?;
|
||||
|
||||
// Use a random positive i32 so correlation is unambiguous even when
|
||||
// multiple callers share a port (future concurrency).
|
||||
let id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
let req = WebRconRequest { identifier: id, message: command, name: "Corrosion" };
|
||||
let payload = serde_json::to_string(&req).context("serialize WebRCON request")?;
|
||||
|
||||
ws.send(WsMsg::Text(payload))
|
||||
.await
|
||||
.context("send WebRCON command")?;
|
||||
|
||||
tracing::debug!("WebRCON sent id={id} command={command:?}");
|
||||
|
||||
// Read frames until we see our Identifier — skip chat/log noise (id 0 or
|
||||
// any other value that isn't ours).
|
||||
let result = timeout(RESPONSE_TIMEOUT, async {
|
||||
loop {
|
||||
match ws.next().await {
|
||||
Some(Ok(WsMsg::Text(text))) => {
|
||||
match serde_json::from_str::<WebRconResponse>(&text) {
|
||||
Ok(resp) if resp.identifier == id => return Ok(resp.message),
|
||||
Ok(_) => {
|
||||
// Not our response (chat, log, another caller's frame).
|
||||
tracing::trace!("WebRCON skipping frame with different Identifier");
|
||||
continue;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::trace!("WebRCON non-JSON frame ignored: {e}");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(Ok(WsMsg::Close(_))) => bail!("WebRCON server closed connection"),
|
||||
Some(Ok(_)) => continue, // binary/ping/pong — skip
|
||||
Some(Err(e)) => return Err(anyhow::anyhow!(e).context("WebRCON read error")),
|
||||
None => bail!("WebRCON stream ended without response"),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.context("WebRCON response timeout")??;
|
||||
|
||||
// Close cleanly; a send error here is cosmetic — we already have our data.
|
||||
let _ = ws.close(None).await;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source RCON (Conan Exiles, Soulmask) — Valve TCP binary protocol
|
||||
//
|
||||
// Packet layout (all fields little-endian):
|
||||
// i32 size — byte count of the remaining packet (id + type + body + 2 nulls)
|
||||
// i32 id — caller-chosen correlation id; auth failure returns -1
|
||||
// i32 type — 0=RESPONSE_VALUE, 2=EXECCOMMAND/AUTH_RESPONSE, 3=AUTH
|
||||
// [u8] body — UTF-8 command or response text
|
||||
// u8 0x00 — body null terminator
|
||||
// u8 0x00 — padding null terminator
|
||||
//
|
||||
// Multi-packet handling: after sending the command we also send an empty
|
||||
// RESPONSE_VALUE probe with a distinct id. We collect all RESPONSE_VALUE
|
||||
// packets belonging to the command id and stop when we receive the probe's
|
||||
// response. This is the standard technique specified in the Valve wiki.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const RCON_TYPE_AUTH: i32 = 3;
|
||||
const RCON_TYPE_AUTH_RESPONSE: i32 = 2;
|
||||
const RCON_TYPE_EXECCOMMAND: i32 = 2;
|
||||
const RCON_TYPE_RESPONSE_VALUE: i32 = 0;
|
||||
|
||||
/// Maximum accumulated response body (guards against misbehaving servers).
|
||||
const MAX_RESPONSE_BYTES: usize = 1024 * 1024; // 1 MiB
|
||||
|
||||
async fn source_rcon_exec(cfg: &RconConfig, command: &str) -> Result<String> {
|
||||
let addr = format!("127.0.0.1:{}", cfg.port);
|
||||
|
||||
let stream = timeout(CONNECT_TIMEOUT, TcpStream::connect(&addr))
|
||||
.await
|
||||
.context("connect timeout")?
|
||||
.with_context(|| format!("Source RCON connect to {addr}"))?;
|
||||
|
||||
let mut stream = stream;
|
||||
|
||||
// --- Auth ---
|
||||
let auth_id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
send_packet(&mut stream, auth_id, RCON_TYPE_AUTH, cfg.password.as_bytes()).await?;
|
||||
|
||||
// The server sends two responses to AUTH: first an empty RESPONSE_VALUE,
|
||||
// then an AUTH_RESPONSE. We skip the first and read until AUTH_RESPONSE.
|
||||
timeout(RESPONSE_TIMEOUT, async {
|
||||
loop {
|
||||
let (id, ptype, _body) = recv_packet(&mut stream).await?;
|
||||
if ptype == RCON_TYPE_AUTH_RESPONSE {
|
||||
if id == -1 {
|
||||
bail!("Source RCON auth failed: wrong password");
|
||||
}
|
||||
tracing::debug!("Source RCON authenticated (id={id})");
|
||||
return Ok(());
|
||||
}
|
||||
// Skip the empty RESPONSE_VALUE that precedes AUTH_RESPONSE.
|
||||
}
|
||||
#[allow(unreachable_code)]
|
||||
Ok::<(), anyhow::Error>(())
|
||||
})
|
||||
.await
|
||||
.context("Source RCON auth timeout")??;
|
||||
|
||||
// --- Command ---
|
||||
let cmd_id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
// Probe id must differ from cmd_id.
|
||||
let probe_id: i32 = loop {
|
||||
let id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
if id != cmd_id {
|
||||
break id;
|
||||
}
|
||||
};
|
||||
|
||||
send_packet(&mut stream, cmd_id, RCON_TYPE_EXECCOMMAND, command.as_bytes()).await?;
|
||||
// Empty RESPONSE_VALUE probe — the server echoes it after processing the
|
||||
// preceding command, signalling end-of-response.
|
||||
send_packet(&mut stream, probe_id, RCON_TYPE_RESPONSE_VALUE, b"").await?;
|
||||
|
||||
// Not every server is probe-conformant (Soulmask unverified): once we hold
|
||||
// response data, a short per-read quiet period also terminates — never
|
||||
// discard a response we already received just because the probe echo
|
||||
// didn't come back.
|
||||
const QUIET_PERIOD: Duration = Duration::from_millis(1500);
|
||||
let response = timeout(RESPONSE_TIMEOUT, async {
|
||||
let mut body_accum: Vec<u8> = Vec::new();
|
||||
loop {
|
||||
let next = if body_accum.is_empty() {
|
||||
recv_packet(&mut stream).await.map(Some)
|
||||
} else {
|
||||
match timeout(QUIET_PERIOD, recv_packet(&mut stream)).await {
|
||||
Ok(res) => res.map(Some),
|
||||
Err(_elapsed) => Ok(None), // quiet after data — done
|
||||
}
|
||||
};
|
||||
let Some((id, ptype, body)) = next? else {
|
||||
break;
|
||||
};
|
||||
if ptype != RCON_TYPE_RESPONSE_VALUE {
|
||||
continue; // unexpected packet type — skip
|
||||
}
|
||||
if id == probe_id {
|
||||
// Probe echoed back — all command response packets have arrived.
|
||||
break;
|
||||
}
|
||||
if id == cmd_id {
|
||||
if body_accum.len() + body.len() > MAX_RESPONSE_BYTES {
|
||||
bail!("Source RCON response exceeded {MAX_RESPONSE_BYTES} bytes");
|
||||
}
|
||||
body_accum.extend_from_slice(&body);
|
||||
}
|
||||
// Skip packets with other ids (shouldn't happen but be defensive).
|
||||
}
|
||||
Ok::<Vec<u8>, anyhow::Error>(body_accum)
|
||||
})
|
||||
.await
|
||||
.context("Source RCON response timeout")??;
|
||||
|
||||
String::from_utf8(response).context("Source RCON response is not valid UTF-8")
|
||||
}
|
||||
|
||||
/// Write a Source RCON packet to the stream.
|
||||
async fn send_packet(stream: &mut TcpStream, id: i32, ptype: i32, body: &[u8]) -> Result<()> {
|
||||
// size = id(4) + type(4) + body(n) + 2 null terminators
|
||||
let size = (4 + 4 + body.len() + 2) as i32;
|
||||
let mut buf: Vec<u8> = Vec::with_capacity(4 + size as usize);
|
||||
buf.extend_from_slice(&size.to_le_bytes());
|
||||
buf.extend_from_slice(&id.to_le_bytes());
|
||||
buf.extend_from_slice(&ptype.to_le_bytes());
|
||||
buf.extend_from_slice(body);
|
||||
buf.push(0x00);
|
||||
buf.push(0x00);
|
||||
stream.write_all(&buf).await.context("Source RCON write")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read one Source RCON packet; returns (id, type, body).
|
||||
async fn recv_packet(stream: &mut TcpStream) -> Result<(i32, i32, Vec<u8>)> {
|
||||
let mut size_buf = [0u8; 4];
|
||||
stream
|
||||
.read_exact(&mut size_buf)
|
||||
.await
|
||||
.context("Source RCON read size")?;
|
||||
let size = i32::from_le_bytes(size_buf) as usize;
|
||||
|
||||
// Minimum packet: id(4) + type(4) + 2 null terminators = 10 bytes.
|
||||
if size < 10 {
|
||||
bail!("Source RCON: malformed packet (size={size})");
|
||||
}
|
||||
if size > MAX_RESPONSE_BYTES + 16 {
|
||||
bail!("Source RCON: packet too large ({size} bytes)");
|
||||
}
|
||||
|
||||
let mut payload = vec![0u8; size];
|
||||
stream
|
||||
.read_exact(&mut payload)
|
||||
.await
|
||||
.context("Source RCON read payload")?;
|
||||
|
||||
let id = i32::from_le_bytes(payload[0..4].try_into().unwrap());
|
||||
let ptype = i32::from_le_bytes(payload[4..8].try_into().unwrap());
|
||||
// Body is everything between the two fields and the two trailing nulls.
|
||||
let body_end = size.saturating_sub(2); // strip 2 null terminators
|
||||
let body = payload[8..body_end].to_vec();
|
||||
|
||||
Ok((id, ptype, body))
|
||||
}
|
||||
126
corrosion-host-agent/src/steamcmd.rs
Normal file
126
corrosion-host-agent/src/steamcmd.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
//! SteamCMD update integration for process-managed game instances.
|
||||
//!
|
||||
//! Wraps the `steamcmd` binary to perform an `+app_update` for a given game
|
||||
//! instance, streaming stdout lines to a caller-supplied progress callback so
|
||||
//! the panel can display live update output. The agent already runs a task per
|
||||
//! command in a separate `tokio::spawn`, so the blocking-until-done semantics
|
||||
//! here are intentional — the NATS reply is sent only when SteamCMD exits.
|
||||
//!
|
||||
//! Dune is Docker-image-based and explicitly has no SteamCMD integration — any
|
||||
//! attempt to invoke `update` on a Dune instance returns a clear error rather
|
||||
//! than a silent no-op.
|
||||
|
||||
use std::path::Path;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
|
||||
/// Map a game name to the Steam app ID passed to `+app_update`.
///
/// Returns `None` for `"dune"` (Docker images, no SteamCMD role) and for any
/// unrecognised name. For `"soulmask"` the ID depends on the compile target:
/// one value on Windows builds, another everywhere else.
pub fn app_id_for_game(game: &str) -> Option<u32> {
    let soulmask_id = if cfg!(windows) { 3017310 } else { 3017300 };
    let app_id = match game {
        "rust" => 258550,
        "conan" => 443030,
        "soulmask" => soulmask_id,
        // "dune" and every unknown name: nothing for SteamCMD to update.
        _ => return None,
    };
    Some(app_id)
}
|
||||
|
||||
/// Configuration controlling SteamCMD behaviour for one instance.
|
||||
/// Serialised as `[instance.steamcmd]` in agent.toml.
|
||||
#[derive(Debug, Clone, serde::Deserialize, Default)]
|
||||
pub struct SteamcmdConfig {
|
||||
/// Absolute or relative path to the `steamcmd` binary.
|
||||
/// Defaults to `"steamcmd"` (resolved via `PATH`) when absent.
|
||||
#[serde(default)]
|
||||
pub steamcmd_path: Option<std::path::PathBuf>,
|
||||
|
||||
/// Whether to pass `validate` to `+app_update`. Adds a file-hash check
|
||||
/// pass that catches corruption at the cost of a longer update time.
|
||||
#[serde(default)]
|
||||
pub validate: bool,
|
||||
}
|
||||
|
||||
/// Run a SteamCMD update for `game` into `install_dir`.
|
||||
///
|
||||
/// - `steamcmd_path`: path to the binary (or `"steamcmd"` to use PATH).
|
||||
/// - `validate`: appends `validate` to the `+app_update` call.
|
||||
/// - `on_progress`: receives each stdout line as it arrives so callers can
|
||||
/// forward progress to the panel in real time.
|
||||
///
|
||||
/// Returns `Ok(())` on a zero exit code, otherwise an error describing the
|
||||
/// failure. Dune is rejected before any process is spawned.
|
||||
pub async fn update(
|
||||
game: &str,
|
||||
install_dir: &Path,
|
||||
steamcmd_path: &str,
|
||||
validate: bool,
|
||||
on_progress: impl Fn(&str),
|
||||
) -> anyhow::Result<()> {
|
||||
use anyhow::Context;
|
||||
|
||||
let app_id = app_id_for_game(game).ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"dune uses Docker images, not SteamCMD — cannot run app_update for game '{game}'"
|
||||
)
|
||||
})?;
|
||||
|
||||
let install_dir_str = install_dir
|
||||
.to_str()
|
||||
.with_context(|| format!("install_dir '{}' is not valid UTF-8", install_dir.display()))?;
|
||||
|
||||
let mut args: Vec<String> = vec![
|
||||
"+force_install_dir".to_string(),
|
||||
install_dir_str.to_string(),
|
||||
"+login".to_string(),
|
||||
"anonymous".to_string(),
|
||||
"+app_update".to_string(),
|
||||
app_id.to_string(),
|
||||
];
|
||||
if validate {
|
||||
args.push("validate".to_string());
|
||||
}
|
||||
args.push("+quit".to_string());
|
||||
|
||||
tracing::info!(
|
||||
"steamcmd: starting update for game={game} app_id={app_id} install_dir={} validate={validate}",
|
||||
install_dir.display()
|
||||
);
|
||||
|
||||
let mut child = Command::new(steamcmd_path)
|
||||
.args(&args)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.spawn()
|
||||
.with_context(|| format!("spawning steamcmd binary '{steamcmd_path}'"))?;
|
||||
|
||||
let stdout = child.stdout.take().expect("stdout was piped");
|
||||
let mut lines = BufReader::new(stdout).lines();
|
||||
|
||||
while let Some(line) = lines.next_line().await.context("reading steamcmd stdout")? {
|
||||
tracing::debug!("steamcmd: {line}");
|
||||
on_progress(&line);
|
||||
}
|
||||
|
||||
let status = child.wait().await.context("waiting for steamcmd to exit")?;
|
||||
if status.success() {
|
||||
tracing::info!("steamcmd: update completed successfully for game={game}");
|
||||
Ok(())
|
||||
} else {
|
||||
let code = status.code().unwrap_or(-1);
|
||||
anyhow::bail!("steamcmd exited with non-zero status {code} for game={game}")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,14 +17,23 @@ pub fn host_going_offline(license: &str) -> String {
|
||||
format!("corrosion.{license}.host.going_offline")
|
||||
}
|
||||
|
||||
/// Phase 1: per-instance command channel (start/stop/restart/rcon/...).
|
||||
#[allow(dead_code)]
|
||||
/// Subject of the per-instance command channel (start/stop/restart/status;
/// rcon et al. to come): `corrosion.{license}.{instance}.cmd`.
pub fn instance_cmd(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "cmd"].join(".")
}
|
||||
|
||||
/// Phase 1: per-instance state-change events.
|
||||
#[allow(dead_code)]
|
||||
/// Subject for per-instance state-change events:
/// `corrosion.{license}.{instance}.status`.
pub fn instance_status(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "status"].join(".")
}
|
||||
|
||||
/// Subject for the per-instance SteamCMD progress stream. Lines from
/// `steamcmd` stdout are published here so the panel can show live update
/// output: `corrosion.{license}.{instance}.steam_status`.
pub fn instance_steam_status(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "steam_status"].join(".")
}
|
||||
|
||||
/// Subject of the per-instance file manager command channel (request-reply):
/// `corrosion.{license}.{instance}.files.cmd`.
pub fn instance_files_cmd(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "files", "cmd"].join(".")
}
|
||||
|
||||
80
corrosion-host-agent/src/supervisor.rs
Normal file
80
corrosion-host-agent/src/supervisor.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
//! The supervision abstraction.
|
||||
//!
|
||||
//! A `Supervisor` owns the lifecycle of one game instance. Different games are
|
||||
//! managed in fundamentally different ways — Rust/Conan/Soulmask are spawned OS
|
||||
//! processes ([`crate::process::ProcessSupervisor`]); Dune is a docker-compose
|
||||
//! stack ([`crate::docker_compose::DockerComposeSupervisor`]); future planes
|
||||
//! (kubectl, AMP/podman, SSH) will be their own impls. The instance command
|
||||
//! dispatch (`instancecmd::dispatch`) talks only to this trait, so it never
|
||||
//! learns which management model is behind a given instance.
|
||||
//!
|
||||
//! Trait objects (`Arc<dyn Supervisor>`) need object-safe, dynamically
|
||||
//! dispatchable async methods; native `async fn` in traits is not yet
|
||||
//! dyn-compatible, so we use `#[async_trait]` (the battle-tested ecosystem
|
||||
//! standard) to box the returned futures. The cost — one heap alloc per
|
||||
//! lifecycle call — is irrelevant for start/stop/restart, which happen seconds
|
||||
//! to minutes apart.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::Serialize;
|
||||
use tokio::sync::watch;
|
||||
|
||||
/// Observable lifecycle state of one instance. Shared vocabulary across every
|
||||
/// supervisor impl; serialized verbatim into heartbeats and status events
|
||||
/// (`{"state":"running", ...}`).
|
||||
#[derive(Debug, Clone, PartialEq, Serialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "state")]
|
||||
pub enum InstanceState {
|
||||
/// Not lifecycle-managed (a process instance with no executable, etc.).
|
||||
Unmanaged,
|
||||
Stopped,
|
||||
Starting,
|
||||
Running,
|
||||
Stopping,
|
||||
/// Exited/died without a stop request.
|
||||
Crashed {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
exit_code: Option<i32>,
|
||||
},
|
||||
}
|
||||
|
||||
impl InstanceState {
|
||||
pub fn as_label(&self) -> &'static str {
|
||||
match self {
|
||||
InstanceState::Unmanaged => "unmanaged",
|
||||
InstanceState::Stopped => "stopped",
|
||||
InstanceState::Starting => "starting",
|
||||
InstanceState::Running => "running",
|
||||
InstanceState::Stopping => "stopping",
|
||||
InstanceState::Crashed { .. } => "crashed",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lifecycle control + state observation for one instance.
|
||||
///
|
||||
/// `start`/`stop`/`restart` take `self: Arc<Self>` so an impl can hand a clone
|
||||
/// to a spawned monitor task; callers hold an `Arc<dyn Supervisor>` and
|
||||
/// `clone()` before each call. `watch_state` exposes the same channel the
|
||||
/// status-event publisher drains, so panel push events stay decoupled from the
|
||||
/// heartbeat cadence.
|
||||
#[async_trait::async_trait]
|
||||
pub trait Supervisor: Send + Sync {
|
||||
/// The instance slug (a NATS subject segment).
|
||||
fn instance_id(&self) -> &str;
|
||||
|
||||
/// Current cached state (cheap; no I/O).
|
||||
fn state(&self) -> InstanceState;
|
||||
|
||||
/// Subscribe to state transitions.
|
||||
fn watch_state(&self) -> watch::Receiver<InstanceState>;
|
||||
|
||||
/// Seconds since the instance entered `Running` (0 otherwise).
|
||||
async fn uptime_seconds(&self) -> u64;
|
||||
|
||||
async fn start(self: Arc<Self>) -> Result<()>;
|
||||
async fn stop(self: Arc<Self>) -> Result<()>;
|
||||
async fn restart(self: Arc<Self>) -> Result<()>;
|
||||
}
|
||||
@@ -65,9 +65,10 @@ pub struct InstanceInfo {
|
||||
pub game: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub label: Option<String>,
|
||||
/// Phase 0 states: `configured` (root exists) or `missing_root`.
|
||||
/// Phase 1 adds live process states (running/stopped/crashed).
|
||||
/// Process-managed: running/stopped/starting/stopping/crashed.
|
||||
/// Unmanaged (no executable configured): configured/missing_root.
|
||||
pub state: String,
|
||||
pub uptime_seconds: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub root_disk_free_mb: Option<u64>,
|
||||
}
|
||||
@@ -125,21 +126,30 @@ pub async fn collect(agent: &Agent, sys: &mut System) -> HeartbeatPayload {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let instances = agent
|
||||
.cfg
|
||||
.instances
|
||||
.iter()
|
||||
.map(|inst| {
|
||||
let exists = inst.root.exists();
|
||||
InstanceInfo {
|
||||
id: inst.id.clone(),
|
||||
game: inst.game.clone(),
|
||||
label: inst.label.clone(),
|
||||
state: if exists { "configured" } else { "missing_root" }.to_string(),
|
||||
root_disk_free_mb: disk_free_for_path(&disks, &inst.root),
|
||||
let mut instances = Vec::with_capacity(agent.cfg.instances.len());
|
||||
for inst in &agent.cfg.instances {
|
||||
let (state, uptime_seconds) = match agent.supervisors.get(&inst.id) {
|
||||
Some(sup) if !matches!(sup.state(), crate::supervisor::InstanceState::Unmanaged) => {
|
||||
(sup.state().as_label().to_string(), sup.uptime_seconds().await)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
_ => {
|
||||
let exists = inst.root.exists();
|
||||
(
|
||||
if exists { "configured" } else { "missing_root" }.to_string(),
|
||||
0,
|
||||
)
|
||||
}
|
||||
};
|
||||
instances.push(InstanceInfo {
|
||||
id: inst.id.clone(),
|
||||
game: inst.game.clone(),
|
||||
label: inst.label.clone(),
|
||||
state,
|
||||
uptime_seconds,
|
||||
root_disk_free_mb: disk_free_for_path(&disks, &inst.root),
|
||||
});
|
||||
}
|
||||
let instances = instances;
|
||||
|
||||
HeartbeatPayload {
|
||||
schema: 2,
|
||||
|
||||
154
corrosion-host-agent/src/update.rs
Normal file
154
corrosion-host-agent/src/update.rs
Normal file
@@ -0,0 +1,154 @@
|
||||
//! Signed self-update.
|
||||
//!
|
||||
//! The agent only ever runs a binary whose minisign signature verifies against
|
||||
//! the EMBEDDED public key below. Even if the CDN (which currently accepts
|
||||
//! unauthenticated uploads) served a malicious binary, the agent refuses it
|
||||
//! without a valid signature from the release private key (a CI secret).
|
||||
//!
|
||||
//! Flow: download binary + `.minisig` from the CDN → verify signature →
|
||||
//! atomic swap (current → `.old`, new → current, rollback on failure) →
|
||||
//! relaunch the new binary. Defence in depth mirrors the Vigilance updater:
|
||||
//! a real URL parse rejecting credential-in-URL bypasses, an https + host
|
||||
//! allowlist, and a size cap.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use minisign_verify::{PublicKey, Signature};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
|
||||
/// minisign public key. The matching private key signs releases in CI
|
||||
/// (Gitea Actions secret MINISIGN_SECRET_KEY). Rotating it means re-signing
|
||||
/// every published artifact and shipping an agent build with the new key.
|
||||
const PUBLIC_KEY: &str = "RWQKhJptuiwIkp31cZdz10z/R72UPZkl7/VtnZJ2Vfbe0dQfDlXHZYFC";
|
||||
|
||||
const ALLOWED_HOST: &str = "cdn.corrosionmgmt.com";
|
||||
const MAX_BINARY_BYTES: usize = 100 * 1024 * 1024; // 100 MiB sanity cap
|
||||
const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(600);
|
||||
|
||||
/// Verify a binary against the embedded public key + a minisign signature blob.
|
||||
/// The security core of self-update — tampered or unsigned content is rejected.
|
||||
pub fn verify_signature(binary: &[u8], signature_blob: &str) -> Result<()> {
|
||||
let pk = PublicKey::from_base64(PUBLIC_KEY).context("embedded public key is invalid")?;
|
||||
let sig = Signature::decode(signature_blob).context("malformed minisign signature")?;
|
||||
pk.verify(binary, &sig, false)
|
||||
.map_err(|e| anyhow::anyhow!("signature verification failed: {e}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reject anything but `https://cdn.corrosionmgmt.com/...` with no embedded
|
||||
/// credentials (the userinfo-bypass class).
|
||||
pub fn assert_url_allowed(url: &str) -> Result<()> {
|
||||
let parsed = reqwest::Url::parse(url).context("invalid update URL")?;
|
||||
if parsed.scheme() != "https" {
|
||||
bail!("update URL must be https");
|
||||
}
|
||||
if !parsed.username().is_empty() || parsed.password().is_some() {
|
||||
bail!("update URL must not contain credentials");
|
||||
}
|
||||
if parsed.host_str() != Some(ALLOWED_HOST) {
|
||||
bail!("update URL host not allowed: {:?}", parsed.host_str());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Download, verify, and atomically swap in a new agent binary. Does NOT
|
||||
/// restart — the caller decides when to relaunch (after replying on NATS).
|
||||
/// Returns the path of the now-current (new) binary.
|
||||
pub async fn download_verify_swap(url: &str) -> Result<PathBuf> {
|
||||
assert_url_allowed(url)?;
|
||||
let sig_url = format!("{url}.minisig");
|
||||
assert_url_allowed(&sig_url)?;
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(DOWNLOAD_TIMEOUT)
|
||||
.build()
|
||||
.context("building HTTP client")?;
|
||||
|
||||
let binary = client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("downloading {url}"))?
|
||||
.error_for_status()
|
||||
.context("update binary download failed")?
|
||||
.bytes()
|
||||
.await
|
||||
.context("reading update binary")?;
|
||||
|
||||
if binary.len() > MAX_BINARY_BYTES {
|
||||
bail!("update binary is {} bytes, exceeds the {MAX_BINARY_BYTES} cap", binary.len());
|
||||
}
|
||||
|
||||
let signature = client
|
||||
.get(&sig_url)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("downloading {sig_url}"))?
|
||||
.error_for_status()
|
||||
.context("signature download failed")?
|
||||
.text()
|
||||
.await
|
||||
.context("reading signature")?;
|
||||
|
||||
verify_signature(&binary, &signature).context("refusing unsigned/tampered update")?;
|
||||
tracing::info!("update signature verified ({} bytes)", binary.len());
|
||||
|
||||
let current = std::env::current_exe().context("resolving current executable")?;
|
||||
swap_binary(¤t, &binary)?;
|
||||
tracing::info!("update swapped in at {}", current.display());
|
||||
Ok(current)
|
||||
}
|
||||
|
||||
/// Atomically replace `current` with `new_bytes`, keeping a `.old` backup and
|
||||
/// rolling back if the rename fails.
|
||||
pub fn swap_binary(current: &Path, new_bytes: &[u8]) -> Result<()> {
|
||||
let dir = current.parent().unwrap_or_else(|| Path::new("."));
|
||||
let stem = current.file_name().and_then(|s| s.to_str()).unwrap_or("corrosion-host-agent");
|
||||
let new_path = dir.join(format!("{stem}.new"));
|
||||
let backup = dir.join(format!("{stem}.old"));
|
||||
|
||||
std::fs::write(&new_path, new_bytes)
|
||||
.with_context(|| format!("writing {}", new_path.display()))?;
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(&new_path, std::fs::Permissions::from_mode(0o755))
|
||||
.context("chmod +x on new binary")?;
|
||||
}
|
||||
|
||||
let _ = std::fs::remove_file(&backup);
|
||||
std::fs::rename(current, &backup)
|
||||
.with_context(|| format!("backing up current binary to {}", backup.display()))?;
|
||||
|
||||
if let Err(e) = std::fs::rename(&new_path, current) {
|
||||
// Roll back: restore the backup so the agent stays runnable.
|
||||
let _ = std::fs::rename(&backup, current);
|
||||
return Err(anyhow::anyhow!(e).context("installing new binary (rolled back)"));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Relaunch the (already-swapped) binary with the same args, then exit. No
|
||||
/// service manager is required — the new process reconnects on its own. There
|
||||
/// is a sub-second window with no agent; acceptable for an update.
|
||||
pub fn relaunch_and_exit() -> ! {
|
||||
let exe = std::env::current_exe().unwrap_or_else(|_| PathBuf::from("corrosion-host-agent"));
|
||||
let args: Vec<String> = std::env::args().skip(1).collect();
|
||||
tracing::info!("relaunching {} after update", exe.display());
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::process::CommandExt;
|
||||
// exec replaces this process image with the new binary — cleanest,
|
||||
// no gap. Only returns on failure.
|
||||
let err = std::process::Command::new(&exe).args(&args).exec();
|
||||
tracing::error!("exec after update failed: {err}; exiting for service restart");
|
||||
std::process::exit(70);
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = std::process::Command::new(&exe).args(&args).spawn();
|
||||
std::process::exit(0);
|
||||
}
|
||||
}
|
||||
412
corrosion-host-agent/src/wipe.rs
Normal file
412
corrosion-host-agent/src/wipe.rs
Normal file
@@ -0,0 +1,412 @@
|
||||
//! Jailed wipe engine for Rust (and compatible) game server instances.
|
||||
//!
|
||||
//! Three wipe types are supported, each a strict superset of the previous:
|
||||
//!
|
||||
//! | Type | What is deleted |
|
||||
//! |-------------|------------------------------------------------------------------|
|
||||
//! | `map` | `*.map`, `*.sav` under `<root>/server/<identity>/` |
|
||||
//! | `blueprint` | map wipe + `*.blueprints.*.db` / `.blueprints.*` under save dir |
|
||||
//! | `full` | blueprint wipe + `oxide/data/` contents + player state DB files |
|
||||
//!
|
||||
//! Identity discovery: rather than require the identity in the payload, we walk
|
||||
//! `<root>/server/*/` looking for files that match each wipe type's patterns.
|
||||
//! This handles any identity name without configuration churn.
|
||||
//!
|
||||
//! **Safety**: every path operated on is validated inside the canonicalized
|
||||
//! instance root with the same two-stage (lexical + canonicalize) jail used by
|
||||
//! `filemanager.rs`. We use `symlink_metadata` (lstat) everywhere we walk
|
||||
//! directories — symlinks are never followed across the boundary (Lesson 26).
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::filemanager::jail;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// The scope of data to erase.
|
||||
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum WipeType {
|
||||
/// Delete procedural map + save files only.
|
||||
Map,
|
||||
/// Map wipe + player blueprint databases.
|
||||
Blueprint,
|
||||
/// Blueprint wipe + oxide/data + all player state DBs.
|
||||
Full,
|
||||
}
|
||||
|
||||
/// Parameters parsed from the NATS command payload.
|
||||
#[derive(Debug, serde::Deserialize)]
|
||||
pub struct WipeRequest {
|
||||
/// Scope of the wipe.
|
||||
pub wipe_type: WipeType,
|
||||
/// Copy files to `.corrosion-backups/<backup_label>/` before deleting.
|
||||
#[serde(default)]
|
||||
pub backup: bool,
|
||||
/// Label used as the backup subdirectory name. Defaults to `"wipe-backup"`.
|
||||
#[serde(default = "default_backup_label")]
|
||||
pub backup_label: String,
|
||||
}
|
||||
|
||||
/// Serde default for `WipeRequest::backup_label`.
fn default_backup_label() -> String {
    String::from("wipe-backup")
}
|
||||
|
||||
/// Result of a successful wipe operation.
|
||||
#[derive(Debug)]
|
||||
pub struct WipeResult {
|
||||
pub deleted_count: usize,
|
||||
pub wipe_type: WipeType,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Core wipe logic (sync — suitable for `spawn_blocking`)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Execute a wipe of `wipe_type` inside `root`, optionally backing up first.
|
||||
///
|
||||
/// Does NOT touch the supervisor lifecycle — the caller (instancecmd dispatch)
|
||||
/// must stop the server before calling this and restart it afterwards.
|
||||
///
|
||||
/// Returns a `WipeResult` describing what was deleted. Missing directories are
|
||||
/// treated as zero-deleted, not as errors, so a fresh server never returns Err
|
||||
/// just because `server/*/` doesn't exist yet.
|
||||
pub fn execute(root: &Path, req: &WipeRequest) -> Result<WipeResult> {
|
||||
// Canonicalize root once; every subsequent path check goes through `jail()`.
|
||||
let canon_root = fs::canonicalize(root)
|
||||
.with_context(|| format!("canonicalize instance root '{}'", root.display()))?;
|
||||
|
||||
// Collect every path to delete based on wipe type.
|
||||
let targets = collect_targets(&canon_root, &req.wipe_type)?;
|
||||
|
||||
// Backup before any deletion when requested.
|
||||
if req.backup && !targets.is_empty() {
|
||||
let backup_dir = jail(root, &format!(".corrosion-backups/{}", req.backup_label))?;
|
||||
fs::create_dir_all(&backup_dir)
|
||||
.with_context(|| format!("create backup dir '{}'", backup_dir.display()))?;
|
||||
for path in &targets {
|
||||
backup_one(&canon_root, path, &backup_dir)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Delete.
|
||||
let mut deleted_count = 0usize;
|
||||
for path in &targets {
|
||||
// Final safety check: confirm inside root before deletion.
|
||||
if path != &canon_root && !path.starts_with(&canon_root) {
|
||||
anyhow::bail!(
|
||||
"wipe safety: path '{}' is outside instance root '{}' — aborting",
|
||||
path.display(),
|
||||
canon_root.display()
|
||||
);
|
||||
}
|
||||
match delete_path(path) {
|
||||
Ok(n) => deleted_count += n,
|
||||
Err(e) => tracing::warn!("wipe: skipping '{}': {e:#}", path.display()),
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"wipe complete: type={:?} deleted={} root={}",
|
||||
req.wipe_type,
|
||||
deleted_count,
|
||||
root.display()
|
||||
);
|
||||
|
||||
Ok(WipeResult {
|
||||
deleted_count,
|
||||
wipe_type: req.wipe_type.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Target collection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Walk the Rust server tree under `canon_root` and return every path (file or
|
||||
/// dir) that should be deleted for the given wipe type.
|
||||
///
|
||||
/// Layout assumed:
|
||||
/// ```text
|
||||
/// <root>/
|
||||
/// server/
|
||||
/// <identity>/ -- any name; we walk all subdirs
|
||||
/// *.map
|
||||
/// *.sav
|
||||
/// player.blueprints.*.db (and *.blueprints.* variants)
|
||||
/// player.deaths.*.db
|
||||
/// player.identities.*.db
|
||||
/// player.states.*.db
|
||||
/// *.db (full wipe)
|
||||
/// oxide/
|
||||
/// data/ -- cleared for full wipe (dir contents, not dir itself)
|
||||
/// ```
|
||||
fn collect_targets(canon_root: &Path, wipe_type: &WipeType) -> Result<Vec<PathBuf>> {
|
||||
let mut targets: Vec<PathBuf> = Vec::new();
|
||||
|
||||
// --- server/<identity>/ ---
|
||||
let server_dir = canon_root.join("server");
|
||||
if is_real_dir(&server_dir) {
|
||||
for identity_entry in read_dir_safe(&server_dir)? {
|
||||
let identity_meta = fs::symlink_metadata(&identity_entry)
|
||||
.with_context(|| format!("stat '{}'", identity_entry.display()))?;
|
||||
|
||||
// Never follow symlinks across the boundary.
|
||||
if identity_meta.file_type().is_symlink() {
|
||||
tracing::debug!("wipe: skipping symlink '{}'", identity_entry.display());
|
||||
continue;
|
||||
}
|
||||
|
||||
if !identity_meta.is_dir() {
|
||||
continue;
|
||||
}
|
||||
|
||||
collect_save_targets(canon_root, &identity_entry, wipe_type, &mut targets)?;
|
||||
}
|
||||
}
|
||||
|
||||
// --- oxide/data/ (full wipe only) ---
|
||||
if *wipe_type == WipeType::Full {
|
||||
let oxide_data = canon_root.join("oxide").join("data");
|
||||
if is_real_dir(&oxide_data) {
|
||||
// Delete directory *contents*, not the directory itself.
|
||||
for entry in read_dir_safe(&oxide_data)? {
|
||||
let meta = fs::symlink_metadata(&entry)
|
||||
.with_context(|| format!("stat '{}'", entry.display()))?;
|
||||
if meta.file_type().is_symlink() {
|
||||
tracing::debug!("wipe: skipping symlink '{}'", entry.display());
|
||||
continue;
|
||||
}
|
||||
// Jail-check every entry before adding.
|
||||
ensure_inside(canon_root, &entry)?;
|
||||
targets.push(entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(targets)
|
||||
}
|
||||
|
||||
/// Collect files from one `<root>/server/<identity>/` directory.
|
||||
fn collect_save_targets(
|
||||
canon_root: &Path,
|
||||
identity_dir: &Path,
|
||||
wipe_type: &WipeType,
|
||||
out: &mut Vec<PathBuf>,
|
||||
) -> Result<()> {
|
||||
for entry in read_dir_safe(identity_dir)? {
|
||||
let meta = fs::symlink_metadata(&entry)
|
||||
.with_context(|| format!("stat '{}'", entry.display()))?;
|
||||
|
||||
// Never follow symlinks.
|
||||
if meta.file_type().is_symlink() {
|
||||
tracing::debug!("wipe: skipping symlink '{}'", entry.display());
|
||||
continue;
|
||||
}
|
||||
|
||||
ensure_inside(canon_root, &entry)?;
|
||||
|
||||
let file_name = entry
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().into_owned())
|
||||
.unwrap_or_default();
|
||||
|
||||
let keep = match wipe_type {
|
||||
WipeType::Map => !is_map_file(&file_name) && !is_sav_file(&file_name),
|
||||
WipeType::Blueprint => {
|
||||
!is_map_file(&file_name)
|
||||
&& !is_sav_file(&file_name)
|
||||
&& !is_blueprint_file(&file_name)
|
||||
}
|
||||
WipeType::Full => {
|
||||
!is_map_file(&file_name)
|
||||
&& !is_sav_file(&file_name)
|
||||
&& !is_blueprint_file(&file_name)
|
||||
&& !is_player_state_file(&file_name)
|
||||
&& !is_generic_db_file(&file_name)
|
||||
}
|
||||
};
|
||||
|
||||
if !keep {
|
||||
out.push(entry);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pattern matchers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// `true` when the file name ends in `.map`.
fn is_map_file(name: &str) -> bool {
    name.strip_suffix(".map").is_some()
}
|
||||
|
||||
/// `true` when the file name ends in `.sav`.
fn is_sav_file(name: &str) -> bool {
    name.strip_suffix(".sav").is_some()
}
|
||||
|
||||
/// `true` when the file name contains `.blueprints.` anywhere. This covers
/// both `player.blueprints.*.db` and the `.blueprints.*` variants.
fn is_blueprint_file(name: &str) -> bool {
    const MARKER: &str = ".blueprints.";
    name.contains(MARKER)
}
|
||||
|
||||
/// `true` when the file name contains one of the player state database
/// prefixes: `player.deaths.`, `player.identities.`, or `player.states.`.
fn is_player_state_file(name: &str) -> bool {
    const MARKERS: [&str; 3] = ["player.deaths.", "player.identities.", "player.states."];
    MARKERS.iter().any(|marker| name.contains(*marker))
}
|
||||
|
||||
/// `true` when the file name ends in `.db`.
fn is_generic_db_file(name: &str) -> bool {
    name.strip_suffix(".db").is_some()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Deletion
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Delete a single path (file or directory tree). Returns count of top-level
|
||||
/// items removed (1 for a file, 1 for a directory tree). Missing paths return
|
||||
/// 0 — the server may be fresh.
|
||||
fn delete_path(path: &Path) -> Result<usize> {
|
||||
let meta = match fs::symlink_metadata(path) {
|
||||
Ok(m) => m,
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
|
||||
Err(e) => return Err(e).with_context(|| format!("stat '{}'", path.display())),
|
||||
};
|
||||
|
||||
if meta.file_type().is_symlink() {
|
||||
// Delete the symlink itself — never follow it.
|
||||
fs::remove_file(path).with_context(|| format!("remove symlink '{}'", path.display()))?;
|
||||
return Ok(1);
|
||||
}
|
||||
|
||||
if meta.is_dir() {
|
||||
fs::remove_dir_all(path)
|
||||
.with_context(|| format!("remove_dir_all '{}'", path.display()))?;
|
||||
} else {
|
||||
fs::remove_file(path)
|
||||
.with_context(|| format!("remove_file '{}'", path.display()))?;
|
||||
}
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Backup
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Copy one path (file or directory) into `backup_dir`, preserving the last
|
||||
/// component of the path name. Symlinks are skipped — we never follow them.
|
||||
fn backup_one(canon_root: &Path, src: &Path, backup_dir: &Path) -> Result<()> {
|
||||
let meta = match fs::symlink_metadata(src) {
|
||||
Ok(m) => m,
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
|
||||
Err(e) => return Err(e).with_context(|| format!("stat backup src '{}'", src.display())),
|
||||
};
|
||||
|
||||
if meta.file_type().is_symlink() {
|
||||
tracing::debug!("wipe backup: skipping symlink '{}'", src.display());
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let name = match src.file_name() {
|
||||
Some(n) => n,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
// Preserve relative path from root inside the backup directory to avoid
|
||||
// name collisions when multiple identity dirs have a `proc.map`.
|
||||
let rel = src
|
||||
.strip_prefix(canon_root)
|
||||
.unwrap_or_else(|_| src)
|
||||
.parent()
|
||||
.unwrap_or_else(|| Path::new(""));
|
||||
let dest = backup_dir.join(rel).join(name);
|
||||
|
||||
if let Some(parent) = dest.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("backup: create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
copy_recursive_safe(src, &dest)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Recursive copy that uses `symlink_metadata` (lstat) and refuses to follow
|
||||
/// any symlink — mirrors the same guard in `filemanager::copy_recursive`.
|
||||
fn copy_recursive_safe(src: &Path, dest: &Path) -> Result<()> {
|
||||
let meta = fs::symlink_metadata(src)
|
||||
.with_context(|| format!("stat source '{}'", src.display()))?;
|
||||
|
||||
if meta.file_type().is_symlink() {
|
||||
anyhow::bail!(
|
||||
"refusing to copy symlink '{}' during backup — symlinks are not followed",
|
||||
src.display()
|
||||
);
|
||||
}
|
||||
|
||||
if meta.is_dir() {
|
||||
fs::create_dir_all(dest)
|
||||
.with_context(|| format!("create_dir_all '{}'", dest.display()))?;
|
||||
for entry in fs::read_dir(src)
|
||||
.with_context(|| format!("read_dir '{}'", src.display()))?
|
||||
{
|
||||
let entry = entry?;
|
||||
copy_recursive_safe(&entry.path(), &dest.join(entry.file_name()))?;
|
||||
}
|
||||
} else {
|
||||
fs::copy(src, dest)
|
||||
.with_context(|| format!("copy '{}' -> '{}'", src.display(), dest.display()))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// `true` only when `path` exists and is a directory, not a symlink to one.
/// Any stat error, including "not found", yields `false`.
fn is_real_dir(path: &Path) -> bool {
    fs::symlink_metadata(path)
        .map(|meta| {
            let kind = meta.file_type();
            kind.is_dir() && !kind.is_symlink()
        })
        .unwrap_or(false)
}
|
||||
|
||||
/// Read a directory and return the absolute paths of its entries.
|
||||
/// Uses lstat internally via `read_dir` (entry paths; metadata is lstat'd
|
||||
/// separately by callers).
|
||||
fn read_dir_safe(dir: &Path) -> Result<Vec<PathBuf>> {
|
||||
let mut entries = Vec::new();
|
||||
let rd = match fs::read_dir(dir) {
|
||||
Ok(rd) => rd,
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(entries),
|
||||
Err(e) => return Err(e).with_context(|| format!("read_dir '{}'", dir.display())),
|
||||
};
|
||||
for item in rd {
|
||||
let item = item.with_context(|| format!("read dir entry in '{}'", dir.display()))?;
|
||||
entries.push(item.path());
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Assert that `path` is strictly inside (or equal to) `canon_root`.
|
||||
/// This is the final safety fence before any destructive or backup operation.
|
||||
fn ensure_inside(canon_root: &Path, path: &Path) -> Result<()> {
|
||||
// Canonicalize the path if it exists; otherwise use it as-is (it's
|
||||
// derived from read_dir, which already returns absolute paths rooted
|
||||
// under canon_root in normal operation).
|
||||
let resolved = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
|
||||
if resolved != canon_root && !resolved.starts_with(canon_root) {
|
||||
anyhow::bail!(
|
||||
"wipe safety: path '{}' is outside instance root '{}' — aborting",
|
||||
path.display(),
|
||||
canon_root.display()
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
156
corrosion-host-agent/tests/docker_compose.rs
Normal file
156
corrosion-host-agent/tests/docker_compose.rs
Normal file
@@ -0,0 +1,156 @@
|
||||
//! DockerComposeSupervisor tests. A fake `docker` script records the exact
|
||||
//! arguments it was invoked with and returns a controllable exit code, so we
|
||||
//! assert the compose invocations + state transitions with no real Docker
|
||||
//! daemon — the same mock-the-external-binary approach the steamcmd tests use.
|
||||
#![cfg(unix)]
|
||||
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use corrosion_host_agent::config::InstanceConfig;
|
||||
use corrosion_host_agent::docker_compose::{DockerComposeConfig, DockerComposeSupervisor};
|
||||
use corrosion_host_agent::supervisor::{InstanceState, Supervisor};
|
||||
|
||||
/// Create an executable shell script named `fakedocker` in `dir` that stands
/// in for `docker`. On each run it appends its space-joined arguments as one
/// line to `args_log`, then exits with the integer stored in `exit_file`
/// (0 if that file is absent). Returns the script's path.
fn fake_docker(dir: &Path, args_log: &Path, exit_file: &Path) -> PathBuf {
    let script = dir.join("fakedocker");
    let body = format!(
        "#!/bin/sh\nprintf '%s\\n' \"$*\" >> '{log}'\nexit \"$(cat '{code}' 2>/dev/null || echo 0)\"\n",
        log = args_log.display(),
        code = exit_file.display(),
    );
    std::fs::write(&script, body).unwrap();
    std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();
    script
}
|
||||
|
||||
fn dune_instance(command: Vec<String>, service: Option<String>) -> InstanceConfig {
|
||||
InstanceConfig {
|
||||
id: "dune-main".to_string(),
|
||||
game: "dune".to_string(),
|
||||
root: PathBuf::from("/tmp"),
|
||||
label: None,
|
||||
executable: None,
|
||||
args: vec![],
|
||||
working_dir: None,
|
||||
rcon: None,
|
||||
steamcmd: None,
|
||||
docker_compose: Some(DockerComposeConfig {
|
||||
file: Some(PathBuf::from("docker-compose.yml")),
|
||||
project: Some("duneproj".to_string()),
|
||||
service,
|
||||
command: Some(command),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn start_runs_compose_up_detached_and_sets_running() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let args_log = dir.path().join("args.log");
|
||||
let exit_file = dir.path().join("exit");
|
||||
let docker = fake_docker(dir.path(), &args_log, &exit_file);
|
||||
|
||||
let sup = DockerComposeSupervisor::new(&dune_instance(
|
||||
vec![docker.to_string_lossy().into_owned()],
|
||||
None,
|
||||
));
|
||||
assert_eq!(sup.state(), InstanceState::Stopped);
|
||||
|
||||
sup.clone().start().await.expect("compose up should succeed");
|
||||
assert_eq!(sup.state(), InstanceState::Running);
|
||||
|
||||
let logged = std::fs::read_to_string(&args_log).unwrap();
|
||||
assert!(logged.contains("up -d"), "expected `up -d`; got: {logged}");
|
||||
assert!(logged.contains("-p duneproj"), "expected project flag; got: {logged}");
|
||||
assert!(logged.contains("-f docker-compose.yml"), "expected file flag; got: {logged}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn stop_runs_compose_stop_and_sets_stopped() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let args_log = dir.path().join("args.log");
|
||||
let exit_file = dir.path().join("exit");
|
||||
let docker = fake_docker(dir.path(), &args_log, &exit_file);
|
||||
|
||||
let sup = DockerComposeSupervisor::new(&dune_instance(
|
||||
vec![docker.to_string_lossy().into_owned()],
|
||||
None,
|
||||
));
|
||||
sup.clone().start().await.expect("up");
|
||||
sup.clone().stop().await.expect("compose stop should succeed");
|
||||
assert_eq!(sup.state(), InstanceState::Stopped);
|
||||
assert_eq!(sup.uptime_seconds().await, 0);
|
||||
|
||||
let logged = std::fs::read_to_string(&args_log).unwrap();
|
||||
assert!(logged.lines().any(|l| l.contains("stop")), "expected a `stop` call; got: {logged}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn restart_runs_compose_restart() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let args_log = dir.path().join("args.log");
|
||||
let exit_file = dir.path().join("exit");
|
||||
let docker = fake_docker(dir.path(), &args_log, &exit_file);
|
||||
|
||||
let sup = DockerComposeSupervisor::new(&dune_instance(
|
||||
vec![docker.to_string_lossy().into_owned()],
|
||||
None,
|
||||
));
|
||||
sup.clone().restart().await.expect("compose restart should succeed");
|
||||
assert_eq!(sup.state(), InstanceState::Running);
|
||||
|
||||
let logged = std::fs::read_to_string(&args_log).unwrap();
|
||||
assert!(logged.contains("restart"), "expected `restart`; got: {logged}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn single_service_is_targeted() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let args_log = dir.path().join("args.log");
|
||||
let exit_file = dir.path().join("exit");
|
||||
let docker = fake_docker(dir.path(), &args_log, &exit_file);
|
||||
|
||||
let sup = DockerComposeSupervisor::new(&dune_instance(
|
||||
vec![docker.to_string_lossy().into_owned()],
|
||||
Some("gameserver".to_string()),
|
||||
));
|
||||
sup.clone().start().await.expect("up");
|
||||
|
||||
let logged = std::fs::read_to_string(&args_log).unwrap();
|
||||
assert!(
|
||||
logged.contains("up -d gameserver"),
|
||||
"service must be appended after `up -d`; got: {logged}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn compose_failure_errors_and_reverts_state() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let args_log = dir.path().join("args.log");
|
||||
let exit_file = dir.path().join("exit");
|
||||
std::fs::write(&exit_file, "1").unwrap(); // make the fake docker fail
|
||||
let docker = fake_docker(dir.path(), &args_log, &exit_file);
|
||||
|
||||
let sup = DockerComposeSupervisor::new(&dune_instance(
|
||||
vec![docker.to_string_lossy().into_owned()],
|
||||
None,
|
||||
));
|
||||
let err = sup.clone().start().await.expect_err("nonzero compose exit must fail");
|
||||
assert!(err.to_string().contains("compose up failed"), "got: {err}");
|
||||
assert_eq!(sup.state(), InstanceState::Stopped, "failed start must revert to Stopped");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn missing_docker_binary_errors_cleanly() {
|
||||
let sup = DockerComposeSupervisor::new(&dune_instance(
|
||||
vec!["/nonexistent/docker-xyz".to_string()],
|
||||
None,
|
||||
));
|
||||
let err = sup.clone().start().await.expect_err("missing docker must fail");
|
||||
assert!(err.to_string().contains("docker"), "error should mention docker: {err}");
|
||||
assert_eq!(sup.state(), InstanceState::Stopped);
|
||||
}
|
||||
461
corrosion-host-agent/tests/filemanager.rs
Normal file
461
corrosion-host-agent/tests/filemanager.rs
Normal file
@@ -0,0 +1,461 @@
|
||||
//! Integration tests for the jailed file manager.
|
||||
//!
|
||||
//! Each test runs in a real tempdir on the host filesystem. The jail-escape
|
||||
//! tests are the security-critical section: any path that resolves outside the
|
||||
//! instance root MUST be rejected regardless of how the escape is attempted.
|
||||
//!
|
||||
//! Coverage:
|
||||
//! - Functional: list, write, read roundtrip, mkdir, rename, delete
|
||||
//! - Security: dotdot traversal, absolute path injection, symlink escape
|
||||
//! (POSIX symlinks only — `#[cfg(unix)]`)
|
||||
|
||||
use corrosion_host_agent::filemanager;
|
||||
use std::path::Path;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Create a temporary directory and return its path. The directory is
|
||||
/// automatically cleaned up when the `TempDir` is dropped.
|
||||
fn tempdir() -> tempfile::TempDir {
|
||||
tempfile::tempdir().expect("create tempdir")
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Functional tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Content written through the file manager reads back unchanged.
#[test]
fn write_read_roundtrip() {
    let tmp = tempdir();
    let content = "hello from the file manager\nline 2\n";

    filemanager::write(tmp.path(), "test.txt", content).expect("write should succeed");
    let got = filemanager::read(tmp.path(), "test.txt").expect("read should succeed");

    assert_eq!(got, content);
}
|
||||
|
||||
/// A file written at the root shows up in the root listing.
#[test]
fn list_returns_written_file() {
    let tmp = tempdir();
    filemanager::write(tmp.path(), "server.cfg", "hostname MyServer\n").expect("write");

    let entries = filemanager::list(tmp.path(), "").expect("list root");
    let names = entries.iter().map(|e| e.name.as_str()).collect::<Vec<&str>>();

    assert!(names.contains(&"server.cfg"), "expected 'server.cfg' in listing, got {names:?}");
}
|
||||
|
||||
/// Listing a brand-new root returns no entries.
#[test]
fn list_empty_root_is_empty() {
    let tmp = tempdir();
    let listing = filemanager::list(tmp.path(), "").expect("list empty root");
    assert!(listing.is_empty(), "fresh tempdir should have no entries");
}
|
||||
|
||||
/// `mkdir` creates the requested directory under the root.
#[test]
fn mkdir_creates_directory() {
    let tmp = tempdir();
    filemanager::mkdir(tmp.path(), "cfg/custom").expect("mkdir should succeed");

    let created = tmp.path().join("cfg/custom");
    assert!(created.is_dir(), "directory should exist after mkdir");
}
|
||||
|
||||
/// `mkdir` creates every missing intermediate directory.
#[test]
fn mkdir_creates_nested_dirs() {
    let tmp = tempdir();
    filemanager::mkdir(tmp.path(), "a/b/c/d").expect("mkdir nested");

    let deepest = tmp.path().join("a/b/c/d");
    assert!(deepest.is_dir());
}
|
||||
|
||||
/// Writing to a path whose parents do not exist creates them on the way.
#[test]
fn write_creates_parent_dirs() {
    let tmp = tempdir();
    let rel = "subdir/deep/file.txt";

    filemanager::write(tmp.path(), rel, "data").expect("write with auto-mkdir");
    let content = filemanager::read(tmp.path(), rel).expect("read");

    assert_eq!(content, "data");
}
|
||||
|
||||
/// Renaming moves the file to the new name and keeps its content.
#[test]
fn rename_file() {
    let tmp = tempdir();
    let base = tmp.path();

    filemanager::write(base, "old.txt", "content").expect("write");
    filemanager::rename(base, "old.txt", "new.txt").expect("rename");

    let (old_path, new_path) = (base.join("old.txt"), base.join("new.txt"));
    assert!(!old_path.exists(), "old.txt should be gone");
    assert!(new_path.exists(), "new.txt should exist");

    let content = filemanager::read(base, "new.txt").expect("read renamed");
    assert_eq!(content, "content");
}
|
||||
|
||||
/// A new name containing a path separator is rejected, and the error
/// mentions "separator".
#[test]
fn rename_rejects_separator_in_new_name() {
    let tmp = tempdir();
    let base = tmp.path();
    filemanager::write(base, "file.txt", "data").expect("write");

    let outcome = filemanager::rename(base, "file.txt", "subdir/escape.txt");
    let err = outcome.expect_err("rename with path separator must fail");

    let mentions_separator = err.to_string().contains("separator");
    assert!(mentions_separator, "error should mention separator: {err}");
}
|
||||
|
||||
/// Deleting a file removes it from disk.
#[test]
fn delete_file() {
    let tmp = tempdir();
    let victim = tmp.path().join("todelete.txt");

    filemanager::write(tmp.path(), "todelete.txt", "bye").expect("write");
    assert!(victim.exists());

    filemanager::delete(tmp.path(), "todelete.txt").expect("delete");
    assert!(!victim.exists());
}
|
||||
|
||||
/// Deleting a directory removes the whole tree beneath it.
#[test]
fn delete_directory_recursive() {
    let tmp = tempdir();
    let base = tmp.path();
    let tree = base.join("tree");

    filemanager::mkdir(base, "tree/sub").expect("mkdir");
    filemanager::write(base, "tree/sub/file.txt", "x").expect("write");
    assert!(tree.is_dir());

    filemanager::delete(base, "tree").expect("delete tree");
    assert!(!tree.exists(), "directory tree should be deleted");
}
|
||||
|
||||
/// `mkfile` creates a file that reads back as the empty string.
#[test]
fn mkfile_creates_empty_file() {
    let tmp = tempdir();

    filemanager::mkfile(tmp.path(), "empty.txt").expect("mkfile");
    let content = filemanager::read(tmp.path(), "empty.txt").expect("read empty file");

    assert_eq!(content, "");
}
|
||||
|
||||
/// Copying a file duplicates its content and leaves the source untouched.
#[test]
fn copy_file() {
    let tmp = tempdir();
    let jail = tmp.path();

    filemanager::write(jail, "source.txt", "original").expect("write source");
    filemanager::copy(jail, "source.txt", "dest.txt").expect("copy");

    // Read both files back before asserting on either of them.
    let [src, dst] = [
        ("source.txt", "read source after copy"),
        ("dest.txt", "read destination"),
    ]
    .map(|(name, why)| filemanager::read(jail, name).expect(why));

    assert_eq!(src, "original");
    assert_eq!(dst, "original");
}
|
||||
|
||||
/// Moving a file removes the source and makes the content readable at the
/// destination.
#[test]
fn move_file() {
    let tmp = tempdir();
    let jail = tmp.path();

    filemanager::write(jail, "moveme.txt", "payload").expect("write");
    filemanager::move_path(jail, "moveme.txt", "moved.txt").expect("move");

    let source_remains = jail.join("moveme.txt").exists();
    assert!(!source_remains, "source should be gone");

    let moved = filemanager::read(jail, "moved.txt").expect("read after move");
    assert_eq!(moved, "payload");
}
|
||||
|
||||
/// Listing the root yields, for each entry, its kind, size, a non-empty
/// modification stamp, and a relative path written with forward slashes.
#[test]
fn list_entry_fields_are_populated() {
    let tmp = tempdir();
    let jail = tmp.path();
    filemanager::write(jail, "check.txt", "abcde").expect("write");
    filemanager::mkdir(jail, "subdir").expect("mkdir");

    let entries = filemanager::list(jail, "").expect("list");

    // Entries are looked up by name, so nothing below depends on listing order.
    let dir_entry = entries
        .iter()
        .find(|entry| entry.name == "subdir")
        .expect("subdir entry");
    assert!(dir_entry.is_dir);
    assert_eq!(dir_entry.size, 0);
    assert!(!dir_entry.modified.is_empty(), "modified should be set");

    let file_entry = entries
        .iter()
        .find(|entry| entry.name == "check.txt")
        .expect("file entry");
    assert!(!file_entry.is_dir);
    assert_eq!(file_entry.size, 5, "size should match byte count");

    // The reported path must be relative and free of backslashes.
    let path = &file_entry.path;
    assert!(!path.starts_with('/'), "path should be relative");
    assert!(!path.contains('\\'), "path should use forward slashes");
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Security: jail-escape tests
|
||||
// CRITICAL — these are the whole point of the jail abstraction.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A plain `../../etc/passwd` traversal must be refused, and the error has to
/// read as a jail rejection rather than a generic failure.
#[test]
fn jail_rejects_dotdot_traversal() {
    let tmp = tempdir();
    let jail = tmp.path();

    let attempt = filemanager::read(jail, "../../etc/passwd");
    let err = attempt.expect_err("dotdot traversal must be rejected");

    // A bare "file not found" would not prove the jail did its job.
    let msg = err.to_string();
    let flagged = ["outside", "escapes", "escape"]
        .iter()
        .any(|kw| msg.contains(*kw));
    assert!(
        flagged,
        "error should mention jail escape for dotdot traversal, got: {msg}"
    );
}
|
||||
|
||||
/// A long `../` chain hidden behind a few real-looking path components must
/// be stopped just like the short form.
#[test]
fn jail_rejects_deep_dotdot_traversal() {
    let tmp = tempdir();
    let jail = tmp.path();

    let attempt = filemanager::read(jail, "a/b/c/../../../../../../../../etc/shadow");
    let err = attempt.expect_err("deep dotdot traversal must be rejected");

    let msg = err.to_string();
    let flagged = ["outside", "escapes", "escape", "absolute"]
        .iter()
        .any(|kw| msg.contains(*kw));
    assert!(
        flagged,
        "error should mention jail escape for deep traversal, got: {msg}"
    );
}
|
||||
|
||||
/// An absolute path such as `/etc/passwd` must be rejected: it is not a
/// path relative to the instance root and must never be accepted as one.
#[test]
fn jail_rejects_absolute_path() {
    let tmp = tempdir();
    let jail = tmp.path();

    let attempt = filemanager::read(jail, "/etc/passwd");
    let err = attempt.expect_err("absolute path must be rejected");

    let msg = err.to_string();
    let flagged = ["absolute", "outside", "escapes", "escape"]
        .iter()
        .any(|kw| msg.contains(*kw));
    assert!(
        flagged,
        "error should mention the absolute-path rejection, got: {msg}"
    );
}
|
||||
|
||||
/// A second absolute-path probe: `/tmp/evil` must be rejected as well.
///
/// Despite the test's name, the input is a POSIX-style absolute path; no
/// drive-letter or backslash form is exercised here.
#[test]
fn jail_rejects_absolute_windows_style_path() {
    let tmp = tempdir();
    let jail = tmp.path();

    // Kept platform-portable on purpose: any path starting with `/` is bad.
    let attempt = filemanager::read(jail, "/tmp/evil");
    let err = attempt.expect_err("absolute /tmp/evil must be rejected");

    let msg = err.to_string();
    let flagged = ["absolute", "outside", "escapes", "escape"]
        .iter()
        .any(|kw| msg.contains(*kw));
    assert!(flagged, "got: {msg}");
}
|
||||
|
||||
/// Reading through a symlinked directory that points outside the root must
/// be refused — the classic symlink-escape vector.
#[cfg(unix)]
#[test]
fn jail_rejects_symlink_escape() {
    let tmp = tempdir();
    let jail = tmp.path();

    // A second temp dir plays the role of "somewhere outside the jail".
    let outside = tempdir();
    std::fs::write(outside.path().join("secret.txt"), "secret data")
        .expect("write outside file");

    // Plant the link inside the jail, targeting the outside directory.
    std::os::unix::fs::symlink(outside.path(), jail.join("evil_link"))
        .expect("create symlink inside root");

    let attempt = filemanager::read(jail, "evil_link/secret.txt");
    let err = attempt.expect_err("symlink escape must be rejected");

    let msg = err.to_string();
    let flagged = ["outside", "escapes", "escape"]
        .iter()
        .any(|kw| msg.contains(*kw));
    assert!(
        flagged,
        "error should mention jail escape for symlink traversal, got: {msg}"
    );
}
|
||||
|
||||
/// A symlink sitting directly in the root whose target is an outside file
/// must be rejected, even though the requested path looks like an ordinary
/// relative name.
#[cfg(unix)]
#[test]
fn jail_rejects_symlink_pointing_directly_outside() {
    let tmp = tempdir();
    let jail = tmp.path();

    // The link targets /etc/passwd itself; the target need not exist.
    let target = Path::new("/etc/passwd");
    std::os::unix::fs::symlink(target, jail.join("passwd_link"))
        .expect("create symlink to /etc/passwd");

    let attempt = filemanager::read(jail, "passwd_link");
    let err = attempt.expect_err("direct symlink outside root must be rejected");

    let msg = err.to_string();
    let flagged = ["outside", "escapes", "escape"]
        .iter()
        .any(|kw| msg.contains(*kw));
    assert!(flagged, "error should mention jail escape, got: {msg}");
}
|
||||
|
||||
/// An escape routed through two hops (symlink → symlink → outside) must be
/// caught as well.
#[cfg(unix)]
#[test]
fn jail_rejects_chained_symlink_escape() {
    let tmp = tempdir();
    let jail = tmp.path();
    let outside = tempdir();

    // Chain under test: root/link1 → root/link2 → outside/
    let second_hop = jail.join("link2");
    std::os::unix::fs::symlink(outside.path(), &second_hop).expect("create link2");
    let first_hop = jail.join("link1");
    std::os::unix::fs::symlink(&second_hop, &first_hop).expect("create link1");

    let attempt = filemanager::read(jail, "link1");
    let err = attempt.expect_err("chained symlink escape must be rejected");

    let msg = err.to_string();
    let flagged = ["outside", "escapes", "escape"]
        .iter()
        .any(|kw| msg.contains(*kw));
    assert!(flagged, "chained symlink should be caught, got: {msg}");
}
|
||||
|
||||
/// SECURITY REGRESSION: a directory copy must not dereference a symlink that
/// points OUTSIDE the jail and drag the external content inside.
///
/// Both the source and the destination of the copy are inside the jail; the
/// symlink is nested one level down in the source directory, so the copy has
/// to refuse it and no external file may appear under the destination.
#[cfg(unix)]
#[test]
fn copy_refuses_to_follow_symlink_out_of_jail() {
    let tmp = tempdir();
    let jail = tmp.path();
    let outside = tempdir();
    std::fs::write(outside.path().join("secret.txt"), "TOP SECRET")
        .expect("write external secret");

    // Inside the jail: src/ holding a single symlink to the outside directory.
    let src_dir = jail.join("src");
    std::fs::create_dir(&src_dir).expect("mkdir src");
    std::os::unix::fs::symlink(outside.path(), src_dir.join("escape"))
        .expect("plant symlink to outside");

    // src -> dest, both names inside the jail.
    let attempt = filemanager::copy(jail, "src", "dest");
    let err = attempt.expect_err("copy must refuse the embedded symlink");
    let rendered = format!("{err:#}");
    assert!(
        rendered.contains("symlink"),
        "error should name the refused symlink, got: {err:#}"
    );

    // Whatever happened, the secret must not be reachable under dest/.
    let leaked = jail.join("dest").join("escape").join("secret.txt");
    assert!(
        !leaked.exists(),
        "external content leaked into the jail via symlink-following copy",
    );
}
|
||||
|
||||
/// `list` must describe a symlink as the link itself, never as the file it
/// points at — otherwise the type and size of files outside the jail leak.
#[cfg(unix)]
#[test]
fn list_does_not_dereference_symlink_metadata() {
    let tmp = tempdir();
    let jail = tmp.path();
    std::os::unix::fs::symlink(Path::new("/etc/passwd"), jail.join("leak"))
        .expect("plant symlink");

    let entries = filemanager::list(jail, "").expect("list root");
    let leak = entries
        .iter()
        .find(|entry| entry.name == "leak")
        .expect("symlink listed");

    // The link itself is not a directory, whatever its target is.
    assert!(!leak.is_dir, "symlink must not be reported as a directory");

    // The target's byte size must not show up either. When the target cannot
    // be stat'ed (size falls back to 0) the comparison is skipped.
    let target_size = std::fs::metadata("/etc/passwd").map(|m| m.len()).unwrap_or(0);
    let size_leaked = target_size != 0 && leak.size == target_size;
    assert!(
        !size_leaked,
        "list leaked the symlink target's size ({target_size} bytes)"
    );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Dispatch layer tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Dispatching a `list` request on the root reports success and carries an
/// `entries` array in the response data.
#[test]
fn dispatch_list_returns_success() {
    let tmp = tempdir();
    let jail = tmp.path();
    filemanager::write(jail, "a.txt", "a").expect("write");

    let request = filemanager::FileRequest {
        op: String::from("list"),
        path: String::new(),
        dest: None,
        content: None,
        name: None,
    };
    let response = filemanager::dispatch(jail, &request);

    assert_eq!(response["status"], "success");
    let entries = &response["data"]["entries"];
    assert!(entries.is_array());
}
|
||||
|
||||
/// An unrecognised `op` produces an error response whose message names the
/// problem.
#[test]
fn dispatch_unknown_op_returns_error() {
    let tmp = tempdir();
    let request = filemanager::FileRequest {
        op: String::from("explode"),
        path: String::new(),
        dest: None,
        content: None,
        name: None,
    };

    let response = filemanager::dispatch(tmp.path(), &request);

    assert_eq!(response["status"], "error");
    let message = response["message"].as_str().unwrap();
    assert!(message.contains("unknown op"));
}
|
||||
|
||||
/// A traversal attempt routed through `dispatch` must come back as an
/// ordinary error response — no panic, no file content.
#[test]
fn dispatch_escape_attempt_returns_error_not_panic() {
    let tmp = tempdir();
    let request = filemanager::FileRequest {
        op: String::from("read"),
        path: String::from("../../etc/passwd"),
        dest: None,
        content: None,
        name: None,
    };

    let response = filemanager::dispatch(tmp.path(), &request);

    assert_eq!(response["status"], "error", "escape attempt should return error status");
    let message = response["message"].as_str();
    assert!(message.is_some(), "error response must have a message");
}
|
||||
2
corrosion-host-agent/tests/fixtures/sample.bin
vendored
Normal file
2
corrosion-host-agent/tests/fixtures/sample.bin
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
corrosion-host-agent signed-update test fixture
|
||||
version 2.0.0-test
|
||||
4
corrosion-host-agent/tests/fixtures/sample.bin.minisig
vendored
Normal file
4
corrosion-host-agent/tests/fixtures/sample.bin.minisig
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
untrusted comment: signature from minisign secret key
|
||||
RUQKhJptuiwIkp378Z59BTwosDycAhmlhrdZZVwk1Vdb293OgcsXx0S3W0XezMtOXIXdgvQtW/DpDKlb1gdW4elQXLG5KFUgawI=
|
||||
trusted comment: timestamp:1781222247 file:sample.bin hashed
|
||||
QtUiOfJqRKYJZTL6QV93xeLVnODr8HXWvZIR3Q1AG0yqmqesZPyiKpVa9kD34Mwp1fQ76nx1Z7c6CB1v5KHQAw==
|
||||
353
corrosion-host-agent/tests/rcon.rs
Normal file
353
corrosion-host-agent/tests/rcon.rs
Normal file
@@ -0,0 +1,353 @@
|
||||
//! RCON integration tests using in-process mock servers.
|
||||
//!
|
||||
//! Real OS sockets on ephemeral ports — no mocking framework. Each test
|
||||
//! binds a listener, spawns a task that speaks the expected protocol, then
|
||||
//! exercises `rcon::send_command` and asserts on the result. Tests are
|
||||
//! unix-only because the musl cross-compile target and the CI runner are both
|
||||
//! Linux; the production use case is also Linux-only (game servers don't run
|
||||
//! on macOS or Windows in production).
|
||||
//!
|
||||
//! We use `#[cfg(unix)]` to keep parity with the supervisor integration tests.
|
||||
#![cfg(unix)]
|
||||
|
||||
use corrosion_host_agent::rcon::{RconConfig, RconKind};
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source RCON helpers — duplicate the wire-format encode/decode locally so
|
||||
// the tests own the mock server without depending on the production code path.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Build a Source RCON packet: `[size(4LE) | id(4LE) | type(4LE) | body | 0x00 0x00]`.
///
/// The leading `size` field counts every byte that follows it: the id, the
/// type, the body and the two trailing NUL bytes.
///
/// # Panics
///
/// Panics if `body` is so large that the size field does not fit in an `i32`
/// (the previous `as i32` cast would have silently wrapped instead).
fn encode_packet(id: i32, ptype: i32, body: &[u8]) -> Vec<u8> {
    // id (4) + type (4) + body + two NUL terminators.
    let payload_len = 4 + 4 + body.len() + 2;
    let size =
        i32::try_from(payload_len).expect("RCON packet body too large for an i32 size field");

    // +4 for the size field itself, which is not included in `size`.
    let mut out = Vec::with_capacity(4 + payload_len);
    out.extend_from_slice(&size.to_le_bytes());
    out.extend_from_slice(&id.to_le_bytes());
    out.extend_from_slice(&ptype.to_le_bytes());
    out.extend_from_slice(body);
    out.extend_from_slice(&[0x00, 0x00]);
    out
}
|
||||
|
||||
/// Read one Source RCON packet from a TcpStream.
|
||||
async fn read_packet(stream: &mut TcpStream) -> (i32, i32, Vec<u8>) {
|
||||
let mut size_buf = [0u8; 4];
|
||||
stream.read_exact(&mut size_buf).await.unwrap();
|
||||
let size = i32::from_le_bytes(size_buf) as usize;
|
||||
|
||||
let mut payload = vec![0u8; size];
|
||||
stream.read_exact(&mut payload).await.unwrap();
|
||||
|
||||
let id = i32::from_le_bytes(payload[0..4].try_into().unwrap());
|
||||
let ptype = i32::from_le_bytes(payload[4..8].try_into().unwrap());
|
||||
let body_end = size.saturating_sub(2);
|
||||
let body = payload[8..body_end].to_vec();
|
||||
(id, ptype, body)
|
||||
}
|
||||
|
||||
// Source RCON packet type codes used by the mock server below.
// AUTH_RESPONSE and EXECCOMMAND share the value 2; the mock only ever sends
// the former and only ever expects to receive the latter.
const SOURCE_TYPE_AUTH: i32 = 3;
|
||||
const SOURCE_TYPE_AUTH_RESPONSE: i32 = 2;
|
||||
const SOURCE_TYPE_EXECCOMMAND: i32 = 2;
|
||||
const SOURCE_TYPE_RESPONSE_VALUE: i32 = 0;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock Source RCON server
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Run a Source RCON server that accepts password "goodpw", rejects others,
|
||||
/// and responds to the first EXECCOMMAND with `response_body`.
|
||||
///
|
||||
/// If `split_at` is Some(n) the body is split: the first `n` bytes arrive in
|
||||
/// one RESPONSE_VALUE packet and the remainder in a second — testing multi-
|
||||
/// packet reassembly.
|
||||
async fn run_source_mock(
|
||||
mut stream: TcpStream,
|
||||
accept_password: &str,
|
||||
command_response: &[u8],
|
||||
split_at: Option<usize>,
|
||||
) {
|
||||
// --- Auth phase ---
|
||||
let (auth_id, ptype, body) = read_packet(&mut stream).await;
|
||||
assert_eq!(ptype, SOURCE_TYPE_AUTH, "expected AUTH packet");
|
||||
|
||||
let password = String::from_utf8_lossy(&body);
|
||||
if password != accept_password {
|
||||
// Send empty RESPONSE_VALUE then AUTH_RESPONSE with id = -1 (failure).
|
||||
let empty = encode_packet(auth_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
|
||||
stream.write_all(&empty).await.unwrap();
|
||||
let fail = encode_packet(-1, SOURCE_TYPE_AUTH_RESPONSE, b"");
|
||||
stream.write_all(&fail).await.unwrap();
|
||||
return;
|
||||
}
|
||||
|
||||
// Success: empty RESPONSE_VALUE then AUTH_RESPONSE with the auth id.
|
||||
let empty = encode_packet(auth_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
|
||||
stream.write_all(&empty).await.unwrap();
|
||||
let ok = encode_packet(auth_id, SOURCE_TYPE_AUTH_RESPONSE, b"");
|
||||
stream.write_all(&ok).await.unwrap();
|
||||
|
||||
// --- Command phase ---
|
||||
let (cmd_id, cmd_ptype, _cmd_body) = read_packet(&mut stream).await;
|
||||
assert_eq!(cmd_ptype, SOURCE_TYPE_EXECCOMMAND, "expected EXECCOMMAND");
|
||||
|
||||
// Read the probe packet (empty RESPONSE_VALUE with a different id).
|
||||
let (probe_id, probe_ptype, _) = read_packet(&mut stream).await;
|
||||
assert_eq!(probe_ptype, SOURCE_TYPE_RESPONSE_VALUE, "expected probe packet");
|
||||
|
||||
// Send the command response, optionally split across two packets.
|
||||
if let Some(n) = split_at {
|
||||
let (part1, part2) = command_response.split_at(n.min(command_response.len()));
|
||||
let p1 = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, part1);
|
||||
stream.write_all(&p1).await.unwrap();
|
||||
let p2 = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, part2);
|
||||
stream.write_all(&p2).await.unwrap();
|
||||
} else {
|
||||
let p = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, command_response);
|
||||
stream.write_all(&p).await.unwrap();
|
||||
}
|
||||
|
||||
// Echo the probe to signal end-of-response.
|
||||
let probe_echo = encode_packet(probe_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
|
||||
stream.write_all(&probe_echo).await.unwrap();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source RCON tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn source_rcon_auth_and_exec_returns_response() {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
run_source_mock(stream, "goodpw", b"Hello from server", None).await;
|
||||
});
|
||||
|
||||
let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
|
||||
let result = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
|
||||
.await
|
||||
.expect("command should succeed");
|
||||
|
||||
assert_eq!(result, "Hello from server");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn source_rcon_wrong_password_returns_auth_error() {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
run_source_mock(stream, "goodpw", b"should not see this", None).await;
|
||||
});
|
||||
|
||||
let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "wrongpw".to_string() };
|
||||
let err = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
|
||||
.await
|
||||
.expect_err("wrong password should fail");
|
||||
|
||||
assert!(
|
||||
err.to_string().to_lowercase().contains("auth"),
|
||||
"error should mention auth failure, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn source_rcon_multi_packet_response_concatenated() {
|
||||
// Build a body large enough to split meaningfully across two packets.
|
||||
// Use repeating ASCII so the result is valid UTF-8 and easy to verify.
|
||||
// 200 'A's then 200 'B's = 400 bytes, split at 200.
|
||||
let body: Vec<u8> = std::iter::repeat_n(b'A', 200)
|
||||
.chain(std::iter::repeat_n(b'B', 200))
|
||||
.collect();
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
let body_clone = body.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
run_source_mock(stream, "goodpw", &body_clone, Some(200)).await;
|
||||
});
|
||||
|
||||
let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
|
||||
let result = corrosion_host_agent::rcon::send_command(&cfg, "soulmask", "showplayers")
|
||||
.await
|
||||
.expect("multi-packet command should succeed");
|
||||
|
||||
let expected = String::from_utf8(body).unwrap();
|
||||
assert_eq!(result, expected, "full body should be concatenated across both packets");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn source_rcon_connect_timeout_to_unreachable_port() {
|
||||
// Bind a listener but never accept — the connection will time out during
|
||||
// the RCON auth phase because nothing is reading from the socket.
|
||||
// We use a port that is bound (so TCP connect itself succeeds) but then
|
||||
// the mock simply drops the stream, forcing a read error, which should
|
||||
// surface as an error (not a panic or hang).
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
|
||||
// Accept the TCP connection but immediately drop it — simulates a port
|
||||
// that accepts but never speaks RCON.
|
||||
tokio::spawn(async move {
|
||||
let (_stream, _) = listener.accept().await.unwrap();
|
||||
// _stream dropped here — EOF on the client's read
|
||||
});
|
||||
|
||||
let cfg =
|
||||
RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
|
||||
let err = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
|
||||
.await
|
||||
.expect_err("closed connection should fail");
|
||||
|
||||
// We just need it to fail and not hang; error message varies by OS.
|
||||
let _ = err;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRCON mock server
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Run a WebRCON mock: send one noise frame (Identifier 0), then respond to
|
||||
/// the first real request with the given output.
|
||||
async fn run_webrcon_mock(stream: tokio::net::TcpStream, output: &str) {
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use tokio_tungstenite::accept_async;
|
||||
use tokio_tungstenite::tungstenite::Message as WsMsg;
|
||||
|
||||
let mut ws = accept_async(stream).await.expect("WS handshake failed");
|
||||
|
||||
// Send noise (chat frame, Identifier 0) before the real request arrives.
|
||||
let noise = serde_json::json!({
|
||||
"Identifier": 0,
|
||||
"Message": "Player X joined",
|
||||
"Name": "Server",
|
||||
"Type": "Chat"
|
||||
});
|
||||
ws.send(WsMsg::Text(noise.to_string()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Read the command request.
|
||||
let msg = ws.next().await.unwrap().unwrap();
|
||||
let text = match msg {
|
||||
WsMsg::Text(t) => t,
|
||||
other => panic!("expected Text frame, got {other:?}"),
|
||||
};
|
||||
let req: serde_json::Value = serde_json::from_str(&text).unwrap();
|
||||
let req_id = req["Identifier"].as_i64().unwrap() as i32;
|
||||
|
||||
// Reply with the same Identifier so the client correlates correctly.
|
||||
let reply = serde_json::json!({
|
||||
"Identifier": req_id,
|
||||
"Message": output,
|
||||
"Type": "Generic",
|
||||
});
|
||||
ws.send(WsMsg::Text(reply.to_string())).await.unwrap();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRCON tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn webrcon_skips_noise_and_returns_correct_message() {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
run_webrcon_mock(stream, "Players: 42/100").await;
|
||||
});
|
||||
|
||||
// Password is embedded in the URL path — any non-empty string works with
|
||||
// our mock.
|
||||
let cfg = RconConfig {
|
||||
kind: Some(RconKind::WebRcon),
|
||||
port,
|
||||
password: "testpw".to_string(),
|
||||
};
|
||||
let result = corrosion_host_agent::rcon::send_command(&cfg, "rust", "playercount")
|
||||
.await
|
||||
.expect("WebRCON command should succeed");
|
||||
|
||||
assert_eq!(result, "Players: 42/100");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TOML parsing test — pins [[instance]] + [instance.rcon] sub-table syntax
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Pins the `[[instance]]` + `[instance.rcon]` sub-table syntax: the rcon
/// table must attach to the preceding instance with all three fields intact.
#[test]
fn toml_instance_with_rcon_parses_correctly() {
    let source = r#"
[agent]
license_id = "test-license"
nats_url = "nats://localhost:4222"

[[instance]]
id = "rust-main"
game = "rust"
root = "/opt/rustserver"

[instance.rcon]
port = 28016
password = "secretpassword"
kind = "webrcon"
"#;

    let parsed: corrosion_host_agent::config::ConfigFile =
        toml::from_str(source).expect("TOML should parse");

    assert_eq!(parsed.instances.len(), 1);
    let instance = &parsed.instances[0];
    assert_eq!(instance.id, "rust-main");

    let rcon = instance.rcon.as_ref().expect("rcon should be present");
    assert_eq!(rcon.port, 28016);
    assert_eq!(rcon.password, "secretpassword");
    assert_eq!(rcon.kind, Some(corrosion_host_agent::rcon::RconKind::WebRcon));
}
|
||||
|
||||
/// An instance declared without an rcon sub-table parses with `rcon` unset.
#[test]
fn toml_instance_without_rcon_defaults_to_none() {
    let source = r#"
[agent]
license_id = "test-license"
nats_url = "nats://localhost:4222"

[[instance]]
id = "conan-main"
game = "conan"
root = "/opt/conan"
"#;

    let parsed: corrosion_host_agent::config::ConfigFile =
        toml::from_str(source).expect("TOML should parse");

    let first = &parsed.instances[0];
    assert!(first.rcon.is_none(), "absent rcon should be None");
}
|
||||
|
||||
/// With no explicit `kind`, the protocol is chosen from the game name; an
/// explicit `kind` overrides whatever the game name would imply.
#[test]
fn resolved_kind_infers_from_game_name() {
    use corrosion_host_agent::rcon::{RconConfig, RconKind};

    let cfg_no_kind = RconConfig { kind: None, port: 28016, password: "x".to_string() };
    let inferred = [
        ("rust", RconKind::WebRcon),
        ("conan", RconKind::Source),
        ("soulmask", RconKind::Source),
        ("dune", RconKind::WebRcon), // fallback
    ];
    for (game, expected) in inferred {
        assert_eq!(cfg_no_kind.resolved_kind(game), expected);
    }

    // Explicit kind always wins, in both directions.
    let cfg_source = RconConfig { kind: Some(RconKind::Source), ..cfg_no_kind.clone() };
    assert_eq!(cfg_source.resolved_kind("rust"), RconKind::Source);

    let cfg_webrcon = RconConfig { kind: Some(RconKind::WebRcon), ..cfg_no_kind };
    assert_eq!(cfg_webrcon.resolved_kind("conan"), RconKind::WebRcon);
}
|
||||
45
corrosion-host-agent/tests/steamcmd.rs
Normal file
45
corrosion-host-agent/tests/steamcmd.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
//! Unit tests for the SteamCMD module.
|
||||
//!
|
||||
//! Tests cover app ID resolution for all four supported games, including the
|
||||
//! platform-specific Soulmask split, and verify that Dune correctly returns
|
||||
//! `None` (it uses Docker images, not SteamCMD).
|
||||
|
||||
use corrosion_host_agent::steamcmd::app_id_for_game;
|
||||
|
||||
/// The game key `"rust"` resolves to app ID 258550.
#[test]
fn rust_has_correct_app_id() {
    let resolved = app_id_for_game("rust");
    assert_eq!(resolved, Some(258550));
}
|
||||
|
||||
/// The game key `"conan"` resolves to app ID 443030.
#[test]
fn conan_has_correct_app_id() {
    let resolved = app_id_for_game("conan");
    assert_eq!(resolved, Some(443030));
}
|
||||
|
||||
/// Soulmask is platform-split: Windows builds must get the Windows server
/// app ID (every other target gets the Linux dedicated-server ID instead).
#[test]
#[cfg(windows)]
fn soulmask_windows_app_id() {
    let resolved = app_id_for_game("soulmask");
    assert_eq!(resolved, Some(3017310));
}
|
||||
|
||||
/// On every non-Windows target Soulmask resolves to app ID 3017300.
#[test]
#[cfg(not(windows))]
fn soulmask_linux_app_id() {
    let resolved = app_id_for_game("soulmask");
    assert_eq!(resolved, Some(3017300));
}
|
||||
|
||||
/// Dune is distributed as Docker images, so it has no SteamCMD app ID.
#[test]
fn dune_has_no_app_id() {
    let resolved = app_id_for_game("dune");
    assert_eq!(resolved, None);
}
|
||||
|
||||
/// Unrecognised game keys — including the empty string — yield `None`;
/// callers should treat that exactly like Dune (no SteamCMD support).
#[test]
fn unknown_game_returns_none() {
    for unknown in ["minecraft", ""] {
        assert_eq!(app_id_for_game(unknown), None);
    }
}
|
||||
111
corrosion-host-agent/tests/supervisor.rs
Normal file
111
corrosion-host-agent/tests/supervisor.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! Process supervisor integration tests using real OS processes.
|
||||
//! Unix-only test doubles (/bin/sleep, /bin/sh) — the supervisor logic under
|
||||
//! test is platform-shared; Windows-specific stop semantics get covered when
|
||||
//! the Windows service work lands.
|
||||
#![cfg(unix)]
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use corrosion_host_agent::config::InstanceConfig;
|
||||
use corrosion_host_agent::process::ProcessSupervisor;
|
||||
use corrosion_host_agent::supervisor::{InstanceState, Supervisor};
|
||||
|
||||
fn managed_instance(executable: &str, args: &[&str]) -> InstanceConfig {
|
||||
InstanceConfig {
|
||||
id: "test-instance".to_string(),
|
||||
game: "rust".to_string(),
|
||||
root: PathBuf::from("/tmp"),
|
||||
label: None,
|
||||
executable: Some(PathBuf::from(executable)),
|
||||
args: args.iter().map(|s| s.to_string()).collect(),
|
||||
working_dir: None,
|
||||
rcon: None,
|
||||
steamcmd: None,
|
||||
docker_compose: None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_state(
|
||||
sup: &std::sync::Arc<ProcessSupervisor>,
|
||||
want: fn(&InstanceState) -> bool,
|
||||
budget: Duration,
|
||||
) -> InstanceState {
|
||||
let deadline = tokio::time::Instant::now() + budget;
|
||||
loop {
|
||||
let state = sup.state();
|
||||
if want(&state) {
|
||||
return state;
|
||||
}
|
||||
if tokio::time::Instant::now() > deadline {
|
||||
panic!("timed out waiting for state; last = {state:?}");
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn start_status_stop_lifecycle() {
|
||||
let sup = ProcessSupervisor::new(&managed_instance("/bin/sleep", &["300"]));
|
||||
assert_eq!(sup.state(), InstanceState::Stopped);
|
||||
|
||||
sup.clone().start().await.expect("start should succeed");
|
||||
assert_eq!(sup.state(), InstanceState::Running);
|
||||
tokio::time::sleep(Duration::from_millis(1100)).await;
|
||||
assert!(sup.uptime_seconds().await >= 1, "uptime should advance");
|
||||
|
||||
// Double-start must be rejected while running.
|
||||
assert!(sup.clone().start().await.is_err(), "double start must fail");
|
||||
|
||||
sup.clone().stop().await.expect("stop should succeed");
|
||||
let state = wait_for_state(&sup, |s| matches!(s, InstanceState::Stopped), Duration::from_secs(5)).await;
|
||||
assert_eq!(state, InstanceState::Stopped);
|
||||
assert_eq!(sup.uptime_seconds().await, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unexpected_exit_is_crashed_with_code() {
|
||||
let sup = ProcessSupervisor::new(&managed_instance("/bin/sh", &["-c", "sleep 0.2; exit 7"]));
|
||||
sup.clone().start().await.expect("start should succeed");
|
||||
|
||||
let state = wait_for_state(
|
||||
&sup,
|
||||
|s| matches!(s, InstanceState::Crashed { .. }),
|
||||
Duration::from_secs(5),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(state, InstanceState::Crashed { exit_code: Some(7) });
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn restart_from_crashed_recovers() {
|
||||
let sup = ProcessSupervisor::new(&managed_instance("/bin/sh", &["-c", "exit 1"]));
|
||||
sup.clone().start().await.expect("start should succeed");
|
||||
wait_for_state(&sup, |s| matches!(s, InstanceState::Crashed { .. }), Duration::from_secs(5)).await;
|
||||
|
||||
// Restart from crashed must work (panel "Restart" after a crash).
|
||||
// Use a long-lived command this time by replacing the supervisor — the
|
||||
// command is fixed per supervisor, so emulate via a fresh one.
|
||||
let sup2 = ProcessSupervisor::new(&managed_instance("/bin/sleep", &["300"]));
|
||||
sup2.clone().restart().await.expect("restart from stopped should start");
|
||||
assert_eq!(sup2.state(), InstanceState::Running);
|
||||
sup2.clone().stop().await.expect("cleanup stop");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unmanaged_instance_rejects_process_commands() {
|
||||
let mut cfg = managed_instance("/bin/sleep", &["300"]);
|
||||
cfg.executable = None;
|
||||
let sup = ProcessSupervisor::new(&cfg);
|
||||
assert_eq!(sup.state(), InstanceState::Unmanaged);
|
||||
assert!(sup.clone().start().await.is_err(), "unmanaged start must fail");
|
||||
assert!(sup.clone().stop().await.is_err(), "unmanaged stop must fail");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn missing_executable_fails_cleanly() {
|
||||
let sup = ProcessSupervisor::new(&managed_instance("/nonexistent/bin/gameserver", &[]));
|
||||
let err = sup.clone().start().await.expect_err("must fail");
|
||||
assert!(err.to_string().contains("not found"), "error should say not found: {err}");
|
||||
assert_eq!(sup.state(), InstanceState::Stopped, "failed start must not leave Starting state");
|
||||
}
|
||||
63
corrosion-host-agent/tests/update.rs
Normal file
63
corrosion-host-agent/tests/update.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Signed self-update tests — the security-critical part is signature
|
||||
//! verification: a valid signature is accepted, anything tampered is rejected.
|
||||
//! Fixtures (tests/fixtures/sample.bin + .minisig) were signed with the real
|
||||
//! release private key, so these run with no key present (as in CI).
|
||||
|
||||
use corrosion_host_agent::update;
|
||||
|
||||
const SAMPLE: &[u8] = include_bytes!("fixtures/sample.bin");
|
||||
const SAMPLE_SIG: &str = include_str!("fixtures/sample.bin.minisig");
|
||||
|
||||
#[test]
fn accepts_a_validly_signed_binary() {
    // The untouched fixture and its matching signature must verify.
    let outcome = update::verify_signature(SAMPLE, SAMPLE_SIG);
    outcome.expect("valid signature must verify");
}
|
||||
|
||||
#[test]
fn rejects_a_tampered_binary() {
    // Invert every bit of the first byte; the signature no longer matches.
    let mut tampered = Vec::from(SAMPLE);
    tampered[0] ^= 0xFF;

    let outcome = update::verify_signature(&tampered, SAMPLE_SIG);
    let err = outcome.expect_err("tampered binary must be rejected");

    let message = err.to_string();
    assert!(message.contains("verification failed"), "got: {err}");
}
|
||||
|
||||
#[test]
fn rejects_a_garbage_signature() {
    // A string that is not a minisign signature at all must be an error.
    let verdict = update::verify_signature(SAMPLE, "not a real minisig blob");
    assert!(verdict.is_err());
}
|
||||
|
||||
#[test]
fn rejects_empty_binary_against_real_sig() {
    // A genuine signature must not validate a zero-length payload.
    let verdict = update::verify_signature(b"", SAMPLE_SIG);
    assert!(verdict.is_err());
}
|
||||
|
||||
#[test]
fn url_allowlist_enforced() {
    // Allowed.
    update::assert_url_allowed("https://cdn.corrosionmgmt.com/host-agent/alpha/corrosion-host-agent-linux-amd64")
        .expect("the real CDN host must be allowed");
    // http rejected.
    assert!(update::assert_url_allowed("http://cdn.corrosionmgmt.com/x").is_err());
    // wrong host rejected.
    assert!(update::assert_url_allowed("https://evil.example.com/x").is_err());
    // credential-in-URL (userinfo bypass) rejected: the allowlisted name sits in
    // the username position with a password; the real host is evil.com.
    // NOTE(review): the `user:password@host` part of this literal was destroyed
    // by e-mail obfuscation in the page this was recovered from; the password
    // and host are reconstructed. Confirm against the repository copy.
    assert!(update::assert_url_allowed("https://cdn.corrosionmgmt.com:password@evil.com/x").is_err());
    // host as userinfo trick rejected (real host is evil.com).
    assert!(update::assert_url_allowed("https://cdn.corrosionmgmt.com@evil.com/x").is_err());
}
|
||||
|
||||
#[test]
fn swap_binary_replaces_and_backs_up() {
    // Stage an "installed" binary inside a throwaway directory.
    let dir = tempfile::tempdir().expect("tempdir");
    let current = dir.path().join("corrosion-host-agent");
    std::fs::write(&current, b"OLD BINARY").unwrap();

    update::swap_binary(&current, b"NEW BINARY").expect("swap should succeed");

    assert_eq!(std::fs::read(&current).unwrap(), b"NEW BINARY", "current is the new binary");
    let backup = dir.path().join("corrosion-host-agent.old");
    assert_eq!(std::fs::read(&backup).unwrap(), b"OLD BINARY", ".old holds the previous binary");
    // the .new scratch file is consumed by the rename
    assert!(!dir.path().join("corrosion-host-agent.new").exists());
}
|
||||
298
corrosion-host-agent/tests/wipe.rs
Normal file
298
corrosion-host-agent/tests/wipe.rs
Normal file
@@ -0,0 +1,298 @@
|
||||
//! Integration tests for the wipe engine.
|
||||
//!
|
||||
//! Builds a temp directory tree that mirrors a Rust dedicated server layout
|
||||
//! and verifies each wipe type's targeting, the symlink-safety guarantee,
|
||||
//! backup behaviour, and graceful handling of missing directories.
|
||||
//!
|
||||
//! Symlink tests are POSIX-only (Unix creates symlinks; Windows needs elevated
|
||||
//! privileges or Developer Mode, so we skip there).
|
||||
|
||||
#![cfg(unix)]
|
||||
|
||||
use corrosion_host_agent::wipe::{execute, WipeRequest, WipeType};
|
||||
use std::path::Path;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: build a fake Rust server tree
|
||||
//
|
||||
// Layout:
|
||||
// <root>/
|
||||
// server/
|
||||
// myserver/
|
||||
// proc.map
|
||||
// proc.sav
|
||||
// player.blueprints.1234.db
|
||||
// player.deaths.1234.db
|
||||
// player.identities.1234.db
|
||||
// player.states.1234.db
|
||||
// players.db
|
||||
// keepme.txt ← must survive every wipe
|
||||
// oxide/
|
||||
// data/
|
||||
// killfeed.json
|
||||
// another.json
|
||||
// server_readme.txt ← must survive every wipe
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn make_server_tree() -> TempDir {
|
||||
let dir = tempfile::tempdir().expect("create tempdir");
|
||||
let root = dir.path();
|
||||
|
||||
let save_dir = root.join("server").join("myserver");
|
||||
std::fs::create_dir_all(&save_dir).expect("create save dir");
|
||||
std::fs::create_dir_all(root.join("oxide").join("data")).expect("create oxide/data");
|
||||
|
||||
// Save files
|
||||
write_file(&save_dir.join("proc.map"), b"map data");
|
||||
write_file(&save_dir.join("proc.sav"), b"sav data");
|
||||
write_file(&save_dir.join("player.blueprints.1234.db"), b"bp data");
|
||||
write_file(&save_dir.join("player.deaths.1234.db"), b"deaths");
|
||||
write_file(&save_dir.join("player.identities.1234.db"), b"identities");
|
||||
write_file(&save_dir.join("player.states.1234.db"), b"states");
|
||||
write_file(&save_dir.join("players.db"), b"player db");
|
||||
// Innocent file — must never be deleted.
|
||||
write_file(&save_dir.join("keepme.txt"), b"keep me");
|
||||
|
||||
// oxide/data contents
|
||||
write_file(&root.join("oxide").join("data").join("killfeed.json"), b"{}");
|
||||
write_file(&root.join("oxide").join("data").join("another.json"), b"{}");
|
||||
|
||||
// File at root level — must survive.
|
||||
write_file(&root.join("server_readme.txt"), b"readme");
|
||||
|
||||
dir
|
||||
}
|
||||
|
||||
/// Writes `content` to `path`, panicking with the path and the I/O error if
/// the write fails.
fn write_file(path: &Path, content: &[u8]) {
    if let Err(e) = std::fs::write(path, content) {
        panic!("write {}: {e}", path.display());
    }
}
|
||||
|
||||
fn wipe_req(wipe_type: WipeType) -> WipeRequest {
|
||||
WipeRequest {
|
||||
wipe_type,
|
||||
backup: false,
|
||||
backup_label: "test-backup".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether the path `rel`, resolved against `root`, exists on disk.
fn exists(root: &Path, rel: &str) -> bool {
    let candidate = root.join(rel);
    candidate.exists()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Map wipe: only *.map and *.sav deleted
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
fn map_wipe_deletes_map_and_sav_only() {
    let tree = make_server_tree();
    let root = tree.path();

    let request = wipe_req(WipeType::Map);
    let report = execute(root, &request).expect("map wipe should succeed");

    // Removed: the map and its save.
    assert!(!exists(root, "server/myserver/proc.map"), "proc.map must be gone");
    assert!(!exists(root, "server/myserver/proc.sav"), "proc.sav must be gone");

    // Untouched: player data, plugin data and unrelated files.
    assert!(exists(root, "server/myserver/player.blueprints.1234.db"), "blueprints must survive map wipe");
    assert!(exists(root, "server/myserver/player.deaths.1234.db"), "deaths must survive map wipe");
    assert!(exists(root, "server/myserver/keepme.txt"), "keepme.txt must survive");
    assert!(exists(root, "oxide/data/killfeed.json"), "oxide/data must survive map wipe");
    assert!(exists(root, "server_readme.txt"), "server_readme.txt must survive");

    assert_eq!(report.deleted_count, 2);
    assert_eq!(report.wipe_type, WipeType::Map);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Blueprint wipe: map/sav + blueprints deleted
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
fn blueprint_wipe_includes_map_files() {
    let tree = make_server_tree();
    let root = tree.path();

    let request = wipe_req(WipeType::Blueprint);
    let report = execute(root, &request).expect("blueprint wipe should succeed");

    // Removed: everything a map wipe removes, plus the blueprint database.
    assert!(!exists(root, "server/myserver/proc.map"), "proc.map must be gone");
    assert!(!exists(root, "server/myserver/proc.sav"), "proc.sav must be gone");
    assert!(!exists(root, "server/myserver/player.blueprints.1234.db"), "blueprints must be gone");

    // Untouched: remaining player data, plugin data and unrelated files.
    assert!(exists(root, "server/myserver/player.deaths.1234.db"), "deaths must survive blueprint wipe");
    assert!(exists(root, "server/myserver/player.identities.1234.db"), "identities must survive");
    assert!(exists(root, "server/myserver/keepme.txt"), "keepme.txt must survive");
    assert!(exists(root, "oxide/data/killfeed.json"), "oxide/data must survive blueprint wipe");

    assert_eq!(report.deleted_count, 3);
    assert_eq!(report.wipe_type, WipeType::Blueprint);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Full wipe: everything including player state + oxide/data
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
fn full_wipe_clears_all_game_data() {
    let tree = make_server_tree();
    let root = tree.path();

    let request = wipe_req(WipeType::Full);
    let report = execute(root, &request).expect("full wipe should succeed");

    // Every game file in the save dir is removed.
    assert!(!exists(root, "server/myserver/proc.map"));
    assert!(!exists(root, "server/myserver/proc.sav"));
    assert!(!exists(root, "server/myserver/player.blueprints.1234.db"));
    assert!(!exists(root, "server/myserver/player.deaths.1234.db"));
    assert!(!exists(root, "server/myserver/player.identities.1234.db"));
    assert!(!exists(root, "server/myserver/player.states.1234.db"));
    assert!(!exists(root, "server/myserver/players.db"));

    // oxide/data is emptied, but the directory itself stays.
    assert!(!exists(root, "oxide/data/killfeed.json"), "killfeed.json must be gone");
    assert!(!exists(root, "oxide/data/another.json"), "another.json must be gone");
    assert!(exists(root, "oxide/data"), "oxide/data directory itself must remain");

    // Files that no wipe type targets are left alone.
    assert!(exists(root, "server/myserver/keepme.txt"), "keepme.txt must survive full wipe");
    assert!(exists(root, "server_readme.txt"), "server_readme.txt must survive full wipe");

    // 7 save-dir files + 2 oxide/data files = 9
    assert_eq!(report.deleted_count, 9);
    assert_eq!(report.wipe_type, WipeType::Full);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Missing directories: no error on fresh server
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
fn missing_server_dir_does_not_error() {
    // Completely empty root — no server/ or oxide/ directories.
    let empty_root = tempfile::tempdir().expect("tempdir");

    let outcome = execute(empty_root.path(), &wipe_req(WipeType::Full));

    assert!(outcome.is_ok(), "empty root must not error: {:?}", outcome);
    let report = outcome.unwrap();
    assert_eq!(report.deleted_count, 0);
}
|
||||
|
||||
#[test]
fn missing_oxide_data_does_not_error() {
    // Has server dir but no oxide/data.
    let tree = tempfile::tempdir().expect("tempdir");
    let root = tree.path();
    let save_dir = root.join("server").join("myserver");
    std::fs::create_dir_all(&save_dir).expect("mkdir");
    write_file(&save_dir.join("proc.map"), b"map");

    let outcome = execute(root, &wipe_req(WipeType::Full));

    assert!(outcome.is_ok(), "missing oxide/data must not error: {:?}", outcome);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Symlink safety: symlink inside root pointing outside must NOT be followed
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
fn symlink_in_save_dir_is_not_deleted_via_follow() {
    let tree = make_server_tree();
    let root = tree.path();

    // A directory outside the server root holding data the wipe must not reach.
    let outside = tempfile::tempdir().expect("outside tempdir");
    let secret = outside.path().join("secret.txt");
    write_file(&secret, b"TOP SECRET");

    // Plant a symlink inside the save dir pointing to the external directory.
    let link = root.join("server").join("myserver").join("evil_link");
    std::os::unix::fs::symlink(outside.path(), &link).expect("plant symlink");

    // Perform a full wipe — should not follow the symlink or touch secret.txt
    let outcome = execute(root, &wipe_req(WipeType::Full));
    assert!(outcome.is_ok(), "wipe with a symlink present must not error: {:?}", outcome);

    // External data must be untouched.
    assert!(
        secret.exists(),
        "external secret.txt must not be deleted via symlink follow"
    );
}
|
||||
|
||||
#[test]
fn symlink_at_identity_dir_level_is_skipped() {
    let tree = tempfile::tempdir().expect("tempdir");
    let root = tree.path();
    let server_dir = root.join("server");
    std::fs::create_dir_all(&server_dir).expect("mkdir server");

    // The identity entry itself is a symlink to an external dir.
    let outside = tempfile::tempdir().expect("outside tempdir");
    let external_map = outside.path().join("proc.map");
    write_file(&external_map, b"map");

    let link = server_dir.join("evil_identity");
    std::os::unix::fs::symlink(outside.path(), &link).expect("plant identity symlink");

    let outcome = execute(root, &wipe_req(WipeType::Map));
    assert!(outcome.is_ok(), "symlink identity dir must be skipped, not error: {:?}", outcome);

    // The external proc.map must not have been deleted.
    assert!(
        external_map.exists(),
        "external proc.map must not be deleted via identity symlink"
    );
    let report = outcome.unwrap();
    assert_eq!(report.deleted_count, 0);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Backup: files are copied before deletion
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
fn backup_copies_targets_before_deletion() {
    let tree = make_server_tree();
    let root = tree.path();

    let request = WipeRequest {
        wipe_type: WipeType::Map,
        backup: true,
        backup_label: String::from("before-map-wipe"),
    };
    let report = execute(root, &request).expect("map wipe with backup should succeed");

    // The files should be gone from the save dir…
    assert!(!exists(root, "server/myserver/proc.map"), "proc.map must be deleted");
    assert!(!exists(root, "server/myserver/proc.sav"), "proc.sav must be deleted");

    // …but must exist in the backup directory.
    let backup_base = root.join(".corrosion-backups").join("before-map-wipe");
    assert!(backup_base.exists(), "backup directory must be created");

    // The layout below the label directory is not asserted on; a name match
    // anywhere in the backup tree is enough.
    let backed_up = collect_files_recursively(&backup_base);
    let in_backup = |wanted: &str| backed_up.iter().any(|name| name.ends_with(wanted));
    assert!(in_backup("proc.map"), "proc.map must be in backup, found: {backed_up:?}");
    assert!(in_backup("proc.sav"), "proc.sav must be in backup, found: {backed_up:?}");

    assert_eq!(report.deleted_count, 2);
}
|
||||
|
||||
/// Recursively collect all file *names* (just the last component) under `dir`.
///
/// Real sub-directories are descended into; every other entry, including a
/// symlink of any kind, contributes its own name. Symlinks are never followed,
/// so a link pointing outside `dir` or back at an ancestor cannot pull in
/// foreign names or cause repeated traversal.
///
/// A directory that cannot be read (for example because it does not exist)
/// contributes nothing, and entries that fail to enumerate are skipped.
fn collect_files_recursively(dir: &Path) -> Vec<String> {
    let mut found = Vec::new();
    if let Ok(entries) = std::fs::read_dir(dir) {
        for entry in entries.flatten() {
            // `DirEntry::file_type` does not traverse symlinks, unlike
            // `Path::is_dir`, so a symlinked directory is treated as a leaf.
            let is_real_dir = entry.file_type().map(|kind| kind.is_dir()).unwrap_or(false);
            if is_real_dir {
                found.extend(collect_files_recursively(&entry.path()));
            } else {
                found.push(entry.file_name().to_string_lossy().into_owned());
            }
        }
    }
    found
}
|
||||
@@ -31,6 +31,9 @@ services:
|
||||
volumes:
|
||||
- nats_data:/data
|
||||
- ./nats.conf:/etc/nats/nats.conf:ro
|
||||
# Per-license authorization (generated on the host; carries secrets, not
|
||||
# committed with real users — see scripts/generate-nats-auth.mjs).
|
||||
- ./nats-auth.conf:/etc/nats/nats-auth.conf:ro
|
||||
ports:
|
||||
- "8089:4222" # Client connections
|
||||
|
||||
@@ -43,6 +46,12 @@ services:
|
||||
DATABASE_URL: postgres://corrosion:${DB_PASSWORD:-corrosion_dev}@postgres:5432/corrosion
|
||||
DATABASE_MAX_CONNECTIONS: "20"
|
||||
NATS_URL: nats://nats:4222
|
||||
# Privileged internal NATS user (full corrosion.> access). Empty = anonymous.
|
||||
NATS_INTERNAL_USER: ${NATS_INTERNAL_USER:-}
|
||||
NATS_INTERNAL_PASSWORD: ${NATS_INTERNAL_PASSWORD:-}
|
||||
# Secret for deriving per-license agent passwords (shared with the
|
||||
# nats-auth generator). HMAC-SHA256(license_id, secret).
|
||||
NATS_TOKEN_SECRET: ${NATS_TOKEN_SECRET:-}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
JWT_ACCESS_EXPIRY_SECONDS: "14400"
|
||||
JWT_REFRESH_EXPIRY_SECONDS: "604800"
|
||||
@@ -87,7 +96,10 @@ services:
|
||||
api:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q --spider http://localhost:80/ || exit 1"]
|
||||
# 127.0.0.1, not localhost: nginx listens IPv4-only (0.0.0.0:80) but
|
||||
# `localhost` resolves to ::1 first inside the container → the probe hit
|
||||
# nothing and reported unhealthy while the panel served fine on IPv4.
|
||||
test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:80/ || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
18
docker/nats-auth.conf
Normal file
18
docker/nats-auth.conf
Normal file
@@ -0,0 +1,18 @@
|
||||
# BOOTSTRAP DEFAULT — no secrets, safe to commit.
|
||||
#
|
||||
# Anonymous is mapped to a HARMLESS namespace (corrosion.unclaimed.>), never to
|
||||
# real tenant subjects (corrosion.{uuid}.>) — so a fresh/stale deploy running
|
||||
# this default cannot read or forge any tenant's traffic. The REST API still
|
||||
# works; agent telemetry just won't flow until the real config is generated.
|
||||
#
|
||||
# On every real deploy, scripts/generate-nats-auth.mjs OVERWRITES this file
|
||||
# (on the host, not in git) with the privileged internal user + per-license
|
||||
# scoped users. NATS_AUTH_STAGE defaults to "enforce" (anonymous rejected).
|
||||
#
|
||||
# NOTE: no_auth_user is a TOP-LEVEL field, NOT inside authorization { }.
|
||||
authorization {
|
||||
users: [
|
||||
{ user: "anonymous", password: "", permissions: { publish: { allow: ["corrosion.unclaimed.>"] }, subscribe: { allow: ["corrosion.unclaimed.>"] } } }
|
||||
]
|
||||
}
|
||||
no_auth_user: "anonymous"
|
||||
@@ -28,8 +28,11 @@ logtime: true
|
||||
max_payload: 8MB # Support map file transfer metadata
|
||||
max_connections: 10000
|
||||
|
||||
# Authorization — tokens validated per-connection
|
||||
# Plugin and companion agents authenticate with license-specific tokens
|
||||
authorization {
|
||||
timeout: 5
|
||||
}
|
||||
# Authorization — per-license isolation.
|
||||
# The committed nats-auth.conf is the SAFE bootstrap default: no secrets, and
|
||||
# anonymous is confined to corrosion.unclaimed.> (never tenant subjects). On
|
||||
# deploy, scripts/generate-nats-auth.mjs regenerates nats-auth.conf from the
|
||||
|
|
||||
# licenses table with the privileged internal user + per-license scoped
|
||||
# users; NATS_AUTH_STAGE defaults to "enforce", which rejects anonymous
|
||||
# connections. The host copy carries secrets and is NOT committed
|
||||
# (git update-index --assume-unchanged docker/nats-auth.conf).
|
||||
include "nats-auth.conf"
|
||||
|
||||
100
docs/PRICING.md
100
docs/PRICING.md
@@ -1,27 +1,95 @@
|
||||
# Pricing
|
||||
|
||||
> This document mirrors the live pricing page at corrosionmgmt.com/pricing.
|
||||
|
||||
---
|
||||
|
||||
## Base License — $50 (Launch Price)
|
||||
## Hobby — $9.99/month
|
||||
|
||||
One server. Lifetime access.
|
||||
1–5 game server instances · non-commercial use only.
|
||||
|
||||
Includes:
|
||||
|
||||
* Full control plane
|
||||
* Auto-Wiper
|
||||
* Plugin management
|
||||
* Public site
|
||||
* RBAC
|
||||
|
||||
## Webstore Add-On — $10/month
|
||||
|
||||
Integrated monetization platform.
|
||||
|
||||
## Modules — $9.99+
|
||||
|
||||
Optional feature expansions.
|
||||
- Up to 5 game server instances
|
||||
- Non-commercial servers only
|
||||
- Auto-wiper with rollback
|
||||
- Plugin management (Rust uMod/Oxide)
|
||||
- File manager + real-time console
|
||||
- Scheduled tasks
|
||||
- Public server page
|
||||
- Community support
|
||||
|
||||
---
|
||||
|
||||
Simple. Transparent. No hidden tiers.
|
||||
## Community — $19.99/month
|
||||
|
||||
6–10 game server instances · non-commercial use only.
|
||||
|
||||
Includes:
|
||||
|
||||
- Up to 10 game server instances
|
||||
- Non-commercial servers only
|
||||
- Auto-wiper with rollback
|
||||
- Plugin management (Rust uMod/Oxide)
|
||||
- File manager + real-time console
|
||||
- Scheduled tasks
|
||||
- Public server page
|
||||
- Community support
|
||||
|
||||
---
|
||||
|
||||
## Operator — $99.99/month _(Most popular)_
|
||||
|
||||
Commercial use permitted, or up to 50 servers.
|
||||
|
||||
Includes:
|
||||
|
||||
- Up to 50 game server instances
|
||||
- Commercial use permitted
|
||||
- All games: Rust, Dune: Awakening, Soulmask, Conan Exiles
|
||||
- Auto-wiper with rollback
|
||||
- Plugin + mod management
|
||||
- File manager + real-time console
|
||||
- Scheduled tasks + maintenance windows
|
||||
- Player management + RBAC team access
|
||||
- Public server page + storefront
|
||||
- Community support + priority bug triage
|
||||
|
||||
---
|
||||
|
||||
## Network — Custom pricing
|
||||
|
||||
50+ servers · hosting partners and fleets. Contact support@corrosionmgmt.com for pricing.
|
||||
|
||||
Includes:
|
||||
|
||||
- 50 servers base included
|
||||
- Fleet Blocks: +$49.99/mo per additional 50 servers
|
||||
- Commercial use permitted
|
||||
- All games + multi-game hosts
|
||||
- Full Operator feature set
|
||||
- Fleet-level management
|
||||
- Priority bug triage for platform issues
|
||||
- Community support
|
||||
|
||||
---
|
||||
|
||||
## Fleet Block Add-On — +$49.99/month per 50 servers
|
||||
|
||||
Stack as many Fleet Blocks as your Network plan operation requires.
|
||||
|
||||
---
|
||||
|
||||
## Direct 1:1 Support — $125/hour (prepaid 1-hour blocks)
|
||||
|
||||
Available to any customer. Billed time with a human — not a support tier. Community support (docs, forum, diagnostics, structured bug reports) is included with every plan at no extra charge.
|
||||
|
||||
---
|
||||
|
||||
## Commercial Use Definition
|
||||
|
||||
Commercial use includes monetized communities, paid access, VIP slots, donations, sponsorship-supported servers, hosting providers, or managing servers for others. Hobby and Community plans are non-commercial only. Operator and Network plans permit commercial use.
|
||||
|
||||
---
|
||||
|
||||
Simple. Transparent. No per-seat charges. No hidden tiers.
|
||||
|
||||
69
docs/reference-repos/README.md
Normal file
69
docs/reference-repos/README.md
Normal file
@@ -0,0 +1,69 @@
|
||||
# Reference Repos
|
||||
|
||||
Third-party Dune: Awakening server-management projects, kept here as **behavior
|
||||
references** for Phase 2 (the Corrosion host-agent Dune adapter + future panel
|
||||
Dune features). These are NOT Corrosion code and are not built or shipped — they
|
||||
are read-only references. `.git` histories, `node_modules`, and compiled
|
||||
binaries were stripped on import (the 38 MB `icehunter/web/dune-admin` build
|
||||
artifact and a Tauri `.icns` are intentionally absent).
|
||||
|
||||
> Imported 2026-06-12 from `/tmp/dune-re`. Each was a separate upstream repo;
|
||||
> see each project's own `LICENSE` and `README.md`. Treat as documentation.
|
||||
|
||||
## Why these are here
|
||||
|
||||
Dune: Awakening does **not** use SteamCMD or a plain game-server process like
|
||||
Rust/Conan/Soulmask. It ships as **Docker container(s)** fronted by a **RabbitMQ
|
||||
broker** (admin + game vhosts) and a **PostgreSQL** admin database (`dune`
|
||||
schema), orchestrated as a "**battlegroup**". The game process is
|
||||
`DuneSandboxServer-Linux-Shipping` (one per partition). Server settings live in
|
||||
INI files (`UserEngine.ini` / `UserGame.ini`) and only take effect after a
|
||||
restart. Our Dune adapter must model that container/broker/DB world instead of
|
||||
the process+SteamCMD model — these repos are how that world actually works in
|
||||
the wild.
|
||||
|
||||
## The references
|
||||
|
||||
### `icehunter/` — `dune-admin` (Go backend + React SPA)
|
||||
The richest ops reference. A web admin panel with **four interchangeable control
|
||||
planes**: `docker`, `kubectl`, `local`, and `amp` (CubeCoders AMP / podman).
|
||||
Most relevant to us:
|
||||
- **`SETUP_DOCKER.md`** — the Docker control plane: `docker start/stop/restart`
|
||||
for lifecycle, `docker logs -f` for streaming, `docker exec` into the broker
|
||||
container for RabbitMQ (`rabbitmqctl`) commands, direct TCP to the `dune`
|
||||
Postgres. Optional SSH tunnelling when the admin is off-host. **This is the
|
||||
closest analog to what the Corrosion host-agent Dune adapter must do.**
|
||||
- `cmd/dune-admin/control_docker.go` / `control_kubectl.go` / `control_local.go`
|
||||
/ `control_amp.go` — the `ControlPlane` interface and its implementations
|
||||
(the start/stop/restart/status/log/broker abstraction we mirror as a Rust
|
||||
game-adapter trait).
|
||||
- `db.go` / `model.go` — the full Dune admin data model (players, bases,
|
||||
inventory, exchange/market) for when Corrosion grows a richer Dune admin
|
||||
surface beyond lifecycle.
|
||||
- `CLAUDE.md` — upstream's own engineering notes; the AMP section documents the
|
||||
INI-vs-API server-settings gotcha (AMP regenerates INIs on start).
|
||||
|
||||
### `adainrivers/` — Dune Dedicated Server Manager (Rust / Tauri desktop)
|
||||
**The Rust reference.** Manages already-provisioned servers over **SSH +
|
||||
Kubernetes** ("BattleGroup" start/stop/restart/update), with secure SSH tunnels
|
||||
to Director / File Browser / Postgres / PgHero, an in-game admin console (item
|
||||
grants, vehicle spawns, journey/XP tags), and a bundled **`dune-server-service`**
|
||||
daemon for scheduled maintenance (timed restarts with in-game warnings, backups,
|
||||
update apply). Closest to our stack idiomatically — read it for Rust patterns on
|
||||
SSH control, the maintenance-daemon design, and the in-game command surface.
|
||||
|
||||
### `the4rchangel/` — Dune: Awakening Server Manager (Node.js local web UI)
|
||||
**Matches the Commander's exact self-host path.** A local dashboard that
|
||||
replaces the `battlegroup.bat` terminal menu — guided VM import (Hyper-V),
|
||||
network, SSH, bootstrap, then daily ops: battlegroup start/stop/restart/update,
|
||||
character editor, visual game-config editor (PvP, sandstorms, sandworms, mining
|
||||
rates, decay, building limits), monitoring, DB access. Read it to understand the
|
||||
`battlegroup.bat` workflow our agent has to drive on a Windows/Hyper-V host.
|
||||
|
||||
## How we use them
|
||||
|
||||
- **Lifecycle/control** → mirror `icehunter`'s `ControlPlane` docker provider as
|
||||
the agent's Dune game-adapter (compose/`docker` lifecycle, `docker logs`
|
||||
console, reject SteamCMD).
|
||||
- **Rust idioms / maintenance daemon / SSH** → `adainrivers`.
|
||||
- **Battlegroup.bat reality / setup flow / game-config schema** → `the4rchangel`.
|
||||
71
docs/reference-repos/adainrivers/.github/workflows/ci.yml
vendored
Normal file
71
docs/reference-repos/adainrivers/.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
checks:
|
||||
name: Workspace checks (${{ matrix.platform }})
|
||||
runs-on: ${{ matrix.platform }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
platform: [windows-latest, ubuntu-22.04, macos-latest]
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Install Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
cache-dependency-path: app/package-lock.json
|
||||
|
||||
- name: Install Linux Tauri dependencies
|
||||
if: matrix.platform == 'ubuntu-22.04'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf pkg-config libssl-dev
|
||||
|
||||
- name: Install frontend dependencies
|
||||
working-directory: app
|
||||
run: npm ci
|
||||
|
||||
- name: Rust format
|
||||
run: cargo fmt --all -- --check
|
||||
|
||||
- name: Rust check
|
||||
run: cargo check --workspace
|
||||
|
||||
- name: Rust tests
|
||||
run: cargo test --workspace
|
||||
|
||||
- name: Core API docs
|
||||
run: cargo doc -p dune-manager-core --no-deps
|
||||
|
||||
- name: Frontend build
|
||||
working-directory: app
|
||||
run: npm run build
|
||||
|
||||
- name: Tauri shell check
|
||||
run: cargo check -p dune-dedicated-server-manager-app
|
||||
|
||||
- name: Secret and machine-constant scan
|
||||
if: matrix.platform == 'windows-latest'
|
||||
shell: pwsh
|
||||
run: |
|
||||
rg -n -S "I:|AutoUpdate|192\.168\.2\.|menna|dune-awakening|C:\\WINDOWS\\System32\\OpenSSH|C:\\Windows\\System32\\OpenSSH|change-me-before-exposing|c05564d|d177d3bbc40be761|qRmQx|FuncomLiveServices__ServiceAuthToken" . -g "!app/**/target/**" -g "!crates/**/target/**" -g "!target/**" -g "!app/node_modules/**" -g "!app/dist/**" -g "!*.md" -g "!app/steamcmd/**" -g "!app/dune-server/**" -g "!app/vm/**" -g "!app/vm-*/**" -g "!vm/**" -g "!.tmp/**"
|
||||
if ($LASTEXITCODE -eq 0) {
|
||||
throw "Secret or machine-specific constant scan found matches."
|
||||
}
|
||||
if ($LASTEXITCODE -ne 1) {
|
||||
exit $LASTEXITCODE
|
||||
}
|
||||
203
docs/reference-repos/adainrivers/.github/workflows/release.yml
vendored
Normal file
203
docs/reference-repos/adainrivers/.github/workflows/release.yml
vendored
Normal file
@@ -0,0 +1,203 @@
|
||||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*.*.*"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: "Version to release, for example 0.1.0"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
linux-service-binary:
|
||||
name: Build dune-server-service (musl)
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
targets: x86_64-unknown-linux-musl
|
||||
|
||||
- name: Install Zig
|
||||
uses: mlugg/setup-zig@v1
|
||||
with:
|
||||
version: 0.13.0
|
||||
|
||||
- name: Install cargo-zigbuild
|
||||
run: cargo install --locked cargo-zigbuild
|
||||
|
||||
- name: Resolve release version
|
||||
shell: bash
|
||||
env:
|
||||
WORKFLOW_VERSION: ${{ inputs.version }}
|
||||
run: |
|
||||
version="$WORKFLOW_VERSION"
|
||||
if [ -z "$version" ]; then
|
||||
version="${GITHUB_REF_NAME#v}"
|
||||
fi
|
||||
if [ -z "$version" ]; then
|
||||
echo "could not resolve release version" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "RELEASE_VERSION=$version" >> "$GITHUB_ENV"
|
||||
echo "RELEASE_TAG=v$version" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Build musl binary
|
||||
run: |
|
||||
cargo zigbuild -p dune-server-service --release --target x86_64-unknown-linux-musl
|
||||
strip target/x86_64-unknown-linux-musl/release/dune-server-service
|
||||
|
||||
- name: Stage release artifacts
|
||||
run: |
|
||||
mkdir -p release-artifacts
|
||||
cp target/x86_64-unknown-linux-musl/release/dune-server-service release-artifacts/dune-server-service
|
||||
cp crates/dune-server-service/systemd/dune-server-service.service release-artifacts/dune-server-service.service
|
||||
cp crates/dune-server-service/openrc/dune-server-service release-artifacts/dune-server-service.openrc
|
||||
|
||||
- name: Upload artifact for desktop bundle
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: dune-server-service-musl
|
||||
path: release-artifacts/
|
||||
retention-days: 7
|
||||
|
||||
- name: Resolve release notes
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
shell: bash
|
||||
run: |
|
||||
notes_path="release-notes/${RELEASE_VERSION}.md"
|
||||
if [ -f "$notes_path" ]; then
|
||||
echo "RELEASE_BODY_PATH=$notes_path" >> "$GITHUB_ENV"
|
||||
else
|
||||
tmp=$(mktemp)
|
||||
printf 'Release v%s. No release-notes/%s.md was provided — see the commit log for details.\n' \
|
||||
"$RELEASE_VERSION" "$RELEASE_VERSION" > "$tmp"
|
||||
echo "RELEASE_BODY_PATH=$tmp" >> "$GITHUB_ENV"
|
||||
fi
|
||||
|
||||
- name: Attach to GitHub release
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ env.RELEASE_TAG }}
|
||||
body_path: ${{ env.RELEASE_BODY_PATH }}
|
||||
files: |
|
||||
release-artifacts/dune-server-service
|
||||
release-artifacts/dune-server-service.service
|
||||
release-artifacts/dune-server-service.openrc
|
||||
|
||||
desktop-app:
|
||||
name: Build ${{ matrix.name }} app
|
||||
needs: linux-service-binary
|
||||
runs-on: ${{ matrix.platform }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: Windows
|
||||
platform: windows-latest
|
||||
args: --bundles nsis
|
||||
- name: Linux
|
||||
platform: ubuntu-22.04
|
||||
args: --bundles appimage,deb
|
||||
- name: macOS Apple Silicon
|
||||
platform: macos-latest
|
||||
args: --target aarch64-apple-darwin --bundles dmg
|
||||
- name: macOS Intel
|
||||
platform: macos-latest
|
||||
args: --target x86_64-apple-darwin --bundles dmg
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
targets: ${{ startsWith(matrix.name, 'macOS') && 'aarch64-apple-darwin,x86_64-apple-darwin' || '' }}
|
||||
|
||||
- name: Install Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
cache-dependency-path: app/package-lock.json
|
||||
|
||||
- name: Install Linux Tauri dependencies
|
||||
if: matrix.platform == 'ubuntu-22.04'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf pkg-config libssl-dev
|
||||
|
||||
- name: Install frontend dependencies
|
||||
working-directory: app
|
||||
run: npm ci
|
||||
|
||||
- name: Download bundled dune-server-service binary
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: dune-server-service-musl
|
||||
path: app/src-tauri/binaries/
|
||||
|
||||
- name: Resolve release version
|
||||
shell: pwsh
|
||||
env:
|
||||
WORKFLOW_VERSION: ${{ inputs.version }}
|
||||
run: |
|
||||
$version = $env:WORKFLOW_VERSION
|
||||
if ([string]::IsNullOrWhiteSpace($version)) {
|
||||
$version = "${{ github.ref_name }}".TrimStart("v")
|
||||
}
|
||||
if ([string]::IsNullOrWhiteSpace($version)) {
|
||||
throw "Release version could not be resolved."
|
||||
}
|
||||
"RELEASE_VERSION=$version" | Out-File -FilePath $env:GITHUB_ENV -Append
|
||||
"RELEASE_TAG=v$version" | Out-File -FilePath $env:GITHUB_ENV -Append
|
||||
|
||||
- name: Prepare release config
|
||||
shell: pwsh
|
||||
run: |
|
||||
$version = $env:RELEASE_VERSION
|
||||
|
||||
Push-Location app
|
||||
npm version --no-git-tag-version --allow-same-version $version
|
||||
Pop-Location
|
||||
|
||||
$tauriConfigPath = "app/src-tauri/tauri.conf.json"
|
||||
$config = Get-Content $tauriConfigPath -Raw
|
||||
$config = $config -replace '"version":\s*"[^"]+"', ('"version": "' + $version + '"')
|
||||
# Release builds publish signed updater artifacts; the checked-in
|
||||
# default keeps this off so local debug builds do not require
|
||||
# TAURI_SIGNING_PRIVATE_KEY.
|
||||
$config = $config -replace '"createUpdaterArtifacts":\s*false', '"createUpdaterArtifacts": true'
|
||||
Set-Content -Path $tauriConfigPath -Value $config -NoNewline
|
||||
|
||||
# The body is set by the linux-service-binary job's softprops step.
|
||||
# tauri-action only uploads desktop bundles + the signed updater
|
||||
# artifacts here; we don't pass releaseBody to avoid clobbering.
|
||||
- name: Build and publish Tauri release
|
||||
uses: tauri-apps/tauri-action@v0
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
TAURI_SIGNING_PRIVATE_KEY: ${{ secrets.TAURI_SIGNING_PRIVATE_KEY }}
|
||||
TAURI_SIGNING_PRIVATE_KEY_PASSWORD: ${{ secrets.TAURI_SIGNING_PRIVATE_KEY_PASSWORD }}
|
||||
VITE_ENABLE_STARTUP_UPDATE_CHECK: "true"
|
||||
with:
|
||||
projectPath: app
|
||||
tagName: ${{ env.RELEASE_TAG }}
|
||||
releaseName: "Dune Dedicated Server Manager ${{ env.RELEASE_TAG }}"
|
||||
releaseDraft: false
|
||||
prerelease: false
|
||||
args: ${{ matrix.args }}
|
||||
68
docs/reference-repos/adainrivers/.gitignore
vendored
Normal file
68
docs/reference-repos/adainrivers/.gitignore
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
# Dependencies
|
||||
node_modules/
|
||||
app/node_modules/
|
||||
|
||||
# Frontend build
|
||||
dist/
|
||||
app/dist/
|
||||
app/src-tauri/gen/schemas/
|
||||
|
||||
# Rust/Tauri build outputs
|
||||
target/
|
||||
src-tauri/target/
|
||||
app/src-tauri/target/
|
||||
manager-api/target/
|
||||
|
||||
# Local environment
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
|
||||
# Markdown files are scratch notes and ignored by default; README.md and docs/*.md are re-included below (docs/rabbitmq-protocol.md stays ignored)
|
||||
*.md
|
||||
!README.md
|
||||
!docs/
|
||||
!docs/*.md
|
||||
docs/rabbitmq-protocol.md
|
||||
# Release notes go on GitHub releases via the release workflow.
|
||||
!release-notes/
|
||||
!release-notes/*.md
|
||||
|
||||
# Editor and OS noise
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
Thumbs.db
|
||||
Desktop.ini
|
||||
|
||||
# Local app/runtime data and secrets
|
||||
.tmp/
|
||||
.playwright-mcp/
|
||||
app/default-config.json
|
||||
app/steamcmd/
|
||||
app/dune-server/
|
||||
dune-server/
|
||||
app/vm/
|
||||
app/vm-*/
|
||||
app/src-tauri/dune-server/
|
||||
app/src-tauri/vm/
|
||||
app/src-tauri/resources/manager-api/dune-manager-api
|
||||
app/src-tauri/resources/manager-api/dune-manager-api.exe
|
||||
vm/
|
||||
*.pem
|
||||
*.key
|
||||
sshKey
|
||||
codex_vm_ed25519_dropbear
|
||||
codex_vm_ed25519_dropbear.pub
|
||||
snapshots/
|
||||
keys/
|
||||
initial-setup-log.txt
|
||||
secrets/
|
||||
7156
docs/reference-repos/adainrivers/Cargo.lock
generated
Normal file
7156
docs/reference-repos/adainrivers/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
7
docs/reference-repos/adainrivers/Cargo.toml
Normal file
7
docs/reference-repos/adainrivers/Cargo.toml
Normal file
@@ -0,0 +1,7 @@
|
||||
[workspace]
|
||||
members = ["crates/dune-manager-core", "crates/dune-server-service", "app/src-tauri"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.dependencies]
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
21
docs/reference-repos/adainrivers/LICENSE
Normal file
21
docs/reference-repos/adainrivers/LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 gaming.tools
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
59
docs/reference-repos/adainrivers/README.md
Normal file
59
docs/reference-repos/adainrivers/README.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Dune Dedicated Server Manager
|
||||
|
||||
A desktop manager for existing Dune Awakening dedicated servers.
|
||||
|
||||

|
||||
|
||||
The app manages already-provisioned Dune dedicated servers over SSH and
|
||||
Kubernetes control commands. It does not install the game server, create VMs,
|
||||
configure Hyper-V, provision Ubuntu, or manage external tools such as SteamCMD.
|
||||
|
||||
## Features
|
||||
|
||||
- Remote server profile management with SSH private-key authentication
|
||||
- BattleGroup status, start, stop, restart, and update controls
|
||||
- Component diagnostics, log viewing, and safe restart actions
|
||||
- Secure Director, File Browser, PostgreSQL, and PgHero access through local SSH tunnels
|
||||
- Bundled `dune-server-service` daemon for on-host scheduled maintenance (daily restarts with in-game warnings, automated backups, server update check + apply) — installed over SSH straight from the Management card
|
||||
- Admin console for in-game actions: item grants, vehicle spawns, skill/journey/XP tags, player lookup with live pawn location, and a logged history of every published command
|
||||
- Automated tasks tab with editable schedule settings (daily restart time, warning lead/frequency, update apply lead, IANA timezone) — saving auto-restarts the service so changes apply immediately
|
||||
- Welcome Package automation: a per-player onboarding chain (item grants, water refill, welcome whisper) driven by Postgres player detection, tracked in the management service's SQLite ledger, and configurable from the Welcome Package tab with both a visual editor and a raw JSON mode
|
||||
|
||||

|
||||
|
||||
More management features coming soon.
|
||||
|
||||
## Install
|
||||
|
||||
Download the latest release for your operating system from GitHub Releases.
|
||||
|
||||
- Windows: run the NSIS installer.
|
||||
- Linux: use the AppImage or Debian package.
|
||||
- macOS: use the DMG for your Mac architecture.
|
||||
|
||||
After launching the app, add an existing server profile with its host, SSH user,
|
||||
and private key path, then refresh it to detect BattleGroups and management
|
||||
endpoints.
|
||||
|
||||
## Managed Server Assumptions
|
||||
|
||||
The target server must already be installed and reachable over SSH. The app
|
||||
expects the Dune Kubernetes resources and vendor management scripts to exist on
|
||||
the server before you add it.
|
||||
|
||||
Required player-facing/server ports depend on your own server deployment. A
|
||||
typical dedicated-server deployment uses:
|
||||
|
||||
- UDP 7777-7810 for game servers
|
||||
- TCP 31982 for RMQ
|
||||
|
||||
If you found a bug or are having other issues, please create an issue here:
|
||||
https://github.com/adainrivers/dune-dedicated-server-manager/issues
|
||||
|
||||
## Building From Source
|
||||
|
||||
See [Building From Source](docs/building-from-source.md).
|
||||
|
||||
## License
|
||||
|
||||
MIT License. See [LICENSE](LICENSE).
|
||||
15
docs/reference-repos/adainrivers/app/index.html
Normal file
15
docs/reference-repos/adainrivers/app/index.html
Normal file
@@ -0,0 +1,15 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Dune Dedicated Server Manager</title>
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||
<link href="https://fonts.googleapis.com/css2?family=Funnel+Display:wght@400;500;600;700&family=Geist:wght@300;400;500;600;700&family=Geist+Mono:wght@400;500;600&display=swap" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
3897
docs/reference-repos/adainrivers/app/package-lock.json
generated
Normal file
3897
docs/reference-repos/adainrivers/app/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
32
docs/reference-repos/adainrivers/app/package.json
Normal file
32
docs/reference-repos/adainrivers/app/package.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"name": "dune-dedicated-server-manager-app",
|
||||
"private": true,
|
||||
"version": "0.3.16",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite --host 127.0.0.1 --port 1420",
|
||||
"build": "tsc && vite build",
|
||||
"preview": "vite preview --host 127.0.0.1 --port 1420",
|
||||
"tauri": "tauri"
|
||||
},
|
||||
"dependencies": {
|
||||
"@radix-ui/react-icons": "^1.3.2",
|
||||
"@radix-ui/themes": "^3.2.1",
|
||||
"@tauri-apps/api": "^2.0.0",
|
||||
"@tauri-apps/plugin-dialog": "^2.7.1",
|
||||
"@tauri-apps/plugin-process": "^2.3.1",
|
||||
"@tauri-apps/plugin-shell": "^2.3.5",
|
||||
"@tauri-apps/plugin-updater": "^2.10.1",
|
||||
"markdown-to-jsx": "^9.8.1",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tauri-apps/cli": "^2.0.0",
|
||||
"@types/react": "^18.3.12",
|
||||
"@types/react-dom": "^18.3.1",
|
||||
"@vitejs/plugin-react": "^4.3.3",
|
||||
"typescript": "^5.6.3",
|
||||
"vite": "^5.4.10"
|
||||
}
|
||||
}
|
||||
26
docs/reference-repos/adainrivers/app/src-tauri/Cargo.toml
Normal file
26
docs/reference-repos/adainrivers/app/src-tauri/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[package]
|
||||
name = "dune-dedicated-server-manager-app"
|
||||
version = "0.2.0"
|
||||
description = "Desktop shell for Dune Dedicated Server Manager"
|
||||
authors = ["Dune Dedicated Server Manager"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "dune_dedicated_server_manager_app_lib"
|
||||
crate-type = ["staticlib", "cdylib", "rlib"]
|
||||
|
||||
[build-dependencies]
|
||||
tauri-build = { version = "2", features = [] }
|
||||
|
||||
[dependencies]
|
||||
dune-manager-core = { path = "../../crates/dune-manager-core" }
|
||||
tauri = { version = "2", features = ["devtools"] }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tauri-plugin-dialog = "2"
|
||||
tauri-plugin-updater = "2"
|
||||
tauri-plugin-process = "2"
|
||||
tauri-plugin-shell = "2"
|
||||
base64 = "0.22"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock", "std"] }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json"] }
|
||||
6
docs/reference-repos/adainrivers/app/src-tauri/binaries/.gitignore
vendored
Normal file
6
docs/reference-repos/adainrivers/app/src-tauri/binaries/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Populated by CI from the `linux-service-binary` job artifact, or locally
|
||||
# via `cargo zigbuild -p dune-server-service --release --target
|
||||
# x86_64-unknown-linux-musl` + manual copy. Not tracked.
|
||||
dune-server-service
|
||||
dune-server-service.service
|
||||
dune-server-service.openrc
|
||||
@@ -0,0 +1,23 @@
|
||||
# Bundled service binaries
|
||||
|
||||
This directory holds the Linux `dune-server-service` binary (musl-static), its
|
||||
systemd unit, and its OpenRC init script. They are populated by the
|
||||
`linux-service-binary` job in `.github/workflows/release.yml` and bundled into
|
||||
the desktop installer as Tauri resources.
|
||||
|
||||
For local debug builds the directory can be empty — the `install_management_service`
|
||||
Tauri command surfaces a friendly error when the resource is missing.
|
||||
|
||||
For a local end-to-end test, build the service yourself:
|
||||
|
||||
```powershell
|
||||
rustup target add x86_64-unknown-linux-musl
|
||||
cargo install --locked cargo-zigbuild
|
||||
cargo zigbuild -p dune-server-service --release --target x86_64-unknown-linux-musl
|
||||
Copy-Item target\x86_64-unknown-linux-musl\release\dune-server-service `
|
||||
app\src-tauri\binaries\dune-server-service
|
||||
Copy-Item crates\dune-server-service\systemd\dune-server-service.service `
|
||||
app\src-tauri\binaries\dune-server-service.service
|
||||
Copy-Item crates\dune-server-service\openrc\dune-server-service `
|
||||
app\src-tauri\binaries\dune-server-service.openrc
|
||||
```
|
||||
67
docs/reference-repos/adainrivers/app/src-tauri/build.rs
Normal file
67
docs/reference-repos/adainrivers/app/src-tauri/build.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
fn main() {
|
||||
expose_dune_server_service_version();
|
||||
rerun_if_bundled_binaries_change();
|
||||
tauri_build::build();
|
||||
}
|
||||
|
||||
/// Tauri's resource-copy step only fires when Cargo decides build.rs needs to
|
||||
/// re-run, which by default doesn't watch arbitrary files. Without these
|
||||
/// `rerun-if-changed` lines, refreshing the bundled `dune-server-service`
|
||||
/// binary or its systemd/openrc units in `binaries/` after a previous build
|
||||
/// produces a stale `target/release/binaries/` copy — the running exe then
|
||||
/// pushes the OLD binary on Install/Update, with no visible signal.
|
||||
fn rerun_if_bundled_binaries_change() {
|
||||
let dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("binaries");
|
||||
// Watch the directory itself so file additions/deletions also trigger a rerun.
|
||||
println!("cargo:rerun-if-changed={}", dir.display());
|
||||
if let Ok(entries) = std::fs::read_dir(&dir) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
// Skip README, .gitignore, and similar bookkeeping files.
|
||||
if matches!(
|
||||
path.file_name().and_then(|n| n.to_str()),
|
||||
Some("README.md") | Some(".gitignore")
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
println!("cargo:rerun-if-changed={}", path.display());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn expose_dune_server_service_version() {
|
||||
let cargo_toml = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("../../crates/dune-server-service/Cargo.toml");
|
||||
println!("cargo:rerun-if-changed={}", cargo_toml.display());
|
||||
let contents = std::fs::read_to_string(&cargo_toml)
|
||||
.unwrap_or_else(|err| panic!("reading {}: {err}", cargo_toml.display()));
|
||||
let version = parse_package_version(&contents).unwrap_or_else(|| {
|
||||
panic!(
|
||||
"could not find [package].version in {}",
|
||||
cargo_toml.display()
|
||||
)
|
||||
});
|
||||
println!("cargo:rustc-env=DUNE_SERVER_SERVICE_VERSION={version}");
|
||||
}
|
||||
|
||||
/// Extracts the string value of `version` from the `[package]` table of a
/// Cargo manifest.
///
/// This is a deliberately small line-based scan, not a TOML parser: it tracks
/// which table header was seen last and, inside `[package]`, looks for a line
/// of the form `version = "<value>"` (or the single-quoted literal form).
/// Anything after the closing quote, such as a trailing comment, is ignored.
///
/// Returns `None` when there is no `[package]` table, when the table has no
/// `version` key, or when the value is not a non-empty quoted string (for
/// example `version = { workspace = true }`).
fn parse_package_version(toml: &str) -> Option<String> {
    let mut in_package = false;
    for line in toml.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with('[') {
            // Any table header switches context; only `[package]` is relevant.
            in_package = trimmed == "[package]";
            continue;
        }
        if !in_package {
            continue;
        }

        let (key, value) = match trimmed.split_once('=') {
            Some(pair) => pair,
            None => continue,
        };
        // Match the key exactly. A key that merely starts with "version"
        // (e.g. `version-note`) must be skipped rather than abort the scan.
        if key.trim_end() != "version" {
            continue;
        }

        let value = value.trim_start();
        // The `version` key exists but is not a quoted string: give up.
        let quote = value
            .chars()
            .next()
            .filter(|c| matches!(*c, '"' | '\''))?;
        let body = &value[quote.len_utf8()..];
        let end = body.find(quote)?;
        if end == 0 {
            // An empty version string is treated as "not found".
            return None;
        }
        return Some(body[..end].to_string());
    }
    None
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"$schema": "../gen/schemas/desktop-schema.json",
|
||||
"identifier": "default",
|
||||
"description": "Default desktop app permissions",
|
||||
"windows": ["main"],
|
||||
"permissions": ["core:default", "dialog:allow-open", "process:default", "shell:allow-open", "updater:default"]
|
||||
}
|
||||
BIN
docs/reference-repos/adainrivers/app/src-tauri/icons/128x128.png
Normal file
BIN
docs/reference-repos/adainrivers/app/src-tauri/icons/128x128.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 24 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 65 KiB |
BIN
docs/reference-repos/adainrivers/app/src-tauri/icons/32x32.png
Normal file
BIN
docs/reference-repos/adainrivers/app/src-tauri/icons/32x32.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.4 KiB |
BIN
docs/reference-repos/adainrivers/app/src-tauri/icons/64x64.png
Normal file
BIN
docs/reference-repos/adainrivers/app/src-tauri/icons/64x64.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 7.3 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user