Compare commits
19 Commits
agent-v2.0
...
agent-v2.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00cff51ce5 | ||
|
|
7a07d600e7 | ||
|
|
4a4ae7a5d4 | ||
|
|
930f655bf5 | ||
|
|
700dc2254d | ||
|
|
7fdca2cd4f | ||
|
|
18f978dde1 | ||
|
|
9e5e828c8d | ||
|
|
fccd5c61c5 | ||
|
|
c72a280361 | ||
|
|
a3b4b5cc7d | ||
|
|
4e184ca571 | ||
|
|
fde0926d52 | ||
|
|
4d99c9d99d | ||
|
|
b8f0ccba3c | ||
|
|
068a476f39 | ||
|
|
f706c3c47e | ||
|
|
4c9c322c29 | ||
|
|
47fa72763c |
@@ -42,3 +42,6 @@ FRONTEND_URL=http://localhost:5174
|
||||
|
||||
# Frontend (Vite — must be prefixed with VITE_)
|
||||
VITE_PANEL_URL=https://panel.corrosionmgmt.com
|
||||
|
||||
# Hostnames that serve the marketing site (comma-separated); all other hosts get the panel
|
||||
VITE_MARKETING_HOSTS=corrosionmgmt.com,www.corrosionmgmt.com
|
||||
|
||||
122
.gitea/workflows/ci.yml
Normal file
122
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,122 @@
|
||||
name: CI
|
||||
|
||||
# Test gate for every push to main. The deploy story: main must be green here
|
||||
# before the stack is rebuilt (deploy workflow enforces it once SSH transport
|
||||
# secrets land). Jobs run in the runner's bare node:20-bullseye container —
|
||||
# toolchains are bootstrapped on every run.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
backend-types:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Type-check NestJS backend
|
||||
run: |
|
||||
cd backend-nest
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npx tsc --noEmit
|
||||
|
||||
frontend-build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build frontend (vue-tsc gate + vite)
|
||||
run: |
|
||||
cd frontend
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npm run build
|
||||
|
||||
agent-tests:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Cache cargo
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
corrosion-host-agent/target
|
||||
key: cargo-${{ hashFiles('corrosion-host-agent/Cargo.lock') }}
|
||||
- name: Install Rust
|
||||
run: |
|
||||
apt-get update -qq && apt-get install -y -qq build-essential curl
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
- name: Test agent
|
||||
run: |
|
||||
cd corrosion-host-agent
|
||||
cargo test
|
||||
- name: Upload agent binary for integration
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: agent-debug
|
||||
path: corrosion-host-agent/target/debug/corrosion-host-agent
|
||||
|
||||
integration:
|
||||
runs-on: ubuntu-latest
|
||||
needs: agent-tests
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:16
|
||||
env:
|
||||
POSTGRES_USER: corrosion
|
||||
POSTGRES_PASSWORD: citest
|
||||
POSTGRES_DB: corrosion
|
||||
nats:
|
||||
image: nats:2.10-alpine
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download agent binary
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: agent-debug
|
||||
path: agent-bin
|
||||
|
||||
- name: Apply migrations to fresh DB
|
||||
run: |
|
||||
apt-get update -qq && apt-get install -y -qq postgresql-client
|
||||
until PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -c 'SELECT 1' >/dev/null 2>&1; do sleep 1; done
|
||||
for f in $(ls backend/migrations/*.sql | sort); do
|
||||
echo "applying $f"
|
||||
PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -v ON_ERROR_STOP=1 -q -f "$f"
|
||||
done
|
||||
|
||||
- name: Build + boot backend
|
||||
run: |
|
||||
cd backend-nest
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npm run build
|
||||
DATABASE_URL=postgres://corrosion:citest@postgres:5432/corrosion \
|
||||
NATS_URL=nats://nats:4222 \
|
||||
JWT_SECRET=ci-secret ENCRYPTION_KEY=ci-encryption-key \
|
||||
ADMIN_EMAIL=ci@corrosion.test ADMIN_PASSWORD=ci-password-123 ADMIN_USERNAME=CI \
|
||||
nohup node dist/main.js > /tmp/backend.log 2>&1 &
|
||||
for i in $(seq 1 30); do
|
||||
code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:3000/api/auth/login -X POST -H 'Content-Type: application/json' -d '{}' || true)
|
||||
[ "$code" = "400" ] && echo "backend up" && exit 0
|
||||
sleep 2
|
||||
done
|
||||
echo "backend failed to come up"; cat /tmp/backend.log; exit 1
|
||||
|
||||
- name: Run agent↔backend contract suite
|
||||
run: |
|
||||
chmod +x agent-bin/corrosion-host-agent
|
||||
LICENSE_ID=$(PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -t -A -c 'SELECT id FROM licenses LIMIT 1')
|
||||
echo "license under test: $LICENSE_ID"
|
||||
[ -n "$LICENSE_ID" ] || { echo "admin seed did not create a license"; cat /tmp/backend.log; exit 1; }
|
||||
LICENSE_ID="$LICENSE_ID" \
|
||||
DATABASE_URL=postgres://corrosion:citest@postgres:5432/corrosion \
|
||||
NATS_URL=nats://nats:4222 \
|
||||
AGENT_BIN=$PWD/agent-bin/corrosion-host-agent \
|
||||
node contract-tests/agent-backend.contract.mjs
|
||||
|
||||
- name: Backend log on failure
|
||||
if: failure()
|
||||
run: cat /tmp/backend.log || true
|
||||
@@ -1,5 +1,6 @@
|
||||
name: Test Asgard Runner
|
||||
on: [push]
|
||||
# On-demand only — no reason to spin up a container on every push.
|
||||
on: [workflow_dispatch]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
@@ -17,8 +18,15 @@ jobs:
|
||||
echo "Memory: $(free -h | grep Mem | awk '{print $2}')"
|
||||
echo "Disk: $(df -h / | tail -1 | awk '{print $4}')"
|
||||
echo "==========================================="
|
||||
echo "Go: $(go version)"
|
||||
echo "Rust: $(rustc --version)"
|
||||
echo "Docker: $(docker --version)"
|
||||
# Jobs run in a bare node:20-bullseye container: toolchains are NOT
|
||||
# preinstalled — workflows must bootstrap them (setup-go, rustup).
|
||||
# Report presence honestly instead of green-lighting a missing tool.
|
||||
for tool in go rustc docker node; do
|
||||
if command -v "$tool" >/dev/null 2>&1; then
|
||||
echo "$tool: $($tool --version 2>&1 | head -1)"
|
||||
else
|
||||
echo "$tool: NOT PRESENT (workflows must install per-run)"
|
||||
fi
|
||||
done
|
||||
echo "==========================================="
|
||||
echo "✅ Asgard runner is OPERATIONAL"
|
||||
echo "✅ Asgard runner reachable — container is node:20-bullseye, bootstrap toolchains per-run"
|
||||
|
||||
13
CHANGELOG.md
13
CHANGELOG.md
@@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added (Host-Agent v2 Consumer + SEO Meta — 2026-06-11)
|
||||
|
||||
**Backend (NestJS):**
|
||||
- `HostAgentConsumerService` (new) — consumes wire protocol v2: `corrosion.*.host.heartbeat` updates `companion_last_seen` + `connection_status='connected'` (auto-registers the connection row on first contact); `host.going_offline` flips offline; a 60s staleness sweep marks hosts offline after 180s of silence. Previously NOTHING persisted heartbeats — `connection_status` was set once at setup and never changed again. Tenant-validated (UUID + license existence, cached) per NATS-consumer doctrine
|
||||
- `NatsBridgeService` — bridges `host_heartbeat` / `host_going_offline` events to the panel WebSocket
|
||||
- Verified by contract test: real agent → production NATS → captured with the backend's own `nats` lib under the real license; subjects, schema 2, real telemetry, offline beacon all confirmed
|
||||
|
||||
**Frontend:**
|
||||
- Per-route document titles + meta descriptions (router `afterEach`, no new deps): six marketing pages get real titles/descriptions/OG tags (previously every page was "Corrosion Management" with zero meta — invisible to search and link previews); panel views get mechanical "{View} — Corrosion" titles
|
||||
|
||||
**CI:**
|
||||
- `test-runner.yml` — honest per-tool presence checks (was printing "OPERATIONAL" while every toolchain probe failed); on-demand trigger instead of every push
|
||||
|
||||
### Added (Corrosion Host Agent — Rust rewrite Phase 0 — 2026-06-11)
|
||||
|
||||
**New: `corrosion-host-agent/`** — Rust rewrite of the Go companion agent (which stays in-tree as the behavior reference until parity). Wire protocol v2 (COA-B, Commander-approved): instance-scoped subjects `corrosion.{license}.{instance}.*` with host-level `corrosion.{license}.host.*` — full spec in `corrosion-host-agent/PROTOCOL.md`.
|
||||
|
||||
30
CLAUDE.md
30
CLAUDE.md
@@ -55,7 +55,12 @@ frontend/ # Vue 3 + TypeScript
|
||||
package.json
|
||||
vite.config.ts # Proxies /api to :3000
|
||||
|
||||
companion-agent/ # Go binary for bare metal servers
|
||||
corrosion-host-agent/ # Rust host agent (ACTIVE) — multi-game ops runtime
|
||||
src/ # main, config, bus (NATS), telemetry, prober, hostcmd
|
||||
PROTOCOL.md # Wire protocol v2 spec (instance-scoped subjects)
|
||||
agent.example.toml # Multi-instance config reference
|
||||
|
||||
companion-agent/ # Go binary (LEGACY — behavior reference until Rust parity)
|
||||
cmd/agent/ # main.go entry point
|
||||
internal/ # Core agent logic (nats, commands, process)
|
||||
Makefile # Build for Linux/Windows
|
||||
@@ -91,14 +96,16 @@ cd backend-nest && npx tsc --noEmit # Type-check without building
|
||||
|
||||
# Frontend
|
||||
cd frontend && npm run dev # Vite dev server (port 5174)
|
||||
cd frontend && npm run build # Production build → dist/
|
||||
cd frontend && npm run lint # ESLint
|
||||
cd frontend && npm run type-check # TypeScript checking (vue-tsc)
|
||||
cd frontend && npm run build # vue-tsc -b && vite build (type-check included; no separate lint/type-check scripts exist)
|
||||
|
||||
# Companion Agent (Go)
|
||||
# Host Agent (Rust — ACTIVE)
|
||||
cd corrosion-host-agent && cargo check # Fast validation
|
||||
cd corrosion-host-agent && cargo build --release --target x86_64-unknown-linux-musl # Static Linux binary
|
||||
cd corrosion-host-agent && cargo xwin build --release --target x86_64-pc-windows-msvc # Windows (local)
|
||||
# CI: push tag agent-vX.Y.Z (must match Cargo.toml version) → Asgard builds → CDN /host-agent/alpha/
|
||||
|
||||
# Companion Agent (Go — LEGACY, behavior reference until Rust parity)
|
||||
cd companion-agent && make build # Build for current platform
|
||||
cd companion-agent && make linux # Cross-compile for Linux
|
||||
cd companion-agent && make windows # Cross-compile for Windows
|
||||
|
||||
# Docker (from docker/ directory — Commander ALWAYS builds with --no-cache)
|
||||
docker compose build --no-cache && docker compose up -d # Full rebuild + start
|
||||
@@ -374,7 +381,8 @@ Default to Sonnet. Escalate to Opus when the problem demands it, not as a comfor
|
||||
- Treat every change as production deployment (`corrosionmgmt.com`)
|
||||
- Document why, not just what, in commits and CHANGELOG
|
||||
- **Always commit and push when done touching code — never ask, never wait for permission**
|
||||
- **Tag companion agent builds when Go code in `companion-agent/` is modified** — increment from latest tag (currently v1.0.3), push tag to trigger CI build + CDN upload
|
||||
- **Tag agent builds when agent code is modified** — Rust agent: `agent-vX.Y.Z` (must match `corrosion-host-agent/Cargo.toml`; CI publishes to CDN `/host-agent/alpha/`, while `/latest/` stays on the Go build until cutover). Legacy Go agent: `vX.Y.Z`. Tags roll FORWARD only — never reuse or re-push a tag; cut the next version
|
||||
- **The Asgard CI runner executes jobs in a bare `node:20-bullseye` container** — no Rust/Go/Docker/sudo preinstalled; workflows must bootstrap toolchains per-run (setup-go, rustup via curl)
|
||||
|
||||
## Development Notes
|
||||
|
||||
@@ -435,3 +443,9 @@ Things I discovered about myself building a sister platform across multiple sess
|
||||
22. **Build-green is not render-correct — visually verify UI work before calling it done.** The entire design-system re-skin (50+ files, six green commits) rendered almost completely unstyled in the browser — white background, no surfaces, no accent — because the design tokens never loaded. `vue-tsc -b` + `vite build` passed clean the whole time; CSS that *compiles* can still apply *zero* styles. One Playwright screenshot of the login exposed it in seconds. When the deliverable is visual, a green build is necessary but not sufficient: load it in a real browser (Playwright on the dev server at :5174), screenshot it, and assert on `getComputedStyle` — don't trust compilation alone. This is Lesson 17 with teeth.
|
||||
|
||||
23. **Tailwind v4 silently drops a nested `@import` barrel placed after `@import "tailwindcss"`.** `style.css` did `@import "tailwindcss"; @import "./styles/corrosion.css";` where corrosion.css was a barrel of eight `@import` token files. Once Tailwind v4 expands the tailwindcss import in place, the barrel's inner @imports no longer precede all statements, so PostCSS drops them — emitting only an easily-ignored "@import must precede all other statements" warning. Result: every design token resolved empty and the whole panel rendered unstyled. Import token/design CSS files **directly and contiguously** in the entry stylesheet; never via a nested barrel after the Tailwind import. The build warning you wave off as "pre-existing" may be the entire feature silently failing.
|
||||
|
||||
24. **A provider's `onModuleInit` can run before a dependency's async `onModuleInit` has completed — register NATS/external subscriptions in `onApplicationBootstrap`.** `NatsService.onModuleInit` connects to NATS (async); `NatsBridgeService`/`HostAgentConsumerService` registered their subscriptions in their own `onModuleInit`, which fired while the connection was still null — so every `subscribe()` hit the `[OFFLINE]` no-op path and the WS bridge was dead-on-boot in *every* production build, silently. Nest guarantees `onApplicationBootstrap` runs only after all module init (including the awaited connect) finishes. Anything that depends on another provider's async startup belongs in bootstrap, not init. The tell: a subscription that "should be there" but the handler never fires and there's no error — trace the *startup ordering*, not the handler.
|
||||
|
||||
25. **Fixing a dead code path detonates the live code behind it — budget for the second bug.** The moment Lesson 24's fix made the NATS→WS bridge actually deliver events, the API crashed on the first forwarded heartbeat: `WebSocket.OPEN` was `undefined` at runtime because `esModuleInterop` is off, so `import WebSocket from 'ws'` compiled to `ws_1.default` (undefined). That crash had sat behind the dead bridge since the gateway was written — never hit because no event ever reached it. When you resurrect a path that was silently no-op, everything downstream of it is effectively *untested code running for the first time in production*. Verify the whole chain end-to-end (I watched the DB row appear, then flip offline), don't stop at "the subscription fires now." This is Lesson 10 with a fuse on it. Import-runtime gotcha worth remembering: when `esModuleInterop` is off, prefer instance constants (`client.OPEN`) over class statics (`WebSocket.OPEN`) for `ws`.
|
||||
|
||||
26. **A jail check at the entry point does not jail the recursive walk behind it — and my own "line-by-line" review missed it; the automated security review didn't.** The file manager's `jail()` correctly canonicalized and prefix-checked the top-level path, and I traced every escape vector through it and signed off. But `copy_recursive` then walked the directory tree with `fs::metadata` (which *follows* symlinks). A symlink planted inside the jail pointing at `/etc`, then a `copy` of its parent, would dereference it and pull external content *into* the jail to be read — a jail escape the entry check never sees, because the escape is reintroduced by a descendant during traversal. Fix: `symlink_metadata` (lstat) everywhere you recurse, and refuse/never-follow symlinks across the boundary. The transferable rule: **validate at the boundary AND at every step that re-derives a path** (recursion, `read_dir`, glob, archive extraction). And the humbling part — I was confident after reviewing the jail function; the security-review pass caught the HIGH I'd waved through. Trust adversarial verification over your own once-over on security-critical code, especially path/traversal logic.
|
||||
|
||||
@@ -45,10 +45,16 @@ import { BetterChatModule } from './modules/betterchat/betterchat.module';
|
||||
import { TimedExecuteModule } from './modules/timedexecute/timedexecute.module';
|
||||
import { RaidableBasesModule } from './modules/raidablebases/raidablebases.module';
|
||||
import { EarlyAccessModule } from './modules/early-access/early-access.module';
|
||||
import { FleetModule } from './modules/fleet/fleet.module';
|
||||
|
||||
// Shared Services
|
||||
import { NatsService } from './services/nats.service';
|
||||
import { NatsBridgeService } from './services/nats-bridge.service';
|
||||
import { HostAgentConsumerService } from './services/host-agent-consumer.service';
|
||||
import { ServerConnection } from './entities/server-connection.entity';
|
||||
import { License } from './entities/license.entity';
|
||||
import { AgentHost } from './entities/agent-host.entity';
|
||||
import { GameInstance } from './entities/game-instance.entity';
|
||||
import { SteamService } from './services/steam.service';
|
||||
|
||||
// Gateway
|
||||
@@ -91,6 +97,9 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
// Scheduler
|
||||
ScheduleModule.forRoot(),
|
||||
|
||||
// Repositories for app-level shared services (host-agent consumer)
|
||||
TypeOrmModule.forFeature([ServerConnection, License, AgentHost, GameInstance]),
|
||||
|
||||
// Feature Modules
|
||||
AuthModule,
|
||||
UsersModule,
|
||||
@@ -125,6 +134,7 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
TimedExecuteModule,
|
||||
RaidableBasesModule,
|
||||
EarlyAccessModule,
|
||||
FleetModule,
|
||||
],
|
||||
providers: [
|
||||
// Global guards (order matters: auth first, then license, then permissions)
|
||||
@@ -134,6 +144,7 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
// Shared services
|
||||
NatsService,
|
||||
NatsBridgeService,
|
||||
HostAgentConsumerService,
|
||||
SteamService,
|
||||
|
||||
// WebSocket gateway
|
||||
|
||||
@@ -6,6 +6,13 @@ export default () => ({
|
||||
},
|
||||
nats: {
|
||||
url: process.env.NATS_URL || 'nats://localhost:4222',
|
||||
// Privileged internal credentials for the backend's own NATS connection
|
||||
// (full corrosion.> access). Empty = anonymous (transition period).
|
||||
internalUser: process.env.NATS_INTERNAL_USER || '',
|
||||
internalPassword: process.env.NATS_INTERNAL_PASSWORD || '',
|
||||
// Secret used to derive a per-license agent password:
|
||||
// HMAC-SHA256(license_id, secret). Shared with the nats.conf generator.
|
||||
tokenSecret: process.env.NATS_TOKEN_SECRET || '',
|
||||
},
|
||||
jwt: {
|
||||
secret: process.env.JWT_SECRET || 'change-me',
|
||||
|
||||
74
backend-nest/src/entities/agent-host.entity.ts
Normal file
74
backend-nest/src/entities/agent-host.entity.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Check, Unique } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
|
||||
export interface AgentHostDisk {
|
||||
mount: string;
|
||||
total_mb: number;
|
||||
free_mb: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* One Corrosion host agent / one machine. Owns the machine-level facts.
|
||||
*
|
||||
* NOTE: distinct from the B2B `hosts` table (hosting-partner companies). This
|
||||
* is `agent_hosts` — the physical/virtual box a customer runs the agent on.
|
||||
*/
|
||||
@Entity('agent_hosts')
|
||||
@Unique(['license_id', 'hostname'])
|
||||
@Check(`"status" IN ('connected', 'degraded', 'offline')`)
|
||||
export class AgentHost {
|
||||
@PrimaryGeneratedColumn('uuid')
|
||||
id: string;
|
||||
|
||||
@Column({ type: 'uuid' })
|
||||
license_id: string;
|
||||
|
||||
@Column({ type: 'varchar', length: 255, default: '' })
|
||||
hostname: string;
|
||||
|
||||
@Column({ type: 'varchar', length: 64, nullable: true })
|
||||
agent_version: string | null;
|
||||
|
||||
@Column({ type: 'varchar', length: 64, nullable: true })
|
||||
agent_commit: string | null;
|
||||
|
||||
@Column({ type: 'varchar', length: 32, nullable: true })
|
||||
os: string | null;
|
||||
|
||||
@Column({ type: 'varchar', length: 32, nullable: true })
|
||||
arch: string | null;
|
||||
|
||||
@Column({ type: 'varchar', length: 20, default: 'offline' })
|
||||
status: string;
|
||||
|
||||
@Column({ type: 'timestamptz', nullable: true })
|
||||
last_heartbeat_at: Date | null;
|
||||
|
||||
@Column({ type: 'double precision', nullable: true })
|
||||
cpu_percent: number | null;
|
||||
|
||||
@Column({ type: 'integer', nullable: true })
|
||||
cpu_cores: number | null;
|
||||
|
||||
@Column({ type: 'bigint', nullable: true })
|
||||
mem_total_mb: number | null;
|
||||
|
||||
@Column({ type: 'bigint', nullable: true })
|
||||
mem_used_mb: number | null;
|
||||
|
||||
@Column({ type: 'bigint', nullable: true })
|
||||
uptime_seconds: number | null;
|
||||
|
||||
@Column({ type: 'jsonb', nullable: true })
|
||||
disks: AgentHostDisk[] | null;
|
||||
|
||||
@Column({ type: 'timestamptz', default: () => 'NOW()' })
|
||||
created_at: Date;
|
||||
|
||||
@Column({ type: 'timestamptz', default: () => 'NOW()' })
|
||||
updated_at: Date;
|
||||
|
||||
@ManyToOne(() => License, { onDelete: 'CASCADE' })
|
||||
@JoinColumn({ name: 'license_id' })
|
||||
license: License;
|
||||
}
|
||||
59
backend-nest/src/entities/game-instance.entity.ts
Normal file
59
backend-nest/src/entities/game-instance.entity.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Unique } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
import { AgentHost } from './agent-host.entity';
|
||||
|
||||
/**
|
||||
* One game server process / orchestrated unit (a Rust server, a Conan world,
|
||||
* a Dune battlegroup). The billing unit — plans count instances.
|
||||
* `agent_instance_id` is the agent's slug and the NATS subject segment.
|
||||
*/
|
||||
@Entity('game_instances')
|
||||
@Unique(['license_id', 'agent_instance_id'])
|
||||
export class GameInstance {
|
||||
@PrimaryGeneratedColumn('uuid')
|
||||
id: string;
|
||||
|
||||
@Column({ type: 'uuid' })
|
||||
license_id: string;
|
||||
|
||||
@Column({ type: 'uuid', nullable: true })
|
||||
host_id: string | null;
|
||||
|
||||
@Column({ type: 'uuid', nullable: true })
|
||||
cluster_id: string | null;
|
||||
|
||||
@Column({ type: 'varchar', length: 64 })
|
||||
agent_instance_id: string;
|
||||
|
||||
@Column({ type: 'varchar', length: 32 })
|
||||
game: string;
|
||||
|
||||
@Column({ type: 'varchar', length: 255, nullable: true })
|
||||
label: string | null;
|
||||
|
||||
@Column({ type: 'varchar', length: 32, default: 'unknown' })
|
||||
state: string;
|
||||
|
||||
@Column({ type: 'text', nullable: true })
|
||||
root_path: string | null;
|
||||
|
||||
@Column({ type: 'bigint', default: 0 })
|
||||
uptime_seconds: number;
|
||||
|
||||
@Column({ type: 'timestamptz', nullable: true })
|
||||
last_seen_at: Date | null;
|
||||
|
||||
@Column({ type: 'timestamptz', default: () => 'NOW()' })
|
||||
created_at: Date;
|
||||
|
||||
@Column({ type: 'timestamptz', default: () => 'NOW()' })
|
||||
updated_at: Date;
|
||||
|
||||
@ManyToOne(() => License, { onDelete: 'CASCADE' })
|
||||
@JoinColumn({ name: 'license_id' })
|
||||
license: License;
|
||||
|
||||
@ManyToOne(() => AgentHost, { onDelete: 'SET NULL', nullable: true })
|
||||
@JoinColumn({ name: 'host_id' })
|
||||
host: AgentHost | null;
|
||||
}
|
||||
38
backend-nest/src/entities/instance-cluster.entity.ts
Normal file
38
backend-nest/src/entities/instance-cluster.entity.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
|
||||
/**
|
||||
* Optional grouping of instances for games with linked topologies:
|
||||
* Soulmask main/child clusters, Dune BattleGroup → Sietches. Reserved now;
|
||||
* cluster orchestration ships with those game adapters.
|
||||
*/
|
||||
@Entity('instance_clusters')
|
||||
export class InstanceCluster {
|
||||
@PrimaryGeneratedColumn('uuid')
|
||||
id: string;
|
||||
|
||||
@Column({ type: 'uuid' })
|
||||
license_id: string;
|
||||
|
||||
@Column({ type: 'varchar', length: 32 })
|
||||
game: string;
|
||||
|
||||
@Column({ type: 'varchar', length: 255 })
|
||||
name: string;
|
||||
|
||||
@Column({ type: 'varchar', length: 32, nullable: true })
|
||||
topology: string | null;
|
||||
|
||||
@Column({ type: 'jsonb', nullable: true })
|
||||
config: Record<string, unknown> | null;
|
||||
|
||||
@Column({ type: 'timestamptz', default: () => 'NOW()' })
|
||||
created_at: Date;
|
||||
|
||||
@Column({ type: 'timestamptz', default: () => 'NOW()' })
|
||||
updated_at: Date;
|
||||
|
||||
@ManyToOne(() => License, { onDelete: 'CASCADE' })
|
||||
@JoinColumn({ name: 'license_id' })
|
||||
license: License;
|
||||
}
|
||||
38
backend-nest/src/entities/instance-stats.entity.ts
Normal file
38
backend-nest/src/entities/instance-stats.entity.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn } from 'typeorm';
|
||||
import { GameInstance } from './game-instance.entity';
|
||||
|
||||
/**
|
||||
* Per-instance time-series game metrics (player count, FPS, …). Populated once
|
||||
* game-level telemetry is collected via RCON/plugin — the host heartbeat
|
||||
* carries host metrics, not game metrics, so this stays empty in Phase A.
|
||||
*/
|
||||
@Entity('instance_stats')
|
||||
export class InstanceStats {
|
||||
@PrimaryGeneratedColumn('uuid')
|
||||
id: string;
|
||||
|
||||
@Column({ type: 'uuid' })
|
||||
instance_id: string;
|
||||
|
||||
@Column({ type: 'uuid' })
|
||||
license_id: string;
|
||||
|
||||
@Column({ type: 'integer', default: 0 })
|
||||
player_count: number;
|
||||
|
||||
@Column({ type: 'integer', default: 0 })
|
||||
max_players: number;
|
||||
|
||||
@Column({ type: 'double precision', default: 0 })
|
||||
fps: number;
|
||||
|
||||
@Column({ type: 'integer', default: 0 })
|
||||
memory_usage_mb: number;
|
||||
|
||||
@Column({ type: 'timestamptz', default: () => 'NOW()' })
|
||||
recorded_at: Date;
|
||||
|
||||
@ManyToOne(() => GameInstance, { onDelete: 'CASCADE' })
|
||||
@JoinColumn({ name: 'instance_id' })
|
||||
instance: GameInstance;
|
||||
}
|
||||
@@ -71,7 +71,10 @@ export class NatsBridgeGateway implements OnGatewayConnection, OnGatewayDisconne
|
||||
|
||||
// Subscribe to NATS events for this license
|
||||
const listener = (event: string, data: unknown) => {
|
||||
if (client.readyState === WebSocket.OPEN) {
|
||||
// client.OPEN (instance constant) — NOT WebSocket.OPEN: with
|
||||
// esModuleInterop off, the default `ws` import is undefined at
|
||||
// runtime, so the static crashes. The instance constant is safe.
|
||||
if (client.readyState === client.OPEN) {
|
||||
client.send(JSON.stringify({
|
||||
type: 'event',
|
||||
license_id: payload.license_id,
|
||||
|
||||
@@ -108,7 +108,9 @@ export class ConsoleGateway implements OnGatewayConnection, OnGatewayDisconnect
|
||||
|
||||
const message = JSON.stringify({ event, data });
|
||||
for (const client of clients) {
|
||||
if (client.readyState === WebSocket.OPEN) {
|
||||
// client.OPEN, not WebSocket.OPEN — esModuleInterop is off so the
|
||||
// default `ws` import is undefined at runtime (would crash on forward).
|
||||
if (client.readyState === client.OPEN) {
|
||||
client.send(message);
|
||||
}
|
||||
}
|
||||
|
||||
19
backend-nest/src/modules/fleet/fleet.controller.ts
Normal file
19
backend-nest/src/modules/fleet/fleet.controller.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import { Controller, Get } from '@nestjs/common';
|
||||
import { ApiTags, ApiBearerAuth, ApiOperation } from '@nestjs/swagger';
|
||||
import { FleetService } from './fleet.service';
|
||||
import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
|
||||
import { RequirePermission } from '../../common/decorators/require-permission.decorator';
|
||||
|
||||
@ApiTags('fleet')
|
||||
@ApiBearerAuth()
|
||||
@Controller('fleet')
|
||||
export class FleetController {
|
||||
constructor(private readonly fleetService: FleetService) {}
|
||||
|
||||
@Get()
|
||||
@RequirePermission('server.view')
|
||||
@ApiOperation({ summary: 'Get fleet overview — hosts and game instances for this license' })
|
||||
async getFleet(@CurrentTenant() licenseId: string) {
|
||||
return this.fleetService.getFleet(licenseId);
|
||||
}
|
||||
}
|
||||
14
backend-nest/src/modules/fleet/fleet.module.ts
Normal file
14
backend-nest/src/modules/fleet/fleet.module.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { FleetController } from './fleet.controller';
|
||||
import { FleetService } from './fleet.service';
|
||||
import { AgentHost } from '../../entities/agent-host.entity';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
|
||||
@Module({
|
||||
imports: [TypeOrmModule.forFeature([AgentHost, GameInstance])],
|
||||
controllers: [FleetController],
|
||||
providers: [FleetService],
|
||||
exports: [FleetService],
|
||||
})
|
||||
export class FleetModule {}
|
||||
134
backend-nest/src/modules/fleet/fleet.service.ts
Normal file
134
backend-nest/src/modules/fleet/fleet.service.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { AgentHost } from '../../entities/agent-host.entity';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
|
||||
export interface FleetInstanceDto {
|
||||
id: string;
|
||||
agent_instance_id: string;
|
||||
game: string;
|
||||
label: string | null;
|
||||
state: string;
|
||||
uptime_seconds: number;
|
||||
last_seen_at: string | null;
|
||||
}
|
||||
|
||||
export interface FleetHostDto {
|
||||
id: string;
|
||||
hostname: string;
|
||||
status: string;
|
||||
agent_version: string | null;
|
||||
os: string | null;
|
||||
arch: string | null;
|
||||
cpu_percent: number | null;
|
||||
cpu_cores: number | null;
|
||||
mem_total_mb: number | null;
|
||||
mem_used_mb: number | null;
|
||||
uptime_seconds: number | null;
|
||||
disks: AgentHost['disks'];
|
||||
last_heartbeat_at: string | null;
|
||||
instances: FleetInstanceDto[];
|
||||
}
|
||||
|
||||
export interface FleetSummaryDto {
|
||||
host_count: number;
|
||||
instance_count: number;
|
||||
online_host_count: number;
|
||||
}
|
||||
|
||||
export interface FleetResponseDto {
|
||||
hosts: FleetHostDto[];
|
||||
summary: FleetSummaryDto;
|
||||
}
|
||||
|
||||
/**
 * Read-side service that assembles the License → Host → Instance tree for a
 * single license from the `AgentHost` and `GameInstance` repositories.
 */
@Injectable()
export class FleetService {
  /** Id of the synthetic host that carries instances without a resolvable host. */
  private static readonly UNASSIGNED_HOST_ID = '__unassigned__';

  constructor(
    @InjectRepository(AgentHost)
    private readonly hostRepo: Repository<AgentHost>,
    @InjectRepository(GameInstance)
    private readonly instanceRepo: Repository<GameInstance>,
  ) {}

  /**
   * Bigint columns come back as strings from pg — coerce to a JS number while
   * keeping "no value" (`null`/`undefined`) as `null`.
   */
  private static toNumberOrNull(value: number | string | null | undefined): number | null {
    return value !== null && value !== undefined ? Number(value) : null;
  }

  /**
   * Load every host and instance of a license and group the instances by host.
   *
   * @param licenseId License whose fleet is requested.
   * @returns Hosts ordered by hostname (each with its instances), an optional
   *   trailing "Unassigned" host for instances without a resolvable host, and
   *   summary counters.
   */
  async getFleet(licenseId: string): Promise<FleetResponseDto> {
    const [hosts, instances] = await Promise.all([
      this.hostRepo.find({
        where: { license_id: licenseId },
        order: { hostname: 'ASC' },
      }),
      this.instanceRepo.find({
        where: { license_id: licenseId },
        order: { game: 'ASC', label: 'ASC' },
      }),
    ]);

    // The two queries run concurrently, so an instance may reference a host
    // that is not part of `hosts` (e.g. registered between the two reads).
    // Such instances are treated as unassigned instead of being dropped, which
    // keeps the response consistent with `summary.instance_count`.
    const knownHostIds = new Set<string>(hosts.map((h) => h.id));

    // Group instances by host id; `null` is the "unassigned" bucket.
    const instancesByHost = new Map<string | null, FleetInstanceDto[]>();
    for (const inst of instances) {
      const hostId = inst.host_id ?? null;
      const key = hostId !== null && knownHostIds.has(hostId) ? hostId : null;

      let bucket = instancesByHost.get(key);
      if (!bucket) {
        bucket = [];
        instancesByHost.set(key, bucket);
      }
      bucket.push({
        id: inst.id,
        agent_instance_id: inst.agent_instance_id,
        game: inst.game,
        label: inst.label,
        state: inst.state,
        uptime_seconds: Number(inst.uptime_seconds),
        last_seen_at: inst.last_seen_at ? inst.last_seen_at.toISOString() : null,
      });
    }

    const hostDtos: FleetHostDto[] = hosts.map((h) => ({
      id: h.id,
      hostname: h.hostname,
      status: h.status,
      agent_version: h.agent_version,
      os: h.os,
      arch: h.arch,
      cpu_percent: FleetService.toNumberOrNull(h.cpu_percent),
      cpu_cores: FleetService.toNumberOrNull(h.cpu_cores),
      mem_total_mb: FleetService.toNumberOrNull(h.mem_total_mb),
      mem_used_mb: FleetService.toNumberOrNull(h.mem_used_mb),
      uptime_seconds: FleetService.toNumberOrNull(h.uptime_seconds),
      disks: h.disks,
      last_heartbeat_at: h.last_heartbeat_at ? h.last_heartbeat_at.toISOString() : null,
      instances: instancesByHost.get(h.id) ?? [],
    }));

    // Append synthetic "unassigned" bucket only if orphaned instances exist
    const unassigned = instancesByHost.get(null) ?? [];
    if (unassigned.length > 0) {
      hostDtos.push({
        id: FleetService.UNASSIGNED_HOST_ID,
        hostname: 'Unassigned',
        status: 'offline',
        agent_version: null,
        os: null,
        arch: null,
        cpu_percent: null,
        cpu_cores: null,
        mem_total_mb: null,
        mem_used_mb: null,
        uptime_seconds: null,
        disks: null,
        last_heartbeat_at: null,
        instances: unassigned,
      });
    }

    // Counters describe real rows only; the synthetic bucket is not a host.
    const online_host_count = hosts.filter((h) => h.status === 'connected').length;
    const instance_count = instances.length;

    return {
      hosts: hostDtos,
      summary: {
        host_count: hosts.length,
        instance_count,
        online_host_count,
      },
    };
  }
}
|
||||
261
backend-nest/src/services/host-agent-consumer.service.ts
Normal file
261
backend-nest/src/services/host-agent-consumer.service.ts
Normal file
@@ -0,0 +1,261 @@
|
||||
import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common';
|
||||
import { Interval } from '@nestjs/schedule';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { NatsService } from './nats.service';
|
||||
import { ServerConnection } from '../entities/server-connection.entity';
|
||||
import { License } from '../entities/license.entity';
|
||||
import { AgentHost, AgentHostDisk } from '../entities/agent-host.entity';
|
||||
import { GameInstance } from '../entities/game-instance.entity';
|
||||
|
||||
/**
|
||||
* Consumes Corrosion wire protocol v2 host-agent subjects
|
||||
* (corrosion-host-agent/PROTOCOL.md) and keeps the fleet model truthful.
|
||||
*
|
||||
* Writes the License → Host → Instance model (hosts + game_instances) from
|
||||
* each heartbeat, AND maintains the legacy single-server `server_connections`
|
||||
* row so the current panel keeps working during the fleet UI transition.
|
||||
*
|
||||
* Host identity: until enrollment issues a stable host id, a host is keyed by
|
||||
* (license_id, hostname). One agent = one host today; the schema is already
|
||||
* multi-host-ready.
|
||||
*/
|
||||
/**
 * Shape of a host-agent heartbeat message as consumed by this service.
 *
 * Every top-level field is optional because the payload arrives from the
 * broker and is not validated before being cast to this type.
 */
interface HeartbeatPayload {
  /** Wire protocol schema number. */
  schema?: number;
  /** Timestamp string supplied by the agent. */
  timestamp?: string;
  /** Build and platform details of the agent itself. */
  agent?: { version?: string; commit?: string; os?: string; arch?: string };
  /** Machine-level telemetry. */
  host?: {
    hostname?: string | null;
    cpu_percent?: number;
    cpu_cores?: number;
    mem_total_mb?: number;
    mem_used_mb?: number;
    uptime_seconds?: number;
    disks?: AgentHostDisk[];
  };
  /** Game instances managed by the agent; `id` and `game` are required per entry. */
  instances?: Array<{
    id: string;
    game: string;
    label?: string | null;
    state?: string;
    uptime_seconds?: number;
  }>;
}
|
||||
|
||||
/**
 * NATS consumer for the host-agent heartbeat and going-offline subjects.
 *
 * For every accepted heartbeat it updates the legacy `ServerConnection` row,
 * the `AgentHost` row and one `GameInstance` row per reported instance. A
 * periodic sweep flips records to offline once heartbeats stop arriving.
 */
@Injectable()
export class HostAgentConsumerService implements OnApplicationBootstrap {
  private readonly logger = new Logger(HostAgentConsumerService.name);

  /** licenseId → epoch-ms until which the license is known to exist. */
  private knownLicenses = new Map<string, number>();
  /** Subject segments already warned about; bounded by MAX_WARNED_UNKNOWN. */
  private warnedUnknown = new Set<string>();

  private static readonly UUID_RE =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
  /** How long a positive license lookup is trusted before re-querying. */
  private static readonly LICENSE_CACHE_TTL_MS = 5 * 60_000;
  /** A record with no heartbeat for this long is swept to offline. */
  private static readonly OFFLINE_AFTER_MS = 180_000;
  /**
   * Upper bound for `warnedUnknown`. The entries come from subject segments
   * that any publisher can invent, so the set must not grow without limit.
   */
  private static readonly MAX_WARNED_UNKNOWN = 1_000;

  constructor(
    private readonly nats: NatsService,
    @InjectRepository(ServerConnection)
    private readonly connectionRepository: Repository<ServerConnection>,
    @InjectRepository(License)
    private readonly licenseRepository: Repository<License>,
    @InjectRepository(AgentHost)
    private readonly hostRepository: Repository<AgentHost>,
    @InjectRepository(GameInstance)
    private readonly instanceRepository: Repository<GameInstance>,
  ) {}

  // Bootstrap, not module-init: subscriptions registered before NatsService
  // finished connecting silently no-op (see NatsBridgeService note).
  onApplicationBootstrap() {
    this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
      // Subject layout: corrosion.<licenseId>.host.heartbeat
      const licenseId = subject.split('.')[1];
      // Fire-and-forget: failures are logged, never propagated to the broker callback.
      void this.onHeartbeat(licenseId, data as HeartbeatPayload).catch((err) =>
        this.logger.error(`heartbeat handling failed for ${licenseId}: ${err.message}`, err.stack),
      );
    });

    this.nats.subscribe('corrosion.*.host.going_offline', (_data, subject) => {
      const licenseId = subject.split('.')[1];
      void this.onGoingOffline(licenseId).catch((err) =>
        this.logger.error(`going_offline handling failed for ${licenseId}: ${err.message}`, err.stack),
      );
    });

    this.logger.log('Host agent (protocol v2) consumer subscriptions initialized');
  }

  /**
   * Process one heartbeat: validate the tenant, then refresh the legacy
   * connection row, the host row and the instance rows (in that order).
   */
  private async onHeartbeat(licenseId: string, payload: HeartbeatPayload): Promise<void> {
    if (!(await this.isValidTenant(licenseId))) return;
    const now = new Date();

    // The payload comes straight off the broker and is only cast, not
    // validated. A `null` body used to throw inside upsertHost *after* the
    // legacy row was already marked connected; treat any non-object body as
    // an empty heartbeat so all three writes stay consistent.
    const body: HeartbeatPayload = payload !== null && typeof payload === 'object' ? payload : {};

    await this.updateLegacyConnection(licenseId, now);
    const host = await this.upsertHost(licenseId, body, now);
    await this.upsertInstances(licenseId, host, body, now);
  }

  /** Legacy single-server row — keeps the current panel working. */
  private async updateLegacyConnection(licenseId: string, now: Date): Promise<void> {
    // NOTE(review): find-then-insert is not atomic; two concurrent first
    // heartbeats for one license could both take the insert branch.
    const existing = await this.connectionRepository.findOne({ where: { license_id: licenseId } });
    if (existing) {
      await this.connectionRepository.update(
        { id: existing.id },
        { companion_last_seen: now, connection_status: 'connected', updated_at: now },
      );
    } else {
      await this.connectionRepository.save(
        this.connectionRepository.create({
          license_id: licenseId,
          connection_type: 'bare_metal',
          connection_status: 'connected',
          companion_last_seen: now,
        }),
      );
    }
  }

  /**
   * Upsert the fleet host row, keyed by (license_id, hostname).
   *
   * @returns The host as it looks after the write (the stored row merged with
   *   the new field values, or the freshly created row).
   */
  private async upsertHost(licenseId: string, payload: HeartbeatPayload, now: Date): Promise<AgentHost> {
    // A missing hostname is stored as the empty string.
    const hostname = payload.host?.hostname ?? '';
    const fields = {
      agent_version: payload.agent?.version ?? null,
      agent_commit: payload.agent?.commit ?? null,
      os: payload.agent?.os ?? null,
      arch: payload.agent?.arch ?? null,
      status: 'connected',
      last_heartbeat_at: now,
      cpu_percent: payload.host?.cpu_percent ?? null,
      cpu_cores: payload.host?.cpu_cores ?? null,
      mem_total_mb: payload.host?.mem_total_mb ?? null,
      mem_used_mb: payload.host?.mem_used_mb ?? null,
      uptime_seconds: payload.host?.uptime_seconds ?? null,
      disks: payload.host?.disks ?? null,
      updated_at: now,
    };

    const existing = await this.hostRepository.findOne({
      where: { license_id: licenseId, hostname },
    });
    if (existing) {
      await this.hostRepository.update({ id: existing.id }, fields);
      return { ...existing, ...fields } as AgentHost;
    }
    const created = await this.hostRepository.save(
      this.hostRepository.create({ license_id: licenseId, hostname, ...fields }),
    );
    this.logger.log(`host registered for license ${licenseId} (hostname '${hostname || 'unknown'}')`);
    return created;
  }

  /**
   * Upsert one game_instances row per heartbeat instance entry, keyed by
   * (license_id, agent_instance_id). Entries without `id` or `game` are skipped.
   */
  private async upsertInstances(
    licenseId: string,
    host: AgentHost,
    payload: HeartbeatPayload,
    now: Date,
  ): Promise<void> {
    for (const inst of payload.instances ?? []) {
      if (!inst?.id || !inst?.game) continue;
      const fields = {
        host_id: host.id,
        game: inst.game,
        label: inst.label ?? null,
        state: inst.state ?? 'unknown',
        uptime_seconds: inst.uptime_seconds ?? 0,
        last_seen_at: now,
        updated_at: now,
      };
      const existing = await this.instanceRepository.findOne({
        where: { license_id: licenseId, agent_instance_id: inst.id },
      });
      if (existing) {
        await this.instanceRepository.update({ id: existing.id }, fields);
      } else {
        await this.instanceRepository.save(
          this.instanceRepository.create({
            license_id: licenseId,
            agent_instance_id: inst.id,
            ...fields,
          }),
        );
        this.logger.log(`instance '${inst.id}' (${inst.game}) registered for license ${licenseId}`);
      }
    }
  }

  /**
   * Handle the graceful goodbye beacon: mark the legacy connection and the
   * license's host rows offline.
   */
  private async onGoingOffline(licenseId: string): Promise<void> {
    if (!(await this.isValidTenant(licenseId))) return;
    const now = new Date();
    await this.connectionRepository.update(
      { license_id: licenseId },
      { connection_status: 'offline', updated_at: now },
    );
    // NOTE(review): the beacon payload is not inspected, so this flips EVERY
    // host of the license offline. Revisit once a license can have several
    // agents publishing independently.
    await this.hostRepository.update(
      { license_id: licenseId },
      { status: 'offline', updated_at: now },
    );
    this.logger.log(`host(s) for license ${licenseId} went offline (graceful beacon)`);
  }

  /**
   * Heartbeats stopping must flip the panel to offline — an agent that
   * crashes or loses network never sends the goodbye beacon. Sweeps both the
   * legacy connection and fleet hosts.
   *
   * Runs every 60 s; a record is stale once its last heartbeat is older than
   * OFFLINE_AFTER_MS.
   */
  @Interval(60_000)
  async sweepStaleConnections(): Promise<void> {
    const threshold = new Date(Date.now() - HostAgentConsumerService.OFFLINE_AFTER_MS);

    const conn = await this.connectionRepository
      .createQueryBuilder()
      .update(ServerConnection)
      .set({ connection_status: 'offline', updated_at: () => 'NOW()' })
      .where('connection_status = :connected', { connected: 'connected' })
      .andWhere('companion_last_seen IS NOT NULL')
      .andWhere('companion_last_seen < :threshold', { threshold })
      .execute();

    const hosts = await this.hostRepository
      .createQueryBuilder()
      .update(AgentHost)
      .set({ status: 'offline', updated_at: () => 'NOW()' })
      .where('status = :connected', { connected: 'connected' })
      .andWhere('last_heartbeat_at IS NOT NULL')
      .andWhere('last_heartbeat_at < :threshold', { threshold })
      .execute();

    const affected = (conn.affected ?? 0) + (hosts.affected ?? 0);
    if (affected) {
      this.logger.warn(`marked ${affected} stale connection/host record(s) offline`);
    }
  }

  /**
   * Tenant validation: the subject segment must be a real license UUID.
   * NATS consumers must never write rows for subjects an arbitrary publisher
   * invented. Existence is cached to avoid a query per heartbeat.
   *
   * Only positive results are cached; an unknown license is re-checked on
   * every message so a newly created license is accepted immediately.
   */
  private async isValidTenant(licenseId: string): Promise<boolean> {
    if (!HostAgentConsumerService.UUID_RE.test(licenseId)) {
      this.warnUnknownOnce(licenseId, 'not a UUID');
      return false;
    }
    const cachedUntil = this.knownLicenses.get(licenseId);
    if (cachedUntil !== undefined) {
      if (cachedUntil > Date.now()) return true;
      // Expired: drop the entry so licenses that no longer exist do not linger.
      this.knownLicenses.delete(licenseId);
    }

    const exists = await this.licenseRepository.exist({ where: { id: licenseId } });
    if (!exists) {
      this.warnUnknownOnce(licenseId, 'no such license');
      return false;
    }
    this.knownLicenses.set(licenseId, Date.now() + HostAgentConsumerService.LICENSE_CACHE_TTL_MS);
    return true;
  }

  /**
   * Log a warning for an invalid license id the first time it is seen.
   *
   * The remembered ids are capped at MAX_WARNED_UNKNOWN; when full, the oldest
   * id is forgotten (and may therefore be warned about again later).
   */
  private warnUnknownOnce(licenseId: string, reason: string): void {
    if (this.warnedUnknown.has(licenseId)) return;
    if (this.warnedUnknown.size >= HostAgentConsumerService.MAX_WARNED_UNKNOWN) {
      // A Set iterates in insertion order, so the first value is the oldest.
      const oldest = this.warnedUnknown.values().next().value;
      if (oldest !== undefined) this.warnedUnknown.delete(oldest);
    }
    this.warnedUnknown.add(licenseId);
    this.logger.warn(`ignoring host-agent traffic for invalid license '${licenseId}' (${reason})`);
  }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
export { NatsService } from './nats.service';
|
||||
export { NatsBridgeService } from './nats-bridge.service';
|
||||
export { HostAgentConsumerService } from './host-agent-consumer.service';
|
||||
export { SteamService } from './steam.service';
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
|
||||
import { Injectable, OnApplicationBootstrap, Logger } from '@nestjs/common';
|
||||
import { NatsService } from './nats.service';
|
||||
|
||||
@Injectable()
|
||||
export class NatsBridgeService implements OnModuleInit {
|
||||
export class NatsBridgeService implements OnApplicationBootstrap {
|
||||
private readonly logger = new Logger(NatsBridgeService.name);
|
||||
private listeners: Map<string, Set<(event: string, data: unknown) => void>> = new Map();
|
||||
|
||||
constructor(private nats: NatsService) {}
|
||||
|
||||
onModuleInit() {
|
||||
// Subscriptions MUST happen in onApplicationBootstrap, not onModuleInit:
|
||||
// provider onModuleInit order is not guaranteed, and these hooks once ran
|
||||
// before NatsService connected — every subscribe() silently no-oped and the
|
||||
// WS bridge was dead from boot. Bootstrap runs after ALL module inits
|
||||
// (including the awaited NATS connect) complete.
|
||||
onApplicationBootstrap() {
|
||||
this.nats.subscribe('corrosion.*.companion.heartbeat', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'heartbeat', data);
|
||||
@@ -44,6 +49,17 @@ export class NatsBridgeService implements OnModuleInit {
|
||||
this.emit(licenseId, 'oxide_status', data);
|
||||
});
|
||||
|
||||
// Wire protocol v2 (corrosion-host-agent) — host-level telemetry
|
||||
this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'host_heartbeat', data);
|
||||
});
|
||||
|
||||
this.nats.subscribe('corrosion.*.host.going_offline', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'host_going_offline', data);
|
||||
});
|
||||
|
||||
this.logger.log('NATS bridge subscriptions initialized');
|
||||
}
|
||||
|
||||
|
||||
@@ -13,8 +13,13 @@ export class NatsService implements OnModuleInit, OnModuleDestroy {
|
||||
async onModuleInit() {
|
||||
try {
|
||||
const url = this.config.get<string>('nats.url') || 'nats://localhost:4222';
|
||||
this.nc = await connect({ servers: url });
|
||||
this.logger.log(`Connected to NATS at ${url}`);
|
||||
const user = this.config.get<string>('nats.internalUser');
|
||||
const pass = this.config.get<string>('nats.internalPassword');
|
||||
// Authenticate with the privileged internal user when configured;
|
||||
// otherwise connect anonymously (broker hasn't enforced auth yet).
|
||||
const opts = user && pass ? { servers: url, user, pass } : { servers: url };
|
||||
this.nc = await connect(opts);
|
||||
this.logger.log(`Connected to NATS at ${url}${user ? ` as ${user}` : ' (anonymous)'}`);
|
||||
} catch (err) {
|
||||
this.logger.warn(`NATS connection failed — running in offline mode: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
102
backend/migrations/022_fleet_model.sql
Normal file
102
backend/migrations/022_fleet_model.sql
Normal file
@@ -0,0 +1,102 @@
|
||||
-- Fleet data model — License → Host → Instance (with optional Cluster)
|
||||
--
|
||||
-- ADDITIVE: existing server_connections / server_config / server_stats are
|
||||
-- left untouched so the current single-server panel keeps working. The
|
||||
-- host-agent consumer writes BOTH the legacy connection row and these fleet
|
||||
-- tables during the transition; the panel migrates to the fleet tables in a
|
||||
-- later phase.
|
||||
--
|
||||
-- Shape mirrors the host agent's wire protocol v2 heartbeat:
|
||||
-- host{} block → agent_hosts
|
||||
-- instances[] entries → game_instances
|
||||
-- Host metrics (CPU/RAM/disk) live on the HOST, not duplicated per instance.
|
||||
--
|
||||
-- Named `agent_hosts` (not `hosts`) to avoid collision with the existing B2B
|
||||
-- `hosts` table (hosting-partner companies) — different concept entirely.
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- AGENT_HOSTS — one Corrosion host agent / one machine
|
||||
-----------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS agent_hosts (
    id                UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    license_id        UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,

    -- Natural key until enrollment issues a stable host identity.
    hostname          VARCHAR(255) NOT NULL DEFAULT '',

    -- Agent build / platform
    agent_version     VARCHAR(64),
    agent_commit      VARCHAR(64),
    os                VARCHAR(32),
    arch              VARCHAR(32),

    -- Liveness
    status            VARCHAR(20) NOT NULL DEFAULT 'offline'
                      CHECK (status IN ('connected', 'degraded', 'offline')),
    last_heartbeat_at TIMESTAMPTZ,

    -- Host metrics from the latest heartbeat
    cpu_percent       DOUBLE PRECISION,
    cpu_cores         INTEGER,
    mem_total_mb      BIGINT,
    mem_used_mb       BIGINT,
    uptime_seconds    BIGINT,
    disks             JSONB, -- [{ "mount": "/", "total_mb": n, "free_mb": n }]

    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    UNIQUE (license_id, hostname)
);
CREATE INDEX IF NOT EXISTS idx_agent_hosts_license ON agent_hosts(license_id);
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- INSTANCE CLUSTERS — optional grouping (Soulmask main/child, Dune battlegroup)
|
||||
-- Reserved now; cluster logic ships with those game adapters.
|
||||
-----------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS instance_clusters (
    id         UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
    game       VARCHAR(32) NOT NULL,
    name       VARCHAR(255) NOT NULL,
    topology   VARCHAR(32), -- main_client | battlegroup
    config     JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_clusters_license ON instance_clusters(license_id);
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- GAME INSTANCES — one game server process / orchestrated unit.
|
||||
-- The billing unit (plans count instances).
|
||||
-----------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS game_instances (
    id                UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    license_id        UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,

    -- Deleting a host or cluster detaches the instance instead of deleting it.
    host_id           UUID REFERENCES agent_hosts(id) ON DELETE SET NULL,
    cluster_id        UUID REFERENCES instance_clusters(id) ON DELETE SET NULL,

    -- The agent's instance slug; the NATS subject segment.
    agent_instance_id VARCHAR(64) NOT NULL,
    game              VARCHAR(32) NOT NULL,
    label             VARCHAR(255),

    -- running | stopped | starting | stopping | crashed
    -- | configured | missing_root | unmanaged | unknown
    state             VARCHAR(32) NOT NULL DEFAULT 'unknown',
    root_path         TEXT,
    uptime_seconds    BIGINT NOT NULL DEFAULT 0,
    last_seen_at      TIMESTAMPTZ,

    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    UNIQUE (license_id, agent_instance_id)
);
CREATE INDEX IF NOT EXISTS idx_instances_license ON game_instances(license_id);
CREATE INDEX IF NOT EXISTS idx_instances_host ON game_instances(host_id);
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- INSTANCE STATS — per-instance time series (game metrics).
|
||||
-- Populated once game-level telemetry (player count/FPS via RCON/plugin) is
|
||||
-- collected; the host heartbeat carries host metrics, not game metrics.
|
||||
-----------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS instance_stats (
    id              UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    instance_id     UUID NOT NULL REFERENCES game_instances(id) ON DELETE CASCADE,
    license_id      UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,

    -- One sample of game-level metrics
    player_count    INTEGER NOT NULL DEFAULT 0,
    max_players     INTEGER NOT NULL DEFAULT 0,
    fps             DOUBLE PRECISION NOT NULL DEFAULT 0,
    memory_usage_mb INTEGER NOT NULL DEFAULT 0,
    recorded_at     TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Serves "latest samples for one instance" lookups.
CREATE INDEX IF NOT EXISTS idx_instance_stats_instance
    ON instance_stats(instance_id, recorded_at DESC);
|
||||
152
contract-tests/agent-backend.contract.mjs
Normal file
152
contract-tests/agent-backend.contract.mjs
Normal file
@@ -0,0 +1,152 @@
|
||||
// Full-pipeline contract test: Rust host agent → NATS → NestJS consumer → Postgres.
|
||||
//
|
||||
// Proves the wire protocol v2 chain end to end against a REAL backend and DB:
|
||||
// 1. agent heartbeat arrives with schema 2 + measured telemetry
|
||||
// 2. backend auto-registers the server_connections row and marks it connected
|
||||
// 3. instance command channel round-trips (start/status/stop) with push events
|
||||
// 4. graceful agent shutdown publishes the offline beacon and the row flips offline
|
||||
//
|
||||
// Required env:
|
||||
// LICENSE_ID — existing license uuid (CI: from the admin seed)
|
||||
// DATABASE_URL — postgres connection string for assertions
|
||||
// NATS_URL — broker both agent and backend use (default nats://localhost:4222)
|
||||
// AGENT_BIN — path to the corrosion-host-agent binary
|
||||
//
|
||||
// Uses the backend's own node_modules (nats, pg) so the client libs under test
|
||||
// are exactly what production runs.
|
||||
|
||||
import { createRequire } from 'node:module';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { writeFileSync, mkdtempSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// Repository root is the parent of the directory that holds this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
const repoRoot = join(scriptDir, '..');

// Resolve `nats` and `pg` from the backend's node_modules so the test uses the
// same client libraries as the backend. The file name passed to createRequire
// is only an anchor for module resolution.
const require = createRequire(join(repoRoot, 'backend-nest', 'node_modules', 'x.js'));
const { connect, StringCodec } = require('nats');
const { Client: PgClient } = require('pg');

// Configuration from the environment; unset optional values fall back to defaults.
const {
  LICENSE_ID: LICENSE,
  DATABASE_URL,
  NATS_URL = 'nats://localhost:4222',
  AGENT_BIN = join(repoRoot, 'corrosion-host-agent', 'target', 'debug', 'corrosion-host-agent'),
} = process.env;

// Exit code 2 = misconfigured run (as opposed to 1 = contract failure).
if (!LICENSE || !DATABASE_URL) {
  console.error('LICENSE_ID and DATABASE_URL are required');
  process.exit(2);
}

const sc = StringCodec();

// Failed expectations are collected here and reported together at the end.
const errs = [];

/** Record `msg` as a failure when `cond` is falsy; never throws. */
const check = (cond, msg) => {
  if (cond) return;
  errs.push(msg);
};

/** Resolve after `ms` milliseconds. */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
|
||||
|
||||
/**
 * Poll the `server_connections` rows of the license under test until
 * `predicate` accepts them or the timeout elapses.
 *
 * @param {object} pg - Connected client exposing `query(sql, params)`.
 * @param {(rows: object[]) => boolean} predicate - Returns true once the rows look as expected.
 * @param {string} label - Prefix for the failure message recorded on timeout.
 * @param {number} [timeoutMs=30000] - Maximum time to keep polling.
 * @returns {Promise<object[]>} The last rows read. On timeout a message is
 *   pushed to `errs` instead of throwing.
 */
async function pollDb(pg, predicate, label, timeoutMs = 30_000) {
  const giveUpAt = Date.now() + timeoutMs;

  while (true) {
    const result = await pg.query(
      'SELECT connection_type, connection_status, companion_last_seen FROM server_connections WHERE license_id = $1',
      [LICENSE],
    );
    const snapshot = result.rows;

    if (predicate(snapshot)) {
      return snapshot;
    }

    const timedOut = Date.now() > giveUpAt;
    if (timedOut) {
      errs.push(`${label}: timeout after ${timeoutMs}ms — rows: ${JSON.stringify(snapshot)}`);
      return snapshot;
    }

    // Wait one second between reads.
    await sleep(1000);
  }
}
|
||||
|
||||
/**
 * Run the four contract phases against a real agent, broker, backend and DB.
 *
 * Expectation failures are collected in `errs` and reported at the end
 * (exit 1); success exits 0. Thrown errors propagate to the caller, but the
 * spawned agent is always terminated first so a failed run cannot leak it.
 */
const main = async () => {
  const pg = new PgClient({ connectionString: DATABASE_URL });
  await pg.connect();
  const nc = await connect({ servers: NATS_URL });

  // Collect everything the agent publishes; the loops end when `nc` closes.
  const heartbeats = [];
  const statusEvents = [];
  (async () => { for await (const m of nc.subscribe(`corrosion.${LICENSE}.host.heartbeat`)) heartbeats.push(JSON.parse(sc.decode(m.data))); })();
  (async () => { for await (const m of nc.subscribe(`corrosion.${LICENSE}.ci-instance.status`)) statusEvents.push(JSON.parse(sc.decode(m.data))); })();

  // --- spawn the real agent ---
  const dir = mkdtempSync(join(tmpdir(), 'cha-contract-'));
  const cfgPath = join(dir, 'agent.toml');
  writeFileSync(cfgPath, `
[agent]
license_id = "${LICENSE}"
nats_url = "${NATS_URL}"
heartbeat_seconds = 10
log_level = "info"

[[instance]]
id = "ci-instance"
game = "rust"
root = "/tmp"
label = "Contract CI"
executable = "/bin/sleep"
args = ["300"]
`);
  const agent = spawn(AGENT_BIN, ['--config', cfgPath], { stdio: ['ignore', 'inherit', 'inherit'] });
  const agentExited = new Promise((r) => agent.on('exit', r));

  try {
    // --- 1. heartbeat shape + real telemetry ---
    const hbDeadline = Date.now() + 20_000;
    while (heartbeats.length === 0 && Date.now() < hbDeadline) await sleep(500);
    check(heartbeats.length > 0, 'no heartbeat within 20s');
    if (heartbeats.length) {
      const hb = heartbeats[0];
      check(hb.schema === 2, `schema != 2: ${hb.schema}`);
      check(typeof hb.host?.cpu_percent === 'number', 'missing host.cpu_percent');
      check(hb.host?.mem_total_mb > 0, 'mem_total_mb not measured');
      check(Array.isArray(hb.host?.disks) && hb.host.disks.length > 0, 'no disks reported');
      check(hb.instances?.[0]?.id === 'ci-instance', 'instance missing from heartbeat');
      check(!!hb.agent?.version && !!hb.agent?.commit, 'agent version/commit missing');
    }

    // --- 2. backend auto-registers + connects ---
    const rows = await pollDb(pg, (r) => r.length === 1 && r[0].connection_status === 'connected', 'auto-register connected');
    if (rows.length === 1) {
      check(rows[0].connection_type === 'bare_metal', `connection_type: ${rows[0].connection_type}`);
      check(rows[0].companion_last_seen !== null, 'companion_last_seen not set');
    }

    // --- 3. instance command channel ---
    // Request/reply on the instance cmd subject; rejects (throws) on timeout.
    const cmd = async (payload) =>
      JSON.parse(sc.decode((await nc.request(`corrosion.${LICENSE}.ci-instance.cmd`, sc.encode(JSON.stringify(payload)), { timeout: 8000 })).data));

    const st0 = await cmd({ func: 'status' });
    check(st0.state?.state === 'stopped', `initial state: ${JSON.stringify(st0.state)}`);
    const start = await cmd({ func: 'start' });
    check(start.status === 'success', `start: ${JSON.stringify(start)}`);
    await sleep(1000);
    const st1 = await cmd({ func: 'status' });
    check(st1.state?.state === 'running', `post-start state: ${JSON.stringify(st1.state)}`);
    check((await cmd({ func: 'start' })).status === 'error', 'double start must error');
    check((await cmd({ func: 'bogus' })).status === 'error', 'unknown func must error');
    const stop = await cmd({ func: 'stop' });
    check(stop.status === 'success', `stop: ${JSON.stringify(stop)}`);
    await sleep(1000);
    const seq = statusEvents.map((e) => e.event?.state);
    check(seq.includes('running') && seq.includes('stopped'), `status events incomplete: ${seq.join(',')}`);

    // --- 4. graceful shutdown → offline beacon → DB flips offline ---
    agent.kill('SIGTERM');
    await Promise.race([agentExited, sleep(8000)]);
    await pollDb(pg, (r) => r.length === 1 && r[0].connection_status === 'offline', 'beacon offline', 20_000);
  } finally {
    // Never leak the agent (and the /bin/sleep it may have started): if a step
    // above threw, or the agent ignored SIGTERM, force it down before leaving.
    // exitCode and signalCode both stay null while the child is still running.
    if (agent.exitCode === null && agent.signalCode === null) {
      agent.kill('SIGKILL');
    }
  }

  await nc.close();
  await pg.end();

  if (errs.length) {
    console.error('\nCONTRACT FAIL:');
    errs.forEach((e) => console.error('  -', e));
    process.exit(1);
  }
  console.log('\nCONTRACT PASS: heartbeat shape, auto-register, connected/offline lifecycle, instance command channel, push events');
  process.exit(0);
};

// Anything thrown (as opposed to a recorded expectation failure) is a crash.
main().catch((e) => {
  console.error('contract test crashed:', e);
  process.exit(1);
});
|
||||
330
corrosion-host-agent/Cargo.lock
generated
330
corrosion-host-agent/Cargo.lock
generated
@@ -149,6 +149,12 @@ version = "3.20.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.11.1"
|
||||
@@ -258,18 +264,21 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "corrosion-host-agent"
|
||||
version = "2.0.0-alpha.2"
|
||||
version = "2.0.0-alpha.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-nats",
|
||||
"chrono",
|
||||
"clap",
|
||||
"futures",
|
||||
"libc",
|
||||
"rand",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sysinfo",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-tungstenite",
|
||||
"tokio-util",
|
||||
"toml",
|
||||
"tracing",
|
||||
@@ -438,6 +447,12 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
|
||||
|
||||
[[package]]
|
||||
name = "fiat-crypto"
|
||||
version = "0.2.9"
|
||||
@@ -450,6 +465,12 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
||||
|
||||
[[package]]
|
||||
name = "foldhash"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.2.2"
|
||||
@@ -568,6 +589,28 @@ dependencies = [
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"r-efi",
|
||||
"wasip2",
|
||||
"wasip3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
||||
dependencies = [
|
||||
"foldhash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.17.1"
|
||||
@@ -580,6 +623,22 @@ version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6970f50e31d6fc17d3fa27329444bfa74e196cf62e95052a3f6fee181dba6425"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httparse"
|
||||
version = "1.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.65"
|
||||
@@ -686,6 +745,12 @@ dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "id-arena"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "1.1.0"
|
||||
@@ -714,7 +779,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown",
|
||||
"hashbrown 0.17.1",
|
||||
"serde",
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -746,12 +813,24 @@ version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||
|
||||
[[package]]
|
||||
name = "leb128fmt"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.186"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
|
||||
|
||||
[[package]]
|
||||
name = "litemap"
|
||||
version = "0.8.2"
|
||||
@@ -808,7 +887,7 @@ dependencies = [
|
||||
"data-encoding",
|
||||
"ed25519",
|
||||
"ed25519-dalek",
|
||||
"getrandom",
|
||||
"getrandom 0.2.17",
|
||||
"log",
|
||||
"rand",
|
||||
"signatory",
|
||||
@@ -958,6 +1037,16 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
@@ -976,6 +1065,12 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "r-efi"
|
||||
version = "6.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.6"
|
||||
@@ -1003,7 +1098,7 @@ version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1072,7 +1167,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"getrandom",
|
||||
"getrandom 0.2.17",
|
||||
"libc",
|
||||
"untrusted",
|
||||
"windows-sys 0.52.0",
|
||||
@@ -1087,6 +1182,19 @@ dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.23.40"
|
||||
@@ -1276,6 +1384,17 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.9"
|
||||
@@ -1420,6 +1539,19 @@ dependencies = [
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
|
||||
dependencies = [
|
||||
"fastrand",
|
||||
"getrandom 0.4.2",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
@@ -1528,6 +1660,18 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-tungstenite"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"log",
|
||||
"tokio",
|
||||
"tungstenite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-util"
|
||||
version = "0.7.18"
|
||||
@@ -1654,6 +1798,24 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tungstenite"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"data-encoding",
|
||||
"http",
|
||||
"httparse",
|
||||
"log",
|
||||
"rand",
|
||||
"sha1",
|
||||
"thiserror",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.20.1"
|
||||
@@ -1666,6 +1828,12 @@ version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
@@ -1684,6 +1852,12 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf-8"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "utf8_iter"
|
||||
version = "1.0.4"
|
||||
@@ -1714,6 +1888,24 @@ version = "0.11.1+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
||||
|
||||
[[package]]
|
||||
name = "wasip2"
|
||||
version = "1.0.3+wasi-0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
|
||||
dependencies = [
|
||||
"wit-bindgen 0.57.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasip3"
|
||||
version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
|
||||
dependencies = [
|
||||
"wit-bindgen 0.51.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.123"
|
||||
@@ -1759,6 +1951,40 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-encoder"
|
||||
version = "0.244.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
|
||||
dependencies = [
|
||||
"leb128fmt",
|
||||
"wasmparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-metadata"
|
||||
version = "0.244.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"indexmap",
|
||||
"wasm-encoder",
|
||||
"wasmparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasmparser"
|
||||
version = "0.244.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"hashbrown 0.15.5",
|
||||
"indexmap",
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
@@ -1984,6 +2210,100 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen"
|
||||
version = "0.51.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
|
||||
dependencies = [
|
||||
"wit-bindgen-rust-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen"
|
||||
version = "0.57.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen-core"
|
||||
version = "0.51.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"heck",
|
||||
"wit-parser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen-rust"
|
||||
version = "0.51.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"heck",
|
||||
"indexmap",
|
||||
"prettyplease",
|
||||
"syn",
|
||||
"wasm-metadata",
|
||||
"wit-bindgen-core",
|
||||
"wit-component",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen-rust-macro"
|
||||
version = "0.51.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wit-bindgen-core",
|
||||
"wit-bindgen-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-component"
|
||||
version = "0.244.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitflags",
|
||||
"indexmap",
|
||||
"log",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"wasm-encoder",
|
||||
"wasm-metadata",
|
||||
"wasmparser",
|
||||
"wit-parser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-parser"
|
||||
version = "0.244.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"id-arena",
|
||||
"indexmap",
|
||||
"log",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"unicode-xid",
|
||||
"wasmparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "writeable"
|
||||
version = "0.6.3"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "corrosion-host-agent"
|
||||
version = "2.0.0-alpha.2"
|
||||
version = "2.0.0-alpha.5"
|
||||
edition = "2021"
|
||||
description = "Corrosion Host Agent — multi-game ops runtime for self-hosted game servers"
|
||||
license = "UNLICENSED"
|
||||
@@ -25,6 +25,13 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||
anyhow = "1"
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
rand = "0.8"
|
||||
tokio-tungstenite = "0.24"
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
libc = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
||||
# Size-optimized release: single static binary living next to RAM-heavy game
|
||||
# servers. Panic stays 'unwind' so a panicking task surfaces through its
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
# Corrosion Wire Protocol v2
|
||||
|
||||
Status: **Phase 0 implemented** (host heartbeat, host commands, going-offline
|
||||
beacon). Per-instance command/status subjects are reserved and specified here
|
||||
for Phase 1.
|
||||
Status: **Phase 0 + Phase 1 process control implemented** (host heartbeat,
|
||||
host commands, going-offline beacon, per-instance start/stop/restart/status
|
||||
with push state events), plus `rcon`, `steam_update`, and the jailed file
|
||||
manager. `oxide_install` and game-adapter-specific commands are specified but not yet implemented.
|
||||
|
||||
## Design
|
||||
|
||||
@@ -70,9 +71,10 @@ All telemetry is measured, never fabricated. Fields the agent cannot measure
|
||||
are omitted (`probe` before the first probe completes, `hostname` if
|
||||
unavailable).
|
||||
|
||||
Phase 0 instance `state` values: `configured` (root path exists),
|
||||
`missing_root`. Phase 1 adds live process states: `running`, `stopped`,
|
||||
`crashed`, `starting`, `updating`.
|
||||
Instance `state` values — process-managed (an `executable` is configured):
|
||||
`running`, `stopped`, `starting`, `stopping`, `crashed`; unmanaged
|
||||
(telemetry-only): `configured` (root exists), `missing_root`. Each instance
|
||||
also reports `uptime_seconds` (0 unless running).
|
||||
|
||||
### `corrosion.{license_id}.host.cmd` (backend → agent, request-reply)
|
||||
|
||||
@@ -92,19 +94,60 @@ Best-effort beacon (500ms budget) on graceful shutdown so the panel can flip
|
||||
the host to offline immediately instead of waiting out heartbeat staleness.
|
||||
Payload: `{}`.
|
||||
|
||||
## Instance-level subjects (Phase 1 — reserved, not yet implemented)
|
||||
## Instance-level subjects
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.cmd` (backend → agent, request-reply)
|
||||
### `corrosion.{license_id}.{instance_id}.cmd` (backend → agent, request-reply) — LIVE
|
||||
|
||||
Lifecycle and control for one game instance. Planned funcs: `start`, `stop`,
|
||||
`restart`, `status`, `rcon` (process-class games), `steam_update`,
|
||||
`oxide_install` (rust), plus game-adapter-specific commands (Dune: docker
|
||||
lifecycle, RabbitMQ bus commands, Coriolis reset).
|
||||
Lifecycle and control for one game instance.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.status` (agent → backend, publish)
|
||||
Implemented funcs: `start`, `stop` (graceful with 30s budget, then force
|
||||
kill), `restart`, `status` (returns `state` + `uptime_seconds`), and
|
||||
`rcon` — `{ "func": "rcon", "command": "<console command>" }` returns
|
||||
`{ "status": "success", "output": <server response> }`. Protocol per game:
|
||||
WebRCON (WebSocket JSON) for rust, Source RCON (Valve TCP) for
|
||||
conan/soulmask; explicit `kind` override available in the instance's
|
||||
`[instance.rcon]` config. Always targets 127.0.0.1 (agent is co-located).
|
||||
Errors reply `{ "status": "error", "message": ... }` — including start on an
|
||||
unmanaged instance, double start, missing rcon config, and unknown funcs.
|
||||
|
||||
State-change events (started/stopped/crashed) so the panel does not wait for
|
||||
the next heartbeat.
|
||||
Also implemented: `steam_update` — `{ "func": "steam_update" }` runs
|
||||
SteamCMD for the instance's game (app ids: rust 258550, conan 443030,
|
||||
soulmask 3017310/3017300; dune rejects — Docker images, no SteamCMD),
|
||||
streaming progress lines to `corrosion.{license}.{instance}.steam_status`
|
||||
and replying on completion.
|
||||
|
||||
Planned funcs: `oxide_install` (rust), plus game-adapter-specific
|
||||
commands (Dune: docker lifecycle, RabbitMQ bus commands, Coriolis reset).
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.steam_status` (agent → backend, publish) — LIVE
|
||||
|
||||
Per-line SteamCMD stdout during a `steam_update`, so the panel can show
|
||||
live update progress. Payload: `{ "timestamp", "instance_id", "line" }`.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.files.cmd` (backend → agent, request-reply) — LIVE
|
||||
|
||||
Jailed file manager, confined to the instance `root` (two-stage check:
|
||||
lexical normalize + canonicalize, defeating `../` traversal and symlink
|
||||
escape). Request `{ "op": "list|read|write|delete|rename|mkdir|mkfile|move|copy",
|
||||
"path": "rel/path", "dest"?, "content"?, "name"? }`; reply
|
||||
`{ "status": "success", "data": ... }` or `{ "status": "error", "message": ... }`.
|
||||
`read` caps at 5 MiB. Replaces the Go agent's UNJAILED legacy files API,
|
||||
which is retired and will not be ported.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.status` (agent → backend, publish) — LIVE
|
||||
|
||||
State-change events so the panel does not wait for the next heartbeat.
|
||||
Payload: `{ "timestamp", "instance_id", "event": { "state": ..., "exit_code"? } }`.
|
||||
|
||||
Semantics: **keep-latest state sync**, not a lossless transition ledger —
|
||||
near-instant transient states (e.g. `starting` when spawn succeeds
|
||||
immediately) may coalesce into the following state. Consumers should treat
|
||||
each event as "current state is now X".
|
||||
|
||||
Known Phase 1 limitation: the supervisor does not yet persist/adopt PIDs — if
|
||||
the agent itself restarts while a game server is running, the game process
|
||||
survives but reports `stopped` until restarted through the panel. PID
|
||||
adoption is queued with the service-install work.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.console` (agent → backend, publish)
|
||||
|
||||
|
||||
@@ -15,7 +15,11 @@ instance on that host — Rust, Conan Exiles, Soulmask, Dune: Awakening.
|
||||
- [x] Connectivity prober (outbound TCP, periodic + on-demand)
|
||||
- [x] Host command channel (`ping`, `probe`, `sysinfo`)
|
||||
- [x] Graceful shutdown (cancellation token, going-offline beacon, NATS flush)
|
||||
- [ ] Phase 1: process-class game adapter (spawn/RCON/SteamCMD/files) — Rust, Conan, Soulmask
|
||||
- [x] Phase 1a: process supervision — per-instance start/stop/restart/status over
|
||||
`{instance}.cmd` request-reply, push state events on `{instance}.status`,
|
||||
crash detection with exit codes, live state in heartbeats
|
||||
(integration-tested with real processes + live-NATS contract test)
|
||||
- [ ] Phase 1b: RCON trait (WebRCON rust / TCP conan+soulmask), SteamCMD, jailed file manager
|
||||
- [ ] Phase 2: Dune Docker adapter (compose lifecycle, RabbitMQ bus, Postgres admin)
|
||||
- [ ] Phase 3: signed self-update (enforced ed25519 — release gate), service install, supervisor split
|
||||
|
||||
|
||||
@@ -9,7 +9,11 @@
|
||||
[agent]
|
||||
license_id = "your-license-uuid"
|
||||
nats_url = "nats://nats.corrosionmgmt.com:4222"
|
||||
# nats_token = "set-me-or-use-CORROSION_NATS_TOKEN"
|
||||
# Per-license auth (preferred): user = license id, password = the token shown
|
||||
# on the panel Server page. The broker scopes you to corrosion.{license}.>
|
||||
# nats_user = "your-license-uuid" # defaults to license_id if omitted
|
||||
# nats_password = "set-me-or-use-CORROSION_NATS_PASSWORD"
|
||||
# nats_token = "legacy token-only auth; use nats_password instead"
|
||||
heartbeat_seconds = 60
|
||||
log_level = "info"
|
||||
|
||||
@@ -23,11 +27,38 @@ game = "rust" # rust | conan | soulmask | dune
|
||||
root = "/opt/rustserver"
|
||||
label = "Main 2x Vanilla"
|
||||
|
||||
# RCON lets the panel send console commands to the running server.
|
||||
# For rust the protocol is WebRCON (WebSocket JSON); for conan/soulmask it is
|
||||
# Source RCON (Valve TCP binary). `kind` is optional — it is inferred from
|
||||
# the game name when absent.
|
||||
#
|
||||
# The [instance.rcon] sub-table MUST immediately follow the [[instance]] entry
|
||||
# it belongs to (standard TOML array-of-tables scoping rule).
|
||||
[instance.rcon]
|
||||
port = 28016
|
||||
password = "changeme"
|
||||
# kind = "webrcon" # explicit override; omit to infer from game
|
||||
|
||||
# [[instance]]
|
||||
# id = "soulmask-main"
|
||||
# game = "soulmask"
|
||||
# root = "/opt/soulmask/main"
|
||||
# label = "Cloud Mist Forest (cluster main)"
|
||||
#
|
||||
# [instance.rcon]
|
||||
# port = 19000
|
||||
# password = "changeme"
|
||||
# # kind = "source" # inferred automatically for soulmask
|
||||
|
||||
# SteamCMD update settings — optional sub-table for any instance.
|
||||
# Absent = defaults: steamcmd binary resolved via PATH, validate = false.
|
||||
#
|
||||
# [instance.steamcmd]
|
||||
# steamcmd_path = "/opt/steamcmd/steamcmd.sh" # omit to use PATH
|
||||
# validate = true # enable file-hash check pass
|
||||
#
|
||||
# Dune instances do not use SteamCMD (Docker images); the steam_update func
|
||||
# will return a clear error if invoked on a dune instance.
|
||||
|
||||
[prober]
|
||||
interval_seconds = 300
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
//! Shared agent handle: every subsystem task holds an `Arc<Agent>`.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::config::Settings;
|
||||
use crate::process::ProcessSupervisor;
|
||||
use crate::prober::ProbeReport;
|
||||
|
||||
pub struct Agent {
|
||||
@@ -12,5 +15,8 @@ pub struct Agent {
|
||||
pub nats: async_nats::Client,
|
||||
pub started: Instant,
|
||||
pub last_probe: RwLock<Option<ProbeReport>>,
|
||||
/// One supervisor per instance (unmanaged instances included — they
|
||||
/// report `unmanaged` state and reject process commands).
|
||||
pub supervisors: HashMap<String, Arc<ProcessSupervisor>>,
|
||||
pub shutdown: CancellationToken,
|
||||
}
|
||||
|
||||
@@ -33,7 +33,15 @@ pub async fn connect(cfg: &Settings) -> Result<async_nats::Client> {
|
||||
if force_tls {
|
||||
opts = opts.require_tls(true);
|
||||
}
|
||||
if let Some(token) = &cfg.nats_token {
|
||||
|
||||
// Per-license auth: the broker maps user=license_id, password=derived
|
||||
// token to permissions scoped to corrosion.{license_id}.>. Falls back to
|
||||
// token-only or anonymous so the agent still works against a broker that
|
||||
// hasn't enforced auth yet (transition period).
|
||||
if let Some(password) = &cfg.nats_password {
|
||||
let user = cfg.nats_user.clone().unwrap_or_else(|| cfg.license_id.clone());
|
||||
opts = opts.user_and_password(user, password.clone());
|
||||
} else if let Some(token) = &cfg.nats_token {
|
||||
opts = opts.token(token.clone());
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,9 @@ use serde::Deserialize;
|
||||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::rcon::RconConfig;
|
||||
use crate::steamcmd::SteamcmdConfig;
|
||||
|
||||
/// Instance ids share the NATS subject namespace with host-level segments.
|
||||
const RESERVED_INSTANCE_IDS: &[&str] = &["host", "cmd", "files", "update", "agent"];
|
||||
|
||||
@@ -31,6 +34,12 @@ pub struct AgentSection {
|
||||
pub license_id: Option<String>,
|
||||
pub nats_url: Option<String>,
|
||||
pub nats_token: Option<String>,
|
||||
/// NATS username for per-license auth. Defaults to license_id when a
|
||||
/// password is set but no user is given.
|
||||
pub nats_user: Option<String>,
|
||||
/// NATS password (the per-license token). When set, the agent authenticates
|
||||
/// with user+password instead of a bare token.
|
||||
pub nats_password: Option<String>,
|
||||
#[serde(default = "default_heartbeat_seconds")]
|
||||
pub heartbeat_seconds: u64,
|
||||
#[serde(default = "default_log_level")]
|
||||
@@ -49,6 +58,37 @@ pub struct InstanceConfig {
|
||||
/// Optional human label shown in the panel.
|
||||
#[serde(default)]
|
||||
pub label: Option<String>,
|
||||
/// Game server executable. Relative paths resolve against `root`.
|
||||
/// Absent = unmanaged instance (telemetry only, no process control).
|
||||
#[serde(default)]
|
||||
pub executable: Option<PathBuf>,
|
||||
/// Arguments as a proper list — no shell splitting, quoted values survive.
|
||||
#[serde(default)]
|
||||
pub args: Vec<String>,
|
||||
/// Working directory for the process. Defaults to the executable's directory.
|
||||
#[serde(default)]
|
||||
pub working_dir: Option<PathBuf>,
|
||||
/// RCON connection settings for this instance. Absent = rcon unavailable.
|
||||
/// Protocol defaults to WebRcon for rust, Source for conan/soulmask.
|
||||
#[serde(default)]
|
||||
pub rcon: Option<RconConfig>,
|
||||
/// SteamCMD update settings. Absent = defaults apply (steamcmd on PATH,
|
||||
/// validate = false).
|
||||
#[serde(default)]
|
||||
pub steamcmd: Option<SteamcmdConfig>,
|
||||
}
|
||||
|
||||
impl InstanceConfig {
|
||||
/// Absolute executable path, if this instance is process-managed.
|
||||
pub fn resolved_executable(&self) -> Option<PathBuf> {
|
||||
self.executable.as_ref().map(|exe| {
|
||||
if exe.is_absolute() {
|
||||
exe.clone()
|
||||
} else {
|
||||
self.root.join(exe)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Deserialize)]
|
||||
@@ -88,6 +128,8 @@ pub struct Settings {
|
||||
pub license_id: String,
|
||||
pub nats_url: String,
|
||||
pub nats_token: Option<String>,
|
||||
pub nats_user: Option<String>,
|
||||
pub nats_password: Option<String>,
|
||||
pub heartbeat_seconds: u64,
|
||||
pub log_level: String,
|
||||
pub instances: Vec<InstanceConfig>,
|
||||
@@ -133,6 +175,16 @@ fn resolve(file: ConfigFile) -> Result<Settings> {
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_token);
|
||||
|
||||
let nats_user = std::env::var("CORROSION_NATS_USER")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_user);
|
||||
|
||||
let nats_password = std::env::var("CORROSION_NATS_PASSWORD")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_password);
|
||||
|
||||
validate_subject_segment("license_id", &license_id)?;
|
||||
|
||||
let mut seen: HashSet<&str> = HashSet::new();
|
||||
@@ -162,6 +214,8 @@ fn resolve(file: ConfigFile) -> Result<Settings> {
|
||||
license_id,
|
||||
nats_url,
|
||||
nats_token,
|
||||
nats_user,
|
||||
nats_password,
|
||||
heartbeat_seconds: file.agent.heartbeat_seconds,
|
||||
log_level: file.agent.log_level,
|
||||
instances: file.instances,
|
||||
|
||||
544
corrosion-host-agent/src/filemanager.rs
Normal file
544
corrosion-host-agent/src/filemanager.rs
Normal file
@@ -0,0 +1,544 @@
|
||||
//! Jailed file manager for game-server install directories.
|
||||
//!
|
||||
//! Every path operation is confined to the instance `root` — the directory
|
||||
//! declared as `root` in `[[instance]]` config. A two-stage check (lexical
|
||||
//! Clean + `std::fs::canonicalize`) prevents both `../..` traversals and
|
||||
//! symlink-based escapes: even if an attacker plants a symlink inside the root
|
||||
//! that points outside it, `canonicalize` resolves the target and the prefix
|
||||
//! check catches the escape.
|
||||
//!
|
||||
//! The NATS request/reply contract mirrors the Go companion agent's jailed file
|
||||
//! manager (see `companion-agent/internal/filemanager/`) but uses a simpler
|
||||
//! flat JSON envelope rather than the VueFinder storage-path protocol — the
|
||||
//! Rust agent is the replacement, and the panel's backend talks to whichever
|
||||
//! agent is present.
|
||||
//!
|
||||
//! Subject: `corrosion.{license}.{instance}.files.cmd`
|
||||
//! Request: `{"op":"list"|"read"|"write"|"delete"|"rename"|"mkdir"|"mkfile"|"move"|"copy",
|
||||
//! "path":"rel/path", "dest"?:"...", "content"?:"...", "name"?:"..."}`
|
||||
//! Response: `{"status":"success","data":...}` or `{"status":"error","message":"..."}`
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use chrono::{DateTime, SecondsFormat, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Maximum size for a `read` operation (5 MiB). Larger files must be
|
||||
/// transferred through a dedicated download endpoint, not the file manager.
|
||||
const MAX_READ_SIZE: u64 = 5 * 1024 * 1024;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Wire types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Request envelope for the jailed file manager, deserialized from the JSON
/// payload described in this module's header
/// (subject `corrosion.{license}.{instance}.files.cmd`).
#[derive(Debug, Deserialize)]
|
||||
pub struct FileRequest {
|
||||
/// Operation name. The module header lists `list`, `read`, `write`,
/// `delete`, `rename`, `mkdir`, `mkfile`, `move`, and `copy`.
pub op: String,
|
||||
/// Relative path within the instance root (the "subject" of the operation).
|
||||
#[serde(default)]
|
||||
pub path: String,
|
||||
/// Destination for `rename`, `move`, `copy` — relative to instance root.
|
||||
#[serde(default)]
|
||||
pub dest: Option<String>,
|
||||
/// Text content for `write`.
|
||||
#[serde(default)]
|
||||
pub content: Option<String>,
|
||||
/// Bare name of the directory or file to create for `mkdir` and `mkfile`.
|
||||
#[serde(default)]
|
||||
pub name: Option<String>,
|
||||
}
|
||||
|
||||
/// A single directory entry returned by `list`.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct FileEntry {
|
||||
/// Entry name — presumably the final path component; confirm against the
/// `list` implementation.
pub name: String,
|
||||
/// Path relative to the instance root, using forward slashes.
|
||||
pub path: String,
|
||||
/// `true` when the entry is a directory.
pub is_dir: bool,
|
||||
/// File size in bytes. Zero for directories.
|
||||
pub size: u64,
|
||||
/// RFC 3339 modification timestamp.
|
||||
pub modified: String,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Jail helper — the security core of this module
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Resolve `rel` against `root`, then canonicalize to reject any form of
|
||||
/// escape including `../..` traversals and symlinks that point outside root.
|
||||
///
|
||||
/// For paths that do not yet exist (e.g. write targets), we canonicalize the
|
||||
/// nearest existing ancestor and then re-join the remaining components, which
|
||||
/// are lexically-clean because they went through `std::path::Path` building.
|
||||
///
|
||||
/// Returns the absolute, canonicalized path if it is within `root`.
|
||||
pub fn jail(root: &Path, rel: &str) -> anyhow::Result<PathBuf> {
|
||||
// Canonicalize root once to get a stable prefix for comparison.
|
||||
// We do this on every call rather than caching so the function stays
|
||||
// pure and testable without Agent state.
|
||||
let canon_root = fs::canonicalize(root)
|
||||
.with_context(|| format!("canonicalize instance root '{}'", root.display()))?;
|
||||
|
||||
// Build the candidate absolute path. We use Path joining so that an
|
||||
// absolute `rel` (e.g. "/etc/passwd") replaces the root entirely — we
|
||||
// detect and reject that case immediately.
|
||||
let candidate = if rel.is_empty() || rel == "." {
|
||||
root.to_path_buf()
|
||||
} else {
|
||||
let rel_path = Path::new(rel);
|
||||
if rel_path.is_absolute() {
|
||||
bail!(
|
||||
"absolute path '{}' is not allowed; supply a path relative to the instance root",
|
||||
rel
|
||||
);
|
||||
}
|
||||
root.join(rel_path)
|
||||
};
|
||||
|
||||
// Normalize lexically first (removes `..` / `.` without filesystem access).
|
||||
// This is a defence-in-depth step; the authoritative check is below.
|
||||
let lexical = normalize_lexical(&candidate);
|
||||
|
||||
// Canonicalize: resolve symlinks and `..` via the kernel.
|
||||
// For a not-yet-existing path we walk up to the nearest existing ancestor.
|
||||
let canon = canonicalize_lenient(&lexical)?;
|
||||
|
||||
// Authoritative prefix check: the resolved path must be equal to or a
|
||||
// child of the canonicalized root.
|
||||
if canon != canon_root && !canon.starts_with(&canon_root) {
|
||||
bail!(
|
||||
"path '{}' resolves to '{}' which is outside the instance root '{}'",
|
||||
rel,
|
||||
canon.display(),
|
||||
canon_root.display()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(canon)
|
||||
}
|
||||
|
||||
/// Canonicalize a path that may not fully exist yet by walking up to the
|
||||
/// nearest existing ancestor, canonicalizing it, then re-joining the remaining
|
||||
/// (lexically-clean) suffix.
|
||||
fn canonicalize_lenient(path: &Path) -> anyhow::Result<PathBuf> {
|
||||
// Fast path: path already exists.
|
||||
if let Ok(c) = fs::canonicalize(path) {
|
||||
return Ok(c);
|
||||
}
|
||||
|
||||
// Walk up until we find an ancestor that exists.
|
||||
let mut existing = path.to_path_buf();
|
||||
let mut suffix: Vec<std::ffi::OsString> = Vec::new();
|
||||
|
||||
loop {
|
||||
match fs::canonicalize(&existing) {
|
||||
Ok(canon) => {
|
||||
// Re-attach the non-existing suffix.
|
||||
let mut result = canon;
|
||||
for component in suffix.iter().rev() {
|
||||
result = result.join(component);
|
||||
}
|
||||
return Ok(result);
|
||||
}
|
||||
Err(_) => {
|
||||
let file_name = match existing.file_name() {
|
||||
Some(n) => n.to_os_string(),
|
||||
None => bail!("cannot resolve path '{}'", path.display()),
|
||||
};
|
||||
suffix.push(file_name);
|
||||
existing = match existing.parent() {
|
||||
Some(p) => p.to_path_buf(),
|
||||
None => bail!("cannot resolve path '{}'", path.display()),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexically normalize a path (remove `.` and `..` components) without
/// touching the filesystem. This mirrors `filepath.Clean` in Go.
///
/// * `.` components are dropped.
/// * `..` cancels the preceding normal component.
/// * `..` directly after the root is dropped (`/..` is `/`).
/// * `..` that cannot be cancelled in a relative path (leading `..`, or
///   after a bare prefix) is kept.
fn normalize_lexical(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut kept: Vec<Component> = Vec::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match kept.last() {
                // `a/..` collapses to nothing.
                Some(Component::Normal(_)) => {
                    kept.pop();
                }
                // The parent of the root is the root itself.
                Some(Component::RootDir) => {}
                // Nothing to cancel against: keep the `..`.
                _ => kept.push(component),
            },
            other => kept.push(other),
        }
    }
    kept.iter().collect()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Operations
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// List the contents of a directory. Returns an entry per item, sorted
|
||||
/// (directories first, then files, both alphabetical).
|
||||
pub fn list(root: &Path, rel: &str) -> anyhow::Result<Vec<FileEntry>> {
|
||||
let abs = jail(root, rel)?;
|
||||
// Use the canonicalized root as the prefix for relative path computation so
|
||||
// that symlinked root paths (e.g. macOS /var → /private/var) don't cause
|
||||
// strip_prefix to fail and fall back to leaking the absolute path.
|
||||
let canon_root = fs::canonicalize(root)
|
||||
.with_context(|| format!("canonicalize root '{}'", root.display()))?;
|
||||
|
||||
let rd = fs::read_dir(&abs)
|
||||
.with_context(|| format!("read_dir '{}'", abs.display()))?;
|
||||
|
||||
let mut entries: Vec<FileEntry> = Vec::new();
|
||||
for item in rd {
|
||||
let item = item.with_context(|| format!("reading directory entry in '{}'", abs.display()))?;
|
||||
// symlink_metadata (lstat): report the link itself, never the target —
|
||||
// following it would leak the size/type/existence of files outside the
|
||||
// jail. A symlink lists as a zero-ish-size non-dir entry.
|
||||
let meta = fs::symlink_metadata(item.path())
|
||||
.with_context(|| format!("stat '{}'", item.path().display()))?;
|
||||
|
||||
let name = item.file_name().to_string_lossy().into_owned();
|
||||
let is_dir = meta.is_dir();
|
||||
let size = if is_dir { 0 } else { meta.len() };
|
||||
|
||||
// Build the relative path from the canonicalized root.
|
||||
let entry_abs = item.path();
|
||||
let entry_rel = entry_abs
|
||||
.strip_prefix(&canon_root)
|
||||
.unwrap_or(&entry_abs)
|
||||
.to_string_lossy()
|
||||
.replace('\\', "/");
|
||||
|
||||
let modified = meta
|
||||
.modified()
|
||||
.ok()
|
||||
.map(|t| {
|
||||
let dt: DateTime<Utc> = t.into();
|
||||
dt.to_rfc3339_opts(SecondsFormat::Secs, true)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
entries.push(FileEntry { name, path: entry_rel, is_dir, size, modified });
|
||||
}
|
||||
|
||||
// Stable sort: dirs first, then alphabetical within each group.
|
||||
entries.sort_by(|a, b| {
|
||||
b.is_dir.cmp(&a.is_dir).then_with(|| a.name.cmp(&b.name))
|
||||
});
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Read a text file. Capped at `MAX_READ_SIZE` bytes.
|
||||
pub fn read(root: &Path, rel: &str) -> anyhow::Result<String> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
let meta = fs::metadata(&abs)
|
||||
.with_context(|| format!("stat '{}'", abs.display()))?;
|
||||
|
||||
if meta.is_dir() {
|
||||
bail!("'{}' is a directory, not a file", rel);
|
||||
}
|
||||
if meta.len() > MAX_READ_SIZE {
|
||||
bail!(
|
||||
"file '{}' is {} bytes which exceeds the {} byte read limit",
|
||||
rel,
|
||||
meta.len(),
|
||||
MAX_READ_SIZE
|
||||
);
|
||||
}
|
||||
|
||||
fs::read_to_string(&abs).with_context(|| format!("read '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Write (create or overwrite) a file. Parent directories are created as
|
||||
/// needed.
|
||||
pub fn write(root: &Path, rel: &str, content: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
if let Some(parent) = abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
fs::write(&abs, content.as_bytes())
|
||||
.with_context(|| format!("write '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Delete a file or directory tree.
|
||||
pub fn delete(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
let meta = fs::metadata(&abs)
|
||||
.with_context(|| format!("stat '{}'", abs.display()))?;
|
||||
|
||||
if meta.is_dir() {
|
||||
fs::remove_dir_all(&abs).with_context(|| format!("remove_dir_all '{}'", abs.display()))
|
||||
} else {
|
||||
fs::remove_file(&abs).with_context(|| format!("remove_file '{}'", abs.display()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Rename/move `rel` to a new bare name (`new_name`) within the same parent.
|
||||
/// `new_name` must not contain path separators.
|
||||
pub fn rename(root: &Path, rel: &str, new_name: &str) -> anyhow::Result<()> {
|
||||
if new_name.is_empty() || new_name == "." || new_name == ".." {
|
||||
bail!("new_name '{}' is not a valid filename", new_name);
|
||||
}
|
||||
if new_name.contains('/') || new_name.contains('\\') {
|
||||
bail!("new_name '{}' must not contain path separators", new_name);
|
||||
}
|
||||
|
||||
let src_abs = jail(root, rel)?;
|
||||
|
||||
// Construct the destination relative path by replacing the filename part
|
||||
// of `rel` with `new_name`. This keeps everything in relative-path space
|
||||
// so we never hand an absolute path to `jail`.
|
||||
let src_rel = Path::new(rel);
|
||||
let dest_rel = match src_rel.parent() {
|
||||
Some(parent) if parent != Path::new("") => {
|
||||
parent.join(new_name).to_string_lossy().replace('\\', "/")
|
||||
}
|
||||
_ => new_name.to_string(),
|
||||
};
|
||||
|
||||
let dest_abs = jail(root, &dest_rel)?;
|
||||
|
||||
fs::rename(&src_abs, &dest_abs)
|
||||
.with_context(|| format!("rename '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Create a directory (and any missing parents) at `rel`.
|
||||
pub fn mkdir(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
fs::create_dir_all(&abs).with_context(|| format!("mkdir '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Create an empty file at `rel`. Fails if it already exists.
|
||||
pub fn mkfile(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
if let Some(parent) = abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
let _ = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(&abs)
|
||||
.with_context(|| format!("mkfile '{}'", abs.display()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Move `src` to `dest` (both relative to root).
|
||||
pub fn move_path(root: &Path, src: &str, dest: &str) -> anyhow::Result<()> {
|
||||
let src_abs = jail(root, src)?;
|
||||
let dest_abs = jail(root, dest)?;
|
||||
|
||||
if let Some(parent) = dest_abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
fs::rename(&src_abs, &dest_abs).or_else(|_| {
|
||||
// Cross-device move: copy then delete.
|
||||
copy_recursive(&src_abs, &dest_abs)?;
|
||||
fs::remove_dir_all(&src_abs)
|
||||
.with_context(|| format!("remove source '{}' after cross-device move", src_abs.display()))
|
||||
}).with_context(|| format!("move '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Copy `src` to `dest` (both relative to root).
|
||||
pub fn copy(root: &Path, src: &str, dest: &str) -> anyhow::Result<()> {
|
||||
let src_abs = jail(root, src)?;
|
||||
let dest_abs = jail(root, dest)?;
|
||||
|
||||
if let Some(parent) = dest_abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
copy_recursive(&src_abs, &dest_abs)
|
||||
.with_context(|| format!("copy '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Recursive copy helper.
|
||||
///
|
||||
/// SECURITY: uses `symlink_metadata` (does NOT follow symlinks) and refuses to
|
||||
/// copy any symlink. `jail()` only validates the top-level src/dest; a symlink
|
||||
/// *inside* a copied directory that points outside the jail would, if followed,
|
||||
/// pull external content (e.g. `/etc`) into the jail where it could then be
|
||||
/// read — a jail-escape exfiltration. Refusing symlinks closes that path.
|
||||
fn copy_recursive(src: &Path, dest: &Path) -> anyhow::Result<()> {
|
||||
let meta = fs::symlink_metadata(src)
|
||||
.with_context(|| format!("stat source '{}'", src.display()))?;
|
||||
|
||||
if meta.file_type().is_symlink() {
|
||||
bail!(
|
||||
"refusing to copy symlink '{}' — symlinks are not followed across the jail boundary",
|
||||
src.display()
|
||||
);
|
||||
}
|
||||
|
||||
if meta.is_dir() {
|
||||
fs::create_dir_all(dest)
|
||||
.with_context(|| format!("create_dir_all '{}'", dest.display()))?;
|
||||
|
||||
for entry in fs::read_dir(src)
|
||||
.with_context(|| format!("read_dir '{}'", src.display()))?
|
||||
{
|
||||
let entry = entry?;
|
||||
copy_recursive(&entry.path(), &dest.join(entry.file_name()))?;
|
||||
}
|
||||
} else {
|
||||
fs::copy(src, dest)
|
||||
.with_context(|| format!("copy '{}' -> '{}'", src.display(), dest.display()))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// NATS request dispatch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Dispatch a `FileRequest` against `root` and return a JSON `serde_json::Value`
|
||||
/// ready for the NATS reply.
|
||||
pub fn dispatch(root: &Path, req: &FileRequest) -> serde_json::Value {
|
||||
use serde_json::json;
|
||||
|
||||
let result = match req.op.as_str() {
|
||||
"list" => {
|
||||
list(root, &req.path).map(|entries| json!({ "entries": entries }))
|
||||
}
|
||||
"read" => {
|
||||
read(root, &req.path).map(|content| json!({ "content": content }))
|
||||
}
|
||||
"write" => {
|
||||
let content = req.content.as_deref().unwrap_or("");
|
||||
write(root, &req.path, content).map(|_| json!(null))
|
||||
}
|
||||
"delete" => {
|
||||
delete(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"rename" => {
|
||||
let new_name = req.name.as_deref().unwrap_or("");
|
||||
rename(root, &req.path, new_name).map(|_| json!(null))
|
||||
}
|
||||
"mkdir" => {
|
||||
mkdir(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"mkfile" => {
|
||||
mkfile(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"move" => {
|
||||
let dest = req.dest.as_deref().unwrap_or("");
|
||||
move_path(root, &req.path, dest).map(|_| json!(null))
|
||||
}
|
||||
"copy" => {
|
||||
let dest = req.dest.as_deref().unwrap_or("");
|
||||
copy(root, &req.path, dest).map(|_| json!(null))
|
||||
}
|
||||
other => Err(anyhow::anyhow!(
|
||||
"unknown op '{}' (supported: list, read, write, delete, rename, mkdir, mkfile, move, copy)",
|
||||
other
|
||||
)),
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(data) => json!({ "status": "success", "data": data }),
|
||||
Err(e) => {
|
||||
tracing::warn!("filemanager op='{}' path='{}': {e:#}", req.op, req.path);
|
||||
json!({ "status": "error", "message": format!("{e:#}") })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Subscribe to `corrosion.{license}.{instance}.files.cmd` and serve file
|
||||
/// manager requests for `instance_id` jailed to `root`.
|
||||
///
|
||||
/// This function runs until the agent's cancellation token fires or the NATS
|
||||
/// subscription ends. It is spawned once per instance in `main.rs`.
|
||||
pub async fn run(
|
||||
agent: std::sync::Arc<crate::agent::Agent>,
|
||||
instance_id: String,
|
||||
root: PathBuf,
|
||||
) -> anyhow::Result<()> {
|
||||
use futures::StreamExt;
|
||||
|
||||
let subject = crate::subjects::instance_files_cmd(&agent.cfg.license_id, &instance_id);
|
||||
let mut sub = agent.nats.subscribe(subject.clone()).await?;
|
||||
tracing::info!("file manager handler listening on {subject}");
|
||||
|
||||
let cancel = agent.shutdown.clone();
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = sub.next() => {
|
||||
match msg {
|
||||
Some(msg) => {
|
||||
let agent = agent.clone();
|
||||
let root = root.clone();
|
||||
let instance_id = instance_id.clone();
|
||||
tokio::spawn(async move { handle(agent, &instance_id, &root, msg).await });
|
||||
}
|
||||
None => {
|
||||
tracing::warn!("file manager subscription ended for '{instance_id}'");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("file manager handler stopping for '{instance_id}'");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle(
|
||||
agent: std::sync::Arc<crate::agent::Agent>,
|
||||
instance_id: &str,
|
||||
root: &Path,
|
||||
msg: async_nats::Message,
|
||||
) {
|
||||
let Some(reply) = msg.reply.clone() else {
|
||||
tracing::warn!("file manager message without reply subject ignored (instance '{instance_id}')");
|
||||
return;
|
||||
};
|
||||
|
||||
let response = match serde_json::from_slice::<FileRequest>(&msg.payload) {
|
||||
Ok(req) => {
|
||||
// Blocking fs calls — offload from the async executor.
|
||||
let root = root.to_path_buf();
|
||||
tokio::task::spawn_blocking(move || dispatch(&root, &req))
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
serde_json::json!({ "status": "error", "message": format!("internal error: {e}") })
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
serde_json::json!({ "status": "error", "message": format!("invalid request payload: {e}") })
|
||||
}
|
||||
};
|
||||
|
||||
let bytes = match serde_json::to_vec(&response) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::error!("file manager response serialize failed: {e}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
|
||||
tracing::warn!("file manager response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
276
corrosion-host-agent/src/instancecmd.rs
Normal file
276
corrosion-host-agent/src/instancecmd.rs
Normal file
@@ -0,0 +1,276 @@
|
||||
//! Per-instance command channel + state-change events.
|
||||
//!
|
||||
//! Each process-managed instance gets a request-reply subscriber on
|
||||
//! `corrosion.{license}.{instance_id}.cmd` (funcs: start/stop/restart/status/rcon)
|
||||
//! and a publisher task that pushes every supervisor state change to
|
||||
//! `corrosion.{license}.{instance_id}.status` — the panel sees crashes when
|
||||
//! they happen, not when the next heartbeat ambles in.
|
||||
|
||||
use chrono::{SecondsFormat, Utc};
|
||||
use futures::StreamExt;
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::agent::Agent;
|
||||
use crate::process::ProcessSupervisor;
|
||||
use crate::subjects;
|
||||
use crate::steamcmd;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct InstanceCommand {
|
||||
func: String,
|
||||
/// Payload for funcs that carry a text argument (e.g. rcon).
|
||||
#[serde(default)]
|
||||
command: Option<String>,
|
||||
}
|
||||
|
||||
/// Forward every supervisor state change as a status event.
|
||||
pub async fn publish_state_changes(agent: Arc<Agent>, sup: Arc<ProcessSupervisor>) {
|
||||
let subject = subjects::instance_status(&agent.cfg.license_id, &sup.instance_id);
|
||||
let mut rx = sup.watch_state();
|
||||
let cancel = agent.shutdown.clone();
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
changed = rx.changed() => {
|
||||
if changed.is_err() {
|
||||
break;
|
||||
}
|
||||
let state = rx.borrow().clone();
|
||||
let event = json!({
|
||||
"timestamp": Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
"instance_id": sup.instance_id,
|
||||
"event": state,
|
||||
});
|
||||
match serde_json::to_vec(&event) {
|
||||
Ok(bytes) => {
|
||||
if let Err(e) = agent.nats.publish(subject.clone(), bytes.into()).await {
|
||||
tracing::warn!("status publish failed for '{}': {e}", sup.instance_id);
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::error!("status serialize failed: {e}"),
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Request-reply command handler for one instance.
|
||||
pub async fn run(agent: Arc<Agent>, sup: Arc<ProcessSupervisor>) -> anyhow::Result<()> {
|
||||
let subject = subjects::instance_cmd(&agent.cfg.license_id, &sup.instance_id);
|
||||
let mut sub = agent.nats.subscribe(subject.clone()).await?;
|
||||
tracing::info!("instance command handler listening on {subject}");
|
||||
|
||||
let cancel = agent.shutdown.clone();
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = sub.next() => {
|
||||
match msg {
|
||||
Some(msg) => {
|
||||
let agent = agent.clone();
|
||||
let sup = sup.clone();
|
||||
tokio::spawn(async move { handle(agent, sup, msg).await });
|
||||
}
|
||||
None => {
|
||||
tracing::warn!("instance command subscription ended for '{}'", sup.instance_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("instance command handler stopping for '{}'", sup.instance_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle(agent: Arc<Agent>, sup: Arc<ProcessSupervisor>, msg: async_nats::Message) {
|
||||
let Some(reply) = msg.reply.clone() else {
|
||||
tracing::warn!("instance command without reply subject ignored");
|
||||
return;
|
||||
};
|
||||
|
||||
let response = match serde_json::from_slice::<InstanceCommand>(&msg.payload) {
|
||||
Ok(cmd) => dispatch(&agent, &sup, &cmd).await,
|
||||
Err(e) => json!({ "status": "error", "message": format!("invalid command payload: {e}") }),
|
||||
};
|
||||
|
||||
let bytes = match serde_json::to_vec(&response) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::error!("response serialize failed: {e}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
|
||||
tracing::warn!("response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn dispatch(
|
||||
agent: &Arc<Agent>,
|
||||
sup: &Arc<ProcessSupervisor>,
|
||||
cmd: &InstanceCommand,
|
||||
) -> serde_json::Value {
|
||||
let func = cmd.func.as_str();
|
||||
|
||||
let outcome = match func {
|
||||
"start" => sup.start().await.map(|_| "starting"),
|
||||
"stop" => sup.stop().await.map(|_| "stopped"),
|
||||
"restart" => sup.restart().await.map(|_| "restarted"),
|
||||
"status" => {
|
||||
return json!({
|
||||
"status": "success",
|
||||
"func": "status",
|
||||
"instance_id": sup.instance_id,
|
||||
"state": sup.state(),
|
||||
"uptime_seconds": sup.uptime_seconds().await,
|
||||
});
|
||||
}
|
||||
"rcon" => {
|
||||
// Look up the InstanceConfig for this supervisor so we can access
|
||||
// rcon settings and the game name without changing the supervisor's
|
||||
// data model.
|
||||
let inst_cfg = agent
|
||||
.cfg
|
||||
.instances
|
||||
.iter()
|
||||
.find(|i| i.id == sup.instance_id);
|
||||
|
||||
let rcon_cfg = inst_cfg.and_then(|i| i.rcon.as_ref());
|
||||
let Some(rcon_cfg) = rcon_cfg else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id,
|
||||
"message": format!("instance '{}' has no rcon configured", sup.instance_id),
|
||||
});
|
||||
};
|
||||
|
||||
let Some(command) = cmd.command.as_deref() else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id,
|
||||
"message": "rcon func requires a 'command' field",
|
||||
});
|
||||
};
|
||||
|
||||
let game = inst_cfg.map(|i| i.game.as_str()).unwrap_or("rust");
|
||||
return match crate::rcon::send_command(rcon_cfg, game, command).await {
|
||||
Ok(output) => json!({
|
||||
"status": "success",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id,
|
||||
"output": output,
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id,
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
};
|
||||
}
|
||||
"steam_update" => {
|
||||
// Look up instance config for game name, root, and optional steamcmd
|
||||
// settings. The supervisor only carries process-control state, not
|
||||
// the full config, so we reach into agent.cfg.instances here as the
|
||||
// rcon dispatch does.
|
||||
let inst_cfg = agent.cfg.instances.iter().find(|i| i.id == sup.instance_id);
|
||||
|
||||
let Some(inst_cfg) = inst_cfg else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id,
|
||||
"message": format!("no config found for instance '{}'", sup.instance_id),
|
||||
});
|
||||
};
|
||||
|
||||
let game = inst_cfg.game.as_str();
|
||||
let root = inst_cfg.root.clone();
|
||||
|
||||
// Resolve steamcmd path and validate flag from config or use defaults.
|
||||
let (steamcmd_path, validate) = match inst_cfg.steamcmd.as_ref() {
|
||||
Some(s) => {
|
||||
let path = s
|
||||
.steamcmd_path
|
||||
.as_ref()
|
||||
.and_then(|p| p.to_str().map(|s| s.to_string()))
|
||||
.unwrap_or_else(|| "steamcmd".to_string());
|
||||
(path, s.validate)
|
||||
}
|
||||
None => ("steamcmd".to_string(), false),
|
||||
};
|
||||
|
||||
let license = agent.cfg.license_id.clone();
|
||||
let instance_id = sup.instance_id.clone();
|
||||
let nats = agent.nats.clone();
|
||||
|
||||
// Publish each progress line to the steam_status subject.
|
||||
let on_progress = move |line: &str| {
|
||||
let subject = subjects::instance_steam_status(&license, &instance_id);
|
||||
let event = json!({
|
||||
"timestamp": Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
"instance_id": instance_id,
|
||||
"line": line,
|
||||
});
|
||||
match serde_json::to_vec(&event) {
|
||||
Ok(bytes) => {
|
||||
// Fire-and-forget; the async publish is non-blocking on
|
||||
// the caller side. We create a mini-runtime task via
|
||||
// a oneshot since on_progress is Fn (not async).
|
||||
let nats = nats.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = nats.publish(subject, bytes.into()).await {
|
||||
tracing::warn!("steam_status publish failed: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(e) => tracing::error!("steam_status serialize failed: {e}"),
|
||||
}
|
||||
};
|
||||
|
||||
return match steamcmd::update(game, &root, &steamcmd_path, validate, on_progress).await {
|
||||
Ok(()) => json!({
|
||||
"status": "success",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id,
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id,
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
};
|
||||
}
|
||||
other => {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"message": format!("unknown func '{other}' (supported: start, stop, restart, status, rcon, steam_update)"),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
match outcome {
|
||||
Ok(result) => json!({
|
||||
"status": "success",
|
||||
"func": func,
|
||||
"instance_id": sup.instance_id,
|
||||
"result": result,
|
||||
"state": sup.state(),
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": func,
|
||||
"instance_id": sup.instance_id,
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
}
|
||||
}
|
||||
16
corrosion-host-agent/src/lib.rs
Normal file
16
corrosion-host-agent/src/lib.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
//! Corrosion Host Agent library surface — modules are public so integration
|
||||
//! tests can drive subsystems (notably the process supervisor) directly.
|
||||
|
||||
pub mod agent;
|
||||
pub mod bus;
|
||||
pub mod config;
|
||||
pub mod filemanager;
|
||||
pub mod hostcmd;
|
||||
pub mod instancecmd;
|
||||
pub mod prober;
|
||||
pub mod process;
|
||||
pub mod rcon;
|
||||
pub mod steamcmd;
|
||||
pub mod subjects;
|
||||
pub mod telemetry;
|
||||
pub mod version;
|
||||
@@ -4,14 +4,10 @@
|
||||
//! connectivity prober, host command channel. Process control, file ops, and
|
||||
//! game adapters arrive in Phase 1+ (see PROTOCOL.md).
|
||||
|
||||
mod agent;
|
||||
mod bus;
|
||||
mod config;
|
||||
mod hostcmd;
|
||||
mod prober;
|
||||
mod subjects;
|
||||
mod telemetry;
|
||||
mod version;
|
||||
use corrosion_host_agent::{
|
||||
agent, bus, config, filemanager, hostcmd, instancecmd, prober, process, subjects, telemetry,
|
||||
version,
|
||||
};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{Parser, Subcommand};
|
||||
@@ -96,11 +92,18 @@ async fn run(settings: config::Settings) -> Result<()> {
|
||||
|
||||
let nats = bus::connect(&settings).await?;
|
||||
|
||||
let supervisors = settings
|
||||
.instances
|
||||
.iter()
|
||||
.map(|inst| (inst.id.clone(), process::ProcessSupervisor::new(inst)))
|
||||
.collect();
|
||||
|
||||
let agent = Arc::new(Agent {
|
||||
cfg: settings,
|
||||
nats,
|
||||
started: Instant::now(),
|
||||
last_probe: RwLock::new(None),
|
||||
supervisors,
|
||||
shutdown: CancellationToken::new(),
|
||||
});
|
||||
|
||||
@@ -115,6 +118,39 @@ async fn run(settings: config::Settings) -> Result<()> {
|
||||
}
|
||||
}));
|
||||
}
|
||||
for (instance_id, sup) in &agent.supervisors {
|
||||
{
|
||||
let agent = agent.clone();
|
||||
let sup = sup.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
if let Err(e) = instancecmd::run(agent, sup).await {
|
||||
tracing::error!("instance command handler failed: {e:#}");
|
||||
}
|
||||
}));
|
||||
}
|
||||
handles.push(tokio::spawn(instancecmd::publish_state_changes(
|
||||
agent.clone(),
|
||||
sup.clone(),
|
||||
)));
|
||||
// File manager: one handler task per instance, jailed to root.
|
||||
{
|
||||
let agent = agent.clone();
|
||||
let inst_cfg = agent
|
||||
.cfg
|
||||
.instances
|
||||
.iter()
|
||||
.find(|i| &i.id == instance_id)
|
||||
.cloned();
|
||||
if let Some(cfg) = inst_cfg {
|
||||
let id = instance_id.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
if let Err(e) = filemanager::run(agent, id, cfg.root).await {
|
||||
tracing::error!("file manager handler failed: {e:#}");
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wait_for_shutdown_signal().await;
|
||||
tracing::info!("shutdown signal received");
|
||||
|
||||
278
corrosion-host-agent/src/process.rs
Normal file
278
corrosion-host-agent/src/process.rs
Normal file
@@ -0,0 +1,278 @@
|
||||
//! Per-instance game-server process supervision.
|
||||
//!
|
||||
//! One `ProcessSupervisor` per process-managed instance. Lifecycle mirrors the
|
||||
//! proven Go agent behavior — graceful SIGTERM with a 30s budget before force
|
||||
//! kill, a monitor task that reaps the child and records crash-vs-stop — with
|
||||
//! two fixes the Go version needed: args are a proper list (no naive space
|
||||
//! splitting), and every state change is observable through a watch channel
|
||||
//! so the panel gets push events instead of waiting for the next heartbeat.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use serde::Serialize;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::process::{Child, Command};
|
||||
use tokio::sync::{watch, Mutex};
|
||||
|
||||
use crate::config::InstanceConfig;
|
||||
|
||||
const GRACEFUL_STOP_BUDGET: Duration = Duration::from_secs(30);
|
||||
const RESTART_PAUSE: Duration = Duration::from_secs(2);
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "state")]
|
||||
pub enum InstanceState {
|
||||
/// Not process-managed (no executable configured).
|
||||
Unmanaged,
|
||||
Stopped,
|
||||
Starting,
|
||||
Running,
|
||||
Stopping,
|
||||
/// Process exited without a stop request.
|
||||
Crashed {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
exit_code: Option<i32>,
|
||||
},
|
||||
}
|
||||
|
||||
impl InstanceState {
|
||||
pub fn as_label(&self) -> &'static str {
|
||||
match self {
|
||||
InstanceState::Unmanaged => "unmanaged",
|
||||
InstanceState::Stopped => "stopped",
|
||||
InstanceState::Starting => "starting",
|
||||
InstanceState::Running => "running",
|
||||
InstanceState::Stopping => "stopping",
|
||||
InstanceState::Crashed { .. } => "crashed",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
child: Option<Child>,
|
||||
started_at: Option<Instant>,
|
||||
/// True while a stop was requested — the monitor uses it to distinguish
|
||||
/// an ordered shutdown from a crash.
|
||||
stop_requested: bool,
|
||||
}
|
||||
|
||||
pub struct ProcessSupervisor {
|
||||
pub instance_id: String,
|
||||
executable: Option<PathBuf>,
|
||||
args: Vec<String>,
|
||||
working_dir: Option<PathBuf>,
|
||||
inner: Mutex<Inner>,
|
||||
state_tx: watch::Sender<InstanceState>,
|
||||
}
|
||||
|
||||
impl ProcessSupervisor {
|
||||
pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
|
||||
let executable = cfg.resolved_executable();
|
||||
let initial = if executable.is_some() {
|
||||
InstanceState::Stopped
|
||||
} else {
|
||||
InstanceState::Unmanaged
|
||||
};
|
||||
let (state_tx, _) = watch::channel(initial);
|
||||
Arc::new(Self {
|
||||
instance_id: cfg.id.clone(),
|
||||
executable,
|
||||
args: cfg.args.clone(),
|
||||
working_dir: cfg.working_dir.clone(),
|
||||
inner: Mutex::new(Inner {
|
||||
child: None,
|
||||
started_at: None,
|
||||
stop_requested: false,
|
||||
}),
|
||||
state_tx,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn state(&self) -> InstanceState {
|
||||
self.state_tx.borrow().clone()
|
||||
}
|
||||
|
||||
pub fn watch_state(&self) -> watch::Receiver<InstanceState> {
|
||||
self.state_tx.subscribe()
|
||||
}
|
||||
|
||||
pub async fn uptime_seconds(&self) -> u64 {
|
||||
let inner = self.inner.lock().await;
|
||||
match (&*self.state_tx.borrow(), inner.started_at) {
|
||||
(InstanceState::Running, Some(t)) => t.elapsed().as_secs(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn start(self: &Arc<Self>) -> Result<()> {
|
||||
let Some(exe) = self.executable.clone() else {
|
||||
bail!("instance '{}' has no executable configured", self.instance_id);
|
||||
};
|
||||
if !exe.exists() {
|
||||
bail!("executable not found: {}", exe.display());
|
||||
}
|
||||
|
||||
let mut inner = self.inner.lock().await;
|
||||
if matches!(*self.state_tx.borrow(), InstanceState::Running | InstanceState::Starting) {
|
||||
bail!("instance '{}' is already running", self.instance_id);
|
||||
}
|
||||
|
||||
self.set_state(InstanceState::Starting);
|
||||
|
||||
let workdir = self
|
||||
.working_dir
|
||||
.clone()
|
||||
.or_else(|| exe.parent().map(|p| p.to_path_buf()))
|
||||
.unwrap_or_else(|| PathBuf::from("."));
|
||||
|
||||
let child = Command::new(&exe)
|
||||
.args(&self.args)
|
||||
.current_dir(&workdir)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::inherit())
|
||||
.stderr(Stdio::inherit())
|
||||
.spawn()
|
||||
.with_context(|| format!("spawning {}", exe.display()))?;
|
||||
|
||||
let pid = child.id();
|
||||
inner.child = Some(child);
|
||||
inner.started_at = Some(Instant::now());
|
||||
inner.stop_requested = false;
|
||||
drop(inner);
|
||||
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!(
|
||||
"instance '{}' started: {} (pid {:?})",
|
||||
self.instance_id,
|
||||
exe.display(),
|
||||
pid
|
||||
);
|
||||
|
||||
// Monitor: reap the child and classify the exit.
|
||||
let sup = Arc::clone(self);
|
||||
tokio::spawn(async move { sup.monitor().await });
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn monitor(self: Arc<Self>) {
|
||||
// Take a waiter without holding the lock across the whole child
|
||||
// lifetime: Child::wait needs &mut, so the child stays in inner and
|
||||
// we poll it.
|
||||
loop {
|
||||
let status = {
|
||||
let mut inner = self.inner.lock().await;
|
||||
let Some(child) = inner.child.as_mut() else { return };
|
||||
match child.try_wait() {
|
||||
Ok(Some(status)) => Some(status),
|
||||
Ok(None) => None,
|
||||
Err(e) => {
|
||||
tracing::error!("instance '{}' wait failed: {e}", self.instance_id);
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match status {
|
||||
Some(status) => {
|
||||
let mut inner = self.inner.lock().await;
|
||||
inner.child = None;
|
||||
inner.started_at = None;
|
||||
let ordered = inner.stop_requested;
|
||||
inner.stop_requested = false;
|
||||
drop(inner);
|
||||
|
||||
if ordered {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
tracing::info!("instance '{}' stopped ({status})", self.instance_id);
|
||||
} else {
|
||||
let exit_code = status.code();
|
||||
self.set_state(InstanceState::Crashed { exit_code });
|
||||
tracing::warn!(
|
||||
"instance '{}' exited unexpectedly ({status}) — marked crashed",
|
||||
self.instance_id
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
None => tokio::time::sleep(Duration::from_millis(500)).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn stop(self: &Arc<Self>) -> Result<()> {
|
||||
let mut inner = self.inner.lock().await;
|
||||
if inner.child.is_none() {
|
||||
bail!("instance '{}' is not running", self.instance_id);
|
||||
}
|
||||
inner.stop_requested = true;
|
||||
self.set_state(InstanceState::Stopping);
|
||||
let child = inner.child.as_mut().expect("checked above");
|
||||
|
||||
// Graceful first: SIGTERM on unix; Windows has no SIGTERM equivalent
|
||||
// for console processes, so it goes straight to kill there.
|
||||
#[cfg(unix)]
|
||||
if let Some(pid) = child.id() {
|
||||
unsafe {
|
||||
libc::kill(pid as i32, libc::SIGTERM);
|
||||
}
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = child.start_kill();
|
||||
}
|
||||
drop(inner);
|
||||
|
||||
// Wait for the monitor to observe the exit; force kill on budget.
|
||||
let mut rx = self.watch_state();
|
||||
let deadline = tokio::time::timeout(GRACEFUL_STOP_BUDGET, async {
|
||||
loop {
|
||||
if matches!(*rx.borrow(), InstanceState::Stopped) {
|
||||
return;
|
||||
}
|
||||
if rx.changed().await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
if deadline.is_err() {
|
||||
tracing::warn!(
|
||||
"instance '{}' ignored SIGTERM for {}s — force killing",
|
||||
self.instance_id,
|
||||
GRACEFUL_STOP_BUDGET.as_secs()
|
||||
);
|
||||
let mut inner = self.inner.lock().await;
|
||||
if let Some(child) = inner.child.as_mut() {
|
||||
let _ = child.start_kill();
|
||||
}
|
||||
drop(inner);
|
||||
|
||||
let mut rx = self.watch_state();
|
||||
let _ = tokio::time::timeout(Duration::from_secs(5), async {
|
||||
while !matches!(*rx.borrow(), InstanceState::Stopped) {
|
||||
if rx.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn restart(self: &Arc<Self>) -> Result<()> {
|
||||
if !matches!(*self.state_tx.borrow(), InstanceState::Stopped | InstanceState::Crashed { .. } | InstanceState::Unmanaged) {
|
||||
self.stop().await?;
|
||||
}
|
||||
tokio::time::sleep(RESTART_PAUSE).await;
|
||||
self.start().await
|
||||
}
|
||||
|
||||
/// Publish `state` on the watch channel, replacing the previous value.
fn set_state(&self, state: InstanceState) {
    // send_replace never fails even with zero receivers; the value it
    // returns is the state being replaced, which is not needed here.
    drop(self.state_tx.send_replace(state));
}
|
||||
}
|
||||
320
corrosion-host-agent/src/rcon.rs
Normal file
320
corrosion-host-agent/src/rcon.rs
Normal file
@@ -0,0 +1,320 @@
|
||||
//! RCON client: game-server remote-console over WebRCON (Rust) or Source RCON (Conan/Soulmask).
|
||||
//!
|
||||
//! The agent runs co-located with the game server, so every connection targets
|
||||
//! 127.0.0.1 — no TLS is needed and latency is sub-millisecond. Two protocols
|
||||
//! are supported because the Rust game ships its own WebSocket-based WebRCON
|
||||
//! while Conan Exiles and Soulmask use the Valve Source RCON wire format over
|
||||
//! plain TCP.
|
||||
//!
|
||||
//! The protocol selection is explicit in the config (`kind`) but can be inferred
|
||||
//! from the game name when absent — callers supply the `game` field they already
|
||||
//! have in `InstanceConfig`.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use rand::Rng;
|
||||
use serde::Deserialize;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::{timeout, Duration};
|
||||
|
||||
/// WebRCON is the Facepunch WebSocket protocol (Rust game).
|
||||
/// Source RCON is the Valve wire protocol used by Conan Exiles and Soulmask.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum RconKind {
|
||||
WebRcon,
|
||||
Source,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct RconConfig {
|
||||
/// Protocol override. When absent the kind is resolved from `game`.
|
||||
#[serde(default)]
|
||||
pub kind: Option<RconKind>,
|
||||
pub port: u16,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
impl RconConfig {
|
||||
/// Resolve the concrete protocol, falling back to a per-game default when
|
||||
/// `kind` is not set. rust → WebRcon; conan + soulmask → Source.
|
||||
pub fn resolved_kind(&self, game: &str) -> RconKind {
|
||||
if let Some(k) = self.kind {
|
||||
return k;
|
||||
}
|
||||
match game {
|
||||
"conan" | "soulmask" => RconKind::Source,
|
||||
// rust is the primary game; anything unknown defaults to WebRcon
|
||||
// — operators can always override with an explicit `kind`.
|
||||
_ => RconKind::WebRcon,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
const RESPONSE_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Send `command` to the game server and return its text response.
|
||||
///
|
||||
/// The agent runs on the same host as the game server, so the target address
|
||||
/// is always 127.0.0.1:{port}. Connection and response deadlines are fixed at
|
||||
/// 5 s and 10 s respectively — enough headroom for a loaded server while still
|
||||
/// catching hung connections quickly.
|
||||
pub async fn send_command(cfg: &RconConfig, game: &str, command: &str) -> Result<String> {
|
||||
match cfg.resolved_kind(game) {
|
||||
RconKind::WebRcon => webrcon_exec(cfg, command).await,
|
||||
RconKind::Source => source_rcon_exec(cfg, command).await,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRCON (Rust game) — WebSocket JSON protocol
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// WebRCON request/response envelope. The server also emits chat/log frames
|
||||
/// on this socket with Identifier == 0; those are skipped.
|
||||
#[derive(serde::Serialize)]
|
||||
struct WebRconRequest<'a> {
|
||||
#[serde(rename = "Identifier")]
|
||||
identifier: i32,
|
||||
#[serde(rename = "Message")]
|
||||
message: &'a str,
|
||||
#[serde(rename = "Name")]
|
||||
name: &'static str,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct WebRconResponse {
|
||||
#[serde(rename = "Identifier")]
|
||||
identifier: i32,
|
||||
#[serde(rename = "Message")]
|
||||
message: String,
|
||||
}
|
||||
|
||||
async fn webrcon_exec(cfg: &RconConfig, command: &str) -> Result<String> {
|
||||
use tokio_tungstenite::connect_async;
|
||||
use tokio_tungstenite::tungstenite::Message as WsMsg;
|
||||
|
||||
// The Rust game server embeds the password in the WebSocket URL path —
|
||||
// never interpolate the real URL into errors or logs.
|
||||
let url = format!("ws://127.0.0.1:{}/{}", cfg.port, cfg.password);
|
||||
let redacted = format!("ws://127.0.0.1:{}/<redacted>", cfg.port);
|
||||
|
||||
// Wrap the entire connection + exchange in the connect timeout — we want
|
||||
// the timeout to cover TCP handshake + WS upgrade, not just the send.
|
||||
let (mut ws, _) = timeout(CONNECT_TIMEOUT, connect_async(&url))
|
||||
.await
|
||||
.context("connect timeout")?
|
||||
.with_context(|| format!("WebRCON connect to {redacted}"))?;
|
||||
|
||||
// Use a random positive i32 so correlation is unambiguous even when
|
||||
// multiple callers share a port (future concurrency).
|
||||
let id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
let req = WebRconRequest { identifier: id, message: command, name: "Corrosion" };
|
||||
let payload = serde_json::to_string(&req).context("serialize WebRCON request")?;
|
||||
|
||||
ws.send(WsMsg::Text(payload))
|
||||
.await
|
||||
.context("send WebRCON command")?;
|
||||
|
||||
tracing::debug!("WebRCON sent id={id} command={command:?}");
|
||||
|
||||
// Read frames until we see our Identifier — skip chat/log noise (id 0 or
|
||||
// any other value that isn't ours).
|
||||
let result = timeout(RESPONSE_TIMEOUT, async {
|
||||
loop {
|
||||
match ws.next().await {
|
||||
Some(Ok(WsMsg::Text(text))) => {
|
||||
match serde_json::from_str::<WebRconResponse>(&text) {
|
||||
Ok(resp) if resp.identifier == id => return Ok(resp.message),
|
||||
Ok(_) => {
|
||||
// Not our response (chat, log, another caller's frame).
|
||||
tracing::trace!("WebRCON skipping frame with different Identifier");
|
||||
continue;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::trace!("WebRCON non-JSON frame ignored: {e}");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(Ok(WsMsg::Close(_))) => bail!("WebRCON server closed connection"),
|
||||
Some(Ok(_)) => continue, // binary/ping/pong — skip
|
||||
Some(Err(e)) => return Err(anyhow::anyhow!(e).context("WebRCON read error")),
|
||||
None => bail!("WebRCON stream ended without response"),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.context("WebRCON response timeout")??;
|
||||
|
||||
// Close cleanly; a send error here is cosmetic — we already have our data.
|
||||
let _ = ws.close(None).await;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source RCON (Conan Exiles, Soulmask) — Valve TCP binary protocol
|
||||
//
|
||||
// Packet layout (all fields little-endian):
|
||||
// i32 size — byte count of the remaining packet (id + type + body + 2 nulls)
|
||||
// i32 id — caller-chosen correlation id; auth failure returns -1
|
||||
// i32 type — 0=RESPONSE_VALUE, 2=EXECCOMMAND/AUTH_RESPONSE, 3=AUTH
|
||||
// [u8] body — UTF-8 command or response text
|
||||
// u8 0x00 — body null terminator
|
||||
// u8 0x00 — padding null terminator
|
||||
//
|
||||
// Multi-packet handling: after sending the command we also send an empty
|
||||
// RESPONSE_VALUE probe with a distinct id. We collect all RESPONSE_VALUE
|
||||
// packets belonging to the command id and stop when we receive the probe's
|
||||
// response. This is the standard technique specified in the Valve wiki.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const RCON_TYPE_AUTH: i32 = 3;
|
||||
const RCON_TYPE_AUTH_RESPONSE: i32 = 2;
|
||||
const RCON_TYPE_EXECCOMMAND: i32 = 2;
|
||||
const RCON_TYPE_RESPONSE_VALUE: i32 = 0;
|
||||
|
||||
/// Maximum accumulated response body (guards against misbehaving servers).
|
||||
const MAX_RESPONSE_BYTES: usize = 1024 * 1024; // 1 MiB
|
||||
|
||||
async fn source_rcon_exec(cfg: &RconConfig, command: &str) -> Result<String> {
|
||||
let addr = format!("127.0.0.1:{}", cfg.port);
|
||||
|
||||
let stream = timeout(CONNECT_TIMEOUT, TcpStream::connect(&addr))
|
||||
.await
|
||||
.context("connect timeout")?
|
||||
.with_context(|| format!("Source RCON connect to {addr}"))?;
|
||||
|
||||
let mut stream = stream;
|
||||
|
||||
// --- Auth ---
|
||||
let auth_id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
send_packet(&mut stream, auth_id, RCON_TYPE_AUTH, cfg.password.as_bytes()).await?;
|
||||
|
||||
// The server sends two responses to AUTH: first an empty RESPONSE_VALUE,
|
||||
// then an AUTH_RESPONSE. We skip the first and read until AUTH_RESPONSE.
|
||||
timeout(RESPONSE_TIMEOUT, async {
|
||||
loop {
|
||||
let (id, ptype, _body) = recv_packet(&mut stream).await?;
|
||||
if ptype == RCON_TYPE_AUTH_RESPONSE {
|
||||
if id == -1 {
|
||||
bail!("Source RCON auth failed: wrong password");
|
||||
}
|
||||
tracing::debug!("Source RCON authenticated (id={id})");
|
||||
return Ok(());
|
||||
}
|
||||
// Skip the empty RESPONSE_VALUE that precedes AUTH_RESPONSE.
|
||||
}
|
||||
#[allow(unreachable_code)]
|
||||
Ok::<(), anyhow::Error>(())
|
||||
})
|
||||
.await
|
||||
.context("Source RCON auth timeout")??;
|
||||
|
||||
// --- Command ---
|
||||
let cmd_id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
// Probe id must differ from cmd_id.
|
||||
let probe_id: i32 = loop {
|
||||
let id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
if id != cmd_id {
|
||||
break id;
|
||||
}
|
||||
};
|
||||
|
||||
send_packet(&mut stream, cmd_id, RCON_TYPE_EXECCOMMAND, command.as_bytes()).await?;
|
||||
// Empty RESPONSE_VALUE probe — the server echoes it after processing the
|
||||
// preceding command, signalling end-of-response.
|
||||
send_packet(&mut stream, probe_id, RCON_TYPE_RESPONSE_VALUE, b"").await?;
|
||||
|
||||
// Not every server is probe-conformant (Soulmask unverified): once we hold
|
||||
// response data, a short per-read quiet period also terminates — never
|
||||
// discard a response we already received just because the probe echo
|
||||
// didn't come back.
|
||||
const QUIET_PERIOD: Duration = Duration::from_millis(1500);
|
||||
let response = timeout(RESPONSE_TIMEOUT, async {
|
||||
let mut body_accum: Vec<u8> = Vec::new();
|
||||
loop {
|
||||
let next = if body_accum.is_empty() {
|
||||
recv_packet(&mut stream).await.map(Some)
|
||||
} else {
|
||||
match timeout(QUIET_PERIOD, recv_packet(&mut stream)).await {
|
||||
Ok(res) => res.map(Some),
|
||||
Err(_elapsed) => Ok(None), // quiet after data — done
|
||||
}
|
||||
};
|
||||
let Some((id, ptype, body)) = next? else {
|
||||
break;
|
||||
};
|
||||
if ptype != RCON_TYPE_RESPONSE_VALUE {
|
||||
continue; // unexpected packet type — skip
|
||||
}
|
||||
if id == probe_id {
|
||||
// Probe echoed back — all command response packets have arrived.
|
||||
break;
|
||||
}
|
||||
if id == cmd_id {
|
||||
if body_accum.len() + body.len() > MAX_RESPONSE_BYTES {
|
||||
bail!("Source RCON response exceeded {MAX_RESPONSE_BYTES} bytes");
|
||||
}
|
||||
body_accum.extend_from_slice(&body);
|
||||
}
|
||||
// Skip packets with other ids (shouldn't happen but be defensive).
|
||||
}
|
||||
Ok::<Vec<u8>, anyhow::Error>(body_accum)
|
||||
})
|
||||
.await
|
||||
.context("Source RCON response timeout")??;
|
||||
|
||||
String::from_utf8(response).context("Source RCON response is not valid UTF-8")
|
||||
}
|
||||
|
||||
/// Write a Source RCON packet to the stream.
|
||||
async fn send_packet(stream: &mut TcpStream, id: i32, ptype: i32, body: &[u8]) -> Result<()> {
|
||||
// size = id(4) + type(4) + body(n) + 2 null terminators
|
||||
let size = (4 + 4 + body.len() + 2) as i32;
|
||||
let mut buf: Vec<u8> = Vec::with_capacity(4 + size as usize);
|
||||
buf.extend_from_slice(&size.to_le_bytes());
|
||||
buf.extend_from_slice(&id.to_le_bytes());
|
||||
buf.extend_from_slice(&ptype.to_le_bytes());
|
||||
buf.extend_from_slice(body);
|
||||
buf.push(0x00);
|
||||
buf.push(0x00);
|
||||
stream.write_all(&buf).await.context("Source RCON write")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read one Source RCON packet; returns (id, type, body).
|
||||
async fn recv_packet(stream: &mut TcpStream) -> Result<(i32, i32, Vec<u8>)> {
|
||||
let mut size_buf = [0u8; 4];
|
||||
stream
|
||||
.read_exact(&mut size_buf)
|
||||
.await
|
||||
.context("Source RCON read size")?;
|
||||
let size = i32::from_le_bytes(size_buf) as usize;
|
||||
|
||||
// Minimum packet: id(4) + type(4) + 2 null terminators = 10 bytes.
|
||||
if size < 10 {
|
||||
bail!("Source RCON: malformed packet (size={size})");
|
||||
}
|
||||
if size > MAX_RESPONSE_BYTES + 16 {
|
||||
bail!("Source RCON: packet too large ({size} bytes)");
|
||||
}
|
||||
|
||||
let mut payload = vec![0u8; size];
|
||||
stream
|
||||
.read_exact(&mut payload)
|
||||
.await
|
||||
.context("Source RCON read payload")?;
|
||||
|
||||
let id = i32::from_le_bytes(payload[0..4].try_into().unwrap());
|
||||
let ptype = i32::from_le_bytes(payload[4..8].try_into().unwrap());
|
||||
// Body is everything between the two fields and the two trailing nulls.
|
||||
let body_end = size.saturating_sub(2); // strip 2 null terminators
|
||||
let body = payload[8..body_end].to_vec();
|
||||
|
||||
Ok((id, ptype, body))
|
||||
}
|
||||
126
corrosion-host-agent/src/steamcmd.rs
Normal file
126
corrosion-host-agent/src/steamcmd.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
//! SteamCMD update integration for process-managed game instances.
|
||||
//!
|
||||
//! Wraps the `steamcmd` binary to perform an `+app_update` for a given game
|
||||
//! instance, streaming stdout lines to a caller-supplied progress callback so
|
||||
//! the panel can display live update output. The agent already runs a task per
|
||||
//! command in a separate `tokio::spawn`, so the blocking-until-done semantics
|
||||
//! here are intentional — the NATS reply is sent only when SteamCMD exits.
|
||||
//!
|
||||
//! Dune is Docker-image-based and explicitly has no SteamCMD integration — any
|
||||
//! attempt to invoke `update` on a Dune instance returns a clear error rather
|
||||
//! than a silent no-op.
|
||||
|
||||
use std::path::Path;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
|
||||
/// Steam app ID of the dedicated server for `game`.
///
/// Returns `None` for `"dune"` (Docker images — SteamCMD has no role there)
/// and for any game name not listed here. Only the Soulmask entry is
/// platform-dependent: a different ID is used when compiled for Windows than
/// for other targets.
pub fn app_id_for_game(game: &str) -> Option<u32> {
    let app_id = match game {
        "rust" => 258550,
        "conan" => 443030,
        "soulmask" if cfg!(windows) => 3017310,
        "soulmask" => 3017300,
        _ => return None,
    };
    Some(app_id)
}
|
||||
|
||||
/// Configuration controlling SteamCMD behaviour for one instance.
|
||||
/// Serialised as `[instance.steamcmd]` in agent.toml.
|
||||
#[derive(Debug, Clone, serde::Deserialize, Default)]
|
||||
pub struct SteamcmdConfig {
|
||||
/// Absolute or relative path to the `steamcmd` binary.
|
||||
/// Defaults to `"steamcmd"` (resolved via `PATH`) when absent.
|
||||
#[serde(default)]
|
||||
pub steamcmd_path: Option<std::path::PathBuf>,
|
||||
|
||||
/// Whether to pass `validate` to `+app_update`. Adds a file-hash check
|
||||
/// pass that catches corruption at the cost of a longer update time.
|
||||
#[serde(default)]
|
||||
pub validate: bool,
|
||||
}
|
||||
|
||||
/// Run a SteamCMD update for `game` into `install_dir`.
|
||||
///
|
||||
/// - `steamcmd_path`: path to the binary (or `"steamcmd"` to use PATH).
|
||||
/// - `validate`: appends `validate` to the `+app_update` call.
|
||||
/// - `on_progress`: receives each stdout line as it arrives so callers can
|
||||
/// forward progress to the panel in real time.
|
||||
///
|
||||
/// Returns `Ok(())` on a zero exit code, otherwise an error describing the
|
||||
/// failure. Dune is rejected before any process is spawned.
|
||||
pub async fn update(
|
||||
game: &str,
|
||||
install_dir: &Path,
|
||||
steamcmd_path: &str,
|
||||
validate: bool,
|
||||
on_progress: impl Fn(&str),
|
||||
) -> anyhow::Result<()> {
|
||||
use anyhow::Context;
|
||||
|
||||
let app_id = app_id_for_game(game).ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"dune uses Docker images, not SteamCMD — cannot run app_update for game '{game}'"
|
||||
)
|
||||
})?;
|
||||
|
||||
let install_dir_str = install_dir
|
||||
.to_str()
|
||||
.with_context(|| format!("install_dir '{}' is not valid UTF-8", install_dir.display()))?;
|
||||
|
||||
let mut args: Vec<String> = vec![
|
||||
"+force_install_dir".to_string(),
|
||||
install_dir_str.to_string(),
|
||||
"+login".to_string(),
|
||||
"anonymous".to_string(),
|
||||
"+app_update".to_string(),
|
||||
app_id.to_string(),
|
||||
];
|
||||
if validate {
|
||||
args.push("validate".to_string());
|
||||
}
|
||||
args.push("+quit".to_string());
|
||||
|
||||
tracing::info!(
|
||||
"steamcmd: starting update for game={game} app_id={app_id} install_dir={} validate={validate}",
|
||||
install_dir.display()
|
||||
);
|
||||
|
||||
let mut child = Command::new(steamcmd_path)
|
||||
.args(&args)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.spawn()
|
||||
.with_context(|| format!("spawning steamcmd binary '{steamcmd_path}'"))?;
|
||||
|
||||
let stdout = child.stdout.take().expect("stdout was piped");
|
||||
let mut lines = BufReader::new(stdout).lines();
|
||||
|
||||
while let Some(line) = lines.next_line().await.context("reading steamcmd stdout")? {
|
||||
tracing::debug!("steamcmd: {line}");
|
||||
on_progress(&line);
|
||||
}
|
||||
|
||||
let status = child.wait().await.context("waiting for steamcmd to exit")?;
|
||||
if status.success() {
|
||||
tracing::info!("steamcmd: update completed successfully for game={game}");
|
||||
Ok(())
|
||||
} else {
|
||||
let code = status.code().unwrap_or(-1);
|
||||
anyhow::bail!("steamcmd exited with non-zero status {code} for game={game}")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,14 +17,23 @@ pub fn host_going_offline(license: &str) -> String {
|
||||
format!("corrosion.{license}.host.going_offline")
|
||||
}
|
||||
|
||||
/// Subject of the per-instance command channel
/// (start/stop/restart/status; rcon et al. to come):
/// `corrosion.{license}.{instance}.cmd`.
#[allow(dead_code)]
pub fn instance_cmd(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "cmd"].join(".")
}
|
||||
|
||||
/// Subject for per-instance state-change events:
/// `corrosion.{license}.{instance}.status`.
#[allow(dead_code)]
pub fn instance_status(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "status"].join(".")
}
|
||||
|
||||
/// Subject of the per-instance SteamCMD progress stream:
/// `corrosion.{license}.{instance}.steam_status`. Lines from `steamcmd`
/// stdout are published here so the panel can display live update output.
pub fn instance_steam_status(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "steam_status"].join(".")
}
|
||||
|
||||
/// Subject of the per-instance file manager command channel
/// (request-reply): `corrosion.{license}.{instance}.files.cmd`.
pub fn instance_files_cmd(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "files", "cmd"].join(".")
}
|
||||
|
||||
@@ -65,9 +65,10 @@ pub struct InstanceInfo {
|
||||
pub game: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub label: Option<String>,
|
||||
/// Phase 0 states: `configured` (root exists) or `missing_root`.
|
||||
/// Phase 1 adds live process states (running/stopped/crashed).
|
||||
/// Process-managed: running/stopped/starting/stopping/crashed.
|
||||
/// Unmanaged (no executable configured): configured/missing_root.
|
||||
pub state: String,
|
||||
pub uptime_seconds: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub root_disk_free_mb: Option<u64>,
|
||||
}
|
||||
@@ -125,21 +126,30 @@ pub async fn collect(agent: &Agent, sys: &mut System) -> HeartbeatPayload {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let instances = agent
|
||||
.cfg
|
||||
.instances
|
||||
.iter()
|
||||
.map(|inst| {
|
||||
let exists = inst.root.exists();
|
||||
InstanceInfo {
|
||||
id: inst.id.clone(),
|
||||
game: inst.game.clone(),
|
||||
label: inst.label.clone(),
|
||||
state: if exists { "configured" } else { "missing_root" }.to_string(),
|
||||
root_disk_free_mb: disk_free_for_path(&disks, &inst.root),
|
||||
let mut instances = Vec::with_capacity(agent.cfg.instances.len());
|
||||
for inst in &agent.cfg.instances {
|
||||
let (state, uptime_seconds) = match agent.supervisors.get(&inst.id) {
|
||||
Some(sup) if !matches!(sup.state(), crate::process::InstanceState::Unmanaged) => {
|
||||
(sup.state().as_label().to_string(), sup.uptime_seconds().await)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
_ => {
|
||||
let exists = inst.root.exists();
|
||||
(
|
||||
if exists { "configured" } else { "missing_root" }.to_string(),
|
||||
0,
|
||||
)
|
||||
}
|
||||
};
|
||||
instances.push(InstanceInfo {
|
||||
id: inst.id.clone(),
|
||||
game: inst.game.clone(),
|
||||
label: inst.label.clone(),
|
||||
state,
|
||||
uptime_seconds,
|
||||
root_disk_free_mb: disk_free_for_path(&disks, &inst.root),
|
||||
});
|
||||
}
|
||||
let instances = instances;
|
||||
|
||||
HeartbeatPayload {
|
||||
schema: 2,
|
||||
|
||||
461
corrosion-host-agent/tests/filemanager.rs
Normal file
461
corrosion-host-agent/tests/filemanager.rs
Normal file
@@ -0,0 +1,461 @@
|
||||
//! Integration tests for the jailed file manager.
|
||||
//!
|
||||
//! Each test runs in a real tempdir on the host filesystem. The jail-escape
|
||||
//! tests are the security-critical section: any path that resolves outside the
|
||||
//! instance root MUST be rejected regardless of how the escape is attempted.
|
||||
//!
|
||||
//! Coverage:
|
||||
//! - Functional: list, write, read roundtrip, mkdir, rename, delete
|
||||
//! - Security: dotdot traversal, absolute path injection, symlink escape
|
||||
//! (POSIX symlinks only — `#[cfg(unix)]`)
|
||||
|
||||
use corrosion_host_agent::filemanager;
|
||||
use std::path::Path;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Create a temporary directory and return its path. The directory is
|
||||
/// automatically cleaned up when the `TempDir` is dropped.
|
||||
fn tempdir() -> tempfile::TempDir {
|
||||
tempfile::tempdir().expect("create tempdir")
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Functional tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Text written through the file manager reads back unchanged.
#[test]
fn write_read_roundtrip() {
    let expected = "hello from the file manager\nline 2\n";
    let jail = tempdir();

    filemanager::write(jail.path(), "test.txt", expected).expect("write should succeed");
    let read_back = filemanager::read(jail.path(), "test.txt").expect("read should succeed");

    assert_eq!(read_back, expected);
}
|
||||
|
||||
/// A file written into the root shows up in the root listing.
#[test]
fn list_returns_written_file() {
    let jail = tempdir();
    filemanager::write(jail.path(), "server.cfg", "hostname MyServer\n").expect("write");

    let listing = filemanager::list(jail.path(), "").expect("list root");
    let names: Vec<&str> = listing.iter().map(|entry| entry.name.as_str()).collect();

    assert!(names.contains(&"server.cfg"), "expected 'server.cfg' in listing, got {names:?}");
}
|
||||
|
||||
/// Listing a freshly created root yields no entries.
#[test]
fn list_empty_root_is_empty() {
    let jail = tempdir();

    let listing = filemanager::list(jail.path(), "").expect("list empty root");

    assert!(listing.is_empty(), "fresh tempdir should have no entries");
}
|
||||
|
||||
/// `mkdir` creates the requested directory under the root.
#[test]
fn mkdir_creates_directory() {
    let jail = tempdir();

    filemanager::mkdir(jail.path(), "cfg/custom").expect("mkdir should succeed");

    let created = jail.path().join("cfg/custom");
    assert!(created.is_dir(), "directory should exist after mkdir");
}
|
||||
|
||||
/// `mkdir` creates every missing directory along a deep path.
#[test]
fn mkdir_creates_nested_dirs() {
    let jail = tempdir();

    filemanager::mkdir(jail.path(), "a/b/c/d").expect("mkdir nested");

    let deepest = jail.path().join("a/b/c/d");
    assert!(deepest.is_dir());
}
|
||||
|
||||
/// Writing to a path whose parent directories do not exist yet must succeed,
/// and the file must be readable afterwards.
#[test]
fn write_creates_parent_dirs() {
    let sandbox = tempdir();
    let root = sandbox.path();
    let target = "subdir/deep/file.txt";

    filemanager::write(root, target, "data").expect("write with auto-mkdir");

    let content = filemanager::read(root, target).expect("read");
    assert_eq!(content, "data");
}
|
||||
|
||||
/// Renaming removes the old name, creates the new one, and keeps the content.
#[test]
fn rename_file() {
    let sandbox = tempdir();
    let root = sandbox.path();
    let before = root.join("old.txt");
    let after = root.join("new.txt");

    filemanager::write(root, "old.txt", "content").expect("write");
    filemanager::rename(root, "old.txt", "new.txt").expect("rename");

    assert!(!before.exists(), "old.txt should be gone");
    assert!(after.exists(), "new.txt should exist");

    let content = filemanager::read(root, "new.txt").expect("read renamed");
    assert_eq!(content, "content");
}
|
||||
|
||||
/// A new name containing a path separator must be refused, and the error
/// text must say so.
#[test]
fn rename_rejects_separator_in_new_name() {
    let sandbox = tempdir();
    let root = sandbox.path();
    filemanager::write(root, "file.txt", "data").expect("write");

    let outcome = filemanager::rename(root, "file.txt", "subdir/escape.txt");
    let err = outcome.expect_err("rename with path separator must fail");

    let rendered = err.to_string();
    assert!(rendered.contains("separator"), "error should mention separator: {err}");
}
|
||||
|
||||
/// Deleting a regular file removes it from disk.
#[test]
fn delete_file() {
    let sandbox = tempdir();
    let root = sandbox.path();
    let victim = root.join("todelete.txt");

    filemanager::write(root, "todelete.txt", "bye").expect("write");
    assert!(victim.exists());

    filemanager::delete(root, "todelete.txt").expect("delete");
    assert!(!victim.exists());
}
|
||||
|
||||
/// Deleting a directory removes it together with its nested contents.
#[test]
fn delete_directory_recursive() {
    let sandbox = tempdir();
    let root = sandbox.path();
    let tree = root.join("tree");

    filemanager::mkdir(root, "tree/sub").expect("mkdir");
    filemanager::write(root, "tree/sub/file.txt", "x").expect("write");
    assert!(tree.is_dir());

    filemanager::delete(root, "tree").expect("delete tree");
    assert!(!tree.exists(), "directory tree should be deleted");
}
|
||||
|
||||
/// `mkfile` produces a file that reads back as the empty string.
#[test]
fn mkfile_creates_empty_file() {
    let sandbox = tempdir();
    let root = sandbox.path();

    filemanager::mkfile(root, "empty.txt").expect("mkfile");

    let content = filemanager::read(root, "empty.txt").expect("read empty file");
    assert_eq!(content, "");
}
|
||||
|
||||
/// Copying a file leaves the source intact and creates an identical
/// destination.
#[test]
fn copy_file() {
    let sandbox = tempdir();
    let root = sandbox.path();

    filemanager::write(root, "source.txt", "original").expect("write source");
    filemanager::copy(root, "source.txt", "dest.txt").expect("copy");

    // Read both sides before asserting so either read failure surfaces first.
    let src = filemanager::read(root, "source.txt").expect("read source after copy");
    let dst = filemanager::read(root, "dest.txt").expect("read destination");

    assert_eq!(src, "original");
    assert_eq!(dst, "original");
}
|
||||
|
||||
/// Moving a file removes the source path and preserves the content at the
/// destination.
#[test]
fn move_file() {
    let sandbox = tempdir();
    let root = sandbox.path();
    let origin = root.join("moveme.txt");

    filemanager::write(root, "moveme.txt", "payload").expect("write");
    filemanager::move_path(root, "moveme.txt", "moved.txt").expect("move");

    assert!(!origin.exists(), "source should be gone");

    let content = filemanager::read(root, "moved.txt").expect("read after move");
    assert_eq!(content, "payload");
}
|
||||
|
||||
/// Checks the per-entry metadata returned by `list` for one directory and one
/// file: `is_dir`, `size`, `modified`, and the shape of `path`.
///
/// Entries are looked up by name, so the test does not depend on the order in
/// which `list` returns them.
#[test]
fn list_entry_fields_are_populated() {
    let sandbox = tempdir();
    let root = sandbox.path();

    filemanager::write(root, "check.txt", "abcde").expect("write");
    filemanager::mkdir(root, "subdir").expect("mkdir");

    let entries = filemanager::list(root, "").expect("list");

    // Directory entry: flagged as a directory, zero size, timestamp present.
    let Some(dir_entry) = entries.iter().find(|e| e.name == "subdir") else {
        panic!("subdir entry");
    };
    assert!(dir_entry.is_dir);
    assert_eq!(dir_entry.size, 0);
    assert!(!dir_entry.modified.is_empty(), "modified should be set");

    // File entry: not a directory, size equals the five bytes written.
    let Some(file_entry) = entries.iter().find(|e| e.name == "check.txt") else {
        panic!("file entry");
    };
    assert!(!file_entry.is_dir);
    assert_eq!(file_entry.size, 5, "size should match byte count");

    // The reported path is relative and uses forward slashes only.
    assert!(!file_entry.path.starts_with('/'), "path should be relative");
    assert!(!file_entry.path.contains('\\'), "path should use forward slashes");
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Security: jail-escape tests
|
||||
// CRITICAL — these are the whole point of the jail abstraction.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A `../../etc/passwd` request must be refused with a jail-related error
/// rather than a plain "file not found".
#[test]
fn jail_rejects_dotdot_traversal() {
    let sandbox = tempdir();
    let root = sandbox.path();

    let msg = filemanager::read(root, "../../etc/passwd")
        .expect_err("dotdot traversal must be rejected")
        .to_string();

    // "escape" also matches "escapes", so two needles cover all accepted wordings.
    let mentions_jail = ["outside", "escape"].iter().any(|needle| msg.contains(*needle));
    assert!(
        mentions_jail,
        "error should mention jail escape for dotdot traversal, got: {msg}"
    );
}
|
||||
|
||||
/// A path that descends a few levels and then climbs far above the root with
/// a long `../` chain must be refused as well.
#[test]
fn jail_rejects_deep_dotdot_traversal() {
    let sandbox = tempdir();
    let root = sandbox.path();

    let msg = filemanager::read(root, "a/b/c/../../../../../../../../etc/shadow")
        .expect_err("deep dotdot traversal must be rejected")
        .to_string();

    // "escape" also matches "escapes".
    let mentions_jail = ["outside", "escape", "absolute"]
        .iter()
        .any(|needle| msg.contains(*needle));
    assert!(
        mentions_jail,
        "error should mention jail escape for deep traversal, got: {msg}"
    );
}
|
||||
|
||||
/// An absolute request path such as `/etc/passwd` must be refused outright;
/// it must never be treated as relative to the instance root.
#[test]
fn jail_rejects_absolute_path() {
    let sandbox = tempdir();
    let root = sandbox.path();

    let msg = filemanager::read(root, "/etc/passwd")
        .expect_err("absolute path must be rejected")
        .to_string();

    // "escape" also matches "escapes".
    let mentions_rejection = ["absolute", "outside", "escape"]
        .iter()
        .any(|needle| msg.contains(*needle));
    assert!(
        mentions_rejection,
        "error should mention the absolute-path rejection, got: {msg}"
    );
}
|
||||
|
||||
/// A second absolute-path probe, this time aimed at `/tmp/evil`.
///
/// Despite the test name, the input is a POSIX-style absolute path: no
/// drive-letter or backslash form is exercised here. The point being pinned
/// is that any absolute path is refused, whatever it targets.
#[test]
fn jail_rejects_absolute_windows_style_path() {
    let sandbox = tempdir();
    let root = sandbox.path();

    let msg = filemanager::read(root, "/tmp/evil")
        .expect_err("absolute /tmp/evil must be rejected")
        .to_string();

    // "escape" also matches "escapes".
    let mentions_rejection = ["absolute", "outside", "escape"]
        .iter()
        .any(|needle| msg.contains(*needle));
    assert!(mentions_rejection, "got: {msg}");
}
|
||||
|
||||
/// Reading through a directory symlink that lives inside the root but points
/// at a directory outside it must be refused.
#[cfg(unix)]
#[test]
fn jail_rejects_symlink_escape() {
    use std::os::unix::fs::symlink;

    let jail = tempdir();
    let root = jail.path();

    // The target lives in a separate temp directory, i.e. outside the jail.
    let outside = tempdir();
    let outside_file = outside.path().join("secret.txt");
    std::fs::write(&outside_file, "secret data").expect("write outside file");

    // root/evil_link -> <outside>
    symlink(outside.path(), root.join("evil_link")).expect("create symlink inside root");

    let msg = filemanager::read(root, "evil_link/secret.txt")
        .expect_err("symlink escape must be rejected")
        .to_string();

    // "escape" also matches "escapes".
    let mentions_jail = ["outside", "escape"].iter().any(|needle| msg.contains(*needle));
    assert!(
        mentions_jail,
        "error should mention jail escape for symlink traversal, got: {msg}"
    );
}
|
||||
|
||||
/// A symlink at the top of the root whose target is `/etc/passwd` must be
/// refused even though the request path itself (`passwd_link`) looks like an
/// ordinary relative name.
#[cfg(unix)]
#[test]
fn jail_rejects_symlink_pointing_directly_outside() {
    use std::os::unix::fs::symlink;

    let jail = tempdir();
    let root = jail.path();

    // root/passwd_link -> /etc/passwd
    let link_path = root.join("passwd_link");
    symlink(Path::new("/etc/passwd"), &link_path).expect("create symlink to /etc/passwd");

    let msg = filemanager::read(root, "passwd_link")
        .expect_err("direct symlink outside root must be rejected")
        .to_string();

    // "escape" also matches "escapes".
    let mentions_jail = ["outside", "escape"].iter().any(|needle| msg.contains(*needle));
    assert!(mentions_jail, "error should mention jail escape, got: {msg}");
}
|
||||
|
||||
/// Two-hop escape: `link1` points at `link2`, which points at a directory
/// outside the root. Reading `link1` must still be refused.
#[cfg(unix)]
#[test]
fn jail_rejects_chained_symlink_escape() {
    use std::os::unix::fs::symlink;

    let jail = tempdir();
    let root = jail.path();
    let outside = tempdir();

    // Build the chain back to front: root/link2 -> <outside>, root/link1 -> root/link2.
    let link2_path = root.join("link2");
    symlink(outside.path(), &link2_path).expect("create link2");

    let link1_path = root.join("link1");
    symlink(&link2_path, &link1_path).expect("create link1");

    let msg = filemanager::read(root, "link1")
        .expect_err("chained symlink escape must be rejected")
        .to_string();

    // "escape" also matches "escapes".
    let mentions_jail = ["outside", "escape"].iter().any(|needle| msg.contains(*needle));
    assert!(mentions_jail, "chained symlink should be caught, got: {msg}");
}
|
||||
|
||||
/// SECURITY REGRESSION: a recursive copy of an in-jail directory that
/// contains a symlink to an outside directory must fail instead of following
/// the link. Otherwise the copy would pull external files into the jail and
/// become a read-escape channel, because only the top-level source and
/// destination paths are jail-checked.
#[cfg(unix)]
#[test]
fn copy_refuses_to_follow_symlink_out_of_jail() {
    use std::os::unix::fs::symlink;

    let jail = tempdir();
    let root = jail.path();

    // External directory holding the data that must never be copied in.
    let outside = tempdir();
    std::fs::write(outside.path().join("secret.txt"), "TOP SECRET")
        .expect("write external secret");

    // root/src/escape -> <outside>
    let src_dir = root.join("src");
    std::fs::create_dir(&src_dir).expect("mkdir src");
    symlink(outside.path(), src_dir.join("escape")).expect("plant symlink to outside");

    // Source and destination are both inside the jail; only the nested link escapes.
    let err = filemanager::copy(root, "src", "dest")
        .expect_err("copy must refuse the embedded symlink");
    let detail = format!("{err:#}");
    assert!(
        detail.contains("symlink"),
        "error should name the refused symlink, got: {err:#}"
    );

    // Nothing from the outside directory may have been materialised in the jail.
    let leaked = root.join("dest").join("escape").join("secret.txt");
    assert!(
        !leaked.exists(),
        "external content leaked into the jail via symlink-following copy",
    );
}
|
||||
|
||||
/// `list` must report a symlink as the link itself, never the dereferenced
/// target — otherwise it leaks the size/type of files outside the jail.
///
/// Two links are planted so that both leak channels are actually testable:
/// * `leak` -> `/etc/passwd` (regular file) exercises the size channel;
/// * `dir_leak` -> an outside directory exercises the type channel. A link to
///   a regular file alone cannot catch a type leak, because `is_dir` is false
///   for it whether or not the link is followed.
#[cfg(unix)]
#[test]
fn list_does_not_dereference_symlink_metadata() {
    let dir = tempdir();
    let root = dir.path();
    let outside_dir = tempdir();

    std::os::unix::fs::symlink(Path::new("/etc/passwd"), root.join("leak"))
        .expect("plant symlink");
    std::os::unix::fs::symlink(outside_dir.path(), root.join("dir_leak"))
        .expect("plant directory symlink");

    let entries = filemanager::list(root, "").expect("list root");

    // Size channel: the entry must not carry the byte size of the link target.
    let leak = entries.iter().find(|e| e.name == "leak").expect("symlink listed");
    assert!(!leak.is_dir, "symlink must not be reported as a directory");
    // If /etc/passwd cannot be stat'ed (or is empty) the size comparison is
    // meaningless, so it is skipped via the `target_size == 0` escape hatch.
    let target_size = std::fs::metadata("/etc/passwd").map(|m| m.len()).unwrap_or(0);
    assert!(
        leak.size != target_size || target_size == 0,
        "list leaked the symlink target's size ({target_size} bytes)"
    );

    // Type channel: following the link would report a directory here.
    let dir_leak = entries
        .iter()
        .find(|e| e.name == "dir_leak")
        .expect("directory symlink listed");
    assert!(
        !dir_leak.is_dir,
        "list leaked the symlink target's type (reported a directory)"
    );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Dispatch layer tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A `list` request sent through `dispatch` reports success and carries an
/// `entries` array in its data payload.
#[test]
fn dispatch_list_returns_success() {
    let sandbox = tempdir();
    let root = sandbox.path();
    filemanager::write(root, "a.txt", "a").expect("write");

    // An empty path addresses the root itself; the optional fields are unused by `list`.
    let request = filemanager::FileRequest {
        op: String::from("list"),
        path: String::new(),
        dest: None,
        content: None,
        name: None,
    };

    let resp = filemanager::dispatch(root, &request);

    assert_eq!(resp["status"], "success");
    assert!(resp["data"]["entries"].is_array());
}
|
||||
|
||||
/// An unrecognised operation name yields an error response whose message
/// contains "unknown op".
#[test]
fn dispatch_unknown_op_returns_error() {
    let sandbox = tempdir();
    let request = filemanager::FileRequest {
        op: String::from("explode"),
        path: String::new(),
        dest: None,
        content: None,
        name: None,
    };

    let resp = filemanager::dispatch(sandbox.path(), &request);

    assert_eq!(resp["status"], "error");
    let message = resp["message"].as_str().unwrap();
    assert!(message.contains("unknown op"));
}
|
||||
|
||||
/// A traversal attempt routed through `dispatch` must come back as an
/// ordinary error response with a message. It must neither panic nor return
/// the file.
#[test]
fn dispatch_escape_attempt_returns_error_not_panic() {
    let sandbox = tempdir();
    let request = filemanager::FileRequest {
        op: String::from("read"),
        path: String::from("../../etc/passwd"),
        dest: None,
        content: None,
        name: None,
    };

    let resp = filemanager::dispatch(sandbox.path(), &request);

    assert_eq!(resp["status"], "error", "escape attempt should return error status");
    let message = resp["message"].as_str();
    assert!(message.is_some(), "error response must have a message");
}
|
||||
353
corrosion-host-agent/tests/rcon.rs
Normal file
353
corrosion-host-agent/tests/rcon.rs
Normal file
@@ -0,0 +1,353 @@
|
||||
//! RCON integration tests using in-process mock servers.
|
||||
//!
|
||||
//! Real OS sockets on ephemeral ports — no mocking framework. Each test
|
||||
//! binds a listener, spawns a task that speaks the expected protocol, then
|
||||
//! exercises `rcon::send_command` and asserts on the result. Tests are
|
||||
//! unix-only because the musl cross-compile target and the CI runner are both
|
||||
//! Linux; the production use case is also Linux-only (game servers don't run
|
||||
//! on macOS or Windows in production).
|
||||
//!
|
||||
//! We use `#[cfg(unix)]` to keep parity with the supervisor integration tests.
|
||||
#![cfg(unix)]
|
||||
|
||||
use corrosion_host_agent::rcon::{RconConfig, RconKind};
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source RCON helpers — duplicate the wire-format encode/decode locally so
|
||||
// the tests own the mock server without depending on the production code path.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Serialises one Source RCON packet.
///
/// Layout: `size` (i32 LE) | `id` (i32 LE) | `ptype` (i32 LE) | `body` | two
/// 0x00 terminator bytes. The `size` field counts everything after itself:
/// 4 (id) + 4 (type) + body length + 2 (terminators).
fn encode_packet(id: i32, ptype: i32, body: &[u8]) -> Vec<u8> {
    let size = (4 + 4 + body.len() + 2) as i32;

    let size_le = size.to_le_bytes();
    let id_le = id.to_le_bytes();
    let type_le = ptype.to_le_bytes();

    [&size_le[..], &id_le[..], &type_le[..], body, &[0x00, 0x00][..]].concat()
}
|
||||
|
||||
/// Read one Source RCON packet from a TcpStream.
|
||||
async fn read_packet(stream: &mut TcpStream) -> (i32, i32, Vec<u8>) {
|
||||
let mut size_buf = [0u8; 4];
|
||||
stream.read_exact(&mut size_buf).await.unwrap();
|
||||
let size = i32::from_le_bytes(size_buf) as usize;
|
||||
|
||||
let mut payload = vec![0u8; size];
|
||||
stream.read_exact(&mut payload).await.unwrap();
|
||||
|
||||
let id = i32::from_le_bytes(payload[0..4].try_into().unwrap());
|
||||
let ptype = i32::from_le_bytes(payload[4..8].try_into().unwrap());
|
||||
let body_end = size.saturating_sub(2);
|
||||
let body = payload[8..body_end].to_vec();
|
||||
(id, ptype, body)
|
||||
}
|
||||
|
||||
/// Packet type of the client's authentication request.
const SOURCE_TYPE_AUTH: i32 = 3;
/// Packet type of the server's authentication verdict. It has the same
/// numeric value as `SOURCE_TYPE_EXECCOMMAND`.
const SOURCE_TYPE_AUTH_RESPONSE: i32 = 2;
/// Packet type of a client command request. It has the same numeric value as
/// `SOURCE_TYPE_AUTH_RESPONSE`.
const SOURCE_TYPE_EXECCOMMAND: i32 = 2;
/// Packet type of a server response carrying command output.
const SOURCE_TYPE_RESPONSE_VALUE: i32 = 0;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock Source RCON server
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Run a Source RCON server that accepts password "goodpw", rejects others,
|
||||
/// and responds to the first EXECCOMMAND with `response_body`.
|
||||
///
|
||||
/// If `split_at` is Some(n) the body is split: the first `n` bytes arrive in
|
||||
/// one RESPONSE_VALUE packet and the remainder in a second — testing multi-
|
||||
/// packet reassembly.
|
||||
async fn run_source_mock(
|
||||
mut stream: TcpStream,
|
||||
accept_password: &str,
|
||||
command_response: &[u8],
|
||||
split_at: Option<usize>,
|
||||
) {
|
||||
// --- Auth phase ---
|
||||
let (auth_id, ptype, body) = read_packet(&mut stream).await;
|
||||
assert_eq!(ptype, SOURCE_TYPE_AUTH, "expected AUTH packet");
|
||||
|
||||
let password = String::from_utf8_lossy(&body);
|
||||
if password != accept_password {
|
||||
// Send empty RESPONSE_VALUE then AUTH_RESPONSE with id = -1 (failure).
|
||||
let empty = encode_packet(auth_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
|
||||
stream.write_all(&empty).await.unwrap();
|
||||
let fail = encode_packet(-1, SOURCE_TYPE_AUTH_RESPONSE, b"");
|
||||
stream.write_all(&fail).await.unwrap();
|
||||
return;
|
||||
}
|
||||
|
||||
// Success: empty RESPONSE_VALUE then AUTH_RESPONSE with the auth id.
|
||||
let empty = encode_packet(auth_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
|
||||
stream.write_all(&empty).await.unwrap();
|
||||
let ok = encode_packet(auth_id, SOURCE_TYPE_AUTH_RESPONSE, b"");
|
||||
stream.write_all(&ok).await.unwrap();
|
||||
|
||||
// --- Command phase ---
|
||||
let (cmd_id, cmd_ptype, _cmd_body) = read_packet(&mut stream).await;
|
||||
assert_eq!(cmd_ptype, SOURCE_TYPE_EXECCOMMAND, "expected EXECCOMMAND");
|
||||
|
||||
// Read the probe packet (empty RESPONSE_VALUE with a different id).
|
||||
let (probe_id, probe_ptype, _) = read_packet(&mut stream).await;
|
||||
assert_eq!(probe_ptype, SOURCE_TYPE_RESPONSE_VALUE, "expected probe packet");
|
||||
|
||||
// Send the command response, optionally split across two packets.
|
||||
if let Some(n) = split_at {
|
||||
let (part1, part2) = command_response.split_at(n.min(command_response.len()));
|
||||
let p1 = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, part1);
|
||||
stream.write_all(&p1).await.unwrap();
|
||||
let p2 = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, part2);
|
||||
stream.write_all(&p2).await.unwrap();
|
||||
} else {
|
||||
let p = encode_packet(cmd_id, SOURCE_TYPE_RESPONSE_VALUE, command_response);
|
||||
stream.write_all(&p).await.unwrap();
|
||||
}
|
||||
|
||||
// Echo the probe to signal end-of-response.
|
||||
let probe_echo = encode_packet(probe_id, SOURCE_TYPE_RESPONSE_VALUE, b"");
|
||||
stream.write_all(&probe_echo).await.unwrap();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source RCON tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn source_rcon_auth_and_exec_returns_response() {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
run_source_mock(stream, "goodpw", b"Hello from server", None).await;
|
||||
});
|
||||
|
||||
let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
|
||||
let result = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
|
||||
.await
|
||||
.expect("command should succeed");
|
||||
|
||||
assert_eq!(result, "Hello from server");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn source_rcon_wrong_password_returns_auth_error() {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
run_source_mock(stream, "goodpw", b"should not see this", None).await;
|
||||
});
|
||||
|
||||
let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "wrongpw".to_string() };
|
||||
let err = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
|
||||
.await
|
||||
.expect_err("wrong password should fail");
|
||||
|
||||
assert!(
|
||||
err.to_string().to_lowercase().contains("auth"),
|
||||
"error should mention auth failure, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn source_rcon_multi_packet_response_concatenated() {
|
||||
// Build a body large enough to split meaningfully across two packets.
|
||||
// Use repeating ASCII so the result is valid UTF-8 and easy to verify.
|
||||
// 200 'A's then 200 'B's = 400 bytes, split at 200.
|
||||
let body: Vec<u8> = std::iter::repeat_n(b'A', 200)
|
||||
.chain(std::iter::repeat_n(b'B', 200))
|
||||
.collect();
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
let body_clone = body.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
run_source_mock(stream, "goodpw", &body_clone, Some(200)).await;
|
||||
});
|
||||
|
||||
let cfg = RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
|
||||
let result = corrosion_host_agent::rcon::send_command(&cfg, "soulmask", "showplayers")
|
||||
.await
|
||||
.expect("multi-packet command should succeed");
|
||||
|
||||
let expected = String::from_utf8(body).unwrap();
|
||||
assert_eq!(result, expected, "full body should be concatenated across both packets");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn source_rcon_connect_timeout_to_unreachable_port() {
|
||||
// Bind a listener but never accept — the connection will time out during
|
||||
// the RCON auth phase because nothing is reading from the socket.
|
||||
// We use a port that is bound (so TCP connect itself succeeds) but then
|
||||
// the mock simply drops the stream, forcing a read error, which should
|
||||
// surface as an error (not a panic or hang).
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
|
||||
// Accept the TCP connection but immediately drop it — simulates a port
|
||||
// that accepts but never speaks RCON.
|
||||
tokio::spawn(async move {
|
||||
let (_stream, _) = listener.accept().await.unwrap();
|
||||
// _stream dropped here — EOF on the client's read
|
||||
});
|
||||
|
||||
let cfg =
|
||||
RconConfig { kind: Some(RconKind::Source), port, password: "goodpw".to_string() };
|
||||
let err = corrosion_host_agent::rcon::send_command(&cfg, "conan", "status")
|
||||
.await
|
||||
.expect_err("closed connection should fail");
|
||||
|
||||
// We just need it to fail and not hang; error message varies by OS.
|
||||
let _ = err;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRCON mock server
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Run a WebRCON mock: send one noise frame (Identifier 0), then respond to
|
||||
/// the first real request with the given output.
|
||||
async fn run_webrcon_mock(stream: tokio::net::TcpStream, output: &str) {
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use tokio_tungstenite::accept_async;
|
||||
use tokio_tungstenite::tungstenite::Message as WsMsg;
|
||||
|
||||
let mut ws = accept_async(stream).await.expect("WS handshake failed");
|
||||
|
||||
// Send noise (chat frame, Identifier 0) before the real request arrives.
|
||||
let noise = serde_json::json!({
|
||||
"Identifier": 0,
|
||||
"Message": "Player X joined",
|
||||
"Name": "Server",
|
||||
"Type": "Chat"
|
||||
});
|
||||
ws.send(WsMsg::Text(noise.to_string()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Read the command request.
|
||||
let msg = ws.next().await.unwrap().unwrap();
|
||||
let text = match msg {
|
||||
WsMsg::Text(t) => t,
|
||||
other => panic!("expected Text frame, got {other:?}"),
|
||||
};
|
||||
let req: serde_json::Value = serde_json::from_str(&text).unwrap();
|
||||
let req_id = req["Identifier"].as_i64().unwrap() as i32;
|
||||
|
||||
// Reply with the same Identifier so the client correlates correctly.
|
||||
let reply = serde_json::json!({
|
||||
"Identifier": req_id,
|
||||
"Message": output,
|
||||
"Type": "Generic",
|
||||
});
|
||||
ws.send(WsMsg::Text(reply.to_string())).await.unwrap();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRCON tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn webrcon_skips_noise_and_returns_correct_message() {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
run_webrcon_mock(stream, "Players: 42/100").await;
|
||||
});
|
||||
|
||||
// Password is embedded in the URL path — any non-empty string works with
|
||||
// our mock.
|
||||
let cfg = RconConfig {
|
||||
kind: Some(RconKind::WebRcon),
|
||||
port,
|
||||
password: "testpw".to_string(),
|
||||
};
|
||||
let result = corrosion_host_agent::rcon::send_command(&cfg, "rust", "playercount")
|
||||
.await
|
||||
.expect("WebRCON command should succeed");
|
||||
|
||||
assert_eq!(result, "Players: 42/100");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TOML parsing test — pins [[instance]] + [instance.rcon] sub-table syntax
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Pins the config syntax: an `[[instance]]` array entry followed by an
/// `[instance.rcon]` sub-table must deserialize into one instance with its
/// RCON port, password, and kind populated.
#[test]
fn toml_instance_with_rcon_parses_correctly() {
    let source = r#"
[agent]
license_id = "test-license"
nats_url = "nats://localhost:4222"

[[instance]]
id = "rust-main"
game = "rust"
root = "/opt/rustserver"

[instance.rcon]
port = 28016
password = "secretpassword"
kind = "webrcon"
"#;

    let cfg = toml::from_str::<corrosion_host_agent::config::ConfigFile>(source)
        .expect("TOML should parse");

    assert_eq!(cfg.instances.len(), 1);
    let inst = &cfg.instances[0];
    assert_eq!(inst.id, "rust-main");

    let rcon = inst.rcon.as_ref().expect("rcon should be present");
    assert_eq!(rcon.port, 28016);
    assert_eq!(rcon.password, "secretpassword");
    assert_eq!(rcon.kind, Some(corrosion_host_agent::rcon::RconKind::WebRcon));
}
|
||||
|
||||
/// An instance declared without an `[instance.rcon]` table must parse, with
/// its `rcon` field left as `None`.
#[test]
fn toml_instance_without_rcon_defaults_to_none() {
    let source = r#"
[agent]
license_id = "test-license"
nats_url = "nats://localhost:4222"

[[instance]]
id = "conan-main"
game = "conan"
root = "/opt/conan"
"#;

    let cfg = toml::from_str::<corrosion_host_agent::config::ConfigFile>(source)
        .expect("TOML should parse");

    let first = &cfg.instances[0];
    assert!(first.rcon.is_none(), "absent rcon should be None");
}
|
||||
|
||||
/// With no explicit `kind`, `resolved_kind` picks the protocol from the game
/// name. An explicit `kind` overrides that inference.
#[test]
fn resolved_kind_infers_from_game_name() {
    use corrosion_host_agent::rcon::{RconConfig, RconKind};

    let cfg_no_kind = RconConfig { kind: None, port: 28016, password: "x".to_string() };

    // (game, protocol expected when nothing is configured explicitly)
    let inferred = [
        ("rust", RconKind::WebRcon),
        ("conan", RconKind::Source),
        ("soulmask", RconKind::Source),
        ("dune", RconKind::WebRcon), // fallback
    ];
    for (game, expected) in inferred {
        assert_eq!(cfg_no_kind.resolved_kind(game), expected);
    }

    // Explicit kind always wins, even against a game that would infer the other protocol.
    let cfg_source = RconConfig { kind: Some(RconKind::Source), ..cfg_no_kind.clone() };
    assert_eq!(cfg_source.resolved_kind("rust"), RconKind::Source);

    let cfg_webrcon = RconConfig { kind: Some(RconKind::WebRcon), ..cfg_no_kind };
    assert_eq!(cfg_webrcon.resolved_kind("conan"), RconKind::WebRcon);
}
|
||||
45
corrosion-host-agent/tests/steamcmd.rs
Normal file
45
corrosion-host-agent/tests/steamcmd.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
//! Unit tests for the SteamCMD module.
|
||||
//!
|
||||
//! Tests cover app ID resolution for all four supported games, including the
|
||||
//! platform-specific Soulmask split, and verify that Dune correctly returns
|
||||
//! `None` (it uses Docker images, not SteamCMD).
|
||||
|
||||
use corrosion_host_agent::steamcmd::app_id_for_game;
|
||||
|
||||
/// "rust" resolves to app ID 258550.
#[test]
fn rust_has_correct_app_id() {
    let resolved = app_id_for_game("rust");
    assert_eq!(resolved, Some(258550));
}
|
||||
|
||||
/// "conan" resolves to app ID 443030.
#[test]
fn conan_has_correct_app_id() {
    let resolved = app_id_for_game("conan");
    assert_eq!(resolved, Some(443030));
}
|
||||
|
||||
/// Windows builds only: "soulmask" resolves to app ID 3017310. Every other
/// target is covered by `soulmask_linux_app_id`.
#[cfg(windows)]
#[test]
fn soulmask_windows_app_id() {
    let resolved = app_id_for_game("soulmask");
    assert_eq!(resolved, Some(3017310));
}
|
||||
|
||||
/// Non-Windows builds: "soulmask" resolves to app ID 3017300.
#[cfg(not(windows))]
#[test]
fn soulmask_linux_app_id() {
    let resolved = app_id_for_game("soulmask");
    assert_eq!(resolved, Some(3017300));
}
|
||||
|
||||
/// Dune is deployed from Docker images rather than SteamCMD, so it has no
/// app ID.
#[test]
fn dune_has_no_app_id() {
    let resolved = app_id_for_game("dune");
    assert_eq!(resolved, None);
}
|
||||
|
||||
/// Unrecognised game names, the empty string included, produce `None`.
/// Callers should treat this exactly like Dune: no SteamCMD support.
#[test]
fn unknown_game_returns_none() {
    for game in ["minecraft", ""] {
        assert_eq!(app_id_for_game(game), None);
    }
}
|
||||
109
corrosion-host-agent/tests/supervisor.rs
Normal file
109
corrosion-host-agent/tests/supervisor.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
//! Process supervisor integration tests using real OS processes.
|
||||
//! Unix-only test doubles (/bin/sleep, /bin/sh) — the supervisor logic under
|
||||
//! test is platform-shared; Windows-specific stop semantics get covered when
|
||||
//! the Windows service work lands.
|
||||
#![cfg(unix)]
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use corrosion_host_agent::config::InstanceConfig;
|
||||
use corrosion_host_agent::process::{InstanceState, ProcessSupervisor};
|
||||
|
||||
fn managed_instance(executable: &str, args: &[&str]) -> InstanceConfig {
|
||||
InstanceConfig {
|
||||
id: "test-instance".to_string(),
|
||||
game: "rust".to_string(),
|
||||
root: PathBuf::from("/tmp"),
|
||||
label: None,
|
||||
executable: Some(PathBuf::from(executable)),
|
||||
args: args.iter().map(|s| s.to_string()).collect(),
|
||||
working_dir: None,
|
||||
rcon: None,
|
||||
steamcmd: None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_state(
|
||||
sup: &std::sync::Arc<ProcessSupervisor>,
|
||||
want: fn(&InstanceState) -> bool,
|
||||
budget: Duration,
|
||||
) -> InstanceState {
|
||||
let deadline = tokio::time::Instant::now() + budget;
|
||||
loop {
|
||||
let state = sup.state();
|
||||
if want(&state) {
|
||||
return state;
|
||||
}
|
||||
if tokio::time::Instant::now() > deadline {
|
||||
panic!("timed out waiting for state; last = {state:?}");
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn start_status_stop_lifecycle() {
|
||||
let sup = ProcessSupervisor::new(&managed_instance("/bin/sleep", &["300"]));
|
||||
assert_eq!(sup.state(), InstanceState::Stopped);
|
||||
|
||||
sup.start().await.expect("start should succeed");
|
||||
assert_eq!(sup.state(), InstanceState::Running);
|
||||
tokio::time::sleep(Duration::from_millis(1100)).await;
|
||||
assert!(sup.uptime_seconds().await >= 1, "uptime should advance");
|
||||
|
||||
// Double-start must be rejected while running.
|
||||
assert!(sup.start().await.is_err(), "double start must fail");
|
||||
|
||||
sup.stop().await.expect("stop should succeed");
|
||||
let state = wait_for_state(&sup, |s| matches!(s, InstanceState::Stopped), Duration::from_secs(5)).await;
|
||||
assert_eq!(state, InstanceState::Stopped);
|
||||
assert_eq!(sup.uptime_seconds().await, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unexpected_exit_is_crashed_with_code() {
|
||||
let sup = ProcessSupervisor::new(&managed_instance("/bin/sh", &["-c", "sleep 0.2; exit 7"]));
|
||||
sup.start().await.expect("start should succeed");
|
||||
|
||||
let state = wait_for_state(
|
||||
&sup,
|
||||
|s| matches!(s, InstanceState::Crashed { .. }),
|
||||
Duration::from_secs(5),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(state, InstanceState::Crashed { exit_code: Some(7) });
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn restart_from_crashed_recovers() {
|
||||
let sup = ProcessSupervisor::new(&managed_instance("/bin/sh", &["-c", "exit 1"]));
|
||||
sup.start().await.expect("start should succeed");
|
||||
wait_for_state(&sup, |s| matches!(s, InstanceState::Crashed { .. }), Duration::from_secs(5)).await;
|
||||
|
||||
// Restart from crashed must work (panel "Restart" after a crash).
|
||||
// Use a long-lived command this time by replacing the supervisor — the
|
||||
// command is fixed per supervisor, so emulate via a fresh one.
|
||||
let sup2 = ProcessSupervisor::new(&managed_instance("/bin/sleep", &["300"]));
|
||||
sup2.restart().await.expect("restart from stopped should start");
|
||||
assert_eq!(sup2.state(), InstanceState::Running);
|
||||
sup2.stop().await.expect("cleanup stop");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unmanaged_instance_rejects_process_commands() {
|
||||
let mut cfg = managed_instance("/bin/sleep", &["300"]);
|
||||
cfg.executable = None;
|
||||
let sup = ProcessSupervisor::new(&cfg);
|
||||
assert_eq!(sup.state(), InstanceState::Unmanaged);
|
||||
assert!(sup.start().await.is_err(), "unmanaged start must fail");
|
||||
assert!(sup.stop().await.is_err(), "unmanaged stop must fail");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn missing_executable_fails_cleanly() {
|
||||
let sup = ProcessSupervisor::new(&managed_instance("/nonexistent/bin/gameserver", &[]));
|
||||
let err = sup.start().await.expect_err("must fail");
|
||||
assert!(err.to_string().contains("not found"), "error should say not found: {err}");
|
||||
assert_eq!(sup.state(), InstanceState::Stopped, "failed start must not leave Starting state");
|
||||
}
|
||||
@@ -31,6 +31,9 @@ services:
|
||||
volumes:
|
||||
- nats_data:/data
|
||||
- ./nats.conf:/etc/nats/nats.conf:ro
|
||||
# Per-license authorization (generated on the host; carries secrets, not
|
||||
# committed with real users — see scripts/generate-nats-auth.mjs).
|
||||
- ./nats-auth.conf:/etc/nats/nats-auth.conf:ro
|
||||
ports:
|
||||
- "8089:4222" # Client connections
|
||||
|
||||
@@ -43,6 +46,12 @@ services:
|
||||
DATABASE_URL: postgres://corrosion:${DB_PASSWORD:-corrosion_dev}@postgres:5432/corrosion
|
||||
DATABASE_MAX_CONNECTIONS: "20"
|
||||
NATS_URL: nats://nats:4222
|
||||
# Privileged internal NATS user (full corrosion.> access). Empty = anonymous.
|
||||
NATS_INTERNAL_USER: ${NATS_INTERNAL_USER:-}
|
||||
NATS_INTERNAL_PASSWORD: ${NATS_INTERNAL_PASSWORD:-}
|
||||
# Secret for deriving per-license agent passwords (shared with the
|
||||
# nats-auth generator). HMAC-SHA256(license_id, secret).
|
||||
NATS_TOKEN_SECRET: ${NATS_TOKEN_SECRET:-}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
JWT_ACCESS_EXPIRY_SECONDS: "14400"
|
||||
JWT_REFRESH_EXPIRY_SECONDS: "604800"
|
||||
@@ -87,7 +96,10 @@ services:
|
||||
api:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q --spider http://localhost:80/ || exit 1"]
|
||||
# 127.0.0.1, not localhost: nginx listens IPv4-only (0.0.0.0:80) but
|
||||
# `localhost` resolves to ::1 first inside the container → the probe hit
|
||||
# nothing and reported unhealthy while the panel served fine on IPv4.
|
||||
test: ["CMD-SHELL", "wget -q --spider http://127.0.0.1:80/ || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
12
docker/nats-auth.conf
Normal file
12
docker/nats-auth.conf
Normal file
@@ -0,0 +1,12 @@
|
||||
# SAFE OPEN DEFAULT — anonymous full access, no secrets. Same behavior as the
# pre-auth broker so fresh deploys and the repo stay valid.
#
# Regenerated on deploy by scripts/generate-nats-auth.mjs with the privileged
# internal user + per-license scoped users (those carry secrets and must NOT be
# committed — mark the host copy with `git update-index --assume-unchanged`).
authorization {
  users: [
    { user: "anonymous", password: "", permissions: { publish: ">", subscribe: ">" } }
  ]
}

# Connections that present no credentials are mapped to this user.
# `no_auth_user` is a top-level server option that names a user defined in the
# authorization block above; it is kept outside `authorization { }` so the
# server does not treat it as an unknown authorization field.
no_auth_user: "anonymous"
|
||||
@@ -28,8 +28,11 @@ logtime: true
|
||||
max_payload: 8MB # Support map file transfer metadata
|
||||
max_connections: 10000
|
||||
|
||||
# Authorization — tokens validated per-connection
|
||||
# Plugin and companion agents authenticate with license-specific tokens
|
||||
authorization {
|
||||
timeout: 5
|
||||
}
|
||||
# Authorization — per-license isolation.
|
||||
# The committed nats-auth.conf is the SAFE OPEN default (anonymous full access,
|
||||
# no secrets — same as before). On deploy, scripts/generate-nats-auth.mjs
|
||||
# regenerates this file from the licenses table with the privileged internal
|
||||
# user + per-license scoped users; flip NATS_AUTH_STAGE=enforce to reject
|
||||
# anonymous. The host copy carries secrets and is NOT committed
|
||||
# (git update-index --assume-unchanged docker/nats-auth.conf).
|
||||
include "nats-auth.conf"
|
||||
|
||||
@@ -9,6 +9,9 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="theme-color" content="#0a0a0a" />
|
||||
<title>Corrosion Management</title>
|
||||
<meta name="description" content="Management panel for self-hosted survival game servers — Rust, Dune: Awakening, Conan Exiles, Soulmask. Wipe automation, plugins, monitoring. Bring your own server." />
|
||||
<meta property="og:title" content="Corrosion — Game Server Operations for Self-Hosted Communities" />
|
||||
<meta property="og:description" content="Management panel for self-hosted survival game servers — Rust, Dune: Awakening, Conan Exiles, Soulmask. Wipe automation, plugins, monitoring. Bring your own server." />
|
||||
<!-- Fonts via <link>, NOT a CSS @import — the bundler drops @import rules
|
||||
that land mid-file after concatenation, silently shipping system fonts -->
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
||||
|
||||
@@ -1,7 +1,14 @@
|
||||
<script setup lang="ts">
|
||||
import { onMounted } from 'vue'
|
||||
import { RouterView } from 'vue-router'
|
||||
import ToastNotification from '@/components/ToastNotification.vue'
|
||||
import ErrorBoundary from '@/components/ErrorBoundary.vue'
|
||||
import { useAuthStore } from '@/stores/auth'
|
||||
|
||||
// On boot, check any persisted session with the API so a stale token sends
// the user to login right away instead of after the first failed request.
const auth = useAuthStore()

onMounted(() => {
  // Fire-and-forget: validateSession handles its own failures.
  void auth.validateSession()
})
|
||||
</script>
|
||||
|
||||
<template>
|
||||
|
||||
@@ -124,6 +124,7 @@ export interface GameProfile {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const NAV_DASHBOARD: NavItemDef = { label: 'Dashboard', route: '/', icon: 'layout-dashboard', permission: null }
|
||||
const NAV_FLEET: NavItemDef = { label: 'Fleet', route: '/fleet', icon: 'server-cog', permission: 'server.view' }
|
||||
const NAV_SERVER: NavItemDef = { label: 'Server', route: '/server', icon: 'server', permission: 'server.view' }
|
||||
const NAV_CONSOLE: NavItemDef = { label: 'Console', route: '/console', icon: 'terminal', permission: 'console.view' }
|
||||
const NAV_PLAYERS: NavItemDef = { label: 'Players', route: '/players', icon: 'users', permission: 'players.view' }
|
||||
@@ -147,7 +148,7 @@ const RUST_NAV: NavSection[] = [
|
||||
{ label: '', items: [NAV_DASHBOARD] },
|
||||
{
|
||||
label: 'Server',
|
||||
items: [NAV_SERVER, NAV_CONSOLE, NAV_PLAYERS, NAV_PLUGINS, NAV_FILES],
|
||||
items: [NAV_FLEET, NAV_SERVER, NAV_CONSOLE, NAV_PLAYERS, NAV_PLUGINS, NAV_FILES],
|
||||
},
|
||||
{ label: 'Plugin configs', items: [NAV_PLUGIN_CONFIGS] },
|
||||
{
|
||||
@@ -211,7 +212,7 @@ export const GAME_PROFILES: Record<GameId, GameProfile> = {
|
||||
{
|
||||
label: 'Server',
|
||||
// Conan: no uMod/Oxide; has RCON console, maps, players, files
|
||||
items: [NAV_SERVER, NAV_CONSOLE, NAV_PLAYERS, NAV_FILES],
|
||||
items: [NAV_FLEET, NAV_SERVER, NAV_CONSOLE, NAV_PLAYERS, NAV_FILES],
|
||||
},
|
||||
{
|
||||
label: 'Operations',
|
||||
@@ -256,7 +257,7 @@ export const GAME_PROFILES: Record<GameId, GameProfile> = {
|
||||
{
|
||||
label: 'Server',
|
||||
// Soulmask: no uMod/Oxide; has RCON+GM console, players, files
|
||||
items: [NAV_SERVER, NAV_CONSOLE, NAV_PLAYERS, NAV_FILES],
|
||||
items: [NAV_FLEET, NAV_SERVER, NAV_CONSOLE, NAV_PLAYERS, NAV_FILES],
|
||||
},
|
||||
{
|
||||
label: 'Operations',
|
||||
@@ -299,6 +300,7 @@ export const GAME_PROFILES: Record<GameId, GameProfile> = {
|
||||
label: 'Server',
|
||||
// Dune: no RCON (uses RabbitMQ); label console "Broadcast"; no maps route; no plugins
|
||||
items: [
|
||||
NAV_FLEET,
|
||||
NAV_SERVER,
|
||||
{ label: 'Broadcast', route: '/console', icon: 'radio', permission: 'console.view' },
|
||||
NAV_PLAYERS,
|
||||
|
||||
@@ -1,11 +1,28 @@
|
||||
import { createRouter, createWebHistory, type RouteRecordRaw } from 'vue-router'
|
||||
import { useAuthStore } from '@/stores/auth'
|
||||
|
||||
// Module augmentation: gives every route's `meta` a typed shape so the fields
// below are checked wherever `route.meta` / `to.meta` is read.
declare module 'vue-router' {
  interface RouteMeta {
    /** Page title; copied to `document.title` and `og:title` after each navigation. */
    title?: string
    /** Page summary; copied to the `description` and `og:description` meta tags. */
    description?: string
    /** NOTE(review): presumably marks routes that need a signed-in user — confirm in the auth guard. */
    requiresAuth?: boolean
    /** NOTE(review): presumably marks guest-only routes (login, register, …) — confirm in the auth guard. */
    guest?: boolean
    /** NOTE(review): presumably restricts the route to super-admins — confirm in the auth guard. */
    superAdmin?: boolean
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Domain detection — runs once at module load
|
||||
// Env-driven so www./staging hosts route correctly; an exact-match literal
|
||||
// here once meant any non-canonical marketing host silently got the panel.
|
||||
// ---------------------------------------------------------------------------
|
||||
// Current host, or '' when there is no `window` (non-browser evaluation).
const hostname = typeof window !== 'undefined' ? window.location.hostname : ''

// Marketing hosts come from VITE_MARKETING_HOSTS (comma-separated). Entries are
// trimmed and lower-cased, and blanks are dropped, so " WWW.Example.com, " is
// tolerated. The literal list is only the fallback when the variable is unset.
const marketingHosts = (import.meta.env.VITE_MARKETING_HOSTS ?? 'corrosionmgmt.com,www.corrosionmgmt.com')
  .split(',')
  .map((h: string) => h.trim().toLowerCase())
  .filter(Boolean)

// Single declaration: the former exact-match `hostname === 'corrosionmgmt.com'`
// binding is gone — it redeclared this const and sent every non-canonical
// marketing host to the panel.
const isMarketingDomain = marketingHosts.includes(hostname.toLowerCase())
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Marketing page children — shared between both domain route sets
|
||||
@@ -15,31 +32,55 @@ const marketingChildren: RouteRecordRaw[] = [
|
||||
path: '',
|
||||
name: 'landing',
|
||||
component: () => import('@/views/marketing/LandingView.vue'),
|
||||
meta: {
|
||||
title: 'Corrosion — Game Server Operations for Self-Hosted Communities',
|
||||
description: 'Management panel for self-hosted survival game servers — Rust, Dune: Awakening, Conan Exiles, Soulmask. Wipe automation, plugins, monitoring. Bring your own server.',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'pricing',
|
||||
name: 'pricing',
|
||||
component: () => import('@/views/marketing/PricingView.vue'),
|
||||
meta: {
|
||||
title: 'Pricing — Corrosion',
|
||||
description: 'Plans from $9.99/mo (Hobby, 1–5 servers) to Network ($99.99+/mo, 50+ servers). Non-commercial and commercial tiers. No hosting fees — bring your own server.',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'how-it-works',
|
||||
name: 'how-it-works',
|
||||
component: () => import('@/views/marketing/HowItWorksView.vue'),
|
||||
meta: {
|
||||
title: 'How It Works — Corrosion',
|
||||
description: 'Install one host agent on Windows or Linux. It connects outbound-only to Corrosion — no inbound ports, no SSH. Manage every game instance from the browser.',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'faq',
|
||||
name: 'faq',
|
||||
component: () => import('@/views/marketing/FaqView.vue'),
|
||||
meta: {
|
||||
title: 'FAQ — Corrosion',
|
||||
description: 'Honest answers: Corrosion is self-service (BYOS, no hosting). Support is docs + community; 1:1 at $125/hr. Supports Rust, Dune, Conan Exiles, Soulmask.',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'roadmap',
|
||||
name: 'roadmap',
|
||||
component: () => import('@/views/marketing/RoadmapView.vue'),
|
||||
meta: {
|
||||
title: 'Roadmap — Corrosion',
|
||||
description: 'Phase 1 shipped: core control plane, auto-wiper, plugin management. In progress: Dune, Conan, Soulmask multi-game blueprints. Planned: API access, integrations.',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'early-access',
|
||||
name: 'early-access',
|
||||
component: () => import('@/views/marketing/EarlyAccessView.vue'),
|
||||
meta: {
|
||||
title: 'Early Access — Corrosion',
|
||||
description: 'Join the early access list. Get full control plane access — wipe automation, plugin management, real-time console — and lock in launch pricing.',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@@ -53,25 +94,25 @@ const panelRoutes: RouteRecordRaw[] = [
|
||||
path: '/login',
|
||||
name: 'login',
|
||||
component: () => import('@/views/auth/LoginView.vue'),
|
||||
meta: { guest: true },
|
||||
meta: { guest: true, title: 'Sign in — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: '/register',
|
||||
name: 'register',
|
||||
component: () => import('@/views/auth/RegisterView.vue'),
|
||||
meta: { guest: true },
|
||||
meta: { guest: true, title: 'Create account — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: '/forgot-password',
|
||||
name: 'forgot-password',
|
||||
component: () => import('@/views/auth/ForgotPasswordView.vue'),
|
||||
meta: { guest: true },
|
||||
meta: { guest: true, title: 'Reset password — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: '/setup',
|
||||
name: 'setup-wizard',
|
||||
component: () => import('@/views/auth/SetupWizardView.vue'),
|
||||
meta: { requiresAuth: true },
|
||||
meta: { requiresAuth: true, title: 'Setup — Corrosion' },
|
||||
},
|
||||
|
||||
// Admin dashboard routes (with sidebar layout)
|
||||
@@ -84,217 +125,260 @@ const panelRoutes: RouteRecordRaw[] = [
|
||||
path: '',
|
||||
name: 'dashboard',
|
||||
component: () => import('@/views/admin/DashboardView.vue'),
|
||||
meta: { title: 'Dashboard — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'server',
|
||||
name: 'server',
|
||||
component: () => import('@/views/admin/ServerView.vue'),
|
||||
meta: { title: 'Server — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'console',
|
||||
name: 'console',
|
||||
component: () => import('@/views/admin/ConsoleView.vue'),
|
||||
meta: { title: 'Console — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'players',
|
||||
name: 'players',
|
||||
component: () => import('@/views/admin/PlayersView.vue'),
|
||||
meta: { title: 'Players — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'plugins',
|
||||
name: 'plugins',
|
||||
component: () => import('@/views/admin/PluginsView.vue'),
|
||||
meta: { title: 'Plugins — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'files',
|
||||
name: 'files',
|
||||
component: () => import('@/views/admin/FileManagerView.vue'),
|
||||
meta: { title: 'Files — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'plugin-configs',
|
||||
name: 'plugin-configs',
|
||||
component: () => import('@/views/admin/PluginConfigsView.vue'),
|
||||
meta: { title: 'Plugin Configs — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'loot-builder',
|
||||
name: 'loot-builder',
|
||||
component: () => import('@/views/admin/LootBuilderView.vue'),
|
||||
meta: { title: 'Loot Builder — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'teleport-config',
|
||||
name: 'teleport-config',
|
||||
component: () => import('@/views/admin/TeleportConfigView.vue'),
|
||||
meta: { title: 'Teleport Config — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'gather-manager',
|
||||
name: 'gather-manager',
|
||||
component: () => import('@/views/admin/GatherManagerView.vue'),
|
||||
meta: { title: 'Gather Manager — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'autodoors',
|
||||
name: 'autodoors',
|
||||
component: () => import('@/views/admin/AutoDoorsView.vue'),
|
||||
meta: { title: 'Auto Doors — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'kits',
|
||||
name: 'kits-config',
|
||||
component: () => import('@/views/admin/KitsView.vue'),
|
||||
meta: { title: 'Kits — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'furnace-splitter',
|
||||
name: 'furnace-splitter',
|
||||
component: () => import('@/views/admin/FurnaceSplitterView.vue'),
|
||||
meta: { title: 'Furnace Splitter — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'better-chat',
|
||||
name: 'better-chat',
|
||||
component: () => import('@/views/admin/BetterChatView.vue'),
|
||||
meta: { title: 'Better Chat — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'timed-execute',
|
||||
name: 'timed-execute',
|
||||
component: () => import('@/views/admin/TimedExecuteView.vue'),
|
||||
meta: { title: 'Timed Execute — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'raidable-bases',
|
||||
name: 'raidable-bases',
|
||||
component: () => import('@/views/admin/RaidableBasesView.vue'),
|
||||
meta: { title: 'Raidable Bases — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'wipes',
|
||||
name: 'wipes',
|
||||
component: () => import('@/views/admin/WipesView.vue'),
|
||||
meta: { title: 'Wipes — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'wipes/profiles',
|
||||
name: 'wipe-profiles',
|
||||
component: () => import('@/views/admin/WipeProfilesView.vue'),
|
||||
meta: { title: 'Wipe Profiles — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'wipes/calendar',
|
||||
name: 'wipe-calendar',
|
||||
component: () => import('@/views/admin/WipeCalendarView.vue'),
|
||||
meta: { title: 'Wipe Calendar — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'wipes/history',
|
||||
name: 'wipe-history',
|
||||
component: () => import('@/views/admin/WipeHistoryView.vue'),
|
||||
meta: { title: 'Wipe History — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'wipes/analytics',
|
||||
name: 'wipe-analytics',
|
||||
component: () => import('@/views/admin/WipeAnalyticsView.vue'),
|
||||
meta: { title: 'Wipe Analytics — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'maps',
|
||||
name: 'maps',
|
||||
component: () => import('@/views/admin/MapsView.vue'),
|
||||
meta: { title: 'Maps — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'maps/analytics',
|
||||
name: 'map-analytics',
|
||||
component: () => import('@/views/admin/MapAnalyticsView.vue'),
|
||||
meta: { title: 'Map Analytics — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'chat',
|
||||
name: 'chat',
|
||||
component: () => import('@/views/admin/ChatLogView.vue'),
|
||||
meta: { title: 'Chat Log — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'analytics',
|
||||
name: 'analytics',
|
||||
component: () => import('@/views/admin/AnalyticsView.vue'),
|
||||
meta: { title: 'Analytics — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'retention',
|
||||
name: 'retention',
|
||||
component: () => import('@/views/admin/PlayerRetentionView.vue'),
|
||||
meta: { title: 'Player Retention — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'notifications',
|
||||
name: 'notifications',
|
||||
component: () => import('@/views/admin/NotificationsView.vue'),
|
||||
meta: { title: 'Notifications — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'team',
|
||||
name: 'team',
|
||||
component: () => import('@/views/admin/TeamView.vue'),
|
||||
meta: { title: 'Team — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'store/config',
|
||||
name: 'store-config',
|
||||
component: () => import('@/views/admin/StoreConfigView.vue'),
|
||||
meta: { title: 'Store Config — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'store/items',
|
||||
name: 'store-items',
|
||||
component: () => import('@/views/admin/StoreItemsView.vue'),
|
||||
meta: { title: 'Store Items — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'store/revenue',
|
||||
name: 'store-revenue',
|
||||
component: () => import('@/views/admin/StoreRevenueView.vue'),
|
||||
meta: { title: 'Store Revenue — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'modules',
|
||||
name: 'modules',
|
||||
component: () => import('@/views/admin/ModuleStoreView.vue'),
|
||||
meta: { title: 'Modules — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'settings',
|
||||
name: 'settings',
|
||||
component: () => import('@/views/admin/SettingsView.vue'),
|
||||
meta: { title: 'Settings — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'schedules',
|
||||
name: 'schedules',
|
||||
component: () => import('@/views/admin/SchedulesView.vue'),
|
||||
meta: { title: 'Schedules — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'migration',
|
||||
name: 'migration',
|
||||
component: () => import('@/views/admin/MigrationView.vue'),
|
||||
meta: { title: 'Migration — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'changelog',
|
||||
name: 'changelog',
|
||||
component: () => import('@/views/admin/ChangelogView.vue'),
|
||||
meta: { title: 'Changelog — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'alerts',
|
||||
name: 'alerts',
|
||||
component: () => import('@/views/admin/AlertsView.vue'),
|
||||
meta: { title: 'Alerts — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'fleet',
|
||||
name: 'fleet',
|
||||
component: () => import('@/views/admin/FleetView.vue'),
|
||||
meta: { title: 'Fleet — Corrosion', requiresAuth: true },
|
||||
},
|
||||
// Platform Admin views (super-admin only)
|
||||
{
|
||||
path: 'admin',
|
||||
name: 'platform-admin',
|
||||
component: () => import('@/views/platform-admin/AdminDashboard.vue'),
|
||||
meta: { superAdmin: true },
|
||||
meta: { superAdmin: true, title: 'Admin — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'admin/licenses',
|
||||
name: 'platform-licenses',
|
||||
component: () => import('@/views/platform-admin/AdminLicenses.vue'),
|
||||
meta: { superAdmin: true },
|
||||
meta: { superAdmin: true, title: 'Admin: Licenses — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'admin/subscriptions',
|
||||
name: 'platform-subscriptions',
|
||||
component: () => import('@/views/platform-admin/AdminSubscriptions.vue'),
|
||||
meta: { superAdmin: true },
|
||||
meta: { superAdmin: true, title: 'Admin: Subscriptions — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'admin/users',
|
||||
name: 'platform-users',
|
||||
component: () => import('@/views/platform-admin/AdminUsers.vue'),
|
||||
meta: { superAdmin: true },
|
||||
meta: { superAdmin: true, title: 'Admin: Users — Corrosion' },
|
||||
},
|
||||
{
|
||||
path: 'admin/servers',
|
||||
name: 'platform-servers',
|
||||
component: () => import('@/views/platform-admin/AdminServers.vue'),
|
||||
meta: { superAdmin: true },
|
||||
meta: { superAdmin: true, title: 'Admin: Servers — Corrosion' },
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -329,6 +413,7 @@ const panelRoutes: RouteRecordRaw[] = [
|
||||
path: '/status',
|
||||
name: 'status',
|
||||
component: () => import('@/views/public/StatusPageView.vue'),
|
||||
meta: { title: 'Status — Corrosion' },
|
||||
},
|
||||
|
||||
// Catch-all
|
||||
@@ -366,6 +451,7 @@ const marketingRoutes: RouteRecordRaw[] = [
|
||||
path: '/status',
|
||||
name: 'status',
|
||||
component: () => import('@/views/public/StatusPageView.vue'),
|
||||
meta: { title: 'Status — Corrosion' },
|
||||
},
|
||||
|
||||
// Catch-all: unknown routes → landing page
|
||||
@@ -383,6 +469,38 @@ const router = createRouter({
|
||||
routes: isMarketingDomain ? marketingRoutes : panelRoutes,
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Document title + meta description/OG update on every navigation
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Set attribute `attr` of the `<meta>` element matched by `selector` to
 * `value`, creating the element in `<head>` when it does not exist yet.
 *
 * Passing `''` clears an existing tag's attribute. If no tag exists there is
 * nothing to clear, so no empty `<meta>` is created.
 *
 * @param selector CSS selector of the form `meta[name="…"]` or
 *                 `meta[property="…"]` (double-quoted value); the bracket part
 *                 is also used to tag a newly created element.
 * @param attr     Attribute to write, e.g. `content`.
 * @param value    New attribute value; `''` means "clear".
 */
function setOrClearMeta(selector: string, attr: string, value: string): void {
  const existing = document.querySelector<HTMLMetaElement>(selector)
  if (existing) {
    existing.setAttribute(attr, value)
    return
  }

  // Clearing a tag that is not there: do not add an empty <meta> to <head>.
  if (value === '') return

  // Recover the identifying attribute (name="..." or property="...") from the selector.
  const nameKey = selector.match(/\[name="([^"]+)"\]/)?.[1]
  const propKey = selector.match(/\[property="([^"]+)"\]/)?.[1]

  // Without an identifying attribute the new element would never match
  // `selector` again, so each navigation would append another orphan tag.
  if (!nameKey && !propKey) return

  const created = document.createElement('meta')
  if (nameKey) created.setAttribute('name', nameKey)
  if (propKey) created.setAttribute('property', propKey)
  created.setAttribute(attr, value)
  document.head.appendChild(created)
}
|
||||
|
||||
// After every navigation, mirror the route's meta into the document: the tab
// title plus the description / Open Graph tags. Routes without a title fall
// back to the product name; routes without a description get an empty one.
router.afterEach((to) => {
  const { title, description } = to.meta
  const pageTitle = title ?? 'Corrosion Management'
  const pageDescription = description ?? ''

  document.title = pageTitle
  setOrClearMeta('meta[name="description"]', 'content', pageDescription)
  setOrClearMeta('meta[property="og:title"]', 'content', pageTitle)
  setOrClearMeta('meta[property="og:description"]', 'content', pageDescription)
})
|
||||
|
||||
// Auth guard — only meaningful on panel domain (marketing has no requiresAuth routes)
|
||||
router.beforeEach((to, _from, next) => {
|
||||
const auth = useAuthStore()
|
||||
|
||||
@@ -58,6 +58,27 @@ export const useAuthStore = defineStore('auth', () => {
|
||||
permissions.value = {}
|
||||
}
|
||||
|
||||
/**
 * Boot-time check of the persisted session: calls `GET /auth/me` and merges
 * the returned fields into the stored user.
 *
 * Why: without it, a stale or revoked token still renders the full panel
 * chrome and only collapses on the first real API call. The 401 handling
 * (refresh → retry → logout) lives in useApi; every other failure (network,
 * 5xx) is deliberately ignored so an API blip never logs anyone out.
 *
 * useApi is imported dynamically to avoid a static auth-store ↔ useApi
 * module cycle. No-op when there is no access token.
 */
async function validateSession(): Promise<void> {
  const hasToken = Boolean(accessToken.value)
  if (!hasToken) return

  try {
    const apiModule = await import('@/composables/useApi')
    const api = apiModule.useApi()
    const me = await api.get<Partial<User>>('/auth/me')

    // Only merge when there is a stored user and the payload is an object.
    if (!user.value || !me || typeof me !== 'object') return
    user.value = { ...user.value, ...me }
  } catch {
    // 401 → refresh → logout/redirect already handled inside useApi.
  }
}
|
||||
|
||||
/**
 * Whether `moduleSlug` is listed in the current license's `modules_enabled`.
 * Returns false when there is no license or no module list.
 */
function hasModule(moduleSlug: string): boolean {
  const enabledModules = license.value?.modules_enabled
  if (enabledModules == null) return false
  return enabledModules.includes(moduleSlug)
}
|
||||
@@ -92,6 +113,7 @@ export const useAuthStore = defineStore('auth', () => {
|
||||
setAuth,
|
||||
setLicense,
|
||||
logout,
|
||||
validateSession,
|
||||
hasModule,
|
||||
hasPermission,
|
||||
}
|
||||
|
||||
87
frontend/src/stores/fleet.ts
Normal file
87
frontend/src/stores/fleet.ts
Normal file
@@ -0,0 +1,87 @@
|
||||
import { defineStore } from 'pinia'
|
||||
import { ref } from 'vue'
|
||||
import { useApi } from '@/composables/useApi'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types — mirrors the FleetResponseDto from the backend
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** One disk entry of a host, as carried in `FleetHost.disks`. */
export interface FleetDisk {
  mount: string
  // NOTE(review): the `_mb` suffix suggests megabytes — confirm against the backend DTO.
  total_mb: number
  free_mb: number
}
|
||||
|
||||
/** One game instance, as carried in `FleetHost.instances`. */
export interface FleetInstance {
  id: string
  // NOTE(review): presumably the instance id as known to the host agent — confirm against the backend DTO.
  agent_instance_id: string
  game: string
  label: string | null
  // Plain string here; the set of possible values is not defined in this file.
  state: string
  uptime_seconds: number
  // NOTE(review): presumably an ISO timestamp string — confirm against the backend DTO.
  last_seen_at: string | null
}
|
||||
|
||||
/**
 * One host in the fleet response, together with its game instances.
 * Most fields are nullable; only `id`, `hostname`, `status` and `instances`
 * are always present in this type.
 */
export interface FleetHost {
  id: string
  hostname: string
  // Plain string here; the set of possible values is not defined in this file.
  status: string
  agent_version: string | null
  os: string | null
  arch: string | null
  cpu_percent: number | null
  cpu_cores: number | null
  // NOTE(review): the `_mb` suffix suggests megabytes — confirm against the backend DTO.
  mem_total_mb: number | null
  mem_used_mb: number | null
  uptime_seconds: number | null
  disks: FleetDisk[] | null
  // NOTE(review): presumably an ISO timestamp string — confirm against the backend DTO.
  last_heartbeat_at: string | null
  instances: FleetInstance[]
}
|
||||
|
||||
/** Aggregate counters delivered alongside the host list in `FleetData`. */
export interface FleetSummary {
  host_count: number
  instance_count: number
  online_host_count: number
}
|
||||
|
||||
/** Response body of `GET /fleet` as requested by the fleet store. */
export interface FleetData {
  hosts: FleetHost[]
  summary: FleetSummary
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Store
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Pinia store for the fleet overview.
 *
 * State:
 * - `hosts` / `summary`: last successfully loaded fleet payload
 * - `loading`: true while a `fetchFleet()` call is in flight
 * - `error`: message of the last failed load, or null
 */
export const useFleetStore = defineStore('fleet', () => {
  const hosts = ref<FleetHost[]>([])
  const summary = ref<FleetSummary>({ host_count: 0, instance_count: 0, online_host_count: 0 })
  const loading = ref(false)
  const error = ref<string | null>(null)

  const api = useApi()

  /**
   * Load the fleet from `GET /fleet` into `hosts` and `summary`.
   *
   * Never throws: a failed request or a malformed payload is logged and
   * reported through `error`, and the previously loaded data is kept.
   */
  async function fetchFleet() {
    loading.value = true
    error.value = null
    try {
      const data = await api.get<FleetData>('/fleet')
      // Validate the shape before touching state: assigning a missing
      // `hosts`/`summary` would break consumers that read `hosts.length`
      // or the summary counters, outside of this store's error state.
      if (!data || !Array.isArray(data.hosts) || !data.summary) {
        throw new Error('Malformed fleet response')
      }
      hosts.value = data.hosts
      summary.value = data.summary
    } catch (e) {
      console.error('Failed to fetch fleet:', e)
      error.value = e instanceof Error ? e.message : 'Failed to load fleet data'
    } finally {
      loading.value = false
    }
  }

  return {
    hosts,
    summary,
    loading,
    error,
    fetchFleet,
  }
})
|
||||
467
frontend/src/views/admin/FleetView.vue
Normal file
467
frontend/src/views/admin/FleetView.vue
Normal file
@@ -0,0 +1,467 @@
|
||||
<script setup lang="ts">
|
||||
/**
|
||||
* FleetView — Read-only fleet overview: hosts and game instances for this license.
|
||||
*
|
||||
* Data flow: useFleetStore → GET /api/fleet → tenant-scoped AgentHost + GameInstance rows.
|
||||
*
|
||||
* Render states:
|
||||
* - loading → shows skeleton / loading text
|
||||
* - error → shows error panel (fetch failed / 401 → error state, NOT global error boundary)
|
||||
* - empty → honest empty state with CTA to /server
|
||||
* - populated → summary strip + one card per host + instance list under each
|
||||
*
|
||||
* No fabricated data. All nulls render as '—' via safeFixed/safeDate.
|
||||
*/
|
||||
import { onMounted, computed } from 'vue'
|
||||
import { useRouter } from 'vue-router'
|
||||
import { useFleetStore } from '@/stores/fleet'
|
||||
import type { FleetHost } from '@/stores/fleet'
|
||||
import { safeFixed, safeDate } from '@/utils/formatters'
|
||||
import Panel from '@/components/ds/data/Panel.vue'
|
||||
import StatCard from '@/components/ds/data/StatCard.vue'
|
||||
import Badge from '@/components/ds/core/Badge.vue'
|
||||
import StatusDot from '@/components/ds/core/StatusDot.vue'
|
||||
import Button from '@/components/ds/core/Button.vue'
|
||||
import EmptyState from '@/components/ds/feedback/EmptyState.vue'
|
||||
import Icon from '@/components/ds/core/Icon.vue'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Store / router
|
||||
// ---------------------------------------------------------------------------
|
||||
const fleet = useFleetStore()
const router = useRouter()

// Trigger the initial load as soon as the component is mounted.
onMounted(function loadFleetOnMount() {
  fleet.fetchFleet()
})

// ---------------------------------------------------------------------------
// Derived state
// ---------------------------------------------------------------------------

/** True once the store holds at least one host. */
const hasHosts = computed(() => fleet.hosts.length !== 0)
|
||||
|
||||
/**
 * Translate a host status string into a display tone.
 *
 * 'connected' → 'online', 'degraded' → 'warn', anything else → 'offline'.
 */
function hostTone(status: string): 'online' | 'offline' | 'warn' {
  switch (status) {
    case 'connected':
      return 'online'
    case 'degraded':
      return 'warn'
    default:
      return 'offline'
  }
}
|
||||
|
||||
/**
 * Human-readable label for a host status string.
 *
 * 'connected' → 'Online', 'degraded' → 'Degraded', anything else → 'Offline'.
 */
function hostStatusLabel(status: string): string {
  switch (status) {
    case 'connected':
      return 'Online'
    case 'degraded':
      return 'Degraded'
    default:
      return 'Offline'
  }
}
|
||||
|
||||
/**
 * Translate a game-instance state string into a display tone.
 *
 * 'running' → 'online', 'crashed' → 'offline', 'stopped' → 'warn';
 * every other state falls back to 'neutral'.
 */
function instanceTone(state: string): 'online' | 'offline' | 'warn' | 'neutral' {
  switch (state) {
    case 'running':
      return 'online'
    case 'crashed':
      return 'offline'
    case 'stopped':
      return 'warn'
    default:
      return 'neutral'
  }
}
|
||||
|
||||
/**
 * Format an uptime in seconds as a short human-readable string.
 *
 * Only the two most significant units are shown:
 *   - one day or more   → "Xd Yh"
 *   - one hour or more  → "Xh Ym"
 *   - otherwise         → "Xm" (sub-minute uptimes read "0m")
 *
 * @param seconds Uptime in seconds, or null when unknown.
 * @returns The formatted uptime, or '—' when the value is missing, negative,
 *          or not a finite number (NaN / ±Infinity), so the view never shows
 *          fabricated text such as "NaNm".
 */
function formatUptime(seconds: number | null): string {
  if (seconds == null) return '—'
  // Number() keeps the original tolerance for numeric strings while letting
  // us reject NaN / Infinity explicitly.
  const total = Number(seconds)
  if (!Number.isFinite(total) || total < 0) return '—'

  const d = Math.floor(total / 86400)
  const h = Math.floor((total % 86400) / 3600)
  const m = Math.floor((total % 3600) / 60)

  if (d > 0) return `${d}d ${h}h`
  if (h > 0) return `${h}h ${m}m`
  return `${m}m`
}
|
||||
|
||||
/**
 * Format memory usage as "<used>MB / <total>MB".
 *
 * Each side is rounded to a whole number; a side that is missing is shown
 * as '—'. When both sides are missing the whole value collapses to '—'.
 */
function formatMem(used: number | null, total: number | null): string {
  if (used == null && total == null) return '—'

  const asMb = (mb: number | null): string => (mb == null ? '—' : `${Math.round(mb)}MB`)

  return `${asMb(used)} / ${asMb(total)}`
}
|
||||
|
||||
/**
 * Describe the host's primary disk — the first entry of `host.disks` —
 * as "<mount> · <N>% free".
 *
 * Returns '—' when the host reports no disks. A disk whose total size is
 * not positive is reported as 0% free.
 */
function primaryDisk(host: FleetHost): string {
  const disks = host.disks
  const first = disks && disks.length > 0 ? disks[0] : null
  if (first == null) return '—'

  let freePct = 0
  if (first.total_mb > 0) {
    freePct = Math.round((first.free_mb / first.total_mb) * 100)
  }
  return `${first.mount} · ${freePct}% free`
}
|
||||
|
||||
/**
 * Text for the "Last heartbeat" metric.
 *
 * Returns 'Never' when no heartbeat timestamp is available (null or empty);
 * otherwise the timestamp is handed to `safeDate` and its result is returned
 * unchanged.
 */
function relativeHeartbeat(iso: string | null): string {
  return iso ? safeDate(iso) : 'Never'
}
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<div class="fleet-view">
|
||||
<!-- Page header -->
|
||||
<div class="fleet-view__header">
|
||||
<div>
|
||||
<h1 class="fleet-view__title">Fleet</h1>
|
||||
<p class="fleet-view__sub">Hosts and game instances connected to this license.</p>
|
||||
</div>
|
||||
<Button variant="ghost" icon="refresh-cw" :disabled="fleet.loading" @click="fleet.fetchFleet()">
|
||||
Refresh
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
<!-- Loading state -->
|
||||
<div v-if="fleet.loading && !hasHosts" class="fleet-view__loading">
|
||||
<Icon name="loader" :size="18" class="fleet-loading-icon" />
|
||||
<span>Loading fleet data…</span>
|
||||
</div>
|
||||
|
||||
<!-- Error state (API failed / 401 / network error) — honest, not global error boundary -->
|
||||
<Panel v-else-if="fleet.error && !hasHosts" title="Could not load fleet data">
|
||||
<EmptyState
|
||||
icon="wifi-off"
|
||||
title="Fleet data unavailable"
|
||||
:description="fleet.error"
|
||||
>
|
||||
<template #action>
|
||||
<Button variant="primary" @click="fleet.fetchFleet()">Try again</Button>
|
||||
</template>
|
||||
</EmptyState>
|
||||
</Panel>
|
||||
|
||||
<!-- Empty state — no hosts returned -->
|
||||
<Panel v-else-if="!fleet.loading && !fleet.error && !hasHosts">
|
||||
<EmptyState
|
||||
icon="server"
|
||||
title="No hosts connected yet"
|
||||
description="Install the Corrosion host agent on your server machine to see it here."
|
||||
>
|
||||
<template #action>
|
||||
<Button variant="primary" @click="router.push('/server')">Go to Server page</Button>
|
||||
</template>
|
||||
</EmptyState>
|
||||
</Panel>
|
||||
|
||||
<!-- Populated fleet -->
|
||||
<template v-else>
|
||||
<!-- Summary strip -->
|
||||
<div class="fleet-view__summary">
|
||||
<StatCard
|
||||
label="Total hosts"
|
||||
:value="fleet.summary.host_count"
|
||||
icon="server"
|
||||
/>
|
||||
<StatCard
|
||||
label="Online hosts"
|
||||
:value="fleet.summary.online_host_count"
|
||||
icon="activity"
|
||||
/>
|
||||
<StatCard
|
||||
label="Game instances"
|
||||
:value="fleet.summary.instance_count"
|
||||
icon="layers"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<!-- Host cards -->
|
||||
<div class="fleet-view__hosts">
|
||||
<Panel
|
||||
v-for="host in fleet.hosts"
|
||||
:key="host.id"
|
||||
class="fleet-host"
|
||||
>
|
||||
<!-- Host header -->
|
||||
<template #default>
|
||||
<div class="fleet-host__head">
|
||||
<div class="fleet-host__identity">
|
||||
<StatusDot :tone="hostTone(host.status)" :pulse="host.status === 'connected'" :size="9" />
|
||||
<span class="fleet-host__name">{{ host.hostname }}</span>
|
||||
<Badge :tone="hostTone(host.status)" :dot="false">{{ hostStatusLabel(host.status) }}</Badge>
|
||||
</div>
|
||||
<div class="fleet-host__meta">
|
||||
<span class="fleet-host__meta-item" v-if="host.agent_version">
|
||||
<Icon name="zap" :size="12" />v{{ host.agent_version }}
|
||||
</span>
|
||||
<span class="fleet-host__meta-item" v-if="host.os || host.arch">
|
||||
<Icon name="cpu" :size="12" />{{ [host.os, host.arch].filter(Boolean).join(' / ') }}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Host metrics row -->
|
||||
<div class="fleet-host__metrics">
|
||||
<div class="fleet-metric">
|
||||
<span class="fleet-metric__label">CPU</span>
|
||||
<span class="fleet-metric__value">
|
||||
{{ host.cpu_percent != null ? safeFixed(host.cpu_percent, 1) + '%' : '—' }}
|
||||
<span v-if="host.cpu_cores" class="fleet-metric__sub">{{ host.cpu_cores }} cores</span>
|
||||
</span>
|
||||
</div>
|
||||
<div class="fleet-metric">
|
||||
<span class="fleet-metric__label">Memory</span>
|
||||
<span class="fleet-metric__value">{{ formatMem(host.mem_used_mb, host.mem_total_mb) }}</span>
|
||||
</div>
|
||||
<div class="fleet-metric">
|
||||
<span class="fleet-metric__label">Disk</span>
|
||||
<span class="fleet-metric__value">{{ primaryDisk(host) }}</span>
|
||||
</div>
|
||||
<div class="fleet-metric">
|
||||
<span class="fleet-metric__label">Uptime</span>
|
||||
<span class="fleet-metric__value">{{ formatUptime(host.uptime_seconds) }}</span>
|
||||
</div>
|
||||
<div class="fleet-metric">
|
||||
<span class="fleet-metric__label">Last heartbeat</span>
|
||||
<span class="fleet-metric__value fleet-metric__value--sm">{{ relativeHeartbeat(host.last_heartbeat_at) }}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Instance list -->
|
||||
<div v-if="host.instances.length > 0" class="fleet-host__instances">
|
||||
<div class="fleet-instances__label t-eyebrow">Game instances ({{ host.instances.length }})</div>
|
||||
<div class="fleet-instances__list">
|
||||
<div
|
||||
v-for="inst in host.instances"
|
||||
:key="inst.id"
|
||||
class="fleet-instance"
|
||||
>
|
||||
<StatusDot :tone="instanceTone(inst.state)" :size="7" />
|
||||
<span class="fleet-instance__game">{{ inst.game }}</span>
|
||||
<span v-if="inst.label" class="fleet-instance__label">{{ inst.label }}</span>
|
||||
<Badge :tone="instanceTone(inst.state)" class="fleet-instance__badge">
|
||||
{{ inst.state }}
|
||||
</Badge>
|
||||
<span class="fleet-instance__uptime">{{ formatUptime(inst.uptime_seconds) }}</span>
|
||||
<span class="fleet-instance__seen">{{ safeDate(inst.last_seen_at) }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- No instances under this host -->
|
||||
<div v-else class="fleet-host__no-instances">
|
||||
<Icon name="layers" :size="13" />
|
||||
<span>No game instances reported</span>
|
||||
</div>
|
||||
</template>
|
||||
</Panel>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<style scoped>
|
||||
/* ---- Page shell ---- */
|
||||
.fleet-view {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 20px;
|
||||
padding: 24px;
|
||||
max-width: 1100px;
|
||||
}
|
||||
|
||||
.fleet-view__header {
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.fleet-view__title {
|
||||
font-size: var(--text-xl);
|
||||
font-weight: 700;
|
||||
color: var(--text-primary);
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.fleet-view__sub {
|
||||
font-size: var(--text-sm);
|
||||
color: var(--text-tertiary);
|
||||
margin: 2px 0 0;
|
||||
}
|
||||
|
||||
.fleet-view__loading {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
color: var(--text-tertiary);
|
||||
font-size: var(--text-sm);
|
||||
padding: 32px 0;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
.fleet-loading-icon {
|
||||
animation: spin 1s linear infinite;
|
||||
color: var(--accent);
|
||||
}
|
||||
|
||||
/* ---- Summary strip ---- */
|
||||
.fleet-view__summary {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(3, 1fr);
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
@media (max-width: 640px) {
|
||||
.fleet-view__summary { grid-template-columns: 1fr; }
|
||||
}
|
||||
|
||||
/* ---- Host list ---- */
|
||||
.fleet-view__hosts {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 14px;
|
||||
}
|
||||
|
||||
/* ---- Host card internals ---- */
|
||||
.fleet-host__head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
flex-wrap: wrap;
|
||||
padding: 14px 16px 12px;
|
||||
}
|
||||
|
||||
.fleet-host__identity {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 9px;
|
||||
}
|
||||
|
||||
.fleet-host__name {
|
||||
font-weight: 600;
|
||||
font-size: var(--text-base);
|
||||
color: var(--text-primary);
|
||||
font-family: var(--font-mono);
|
||||
}
|
||||
|
||||
.fleet-host__meta {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 12px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.fleet-host__meta-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 4px;
|
||||
font-size: var(--text-xs);
|
||||
color: var(--text-tertiary);
|
||||
}
|
||||
|
||||
/* ---- Metrics row ---- */
|
||||
.fleet-host__metrics {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0;
|
||||
border-top: 1px solid var(--border-subtle);
|
||||
border-bottom: 1px solid var(--border-subtle);
|
||||
}
|
||||
|
||||
.fleet-metric {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 2px;
|
||||
padding: 10px 16px;
|
||||
border-right: 1px solid var(--border-subtle);
|
||||
flex: 1;
|
||||
min-width: 110px;
|
||||
}
|
||||
.fleet-metric:last-child { border-right: none; }
|
||||
|
||||
.fleet-metric__label {
|
||||
font-size: var(--text-xs);
|
||||
color: var(--text-tertiary);
|
||||
font-weight: 500;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.04em;
|
||||
}
|
||||
|
||||
.fleet-metric__value {
|
||||
font-family: var(--font-mono);
|
||||
font-size: var(--text-sm);
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 5px;
|
||||
}
|
||||
|
||||
.fleet-metric__value--sm {
|
||||
font-size: 11px;
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
.fleet-metric__sub {
|
||||
font-size: var(--text-xs);
|
||||
color: var(--text-muted);
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
/* ---- Instance list ---- */
|
||||
.fleet-host__instances {
|
||||
padding: 12px 16px 14px;
|
||||
}
|
||||
|
||||
.fleet-instances__label {
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.fleet-instances__list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.fleet-instance {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 9px;
|
||||
padding: 7px 10px;
|
||||
background: var(--surface-raised-2);
|
||||
border-radius: var(--radius-sm);
|
||||
font-size: var(--text-sm);
|
||||
}
|
||||
|
||||
.fleet-instance__game {
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
min-width: 60px;
|
||||
}
|
||||
|
||||
.fleet-instance__label {
|
||||
color: var(--text-secondary);
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.fleet-instance__badge {
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.fleet-instance__uptime {
|
||||
font-family: var(--font-mono);
|
||||
font-size: var(--text-xs);
|
||||
color: var(--text-tertiary);
|
||||
min-width: 48px;
|
||||
}
|
||||
|
||||
.fleet-instance__seen {
|
||||
font-size: var(--text-xs);
|
||||
color: var(--text-muted);
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
/* ---- No instances ---- */
|
||||
.fleet-host__no-instances {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
padding: 12px 16px 14px;
|
||||
font-size: var(--text-sm);
|
||||
color: var(--text-muted);
|
||||
}
|
||||
</style>
|
||||
85
scripts/generate-nats-auth.mjs
Normal file
85
scripts/generate-nats-auth.mjs
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env node
|
||||
// Generate corrosion-nats authorization config from the licenses table.
|
||||
//
|
||||
// Per-license isolation without auth-callout: each license maps to a NATS user
|
||||
// (user = license UUID, password = HMAC-SHA256(license_id, NATS_TOKEN_SECRET))
|
||||
// whose publish/subscribe is restricted to corrosion.{license_id}.> (+ _INBOX
|
||||
// for request-reply). The backend uses a privileged internal user.
|
||||
//
|
||||
// STAGING (NATS_AUTH_STAGE env):
|
||||
// "open" (default) — defines a full-access `anonymous` user and sets
|
||||
// no_auth_user, so unauthenticated clients still work.
|
||||
// Non-breaking; lets you verify real creds first.
|
||||
// "enforce" — omits no_auth_user; anonymous connections are rejected.
|
||||
//
|
||||
// Usage:
|
||||
// DATABASE_URL=... NATS_INTERNAL_USER=... NATS_INTERNAL_PASSWORD=... \
|
||||
// NATS_TOKEN_SECRET=... NATS_AUTH_STAGE=open node scripts/generate-nats-auth.mjs > docker/nats-auth.conf
|
||||
//
|
||||
// Re-run and reload NATS (`docker exec corrosion-nats nats-server --signal reload`)
|
||||
// whenever licenses change.
|
||||
|
||||
import { createRequire } from 'node:module';
|
||||
import { createHmac } from 'node:crypto';
|
||||
|
||||
// Build a CommonJS `require` anchored inside backend-nest so that `pg` is
// resolved from that package's dependencies. `x.js` only serves as the anchor
// path for resolution — presumably it does not need to exist; confirm.
const require = createRequire(new URL('../backend-nest/x.js', import.meta.url));
const { Client } = require('pg');

// Configuration comes exclusively from the environment; the stage defaults
// to "open" when NATS_AUTH_STAGE is not set.
const {
  DATABASE_URL,
  NATS_INTERNAL_USER,
  NATS_INTERNAL_PASSWORD,
  NATS_TOKEN_SECRET,
  NATS_AUTH_STAGE = 'open',
} = process.env;

// Fail fast (exit code 2) on the first required variable that is unset or empty.
const requiredEnv = { DATABASE_URL, NATS_INTERNAL_USER, NATS_INTERNAL_PASSWORD, NATS_TOKEN_SECRET };
Object.entries(requiredEnv).forEach(([name, value]) => {
  if (value) return;
  console.error(`Missing required env: ${name}`);
  process.exit(2);
});
|
||||
|
||||
/**
 * Derive the NATS password for a license.
 *
 * The password is the hex-encoded HMAC-SHA256 of the license id, keyed with
 * the shared token secret. It must match the backend's derivation.
 *
 * @param {string} licenseId - License identifier (the HMAC message).
 * @param {string} secret - Shared secret (the HMAC key).
 * @returns {string} Hex digest.
 */
export function licensePassword(licenseId, secret) {
  const mac = createHmac('sha256', secret);
  mac.update(licenseId);
  return mac.digest('hex');
}
|
||||
|
||||
// Escape a value for use inside a double-quoted config string: every
// backslash and double quote is prefixed with a backslash.
const esc = (s) => String(s).replace(/[\\"]/g, '\\$&');
|
||||
|
||||
/**
 * Read all license ids from the database and print a NATS `authorization`
 * block to stdout.
 *
 * Emitted users:
 *  - the privileged internal user (full publish/subscribe),
 *  - one user per license, restricted to `corrosion.{id}.>` plus `_INBOX.>`,
 *  - in stage "open" only: a full-access `anonymous` user referenced by
 *    `no_auth_user`, so unauthenticated clients keep working.
 *
 * Any stage value other than "open" omits the anonymous user and
 * `no_auth_user`.
 */
const main = async () => {
  const pg = new Client({ connectionString: DATABASE_URL });
  await pg.connect();

  let rows;
  try {
    ({ rows } = await pg.query('SELECT id FROM licenses ORDER BY created_at'));
  } finally {
    // Close the connection even when the query fails.
    await pg.end();
  }

  const isOpenStage = NATS_AUTH_STAGE === 'open';

  const lines = [];
  lines.push('# GENERATED by scripts/generate-nats-auth.mjs — do not edit by hand.');
  lines.push(`# stage=${NATS_AUTH_STAGE} licenses=${rows.length}`);
  lines.push('authorization {');
  lines.push('  users: [');
  // Privileged internal user — the backend (full corrosion.> + _INBOX + _SYS).
  lines.push(`    { user: "${esc(NATS_INTERNAL_USER)}", password: "${esc(NATS_INTERNAL_PASSWORD)}", permissions: { publish: ">", subscribe: ">" } }`);

  // Per-license scoped users.
  for (const { id } of rows) {
    const pw = licensePassword(id, NATS_TOKEN_SECRET);
    // Escaped like every other value placed inside a quoted config string.
    const scope = esc(`corrosion.${id}.>`);
    lines.push(
      `    { user: "${esc(id)}", password: "${esc(pw)}", permissions: { ` +
        `publish: { allow: ["${scope}", "_INBOX.>"] }, ` +
        `subscribe: { allow: ["${scope}", "_INBOX.>"] } } }`,
    );
  }

  if (isOpenStage) {
    // Transition: unauthenticated clients map to a full-access user so nothing
    // breaks while real credentials roll out. Remove for enforcement.
    lines.push('    { user: "anonymous", password: "", permissions: { publish: ">", subscribe: ">" } }');
  }
  lines.push('  ]');
  if (isOpenStage) {
    lines.push('  no_auth_user: "anonymous"');
  }
  lines.push('}');
  process.stdout.write(lines.join('\n') + '\n');
};

// Any failure is logged and turned into exit code 1.
main().catch((e) => { console.error(e); process.exit(1); });
|
||||
Reference in New Issue
Block a user