Compare commits
78 Commits
redesign/d
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c9c7a8a97 | ||
|
|
907cfcb428 | ||
|
|
b1961df18e | ||
|
|
cfdec62a1d | ||
|
|
e510f8b005 | ||
|
|
cf1f1dea9a | ||
|
|
2e72850b97 | ||
|
|
9f9785fc09 | ||
|
|
142ba21113 | ||
|
|
04e664045b | ||
|
|
cef95540fc | ||
|
|
7f2207bc28 | ||
|
|
57858a1e1c | ||
|
|
5b323137e0 | ||
|
|
4d455918f5 | ||
|
|
a1768bdd2a | ||
|
|
0effaaf86c | ||
|
|
55c9893131 | ||
|
|
62bc9cd2a3 | ||
|
|
e23b6a7e69 | ||
|
|
215355d1cb | ||
|
|
440474290b | ||
|
|
6f783bfac8 | ||
|
|
f2ea415840 | ||
|
|
d13f2cb8b1 | ||
|
|
651a35d4be | ||
|
|
0715492ddf | ||
|
|
4ef5db5b0d | ||
|
|
bb71763714 | ||
|
|
f18b45e3f2 | ||
|
|
702de24e28 | ||
|
|
6b3e805ac2 | ||
|
|
7c84912ff5 | ||
|
|
355a53f6e3 | ||
|
|
589516a021 | ||
|
|
f60e6abd33 | ||
|
|
877fadcb6c | ||
|
|
e897a4802f | ||
|
|
c0b20f2f78 | ||
|
|
06e832fca1 | ||
|
|
009ceb86ad | ||
|
|
6f31c41dc3 | ||
|
|
99433a09d1 | ||
|
|
b442ef4102 | ||
|
|
856106174a | ||
|
|
463908b18e | ||
|
|
00cff51ce5 | ||
|
|
7a07d600e7 | ||
|
|
4a4ae7a5d4 | ||
|
|
930f655bf5 | ||
|
|
700dc2254d | ||
|
|
7fdca2cd4f | ||
|
|
18f978dde1 | ||
|
|
9e5e828c8d | ||
|
|
fccd5c61c5 | ||
|
|
c72a280361 | ||
|
|
a3b4b5cc7d | ||
|
|
4e184ca571 | ||
|
|
fde0926d52 | ||
|
|
4d99c9d99d | ||
|
|
b8f0ccba3c | ||
|
|
068a476f39 | ||
|
|
f706c3c47e | ||
|
|
4c9c322c29 | ||
|
|
47fa72763c | ||
|
|
b455bf9f14 | ||
|
|
4abf0ab889 | ||
|
|
cea3d66cdd | ||
|
|
1abe57ca40 | ||
|
|
a8722a7a07 | ||
|
|
180631989a | ||
|
|
23decd9b08 | ||
|
|
8b84bba165 | ||
|
|
9a5b93dd08 | ||
|
|
3545e6f5c8 | ||
|
|
1edaaf985d | ||
|
|
f2b09b281a | ||
|
|
be57d2839a |
@@ -42,3 +42,6 @@ FRONTEND_URL=http://localhost:5174
|
||||
|
||||
# Frontend (Vite — must be prefixed with VITE_)
|
||||
VITE_PANEL_URL=https://panel.corrosionmgmt.com
|
||||
|
||||
# Hostnames that serve the marketing site (comma-separated); all other hosts get the panel
|
||||
VITE_MARKETING_HOSTS=corrosionmgmt.com,www.corrosionmgmt.com
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Build Companion Agent
|
||||
name: Build Host Agent
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -26,19 +26,19 @@ jobs:
|
||||
run: |
|
||||
cd companion-agent
|
||||
mkdir -p bin
|
||||
GOOS=linux GOARCH=amd64 go build -ldflags "-s -w -X main.version=${{ steps.version.outputs.VERSION }}" -o bin/corrosion-companion-linux-amd64 ./cmd/agent
|
||||
chmod +x bin/corrosion-companion-linux-amd64
|
||||
GOOS=linux GOARCH=amd64 go build -ldflags "-s -w -X main.version=${{ steps.version.outputs.VERSION }}" -o bin/corrosion-host-agent-linux-amd64 ./cmd/agent
|
||||
chmod +x bin/corrosion-host-agent-linux-amd64
|
||||
|
||||
- name: Build Windows AMD64
|
||||
run: |
|
||||
cd companion-agent
|
||||
GOOS=windows GOARCH=amd64 go build -ldflags "-s -w -X main.version=${{ steps.version.outputs.VERSION }}" -o bin/corrosion-companion-windows-amd64.exe ./cmd/agent
|
||||
GOOS=windows GOARCH=amd64 go build -ldflags "-s -w -X main.version=${{ steps.version.outputs.VERSION }}" -o bin/corrosion-host-agent-windows-amd64.exe ./cmd/agent
|
||||
|
||||
- name: Generate checksums
|
||||
run: |
|
||||
cd companion-agent/bin
|
||||
sha256sum corrosion-companion-linux-amd64 > checksums.txt
|
||||
sha256sum corrosion-companion-windows-amd64.exe >> checksums.txt
|
||||
sha256sum corrosion-host-agent-linux-amd64 > checksums.txt
|
||||
sha256sum corrosion-host-agent-windows-amd64.exe >> checksums.txt
|
||||
cat checksums.txt
|
||||
|
||||
- name: Create Release
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
RESPONSE=$(curl -s -X POST \
|
||||
-H "Authorization: token ${RELEASE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"tag_name\": \"${VERSION}\", \"name\": \"Companion Agent ${VERSION}\", \"body\": \"Companion Agent release ${VERSION}\", \"draft\": false, \"prerelease\": false}" \
|
||||
-d "{\"tag_name\": \"${VERSION}\", \"name\": \"Corrosion Host Agent ${VERSION}\", \"body\": \"Corrosion Host Agent release ${VERSION}\", \"draft\": false, \"prerelease\": false}" \
|
||||
"${API_URL}/repos/${REPO}/releases")
|
||||
RELEASE_ID=$(echo "$RESPONSE" | grep -o '"id":[0-9]*' | head -1 | grep -o '[0-9]*')
|
||||
|
||||
@@ -68,15 +68,15 @@ jobs:
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token ${RELEASE_TOKEN}" \
|
||||
-H "Content-Type: application/octet-stream" \
|
||||
--data-binary @companion-agent/bin/corrosion-companion-linux-amd64 \
|
||||
"${API_URL}/repos/${REPO}/releases/${RELEASE_ID}/assets?name=corrosion-companion-linux-amd64"
|
||||
--data-binary @companion-agent/bin/corrosion-host-agent-linux-amd64 \
|
||||
"${API_URL}/repos/${REPO}/releases/${RELEASE_ID}/assets?name=corrosion-host-agent-linux-amd64"
|
||||
|
||||
# Upload Windows binary
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token ${RELEASE_TOKEN}" \
|
||||
-H "Content-Type: application/octet-stream" \
|
||||
--data-binary @companion-agent/bin/corrosion-companion-windows-amd64.exe \
|
||||
"${API_URL}/repos/${REPO}/releases/${RELEASE_ID}/assets?name=corrosion-companion-windows-amd64.exe"
|
||||
--data-binary @companion-agent/bin/corrosion-host-agent-windows-amd64.exe \
|
||||
"${API_URL}/repos/${REPO}/releases/${RELEASE_ID}/assets?name=corrosion-host-agent-windows-amd64.exe"
|
||||
|
||||
# Upload checksums
|
||||
curl -s -X POST \
|
||||
@@ -89,43 +89,43 @@ jobs:
|
||||
run: |
|
||||
CDN_URL="https://cdn.corrosionmgmt.com"
|
||||
|
||||
# Upload Linux binary to /companion/latest/
|
||||
# Upload Linux binary to /host-agent/latest/
|
||||
curl -s -X POST \
|
||||
-F "file=@companion-agent/bin/corrosion-companion-linux-amd64" \
|
||||
"${CDN_URL}/companion/latest/corrosion-companion-linux-amd64"
|
||||
-F "file=@companion-agent/bin/corrosion-host-agent-linux-amd64" \
|
||||
"${CDN_URL}/host-agent/latest/corrosion-host-agent-linux-amd64"
|
||||
|
||||
# Upload Windows binary to /companion/latest/
|
||||
# Upload Windows binary to /host-agent/latest/
|
||||
curl -s -X POST \
|
||||
-F "file=@companion-agent/bin/corrosion-companion-windows-amd64.exe" \
|
||||
"${CDN_URL}/companion/latest/corrosion-companion-windows-amd64.exe"
|
||||
-F "file=@companion-agent/bin/corrosion-host-agent-windows-amd64.exe" \
|
||||
"${CDN_URL}/host-agent/latest/corrosion-host-agent-windows-amd64.exe"
|
||||
|
||||
# Upload checksums
|
||||
curl -s -X POST \
|
||||
-F "file=@companion-agent/bin/checksums.txt" \
|
||||
"${CDN_URL}/companion/latest/checksums.txt"
|
||||
"${CDN_URL}/host-agent/latest/checksums.txt"
|
||||
|
||||
# Also upload versioned copies
|
||||
VERSION=${{ steps.version.outputs.VERSION }}
|
||||
curl -s -X POST \
|
||||
-F "file=@companion-agent/bin/corrosion-companion-linux-amd64" \
|
||||
"${CDN_URL}/companion/${VERSION}/corrosion-companion-linux-amd64"
|
||||
-F "file=@companion-agent/bin/corrosion-host-agent-linux-amd64" \
|
||||
"${CDN_URL}/host-agent/${VERSION}/corrosion-host-agent-linux-amd64"
|
||||
curl -s -X POST \
|
||||
-F "file=@companion-agent/bin/corrosion-companion-windows-amd64.exe" \
|
||||
"${CDN_URL}/companion/${VERSION}/corrosion-companion-windows-amd64.exe"
|
||||
-F "file=@companion-agent/bin/corrosion-host-agent-windows-amd64.exe" \
|
||||
"${CDN_URL}/host-agent/${VERSION}/corrosion-host-agent-windows-amd64.exe"
|
||||
curl -s -X POST \
|
||||
-F "file=@companion-agent/bin/checksums.txt" \
|
||||
"${CDN_URL}/companion/${VERSION}/checksums.txt"
|
||||
"${CDN_URL}/host-agent/${VERSION}/checksums.txt"
|
||||
|
||||
echo "CDN upload complete: ${CDN_URL}/companion/latest/"
|
||||
echo "CDN upload complete: ${CDN_URL}/host-agent/latest/"
|
||||
|
||||
- name: Build Summary
|
||||
run: |
|
||||
echo "## Companion Agent Build Complete" >> $GITHUB_STEP_SUMMARY
|
||||
echo "## Corrosion Host Agent Build Complete" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Version:** ${{ steps.version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Commit:** ${GITHUB_SHA:0:7}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Built Artifacts:" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Linux AMD64 ($(stat -c%s companion-agent/bin/corrosion-companion-linux-amd64) bytes)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Windows AMD64 ($(stat -c%s companion-agent/bin/corrosion-companion-windows-amd64.exe) bytes)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Linux AMD64 ($(stat -c%s companion-agent/bin/corrosion-host-agent-linux-amd64) bytes)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Windows AMD64 ($(stat -c%s companion-agent/bin/corrosion-host-agent-windows-amd64.exe) bytes)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- SHA256 checksums" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
161
.gitea/workflows/build-host-agent.yml
Normal file
161
.gitea/workflows/build-host-agent.yml
Normal file
@@ -0,0 +1,161 @@
|
||||
name: Build Host Agent (Rust)
|
||||
|
||||
# Rust agent ships on its own tag namespace (agent-v*) so it never collides
|
||||
# with the legacy Go pipeline (v*.*.*). Artifacts publish to the CDN /alpha/
|
||||
# channel — /host-agent/latest/ stays on the Go build until cutover.
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'agent-v*'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
env:
|
||||
# Override the macOS toolchain names in corrosion-host-agent/.cargo/config.toml
|
||||
# (real env beats the config [env] table).
|
||||
CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER: musl-gcc
|
||||
CC_x86_64_unknown_linux_musl: musl-gcc
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get version from tag
|
||||
id: version
|
||||
run: echo "VERSION=${GITHUB_REF#refs/tags/agent-v}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Verify tag matches Cargo.toml
|
||||
run: |
|
||||
CARGO_VERSION=$(grep '^version' corrosion-host-agent/Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
|
||||
if [ "${{ steps.version.outputs.VERSION }}" != "$CARGO_VERSION" ]; then
|
||||
echo "Tag agent-v${{ steps.version.outputs.VERSION }} does not match Cargo.toml version $CARGO_VERSION"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# The Asgard runner executes jobs in a bare node:20-bullseye container
|
||||
# (no Rust, no sudo, runs as root) — bootstrap the toolchain per-run,
|
||||
# same pattern as actions/setup-go in the Go pipeline.
|
||||
- name: Install Rust + cross toolchains
|
||||
run: |
|
||||
apt-get update -qq
|
||||
apt-get install -y -qq build-essential musl-tools gcc-mingw-w64-x86-64 curl
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
"$HOME/.cargo/bin/rustup" target add x86_64-unknown-linux-musl x86_64-pc-windows-gnu
|
||||
|
||||
- name: Build Linux AMD64 (static musl)
|
||||
run: |
|
||||
cd corrosion-host-agent
|
||||
cargo build --release --target x86_64-unknown-linux-musl
|
||||
mkdir -p bin
|
||||
cp target/x86_64-unknown-linux-musl/release/corrosion-host-agent bin/corrosion-host-agent-linux-amd64
|
||||
chmod +x bin/corrosion-host-agent-linux-amd64
|
||||
|
||||
- name: Build Windows AMD64 (mingw)
|
||||
run: |
|
||||
cd corrosion-host-agent
|
||||
cargo build --release --target x86_64-pc-windows-gnu
|
||||
cp target/x86_64-pc-windows-gnu/release/corrosion-host-agent.exe bin/corrosion-host-agent-windows-amd64.exe
|
||||
|
||||
- name: Generate checksums
|
||||
run: |
|
||||
cd corrosion-host-agent/bin
|
||||
sha256sum corrosion-host-agent-linux-amd64 > checksums.txt
|
||||
sha256sum corrosion-host-agent-windows-amd64.exe >> checksums.txt
|
||||
cat checksums.txt
|
||||
|
||||
- name: Sign artifacts (minisign)
|
||||
env:
|
||||
MINISIGN_SECRET_KEY: ${{ secrets.MINISIGN_SECRET_KEY }}
|
||||
run: |
|
||||
if [ -z "$MINISIGN_SECRET_KEY" ]; then
|
||||
echo "::error::MINISIGN_SECRET_KEY secret is not set — refusing to publish unsigned agent artifacts."
|
||||
exit 1
|
||||
fi
|
||||
# minisign isn't packaged for bullseye — fetch the official static binary.
|
||||
curl -sSL https://github.com/jedisct1/minisign/releases/download/0.12/minisign-0.12-linux.tar.gz -o /tmp/minisign.tgz
|
||||
tar -xzf /tmp/minisign.tgz -C /tmp
|
||||
MINISIGN="$(find /tmp -type f -name minisign -path '*linux*' | head -1)"
|
||||
chmod +x "$MINISIGN"
|
||||
"$MINISIGN" -v
|
||||
# A minisign secret key file is TWO lines (comment + base64 blob). CI
|
||||
# secret storage mangles embedded newlines, collapsing it to one line
|
||||
# so minisign can't load it. Preferred form: store the secret
|
||||
# base64-encoded (single line) — we decode it here. Auto-detect so a
|
||||
# correctly-stored raw two-line key still works.
|
||||
if printf '%s' "$MINISIGN_SECRET_KEY" | base64 -d 2>/dev/null | head -1 | grep -q "untrusted comment:"; then
|
||||
printf '%s' "$MINISIGN_SECRET_KEY" | base64 -d > /tmp/sign.key
|
||||
else
|
||||
printf '%s\n' "$MINISIGN_SECRET_KEY" > /tmp/sign.key
|
||||
fi
|
||||
if ! head -1 /tmp/sign.key | grep -q "untrusted comment:"; then
|
||||
echo "::error::MINISIGN_SECRET_KEY is neither base64 of a minisign key nor a raw two-line key file. Store it as: base64 < your-secret.key | tr -d '\n'"
|
||||
rm -f /tmp/sign.key
|
||||
exit 1
|
||||
fi
|
||||
cd corrosion-host-agent/bin
|
||||
# Passwordless key (-W generated); feed empty stdin so it never blocks.
|
||||
for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-windows-amd64.exe checksums.txt; do
|
||||
"$MINISIGN" -S -s /tmp/sign.key -m "$f" -x "$f.minisig" < /dev/null
|
||||
done
|
||||
rm -f /tmp/sign.key
|
||||
echo "signed: $(ls *.minisig)"
|
||||
|
||||
- name: Create Release
|
||||
env:
|
||||
RELEASE_TOKEN: ${{ secrets.RELEASE_TOKEN }}
|
||||
run: |
|
||||
API_URL="${{ github.server_url }}/api/v1"
|
||||
REPO="${{ github.repository }}"
|
||||
VERSION="agent-v${{ steps.version.outputs.VERSION }}"
|
||||
|
||||
RESPONSE=$(curl -s -X POST \
|
||||
-H "Authorization: token ${RELEASE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"tag_name\": \"${VERSION}\", \"name\": \"Corrosion Host Agent ${VERSION}\", \"body\": \"Rust host agent release ${VERSION}\", \"draft\": false, \"prerelease\": true}" \
|
||||
"${API_URL}/repos/${REPO}/releases")
|
||||
RELEASE_ID=$(echo "$RESPONSE" | grep -o '"id":[0-9]*' | head -1 | grep -o '[0-9]*')
|
||||
|
||||
for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-linux-amd64.minisig \
|
||||
corrosion-host-agent-windows-amd64.exe corrosion-host-agent-windows-amd64.exe.minisig \
|
||||
checksums.txt checksums.txt.minisig; do
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token ${RELEASE_TOKEN}" \
|
||||
-H "Content-Type: application/octet-stream" \
|
||||
--data-binary @corrosion-host-agent/bin/$f \
|
||||
"${API_URL}/repos/${REPO}/releases/${RELEASE_ID}/assets?name=$f"
|
||||
done
|
||||
|
||||
- name: Upload to CDN (alpha channel)
|
||||
run: |
|
||||
CDN_URL="https://cdn.corrosionmgmt.com"
|
||||
VERSION="${{ steps.version.outputs.VERSION }}"
|
||||
|
||||
for f in corrosion-host-agent-linux-amd64 corrosion-host-agent-linux-amd64.minisig \
|
||||
corrosion-host-agent-windows-amd64.exe corrosion-host-agent-windows-amd64.exe.minisig \
|
||||
checksums.txt checksums.txt.minisig; do
|
||||
curl -s -X POST \
|
||||
-F "file=@corrosion-host-agent/bin/$f" \
|
||||
"${CDN_URL}/host-agent/alpha/$f"
|
||||
curl -s -X POST \
|
||||
-F "file=@corrosion-host-agent/bin/$f" \
|
||||
"${CDN_URL}/host-agent/${VERSION}/$f"
|
||||
done
|
||||
|
||||
echo "CDN upload complete: ${CDN_URL}/host-agent/alpha/"
|
||||
|
||||
- name: Build Summary
|
||||
run: |
|
||||
echo "## Corrosion Host Agent (Rust) Build Complete" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Version:** ${{ steps.version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Commit:** ${GITHUB_SHA:0:7}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**Channel:** alpha (latest/ untouched until cutover)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Built Artifacts:" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Linux AMD64 static musl ($(stat -c%s corrosion-host-agent/bin/corrosion-host-agent-linux-amd64) bytes)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Windows AMD64 mingw ($(stat -c%s corrosion-host-agent/bin/corrosion-host-agent-windows-amd64.exe) bytes)" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- SHA256 checksums" >> $GITHUB_STEP_SUMMARY
|
||||
122
.gitea/workflows/ci.yml
Normal file
122
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,122 @@
|
||||
name: CI
|
||||
|
||||
# Test gate for every push to main and every pull request. The deploy story: main must be green here
|
||||
# before the stack is rebuilt (deploy workflow will enforce it once SSH transport
|
||||
# secrets land). Jobs run in the runner's bare node:20-bullseye container —
|
||||
# toolchains bootstrap per-run.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
backend-types:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Type-check NestJS backend
|
||||
run: |
|
||||
cd backend-nest
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npx tsc --noEmit
|
||||
|
||||
frontend-build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build frontend (vue-tsc gate + vite)
|
||||
run: |
|
||||
cd frontend
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npm run build
|
||||
|
||||
agent-tests:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Cache cargo
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
corrosion-host-agent/target
|
||||
key: cargo-${{ hashFiles('corrosion-host-agent/Cargo.lock') }}
|
||||
- name: Install Rust
|
||||
run: |
|
||||
apt-get update -qq && apt-get install -y -qq build-essential curl
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
- name: Test agent
|
||||
run: |
|
||||
cd corrosion-host-agent
|
||||
cargo test
|
||||
- name: Upload agent binary for integration
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: agent-debug
|
||||
path: corrosion-host-agent/target/debug/corrosion-host-agent
|
||||
|
||||
integration:
|
||||
runs-on: ubuntu-latest
|
||||
needs: agent-tests
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:16
|
||||
env:
|
||||
POSTGRES_USER: corrosion
|
||||
POSTGRES_PASSWORD: citest
|
||||
POSTGRES_DB: corrosion
|
||||
nats:
|
||||
image: nats:2.10-alpine
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download agent binary
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: agent-debug
|
||||
path: agent-bin
|
||||
|
||||
- name: Apply migrations to fresh DB
|
||||
run: |
|
||||
apt-get update -qq && apt-get install -y -qq postgresql-client
|
||||
until PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -c 'SELECT 1' >/dev/null 2>&1; do sleep 1; done
|
||||
for f in $(ls backend/migrations/*.sql | sort); do
|
||||
echo "applying $f"
|
||||
PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -v ON_ERROR_STOP=1 -q -f "$f"
|
||||
done
|
||||
|
||||
- name: Build + boot backend
|
||||
run: |
|
||||
cd backend-nest
|
||||
npm ci --no-audit --no-fund 2>&1 | tail -2
|
||||
npm run build
|
||||
DATABASE_URL=postgres://corrosion:citest@postgres:5432/corrosion \
|
||||
NATS_URL=nats://nats:4222 \
|
||||
JWT_SECRET=ci-secret ENCRYPTION_KEY=ci-encryption-key \
|
||||
ADMIN_EMAIL=ci@corrosion.test ADMIN_PASSWORD=ci-password-123 ADMIN_USERNAME=CI \
|
||||
nohup node dist/main.js > /tmp/backend.log 2>&1 &
|
||||
for i in $(seq 1 30); do
|
||||
code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:3000/api/auth/login -X POST -H 'Content-Type: application/json' -d '{}' || true)
|
||||
[ "$code" = "400" ] && echo "backend up" && exit 0
|
||||
sleep 2
|
||||
done
|
||||
echo "backend failed to come up"; cat /tmp/backend.log; exit 1
|
||||
|
||||
- name: Run agent↔backend contract suite
|
||||
run: |
|
||||
chmod +x agent-bin/corrosion-host-agent
|
||||
LICENSE_ID=$(PGPASSWORD=citest psql -h postgres -U corrosion -d corrosion -t -A -c 'SELECT id FROM licenses LIMIT 1')
|
||||
echo "license under test: $LICENSE_ID"
|
||||
[ -n "$LICENSE_ID" ] || { echo "admin seed did not create a license"; cat /tmp/backend.log; exit 1; }
|
||||
LICENSE_ID="$LICENSE_ID" \
|
||||
DATABASE_URL=postgres://corrosion:citest@postgres:5432/corrosion \
|
||||
NATS_URL=nats://nats:4222 \
|
||||
AGENT_BIN=$PWD/agent-bin/corrosion-host-agent \
|
||||
node contract-tests/agent-backend.contract.mjs
|
||||
|
||||
- name: Backend log on failure
|
||||
if: failure()
|
||||
run: cat /tmp/backend.log || true
|
||||
@@ -1,5 +1,6 @@
|
||||
name: Test Asgard Runner
|
||||
on: [push]
|
||||
# On-demand only — no reason to spin a container on every push.
|
||||
on: [workflow_dispatch]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
@@ -17,8 +18,15 @@ jobs:
|
||||
echo "Memory: $(free -h | grep Mem | awk '{print $2}')"
|
||||
echo "Disk: $(df -h / | tail -1 | awk '{print $4}')"
|
||||
echo "==========================================="
|
||||
echo "Go: $(go version)"
|
||||
echo "Rust: $(rustc --version)"
|
||||
echo "Docker: $(docker --version)"
|
||||
# Jobs run in a bare node:20-bullseye container: toolchains are NOT
|
||||
# preinstalled — workflows must bootstrap them (setup-go, rustup).
|
||||
# Report presence honestly instead of green-lighting a missing tool.
|
||||
for tool in go rustc docker node; do
|
||||
if command -v "$tool" >/dev/null 2>&1; then
|
||||
echo "$tool: $($tool --version 2>&1 | head -1)"
|
||||
else
|
||||
echo "$tool: NOT PRESENT (workflows must install per-run)"
|
||||
fi
|
||||
done
|
||||
echo "==========================================="
|
||||
echo "✅ Asgard runner is OPERATIONAL"
|
||||
echo "✅ Asgard runner reachable — container is node:20-bullseye, bootstrap toolchains per-run"
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
|
||||
### **TYPE 1: THE SCOUT (Intelligence)**
|
||||
|
||||
- **Model:** haiku
|
||||
- **Model:** sonnet[1m]
|
||||
|
||||
- **Role:** Reconnaissance, Context Mapping, Log Analysis.
|
||||
|
||||
|
||||
73
CHANGELOG.md
73
CHANGELOG.md
@@ -4,6 +4,79 @@ All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added (Host-agent Phase 2 — Dune docker-compose adapter — 2026-06-12)
|
||||
|
||||
**`Supervisor` trait abstraction (`corrosion-host-agent`):**
|
||||
- Introduced `trait Supervisor` (via `async-trait`, the battle-tested ecosystem standard) so the agent can manage games with fundamentally different models behind one wire contract. `ProcessSupervisor` (spawned OS process — Rust/Conan/Soulmask) and the new `DockerComposeSupervisor` (Dune) both implement it; `Agent.supervisors` is now `HashMap<String, Arc<dyn Supervisor>>` and the instance command dispatch (`instancecmd::dispatch`) is fully game-agnostic — `start`/`stop`/`restart`/`status` are identical across games. A per-game factory in `main` selects the impl. `InstanceState` moved to the shared `supervisor` module.
|
||||
- **Architecture call** (per Commander): chose the `dyn` trait over a zero-dependency enum because the Dune references point at *several* future management planes (kubectl, AMP/podman, SSH) — a trait makes each new plane "new struct + impl," no central match to edit.
|
||||
|
||||
**`DockerComposeSupervisor` (Dune: Awakening):**
|
||||
- Drives `docker compose up -d` / `stop` / `restart` against the instance's compose project (a "battlegroup"), with `-f`/`-p`/single-service support and a configurable compose binary (`docker compose` default, `docker-compose` legacy). New `[instance.docker_compose]` config block (file/project/service/command, all optional). `steam_update` already rejected for Dune (Docker images, no SteamCMD).
|
||||
- **Scope (first cut):** lifecycle + cached state. Deferred to Phase 3b (with process PID adoption): container crash-detection and state adoption on agent restart (both reconcilable with a `docker compose ps` probe).
|
||||
- Verified: 6 new docker-compose tests (mock `docker` binary asserting exact invocations + state transitions + failure paths) + the 5 refactored process-supervisor tests; full agent suite 56 tests green, zero warnings. Live verification against a real Dune stack pending the Commander standing one up.
|
||||
|
||||
### Changed (Fleet-driven active game + signed-update CI fix — 2026-06-12)
|
||||
|
||||
**Frontend — active game follows the deployed fleet:**
|
||||
- The panel's active game (shell skin + sidebar nav + dashboard terminology) is now **derived from the deployed instances** instead of a localStorage-only toggle. `syncActiveGameFromFleet()` reads the distinct `game` values of the license's instances (`game_instances.game`, reported by the host agent): exactly one game deployed → the shell auto-skins to it; zero or multiple → `all` (neutral house skin). Wired into `DashboardLayout` (the always-mounted admin shell) via a watch on the fleet store.
|
||||
- A manual GameSwitcher pick still wins — it persists to `cc-active-game` and suppresses auto-derive (operator intent beats the heuristic). Un-overridden panels keep tracking the fleet across sessions.
|
||||
- **No backend/schema change:** a license's game(s) are the distinct games of its instances — the normalized source of truth. Deliberately did NOT add a `licenses.game` column (would duplicate `game_instances.game` and drift; see Lesson 20).
|
||||
|
||||
**Frontend — sidebar agent-health footer is now fleet-aware:**
|
||||
- The shell footer read a single legacy `server.connection` (one `server_connections` row), which disagreed with the multi-host fleet. Repointed it at the fleet store: one host → hostname + status + last-heartbeat; multiple → `{online}/{total} online` + total instance count. Tone aggregates (all online → healthy, some → degraded, none → offline). Dropped the legacy `useServerStore` dependency from the shell entirely.
|
||||
|
||||
**Frontend — removed dead `vuefinder` dependency:**
|
||||
- VueFinder was replaced by the native instance-scoped file manager but the plugin (and its CSS) were still globally registered in `main.ts` and shipped in the bundle. Removed the dep + the three `main.ts` lines. Side effect: the main JS chunk dropped **588 kB → 165 kB** (vuefinder bundled an entire unused file-manager UI).
|
||||
|
||||
**Recon note (not a change):** `corrosion.{license}.cmd.server` was on the cleanup list as "dead v1" — it is NOT. It remains the live license-level command path for all plugin/module config applies, plugin install, scheduled tasks, and legacy start/stop/restart, served only by the legacy Go agent. The Rust agent does not implement it yet — this is a **parity/migration gap** (Phase 2+), not dead code. Left intact.
|
||||
|
||||
**CI — signed host-agent build:**
|
||||
- Fixed the `Sign artifacts (minisign)` step (`Error while loading the secret key file`): a minisign secret key is two lines and CI secret storage mangles the embedded newline. The job now base64-decodes the secret (single-line, mangling-proof) with auto-detect fallback to a raw key. `MINISIGN_SECRET_KEY` must be stored as `base64 < secret.key | tr -d '\n'`. Verified end-to-end: `agent-v2.0.0-alpha.8` Linux + Windows binaries validate against the agent's embedded public key; tampered byte rejected.
|
||||
|
||||
### Added (Host-Agent v2 Consumer + SEO Meta — 2026-06-11)
|
||||
|
||||
**Backend (NestJS):**
|
||||
- `HostAgentConsumerService` (new) — consumes wire protocol v2: `corrosion.*.host.heartbeat` updates `companion_last_seen` + `connection_status='connected'` (auto-registers the connection row on first contact); `host.going_offline` flips offline; a 60s staleness sweep marks hosts offline after 180s of silence. Previously NOTHING persisted heartbeats — `connection_status` was set once at setup and never changed again. Tenant-validated (UUID + license existence, cached) per NATS-consumer doctrine
|
||||
- `NatsBridgeService` — bridges `host_heartbeat` / `host_going_offline` events to the panel WebSocket
|
||||
- Verified by contract test: real agent → production NATS → captured with the backend's own `nats` lib under the real license; subjects, schema 2, real telemetry, offline beacon all confirmed
|
||||
|
||||
**Frontend:**
|
||||
- Per-route document titles + meta descriptions (router `afterEach`, no new deps): six marketing pages get real titles/descriptions/OG tags (previously every page was "Corrosion Management" with zero meta — invisible to search and link previews); panel views get mechanical "{View} — Corrosion" titles
|
||||
|
||||
**CI:**
|
||||
- `test-runner.yml` — honest per-tool presence checks (was printing "OPERATIONAL" while every toolchain probe failed); on-demand trigger instead of every push
|
||||
|
||||
### Added (Corrosion Host Agent — Rust rewrite Phase 0 — 2026-06-11)
|
||||
|
||||
**New: `corrosion-host-agent/`** — Rust rewrite of the Go companion agent (which stays in-tree as the behavior reference until parity). Wire protocol v2 (COA-B, Commander-approved): instance-scoped subjects `corrosion.{license}.{instance}.*` with host-level `corrosion.{license}.host.*` — full spec in `corrosion-host-agent/PROTOCOL.md`.
|
||||
|
||||
- Multi-instance TOML config baked into the foundation (one agent supervises N game instances; rust/conan/soulmask/dune), env overrides for secrets, strict validation (subject-safe ids, reserved segments)
|
||||
- NATS layer with the production-proven Vigilance profile: infinite reconnect w/ capped backoff, 30s ping, 8192-msg offline send buffer, `tls://` scheme support
|
||||
- Host heartbeat with REAL telemetry via sysinfo (CPU/mem/disks/per-instance state) — the Go agent hardcoded disk=50000MB and cpu=0.0; this is the first true Resources data
|
||||
- Connectivity prober (outbound TCP + latency, periodic jittered + on-demand) — first piece of the support-triage story
|
||||
- Host command channel (`ping`/`probe`/`sysinfo`, request-reply), going-offline beacon, CancellationToken graceful shutdown
|
||||
- Version embedding (semver + git hash + build ts) in `--version` and every heartbeat
|
||||
- Verified live against production NATS: connected, heartbeats published, clean shutdown
|
||||
- Deploy artifacts verified: 3.7MB fully-static linux-musl binary, 3.8MB windows .exe (static CRT, no VC++ redist needed)
|
||||
|
||||
**Next phases**: 1 = process-class adapter (spawn/RCON/SteamCMD/files for Rust/Conan/Soulmask) + NestJS v2 heartbeat consumer; 2 = Dune Docker adapter; 3 = signed self-update (release gate) + service install.
|
||||
|
||||
### Fixed (Site Audit — Fake Data, Resilience, Fonts — 2026-06-11)
|
||||
|
||||
**Frontend:**
|
||||
- `SetupWizardView.vue` — Replaced fake install instructions (`get.corrosionmgmt.com | sh` install script and `corrosion-agent` binary, neither of which exists) with the real host-agent download + run commands matching ServerView; multi-game copy on the completion step
|
||||
- Marketing views (Landing, Pricing, HowItWorks, Roadmap, EarlyAccess) — Replaced "View live demo" CTA (no demo exists; it linked to the panel login) with an honest "Sign in" link
|
||||
- `ErrorBoundary.vue` — Error state now resets on route change (previously one failed view bricked the entire SPA, including marketing pages, until manual reload); added `content` variant
|
||||
- `DashboardLayout.vue` — Routed views are now wrapped in a content-scoped ErrorBoundary so the sidebar/topbar survive a view failure instead of the whole panel unmounting
|
||||
- `index.html` / `styles/tokens/fonts.css` — Google Fonts moved from CSS `@import` to `<link>` tags. The bundler silently dropped the mid-bundle `@import`, so production shipped system fallback fonts (Geist/JetBrains Mono/Oxanium never loaded)
|
||||
- `StatusPageView.vue` — Platform KPIs show "—" until the first successful fetch instead of fake zeros
|
||||
- `LoginView.vue` — Added missing "Forgot password?" link (route + backend endpoint already existed)
|
||||
|
||||
**Backend (NestJS):**
|
||||
- `AdminSeedService` (new, auth module) — Bootstraps a super-admin user + active license from `ADMIN_EMAIL`/`ADMIN_PASSWORD`/`ADMIN_USERNAME`/`ADMIN_LICENSE_KEY` when the users table is empty. A fresh deploy previously had a schema but no possible login. Compose already passes the env vars
|
||||
|
||||
**Purpose:** Findings from the full-site fake-data audit. Show real data or honest empty states — never invented values, dead URLs, or fabricated zeros.
|
||||
|
||||
### Fixed (Safe Formatting Utilities — 2026-02-15)
|
||||
|
||||
**Frontend:**
|
||||
|
||||
34
CLAUDE.md
34
CLAUDE.md
@@ -55,7 +55,12 @@ frontend/ # Vue 3 + TypeScript
|
||||
package.json
|
||||
vite.config.ts # Proxies /api to :3000
|
||||
|
||||
companion-agent/ # Go binary for bare metal servers
|
||||
corrosion-host-agent/ # Rust host agent (ACTIVE) — multi-game ops runtime
|
||||
src/ # main, config, bus (NATS), telemetry, prober, hostcmd
|
||||
PROTOCOL.md # Wire protocol v2 spec (instance-scoped subjects)
|
||||
agent.example.toml # Multi-instance config reference
|
||||
|
||||
companion-agent/ # Go binary (LEGACY — behavior reference until Rust parity)
|
||||
cmd/agent/ # main.go entry point
|
||||
internal/ # Core agent logic (nats, commands, process)
|
||||
Makefile # Build for Linux/Windows
|
||||
@@ -91,14 +96,16 @@ cd backend-nest && npx tsc --noEmit # Type-check without building
|
||||
|
||||
# Frontend
|
||||
cd frontend && npm run dev # Vite dev server (port 5174)
|
||||
cd frontend && npm run build # Production build → dist/
|
||||
cd frontend && npm run lint # ESLint
|
||||
cd frontend && npm run type-check # TypeScript checking (vue-tsc)
|
||||
cd frontend && npm run build # vue-tsc -b && vite build (type-check included; no separate lint/type-check scripts exist)
|
||||
|
||||
# Companion Agent (Go)
|
||||
# Host Agent (Rust — ACTIVE)
|
||||
cd corrosion-host-agent && cargo check # Fast validation
|
||||
cd corrosion-host-agent && cargo build --release --target x86_64-unknown-linux-musl # Static Linux binary
|
||||
cd corrosion-host-agent && cargo xwin build --release --target x86_64-pc-windows-msvc # Windows (local)
|
||||
# CI: push tag agent-vX.Y.Z (must match Cargo.toml version) → Asgard builds → CDN /host-agent/alpha/
|
||||
|
||||
# Companion Agent (Go — LEGACY, behavior reference until Rust parity)
|
||||
cd companion-agent && make build # Build for current platform
|
||||
cd companion-agent && make linux # Cross-compile for Linux
|
||||
cd companion-agent && make windows # Cross-compile for Windows
|
||||
|
||||
# Docker (from docker/ directory — Commander ALWAYS builds with --no-cache)
|
||||
docker compose build --no-cache && docker compose up -d # Full rebuild + start
|
||||
@@ -374,7 +381,8 @@ Default to Sonnet. Escalate to Opus when the problem demands it, not as a comfor
|
||||
- Treat every change as production deployment (`corrosionmgmt.com`)
|
||||
- Document why, not just what, in commits and CHANGELOG
|
||||
- **Always commit and push when done touching code — never ask, never wait for permission**
|
||||
- **Tag companion agent builds when Go code in `companion-agent/` is modified** — increment from latest tag (currently v1.0.3), push tag to trigger CI build + CDN upload
|
||||
- **Tag agent builds when agent code is modified** — Rust agent: `agent-vX.Y.Z` (must match `corrosion-host-agent/Cargo.toml`; CI publishes to CDN `/host-agent/alpha/`, while `/latest/` stays on the Go build until cutover). Legacy Go agent: `vX.Y.Z`. Tags roll FORWARD only — never reuse or re-push a tag; cut the next version
|
||||
- **The Asgard CI runner executes jobs in a bare `node:20-bullseye` container** — no Rust/Go/Docker/sudo preinstalled; workflows must bootstrap toolchains per-run (setup-go, rustup via curl)
|
||||
|
||||
## Development Notes
|
||||
|
||||
@@ -435,3 +443,13 @@ Things I discovered about myself building a sister platform across multiple sess
|
||||
22. **Build-green is not render-correct — visually verify UI work before calling it done.** The entire design-system re-skin (50+ files, six green commits) rendered almost completely unstyled in the browser — white background, no surfaces, no accent — because the design tokens never loaded. `vue-tsc -b` + `vite build` passed clean the whole time; CSS that *compiles* can still apply *zero* styles. One Playwright screenshot of the login exposed it in seconds. When the deliverable is visual, a green build is necessary but not sufficient: load it in a real browser (Playwright on the dev server at :5174), screenshot it, and assert on `getComputedStyle` — don't trust compilation alone. This is Lesson 17 with teeth.
|
||||
|
||||
23. **Tailwind v4 silently drops a nested `@import` barrel placed after `@import "tailwindcss"`.** `style.css` did `@import "tailwindcss"; @import "./styles/corrosion.css";` where corrosion.css was a barrel of eight `@import` token files. Once Tailwind v4 expands the tailwindcss import in place, the barrel's inner @imports no longer precede all statements, so PostCSS drops them — emitting only an easily-ignored "@import must precede all other statements" warning. Result: every design token resolved empty and the whole panel rendered unstyled. Import token/design CSS files **directly and contiguously** in the entry stylesheet; never via a nested barrel after the Tailwind import. The build warning you wave off as "pre-existing" may be the entire feature silently failing.
|
||||
|
||||
24. **`onModuleInit` runs before async `onModuleInit` of dependencies completes — register NATS/external subscriptions in `onApplicationBootstrap`.** `NatsService.onModuleInit` connects to NATS (async); `NatsBridgeService`/`HostAgentConsumerService` registered their subscriptions in their own `onModuleInit`, which fired while the connection was still null — so every `subscribe()` hit the `[OFFLINE]` no-op path and the WS bridge was dead-on-boot in *every* production build, silently. Nest guarantees `onApplicationBootstrap` runs only after all module init (including the awaited connect) finishes. Anything that depends on another provider's async startup belongs in bootstrap, not init. The tell: a subscription that "should be there" but the handler never fires and there's no error — trace the *startup ordering*, not the handler.
|
||||
|
||||
25. **Fixing a dead code path detonates the live code behind it — budget for the second bug.** The moment Lesson 24's fix made the NATS→WS bridge actually deliver events, the API crashed on the first forwarded heartbeat: `WebSocket.OPEN` was `undefined` at runtime because `esModuleInterop` is off, so `import WebSocket from 'ws'` compiled to `ws_1.default` (undefined). That crash had sat behind the dead bridge since the gateway was written — never hit because no event ever reached it. When you resurrect a path that was silently no-op, everything downstream of it is effectively *untested code running for the first time in production*. Verify the whole chain end-to-end (I watched the DB row appear, then flip offline), don't stop at "the subscription fires now." This is Lesson 10 with a fuse on it. Import-runtime gotcha worth remembering: when `esModuleInterop` is off, prefer instance constants (`client.OPEN`) over class statics (`WebSocket.OPEN`) for `ws`.
|
||||
|
||||
26. **A jail check at the entry point does not jail the recursive walk behind it — and my own "line-by-line" review missed it; the automated security review didn't.** The file manager's `jail()` correctly canonicalized and prefix-checked the top-level path, and I traced every escape vector through it and signed off. But `copy_recursive` then walked the directory tree with `fs::metadata` (which *follows* symlinks). A symlink planted inside the jail pointing at `/etc`, then a `copy` of its parent, would dereference it and pull external content *into* the jail to be read — a jail escape the entry check never sees, because the escape is reintroduced by a descendant during traversal. Fix: `symlink_metadata` (lstat) everywhere you recurse, and refuse/never-follow symlinks across the boundary. The transferable rule: **validate at the boundary AND at every step that re-derives a path** (recursion, `read_dir`, glob, archive extraction). And the humbling part — I was confident after reviewing the jail function; the security-review pass caught the HIGH I'd waved through. Trust adversarial verification over your own once-over on security-critical code, especially path/traversal logic.
|
||||
|
||||
27. **Validate infra config BEFORE it reaches a deploy — and know that `docker compose up -d <service>` will recreate other services whose definitions changed.** During the NATS auth cutover I ran `docker compose up -d api` to pick up new env. Because the *nats* service definition had also changed (a new volume mount), compose recreated **corrosion-nats too** — and it failed to start on a config error (`no_auth_user` nested inside `authorization{}` instead of at top level), taking the broker down for ~3 minutes with the backend in offline mode. Two lessons: (a) a broker/proxy/DB config file is code — lint it before it can reach a restart (`nats-server -t -c cfg` to test-parse, `nginx -t`, etc.), don't let the first validation be the production container's startup; (b) `compose up -d <one-service>` is not surgical — it reconciles that service's **dependencies** too, so a stale edit to a depended-on service ships when you didn't mean it to. When touching shared-infra config, restart that service explicitly and watch it come up before moving on. Recovery also surfaced a third gotcha: recreating a client (api) while its server (nats) is down leaves the client stuck on a cached DNS failure (`EAI_AGAIN`) — restart the client once the server is healthy.
|
||||
|
||||
28. **A multi-line secret in CI (minisign/SSH/PGP keys) must be stored base64-encoded — the runner mangles embedded newlines and the key silently fails to load.** The signed-update CI passed the toolchain build, downloaded minisign fine, then died at the sign step on `Error while loading the secret key file` (exit 2). The cause wasn't the key or minisign — a minisign secret key file is **two lines** (`untrusted comment:` + base64 blob), and Gitea/act_runner secret storage collapses the embedded newline so the reconstructed file is one unparseable line. The robust pattern: store the secret as `base64 < secret.key | tr -d '\n'` (single line, mangling-proof) and `base64 -d` it in the job, with auto-detect fallback so a correctly-stored raw key still works, and a loud `::error::` carrying the fix command if it's neither. This applies to **any** multi-line credential in CI, not just minisign. Two corollaries: (a) the tell is "the tool runs but can't load its key" — suspect newline-mangling before the key itself; (b) generating that base64 prints the **private key to the terminal/transcript** — for a supply-chain signing key, treat it as exposed and rotate before cutover (embed the new pubkey, re-store the new secret, retire the old). And verify the published artifact end-to-end against the *embedded* pubkey (`minisign -Vm bin -P <pub>`) plus a tampered-byte negative control — a green build that signs is not the same as a signature the agent will actually accept.
|
||||
|
||||
@@ -44,10 +44,20 @@ import { FurnaceSplitterModule } from './modules/furnacesplitter/furnacesplitter
|
||||
import { BetterChatModule } from './modules/betterchat/betterchat.module';
|
||||
import { TimedExecuteModule } from './modules/timedexecute/timedexecute.module';
|
||||
import { RaidableBasesModule } from './modules/raidablebases/raidablebases.module';
|
||||
import { EarlyAccessModule } from './modules/early-access/early-access.module';
|
||||
import { FleetModule } from './modules/fleet/fleet.module';
|
||||
import { InstancesModule } from './modules/instances/instances.module';
|
||||
import { ApiKeysModule } from './modules/api-keys/api-keys.module';
|
||||
import { WebhooksModule } from './modules/webhooks/webhooks.module';
|
||||
|
||||
// Shared Services
|
||||
import { NatsService } from './services/nats.service';
|
||||
import { NatsBridgeService } from './services/nats-bridge.service';
|
||||
import { HostAgentConsumerService } from './services/host-agent-consumer.service';
|
||||
import { ServerConnection } from './entities/server-connection.entity';
|
||||
import { License } from './entities/license.entity';
|
||||
import { AgentHost } from './entities/agent-host.entity';
|
||||
import { GameInstance } from './entities/game-instance.entity';
|
||||
import { SteamService } from './services/steam.service';
|
||||
|
||||
// Gateway
|
||||
@@ -90,6 +100,9 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
// Scheduler
|
||||
ScheduleModule.forRoot(),
|
||||
|
||||
// Repositories for app-level shared services (host-agent consumer)
|
||||
TypeOrmModule.forFeature([ServerConnection, License, AgentHost, GameInstance]),
|
||||
|
||||
// Feature Modules
|
||||
AuthModule,
|
||||
UsersModule,
|
||||
@@ -123,6 +136,11 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
BetterChatModule,
|
||||
TimedExecuteModule,
|
||||
RaidableBasesModule,
|
||||
EarlyAccessModule,
|
||||
FleetModule,
|
||||
InstancesModule,
|
||||
ApiKeysModule,
|
||||
WebhooksModule,
|
||||
],
|
||||
providers: [
|
||||
// Global guards (order matters: auth first, then license, then permissions)
|
||||
@@ -132,6 +150,7 @@ import { NatsBridgeGateway } from './gateways/nats-bridge.gateway';
|
||||
// Shared services
|
||||
NatsService,
|
||||
NatsBridgeService,
|
||||
HostAgentConsumerService,
|
||||
SteamService,
|
||||
|
||||
// WebSocket gateway
|
||||
|
||||
51
backend-nest/src/common/cron.util.ts
Normal file
51
backend-nest/src/common/cron.util.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
 * Expand one cron field into the set of integer values it allows.
 *
 * Accepts comma-separated items, each of which is `*`, `n`, or `a-b`,
 * optionally followed by `/step`. `n/step` means "from n to the field maximum
 * in increments of step". Returns null when the field is malformed or any value
 * falls outside [min, max].
 */
function parseCronField(field: string, min: number, max: number): Set<number> | null {
  const allowed = new Set<number>();
  for (const item of field.split(',')) {
    const m = /^(\*|\d+(?:-\d+)?)(?:\/(\d+))?$/.exec(item);
    if (!m) return null;

    const hasStep = m[2] !== undefined;
    const step = hasStep ? parseInt(m[2], 10) : 1;
    if (step < 1) return null;

    let lo = min;
    let hi = max;
    if (m[1] !== '*') {
      const [from, to] = m[1].split('-');
      lo = parseInt(from, 10);
      if (to !== undefined) {
        hi = parseInt(to, 10);
      } else {
        hi = hasStep ? max : lo;
      }
      if (lo < min || hi > max || lo > hi) return null;
    }

    for (let v = lo; v <= hi; v += step) allowed.add(v);
  }
  return allowed;
}

/**
 * Minimal 5-field cron "next run" calculator, shared by the event scheduler
 * (SchedulesService) and the wipe scheduler (WipesService).
 *
 * Fields are `minute hour day-of-month month day-of-week`. Each field supports
 * `*`, exact numbers, comma lists (`0,30`), ranges (`1-5`) and steps (`*\/15`,
 * `10-50/10`). Day-of-week accepts both 0 and 7 for Sunday. All five fields
 * must match (day-of-month and day-of-week are AND-ed, not OR-ed as in Vixie
 * cron).
 *
 * The expression is validated up front, so a malformed or unsupported field
 * returns null immediately instead of being silently misread or scanning a
 * full year of minutes.
 *
 * NOTE: the expression is evaluated in **UTC**. A per-schedule `timezone`
 * column exists on both schedule tables but is NOT yet honored here — fixing it
 * properly needs a timezone-aware cron library; tracked as a shared follow-up.
 *
 * @param expr  Five whitespace-separated cron fields.
 * @param after Reference instant; the result is strictly later than this.
 * @returns The first matching minute (seconds/ms zeroed) within 366 days of
 *          `after`, or null if the expression is malformed or nothing matches.
 */
export function nextCronDate(expr: string, after: Date): Date | null {
  const parts = expr.trim().split(/\s+/);
  if (parts.length !== 5) return null;

  const minutes = parseCronField(parts[0], 0, 59);
  const hours = parseCronField(parts[1], 0, 23);
  const daysOfMonth = parseCronField(parts[2], 1, 31);
  const months = parseCronField(parts[3], 1, 12);
  const daysOfWeek = parseCronField(parts[4], 0, 7);
  if (!minutes || !hours || !daysOfMonth || !months || !daysOfWeek) return null;

  // Cron allows 7 as an alias for Sunday; getUTCDay() only ever yields 0-6.
  if (daysOfWeek.has(7)) {
    daysOfWeek.delete(7);
    daysOfWeek.add(0);
  }

  // Walk minute-by-minute up to 366 days forward to find the next match.
  const candidate = new Date(after.getTime() + 60_000); // advance at least 1 minute
  candidate.setUTCSeconds(0, 0);

  const limit = new Date(after.getTime() + 366 * 24 * 60 * 60 * 1000);

  while (candidate < limit) {
    if (
      minutes.has(candidate.getUTCMinutes()) &&
      hours.has(candidate.getUTCHours()) &&
      daysOfMonth.has(candidate.getUTCDate()) &&
      months.has(candidate.getUTCMonth() + 1) && // getUTCMonth() is 0-based
      daysOfWeek.has(candidate.getUTCDay()) // 0 = Sunday
    ) {
      return candidate;
    }

    candidate.setTime(candidate.getTime() + 60_000);
  }

  return null;
}
|
||||
@@ -1,20 +1,68 @@
|
||||
import { Injectable, ExecutionContext } from '@nestjs/common';
|
||||
import {
|
||||
Injectable,
|
||||
ExecutionContext,
|
||||
UnauthorizedException,
|
||||
} from '@nestjs/common';
|
||||
import { AuthGuard } from '@nestjs/passport';
|
||||
import { Reflector } from '@nestjs/core';
|
||||
import { IS_PUBLIC_KEY } from '../decorators/public.decorator';
|
||||
import { ApiKeysService } from '../../modules/api-keys/api-keys.service';
|
||||
|
||||
@Injectable()
|
||||
export class JwtAuthGuard extends AuthGuard('jwt') {
|
||||
constructor(private reflector: Reflector) {
|
||||
constructor(
|
||||
private reflector: Reflector,
|
||||
private readonly apiKeysService: ApiKeysService,
|
||||
) {
|
||||
super();
|
||||
}
|
||||
|
||||
canActivate(context: ExecutionContext) {
|
||||
async canActivate(context: ExecutionContext): Promise<boolean> {
|
||||
const isPublic = this.reflector.getAllAndOverride<boolean>(IS_PUBLIC_KEY, [
|
||||
context.getHandler(),
|
||||
context.getClass(),
|
||||
]);
|
||||
if (isPublic) return true;
|
||||
return super.canActivate(context);
|
||||
|
||||
// Additive API-key auth: a `corr_`-prefixed bearer token (or X-API-Key
|
||||
// header) authenticates programmatically AS the license owner. JWTs are
|
||||
// `eyJ...` and never collide with the `corr_` prefix, so the standard JWT
|
||||
// path below is left completely untouched — zero login regression risk.
|
||||
const request = context.switchToHttp().getRequest();
|
||||
const rawKey = this.extractApiKey(request);
|
||||
if (rawKey) {
|
||||
const result = await this.apiKeysService.validateKey(rawKey);
|
||||
if (!result) {
|
||||
throw new UnauthorizedException('Invalid or revoked API key');
|
||||
}
|
||||
// Shape the principal like a JWT user so @CurrentTenant / @CurrentUser and
|
||||
// the permission layer behave identically. is_api_key grants full access
|
||||
// to THIS license (see PermissionsGuard) — a key is full programmatic
|
||||
// access to your own license, always tenant-scoped by license_id.
|
||||
request.user = {
|
||||
sub: result.user_id ?? undefined,
|
||||
license_id: result.license_id,
|
||||
is_super_admin: false,
|
||||
is_api_key: true,
|
||||
permissions: {},
|
||||
};
|
||||
return true;
|
||||
}
|
||||
|
||||
return (await super.canActivate(context)) as boolean;
|
||||
}
|
||||
|
||||
/** Pull a `corr_`-prefixed key from `Authorization: Bearer` or `X-API-Key`. */
|
||||
private extractApiKey(request: any): string | null {
|
||||
const auth = request.headers?.authorization;
|
||||
if (typeof auth === 'string' && auth.startsWith('Bearer ')) {
|
||||
const token = auth.slice(7).trim();
|
||||
if (token.startsWith('corr_')) return token;
|
||||
}
|
||||
const headerKey = request.headers?.['x-api-key'];
|
||||
if (typeof headerKey === 'string' && headerKey.startsWith('corr_')) {
|
||||
return headerKey.trim();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,10 +19,19 @@ export class PermissionsGuard implements CanActivate {
|
||||
// Super admins bypass all permission checks
|
||||
if (user.is_super_admin) return true;
|
||||
|
||||
// API keys are full programmatic access to their own license (always
|
||||
// tenant-scoped by license_id via @CurrentTenant). Granted here rather than
|
||||
// enumerating every permission. Future: scoped/read-only keys.
|
||||
if (user.is_api_key) return true;
|
||||
|
||||
// Check permissions JSONB from role
|
||||
const permissions = user.permissions as Record<string, boolean> | undefined;
|
||||
if (!permissions) return false;
|
||||
|
||||
// Global wildcard — the Owner role (full control of its license) carries
|
||||
// {"*": true}, so new features never need to amend the role enumeration.
|
||||
if (permissions['*'] === true) return true;
|
||||
|
||||
// Support wildcard: "server.*" matches "server.view", "server.console", etc.
|
||||
const parts = requiredPermission.split('.');
|
||||
const wildcard = parts[0] + '.*';
|
||||
|
||||
100
backend-nest/src/common/ssrf-guard.ts
Normal file
100
backend-nest/src/common/ssrf-guard.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import { BadRequestException } from '@nestjs/common';
|
||||
import { lookup } from 'node:dns/promises';
|
||||
import { isIP } from 'node:net';
|
||||
|
||||
/**
|
||||
* SSRF guard for operator-supplied outbound URLs (webhooks today; any future
|
||||
* "we POST to a URL you give us" feature should reuse this).
|
||||
*
|
||||
* The danger: an operator (or anyone who can create a webhook) points the URL at
|
||||
* an internal address — 127.0.0.1, the NATS/DB ports, 192.168.x, or the cloud
|
||||
* metadata endpoint 169.254.169.254 — and turns our server into a request proxy
|
||||
* into the private network. We defend by resolving the host and refusing any
|
||||
* private / loopback / link-local / reserved destination.
|
||||
*
|
||||
* Validate at storage (early, clear 400) AND immediately before each delivery
|
||||
* (a hostname can resolve public at create time and private at send time — DNS
|
||||
* rebinding / TOCTOU). `redirect: 'manual'` at the fetch call closes the
|
||||
* redirect-bounce variant.
|
||||
*/
|
||||
|
||||
/**
 * True when a dotted-quad IPv4 address falls in a range we refuse to send
 * outbound requests to (private, loopback, link-local, CGNAT, multicast,
 * reserved, broadcast). Unparseable input is blocked defensively.
 */
function isBlockedIpv4(ip: string): boolean {
  const octets = ip.split('.').map((p) => parseInt(p, 10));
  if (octets.length !== 4 || octets.some((n) => Number.isNaN(n) || n < 0 || n > 255)) {
    return true; // unparseable → block defensively
  }
  const [a, b, c] = octets;
  if (a === 0) return true; // 0.0.0.0/8 "this network"
  if (a === 10) return true; // 10.0.0.0/8 private
  if (a === 100 && b >= 64 && b <= 127) return true; // 100.64.0.0/10 CGNAT
  if (a === 127) return true; // 127.0.0.0/8 loopback
  if (a === 169 && b === 254) return true; // 169.254.0.0/16 link-local (incl. 169.254.169.254 metadata)
  if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12 private
  if (a === 192 && b === 0 && c === 0) return true; // 192.0.0.0/24 IETF protocol assignments
  if (a === 192 && b === 168) return true; // 192.168.0.0/16 private
  if (a === 198 && (b === 18 || b === 19)) return true; // 198.18.0.0/15 benchmarking
  if (a >= 224) return true; // 224.0.0.0/4 multicast, 240.0.0.0/4 reserved, 255.x broadcast
  return false;
}

/**
 * Expand a textual IPv6 address into its eight 16-bit groups.
 *
 * Handles `::` compression, a trailing dotted-quad (`::ffff:1.2.3.4`) and a
 * `%zone` suffix. Returns null when the text is not a well-formed address.
 */
function expandIpv6(ip: string): number[] | null {
  let addr = ip.toLowerCase();
  const zoneAt = addr.indexOf('%');
  if (zoneAt !== -1) addr = addr.slice(0, zoneAt);

  // Fold a trailing dotted-quad into two hex groups so only one form remains.
  const lastColon = addr.lastIndexOf(':');
  const tail = addr.slice(lastColon + 1);
  if (tail.includes('.')) {
    const quad = tail.split('.');
    if (quad.length !== 4 || quad.some((p) => !/^\d{1,3}$/.test(p) || Number(p) > 255)) {
      return null;
    }
    const [q0, q1, q2, q3] = quad.map(Number);
    addr =
      addr.slice(0, lastColon + 1) +
      ((q0 << 8) | q1).toString(16) +
      ':' +
      ((q2 << 8) | q3).toString(16);
  }

  const halves = addr.split('::');
  if (halves.length > 2) return null;

  const toGroups = (s: string): number[] =>
    s === '' ? [] : s.split(':').map((h) => (/^[0-9a-f]{1,4}$/.test(h) ? parseInt(h, 16) : NaN));

  const head = toGroups(halves[0]);
  const rest = halves.length === 2 ? toGroups(halves[1]) : [];
  const fill = 8 - head.length - rest.length;
  // Without `::` we need exactly eight groups; with it, `::` must stand for at least one.
  if (halves.length === 1 ? fill !== 0 : fill < 1) return null;

  const groups = [...head, ...Array<number>(fill).fill(0), ...rest];
  return groups.some((g) => Number.isNaN(g)) ? null : groups;
}

/**
 * True when an IPv6 address must not be used as an outbound destination.
 *
 * Classification is done on the numeric groups rather than on string prefixes,
 * because the WHATWG URL parser serialises IPv4-mapped literals in hex
 * (`[::ffff:127.0.0.1]` becomes `[::ffff:7f00:1]`), which a dotted-only pattern
 * never matches. IPv4-mapped and NAT64 addresses are unwrapped and judged by
 * the IPv4 rules.
 */
function isBlockedIpv6(ip: string): boolean {
  const g = expandIpv6(ip);
  if (!g) return true; // unparseable → block defensively

  const embeddedV4 = `${g[6] >> 8}.${g[6] & 0xff}.${g[7] >> 8}.${g[7] & 0xff}`;
  const zeroThrough = (n: number): boolean => g.slice(0, n).every((x) => x === 0);

  // ::ffff:0:0/96 IPv4-mapped (dotted or hex spelling) — apply the v4 rules.
  if (zeroThrough(5) && g[5] === 0xffff) return isBlockedIpv4(embeddedV4);
  // 64:ff9b::/96 NAT64 well-known prefix — a translator would forward to the embedded v4.
  if (g[0] === 0x64 && g[1] === 0xff9b && g.slice(2, 6).every((x) => x === 0)) {
    return isBlockedIpv4(embeddedV4);
  }
  if ((g[0] & 0xff00) === 0) return true; // 0000::/8 reserved: ::, ::1, IPv4-compatible
  if ((g[0] & 0xfe00) === 0xfc00) return true; // fc00::/7 ULA
  if ((g[0] & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((g[0] & 0xffc0) === 0xfec0) return true; // fec0::/10 site-local (deprecated)
  if ((g[0] & 0xff00) === 0xff00) return true; // ff00::/8 multicast
  return false;
}

/** Dispatch to the v4/v6 classifier; anything that is not an IP literal is blocked. */
function isBlockedIp(ip: string): boolean {
  const fam = isIP(ip);
  if (fam === 4) return isBlockedIpv4(ip);
  if (fam === 6) return isBlockedIpv6(ip);
  return true; // not a recognizable IP → block defensively
}
|
||||
|
||||
/**
 * Parse a raw string as a URL and insist on an http/https scheme.
 *
 * @param raw Operator-supplied URL text.
 * @returns The parsed URL.
 * @throws BadRequestException when the text is not a URL or uses another scheme.
 */
export function parseHttpUrl(raw: string): URL {
  const allowedSchemes = ['http:', 'https:'];

  let parsed: URL | undefined;
  try {
    parsed = new URL(raw);
  } catch {
    parsed = undefined;
  }

  if (parsed === undefined) {
    throw new BadRequestException('Webhook URL is not a valid URL');
  }
  if (!allowedSchemes.includes(parsed.protocol)) {
    throw new BadRequestException('Webhook URL must use http:// or https://');
  }
  return parsed;
}
|
||||
|
||||
/**
 * Validate that a URL is http(s) and that its host only maps to public
 * addresses.
 *
 * An IP-literal host is classified directly; a hostname is resolved with
 * `lookup(..., { all: true })` and every returned address is checked. A single
 * blocked address — or an empty answer — rejects the whole URL, so a response
 * mixing public and private records cannot slip through.
 *
 * @param raw Operator-supplied URL text.
 * @returns The parsed URL when every resolved address is allowed.
 * @throws BadRequestException on a bad URL/scheme, an unresolvable host, or a
 *         private/reserved destination.
 */
export async function assertPublicHttpUrl(raw: string): Promise<URL> {
  const parsed = parseHttpUrl(raw);

  // URL.hostname keeps IPv6 literals bracketed ("[::1]"); drop the brackets so
  // isIP/lookup receive the bare address.
  const bareHost = parsed.hostname.replace(/^\[|\]$/g, '');

  const resolveTargets = async (): Promise<Array<{ address: string }>> => {
    if (isIP(bareHost)) {
      return [{ address: bareHost }];
    }
    try {
      return await lookup(bareHost, { all: true });
    } catch {
      throw new BadRequestException(`Webhook host could not be resolved: ${bareHost}`);
    }
  };

  const targets = await resolveTargets();
  const everyTargetPublic =
    targets.length > 0 && targets.every((target) => !isBlockedIp(target.address));

  if (!everyTargetPublic) {
    throw new BadRequestException(
      'Webhook URL resolves to a private or reserved address and is not allowed',
    );
  }
  return parsed;
}
|
||||
@@ -6,6 +6,15 @@ export default () => ({
|
||||
},
|
||||
nats: {
|
||||
url: process.env.NATS_URL || 'nats://localhost:4222',
|
||||
// Public broker address shown to agents in setup instructions.
|
||||
publicUrl: process.env.NATS_PUBLIC_URL || 'nats://nats.corrosionmgmt.com:4222',
|
||||
// Privileged internal credentials for the backend's own NATS connection
|
||||
// (full corrosion.> access). Empty = anonymous (transition period).
|
||||
internalUser: process.env.NATS_INTERNAL_USER || '',
|
||||
internalPassword: process.env.NATS_INTERNAL_PASSWORD || '',
|
||||
// Secret used to derive a per-license agent password:
|
||||
// HMAC-SHA256(license_id, secret). Shared with the nats.conf generator.
|
||||
tokenSecret: process.env.NATS_TOKEN_SECRET || '',
|
||||
},
|
||||
jwt: {
|
||||
secret: process.env.JWT_SECRET || 'change-me',
|
||||
|
||||
74
backend-nest/src/entities/agent-host.entity.ts
Normal file
74
backend-nest/src/entities/agent-host.entity.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Check, Unique } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
|
||||
/** One mounted volume, as stored in the `disks` JSONB column of AgentHost. */
export interface AgentHostDisk {
  mount: string;
  total_mb: number;
  free_mb: number;
}

/**
 * Column transformer for `bigint` columns declared as `number`.
 *
 * TypeORM hydrates SQL `bigint` values as strings, which would contradict the
 * `number` typing on the entity. The values stored here (megabytes, seconds)
 * are assumed to stay well inside Number.MAX_SAFE_INTEGER, so converting on
 * read is lossless.
 */
const agentHostBigintAsNumber = {
  to: (value: number | null | undefined): number | null | undefined => value,
  from: (value: string | number | null | undefined): number | null =>
    value === null || value === undefined ? null : Number(value),
};

/**
 * One Corrosion host agent / one machine. Owns the machine-level facts.
 *
 * NOTE: distinct from the B2B `hosts` table (hosting-partner companies). This
 * is `agent_hosts` — the physical/virtual box a customer runs the agent on.
 *
 * A (license_id, hostname) pair is unique, and `status` is constrained at the
 * database level to 'connected', 'degraded' or 'offline'.
 */
@Entity('agent_hosts')
@Unique(['license_id', 'hostname'])
@Check(`"status" IN ('connected', 'degraded', 'offline')`)
export class AgentHost {
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** Owning license; rows are removed when the license is deleted (see `license`). */
  @Column({ type: 'uuid' })
  license_id: string;

  @Column({ type: 'varchar', length: 255, default: '' })
  hostname: string;

  @Column({ type: 'varchar', length: 64, nullable: true })
  agent_version: string | null;

  @Column({ type: 'varchar', length: 64, nullable: true })
  agent_commit: string | null;

  @Column({ type: 'varchar', length: 32, nullable: true })
  os: string | null;

  @Column({ type: 'varchar', length: 32, nullable: true })
  arch: string | null;

  /** One of the values allowed by the class-level @Check; defaults to 'offline'. */
  @Column({ type: 'varchar', length: 20, default: 'offline' })
  status: string;

  @Column({ type: 'timestamptz', nullable: true })
  last_heartbeat_at: Date | null;

  @Column({ type: 'double precision', nullable: true })
  cpu_percent: number | null;

  @Column({ type: 'integer', nullable: true })
  cpu_cores: number | null;

  // bigint columns go through the transformer so they read back as numbers.
  @Column({ type: 'bigint', nullable: true, transformer: agentHostBigintAsNumber })
  mem_total_mb: number | null;

  @Column({ type: 'bigint', nullable: true, transformer: agentHostBigintAsNumber })
  mem_used_mb: number | null;

  @Column({ type: 'bigint', nullable: true, transformer: agentHostBigintAsNumber })
  uptime_seconds: number | null;

  @Column({ type: 'jsonb', nullable: true })
  disks: AgentHostDisk[] | null;

  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  created_at: Date;

  // Plain column with an insert-time default only; nothing here bumps it on update.
  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  updated_at: Date;

  @ManyToOne(() => License, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'license_id' })
  license: License;
}
|
||||
37
backend-nest/src/entities/api-key.entity.ts
Normal file
37
backend-nest/src/entities/api-key.entity.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Index } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
|
||||
/**
 * A programmatic-access key belonging to a license (`api_keys` table).
 *
 * Indexed on `key_hash` and on `license_id`. Rows are removed when the owning
 * license is deleted.
 */
@Entity('api_keys')
@Index(['key_hash'])
@Index(['license_id'])
export class ApiKey {
  @PrimaryGeneratedColumn('uuid')
  id: string;

  @Column({
    type: 'uuid',
  })
  license_id: string;

  /** Human-readable label for the key. */
  @Column({
    type: 'varchar',
    length: 100,
  })
  name: string;

  /** First 8 chars of the random token — shown in UI so users can identify keys. */
  @Column({
    type: 'varchar',
    length: 16,
  })
  key_prefix: string;

  /** SHA-256 hex digest of the full plaintext key. Never returned to clients. */
  @Column({
    type: 'varchar',
    length: 128,
  })
  key_hash: string;

  @Column({
    type: 'timestamptz',
    nullable: true,
  })
  last_used_at: Date | null;

  /** Defaults to true on insert. */
  @Column({
    type: 'boolean',
    default: true,
  })
  is_active: boolean;

  @Column({
    type: 'timestamptz',
    default: () => 'NOW()',
  })
  created_at: Date;

  @ManyToOne(() => License, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'license_id' })
  license: License;
}
|
||||
59
backend-nest/src/entities/game-instance.entity.ts
Normal file
59
backend-nest/src/entities/game-instance.entity.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Unique } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
import { AgentHost } from './agent-host.entity';
|
||||
|
||||
/**
 * Column transformer for `bigint` columns declared as `number`.
 *
 * TypeORM hydrates SQL `bigint` values as strings, which would contradict the
 * `number` typing on the entity. An uptime in seconds is assumed to stay well
 * inside Number.MAX_SAFE_INTEGER, so converting on read is lossless.
 */
const gameInstanceBigintAsNumber = {
  to: (value: number | null | undefined): number | null | undefined => value,
  from: (value: string | number | null | undefined): number | null =>
    value === null || value === undefined ? null : Number(value),
};

/**
 * One game server process / orchestrated unit (a Rust server, a Conan world,
 * a Dune battlegroup). The billing unit — plans count instances.
 * `agent_instance_id` is the agent's slug and the NATS subject segment.
 *
 * A (license_id, agent_instance_id) pair is unique.
 */
@Entity('game_instances')
@Unique(['license_id', 'agent_instance_id'])
export class GameInstance {
  @PrimaryGeneratedColumn('uuid')
  id: string;

  @Column({ type: 'uuid' })
  license_id: string;

  /** Nullable link to the AgentHost row; set to NULL if that host is deleted (see `host`). */
  @Column({ type: 'uuid', nullable: true })
  host_id: string | null;

  // Stored as a bare uuid; no relation to a cluster entity is declared on this class.
  @Column({ type: 'uuid', nullable: true })
  cluster_id: string | null;

  @Column({ type: 'varchar', length: 64 })
  agent_instance_id: string;

  @Column({ type: 'varchar', length: 32 })
  game: string;

  @Column({ type: 'varchar', length: 255, nullable: true })
  label: string | null;

  /** Free-form state string; defaults to 'unknown'. */
  @Column({ type: 'varchar', length: 32, default: 'unknown' })
  state: string;

  @Column({ type: 'text', nullable: true })
  root_path: string | null;

  // bigint column goes through the transformer so it reads back as a number.
  @Column({ type: 'bigint', default: 0, transformer: gameInstanceBigintAsNumber })
  uptime_seconds: number;

  @Column({ type: 'timestamptz', nullable: true })
  last_seen_at: Date | null;

  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  created_at: Date;

  // Plain column with an insert-time default only; nothing here bumps it on update.
  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  updated_at: Date;

  @ManyToOne(() => License, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'license_id' })
  license: License;

  @ManyToOne(() => AgentHost, { onDelete: 'SET NULL', nullable: true })
  @JoinColumn({ name: 'host_id' })
  host: AgentHost | null;
}
|
||||
38
backend-nest/src/entities/instance-cluster.entity.ts
Normal file
38
backend-nest/src/entities/instance-cluster.entity.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
|
||||
/**
 * Optional grouping of game instances, for games whose servers are linked
 * (Soulmask main/child clusters, Dune BattleGroup → Sietches). The table is
 * reserved now; cluster orchestration ships with those game adapters.
 *
 * Rows are removed when the owning license is deleted.
 */
@Entity('instance_clusters')
export class InstanceCluster {
  @PrimaryGeneratedColumn('uuid')
  id: string;

  @Column({
    type: 'uuid',
  })
  license_id: string;

  @Column({
    type: 'varchar',
    length: 32,
  })
  game: string;

  @Column({
    type: 'varchar',
    length: 255,
  })
  name: string;

  @Column({
    type: 'varchar',
    length: 32,
    nullable: true,
  })
  topology: string | null;

  /** Arbitrary JSON settings for the cluster; shape is not constrained here. */
  @Column({
    type: 'jsonb',
    nullable: true,
  })
  config: Record<string, unknown> | null;

  @Column({
    type: 'timestamptz',
    default: () => 'NOW()',
  })
  created_at: Date;

  @Column({
    type: 'timestamptz',
    default: () => 'NOW()',
  })
  updated_at: Date;

  @ManyToOne(() => License, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'license_id' })
  license: License;
}
|
||||
38
backend-nest/src/entities/instance-stats.entity.ts
Normal file
38
backend-nest/src/entities/instance-stats.entity.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn } from 'typeorm';
|
||||
import { GameInstance } from './game-instance.entity';
|
||||
|
||||
/**
|
||||
* Per-instance time-series game metrics (player count, FPS, …). Populated once
|
||||
* game-level telemetry is collected via RCON/plugin — the host heartbeat
|
||||
* carries host metrics, not game metrics, so this stays empty in Phase A.
|
||||
*/
|
||||
@Entity('instance_stats')
export class InstanceStats {
  /** Surrogate primary key (generated UUID). */
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** Foreign-key column backing the `instance` relation below. */
  @Column({ type: 'uuid' })
  instance_id: string;

  /** License UUID stored on the sample row (no relation is declared for it here). */
  @Column({ type: 'uuid' })
  license_id: string;

  /** Player count for this sample; defaults to 0. */
  @Column({ type: 'integer', default: 0 })
  player_count: number;

  /** Player capacity for this sample; defaults to 0. */
  @Column({ type: 'integer', default: 0 })
  max_players: number;

  /** Frames per second for this sample; defaults to 0. */
  @Column({ type: 'double precision', default: 0 })
  fps: number;

  /** Memory usage in megabytes for this sample; defaults to 0. */
  @Column({ type: 'integer', default: 0 })
  memory_usage_mb: number;

  /** Sample timestamp; defaults to NOW() in the database. */
  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  recorded_at: Date;

  /** Instance the sample belongs to; deleting the instance cascades to its stats. */
  @ManyToOne(() => GameInstance, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'instance_id' })
  instance: GameInstance;
}
|
||||
47
backend-nest/src/entities/webhook.entity.ts
Normal file
47
backend-nest/src/entities/webhook.entity.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import { Entity, PrimaryGeneratedColumn, Column, ManyToOne, JoinColumn, Index } from 'typeorm';
|
||||
import { License } from './license.entity';
|
||||
|
||||
/**
 * Webhook endpoint registered under a license: target URL, subscribed event
 * keys, signing secret and the outcome of the latest delivery attempt.
 */
@Entity('webhooks')
@Index(['license_id'])
export class Webhook {
  /** Surrogate primary key (generated UUID). */
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** Foreign-key column backing the `license` relation below (indexed). */
  @Column({ type: 'uuid' })
  license_id: string;

  /** Display name (at most 100 characters). */
  @Column({ type: 'varchar', length: 100 })
  name: string;

  /** Target URL. */
  @Column({ type: 'text' })
  url: string;

  /**
   * Subscribed event keys. Persisted as one comma-separated text value;
   * TypeORM's simple-array type converts string[] <-> 'event1,event2'.
   */
  @Column({ type: 'simple-array' })
  events: string[];

  /** Signing secret for HMAC-SHA256. Generated automatically on create when omitted. */
  @Column({ type: 'varchar', length: 128 })
  secret: string;

  /** Enabled flag; defaults to true. */
  @Column({ type: 'boolean', default: true })
  is_active: boolean;

  /** When the latest delivery attempt happened, whether it succeeded or failed. */
  @Column({ type: 'timestamptz', nullable: true })
  last_delivery_at: Date | null;

  /** Result of the latest delivery attempt: 'ok' | 'failed'. */
  @Column({ type: 'varchar', length: 20, nullable: true })
  last_status: string | null;

  /** Row creation time; defaults to NOW() in the database. */
  @Column({ type: 'timestamptz', default: () => 'NOW()' })
  created_at: Date;

  /** Owning license; deleting the license cascades to its webhooks. */
  @ManyToOne(() => License, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'license_id' })
  license: License;
}
|
||||
@@ -71,7 +71,10 @@ export class NatsBridgeGateway implements OnGatewayConnection, OnGatewayDisconne
|
||||
|
||||
// Subscribe to NATS events for this license
|
||||
const listener = (event: string, data: unknown) => {
|
||||
if (client.readyState === WebSocket.OPEN) {
|
||||
// client.OPEN (instance constant) — NOT WebSocket.OPEN: with
|
||||
// esModuleInterop off, the default `ws` import is undefined at
|
||||
// runtime, so the static crashes. The instance constant is safe.
|
||||
if (client.readyState === client.OPEN) {
|
||||
client.send(JSON.stringify({
|
||||
type: 'event',
|
||||
license_id: payload.license_id,
|
||||
|
||||
@@ -111,13 +111,13 @@ export class AnalyticsService {
|
||||
.createQueryBuilder('wipe')
|
||||
.leftJoinAndSelect('wipe.map', 'map')
|
||||
.select('map.id', 'map_id')
|
||||
.addSelect('map.name', 'map_name')
|
||||
.addSelect('map.display_name', 'map_name')
|
||||
.addSelect('COUNT(wipe.id)', 'usage_count')
|
||||
.where('wipe.license_id = :licenseId', { licenseId })
|
||||
.andWhere('wipe.started_at >= :cutoff', { cutoff })
|
||||
.andWhere('wipe.map_id IS NOT NULL')
|
||||
.groupBy('map.id')
|
||||
.addGroupBy('map.name')
|
||||
.addGroupBy('map.display_name')
|
||||
.getRawMany();
|
||||
|
||||
return {
|
||||
|
||||
55
backend-nest/src/modules/api-keys/api-keys.controller.ts
Normal file
55
backend-nest/src/modules/api-keys/api-keys.controller.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import {
|
||||
Controller,
|
||||
Get,
|
||||
Post,
|
||||
Delete,
|
||||
Body,
|
||||
Param,
|
||||
} from '@nestjs/common';
|
||||
import { ApiTags, ApiBearerAuth, ApiOperation, ApiResponse } from '@nestjs/swagger';
|
||||
import { ApiKeysService } from './api-keys.service';
|
||||
import { CreateApiKeyDto } from './dto/create-api-key.dto';
|
||||
import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
|
||||
import { RequirePermission } from '../../common/decorators/require-permission.decorator';
|
||||
|
||||
/** HTTP endpoints for issuing, listing and revoking the API keys of the current license. */
@ApiTags('api-keys')
@ApiBearerAuth()
@Controller('api-keys')
export class ApiKeysController {
  constructor(private readonly apiKeysService: ApiKeysService) {}

  /**
   * POST /api-keys — issue a key named `dto.name` for the caller's license.
   * The response is the only place the plaintext key ever appears.
   */
  @Post()
  @RequirePermission('apikeys.manage')
  @ApiOperation({
    summary: 'Create an API key',
    description:
      'Issues a new API key for this license. The full plaintext key is returned ONCE — store it securely; it cannot be retrieved again.',
  })
  @ApiResponse({ status: 201, description: 'Key created — plaintext key returned once.' })
  async create(@CurrentTenant() licenseId: string, @Body() dto: CreateApiKeyDto) {
    const { name } = dto;
    return this.apiKeysService.create(licenseId, name);
  }

  /** GET /api-keys — every key of the caller's license, active or revoked. */
  @Get()
  @RequirePermission('apikeys.view')
  @ApiOperation({
    summary: 'List API keys',
    description: 'Returns all keys (active and revoked) for this license. Key hashes are never returned.',
  })
  @ApiResponse({ status: 200, description: 'Key list.' })
  async list(@CurrentTenant() licenseId: string) {
    return this.apiKeysService.list(licenseId);
  }

  /** DELETE /api-keys/:id — revoke the key with the given id inside the caller's license. */
  @Delete(':id')
  @RequirePermission('apikeys.manage')
  @ApiOperation({
    summary: 'Revoke an API key',
    description: 'Soft-deletes the key (is_active = false). The row is retained for audit purposes.',
  })
  @ApiResponse({ status: 200, description: 'Key revoked.' })
  @ApiResponse({ status: 404, description: 'Key not found in this license.' })
  async revoke(@CurrentTenant() licenseId: string, @Param('id') id: string) {
    return this.apiKeysService.revoke(licenseId, id);
  }
}
|
||||
15
backend-nest/src/modules/api-keys/api-keys.module.ts
Normal file
15
backend-nest/src/modules/api-keys/api-keys.module.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import { Global, Module } from '@nestjs/common';
|
||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { ApiKey } from '../../entities/api-key.entity';
|
||||
import { License } from '../../entities/license.entity';
|
||||
import { ApiKeysController } from './api-keys.controller';
|
||||
import { ApiKeysService } from './api-keys.service';
|
||||
|
||||
/**
 * Global module for API keys. ApiKeysService is exported, and because the
 * module is @Global() other modules can inject it without importing this one.
 */
@Global()
@Module({
  imports: [
    TypeOrmModule.forFeature([ApiKey, License]),
  ],
  controllers: [
    ApiKeysController,
  ],
  providers: [
    ApiKeysService,
  ],
  exports: [
    ApiKeysService,
  ],
})
export class ApiKeysModule {}
|
||||
163
backend-nest/src/modules/api-keys/api-keys.service.ts
Normal file
163
backend-nest/src/modules/api-keys/api-keys.service.ts
Normal file
@@ -0,0 +1,163 @@
|
||||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import * as crypto from 'crypto';
|
||||
import { ApiKey } from '../../entities/api-key.entity';
|
||||
import { License } from '../../entities/license.entity';
|
||||
|
||||
/**
 * Result of creating a key. This is the one and only response that carries
 * the plaintext key.
 */
export interface CreatedApiKey {
  /** The complete plaintext key. Display it once; do not persist it. */
  plaintext_key: string;
  /** Id of the saved key row. */
  id: string;
  /** Label supplied by the caller. */
  name: string;
  /** Random prefix segment of the key, stored alongside the hash. */
  key_prefix: string;
  /** Whether the key is usable; true for a freshly created key. */
  is_active: boolean;
  /** Creation time of the saved row. */
  created_at: Date;
}
|
||||
|
||||
/** Listing projection of a key: carries neither the hash nor the plaintext. */
export interface ApiKeyListItem {
  /** Id of the key row. */
  id: string;
  /** Label supplied when the key was created. */
  name: string;
  /** Random prefix segment of the key. */
  key_prefix: string;
  /** Last time the key was used, or null when never set. */
  last_used_at: Date | null;
  /** False once the key has been revoked. */
  is_active: boolean;
  /** Creation time of the row. */
  created_at: Date;
}
|
||||
|
||||
/** Issues, lists, revokes and validates per-license API keys. Only SHA-256 hashes are stored. */
@Injectable()
export class ApiKeysService {
  private readonly logger = new Logger(ApiKeysService.name);

  constructor(
    @InjectRepository(ApiKey)
    private readonly apiKeyRepo: Repository<ApiKey>,
    @InjectRepository(License)
    private readonly licenseRepo: Repository<License>,
  ) {}

  /**
   * Issue a new API key for the given license.
   *
   * Key format: `corr_<prefix8>_<secret32>`
   * where prefix and secret are URL-safe base64url random bytes.
   * The base64url alphabet itself contains `_` and `-`, so the key must be
   * treated as an opaque string — never split it on `_` to recover segments.
   *
   * Returns the full plaintext key ONCE alongside the saved row.
   * The hash is never returned to the caller.
   *
   * @param licenseId License the key is issued for.
   * @param name      Human-readable label stored with the key.
   */
  async create(licenseId: string, name: string): Promise<CreatedApiKey> {
    const prefixBytes = crypto.randomBytes(6); // 8 base64url chars
    const secretBytes = crypto.randomBytes(24); // 32 base64url chars

    const prefix = prefixBytes.toString('base64url');
    const secret = secretBytes.toString('base64url');
    const plaintextKey = `corr_${prefix}_${secret}`;

    // Only the digest of the full plaintext key is persisted.
    const keyHash = crypto
      .createHash('sha256')
      .update(plaintextKey)
      .digest('hex');

    const entity = this.apiKeyRepo.create({
      license_id: licenseId,
      name,
      key_prefix: prefix,
      key_hash: keyHash,
      is_active: true,
    });

    const saved = await this.apiKeyRepo.save(entity);

    // The log line carries the prefix only, never the secret part.
    this.logger.log(
      `API key created: id=${saved.id} prefix=${prefix} license=${licenseId}`,
    );

    return {
      plaintext_key: plaintextKey,
      id: saved.id,
      name: saved.name,
      key_prefix: saved.key_prefix,
      is_active: saved.is_active,
      created_at: saved.created_at,
    };
  }

  /**
   * List all keys (active and revoked) for a license, newest first.
   * The key_hash is intentionally excluded, both from the SELECT and from
   * the mapped result.
   */
  async list(licenseId: string): Promise<ApiKeyListItem[]> {
    const rows = await this.apiKeyRepo.find({
      where: { license_id: licenseId },
      order: { created_at: 'DESC' },
      select: ['id', 'name', 'key_prefix', 'last_used_at', 'is_active', 'created_at'],
    });

    return rows.map((r) => ({
      id: r.id,
      name: r.name,
      key_prefix: r.key_prefix,
      last_used_at: r.last_used_at,
      is_active: r.is_active,
      created_at: r.created_at,
    }));
  }

  /**
   * Revoke (soft-delete) a key by setting is_active = false.
   *
   * @returns The id and new is_active flag of the key.
   * @throws NotFoundException when no key with this id exists within this license.
   */
  async revoke(licenseId: string, id: string): Promise<{ id: string; is_active: boolean }> {
    // Scoping the lookup by license_id keeps one tenant from revoking another's key.
    const key = await this.apiKeyRepo.findOne({
      where: { id, license_id: licenseId },
    });

    if (!key) {
      throw new NotFoundException(`API key ${id} not found`);
    }

    key.is_active = false;
    await this.apiKeyRepo.save(key);

    this.logger.log(`API key revoked: id=${id} license=${licenseId}`);

    return { id: key.id, is_active: key.is_active };
  }

  /**
   * Validate a raw API key string. Called by JwtAuthGuard.
   *
   * Hashes the raw key, looks up an ACTIVE row, touches last_used_at, resolves
   * the license owner (so the guard can attribute the call to a real user UUID),
   * and returns { license_id, user_id } on success or null on failure.
   *
   * user_id is the license owner — API-key calls act AS the owner, so any
   * created_by / @CurrentUser FK insert gets a valid UUID and correct attribution.
   * It is null when the license row cannot be found.
   */
  async validateKey(
    rawKey: string,
  ): Promise<{ license_id: string; user_id: string | null } | null> {
    // A missing, empty or non-string credential can never match a stored hash.
    // Reject it here instead of letting createHash().update() throw (which
    // would surface as a 500) or spending a database round-trip on it.
    if (typeof rawKey !== 'string' || rawKey.length === 0) {
      return null;
    }

    const keyHash = crypto.createHash('sha256').update(rawKey).digest('hex');

    const key = await this.apiKeyRepo.findOne({
      where: { key_hash: keyHash, is_active: true },
      select: ['id', 'license_id'],
    });

    if (!key) {
      return null;
    }

    // Update last_used_at without loading the full row again.
    await this.apiKeyRepo.update(key.id, { last_used_at: new Date() });

    const license = await this.licenseRepo.findOne({
      where: { id: key.license_id },
      select: ['id', 'owner_user_id'],
    });

    return { license_id: key.license_id, user_id: license?.owner_user_id ?? null };
  }
}
|
||||
10
backend-nest/src/modules/api-keys/dto/create-api-key.dto.ts
Normal file
10
backend-nest/src/modules/api-keys/dto/create-api-key.dto.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { IsString, IsNotEmpty, MaxLength } from 'class-validator';
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
|
||||
/** Request body for creating an API key. */
export class CreateApiKeyDto {
  /** Label for the key: a non-empty string of at most 100 characters. */
  @ApiProperty({
    description: 'Human-readable label for this key',
    maxLength: 100,
  })
  @IsString()
  @IsNotEmpty()
  @MaxLength(100)
  name: string;
}
|
||||
82
backend-nest/src/modules/auth/admin-seed.service.ts
Normal file
82
backend-nest/src/modules/auth/admin-seed.service.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import * as argon2 from 'argon2';
|
||||
import { randomBytes } from 'crypto';
|
||||
import { User } from '../../entities/user.entity';
|
||||
import { License } from '../../entities/license.entity';
|
||||
|
||||
/**
|
||||
* Bootstraps the first admin account on a fresh database.
|
||||
*
|
||||
* A fresh deploy builds the schema via docker-entrypoint-initdb.d but contains
|
||||
* zero users, so the panel has no possible login. If ADMIN_EMAIL and
|
||||
* ADMIN_PASSWORD are set and the users table is empty, this creates a
|
||||
* super-admin user plus an active license — the same rows the register flow
|
||||
* would create. It never runs against a database that already has users.
|
||||
*/
|
||||
@Injectable()
export class AdminSeedService implements OnApplicationBootstrap {
  private readonly logger = new Logger(AdminSeedService.name);

  constructor(
    private readonly config: ConfigService,
    @InjectRepository(User) private readonly userRepository: Repository<User>,
    @InjectRepository(License) private readonly licenseRepository: Repository<License>,
  ) {}

  /** Nest lifecycle hook: attempts the seed once at startup and never throws. */
  async onApplicationBootstrap(): Promise<void> {
    try {
      await this.seedAdminIfEmpty();
    } catch (err) {
      // A failed seed must not take the API down — surface it loudly and move on
      this.logger.error(`Admin bootstrap failed: ${(err as Error).message}`, (err as Error).stack);
    }
  }

  /**
   * Creates the super-admin user and its active license when both admin
   * credentials are configured and the users table is empty.
   *
   * The two inserts run in a single transaction. Without it, a failing license
   * insert would leave the user row behind; the "users table is empty" guard
   * would then block every later bootstrap and the admin would never get a
   * license.
   */
  private async seedAdminIfEmpty(): Promise<void> {
    const email = this.config.get<string>('admin.email');
    const password = this.config.get<string>('admin.password');
    const username = this.config.get<string>('admin.username') || 'Commander';

    if (!email || !password) {
      this.logger.log('Admin bootstrap skipped: ADMIN_EMAIL / ADMIN_PASSWORD not set');
      return;
    }

    // Never touch a database that already has users.
    const userCount = await this.userRepository.count();
    if (userCount > 0) {
      return;
    }

    // Hash and resolve the key up front so the transaction only does the inserts.
    const password_hash = await argon2.hash(password);
    const licenseKey = this.config.get<string>('admin.licenseKey') || this.generateLicenseKey();

    const { user, license } = await this.userRepository.manager.transaction(async (manager) => {
      const user = await manager.save(
        this.userRepository.create({
          email: email.toLowerCase(),
          username,
          password_hash,
          email_verified: true,
          is_super_admin: true,
        }),
      );

      const license = await manager.save(
        this.licenseRepository.create({
          license_key: licenseKey,
          owner_user_id: user.id,
          status: 'active',
          modules_enabled: [],
          webstore_active: false,
        }),
      );

      return { user, license };
    });

    this.logger.log(`Bootstrap admin created: ${user.email} (license ${license.license_key})`);
  }

  /** Builds a random key shaped `CORR-XXXX-XXXX-XXXX` (three groups of 4 uppercase hex digits). */
  private generateLicenseKey(): string {
    const part1 = randomBytes(2).toString('hex').toUpperCase();
    const part2 = randomBytes(2).toString('hex').toUpperCase();
    const part3 = randomBytes(2).toString('hex').toUpperCase();
    return `CORR-${part1}-${part2}-${part3}`;
  }
}
|
||||
@@ -13,6 +13,7 @@ import { LoginDto } from './dto/login.dto';
|
||||
import { RefreshTokenDto } from './dto/refresh-token.dto';
|
||||
import { VerifyTotpDto } from './dto/verify-totp.dto';
|
||||
import { UpdateProfileDto } from './dto/update-profile.dto';
|
||||
import { ChangePasswordDto } from './dto/change-password.dto';
|
||||
import { ForgotPasswordDto } from './dto/forgot-password.dto';
|
||||
import { ResetPasswordDto } from './dto/reset-password.dto';
|
||||
import { Public } from '../../common/decorators/public.decorator';
|
||||
@@ -61,6 +62,30 @@ export class AuthController {
|
||||
return this.authService.verifyTotp(userId, dto.code);
|
||||
}
|
||||
|
||||
/** POST 2fa/disable — turns TOTP off for the calling user; the body must carry a current code. */
@Post('2fa/disable')
@ApiBearerAuth()
@ApiOperation({ summary: 'Disable TOTP 2FA (requires a current code)' })
async disableTotp(@CurrentUser('sub') userId: string, @Body() dto: VerifyTotpDto) {
  const { code } = dto;
  return this.authService.disableTotp(userId, code);
}
|
||||
|
||||
/** POST change-password — replaces the calling user's password after checking the current one. */
@Post('change-password')
@ApiBearerAuth()
@ApiOperation({ summary: 'Change the current user password' })
async changePassword(@CurrentUser('sub') userId: string, @Body() dto: ChangePasswordDto) {
  const { current_password, new_password } = dto;
  return this.authService.changePassword(userId, current_password, new_password);
}
|
||||
|
||||
@Get('me')
|
||||
@ApiBearerAuth()
|
||||
@ApiOperation({ summary: 'Get current user profile' })
|
||||
|
||||
@@ -5,6 +5,7 @@ import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { ConfigModule, ConfigService } from '@nestjs/config';
|
||||
import { AuthController } from './auth.controller';
|
||||
import { AuthService } from './auth.service';
|
||||
import { AdminSeedService } from './admin-seed.service';
|
||||
import { JwtStrategy } from './jwt.strategy';
|
||||
import { User } from '../../entities/user.entity';
|
||||
import { License } from '../../entities/license.entity';
|
||||
@@ -27,7 +28,7 @@ import { TeamMember } from '../../entities/team-member.entity';
|
||||
TypeOrmModule.forFeature([User, License, Role, TeamMember]),
|
||||
],
|
||||
controllers: [AuthController],
|
||||
providers: [AuthService, JwtStrategy],
|
||||
providers: [AuthService, AdminSeedService, JwtStrategy],
|
||||
exports: [AuthService],
|
||||
})
|
||||
export class AuthModule {}
|
||||
|
||||
@@ -335,6 +335,56 @@ export class AuthService {
|
||||
throw new NotImplementedException('Password reset not yet configured');
|
||||
}
|
||||
|
||||
/**
 * Change a user's password.
 *
 * @param userId          Id of the user whose password is changed.
 * @param currentPassword Must verify against the stored Argon2 hash.
 * @param newPassword     Must differ from the current password.
 * @returns `{ success: true }` once the new hash is stored.
 * @throws NotFoundException     when the user does not exist.
 * @throws UnauthorizedException when `currentPassword` is wrong.
 * @throws BadRequestException   when the new password equals the current one.
 */
async changePassword(userId: string, currentPassword: string, newPassword: string) {
  const user = await this.userRepository.findOne({ where: { id: userId } });
  if (!user) {
    throw new NotFoundException('User not found');
  }

  const valid = await argon2.verify(user.password_hash, currentPassword);
  if (!valid) {
    throw new UnauthorizedException('Current password is incorrect');
  }

  // currentPassword is now proven to match the stored hash, so "new password
  // matches the stored hash" is the same question as "new === current".
  // Comparing the strings avoids a second, deliberately expensive Argon2 run.
  if (newPassword === currentPassword) {
    throw new BadRequestException('New password must be different from the current one');
  }

  const password_hash = await argon2.hash(newPassword);
  await this.userRepository.update(user.id, { password_hash });
  this.logger.log(`Password changed for user ${user.id}`);

  // NOTE: existing JWTs remain valid until expiry — this design has no
  // server-side refresh-token store to revoke. Session invalidation on
  // password change is a follow-up (tracked separately).
  return { success: true };
}
|
||||
|
||||
/**
 * Switch TOTP two-factor authentication off for a user and erase the stored secret.
 *
 * @param userId Id of the user.
 * @param code   A currently valid TOTP code.
 * @returns `{ success: true }` once 2FA has been removed.
 * @throws NotFoundException     when the user does not exist.
 * @throws BadRequestException   when 2FA is not enabled for the user.
 * @throws UnauthorizedException when the code does not verify.
 */
async disableTotp(userId: string, code: string) {
  const account = await this.userRepository.findOne({ where: { id: userId } });

  if (!account) {
    throw new NotFoundException('User not found');
  }
  if (!account.totp_enabled) {
    throw new BadRequestException('2FA is not enabled');
  }

  // The caller has to prove they still hold the second factor before it is
  // removed; otherwise a hijacked session could strip 2FA silently.
  const codeAccepted = await this.verifyTotpCode(account, code);
  if (!codeAccepted) {
    throw new UnauthorizedException('Invalid TOTP code');
  }

  const cleared = { totp_enabled: false, totp_secret: null };
  await this.userRepository.update(account.id, cleared);

  this.logger.log(`TOTP disabled for user ${account.id}`);
  return { success: true };
}
|
||||
|
||||
// Helper methods
|
||||
|
||||
private async generateTokens(user: User, licenseId?: string) {
|
||||
|
||||
14
backend-nest/src/modules/auth/dto/change-password.dto.ts
Normal file
14
backend-nest/src/modules/auth/dto/change-password.dto.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { IsString, MinLength, MaxLength } from 'class-validator';
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
|
||||
/** Request body for the change-password endpoint. */
export class ChangePasswordDto {
  /** The password currently set on the account. */
  @ApiProperty({ description: 'Current account password' })
  @IsString()
  current_password: string;

  /** Replacement password: a string of 8 to 128 characters. */
  @ApiProperty({
    description: 'New password',
    minLength: 8,
    maxLength: 128,
  })
  @IsString()
  @MinLength(8)
  @MaxLength(128)
  new_password: string;
}
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { AutoDoorsConfig } from '../../entities/autodoors-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateAutoDoorsConfigDto } from './dto/create-autodoors-config.dto';
|
||||
import { UpdateAutoDoorsConfigDto } from './dto/update-autodoors-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class AutoDoorsService {
|
||||
constructor(
|
||||
@InjectRepository(AutoDoorsConfig)
|
||||
private readonly autoDoorsRepo: Repository<AutoDoorsConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class AutoDoorsService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write AutoDoors.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/AutoDoors.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write AutoDoors.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/AutoDoors.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload AutoDoors plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload AutoDoors',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload AutoDoors');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.autoDoorsRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class AutoDoorsService {
|
||||
/** Import AutoDoors.json from the game server via the Rust agent */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read AutoDoors.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/AutoDoors.json',
|
||||
},
|
||||
30000,
|
||||
// Read AutoDoors.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/AutoDoors.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class AutoDoorsService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { BetterChatConfig } from '../../entities/betterchat-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateBetterChatConfigDto } from './dto/create-betterchat-config.dto';
|
||||
import { UpdateBetterChatConfigDto } from './dto/update-betterchat-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class BetterChatService {
|
||||
constructor(
|
||||
@InjectRepository(BetterChatConfig)
|
||||
private readonly repo: Repository<BetterChatConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class BetterChatService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write BetterChat.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/BetterChat.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write BetterChat.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/BetterChat.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload BetterChat plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload BetterChat',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload BetterChat');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.repo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class BetterChatService {
|
||||
/** Import BetterChat.json from the game server via the Rust agent */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read BetterChat.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/BetterChat.json',
|
||||
},
|
||||
30000,
|
||||
// Read BetterChat.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/BetterChat.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class BetterChatService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -108,7 +108,9 @@ export class ConsoleGateway implements OnGatewayConnection, OnGatewayDisconnect
|
||||
|
||||
const message = JSON.stringify({ event, data });
|
||||
for (const client of clients) {
|
||||
if (client.readyState === WebSocket.OPEN) {
|
||||
// client.OPEN, not WebSocket.OPEN — esModuleInterop is off so the
|
||||
// default `ws` import is undefined at runtime (would crash on forward).
|
||||
if (client.readyState === client.OPEN) {
|
||||
client.send(message);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
import { IsEmail, IsOptional, IsString, MaxLength } from 'class-validator';
|
||||
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
|
||||
|
||||
/** Request body for an early-access signup. */
export class CreateEarlyAccessDto {
  /** Contact address; must be a valid e-mail. */
  @ApiProperty({ example: 'admin@example.com' })
  @IsEmail()
  email: string;

  /** Optional free-text answer, limited to 10 characters. */
  @ApiPropertyOptional({
    example: 'rust',
    description: 'Primary game interest or server count',
  })
  @IsOptional()
  @IsString()
  @MaxLength(10)
  server_count?: string;
}
|
||||
@@ -0,0 +1,19 @@
|
||||
import { Body, Controller, HttpCode, HttpStatus, Post } from '@nestjs/common';
|
||||
import { ApiOperation, ApiTags } from '@nestjs/swagger';
|
||||
import { Public } from '../../common/decorators/public.decorator';
|
||||
import { EarlyAccessService } from './early-access.service';
|
||||
import { CreateEarlyAccessDto } from './dto/create-early-access.dto';
|
||||
|
||||
/**
 * HTTP entry point for early-access signups.
 *
 * The route is marked `@Public()` and always answers 200 on success
 * (instead of the default 201 for POST).
 */
@ApiTags('early-access')
@Controller()
export class EarlyAccessController {
  constructor(private readonly signups: EarlyAccessService) {}

  /** Register an email for early access; delegates entirely to the service. */
  @Public()
  @Post('early-access')
  @HttpCode(HttpStatus.OK)
  @ApiOperation({ summary: 'Register for early access' })
  async register(@Body() dto: CreateEarlyAccessDto) {
    return this.signups.register(dto);
  }
}
|
||||
12
backend-nest/src/modules/early-access/early-access.module.ts
Normal file
12
backend-nest/src/modules/early-access/early-access.module.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { EarlyAccessSignup } from '../../entities/early-access-signup.entity';
|
||||
import { EarlyAccessController } from './early-access.controller';
|
||||
import { EarlyAccessService } from './early-access.service';
|
||||
|
||||
/**
 * Wires the early-access feature: registers the signup entity's repository
 * and exposes the controller backed by EarlyAccessService.
 */
@Module({
  imports: [TypeOrmModule.forFeature([EarlyAccessSignup])],
  providers: [EarlyAccessService],
  controllers: [EarlyAccessController],
})
export class EarlyAccessModule {}
|
||||
@@ -0,0 +1,42 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { EarlyAccessSignup } from '../../entities/early-access-signup.entity';
|
||||
import { CreateEarlyAccessDto } from './dto/create-early-access.dto';
|
||||
|
||||
/**
 * Persists early-access signups.
 *
 * Signing up twice with the same address is treated as a success so the
 * public form never surfaces a conflict error to the visitor.
 */
@Injectable()
export class EarlyAccessService {
  private readonly logger = new Logger(EarlyAccessService.name);

  constructor(
    @InjectRepository(EarlyAccessSignup)
    private readonly repo: Repository<EarlyAccessSignup>,
  ) {}

  /**
   * Store a signup unless the email is already registered.
   *
   * @param dto validated request body
   * @returns `{ success: true, alreadyRegistered }` — `alreadyRegistered` is
   *   true when the email was found up front or when the insert lost a race
   *   against a concurrent identical signup.
   * @throws rethrows any persistence error other than a unique violation
   */
  async register(dto: CreateEarlyAccessDto): Promise<{ success: true; alreadyRegistered: boolean }> {
    // Normalise the address so "Foo@x.com" and "foo@x.com" are recognised as the
    // same signup by both the lookup below and the unique constraint.
    const email = dto.email.trim().toLowerCase();

    const existing = await this.repo.findOne({ where: { email } });
    if (existing) {
      // Duplicate email — return friendly success rather than a 409 that would break the UX
      return { success: true, alreadyRegistered: true };
    }

    const signup = this.repo.create({
      email,
      server_count: dto.server_count ?? 'not specified',
    });

    try {
      await this.repo.save(signup);
    } catch (err: unknown) {
      // Guard against a race-condition duplicate (unique constraint violation)
      const pg = err as { code?: string };
      if (pg.code === '23505') {
        return { success: true, alreadyRegistered: true };
      }
      // Logger.error takes the stack trace as its second argument, not the error object.
      this.logger.error(
        'Failed to save early-access signup',
        err instanceof Error ? err.stack : String(err),
      );
      throw err;
    }

    return { success: true, alreadyRegistered: false };
  }
}
|
||||
26
backend-nest/src/modules/fleet/fleet.controller.ts
Normal file
26
backend-nest/src/modules/fleet/fleet.controller.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import { Controller, Get, Delete, Param } from '@nestjs/common';
|
||||
import { ApiTags, ApiBearerAuth, ApiOperation } from '@nestjs/swagger';
|
||||
import { FleetService } from './fleet.service';
|
||||
import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
|
||||
import { RequirePermission } from '../../common/decorators/require-permission.decorator';
|
||||
|
||||
/**
 * REST endpoints under `/fleet` for the caller's license (tenant).
 * The license id is supplied by the `@CurrentTenant()` decorator.
 */
@ApiTags('fleet')
@ApiBearerAuth()
@Controller('fleet')
export class FleetController {
  constructor(private readonly fleet: FleetService) {}

  /** Return all hosts and game instances of the license plus summary counts. */
  @Get()
  @RequirePermission('server.view')
  @ApiOperation({ summary: 'Get fleet overview — hosts and game instances for this license' })
  async getFleet(@CurrentTenant() licenseId: string) {
    return this.fleet.getFleet(licenseId);
  }

  /** Delete one host (and its instances) by id; the service rejects online hosts. */
  @Delete('hosts/:id')
  @RequirePermission('server.manage')
  @ApiOperation({ summary: 'Remove a host and its instances (host must be offline)' })
  async deleteHost(@CurrentTenant() licenseId: string, @Param('id') id: string) {
    return this.fleet.deleteHost(licenseId, id);
  }
}
|
||||
15
backend-nest/src/modules/fleet/fleet.module.ts
Normal file
15
backend-nest/src/modules/fleet/fleet.module.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { FleetController } from './fleet.controller';
|
||||
import { FleetService } from './fleet.service';
|
||||
import { AgentHost } from '../../entities/agent-host.entity';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
import { ServerConnection } from '../../entities/server-connection.entity';
|
||||
|
||||
/**
 * Fleet feature module: registers the host, instance and legacy
 * server-connection repositories and exports FleetService for other modules.
 */
@Module({
  imports: [TypeOrmModule.forFeature([AgentHost, GameInstance, ServerConnection])],
  providers: [FleetService],
  controllers: [FleetController],
  exports: [FleetService],
})
export class FleetModule {}
|
||||
170
backend-nest/src/modules/fleet/fleet.service.ts
Normal file
170
backend-nest/src/modules/fleet/fleet.service.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
import { Injectable, NotFoundException, ConflictException } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { AgentHost } from '../../entities/agent-host.entity';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
import { ServerConnection } from '../../entities/server-connection.entity';
|
||||
|
||||
/** One game instance as returned inside a fleet host entry. */
export interface FleetInstanceDto {
  /** Database id of the instance row. */
  id: string;
  /** Identifier the agent uses for this instance. */
  agent_instance_id: string;
  game: string;
  label: string | null;
  state: string;
  /** Coerced to a number by the service before being returned. */
  uptime_seconds: number;
  /** ISO-8601 timestamp, or null when the instance was never seen. */
  last_seen_at: string | null;
}
|
||||
|
||||
/**
 * One host in the fleet overview together with the instances grouped under it.
 * Numeric metrics are null when the host row carries no value for them.
 */
export interface FleetHostDto {
  id: string;
  hostname: string;
  status: string;
  agent_version: string | null;
  os: string | null;
  arch: string | null;
  cpu_percent: number | null;
  cpu_cores: number | null;
  mem_total_mb: number | null;
  mem_used_mb: number | null;
  uptime_seconds: number | null;
  /** Passed through unchanged from the AgentHost entity. */
  disks: AgentHost['disks'];
  /** ISO-8601 timestamp of the last heartbeat, or null. */
  last_heartbeat_at: string | null;
  instances: FleetInstanceDto[];
}
|
||||
|
||||
/** Aggregate counts shown alongside the fleet host list. */
export interface FleetSummaryDto {
  /** Number of real host rows (the synthetic "Unassigned" bucket is not counted). */
  host_count: number;
  /** Number of instances of the license, including unassigned ones. */
  instance_count: number;
  /** Hosts whose status is `connected`. */
  online_host_count: number;
}
|
||||
|
||||
/** Response body of the fleet overview endpoint. */
export interface FleetResponseDto {
  hosts: FleetHostDto[];
  summary: FleetSummaryDto;
}
|
||||
|
||||
/**
 * Fleet queries and maintenance for a single license (tenant): builds the
 * host/instance overview and removes offline hosts.
 */
@Injectable()
export class FleetService {
  constructor(
    @InjectRepository(AgentHost)
    private readonly hostRepo: Repository<AgentHost>,
    @InjectRepository(GameInstance)
    private readonly instanceRepo: Repository<GameInstance>,
    @InjectRepository(ServerConnection)
    private readonly connectionRepo: Repository<ServerConnection>,
  ) {}

  /** Coerce a nullable numeric column (pg returns bigint as string) to number, keeping null. */
  private static toNumberOrNull(value: unknown): number | null {
    return value !== null && value !== undefined ? Number(value) : null;
  }

  /**
   * Remove a host and its game instances from the fleet.
   *
   * Refuses while the host is `connected` — a live agent re-registers on its
   * next heartbeat, so the operator must stop the agent first. Deletes the
   * host's instances explicitly (the FK is SET NULL, which would otherwise
   * orphan them); instance_stats cascade. If this was the license's last host,
   * the legacy single-server connection row is cleared too so the old
   * Dashboard doesn't show a stale server.
   *
   * All deletes run in one transaction so a failure midway cannot leave a
   * host without its instances (or a stale connection row) behind.
   *
   * @param licenseId tenant that must own the host
   * @param hostId id of the host to remove
   * @returns `deleted: true` plus the number of instance rows removed
   * @throws NotFoundException when the license has no such host
   * @throws ConflictException when the host is still `connected`
   */
  async deleteHost(
    licenseId: string,
    hostId: string,
  ): Promise<{ deleted: true; instances_removed: number }> {
    const host = await this.hostRepo.findOne({ where: { id: hostId, license_id: licenseId } });
    if (!host) throw new NotFoundException('Host not found');
    if (host.status === 'connected') {
      throw new ConflictException(
        'Host is online — stop the agent first, or it will re-register on its next heartbeat',
      );
    }

    // Address the entities through each repository's target so the transactional
    // manager operates on exactly the entities this service was injected with.
    return this.hostRepo.manager.transaction(async (tx) => {
      const removedInstances = await tx.delete(this.instanceRepo.target, {
        license_id: licenseId,
        host_id: hostId,
      });
      await tx.delete(this.hostRepo.target, { id: hostId, license_id: licenseId });

      const remaining = await tx.count(this.hostRepo.target, {
        where: { license_id: licenseId },
      });
      if (remaining === 0) {
        await tx.delete(this.connectionRepo.target, { license_id: licenseId });
      }

      return { deleted: true as const, instances_removed: removedInstances.affected ?? 0 };
    });
  }

  /**
   * Build the fleet overview for a license.
   *
   * Hosts are ordered by hostname, instances by game then label. Instances
   * without a host are returned under a synthetic `__unassigned__` host that
   * is appended only when such instances exist.
   *
   * @param licenseId tenant whose hosts and instances are listed
   * @returns hosts with their instances, plus summary counts
   */
  async getFleet(licenseId: string): Promise<FleetResponseDto> {
    const [hosts, instances] = await Promise.all([
      this.hostRepo.find({
        where: { license_id: licenseId },
        order: { hostname: 'ASC' },
      }),
      this.instanceRepo.find({
        where: { license_id: licenseId },
        order: { game: 'ASC', label: 'ASC' },
      }),
    ]);

    // Group instances by host_id. Bigint columns come back as strings from pg — coerce.
    const instancesByHost = new Map<string | null, FleetInstanceDto[]>();
    for (const inst of instances) {
      const key = inst.host_id ?? null;
      let bucket = instancesByHost.get(key);
      if (!bucket) {
        bucket = [];
        instancesByHost.set(key, bucket);
      }
      bucket.push({
        id: inst.id,
        agent_instance_id: inst.agent_instance_id,
        game: inst.game,
        label: inst.label,
        state: inst.state,
        uptime_seconds: Number(inst.uptime_seconds),
        last_seen_at: inst.last_seen_at ? inst.last_seen_at.toISOString() : null,
      });
    }

    const hostDtos: FleetHostDto[] = hosts.map((h) => ({
      id: h.id,
      hostname: h.hostname,
      status: h.status,
      agent_version: h.agent_version,
      os: h.os,
      arch: h.arch,
      cpu_percent: FleetService.toNumberOrNull(h.cpu_percent),
      cpu_cores: FleetService.toNumberOrNull(h.cpu_cores),
      mem_total_mb: FleetService.toNumberOrNull(h.mem_total_mb),
      mem_used_mb: FleetService.toNumberOrNull(h.mem_used_mb),
      uptime_seconds: FleetService.toNumberOrNull(h.uptime_seconds),
      disks: h.disks,
      last_heartbeat_at: h.last_heartbeat_at ? h.last_heartbeat_at.toISOString() : null,
      instances: instancesByHost.get(h.id) ?? [],
    }));

    // Append synthetic "unassigned" bucket only if orphaned instances exist
    const unassigned = instancesByHost.get(null) ?? [];
    if (unassigned.length > 0) {
      hostDtos.push({
        id: '__unassigned__',
        hostname: 'Unassigned',
        status: 'offline',
        agent_version: null,
        os: null,
        arch: null,
        cpu_percent: null,
        cpu_cores: null,
        mem_total_mb: null,
        mem_used_mb: null,
        uptime_seconds: null,
        disks: null,
        last_heartbeat_at: null,
        instances: unassigned,
      });
    }

    return {
      hosts: hostDtos,
      summary: {
        // Counts cover real host rows only; the synthetic bucket is excluded.
        host_count: hosts.length,
        instance_count: instances.length,
        online_host_count: hosts.filter((h) => h.status === 'connected').length,
      },
    };
  }
}
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { FurnaceSplitterConfig } from '../../entities/furnacesplitter-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateFurnaceSplitterConfigDto } from './dto/create-furnacesplitter-config.dto';
|
||||
import { UpdateFurnaceSplitterConfigDto } from './dto/update-furnacesplitter-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class FurnaceSplitterService {
|
||||
constructor(
|
||||
@InjectRepository(FurnaceSplitterConfig)
|
||||
private readonly furnaceRepo: Repository<FurnaceSplitterConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class FurnaceSplitterService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write FurnaceSplitter.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/FurnaceSplitter.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write FurnaceSplitter.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/FurnaceSplitter.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload FurnaceSplitter plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload FurnaceSplitter',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload FurnaceSplitter');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.furnaceRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class FurnaceSplitterService {
|
||||
/** Import FurnaceSplitter.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read FurnaceSplitter.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/FurnaceSplitter.json',
|
||||
},
|
||||
30000,
|
||||
// Read FurnaceSplitter.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/FurnaceSplitter.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class FurnaceSplitterService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { GatherConfig } from '../../entities/gather-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateGatherConfigDto } from './dto/create-gather-config.dto';
|
||||
import { UpdateGatherConfigDto } from './dto/update-gather-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class GatherService {
|
||||
constructor(
|
||||
@InjectRepository(GatherConfig)
|
||||
private readonly gatherRepo: Repository<GatherConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class GatherService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write GatherManager.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/GatherManager.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write GatherManager.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/GatherManager.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload GatherManager plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload GatherManager',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload GatherManager');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.gatherRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class GatherService {
|
||||
/** Import GatherManager.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read GatherManager.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/GatherManager.json',
|
||||
},
|
||||
30000,
|
||||
// Read GatherManager.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/GatherManager.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class GatherService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
133
backend-nest/src/modules/instances/instances.controller.ts
Normal file
133
backend-nest/src/modules/instances/instances.controller.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
import { Controller, Post, Get, Put, Body, Param, Query } from '@nestjs/common';
|
||||
import { ApiTags, ApiBearerAuth, ApiOperation } from '@nestjs/swagger';
|
||||
import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
|
||||
import { RequirePermission } from '../../common/decorators/require-permission.decorator';
|
||||
import { InstancesService, LifecycleFunc } from './instances.service';
|
||||
|
||||
/**
 * REST endpoints under `/instances/:id` for controlling a single game
 * instance: lifecycle, console commands and file management.
 *
 * Every handler forwards the tenant's license id (from `@CurrentTenant()`)
 * and the instance id to InstancesService, which performs the ownership check.
 * Request bodies are typed inline and are not validated at this layer.
 */
@ApiTags('instances')
@ApiBearerAuth()
@Controller('instances')
export class InstancesController {
  constructor(private readonly instancesService: InstancesService) {}

  /** Forward a lifecycle action to the instance. */
  @Post(':id/lifecycle')
  @RequirePermission('server.manage')
  @ApiOperation({ summary: 'Send a lifecycle command to a game instance (start/stop/restart/status/steam_update)' })
  async lifecycle(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { action: LifecycleFunc },
  ) {
    return this.instancesService.lifecycle(licenseId, id, body.action);
  }

  /** Run a console command on the instance. */
  @Post(':id/rcon')
  @RequirePermission('server.console')
  @ApiOperation({ summary: 'Send an RCON/console command to a game instance' })
  async rcon(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { command: string },
  ) {
    return this.instancesService.rcon(licenseId, id, body.command);
  }

  /** List a directory; a missing `path` query parameter is sent as the empty string. */
  @Get(':id/files')
  @RequirePermission('files.view')
  @ApiOperation({ summary: 'List a directory in the instance (jailed to its root)' })
  async listFiles(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Query('path') path?: string,
  ) {
    return this.instancesService.listFiles(licenseId, id, path ?? '');
  }

  /** Read one file identified by the `path` query parameter. */
  @Get(':id/file')
  @RequirePermission('files.view')
  @ApiOperation({ summary: 'Read a text file from the instance (jailed, 5 MiB cap)' })
  async readFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Query('path') path: string,
  ) {
    return this.instancesService.readFile(licenseId, id, path);
  }

  /** Write a file; missing `content` is written as the empty string. */
  @Put(':id/file')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Write a text file in the instance (jailed)' })
  async writeFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string; content: string },
  ) {
    return this.instancesService.writeFile(licenseId, id, body.path, body.content ?? '');
  }

  /** Delete the file or directory at `body.path`. */
  @Post(':id/files/delete')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Delete a file or directory (jailed)' })
  async deleteFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string },
  ) {
    return this.instancesService.deleteFile(licenseId, id, body.path);
  }

  /** Rename the entry at `body.path` to `body.name`. */
  @Post(':id/files/rename')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Rename a file/directory within its parent (jailed)' })
  async renameFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string; name: string },
  ) {
    return this.instancesService.renameFile(licenseId, id, body.path, body.name);
  }

  /** Create a directory at `body.path`. */
  @Post(':id/files/mkdir')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Create a directory (jailed)' })
  async mkdir(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string },
  ) {
    return this.instancesService.mkdir(licenseId, id, body.path);
  }

  /** Create an empty file at `body.path`. */
  @Post(':id/files/mkfile')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Create an empty file (jailed)' })
  async mkfile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string },
  ) {
    return this.instancesService.mkfile(licenseId, id, body.path);
  }

  /** Move the entry at `body.path` to `body.dest`. */
  @Post(':id/files/move')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Move a file/directory (jailed)' })
  async moveFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string; dest: string },
  ) {
    return this.instancesService.moveFile(licenseId, id, body.path, body.dest);
  }

  /** Copy the entry at `body.path` to `body.dest`. */
  @Post(':id/files/copy')
  @RequirePermission('files.manage')
  @ApiOperation({ summary: 'Copy a file/directory (jailed)' })
  async copyFile(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() body: { path: string; dest: string },
  ) {
    return this.instancesService.copyFile(licenseId, id, body.path, body.dest);
  }
}
|
||||
18
backend-nest/src/modules/instances/instances.module.ts
Normal file
18
backend-nest/src/modules/instances/instances.module.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import { Global, Module } from '@nestjs/common';
|
||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { InstancesController } from './instances.controller';
|
||||
import { InstancesService } from './instances.service';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
|
||||
/**
 * Instance control module.
 *
 * Declared `@Global()` so the legacy single-server services (servers/players/
 * schedules/wipes/plugins + the 9 plugin-config modules) can inject
 * InstancesService to route commands at the now-only Rust agent without each
 * importing this module.
 *
 * NOTE(review): NatsService is listed as a provider here; if another module
 * also provides it, this module gets its own instance — confirm that is intended.
 */
@Global()
@Module({
  imports: [TypeOrmModule.forFeature([GameInstance])],
  providers: [InstancesService, NatsService],
  controllers: [InstancesController],
  exports: [InstancesService],
})
export class InstancesModule {}
|
||||
223
backend-nest/src/modules/instances/instances.service.ts
Normal file
223
backend-nest/src/modules/instances/instances.service.ts
Normal file
@@ -0,0 +1,223 @@
|
||||
import {
  BadRequestException,
  Injectable,
  Logger,
  NotFoundException,
  ServiceUnavailableException,
} from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { GameInstance } from '../../entities/game-instance.entity';
|
||||
|
||||
/** Lifecycle funcs the agent's {instance}.cmd handler accepts. */
|
||||
const LIFECYCLE_FUNCS = ['start', 'stop', 'restart', 'status', 'steam_update'] as const;
|
||||
export type LifecycleFunc = (typeof LIFECYCLE_FUNCS)[number];
|
||||
|
||||
@Injectable()
|
||||
export class InstancesService {
|
||||
private readonly logger = new Logger(InstancesService.name);
|
||||
|
||||
constructor(
|
||||
private readonly nats: NatsService,
|
||||
@InjectRepository(GameInstance)
|
||||
private readonly instanceRepo: Repository<GameInstance>,
|
||||
) {}
|
||||
|
||||
/** Resolve an instance the caller's license actually owns (tenant guard). */
|
||||
private async resolveInstance(licenseId: string, instanceId: string): Promise<GameInstance> {
|
||||
const inst = await this.instanceRepo.findOne({
|
||||
where: { id: instanceId, license_id: licenseId },
|
||||
});
|
||||
if (!inst) throw new NotFoundException('Instance not found');
|
||||
return inst;
|
||||
}
|
||||
|
||||
async lifecycle(licenseId: string, instanceId: string, func: LifecycleFunc): Promise<unknown> {
|
||||
if (!LIFECYCLE_FUNCS.includes(func)) {
|
||||
throw new BadRequestException(`Unsupported action '${func}'`);
|
||||
}
|
||||
const inst = await this.resolveInstance(licenseId, instanceId);
|
||||
const subject = `corrosion.${licenseId}.${inst.agent_instance_id}.cmd`;
|
||||
this.logger.log(`instance ${inst.agent_instance_id}: ${func}`);
|
||||
return this.nats.requestScoped(licenseId, subject, { func });
|
||||
}
|
||||
|
||||
/**
 * Send an RCON/console command to an instance owned by the license.
 *
 * @param licenseId tenant that must own the instance
 * @param instanceId database id of the target instance
 * @param command console command, forwarded to the agent unchanged
 * @returns whatever the agent replies
 * @throws BadRequestException when `command` is missing, blank or not a string
 * @throws NotFoundException when the license has no such instance
 */
async rcon(licenseId: string, instanceId: string, command: string): Promise<unknown> {
  // The value comes from an unvalidated request body, so it may not be a string
  // at runtime; check the type before calling .trim() to answer 400 instead of
  // crashing with a TypeError.
  if (typeof command !== 'string' || !command.trim()) {
    throw new BadRequestException('command is required');
  }
  const inst = await this.resolveInstance(licenseId, instanceId);
  const subject = `corrosion.${licenseId}.${inst.agent_instance_id}.cmd`;
  // RCON can take longer than a lifecycle ack — give it more headroom.
  return this.nats.requestScoped(licenseId, subject, { func: 'rcon', command }, 12_000);
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// File access — jailed to the instance root by the agent's file manager.
|
||||
// The agent protocol (corrosion-host-agent/src/filemanager.rs):
|
||||
// { op: list|read|write|delete|rename|mkdir|mkfile|move|copy, path, ... }
|
||||
// reply: { status: 'success'|'error', data?, message? }
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private filesSubject(inst: GameInstance, licenseId: string): string {
|
||||
return `corrosion.${licenseId}.${inst.agent_instance_id}.files.cmd`;
|
||||
}
|
||||
|
||||
/**
 * Send one file-manager request to the instance's agent and return its reply.
 *
 * @param licenseId tenant that must own the instance
 * @param instanceId database id of the target instance
 * @param payload agent file-manager request (`op`, `path`, ...)
 * @returns the agent reply, guaranteed non-null and not an error reply
 * @throws NotFoundException when the license has no such instance
 * @throws ServiceUnavailableException when the agent returned no reply
 * @throws BadRequestException when the agent replied with `status: 'error'`
 */
private async fileOp(
  licenseId: string,
  instanceId: string,
  payload: Record<string, unknown>,
): Promise<{ status: string; data?: unknown; message?: string }> {
  const inst = await this.resolveInstance(licenseId, instanceId);
  const reply = await this.nats.requestScoped<{ status: string; data?: unknown; message?: string }>(
    licenseId,
    this.filesSubject(inst, licenseId),
    payload,
    12_000,
  );
  // Every caller dereferences the reply (`.data`), so an empty reply must be
  // turned into an explicit error here rather than a TypeError later.
  if (!reply) {
    throw new ServiceUnavailableException('No response from agent — it may be offline');
  }
  if (reply.status === 'error') {
    throw new BadRequestException(reply.message ?? 'File operation failed');
  }
  return reply;
}
|
||||
|
||||
async listFiles(licenseId: string, instanceId: string, path = ''): Promise<unknown> {
|
||||
const res = await this.fileOp(licenseId, instanceId, { op: 'list', path });
|
||||
return res.data;
|
||||
}
|
||||
|
||||
async readFile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
|
||||
if (!path) throw new BadRequestException('path is required');
|
||||
const res = await this.fileOp(licenseId, instanceId, { op: 'read', path });
|
||||
return res.data;
|
||||
}
|
||||
|
||||
async writeFile(
|
||||
licenseId: string,
|
||||
instanceId: string,
|
||||
path: string,
|
||||
content: string,
|
||||
): Promise<unknown> {
|
||||
if (!path) throw new BadRequestException('path is required');
|
||||
const res = await this.fileOp(licenseId, instanceId, { op: 'write', path, content });
|
||||
return res.data ?? { status: 'success' };
|
||||
}
|
||||
|
||||
async deleteFile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
|
||||
if (!path) throw new BadRequestException('path is required');
|
||||
return (await this.fileOp(licenseId, instanceId, { op: 'delete', path })).data ?? { ok: true };
|
||||
}
|
||||
|
||||
async renameFile(
|
||||
licenseId: string,
|
||||
instanceId: string,
|
||||
path: string,
|
||||
name: string,
|
||||
): Promise<unknown> {
|
||||
if (!path || !name) throw new BadRequestException('path and name are required');
|
||||
return (await this.fileOp(licenseId, instanceId, { op: 'rename', path, name })).data ?? { ok: true };
|
||||
}
|
||||
|
||||
async mkdir(licenseId: string, instanceId: string, path: string): Promise<unknown> {
|
||||
if (!path) throw new BadRequestException('path is required');
|
||||
return (await this.fileOp(licenseId, instanceId, { op: 'mkdir', path })).data ?? { ok: true };
|
||||
}
|
||||
|
||||
async mkfile(licenseId: string, instanceId: string, path: string): Promise<unknown> {
|
||||
if (!path) throw new BadRequestException('path is required');
|
||||
return (await this.fileOp(licenseId, instanceId, { op: 'mkfile', path })).data ?? { ok: true };
|
||||
}
|
||||
|
||||
async moveFile(
|
||||
licenseId: string,
|
||||
instanceId: string,
|
||||
path: string,
|
||||
dest: string,
|
||||
): Promise<unknown> {
|
||||
if (!path || !dest) throw new BadRequestException('path and dest are required');
|
||||
return (await this.fileOp(licenseId, instanceId, { op: 'move', path, dest })).data ?? { ok: true };
|
||||
}
|
||||
|
||||
/**
 * Copy the entry at `path` to `dest` via a `copy` file operation.
 *
 * Returns the reply's `data`, or `{ ok: true }` when the reply carries none.
 *
 * @throws BadRequestException when `path` or `dest` is empty.
 */
async copyFile(
  licenseId: string,
  instanceId: string,
  path: string,
  dest: string,
): Promise<unknown> {
  const haveBoth = Boolean(path) && Boolean(dest);
  if (!haveBoth) {
    throw new BadRequestException('path and dest are required');
  }
  const reply = await this.fileOp(licenseId, instanceId, { op: 'copy', path, dest });
  return reply.data ?? { ok: true };
}
|
||||
|
||||
/**
 * Wipe an instance's game data through the agent's jailed wipe handler.
 *
 * The agent performs stop → delete files per wipe_type (map/blueprint/full)
 * → restart before it replies, which is why the request is given a long
 * timeout.
 *
 * @param wipeType which data set to delete.
 * @param backup   forwarded to the agent as the `backup` flag; defaults to true.
 * @returns whatever the scoped NATS request resolves to.
 */
async wipe(
  licenseId: string,
  instanceId: string,
  wipeType: 'map' | 'blueprint' | 'full',
  backup = true,
): Promise<unknown> {
  const target = await this.resolveInstance(licenseId, instanceId);
  const agentId = target.agent_instance_id;

  this.logger.log(`instance ${agentId}: wipe (${wipeType})`);

  // Command subject of the resolved agent instance.
  const subject = `corrosion.${licenseId}.${agentId}.cmd`;
  return this.nats.requestScoped(
    licenseId,
    subject,
    { func: 'wipe', wipe_type: wipeType, backup },
    120_000,
  );
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// License-scoped convenience wrappers. Legacy single-server services
|
||||
// (servers/players/schedules/wipes/plugins + the 9 plugin-config modules)
|
||||
// predate the instance model and carry only a licenseId. These resolve the
|
||||
// license's primary instance, then dispatch to the agent — replacing the old
|
||||
// publishes to the now-defunct `cmd.server` subject.
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
 * Look up the license's primary instance — the one with the earliest
 * `created_at`.
 *
 * @throws NotFoundException when the license has no instance row at all.
 */
async resolveDefaultInstance(licenseId: string): Promise<GameInstance> {
  const oldest = await this.instanceRepo.findOne({
    where: { license_id: licenseId },
    order: { created_at: 'ASC' },
  });

  if (oldest) {
    return oldest;
  }

  throw new NotFoundException(
    'No game instance is connected for this license yet — install and start the host agent first.',
  );
}
|
||||
|
||||
/** Run a lifecycle function against the license's primary instance. */
async lifecycleForLicense(licenseId: string, func: LifecycleFunc): Promise<unknown> {
  const primary = await this.resolveDefaultInstance(licenseId);
  const instanceId = primary.id;
  return this.lifecycle(licenseId, instanceId, func);
}
|
||||
|
||||
/** Send an RCON command to the license's primary instance. */
async rconForLicense(licenseId: string, command: string): Promise<unknown> {
  const primary = await this.resolveDefaultInstance(licenseId);
  const instanceId = primary.id;
  return this.rcon(licenseId, instanceId, command);
}
|
||||
|
||||
/** Write `content` to `path` on the license's primary instance. */
async writeFileForLicense(licenseId: string, path: string, content: string): Promise<unknown> {
  const primary = await this.resolveDefaultInstance(licenseId);
  const instanceId = primary.id;
  return this.writeFile(licenseId, instanceId, path, content);
}
|
||||
|
||||
/** Read `path` from the license's primary instance. */
async readFileForLicense(licenseId: string, path: string): Promise<unknown> {
  const primary = await this.resolveDefaultInstance(licenseId);
  const instanceId = primary.id;
  return this.readFile(licenseId, instanceId, path);
}
|
||||
|
||||
/** Delete `path` on the license's primary instance. */
async deleteFileForLicense(licenseId: string, path: string): Promise<unknown> {
  const primary = await this.resolveDefaultInstance(licenseId);
  const instanceId = primary.id;
  return this.deleteFile(licenseId, instanceId, path);
}
|
||||
|
||||
/**
 * Wipe the license's primary instance.
 *
 * @param wipeType which data set to delete.
 * @param backup   forwarded unchanged to `wipe`; defaults to true.
 */
async wipeForLicense(
  licenseId: string,
  wipeType: 'map' | 'blueprint' | 'full',
  backup = true,
): Promise<unknown> {
  const primary = await this.resolveDefaultInstance(licenseId);
  const instanceId = primary.id;
  return this.wipe(licenseId, instanceId, wipeType, backup);
}
|
||||
}
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { KitsConfig } from '../../entities/kits-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateKitsConfigDto } from './dto/create-kits-config.dto';
|
||||
import { UpdateKitsConfigDto } from './dto/update-kits-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class KitsService {
|
||||
constructor(
|
||||
@InjectRepository(KitsConfig)
|
||||
private readonly kitsRepo: Repository<KitsConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class KitsService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write Kits.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/Kits.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write Kits.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/Kits.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload Kits plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload Kits',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload Kits');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.kitsRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class KitsService {
|
||||
/** Import Kits.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read Kits.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/Kits.json',
|
||||
},
|
||||
30000,
|
||||
// Read Kits.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/Kits.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class KitsService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { LootProfile } from '../../entities/loot-profile.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateLootProfileDto } from './dto/create-loot-profile.dto';
|
||||
import { UpdateLootProfileDto } from './dto/update-loot-profile.dto';
|
||||
import { ImportLootProfileDto } from './dto/import-loot-profile.dto';
|
||||
@@ -15,7 +15,7 @@ export class LootService {
|
||||
constructor(
|
||||
@InjectRepository(LootProfile)
|
||||
private readonly lootRepo: Repository<LootProfile>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List profiles for a license (summaries — no JSONB) */
|
||||
@@ -114,37 +114,22 @@ export class LootService {
|
||||
const lootGroupsJson = JSON.stringify(scaledGroups, null, 2);
|
||||
|
||||
try {
|
||||
// Write LootTables.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/data/BetterLoot/LootTables.json',
|
||||
content: lootTablesJson,
|
||||
},
|
||||
30000,
|
||||
// Write LootTables.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/data/BetterLoot/LootTables.json',
|
||||
lootTablesJson,
|
||||
);
|
||||
|
||||
// Write LootGroups.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/data/BetterLoot/LootGroups.json',
|
||||
content: lootGroupsJson,
|
||||
},
|
||||
30000,
|
||||
// Write LootGroups.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/data/BetterLoot/LootGroups.json',
|
||||
lootGroupsJson,
|
||||
);
|
||||
|
||||
// Reload BetterLoot plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload BetterLoot',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload BetterLoot');
|
||||
|
||||
// Mark this profile as active, deactivate others
|
||||
await this.lootRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Injectable, BadRequestException } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { PlayerAction } from '../../entities/player-action.entity';
|
||||
import { PlayerSession } from '../../entities/player-session.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { WebhooksService } from '../webhooks/webhooks.service';
|
||||
import { PlayerActionDto } from './dto/player-action.dto';
|
||||
|
||||
export interface Player {
|
||||
@@ -23,7 +24,8 @@ export class PlayersService {
|
||||
private readonly actionRepo: Repository<PlayerAction>,
|
||||
@InjectRepository(PlayerSession)
|
||||
private readonly sessionRepo: Repository<PlayerSession>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
private readonly webhooksService: WebhooksService,
|
||||
) {}
|
||||
|
||||
/**
|
||||
@@ -132,15 +134,60 @@ export class PlayersService {
|
||||
|
||||
await this.actionRepo.save(action);
|
||||
|
||||
// Forward kick, ban, and unban to the game server via NATS
|
||||
// Forward kick, ban, and unban to the game server via RCON
|
||||
if (dto.action_type === 'kick' || dto.action_type === 'ban' || dto.action_type === 'unban') {
|
||||
await this.natsService.sendServerCommand(licenseId, dto.action_type, {
|
||||
const rconCmd = this.buildRconCommand(dto);
|
||||
await this.instancesService.rconForLicense(licenseId, rconCmd);
|
||||
}
|
||||
|
||||
// Fire webhook event for player bans. Fire-and-forget — a delivery failure
|
||||
// must never surface to the caller or roll back the ban action.
|
||||
if (dto.action_type === 'ban') {
|
||||
void this.webhooksService
|
||||
.dispatch(licenseId, 'player_banned', {
|
||||
steam_id: dto.steam_id,
|
||||
reason: dto.reason,
|
||||
duration_minutes: dto.duration_minutes,
|
||||
player_name: dto.player_name,
|
||||
reason: dto.reason ?? null,
|
||||
duration_minutes: dto.duration_minutes ?? null,
|
||||
})
|
||||
.catch(() => {
|
||||
// dispatch() already logs internally; swallow here to guarantee
|
||||
// the ban action result is unaffected.
|
||||
});
|
||||
}
|
||||
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
/**
 * Build the single-line RCON console command for a kick, ban or unban.
 *
 * Defense-in-depth against RCON command injection. The command is a single
 * line; an id or reason containing a newline/control char could break the
 * framing and inject a second console command. So:
 *  - the player id must be a safe token (no whitespace/control chars) — a
 *    permissive charset, not a Rust-only SteamID64 regex, so Conan (Funcom)
 *    and Dune ids still validate. Rejected outright if not.
 *  - the free-text reason has control chars stripped (C0, DEL and C1),
 *    whitespace collapsed, and is length-capped at 200 chars.
 *  - duration is coerced to a non-negative integer number of seconds.
 *
 * @throws BadRequestException when the player id is not a safe token or the
 *         action type is not kick/ban/unban.
 */
private buildRconCommand(dto: PlayerActionDto): string {
  const id = dto.steam_id ?? '';
  if (!/^[A-Za-z0-9_.:-]{1,64}$/.test(id)) {
    throw new BadRequestException('Invalid player id');
  }

  // \u007F-\u009F covers DEL and the C1 controls (incl. NEL), which the
  // whitespace collapse below does not catch. trimEnd() after the cap stops a
  // cut that lands on a space from leaving a dangling separator.
  const reason = (dto.reason ?? '')
    .replace(/[\u0000-\u001F\u007F-\u009F]+/g, ' ')
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 200)
    .trimEnd();

  // NOTE(review): a positive duration below one second floors to 0, which the
  // ban branch below documents as "permanent" — confirm the DTO validation
  // rules out fractional minutes.
  const secs = Number.isFinite(dto.duration_minutes)
    ? Math.max(0, Math.floor((dto.duration_minutes as number) * 60))
    : 0;

  switch (dto.action_type) {
    case 'kick':
      // Only append a reason that survived sanitisation; the 'banned'
      // fallback belongs to bans and must not leak into kick messages.
      return reason ? `kick ${id} ${reason}` : `kick ${id}`;
    case 'ban':
      // banid <steamId> <reason> <durationSeconds> — 0 = permanent
      return `banid ${id} ${reason || 'banned'} ${secs}`;
    case 'unban':
      return `unban ${id}`;
    default:
      // Never hand an empty command line to the agent.
      throw new BadRequestException('Unsupported player action');
  }
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import { Injectable, NotFoundException, ConflictException, BadRequestException, Logger } from '@nestjs/common';
|
||||
import { Injectable, NotFoundException, ConflictException, BadRequestException, ServiceUnavailableException, Logger } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { PluginRegistry } from '../../entities/plugin-registry.entity';
|
||||
import { InstallPluginDto } from './dto/install-plugin.dto';
|
||||
import { UpdatePluginConfigDto } from './dto/update-plugin-config.dto';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
|
||||
interface UmodCacheEntry {
|
||||
data: unknown;
|
||||
@@ -20,7 +20,7 @@ export class PluginsService {
|
||||
constructor(
|
||||
@InjectRepository(PluginRegistry)
|
||||
private readonly pluginRegistryRepo: Repository<PluginRegistry>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
async getPlugins(licenseId: string): Promise<PluginRegistry[]> {
|
||||
@@ -43,30 +43,11 @@ export class PluginsService {
|
||||
throw new ConflictException(`Plugin ${dto.plugin_name} is already installed`);
|
||||
}
|
||||
|
||||
const plugin = this.pluginRegistryRepo.create({
|
||||
license_id: licenseId,
|
||||
plugin_name: dto.plugin_name,
|
||||
umod_slug: dto.umod_slug,
|
||||
source: dto.source || 'manual',
|
||||
is_installed: true,
|
||||
is_loaded: false,
|
||||
});
|
||||
|
||||
const saved = await this.pluginRegistryRepo.save(plugin);
|
||||
|
||||
try {
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.server`, {
|
||||
action: 'plugin_install',
|
||||
plugin_name: dto.plugin_name,
|
||||
umod_slug: dto.umod_slug,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
this.logger.log(`Plugin install dispatched for ${dto.plugin_name} on license ${licenseId}`);
|
||||
} catch (err) {
|
||||
this.logger.error(`Failed to dispatch plugin install for ${dto.plugin_name} on license ${licenseId}: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
return saved;
|
||||
// One-click uMod install via agent is not yet implemented.
|
||||
// Fail fast — do not persist a DB record for a plugin that won't be deployed.
|
||||
throw new ServiceUnavailableException(
|
||||
'One-click uMod install is coming soon — download the .cs and use Upload for now.',
|
||||
);
|
||||
}
|
||||
|
||||
async uninstallPlugin(licenseId: string, pluginId: string): Promise<void> {
|
||||
@@ -80,11 +61,8 @@ export class PluginsService {
|
||||
|
||||
await this.pluginRegistryRepo.delete({ id: pluginId, license_id: licenseId });
|
||||
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.plugin`, {
|
||||
action: 'unload',
|
||||
plugin_name: plugin.plugin_name,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(licenseId, `oxide.unload ${plugin.plugin_name}`);
|
||||
await this.instancesService.deleteFileForLicense(licenseId, `oxide/plugins/${plugin.plugin_name}.cs`);
|
||||
this.logger.log(`Plugin uninstall dispatched for ${plugin.plugin_name} on license ${licenseId}`);
|
||||
}
|
||||
|
||||
@@ -100,11 +78,7 @@ export class PluginsService {
|
||||
throw new NotFoundException(`Plugin ${pluginId} not found`);
|
||||
}
|
||||
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.plugin`, {
|
||||
action: 'reload',
|
||||
plugin_name: plugin.plugin_name,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(licenseId, `oxide.reload ${plugin.plugin_name}`);
|
||||
this.logger.log(`Plugin reload dispatched for ${plugin.plugin_name} on license ${licenseId}`);
|
||||
|
||||
return { reloaded: true, plugin_name: plugin.plugin_name };
|
||||
@@ -215,19 +189,14 @@ export class PluginsService {
|
||||
|
||||
const saved = await this.pluginRegistryRepo.save(plugin);
|
||||
|
||||
// Dispatch to companion agent via NATS
|
||||
// Deploy .cs file to server via host agent
|
||||
try {
|
||||
const content = file.buffer.toString('base64');
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.server`, {
|
||||
action: 'plugin_upload',
|
||||
filename: originalName,
|
||||
content,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
this.logger.log(`Plugin upload dispatched: "${originalName}" (${file.size} bytes) for license ${licenseId}`);
|
||||
const content = file.buffer.toString('utf8');
|
||||
await this.instancesService.writeFileForLicense(licenseId, `oxide/plugins/${originalName}`, content);
|
||||
this.logger.log(`Plugin upload deployed: "${originalName}" (${file.size} bytes) for license ${licenseId}`);
|
||||
} catch (err) {
|
||||
this.logger.error(`NATS publish failed for plugin upload "${originalName}" on license ${licenseId}: ${(err as Error).message}`);
|
||||
// Don't fail the request — plugin record is saved, NATS delivery is best-effort
|
||||
this.logger.error(`File write failed for plugin upload "${originalName}" on license ${licenseId}: ${(err as Error).message}`);
|
||||
// Don't fail the request — plugin record is saved, file delivery is best-effort
|
||||
}
|
||||
|
||||
return saved;
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { RaidableBasesConfig } from '../../entities/raidablebases-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateRaidableBasesConfigDto } from './dto/create-raidablebases-config.dto';
|
||||
import { UpdateRaidableBasesConfigDto } from './dto/update-raidablebases-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class RaidableBasesService {
|
||||
constructor(
|
||||
@InjectRepository(RaidableBasesConfig)
|
||||
private readonly raidableBasesRepo: Repository<RaidableBasesConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class RaidableBasesService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write RaidableBases.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/RaidableBases.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write RaidableBases.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/RaidableBases.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload RaidableBases plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload RaidableBases',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload RaidableBases');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.raidableBasesRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class RaidableBasesService {
|
||||
/** Import RaidableBases.json from game server via NATS */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read RaidableBases.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/RaidableBases.json',
|
||||
},
|
||||
30000,
|
||||
// Read RaidableBases.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/RaidableBases.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class RaidableBasesService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -10,48 +10,8 @@ import { LessThanOrEqual, Repository } from 'typeorm';
|
||||
import { ScheduledTask } from '../../entities/scheduled-task.entity';
|
||||
import { CreateTaskDto } from './dto/create-task.dto';
|
||||
import { UpdateTaskDto } from './dto/update-task.dto';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
|
||||
/** Parse a 5-field cron expression and return the next Date after `after`. */
|
||||
function nextCronDate(expr: string, after: Date): Date | null {
|
||||
const parts = expr.trim().split(/\s+/);
|
||||
if (parts.length !== 5) return null;
|
||||
|
||||
const [minuteExpr, hourExpr, domExpr, monthExpr, dowExpr] = parts;
|
||||
|
||||
function matches(expr: string, value: number): boolean {
|
||||
if (expr === '*') return true;
|
||||
return parseInt(expr, 10) === value;
|
||||
}
|
||||
|
||||
// Walk minute-by-minute up to 366 days forward to find next match.
|
||||
const candidate = new Date(after.getTime() + 60_000); // advance at least 1 minute
|
||||
candidate.setSeconds(0, 0);
|
||||
|
||||
const limit = new Date(after.getTime() + 366 * 24 * 60 * 60 * 1000);
|
||||
|
||||
while (candidate < limit) {
|
||||
const min = candidate.getUTCMinutes();
|
||||
const hour = candidate.getUTCHours();
|
||||
const dom = candidate.getUTCDate();
|
||||
const month = candidate.getUTCMonth() + 1; // 1-12
|
||||
const dow = candidate.getUTCDay(); // 0=Sun
|
||||
|
||||
if (
|
||||
matches(minuteExpr, min) &&
|
||||
matches(hourExpr, hour) &&
|
||||
matches(domExpr, dom) &&
|
||||
matches(monthExpr, month) &&
|
||||
matches(dowExpr, dow)
|
||||
) {
|
||||
return candidate;
|
||||
}
|
||||
|
||||
candidate.setTime(candidate.getTime() + 60_000);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { nextCronDate } from '../../common/cron.util';
|
||||
|
||||
@Injectable()
|
||||
export class SchedulesService implements OnModuleInit, OnModuleDestroy {
|
||||
@@ -61,7 +21,7 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {
|
||||
constructor(
|
||||
@InjectRepository(ScheduledTask)
|
||||
private taskRepository: Repository<ScheduledTask>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -160,21 +120,12 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {
|
||||
|
||||
switch (task_type) {
|
||||
case 'restart':
|
||||
await this.natsService.sendServerCommand(license_id, 'restart', {
|
||||
source: 'scheduler',
|
||||
task_id: task.id,
|
||||
});
|
||||
await this.instancesService.lifecycleForLicense(license_id, 'restart');
|
||||
break;
|
||||
|
||||
case 'announcement': {
|
||||
const message = (task_config?.message as string) ?? 'Scheduled announcement';
|
||||
await this.natsService.publish(`corrosion.${license_id}.cmd.server`, {
|
||||
action: 'command',
|
||||
command: `say ${message}`,
|
||||
source: 'scheduler',
|
||||
task_id: task.id,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(license_id, `say ${message}`);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -184,25 +135,13 @@ export class SchedulesService implements OnModuleInit, OnModuleDestroy {
|
||||
this.logger.warn(`Task ${task.id} has no command configured — skipping`);
|
||||
return;
|
||||
}
|
||||
await this.natsService.publish(`corrosion.${license_id}.cmd.server`, {
|
||||
action: 'command',
|
||||
command,
|
||||
source: 'scheduler',
|
||||
task_id: task.id,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(license_id, command);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'plugin_reload': {
|
||||
const plugin_name = (task_config?.plugin_name as string) ?? '';
|
||||
await this.natsService.publish(`corrosion.${license_id}.cmd.plugin`, {
|
||||
action: 'reload',
|
||||
plugin_name,
|
||||
source: 'scheduler',
|
||||
task_id: task.id,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
await this.instancesService.rconForLicense(license_id, `oxide.reload ${plugin_name}`);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,13 @@ export class ServersController {
|
||||
return await this.serversService.getServer(licenseId);
|
||||
}
|
||||
|
||||
// Requires the 'server.manage' permission; the tenant's license id comes from
// the @CurrentTenant() decorator.
@Get('agent-credentials')
@RequirePermission('server.manage')
@ApiOperation({ summary: 'NATS credentials for this license\'s host agent' })
async getAgentCredentials(@CurrentTenant() licenseId: string) {
  const credentials = await this.serversService.getAgentCredentials(licenseId);
  return credentials;
}
|
||||
|
||||
@Put('config')
|
||||
@RequirePermission('server.manage')
|
||||
@ApiOperation({ summary: 'Update server configuration' })
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import { Injectable, NotFoundException, InternalServerErrorException, Logger } from '@nestjs/common';
|
||||
import { Injectable, NotFoundException, InternalServerErrorException, ServiceUnavailableException, Logger } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { ServerConnection } from '../../entities/server-connection.entity';
|
||||
import { ServerConfig } from '../../entities/server-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { UpdateServerConfigDto } from './dto/update-config.dto';
|
||||
import { DeployServerDto } from './dto/deploy-server.dto';
|
||||
|
||||
@@ -17,8 +18,18 @@ export class ServersService {
|
||||
@InjectRepository(ServerConfig)
|
||||
private readonly configRepo: Repository<ServerConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/**
 * Fetch the NATS credentials a customer places in their host agent's config
 * so the agent can authenticate to the per-license-scoped broker.
 *
 * Delegates to NatsService. Yields null if the broker isn't enforcing auth
 * yet (NATS_TOKEN_SECRET unset).
 */
async getAgentCredentials(licenseId: string) {
  const credentials = await this.natsService.getAgentCredentials(licenseId);
  return credentials;
}
|
||||
|
||||
/**
|
||||
* Get server connection and config for a license.
|
||||
* Returns null fields if no server has been set up yet.
|
||||
@@ -59,11 +70,11 @@ export class ServersService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a console command to the server via NATS
|
||||
* Send a console command to the server via the host agent (RCON)
|
||||
*/
|
||||
async sendCommand(licenseId: string, command: string) {
|
||||
try {
|
||||
await this.natsService.sendServerCommand(licenseId, 'command', { command });
|
||||
await this.instancesService.rconForLicense(licenseId, command);
|
||||
this.logger.log(`Console command dispatched for license ${licenseId}: ${command}`);
|
||||
} catch (err) {
|
||||
this.logger.error(`Failed to dispatch console command for license ${licenseId}: ${(err as Error).message}`);
|
||||
@@ -73,42 +84,45 @@ export class ServersService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the server via NATS
|
||||
* Start the server via the host agent
|
||||
*/
|
||||
async startServer(licenseId: string) {
|
||||
await this.natsService.sendServerCommand(licenseId, 'start');
|
||||
await this.instancesService.lifecycleForLicense(licenseId, 'start');
|
||||
return { message: 'Start command sent' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the server via NATS
|
||||
* Stop the server via the host agent
|
||||
*/
|
||||
async stopServer(licenseId: string) {
|
||||
await this.natsService.sendServerCommand(licenseId, 'stop');
|
||||
await this.instancesService.lifecycleForLicense(licenseId, 'stop');
|
||||
return { message: 'Stop command sent' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Restart the server via NATS
|
||||
* Restart the server via the host agent
|
||||
*/
|
||||
async restartServer(licenseId: string) {
|
||||
await this.natsService.sendServerCommand(licenseId, 'restart');
|
||||
await this.instancesService.lifecycleForLicense(licenseId, 'restart');
|
||||
return { message: 'Restart command sent' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Deploy Rust server via companion agent
|
||||
* Deploy Rust server — not yet supported via host agent.
|
||||
* Install the server manually and point the host agent at it.
|
||||
*/
|
||||
async deployServer(licenseId: string, dto: DeployServerDto) {
|
||||
await this.natsService.sendDeployCommand(licenseId, { ...dto });
|
||||
return { message: 'Deployment started' };
|
||||
async deployServer(_licenseId: string, _dto: DeployServerDto) {
|
||||
throw new ServiceUnavailableException(
|
||||
'Server deployment from the panel is coming soon — install the server and point the host agent at it for now.',
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Install Oxide/uMod via companion agent
|
||||
* Install Oxide/uMod — not yet supported via host agent.
|
||||
*/
|
||||
async installOxide(licenseId: string) {
|
||||
await this.natsService.sendOxideInstallCommand(licenseId);
|
||||
return { message: 'Oxide installation started' };
|
||||
async installOxide(_licenseId: string) {
|
||||
throw new ServiceUnavailableException(
|
||||
'Oxide install from the panel is coming soon — install Oxide/uMod on the server for now.',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Injectable, ServiceUnavailableException } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
@@ -55,6 +55,13 @@ export class SetupService {
|
||||
if (dto.panel_api_key) {
|
||||
const encryptionKey = this.configService.get<string>('encryption.key', '');
|
||||
const keyBuffer = Buffer.from(encryptionKey, 'hex');
|
||||
// AES-256-GCM needs a 32-byte key. An unset/short ENCRYPTION_KEY would
|
||||
// otherwise crash createCipheriv with an opaque "Invalid key length" 500.
|
||||
if (keyBuffer.length !== 32) {
|
||||
throw new ServiceUnavailableException(
|
||||
'Server encryption is not configured (ENCRYPTION_KEY must be 32 bytes / 64 hex chars). Contact the platform operator.',
|
||||
);
|
||||
}
|
||||
const iv = crypto.randomBytes(16);
|
||||
const cipher = crypto.createCipheriv('aes-256-gcm', keyBuffer, iv);
|
||||
const encrypted = Buffer.concat([
|
||||
@@ -82,9 +89,12 @@ export class SetupService {
|
||||
});
|
||||
|
||||
if (connection) {
|
||||
// For bare metal, mark as connected immediately (waiting for agent)
|
||||
if (connection.connection_type === 'bare_metal') {
|
||||
connection.connection_status = 'connected';
|
||||
// Bare-metal stays 'offline' until the agent's first heartbeat flips it
|
||||
// 'connected' (HostAgentConsumerService). Marking it connected here was a
|
||||
// false positive — the dashboard showed a live server before any agent
|
||||
// had checked in.
|
||||
if (connection.connection_type === 'bare_metal' && connection.connection_status !== 'connected') {
|
||||
connection.connection_status = 'offline';
|
||||
connection.updated_at = new Date();
|
||||
await this.connectionRepo.save(connection);
|
||||
}
|
||||
|
||||
@@ -57,11 +57,17 @@ export class StoreService {
|
||||
throw new NotFoundException('Module not found');
|
||||
}
|
||||
|
||||
// Beta: modules are granted free (no payment processing wired yet). Record
|
||||
// it honestly as a beta grant at $0 rather than a fake `txn_*` id that
|
||||
// implies a real charge occurred.
|
||||
this.logger.log(
|
||||
`Granting module ${moduleId} to license ${licenseId} free (Beta — no payment processing)`,
|
||||
);
|
||||
const purchase = this.purchaseRepo.create({
|
||||
license_id: licenseId,
|
||||
module_id: moduleId,
|
||||
transaction_id: `txn_${Date.now()}`,
|
||||
amount_paid: parseFloat(module.price_usd.toString()),
|
||||
transaction_id: 'beta-free-grant',
|
||||
amount_paid: 0,
|
||||
});
|
||||
|
||||
return this.purchaseRepo.save(purchase);
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { TeleportConfig } from '../../entities/teleport-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateTeleportConfigDto } from './dto/create-teleport-config.dto';
|
||||
import { UpdateTeleportConfigDto } from './dto/update-teleport-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class TeleportService {
|
||||
constructor(
|
||||
@InjectRepository(TeleportConfig)
|
||||
private readonly teleportRepo: Repository<TeleportConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class TeleportService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write NTeleportation.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/NTeleportation.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write NTeleportation.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/NTeleportation.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload NTeleportation plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload NTeleportation',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload NTeleportation');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.teleportRepo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class TeleportService {
|
||||
/** Import NTeleportation.json from the game server via the agent (InstancesService.readFileForLicense) */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read NTeleportation.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/NTeleportation.json',
|
||||
},
|
||||
30000,
|
||||
// Read NTeleportation.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/NTeleportation.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class TeleportService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Injectable, Logger, NotFoundException, HttpException, HttpStatus } from
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { TimedExecuteConfig } from '../../entities/timedexecute-config.entity';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { CreateTimedExecuteConfigDto } from './dto/create-timedexecute-config.dto';
|
||||
import { UpdateTimedExecuteConfigDto } from './dto/update-timedexecute-config.dto';
|
||||
|
||||
@@ -13,7 +13,7 @@ export class TimedExecuteService {
|
||||
constructor(
|
||||
@InjectRepository(TimedExecuteConfig)
|
||||
private readonly repo: Repository<TimedExecuteConfig>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
) {}
|
||||
|
||||
/** List configs for a license (summaries — no JSONB) */
|
||||
@@ -81,26 +81,15 @@ export class TimedExecuteService {
|
||||
const jsonString = JSON.stringify(config.config_data, null, 2);
|
||||
|
||||
try {
|
||||
// Write TimedExecute.json via file manager NATS
|
||||
await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_save',
|
||||
path: 'server://oxide/config/TimedExecute.json',
|
||||
content: jsonString,
|
||||
},
|
||||
30000,
|
||||
// Write TimedExecute.json via Rust agent
|
||||
await this.instancesService.writeFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/TimedExecute.json',
|
||||
jsonString,
|
||||
);
|
||||
|
||||
// Reload TimedExecute plugin via RCON
|
||||
await this.natsService.publish(
|
||||
`corrosion.${licenseId}.cmd.server`,
|
||||
{
|
||||
action: 'command',
|
||||
command: 'oxide.reload TimedExecute',
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
);
|
||||
await this.instancesService.rconForLicense(licenseId, 'oxide.reload TimedExecute');
|
||||
|
||||
// Mark this config as active, deactivate others
|
||||
await this.repo.update({ license_id: licenseId }, { is_active: false });
|
||||
@@ -126,17 +115,13 @@ export class TimedExecuteService {
|
||||
/** Import TimedExecute.json from the game server via the agent (InstancesService.readFileForLicense) */
|
||||
async importFromServer(licenseId: string, configName: string, description?: string) {
|
||||
try {
|
||||
// Read TimedExecute.json from server via file manager NATS
|
||||
const response = await this.natsService.request(
|
||||
`corrosion.${licenseId}.files.cmd`,
|
||||
{
|
||||
func: 'fm_preview',
|
||||
path: 'server://oxide/config/TimedExecute.json',
|
||||
},
|
||||
30000,
|
||||
// Read TimedExecute.json from server via Rust agent
|
||||
const result = await this.instancesService.readFileForLicense(
|
||||
licenseId,
|
||||
'oxide/config/TimedExecute.json',
|
||||
);
|
||||
|
||||
if (!response) {
|
||||
if (!result) {
|
||||
throw new HttpException(
|
||||
'No response from agent — it may be offline',
|
||||
HttpStatus.SERVICE_UNAVAILABLE,
|
||||
@@ -144,13 +129,13 @@ export class TimedExecuteService {
|
||||
}
|
||||
|
||||
// Parse the response content as JSON
|
||||
const responseData = response as Record<string, any>;
|
||||
const responseData = (result as any).content;
|
||||
let configData: Record<string, any>;
|
||||
|
||||
if (typeof responseData.content === 'string') {
|
||||
configData = JSON.parse(responseData.content);
|
||||
} else if (typeof responseData.content === 'object') {
|
||||
configData = responseData.content;
|
||||
if (typeof responseData === 'string') {
|
||||
configData = JSON.parse(responseData);
|
||||
} else if (typeof responseData === 'object') {
|
||||
configData = responseData;
|
||||
} else {
|
||||
throw new HttpException(
|
||||
'Unexpected response format from agent',
|
||||
|
||||
33
backend-nest/src/modules/webhooks/dto/create-webhook.dto.ts
Normal file
33
backend-nest/src/modules/webhooks/dto/create-webhook.dto.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import { IsString, IsNotEmpty, IsUrl, IsArray, ArrayNotEmpty, IsOptional, MaxLength } from 'class-validator';
|
||||
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
|
||||
|
||||
/**
 * Request body for registering an outbound webhook.
 *
 * Field constraints are enforced by the class-validator decorators; the
 * @nestjs/swagger decorators only describe the fields in the OpenAPI document.
 */
export class CreateWebhookDto {
  /** Human-readable label: required, non-empty, at most 100 characters. */
  @ApiProperty({ description: 'Human-readable label for this webhook', maxLength: 100 })
  @IsString()
  @IsNotEmpty()
  @MaxLength(100)
  name: string;

  /**
   * Delivery endpoint. Both http and https are accepted (the description used
   * to claim HTTPS only, which did not match the validator). The scheme is
   * mandatory so that a bare "example.com/hook" is rejected here with a
   * validation error instead of failing later when the URL is fetched.
   * `require_tld: false` keeps single-label hosts valid at this layer.
   */
  @ApiProperty({ description: 'HTTP or HTTPS URL to POST events to (scheme required)' })
  @IsUrl({ protocols: ['https', 'http'], require_protocol: true, require_tld: false })
  url: string;

  /** Event keys to subscribe to: a non-empty array of non-empty strings. */
  @ApiProperty({
    description: 'Event keys to subscribe to',
    example: ['player_banned', 'server_down'],
    type: [String],
  })
  @IsArray()
  @ArrayNotEmpty()
  @IsString({ each: true })
  @IsNotEmpty({ each: true })
  events: string[];

  /**
   * Optional signing secret, at most 128 characters. @IsOptional() only skips
   * validation for null/undefined, so @IsNotEmpty() rejects an explicit ""
   * rather than letting an empty HMAC key through.
   */
  @ApiPropertyOptional({
    description: 'HMAC-SHA256 signing secret. Auto-generated if omitted.',
    maxLength: 128,
  })
  @IsOptional()
  @IsString()
  @IsNotEmpty()
  @MaxLength(128)
  secret?: string;
}
|
||||
31
backend-nest/src/modules/webhooks/dto/update-webhook.dto.ts
Normal file
31
backend-nest/src/modules/webhooks/dto/update-webhook.dto.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import { IsString, IsNotEmpty, IsUrl, IsArray, ArrayNotEmpty, IsOptional, IsBoolean, MaxLength } from 'class-validator';
|
||||
import { ApiPropertyOptional } from '@nestjs/swagger';
|
||||
|
||||
/**
 * Request body for partially updating a webhook. Every field is optional;
 * @IsOptional() skips the remaining validators only when the value is
 * null/undefined, so a field that IS supplied must satisfy them.
 */
export class UpdateWebhookDto {
  /**
   * New label, at most 100 characters. A supplied name must be non-empty,
   * matching the rule applied when the webhook is created.
   */
  @ApiPropertyOptional({ description: 'Human-readable label for this webhook', maxLength: 100 })
  @IsOptional()
  @IsString()
  @IsNotEmpty()
  @MaxLength(100)
  name?: string;

  /**
   * New delivery endpoint. Both http and https are accepted (the description
   * used to claim HTTPS only); the scheme is mandatory so scheme-less input is
   * rejected at validation time rather than when the URL is later fetched.
   */
  @ApiPropertyOptional({ description: 'HTTP or HTTPS URL to POST events to (scheme required)' })
  @IsOptional()
  @IsUrl({ protocols: ['https', 'http'], require_protocol: true, require_tld: false })
  url?: string;

  /** Replacement subscription list: when supplied, a non-empty array of non-empty strings. */
  @ApiPropertyOptional({
    description: 'Event keys to subscribe to',
    example: ['player_banned', 'server_down'],
    type: [String],
  })
  @IsOptional()
  @IsArray()
  @ArrayNotEmpty()
  @IsString({ each: true })
  @IsNotEmpty({ each: true })
  events?: string[];

  /** Toggles the webhook's active flag. */
  @ApiPropertyOptional({ description: 'Enable or disable this webhook' })
  @IsOptional()
  @IsBoolean()
  is_active?: boolean;
}
|
||||
70
backend-nest/src/modules/webhooks/webhooks.controller.ts
Normal file
70
backend-nest/src/modules/webhooks/webhooks.controller.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
import {
|
||||
Controller,
|
||||
Get,
|
||||
Post,
|
||||
Patch,
|
||||
Delete,
|
||||
Body,
|
||||
Param,
|
||||
} from '@nestjs/common';
|
||||
import { ApiTags, ApiBearerAuth, ApiOperation, ApiResponse } from '@nestjs/swagger';
|
||||
import { WebhooksService } from './webhooks.service';
|
||||
import { CreateWebhookDto } from './dto/create-webhook.dto';
|
||||
import { UpdateWebhookDto } from './dto/update-webhook.dto';
|
||||
import { CurrentTenant } from '../../common/decorators/current-tenant.decorator';
|
||||
import { RequirePermission } from '../../common/decorators/require-permission.decorator';
|
||||
|
||||
/**
 * HTTP endpoints under `/webhooks` for managing outbound webhooks.
 *
 * Each handler receives the license id from @CurrentTenant() and forwards to
 * WebhooksService. @RequirePermission() declares the permission key attached
 * to each route.
 */
@ApiTags('webhooks')
@ApiBearerAuth()
@Controller('webhooks')
export class WebhooksController {
  constructor(private readonly webhooksService: WebhooksService) {}

  /** POST /webhooks — register a new webhook for the current license. */
  @Post()
  @RequirePermission('webhooks.manage')
  @ApiOperation({
    summary: 'Create a webhook',
    description:
      'Registers a new outbound webhook for this license. A signing secret is auto-generated if not provided.',
  })
  @ApiResponse({ status: 201, description: 'Webhook created.' })
  async create(@CurrentTenant() licenseId: string, @Body() dto: CreateWebhookDto) {
    const created = await this.webhooksService.create(licenseId, dto);
    return created;
  }

  /** GET /webhooks — list the current license's webhooks. */
  @Get()
  @RequirePermission('webhooks.view')
  @ApiOperation({ summary: 'List webhooks', description: 'Returns all webhooks for this license.' })
  @ApiResponse({ status: 200, description: 'Webhook list.' })
  async list(@CurrentTenant() licenseId: string) {
    const items = await this.webhooksService.list(licenseId);
    return items;
  }

  /** PATCH /webhooks/:id — apply a partial update to one webhook. */
  @Patch(':id')
  @RequirePermission('webhooks.manage')
  @ApiOperation({ summary: 'Update a webhook', description: 'Update name, URL, event subscriptions, or active state.' })
  @ApiResponse({ status: 200, description: 'Webhook updated.' })
  @ApiResponse({ status: 404, description: 'Webhook not found in this license.' })
  async update(
    @CurrentTenant() licenseId: string,
    @Param('id') id: string,
    @Body() dto: UpdateWebhookDto,
  ) {
    const updated = await this.webhooksService.update(licenseId, id, dto);
    return updated;
  }

  /** DELETE /webhooks/:id — delete one webhook. */
  @Delete(':id')
  @RequirePermission('webhooks.manage')
  @ApiOperation({ summary: 'Delete a webhook' })
  @ApiResponse({ status: 200, description: 'Webhook deleted.' })
  @ApiResponse({ status: 404, description: 'Webhook not found in this license.' })
  async remove(@CurrentTenant() licenseId: string, @Param('id') id: string) {
    const removed = await this.webhooksService.remove(licenseId, id);
    return removed;
  }
}
|
||||
14
backend-nest/src/modules/webhooks/webhooks.module.ts
Normal file
14
backend-nest/src/modules/webhooks/webhooks.module.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { Global, Module } from '@nestjs/common';
|
||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||
import { Webhook } from '../../entities/webhook.entity';
|
||||
import { WebhooksController } from './webhooks.controller';
|
||||
import { WebhooksService } from './webhooks.service';
|
||||
|
||||
/**
 * Nest module wiring the webhook feature together: registers the Webhook
 * entity with TypeORM, mounts WebhooksController and provides WebhooksService.
 *
 * The module is marked @Global() and exports WebhooksService.
 */
@Global()
@Module({
  imports: [
    TypeOrmModule.forFeature([Webhook]),
  ],
  controllers: [
    WebhooksController,
  ],
  providers: [
    WebhooksService,
  ],
  exports: [
    WebhooksService,
  ],
})
export class WebhooksModule {}
|
||||
236
backend-nest/src/modules/webhooks/webhooks.service.ts
Normal file
236
backend-nest/src/modules/webhooks/webhooks.service.ts
Normal file
@@ -0,0 +1,236 @@
|
||||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import * as crypto from 'crypto';
|
||||
import { Webhook } from '../../entities/webhook.entity';
|
||||
import { CreateWebhookDto } from './dto/create-webhook.dto';
|
||||
import { UpdateWebhookDto } from './dto/update-webhook.dto';
|
||||
import { assertPublicHttpUrl } from '../../common/ssrf-guard';
|
||||
|
||||
/**
 * Plain-object view of a webhook as returned by the service.
 *
 * The signing secret is deliberately part of this shape: the webhook is the
 * operator's own resource.
 */
export interface WebhookListItem {
  id: string;
  name: string;
  url: string;
  /** Event keys this webhook is subscribed to. */
  events: Array<string>;
  /** Signing secret for this webhook. */
  secret: string;
  is_active: boolean;
  /** Null until a delivery has been recorded. */
  last_delivery_at: null | Date;
  /** Outcome of the most recently recorded delivery; null if none yet. */
  last_status: null | string;
  created_at: Date;
}

/** Result of creating a webhook — the same shape as a list entry. */
export type CreatedWebhook = WebhookListItem;
|
||||
|
||||
/**
 * Outbound webhooks for a license: CRUD on the stored definitions plus signed,
 * best-effort delivery of events to the subscribed endpoints.
 */
@Injectable()
export class WebhooksService {
  private readonly logger = new Logger(WebhooksService.name);

  constructor(
    @InjectRepository(Webhook)
    private readonly webhookRepo: Repository<Webhook>,
  ) {}

  // ---------------------------------------------------------------------------
  // CRUD
  // ---------------------------------------------------------------------------

  /**
   * Store a new, active webhook for `licenseId`.
   *
   * @param licenseId Owning license.
   * @param dto       Validated creation payload.
   * @returns The stored webhook, including its signing secret.
   * @throws Whatever `assertPublicHttpUrl` throws when the URL is rejected.
   */
  async create(licenseId: string, dto: CreateWebhookDto): Promise<CreatedWebhook> {
    // SSRF guard: reject URLs resolving to private/reserved space before storing.
    await assertPublicHttpUrl(dto.url);

    // Generate a secret when the caller supplied none. `||` rather than `??`:
    // an empty string must not be stored, because an empty HMAC key would make
    // every signature trivially forgeable.
    const secret = dto.secret || crypto.randomBytes(32).toString('hex');

    const entity = this.webhookRepo.create({
      license_id: licenseId,
      name: dto.name,
      url: dto.url,
      events: dto.events,
      secret,
      is_active: true,
    });

    const saved = await this.webhookRepo.save(entity);

    this.logger.log(
      `webhook created: id=${saved.id} name="${saved.name}" events=[${saved.events.join(',')}] license=${licenseId}`,
    );

    return this.toListItem(saved);
  }

  /** Return every webhook of `licenseId`, newest first. */
  async list(licenseId: string): Promise<WebhookListItem[]> {
    const rows = await this.webhookRepo.find({
      where: { license_id: licenseId },
      order: { created_at: 'DESC' },
    });
    // Arrow wrapper instead of passing `this.toListItem` unbound, so the
    // mapper keeps working if it ever starts to rely on `this`.
    return rows.map((row) => this.toListItem(row));
  }

  /**
   * Apply the supplied fields to a webhook owned by `licenseId`.
   *
   * @throws NotFoundException when the webhook does not exist in this license.
   * @throws Whatever `assertPublicHttpUrl` throws when a new URL is rejected.
   */
  async update(licenseId: string, id: string, dto: UpdateWebhookDto): Promise<WebhookListItem> {
    const webhook = await this.findOwned(licenseId, id);

    // SSRF guard on any URL change.
    if (dto.url !== undefined) await assertPublicHttpUrl(dto.url);

    if (dto.name !== undefined) webhook.name = dto.name;
    if (dto.url !== undefined) webhook.url = dto.url;
    if (dto.events !== undefined) webhook.events = dto.events;
    if (dto.is_active !== undefined) webhook.is_active = dto.is_active;

    const saved = await this.webhookRepo.save(webhook);

    this.logger.log(`webhook updated: id=${id} license=${licenseId}`);

    return this.toListItem(saved);
  }

  /**
   * Delete a webhook owned by `licenseId`.
   *
   * @returns The id of the deleted webhook.
   * @throws NotFoundException when the webhook does not exist in this license.
   */
  async remove(licenseId: string, id: string): Promise<{ id: string }> {
    const webhook = await this.findOwned(licenseId, id);
    await this.webhookRepo.remove(webhook);
    this.logger.log(`webhook deleted: id=${id} license=${licenseId}`);
    return { id };
  }

  // ---------------------------------------------------------------------------
  // Dispatch
  // ---------------------------------------------------------------------------

  /**
   * Fire an event to all active webhooks of a license that are subscribed to
   * the given event key.
   *
   * Contract:
   *  - Never rejects: query, serialisation and delivery failures are logged.
   *  - Each delivery gets a 5-second AbortController timeout.
   *  - Each attempt updates last_delivery_at + last_status ('ok' | 'failed').
   *  - Deliveries run concurrently via Promise.allSettled. The returned promise
   *    resolves once every attempt has finished or timed out; callers that
   *    must not wait should `void` the call instead of awaiting it.
   *
   * Signature header: X-Corrosion-Signature: sha256=<hex>
   *   where hex = HMAC-SHA256(rawBody, webhook.secret).
   */
  async dispatch(
    licenseId: string,
    event: string,
    payload: Record<string, unknown>,
  ): Promise<void> {
    let hooks: Webhook[];
    try {
      hooks = await this.webhookRepo.find({
        where: { license_id: licenseId, is_active: true },
      });
    } catch (err) {
      this.logger.error(
        `dispatch: failed to query webhooks for license ${licenseId}: ${this.describeError(err)}`,
      );
      return;
    }

    // Filter to those subscribed to this event. A row whose `events` is not an
    // array is treated as subscribed to nothing rather than throwing.
    const subscribed = hooks.filter(
      (h) => Array.isArray(h.events) && h.events.includes(event),
    );
    if (subscribed.length === 0) return;

    // JSON.stringify throws on circular structures and BigInt values; that
    // must not escape to the triggering action.
    let body: string;
    try {
      body = JSON.stringify({
        event,
        timestamp: new Date().toISOString(),
        data: payload,
      });
    } catch (err) {
      this.logger.error(
        `dispatch: failed to serialise payload for event ${event} (license ${licenseId}): ${this.describeError(err)}`,
      );
      return;
    }

    await Promise.allSettled(
      subscribed.map((hook) => this.deliverOne(hook, event, body)),
    );
  }

  /** Deliver to a single webhook endpoint; update delivery metadata. Never throws. */
  private async deliverOne(hook: Webhook, event: string, body: string): Promise<void> {
    const signature = this.sign(body, hook.secret);
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 5_000);

    let status: 'ok' | 'failed' = 'failed';

    try {
      // Re-validate at send time: a host that was public at create time can
      // resolve to a private address now (DNS rebinding / TOCTOU). Throws → caught
      // below → recorded 'failed'.
      await assertPublicHttpUrl(hook.url);

      const res = await fetch(hook.url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'X-Corrosion-Signature': `sha256=${signature}`,
        },
        body,
        signal: controller.signal,
        // Do not auto-follow redirects — a 3xx Location could point at an
        // internal host, re-opening the SSRF we just closed. A redirect is a
        // failed delivery here.
        redirect: 'manual',
      });

      if (res.ok) {
        status = 'ok';
      } else {
        this.logger.warn(
          `webhook delivery failed: id=${hook.id} event=${event} status=${res.status}`,
        );
      }

      // The response body is never read; cancel it so the connection is
      // released promptly instead of lingering until garbage collection.
      await res.body?.cancel().catch(() => undefined);
    } catch (err) {
      this.logger.warn(
        `webhook delivery error: id=${hook.id} event=${event} err=${this.describeError(err)}`,
      );
    } finally {
      clearTimeout(timer);
    }

    // Persist delivery outcome — best-effort, never throws.
    try {
      await this.webhookRepo.update(hook.id, {
        last_delivery_at: new Date(),
        last_status: status,
      });
    } catch (err) {
      this.logger.error(
        `webhook metadata update failed: id=${hook.id}: ${this.describeError(err)}`,
      );
    }
  }

  // ---------------------------------------------------------------------------
  // Helpers
  // ---------------------------------------------------------------------------

  /** Load a webhook by id scoped to `licenseId`; throws NotFoundException if absent. */
  private async findOwned(licenseId: string, id: string): Promise<Webhook> {
    const webhook = await this.webhookRepo.findOne({
      where: { id, license_id: licenseId },
    });
    if (!webhook) {
      throw new NotFoundException(`Webhook ${id} not found`);
    }
    return webhook;
  }

  /** Hex-encoded HMAC-SHA256 of `body` keyed with `secret`. */
  private sign(body: string, secret: string): string {
    return crypto.createHmac('sha256', secret).update(body).digest('hex');
  }

  /** Render any thrown value for a log line without assuming it is an Error. */
  private describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** Project a Webhook entity onto the plain list/response shape. */
  private toListItem(w: Webhook): WebhookListItem {
    return {
      id: w.id,
      name: w.name,
      url: w.url,
      events: w.events,
      secret: w.secret,
      is_active: w.is_active,
      last_delivery_at: w.last_delivery_at,
      last_status: w.last_status,
      created_at: w.created_at,
    };
  }
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Injectable, NotFoundException } from '@nestjs/common';
|
||||
import { Injectable, NotFoundException, ServiceUnavailableException } from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { StoreConfig } from '../../entities/store-config.entity';
|
||||
@@ -224,23 +224,13 @@ export class WebstoreService {
|
||||
throw new NotFoundException('Item not found');
|
||||
}
|
||||
|
||||
const transaction = this.transactionRepo.create({
|
||||
license_id: license.id,
|
||||
item_id: item.id,
|
||||
steam_id: dto.steam_id,
|
||||
player_name: dto.player_name,
|
||||
paypal_order_id: `order_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
||||
amount: parseFloat(item.price.toString()),
|
||||
currency: 'USD', // Would get from config
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
await this.transactionRepo.save(transaction);
|
||||
|
||||
// Return mock PayPal approval URL
|
||||
return {
|
||||
order_id: transaction.paypal_order_id,
|
||||
approval_url: `https://www.sandbox.paypal.com/checkoutnow?token=${transaction.paypal_order_id}`,
|
||||
};
|
||||
// Beta: real PayPal/Stripe processing is not wired yet. Refuse honestly
|
||||
// instead of writing a pending transaction and handing the player a fake
|
||||
// order token that resolves to nowhere. (item lookup above still validates
|
||||
// the request so the storefront UI can show the catalogue.)
|
||||
void item;
|
||||
throw new ServiceUnavailableException(
|
||||
'Storefront checkout is not available yet — payment processing is coming soon.',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
import { Injectable, NotFoundException, Logger } from '@nestjs/common';
|
||||
import {
|
||||
Injectable,
|
||||
NotFoundException,
|
||||
Logger,
|
||||
OnModuleInit,
|
||||
OnModuleDestroy,
|
||||
} from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { IsNull, LessThanOrEqual, Repository } from 'typeorm';
|
||||
import { WipeProfile } from '../../entities/wipe-profile.entity';
|
||||
import { WipeSchedule } from '../../entities/wipe-schedule.entity';
|
||||
import { WipeHistory } from '../../entities/wipe-history.entity';
|
||||
@@ -8,11 +14,14 @@ import { CreateProfileDto } from './dto/create-profile.dto';
|
||||
import { UpdateProfileDto } from './dto/update-profile.dto';
|
||||
import { CreateScheduleDto } from './dto/create-schedule.dto';
|
||||
import { TriggerWipeDto } from './dto/trigger-wipe.dto';
|
||||
import { NatsService } from '../../services/nats.service';
|
||||
import { InstancesService } from '../instances/instances.service';
|
||||
import { WebhooksService } from '../webhooks/webhooks.service';
|
||||
import { nextCronDate } from '../../common/cron.util';
|
||||
|
||||
@Injectable()
|
||||
export class WipesService {
|
||||
export class WipesService implements OnModuleInit, OnModuleDestroy {
|
||||
private readonly logger = new Logger(WipesService.name);
|
||||
private wipeExecutorInterval: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
constructor(
|
||||
@InjectRepository(WipeProfile)
|
||||
@@ -21,9 +30,86 @@ export class WipesService {
|
||||
private readonly wipeScheduleRepo: Repository<WipeSchedule>,
|
||||
@InjectRepository(WipeHistory)
|
||||
private readonly wipeHistoryRepo: Repository<WipeHistory>,
|
||||
private readonly natsService: NatsService,
|
||||
private readonly instancesService: InstancesService,
|
||||
private readonly webhooksService: WebhooksService,
|
||||
) {}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scheduled-wipe executor — the auto-wiper. Mirrors SchedulesService: a 60s
|
||||
// poll fires every active wipe schedule whose next_scheduled_run is due, then
|
||||
// advances it from its cron expression. Without this, wipe_schedules rows
|
||||
// never fire (the headline auto-wipe feature was inert).
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Lifecycle hook: starts the bootstrap pass in the background (not awaited)
 * and then installs the 60-second poll that runs executeDueWipes().
 * Failures of either are logged, never thrown.
 */
onModuleInit(): void {
  this.bootstrapWipeSchedules().catch((reason) => {
    this.logger.error('Failed to bootstrap wipe-schedule next runs', reason);
  });

  const pollOnce = (): void => {
    this.executeDueWipes().catch((reason) => {
      this.logger.error('Wipe-schedule executor error', reason);
    });
  };
  this.wipeExecutorInterval = setInterval(pollOnce, 60_000);

  this.logger.log('Wipe-schedule executor started (60s polling interval)');
}
|
||||
|
||||
/** Lifecycle hook: stops the wipe-schedule poll if one is running. */
onModuleDestroy(): void {
  const handle = this.wipeExecutorInterval;
  if (!handle) {
    return;
  }
  clearInterval(handle);
  this.wipeExecutorInterval = null;
}
|
||||
|
||||
/**
 * On startup, stamp next_scheduled_run on active schedules that lack one.
 *
 * Each schedule is handled independently: a cron expression that yields no
 * next date, or a failing save, is logged and skipped so it cannot prevent the
 * remaining schedules from being stamped. The summary log counts only the
 * schedules that were actually updated.
 */
private async bootstrapWipeSchedules(): Promise<void> {
  const pending = await this.wipeScheduleRepo.find({
    where: { is_active: true, next_scheduled_run: IsNull() },
  });

  let stamped = 0;
  for (const schedule of pending) {
    try {
      const next = nextCronDate(schedule.cron_expression, new Date());
      if (!next) {
        this.logger.warn(
          `Wipe schedule ${schedule.id} has no computable next run (cron "${schedule.cron_expression}")`,
        );
        continue;
      }
      schedule.next_scheduled_run = next;
      await this.wipeScheduleRepo.save(schedule);
      stamped += 1;
    } catch (err) {
      this.logger.error(
        `Failed to bootstrap next run for wipe schedule ${schedule.id}`,
        err instanceof Error ? err.stack : String(err),
      );
    }
  }

  if (stamped > 0) {
    this.logger.log(`Bootstrapped next run for ${stamped} wipe schedule(s)`);
  }
}
|
||||
|
||||
/**
 * Fire every active wipe schedule whose next_scheduled_run <= now.
 *
 * For each due schedule, next_scheduled_run is advanced and persisted BEFORE
 * the wipe is triggered. A wipe is destructive, so a schedule is only fired
 * once its next run has been durably moved forward: if advancing fails, that
 * run is skipped and the schedule stays due for the next poll, instead of the
 * wipe firing now and again every 60s. Previously the advance ran in a
 * `finally` block, where a failing save both aborted the loop for the
 * remaining schedules and left the already-fired schedule due.
 *
 * Failures are logged per schedule and never stop the remaining ones.
 */
private async executeDueWipes(): Promise<void> {
  const now = new Date();
  const due = await this.wipeScheduleRepo.find({
    where: { is_active: true, next_scheduled_run: LessThanOrEqual(now) },
  });
  if (due.length === 0) return;

  this.logger.log(`Executing ${due.length} due wipe schedule(s)`);

  for (const s of due) {
    // Advance first, whether or not the wipe below succeeds, so a failing
    // schedule doesn't re-fire every 60s.
    try {
      s.next_scheduled_run = nextCronDate(s.cron_expression, now);
      await this.wipeScheduleRepo.save(s);
    } catch (err) {
      this.logger.error(
        `Could not advance wipe schedule ${s.id} (${s.schedule_name}); skipping this run`,
        err instanceof Error ? err.stack : String(err),
      );
      continue;
    }

    try {
      await this.triggerWipe(
        s.license_id,
        {
          wipe_type: s.wipe_type as TriggerWipeDto['wipe_type'],
          wipe_profile_id: s.wipe_profile_id,
        },
        'scheduled',
      );
    } catch (err) {
      this.logger.error(
        `Scheduled wipe failed for schedule ${s.id} (${s.schedule_name})`,
        err instanceof Error ? err.stack : String(err),
      );
    }
  }
}
|
||||
|
||||
async getProfiles(licenseId: string): Promise<WipeProfile[]> {
|
||||
return this.wipeProfileRepo.find({
|
||||
where: { license_id: licenseId },
|
||||
@@ -96,25 +182,56 @@ export class WipesService {
|
||||
async triggerWipe(
|
||||
licenseId: string,
|
||||
dto: TriggerWipeDto,
|
||||
triggerType: 'manual' | 'scheduled' = 'manual',
|
||||
): Promise<{ wipe_history_id: string }> {
|
||||
const history = this.wipeHistoryRepo.create({
|
||||
license_id: licenseId,
|
||||
wipe_type: dto.wipe_type,
|
||||
wipe_profile_id: dto.wipe_profile_id,
|
||||
trigger_type: 'manual',
|
||||
status: 'pending',
|
||||
trigger_type: triggerType,
|
||||
status: 'wiping',
|
||||
started_at: new Date(),
|
||||
});
|
||||
|
||||
const saved = await this.wipeHistoryRepo.save(history);
|
||||
this.logger.log(
|
||||
`Wipe ${triggerType} dispatched for license ${licenseId} — history ${saved.id}`,
|
||||
);
|
||||
|
||||
await this.natsService.publish(`corrosion.${licenseId}.cmd.wipe`, {
|
||||
// Dispatch to the agent WITHOUT blocking the caller — a wipe is
|
||||
// stop → delete → start and can take a minute+. We record the outcome on
|
||||
// wipe_history from the agent's reply and fire the wipe_completed webhook
|
||||
// when it lands. Previously the row was created 'pending' and never
|
||||
// advanced, so history lied about every wipe.
|
||||
void this.instancesService
|
||||
.wipeForLicense(licenseId, dto.wipe_type, true)
|
||||
.then((reply: unknown) => {
|
||||
const r = (reply ?? {}) as { status?: string; message?: string; deleted_count?: number };
|
||||
const ok = r.status === 'success';
|
||||
saved.status = ok ? 'success' : 'failed';
|
||||
saved.completed_at = new Date();
|
||||
if (!ok) {
|
||||
saved.error_message = r.message ?? 'agent reported wipe failure';
|
||||
}
|
||||
return this.wipeHistoryRepo.save(saved).then(() => {
|
||||
this.logger.log(`Wipe ${saved.id} ${saved.status}`);
|
||||
if (ok) {
|
||||
void this.webhooksService.dispatch(licenseId, 'wipe_completed', {
|
||||
wipe_history_id: saved.id,
|
||||
wipe_type: dto.wipe_type,
|
||||
wipe_profile_id: dto.wipe_profile_id ?? null,
|
||||
trigger_type: 'manual',
|
||||
timestamp: new Date().toISOString(),
|
||||
trigger_type: triggerType,
|
||||
deleted_count: r.deleted_count ?? null,
|
||||
});
|
||||
}
|
||||
});
|
||||
})
|
||||
.catch((err: unknown) => {
|
||||
saved.status = 'failed';
|
||||
saved.completed_at = new Date();
|
||||
saved.error_message = err instanceof Error ? err.message : 'wipe dispatch failed';
|
||||
this.logger.warn(`Wipe ${saved.id} failed: ${saved.error_message}`);
|
||||
void this.wipeHistoryRepo.save(saved);
|
||||
});
|
||||
this.logger.log(`Wipe triggered for license ${licenseId} — history id ${saved.id}`);
|
||||
|
||||
return { wipe_history_id: saved.id };
|
||||
}
|
||||
|
||||
313
backend-nest/src/services/host-agent-consumer.service.ts
Normal file
313
backend-nest/src/services/host-agent-consumer.service.ts
Normal file
@@ -0,0 +1,313 @@
|
||||
import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common';
|
||||
import { Interval } from '@nestjs/schedule';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { NatsService } from './nats.service';
|
||||
import { ServerConnection } from '../entities/server-connection.entity';
|
||||
import { License } from '../entities/license.entity';
|
||||
import { AgentHost, AgentHostDisk } from '../entities/agent-host.entity';
|
||||
import { GameInstance } from '../entities/game-instance.entity';
|
||||
import { WebhooksService } from '../modules/webhooks/webhooks.service';
|
||||
|
||||
/**
|
||||
* Consumes Corrosion wire protocol v2 host-agent subjects
|
||||
* (corrosion-host-agent/PROTOCOL.md) and keeps the fleet model truthful.
|
||||
*
|
||||
* Writes the License → Host → Instance model (hosts + game_instances) from
|
||||
* each heartbeat, AND maintains the legacy single-server `server_connections`
|
||||
* row so the current panel keeps working during the fleet UI transition.
|
||||
*
|
||||
* Host identity: until enrollment issues a stable host id, a host is keyed by
|
||||
* (license_id, hostname). One agent = one host today; the schema is already
|
||||
* multi-host-ready.
|
||||
*/
|
||||
/**
 * Heartbeat message body as read by this consumer. Every top-level field is
 * optional; only `id` and `game` of an instance entry are declared required.
 */
interface HeartbeatPayload {
  schema?: number;
  timestamp?: string;

  /** Agent build / platform descriptors. */
  agent?: {
    version?: string;
    commit?: string;
    os?: string;
    arch?: string;
  };

  /** Host-level block: identity plus CPU / memory / uptime / disk figures. */
  host?: {
    hostname?: string | null;
    cpu_percent?: number;
    cpu_cores?: number;
    mem_total_mb?: number;
    mem_used_mb?: number;
    uptime_seconds?: number;
    disks?: AgentHostDisk[];
  };

  /** One entry per instance reported by the agent. */
  instances?: {
    id: string;
    game: string;
    label?: string | null;
    state?: string;
    uptime_seconds?: number;
  }[];
}
|
||||
|
||||
/**
 * Subscribes to the host-agent heartbeat and going_offline subjects and
 * mirrors what they report into the database: the legacy server_connections
 * row, the fleet host row, and one game_instances row per reported instance.
 * A periodic sweep flips records offline when heartbeats stop arriving.
 */
@Injectable()
export class HostAgentConsumerService implements OnApplicationBootstrap {
  private readonly logger = new Logger(HostAgentConsumerService.name);

  /** license id -> epoch ms until which its existence is considered verified. */
  private knownLicenses = new Map<string, number>();
  /** Subject segments already warned about, so each is logged once. */
  private warnedUnknown = new Set<string>();

  private static readonly UUID_RE =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
  private static readonly LICENSE_CACHE_TTL_MS = 5 * 60_000;
  private static readonly OFFLINE_AFTER_MS = 180_000;
  /** Upper bound on remembered invalid ids (they are publisher-controlled). */
  private static readonly MAX_WARNED_UNKNOWN = 1_000;

  constructor(
    private readonly nats: NatsService,
    @InjectRepository(ServerConnection)
    private readonly connectionRepository: Repository<ServerConnection>,
    @InjectRepository(License)
    private readonly licenseRepository: Repository<License>,
    @InjectRepository(AgentHost)
    private readonly hostRepository: Repository<AgentHost>,
    @InjectRepository(GameInstance)
    private readonly instanceRepository: Repository<GameInstance>,
    private readonly webhooksService: WebhooksService,
  ) {}

  // Bootstrap, not module-init: subscriptions registered before NatsService
  // finished connecting silently no-op (see NatsBridgeService note).
  onApplicationBootstrap() {
    this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
      // Subject layout is corrosion.<license>.host.heartbeat.
      const licenseId = subject.split('.')[1];
      void this.onHeartbeat(licenseId, data as HeartbeatPayload).catch((err) =>
        this.logger.error(`heartbeat handling failed for ${licenseId}: ${err.message}`, err.stack),
      );
    });

    this.nats.subscribe('corrosion.*.host.going_offline', (_data, subject) => {
      const licenseId = subject.split('.')[1];
      void this.onGoingOffline(licenseId).catch((err) =>
        this.logger.error(`going_offline handling failed for ${licenseId}: ${err.message}`, err.stack),
      );
    });

    this.logger.log('Host agent (protocol v2) consumer subscriptions initialized');
  }

  /**
   * Handle one heartbeat: validate the tenant and payload, then refresh the
   * legacy connection row, the host row and the instance rows, in that order.
   */
  private async onHeartbeat(licenseId: string, payload: HeartbeatPayload): Promise<void> {
    if (!(await this.isValidTenant(licenseId))) return;
    // A well-formed v2 heartbeat always carries a host block. Reject malformed
    // payloads so a stray/empty publish can't create a phantom host row.
    if (!payload || typeof payload.host !== 'object' || payload.host === null) {
      this.logger.warn(`ignoring malformed heartbeat for license ${licenseId} (no host block)`);
      return;
    }
    const now = new Date();

    await this.updateLegacyConnection(licenseId, now);
    const host = await this.upsertHost(licenseId, payload, now);
    await this.upsertInstances(licenseId, host, payload, now);
  }

  /** Legacy single-server row — keeps the current panel working. */
  private async updateLegacyConnection(licenseId: string, now: Date): Promise<void> {
    const existing = await this.connectionRepository.findOne({ where: { license_id: licenseId } });
    if (existing) {
      await this.connectionRepository.update(
        { id: existing.id },
        { companion_last_seen: now, connection_status: 'connected', updated_at: now },
      );
    } else {
      await this.connectionRepository.save(
        this.connectionRepository.create({
          license_id: licenseId,
          connection_type: 'bare_metal',
          connection_status: 'connected',
          companion_last_seen: now,
        }),
      );
    }
  }

  /**
   * Upsert the fleet host row, keyed by (license_id, hostname).
   * A missing hostname is stored as the empty string.
   *
   * @returns the host as it stands after the write (existing row merged with
   *          the new field values, or the freshly created row).
   */
  private async upsertHost(licenseId: string, payload: HeartbeatPayload, now: Date): Promise<AgentHost> {
    const hostname = payload.host?.hostname ?? '';
    const fields = {
      agent_version: payload.agent?.version ?? null,
      agent_commit: payload.agent?.commit ?? null,
      os: payload.agent?.os ?? null,
      arch: payload.agent?.arch ?? null,
      status: 'connected',
      last_heartbeat_at: now,
      cpu_percent: payload.host?.cpu_percent ?? null,
      cpu_cores: payload.host?.cpu_cores ?? null,
      mem_total_mb: payload.host?.mem_total_mb ?? null,
      mem_used_mb: payload.host?.mem_used_mb ?? null,
      uptime_seconds: payload.host?.uptime_seconds ?? null,
      disks: payload.host?.disks ?? null,
      updated_at: now,
    };

    const existing = await this.hostRepository.findOne({
      where: { license_id: licenseId, hostname },
    });
    if (existing) {
      await this.hostRepository.update({ id: existing.id }, fields);
      return { ...existing, ...fields } as AgentHost;
    }
    const created = await this.hostRepository.save(
      this.hostRepository.create({ license_id: licenseId, hostname, ...fields }),
    );
    this.logger.log(`host registered for license ${licenseId} (hostname '${hostname || 'unknown'}')`);
    return created;
  }

  /**
   * Upsert one game_instances row per heartbeat instance entry, keyed by
   * (license_id, agent_instance_id). Entries without an id or game are skipped.
   */
  private async upsertInstances(
    licenseId: string,
    host: AgentHost,
    payload: HeartbeatPayload,
    now: Date,
  ): Promise<void> {
    for (const inst of payload.instances ?? []) {
      if (!inst?.id || !inst?.game) continue;
      const fields = {
        host_id: host.id,
        game: inst.game,
        label: inst.label ?? null,
        state: inst.state ?? 'unknown',
        uptime_seconds: inst.uptime_seconds ?? 0,
        last_seen_at: now,
        updated_at: now,
      };
      const existing = await this.instanceRepository.findOne({
        where: { license_id: licenseId, agent_instance_id: inst.id },
      });
      if (existing) {
        await this.instanceRepository.update({ id: existing.id }, fields);
      } else {
        await this.instanceRepository.save(
          this.instanceRepository.create({
            license_id: licenseId,
            agent_instance_id: inst.id,
            ...fields,
          }),
        );
        this.logger.log(`instance '${inst.id}' (${inst.game}) registered for license ${licenseId}`);
      }
    }
  }

  /**
   * Handle the graceful going_offline beacon: mark the legacy connection and
   * all host rows of the license offline, then fire server_down for the hosts
   * that actually changed state.
   */
  private async onGoingOffline(licenseId: string): Promise<void> {
    if (!(await this.isValidTenant(licenseId))) return;
    const now = new Date();
    await this.connectionRepository.update(
      { license_id: licenseId },
      { connection_status: 'offline', updated_at: now },
    );

    // Capture the hosts before flipping status so the webhook payload is
    // useful, and keep only those not already offline: rows that were offline
    // beforehand did not "go down" now and must not re-fire server_down.
    const hosts = await this.hostRepository.find({ where: { license_id: licenseId } });
    const transitioned = hosts.filter((h) => h.status !== 'offline');

    await this.hostRepository.update(
      { license_id: licenseId },
      { status: 'offline', updated_at: now },
    );
    this.logger.log(`host(s) for license ${licenseId} went offline (graceful beacon)`);

    // Dispatch server_down event for each host that went offline. Fire-and-forget.
    for (const host of transitioned) {
      void this.webhooksService
        .dispatch(licenseId, 'server_down', {
          host_id: host.id,
          hostname: host.hostname ?? null,
          reason: 'graceful_shutdown',
        })
        .catch(() => {
          // dispatch() logs internally; swallow here to keep the handler clean.
        });
    }
  }

  /**
   * Heartbeats stopping must flip the panel to offline — an agent that
   * crashes or loses network never sends the goodbye beacon. Sweeps both the
   * legacy connection and fleet hosts.
   *
   * Hosts that transition to offline here also fire the server_down webhook.
   * We identify them BEFORE the bulk update so we can carry their identity
   * into the webhook payload.
   */
  @Interval(60_000)
  async sweepStaleConnections(): Promise<void> {
    const threshold = new Date(Date.now() - HostAgentConsumerService.OFFLINE_AFTER_MS);

    // Identify stale hosts BEFORE bulk-updating so we can dispatch webhooks
    // with meaningful host_id / hostname data.
    const staleHosts = await this.hostRepository
      .createQueryBuilder('host')
      .where('host.status = :connected', { connected: 'connected' })
      .andWhere('host.last_heartbeat_at IS NOT NULL')
      .andWhere('host.last_heartbeat_at < :threshold', { threshold })
      .getMany();

    const conn = await this.connectionRepository
      .createQueryBuilder()
      .update(ServerConnection)
      .set({ connection_status: 'offline', updated_at: () => 'NOW()' })
      .where('connection_status = :connected', { connected: 'connected' })
      .andWhere('companion_last_seen IS NOT NULL')
      .andWhere('companion_last_seen < :threshold', { threshold })
      .execute();

    const hosts = await this.hostRepository
      .createQueryBuilder()
      .update(AgentHost)
      .set({ status: 'offline', updated_at: () => 'NOW()' })
      .where('status = :connected', { connected: 'connected' })
      .andWhere('last_heartbeat_at IS NOT NULL')
      .andWhere('last_heartbeat_at < :threshold', { threshold })
      .execute();

    const affected = (conn.affected ?? 0) + (hosts.affected ?? 0);
    if (affected) {
      this.logger.warn(`marked ${affected} stale connection/host record(s) offline`);
    }

    // Dispatch server_down webhook for each host that just timed out.
    // Fire-and-forget — webhook failures must never break the sweep.
    for (const host of staleHosts) {
      void this.webhooksService
        .dispatch(host.license_id, 'server_down', {
          host_id: host.id,
          hostname: host.hostname ?? null,
          reason: 'heartbeat_timeout',
        })
        .catch(() => {
          // dispatch() logs internally; swallow here to keep the sweep clean.
        });
    }
  }

  /**
   * Tenant validation: the subject segment must be a real license UUID.
   * NATS consumers must never write rows for subjects an arbitrary publisher
   * invented. Existence is cached to avoid a query per heartbeat.
   *
   * @returns true when the id is UUID-shaped and a license with that id exists.
   */
  private async isValidTenant(licenseId: string): Promise<boolean> {
    if (!HostAgentConsumerService.UUID_RE.test(licenseId)) {
      this.warnUnknownOnce(licenseId, 'not a UUID');
      return false;
    }
    const cachedUntil = this.knownLicenses.get(licenseId);
    if (cachedUntil && cachedUntil > Date.now()) return true;

    const exists = await this.licenseRepository.exist({ where: { id: licenseId } });
    if (!exists) {
      this.warnUnknownOnce(licenseId, 'no such license');
      return false;
    }
    this.knownLicenses.set(licenseId, Date.now() + HostAgentConsumerService.LICENSE_CACHE_TTL_MS);
    return true;
  }

  /** Log a warning for an invalid license id the first time it is seen. */
  private warnUnknownOnce(licenseId: string, reason: string): void {
    if (this.warnedUnknown.has(licenseId)) return;
    // The ids come straight from publisher-chosen subjects; reset the set at
    // the cap so a stream of invented ids cannot grow it without bound. The
    // only cost of a reset is that an id may be warned about again.
    if (this.warnedUnknown.size >= HostAgentConsumerService.MAX_WARNED_UNKNOWN) {
      this.warnedUnknown.clear();
    }
    this.warnedUnknown.add(licenseId);
    this.logger.warn(`ignoring host-agent traffic for invalid license '${licenseId}' (${reason})`);
  }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
export { NatsService } from './nats.service';
|
||||
export { NatsBridgeService } from './nats-bridge.service';
|
||||
export { HostAgentConsumerService } from './host-agent-consumer.service';
|
||||
export { SteamService } from './steam.service';
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
|
||||
import { Injectable, OnApplicationBootstrap, Logger } from '@nestjs/common';
|
||||
import { NatsService } from './nats.service';
|
||||
|
||||
@Injectable()
|
||||
export class NatsBridgeService implements OnModuleInit {
|
||||
export class NatsBridgeService implements OnApplicationBootstrap {
|
||||
private readonly logger = new Logger(NatsBridgeService.name);
|
||||
private listeners: Map<string, Set<(event: string, data: unknown) => void>> = new Map();
|
||||
|
||||
constructor(private nats: NatsService) {}
|
||||
|
||||
onModuleInit() {
|
||||
// Subscriptions MUST happen in onApplicationBootstrap, not onModuleInit:
|
||||
// provider onModuleInit order is not guaranteed, and these hooks once ran
|
||||
// before NatsService connected — every subscribe() silently no-oped and the
|
||||
// WS bridge was dead from boot. Bootstrap runs after ALL module inits
|
||||
// (including the awaited NATS connect) complete.
|
||||
onApplicationBootstrap() {
|
||||
this.nats.subscribe('corrosion.*.companion.heartbeat', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'heartbeat', data);
|
||||
@@ -44,6 +49,17 @@ export class NatsBridgeService implements OnModuleInit {
|
||||
this.emit(licenseId, 'oxide_status', data);
|
||||
});
|
||||
|
||||
// Wire protocol v2 (corrosion-host-agent) — host-level telemetry
|
||||
this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'host_heartbeat', data);
|
||||
});
|
||||
|
||||
this.nats.subscribe('corrosion.*.host.going_offline', (data, subject) => {
|
||||
const licenseId = subject.split('.')[1];
|
||||
this.emit(licenseId, 'host_going_offline', data);
|
||||
});
|
||||
|
||||
this.logger.log('NATS bridge subscriptions initialized');
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
import { Injectable, OnModuleInit, OnModuleDestroy, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { connect, NatsConnection, StringCodec, Subscription } from 'nats';
|
||||
import { createHmac, randomUUID } from 'crypto';
|
||||
|
||||
/** NATS connection details handed to a license's agent. */
export interface AgentCredentials {
  /** License the credentials belong to. */
  license_id: string;
  /** NATS user name. */
  nats_user: string;
  /** NATS password for that user. */
  nats_password: string;
  /** Broker URL the agent should connect to. */
  nats_url: string;
}
|
||||
|
||||
@Injectable()
|
||||
export class NatsService implements OnModuleInit, OnModuleDestroy {
|
||||
@@ -13,8 +21,13 @@ export class NatsService implements OnModuleInit, OnModuleDestroy {
|
||||
async onModuleInit() {
|
||||
try {
|
||||
const url = this.config.get<string>('nats.url') || 'nats://localhost:4222';
|
||||
this.nc = await connect({ servers: url });
|
||||
this.logger.log(`Connected to NATS at ${url}`);
|
||||
const user = this.config.get<string>('nats.internalUser');
|
||||
const pass = this.config.get<string>('nats.internalPassword');
|
||||
// Authenticate with the privileged internal user when configured;
|
||||
// otherwise connect anonymously (broker hasn't enforced auth yet).
|
||||
const opts = user && pass ? { servers: url, user, pass } : { servers: url };
|
||||
this.nc = await connect(opts);
|
||||
this.logger.log(`Connected to NATS at ${url}${user ? ` as ${user}` : ' (anonymous)'}`);
|
||||
} catch (err) {
|
||||
this.logger.warn(`NATS connection failed — running in offline mode: ${(err as Error).message}`);
|
||||
}
|
||||
@@ -62,6 +75,64 @@ export class NatsService implements OnModuleInit, OnModuleDestroy {
|
||||
return sub;
|
||||
}
|
||||
|
||||
/**
 * Request-reply to a host-agent subject with a LICENSE-SCOPED reply subject.
 *
 * Per-license agent users are confined to corrosion.{license}.> and have no
 * _INBOX permission, so the agent cannot publish a reply to the default
 * global inbox. The reply must live inside the license namespace
 * (corrosion.{license}.reply.<id>); the privileged backend subscribes there.
 * See corrosion-host-agent/PROTOCOL.md ("Reply-subject rule").
 *
 * @param licenseId license whose namespace hosts the reply subject
 * @param subject   subject the request is published to
 * @param payload   request body, sent as JSON
 * @param timeoutMs how long to wait for the single reply (default 8000)
 * @returns the reply parsed as JSON, or the raw decoded text when the reply
 *          is not valid JSON
 * @throws Error when NATS is not connected, when no reply arrives in time,
 *         when the reply subscription reports an error, or when publishing
 *         the request fails
 */
async requestScoped<T = unknown>(
  licenseId: string,
  subject: string,
  payload: Record<string, unknown>,
  timeoutMs = 8000,
): Promise<T> {
  if (!this.nc) {
    throw new Error('NATS unavailable — agent is not reachable');
  }
  const replySubject = `corrosion.${licenseId}.reply.${randomUUID()}`;
  const nc = this.nc;
  return new Promise<T>((resolve, reject) => {
    const sub = nc.subscribe(replySubject, {
      max: 1,
      timeout: timeoutMs,
      callback: (err, msg) => {
        if (err) {
          // Only a genuine timeout should read as "did not respond"; other
          // subscription errors keep their own message instead of being
          // misreported as an unresponsive agent.
          const code = (err as { code?: string }).code;
          reject(
            code === 'TIMEOUT'
              ? new Error(`agent did not respond within ${timeoutMs}ms`)
              : new Error(`agent request failed: ${err.message}`),
          );
          return;
        }
        const text = this.sc.decode(msg.data);
        try {
          resolve(JSON.parse(text) as T);
        } catch {
          // Not JSON — hand back the decoded text as-is.
          resolve(text as unknown as T);
        }
      },
    });
    try {
      nc.publish(subject, this.sc.encode(JSON.stringify(payload)), { reply: replySubject });
    } catch (err) {
      // The request never left; release the reply subscription right away
      // rather than leaving it registered until its timeout fires.
      sub.unsubscribe();
      reject(err instanceof Error ? err : new Error(String(err)));
    }
  });
}
|
||||
|
||||
/**
 * Derive the agent NATS credentials for a license.
 *
 * The password is HMAC-SHA256 keyed with the configured `nats.tokenSecret`
 * over the license id, hex-encoded — it must match the broker config
 * generated by scripts/generate-nats-auth.mjs. The user name is the license
 * id itself.
 *
 * @returns the credentials, or null when no token secret is configured
 *          (broker not yet enforcing auth).
 */
getAgentCredentials(licenseId: string): AgentCredentials | null {
  const tokenSecret = this.config.get<string>('nats.tokenSecret');
  if (!tokenSecret) {
    return null;
  }

  const hmac = createHmac('sha256', tokenSecret);
  hmac.update(licenseId);
  const nats_password = hmac.digest('hex');

  const publicUrl = this.config.get<string>('nats.publicUrl');
  return {
    license_id: licenseId,
    nats_user: licenseId,
    nats_password,
    nats_url: publicUrl || 'nats://nats.corrosionmgmt.com:4222',
  };
}
|
||||
|
||||
/** Publish a command to a specific license's server */
|
||||
async sendServerCommand(licenseId: string, action: string, payload: Record<string, unknown> = {}): Promise<void> {
|
||||
await this.publish(`corrosion.${licenseId}.cmd.server`, {
|
||||
|
||||
102
backend/migrations/022_fleet_model.sql
Normal file
102
backend/migrations/022_fleet_model.sql
Normal file
@@ -0,0 +1,102 @@
|
||||
-- Fleet data model — License → Host → Instance (with optional Cluster)
|
||||
--
|
||||
-- ADDITIVE: existing server_connections / server_config / server_stats are
|
||||
-- left untouched so the current single-server panel keeps working. The
|
||||
-- host-agent consumer writes BOTH the legacy connection row and these fleet
|
||||
-- tables during the transition; the panel migrates to the fleet tables in a
|
||||
-- later phase.
|
||||
--
|
||||
-- Shape mirrors the host agent's wire protocol v2 heartbeat:
|
||||
-- host{} block → agent_hosts
|
||||
-- instances[] entries → game_instances
|
||||
-- Host metrics (CPU/RAM/disk) live on the HOST, not duplicated per instance.
|
||||
--
|
||||
-- Named `agent_hosts` (not `hosts`) to avoid collision with the existing B2B
|
||||
-- `hosts` table (hosting-partner companies) — different concept entirely.
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- AGENT_HOSTS — one Corrosion host agent / one machine
|
||||
-----------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS agent_hosts (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
|
||||
-- Natural key until enrollment issues a stable host identity.
|
||||
hostname VARCHAR(255) NOT NULL DEFAULT '',
|
||||
agent_version VARCHAR(64),
|
||||
agent_commit VARCHAR(64),
|
||||
os VARCHAR(32),
|
||||
arch VARCHAR(32),
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'offline'
|
||||
CHECK (status IN ('connected', 'degraded', 'offline')),
|
||||
last_heartbeat_at TIMESTAMPTZ,
|
||||
cpu_percent DOUBLE PRECISION,
|
||||
cpu_cores INTEGER,
|
||||
mem_total_mb BIGINT,
|
||||
mem_used_mb BIGINT,
|
||||
uptime_seconds BIGINT,
|
||||
disks JSONB, -- [{ "mount": "/", "total_mb": n, "free_mb": n }]
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
UNIQUE (license_id, hostname)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_agent_hosts_license ON agent_hosts(license_id);
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- INSTANCE CLUSTERS — optional grouping (Soulmask main/child, Dune battlegroup)
|
||||
-- Reserved now; cluster logic ships with those game adapters.
|
||||
-----------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS instance_clusters (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
|
||||
game VARCHAR(32) NOT NULL,
|
||||
name VARCHAR(255) NOT NULL,
|
||||
topology VARCHAR(32), -- main_client | battlegroup
|
||||
config JSONB,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_clusters_license ON instance_clusters(license_id);
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- GAME INSTANCES — one game server process / orchestrated unit.
|
||||
-- The billing unit (plans count instances).
|
||||
-----------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS game_instances (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
|
||||
host_id UUID REFERENCES agent_hosts(id) ON DELETE SET NULL,
|
||||
cluster_id UUID REFERENCES instance_clusters(id) ON DELETE SET NULL,
|
||||
-- The agent's instance slug; the NATS subject segment.
|
||||
agent_instance_id VARCHAR(64) NOT NULL,
|
||||
game VARCHAR(32) NOT NULL,
|
||||
label VARCHAR(255),
|
||||
-- running | stopped | starting | stopping | crashed
|
||||
-- | configured | missing_root | unmanaged | unknown
|
||||
state VARCHAR(32) NOT NULL DEFAULT 'unknown',
|
||||
root_path TEXT,
|
||||
uptime_seconds BIGINT NOT NULL DEFAULT 0,
|
||||
last_seen_at TIMESTAMPTZ,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
UNIQUE (license_id, agent_instance_id)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_instances_license ON game_instances(license_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_instances_host ON game_instances(host_id);
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- INSTANCE STATS — per-instance time series (game metrics).
|
||||
-- Populated once game-level telemetry (player count/FPS via RCON/plugin) is
|
||||
-- collected; the host heartbeat carries host metrics, not game metrics.
|
||||
-----------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS instance_stats (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
instance_id UUID NOT NULL REFERENCES game_instances(id) ON DELETE CASCADE,
|
||||
license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
|
||||
player_count INTEGER NOT NULL DEFAULT 0,
|
||||
max_players INTEGER NOT NULL DEFAULT 0,
|
||||
fps DOUBLE PRECISION NOT NULL DEFAULT 0,
|
||||
memory_usage_mb INTEGER NOT NULL DEFAULT 0,
|
||||
recorded_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_instance_stats_instance
|
||||
ON instance_stats(instance_id, recorded_at DESC);
|
||||
17
backend/migrations/023_api_keys.sql
Normal file
17
backend/migrations/023_api_keys.sql
Normal file
@@ -0,0 +1,17 @@
|
||||
-- Per-license API key management
|
||||
-- Each row represents one issued key: the plaintext is shown once at creation
|
||||
-- and never stored; only the SHA-256 hex digest is persisted.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS api_keys (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
|
||||
name VARCHAR(100) NOT NULL,
|
||||
key_prefix VARCHAR(16) NOT NULL,
|
||||
key_hash VARCHAR(128) NOT NULL,
|
||||
last_used_at TIMESTAMPTZ NULL,
|
||||
is_active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_api_keys_license ON api_keys(license_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_api_keys_key_hash ON api_keys(key_hash);
|
||||
26
backend/migrations/024_webhooks.sql
Normal file
26
backend/migrations/024_webhooks.sql
Normal file
@@ -0,0 +1,26 @@
|
||||
-- 024_webhooks.sql
-- Per-license outbound webhook registry.
-- Operators register URLs + event subscriptions; the backend POSTs signed
-- JSON payloads on matching events (player_banned, server_down, …).
--
-- Uses IF NOT EXISTS like the 022/023 migrations so re-running is a no-op.

CREATE TABLE IF NOT EXISTS webhooks (
    id               uuid         NOT NULL DEFAULT uuid_generate_v4(),
    license_id       uuid         NOT NULL REFERENCES licenses(id) ON DELETE CASCADE,
    name             varchar(100) NOT NULL,
    url              text         NOT NULL,
    -- Comma-separated event keys, e.g. 'player_banned,server_down'
    -- TypeORM simple-array maps this transparently to string[].
    events           text         NOT NULL,
    -- HMAC-SHA256 signing secret; generated server-side if omitted on create.
    secret           varchar(128) NOT NULL,
    is_active        boolean      NOT NULL DEFAULT true,
    -- Populated after each delivery attempt.
    last_delivery_at timestamptz  NULL,
    -- 'ok' | 'failed' — last HTTP delivery outcome.
    last_status      varchar(20)  NULL,
    created_at       timestamptz  NOT NULL DEFAULT now(),

    CONSTRAINT webhooks_pkey PRIMARY KEY (id)
);

CREATE INDEX IF NOT EXISTS idx_webhooks_license_id ON webhooks (license_id);
|
||||
15
backend/migrations/025_owner_full_access.sql
Normal file
15
backend/migrations/025_owner_full_access.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
-- 025_owner_full_access.sql
|
||||
--
|
||||
-- The system-default Owner role enumerated per-resource wildcards
|
||||
-- (server.*, wipe.*, players.*, ...). Every feature added since has drifted past that
|
||||
-- enumeration: apikeys, webhooks, alerts, analytics, chat, schedules,
|
||||
-- notifications, map, users, and ALL plugin-config modules (plus a singular
|
||||
-- 'plugin.*' vs granted 'plugins.*' mismatch) were silently locked out for any
|
||||
-- non-super-admin Owner — PermissionsGuard denies a permission the role doesn't
|
||||
-- grant. The Owner has "full control of their license" by definition, so grant
|
||||
-- a global wildcard instead of an enumeration that must be amended per feature.
|
||||
--
|
||||
-- PermissionsGuard and the frontend auth store both honor "*" as allow-all.
|
||||
UPDATE roles
|
||||
SET permissions = '{"*": true}'::jsonb
|
||||
WHERE role_name = 'Owner' AND is_system_default = true;
|
||||
@@ -1,7 +1,7 @@
|
||||
.PHONY: all build build-linux build-windows clean test run
|
||||
|
||||
# Binary names
|
||||
BINARY_NAME=corrosion-companion
|
||||
BINARY_NAME=corrosion-host-agent
|
||||
BINARY_LINUX=$(BINARY_NAME)-linux-amd64
|
||||
BINARY_WINDOWS=$(BINARY_NAME)-windows-amd64.exe
|
||||
|
||||
@@ -66,10 +66,10 @@ run: build-local
|
||||
install-service:
|
||||
@echo "Installing systemd service..."
|
||||
@sudo cp $(BUILD_DIR)/$(BINARY_LINUX) /usr/local/bin/$(BINARY_NAME)
|
||||
@sudo cp deployment/corrosion-companion.service /etc/systemd/system/
|
||||
@sudo cp deployment/corrosion-host-agent.service /etc/systemd/system/
|
||||
@sudo systemctl daemon-reload
|
||||
@sudo systemctl enable corrosion-companion
|
||||
@echo "Service installed. Configure /etc/corrosion-companion/.env then start with: sudo systemctl start corrosion-companion"
|
||||
@sudo systemctl enable corrosion-host-agent
|
||||
@echo "Service installed. Configure /etc/corrosion-host-agent/.env then start with: sudo systemctl start corrosion-host-agent"
|
||||
|
||||
# Development helpers
|
||||
dev: build-local
|
||||
|
||||
152
contract-tests/agent-backend.contract.mjs
Normal file
152
contract-tests/agent-backend.contract.mjs
Normal file
@@ -0,0 +1,152 @@
|
||||
// Full-pipeline contract test: Rust host agent → NATS → NestJS consumer → Postgres.
|
||||
//
|
||||
// Proves the wire protocol v2 chain end to end against a REAL backend and DB:
|
||||
// 1. agent heartbeat arrives with schema 2 + measured telemetry
|
||||
// 2. backend auto-registers the server_connections row and marks it connected
|
||||
// 3. instance command channel round-trips (start/status/stop) with push events
|
||||
// 4. graceful agent shutdown publishes the offline beacon and the row flips offline
|
||||
//
|
||||
// Required env:
|
||||
// LICENSE_ID — existing license uuid (CI: from the admin seed)
|
||||
// DATABASE_URL — postgres connection string for assertions
|
||||
// NATS_URL — broker both agent and backend use (default nats://localhost:4222)
|
||||
// AGENT_BIN — path to the corrosion-host-agent binary
|
||||
//
|
||||
// Uses the backend's own node_modules (nats, pg) so the client libs under test
|
||||
// are exactly what production runs.
|
||||
|
||||
import { createRequire } from 'node:module';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { writeFileSync, mkdtempSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const repoRoot = join(dirname(fileURLToPath(import.meta.url)), '..');
|
||||
const require = createRequire(join(repoRoot, 'backend-nest', 'node_modules', 'x.js'));
|
||||
const { connect, StringCodec } = require('nats');
|
||||
const { Client: PgClient } = require('pg');
|
||||
|
||||
const LICENSE = process.env.LICENSE_ID;
|
||||
const NATS_URL = process.env.NATS_URL ?? 'nats://localhost:4222';
|
||||
const DATABASE_URL = process.env.DATABASE_URL;
|
||||
const AGENT_BIN = process.env.AGENT_BIN ?? join(repoRoot, 'corrosion-host-agent', 'target', 'debug', 'corrosion-host-agent');
|
||||
|
||||
if (!LICENSE || !DATABASE_URL) {
|
||||
console.error('LICENSE_ID and DATABASE_URL are required');
|
||||
process.exit(2);
|
||||
}
|
||||
|
||||
const sc = StringCodec();
|
||||
const errs = [];
|
||||
const check = (cond, msg) => { if (!cond) errs.push(msg); };
|
||||
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
||||
|
||||
// Polls the `server_connections` rows for the license under test, once per
// second, until `predicate(rows)` is truthy or `timeoutMs` has elapsed.
//
//   pg         connected pg client used to run the query
//   predicate  receives the current rows; truthy ends the poll successfully
//   label      prefix for the failure message recorded on timeout
//   timeoutMs  time budget in milliseconds (default 30s)
//
// Always resolves with the most recently fetched rows. A timeout does not
// throw: it records a message in `errs` so the remaining checks still run.
async function pollDb(pg, predicate, label, timeoutMs = 30_000) {
  const sql =
    'SELECT connection_type, connection_status, companion_last_seen FROM server_connections WHERE license_id = $1';
  const giveUpAt = Date.now() + timeoutMs;

  let rows;
  while (true) {
    ({ rows } = await pg.query(sql, [LICENSE]));
    if (predicate(rows)) break;

    // The deadline is checked only after a query, so at least one attempt is
    // always made even with a tiny timeout.
    if (Date.now() > giveUpAt) {
      errs.push(`${label}: timeout after ${timeoutMs}ms — rows: ${JSON.stringify(rows)}`);
      break;
    }
    await sleep(1000);
  }
  return rows;
}
|
||||
|
||||
// Runs the full contract scenario against a live broker, backend and database:
//   1. the agent's first heartbeat has the v2 shape with measured telemetry
//   2. the backend auto-registers the server_connections row and marks it connected
//   3. the instance command channel round-trips and emits push status events
//   4. SIGTERM makes the row flip to offline
//
// Failed expectations are collected in `errs` (via `check` / `pollDb`) rather
// than thrown, so one run reports every violation. The process exits 0 on
// success and 1 if any expectation failed. Unexpected exceptions (e.g. a NATS
// request timeout) propagate to the caller.
const main = async () => {
  const pg = new PgClient({ connectionString: DATABASE_URL });
  await pg.connect();
  const nc = await connect({ servers: NATS_URL });

  // Background collectors: everything the agent publishes on these subjects
  // is decoded and buffered for the assertions below.
  const heartbeats = [];
  const statusEvents = [];
  (async () => { for await (const m of nc.subscribe(`corrosion.${LICENSE}.host.heartbeat`)) heartbeats.push(JSON.parse(sc.decode(m.data))); })();
  (async () => { for await (const m of nc.subscribe(`corrosion.${LICENSE}.ci-instance.status`)) statusEvents.push(JSON.parse(sc.decode(m.data))); })();

  // --- spawn the real agent ---
  const dir = mkdtempSync(join(tmpdir(), 'cha-contract-'));
  const cfgPath = join(dir, 'agent.toml');
  writeFileSync(cfgPath, `
[agent]
license_id = "${LICENSE}"
nats_url = "${NATS_URL}"
heartbeat_seconds = 10
log_level = "info"

[[instance]]
id = "ci-instance"
game = "rust"
root = "/tmp"
label = "Contract CI"
executable = "/bin/sleep"
args = ["300"]
`);
  const agent = spawn(AGENT_BIN, ['--config', cfgPath], { stdio: ['ignore', 'inherit', 'inherit'] });
  const agentExited = new Promise((r) => agent.on('exit', r));
  // Both fields stay null for as long as the child process is still running.
  const agentAlive = () => agent.exitCode === null && agent.signalCode === null;

  try {
    // --- 1. heartbeat shape + real telemetry ---
    const hbDeadline = Date.now() + 20_000;
    while (heartbeats.length === 0 && Date.now() < hbDeadline) await sleep(500);
    check(heartbeats.length > 0, 'no heartbeat within 20s');
    if (heartbeats.length) {
      const hb = heartbeats[0];
      check(hb.schema === 2, `schema != 2: ${hb.schema}`);
      check(typeof hb.host?.cpu_percent === 'number', 'missing host.cpu_percent');
      check(hb.host?.mem_total_mb > 0, 'mem_total_mb not measured');
      check(Array.isArray(hb.host?.disks) && hb.host.disks.length > 0, 'no disks reported');
      check(hb.instances?.[0]?.id === 'ci-instance', 'instance missing from heartbeat');
      check(!!hb.agent?.version && !!hb.agent?.commit, 'agent version/commit missing');
    }

    // --- 2. backend auto-registers + connects ---
    const rows = await pollDb(pg, (r) => r.length === 1 && r[0].connection_status === 'connected', 'auto-register connected');
    if (rows.length === 1) {
      check(rows[0].connection_type === 'bare_metal', `connection_type: ${rows[0].connection_type}`);
      check(rows[0].companion_last_seen !== null, 'companion_last_seen not set');
    }

    // --- 3. instance command channel ---
    // Request-reply helper: sends one JSON command and returns the parsed
    // reply. Rejects if no reply arrives within 8s.
    const cmd = async (payload) =>
      JSON.parse(sc.decode((await nc.request(`corrosion.${LICENSE}.ci-instance.cmd`, sc.encode(JSON.stringify(payload)), { timeout: 8000 })).data));

    const st0 = await cmd({ func: 'status' });
    check(st0.state?.state === 'stopped', `initial state: ${JSON.stringify(st0.state)}`);
    const start = await cmd({ func: 'start' });
    check(start.status === 'success', `start: ${JSON.stringify(start)}`);
    await sleep(1000);
    const st1 = await cmd({ func: 'status' });
    check(st1.state?.state === 'running', `post-start state: ${JSON.stringify(st1.state)}`);
    check((await cmd({ func: 'start' })).status === 'error', 'double start must error');
    check((await cmd({ func: 'bogus' })).status === 'error', 'unknown func must error');
    const stop = await cmd({ func: 'stop' });
    check(stop.status === 'success', `stop: ${JSON.stringify(stop)}`);
    await sleep(1000);
    const seq = statusEvents.map((e) => e.event?.state);
    check(seq.includes('running') && seq.includes('stopped'), `status events incomplete: ${seq.join(',')}`);

    // --- 4. graceful shutdown → offline beacon → DB flips offline ---
    agent.kill('SIGTERM');
    await Promise.race([agentExited, sleep(8000)]);
    await pollDb(pg, (r) => r.length === 1 && r[0].connection_status === 'offline', 'beacon offline', 20_000);
  } finally {
    // Never leak the agent. Without this, any throw above (e.g. a request
    // timeout) or an agent that ignores SIGTERM would leave the child running
    // after the test process exits. Ask politely first, then force.
    if (agentAlive()) {
      agent.kill('SIGTERM');
      await Promise.race([agentExited, sleep(3000)]);
      if (agentAlive()) agent.kill('SIGKILL');
    }
  }

  await nc.close();
  await pg.end();

  if (errs.length) {
    console.error('\nCONTRACT FAIL:');
    errs.forEach((e) => console.error('  -', e));
    process.exit(1);
  }
  console.log('\nCONTRACT PASS: heartbeat shape, auto-register, connected/offline lifecycle, instance command channel, push events');
  process.exit(0);
};
|
||||
|
||||
main().catch((e) => {
|
||||
console.error('contract test crashed:', e);
|
||||
process.exit(1);
|
||||
});
|
||||
22
corrosion-host-agent/.cargo/config.toml
Normal file
22
corrosion-host-agent/.cargo/config.toml
Normal file
@@ -0,0 +1,22 @@
|
||||
# Corrosion Host Agent — cross-compilation configuration
|
||||
#
|
||||
# Deploy targets:
|
||||
# Linux: x86_64-unknown-linux-musl (fully static — runs on any distro)
|
||||
# Windows: x86_64-pc-windows-msvc (build via `cargo xwin build` on non-Windows)
|
||||
#
|
||||
# Prerequisites on macOS:
|
||||
# brew install filosottile/musl-cross/musl-cross (x86_64-linux-musl-gcc)
|
||||
# cargo install cargo-xwin (bundles MSVC CRT + lld-link)
|
||||
|
||||
[target.x86_64-unknown-linux-musl]
|
||||
linker = "x86_64-linux-musl-gcc"
|
||||
|
||||
[env]
|
||||
CC_x86_64_unknown_linux_musl = "x86_64-linux-musl-gcc"
|
||||
|
||||
[target.x86_64-pc-windows-msvc]
|
||||
linker = "lld-link"
|
||||
# Statically link the MSVC CRT so the agent runs on fresh Windows installs
|
||||
# without the Visual C++ Redistributable (otherwise: STATUS_DLL_NOT_FOUND on
|
||||
# any machine missing VCRUNTIME140.dll — most fresh OEM images).
|
||||
rustflags = ["-C", "target-feature=+crt-static"]
|
||||
1
corrosion-host-agent/.gitignore
vendored
Normal file
1
corrosion-host-agent/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/target
|
||||
2874
corrosion-host-agent/Cargo.lock
generated
Normal file
2874
corrosion-host-agent/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
46
corrosion-host-agent/Cargo.toml
Normal file
46
corrosion-host-agent/Cargo.toml
Normal file
@@ -0,0 +1,46 @@
|
||||
[package]
|
||||
name = "corrosion-host-agent"
|
||||
version = "2.0.0-alpha.11"
|
||||
edition = "2021"
|
||||
description = "Corrosion Host Agent — multi-game ops runtime for self-hosted game servers"
|
||||
license = "UNLICENSED"
|
||||
publish = false
|
||||
|
||||
[[bin]]
|
||||
name = "corrosion-host-agent"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
tokio-util = { version = "0.7", features = ["rt"] }
|
||||
futures = "0.3"
|
||||
async-nats = "0.37"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
toml = "0.8"
|
||||
sysinfo = "0.33"
|
||||
chrono = { version = "0.4", features = ["serde", "clock"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||
anyhow = "1"
|
||||
async-trait = "0.1"
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
rand = "0.8"
|
||||
tokio-tungstenite = "0.24"
|
||||
minisign-verify = "0.2.5"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream"] }
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
libc = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
||||
# Size-optimized release: single static binary living next to RAM-heavy game
|
||||
# servers. Panic stays 'unwind' so a panicking task surfaces through its
|
||||
# JoinHandle instead of killing the whole agent.
|
||||
[profile.release]
|
||||
opt-level = "s"
|
||||
lto = true
|
||||
codegen-units = 1
|
||||
strip = true
|
||||
215
corrosion-host-agent/PROTOCOL.md
Normal file
215
corrosion-host-agent/PROTOCOL.md
Normal file
@@ -0,0 +1,215 @@
|
||||
# Corrosion Wire Protocol v2
|
||||
|
||||
Status: **Phase 0 + Phase 1 process control implemented** (host heartbeat,
|
||||
host commands, going-offline beacon, per-instance start/stop/restart/status
|
||||
with push state events). RCON, SteamCMD updates, the jailed file manager, and
|
||||
signed self-update are also live; game-adapter-specific commands are specified but not yet implemented.
|
||||
|
||||
## Design
|
||||
|
||||
One **host agent** per machine supervises **N game instances**. Subjects are
|
||||
scoped license-first, then by addressee:
|
||||
|
||||
```
|
||||
corrosion.{license_id}.host.* host-level (the agent itself)
|
||||
corrosion.{license_id}.{instance_id}.* instance-level (one game server)
|
||||
```
|
||||
|
||||
`instance_id` is a config-defined slug (`[a-z0-9_-]{1,64}`), validated at
|
||||
agent start. `host` is a reserved segment and can never be an instance id.
|
||||
Payloads are JSON. Every heartbeat carries `"schema": 2` so consumers can
|
||||
distinguish v2 from the legacy Go companion protocol (which used
|
||||
`corrosion.{license_id}.companion.heartbeat`, no schema field).
|
||||
|
||||
## Host-level subjects (Phase 0 — live)
|
||||
|
||||
### `corrosion.{license_id}.host.heartbeat` (agent → backend, publish)
|
||||
|
||||
Published every `heartbeat_seconds` (default 60, jittered ±20%).
|
||||
|
||||
```json
|
||||
{
|
||||
"schema": 2,
|
||||
"timestamp": "2026-06-11T18:00:00Z",
|
||||
"agent": {
|
||||
"version": "2.0.0-alpha.1",
|
||||
"commit": "a8722a7",
|
||||
"os": "linux",
|
||||
"arch": "x86_64",
|
||||
"uptime_seconds": 86400
|
||||
},
|
||||
"host": {
|
||||
"hostname": "asgard-01",
|
||||
"cpu_percent": 12.5,
|
||||
"cpu_cores": 80,
|
||||
"mem_total_mb": 262144,
|
||||
"mem_used_mb": 81920,
|
||||
"uptime_seconds": 1209600,
|
||||
"disks": [
|
||||
{ "mount": "/", "total_mb": 1907729, "free_mb": 1532211 }
|
||||
]
|
||||
},
|
||||
"instances": [
|
||||
{
|
||||
"id": "rust-main",
|
||||
"game": "rust",
|
||||
"label": "Main 2x Vanilla",
|
||||
"state": "configured",
|
||||
"root_disk_free_mb": 1532211
|
||||
}
|
||||
],
|
||||
"probe": {
|
||||
"timestamp": "2026-06-11T17:58:00Z",
|
||||
"results": [
|
||||
{ "name": "corrosion-cdn", "host": "cdn.corrosionmgmt.com", "port": 443, "ok": true, "latency_ms": 18 }
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
All telemetry is measured, never fabricated. Fields the agent cannot measure
|
||||
are omitted (`probe` before the first probe completes, `hostname` if
|
||||
unavailable).
|
||||
|
||||
Instance `state` values — process-managed (an `executable` is configured):
|
||||
`running`, `stopped`, `starting`, `stopping`, `crashed`; unmanaged
|
||||
(telemetry-only): `configured` (root exists), `missing_root`. Each instance
|
||||
also reports `uptime_seconds` (0 unless running).
|
||||
|
||||
### `corrosion.{license_id}.host.cmd` (backend → agent, request-reply)
|
||||
|
||||
Request: `{ "func": "<name>" }`. Reply: `{ "status": "success" | "error", ... }`.
|
||||
|
||||
| func | Reply payload |
|
||||
| --------- | -------------------------------------------------------- |
|
||||
| `ping` | `version`, `commit`, `uptime_seconds` |
|
||||
| `probe` | `report` — fresh ProbeReport (also cached for heartbeat) |
|
||||
| `sysinfo` | `snapshot` — full heartbeat payload, collected on demand |
|
||||
| `update` | `{ "func": "update", "url": "https://cdn.corrosionmgmt.com/host-agent/.../corrosion-host-agent-<plat>" }` → downloads the binary + `<url>.minisig`, verifies the minisign signature against the agent's EMBEDDED public key, atomically swaps (with `.old` rollback), replies `{ status: success, message: "...relaunching" }`, then relaunches the new binary. Rejects anything not signed by the release key and any URL that isn't `https://cdn.corrosionmgmt.com`. |
|
||||
|
||||
Unknown funcs return `status: "error"` with a message listing supported funcs.
|
||||
|
||||
### `corrosion.{license_id}.host.going_offline` (agent → backend, publish)
|
||||
|
||||
Best-effort beacon (500ms budget) on graceful shutdown so the panel can flip
|
||||
the host to offline immediately instead of waiting out heartbeat staleness.
|
||||
Payload: `{}`.
|
||||
|
||||
## Instance-level subjects
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.cmd` (backend → agent, request-reply) — LIVE
|
||||
|
||||
Lifecycle and control for one game instance.
|
||||
|
||||
The same `start`/`stop`/`restart`/`status` funcs work for **every** game: the
|
||||
agent picks a `Supervisor` impl per game — a spawned-process supervisor for
|
||||
Rust/Conan/Soulmask, a **docker-compose supervisor for Dune** (`docker compose
|
||||
up -d` / `stop` / `restart` against the instance's compose project, configured
|
||||
via `[instance.docker_compose]`). The wire contract is identical; only the
|
||||
management model behind it differs.
|
||||
|
||||
Implemented funcs: `start`, `stop` (graceful with 30s budget, then force
|
||||
kill — process supervisor; Dune maps stop to `docker compose stop`), `restart`,
|
||||
`status` (returns `state` + `uptime_seconds`), and
|
||||
`rcon` — `{ "func": "rcon", "command": "<console command>" }` returns
|
||||
`{ "status": "success", "output": <server response> }`. Protocol per game:
|
||||
WebRCON (WebSocket JSON) for rust, Source RCON (Valve TCP) for
|
||||
conan/soulmask; explicit `kind` override available in the instance's
|
||||
`[instance.rcon]` config. Always targets 127.0.0.1 (agent is co-located).
|
||||
Errors reply `{ "status": "error", "message": ... }` — including start on an
|
||||
unmanaged instance, double start, missing rcon config, and unknown funcs.
|
||||
|
||||
Also implemented: `steam_update` — `{ "func": "steam_update" }` runs
|
||||
SteamCMD for the instance's game (app ids: rust 258550, conan 443030,
|
||||
soulmask 3017310/3017300; dune rejects — Docker images, no SteamCMD),
|
||||
streaming progress lines to `corrosion.{license}.{instance}.steam_status`
|
||||
and replying on completion.
|
||||
|
||||
Planned funcs: `oxide_install` (rust), plus game-adapter-specific
|
||||
commands (Dune: RabbitMQ admin-bus commands, Coriolis reset, Postgres admin
|
||||
surface). Dune **lifecycle** is already covered by the shared
|
||||
start/stop/restart funcs above; container crash-detection and state adoption on
|
||||
agent restart land with Phase 3b.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.steam_status` (agent → backend, publish) — LIVE
|
||||
|
||||
Per-line SteamCMD stdout during a `steam_update`, so the panel can show
|
||||
live update progress. Payload: `{ "timestamp", "instance_id", "line" }`.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.files.cmd` (backend → agent, request-reply) — LIVE
|
||||
|
||||
Jailed file manager, confined to the instance `root` (two-stage check:
|
||||
lexical normalize + canonicalize, defeating `../` traversal and symlink
|
||||
escape). Request `{ "op": "list|read|write|delete|rename|mkdir|mkfile|move|copy",
|
||||
"path": "rel/path", "dest"?, "content"?, "name"? }`; reply
|
||||
`{ "status": "success", "data": ... }` or `{ "status": "error", "message": ... }`.
|
||||
`read` caps at 5 MiB. Replaces the Go agent's UNJAILED legacy files API,
|
||||
which is retired and will not be ported.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.status` (agent → backend, publish) — LIVE
|
||||
|
||||
State-change events so the panel does not wait for the next heartbeat.
|
||||
Payload: `{ "timestamp", "instance_id", "event": { "state": ..., "exit_code"? } }`.
|
||||
|
||||
Semantics: **keep-latest state sync**, not a lossless transition ledger —
|
||||
near-instant transient states (e.g. `starting` when spawn succeeds
|
||||
immediately) may coalesce into the following state. Consumers should treat
|
||||
each event as "current state is now X".
|
||||
|
||||
Known Phase 1 limitation: the supervisor does not yet persist/adopt PIDs — if
|
||||
the agent itself restarts while a game server is running, the game process
|
||||
survives but reports `stopped` until restarted through the panel. PID
|
||||
adoption is queued with the service-install work.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.console` (agent → backend, publish)
|
||||
|
||||
Live console/log lines for the panel console view.
|
||||
|
||||
### `corrosion.{license_id}.{instance_id}.files.cmd` (backend → agent, request-reply)
|
||||
|
||||
VueFinder-style file manager ops, jailed to the instance root. Carries over
|
||||
the Go agent's jailed filemanager semantics (`fm_list`, `fm_save`, ...); the
|
||||
legacy UNJAILED `files.get/put/delete/list` API is retired and will not be
|
||||
ported.
|
||||
|
||||
## Backend mapping notes (Phase 0)
|
||||
|
||||
- The NestJS NATS bridge subscribes `corrosion.*.host.heartbeat` and
|
||||
`corrosion.*.host.going_offline`.
|
||||
- Until the license→host→instance schema lands, the backend may map the host
|
||||
heartbeat onto the existing single `server_connections` row per license:
|
||||
`companion_last_seen` ← heartbeat arrival, `connection_status` ←
|
||||
connected/offline, resources ← `host.cpu_percent` / `mem_*` / first disk.
|
||||
Instance-level mapping activates with the fleet schema.
|
||||
|
||||
## Probing — scope honesty
|
||||
|
||||
The Phase 0 prober measures **outbound** reachability from the host (TCP
|
||||
connect + latency). It cannot verify **inbound** port-forwarding (the thing
|
||||
players hit). Inbound verification requires a backend-side reverse probe
|
||||
service that attempts connections to the customer's public IP/ports on
|
||||
request; that is specified as a Phase 1+ feature and will reuse this report
|
||||
format with `direction: "inbound"`.
|
||||
|
||||
## Authentication & tenant isolation
|
||||
|
||||
The broker enforces per-license auth: an agent connects with `user = license_id`,
|
||||
`password = HMAC-SHA256(license_id, NATS_TOKEN_SECRET)` (shown on the panel
|
||||
Server page), and is scoped to `corrosion.{license_id}.>` only. The backend uses
|
||||
a privileged internal user. This makes cross-tenant access impossible at the
|
||||
broker, not just by convention.
|
||||
|
||||
**Reply-subject rule:** per-license users have NO `_INBOX` permission (granting
|
||||
it would let one license read another's request-reply traffic). Therefore any
|
||||
backend→agent request-reply MUST use a reply subject inside the license
|
||||
namespace — e.g. `corrosion.{license_id}.reply.<id>` — never the client's
|
||||
default global `_INBOX`. The agent is unaffected: it responds to whatever
|
||||
`msg.reply` it receives. The constraint is on the requester (the internal user
|
||||
has full access). The contract/CI tests run against an unauthenticated broker
|
||||
and use the default inbox; production request-reply must follow this rule.
|
||||
|
||||
## Versioning
|
||||
|
||||
- The agent embeds semver + git hash + build timestamp (`--version`,
|
||||
heartbeat `agent` block).
|
||||
- Schema changes bump `schema` and are additive where possible.
|
||||
43
corrosion-host-agent/README.md
Normal file
43
corrosion-host-agent/README.md
Normal file
@@ -0,0 +1,43 @@
|
||||
# Corrosion Host Agent
|
||||
|
||||
Rust rewrite of the Go companion agent (`companion-agent/`, retained as the
|
||||
behavior reference until parity). One agent per machine supervises every game
|
||||
instance on that host — Rust, Conan Exiles, Soulmask, Dune: Awakening.
|
||||
|
||||
- **Wire protocol**: see [PROTOCOL.md](./PROTOCOL.md) (v2, instance-scoped subjects)
|
||||
- **Config**: see [agent.example.toml](./agent.example.toml)
|
||||
|
||||
## Status — Phase 0
|
||||
|
||||
- [x] Multi-instance TOML config + env overrides (`CORROSION_LICENSE_ID`, `CORROSION_NATS_URL`, `CORROSION_NATS_TOKEN`)
|
||||
- [x] NATS connection (infinite reconnect, capped backoff, 30s ping, offline send-buffering, `tls://` support)
|
||||
- [x] Host heartbeat with real telemetry (sysinfo: CPU, memory, disks) — no fabricated values
|
||||
- [x] Connectivity prober (outbound TCP, periodic + on-demand)
|
||||
- [x] Host command channel (`ping`, `probe`, `sysinfo`)
|
||||
- [x] Graceful shutdown (cancellation token, going-offline beacon, NATS flush)
|
||||
- [x] Phase 1a: process supervision — per-instance start/stop/restart/status over
|
||||
`{instance}.cmd` request-reply, push state events on `{instance}.status`,
|
||||
crash detection with exit codes, live state in heartbeats
|
||||
(integration-tested with real processes + live-NATS contract test)
|
||||
- [x] Phase 1b: RCON trait (WebRCON rust / TCP conan+soulmask), SteamCMD, jailed file manager
|
||||
- [~] Phase 2: Dune Docker adapter — **compose lifecycle done** (`docker compose up -d/stop/restart`
|
||||
via the `Supervisor` trait + `DockerComposeSupervisor`); RabbitMQ admin bus + Postgres admin
|
||||
surface deferred. Container crash-detection + state adoption on agent restart land with Phase 3b.
|
||||
- [x] Phase 3a: SIGNED self-update — minisign-verified download+swap+relaunch (NATS `update` func); embedded public key; CI signs releases
|
||||
- [ ] Phase 3b: service install (systemd/SCM), PID adoption
|
||||
|
||||
## Build
|
||||
|
||||
```bash
|
||||
cargo build --release # native
|
||||
cargo build --release --target x86_64-unknown-linux-musl   # linux deploy target (fully static)
|
||||
cargo build --release --target x86_64-pc-windows-msvc # windows (cargo-xwin on non-Windows)
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
```bash
|
||||
corrosion-host-agent --config ./agent.toml # foreground
|
||||
corrosion-host-agent --config ./agent.toml check # validate config only
|
||||
corrosion-host-agent version # semver + git hash + build ts
|
||||
```
|
||||
88
corrosion-host-agent/agent.example.toml
Normal file
88
corrosion-host-agent/agent.example.toml
Normal file
@@ -0,0 +1,88 @@
|
||||
# Corrosion Host Agent configuration
|
||||
# Default location: /etc/corrosion/agent.toml (Linux)
|
||||
# C:\ProgramData\Corrosion\agent.toml (Windows)
|
||||
# Override with: corrosion-host-agent --config /path/to/agent.toml
|
||||
#
|
||||
# Secrets can come from the environment instead of this file:
|
||||
# CORROSION_LICENSE_ID, CORROSION_NATS_URL, CORROSION_NATS_PASSWORD, CORROSION_NATS_TOKEN
|
||||
|
||||
[agent]
|
||||
license_id = "your-license-uuid"
|
||||
nats_url = "nats://nats.corrosionmgmt.com:4222"
|
||||
# Per-license auth (preferred): user = license id, password = the token shown
|
||||
# on the panel Server page. The broker scopes you to corrosion.{license}.>
|
||||
# nats_user = "your-license-uuid" # defaults to license_id if omitted
|
||||
# nats_password = "set-me-or-use-CORROSION_NATS_PASSWORD"
|
||||
# nats_token = "legacy token-only auth; use nats_password instead"
|
||||
heartbeat_seconds = 60
|
||||
log_level = "info"
|
||||
|
||||
# One agent supervises every game instance on this host.
|
||||
# Each instance gets a stable id (lowercase letters, digits, '-', '_') that
|
||||
# the panel uses to address it. Changing an id orphans its panel history.
|
||||
|
||||
[[instance]]
|
||||
id = "rust-main"
|
||||
game = "rust" # rust | conan | soulmask | dune
|
||||
root = "/opt/rustserver"
|
||||
label = "Main 2x Vanilla"
|
||||
|
||||
# RCON lets the panel send console commands to the running server.
|
||||
# For rust the protocol is WebRCON (WebSocket JSON); for conan/soulmask it is
|
||||
# Source RCON (Valve TCP binary). `kind` is optional — it is inferred from
|
||||
# the game name when absent.
|
||||
#
|
||||
# The [instance.rcon] sub-table MUST immediately follow the [[instance]] entry
|
||||
# it belongs to (standard TOML array-of-tables scoping rule).
|
||||
[instance.rcon]
|
||||
port = 28016
|
||||
password = "changeme"
|
||||
# kind = "webrcon" # explicit override; omit to infer from game
|
||||
|
||||
# [[instance]]
|
||||
# id = "soulmask-main"
|
||||
# game = "soulmask"
|
||||
# root = "/opt/soulmask/main"
|
||||
# label = "Cloud Mist Forest (cluster main)"
|
||||
#
|
||||
# [instance.rcon]
|
||||
# port = 19000
|
||||
# password = "changeme"
|
||||
# # kind = "source" # inferred automatically for soulmask
|
||||
|
||||
# SteamCMD update settings — optional sub-table for any instance.
|
||||
# Absent = defaults: steamcmd binary resolved via PATH, validate = false.
|
||||
#
|
||||
# [instance.steamcmd]
|
||||
# steamcmd_path = "/opt/steamcmd/steamcmd.sh" # omit to use PATH
|
||||
# validate = true # enable file-hash check pass
|
||||
#
|
||||
# Dune instances do not use SteamCMD (Docker images); the steam_update func
|
||||
# will return a clear error if invoked on a dune instance.
|
||||
|
||||
# --- Dune: Awakening (container-managed) ---------------------------------
|
||||
# Dune runs as a docker-compose stack, not a spawned process — leave
|
||||
# `executable` unset and add an [instance.docker_compose] block. The agent
|
||||
# drives `docker compose up -d / stop / restart` for start/stop/restart, and
|
||||
# `steam_update` is rejected (Dune ships as Docker images).
|
||||
#
|
||||
# [[instance]]
|
||||
# id = "dune-main"
|
||||
# game = "dune"
|
||||
# root = "/opt/dune" # directory the compose commands run in
|
||||
# label = "Arrakis (battlegroup)"
|
||||
#
|
||||
# [instance.docker_compose]
|
||||
# file = "docker-compose.yml" # -f; relative to root. Omit to use compose's discovery
|
||||
# project = "dune-main" # -p; defaults to the instance id
|
||||
# service = "gameserver" # limit lifecycle to one service; omit for the whole stack
|
||||
# command = ["docker", "compose"] # default; use ["docker-compose"] for the legacy binary
|
||||
|
||||
[prober]
|
||||
interval_seconds = 300
|
||||
|
||||
# Extra outbound TCP checks beyond the built-in defaults:
|
||||
# [[prober.target]]
|
||||
# name = "steam-cdn"
|
||||
# host = "steamcdn-a.akamaihd.net"
|
||||
# port = 443
|
||||
21
corrosion-host-agent/build.rs
Normal file
21
corrosion-host-agent/build.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
use std::process::Command;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// Build script: exposes the current git commit and the build time to the
/// crate as compile-time environment variables.
///
/// * `CORROSION_GIT_HASH` — output of `git rev-parse --short HEAD`, or
///   `"unknown"` when git cannot be run or exits unsuccessfully.
/// * `CORROSION_BUILD_TS` — seconds since the Unix epoch at build time, or
///   `0` if the system clock reads earlier than the epoch.
fn main() {
    let git_hash = Command::new("git")
        .args(["rev-parse", "--short", "HEAD"])
        .output()
        .ok()
        .filter(|out| out.status.success())
        .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
        .unwrap_or_else(|| "unknown".to_string());

    let build_ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs());

    println!("cargo:rustc-env=CORROSION_GIT_HASH={git_hash}");
    println!("cargo:rustc-env=CORROSION_BUILD_TS={build_ts}");

    // `.git/HEAD` is rewritten only when a different branch/commit is checked
    // out. Committing on the current branch updates the ref file under
    // `refs/heads` instead, so that directory is watched too; otherwise the
    // embedded hash goes stale in incremental builds.
    println!("cargo:rerun-if-changed=../.git/HEAD");
    println!("cargo:rerun-if-changed=../.git/refs/heads");
}
|
||||
23
corrosion-host-agent/src/agent.rs
Normal file
23
corrosion-host-agent/src/agent.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
//! Shared agent handle: every subsystem task holds an `Arc<Agent>`.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::config::Settings;
|
||||
use crate::prober::ProbeReport;
|
||||
use crate::supervisor::Supervisor;
|
||||
|
||||
pub struct Agent {
|
||||
pub cfg: Settings,
|
||||
pub nats: async_nats::Client,
|
||||
pub started: Instant,
|
||||
pub last_probe: RwLock<Option<ProbeReport>>,
|
||||
/// One supervisor per instance, keyed by instance id. The concrete impl
|
||||
/// (process vs docker-compose) is chosen per game by the factory in main;
|
||||
/// every subsystem talks to the `Supervisor` trait only.
|
||||
pub supervisors: HashMap<String, Arc<dyn Supervisor>>,
|
||||
pub shutdown: CancellationToken,
|
||||
}
|
||||
66
corrosion-host-agent/src/bus.rs
Normal file
66
corrosion-host-agent/src/bus.rs
Normal file
@@ -0,0 +1,66 @@
|
||||
//! NATS connection layer.
|
||||
//!
|
||||
//! Connection parameters follow the production-proven Vigilance profile:
|
||||
//! infinite reconnects with capped linear backoff (100ms per attempt, max 8s), 30s pings to detect
|
||||
//! zombie TCP in ~60s, and a deep client-side send queue so telemetry buffers
|
||||
//! through broker outages instead of erroring.
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::config::Settings;
|
||||
|
||||
pub async fn connect(cfg: &Settings) -> Result<async_nats::Client> {
|
||||
let (url, force_tls) = normalize_url(&cfg.nats_url);
|
||||
|
||||
let mut opts = async_nats::ConnectOptions::new()
|
||||
.name("corrosion-host-agent")
|
||||
.retry_on_initial_connect()
|
||||
.max_reconnects(None)
|
||||
.ping_interval(Duration::from_secs(30))
|
||||
.client_capacity(8192)
|
||||
.reconnect_delay_callback(|attempts| {
|
||||
Duration::from_millis(std::cmp::min(attempts as u64 * 100, 8_000))
|
||||
})
|
||||
.event_callback(|event| async move {
|
||||
match event {
|
||||
async_nats::Event::Disconnected => tracing::warn!("nats disconnected"),
|
||||
async_nats::Event::Connected => tracing::info!("nats connected"),
|
||||
other => tracing::debug!("nats event: {other}"),
|
||||
}
|
||||
});
|
||||
|
||||
if force_tls {
|
||||
opts = opts.require_tls(true);
|
||||
}
|
||||
|
||||
// Per-license auth: the broker maps user=license_id, password=derived
|
||||
// token to permissions scoped to corrosion.{license_id}.>. Falls back to
|
||||
// token-only or anonymous so the agent still works against a broker that
|
||||
// hasn't enforced auth yet (transition period).
|
||||
if let Some(password) = &cfg.nats_password {
|
||||
let user = cfg.nats_user.clone().unwrap_or_else(|| cfg.license_id.clone());
|
||||
opts = opts.user_and_password(user, password.clone());
|
||||
} else if let Some(token) = &cfg.nats_token {
|
||||
opts = opts.token(token.clone());
|
||||
}
|
||||
|
||||
let client = opts
|
||||
.connect(&url)
|
||||
.await
|
||||
.with_context(|| format!("connecting to NATS at {url}"))?;
|
||||
|
||||
Ok(client)
|
||||
}
|
||||
|
||||
/// Rewrite the TLS-flavoured schemes `tls://` and `nats+tls://` to plain
/// `nats://`, reporting through the returned flag that TLS must be required.
/// Any other input is returned unchanged with the flag set to `false`.
fn normalize_url(raw: &str) -> (String, bool) {
    const TLS_SCHEMES: [&str; 2] = ["tls://", "nats+tls://"];

    let stripped = TLS_SCHEMES
        .iter()
        .find_map(|&scheme| raw.strip_prefix(scheme));

    match stripped {
        Some(rest) => (format!("nats://{rest}"), true),
        None => (raw.to_string(), false),
    }
}
|
||||
245
corrosion-host-agent/src/config.rs
Normal file
245
corrosion-host-agent/src/config.rs
Normal file
@@ -0,0 +1,245 @@
|
||||
//! Agent configuration: TOML file + environment overrides.
|
||||
//!
|
||||
//! Multi-instance is foundational, not bolted on: one agent supervises N game
|
||||
//! instances on the host, each declared as an `[[instance]]` block. Connection
|
||||
//! secrets may come from env so the config file can be world-readable-ish
|
||||
//! while the token is not.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::docker_compose::DockerComposeConfig;
|
||||
use crate::rcon::RconConfig;
|
||||
use crate::steamcmd::SteamcmdConfig;
|
||||
|
||||
/// Ids an instance may not use: instance ids live in the same NATS subject
/// namespace as these host-level segments.
const RESERVED_INSTANCE_IDS: &[&str] = &["host", "cmd", "files", "update", "agent"];

/// Values accepted for an instance's `game` field.
pub const SUPPORTED_GAMES: &[&str] = &["rust", "conan", "soulmask", "dune"];
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct ConfigFile {
|
||||
pub agent: AgentSection,
|
||||
#[serde(default, rename = "instance")]
|
||||
pub instances: Vec<InstanceConfig>,
|
||||
#[serde(default)]
|
||||
pub prober: ProberSection,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct AgentSection {
|
||||
pub license_id: Option<String>,
|
||||
pub nats_url: Option<String>,
|
||||
pub nats_token: Option<String>,
|
||||
/// NATS username for per-license auth. Defaults to license_id when a
|
||||
/// password is set but no user is given.
|
||||
pub nats_user: Option<String>,
|
||||
/// NATS password (the per-license token). When set, the agent authenticates
|
||||
/// with user+password instead of a bare token.
|
||||
pub nats_password: Option<String>,
|
||||
#[serde(default = "default_heartbeat_seconds")]
|
||||
pub heartbeat_seconds: u64,
|
||||
#[serde(default = "default_log_level")]
|
||||
pub log_level: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct InstanceConfig {
|
||||
/// Short slug, unique per license: becomes a NATS subject segment.
|
||||
pub id: String,
|
||||
/// One of SUPPORTED_GAMES.
|
||||
pub game: String,
|
||||
/// Install root for this instance on the host.
|
||||
pub root: PathBuf,
|
||||
/// Optional human label shown in the panel.
|
||||
#[serde(default)]
|
||||
pub label: Option<String>,
|
||||
/// Game server executable. Relative paths resolve against `root`.
|
||||
/// Absent = unmanaged instance (telemetry only, no process control).
|
||||
#[serde(default)]
|
||||
pub executable: Option<PathBuf>,
|
||||
/// Arguments as a proper list — no shell splitting, quoted values survive.
|
||||
#[serde(default)]
|
||||
pub args: Vec<String>,
|
||||
/// Working directory for the process. Defaults to the executable's directory.
|
||||
#[serde(default)]
|
||||
pub working_dir: Option<PathBuf>,
|
||||
/// RCON connection settings for this instance. Absent = rcon unavailable.
|
||||
/// Protocol defaults to WebRcon for rust, Source for conan/soulmask.
|
||||
#[serde(default)]
|
||||
pub rcon: Option<RconConfig>,
|
||||
/// SteamCMD update settings. Absent = defaults apply (steamcmd on PATH,
|
||||
/// validate = false).
|
||||
#[serde(default)]
|
||||
pub steamcmd: Option<SteamcmdConfig>,
|
||||
/// Docker-compose settings for container-managed games (Dune). Absent =
|
||||
/// defaults apply (compose file in the instance root, project = instance id).
|
||||
#[serde(default)]
|
||||
pub docker_compose: Option<DockerComposeConfig>,
|
||||
}
|
||||
|
||||
impl InstanceConfig {
|
||||
/// Absolute executable path, if this instance is process-managed.
|
||||
pub fn resolved_executable(&self) -> Option<PathBuf> {
|
||||
self.executable.as_ref().map(|exe| {
|
||||
if exe.is_absolute() {
|
||||
exe.clone()
|
||||
} else {
|
||||
self.root.join(exe)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct ProberSection {
|
||||
#[serde(default = "default_probe_interval")]
|
||||
pub interval_seconds: u64,
|
||||
/// Extra TCP targets beyond the built-in defaults.
|
||||
#[serde(default, rename = "target")]
|
||||
pub targets: Vec<ProbeTargetConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct ProbeTargetConfig {
|
||||
pub name: String,
|
||||
pub host: String,
|
||||
pub port: u16,
|
||||
}
|
||||
|
||||
/// Serde default for `[agent].heartbeat_seconds`.
fn default_heartbeat_seconds() -> u64 {
    const HEARTBEAT_SECONDS: u64 = 60;
    HEARTBEAT_SECONDS
}
|
||||
|
||||
/// Serde default for `[prober].interval_seconds`.
fn default_probe_interval() -> u64 {
    const PROBE_INTERVAL_SECONDS: u64 = 300;
    PROBE_INTERVAL_SECONDS
}
|
||||
|
||||
/// Serde default for `[agent].log_level`.
fn default_log_level() -> String {
    String::from("info")
}
|
||||
|
||||
/// Fully-resolved settings after merging file + env. Everything required is
|
||||
/// present and validated.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Settings {
|
||||
pub license_id: String,
|
||||
pub nats_url: String,
|
||||
pub nats_token: Option<String>,
|
||||
pub nats_user: Option<String>,
|
||||
pub nats_password: Option<String>,
|
||||
pub heartbeat_seconds: u64,
|
||||
pub log_level: String,
|
||||
pub instances: Vec<InstanceConfig>,
|
||||
pub probe_interval_seconds: u64,
|
||||
pub probe_targets: Vec<ProbeTargetConfig>,
|
||||
}
|
||||
|
||||
/// Platform default location of the agent config file:
/// `C:\ProgramData\Corrosion\agent.toml` on Windows,
/// `/etc/corrosion/agent.toml` everywhere else.
pub fn default_config_path() -> PathBuf {
    let location = if cfg!(windows) {
        r"C:\ProgramData\Corrosion\agent.toml"
    } else {
        "/etc/corrosion/agent.toml"
    };
    PathBuf::from(location)
}
|
||||
|
||||
pub fn load(path: &Path) -> Result<Settings> {
|
||||
let raw = std::fs::read_to_string(path)
|
||||
.with_context(|| format!("reading config file {}", path.display()))?;
|
||||
let file: ConfigFile = toml::from_str(&raw)
|
||||
.with_context(|| format!("parsing config file {}", path.display()))?;
|
||||
resolve(file)
|
||||
}
|
||||
|
||||
/// Merge env overrides (env wins) and validate.
|
||||
fn resolve(file: ConfigFile) -> Result<Settings> {
|
||||
let license_id = std::env::var("CORROSION_LICENSE_ID")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.license_id)
|
||||
.context("license_id missing: set [agent].license_id or CORROSION_LICENSE_ID")?;
|
||||
|
||||
let nats_url = std::env::var("CORROSION_NATS_URL")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_url)
|
||||
.context("nats_url missing: set [agent].nats_url or CORROSION_NATS_URL")?;
|
||||
|
||||
let nats_token = std::env::var("CORROSION_NATS_TOKEN")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_token);
|
||||
|
||||
let nats_user = std::env::var("CORROSION_NATS_USER")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_user);
|
||||
|
||||
let nats_password = std::env::var("CORROSION_NATS_PASSWORD")
|
||||
.ok()
|
||||
.filter(|v| !v.is_empty())
|
||||
.or(file.agent.nats_password);
|
||||
|
||||
validate_subject_segment("license_id", &license_id)?;
|
||||
|
||||
let mut seen: HashSet<&str> = HashSet::new();
|
||||
for inst in &file.instances {
|
||||
validate_subject_segment("instance id", &inst.id)?;
|
||||
if RESERVED_INSTANCE_IDS.contains(&inst.id.as_str()) {
|
||||
bail!("instance id '{}' is reserved", inst.id);
|
||||
}
|
||||
if !seen.insert(inst.id.as_str()) {
|
||||
bail!("duplicate instance id '{}'", inst.id);
|
||||
}
|
||||
if !SUPPORTED_GAMES.contains(&inst.game.as_str()) {
|
||||
bail!(
|
||||
"instance '{}': unsupported game '{}' (supported: {})",
|
||||
inst.id,
|
||||
inst.game,
|
||||
SUPPORTED_GAMES.join(", ")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if file.agent.heartbeat_seconds < 10 {
|
||||
bail!("[agent].heartbeat_seconds must be >= 10");
|
||||
}
|
||||
|
||||
Ok(Settings {
|
||||
license_id,
|
||||
nats_url,
|
||||
nats_token,
|
||||
nats_user,
|
||||
nats_password,
|
||||
heartbeat_seconds: file.agent.heartbeat_seconds,
|
||||
log_level: file.agent.log_level,
|
||||
instances: file.instances,
|
||||
probe_interval_seconds: file.prober.interval_seconds.max(30),
|
||||
probe_targets: file.prober.targets,
|
||||
})
|
||||
}
|
||||
|
||||
/// NATS subject segments must not contain '.', '*', '>', whitespace, etc.
|
||||
/// Keep it strict: lowercase alphanumerics plus '-' and '_', max 64 chars.
|
||||
fn validate_subject_segment(what: &str, value: &str) -> Result<()> {
|
||||
if value.is_empty() || value.len() > 64 {
|
||||
bail!("{what} '{value}' must be 1-64 characters");
|
||||
}
|
||||
if !value
|
||||
.chars()
|
||||
.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-' || c == '_')
|
||||
{
|
||||
bail!("{what} '{value}' may only contain lowercase letters, digits, '-' and '_'");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
216
corrosion-host-agent/src/docker_compose.rs
Normal file
216
corrosion-host-agent/src/docker_compose.rs
Normal file
@@ -0,0 +1,216 @@
|
||||
//! Docker-compose instance supervision — the Dune: Awakening adapter.
|
||||
//!
|
||||
//! Dune does not ship as a SteamCMD-updated process like Rust/Conan/Soulmask;
|
||||
//! it runs as Docker container(s) (game server + RabbitMQ broker + Postgres),
|
||||
//! orchestrated as a compose stack (a "battlegroup"). So Dune lifecycle is
|
||||
//! `docker compose up -d / stop / restart` against the instance's compose
|
||||
//! project, not a spawned OS process. This supervisor implements the same
|
||||
//! [`Supervisor`] trait `ProcessSupervisor` does, so the instance command
|
||||
//! dispatch is identical — only the management model differs.
|
||||
//!
|
||||
//! Scope (first cut): lifecycle + cached state. Two parity items are deferred
|
||||
//! to Phase 3b alongside process PID adoption: (1) crash detection (containers
|
||||
//! give us no child handle — a `docker compose ps` poll loop would supply it);
|
||||
//! (2) state adoption on agent restart (a running stack reports `stopped` until
|
||||
//! the next lifecycle command). Both are reconcilable with a `ps` probe.
|
||||
//!
|
||||
//! Reference: docs/reference-repos/icehunter SETUP_DOCKER.md (the docker
|
||||
//! control plane this mirrors).
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use serde::Deserialize;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::{watch, Mutex};
|
||||
|
||||
use crate::config::InstanceConfig;
|
||||
use crate::supervisor::{InstanceState, Supervisor};
|
||||
|
||||
/// Per-instance docker-compose settings (`[instance.docker_compose]`). All
|
||||
/// fields optional — defaults cover the common "one compose file in the
|
||||
/// instance root" case.
|
||||
#[derive(Debug, Clone, Default, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct DockerComposeConfig {
|
||||
/// Compose file (`-f`). Relative paths resolve against the run dir. Default:
|
||||
/// compose's own discovery (docker-compose.yml in the run dir).
|
||||
#[serde(default)]
|
||||
pub file: Option<PathBuf>,
|
||||
/// Compose project name (`-p`). Default: the instance id.
|
||||
#[serde(default)]
|
||||
pub project: Option<String>,
|
||||
/// Limit lifecycle ops to one service. Default: every service in the file.
|
||||
#[serde(default)]
|
||||
pub service: Option<String>,
|
||||
/// Override the compose binary invocation. Default: `["docker","compose"]`.
|
||||
/// Use `["docker-compose"]` for the legacy standalone binary.
|
||||
#[serde(default)]
|
||||
pub command: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// Mutable supervisor bookkeeping kept behind the async mutex.
struct Inner {
    /// Set when the stack was last brought up or restarted successfully;
    /// cleared on a successful stop.
    started_at: Option<Instant>,
}
|
||||
|
||||
pub struct DockerComposeSupervisor {
|
||||
instance_id: String,
|
||||
/// Directory the compose commands run in (relative `-f`/file paths resolve
|
||||
/// against it).
|
||||
run_dir: PathBuf,
|
||||
compose_file: Option<PathBuf>,
|
||||
project: String,
|
||||
service: Option<String>,
|
||||
/// Compose binary + leading args, e.g. `["docker","compose"]`.
|
||||
command: Vec<String>,
|
||||
inner: Mutex<Inner>,
|
||||
state_tx: watch::Sender<InstanceState>,
|
||||
}
|
||||
|
||||
impl DockerComposeSupervisor {
|
||||
pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
|
||||
let dc = cfg.docker_compose.clone().unwrap_or_default();
|
||||
let run_dir = cfg
|
||||
.working_dir
|
||||
.clone()
|
||||
.unwrap_or_else(|| cfg.root.clone());
|
||||
let command = dc
|
||||
.command
|
||||
.filter(|c| !c.is_empty())
|
||||
.unwrap_or_else(|| vec!["docker".to_string(), "compose".to_string()]);
|
||||
let (state_tx, _) = watch::channel(InstanceState::Stopped);
|
||||
Arc::new(Self {
|
||||
instance_id: cfg.id.clone(),
|
||||
run_dir,
|
||||
compose_file: dc.file,
|
||||
project: dc.project.unwrap_or_else(|| cfg.id.clone()),
|
||||
service: dc.service,
|
||||
command,
|
||||
inner: Mutex::new(Inner { started_at: None }),
|
||||
state_tx,
|
||||
})
|
||||
}
|
||||
|
||||
fn set_state(&self, state: InstanceState) {
|
||||
let _ = self.state_tx.send_replace(state);
|
||||
}
|
||||
|
||||
/// Run one compose subcommand (`up`/`stop`/`restart`/...), bailing with the
|
||||
/// captured stderr on non-zero exit. Global flags (`-f`, `-p`) precede the
|
||||
/// subcommand; the optional single service is appended last.
|
||||
async fn run(&self, action: &str, action_args: &[&str]) -> Result<()> {
|
||||
let mut cmd = Command::new(&self.command[0]);
|
||||
cmd.args(&self.command[1..]);
|
||||
if let Some(file) = &self.compose_file {
|
||||
cmd.arg("-f").arg(file);
|
||||
}
|
||||
cmd.arg("-p").arg(&self.project);
|
||||
cmd.arg(action);
|
||||
cmd.args(action_args);
|
||||
if let Some(service) = &self.service {
|
||||
cmd.arg(service);
|
||||
}
|
||||
cmd.current_dir(&self.run_dir)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped());
|
||||
|
||||
let output = cmd
|
||||
.output()
|
||||
.await
|
||||
.with_context(|| format!("running `{} {action}` (is docker installed and on PATH?)", self.command.join(" ")))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let detail = if !stderr.trim().is_empty() {
|
||||
stderr.trim()
|
||||
} else {
|
||||
stdout.trim()
|
||||
};
|
||||
bail!("compose {action} failed ({}): {detail}", output.status);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Supervisor for DockerComposeSupervisor {
|
||||
fn instance_id(&self) -> &str {
|
||||
&self.instance_id
|
||||
}
|
||||
|
||||
fn state(&self) -> InstanceState {
|
||||
self.state_tx.borrow().clone()
|
||||
}
|
||||
|
||||
fn watch_state(&self) -> watch::Receiver<InstanceState> {
|
||||
self.state_tx.subscribe()
|
||||
}
|
||||
|
||||
async fn uptime_seconds(&self) -> u64 {
|
||||
let inner = self.inner.lock().await;
|
||||
match (&*self.state_tx.borrow(), inner.started_at) {
|
||||
(InstanceState::Running, Some(t)) => t.elapsed().as_secs(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
async fn start(self: Arc<Self>) -> Result<()> {
|
||||
if matches!(
|
||||
*self.state_tx.borrow(),
|
||||
InstanceState::Running | InstanceState::Starting
|
||||
) {
|
||||
bail!("instance '{}' is already running", self.instance_id);
|
||||
}
|
||||
self.set_state(InstanceState::Starting);
|
||||
match self.run("up", &["-d"]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = Some(Instant::now());
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!("instance '{}' compose up -d", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn stop(self: Arc<Self>) -> Result<()> {
|
||||
self.set_state(InstanceState::Stopping);
|
||||
match self.run("stop", &[]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = None;
|
||||
self.set_state(InstanceState::Stopped);
|
||||
tracing::info!("instance '{}' compose stop", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
// Stop failed — the stack is most likely still up.
|
||||
self.set_state(InstanceState::Running);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn restart(self: Arc<Self>) -> Result<()> {
|
||||
self.set_state(InstanceState::Starting);
|
||||
match self.run("restart", &[]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = Some(Instant::now());
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!("instance '{}' compose restart", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
544
corrosion-host-agent/src/filemanager.rs
Normal file
544
corrosion-host-agent/src/filemanager.rs
Normal file
@@ -0,0 +1,544 @@
|
||||
//! Jailed file manager for game-server install directories.
|
||||
//!
|
||||
//! Every path operation is confined to the instance `root` — the directory
|
||||
//! declared as `root` in `[[instance]]` config. A two-stage check (lexical
|
||||
//! Clean + `std::fs::canonicalize`) prevents both `../..` traversals and
|
||||
//! symlink-based escapes: even if an attacker plants a symlink inside the root
|
||||
//! that points outside it, `canonicalize` resolves the target and the prefix
|
||||
//! check catches the escape.
|
||||
//!
|
||||
//! The NATS request/reply contract mirrors the Go companion agent's jailed file
|
||||
//! manager (see `companion-agent/internal/filemanager/`) but uses a simpler
|
||||
//! flat JSON envelope rather than the VueFinder storage-path protocol — the
|
||||
//! Rust agent is the replacement, and the panel's backend talks to whichever
|
||||
//! agent is present.
|
||||
//!
|
||||
//! Subject: `corrosion.{license}.{instance}.files.cmd`
|
||||
//! Request: `{"op":"list"|"read"|"write"|"delete"|"rename"|"mkdir"|"mkfile"|"move"|"copy",
|
||||
//! "path":"rel/path", "dest"?:"...", "content"?:"...", "name"?:"..."}`
|
||||
//! Response: `{"status":"success","data":...}` or `{"status":"error","message":"..."}`
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use chrono::{DateTime, SecondsFormat, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Upper bound, in bytes, on what `read` will return: 5 MiB. Anything bigger
/// has to go through a dedicated download endpoint instead of the file
/// manager.
const MAX_READ_SIZE: u64 = 5 << 20;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Wire types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct FileRequest {
|
||||
pub op: String,
|
||||
/// Relative path within the instance root (the "subject" of the operation).
|
||||
#[serde(default)]
|
||||
pub path: String,
|
||||
/// Destination for `rename`, `move`, `copy` — relative to instance root.
|
||||
#[serde(default)]
|
||||
pub dest: Option<String>,
|
||||
/// Text content for `write`.
|
||||
#[serde(default)]
|
||||
pub content: Option<String>,
|
||||
/// Bare filename for `mkdir` and `mkfile`.
|
||||
#[serde(default)]
|
||||
pub name: Option<String>,
|
||||
}
|
||||
|
||||
/// A single directory entry returned by `list`.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct FileEntry {
|
||||
pub name: String,
|
||||
/// Path relative to the instance root, using forward slashes.
|
||||
pub path: String,
|
||||
pub is_dir: bool,
|
||||
/// File size in bytes. Zero for directories.
|
||||
pub size: u64,
|
||||
/// RFC 3339 modification timestamp.
|
||||
pub modified: String,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Jail helper — the security core of this module
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Resolve `rel` against `root`, then canonicalize to reject any form of
|
||||
/// escape including `../..` traversals and symlinks that point outside root.
|
||||
///
|
||||
/// For paths that do not yet exist (e.g. write targets), we canonicalize the
|
||||
/// nearest existing ancestor and then re-join the remaining components, which
|
||||
/// are lexically-clean because they went through `std::path::Path` building.
|
||||
///
|
||||
/// Returns the absolute, canonicalized path if it is within `root`.
|
||||
pub fn jail(root: &Path, rel: &str) -> anyhow::Result<PathBuf> {
|
||||
// Canonicalize root once to get a stable prefix for comparison.
|
||||
// We do this on every call rather than caching so the function stays
|
||||
// pure and testable without Agent state.
|
||||
let canon_root = fs::canonicalize(root)
|
||||
.with_context(|| format!("canonicalize instance root '{}'", root.display()))?;
|
||||
|
||||
// Build the candidate absolute path. We use Path joining so that an
|
||||
// absolute `rel` (e.g. "/etc/passwd") replaces the root entirely — we
|
||||
// detect and reject that case immediately.
|
||||
let candidate = if rel.is_empty() || rel == "." {
|
||||
root.to_path_buf()
|
||||
} else {
|
||||
let rel_path = Path::new(rel);
|
||||
if rel_path.is_absolute() {
|
||||
bail!(
|
||||
"absolute path '{}' is not allowed; supply a path relative to the instance root",
|
||||
rel
|
||||
);
|
||||
}
|
||||
root.join(rel_path)
|
||||
};
|
||||
|
||||
// Normalize lexically first (removes `..` / `.` without filesystem access).
|
||||
// This is a defence-in-depth step; the authoritative check is below.
|
||||
let lexical = normalize_lexical(&candidate);
|
||||
|
||||
// Canonicalize: resolve symlinks and `..` via the kernel.
|
||||
// For a not-yet-existing path we walk up to the nearest existing ancestor.
|
||||
let canon = canonicalize_lenient(&lexical)?;
|
||||
|
||||
// Authoritative prefix check: the resolved path must be equal to or a
|
||||
// child of the canonicalized root.
|
||||
if canon != canon_root && !canon.starts_with(&canon_root) {
|
||||
bail!(
|
||||
"path '{}' resolves to '{}' which is outside the instance root '{}'",
|
||||
rel,
|
||||
canon.display(),
|
||||
canon_root.display()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(canon)
|
||||
}
|
||||
|
||||
/// Canonicalize a path that may not fully exist yet by walking up to the
|
||||
/// nearest existing ancestor, canonicalizing it, then re-joining the remaining
|
||||
/// (lexically-clean) suffix.
|
||||
fn canonicalize_lenient(path: &Path) -> anyhow::Result<PathBuf> {
|
||||
// Fast path: path already exists.
|
||||
if let Ok(c) = fs::canonicalize(path) {
|
||||
return Ok(c);
|
||||
}
|
||||
|
||||
// Walk up until we find an ancestor that exists.
|
||||
let mut existing = path.to_path_buf();
|
||||
let mut suffix: Vec<std::ffi::OsString> = Vec::new();
|
||||
|
||||
loop {
|
||||
match fs::canonicalize(&existing) {
|
||||
Ok(canon) => {
|
||||
// Re-attach the non-existing suffix.
|
||||
let mut result = canon;
|
||||
for component in suffix.iter().rev() {
|
||||
result = result.join(component);
|
||||
}
|
||||
return Ok(result);
|
||||
}
|
||||
Err(_) => {
|
||||
let file_name = match existing.file_name() {
|
||||
Some(n) => n.to_os_string(),
|
||||
None => bail!("cannot resolve path '{}'", path.display()),
|
||||
};
|
||||
suffix.push(file_name);
|
||||
existing = match existing.parent() {
|
||||
Some(p) => p.to_path_buf(),
|
||||
None => bail!("cannot resolve path '{}'", path.display()),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Collapse `.` and `..` components purely textually, without consulting the
/// filesystem (the counterpart of Go's `filepath.Clean`).
///
/// A `..` cancels the preceding component only when that component is a
/// normal name; otherwise (start of path, after a root or prefix, or after
/// another `..`) it is kept.
fn normalize_lexical(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut kept: Vec<Component> = Vec::new();
    for part in path.components() {
        let cancels_previous = part == Component::ParentDir
            && matches!(kept.last(), Some(Component::Normal(_)));

        if cancels_previous {
            kept.pop();
        } else if part != Component::CurDir {
            kept.push(part);
        }
    }
    kept.into_iter().collect()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Operations
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// List the contents of a directory. Returns an entry per item, sorted
|
||||
/// (directories first, then files, both alphabetical).
|
||||
pub fn list(root: &Path, rel: &str) -> anyhow::Result<Vec<FileEntry>> {
|
||||
let abs = jail(root, rel)?;
|
||||
// Use the canonicalized root as the prefix for relative path computation so
|
||||
// that symlinked root paths (e.g. macOS /var → /private/var) don't cause
|
||||
// strip_prefix to fail and fall back to leaking the absolute path.
|
||||
let canon_root = fs::canonicalize(root)
|
||||
.with_context(|| format!("canonicalize root '{}'", root.display()))?;
|
||||
|
||||
let rd = fs::read_dir(&abs)
|
||||
.with_context(|| format!("read_dir '{}'", abs.display()))?;
|
||||
|
||||
let mut entries: Vec<FileEntry> = Vec::new();
|
||||
for item in rd {
|
||||
let item = item.with_context(|| format!("reading directory entry in '{}'", abs.display()))?;
|
||||
// symlink_metadata (lstat): report the link itself, never the target —
|
||||
// following it would leak the size/type/existence of files outside the
|
||||
// jail. A symlink lists as a zero-ish-size non-dir entry.
|
||||
let meta = fs::symlink_metadata(item.path())
|
||||
.with_context(|| format!("stat '{}'", item.path().display()))?;
|
||||
|
||||
let name = item.file_name().to_string_lossy().into_owned();
|
||||
let is_dir = meta.is_dir();
|
||||
let size = if is_dir { 0 } else { meta.len() };
|
||||
|
||||
// Build the relative path from the canonicalized root.
|
||||
let entry_abs = item.path();
|
||||
let entry_rel = entry_abs
|
||||
.strip_prefix(&canon_root)
|
||||
.unwrap_or(&entry_abs)
|
||||
.to_string_lossy()
|
||||
.replace('\\', "/");
|
||||
|
||||
let modified = meta
|
||||
.modified()
|
||||
.ok()
|
||||
.map(|t| {
|
||||
let dt: DateTime<Utc> = t.into();
|
||||
dt.to_rfc3339_opts(SecondsFormat::Secs, true)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
entries.push(FileEntry { name, path: entry_rel, is_dir, size, modified });
|
||||
}
|
||||
|
||||
// Stable sort: dirs first, then alphabetical within each group.
|
||||
entries.sort_by(|a, b| {
|
||||
b.is_dir.cmp(&a.is_dir).then_with(|| a.name.cmp(&b.name))
|
||||
});
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Read a text file. Capped at `MAX_READ_SIZE` bytes.
|
||||
pub fn read(root: &Path, rel: &str) -> anyhow::Result<String> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
let meta = fs::metadata(&abs)
|
||||
.with_context(|| format!("stat '{}'", abs.display()))?;
|
||||
|
||||
if meta.is_dir() {
|
||||
bail!("'{}' is a directory, not a file", rel);
|
||||
}
|
||||
if meta.len() > MAX_READ_SIZE {
|
||||
bail!(
|
||||
"file '{}' is {} bytes which exceeds the {} byte read limit",
|
||||
rel,
|
||||
meta.len(),
|
||||
MAX_READ_SIZE
|
||||
);
|
||||
}
|
||||
|
||||
fs::read_to_string(&abs).with_context(|| format!("read '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Write (create or overwrite) a file. Parent directories are created as
|
||||
/// needed.
|
||||
pub fn write(root: &Path, rel: &str, content: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
if let Some(parent) = abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
fs::write(&abs, content.as_bytes())
|
||||
.with_context(|| format!("write '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Delete a file or directory tree.
|
||||
pub fn delete(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
let meta = fs::metadata(&abs)
|
||||
.with_context(|| format!("stat '{}'", abs.display()))?;
|
||||
|
||||
if meta.is_dir() {
|
||||
fs::remove_dir_all(&abs).with_context(|| format!("remove_dir_all '{}'", abs.display()))
|
||||
} else {
|
||||
fs::remove_file(&abs).with_context(|| format!("remove_file '{}'", abs.display()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Rename/move `rel` to a new bare name (`new_name`) within the same parent.
|
||||
/// `new_name` must not contain path separators.
|
||||
pub fn rename(root: &Path, rel: &str, new_name: &str) -> anyhow::Result<()> {
|
||||
if new_name.is_empty() || new_name == "." || new_name == ".." {
|
||||
bail!("new_name '{}' is not a valid filename", new_name);
|
||||
}
|
||||
if new_name.contains('/') || new_name.contains('\\') {
|
||||
bail!("new_name '{}' must not contain path separators", new_name);
|
||||
}
|
||||
|
||||
let src_abs = jail(root, rel)?;
|
||||
|
||||
// Construct the destination relative path by replacing the filename part
|
||||
// of `rel` with `new_name`. This keeps everything in relative-path space
|
||||
// so we never hand an absolute path to `jail`.
|
||||
let src_rel = Path::new(rel);
|
||||
let dest_rel = match src_rel.parent() {
|
||||
Some(parent) if parent != Path::new("") => {
|
||||
parent.join(new_name).to_string_lossy().replace('\\', "/")
|
||||
}
|
||||
_ => new_name.to_string(),
|
||||
};
|
||||
|
||||
let dest_abs = jail(root, &dest_rel)?;
|
||||
|
||||
fs::rename(&src_abs, &dest_abs)
|
||||
.with_context(|| format!("rename '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Create a directory (and any missing parents) at `rel`.
|
||||
pub fn mkdir(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
fs::create_dir_all(&abs).with_context(|| format!("mkdir '{}'", abs.display()))
|
||||
}
|
||||
|
||||
/// Create an empty file at `rel`. Fails if it already exists.
|
||||
pub fn mkfile(root: &Path, rel: &str) -> anyhow::Result<()> {
|
||||
let abs = jail(root, rel)?;
|
||||
|
||||
if let Some(parent) = abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
let _ = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(&abs)
|
||||
.with_context(|| format!("mkfile '{}'", abs.display()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Move `src` to `dest` (both relative to root).
|
||||
pub fn move_path(root: &Path, src: &str, dest: &str) -> anyhow::Result<()> {
|
||||
let src_abs = jail(root, src)?;
|
||||
let dest_abs = jail(root, dest)?;
|
||||
|
||||
if let Some(parent) = dest_abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
fs::rename(&src_abs, &dest_abs).or_else(|_| {
|
||||
// Cross-device move: copy then delete.
|
||||
copy_recursive(&src_abs, &dest_abs)?;
|
||||
fs::remove_dir_all(&src_abs)
|
||||
.with_context(|| format!("remove source '{}' after cross-device move", src_abs.display()))
|
||||
}).with_context(|| format!("move '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Copy `src` to `dest` (both relative to root).
|
||||
pub fn copy(root: &Path, src: &str, dest: &str) -> anyhow::Result<()> {
|
||||
let src_abs = jail(root, src)?;
|
||||
let dest_abs = jail(root, dest)?;
|
||||
|
||||
if let Some(parent) = dest_abs.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.with_context(|| format!("create_dir_all '{}'", parent.display()))?;
|
||||
}
|
||||
|
||||
copy_recursive(&src_abs, &dest_abs)
|
||||
.with_context(|| format!("copy '{}' -> '{}'", src_abs.display(), dest_abs.display()))
|
||||
}
|
||||
|
||||
/// Recursive copy helper.
|
||||
///
|
||||
/// SECURITY: uses `symlink_metadata` (does NOT follow symlinks) and refuses to
|
||||
/// copy any symlink. `jail()` only validates the top-level src/dest; a symlink
|
||||
/// *inside* a copied directory that points outside the jail would, if followed,
|
||||
/// pull external content (e.g. `/etc`) into the jail where it could then be
|
||||
/// read — a jail-escape exfiltration. Refusing symlinks closes that path.
|
||||
fn copy_recursive(src: &Path, dest: &Path) -> anyhow::Result<()> {
|
||||
let meta = fs::symlink_metadata(src)
|
||||
.with_context(|| format!("stat source '{}'", src.display()))?;
|
||||
|
||||
if meta.file_type().is_symlink() {
|
||||
bail!(
|
||||
"refusing to copy symlink '{}' — symlinks are not followed across the jail boundary",
|
||||
src.display()
|
||||
);
|
||||
}
|
||||
|
||||
if meta.is_dir() {
|
||||
fs::create_dir_all(dest)
|
||||
.with_context(|| format!("create_dir_all '{}'", dest.display()))?;
|
||||
|
||||
for entry in fs::read_dir(src)
|
||||
.with_context(|| format!("read_dir '{}'", src.display()))?
|
||||
{
|
||||
let entry = entry?;
|
||||
copy_recursive(&entry.path(), &dest.join(entry.file_name()))?;
|
||||
}
|
||||
} else {
|
||||
fs::copy(src, dest)
|
||||
.with_context(|| format!("copy '{}' -> '{}'", src.display(), dest.display()))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// NATS request dispatch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Dispatch a `FileRequest` against `root` and return a JSON `serde_json::Value`
|
||||
/// ready for the NATS reply.
|
||||
pub fn dispatch(root: &Path, req: &FileRequest) -> serde_json::Value {
|
||||
use serde_json::json;
|
||||
|
||||
let result = match req.op.as_str() {
|
||||
"list" => {
|
||||
list(root, &req.path).map(|entries| json!({ "entries": entries }))
|
||||
}
|
||||
"read" => {
|
||||
read(root, &req.path).map(|content| json!({ "content": content }))
|
||||
}
|
||||
"write" => {
|
||||
let content = req.content.as_deref().unwrap_or("");
|
||||
write(root, &req.path, content).map(|_| json!(null))
|
||||
}
|
||||
"delete" => {
|
||||
delete(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"rename" => {
|
||||
let new_name = req.name.as_deref().unwrap_or("");
|
||||
rename(root, &req.path, new_name).map(|_| json!(null))
|
||||
}
|
||||
"mkdir" => {
|
||||
mkdir(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"mkfile" => {
|
||||
mkfile(root, &req.path).map(|_| json!(null))
|
||||
}
|
||||
"move" => {
|
||||
let dest = req.dest.as_deref().unwrap_or("");
|
||||
move_path(root, &req.path, dest).map(|_| json!(null))
|
||||
}
|
||||
"copy" => {
|
||||
let dest = req.dest.as_deref().unwrap_or("");
|
||||
copy(root, &req.path, dest).map(|_| json!(null))
|
||||
}
|
||||
other => Err(anyhow::anyhow!(
|
||||
"unknown op '{}' (supported: list, read, write, delete, rename, mkdir, mkfile, move, copy)",
|
||||
other
|
||||
)),
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(data) => json!({ "status": "success", "data": data }),
|
||||
Err(e) => {
|
||||
tracing::warn!("filemanager op='{}' path='{}': {e:#}", req.op, req.path);
|
||||
json!({ "status": "error", "message": format!("{e:#}") })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Subscribe to `corrosion.{license}.{instance}.files.cmd` and serve file
|
||||
/// manager requests for `instance_id` jailed to `root`.
|
||||
///
|
||||
/// This function runs until the agent's cancellation token fires or the NATS
|
||||
/// subscription ends. It is spawned once per instance in `main.rs`.
|
||||
pub async fn run(
|
||||
agent: std::sync::Arc<crate::agent::Agent>,
|
||||
instance_id: String,
|
||||
root: PathBuf,
|
||||
) -> anyhow::Result<()> {
|
||||
use futures::StreamExt;
|
||||
|
||||
let subject = crate::subjects::instance_files_cmd(&agent.cfg.license_id, &instance_id);
|
||||
let mut sub = agent.nats.subscribe(subject.clone()).await?;
|
||||
tracing::info!("file manager handler listening on {subject}");
|
||||
|
||||
let cancel = agent.shutdown.clone();
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = sub.next() => {
|
||||
match msg {
|
||||
Some(msg) => {
|
||||
let agent = agent.clone();
|
||||
let root = root.clone();
|
||||
let instance_id = instance_id.clone();
|
||||
tokio::spawn(async move { handle(agent, &instance_id, &root, msg).await });
|
||||
}
|
||||
None => {
|
||||
tracing::warn!("file manager subscription ended for '{instance_id}'");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("file manager handler stopping for '{instance_id}'");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle(
|
||||
agent: std::sync::Arc<crate::agent::Agent>,
|
||||
instance_id: &str,
|
||||
root: &Path,
|
||||
msg: async_nats::Message,
|
||||
) {
|
||||
let Some(reply) = msg.reply.clone() else {
|
||||
tracing::warn!("file manager message without reply subject ignored (instance '{instance_id}')");
|
||||
return;
|
||||
};
|
||||
|
||||
let response = match serde_json::from_slice::<FileRequest>(&msg.payload) {
|
||||
Ok(req) => {
|
||||
// Blocking fs calls — offload from the async executor.
|
||||
let root = root.to_path_buf();
|
||||
tokio::task::spawn_blocking(move || dispatch(&root, &req))
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
serde_json::json!({ "status": "error", "message": format!("internal error: {e}") })
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
serde_json::json!({ "status": "error", "message": format!("invalid request payload: {e}") })
|
||||
}
|
||||
};
|
||||
|
||||
let bytes = match serde_json::to_vec(&response) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::error!("file manager response serialize failed: {e}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
|
||||
tracing::warn!("file manager response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
145
corrosion-host-agent/src/hostcmd.rs
Normal file
145
corrosion-host-agent/src/hostcmd.rs
Normal file
@@ -0,0 +1,145 @@
|
||||
//! Host-level command handler: request-reply on `corrosion.{license}.host.cmd`.
|
||||
//!
|
||||
//! One subscriber; each message handled in its own task so a slow command
|
||||
//! never blocks the dispatch loop. Commands: ping, probe, sysinfo, update.
|
||||
|
||||
use futures::StreamExt;
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
use std::sync::Arc;
|
||||
use sysinfo::System;
|
||||
|
||||
use crate::agent::Agent;
|
||||
use crate::prober;
|
||||
use crate::subjects;
|
||||
use crate::telemetry;
|
||||
use crate::update;
|
||||
use crate::version;
|
||||
|
||||
/// JSON payload of a request received on the host command subject.
#[derive(Debug, Deserialize)]
struct HostCommand {
    /// Name of the command to run; `handle` special-cases "update" and hands
    /// every other value to `dispatch`.
    func: String,
    /// Signed-update artifact URL (for func = "update").
    #[serde(default)]
    url: Option<String>,
}
|
||||
|
||||
pub async fn run(agent: Arc<Agent>) -> anyhow::Result<()> {
|
||||
let subject = subjects::host_cmd(&agent.cfg.license_id);
|
||||
let mut sub = agent.nats.subscribe(subject.clone()).await?;
|
||||
tracing::info!("host command handler listening on {subject}");
|
||||
|
||||
let cancel = agent.shutdown.clone();
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = sub.next() => {
|
||||
match msg {
|
||||
Some(msg) => {
|
||||
let agent = agent.clone();
|
||||
tokio::spawn(async move { handle(agent, msg).await });
|
||||
}
|
||||
None => {
|
||||
tracing::warn!("host command subscription ended");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("host command handler stopping");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle(agent: Arc<Agent>, msg: async_nats::Message) {
|
||||
let Some(reply) = msg.reply.clone() else {
|
||||
tracing::warn!("host command without reply subject ignored");
|
||||
return;
|
||||
};
|
||||
|
||||
let cmd = match serde_json::from_slice::<HostCommand>(&msg.payload) {
|
||||
Ok(cmd) => cmd,
|
||||
Err(e) => {
|
||||
publish(&agent, &reply, json!({ "status": "error", "message": format!("invalid command payload: {e}") })).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Self-update is special: it must reply BEFORE relaunching, because the
|
||||
// relaunch replaces this process and nothing after it would run.
|
||||
if cmd.func == "update" {
|
||||
let Some(url) = cmd.url else {
|
||||
publish(&agent, &reply, json!({ "status": "error", "message": "update requires a 'url'" })).await;
|
||||
return;
|
||||
};
|
||||
match update::download_verify_swap(&url).await {
|
||||
Ok(_) => {
|
||||
publish(&agent, &reply, json!({ "status": "success", "func": "update", "message": "verified and swapped; relaunching" })).await;
|
||||
let _ = agent.nats.flush().await;
|
||||
update::relaunch_and_exit();
|
||||
}
|
||||
Err(e) => {
|
||||
publish(&agent, &reply, json!({ "status": "error", "func": "update", "message": format!("{e:#}") })).await;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let response = dispatch(&agent, &cmd.func).await;
|
||||
publish(&agent, &reply, response).await;
|
||||
}
|
||||
|
||||
async fn publish(agent: &Arc<Agent>, reply: &async_nats::Subject, value: serde_json::Value) {
|
||||
match serde_json::to_vec(&value) {
|
||||
Ok(bytes) => {
|
||||
if let Err(e) = agent.nats.publish(reply.clone(), bytes.into()).await {
|
||||
tracing::warn!("response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::error!("response serialize failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
async fn dispatch(agent: &Arc<Agent>, func: &str) -> serde_json::Value {
|
||||
match func {
|
||||
"ping" => json!({
|
||||
"status": "success",
|
||||
"func": "ping",
|
||||
"version": version::VERSION,
|
||||
"commit": version::GIT_HASH,
|
||||
"uptime_seconds": agent.started.elapsed().as_secs(),
|
||||
}),
|
||||
"probe" => {
|
||||
let report = prober::run_probe(&agent.cfg.probe_targets).await;
|
||||
*agent.last_probe.write().await = Some(report.clone());
|
||||
match serde_json::to_value(&report) {
|
||||
Ok(report_json) => json!({
|
||||
"status": "success",
|
||||
"func": "probe",
|
||||
"report": report_json,
|
||||
}),
|
||||
Err(e) => json!({ "status": "error", "message": format!("probe serialize: {e}") }),
|
||||
}
|
||||
}
|
||||
"sysinfo" => {
|
||||
let mut sys = System::new();
|
||||
sys.refresh_cpu_usage();
|
||||
tokio::time::sleep(std::time::Duration::from_millis(250)).await;
|
||||
let payload = telemetry::collect(agent, &mut sys).await;
|
||||
match serde_json::to_value(&payload) {
|
||||
Ok(snapshot) => json!({
|
||||
"status": "success",
|
||||
"func": "sysinfo",
|
||||
"snapshot": snapshot,
|
||||
}),
|
||||
Err(e) => json!({ "status": "error", "message": format!("sysinfo serialize: {e}") }),
|
||||
}
|
||||
}
|
||||
other => json!({
|
||||
"status": "error",
|
||||
"message": format!("unknown func '{other}' (supported: ping, probe, sysinfo)"),
|
||||
}),
|
||||
}
|
||||
}
|
||||
361
corrosion-host-agent/src/instancecmd.rs
Normal file
361
corrosion-host-agent/src/instancecmd.rs
Normal file
@@ -0,0 +1,361 @@
|
||||
//! Per-instance command channel + state-change events.
|
||||
//!
|
||||
//! Each process-managed instance gets a request-reply subscriber on
|
||||
//! `corrosion.{license}.{instance_id}.cmd` (funcs: start/stop/restart/status/rcon/steam_update/wipe)
|
||||
//! and a publisher task that pushes every supervisor state change to
|
||||
//! `corrosion.{license}.{instance_id}.status` — the panel sees crashes when
|
||||
//! they happen, not when the next heartbeat ambles in.
|
||||
|
||||
use chrono::{SecondsFormat, Utc};
|
||||
use futures::StreamExt;
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::agent::Agent;
|
||||
use crate::subjects;
|
||||
use crate::steamcmd;
|
||||
use crate::supervisor::Supervisor;
|
||||
use crate::wipe;
|
||||
|
||||
/// JSON payload of a request received on an instance command subject.
/// Every field except `func` is optional and falls back to its serde default.
#[derive(Debug, Deserialize)]
struct InstanceCommand {
    /// Name of the command to run (matched in `dispatch`).
    func: String,
    /// Payload for funcs that carry a text argument (e.g. rcon).
    #[serde(default)]
    command: Option<String>,
    /// Wipe type: "map" | "blueprint" | "full" — required for func="wipe".
    #[serde(default)]
    wipe_type: Option<wipe::WipeType>,
    /// Whether to back up wipe targets before deleting (func="wipe").
    #[serde(default)]
    backup: bool,
    /// Label for the backup subdirectory (func="wipe"). Defaults to "wipe-backup".
    #[serde(default = "default_backup_label")]
    backup_label: String,
}
|
||||
|
||||
/// Serde default for `InstanceCommand::backup_label`.
fn default_backup_label() -> String {
    String::from("wipe-backup")
}
|
||||
|
||||
/// Forward every supervisor state change as a status event.
|
||||
pub async fn publish_state_changes(agent: Arc<Agent>, sup: Arc<dyn Supervisor>) {
|
||||
let subject = subjects::instance_status(&agent.cfg.license_id, sup.instance_id());
|
||||
let mut rx = sup.watch_state();
|
||||
let cancel = agent.shutdown.clone();
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
changed = rx.changed() => {
|
||||
if changed.is_err() {
|
||||
break;
|
||||
}
|
||||
let state = rx.borrow().clone();
|
||||
let event = json!({
|
||||
"timestamp": Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
"instance_id": sup.instance_id(),
|
||||
"event": state,
|
||||
});
|
||||
match serde_json::to_vec(&event) {
|
||||
Ok(bytes) => {
|
||||
if let Err(e) = agent.nats.publish(subject.clone(), bytes.into()).await {
|
||||
tracing::warn!("status publish failed for '{}': {e}", sup.instance_id());
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::error!("status serialize failed: {e}"),
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Request-reply command handler for one instance.
|
||||
pub async fn run(agent: Arc<Agent>, sup: Arc<dyn Supervisor>) -> anyhow::Result<()> {
|
||||
let subject = subjects::instance_cmd(&agent.cfg.license_id, sup.instance_id());
|
||||
let mut sub = agent.nats.subscribe(subject.clone()).await?;
|
||||
tracing::info!("instance command handler listening on {subject}");
|
||||
|
||||
let cancel = agent.shutdown.clone();
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = sub.next() => {
|
||||
match msg {
|
||||
Some(msg) => {
|
||||
let agent = agent.clone();
|
||||
let sup = sup.clone();
|
||||
tokio::spawn(async move { handle(agent, sup, msg).await });
|
||||
}
|
||||
None => {
|
||||
tracing::warn!("instance command subscription ended for '{}'", sup.instance_id());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("instance command handler stopping for '{}'", sup.instance_id());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle(agent: Arc<Agent>, sup: Arc<dyn Supervisor>, msg: async_nats::Message) {
|
||||
let Some(reply) = msg.reply.clone() else {
|
||||
tracing::warn!("instance command without reply subject ignored");
|
||||
return;
|
||||
};
|
||||
|
||||
let response = match serde_json::from_slice::<InstanceCommand>(&msg.payload) {
|
||||
Ok(cmd) => dispatch(&agent, &sup, &cmd).await,
|
||||
Err(e) => json!({ "status": "error", "message": format!("invalid command payload: {e}") }),
|
||||
};
|
||||
|
||||
let bytes = match serde_json::to_vec(&response) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::error!("response serialize failed: {e}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
|
||||
tracing::warn!("response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn dispatch(
|
||||
agent: &Arc<Agent>,
|
||||
sup: &Arc<dyn Supervisor>,
|
||||
cmd: &InstanceCommand,
|
||||
) -> serde_json::Value {
|
||||
let func = cmd.func.as_str();
|
||||
|
||||
// start/stop/restart take `self: Arc<Self>` (they may hand a clone to a
|
||||
// monitor task), so clone the Arc before the consuming call.
|
||||
let outcome = match func {
|
||||
"start" => sup.clone().start().await.map(|_| "starting"),
|
||||
"stop" => sup.clone().stop().await.map(|_| "stopped"),
|
||||
"restart" => sup.clone().restart().await.map(|_| "restarted"),
|
||||
"status" => {
|
||||
return json!({
|
||||
"status": "success",
|
||||
"func": "status",
|
||||
"instance_id": sup.instance_id(),
|
||||
"state": sup.state(),
|
||||
"uptime_seconds": sup.uptime_seconds().await,
|
||||
});
|
||||
}
|
||||
"rcon" => {
|
||||
// Look up the InstanceConfig for this supervisor so we can access
|
||||
// rcon settings and the game name without changing the supervisor's
|
||||
// data model.
|
||||
let inst_cfg = agent
|
||||
.cfg
|
||||
.instances
|
||||
.iter()
|
||||
.find(|i| i.id == sup.instance_id());
|
||||
|
||||
let rcon_cfg = inst_cfg.and_then(|i| i.rcon.as_ref());
|
||||
let Some(rcon_cfg) = rcon_cfg else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("instance '{}' has no rcon configured", sup.instance_id()),
|
||||
});
|
||||
};
|
||||
|
||||
let Some(command) = cmd.command.as_deref() else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": "rcon func requires a 'command' field",
|
||||
});
|
||||
};
|
||||
|
||||
let game = inst_cfg.map(|i| i.game.as_str()).unwrap_or("rust");
|
||||
return match crate::rcon::send_command(rcon_cfg, game, command).await {
|
||||
Ok(output) => json!({
|
||||
"status": "success",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id(),
|
||||
"output": output,
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": "rcon",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
};
|
||||
}
|
||||
"steam_update" => {
|
||||
// Look up instance config for game name, root, and optional steamcmd
|
||||
// settings. The supervisor only carries process-control state, not
|
||||
// the full config, so we reach into agent.cfg.instances here as the
|
||||
// rcon dispatch does.
|
||||
let inst_cfg = agent.cfg.instances.iter().find(|i| i.id == sup.instance_id());
|
||||
|
||||
let Some(inst_cfg) = inst_cfg else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("no config found for instance '{}'", sup.instance_id()),
|
||||
});
|
||||
};
|
||||
|
||||
let game = inst_cfg.game.as_str();
|
||||
let root = inst_cfg.root.clone();
|
||||
|
||||
// Resolve steamcmd path and validate flag from config or use defaults.
|
||||
let (steamcmd_path, validate) = match inst_cfg.steamcmd.as_ref() {
|
||||
Some(s) => {
|
||||
let path = s
|
||||
.steamcmd_path
|
||||
.as_ref()
|
||||
.and_then(|p| p.to_str().map(|s| s.to_string()))
|
||||
.unwrap_or_else(|| "steamcmd".to_string());
|
||||
(path, s.validate)
|
||||
}
|
||||
None => ("steamcmd".to_string(), false),
|
||||
};
|
||||
|
||||
let license = agent.cfg.license_id.clone();
|
||||
let instance_id = sup.instance_id().to_string();
|
||||
let nats = agent.nats.clone();
|
||||
|
||||
// Publish each progress line to the steam_status subject.
|
||||
let on_progress = move |line: &str| {
|
||||
let subject = subjects::instance_steam_status(&license, &instance_id);
|
||||
let event = json!({
|
||||
"timestamp": Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
"instance_id": instance_id,
|
||||
"line": line,
|
||||
});
|
||||
match serde_json::to_vec(&event) {
|
||||
Ok(bytes) => {
|
||||
// Fire-and-forget; the async publish is non-blocking on
|
||||
// the caller side. We create a mini-runtime task via
|
||||
// a oneshot since on_progress is Fn (not async).
|
||||
let nats = nats.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = nats.publish(subject, bytes.into()).await {
|
||||
tracing::warn!("steam_status publish failed: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(e) => tracing::error!("steam_status serialize failed: {e}"),
|
||||
}
|
||||
};
|
||||
|
||||
return match steamcmd::update(game, &root, &steamcmd_path, validate, on_progress).await {
|
||||
Ok(()) => json!({
|
||||
"status": "success",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id(),
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": "steam_update",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
};
|
||||
}
|
||||
"wipe" => {
|
||||
let inst_cfg = agent.cfg.instances.iter().find(|i| i.id == sup.instance_id());
|
||||
|
||||
let Some(inst_cfg) = inst_cfg else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("no config found for instance '{}'", sup.instance_id()),
|
||||
});
|
||||
};
|
||||
|
||||
let Some(wipe_type) = cmd.wipe_type.clone() else {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": "wipe func requires a 'wipe_type' field (\"map\", \"blueprint\", or \"full\")",
|
||||
});
|
||||
};
|
||||
|
||||
let root = inst_cfg.root.clone();
|
||||
let instance_id = sup.instance_id().to_string();
|
||||
|
||||
let wipe_req = wipe::WipeRequest {
|
||||
wipe_type,
|
||||
backup: cmd.backup,
|
||||
backup_label: cmd.backup_label.clone(),
|
||||
};
|
||||
|
||||
// Stop the server best-effort before wiping; proceed even if stop fails
|
||||
// (the server may already be down).
|
||||
if let Err(e) = sup.clone().stop().await {
|
||||
tracing::warn!("wipe: stop instance '{}' failed (proceeding anyway): {e:#}", instance_id);
|
||||
}
|
||||
|
||||
// Run the blocking I/O on the blocking thread pool.
|
||||
let result = tokio::task::spawn_blocking(move || wipe::execute(&root, &wipe_req)).await;
|
||||
|
||||
// Restart best-effort regardless of wipe outcome.
|
||||
if let Err(e) = sup.clone().start().await {
|
||||
tracing::warn!("wipe: restart instance '{}' failed: {e:#}", instance_id);
|
||||
}
|
||||
|
||||
return match result {
|
||||
Ok(Ok(wr)) => {
|
||||
let wipe_type_str = format!("{:?}", wr.wipe_type).to_lowercase();
|
||||
json!({
|
||||
"status": "success",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"wipe_type": wipe_type_str,
|
||||
"deleted_count": wr.deleted_count,
|
||||
})
|
||||
}
|
||||
Ok(Err(e)) => json!({
|
||||
"status": "error",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": "wipe",
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("internal error: {e}"),
|
||||
}),
|
||||
};
|
||||
}
|
||||
other => {
|
||||
return json!({
|
||||
"status": "error",
|
||||
"message": format!("unknown func '{other}' (supported: start, stop, restart, status, rcon, steam_update, wipe)"),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
match outcome {
|
||||
Ok(result) => json!({
|
||||
"status": "success",
|
||||
"func": func,
|
||||
"instance_id": sup.instance_id(),
|
||||
"result": result,
|
||||
"state": sup.state(),
|
||||
}),
|
||||
Err(e) => json!({
|
||||
"status": "error",
|
||||
"func": func,
|
||||
"instance_id": sup.instance_id(),
|
||||
"message": format!("{e:#}"),
|
||||
}),
|
||||
}
|
||||
}
|
||||
21
corrosion-host-agent/src/lib.rs
Normal file
21
corrosion-host-agent/src/lib.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
//! Corrosion Host Agent library surface — modules are public so integration
|
||||
//! tests can drive subsystems (notably the process supervisor) directly.
|
||||
|
||||
pub mod agent;
|
||||
pub mod bus;
|
||||
pub mod config;
|
||||
pub mod docker_compose;
|
||||
pub mod filemanager;
|
||||
pub mod hostcmd;
|
||||
pub mod instancecmd;
|
||||
pub mod prober;
|
||||
pub mod process;
|
||||
pub mod rcon;
|
||||
pub mod service;
|
||||
pub mod steamcmd;
|
||||
pub mod subjects;
|
||||
pub mod supervisor;
|
||||
pub mod telemetry;
|
||||
pub mod update;
|
||||
pub mod version;
|
||||
pub mod wipe;
|
||||
220
corrosion-host-agent/src/main.rs
Normal file
220
corrosion-host-agent/src/main.rs
Normal file
@@ -0,0 +1,220 @@
|
||||
//! Corrosion Host Agent — multi-game ops runtime.
|
||||
//!
|
||||
//! Phase 0: NATS connectivity, real host telemetry, multi-instance config,
|
||||
//! connectivity prober, host command channel. Process control, file ops, and
|
||||
//! game adapters arrive in Phase 1+ (see PROTOCOL.md).
|
||||
|
||||
use corrosion_host_agent::{
|
||||
agent, bus, config, docker_compose, filemanager, hostcmd, instancecmd, prober, process,
|
||||
service, subjects, supervisor, telemetry, version,
|
||||
};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{Parser, Subcommand};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::agent::Agent;
|
||||
|
||||
// Top-level command-line arguments.
// NOTE: the `///` comments on the fields below are consumed by clap's derive
// macro as user-visible help text, so they are part of the program's output.
#[derive(Parser)]
#[command(name = "corrosion-host-agent", version = version::VERSION, about)]
struct Cli {
    /// Path to agent.toml (default: /etc/corrosion/agent.toml on Linux,
    /// C:\ProgramData\Corrosion\agent.toml on Windows)
    #[arg(long, short = 'c')]
    config: Option<PathBuf>,

    // Optional subcommand; `main` runs the agent itself when none is given.
    #[command(subcommand)]
    command: Option<Command>,
}
|
||||
|
||||
// One-shot subcommands; each is handled in `main` without starting the agent
// runtime. The `///` comments on the variants are clap help text.
#[derive(Subcommand)]
enum Command {
    /// Validate the config file and exit.
    Check,
    /// Print full version (semver, git hash, build timestamp) and exit.
    Version,
    /// Install as a systemd service and start it (Linux; requires root).
    Install,
    /// Stop and remove the systemd service (Linux; requires root).
    Uninstall,
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
let config_path = cli.config.unwrap_or_else(config::default_config_path);
|
||||
|
||||
match cli.command {
|
||||
Some(Command::Version) => {
|
||||
println!("corrosion-host-agent {}", version::long());
|
||||
Ok(())
|
||||
}
|
||||
Some(Command::Check) => {
|
||||
let settings = config::load(&config_path)?;
|
||||
println!(
|
||||
"config ok: license {}, {} instance(s), nats {}",
|
||||
settings.license_id,
|
||||
settings.instances.len(),
|
||||
settings.nats_url
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
Some(Command::Install) => service::install(&config_path),
|
||||
Some(Command::Uninstall) => service::uninstall(),
|
||||
None => {
|
||||
let settings = config::load(&config_path)?;
|
||||
init_logging(&settings.log_level);
|
||||
tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.context("building tokio runtime")?
|
||||
.block_on(run(settings))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn init_logging(level: &str) {
|
||||
let filter = tracing_subscriber::EnvFilter::try_from_default_env()
|
||||
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(level));
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(filter)
|
||||
.with_target(false)
|
||||
.init();
|
||||
}
|
||||
|
||||
async fn run(settings: config::Settings) -> Result<()> {
|
||||
tracing::info!(
|
||||
"corrosion-host-agent {} starting: license {}, {} instance(s)",
|
||||
version::long(),
|
||||
settings.license_id,
|
||||
settings.instances.len()
|
||||
);
|
||||
for inst in &settings.instances {
|
||||
tracing::info!(" instance '{}' ({}) at {}", inst.id, inst.game, inst.root.display());
|
||||
}
|
||||
|
||||
let nats = bus::connect(&settings).await?;
|
||||
|
||||
// Per-game supervisor factory: container-managed games (Dune) get a
|
||||
// docker-compose supervisor; everything else is a spawned-process
|
||||
// supervisor. Both satisfy the `Supervisor` trait, so the rest of the agent
|
||||
// is game-agnostic.
|
||||
let supervisors: std::collections::HashMap<String, Arc<dyn supervisor::Supervisor>> = settings
|
||||
.instances
|
||||
.iter()
|
||||
.map(|inst| {
|
||||
let sup: Arc<dyn supervisor::Supervisor> = match inst.game.as_str() {
|
||||
"dune" => docker_compose::DockerComposeSupervisor::new(inst),
|
||||
_ => process::ProcessSupervisor::new(inst),
|
||||
};
|
||||
(inst.id.clone(), sup)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let agent = Arc::new(Agent {
|
||||
cfg: settings,
|
||||
nats,
|
||||
started: Instant::now(),
|
||||
last_probe: RwLock::new(None),
|
||||
supervisors,
|
||||
shutdown: CancellationToken::new(),
|
||||
});
|
||||
|
||||
let mut handles = Vec::new();
|
||||
handles.push(tokio::spawn(telemetry::run(agent.clone())));
|
||||
handles.push(tokio::spawn(prober::run_loop(agent.clone())));
|
||||
{
|
||||
let agent = agent.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
if let Err(e) = hostcmd::run(agent).await {
|
||||
tracing::error!("host command handler failed: {e:#}");
|
||||
}
|
||||
}));
|
||||
}
|
||||
for (instance_id, sup) in &agent.supervisors {
|
||||
{
|
||||
let agent = agent.clone();
|
||||
let sup = sup.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
if let Err(e) = instancecmd::run(agent, sup).await {
|
||||
tracing::error!("instance command handler failed: {e:#}");
|
||||
}
|
||||
}));
|
||||
}
|
||||
handles.push(tokio::spawn(instancecmd::publish_state_changes(
|
||||
agent.clone(),
|
||||
sup.clone(),
|
||||
)));
|
||||
// File manager: one handler task per instance, jailed to root.
|
||||
{
|
||||
let agent = agent.clone();
|
||||
let inst_cfg = agent
|
||||
.cfg
|
||||
.instances
|
||||
.iter()
|
||||
.find(|i| &i.id == instance_id)
|
||||
.cloned();
|
||||
if let Some(cfg) = inst_cfg {
|
||||
let id = instance_id.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
if let Err(e) = filemanager::run(agent, id, cfg.root).await {
|
||||
tracing::error!("file manager handler failed: {e:#}");
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wait_for_shutdown_signal().await;
|
||||
tracing::info!("shutdown signal received");
|
||||
agent.shutdown.cancel();
|
||||
|
||||
// Best-effort offline beacon so the panel flips to offline immediately
|
||||
// instead of waiting out the heartbeat staleness window.
|
||||
let beacon = subjects::host_going_offline(&agent.cfg.license_id);
|
||||
let _ = tokio::time::timeout(
|
||||
Duration::from_millis(500),
|
||||
agent.nats.publish(beacon, "{}".into()),
|
||||
)
|
||||
.await;
|
||||
|
||||
match tokio::time::timeout(
|
||||
Duration::from_secs(10),
|
||||
futures::future::join_all(handles),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => tracing::info!("all subsystems stopped cleanly"),
|
||||
Err(_) => tracing::warn!("shutdown timeout: some subsystems did not stop within 10s"),
|
||||
}
|
||||
|
||||
let _ = agent.nats.flush().await;
|
||||
tracing::info!("corrosion-host-agent stopped");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn wait_for_shutdown_signal() {
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use tokio::signal::unix::{signal, SignalKind};
|
||||
let mut sigterm = match signal(SignalKind::terminate()) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::error!("SIGTERM handler failed: {e}; falling back to ctrl-c only");
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
tokio::select! {
|
||||
_ = tokio::signal::ctrl_c() => {}
|
||||
_ = sigterm.recv() => {}
|
||||
}
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
}
|
||||
}
|
||||
121
corrosion-host-agent/src/prober.rs
Normal file
121
corrosion-host-agent/src/prober.rs
Normal file
@@ -0,0 +1,121 @@
|
||||
//! Connectivity prober.
|
||||
//!
|
||||
//! Answers "is it the box or is it the network?" before a support ticket gets
|
||||
//! written. Phase 0 scope is OUTBOUND reachability: TCP connect timing from
|
||||
//! the host to known endpoints. Inbound port-forward verification (the thing
|
||||
//! panel users actually struggle with) requires a backend-side reverse probe
|
||||
//! and is specified in PROTOCOL.md as a later phase.
|
||||
|
||||
use chrono::{SecondsFormat, Utc};
|
||||
use serde::Serialize;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::net::TcpStream;
|
||||
|
||||
use crate::agent::Agent;
|
||||
use crate::config::ProbeTargetConfig;
|
||||
|
||||
/// Upper bound on a single probe's TCP connect attempt (see `run_probe`).
const CONNECT_TIMEOUT: Duration = Duration::from_secs(3);
|
||||
|
||||
/// Outcome of one TCP connect attempt against a single probe target.
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ProbeResult {
|
||||
/// Target name, copied from the probe target configuration.
pub name: String,
|
||||
/// Host that was dialed.
pub host: String,
|
||||
/// TCP port that was dialed.
pub port: u16,
|
||||
/// True when the connect completed within the timeout.
pub ok: bool,
|
||||
/// Milliseconds from the start of the attempt to a completed connect;
/// set only on success and omitted from the serialized form otherwise.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub latency_ms: Option<u64>,
|
||||
/// Connect error text or a timeout message; set only on failure and
/// omitted from the serialized form otherwise.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
/// One full probe pass: every target's result plus the time it was assembled.
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ProbeReport {
|
||||
/// UTC time the report was built, RFC 3339 at second precision.
pub timestamp: String,
|
||||
/// One entry per probed target (built-in targets first, then config extras).
pub results: Vec<ProbeResult>,
|
||||
}
|
||||
|
||||
/// Built-in targets every agent checks, before config extras.
|
||||
fn default_targets() -> Vec<ProbeTargetConfig> {
|
||||
vec![ProbeTargetConfig {
|
||||
name: "corrosion-cdn".to_string(),
|
||||
host: "cdn.corrosionmgmt.com".to_string(),
|
||||
port: 443,
|
||||
}]
|
||||
}
|
||||
|
||||
pub async fn run_probe(extra_targets: &[ProbeTargetConfig]) -> ProbeReport {
|
||||
let mut targets = default_targets();
|
||||
targets.extend(extra_targets.iter().cloned());
|
||||
|
||||
let checks = targets.into_iter().map(|t| async move {
|
||||
let started = Instant::now();
|
||||
let addr = format!("{}:{}", t.host, t.port);
|
||||
let outcome = tokio::time::timeout(CONNECT_TIMEOUT, TcpStream::connect(&addr)).await;
|
||||
match outcome {
|
||||
Ok(Ok(_stream)) => ProbeResult {
|
||||
name: t.name,
|
||||
host: t.host,
|
||||
port: t.port,
|
||||
ok: true,
|
||||
latency_ms: Some(started.elapsed().as_millis() as u64),
|
||||
error: None,
|
||||
},
|
||||
Ok(Err(e)) => ProbeResult {
|
||||
name: t.name,
|
||||
host: t.host,
|
||||
port: t.port,
|
||||
ok: false,
|
||||
latency_ms: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
Err(_) => ProbeResult {
|
||||
name: t.name,
|
||||
host: t.host,
|
||||
port: t.port,
|
||||
ok: false,
|
||||
latency_ms: None,
|
||||
error: Some(format!("timeout after {}s", CONNECT_TIMEOUT.as_secs())),
|
||||
},
|
||||
}
|
||||
});
|
||||
|
||||
let results = futures::future::join_all(checks).await;
|
||||
|
||||
ProbeReport {
|
||||
timestamp: Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
results,
|
||||
}
|
||||
}
|
||||
|
||||
/// Periodic probe loop; results land in shared state and ride the next
|
||||
/// heartbeat. Jittered interval to avoid fleet-wide synchronization.
|
||||
pub async fn run_loop(agent: Arc<Agent>) {
|
||||
let cancel = agent.shutdown.clone();
|
||||
loop {
|
||||
let report = run_probe(&agent.cfg.probe_targets).await;
|
||||
let failed: Vec<&str> = report
|
||||
.results
|
||||
.iter()
|
||||
.filter(|r| !r.ok)
|
||||
.map(|r| r.name.as_str())
|
||||
.collect();
|
||||
if failed.is_empty() {
|
||||
tracing::debug!("probe ok ({} targets)", report.results.len());
|
||||
} else {
|
||||
tracing::warn!("probe failures: {}", failed.join(", "));
|
||||
}
|
||||
*agent.last_probe.write().await = Some(report);
|
||||
|
||||
let jitter = rand::Rng::gen_range(&mut rand::thread_rng(), 0.8..1.2);
|
||||
let interval =
|
||||
Duration::from_secs_f64(agent.cfg.probe_interval_seconds as f64 * jitter);
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(interval) => {}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("prober stopping");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
262
corrosion-host-agent/src/process.rs
Normal file
262
corrosion-host-agent/src/process.rs
Normal file
@@ -0,0 +1,262 @@
|
||||
//! Per-instance game-server process supervision.
|
||||
//!
|
||||
//! One `ProcessSupervisor` per process-managed instance (Rust/Conan/Soulmask).
|
||||
//! Lifecycle mirrors the proven Go agent behavior — graceful SIGTERM with a 30s
|
||||
//! budget before force kill, a monitor task that reaps the child and records
|
||||
//! crash-vs-stop — with two fixes the Go version needed: args are a proper list
|
||||
//! (no naive space splitting), and every state change is observable through a
|
||||
//! watch channel so the panel gets push events instead of waiting for the next
|
||||
//! heartbeat. Lifecycle control is exposed through the [`Supervisor`] trait so
|
||||
//! the command dispatch is identical across process- and container-managed
|
||||
//! games.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::process::{Child, Command};
|
||||
use tokio::sync::{watch, Mutex};
|
||||
|
||||
use crate::config::InstanceConfig;
|
||||
use crate::supervisor::{InstanceState, Supervisor};
|
||||
|
||||
/// How long `stop` waits for the instance to reach `Stopped` before it force kills.
const GRACEFUL_STOP_BUDGET: Duration = Duration::from_secs(30);
|
||||
/// Pause `restart` sleeps before starting the instance again.
const RESTART_PAUSE: Duration = Duration::from_secs(2);
|
||||
|
||||
/// Mutable per-instance process state, guarded by `ProcessSupervisor::inner`.
struct Inner {
|
||||
/// Handle of the spawned process; `None` when nothing is running.
child: Option<Child>,
|
||||
/// When the current child was spawned; cleared once it has been reaped.
started_at: Option<Instant>,
|
||||
/// True while a stop was requested — the monitor uses it to distinguish
|
||||
/// an ordered shutdown from a crash.
|
||||
stop_requested: bool,
|
||||
}
|
||||
|
||||
/// Supervisor for one instance that runs as a directly spawned process.
pub struct ProcessSupervisor {
|
||||
/// Identifier of the supervised instance (copied from its config).
instance_id: String,
|
||||
/// Resolved executable path; `None` leaves the instance `Unmanaged`.
executable: Option<PathBuf>,
|
||||
/// Argument list passed to the executable as-is.
args: Vec<String>,
|
||||
/// Working directory override; `start` falls back to the executable's
/// parent directory, then to `.`.
working_dir: Option<PathBuf>,
|
||||
/// Child handle and bookkeeping shared by start/stop/monitor.
inner: Mutex<Inner>,
|
||||
/// Publishes every state change; `watch_state` hands out receivers.
state_tx: watch::Sender<InstanceState>,
|
||||
}
|
||||
|
||||
impl ProcessSupervisor {
|
||||
pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
|
||||
let executable = cfg.resolved_executable();
|
||||
let initial = if executable.is_some() {
|
||||
InstanceState::Stopped
|
||||
} else {
|
||||
InstanceState::Unmanaged
|
||||
};
|
||||
let (state_tx, _) = watch::channel(initial);
|
||||
Arc::new(Self {
|
||||
instance_id: cfg.id.clone(),
|
||||
executable,
|
||||
args: cfg.args.clone(),
|
||||
working_dir: cfg.working_dir.clone(),
|
||||
inner: Mutex::new(Inner {
|
||||
child: None,
|
||||
started_at: None,
|
||||
stop_requested: false,
|
||||
}),
|
||||
state_tx,
|
||||
})
|
||||
}
|
||||
|
||||
async fn monitor(self: Arc<Self>) {
|
||||
// Take a waiter without holding the lock across the whole child
|
||||
// lifetime: Child::wait needs &mut, so the child stays in inner and
|
||||
// we poll it.
|
||||
loop {
|
||||
let status = {
|
||||
let mut inner = self.inner.lock().await;
|
||||
let Some(child) = inner.child.as_mut() else { return };
|
||||
match child.try_wait() {
|
||||
Ok(Some(status)) => Some(status),
|
||||
Ok(None) => None,
|
||||
Err(e) => {
|
||||
tracing::error!("instance '{}' wait failed: {e}", self.instance_id);
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match status {
|
||||
Some(status) => {
|
||||
let mut inner = self.inner.lock().await;
|
||||
inner.child = None;
|
||||
inner.started_at = None;
|
||||
let ordered = inner.stop_requested;
|
||||
inner.stop_requested = false;
|
||||
drop(inner);
|
||||
|
||||
if ordered {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
tracing::info!("instance '{}' stopped ({status})", self.instance_id);
|
||||
} else {
|
||||
let exit_code = status.code();
|
||||
self.set_state(InstanceState::Crashed { exit_code });
|
||||
tracing::warn!(
|
||||
"instance '{}' exited unexpectedly ({status}) — marked crashed",
|
||||
self.instance_id
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
None => tokio::time::sleep(Duration::from_millis(500)).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn set_state(&self, state: InstanceState) {
|
||||
// send_replace never fails even with zero receivers.
|
||||
let _ = self.state_tx.send_replace(state);
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Supervisor for ProcessSupervisor {
|
||||
fn instance_id(&self) -> &str {
|
||||
&self.instance_id
|
||||
}
|
||||
|
||||
fn state(&self) -> InstanceState {
|
||||
self.state_tx.borrow().clone()
|
||||
}
|
||||
|
||||
fn watch_state(&self) -> watch::Receiver<InstanceState> {
|
||||
self.state_tx.subscribe()
|
||||
}
|
||||
|
||||
async fn uptime_seconds(&self) -> u64 {
|
||||
let inner = self.inner.lock().await;
|
||||
match (&*self.state_tx.borrow(), inner.started_at) {
|
||||
(InstanceState::Running, Some(t)) => t.elapsed().as_secs(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
async fn start(self: Arc<Self>) -> Result<()> {
|
||||
let Some(exe) = self.executable.clone() else {
|
||||
bail!("instance '{}' has no executable configured", self.instance_id);
|
||||
};
|
||||
if !exe.exists() {
|
||||
bail!("executable not found: {}", exe.display());
|
||||
}
|
||||
|
||||
let mut inner = self.inner.lock().await;
|
||||
if matches!(*self.state_tx.borrow(), InstanceState::Running | InstanceState::Starting) {
|
||||
bail!("instance '{}' is already running", self.instance_id);
|
||||
}
|
||||
|
||||
self.set_state(InstanceState::Starting);
|
||||
|
||||
let workdir = self
|
||||
.working_dir
|
||||
.clone()
|
||||
.or_else(|| exe.parent().map(|p| p.to_path_buf()))
|
||||
.unwrap_or_else(|| PathBuf::from("."));
|
||||
|
||||
let child = Command::new(&exe)
|
||||
.args(&self.args)
|
||||
.current_dir(&workdir)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::inherit())
|
||||
.stderr(Stdio::inherit())
|
||||
.spawn()
|
||||
.with_context(|| format!("spawning {}", exe.display()))?;
|
||||
|
||||
let pid = child.id();
|
||||
inner.child = Some(child);
|
||||
inner.started_at = Some(Instant::now());
|
||||
inner.stop_requested = false;
|
||||
drop(inner);
|
||||
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!(
|
||||
"instance '{}' started: {} (pid {:?})",
|
||||
self.instance_id,
|
||||
exe.display(),
|
||||
pid
|
||||
);
|
||||
|
||||
// Monitor: reap the child and classify the exit.
|
||||
let sup = Arc::clone(&self);
|
||||
tokio::spawn(async move { sup.monitor().await });
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn stop(self: Arc<Self>) -> Result<()> {
|
||||
let mut inner = self.inner.lock().await;
|
||||
if inner.child.is_none() {
|
||||
bail!("instance '{}' is not running", self.instance_id);
|
||||
}
|
||||
inner.stop_requested = true;
|
||||
self.set_state(InstanceState::Stopping);
|
||||
let child = inner.child.as_mut().expect("checked above");
|
||||
|
||||
// Graceful first: SIGTERM on unix; Windows has no SIGTERM equivalent
|
||||
// for console processes, so it goes straight to kill there.
|
||||
#[cfg(unix)]
|
||||
if let Some(pid) = child.id() {
|
||||
unsafe {
|
||||
libc::kill(pid as i32, libc::SIGTERM);
|
||||
}
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = child.start_kill();
|
||||
}
|
||||
drop(inner);
|
||||
|
||||
// Wait for the monitor to observe the exit; force kill on budget.
|
||||
let mut rx = self.watch_state();
|
||||
let deadline = tokio::time::timeout(GRACEFUL_STOP_BUDGET, async {
|
||||
loop {
|
||||
if matches!(*rx.borrow(), InstanceState::Stopped) {
|
||||
return;
|
||||
}
|
||||
if rx.changed().await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
if deadline.is_err() {
|
||||
tracing::warn!(
|
||||
"instance '{}' ignored SIGTERM for {}s — force killing",
|
||||
self.instance_id,
|
||||
GRACEFUL_STOP_BUDGET.as_secs()
|
||||
);
|
||||
let mut inner = self.inner.lock().await;
|
||||
if let Some(child) = inner.child.as_mut() {
|
||||
let _ = child.start_kill();
|
||||
}
|
||||
drop(inner);
|
||||
|
||||
let mut rx = self.watch_state();
|
||||
let _ = tokio::time::timeout(Duration::from_secs(5), async {
|
||||
while !matches!(*rx.borrow(), InstanceState::Stopped) {
|
||||
if rx.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn restart(self: Arc<Self>) -> Result<()> {
|
||||
if !matches!(
|
||||
*self.state_tx.borrow(),
|
||||
InstanceState::Stopped | InstanceState::Crashed { .. } | InstanceState::Unmanaged
|
||||
) {
|
||||
self.clone().stop().await?;
|
||||
}
|
||||
tokio::time::sleep(RESTART_PAUSE).await;
|
||||
self.start().await
|
||||
}
|
||||
}
|
||||
320
corrosion-host-agent/src/rcon.rs
Normal file
320
corrosion-host-agent/src/rcon.rs
Normal file
@@ -0,0 +1,320 @@
|
||||
//! RCON client: game-server remote-console over WebRCON (Rust) or Source RCON (Conan/Soulmask).
|
||||
//!
|
||||
//! The agent runs co-located with the game server, so every connection targets
|
||||
//! 127.0.0.1 — no TLS is needed and latency is sub-millisecond. Two protocols
|
||||
//! are supported because the Rust game ships its own WebSocket-based WebRCON
|
||||
//! while Conan Exiles and Soulmask use the Valve Source RCON wire format over
|
||||
//! plain TCP.
|
||||
//!
|
||||
//! The protocol selection is explicit in the config (`kind`) but can be inferred
|
||||
//! from the game name when absent — callers supply the `game` field they already
|
||||
//! have in `InstanceConfig`.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use rand::Rng;
|
||||
use serde::Deserialize;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::{timeout, Duration};
|
||||
|
||||
/// Wire protocol used to reach a game server's remote console.
///
/// WebRCON is the Facepunch WebSocket protocol (Rust game).
|
||||
/// Source RCON is the Valve wire protocol used by Conan Exiles and Soulmask.
///
/// Deserialized from the lowercase variant names (`webrcon`, `source`).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum RconKind {
|
||||
/// JSON frames over a WebSocket.
WebRcon,
|
||||
/// Binary packets over plain TCP.
Source,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct RconConfig {
|
||||
/// Protocol override. When absent the kind is resolved from `game`.
|
||||
#[serde(default)]
|
||||
pub kind: Option<RconKind>,
|
||||
pub port: u16,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
impl RconConfig {
|
||||
/// Resolve the concrete protocol, falling back to a per-game default when
|
||||
/// `kind` is not set. rust → WebRcon; conan + soulmask → Source.
|
||||
pub fn resolved_kind(&self, game: &str) -> RconKind {
|
||||
if let Some(k) = self.kind {
|
||||
return k;
|
||||
}
|
||||
match game {
|
||||
"conan" | "soulmask" => RconKind::Source,
|
||||
// rust is the primary game; anything unknown defaults to WebRcon
|
||||
// — operators can always override with an explicit `kind`.
|
||||
_ => RconKind::WebRcon,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deadline for establishing the connection to the game server.
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
/// Deadline for each reply phase (Source auth, command response).
const RESPONSE_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Send `command` to the game server and return its text response.
|
||||
///
|
||||
/// The agent runs on the same host as the game server, so the target address
|
||||
/// is always 127.0.0.1:{port}. Connection and response deadlines are fixed at
|
||||
/// 5 s and 10 s respectively — enough headroom for a loaded server while still
|
||||
/// catching hung connections quickly.
|
||||
pub async fn send_command(cfg: &RconConfig, game: &str, command: &str) -> Result<String> {
|
||||
match cfg.resolved_kind(game) {
|
||||
RconKind::WebRcon => webrcon_exec(cfg, command).await,
|
||||
RconKind::Source => source_rcon_exec(cfg, command).await,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRCON (Rust game) — WebSocket JSON protocol
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// WebRCON request/response envelope. The server also emits chat/log frames
|
||||
/// on this socket with Identifier == 0; those are skipped.
|
||||
#[derive(serde::Serialize)]
|
||||
struct WebRconRequest<'a> {
|
||||
#[serde(rename = "Identifier")]
|
||||
identifier: i32,
|
||||
#[serde(rename = "Message")]
|
||||
message: &'a str,
|
||||
#[serde(rename = "Name")]
|
||||
name: &'static str,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct WebRconResponse {
|
||||
#[serde(rename = "Identifier")]
|
||||
identifier: i32,
|
||||
#[serde(rename = "Message")]
|
||||
message: String,
|
||||
}
|
||||
|
||||
async fn webrcon_exec(cfg: &RconConfig, command: &str) -> Result<String> {
|
||||
use tokio_tungstenite::connect_async;
|
||||
use tokio_tungstenite::tungstenite::Message as WsMsg;
|
||||
|
||||
// The Rust game server embeds the password in the WebSocket URL path —
|
||||
// never interpolate the real URL into errors or logs.
|
||||
let url = format!("ws://127.0.0.1:{}/{}", cfg.port, cfg.password);
|
||||
let redacted = format!("ws://127.0.0.1:{}/<redacted>", cfg.port);
|
||||
|
||||
// Wrap the entire connection + exchange in the connect timeout — we want
|
||||
// the timeout to cover TCP handshake + WS upgrade, not just the send.
|
||||
let (mut ws, _) = timeout(CONNECT_TIMEOUT, connect_async(&url))
|
||||
.await
|
||||
.context("connect timeout")?
|
||||
.with_context(|| format!("WebRCON connect to {redacted}"))?;
|
||||
|
||||
// Use a random positive i32 so correlation is unambiguous even when
|
||||
// multiple callers share a port (future concurrency).
|
||||
let id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
let req = WebRconRequest { identifier: id, message: command, name: "Corrosion" };
|
||||
let payload = serde_json::to_string(&req).context("serialize WebRCON request")?;
|
||||
|
||||
ws.send(WsMsg::Text(payload))
|
||||
.await
|
||||
.context("send WebRCON command")?;
|
||||
|
||||
tracing::debug!("WebRCON sent id={id} command={command:?}");
|
||||
|
||||
// Read frames until we see our Identifier — skip chat/log noise (id 0 or
|
||||
// any other value that isn't ours).
|
||||
let result = timeout(RESPONSE_TIMEOUT, async {
|
||||
loop {
|
||||
match ws.next().await {
|
||||
Some(Ok(WsMsg::Text(text))) => {
|
||||
match serde_json::from_str::<WebRconResponse>(&text) {
|
||||
Ok(resp) if resp.identifier == id => return Ok(resp.message),
|
||||
Ok(_) => {
|
||||
// Not our response (chat, log, another caller's frame).
|
||||
tracing::trace!("WebRCON skipping frame with different Identifier");
|
||||
continue;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::trace!("WebRCON non-JSON frame ignored: {e}");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(Ok(WsMsg::Close(_))) => bail!("WebRCON server closed connection"),
|
||||
Some(Ok(_)) => continue, // binary/ping/pong — skip
|
||||
Some(Err(e)) => return Err(anyhow::anyhow!(e).context("WebRCON read error")),
|
||||
None => bail!("WebRCON stream ended without response"),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.context("WebRCON response timeout")??;
|
||||
|
||||
// Close cleanly; a send error here is cosmetic — we already have our data.
|
||||
let _ = ws.close(None).await;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Source RCON (Conan Exiles, Soulmask) — Valve TCP binary protocol
|
||||
//
|
||||
// Packet layout (all fields little-endian):
|
||||
// i32 size — byte count of the remaining packet (id + type + body + 2 nulls)
|
||||
// i32 id — caller-chosen correlation id; auth failure returns -1
|
||||
// i32 type — 0=RESPONSE_VALUE, 2=EXECCOMMAND/AUTH_RESPONSE, 3=AUTH
|
||||
// [u8] body — UTF-8 command or response text
|
||||
// u8 0x00 — body null terminator
|
||||
// u8 0x00 — padding null terminator
|
||||
//
|
||||
// Multi-packet handling: after sending the command we also send an empty
|
||||
// RESPONSE_VALUE probe with a distinct id. We collect all RESPONSE_VALUE
|
||||
// packets belonging to the command id and stop when we receive the probe's
|
||||
// response. This is the standard technique specified in the Valve wiki.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Packet type of the authentication request sent by the client.
const RCON_TYPE_AUTH: i32 = 3;
|
||||
/// Packet type of the server's authentication verdict (same value as EXECCOMMAND).
const RCON_TYPE_AUTH_RESPONSE: i32 = 2;
|
||||
/// Packet type of a command sent by the client.
const RCON_TYPE_EXECCOMMAND: i32 = 2;
|
||||
/// Packet type of command output; also used for the empty end-of-response probe.
const RCON_TYPE_RESPONSE_VALUE: i32 = 0;
|
||||
|
||||
/// Maximum accumulated response body (guards against misbehaving servers).
|
||||
const MAX_RESPONSE_BYTES: usize = 1024 * 1024; // 1 MiB
|
||||
|
||||
async fn source_rcon_exec(cfg: &RconConfig, command: &str) -> Result<String> {
|
||||
let addr = format!("127.0.0.1:{}", cfg.port);
|
||||
|
||||
let stream = timeout(CONNECT_TIMEOUT, TcpStream::connect(&addr))
|
||||
.await
|
||||
.context("connect timeout")?
|
||||
.with_context(|| format!("Source RCON connect to {addr}"))?;
|
||||
|
||||
let mut stream = stream;
|
||||
|
||||
// --- Auth ---
|
||||
let auth_id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
send_packet(&mut stream, auth_id, RCON_TYPE_AUTH, cfg.password.as_bytes()).await?;
|
||||
|
||||
// The server sends two responses to AUTH: first an empty RESPONSE_VALUE,
|
||||
// then an AUTH_RESPONSE. We skip the first and read until AUTH_RESPONSE.
|
||||
timeout(RESPONSE_TIMEOUT, async {
|
||||
loop {
|
||||
let (id, ptype, _body) = recv_packet(&mut stream).await?;
|
||||
if ptype == RCON_TYPE_AUTH_RESPONSE {
|
||||
if id == -1 {
|
||||
bail!("Source RCON auth failed: wrong password");
|
||||
}
|
||||
tracing::debug!("Source RCON authenticated (id={id})");
|
||||
return Ok(());
|
||||
}
|
||||
// Skip the empty RESPONSE_VALUE that precedes AUTH_RESPONSE.
|
||||
}
|
||||
#[allow(unreachable_code)]
|
||||
Ok::<(), anyhow::Error>(())
|
||||
})
|
||||
.await
|
||||
.context("Source RCON auth timeout")??;
|
||||
|
||||
// --- Command ---
|
||||
let cmd_id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
// Probe id must differ from cmd_id.
|
||||
let probe_id: i32 = loop {
|
||||
let id: i32 = rand::thread_rng().gen_range(1..=i32::MAX);
|
||||
if id != cmd_id {
|
||||
break id;
|
||||
}
|
||||
};
|
||||
|
||||
send_packet(&mut stream, cmd_id, RCON_TYPE_EXECCOMMAND, command.as_bytes()).await?;
|
||||
// Empty RESPONSE_VALUE probe — the server echoes it after processing the
|
||||
// preceding command, signalling end-of-response.
|
||||
send_packet(&mut stream, probe_id, RCON_TYPE_RESPONSE_VALUE, b"").await?;
|
||||
|
||||
// Not every server is probe-conformant (Soulmask unverified): once we hold
|
||||
// response data, a short per-read quiet period also terminates — never
|
||||
// discard a response we already received just because the probe echo
|
||||
// didn't come back.
|
||||
const QUIET_PERIOD: Duration = Duration::from_millis(1500);
|
||||
let response = timeout(RESPONSE_TIMEOUT, async {
|
||||
let mut body_accum: Vec<u8> = Vec::new();
|
||||
loop {
|
||||
let next = if body_accum.is_empty() {
|
||||
recv_packet(&mut stream).await.map(Some)
|
||||
} else {
|
||||
match timeout(QUIET_PERIOD, recv_packet(&mut stream)).await {
|
||||
Ok(res) => res.map(Some),
|
||||
Err(_elapsed) => Ok(None), // quiet after data — done
|
||||
}
|
||||
};
|
||||
let Some((id, ptype, body)) = next? else {
|
||||
break;
|
||||
};
|
||||
if ptype != RCON_TYPE_RESPONSE_VALUE {
|
||||
continue; // unexpected packet type — skip
|
||||
}
|
||||
if id == probe_id {
|
||||
// Probe echoed back — all command response packets have arrived.
|
||||
break;
|
||||
}
|
||||
if id == cmd_id {
|
||||
if body_accum.len() + body.len() > MAX_RESPONSE_BYTES {
|
||||
bail!("Source RCON response exceeded {MAX_RESPONSE_BYTES} bytes");
|
||||
}
|
||||
body_accum.extend_from_slice(&body);
|
||||
}
|
||||
// Skip packets with other ids (shouldn't happen but be defensive).
|
||||
}
|
||||
Ok::<Vec<u8>, anyhow::Error>(body_accum)
|
||||
})
|
||||
.await
|
||||
.context("Source RCON response timeout")??;
|
||||
|
||||
String::from_utf8(response).context("Source RCON response is not valid UTF-8")
|
||||
}
|
||||
|
||||
/// Write a Source RCON packet to the stream.
|
||||
async fn send_packet(stream: &mut TcpStream, id: i32, ptype: i32, body: &[u8]) -> Result<()> {
|
||||
// size = id(4) + type(4) + body(n) + 2 null terminators
|
||||
let size = (4 + 4 + body.len() + 2) as i32;
|
||||
let mut buf: Vec<u8> = Vec::with_capacity(4 + size as usize);
|
||||
buf.extend_from_slice(&size.to_le_bytes());
|
||||
buf.extend_from_slice(&id.to_le_bytes());
|
||||
buf.extend_from_slice(&ptype.to_le_bytes());
|
||||
buf.extend_from_slice(body);
|
||||
buf.push(0x00);
|
||||
buf.push(0x00);
|
||||
stream.write_all(&buf).await.context("Source RCON write")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read one Source RCON packet; returns (id, type, body).
|
||||
async fn recv_packet(stream: &mut TcpStream) -> Result<(i32, i32, Vec<u8>)> {
|
||||
let mut size_buf = [0u8; 4];
|
||||
stream
|
||||
.read_exact(&mut size_buf)
|
||||
.await
|
||||
.context("Source RCON read size")?;
|
||||
let size = i32::from_le_bytes(size_buf) as usize;
|
||||
|
||||
// Minimum packet: id(4) + type(4) + 2 null terminators = 10 bytes.
|
||||
if size < 10 {
|
||||
bail!("Source RCON: malformed packet (size={size})");
|
||||
}
|
||||
if size > MAX_RESPONSE_BYTES + 16 {
|
||||
bail!("Source RCON: packet too large ({size} bytes)");
|
||||
}
|
||||
|
||||
let mut payload = vec![0u8; size];
|
||||
stream
|
||||
.read_exact(&mut payload)
|
||||
.await
|
||||
.context("Source RCON read payload")?;
|
||||
|
||||
let id = i32::from_le_bytes(payload[0..4].try_into().unwrap());
|
||||
let ptype = i32::from_le_bytes(payload[4..8].try_into().unwrap());
|
||||
// Body is everything between the two fields and the two trailing nulls.
|
||||
let body_end = size.saturating_sub(2); // strip 2 null terminators
|
||||
let body = payload[8..body_end].to_vec();
|
||||
|
||||
Ok((id, ptype, body))
|
||||
}
|
||||
129
corrosion-host-agent/src/service.rs
Normal file
129
corrosion-host-agent/src/service.rs
Normal file
@@ -0,0 +1,129 @@
|
||||
//! systemd service installation for the host agent (Linux).
|
||||
//!
|
||||
//! `corrosion-host-agent install` writes a systemd unit pointing at the current
|
||||
//! binary + config, reloads systemd, and enables + starts the service.
|
||||
//! `uninstall` reverses it. Windows SCM support is a follow-up; on non-Linux
|
||||
//! these return a clear "Linux only" error rather than silently doing nothing.
|
||||
//!
|
||||
//! The agent already handles SIGTERM (see main::wait_for_shutdown_signal), so a
|
||||
//! plain `Type=simple` unit gives systemd clean start/stop semantics.
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use std::path::Path;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use anyhow::Context;
|
||||
|
||||
/// systemd service name passed to `systemctl` by `install` and `uninstall`.
pub const SERVICE_NAME: &str = "corrosion-host-agent";
|
||||
|
||||
/// Location of the unit file that `install` writes and `uninstall` removes.
#[cfg(target_os = "linux")]
|
||||
const UNIT_PATH: &str = "/etc/systemd/system/corrosion-host-agent.service";
|
||||
|
||||
/// Render the systemd unit. Pure (no I/O) so it is unit-testable.
///
/// `exec_path` and `config_path` are placed on the `ExecStart=` line. Plain
/// paths are emitted verbatim. Paths containing whitespace, quotes or
/// backslashes are wrapped in double quotes so systemd keeps them as one
/// argument, and `%` / `$` are doubled so systemd does not expand them as
/// specifiers or environment variables.
pub fn unit_file_contents(exec_path: &str, config_path: &str) -> String {
    format!(
        "[Unit]\n\
         Description=Corrosion Host Agent (multi-game ops runtime)\n\
         Documentation=https://corrosionmgmt.com\n\
         After=network-online.target\n\
         Wants=network-online.target\n\
         \n\
         [Service]\n\
         Type=simple\n\
         ExecStart={exec} --config {cfg}\n\
         Restart=on-failure\n\
         RestartSec=5\n\
         # The agent supervises game-server processes and their files, so it\n\
         # needs broad filesystem access and runs as root by default.\n\
         User=root\n\
         \n\
         [Install]\n\
         WantedBy=multi-user.target\n",
        exec = systemd_quote_arg(exec_path),
        cfg = systemd_quote_arg(config_path),
    )
}

/// Escape one `ExecStart=` argument; quote it only when it needs quoting.
fn systemd_quote_arg(arg: &str) -> String {
    let mut escaped = String::with_capacity(arg.len() + 2);
    let mut needs_quotes = false;
    for ch in arg.chars() {
        match ch {
            '\\' | '"' => {
                needs_quotes = true;
                escaped.push('\\');
                escaped.push(ch);
            }
            // Literal percent / dollar must be doubled in unit command lines.
            '%' => escaped.push_str("%%"),
            '$' => escaped.push_str("$$"),
            _ => {
                if ch.is_whitespace() || ch == '\'' {
                    needs_quotes = true;
                }
                escaped.push(ch);
            }
        }
    }
    if needs_quotes {
        format!("\"{escaped}\"")
    } else {
        escaped
    }
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn install(config_path: &Path) -> Result<()> {
|
||||
let exec = std::env::current_exe().context("resolving current executable path")?;
|
||||
let exec_str = exec.to_string_lossy();
|
||||
let cfg_str = config_path.to_string_lossy();
|
||||
|
||||
let unit = unit_file_contents(&exec_str, &cfg_str);
|
||||
std::fs::write(UNIT_PATH, unit)
|
||||
.with_context(|| format!("writing {UNIT_PATH} (are you root?)"))?;
|
||||
println!("wrote {UNIT_PATH}");
|
||||
|
||||
run("systemctl", &["daemon-reload"])?;
|
||||
run("systemctl", &["enable", "--now", SERVICE_NAME])?;
|
||||
|
||||
println!(
|
||||
"service '{SERVICE_NAME}' installed and started.\n \
|
||||
status: systemctl status {SERVICE_NAME}\n \
|
||||
logs: journalctl -u {SERVICE_NAME} -f"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn uninstall() -> Result<()> {
|
||||
// Best-effort stop+disable; don't fail if it isn't currently active.
|
||||
let _ = std::process::Command::new("systemctl")
|
||||
.args(["disable", "--now", SERVICE_NAME])
|
||||
.status();
|
||||
|
||||
if Path::new(UNIT_PATH).exists() {
|
||||
std::fs::remove_file(UNIT_PATH)
|
||||
.with_context(|| format!("removing {UNIT_PATH} (are you root?)"))?;
|
||||
println!("removed {UNIT_PATH}");
|
||||
}
|
||||
run("systemctl", &["daemon-reload"])?;
|
||||
println!("service '{SERVICE_NAME}' uninstalled.");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn run(cmd: &str, args: &[&str]) -> Result<()> {
|
||||
let status = std::process::Command::new(cmd)
|
||||
.args(args)
|
||||
.status()
|
||||
.with_context(|| format!("running {cmd} {}", args.join(" ")))?;
|
||||
if !status.success() {
|
||||
bail!("{cmd} {} failed with {status}", args.join(" "));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Non-Linux stub for `install`: always returns an error explaining that
/// service installation is Linux (systemd) only. `_config_path` is unused.
#[cfg(not(target_os = "linux"))]
|
||||
pub fn install(_config_path: &Path) -> Result<()> {
|
||||
bail!(
|
||||
"`install` is only supported on Linux (systemd). Windows SCM support is \
|
||||
coming; for now run the agent directly or via your platform's service manager."
|
||||
);
|
||||
}
|
||||
|
||||
/// Non-Linux stub for `uninstall`: always returns a "Linux only" error.
#[cfg(not(target_os = "linux"))]
|
||||
pub fn uninstall() -> Result<()> {
|
||||
bail!("`uninstall` is only supported on Linux (systemd).");
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The rendered unit must carry the exec line and the key directives.
    #[test]
    fn unit_contains_exec_config_and_install_target() {
        let unit = unit_file_contents(
            "/usr/local/bin/corrosion-host-agent",
            "/etc/corrosion/agent.toml",
        );

        let expected_fragments = [
            "ExecStart=/usr/local/bin/corrosion-host-agent --config /etc/corrosion/agent.toml",
            "Type=simple",
            "Restart=on-failure",
            "WantedBy=multi-user.target",
            "After=network-online.target",
        ];
        for fragment in expected_fragments {
            assert!(unit.contains(fragment), "unit file is missing `{fragment}`");
        }
    }
}
|
||||
126
corrosion-host-agent/src/steamcmd.rs
Normal file
126
corrosion-host-agent/src/steamcmd.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
//! SteamCMD update integration for process-managed game instances.
|
||||
//!
|
||||
//! Wraps the `steamcmd` binary to perform an `+app_update` for a given game
|
||||
//! instance, streaming stdout lines to a caller-supplied progress callback so
|
||||
//! the panel can display live update output. The agent already runs a task per
|
||||
//! command in a separate `tokio::spawn`, so the blocking-until-done semantics
|
||||
//! here are intentional — the NATS reply is sent only when SteamCMD exits.
|
||||
//!
|
||||
//! Dune is Docker-image-based and explicitly has no SteamCMD integration — any
|
||||
//! attempt to invoke `update` on a Dune instance returns a clear error rather
|
||||
//! than a silent no-op.
|
||||
|
||||
use std::path::Path;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
|
||||
/// Look up the Steam app ID used to update the dedicated server for `game`.
///
/// Returns `None` for Dune (it is Docker-image-based, so SteamCMD has no role)
/// and for any game name that is not recognised. Matching is exact and
/// case-sensitive.
///
/// Soulmask has distinct app IDs for Windows and non-Windows builds; the one
/// matching the compile target is returned.
pub fn app_id_for_game(game: &str) -> Option<u32> {
    // Chosen at compile time: `cfg!(windows)` is a constant boolean.
    const SOULMASK_APP_ID: u32 = if cfg!(windows) { 3017310 } else { 3017300 };

    // Games updated through SteamCMD. "dune" is deliberately absent.
    const STEAM_APPS: [(&str, u32); 3] = [
        ("rust", 258550),
        ("conan", 443030),
        ("soulmask", SOULMASK_APP_ID),
    ];

    STEAM_APPS
        .iter()
        .find(|(name, _)| *name == game)
        .map(|&(_, app_id)| app_id)
}
|
||||
|
||||
/// Configuration controlling SteamCMD behaviour for one instance.
|
||||
/// Serialised as `[instance.steamcmd]` in agent.toml.
|
||||
#[derive(Debug, Clone, serde::Deserialize, Default)]
|
||||
pub struct SteamcmdConfig {
|
||||
/// Absolute or relative path to the `steamcmd` binary.
|
||||
/// Defaults to `"steamcmd"` (resolved via `PATH`) when absent.
|
||||
#[serde(default)]
|
||||
pub steamcmd_path: Option<std::path::PathBuf>,
|
||||
|
||||
/// Whether to pass `validate` to `+app_update`. Adds a file-hash check
|
||||
/// pass that catches corruption at the cost of a longer update time.
|
||||
#[serde(default)]
|
||||
pub validate: bool,
|
||||
}
|
||||
|
||||
/// Run a SteamCMD update for `game` into `install_dir`.
|
||||
///
|
||||
/// - `steamcmd_path`: path to the binary (or `"steamcmd"` to use PATH).
|
||||
/// - `validate`: appends `validate` to the `+app_update` call.
|
||||
/// - `on_progress`: receives each stdout line as it arrives so callers can
|
||||
/// forward progress to the panel in real time.
|
||||
///
|
||||
/// Returns `Ok(())` on a zero exit code, otherwise an error describing the
|
||||
/// failure. Dune is rejected before any process is spawned.
|
||||
pub async fn update(
|
||||
game: &str,
|
||||
install_dir: &Path,
|
||||
steamcmd_path: &str,
|
||||
validate: bool,
|
||||
on_progress: impl Fn(&str),
|
||||
) -> anyhow::Result<()> {
|
||||
use anyhow::Context;
|
||||
|
||||
let app_id = app_id_for_game(game).ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"dune uses Docker images, not SteamCMD — cannot run app_update for game '{game}'"
|
||||
)
|
||||
})?;
|
||||
|
||||
let install_dir_str = install_dir
|
||||
.to_str()
|
||||
.with_context(|| format!("install_dir '{}' is not valid UTF-8", install_dir.display()))?;
|
||||
|
||||
let mut args: Vec<String> = vec![
|
||||
"+force_install_dir".to_string(),
|
||||
install_dir_str.to_string(),
|
||||
"+login".to_string(),
|
||||
"anonymous".to_string(),
|
||||
"+app_update".to_string(),
|
||||
app_id.to_string(),
|
||||
];
|
||||
if validate {
|
||||
args.push("validate".to_string());
|
||||
}
|
||||
args.push("+quit".to_string());
|
||||
|
||||
tracing::info!(
|
||||
"steamcmd: starting update for game={game} app_id={app_id} install_dir={} validate={validate}",
|
||||
install_dir.display()
|
||||
);
|
||||
|
||||
let mut child = Command::new(steamcmd_path)
|
||||
.args(&args)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.spawn()
|
||||
.with_context(|| format!("spawning steamcmd binary '{steamcmd_path}'"))?;
|
||||
|
||||
let stdout = child.stdout.take().expect("stdout was piped");
|
||||
let mut lines = BufReader::new(stdout).lines();
|
||||
|
||||
while let Some(line) = lines.next_line().await.context("reading steamcmd stdout")? {
|
||||
tracing::debug!("steamcmd: {line}");
|
||||
on_progress(&line);
|
||||
}
|
||||
|
||||
let status = child.wait().await.context("waiting for steamcmd to exit")?;
|
||||
if status.success() {
|
||||
tracing::info!("steamcmd: update completed successfully for game={game}");
|
||||
Ok(())
|
||||
} else {
|
||||
let code = status.code().unwrap_or(-1);
|
||||
anyhow::bail!("steamcmd exited with non-zero status {code} for game={game}")
|
||||
}
|
||||
}
|
||||
|
||||
39
corrosion-host-agent/src/subjects.rs
Normal file
39
corrosion-host-agent/src/subjects.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
//! Corrosion wire protocol v2 subject scheme (see PROTOCOL.md).
|
||||
//!
|
||||
//! Host-level subjects live under `corrosion.{license}.host.*`; per-instance
|
||||
//! subjects under `corrosion.{license}.{instance_id}.*`. Instance ids are
|
||||
//! validated at config load so they can never collide with the reserved
|
||||
//! `host` segment or contain subject metacharacters.
|
||||
|
||||
/// Subject on which the host publishes its heartbeat:
/// `corrosion.{license}.host.heartbeat`.
pub fn host_heartbeat(license: &str) -> String {
    ["corrosion", license, "host", "heartbeat"].join(".")
}
|
||||
|
||||
/// Host-level command subject: `corrosion.{license}.host.cmd`.
pub fn host_cmd(license: &str) -> String {
    ["corrosion", license, "host", "cmd"].join(".")
}
|
||||
|
||||
/// Host-level "going offline" subject: `corrosion.{license}.host.going_offline`.
pub fn host_going_offline(license: &str) -> String {
    ["corrosion", license, "host", "going_offline"].join(".")
}
|
||||
|
||||
/// Command channel for one instance (start/stop/restart/status; rcon et al. to
/// come): `corrosion.{license}.{instance}.cmd`.
pub fn instance_cmd(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "cmd"].join(".")
}
|
||||
|
||||
/// State-change event subject for one instance:
/// `corrosion.{license}.{instance}.status`.
pub fn instance_status(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "status"].join(".")
}
|
||||
|
||||
/// SteamCMD progress stream for one instance:
/// `corrosion.{license}.{instance}.steam_status`. Lines from `steamcmd` stdout
/// are published here so the panel can show live update output.
pub fn instance_steam_status(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "steam_status"].join(".")
}
|
||||
|
||||
/// File-manager command channel (request-reply) for one instance:
/// `corrosion.{license}.{instance}.files.cmd`.
pub fn instance_files_cmd(license: &str, instance: &str) -> String {
    ["corrosion", license, instance, "files", "cmd"].join(".")
}
|
||||
80
corrosion-host-agent/src/supervisor.rs
Normal file
80
corrosion-host-agent/src/supervisor.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
//! The supervision abstraction.
|
||||
//!
|
||||
//! A `Supervisor` owns the lifecycle of one game instance. Different games are
|
||||
//! managed in fundamentally different ways — Rust/Conan/Soulmask are spawned OS
|
||||
//! processes ([`crate::process::ProcessSupervisor`]); Dune is a docker-compose
|
||||
//! stack ([`crate::docker_compose::DockerComposeSupervisor`]); future planes
|
||||
//! (kubectl, AMP/podman, SSH) will be their own impls. The instance command
|
||||
//! dispatch (`instancecmd::dispatch`) talks only to this trait, so it never
|
||||
//! learns which management model is behind a given instance.
|
||||
//!
|
||||
//! Trait objects (`Arc<dyn Supervisor>`) need object-safe, dynamically
|
||||
//! dispatchable async methods; native `async fn` in traits is not yet
|
||||
//! dyn-compatible, so we use `#[async_trait]` (the battle-tested ecosystem
|
||||
//! standard) to box the returned futures. The cost — one heap alloc per
|
||||
//! lifecycle call — is irrelevant for start/stop/restart, which happen seconds
|
||||
//! to minutes apart.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::Serialize;
|
||||
use tokio::sync::watch;
|
||||
|
||||
/// Observable lifecycle state of one instance. Shared vocabulary across every
|
||||
/// supervisor impl; serialized verbatim into heartbeats and status events
|
||||
/// (`{"state":"running", ...}`).
|
||||
#[derive(Debug, Clone, PartialEq, Serialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "state")]
|
||||
pub enum InstanceState {
|
||||
/// Not lifecycle-managed (a process instance with no executable, etc.).
|
||||
Unmanaged,
|
||||
Stopped,
|
||||
Starting,
|
||||
Running,
|
||||
Stopping,
|
||||
/// Exited/died without a stop request.
|
||||
Crashed {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
exit_code: Option<i32>,
|
||||
},
|
||||
}
|
||||
|
||||
impl InstanceState {
|
||||
pub fn as_label(&self) -> &'static str {
|
||||
match self {
|
||||
InstanceState::Unmanaged => "unmanaged",
|
||||
InstanceState::Stopped => "stopped",
|
||||
InstanceState::Starting => "starting",
|
||||
InstanceState::Running => "running",
|
||||
InstanceState::Stopping => "stopping",
|
||||
InstanceState::Crashed { .. } => "crashed",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lifecycle control + state observation for one instance.
|
||||
///
|
||||
/// `start`/`stop`/`restart` take `self: Arc<Self>` so an impl can hand a clone
|
||||
/// to a spawned monitor task; callers hold an `Arc<dyn Supervisor>` and
|
||||
/// `clone()` before each call. `watch_state` exposes the same channel the
|
||||
/// status-event publisher drains, so panel push events stay decoupled from the
|
||||
/// heartbeat cadence.
|
||||
#[async_trait::async_trait]
|
||||
pub trait Supervisor: Send + Sync {
|
||||
/// The instance slug (a NATS subject segment).
|
||||
fn instance_id(&self) -> &str;
|
||||
|
||||
/// Current cached state (cheap; no I/O).
|
||||
fn state(&self) -> InstanceState;
|
||||
|
||||
/// Subscribe to state transitions.
|
||||
fn watch_state(&self) -> watch::Receiver<InstanceState>;
|
||||
|
||||
/// Seconds since the instance entered `Running` (0 otherwise).
|
||||
async fn uptime_seconds(&self) -> u64;
|
||||
|
||||
async fn start(self: Arc<Self>) -> Result<()>;
|
||||
async fn stop(self: Arc<Self>) -> Result<()>;
|
||||
async fn restart(self: Arc<Self>) -> Result<()>;
|
||||
}
|
||||
185
corrosion-host-agent/src/telemetry.rs
Normal file
185
corrosion-host-agent/src/telemetry.rs
Normal file
@@ -0,0 +1,185 @@
|
||||
//! Host heartbeat: real telemetry, never fabricated.
|
||||
//!
|
||||
//! The Go agent shipped `disk_free_mb: 50000` and `cpu_percent: 0.0` as
|
||||
//! hardcoded placeholders. This module is the first time the panel's
|
||||
//! Resources view receives the truth. Anything we cannot measure is omitted
|
||||
//! or null — never invented.
|
||||
|
||||
use chrono::{SecondsFormat, Utc};
|
||||
use rand::Rng;
|
||||
use serde::Serialize;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use sysinfo::{Disks, System};
|
||||
|
||||
use crate::agent::Agent;
|
||||
use crate::prober::ProbeReport;
|
||||
use crate::subjects;
|
||||
use crate::version;
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct HeartbeatPayload {
|
||||
/// Wire schema version — lets the backend distinguish v2 host heartbeats
|
||||
/// from legacy Go companion heartbeats during any transition window.
|
||||
pub schema: u32,
|
||||
pub timestamp: String,
|
||||
pub agent: AgentInfo,
|
||||
pub host: HostInfo,
|
||||
pub instances: Vec<InstanceInfo>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub probe: Option<ProbeReport>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct AgentInfo {
|
||||
pub version: String,
|
||||
pub commit: String,
|
||||
pub os: String,
|
||||
pub arch: String,
|
||||
pub uptime_seconds: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct HostInfo {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub hostname: Option<String>,
|
||||
pub cpu_percent: f32,
|
||||
pub cpu_cores: usize,
|
||||
pub mem_total_mb: u64,
|
||||
pub mem_used_mb: u64,
|
||||
pub uptime_seconds: u64,
|
||||
pub disks: Vec<DiskInfo>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct DiskInfo {
|
||||
pub mount: String,
|
||||
pub total_mb: u64,
|
||||
pub free_mb: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct InstanceInfo {
|
||||
pub id: String,
|
||||
pub game: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub label: Option<String>,
|
||||
/// Process-managed: running/stopped/starting/stopping/crashed.
|
||||
/// Unmanaged (no executable configured): configured/missing_root.
|
||||
pub state: String,
|
||||
pub uptime_seconds: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub root_disk_free_mb: Option<u64>,
|
||||
}
|
||||
|
||||
pub async fn run(agent: Arc<Agent>) {
|
||||
let cancel = agent.shutdown.clone();
|
||||
let mut sys = System::new();
|
||||
|
||||
// CPU usage is a delta between refreshes; prime it once so the first
|
||||
// heartbeat carries a real figure instead of 0.
|
||||
sys.refresh_cpu_usage();
|
||||
tokio::time::sleep(Duration::from_millis(250)).await;
|
||||
|
||||
loop {
|
||||
let payload = collect(&agent, &mut sys).await;
|
||||
match serde_json::to_vec(&payload) {
|
||||
Ok(bytes) => {
|
||||
let subject = subjects::host_heartbeat(&agent.cfg.license_id);
|
||||
if let Err(e) = agent.nats.publish(subject, bytes.into()).await {
|
||||
tracing::warn!("heartbeat publish failed: {e}");
|
||||
} else {
|
||||
tracing::debug!(
|
||||
"heartbeat sent: cpu {:.1}%, {} instance(s)",
|
||||
payload.host.cpu_percent,
|
||||
payload.instances.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::error!("heartbeat serialize failed: {e}"),
|
||||
}
|
||||
|
||||
let jitter = rand::thread_rng().gen_range(0.8..1.2);
|
||||
let interval = Duration::from_secs_f64(agent.cfg.heartbeat_seconds as f64 * jitter);
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(interval) => {}
|
||||
_ = cancel.cancelled() => {
|
||||
tracing::info!("telemetry stopping");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn collect(agent: &Agent, sys: &mut System) -> HeartbeatPayload {
|
||||
sys.refresh_cpu_usage();
|
||||
sys.refresh_memory();
|
||||
let disks = Disks::new_with_refreshed_list();
|
||||
|
||||
let disk_infos: Vec<DiskInfo> = disks
|
||||
.iter()
|
||||
.map(|d| DiskInfo {
|
||||
mount: d.mount_point().to_string_lossy().to_string(),
|
||||
total_mb: d.total_space() / 1_048_576,
|
||||
free_mb: d.available_space() / 1_048_576,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut instances = Vec::with_capacity(agent.cfg.instances.len());
|
||||
for inst in &agent.cfg.instances {
|
||||
let (state, uptime_seconds) = match agent.supervisors.get(&inst.id) {
|
||||
Some(sup) if !matches!(sup.state(), crate::supervisor::InstanceState::Unmanaged) => {
|
||||
(sup.state().as_label().to_string(), sup.uptime_seconds().await)
|
||||
}
|
||||
_ => {
|
||||
let exists = inst.root.exists();
|
||||
(
|
||||
if exists { "configured" } else { "missing_root" }.to_string(),
|
||||
0,
|
||||
)
|
||||
}
|
||||
};
|
||||
instances.push(InstanceInfo {
|
||||
id: inst.id.clone(),
|
||||
game: inst.game.clone(),
|
||||
label: inst.label.clone(),
|
||||
state,
|
||||
uptime_seconds,
|
||||
root_disk_free_mb: disk_free_for_path(&disks, &inst.root),
|
||||
});
|
||||
}
|
||||
let instances = instances;
|
||||
|
||||
HeartbeatPayload {
|
||||
schema: 2,
|
||||
timestamp: Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
agent: AgentInfo {
|
||||
version: version::VERSION.to_string(),
|
||||
commit: version::GIT_HASH.to_string(),
|
||||
os: std::env::consts::OS.to_string(),
|
||||
arch: std::env::consts::ARCH.to_string(),
|
||||
uptime_seconds: agent.started.elapsed().as_secs(),
|
||||
},
|
||||
host: HostInfo {
|
||||
hostname: System::host_name(),
|
||||
cpu_percent: sys.global_cpu_usage(),
|
||||
cpu_cores: sys.cpus().len(),
|
||||
mem_total_mb: sys.total_memory() / 1_048_576,
|
||||
mem_used_mb: sys.used_memory() / 1_048_576,
|
||||
uptime_seconds: System::uptime(),
|
||||
disks: disk_infos,
|
||||
},
|
||||
instances,
|
||||
probe: agent.last_probe.read().await.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Free space on the disk whose mount point is the longest prefix of `path`.
|
||||
fn disk_free_for_path(disks: &Disks, path: &Path) -> Option<u64> {
|
||||
disks
|
||||
.iter()
|
||||
.filter(|d| path.starts_with(d.mount_point()))
|
||||
.max_by_key(|d| d.mount_point().as_os_str().len())
|
||||
.map(|d| d.available_space() / 1_048_576)
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user