feat(host-agent): Rust rewrite Phase 0 — multi-instance foundation, v2 wire protocol, real telemetry
All checks were successful
Test Asgard Runner / test (push) Successful in 3s
All checks were successful
Test Asgard Runner / test (push) Successful in 3s
New corrosion-host-agent/ crate (Go companion-agent stays as behavior
reference until parity). Wire protocol v2 per COA-B: instance-scoped
subjects corrosion.{license}.{instance}.* + host-level .host.* — spec
in PROTOCOL.md, designed for the license->host->instance fleet model.
- Multi-instance TOML config in the foundation, not retrofitted
- NATS layer on the Vigilance production profile (infinite reconnect,
capped backoff, 30s ping, 8192-msg offline buffer)
- Heartbeat with real sysinfo telemetry — Go agent shipped hardcoded
disk/cpu placeholders; this is the panel's first true Resources data
- Connectivity prober (outbound TCP, periodic + on-demand)
- Host cmd channel (ping/probe/sysinfo), going-offline beacon,
CancellationToken shutdown
- Live-fire verified against production NATS; artifacts: 3.7MB static
linux-musl, 3.8MB windows .exe (static CRT)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
168
corrosion-host-agent/src/main.rs
Normal file
168
corrosion-host-agent/src/main.rs
Normal file
@@ -0,0 +1,168 @@
|
||||
//! Corrosion Host Agent — multi-game ops runtime.
|
||||
//!
|
||||
//! Phase 0: NATS connectivity, real host telemetry, multi-instance config,
|
||||
//! connectivity prober, host command channel. Process control, file ops, and
|
||||
//! game adapters arrive in Phase 1+ (see PROTOCOL.md).
|
||||
|
||||
mod agent;
|
||||
mod bus;
|
||||
mod config;
|
||||
mod hostcmd;
|
||||
mod prober;
|
||||
mod subjects;
|
||||
mod telemetry;
|
||||
mod version;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{Parser, Subcommand};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::agent::Agent;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "corrosion-host-agent", version = version::VERSION, about)]
|
||||
struct Cli {
|
||||
/// Path to agent.toml (default: /etc/corrosion/agent.toml on Linux,
|
||||
/// C:\ProgramData\Corrosion\agent.toml on Windows)
|
||||
#[arg(long, short = 'c')]
|
||||
config: Option<PathBuf>,
|
||||
|
||||
#[command(subcommand)]
|
||||
command: Option<Command>,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Command {
|
||||
/// Validate the config file and exit.
|
||||
Check,
|
||||
/// Print full version (semver, git hash, build timestamp) and exit.
|
||||
Version,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
let config_path = cli.config.unwrap_or_else(config::default_config_path);
|
||||
|
||||
match cli.command {
|
||||
Some(Command::Version) => {
|
||||
println!("corrosion-host-agent {}", version::long());
|
||||
Ok(())
|
||||
}
|
||||
Some(Command::Check) => {
|
||||
let settings = config::load(&config_path)?;
|
||||
println!(
|
||||
"config ok: license {}, {} instance(s), nats {}",
|
||||
settings.license_id,
|
||||
settings.instances.len(),
|
||||
settings.nats_url
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
None => {
|
||||
let settings = config::load(&config_path)?;
|
||||
init_logging(&settings.log_level);
|
||||
tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.context("building tokio runtime")?
|
||||
.block_on(run(settings))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn init_logging(level: &str) {
|
||||
let filter = tracing_subscriber::EnvFilter::try_from_default_env()
|
||||
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(level));
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(filter)
|
||||
.with_target(false)
|
||||
.init();
|
||||
}
|
||||
|
||||
async fn run(settings: config::Settings) -> Result<()> {
|
||||
tracing::info!(
|
||||
"corrosion-host-agent {} starting: license {}, {} instance(s)",
|
||||
version::long(),
|
||||
settings.license_id,
|
||||
settings.instances.len()
|
||||
);
|
||||
for inst in &settings.instances {
|
||||
tracing::info!(" instance '{}' ({}) at {}", inst.id, inst.game, inst.root.display());
|
||||
}
|
||||
|
||||
let nats = bus::connect(&settings).await?;
|
||||
|
||||
let agent = Arc::new(Agent {
|
||||
cfg: settings,
|
||||
nats,
|
||||
started: Instant::now(),
|
||||
last_probe: RwLock::new(None),
|
||||
shutdown: CancellationToken::new(),
|
||||
});
|
||||
|
||||
let mut handles = Vec::new();
|
||||
handles.push(tokio::spawn(telemetry::run(agent.clone())));
|
||||
handles.push(tokio::spawn(prober::run_loop(agent.clone())));
|
||||
{
|
||||
let agent = agent.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
if let Err(e) = hostcmd::run(agent).await {
|
||||
tracing::error!("host command handler failed: {e:#}");
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
wait_for_shutdown_signal().await;
|
||||
tracing::info!("shutdown signal received");
|
||||
agent.shutdown.cancel();
|
||||
|
||||
// Best-effort offline beacon so the panel flips to offline immediately
|
||||
// instead of waiting out the heartbeat staleness window.
|
||||
let beacon = subjects::host_going_offline(&agent.cfg.license_id);
|
||||
let _ = tokio::time::timeout(
|
||||
Duration::from_millis(500),
|
||||
agent.nats.publish(beacon, "{}".into()),
|
||||
)
|
||||
.await;
|
||||
|
||||
match tokio::time::timeout(
|
||||
Duration::from_secs(10),
|
||||
futures::future::join_all(handles),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => tracing::info!("all subsystems stopped cleanly"),
|
||||
Err(_) => tracing::warn!("shutdown timeout: some subsystems did not stop within 10s"),
|
||||
}
|
||||
|
||||
let _ = agent.nats.flush().await;
|
||||
tracing::info!("corrosion-host-agent stopped");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn wait_for_shutdown_signal() {
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use tokio::signal::unix::{signal, SignalKind};
|
||||
let mut sigterm = match signal(SignalKind::terminate()) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::error!("SIGTERM handler failed: {e}; falling back to ctrl-c only");
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
tokio::select! {
|
||||
_ = tokio::signal::ctrl_c() => {}
|
||||
_ = sigterm.recv() => {}
|
||||
}
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user