feat(host-agent): Rust rewrite Phase 0 — multi-instance foundation, v2 wire protocol, real telemetry
All checks were successful
Test Asgard Runner / test (push) Successful in 3s
All checks were successful
Test Asgard Runner / test (push) Successful in 3s
New corrosion-host-agent/ crate (Go companion-agent stays as behavior
reference until parity). Wire protocol v2 per COA-B: instance-scoped
subjects corrosion.{license}.{instance}.* + host-level .host.* — spec
in PROTOCOL.md, designed for the license->host->instance fleet model.
- Multi-instance TOML config in the foundation, not retrofitted
- NATS layer on the Vigilance production profile (infinite reconnect,
capped backoff, 30s ping, 8192-msg offline buffer)
- Heartbeat with real sysinfo telemetry — Go agent shipped hardcoded
disk/cpu placeholders; this is the panel's first true Resources data
- Connectivity prober (outbound TCP, periodic + on-demand)
- Host cmd channel (ping/probe/sysinfo), going-offline beacon,
CancellationToken shutdown
- Live-fire verified against production NATS; artifacts: 3.7MB static
linux-musl, 3.8MB windows .exe (static CRT)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
58
corrosion-host-agent/src/bus.rs
Normal file
58
corrosion-host-agent/src/bus.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
//! NATS connection layer.
|
||||
//!
|
||||
//! Connection parameters follow the production-proven Vigilance profile:
|
||||
//! infinite reconnects with capped exponential backoff, 30s pings to detect
|
||||
//! zombie TCP in ~60s, and a deep client-side send queue so telemetry buffers
|
||||
//! through broker outages instead of erroring.
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::config::Settings;
|
||||
|
||||
pub async fn connect(cfg: &Settings) -> Result<async_nats::Client> {
|
||||
let (url, force_tls) = normalize_url(&cfg.nats_url);
|
||||
|
||||
let mut opts = async_nats::ConnectOptions::new()
|
||||
.name("corrosion-host-agent")
|
||||
.retry_on_initial_connect()
|
||||
.max_reconnects(None)
|
||||
.ping_interval(Duration::from_secs(30))
|
||||
.client_capacity(8192)
|
||||
.reconnect_delay_callback(|attempts| {
|
||||
Duration::from_millis(std::cmp::min(attempts as u64 * 100, 8_000))
|
||||
})
|
||||
.event_callback(|event| async move {
|
||||
match event {
|
||||
async_nats::Event::Disconnected => tracing::warn!("nats disconnected"),
|
||||
async_nats::Event::Connected => tracing::info!("nats connected"),
|
||||
other => tracing::debug!("nats event: {other}"),
|
||||
}
|
||||
});
|
||||
|
||||
if force_tls {
|
||||
opts = opts.require_tls(true);
|
||||
}
|
||||
if let Some(token) = &cfg.nats_token {
|
||||
opts = opts.token(token.clone());
|
||||
}
|
||||
|
||||
let client = opts
|
||||
.connect(&url)
|
||||
.await
|
||||
.with_context(|| format!("connecting to NATS at {url}"))?;
|
||||
|
||||
Ok(client)
|
||||
}
|
||||
|
||||
/// Accept `tls://` / `nats+tls://` URL schemes by translating to `nats://` +
|
||||
/// an explicit TLS requirement.
|
||||
fn normalize_url(raw: &str) -> (String, bool) {
|
||||
if let Some(rest) = raw.strip_prefix("tls://") {
|
||||
(format!("nats://{rest}"), true)
|
||||
} else if let Some(rest) = raw.strip_prefix("nats+tls://") {
|
||||
(format!("nats://{rest}"), true)
|
||||
} else {
|
||||
(raw.to_string(), false)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user