feat(host-agent): Phase 3a signed self-update (minisign) + CI signing gate
Some checks failed
CI / backend-types (push) Successful in 9s
CI / frontend-build (push) Successful in 16s
CI / agent-tests (push) Successful in 1m27s
CI / integration (push) Successful in 21s
Build Host Agent (Rust) / build (push) Failing after 1m33s

Agent only ever runs a binary whose minisign signature verifies against
the EMBEDDED public key. NATS host.cmd func 'update' {url}: download
binary + .minisig from the CDN -> verify against embedded pubkey ->
atomic swap (.old rollback) -> relaunch. URL allowlist (https + cdn.
corrosionmgmt.com only, rejects userinfo-bypass), 100MiB cap. Closes the
supply-chain hole: even a malicious CDN upload can't run unsigned.

CI: build-host-agent.yml signs every artifact with MINISIGN_SECRET_KEY
(Gitea secret) and publishes .minisig alongside; the step FAILS the
build if the secret is absent (refuses to ship unsigned). Bumped to
alpha.6.

6 deterministic tests (accept valid / reject tampered+garbage+empty sig,
URL allowlist incl userinfo-bypass, atomic swap+rollback). Fixtures
signed with the real release key so tests need no key at runtime. Full
suite 50/50 green; musl + native build clean.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Vantz Stockwell
2026-06-11 20:00:36 -04:00
parent 7c84912ff5
commit 6b3e805ac2
11 changed files with 751 additions and 29 deletions

View File

@@ -13,11 +13,15 @@ use crate::agent::Agent;
use crate::prober;
use crate::subjects;
use crate::telemetry;
use crate::update;
use crate::version;
/// A host command decoded from the JSON payload of an incoming NATS message.
#[derive(Debug, Deserialize)]
struct HostCommand {
/// Name of the function to run. `"update"` is handled specially (signed
/// self-update); any other value is handed to `dispatch`.
func: String,
/// Signed-update artifact URL (for func = "update"). Optional in the payload:
/// `#[serde(default)]` turns a missing field into `None`.
#[serde(default)]
url: Option<String>,
}
pub async fn run(agent: Arc<Agent>) -> anyhow::Result<()> {
@@ -55,20 +59,46 @@ async fn handle(agent: Arc<Agent>, msg: async_nats::Message) {
return;
};
let response = match serde_json::from_slice::<HostCommand>(&msg.payload) {
Ok(cmd) => dispatch(&agent, &cmd.func).await,
Err(e) => json!({ "status": "error", "message": format!("invalid command payload: {e}") }),
};
let bytes = match serde_json::to_vec(&response) {
Ok(b) => b,
let cmd = match serde_json::from_slice::<HostCommand>(&msg.payload) {
Ok(cmd) => cmd,
Err(e) => {
tracing::error!("response serialize failed: {e}");
publish(&agent, &reply, json!({ "status": "error", "message": format!("invalid command payload: {e}") })).await;
return;
}
};
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
tracing::warn!("response publish failed: {e}");
// Self-update is special: it must reply BEFORE relaunching, because the
// relaunch replaces this process and nothing after it would run.
if cmd.func == "update" {
let Some(url) = cmd.url else {
publish(&agent, &reply, json!({ "status": "error", "message": "update requires a 'url'" })).await;
return;
};
match update::download_verify_swap(&url).await {
Ok(_) => {
publish(&agent, &reply, json!({ "status": "success", "func": "update", "message": "verified and swapped; relaunching" })).await;
let _ = agent.nats.flush().await;
update::relaunch_and_exit();
}
Err(e) => {
publish(&agent, &reply, json!({ "status": "error", "func": "update", "message": format!("{e:#}") })).await;
}
}
return;
}
let response = dispatch(&agent, &cmd.func).await;
publish(&agent, &reply, response).await;
}
async fn publish(agent: &Arc<Agent>, reply: &async_nats::Subject, value: serde_json::Value) {
match serde_json::to_vec(&value) {
Ok(bytes) => {
if let Err(e) = agent.nats.publish(reply.clone(), bytes.into()).await {
tracing::warn!("response publish failed: {e}");
}
}
Err(e) => tracing::error!("response serialize failed: {e}"),
}
}

View File

@@ -13,4 +13,5 @@ pub mod rcon;
pub mod steamcmd;
pub mod subjects;
pub mod telemetry;
pub mod update;
pub mod version;

View File

@@ -0,0 +1,154 @@
//! Signed self-update.
//!
//! The agent only ever runs a binary whose minisign signature verifies against
//! the EMBEDDED public key below. Even if the CDN (which currently accepts
//! unauthenticated uploads) served a malicious binary, the agent refuses it
//! without a valid signature from the release private key (a CI secret).
//!
//! Flow: download binary + `.minisig` from the CDN → verify signature →
//! atomic swap (current → `.old`, new → current, rollback on failure) →
//! relaunch the new binary. Defence in depth mirrors the Vigilance updater:
//! a real URL parse rejecting credential-in-URL bypasses, an https + host
//! allowlist, and a size cap.
use anyhow::{bail, Context, Result};
use minisign_verify::{PublicKey, Signature};
use std::path::{Path, PathBuf};
use std::time::Duration;
/// minisign public key. The matching private key signs releases in CI
/// (Gitea Actions secret MINISIGN_SECRET_KEY). Rotating it means re-signing
/// every published artifact and shipping an agent build with the new key.
const PUBLIC_KEY: &str = "RWQKhJptuiwIkp31cZdz10z/R72UPZkl7/VtnZJ2Vfbe0dQfDlXHZYFC";
/// The only host an update URL may point at (exact match on the parsed host).
const ALLOWED_HOST: &str = "cdn.corrosionmgmt.com";
/// Upper bound on the size of an update binary the agent will accept.
const MAX_BINARY_BYTES: usize = 100 * 1024 * 1024; // 100 MiB sanity cap
/// Timeout configured on the HTTP client used for the update downloads.
const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(600);
/// Verify a binary against the embedded public key + a minisign signature blob.
/// The security core of self-update — tampered or unsigned content is rejected.
pub fn verify_signature(binary: &[u8], signature_blob: &str) -> Result<()> {
let pk = PublicKey::from_base64(PUBLIC_KEY).context("embedded public key is invalid")?;
let sig = Signature::decode(signature_blob).context("malformed minisign signature")?;
pk.verify(binary, &sig, false)
.map_err(|e| anyhow::anyhow!("signature verification failed: {e}"))?;
Ok(())
}
/// Accept only `https://cdn.corrosionmgmt.com/...` URLs that carry no
/// username or password.
///
/// The URL is parsed properly (not string-matched) so that
/// `https://cdn.corrosionmgmt.com@evil.example/` style userinfo tricks are
/// caught. Checks run in a fixed order: scheme, credentials, host.
///
/// # Errors
/// Fails if the URL does not parse, is not https, embeds credentials, or
/// names any host other than the allowed one.
pub fn assert_url_allowed(url: &str) -> Result<()> {
    let target = reqwest::Url::parse(url).context("invalid update URL")?;
    let has_credentials = !(target.username().is_empty() && target.password().is_none());

    match (target.scheme(), has_credentials, target.host_str()) {
        (scheme, _, _) if scheme != "https" => bail!("update URL must be https"),
        (_, true, _) => bail!("update URL must not contain credentials"),
        (_, _, Some(ALLOWED_HOST)) => Ok(()),
        (_, _, other) => bail!("update URL host not allowed: {:?}", other),
    }
}
/// Upper bound on a `.minisig` file. Real signatures are a few hundred bytes
/// plus a trusted comment; anything near this size is not a signature.
const MAX_SIGNATURE_BYTES: usize = 64 * 1024;

/// Maximum number of redirect hops followed for an update download.
const MAX_REDIRECTS: usize = 5;

/// GET `url` and return its body, never buffering more than `cap` bytes.
///
/// `what` names the artifact in error messages ("update binary", "signature").
/// The declared `Content-Length` is checked first so an oversized response is
/// refused before any of it is read. The body is then streamed chunk by chunk
/// and the cap re-checked, because the header may be absent or wrong.
async fn fetch_capped(
    client: &reqwest::Client,
    url: &str,
    what: &str,
    cap: usize,
) -> Result<Vec<u8>> {
    let mut response = client
        .get(url)
        .send()
        .await
        .with_context(|| format!("downloading {url}"))?
        .error_for_status()
        .with_context(|| format!("{what} download failed"))?;

    if let Some(declared) = response.content_length() {
        let over_cap = u64::try_from(cap).map_or(false, |cap| declared > cap);
        if over_cap {
            bail!("{what} is {declared} bytes, exceeds the {cap} cap");
        }
    }

    let mut body = Vec::new();
    while let Some(chunk) = response
        .chunk()
        .await
        .with_context(|| format!("reading {what}"))?
    {
        // Invariant: body.len() <= cap, so the subtraction cannot underflow.
        if chunk.len() > cap - body.len() {
            bail!("{what} exceeds the {cap} cap");
        }
        body.extend_from_slice(&chunk);
    }
    Ok(body)
}

/// Download, verify, and swap in a new agent binary. Does NOT restart — the
/// caller decides when to relaunch (after replying on NATS).
///
/// Steps, in order:
/// 1. `url` and `url + ".minisig"` must both pass [`assert_url_allowed`].
/// 2. Both are downloaded with a size cap enforced *while streaming*, so an
///    oversized response cannot exhaust memory. Every redirect hop is
///    re-checked against the allowlist, so the CDN cannot bounce the agent to
///    another host or to plain http.
/// 3. The binary must pass [`verify_signature`]; nothing touches disk before
///    that.
/// 4. [`swap_binary`] installs it over the current executable.
///
/// Returns the path of the now-current (new) binary.
///
/// # Errors
/// Fails on a disallowed URL or redirect, any HTTP/transport error, an
/// oversized or non-UTF-8 artifact, a bad signature, or a failed swap.
pub async fn download_verify_swap(url: &str) -> Result<PathBuf> {
    assert_url_allowed(url)?;
    let sig_url = format!("{url}.minisig");
    assert_url_allowed(&sig_url)?;

    let redirect_policy = reqwest::redirect::Policy::custom(|attempt| {
        if attempt.previous().len() >= MAX_REDIRECTS {
            return attempt.error("too many redirects while downloading update");
        }
        let verdict = assert_url_allowed(attempt.url().as_str());
        match verdict {
            Ok(()) => attempt.follow(),
            Err(e) => attempt.error(format!("update redirect refused: {e}")),
        }
    });
    let client = reqwest::Client::builder()
        .timeout(DOWNLOAD_TIMEOUT)
        .redirect(redirect_policy)
        .build()
        .context("building HTTP client")?;

    let binary = fetch_capped(&client, url, "update binary", MAX_BINARY_BYTES).await?;
    let signature = fetch_capped(&client, &sig_url, "signature", MAX_SIGNATURE_BYTES).await?;
    let signature = String::from_utf8(signature).context("signature is not valid UTF-8")?;

    verify_signature(&binary, &signature).context("refusing unsigned/tampered update")?;
    tracing::info!("update signature verified ({} bytes)", binary.len());

    // Resolve the install path BEFORE the swap: on Linux the path reported for
    // the running executable follows the rename performed by the swap.
    let current = std::env::current_exe().context("resolving current executable")?;
    swap_binary(&current, &binary)?;
    tracing::info!("update swapped in at {}", current.display());
    Ok(current)
}
/// Atomically replace `current` with `new_bytes`, keeping a `.old` backup and
/// rolling back if the rename fails.
pub fn swap_binary(current: &Path, new_bytes: &[u8]) -> Result<()> {
let dir = current.parent().unwrap_or_else(|| Path::new("."));
let stem = current.file_name().and_then(|s| s.to_str()).unwrap_or("corrosion-host-agent");
let new_path = dir.join(format!("{stem}.new"));
let backup = dir.join(format!("{stem}.old"));
std::fs::write(&new_path, new_bytes)
.with_context(|| format!("writing {}", new_path.display()))?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
std::fs::set_permissions(&new_path, std::fs::Permissions::from_mode(0o755))
.context("chmod +x on new binary")?;
}
let _ = std::fs::remove_file(&backup);
std::fs::rename(current, &backup)
.with_context(|| format!("backing up current binary to {}", backup.display()))?;
if let Err(e) = std::fs::rename(&new_path, current) {
// Roll back: restore the backup so the agent stays runnable.
let _ = std::fs::rename(&backup, current);
return Err(anyhow::anyhow!(e).context("installing new binary (rolled back)"));
}
Ok(())
}
/// Work out which file to launch after an update.
///
/// `swap_binary` renames the running executable to `<name>.old`. On Linux
/// `current_exe()` reads `/proc/self/exe`, which follows that rename, so after
/// a swap it reports the *backup*, not the freshly installed binary. When the
/// reported name ends in `.old` and the sibling without that suffix exists,
/// the sibling is the swapped-in binary and is returned instead.
fn relaunch_target() -> PathBuf {
    let reported =
        std::env::current_exe().unwrap_or_else(|_| PathBuf::from("corrosion-host-agent"));
    let swapped_in = reported
        .file_name()
        .and_then(|name| name.to_str())
        .and_then(|name| name.strip_suffix(".old"))
        .filter(|name| !name.is_empty())
        .map(|name| reported.with_file_name(name))
        .filter(|candidate| candidate.is_file());
    swapped_in.unwrap_or(reported)
}

/// Relaunch the (already-swapped) binary with the same args, then exit. No
/// service manager is required — the new process reconnects on its own.
///
/// On Unix the process image is replaced in place via `exec`. Elsewhere a
/// child is spawned and this process exits, leaving a short window with no
/// agent. If the relaunch itself fails, the process exits with code 70 (never
/// 0) so a supervisor, if any, can tell the agent is down and restart it.
pub fn relaunch_and_exit() -> ! {
    let exe = relaunch_target();
    let args: Vec<String> = std::env::args().skip(1).collect();
    tracing::info!("relaunching {} after update", exe.display());
    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;
        // exec replaces this process image with the new binary — cleanest,
        // no gap. Only returns on failure.
        let err = std::process::Command::new(&exe).args(&args).exec();
        tracing::error!("exec after update failed: {err}; exiting for service restart");
        std::process::exit(70);
    }
    #[cfg(not(unix))]
    {
        match std::process::Command::new(&exe).args(&args).spawn() {
            Ok(_) => std::process::exit(0),
            Err(err) => {
                tracing::error!("spawn after update failed: {err}; exiting for service restart");
                std::process::exit(70);
            }
        }
    }
}