feat(host-agent): Phase 3a signed self-update (minisign) + CI signing gate
Agent only ever runs a binary whose minisign signature verifies against
the EMBEDDED public key. NATS host.cmd func 'update' {url}: download
binary + .minisig from the CDN -> verify against embedded pubkey ->
atomic swap (.old rollback) -> relaunch. URL allowlist (https + cdn.
corrosionmgmt.com only, rejects userinfo-bypass), 100MiB cap. Closes the
supply-chain hole: even a malicious CDN upload can't run unsigned.
CI: build-host-agent.yml signs every artifact with MINISIGN_SECRET_KEY
(Gitea secret) and publishes .minisig alongside; the step FAILS the
build if the secret is absent (refuses to ship unsigned). Bumped to
alpha.6.
6 deterministic tests (accept valid / reject tampered+garbage+empty sig,
URL allowlist incl userinfo-bypass, atomic swap+rollback). Fixtures
signed with the real release key so tests need no key at runtime. Full
suite 50/50 green; musl + native build clean.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -13,11 +13,15 @@ use crate::agent::Agent;
|
||||
use crate::prober;
|
||||
use crate::subjects;
|
||||
use crate::telemetry;
|
||||
use crate::update;
|
||||
use crate::version;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct HostCommand {
|
||||
func: String,
|
||||
/// Signed-update artifact URL (for func = "update").
|
||||
#[serde(default)]
|
||||
url: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn run(agent: Arc<Agent>) -> anyhow::Result<()> {
|
||||
@@ -55,20 +59,46 @@ async fn handle(agent: Arc<Agent>, msg: async_nats::Message) {
|
||||
return;
|
||||
};
|
||||
|
||||
let response = match serde_json::from_slice::<HostCommand>(&msg.payload) {
|
||||
Ok(cmd) => dispatch(&agent, &cmd.func).await,
|
||||
Err(e) => json!({ "status": "error", "message": format!("invalid command payload: {e}") }),
|
||||
};
|
||||
|
||||
let bytes = match serde_json::to_vec(&response) {
|
||||
Ok(b) => b,
|
||||
let cmd = match serde_json::from_slice::<HostCommand>(&msg.payload) {
|
||||
Ok(cmd) => cmd,
|
||||
Err(e) => {
|
||||
tracing::error!("response serialize failed: {e}");
|
||||
publish(&agent, &reply, json!({ "status": "error", "message": format!("invalid command payload: {e}") })).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = agent.nats.publish(reply, bytes.into()).await {
|
||||
tracing::warn!("response publish failed: {e}");
|
||||
|
||||
// Self-update is special: it must reply BEFORE relaunching, because the
|
||||
// relaunch replaces this process and nothing after it would run.
|
||||
if cmd.func == "update" {
|
||||
let Some(url) = cmd.url else {
|
||||
publish(&agent, &reply, json!({ "status": "error", "message": "update requires a 'url'" })).await;
|
||||
return;
|
||||
};
|
||||
match update::download_verify_swap(&url).await {
|
||||
Ok(_) => {
|
||||
publish(&agent, &reply, json!({ "status": "success", "func": "update", "message": "verified and swapped; relaunching" })).await;
|
||||
let _ = agent.nats.flush().await;
|
||||
update::relaunch_and_exit();
|
||||
}
|
||||
Err(e) => {
|
||||
publish(&agent, &reply, json!({ "status": "error", "func": "update", "message": format!("{e:#}") })).await;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let response = dispatch(&agent, &cmd.func).await;
|
||||
publish(&agent, &reply, response).await;
|
||||
}
|
||||
|
||||
async fn publish(agent: &Arc<Agent>, reply: &async_nats::Subject, value: serde_json::Value) {
|
||||
match serde_json::to_vec(&value) {
|
||||
Ok(bytes) => {
|
||||
if let Err(e) = agent.nats.publish(reply.clone(), bytes.into()).await {
|
||||
tracing::warn!("response publish failed: {e}");
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::error!("response serialize failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,4 +13,5 @@ pub mod rcon;
|
||||
pub mod steamcmd;
|
||||
pub mod subjects;
|
||||
pub mod telemetry;
|
||||
pub mod update;
|
||||
pub mod version;
|
||||
|
||||
154
corrosion-host-agent/src/update.rs
Normal file
154
corrosion-host-agent/src/update.rs
Normal file
@@ -0,0 +1,154 @@
|
||||
//! Signed self-update.
|
||||
//!
|
||||
//! The agent only ever runs a binary whose minisign signature verifies against
|
||||
//! the EMBEDDED public key below. Even if the CDN (which currently accepts
|
||||
//! unauthenticated uploads) served a malicious binary, the agent refuses it
|
||||
//! without a valid signature from the release private key (a CI secret).
|
||||
//!
|
||||
//! Flow: download binary + `.minisig` from the CDN → verify signature →
|
||||
//! atomic swap (current → `.old`, new → current, rollback on failure) →
|
||||
//! relaunch the new binary. Defence in depth mirrors the Vigilance updater:
|
||||
//! a real URL parse rejecting credential-in-URL bypasses, an https + host
|
||||
//! allowlist, and a size cap.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use minisign_verify::{PublicKey, Signature};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
|
||||
/// Embedded minisign public key (base64). Releases are signed in CI with the
/// matching private key (Gitea Actions secret MINISIGN_SECRET_KEY). Rotating
/// it means re-signing every published artifact and shipping an agent build
/// that embeds the new key.
const PUBLIC_KEY: &str = "RWQKhJptuiwIkp31cZdz10z/R72UPZkl7/VtnZJ2Vfbe0dQfDlXHZYFC";

/// The only host update artifacts may be downloaded from.
const ALLOWED_HOST: &str = "cdn.corrosionmgmt.com";

/// Sanity cap on the size of a downloaded binary: 100 MiB.
const MAX_BINARY_BYTES: usize = 100 * 1024 * 1024;

/// Overall timeout applied to the update HTTP client: ten minutes.
const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(10 * 60);
|
||||
|
||||
/// Verify a binary against the embedded public key + a minisign signature blob.
|
||||
/// The security core of self-update — tampered or unsigned content is rejected.
|
||||
pub fn verify_signature(binary: &[u8], signature_blob: &str) -> Result<()> {
|
||||
let pk = PublicKey::from_base64(PUBLIC_KEY).context("embedded public key is invalid")?;
|
||||
let sig = Signature::decode(signature_blob).context("malformed minisign signature")?;
|
||||
pk.verify(binary, &sig, false)
|
||||
.map_err(|e| anyhow::anyhow!("signature verification failed: {e}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reject anything but `https://cdn.corrosionmgmt.com/...` with no embedded
|
||||
/// credentials (the userinfo-bypass class).
|
||||
pub fn assert_url_allowed(url: &str) -> Result<()> {
|
||||
let parsed = reqwest::Url::parse(url).context("invalid update URL")?;
|
||||
if parsed.scheme() != "https" {
|
||||
bail!("update URL must be https");
|
||||
}
|
||||
if !parsed.username().is_empty() || parsed.password().is_some() {
|
||||
bail!("update URL must not contain credentials");
|
||||
}
|
||||
if parsed.host_str() != Some(ALLOWED_HOST) {
|
||||
bail!("update URL host not allowed: {:?}", parsed.host_str());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Download, verify, and atomically swap in a new agent binary. Does NOT
|
||||
/// restart — the caller decides when to relaunch (after replying on NATS).
|
||||
/// Returns the path of the now-current (new) binary.
|
||||
pub async fn download_verify_swap(url: &str) -> Result<PathBuf> {
|
||||
assert_url_allowed(url)?;
|
||||
let sig_url = format!("{url}.minisig");
|
||||
assert_url_allowed(&sig_url)?;
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(DOWNLOAD_TIMEOUT)
|
||||
.build()
|
||||
.context("building HTTP client")?;
|
||||
|
||||
let binary = client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("downloading {url}"))?
|
||||
.error_for_status()
|
||||
.context("update binary download failed")?
|
||||
.bytes()
|
||||
.await
|
||||
.context("reading update binary")?;
|
||||
|
||||
if binary.len() > MAX_BINARY_BYTES {
|
||||
bail!("update binary is {} bytes, exceeds the {MAX_BINARY_BYTES} cap", binary.len());
|
||||
}
|
||||
|
||||
let signature = client
|
||||
.get(&sig_url)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("downloading {sig_url}"))?
|
||||
.error_for_status()
|
||||
.context("signature download failed")?
|
||||
.text()
|
||||
.await
|
||||
.context("reading signature")?;
|
||||
|
||||
verify_signature(&binary, &signature).context("refusing unsigned/tampered update")?;
|
||||
tracing::info!("update signature verified ({} bytes)", binary.len());
|
||||
|
||||
let current = std::env::current_exe().context("resolving current executable")?;
|
||||
swap_binary(¤t, &binary)?;
|
||||
tracing::info!("update swapped in at {}", current.display());
|
||||
Ok(current)
|
||||
}
|
||||
|
||||
/// Atomically replace `current` with `new_bytes`, keeping a `.old` backup and
|
||||
/// rolling back if the rename fails.
|
||||
pub fn swap_binary(current: &Path, new_bytes: &[u8]) -> Result<()> {
|
||||
let dir = current.parent().unwrap_or_else(|| Path::new("."));
|
||||
let stem = current.file_name().and_then(|s| s.to_str()).unwrap_or("corrosion-host-agent");
|
||||
let new_path = dir.join(format!("{stem}.new"));
|
||||
let backup = dir.join(format!("{stem}.old"));
|
||||
|
||||
std::fs::write(&new_path, new_bytes)
|
||||
.with_context(|| format!("writing {}", new_path.display()))?;
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(&new_path, std::fs::Permissions::from_mode(0o755))
|
||||
.context("chmod +x on new binary")?;
|
||||
}
|
||||
|
||||
let _ = std::fs::remove_file(&backup);
|
||||
std::fs::rename(current, &backup)
|
||||
.with_context(|| format!("backing up current binary to {}", backup.display()))?;
|
||||
|
||||
if let Err(e) = std::fs::rename(&new_path, current) {
|
||||
// Roll back: restore the backup so the agent stays runnable.
|
||||
let _ = std::fs::rename(&backup, current);
|
||||
return Err(anyhow::anyhow!(e).context("installing new binary (rolled back)"));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Relaunch the (already-swapped) binary with the same args, then exit. No
|
||||
/// service manager is required — the new process reconnects on its own. There
|
||||
/// is a sub-second window with no agent; acceptable for an update.
|
||||
pub fn relaunch_and_exit() -> ! {
|
||||
let exe = std::env::current_exe().unwrap_or_else(|_| PathBuf::from("corrosion-host-agent"));
|
||||
let args: Vec<String> = std::env::args().skip(1).collect();
|
||||
tracing::info!("relaunching {} after update", exe.display());
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::process::CommandExt;
|
||||
// exec replaces this process image with the new binary — cleanest,
|
||||
// no gap. Only returns on failure.
|
||||
let err = std::process::Command::new(&exe).args(&args).exec();
|
||||
tracing::error!("exec after update failed: {err}; exiting for service restart");
|
||||
std::process::exit(70);
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = std::process::Command::new(&exe).args(&args).spawn();
|
||||
std::process::exit(0);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user