//! Connectivity prober. //! //! Answers "is it the box or is it the network?" before a support ticket gets //! written. Phase 0 scope is OUTBOUND reachability: TCP connect timing from //! the host to known endpoints. Inbound port-forward verification (the thing //! panel users actually struggle with) requires a backend-side reverse probe //! and is specified in PROTOCOL.md as a later phase. use chrono::{SecondsFormat, Utc}; use serde::Serialize; use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::net::TcpStream; use crate::agent::Agent; use crate::config::ProbeTargetConfig; const CONNECT_TIMEOUT: Duration = Duration::from_secs(3); #[derive(Debug, Clone, Serialize)] pub struct ProbeResult { pub name: String, pub host: String, pub port: u16, pub ok: bool, #[serde(skip_serializing_if = "Option::is_none")] pub latency_ms: Option, #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, } #[derive(Debug, Clone, Serialize)] pub struct ProbeReport { pub timestamp: String, pub results: Vec, } /// Built-in targets every agent checks, before config extras. fn default_targets() -> Vec { vec![ProbeTargetConfig { name: "corrosion-cdn".to_string(), host: "cdn.corrosionmgmt.com".to_string(), port: 443, }] } pub async fn run_probe(extra_targets: &[ProbeTargetConfig]) -> ProbeReport { let mut targets = default_targets(); targets.extend(extra_targets.iter().cloned()); let checks = targets.into_iter().map(|t| async move { let started = Instant::now(); let addr = format!("{}:{}", t.host, t.port); let outcome = tokio::time::timeout(CONNECT_TIMEOUT, TcpStream::connect(&addr)).await; match outcome { Ok(Ok(_stream)) => ProbeResult { name: t.name, host: t.host, port: t.port, ok: true, latency_ms: Some(started.elapsed().as_millis() as u64), error: None, }, Ok(Err(e)) => ProbeResult { name: t.name, host: t.host, port: t.port, ok: false, latency_ms: None, error: Some(e.to_string()), }, Err(_) => ProbeResult { name: t.name, host: t.host, port: t.port, ok: false, latency_ms: None, error: Some(format!("timeout after {}s", CONNECT_TIMEOUT.as_secs())), }, } }); let results = futures::future::join_all(checks).await; ProbeReport { timestamp: Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true), results, } } /// Periodic probe loop; results land in shared state and ride the next /// heartbeat. Jittered interval to avoid fleet-wide synchronization. pub async fn run_loop(agent: Arc) { let cancel = agent.shutdown.clone(); loop { let report = run_probe(&agent.cfg.probe_targets).await; let failed: Vec<&str> = report .results .iter() .filter(|r| !r.ok) .map(|r| r.name.as_str()) .collect(); if failed.is_empty() { tracing::debug!("probe ok ({} targets)", report.results.len()); } else { tracing::warn!("probe failures: {}", failed.join(", ")); } *agent.last_probe.write().await = Some(report); let jitter = rand::Rng::gen_range(&mut rand::thread_rng(), 0.8..1.2); let interval = Duration::from_secs_f64(agent.cfg.probe_interval_seconds as f64 * jitter); tokio::select! { _ = tokio::time::sleep(interval) => {} _ = cancel.cancelled() => { tracing::info!("prober stopping"); break; } } } }