feat(host-agent): Phase 2 — Dune docker-compose adapter via Supervisor trait
Introduce a Supervisor trait (async-trait) so the agent manages games with different models behind one wire contract. ProcessSupervisor (spawned process: rust/conan/soulmask) and the new DockerComposeSupervisor (dune) both implement it; Agent.supervisors is now HashMap<String, Arc<dyn Supervisor>> and instancecmd dispatch is game-agnostic — start/stop/restart/status identical across games, selected by a per-game factory in main. InstanceState moved to the shared supervisor module. DockerComposeSupervisor drives docker-compose up -d / stop / restart against the instance's compose project, with -f/-p/single-service support and a configurable compose binary. New [instance.docker_compose] config block. First cut = lifecycle + cached state; container crash-detection + restart adoption deferred to Phase 3b (reconcilable with a compose ps probe). Trait choice (dyn over enum) per Commander: scales to future planes (kubectl, AMP/podman, SSH) as new struct+impl, no central match. 56 tests green (6 new docker-compose mock-binary tests + 5 refactored process tests), zero warnings. Live verification pending a real Dune stack. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
216
corrosion-host-agent/src/docker_compose.rs
Normal file
216
corrosion-host-agent/src/docker_compose.rs
Normal file
@@ -0,0 +1,216 @@
|
||||
//! Docker-compose instance supervision — the Dune: Awakening adapter.
|
||||
//!
|
||||
//! Dune does not ship as a SteamCMD-updated process like Rust/Conan/Soulmask;
|
||||
//! it runs as Docker container(s) (game server + RabbitMQ broker + Postgres),
|
||||
//! orchestrated as a compose stack (a "battlegroup"). So Dune lifecycle is
|
||||
//! `docker compose up -d / stop / restart` against the instance's compose
|
||||
//! project, not a spawned OS process. This supervisor implements the same
|
||||
//! [`Supervisor`] trait `ProcessSupervisor` does, so the instance command
|
||||
//! dispatch is identical — only the management model differs.
|
||||
//!
|
||||
//! Scope (first cut): lifecycle + cached state. Two parity items are deferred
|
||||
//! to Phase 3b alongside process PID adoption: (1) crash detection (containers
|
||||
//! give us no child handle — a `docker compose ps` poll loop would supply it);
|
||||
//! (2) state adoption on agent restart (a running stack reports `stopped` until
|
||||
//! the next lifecycle command). Both are reconcilable with a `ps` probe.
|
||||
//!
|
||||
//! Reference: docs/reference-repos/icehunter SETUP_DOCKER.md (the docker
|
||||
//! control plane this mirrors).
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use serde::Deserialize;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::{watch, Mutex};
|
||||
|
||||
use crate::config::InstanceConfig;
|
||||
use crate::supervisor::{InstanceState, Supervisor};
|
||||
|
||||
/// Per-instance docker-compose settings (`[instance.docker_compose]`). All
|
||||
/// fields optional — defaults cover the common "one compose file in the
|
||||
/// instance root" case.
|
||||
#[derive(Debug, Clone, Default, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct DockerComposeConfig {
|
||||
/// Compose file (`-f`). Relative paths resolve against the run dir. Default:
|
||||
/// compose's own discovery (docker-compose.yml in the run dir).
|
||||
#[serde(default)]
|
||||
pub file: Option<PathBuf>,
|
||||
/// Compose project name (`-p`). Default: the instance id.
|
||||
#[serde(default)]
|
||||
pub project: Option<String>,
|
||||
/// Limit lifecycle ops to one service. Default: every service in the file.
|
||||
#[serde(default)]
|
||||
pub service: Option<String>,
|
||||
/// Override the compose binary invocation. Default: `["docker","compose"]`.
|
||||
/// Use `["docker-compose"]` for the legacy standalone binary.
|
||||
#[serde(default)]
|
||||
pub command: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// Mutable bookkeeping kept behind the supervisor's mutex.
struct Inner {
    /// Instant of the most recent successful `up` / `restart`; reset to `None`
    /// by a successful `stop` and at construction.
    started_at: Option<Instant>,
}
|
||||
|
||||
pub struct DockerComposeSupervisor {
|
||||
instance_id: String,
|
||||
/// Directory the compose commands run in (relative `-f`/file paths resolve
|
||||
/// against it).
|
||||
run_dir: PathBuf,
|
||||
compose_file: Option<PathBuf>,
|
||||
project: String,
|
||||
service: Option<String>,
|
||||
/// Compose binary + leading args, e.g. `["docker","compose"]`.
|
||||
command: Vec<String>,
|
||||
inner: Mutex<Inner>,
|
||||
state_tx: watch::Sender<InstanceState>,
|
||||
}
|
||||
|
||||
impl DockerComposeSupervisor {
|
||||
pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
|
||||
let dc = cfg.docker_compose.clone().unwrap_or_default();
|
||||
let run_dir = cfg
|
||||
.working_dir
|
||||
.clone()
|
||||
.unwrap_or_else(|| cfg.root.clone());
|
||||
let command = dc
|
||||
.command
|
||||
.filter(|c| !c.is_empty())
|
||||
.unwrap_or_else(|| vec!["docker".to_string(), "compose".to_string()]);
|
||||
let (state_tx, _) = watch::channel(InstanceState::Stopped);
|
||||
Arc::new(Self {
|
||||
instance_id: cfg.id.clone(),
|
||||
run_dir,
|
||||
compose_file: dc.file,
|
||||
project: dc.project.unwrap_or_else(|| cfg.id.clone()),
|
||||
service: dc.service,
|
||||
command,
|
||||
inner: Mutex::new(Inner { started_at: None }),
|
||||
state_tx,
|
||||
})
|
||||
}
|
||||
|
||||
fn set_state(&self, state: InstanceState) {
|
||||
let _ = self.state_tx.send_replace(state);
|
||||
}
|
||||
|
||||
/// Run one compose subcommand (`up`/`stop`/`restart`/...), bailing with the
|
||||
/// captured stderr on non-zero exit. Global flags (`-f`, `-p`) precede the
|
||||
/// subcommand; the optional single service is appended last.
|
||||
async fn run(&self, action: &str, action_args: &[&str]) -> Result<()> {
|
||||
let mut cmd = Command::new(&self.command[0]);
|
||||
cmd.args(&self.command[1..]);
|
||||
if let Some(file) = &self.compose_file {
|
||||
cmd.arg("-f").arg(file);
|
||||
}
|
||||
cmd.arg("-p").arg(&self.project);
|
||||
cmd.arg(action);
|
||||
cmd.args(action_args);
|
||||
if let Some(service) = &self.service {
|
||||
cmd.arg(service);
|
||||
}
|
||||
cmd.current_dir(&self.run_dir)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped());
|
||||
|
||||
let output = cmd
|
||||
.output()
|
||||
.await
|
||||
.with_context(|| format!("running `{} {action}` (is docker installed and on PATH?)", self.command.join(" ")))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let detail = if !stderr.trim().is_empty() {
|
||||
stderr.trim()
|
||||
} else {
|
||||
stdout.trim()
|
||||
};
|
||||
bail!("compose {action} failed ({}): {detail}", output.status);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Supervisor for DockerComposeSupervisor {
|
||||
fn instance_id(&self) -> &str {
|
||||
&self.instance_id
|
||||
}
|
||||
|
||||
fn state(&self) -> InstanceState {
|
||||
self.state_tx.borrow().clone()
|
||||
}
|
||||
|
||||
fn watch_state(&self) -> watch::Receiver<InstanceState> {
|
||||
self.state_tx.subscribe()
|
||||
}
|
||||
|
||||
async fn uptime_seconds(&self) -> u64 {
|
||||
let inner = self.inner.lock().await;
|
||||
match (&*self.state_tx.borrow(), inner.started_at) {
|
||||
(InstanceState::Running, Some(t)) => t.elapsed().as_secs(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
async fn start(self: Arc<Self>) -> Result<()> {
|
||||
if matches!(
|
||||
*self.state_tx.borrow(),
|
||||
InstanceState::Running | InstanceState::Starting
|
||||
) {
|
||||
bail!("instance '{}' is already running", self.instance_id);
|
||||
}
|
||||
self.set_state(InstanceState::Starting);
|
||||
match self.run("up", &["-d"]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = Some(Instant::now());
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!("instance '{}' compose up -d", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn stop(self: Arc<Self>) -> Result<()> {
|
||||
self.set_state(InstanceState::Stopping);
|
||||
match self.run("stop", &[]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = None;
|
||||
self.set_state(InstanceState::Stopped);
|
||||
tracing::info!("instance '{}' compose stop", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
// Stop failed — the stack is most likely still up.
|
||||
self.set_state(InstanceState::Running);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn restart(self: Arc<Self>) -> Result<()> {
|
||||
self.set_state(InstanceState::Starting);
|
||||
match self.run("restart", &[]).await {
|
||||
Ok(()) => {
|
||||
self.inner.lock().await.started_at = Some(Instant::now());
|
||||
self.set_state(InstanceState::Running);
|
||||
tracing::info!("instance '{}' compose restart", self.instance_id);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
self.set_state(InstanceState::Stopped);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user