//! Docker-compose instance supervision — the Dune: Awakening adapter. //! //! Dune does not ship as a SteamCMD-updated process like Rust/Conan/Soulmask; //! it runs as Docker container(s) (game server + RabbitMQ broker + Postgres), //! orchestrated as a compose stack (a "battlegroup"). So Dune lifecycle is //! `docker compose up -d / stop / restart` against the instance's compose //! project, not a spawned OS process. This supervisor implements the same //! [`Supervisor`] trait `ProcessSupervisor` does, so the instance command //! dispatch is identical — only the management model differs. //! //! Scope (first cut): lifecycle + cached state. Two parity items are deferred //! to Phase 3b alongside process PID adoption: (1) crash detection (containers //! give us no child handle — a `docker compose ps` poll loop would supply it); //! (2) state adoption on agent restart (a running stack reports `stopped` until //! the next lifecycle command). Both are reconcilable with a `ps` probe. //! //! Reference: docs/reference-repos/icehunter SETUP_DOCKER.md (the docker //! control plane this mirrors). use std::path::PathBuf; use std::process::Stdio; use std::sync::Arc; use std::time::Instant; use anyhow::{bail, Context, Result}; use serde::Deserialize; use tokio::process::Command; use tokio::sync::{watch, Mutex}; use crate::config::InstanceConfig; use crate::supervisor::{InstanceState, Supervisor}; /// Per-instance docker-compose settings (`[instance.docker_compose]`). All /// fields optional — defaults cover the common "one compose file in the /// instance root" case. #[derive(Debug, Clone, Default, Deserialize)] #[serde(deny_unknown_fields)] pub struct DockerComposeConfig { /// Compose file (`-f`). Relative paths resolve against the run dir. Default: /// compose's own discovery (docker-compose.yml in the run dir). #[serde(default)] pub file: Option, /// Compose project name (`-p`). Default: the instance id. #[serde(default)] pub project: Option, /// Limit lifecycle ops to one service. Default: every service in the file. #[serde(default)] pub service: Option, /// Override the compose binary invocation. Default: `["docker","compose"]`. /// Use `["docker-compose"]` for the legacy standalone binary. #[serde(default)] pub command: Option>, } struct Inner { started_at: Option, } pub struct DockerComposeSupervisor { instance_id: String, /// Directory the compose commands run in (relative `-f`/file paths resolve /// against it). run_dir: PathBuf, compose_file: Option, project: String, service: Option, /// Compose binary + leading args, e.g. `["docker","compose"]`. command: Vec, inner: Mutex, state_tx: watch::Sender, } impl DockerComposeSupervisor { pub fn new(cfg: &InstanceConfig) -> Arc { let dc = cfg.docker_compose.clone().unwrap_or_default(); let run_dir = cfg .working_dir .clone() .unwrap_or_else(|| cfg.root.clone()); let command = dc .command .filter(|c| !c.is_empty()) .unwrap_or_else(|| vec!["docker".to_string(), "compose".to_string()]); let (state_tx, _) = watch::channel(InstanceState::Stopped); Arc::new(Self { instance_id: cfg.id.clone(), run_dir, compose_file: dc.file, project: dc.project.unwrap_or_else(|| cfg.id.clone()), service: dc.service, command, inner: Mutex::new(Inner { started_at: None }), state_tx, }) } fn set_state(&self, state: InstanceState) { let _ = self.state_tx.send_replace(state); } /// Run one compose subcommand (`up`/`stop`/`restart`/...), bailing with the /// captured stderr on non-zero exit. Global flags (`-f`, `-p`) precede the /// subcommand; the optional single service is appended last. async fn run(&self, action: &str, action_args: &[&str]) -> Result<()> { let mut cmd = Command::new(&self.command[0]); cmd.args(&self.command[1..]); if let Some(file) = &self.compose_file { cmd.arg("-f").arg(file); } cmd.arg("-p").arg(&self.project); cmd.arg(action); cmd.args(action_args); if let Some(service) = &self.service { cmd.arg(service); } cmd.current_dir(&self.run_dir) .stdin(Stdio::null()) .stdout(Stdio::piped()) .stderr(Stdio::piped()); let output = cmd .output() .await .with_context(|| format!("running `{} {action}` (is docker installed and on PATH?)", self.command.join(" ")))?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); let stdout = String::from_utf8_lossy(&output.stdout); let detail = if !stderr.trim().is_empty() { stderr.trim() } else { stdout.trim() }; bail!("compose {action} failed ({}): {detail}", output.status); } Ok(()) } } #[async_trait::async_trait] impl Supervisor for DockerComposeSupervisor { fn instance_id(&self) -> &str { &self.instance_id } fn state(&self) -> InstanceState { self.state_tx.borrow().clone() } fn watch_state(&self) -> watch::Receiver { self.state_tx.subscribe() } async fn uptime_seconds(&self) -> u64 { let inner = self.inner.lock().await; match (&*self.state_tx.borrow(), inner.started_at) { (InstanceState::Running, Some(t)) => t.elapsed().as_secs(), _ => 0, } } async fn start(self: Arc) -> Result<()> { if matches!( *self.state_tx.borrow(), InstanceState::Running | InstanceState::Starting ) { bail!("instance '{}' is already running", self.instance_id); } self.set_state(InstanceState::Starting); match self.run("up", &["-d"]).await { Ok(()) => { self.inner.lock().await.started_at = Some(Instant::now()); self.set_state(InstanceState::Running); tracing::info!("instance '{}' compose up -d", self.instance_id); Ok(()) } Err(e) => { self.set_state(InstanceState::Stopped); Err(e) } } } async fn stop(self: Arc) -> Result<()> { self.set_state(InstanceState::Stopping); match self.run("stop", &[]).await { Ok(()) => { self.inner.lock().await.started_at = None; self.set_state(InstanceState::Stopped); tracing::info!("instance '{}' compose stop", self.instance_id); Ok(()) } Err(e) => { // Stop failed — the stack is most likely still up. self.set_state(InstanceState::Running); Err(e) } } } async fn restart(self: Arc) -> Result<()> { self.set_state(InstanceState::Starting); match self.run("restart", &[]).await { Ok(()) => { self.inner.lock().await.started_at = Some(Instant::now()); self.set_state(InstanceState::Running); tracing::info!("instance '{}' compose restart", self.instance_id); Ok(()) } Err(e) => { self.set_state(InstanceState::Stopped); Err(e) } } } }