Introduce a Supervisor trait (async-trait) so the agent manages games with different models behind one wire contract. ProcessSupervisor (spawned process: rust/conan/soulmask) and the new DockerComposeSupervisor (dune) both impl it; Agent.supervisors is now HashMap<String, Arc<dyn Supervisor>> and instancecmd dispatch is game-agnostic — start/stop/restart/status identical across games, selected by a per-game factory in main. InstanceState moved to the shared supervisor module. DockerComposeSupervisor drives docker-compose up -d / stop / restart against the instance's compose project, with -f/-p/single-service support and a configurable compose binary. New [instance.docker_compose] config block. First cut = lifecycle + cached state; container crash-detection + restart adoption deferred to Phase 3b (reconcilable with a compose ps probe). Trait choice (dyn over enum) per Commander: scales to future planes (kubectl, AMP/podman, SSH) as new struct+impl, no central match. 56 tests green (6 new docker-compose mock-binary tests + 5 refactored process tests), zero warnings. Live verification pending a real Dune stack. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
217 lines
7.6 KiB
Rust
217 lines
7.6 KiB
Rust
//! Docker-compose instance supervision — the Dune: Awakening adapter.
|
|
//!
|
|
//! Dune does not ship as a SteamCMD-updated process like Rust/Conan/Soulmask;
|
|
//! it runs as Docker container(s) (game server + RabbitMQ broker + Postgres),
|
|
//! orchestrated as a compose stack (a "battlegroup"). So Dune lifecycle is
|
|
//! `docker compose up -d / stop / restart` against the instance's compose
|
|
//! project, not a spawned OS process. This supervisor implements the same
|
|
//! [`Supervisor`] trait `ProcessSupervisor` does, so the instance command
|
|
//! dispatch is identical — only the management model differs.
|
|
//!
|
|
//! Scope (first cut): lifecycle + cached state. Two parity items are deferred
|
|
//! to Phase 3b alongside process PID adoption: (1) crash detection (containers
|
|
//! give us no child handle — a `docker compose ps` poll loop would supply it);
|
|
//! (2) state adoption on agent restart (a running stack reports `stopped` until
|
|
//! the next lifecycle command). Both are reconcilable with a `ps` probe.
|
|
//!
|
|
//! Reference: docs/reference-repos/icehunter SETUP_DOCKER.md (the docker
|
|
//! control plane this mirrors).
|
|
|
|
use std::path::PathBuf;
|
|
use std::process::Stdio;
|
|
use std::sync::Arc;
|
|
use std::time::Instant;
|
|
|
|
use anyhow::{bail, Context, Result};
|
|
use serde::Deserialize;
|
|
use tokio::process::Command;
|
|
use tokio::sync::{watch, Mutex};
|
|
|
|
use crate::config::InstanceConfig;
|
|
use crate::supervisor::{InstanceState, Supervisor};
|
|
|
|
/// Per-instance docker-compose settings (`[instance.docker_compose]`). All
|
|
/// fields optional — defaults cover the common "one compose file in the
|
|
/// instance root" case.
|
|
#[derive(Debug, Clone, Default, Deserialize)]
|
|
#[serde(deny_unknown_fields)]
|
|
pub struct DockerComposeConfig {
|
|
/// Compose file (`-f`). Relative paths resolve against the run dir. Default:
|
|
/// compose's own discovery (docker-compose.yml in the run dir).
|
|
#[serde(default)]
|
|
pub file: Option<PathBuf>,
|
|
/// Compose project name (`-p`). Default: the instance id.
|
|
#[serde(default)]
|
|
pub project: Option<String>,
|
|
/// Limit lifecycle ops to one service. Default: every service in the file.
|
|
#[serde(default)]
|
|
pub service: Option<String>,
|
|
/// Override the compose binary invocation. Default: `["docker","compose"]`.
|
|
/// Use `["docker-compose"]` for the legacy standalone binary.
|
|
#[serde(default)]
|
|
pub command: Option<Vec<String>>,
|
|
}
|
|
|
|
/// Mutable bookkeeping the supervisor keeps behind its async mutex.
struct Inner {
    /// Instant recorded after the most recent successful `up`/`restart`;
    /// reset to `None` after a successful `stop`. Feeds the uptime report.
    started_at: Option<Instant>,
}
|
|
|
|
pub struct DockerComposeSupervisor {
|
|
instance_id: String,
|
|
/// Directory the compose commands run in (relative `-f`/file paths resolve
|
|
/// against it).
|
|
run_dir: PathBuf,
|
|
compose_file: Option<PathBuf>,
|
|
project: String,
|
|
service: Option<String>,
|
|
/// Compose binary + leading args, e.g. `["docker","compose"]`.
|
|
command: Vec<String>,
|
|
inner: Mutex<Inner>,
|
|
state_tx: watch::Sender<InstanceState>,
|
|
}
|
|
|
|
impl DockerComposeSupervisor {
|
|
pub fn new(cfg: &InstanceConfig) -> Arc<Self> {
|
|
let dc = cfg.docker_compose.clone().unwrap_or_default();
|
|
let run_dir = cfg
|
|
.working_dir
|
|
.clone()
|
|
.unwrap_or_else(|| cfg.root.clone());
|
|
let command = dc
|
|
.command
|
|
.filter(|c| !c.is_empty())
|
|
.unwrap_or_else(|| vec!["docker".to_string(), "compose".to_string()]);
|
|
let (state_tx, _) = watch::channel(InstanceState::Stopped);
|
|
Arc::new(Self {
|
|
instance_id: cfg.id.clone(),
|
|
run_dir,
|
|
compose_file: dc.file,
|
|
project: dc.project.unwrap_or_else(|| cfg.id.clone()),
|
|
service: dc.service,
|
|
command,
|
|
inner: Mutex::new(Inner { started_at: None }),
|
|
state_tx,
|
|
})
|
|
}
|
|
|
|
fn set_state(&self, state: InstanceState) {
|
|
let _ = self.state_tx.send_replace(state);
|
|
}
|
|
|
|
/// Run one compose subcommand (`up`/`stop`/`restart`/...), bailing with the
|
|
/// captured stderr on non-zero exit. Global flags (`-f`, `-p`) precede the
|
|
/// subcommand; the optional single service is appended last.
|
|
async fn run(&self, action: &str, action_args: &[&str]) -> Result<()> {
|
|
let mut cmd = Command::new(&self.command[0]);
|
|
cmd.args(&self.command[1..]);
|
|
if let Some(file) = &self.compose_file {
|
|
cmd.arg("-f").arg(file);
|
|
}
|
|
cmd.arg("-p").arg(&self.project);
|
|
cmd.arg(action);
|
|
cmd.args(action_args);
|
|
if let Some(service) = &self.service {
|
|
cmd.arg(service);
|
|
}
|
|
cmd.current_dir(&self.run_dir)
|
|
.stdin(Stdio::null())
|
|
.stdout(Stdio::piped())
|
|
.stderr(Stdio::piped());
|
|
|
|
let output = cmd
|
|
.output()
|
|
.await
|
|
.with_context(|| format!("running `{} {action}` (is docker installed and on PATH?)", self.command.join(" ")))?;
|
|
|
|
if !output.status.success() {
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
let detail = if !stderr.trim().is_empty() {
|
|
stderr.trim()
|
|
} else {
|
|
stdout.trim()
|
|
};
|
|
bail!("compose {action} failed ({}): {detail}", output.status);
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait]
|
|
impl Supervisor for DockerComposeSupervisor {
|
|
fn instance_id(&self) -> &str {
|
|
&self.instance_id
|
|
}
|
|
|
|
fn state(&self) -> InstanceState {
|
|
self.state_tx.borrow().clone()
|
|
}
|
|
|
|
fn watch_state(&self) -> watch::Receiver<InstanceState> {
|
|
self.state_tx.subscribe()
|
|
}
|
|
|
|
async fn uptime_seconds(&self) -> u64 {
|
|
let inner = self.inner.lock().await;
|
|
match (&*self.state_tx.borrow(), inner.started_at) {
|
|
(InstanceState::Running, Some(t)) => t.elapsed().as_secs(),
|
|
_ => 0,
|
|
}
|
|
}
|
|
|
|
async fn start(self: Arc<Self>) -> Result<()> {
|
|
if matches!(
|
|
*self.state_tx.borrow(),
|
|
InstanceState::Running | InstanceState::Starting
|
|
) {
|
|
bail!("instance '{}' is already running", self.instance_id);
|
|
}
|
|
self.set_state(InstanceState::Starting);
|
|
match self.run("up", &["-d"]).await {
|
|
Ok(()) => {
|
|
self.inner.lock().await.started_at = Some(Instant::now());
|
|
self.set_state(InstanceState::Running);
|
|
tracing::info!("instance '{}' compose up -d", self.instance_id);
|
|
Ok(())
|
|
}
|
|
Err(e) => {
|
|
self.set_state(InstanceState::Stopped);
|
|
Err(e)
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn stop(self: Arc<Self>) -> Result<()> {
|
|
self.set_state(InstanceState::Stopping);
|
|
match self.run("stop", &[]).await {
|
|
Ok(()) => {
|
|
self.inner.lock().await.started_at = None;
|
|
self.set_state(InstanceState::Stopped);
|
|
tracing::info!("instance '{}' compose stop", self.instance_id);
|
|
Ok(())
|
|
}
|
|
Err(e) => {
|
|
// Stop failed — the stack is most likely still up.
|
|
self.set_state(InstanceState::Running);
|
|
Err(e)
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn restart(self: Arc<Self>) -> Result<()> {
|
|
self.set_state(InstanceState::Starting);
|
|
match self.run("restart", &[]).await {
|
|
Ok(()) => {
|
|
self.inner.lock().await.started_at = Some(Instant::now());
|
|
self.set_state(InstanceState::Running);
|
|
tracing::info!("instance '{}' compose restart", self.instance_id);
|
|
Ok(())
|
|
}
|
|
Err(e) => {
|
|
self.set_state(InstanceState::Stopped);
|
|
Err(e)
|
|
}
|
|
}
|
|
}
|
|
}
|