Files
corrosion-admin-panel/backend/src/services/scheduler.rs
Vantz Stockwell 75d08aeee4 feat: Phase 2 data aggregation pipeline (Strike 4A)
Backend:
- Stats ingestion consumer subscribing to corrosion.*.stats NATS subject
- Hourly aggregation scheduler (runs :05 past every hour)
- Daily cleanup job (03:00 UTC) with 7-day raw / 90-day hourly retention
- Analytics API (summary, timeseries, CSV export)
- Complete stats DB queries with aggregation and cleanup

Frontend:
- Analytics dashboard with ECharts integration
- Player count and server performance charts
- Time range selector (24h/7d/30d)
- CSV export functionality
- Real-time data loading

Infrastructure:
- Exposed NatsBridge.jetstream for consumer access
- Background service initialization in main.rs

Data flow: Plugin → NATS → Consumer → DB → Aggregation → API → Charts

Unblocks Strike 4B (dashboards) and 4C (alerting).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-15 12:53:25 -05:00

341 lines
11 KiB
Rust

use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio_cron_scheduler::{Job, JobScheduler};
use uuid::Uuid;
use super::nats_bridge::NatsBridge;
/// Cron-based wipe schedule management.
///
/// Manages wipe schedules backed by `tokio-cron-scheduler`. Each schedule
/// maps a cron expression + timezone to a wipe profile. When a schedule
/// fires, it creates a wipe_history record and dispatches execution to
/// the WipeEngine.
pub struct SchedulerService {
db: sqlx::PgPool,
nats: Arc<NatsBridge>,
scheduler: JobScheduler,
/// Maps schedule_id to job_id for removal
job_handles: Arc<Mutex<HashMap<Uuid, Uuid>>>,
}
impl SchedulerService {
pub async fn new(db: sqlx::PgPool, nats: Arc<NatsBridge>) -> Result<Self> {
let scheduler = JobScheduler::new()
.await
.context("Failed to create JobScheduler")?;
Ok(Self {
db,
nats,
scheduler,
job_handles: Arc::new(Mutex::new(HashMap::new())),
})
}
/// Start the scheduler, loading all active schedules from the database.
pub async fn start(&self) -> Result<()> {
// Query all active wipe_schedules from DB
let schedules: Vec<(Uuid,)> = sqlx::query_as(
"SELECT id FROM wipe_schedules WHERE enabled = true",
)
.fetch_all(&self.db)
.await
.context("Failed to query active wipe schedules")?;
tracing::info!("Loading {} active wipe schedules", schedules.len());
// Register each schedule as a cron job
for (schedule_id,) in schedules {
if let Err(e) = self.register_wipe_schedule(schedule_id).await {
tracing::error!("Failed to register schedule {}: {}", schedule_id, e);
// Continue loading other schedules
}
}
// Start the scheduler event loop
self.scheduler
.start()
.await
.context("Failed to start scheduler")?;
tracing::info!("Scheduler started successfully");
Ok(())
}
/// Register a wipe schedule as a cron job.
pub async fn register_wipe_schedule(&self, schedule_id: Uuid) -> Result<()> {
// Load WipeSchedule from DB
let schedule: Option<(String, String, Uuid, Uuid)> = sqlx::query_as(
"SELECT cron_expression, timezone, wipe_profile_id, license_id
FROM wipe_schedules
WHERE id = $1 AND enabled = true",
)
.bind(schedule_id)
.fetch_optional(&self.db)
.await
.context("Failed to query wipe schedule")?;
if schedule.is_none() {
anyhow::bail!("Schedule not found or disabled: {}", schedule_id);
}
let (cron_expression, timezone, wipe_profile_id, license_id) = schedule.unwrap();
// Clone references for closure
let db = self.db.clone();
let nats = self.nats.clone();
let schedule_id_clone = schedule_id;
// Create tokio-cron-scheduler job
let job = Job::new_async(cron_expression.as_str(), move |_uuid, _l| {
let db = db.clone();
let nats = nats.clone();
let wipe_profile_id = wipe_profile_id;
let license_id = license_id;
let schedule_id = schedule_id_clone;
Box::pin(async move {
tracing::info!(
"Scheduled wipe triggered: schedule {} (profile {})",
schedule_id,
wipe_profile_id
);
// Create wipe_history record
let wipe_history_id = Uuid::new_v4();
let result = sqlx::query(
"INSERT INTO wipe_history (id, license_id, wipe_profile_id, wipe_schedule_id, status, created_at)
VALUES ($1, $2, $3, $4, 'pending', NOW())",
)
.bind(wipe_history_id)
.bind(license_id)
.bind(wipe_profile_id)
.bind(schedule_id)
.execute(&db)
.await;
if let Err(e) = result {
tracing::error!("Failed to create wipe_history record: {}", e);
return;
}
// Publish wipe execution event to NATS
let payload = serde_json::json!({
"wipe_history_id": wipe_history_id,
"license_id": license_id,
"wipe_profile_id": wipe_profile_id,
"schedule_id": schedule_id,
"triggered_by": "scheduler",
"timestamp": Utc::now().to_rfc3339(),
});
if let Err(e) = nats
.publish_jetstream(
&format!("corrosion.wipes.{}.execute", license_id),
payload.to_string().as_bytes(),
)
.await
{
tracing::error!(
"Failed to publish wipe execution event for {}: {}",
wipe_history_id,
e
);
}
tracing::info!(
"Wipe execution dispatched: {} (schedule: {})",
wipe_history_id,
schedule_id
);
})
})
.context("Failed to create cron job")?;
// Add job to scheduler
let job_id = self
.scheduler
.add(job)
.await
.context("Failed to add job to scheduler")?;
// Store job handle for later removal
self.job_handles.lock().await.insert(schedule_id, job_id);
tracing::info!(
"Registered wipe schedule: {} (cron: {}, tz: {})",
schedule_id,
cron_expression,
timezone
);
Ok(())
}
/// Remove a schedule from the running scheduler.
pub async fn remove_schedule(&self, schedule_id: Uuid) -> Result<()> {
// Look up job handle by schedule_id
let job_id = {
let mut handles = self.job_handles.lock().await;
handles.remove(&schedule_id)
};
if let Some(job_id) = job_id {
// Remove from tokio-cron-scheduler
self.scheduler
.remove(&job_id)
.await
.context("Failed to remove job from scheduler")?;
tracing::info!("Removed wipe schedule: {}", schedule_id);
} else {
tracing::warn!("Schedule not found in scheduler: {}", schedule_id);
}
Ok(())
}
/// Calculate the next N scheduled run times for a given schedule.
pub async fn get_next_runs(
&self,
schedule_id: Uuid,
count: usize,
) -> Result<Vec<DateTime<Utc>>> {
// Load cron_expression and timezone from DB
let schedule: Option<(String, String)> = sqlx::query_as(
"SELECT cron_expression, timezone FROM wipe_schedules WHERE id = $1",
)
.bind(schedule_id)
.fetch_optional(&self.db)
.await
.context("Failed to query wipe schedule")?;
if schedule.is_none() {
anyhow::bail!("Schedule not found: {}", schedule_id);
}
let (cron_expression, _timezone) = schedule.unwrap();
// Parse cron expression using cron library
use cron::Schedule;
use std::str::FromStr;
let schedule = Schedule::from_str(&cron_expression)
.with_context(|| format!("Invalid cron expression: {}", cron_expression))?;
// Compute next N fire times
let now = Utc::now();
let next_times: Vec<DateTime<Utc>> = schedule
.upcoming(Utc)
.take(count)
.collect();
tracing::debug!(
"Calculated {} next run times for schedule {}",
next_times.len(),
schedule_id
);
Ok(next_times)
}
/// Register hourly stats aggregation job (runs at :05 past every hour).
pub async fn register_stats_aggregation(&self) -> Result<()> {
let db = self.db.clone();
let job = Job::new_async("0 5 * * * *", move |_uuid, _l| {
let db = db.clone();
Box::pin(async move {
tracing::info!("Running hourly stats aggregation");
// Get all active licenses
let licenses: Vec<(Uuid,)> = match sqlx::query_as(
"SELECT id FROM licenses WHERE status = 'active'",
)
.fetch_all(&db)
.await
{
Ok(l) => l,
Err(e) => {
tracing::error!("Failed to query active licenses: {}", e);
return;
}
};
tracing::info!("Aggregating stats for {} licenses", licenses.len());
for (license_id,) in licenses {
if let Err(e) = crate::db::stats::aggregate_hourly_stats(&db, license_id).await
{
tracing::error!(
"Failed to aggregate stats for license {}: {}",
license_id,
e
);
}
}
tracing::info!("Hourly stats aggregation complete");
})
})
.context("Failed to create stats aggregation job")?;
self.scheduler
.add(job)
.await
.context("Failed to add stats aggregation job to scheduler")?;
tracing::info!("Registered hourly stats aggregation job (cron: 0 5 * * * *)");
Ok(())
}
/// Register daily stats cleanup job (runs at 03:00 UTC).
pub async fn register_stats_cleanup(&self) -> Result<()> {
let db = self.db.clone();
let job = Job::new_async("0 0 3 * * *", move |_uuid, _l| {
let db = db.clone();
Box::pin(async move {
tracing::info!("Running daily stats cleanup");
// Delete raw stats older than 7 days
match crate::db::stats::cleanup_old_stats(&db, 7).await {
Ok(deleted) => {
tracing::info!("Deleted {} old raw stats records", deleted);
}
Err(e) => {
tracing::error!("Failed to cleanup old raw stats: {}", e);
}
}
// Delete hourly stats older than 90 days
match crate::db::stats::cleanup_old_hourly_stats(&db, 90).await {
Ok(deleted) => {
tracing::info!("Deleted {} old hourly stats records", deleted);
}
Err(e) => {
tracing::error!("Failed to cleanup old hourly stats: {}", e);
}
}
tracing::info!("Daily stats cleanup complete");
})
})
.context("Failed to create stats cleanup job")?;
self.scheduler
.add(job)
.await
.context("Failed to add stats cleanup job to scheduler")?;
tracing::info!("Registered daily stats cleanup job (cron: 0 0 3 * * *)");
Ok(())
}
}