feat: Phase 2 data aggregation pipeline (Strike 4A)

Backend:
- Stats ingestion consumer subscribing to corrosion.*.stats NATS subject
- Hourly aggregation scheduler (runs :05 past every hour)
- Daily cleanup job (03:00 UTC) with 7-day raw / 90-day hourly retention
- Analytics API (summary, timeseries, CSV export)
- Complete stats DB queries with aggregation and cleanup

Frontend:
- Analytics dashboard with ECharts integration
- Player count and server performance charts
- Time range selector (24h/7d/30d)
- CSV export functionality
- Real-time data loading

Infrastructure:
- Exposed NatsBridge.jetstream for consumer access
- Background service initialization in main.rs

Data flow: Plugin → NATS → Consumer → DB → Aggregation → API → Charts

Unblocks Strike 4B (dashboards) and 4C (alerting).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Vantz Stockwell
2026-02-15 12:53:25 -05:00
parent 81eeb3b451
commit 75d08aeee4
11 changed files with 1130 additions and 73 deletions

View File

@@ -1,26 +1,222 @@
use sqlx::PgPool;
use uuid::Uuid;
use anyhow::Result;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
// TODO: Define ServerStats struct (id, server_id, player_count, fps, memory_usage, entities, timestamp)
// TODO: Define HourlyStats struct (id, server_id, hour, avg_players, avg_fps, avg_memory, peak_players)
/// Raw stats snapshot (for DB persistence).
///
/// Mirrors the columns selected from the `server_stats` table. Because it derives
/// `sqlx::FromRow`, each field name must match the name of a selected column.
#[derive(Debug, Clone, sqlx::FromRow, Serialize)]
pub struct ServerStatsRow {
/// Row identifier (a UUID v4 generated in `insert_server_stats`).
pub id: Uuid,
/// License the snapshot was recorded for; the read queries filter on this column.
pub license_id: Uuid,
/// Player count reported in the snapshot.
pub player_count: i32,
/// NOTE(review): presumably the server's configured player cap — confirm against the plugin payload.
pub max_players: i32,
/// Frame rate reported in the snapshot.
pub fps: f64,
/// Entity count reported in the snapshot.
pub entity_count: i32,
/// NOTE(review): presumably seconds since the game server started — confirm.
pub uptime_seconds: i32,
/// Memory usage; megabytes according to the field name.
pub memory_usage_mb: i32,
/// When the snapshot was stored; set to the database's `NOW()` at insert time.
pub recorded_at: DateTime<Utc>,
}
/// Hourly aggregated stats.
///
/// Mirrors the columns of the `server_stats_hourly` table, which is filled by
/// `aggregate_hourly_stats` from the raw `server_stats` rows.
#[derive(Debug, Clone, sqlx::FromRow, Serialize, Deserialize)]
pub struct HourlyStats {
/// Row identifier.
pub id: Uuid,
/// License the aggregate belongs to.
pub license_id: Uuid,
/// Start of the aggregated hour (`DATE_TRUNC('hour', recorded_at)` of the raw samples).
pub hour: DateTime<Utc>,
/// Mean `player_count` over the hour's samples.
pub avg_players: f64,
/// Highest `player_count` seen in the hour. Note this is an observed peak, unlike the
/// `max_players` field on the raw snapshot row.
pub max_players: i32,
/// Mean `fps` over the hour's samples.
pub avg_fps: f64,
/// Lowest `fps` seen in the hour.
pub min_fps: f64,
/// Mean `entity_count` over the hour's samples, stored as an integer.
pub avg_entities: i32,
/// Uptime for the hour as a percentage.
/// NOTE(review): expected range 0–100 — confirm against the aggregation query.
pub uptime_percentage: f64,
}
/// Analytics summary metrics.
///
/// Headline numbers for a time window, as produced by `get_analytics_summary`.
#[derive(Debug, Clone, Serialize)]
pub struct AnalyticsSummary {
/// Highest `player_count` in the window; 0 when the window has no samples.
pub peak_players: i32,
/// Mean `player_count` in the window; 0.0 when the window has no samples.
pub avg_players: f64,
/// Received samples relative to the expected one-per-minute cadence, capped at 100.
pub uptime_percentage: f64,
/// Not computed yet: `get_analytics_summary` always sets this to `None` (planned for Phase 2.2).
pub unique_players: Option<i64>, // For Phase 2.2
}
/// Insert a raw stats snapshot from the game server.
pub async fn insert_server_stats(pool: &PgPool, server_id: Uuid, player_count: i32, fps: f64, memory_usage: i64, entities: i32) -> Result<Uuid> {
todo!()
/// Persist one raw stats snapshot for a license and hand back the new row's id.
///
/// The row id is a UUID v4 generated here; `recorded_at` is stamped by the database
/// (`NOW()`) when the row is written.
///
/// # Errors
/// Fails with the context "Failed to insert server stats" if the `INSERT` does not succeed.
pub async fn insert_server_stats(
    pool: &PgPool,
    license_id: Uuid,
    player_count: i32,
    max_players: i32,
    fps: f64,
    entity_count: i32,
    uptime_seconds: i32,
    memory_usage_mb: i32,
) -> Result<Uuid> {
    // Placeholder order ($1..$8) must match the bind order below.
    const INSERT_SQL: &str = "INSERT INTO server_stats
(id, license_id, player_count, max_players, fps, entity_count, uptime_seconds, memory_usage_mb, recorded_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, NOW())";

    let snapshot_id = Uuid::new_v4();

    let insert = sqlx::query(INSERT_SQL)
        .bind(snapshot_id)
        .bind(license_id)
        .bind(player_count)
        .bind(max_players)
        .bind(fps)
        .bind(entity_count)
        .bind(uptime_seconds)
        .bind(memory_usage_mb);

    insert
        .execute(pool)
        .await
        .context("Failed to insert server stats")?;

    Ok(snapshot_id)
}
/// Get the most recent stats snapshots for a server.
pub async fn get_recent_stats(pool: &PgPool, server_id: Uuid, limit: i64) -> Result<()> {
todo!()
/// Fetch the newest raw snapshots recorded for `license_id`.
///
/// Rows come back newest first (`recorded_at DESC`), at most `limit` of them.
///
/// # Errors
/// Fails with the context "Failed to query recent stats" if the query or row decoding fails.
pub async fn get_recent_stats(
    pool: &PgPool,
    license_id: Uuid,
    limit: i64,
) -> Result<Vec<ServerStatsRow>> {
    sqlx::query_as::<_, ServerStatsRow>(
        "SELECT id, license_id, player_count, max_players, fps, entity_count, uptime_seconds, memory_usage_mb, recorded_at
FROM server_stats
WHERE license_id = $1
ORDER BY recorded_at DESC
LIMIT $2",
    )
    .bind(license_id)
    .bind(limit)
    .fetch_all(pool)
    .await
    .context("Failed to query recent stats")
}
/// Get hourly aggregated stats for charting.
pub async fn get_hourly_stats(pool: &PgPool, server_id: Uuid, hours: i64) -> Result<()> {
todo!()
/// Load the hourly aggregates of the last `hours` hours for `license_id`.
///
/// Rows are read from `server_stats_hourly` and returned oldest first (`hour ASC`),
/// which is the order a time-series chart consumes them in.
///
/// # Errors
/// Fails with the context "Failed to query hourly stats" if the query or row decoding fails.
pub async fn get_hourly_stats(
    pool: &PgPool,
    license_id: Uuid,
    hours: i64,
) -> Result<Vec<HourlyStats>> {
    // `$2 || ' hours'` builds an interval literal such as '24 hours' on the database side.
    const HOURLY_SQL: &str = "SELECT id, license_id, hour, avg_players, max_players, avg_fps, min_fps, avg_entities, uptime_percentage
FROM server_stats_hourly
WHERE license_id = $1 AND hour >= NOW() - ($2 || ' hours')::INTERVAL
ORDER BY hour ASC";

    let hourly_rows = sqlx::query_as::<_, HourlyStats>(HOURLY_SQL)
        .bind(license_id)
        .bind(hours)
        .fetch_all(pool)
        .await
        .context("Failed to query hourly stats")?;

    Ok(hourly_rows)
}
/// Get analytics summary for a time range.
///
/// Summarises the raw `server_stats` rows recorded for `license_id` during the last
/// `hours` hours.
///
/// # Returns
/// An [`AnalyticsSummary`] where
/// - `peak_players` is the highest `player_count` in the window (0 without samples),
/// - `avg_players` is the mean `player_count` in the window (0.0 without samples),
/// - `uptime_percentage` is the received sample count relative to the expected
///   one-sample-per-minute cadence, capped at 100 (0.0 for a non-positive window),
/// - `unique_players` is always `None` (planned for Phase 2.2).
///
/// # Errors
/// Fails with the context "Failed to query analytics summary" if the query or the
/// decoding of its result fails.
pub async fn get_analytics_summary(
    pool: &PgPool,
    license_id: Uuid,
    hours: i64,
) -> Result<AnalyticsSummary> {
    // PostgreSQL's AVG() over an integer column returns NUMERIC (player_count is bound as
    // i32 on insert, so it is presumably INTEGER). Cast to FLOAT8 so the value decodes
    // into `f64` instead of failing with a type mismatch.
    let result: Option<(Option<i32>, Option<f64>, Option<i64>)> = sqlx::query_as(
        "SELECT
            MAX(player_count) as peak_players,
            AVG(player_count)::FLOAT8 as avg_players,
            COUNT(*) as sample_count
         FROM server_stats
         WHERE license_id = $1 AND recorded_at >= NOW() - ($2 || ' hours')::INTERVAL",
    )
    .bind(license_id)
    .bind(hours)
    .fetch_optional(pool)
    .await
    .context("Failed to query analytics summary")?;

    let (peak_players, avg_players, sample_count) = result.unwrap_or((None, None, None));

    // Uptime is approximated from how many samples arrived, not from gaps between them.
    let uptime_percentage =
        sample_count.map_or(0.0, |count| uptime_percentage_from_samples(count, hours));

    Ok(AnalyticsSummary {
        peak_players: peak_players.unwrap_or(0),
        avg_players: avg_players.unwrap_or(0.0),
        uptime_percentage,
        unique_players: None, // Phase 2.2
    })
}

/// Approximate uptime as received samples over expected samples, in percent (0.0–100.0).
///
/// Assumes one sample per minute. Returns 0.0 when `window_hours` is zero or negative.
fn uptime_percentage_from_samples(sample_count: i64, window_hours: i64) -> f64 {
    /// Expected number of samples per hour at a 60-second reporting interval.
    const EXPECTED_SAMPLES_PER_HOUR: i64 = 60;

    // Saturate rather than overflow for absurdly large windows.
    let expected_samples = window_hours.saturating_mul(EXPECTED_SAMPLES_PER_HOUR);
    if expected_samples <= 0 {
        return 0.0;
    }

    // More samples than expected (e.g. a faster reporting rate) still counts as 100%.
    ((sample_count as f64 / expected_samples as f64) * 100.0).min(100.0)
}
/// Roll up raw stats into hourly aggregates (called by a scheduled job).
pub async fn aggregate_hourly_stats(pool: &PgPool, server_id: Uuid) -> Result<()> {
todo!()
/// Aggregates the previous full hour (e.g., if called at 14:05, aggregates 13:00-13:59).
///
/// Reads the raw `server_stats` rows of `license_id` whose `recorded_at` falls in
/// `[previous hour start, current hour start)` and writes one row to
/// `server_stats_hourly`. The statement is an upsert on `(license_id, hour)`, so running
/// it again for the same hour overwrites the earlier aggregate instead of duplicating it.
/// Nothing is written when the hour has no raw samples.
///
/// `uptime_percentage` is derived from the number of samples received in the hour,
/// assuming one sample per minute (60 expected), and is capped at 100.
///
/// NOTE(review): `uuid_generate_v4()` comes from the `uuid-ossp` extension — confirm the
/// migrations enable it.
///
/// # Errors
/// Fails with the context "Failed to aggregate hourly stats" if the statement fails.
pub async fn aggregate_hourly_stats(pool: &PgPool, license_id: Uuid) -> Result<()> {
    sqlx::query(
        "INSERT INTO server_stats_hourly (id, license_id, hour, avg_players, max_players, avg_fps, min_fps, avg_entities, uptime_percentage)
         SELECT
            uuid_generate_v4(),
            license_id,
            DATE_TRUNC('hour', recorded_at) as hour,
            AVG(player_count) as avg_players,
            MAX(player_count) as max_players,
            AVG(fps) as avg_fps,
            MIN(fps) as min_fps,
            AVG(entity_count) as avg_entities,
            LEAST(COUNT(*) * 100.0 / 60.0, 100.0) as uptime_percentage
         FROM server_stats
         WHERE license_id = $1
            AND recorded_at >= DATE_TRUNC('hour', NOW() - INTERVAL '1 hour')
            AND recorded_at < DATE_TRUNC('hour', NOW())
         GROUP BY license_id, DATE_TRUNC('hour', recorded_at)
         ON CONFLICT (license_id, hour) DO UPDATE SET
            avg_players = EXCLUDED.avg_players,
            max_players = EXCLUDED.max_players,
            avg_fps = EXCLUDED.avg_fps,
            min_fps = EXCLUDED.min_fps,
            avg_entities = EXCLUDED.avg_entities,
            uptime_percentage = EXCLUDED.uptime_percentage",
    )
    .bind(license_id)
    .execute(pool)
    .await
    .context("Failed to aggregate hourly stats")?;

    Ok(())
}
/// Purge raw `server_stats` rows whose `recorded_at` is more than `retention_days` days old.
///
/// The retention window is whatever the caller passes in.
///
/// # Returns
/// The number of rows deleted.
///
/// # Errors
/// Fails with the context "Failed to delete old stats" if the `DELETE` does not succeed.
pub async fn cleanup_old_stats(pool: &PgPool, retention_days: i64) -> Result<u64> {
    const PURGE_SQL: &str =
        "DELETE FROM server_stats WHERE recorded_at < NOW() - ($1 || ' days')::INTERVAL";

    let outcome = sqlx::query(PURGE_SQL)
        .bind(retention_days)
        .execute(pool)
        .await
        .context("Failed to delete old stats")?;

    let deleted_rows = outcome.rows_affected();
    Ok(deleted_rows)
}
/// Purge `server_stats_hourly` rows whose `hour` is more than `retention_days` days old.
///
/// The retention window is whatever the caller passes in.
///
/// # Returns
/// The number of rows deleted.
///
/// # Errors
/// Fails with the context "Failed to delete old hourly stats" if the `DELETE` does not succeed.
pub async fn cleanup_old_hourly_stats(pool: &PgPool, retention_days: i64) -> Result<u64> {
    sqlx::query(
        "DELETE FROM server_stats_hourly WHERE hour < NOW() - ($1 || ' days')::INTERVAL",
    )
    .bind(retention_days)
    .execute(pool)
    .await
    .map(|done| done.rows_affected())
    .context("Failed to delete old hourly stats")
}