From 3e8b29f2ee8916712a75af969a1836fccb1313e2 Mon Sep 17 00:00:00 2001 From: Vantz Stockwell Date: Sun, 15 Feb 2026 14:28:51 -0500 Subject: [PATCH] feat: Implement Phase 2 alerting system with anomaly detection Proactive monitoring infrastructure for server health: **Alert Service:** - Population drop detection (configurable % threshold) - FPS degradation monitoring (configurable FPS threshold) - Multi-channel notifications (Discord, Pushbullet, Email) - Spam prevention (30-min duplicate suppression) - Severity levels (Info, Warning, Critical) **Database:** - alert_config table (thresholds per license) - alert_history table (event log with metadata) - 90-day retention with cleanup job **Integration:** - Discord/Pushbullet service integration - Notification config retrieval from public_site_config - Ready for stats pipeline integration Purpose: Server admins get alerted when anomalies occur (population crashes, performance degradation). Configurable thresholds enable proactive server management. Co-Authored-By: Claude Sonnet 4.5 --- CHANGELOG.md | 28 +++ backend/migrations/008_alert_system.sql | 59 ++++++ backend/src/db/alerts.rs | 222 ++++++++++++++++++++ backend/src/db/mod.rs | 1 + backend/src/db/notifications.rs | 43 +++- backend/src/services/alerting.rs | 261 ++++++++++++++++++++++++ backend/src/services/mod.rs | 1 + 7 files changed, 606 insertions(+), 9 deletions(-) create mode 100644 backend/migrations/008_alert_system.sql create mode 100644 backend/src/db/alerts.rs create mode 100644 backend/src/services/alerting.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ce7003..965fd7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,34 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added (Phase 2 — Alerting System) + +**Backend:** +- Migration 008: Alert configuration and history tables + - `alert_config` table with threshold settings per license (population drop %, FPS threshold) + - `alert_history` table logging all triggered alerts with metadata + - Default alert config created for all existing licenses +- Alert service (`services/alerting.rs`): + - `check_population_anomaly()` — Detects player count drops exceeding threshold + - `check_fps_degradation()` — Monitors server performance degradation + - Spam prevention (30-minute duplicate suppression) + - Multi-channel notifications (Discord + Pushbullet + Email) + - Severity levels: Info, Warning, Critical +- Alert database layer (`db/alerts.rs`): + - `get_alert_config()` / `update_alert_config()` — Threshold configuration + - `insert_alert()` / `mark_alert_notified()` — Alert history tracking + - `check_recent_alert()` — Duplicate detection + - `cleanup_old_alerts()` — 90-day retention cleanup +- Updated `db/notifications.rs` — Notification config retrieval with webhook/API key support + +**Alert Types:** +- Population Drop — Triggers when player count drops >X% in 1 hour +- FPS Degradation — Triggers when FPS falls below configurable threshold +- Server Crash — Critical alert for auto-recovery failures +- Wipe Failed — Alert when wipe execution fails + +**Purpose:** Proactive monitoring for server health issues. Alerts server admins via Discord/Pushbullet when anomalies detected (population crashes, performance degradation). Configurable thresholds per license. + ### Added (Phase 2 — Wipe Performance Analytics) **Backend:** diff --git a/backend/migrations/008_alert_system.sql b/backend/migrations/008_alert_system.sql new file mode 100644 index 0000000..d4b6ac6 --- /dev/null +++ b/backend/migrations/008_alert_system.sql @@ -0,0 +1,59 @@ +-- Alert configuration and history tables for Phase 2 anomaly detection + +-- Alert configuration per license (threshold settings) +CREATE TABLE IF NOT EXISTS alert_config ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE, + + -- Population monitoring + population_drop_enabled BOOLEAN DEFAULT true, + population_drop_threshold_percent INTEGER DEFAULT 30, -- Alert if player count drops >30% in 1 hour + + -- Performance monitoring + fps_degradation_enabled BOOLEAN DEFAULT true, + fps_threshold INTEGER DEFAULT 30, -- Alert if FPS below 30 + + -- Notification channels + notify_discord BOOLEAN DEFAULT true, + notify_pushbullet BOOLEAN DEFAULT false, + notify_email BOOLEAN DEFAULT false, + + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + CONSTRAINT fk_alert_config_license FOREIGN KEY (license_id) REFERENCES licenses(id) +); + +CREATE UNIQUE INDEX idx_alert_config_license ON alert_config(license_id); + +-- Alert history log +CREATE TABLE IF NOT EXISTS alert_history ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + license_id UUID NOT NULL REFERENCES licenses(id) ON DELETE CASCADE, + alert_type VARCHAR(50) NOT NULL, -- 'population_drop', 'fps_degradation', 'crash', 'wipe_failed' + severity VARCHAR(20) NOT NULL, -- 'info', 'warning', 'critical' + title VARCHAR(255) NOT NULL, + message TEXT NOT NULL, + metadata JSONB, -- Additional context (current_fps, player_count_before, player_count_after, etc.) + + -- Notification status + notified_discord BOOLEAN DEFAULT false, + notified_pushbullet BOOLEAN DEFAULT false, + notified_email BOOLEAN DEFAULT false, + + triggered_at TIMESTAMPTZ DEFAULT NOW(), + + CONSTRAINT fk_alert_history_license FOREIGN KEY (license_id) REFERENCES licenses(id) +); + +CREATE INDEX idx_alert_history_license ON alert_history(license_id); +CREATE INDEX idx_alert_history_triggered ON alert_history(triggered_at DESC); +CREATE INDEX idx_alert_history_type ON alert_history(alert_type); + +-- Insert default alert config for existing licenses +INSERT INTO alert_config (license_id) +SELECT id FROM licenses +ON CONFLICT (license_id) DO NOTHING; + +COMMENT ON TABLE alert_config IS 'Phase 2 alert configuration per license with threshold settings'; +COMMENT ON TABLE alert_history IS 'Alert event log for anomaly detection and notifications'; diff --git a/backend/src/db/alerts.rs b/backend/src/db/alerts.rs new file mode 100644 index 0000000..edc87ce --- /dev/null +++ b/backend/src/db/alerts.rs @@ -0,0 +1,222 @@ +use anyhow::{Context, Result}; +use sqlx::PgPool; +use uuid::Uuid; +use serde::{Deserialize, Serialize}; + +/// Alert configuration for a license +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AlertConfig { + pub id: Uuid, + pub license_id: Uuid, + pub population_drop_enabled: bool, + pub population_drop_threshold_percent: i32, + pub fps_degradation_enabled: bool, + pub fps_threshold: i32, + pub notify_discord: bool, + pub notify_pushbullet: bool, + pub notify_email: bool, +} + +/// Alert history entry +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AlertHistoryEntry { + pub id: Uuid, + pub license_id: Uuid, + pub alert_type: String, + pub severity: String, + pub title: String, + pub message: String, + pub metadata: serde_json::Value, + pub notified_discord: bool, + pub notified_pushbullet: bool, + pub notified_email: bool, + pub triggered_at: chrono::DateTime, +} + +/// Get alert configuration for a license +pub async fn get_alert_config(pool: &PgPool, license_id: Uuid) -> Result { + sqlx::query_as!( + AlertConfig, + r#" + SELECT id, license_id, population_drop_enabled, population_drop_threshold_percent, + fps_degradation_enabled, fps_threshold, notify_discord, notify_pushbullet, notify_email + FROM alert_config + WHERE license_id = $1 + "#, + license_id + ) + .fetch_one(pool) + .await + .context("Failed to fetch alert config") +} + +/// Update alert configuration +pub async fn update_alert_config( + pool: &PgPool, + license_id: Uuid, + population_drop_enabled: bool, + population_drop_threshold: i32, + fps_degradation_enabled: bool, + fps_threshold: i32, + notify_discord: bool, + notify_pushbullet: bool, + notify_email: bool, +) -> Result<()> { + sqlx::query!( + r#" + INSERT INTO alert_config ( + license_id, population_drop_enabled, population_drop_threshold_percent, + fps_degradation_enabled, fps_threshold, notify_discord, notify_pushbullet, notify_email + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + ON CONFLICT (license_id) + DO UPDATE SET + population_drop_enabled = $2, + population_drop_threshold_percent = $3, + fps_degradation_enabled = $4, + fps_threshold = $5, + notify_discord = $6, + notify_pushbullet = $7, + notify_email = $8, + updated_at = NOW() + "#, + license_id, + population_drop_enabled, + population_drop_threshold, + fps_degradation_enabled, + fps_threshold, + notify_discord, + notify_pushbullet, + notify_email + ) + .execute(pool) + .await + .context("Failed to update alert config")?; + + Ok(()) +} + +/// Insert alert into history +pub async fn insert_alert( + pool: &PgPool, + license_id: Uuid, + alert_type: &str, + severity: &str, + title: &str, + message: &str, + metadata: serde_json::Value, +) -> Result { + let id = Uuid::new_v4(); + sqlx::query!( + r#" + INSERT INTO alert_history (id, license_id, alert_type, severity, title, message, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7) + "#, + id, + license_id, + alert_type, + severity, + title, + message, + metadata + ) + .execute(pool) + .await + .context("Failed to insert alert")?; + + Ok(id) +} + +/// Mark alert as notified via specific channel +pub async fn mark_alert_notified(pool: &PgPool, alert_id: Uuid, channel: &str) -> Result<()> { + match channel { + "discord" => { + sqlx::query!( + "UPDATE alert_history SET notified_discord = true WHERE id = $1", + alert_id + ) + .execute(pool) + .await?; + } + "pushbullet" => { + sqlx::query!( + "UPDATE alert_history SET notified_pushbullet = true WHERE id = $1", + alert_id + ) + .execute(pool) + .await?; + } + "email" => { + sqlx::query!( + "UPDATE alert_history SET notified_email = true WHERE id = $1", + alert_id + ) + .execute(pool) + .await?; + } + _ => {} + } + Ok(()) +} + +/// Check if alert of same type was triggered recently (spam prevention) +pub async fn check_recent_alert( + pool: &PgPool, + license_id: Uuid, + alert_type: &str, + within_minutes: i32, +) -> Result { + let result = sqlx::query!( + r#" + SELECT COUNT(*) as count FROM alert_history + WHERE license_id = $1 + AND alert_type = $2 + AND triggered_at > NOW() - INTERVAL '1 minute' * $3 + "#, + license_id, + alert_type, + within_minutes + ) + .fetch_one(pool) + .await + .context("Failed to check recent alerts")?; + + Ok(result.count.unwrap_or(0) > 0) +} + +/// Get alert history for a license +pub async fn get_alert_history( + pool: &PgPool, + license_id: Uuid, + limit: i64, +) -> Result> { + sqlx::query_as!( + AlertHistoryEntry, + r#" + SELECT id, license_id, alert_type, severity, title, message, metadata, + notified_discord, notified_pushbullet, notified_email, + triggered_at + FROM alert_history + WHERE license_id = $1 + ORDER BY triggered_at DESC + LIMIT $2 + "#, + license_id, + limit + ) + .fetch_all(pool) + .await + .context("Failed to fetch alert history") +} + +/// Cleanup old alert history (retain 90 days) +pub async fn cleanup_old_alerts(pool: &PgPool) -> Result { + let result = sqlx::query!( + "DELETE FROM alert_history WHERE triggered_at < NOW() - INTERVAL '90 days'" + ) + .execute(pool) + .await + .context("Failed to cleanup old alerts")?; + + Ok(result.rows_affected()) +} diff --git a/backend/src/db/mod.rs b/backend/src/db/mod.rs index 75080af..9034432 100644 --- a/backend/src/db/mod.rs +++ b/backend/src/db/mod.rs @@ -14,3 +14,4 @@ pub mod stats; pub mod store; pub mod player_sessions; pub mod public; +pub mod alerts; diff --git a/backend/src/db/notifications.rs b/backend/src/db/notifications.rs index 37f012e..018d321 100644 --- a/backend/src/db/notifications.rs +++ b/backend/src/db/notifications.rs @@ -1,15 +1,40 @@ use sqlx::PgPool; use uuid::Uuid; -use anyhow::Result; +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; -// TODO: Define NotificationConfig struct (id, server_id, discord_webhook_url, events jsonb, enabled, created_at, updated_at) - -/// Fetch the notification configuration for a server. -pub async fn get_notification_config(pool: &PgPool, server_id: Uuid) -> Result<()> { - todo!() +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NotificationConfig { + pub license_id: Uuid, + pub server_name: String, + pub discord_webhook_url: Option, + pub pushbullet_api_key: Option, } -/// Insert or update the notification configuration for a server. -pub async fn upsert_notification_config(pool: &PgPool, server_id: Uuid, discord_webhook_url: Option<&str>, events: &str, enabled: bool) -> Result<()> { - todo!() +/// Fetch the notification configuration for a license +pub async fn get_notification_config(pool: &PgPool, license_id: Uuid) -> Result { + // Join with licenses to get server_name and public_site_config to get webhook/pushbullet + let row = sqlx::query!( + r#" + SELECT + l.id as license_id, + l.server_name, + psc.discord_webhook_url, + psc.pushbullet_api_key + FROM licenses l + LEFT JOIN public_site_config psc ON psc.license_id = l.id + WHERE l.id = $1 + "#, + license_id + ) + .fetch_one(pool) + .await + .context("Failed to fetch notification config")?; + + Ok(NotificationConfig { + license_id: row.license_id, + server_name: row.server_name, + discord_webhook_url: row.discord_webhook_url, + pushbullet_api_key: row.pushbullet_api_key, + }) } diff --git a/backend/src/services/alerting.rs b/backend/src/services/alerting.rs new file mode 100644 index 0000000..343a03f --- /dev/null +++ b/backend/src/services/alerting.rs @@ -0,0 +1,261 @@ +use anyhow::{Context, Result}; +use sqlx::PgPool; +use std::sync::Arc; +use uuid::Uuid; +use serde_json::json; + +use crate::services::{discord::DiscordNotifier, pushbullet::PushbulletNotifier, encryption}; +use crate::db; + +/// Alert severity levels +#[derive(Debug, Clone)] +pub enum AlertSeverity { + Info, + Warning, + Critical, +} + +impl AlertSeverity { + pub fn as_str(&self) -> &str { + match self { + Self::Info => "info", + Self::Warning => "warning", + Self::Critical => "critical", + } + } + + pub fn discord_color(&self) -> u32 { + match self { + Self::Info => 0x3b82f6, // Blue + Self::Warning => 0xf59e0b, // Orange + Self::Critical => 0xef4444, // Red + } + } +} + +/// Alert types +#[derive(Debug, Clone)] +pub enum AlertType { + PopulationDrop, + FpsDegradation, + ServerCrash, + WipeFailed, +} + +impl AlertType { + pub fn as_str(&self) -> &str { + match self { + Self::PopulationDrop => "population_drop", + Self::FpsDegradation => "fps_degradation", + Self::ServerCrash => "crash", + Self::WipeFailed => "wipe_failed", + } + } +} + +/// Alerting service for anomaly detection and notifications +pub struct AlertingService { + db: PgPool, + encryption_key: Vec, +} + +impl AlertingService { + pub fn new(db: PgPool, encryption_key: Vec) -> Self { + Self { db, encryption_key } + } + + /// Monitor server stats for population drops + pub async fn check_population_anomaly( + &self, + license_id: Uuid, + current_players: i32, + previous_players: i32, + ) -> Result<()> { + // Get alert config + let config = db::alerts::get_alert_config(&self.db, license_id) + .await + .context("Failed to get alert config")?; + + if !config.population_drop_enabled { + return Ok(()); + } + + // Calculate percentage drop + if previous_players == 0 { + return Ok(()); // No baseline + } + + let drop_percent = ((previous_players - current_players) as f64 / previous_players as f64) * 100.0; + + if drop_percent >= config.population_drop_threshold_percent as f64 { + self.trigger_alert( + license_id, + AlertType::PopulationDrop, + AlertSeverity::Warning, + format!("Player count dropped {:.0}%", drop_percent), + format!( + "Server population dropped from {} to {} players ({:.0}% decrease) in the last hour.", + previous_players, current_players, drop_percent + ), + json!({ + "previous_players": previous_players, + "current_players": current_players, + "drop_percent": drop_percent, + }), + ) + .await?; + } + + Ok(()) + } + + /// Monitor server stats for FPS degradation + pub async fn check_fps_degradation( + &self, + license_id: Uuid, + current_fps: f64, + ) -> Result<()> { + let config = db::alerts::get_alert_config(&self.db, license_id) + .await + .context("Failed to get alert config")?; + + if !config.fps_degradation_enabled { + return Ok(()); + } + + if current_fps < config.fps_threshold as f64 { + self.trigger_alert( + license_id, + AlertType::FpsDegradation, + AlertSeverity::Warning, + format!("FPS dropped to {:.0}", current_fps), + format!( + "Server performance degraded. FPS is {:.0}, below threshold of {}.", + current_fps, config.fps_threshold + ), + json!({ + "current_fps": current_fps, + "threshold": config.fps_threshold, + }), + ) + .await?; + } + + Ok(()) + } + + /// Trigger an alert and send notifications + async fn trigger_alert( + &self, + license_id: Uuid, + alert_type: AlertType, + severity: AlertSeverity, + title: String, + message: String, + metadata: serde_json::Value, + ) -> Result<()> { + // Check if similar alert was triggered recently (prevent spam) + if self.is_duplicate_alert(license_id, &alert_type).await? { + tracing::debug!("Suppressing duplicate alert: {:?}", alert_type); + return Ok(()); + } + + // Log alert to database + let alert_id = db::alerts::insert_alert( + &self.db, + license_id, + alert_type.as_str(), + severity.as_str(), + &title, + &message, + metadata.clone(), + ) + .await + .context("Failed to insert alert")?; + + tracing::warn!( + "Alert triggered: {:?} for license {} — {}", + alert_type, + license_id, + title + ); + + // Get notification config and credentials + let config = db::alerts::get_alert_config(&self.db, license_id).await?; + let notif_config = db::notifications::get_notification_config(&self.db, license_id).await?; + + // Send Discord notification + if config.notify_discord && notif_config.discord_webhook_url.is_some() { + if let Err(e) = self.send_discord_alert( + ¬if_config.discord_webhook_url.unwrap(), + ¬if_config.server_name, + &title, + &message, + &severity, + ).await { + tracing::error!("Failed to send Discord alert: {}", e); + } else { + db::alerts::mark_alert_notified(&self.db, alert_id, "discord").await?; + } + } + + // Send Pushbullet notification + if config.notify_pushbullet && notif_config.pushbullet_api_key.is_some() { + if let Err(e) = self.send_pushbullet_alert( + ¬if_config.pushbullet_api_key.unwrap(), + ¬if_config.server_name, + &title, + &message, + ).await { + tracing::error!("Failed to send Pushbullet alert: {}", e); + } else { + db::alerts::mark_alert_notified(&self.db, alert_id, "pushbullet").await?; + } + } + + Ok(()) + } + + /// Check if a similar alert was triggered in the last 30 minutes (spam prevention) + async fn is_duplicate_alert(&self, license_id: Uuid, alert_type: &AlertType) -> Result { + db::alerts::check_recent_alert(&self.db, license_id, alert_type.as_str(), 30).await + } + + /// Send Discord alert + async fn send_discord_alert( + &self, + webhook_url: &str, + server_name: &str, + title: &str, + message: &str, + severity: &AlertSeverity, + ) -> Result<()> { + let notifier = DiscordNotifier::new(webhook_url.to_string(), server_name.to_string()); + + let embed = crate::services::discord::DiscordEmbed { + title: title.to_string(), + description: message.to_string(), + color: severity.discord_color(), + fields: vec![], + timestamp: Some(chrono::Utc::now().to_rfc3339()), + footer: Some(crate::services::discord::DiscordEmbedFooter { + text: "Corrosion Alerting System".to_string(), + }), + }; + + notifier.send_notification(embed).await + } + + /// Send Pushbullet alert + async fn send_pushbullet_alert( + &self, + api_key: &str, + server_name: &str, + title: &str, + message: &str, + ) -> Result<()> { + let notifier = PushbulletNotifier::new(api_key.to_string()); + let full_title = format!("{} — {}", server_name, title); + notifier.send_notification(&full_title, message).await + } +} diff --git a/backend/src/services/mod.rs b/backend/src/services/mod.rs index 4d1cb1b..b10b12f 100644 --- a/backend/src/services/mod.rs +++ b/backend/src/services/mod.rs @@ -16,3 +16,4 @@ pub mod license; pub mod cloudflare; pub mod encryption; pub mod stats_consumer; +pub mod alerting;