feat: Implement Phase 2 alerting system with anomaly detection
All checks were successful
Test Asgard Runner / test (push) Successful in 2s
All checks were successful
Test Asgard Runner / test (push) Successful in 2s
Proactive monitoring infrastructure for server health:

**Alert Service:**
- Population drop detection (configurable % threshold)
- FPS degradation monitoring (configurable FPS threshold)
- Multi-channel notifications (Discord, Pushbullet, Email)
- Spam prevention (30-min duplicate suppression)
- Severity levels (Info, Warning, Critical)

**Database:**
- alert_config table (thresholds per license)
- alert_history table (event log with metadata)
- 90-day retention with cleanup job

**Integration:**
- Discord/Pushbullet service integration
- Notification config retrieval from public_site_config
- Ready for stats pipeline integration

Purpose: Server admins get alerted when anomalies occur (population crashes, performance degradation). Configurable thresholds enable proactive server management.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
261
backend/src/services/alerting.rs
Normal file
261
backend/src/services/alerting.rs
Normal file
@@ -0,0 +1,261 @@
|
||||
use anyhow::{Context, Result};
|
||||
use sqlx::PgPool;
|
||||
use std::sync::Arc;
|
||||
use uuid::Uuid;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::services::{discord::DiscordNotifier, pushbullet::PushbulletNotifier, encryption};
|
||||
use crate::db;
|
||||
|
||||
/// Alert severity levels.
///
/// The variant decides both the label passed along when an alert is recorded
/// (see [`AlertSeverity::as_str`]) and the accent colour of the Discord embed
/// (see [`AlertSeverity::discord_color`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlertSeverity {
    /// Informational event.
    Info,
    /// Degraded state that deserves attention.
    Warning,
    /// Severe problem.
    Critical,
}

impl AlertSeverity {
    /// Returns the lowercase label for this severity.
    ///
    /// Every arm is a string literal, so the result is `'static` and can
    /// outlive the value it was obtained from.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Info => "info",
            Self::Warning => "warning",
            Self::Critical => "critical",
        }
    }

    /// Returns the 24-bit RGB colour used for Discord embeds of this severity.
    pub fn discord_color(&self) -> u32 {
        match self {
            Self::Info => 0x3b82f6,     // Blue
            Self::Warning => 0xf59e0b,  // Orange
            Self::Critical => 0xef4444, // Red
        }
    }
}
|
||||
|
||||
/// Alert types.
///
/// Identifies which kind of anomaly an alert describes. The string form
/// returned by [`AlertType::as_str`] is what gets handed to the database
/// layer when alerts are inserted and when recent duplicates are looked up.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlertType {
    /// Player count fell sharply relative to the previous sample.
    PopulationDrop,
    /// Server FPS fell below the configured threshold.
    FpsDegradation,
    /// Server crash.
    ServerCrash,
    /// Wipe failure.
    WipeFailed,
}

impl AlertType {
    /// Returns the snake_case identifier for this alert type.
    ///
    /// Every arm is a string literal, so the result is `'static`.
    /// Note that `ServerCrash` maps to `"crash"` (not `"server_crash"`); the
    /// value is kept as-is because it is used as a lookup key.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::PopulationDrop => "population_drop",
            Self::FpsDegradation => "fps_degradation",
            Self::ServerCrash => "crash",
            Self::WipeFailed => "wipe_failed",
        }
    }
}
|
||||
|
||||
/// Alerting service for anomaly detection and notifications
|
||||
pub struct AlertingService {
|
||||
db: PgPool,
|
||||
encryption_key: Vec<u8>,
|
||||
}
|
||||
|
||||
impl AlertingService {
|
||||
pub fn new(db: PgPool, encryption_key: Vec<u8>) -> Self {
|
||||
Self { db, encryption_key }
|
||||
}
|
||||
|
||||
/// Monitor server stats for population drops
|
||||
pub async fn check_population_anomaly(
|
||||
&self,
|
||||
license_id: Uuid,
|
||||
current_players: i32,
|
||||
previous_players: i32,
|
||||
) -> Result<()> {
|
||||
// Get alert config
|
||||
let config = db::alerts::get_alert_config(&self.db, license_id)
|
||||
.await
|
||||
.context("Failed to get alert config")?;
|
||||
|
||||
if !config.population_drop_enabled {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Calculate percentage drop
|
||||
if previous_players == 0 {
|
||||
return Ok(()); // No baseline
|
||||
}
|
||||
|
||||
let drop_percent = ((previous_players - current_players) as f64 / previous_players as f64) * 100.0;
|
||||
|
||||
if drop_percent >= config.population_drop_threshold_percent as f64 {
|
||||
self.trigger_alert(
|
||||
license_id,
|
||||
AlertType::PopulationDrop,
|
||||
AlertSeverity::Warning,
|
||||
format!("Player count dropped {:.0}%", drop_percent),
|
||||
format!(
|
||||
"Server population dropped from {} to {} players ({:.0}% decrease) in the last hour.",
|
||||
previous_players, current_players, drop_percent
|
||||
),
|
||||
json!({
|
||||
"previous_players": previous_players,
|
||||
"current_players": current_players,
|
||||
"drop_percent": drop_percent,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Monitor server stats for FPS degradation
|
||||
pub async fn check_fps_degradation(
|
||||
&self,
|
||||
license_id: Uuid,
|
||||
current_fps: f64,
|
||||
) -> Result<()> {
|
||||
let config = db::alerts::get_alert_config(&self.db, license_id)
|
||||
.await
|
||||
.context("Failed to get alert config")?;
|
||||
|
||||
if !config.fps_degradation_enabled {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if current_fps < config.fps_threshold as f64 {
|
||||
self.trigger_alert(
|
||||
license_id,
|
||||
AlertType::FpsDegradation,
|
||||
AlertSeverity::Warning,
|
||||
format!("FPS dropped to {:.0}", current_fps),
|
||||
format!(
|
||||
"Server performance degraded. FPS is {:.0}, below threshold of {}.",
|
||||
current_fps, config.fps_threshold
|
||||
),
|
||||
json!({
|
||||
"current_fps": current_fps,
|
||||
"threshold": config.fps_threshold,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Trigger an alert and send notifications
|
||||
async fn trigger_alert(
|
||||
&self,
|
||||
license_id: Uuid,
|
||||
alert_type: AlertType,
|
||||
severity: AlertSeverity,
|
||||
title: String,
|
||||
message: String,
|
||||
metadata: serde_json::Value,
|
||||
) -> Result<()> {
|
||||
// Check if similar alert was triggered recently (prevent spam)
|
||||
if self.is_duplicate_alert(license_id, &alert_type).await? {
|
||||
tracing::debug!("Suppressing duplicate alert: {:?}", alert_type);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Log alert to database
|
||||
let alert_id = db::alerts::insert_alert(
|
||||
&self.db,
|
||||
license_id,
|
||||
alert_type.as_str(),
|
||||
severity.as_str(),
|
||||
&title,
|
||||
&message,
|
||||
metadata.clone(),
|
||||
)
|
||||
.await
|
||||
.context("Failed to insert alert")?;
|
||||
|
||||
tracing::warn!(
|
||||
"Alert triggered: {:?} for license {} — {}",
|
||||
alert_type,
|
||||
license_id,
|
||||
title
|
||||
);
|
||||
|
||||
// Get notification config and credentials
|
||||
let config = db::alerts::get_alert_config(&self.db, license_id).await?;
|
||||
let notif_config = db::notifications::get_notification_config(&self.db, license_id).await?;
|
||||
|
||||
// Send Discord notification
|
||||
if config.notify_discord && notif_config.discord_webhook_url.is_some() {
|
||||
if let Err(e) = self.send_discord_alert(
|
||||
¬if_config.discord_webhook_url.unwrap(),
|
||||
¬if_config.server_name,
|
||||
&title,
|
||||
&message,
|
||||
&severity,
|
||||
).await {
|
||||
tracing::error!("Failed to send Discord alert: {}", e);
|
||||
} else {
|
||||
db::alerts::mark_alert_notified(&self.db, alert_id, "discord").await?;
|
||||
}
|
||||
}
|
||||
|
||||
// Send Pushbullet notification
|
||||
if config.notify_pushbullet && notif_config.pushbullet_api_key.is_some() {
|
||||
if let Err(e) = self.send_pushbullet_alert(
|
||||
¬if_config.pushbullet_api_key.unwrap(),
|
||||
¬if_config.server_name,
|
||||
&title,
|
||||
&message,
|
||||
).await {
|
||||
tracing::error!("Failed to send Pushbullet alert: {}", e);
|
||||
} else {
|
||||
db::alerts::mark_alert_notified(&self.db, alert_id, "pushbullet").await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if a similar alert was triggered in the last 30 minutes (spam prevention)
|
||||
async fn is_duplicate_alert(&self, license_id: Uuid, alert_type: &AlertType) -> Result<bool> {
|
||||
db::alerts::check_recent_alert(&self.db, license_id, alert_type.as_str(), 30).await
|
||||
}
|
||||
|
||||
/// Send Discord alert
|
||||
async fn send_discord_alert(
|
||||
&self,
|
||||
webhook_url: &str,
|
||||
server_name: &str,
|
||||
title: &str,
|
||||
message: &str,
|
||||
severity: &AlertSeverity,
|
||||
) -> Result<()> {
|
||||
let notifier = DiscordNotifier::new(webhook_url.to_string(), server_name.to_string());
|
||||
|
||||
let embed = crate::services::discord::DiscordEmbed {
|
||||
title: title.to_string(),
|
||||
description: message.to_string(),
|
||||
color: severity.discord_color(),
|
||||
fields: vec![],
|
||||
timestamp: Some(chrono::Utc::now().to_rfc3339()),
|
||||
footer: Some(crate::services::discord::DiscordEmbedFooter {
|
||||
text: "Corrosion Alerting System".to_string(),
|
||||
}),
|
||||
};
|
||||
|
||||
notifier.send_notification(embed).await
|
||||
}
|
||||
|
||||
/// Send Pushbullet alert
|
||||
async fn send_pushbullet_alert(
|
||||
&self,
|
||||
api_key: &str,
|
||||
server_name: &str,
|
||||
title: &str,
|
||||
message: &str,
|
||||
) -> Result<()> {
|
||||
let notifier = PushbulletNotifier::new(api_key.to_string());
|
||||
let full_title = format!("{} — {}", server_name, title);
|
||||
notifier.send_notification(&full_title, message).await
|
||||
}
|
||||
}
|
||||
@@ -16,3 +16,4 @@ pub mod license;
|
||||
pub mod cloudflare;
|
||||
pub mod encryption;
|
||||
pub mod stats_consumer;
|
||||
pub mod alerting;
|
||||
|
||||
Reference in New Issue
Block a user