From e1a8ee1c157d29fdb32957b6f1365a85e3108dad Mon Sep 17 00:00:00 2001 From: Willem Date: Thu, 22 May 2025 14:16:41 -0400 Subject: [PATCH] feat: send alerts to multiple alert channels --- harmony/src/modules/monitoring/config.rs | 4 +- .../monitoring/discord_alert_manager.rs | 32 ++-- .../src/modules/monitoring/kube_prometheus.rs | 138 +++++++++++------- .../modules/monitoring/monitoring_alerting.rs | 19 ++- 4 files changed, 121 insertions(+), 72 deletions(-) diff --git a/harmony/src/modules/monitoring/config.rs b/harmony/src/modules/monitoring/config.rs index 1849c76..c06377c 100644 --- a/harmony/src/modules/monitoring/config.rs +++ b/harmony/src/modules/monitoring/config.rs @@ -22,7 +22,7 @@ pub struct KubePrometheusConfig { pub kube_proxy: bool, pub kube_state_metrics: bool, pub prometheus_operator: bool, - pub alert_channel: Option, + pub alert_channel: Vec, } impl KubePrometheusConfig { pub fn new() -> Self { @@ -31,7 +31,7 @@ impl KubePrometheusConfig { default_rules: true, windows_monitoring: false, alert_manager: true, - alert_channel: None, + alert_channel: Vec::new(), grafana: true, node_exporter: false, prometheus: true, diff --git a/harmony/src/modules/monitoring/discord_alert_manager.rs b/harmony/src/modules/monitoring/discord_alert_manager.rs index dfafbfe..5eaffa0 100644 --- a/harmony/src/modules/monitoring/discord_alert_manager.rs +++ b/harmony/src/modules/monitoring/discord_alert_manager.rs @@ -6,27 +6,27 @@ use crate::modules::helm::chart::HelmChartScore; use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel}; -pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore { - let (url, release_name) = match &config.alert_channel { - Some(AlertChannel::Discord { webhook_url, name }) => { - (webhook_url.to_string(), name.to_string()) +fn get_discord_alert_manager_score(config: &KubePrometheusConfig) -> Option { + let (url, name) = config.alert_channel.iter().find_map(|channel| { + if let AlertChannel::Discord { webhook_url, name } = channel { + Some((webhook_url, name)) + } else { + None } - _ => panic!("Expected Discord alert channel configuration"), - }; + })?; let values = format!( - r#" - + r#" environment: - name: "DISCORD_WEBHOOK" value: "{url}" - "#, +"#, ); - HelmChartScore { + Some(HelmChartScore { namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), - release_name: NonBlankString::from_str(&release_name).unwrap(), - chart_name: NonBlankString::from_str("oci://hub.nationtech.io/nt/alertmanager-discord") + release_name: NonBlankString::from_str(&name).unwrap(), + chart_name: NonBlankString::from_str("oci://hub.nationtech.io/library/alertmanager-discord") .unwrap(), chart_version: None, values_overrides: None, @@ -34,5 +34,13 @@ environment: create_namespace: true, install_only: true, repository: None, + }) +} + +pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore { + if let Some(chart) = get_discord_alert_manager_score(config) { + chart + } else { + panic!("Expected discord alert manager helm chart"); } } diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs index 404184a..b694f51 100644 --- a/harmony/src/modules/monitoring/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -1,8 +1,8 @@ use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel}; use log::info; use non_blank_string_rs::NonBlankString; -use url::Url; use std::{collections::HashMap, str::FromStr}; +use url::Url; use crate::modules::helm::chart::HelmChartScore; @@ -116,21 +116,6 @@ defaultRules: windows: true windowsMonitoring: enabled: {windows_monitoring} -alertmanager: - enabled: {alert_manager} - config: - route: - group_by: ['job'] - group_wait: 30s - group_interval: 5m - repeat_interval: 12h - routes: - - receiver: 'null' - matchers: - - alertname="Watchdog" - continue: false - receivers: - - name: 'null' grafana: enabled: {grafana} kubernetesServiceMonitors: @@ -160,15 +145,65 @@ prometheus: "#, ); - if let Some(alert_channel) = &config.alert_channel { - match alert_channel { - AlertChannel::Discord { name, .. } => { - values.push_str(&discord_alert_builder(name).to_string());} - AlertChannel::Slack { slack_channel, webhook_url } => { - values.push_str(&slack_alert_builder(slack_channel, webhook_url).to_string());} - AlertChannel::Smpt { .. } => todo!(), + let alertmanager_config = alert_manager_yaml_builder(&config); + values.push_str(&alertmanager_config); + + fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String { + let mut receivers = String::new(); + let mut routes = String::new(); + let mut global_configs = String::new(); + let alert_manager = config.alert_manager; + for alert_channel in &config.alert_channel { + match alert_channel { + AlertChannel::Discord { name, .. } => { + let (receiver, route) = discord_alert_builder(name); + info!("discord receiver: {} \nroute: {}", receiver, route); + receivers.push_str(&receiver); + routes.push_str(&route); + } + AlertChannel::Slack { + slack_channel, + webhook_url, + } => { + let (receiver, route) = slack_alert_builder(slack_channel); + info!("slack receiver: {} \nroute: {}", receiver, route); + receivers.push_str(&receiver); + + routes.push_str(&route); + let global_config = format!( + r#" + global: + slack_api_url: {webhook_url}"# + ); + + global_configs.push_str(&global_config); + } + AlertChannel::Smpt { .. } => todo!(), + } } - }; + info!("after alert receiver: {}", receivers); + info!("after alert routes: {}", routes); + + let alertmanager_config = format!( + r#" +alertmanager: + enabled: {alert_manager} + config: {global_configs} + route: + group_by: ['job'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + routes: +{routes} + receivers: + - name: 'null' +{receivers}"# + ); + + info!("alert manager config: {}", alertmanager_config); + alertmanager_config + } HelmChartScore { namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), @@ -186,43 +221,42 @@ prometheus: } } -fn discord_alert_builder(release_name: &String) -> String { - let discord_alert_manager_release_name = release_name; - let discord_alert_values = format!( +fn discord_alert_builder(release_name: &String) -> (String, String) { + let discord_receiver_name = format!("Discord-{}", release_name); + let receiver = format!( r#" -alertmanager: - config: - route: - receiver: 'Discord' - receivers: - - name: 'null' - - name: 'Discord' + - name: '{discord_receiver_name}' webhook_configs: - - url: 'http://{discord_alert_manager_release_name}-alertmanager-discord:9094' - send_resolved: true - "#, + - url: 'http://{release_name}-alertmanager-discord:9094' + send_resolved: true"#, ); - discord_alert_values + let route = format!( + r#" + - receiver: '{discord_receiver_name}' + matchers: + - alertname!=Watchdog + continue: true"#, + ); + (receiver, route) } -fn slack_alert_builder(slack_channel: &String, webhook_url: &Url) -> String { - let slack_alert_values = format!( +fn slack_alert_builder(slack_channel: &String) -> (String, String) { + let slack_receiver_name = format!("Slack-{}", slack_channel); + let receiver = format!( r#" -alertmanager: - config: - global: - slack_api_url: {webhook_url} - route: - receiver: 'Slack' - receivers: - - name: 'null' - - name: 'Slack' + - name: '{slack_receiver_name}' slack_configs: - channel: '{slack_channel}' send_resolved: true title: '{{{{ .CommonAnnotations.title }}}}' - text: '{{{{ .CommonAnnotations.description }}}}' - "#, + text: '{{{{ .CommonAnnotations.description }}}}'"#, ); - slack_alert_values + let route = format!( + r#" + - receiver: '{slack_receiver_name}' + matchers: + - alertname!=Watchdog + continue: true"#, + ); + (receiver, route) } diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index 21bcc20..0e7c2d4 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -38,14 +38,14 @@ pub enum AlertChannel { #[derive(Debug, Clone, Serialize)] pub struct MonitoringAlertingStackScore { - pub alert_channel: Option, + pub alert_channel: Vec, pub namespace: Option, } impl MonitoringAlertingStackScore { pub fn new() -> Self { Self { - alert_channel: None, + alert_channel: Vec::new(), namespace: None, } } @@ -96,8 +96,10 @@ impl MonitoringAlertingStackInterpret { topology: &T, config: &KubePrometheusConfig, ) -> Result { - match &self.score.alert_channel { - Some(channel) => match channel { + let mut outcomes = vec![]; + + for channel in &self.score.alert_channel { + let outcome = match channel { AlertChannel::Discord { .. } => { discord_alert_manager_score(config) .create_interpret() @@ -110,9 +112,14 @@ impl MonitoringAlertingStackInterpret { AlertChannel::Smpt { .. } => { todo!() } - }, - None => Ok(Outcome::success("No alert channel configured".to_string())), + }; + outcomes.push(outcome); } + for result in outcomes { + result?; + } + + Ok(Outcome::success("All alert channels deployed".to_string())) } }