diff --git a/harmony/src/modules/monitoring/config.rs b/harmony/src/modules/monitoring/config.rs index 7a073bc..1849c76 100644 --- a/harmony/src/modules/monitoring/config.rs +++ b/harmony/src/modules/monitoring/config.rs @@ -1,8 +1,7 @@ -use email_address::EmailAddress; use serde::Serialize; -use url::Url; -use super::monitoring_alerting::WebhookServiceType; +use super::monitoring_alerting::AlertChannel; + #[derive(Debug, Clone, Serialize)] pub struct KubePrometheusConfig { @@ -23,9 +22,7 @@ pub struct KubePrometheusConfig { pub kube_proxy: bool, pub kube_state_metrics: bool, pub prometheus_operator: bool, - pub webhook_url: Option, - pub webhook_service_type: Option, - pub discord_alert_manager_release_name: String, + pub alert_channel: Option, } impl KubePrometheusConfig { pub fn new() -> Self { @@ -34,8 +31,7 @@ impl KubePrometheusConfig { default_rules: true, windows_monitoring: false, alert_manager: true, - webhook_service_type: None, - webhook_url: None, + alert_channel: None, grafana: true, node_exporter: false, prometheus: true, @@ -49,7 +45,6 @@ impl KubePrometheusConfig { prometheus_operator: true, core_dns: false, kube_scheduler: false, - discord_alert_manager_release_name: "discord-alert-manager".into(), } } } diff --git a/harmony/src/modules/monitoring/discord_alert_manager.rs b/harmony/src/modules/monitoring/discord_alert_manager.rs index 868b1b7..dfafbfe 100644 --- a/harmony/src/modules/monitoring/discord_alert_manager.rs +++ b/harmony/src/modules/monitoring/discord_alert_manager.rs @@ -4,13 +4,14 @@ use non_blank_string_rs::NonBlankString; use crate::modules::helm::chart::HelmChartScore; -use super::config::KubePrometheusConfig; +use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel}; pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore { - let url = if let Some(url) = &config.webhook_url { - url.to_string() - } else { - "None".to_string() + let (url, release_name) = match &config.alert_channel { 
+ Some(AlertChannel::Discord { webhook_url, name }) => { + (webhook_url.to_string(), name.to_string()) + } + _ => panic!("Expected Discord alert channel configuration"), }; let values = format!( @@ -24,7 +25,7 @@ environment: HelmChartScore { namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), - release_name: NonBlankString::from_str(&config.discord_alert_manager_release_name).unwrap(), + release_name: NonBlankString::from_str(&release_name).unwrap(), chart_name: NonBlankString::from_str("oci://hub.nationtech.io/nt/alertmanager-discord") .unwrap(), chart_version: None, diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs index ed7916e..404184a 100644 --- a/harmony/src/modules/monitoring/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -1,5 +1,7 @@ -use super::{config::KubePrometheusConfig, monitoring_alerting::WebhookServiceType}; +use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel}; +use log::info; use non_blank_string_rs::NonBlankString; +use url::Url; use std::{collections::HashMap, str::FromStr}; use crate::modules::helm::chart::HelmChartScore; @@ -10,14 +12,6 @@ pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmCh let default_rules = config.default_rules.to_string(); let windows_monitoring = config.windows_monitoring.to_string(); let alert_manager = config.alert_manager.to_string(); - let webhook_service_type = if let Some(service) = &config.webhook_service_type { - match service { - WebhookServiceType::Discord => "Discord".to_string(), - WebhookServiceType::Slack => "Slack".to_string(), - } - } else { - "None".to_string() - }; let grafana = config.grafana.to_string(); let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string(); let kubernetes_api_server = config.kubernetes_api_server.to_string(); @@ -31,8 +25,7 @@ pub fn kube_prometheus_helm_chart_score(config: 
&KubePrometheusConfig) -> HelmCh let node_exporter = config.node_exporter.to_string(); let prometheus_operator = config.prometheus_operator.to_string(); let prometheus = config.prometheus.to_string(); - let discord_alert_manager_release_name = config.discord_alert_manager_release_name.to_string(); - let values = format!( + let mut values = format!( r#" additionalPrometheusRulesMap: pods-status-alerts: @@ -45,23 +38,23 @@ additionalPrometheusRulesMap: labels: severity: critical annotations: - title: "[CRIT] POD not healthy : {{ $labels.pod }}" + title: "[CRIT] POD not healthy : {{{{ $labels.pod }}}}" description: | A POD is in a non-ready state! - - **Pod**: {{ $labels.pod }} - - **Namespace**: {{ $labels.namespace }} + - **Pod**: {{{{ $labels.pod }}}} + - **Namespace**: {{{{ $labels.namespace }}}} - alert: "[CRIT] POD crash looping" expr: increase(kube_pod_container_status_restarts_total[5m]) > 3 for: 0m labels: severity: critical annotations: - title: "[CRIT] POD crash looping : {{ $labels.pod }}" + title: "[CRIT] POD crash looping : {{{{ $labels.pod }}}}" description: | A POD is drowning in a crash loop! - - **Pod**: {{ $labels.pod }} - - **Namespace**: {{ $labels.namespace }} - - **Instance**: {{ $labels.instance }} + - **Pod**: {{{{ $labels.pod }}}} + - **Namespace**: {{{{ $labels.namespace }}}} + - **Instance**: {{{{ $labels.instance }}}} pvc-alerts: groups: - name: pvc-alerts @@ -82,8 +75,8 @@ additionalPrometheusRulesMap: labels: severity: warning annotations: - description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days. - title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days + description: The PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} is predicted to fill over 95% in less than 2 days. 
+ title: PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} will fill over 95% in less than 2 days defaultRules: create: {default_rules} rules: @@ -131,7 +124,6 @@ alertmanager: group_wait: 30s group_interval: 5m repeat_interval: 12h - receiver: '{webhook_service_type}' routes: - receiver: 'null' matchers: @@ -139,10 +131,6 @@ alertmanager: continue: false receivers: - name: 'null' - - name: '{webhook_service_type}' - webhook_configs: - - url: 'http://{discord_alert_manager_release_name}-alertmanager-discord:9094' - send_resolved: true grafana: enabled: {grafana} kubernetesServiceMonitors: @@ -172,6 +160,16 @@ prometheus: "#, ); + if let Some(alert_channel) = &config.alert_channel { + match alert_channel { + AlertChannel::Discord { name, .. } => { + values.push_str(&discord_alert_builder(name).to_string());} + AlertChannel::Slack { slack_channel, webhook_url } => { + values.push_str(&slack_alert_builder(slack_channel, webhook_url).to_string());} + AlertChannel::Smpt { .. 
} => todo!(), + } + }; + HelmChartScore { namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), @@ -187,3 +185,44 @@ prometheus: repository: None, } } + +fn discord_alert_builder(release_name: &String) -> String { + let discord_alert_manager_release_name = release_name; + let discord_alert_values = format!( + r#" +alertmanager: + config: + route: + receiver: 'Discord' + receivers: + - name: 'null' + - name: 'Discord' + webhook_configs: + - url: 'http://{discord_alert_manager_release_name}-alertmanager-discord:9094' + send_resolved: true + "#, + ); + discord_alert_values +} + +fn slack_alert_builder(slack_channel: &String, webhook_url: &Url) -> String { + let slack_alert_values = format!( + r#" +alertmanager: + config: + global: + slack_api_url: {webhook_url} + route: + receiver: 'Slack' + receivers: + - name: 'null' + - name: 'Slack' + slack_configs: + - channel: '{slack_channel}' + send_resolved: true + title: '{{{{ .CommonAnnotations.title }}}}' + text: '{{{{ .CommonAnnotations.description }}}}' + "#, + ); + slack_alert_values +} diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index 6bb6e83..21bcc20 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -20,9 +20,13 @@ use super::{ #[derive(Debug, Clone, Serialize)] pub enum AlertChannel { - WebHookUrl { - url: Url, - webhook_service_type: WebhookServiceType, + Discord { + name: String, + webhook_url: Url, + }, + Slack { + slack_channel: String, + webhook_url: Url, }, //TODO test and implement in helm chart //currently does not work @@ -32,13 +36,6 @@ pub enum AlertChannel { }, } -#[derive(Debug, Clone, Serialize)] -pub enum WebhookServiceType { - Discord, - //TODO test slack notifications - Slack, -} - #[derive(Debug, Clone, Serialize)] pub struct MonitoringAlertingStackScore { pub 
alert_channel: Option<AlertChannel>,
}) => { - todo!() - } None => Ok(Outcome::success("No alert channel configured".to_string())), } } @@ -155,7 +129,8 @@ impl Interpret for MonitoringAlertingStackInterpre self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config) .await?; info!("Installing alert channel service"); - self.deploy_alert_channel_service(inventory, topology, &config).await?; + self.deploy_alert_channel_service(inventory, topology, &config) + .await?; Ok(Outcome::success(format!( "succesfully deployed monitoring and alerting stack" )))