Merge pull request 'feat:added Slack notifications support' (#38) from feat/slack-notifs into master
Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/38 Reviewed-by: johnride <jg@nationtech.io>
This commit is contained in:
		
						commit
						9c51040f3b
					
				| @ -1,8 +1,7 @@ | ||||
| use email_address::EmailAddress; | ||||
| use serde::Serialize; | ||||
| use url::Url; | ||||
| 
 | ||||
| use super::monitoring_alerting::WebhookServiceType; | ||||
| use super::monitoring_alerting::AlertChannel; | ||||
| 
 | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub struct KubePrometheusConfig { | ||||
| @ -23,9 +22,7 @@ pub struct KubePrometheusConfig { | ||||
|     pub kube_proxy: bool, | ||||
|     pub kube_state_metrics: bool, | ||||
|     pub prometheus_operator: bool, | ||||
|     pub webhook_url: Option<Url>, | ||||
|     pub webhook_service_type: Option<WebhookServiceType>, | ||||
|     pub discord_alert_manager_release_name: String, | ||||
|     pub alert_channel: Vec<AlertChannel>, | ||||
| } | ||||
| impl KubePrometheusConfig { | ||||
|     pub fn new() -> Self { | ||||
| @ -34,8 +31,7 @@ impl KubePrometheusConfig { | ||||
|             default_rules: true, | ||||
|             windows_monitoring: false, | ||||
|             alert_manager: true, | ||||
|             webhook_service_type: None, | ||||
|             webhook_url: None, | ||||
|             alert_channel: Vec::new(), | ||||
|             grafana: true, | ||||
|             node_exporter: false, | ||||
|             prometheus: true, | ||||
| @ -49,7 +45,6 @@ impl KubePrometheusConfig { | ||||
|             prometheus_operator: true, | ||||
|             core_dns: false, | ||||
|             kube_scheduler: false, | ||||
|             discord_alert_manager_release_name: "discord-alert-manager".into(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -4,28 +4,29 @@ use non_blank_string_rs::NonBlankString; | ||||
| 
 | ||||
| use crate::modules::helm::chart::HelmChartScore; | ||||
| 
 | ||||
| use super::config::KubePrometheusConfig; | ||||
| use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel}; | ||||
| 
 | ||||
| pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore { | ||||
|     let url = if let Some(url) = &config.webhook_url { | ||||
|         url.to_string() | ||||
|     } else { | ||||
|         "None".to_string() | ||||
|     }; | ||||
| fn get_discord_alert_manager_score(config: &KubePrometheusConfig) -> Option<HelmChartScore> { | ||||
|     let (url, name) = config.alert_channel.iter().find_map(|channel| { | ||||
|         if let AlertChannel::Discord { webhook_url, name } = channel { | ||||
|             Some((webhook_url, name)) | ||||
|         } else { | ||||
|             None | ||||
|         } | ||||
|     })?; | ||||
| 
 | ||||
|     let values = format!( | ||||
|         r#"    
 | ||||
| 
 | ||||
|         r#" | ||||
| environment: | ||||
|   - name: "DISCORD_WEBHOOK" | ||||
|     value: "{url}" | ||||
|     "#,
 | ||||
| "#,
 | ||||
|     ); | ||||
| 
 | ||||
|     HelmChartScore { | ||||
|     Some(HelmChartScore { | ||||
|         namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), | ||||
|         release_name: NonBlankString::from_str(&config.discord_alert_manager_release_name).unwrap(), | ||||
|         chart_name: NonBlankString::from_str("oci://hub.nationtech.io/nt/alertmanager-discord") | ||||
|         release_name: NonBlankString::from_str(&name).unwrap(), | ||||
|         chart_name: NonBlankString::from_str("oci://hub.nationtech.io/library/alertmanager-discord") | ||||
|             .unwrap(), | ||||
|         chart_version: None, | ||||
|         values_overrides: None, | ||||
| @ -33,5 +34,13 @@ environment: | ||||
|         create_namespace: true, | ||||
|         install_only: true, | ||||
|         repository: None, | ||||
|     }) | ||||
| } | ||||
| 
 | ||||
| pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore { | ||||
|     if let Some(chart) = get_discord_alert_manager_score(config) { | ||||
|         chart | ||||
|     } else { | ||||
|         panic!("Expected discord alert manager helm chart"); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -1,6 +1,8 @@ | ||||
| use super::{config::KubePrometheusConfig, monitoring_alerting::WebhookServiceType}; | ||||
| use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel}; | ||||
| use log::info; | ||||
| use non_blank_string_rs::NonBlankString; | ||||
| use std::{collections::HashMap, str::FromStr}; | ||||
| use url::Url; | ||||
| 
 | ||||
| use crate::modules::helm::chart::HelmChartScore; | ||||
| 
 | ||||
| @ -10,14 +12,6 @@ pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmCh | ||||
|     let default_rules = config.default_rules.to_string(); | ||||
|     let windows_monitoring = config.windows_monitoring.to_string(); | ||||
|     let alert_manager = config.alert_manager.to_string(); | ||||
|     let webhook_service_type = if let Some(service) = &config.webhook_service_type { | ||||
|         match service { | ||||
|             WebhookServiceType::Discord  => "Discord".to_string(), | ||||
|             WebhookServiceType::Slack  => "Slack".to_string(), | ||||
|         } | ||||
|     } else { | ||||
|         "None".to_string() | ||||
|     }; | ||||
|     let grafana = config.grafana.to_string(); | ||||
|     let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string(); | ||||
|     let kubernetes_api_server = config.kubernetes_api_server.to_string(); | ||||
| @ -31,8 +25,7 @@ pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmCh | ||||
|     let node_exporter = config.node_exporter.to_string(); | ||||
|     let prometheus_operator = config.prometheus_operator.to_string(); | ||||
|     let prometheus = config.prometheus.to_string(); | ||||
|     let discord_alert_manager_release_name = config.discord_alert_manager_release_name.to_string(); | ||||
|     let values = format!( | ||||
|     let mut values = format!( | ||||
|         r#" | ||||
| additionalPrometheusRulesMap: | ||||
|   pods-status-alerts: | ||||
| @ -45,23 +38,23 @@ additionalPrometheusRulesMap: | ||||
|             labels: | ||||
|               severity: critical | ||||
|             annotations: | ||||
|               title: "[CRIT] POD not healthy : {{ $labels.pod }}" | ||||
|               title: "[CRIT] POD not healthy : {{{{ $labels.pod }}}}" | ||||
|               description: | | ||||
|                A POD is in a non-ready state! | ||||
|                - **Pod**: {{ $labels.pod }} | ||||
|                - **Namespace**: {{ $labels.namespace }} | ||||
|                - **Pod**: {{{{ $labels.pod }}}} | ||||
|                - **Namespace**: {{{{ $labels.namespace }}}} | ||||
|           - alert: "[CRIT] POD crash looping" | ||||
|             expr: increase(kube_pod_container_status_restarts_total[5m]) > 3 | ||||
|             for: 0m | ||||
|             labels: | ||||
|               severity: critical | ||||
|             annotations: | ||||
|               title: "[CRIT] POD crash looping : {{ $labels.pod }}" | ||||
|               title: "[CRIT] POD crash looping : {{{{ $labels.pod }}}}" | ||||
|               description: | | ||||
|                A POD is drowning in a crash loop! | ||||
|                - **Pod**: {{ $labels.pod }} | ||||
|                - **Namespace**: {{ $labels.namespace }} | ||||
|                - **Instance**: {{ $labels.instance }} | ||||
|                - **Pod**: {{{{ $labels.pod }}}} | ||||
|                - **Namespace**: {{{{ $labels.namespace }}}} | ||||
|                - **Instance**: {{{{ $labels.instance }}}} | ||||
|   pvc-alerts: | ||||
|     groups: | ||||
|       - name: pvc-alerts | ||||
| @ -82,8 +75,8 @@ additionalPrometheusRulesMap: | ||||
|             labels: | ||||
|               severity: warning | ||||
|             annotations: | ||||
|               description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days. | ||||
|               title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days | ||||
|               description: The PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} is predicted to fill over 95% in less than 2 days. | ||||
|               title: PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} will fill over 95% in less than 2 days | ||||
| defaultRules: | ||||
|   create: {default_rules} | ||||
|   rules: | ||||
| @ -123,26 +116,6 @@ defaultRules: | ||||
|     windows: true | ||||
| windowsMonitoring: | ||||
|   enabled: {windows_monitoring} | ||||
| alertmanager: | ||||
|   enabled: {alert_manager} | ||||
|   config:  | ||||
|     route: | ||||
|       group_by: ['job'] | ||||
|       group_wait: 30s | ||||
|       group_interval: 5m | ||||
|       repeat_interval: 12h | ||||
|       receiver: '{webhook_service_type}' | ||||
|       routes: | ||||
|       - receiver: 'null' | ||||
|         matchers: | ||||
|           - alertname="Watchdog" | ||||
|         continue: false | ||||
|     receivers: | ||||
|     - name: 'null' | ||||
|     - name: '{webhook_service_type}' | ||||
|       webhook_configs: | ||||
|       - url: 'http://{discord_alert_manager_release_name}-alertmanager-discord:9094'
 | ||||
|         send_resolved: true | ||||
| grafana: | ||||
|   enabled: {grafana} | ||||
| kubernetesServiceMonitors: | ||||
| @ -172,6 +145,66 @@ prometheus: | ||||
| "#,
 | ||||
|     ); | ||||
| 
 | ||||
|     let alertmanager_config = alert_manager_yaml_builder(&config); | ||||
|     values.push_str(&alertmanager_config); | ||||
| 
 | ||||
|     fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String { | ||||
|         let mut receivers = String::new(); | ||||
|         let mut routes = String::new(); | ||||
|         let mut global_configs = String::new(); | ||||
|         let alert_manager = config.alert_manager; | ||||
|         for alert_channel in &config.alert_channel { | ||||
|             match alert_channel { | ||||
|                 AlertChannel::Discord { name, .. } => { | ||||
|                     let (receiver, route) = discord_alert_builder(name); | ||||
|                     info!("discord receiver: {} \nroute: {}", receiver, route); | ||||
|                     receivers.push_str(&receiver); | ||||
|                     routes.push_str(&route); | ||||
|                 } | ||||
|                 AlertChannel::Slack { | ||||
|                     slack_channel, | ||||
|                     webhook_url, | ||||
|                 } => { | ||||
|                     let (receiver, route) = slack_alert_builder(slack_channel); | ||||
|                     info!("slack receiver: {} \nroute: {}", receiver, route); | ||||
|                     receivers.push_str(&receiver); | ||||
| 
 | ||||
|                     routes.push_str(&route); | ||||
|                     let global_config = format!( | ||||
|                         r#" | ||||
|     global: | ||||
|       slack_api_url: {webhook_url}"#
 | ||||
|                     ); | ||||
| 
 | ||||
|                     global_configs.push_str(&global_config); | ||||
|                 } | ||||
|                 AlertChannel::Smpt { .. } => todo!(), | ||||
|             } | ||||
|         } | ||||
|         info!("after alert receiver: {}", receivers); | ||||
|         info!("after alert routes: {}", routes); | ||||
| 
 | ||||
|         let alertmanager_config = format!( | ||||
|             r#" | ||||
| alertmanager: | ||||
|   enabled: {alert_manager} | ||||
|   config: {global_configs} | ||||
|     route:  | ||||
|       group_by: ['job'] | ||||
|       group_wait: 30s | ||||
|       group_interval: 5m | ||||
|       repeat_interval: 12h | ||||
|       routes: | ||||
| {routes} | ||||
|     receivers:  | ||||
|     - name: 'null' | ||||
| {receivers}"#
 | ||||
|         ); | ||||
| 
 | ||||
|         info!("alert manager config: {}", alertmanager_config); | ||||
|         alertmanager_config | ||||
|     } | ||||
| 
 | ||||
|     HelmChartScore { | ||||
|         namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), | ||||
|         release_name: NonBlankString::from_str("kube-prometheus").unwrap(), | ||||
| @ -187,3 +220,43 @@ prometheus: | ||||
|         repository: None, | ||||
|     } | ||||
| } | ||||
| 
 | ||||
/// Builds the Alertmanager YAML fragments for one Discord alert channel.
///
/// `release_name` is used both in the receiver name (`Discord-<release_name>`)
/// and in the webhook URL (`http://<release_name>-alertmanager-discord:9094`).
///
/// Returns `(receiver, route)`. Both fragments start with a newline and are
/// pre-indented so they can be appended directly under the `receivers:` and
/// `routes:` keys of the alertmanager config. The route matches every alert
/// except `Watchdog` and sets `continue: true`.
fn discord_alert_builder(release_name: &str) -> (String, String) {
    let discord_receiver_name = format!("Discord-{release_name}");
    let receiver = format!(
        r#"
    - name: '{discord_receiver_name}'
      webhook_configs:
      - url: 'http://{release_name}-alertmanager-discord:9094'
        send_resolved: true"#,
    );
    let route = format!(
        r#"
      - receiver: '{discord_receiver_name}'
        matchers: 
          - alertname!=Watchdog
        continue: true"#,
    );
    (receiver, route)
}
| 
 | ||||
/// Builds the Alertmanager YAML fragments for one Slack alert channel.
///
/// `slack_channel` is used both in the receiver name (`Slack-<slack_channel>`)
/// and as the `channel` of the generated `slack_configs` entry.
///
/// Returns `(receiver, route)`. Both fragments start with a newline and are
/// pre-indented so they can be appended directly under the `receivers:` and
/// `routes:` keys of the alertmanager config. The route matches every alert
/// except `Watchdog` and sets `continue: true`.
fn slack_alert_builder(slack_channel: &str) -> (String, String) {
    let slack_receiver_name = format!("Slack-{slack_channel}");
    // `{{{{` / `}}}}` are escaped braces: the rendered YAML contains the literal
    // template markers `{{ ... }}`.
    let receiver = format!(
        r#"
    - name: '{slack_receiver_name}'
      slack_configs:
      - channel: '{slack_channel}'
        send_resolved: true
        title: '{{{{ .CommonAnnotations.title }}}}'
        text: '{{{{ .CommonAnnotations.description }}}}'"#,
    );
    let route = format!(
        r#"
      - receiver: '{slack_receiver_name}'
        matchers: 
          - alertname!=Watchdog
        continue: true"#,
    );
    (receiver, route)
}
|  | ||||
| @ -20,9 +20,13 @@ use super::{ | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub enum AlertChannel { | ||||
|     WebHookUrl { | ||||
|         url: Url, | ||||
|         webhook_service_type: WebhookServiceType, | ||||
|     Discord { | ||||
|         name: String, | ||||
|         webhook_url: Url, | ||||
|     }, | ||||
|     Slack { | ||||
|         slack_channel: String, | ||||
|         webhook_url: Url, | ||||
|     }, | ||||
|     //TODO test and implement in helm chart
 | ||||
|     //currently does not work
 | ||||
| @ -32,46 +36,19 @@ pub enum AlertChannel { | ||||
|     }, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub enum WebhookServiceType { | ||||
|     Discord, | ||||
|     //TODO test slack notifications
 | ||||
|     Slack, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub struct MonitoringAlertingStackScore { | ||||
|     pub alert_channel: Option<AlertChannel>, | ||||
|     pub alert_channel: Vec<AlertChannel>, | ||||
|     pub namespace: Option<String>, | ||||
| } | ||||
| 
 | ||||
| impl MonitoringAlertingStackScore { | ||||
|     pub fn new() -> Self { | ||||
|         Self { | ||||
|             alert_channel: None, | ||||
|             alert_channel: Vec::new(), | ||||
|             namespace: None, | ||||
|         } | ||||
|     } | ||||
|     fn set_alert_channel(&self, config: &mut KubePrometheusConfig) { | ||||
|         if let Some(alert_channel) = &self.alert_channel { | ||||
|             match alert_channel { | ||||
|                 AlertChannel::WebHookUrl { | ||||
|                     url, | ||||
|                     webhook_service_type, | ||||
|                 } => { | ||||
|                     config.webhook_url = Some(url.clone()); | ||||
|                     config.webhook_service_type = Some(webhook_service_type.clone()); | ||||
|                 } | ||||
|                 AlertChannel::Smpt { | ||||
|                     //TODO setup smpt alerts 
 | ||||
|                     email_address, | ||||
|                     service_name, | ||||
|                 } => { | ||||
|                     todo!() | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore { | ||||
| @ -93,10 +70,10 @@ struct MonitoringAlertingStackInterpret { | ||||
| impl MonitoringAlertingStackInterpret { | ||||
|     async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig { | ||||
|         let mut config = KubePrometheusConfig::new(); | ||||
|         self.score.set_alert_channel(&mut config); | ||||
|         if let Some(ns) = &self.score.namespace { | ||||
|             config.namespace = ns.clone(); | ||||
|         } | ||||
|         config.alert_channel = self.score.alert_channel.clone(); | ||||
|         config | ||||
|     } | ||||
| 
 | ||||
| @ -119,26 +96,30 @@ impl MonitoringAlertingStackInterpret { | ||||
|         topology: &T, | ||||
|         config: &KubePrometheusConfig, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         match &self.score.alert_channel { | ||||
|             Some(AlertChannel::WebHookUrl { | ||||
|                 webhook_service_type, | ||||
|                 .. | ||||
|             }) => match webhook_service_type { | ||||
|                 WebhookServiceType::Discord => { | ||||
|         let mut outcomes = vec![]; | ||||
| 
 | ||||
|         for channel in &self.score.alert_channel { | ||||
|             let outcome = match channel { | ||||
|                 AlertChannel::Discord { .. } => { | ||||
|                     discord_alert_manager_score(config) | ||||
|                         .create_interpret() | ||||
|                         .execute(inventory, topology) | ||||
|                         .await | ||||
|                 } | ||||
|                 WebhookServiceType::Slack => Ok(Outcome::success( | ||||
|                 AlertChannel::Slack { .. } => Ok(Outcome::success( | ||||
|                     "No extra configs for slack alerting".to_string(), | ||||
|                 )), | ||||
|             }, | ||||
|             Some(AlertChannel::Smpt { .. }) => { | ||||
|                 todo!() | ||||
|             } | ||||
|             None => Ok(Outcome::success("No alert channel configured".to_string())), | ||||
|                 AlertChannel::Smpt { .. } => { | ||||
|                     todo!() | ||||
|                 } | ||||
|             }; | ||||
|             outcomes.push(outcome); | ||||
|         } | ||||
|         for result in outcomes { | ||||
|             result?; 
 | ||||
|         } | ||||
| 
 | ||||
|         Ok(Outcome::success("All alert channels deployed".to_string())) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| @ -155,7 +136,8 @@ impl<T: Topology + HelmCommand> Interpret<T> for MonitoringAlertingStackInterpre | ||||
|         self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config) | ||||
|             .await?; | ||||
|         info!("Installing alert channel service"); | ||||
|         self.deploy_alert_channel_service(inventory, topology, &config).await?; | ||||
|         self.deploy_alert_channel_service(inventory, topology, &config) | ||||
|             .await?; | ||||
|         Ok(Outcome::success(format!( | ||||
|             "succesfully deployed monitoring and alerting stack" | ||||
|         ))) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user