From b4c6848433d607a61239d6c91a268f423a8f1d36 Mon Sep 17 00:00:00 2001
From: Willem
Date: Thu, 15 May 2025 14:52:04 -0400
Subject: [PATCH 1/4] feat: added default monitoringStackScore implementation

---
 Cargo.lock                                    | 10 +++
 harmony/Cargo.toml                            |  1 +
 harmony/src/modules/monitoring/config.rs      | 47 +++++++++++
 .../src/modules/monitoring/kube_prometheus.rs | 42 ++++++++--
 harmony/src/modules/monitoring/mod.rs         |  1 +
 .../modules/monitoring/monitoring_alerting.rs | 79 +++++++++++++++----
 6 files changed, 161 insertions(+), 19 deletions(-)
 create mode 100644 harmony/src/modules/monitoring/config.rs

diff --git a/Cargo.lock b/Cargo.lock
index f84d847..ae52051 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -936,6 +936,15 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "email_address"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "encoding_rs"
 version = "0.8.35"
@@ -1396,6 +1405,7 @@ dependencies = [
  "derive-new",
  "directories",
  "dockerfile_builder",
+ "email_address",
  "env_logger",
  "harmony_macros",
  "harmony_types",
diff --git a/harmony/Cargo.toml b/harmony/Cargo.toml
index 02a0ce7..cb9b001 100644
--- a/harmony/Cargo.toml
+++ b/harmony/Cargo.toml
@@ -39,3 +39,4 @@ lazy_static = "1.5.0"
 dockerfile_builder = "0.1.5"
 temp-file = "0.1.9"
 convert_case.workspace = true
+email_address = "0.2.9"
diff --git a/harmony/src/modules/monitoring/config.rs b/harmony/src/modules/monitoring/config.rs
new file mode 100644
index 0000000..412d713
--- /dev/null
+++ b/harmony/src/modules/monitoring/config.rs
@@ -0,0 +1,47 @@
+use email_address::EmailAddress;
+use serde::Serialize;
+use url::Url;
+
+#[derive(Debug, Clone, Serialize)]
+pub struct KubePrometheusConfig {
+    pub namespace: String,
+    pub node_exporter: bool,
+    pub alert_manager: bool,
+    pub prometheus: bool,
+    pub grafana: bool,
+    pub windows_monitoring: bool,
+    pub kubernetes_service_monitors: bool,
+    pub kubelet: bool,
+    pub kube_controller_manager: bool,
+    pub kube_etcd: bool,
+    pub kube_proxy: bool,
+    pub kube_state_metrics: bool,
+    pub prometheus_operator: bool,
+    pub webhook_url: Option<Url>,
+    pub webhook_service_name: Option<String>,
+    pub smpt_email_address: Option<EmailAddress>,
+    pub smtp_service_name: Option<String>,
+}
+impl KubePrometheusConfig {
+    pub fn new() -> Self {
+        Self {
+            namespace: "monitoring".into(),
+            node_exporter: false,
+            alert_manager: false,
+            prometheus: true,
+            grafana: true,
+            windows_monitoring: false,
+            kubernetes_service_monitors: true,
+            kubelet: true,
+            kube_controller_manager: true,
+            kube_etcd: true,
+            kube_proxy: true,
+            kube_state_metrics: true,
+            prometheus_operator: true,
+            webhook_url: None,
+            webhook_service_name: None,
+            smpt_email_address: None,
+            smtp_service_name: None,
+        }
+    }
+}
diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs
index c729c96..0289d8a 100644
--- a/harmony/src/modules/monitoring/kube_prometheus.rs
+++ b/harmony/src/modules/monitoring/kube_prometheus.rs
@@ -1,10 +1,10 @@
-use std::str::FromStr;
-
+use super::config::KubePrometheusConfig;
 use non_blank_string_rs::NonBlankString;
+use std::{collections::HashMap, str::FromStr};
 
 use crate::modules::helm::chart::HelmChartScore;
 
-pub fn kube_prometheus_score(ns: &str) -> HelmChartScore {
+pub fn kube_prometheus_score(config: &KubePrometheusConfig) -> HelmChartScore {
     //TODO this should be make into a rule with default formatting that can be easily passed
as a vec //to the overrides or something leaving the user to deal with formatting here seems bad let values = r#" @@ -32,8 +32,40 @@ additionalPrometheusRulesMap: description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days. title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days "#; + let mut values_overrides: HashMap = HashMap::new(); + + macro_rules! insert_flag { + ($key:expr, $val:expr) => { + values_overrides.insert(NonBlankString::from_str($key).unwrap(), $val.to_string()); + }; + } + + insert_flag!("nodeExporter.enabled", config.node_exporter); + insert_flag!("windowsMonitoring.enabled", config.windows_monitoring); + insert_flag!("grafana.enabled", config.grafana); + insert_flag!("alertmanager.enabled", config.alert_manager); + insert_flag!("prometheus.enabled", config.prometheus); + insert_flag!( + "kubernetes_service_monitors.enabled", + config.kubernetes_service_monitors + ); + insert_flag!("kubelet.enabled", config.kubelet); + insert_flag!( + "kubeControllerManager.enabled", + config.kube_controller_manager + ); + insert_flag!("kubeProxy.enabled", config.kube_proxy); + insert_flag!("kubeEtcd.enabled", config.kube_etcd); + insert_flag!("kubeStateMetrics.enabled", config.kube_state_metrics); + insert_flag!("prometheusOperator.enabled", config.prometheus_operator); + + if let (Some(url), Some(name)) = (&config.webhook_url, &config.webhook_service_name) { + insert_flag!("alertmanager.config.receivers.webhook_configs.url", url.as_str()); + insert_flag!("alertmanager.config.receivers.name", name.as_str()); + } + HelmChartScore { - namespace: Some(NonBlankString::from_str(ns).unwrap()), + namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), chart_name: NonBlankString::from_str( "oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", //use kube prometheus chart which includes grafana, prometheus, alert @@ -41,7 +73,7 @@ additionalPrometheusRulesMap: ) .unwrap(), chart_version: None, - values_overrides: None, + values_overrides: Some(values_overrides), values_yaml: Some(values.to_string()), create_namespace: true, install_only: true, diff --git a/harmony/src/modules/monitoring/mod.rs b/harmony/src/modules/monitoring/mod.rs index 3c73fad..01bb194 100644 --- a/harmony/src/modules/monitoring/mod.rs +++ b/harmony/src/modules/monitoring/mod.rs @@ -1,2 +1,3 @@ mod kube_prometheus; pub mod monitoring_alerting; +mod config; diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index a08b038..f969d7a 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -1,44 +1,95 @@ +use email_address::EmailAddress; + use serde::Serialize; +use url::Url; use crate::{ interpret::Interpret, - modules::helm::chart::HelmChartScore, score::Score, topology::{HelmCommand, Topology}, }; -use super::kube_prometheus::kube_prometheus_score; +use super::{config::KubePrometheusConfig, kube_prometheus::kube_prometheus_score}; + +#[derive(Debug, Clone, Serialize)] +pub enum AlertChannel { + WebHookUrl { + url: Url, + service_name: String, + }, + Smpt { + email_address: EmailAddress, + service_name: String, + }, +} + +#[derive(Debug, Clone, Serialize)] +pub enum Stack { + KubePrometheusStack, + OtherStack, +} #[derive(Debug, Clone, 
Serialize)] pub struct MonitoringAlertingStackScore { - // TODO Support other components in our monitoring/alerting stack instead of a single helm - // chart - pub monitoring_stack: HelmChartScore, - pub namespace: String, + pub alert_channel: Option, + pub monitoring_stack: Stack, } impl MonitoringAlertingStackScore { - pub fn new_with_ns(ns: &str) -> Self { - Self { - monitoring_stack: kube_prometheus_score(ns), - namespace: ns.to_string(), + fn match_alert_channel(&self, config: &mut KubePrometheusConfig) { + if let Some(alert_channel) = &self.alert_channel { + match alert_channel { + //opt1 + AlertChannel::WebHookUrl { url, service_name } => { + config.webhook_url = Some(url.clone()); + config.webhook_service_name = Some(service_name.clone()); + } + //opt2 + AlertChannel::Smpt { + email_address, + service_name, + } => { + config.smpt_email_address = Some(email_address.clone()); + config.smtp_service_name = Some(service_name.clone()); + } + } } } } +// pub fn new_with_ns(ns: &str) -> Self { +// let mut config = KubePrometheusConfig::default(); +// let namespace = ns.to_string(); +// config.namespace = namespace; +// let score = kube_prometheus_score(&config); +// Self { +// alert_channel: None, +// monitoring_stack: Some(Stack::KubePrometheusStack), +// } +// } +//} impl Default for MonitoringAlertingStackScore { fn default() -> Self { - let ns = "monitoring"; Self { - monitoring_stack: kube_prometheus_score(ns), - namespace: ns.to_string(), + alert_channel: None, + monitoring_stack: Stack::KubePrometheusStack, } } } impl Score for MonitoringAlertingStackScore { fn create_interpret(&self) -> Box> { - self.monitoring_stack.create_interpret() + match &self.monitoring_stack { + Stack::KubePrometheusStack => { + let mut config = KubePrometheusConfig::new(); + self.match_alert_channel(&mut config); + let score = kube_prometheus_score(&config); + score.create_interpret() + } + Stack::OtherStack => { + todo!() + } + } } fn name(&self) -> String { From eb8a8a2e0493a846993e86936fda7cb83911b996 Mon Sep 17 00:00:00 2001 From: Willem Date: Thu, 15 May 2025 15:19:40 -0400 Subject: [PATCH 2/4] chore: modified build config to be able to pass namespace to the config --- examples/lamp/src/main.rs | 4 +- .../src/modules/monitoring/kube_prometheus.rs | 2 +- .../modules/monitoring/monitoring_alerting.rs | 41 ++++++++----------- 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/examples/lamp/src/main.rs b/examples/lamp/src/main.rs index 06d8534..14180cc 100644 --- a/examples/lamp/src/main.rs +++ b/examples/lamp/src/main.rs @@ -43,8 +43,8 @@ async fn main() { .await .unwrap(); - let monitoring_stack_score = - MonitoringAlertingStackScore::new_with_ns(&lamp_stack.config.namespace); + let mut monitoring_stack_score = MonitoringAlertingStackScore::new(); + monitoring_stack_score.namespace = Some(lamp_stack.config.namespace.clone()); maestro.register_all(vec![Box::new(lamp_stack), Box::new(monitoring_stack_score)]); // Here we bootstrap the CLI, this gives some nice features if you need them diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs index 0289d8a..296891e 100644 --- a/harmony/src/modules/monitoring/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -4,7 +4,7 @@ use std::{collections::HashMap, str::FromStr}; use crate::modules::helm::chart::HelmChartScore; -pub fn kube_prometheus_score(config: &KubePrometheusConfig) -> HelmChartScore { +pub fn kube_prometheus_helm_chart_score(config: 
&KubePrometheusConfig) -> HelmChartScore { //TODO this should be make into a rule with default formatting that can be easily passed as a vec //to the overrides or something leaving the user to deal with formatting here seems bad let values = r#" diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index f969d7a..0ec6adf 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -9,7 +9,7 @@ use crate::{ topology::{HelmCommand, Topology}, }; -use super::{config::KubePrometheusConfig, kube_prometheus::kube_prometheus_score}; +use super::{config::KubePrometheusConfig, kube_prometheus::kube_prometheus_helm_chart_score}; #[derive(Debug, Clone, Serialize)] pub enum AlertChannel { @@ -33,9 +33,17 @@ pub enum Stack { pub struct MonitoringAlertingStackScore { pub alert_channel: Option, pub monitoring_stack: Stack, + pub namespace: Option, } impl MonitoringAlertingStackScore { + pub fn new() -> Self { + Self { + alert_channel: None, + monitoring_stack: Stack::KubePrometheusStack, + namespace: None, + } + } fn match_alert_channel(&self, config: &mut KubePrometheusConfig) { if let Some(alert_channel) = &self.alert_channel { match alert_channel { @@ -55,25 +63,13 @@ impl MonitoringAlertingStackScore { } } } -} -// pub fn new_with_ns(ns: &str) -> Self { -// let mut config = KubePrometheusConfig::default(); -// let namespace = ns.to_string(); -// config.namespace = namespace; -// let score = kube_prometheus_score(&config); -// Self { -// alert_channel: None, -// monitoring_stack: Some(Stack::KubePrometheusStack), -// } -// } -//} - -impl Default for MonitoringAlertingStackScore { - fn default() -> Self { - Self { - alert_channel: None, - monitoring_stack: Stack::KubePrometheusStack, + fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig { + let mut config = KubePrometheusConfig::new(); + self.match_alert_channel(&mut config); + if let Some(ns) = &self.namespace { + config.namespace = ns.clone(); } + config } } @@ -81,10 +77,9 @@ impl Score for MonitoringAlertingStackScore { fn create_interpret(&self) -> Box> { match &self.monitoring_stack { Stack::KubePrometheusStack => { - let mut config = KubePrometheusConfig::new(); - self.match_alert_channel(&mut config); - let score = kube_prometheus_score(&config); - score.create_interpret() + let config = self.build_kube_prometheus_helm_chart_config(); + let helm_chart = kube_prometheus_helm_chart_score(&config); + helm_chart.create_interpret() } Stack::OtherStack => { todo!() From e80752ea3fc6cdaa320dcc65c16f4585c5df73f1 Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 20 May 2025 15:51:03 -0400 Subject: [PATCH 3/4] feat: install discord alert manager helm chart when Discord is the chosen alerting channel --- harmony/src/modules/monitoring/config.rs | 42 ++-- .../monitoring/discord_alert_manager.rs | 37 ++++ .../src/modules/monitoring/kube_prometheus.rs | 179 ++++++++++++++---- harmony/src/modules/monitoring/mod.rs | 1 + .../modules/monitoring/monitoring_alerting.rs | 154 +++++++++++---- 5 files changed, 326 insertions(+), 87 deletions(-) create mode 100644 harmony/src/modules/monitoring/discord_alert_manager.rs diff --git a/harmony/src/modules/monitoring/config.rs b/harmony/src/modules/monitoring/config.rs index 412d713..7a073bc 100644 --- a/harmony/src/modules/monitoring/config.rs +++ b/harmony/src/modules/monitoring/config.rs @@ -2,46 +2,54 @@ use email_address::EmailAddress; use serde::Serialize; use 
url::Url; +use super::monitoring_alerting::WebhookServiceType; + #[derive(Debug, Clone, Serialize)] pub struct KubePrometheusConfig { pub namespace: String, - pub node_exporter: bool, + pub default_rules: bool, + pub windows_monitoring: bool, pub alert_manager: bool, + pub node_exporter: bool, pub prometheus: bool, pub grafana: bool, - pub windows_monitoring: bool, pub kubernetes_service_monitors: bool, + pub kubernetes_api_server: bool, pub kubelet: bool, pub kube_controller_manager: bool, + pub core_dns: bool, pub kube_etcd: bool, + pub kube_scheduler: bool, pub kube_proxy: bool, pub kube_state_metrics: bool, pub prometheus_operator: bool, pub webhook_url: Option, - pub webhook_service_name: Option, - pub smpt_email_address: Option, - pub smtp_service_name: Option, + pub webhook_service_type: Option, + pub discord_alert_manager_release_name: String, } impl KubePrometheusConfig { pub fn new() -> Self { Self { namespace: "monitoring".into(), - node_exporter: false, - alert_manager: false, - prometheus: true, - grafana: true, + default_rules: true, windows_monitoring: false, + alert_manager: true, + webhook_service_type: None, + webhook_url: None, + grafana: true, + node_exporter: false, + prometheus: true, kubernetes_service_monitors: true, - kubelet: true, - kube_controller_manager: true, - kube_etcd: true, - kube_proxy: true, + kubernetes_api_server: false, + kubelet: false, + kube_controller_manager: false, + kube_etcd: false, + kube_proxy: false, kube_state_metrics: true, prometheus_operator: true, - webhook_url: None, - webhook_service_name: None, - smpt_email_address: None, - smtp_service_name: None, + core_dns: false, + kube_scheduler: false, + discord_alert_manager_release_name: "discord-alert-manager".into(), } } } diff --git a/harmony/src/modules/monitoring/discord_alert_manager.rs b/harmony/src/modules/monitoring/discord_alert_manager.rs new file mode 100644 index 0000000..868b1b7 --- /dev/null +++ b/harmony/src/modules/monitoring/discord_alert_manager.rs @@ -0,0 +1,37 @@ +use std::str::FromStr; + +use non_blank_string_rs::NonBlankString; + +use crate::modules::helm::chart::HelmChartScore; + +use super::config::KubePrometheusConfig; + +pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore { + let url = if let Some(url) = &config.webhook_url { + url.to_string() + } else { + "None".to_string() + }; + + let values = format!( + r#" + +environment: + - name: "DISCORD_WEBHOOK" + value: "{url}" + "#, + ); + + HelmChartScore { + namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), + release_name: NonBlankString::from_str(&config.discord_alert_manager_release_name).unwrap(), + chart_name: NonBlankString::from_str("oci://hub.nationtech.io/nt/alertmanager-discord") + .unwrap(), + chart_version: None, + values_overrides: None, + values_yaml: Some(values.to_string()), + create_namespace: true, + install_only: true, + repository: None, + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs index 296891e..ed7916e 100644 --- a/harmony/src/modules/monitoring/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -1,4 +1,4 @@ -use super::config::KubePrometheusConfig; +use super::{config::KubePrometheusConfig, monitoring_alerting::WebhookServiceType}; use non_blank_string_rs::NonBlankString; use std::{collections::HashMap, str::FromStr}; @@ -7,8 +7,61 @@ use crate::modules::helm::chart::HelmChartScore; pub fn kube_prometheus_helm_chart_score(config: 
&KubePrometheusConfig) -> HelmChartScore { //TODO this should be make into a rule with default formatting that can be easily passed as a vec //to the overrides or something leaving the user to deal with formatting here seems bad - let values = r#" + let default_rules = config.default_rules.to_string(); + let windows_monitoring = config.windows_monitoring.to_string(); + let alert_manager = config.alert_manager.to_string(); + let webhook_service_type = if let Some(service) = &config.webhook_service_type { + match service { + WebhookServiceType::Discord => "Discord".to_string(), + WebhookServiceType::Slack => "Slack".to_string(), + } + } else { + "None".to_string() + }; + let grafana = config.grafana.to_string(); + let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string(); + let kubernetes_api_server = config.kubernetes_api_server.to_string(); + let kubelet = config.kubelet.to_string(); + let kube_controller_manager = config.kube_controller_manager.to_string(); + let core_dns = config.core_dns.to_string(); + let kube_etcd = config.kube_etcd.to_string(); + let kube_scheduler = config.kube_scheduler.to_string(); + let kube_proxy = config.kube_proxy.to_string(); + let kube_state_metrics = config.kube_state_metrics.to_string(); + let node_exporter = config.node_exporter.to_string(); + let prometheus_operator = config.prometheus_operator.to_string(); + let prometheus = config.prometheus.to_string(); + let discord_alert_manager_release_name = config.discord_alert_manager_release_name.to_string(); + let values = format!( + r#" additionalPrometheusRulesMap: + pods-status-alerts: + groups: + - name: pods + rules: + - alert: "[CRIT] POD not healthy" + expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{{phase=~"Pending|Unknown|Failed"}})[15m:1m]) > 0 + for: 0m + labels: + severity: critical + annotations: + title: "[CRIT] POD not healthy : {{ $labels.pod }}" + description: | + A POD is in a non-ready state! + - **Pod**: {{ $labels.pod }} + - **Namespace**: {{ $labels.namespace }} + - alert: "[CRIT] POD crash looping" + expr: increase(kube_pod_container_status_restarts_total[5m]) > 3 + for: 0m + labels: + severity: critical + annotations: + title: "[CRIT] POD crash looping : {{ $labels.pod }}" + description: | + A POD is drowning in a crash loop! + - **Pod**: {{ $labels.pod }} + - **Namespace**: {{ $labels.namespace }} + - **Instance**: {{ $labels.instance }} pvc-alerts: groups: - name: pvc-alerts @@ -31,49 +84,103 @@ additionalPrometheusRulesMap: annotations: description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days. title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days -"#; - let mut values_overrides: HashMap = HashMap::new(); - - macro_rules! 
insert_flag { - ($key:expr, $val:expr) => { - values_overrides.insert(NonBlankString::from_str($key).unwrap(), $val.to_string()); - }; - } - - insert_flag!("nodeExporter.enabled", config.node_exporter); - insert_flag!("windowsMonitoring.enabled", config.windows_monitoring); - insert_flag!("grafana.enabled", config.grafana); - insert_flag!("alertmanager.enabled", config.alert_manager); - insert_flag!("prometheus.enabled", config.prometheus); - insert_flag!( - "kubernetes_service_monitors.enabled", - config.kubernetes_service_monitors +defaultRules: + create: {default_rules} + rules: + alertmanager: true + etcd: true + configReloaders: true + general: true + k8sContainerCpuUsageSecondsTotal: true + k8sContainerMemoryCache: true + k8sContainerMemoryRss: true + k8sContainerMemorySwap: true + k8sContainerResource: true + k8sContainerMemoryWorkingSetBytes: true + k8sPodOwner: true + kubeApiserverAvailability: true + kubeApiserverBurnrate: true + kubeApiserverHistogram: true + kubeApiserverSlos: true + kubeControllerManager: true + kubelet: true + kubeProxy: true + kubePrometheusGeneral: true + kubePrometheusNodeRecording: true + kubernetesApps: true + kubernetesResources: true + kubernetesStorage: true + kubernetesSystem: true + kubeSchedulerAlerting: true + kubeSchedulerRecording: true + kubeStateMetrics: true + network: true + node: true + nodeExporterAlerting: true + nodeExporterRecording: true + prometheus: true + prometheusOperator: true + windows: true +windowsMonitoring: + enabled: {windows_monitoring} +alertmanager: + enabled: {alert_manager} + config: + route: + group_by: ['job'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: '{webhook_service_type}' + routes: + - receiver: 'null' + matchers: + - alertname="Watchdog" + continue: false + receivers: + - name: 'null' + - name: '{webhook_service_type}' + webhook_configs: + - url: 'http://{discord_alert_manager_release_name}-alertmanager-discord:9094' + send_resolved: true +grafana: + enabled: {grafana} +kubernetesServiceMonitors: + enabled: {kubernetes_service_monitors} +kubeApiServer: + enabled: {kubernetes_api_server} +kubelet: + enabled: {kubelet} +kubeControllerManager: + enabled: {kube_controller_manager} +coreDns: + enabled: {core_dns} +kubeEtcd: + enabled: {kube_etcd} +kubeScheduler: + enabled: {kube_scheduler} +kubeProxy: + enabled: {kube_proxy} +kubeStateMetrics: + enabled: {kube_state_metrics} +nodeExporter: + enabled: {node_exporter} +prometheusOperator: + enabled: {prometheus_operator} +prometheus: + enabled: {prometheus} +"#, ); - insert_flag!("kubelet.enabled", config.kubelet); - insert_flag!( - "kubeControllerManager.enabled", - config.kube_controller_manager - ); - insert_flag!("kubeProxy.enabled", config.kube_proxy); - insert_flag!("kubeEtcd.enabled", config.kube_etcd); - insert_flag!("kubeStateMetrics.enabled", config.kube_state_metrics); - insert_flag!("prometheusOperator.enabled", config.prometheus_operator); - - if let (Some(url), Some(name)) = (&config.webhook_url, &config.webhook_service_name) { - insert_flag!("alertmanager.config.receivers.webhook_configs.url", url.as_str()); - insert_flag!("alertmanager.config.receivers.name", name.as_str()); - } HelmChartScore { namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), chart_name: NonBlankString::from_str( - "oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", //use kube prometheus chart which includes grafana, prometheus, alert - //manager, etc 
+ "oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", ) .unwrap(), chart_version: None, - values_overrides: Some(values_overrides), + values_overrides: None, values_yaml: Some(values.to_string()), create_namespace: true, install_only: true, diff --git a/harmony/src/modules/monitoring/mod.rs b/harmony/src/modules/monitoring/mod.rs index 01bb194..d880a67 100644 --- a/harmony/src/modules/monitoring/mod.rs +++ b/harmony/src/modules/monitoring/mod.rs @@ -1,3 +1,4 @@ mod kube_prometheus; pub mod monitoring_alerting; +mod discord_alert_manager; mod config; diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index 0ec6adf..6bb6e83 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -1,22 +1,31 @@ +use async_trait::async_trait; use email_address::EmailAddress; +use log::info; use serde::Serialize; use url::Url; use crate::{ - interpret::Interpret, + data::{Id, Version}, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, score::Score, topology::{HelmCommand, Topology}, }; -use super::{config::KubePrometheusConfig, kube_prometheus::kube_prometheus_helm_chart_score}; +use super::{ + config::KubePrometheusConfig, discord_alert_manager::discord_alert_manager_score, + kube_prometheus::kube_prometheus_helm_chart_score, +}; #[derive(Debug, Clone, Serialize)] pub enum AlertChannel { WebHookUrl { url: Url, - service_name: String, + webhook_service_type: WebhookServiceType, }, + //TODO test and implement in helm chart + //currently does not work Smpt { email_address: EmailAddress, service_name: String, @@ -24,15 +33,15 @@ pub enum AlertChannel { } #[derive(Debug, Clone, Serialize)] -pub enum Stack { - KubePrometheusStack, - OtherStack, +pub enum WebhookServiceType { + Discord, + //TODO test slack notifications + Slack, } #[derive(Debug, Clone, Serialize)] pub struct MonitoringAlertingStackScore { pub alert_channel: Option, - pub monitoring_stack: Stack, pub namespace: Option, } @@ -40,54 +49,131 @@ impl MonitoringAlertingStackScore { pub fn new() -> Self { Self { alert_channel: None, - monitoring_stack: Stack::KubePrometheusStack, namespace: None, } } - fn match_alert_channel(&self, config: &mut KubePrometheusConfig) { + fn set_alert_channel(&self, config: &mut KubePrometheusConfig) { if let Some(alert_channel) = &self.alert_channel { match alert_channel { - //opt1 - AlertChannel::WebHookUrl { url, service_name } => { + AlertChannel::WebHookUrl { + url, + webhook_service_type, + } => { config.webhook_url = Some(url.clone()); - config.webhook_service_name = Some(service_name.clone()); + config.webhook_service_type = Some(webhook_service_type.clone()); } - //opt2 AlertChannel::Smpt { + //TODO setup smpt alerts email_address, service_name, } => { - config.smpt_email_address = Some(email_address.clone()); - config.smtp_service_name = Some(service_name.clone()); + todo!() } } } } - fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig { - let mut config = KubePrometheusConfig::new(); - self.match_alert_channel(&mut config); - if let Some(ns) = &self.namespace { - config.namespace = ns.clone(); - } - config - } } impl Score for MonitoringAlertingStackScore { fn create_interpret(&self) -> Box> { - match &self.monitoring_stack { - Stack::KubePrometheusStack => { - let config = self.build_kube_prometheus_helm_chart_config(); - let helm_chart = 
kube_prometheus_helm_chart_score(&config); - helm_chart.create_interpret() - } - Stack::OtherStack => { - todo!() - } - } + Box::new(MonitoringAlertingStackInterpret { + score: self.clone(), + }) } - fn name(&self) -> String { format!("MonitoringAlertingStackScore") } } + +#[derive(Debug, Clone, Serialize)] +struct MonitoringAlertingStackInterpret { + score: MonitoringAlertingStackScore, +} + +impl MonitoringAlertingStackInterpret { + async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig { + let mut config = KubePrometheusConfig::new(); + self.score.set_alert_channel(&mut config); + if let Some(ns) = &self.score.namespace { + config.namespace = ns.clone(); + } + config + } + + async fn deploy_kube_prometheus_helm_chart_score( + &self, + inventory: &Inventory, + topology: &T, + config: &KubePrometheusConfig, + ) -> Result { + let helm_chart = kube_prometheus_helm_chart_score(config); + helm_chart + .create_interpret() + .execute(inventory, topology) + .await + } + + async fn deploy_alert_channel_service( + &self, + inventory: &Inventory, + topology: &T, + config: &KubePrometheusConfig, + ) -> Result { + match &self.score.alert_channel { + Some(AlertChannel::WebHookUrl { + webhook_service_type, + .. + }) => match webhook_service_type { + WebhookServiceType::Discord => { + discord_alert_manager_score(config) + .create_interpret() + .execute(inventory, topology) + .await + } + WebhookServiceType::Slack => Ok(Outcome::success( + "No extra configs for slack alerting".to_string(), + )), + }, + Some(AlertChannel::Smpt { .. }) => { + todo!() + } + None => Ok(Outcome::success("No alert channel configured".to_string())), + } + } +} + +#[async_trait] +impl Interpret for MonitoringAlertingStackInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + let config = self.build_kube_prometheus_helm_chart_config().await; + info!("Built kube prometheus config"); + info!("Installing kube prometheus chart"); + self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config) + .await?; + info!("Installing alert channel service"); + self.deploy_alert_channel_service(inventory, topology, &config).await?; + Ok(Outcome::success(format!( + "succesfully deployed monitoring and alerting stack" + ))) + } + + fn get_name(&self) -> InterpretName { + todo!() + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} From 7fc2b1ebfe3774faa1a318a5ac2cf0b1c3dec983 Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 20 May 2025 15:59:01 -0400 Subject: [PATCH 4/4] feat: added monitoring stack example to lamp demo --- examples/lamp/src/main.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/examples/lamp/src/main.rs b/examples/lamp/src/main.rs index 9d486eb..feac05d 100644 --- a/examples/lamp/src/main.rs +++ b/examples/lamp/src/main.rs @@ -3,10 +3,10 @@ use harmony::{ inventory::Inventory, maestro::Maestro, modules::{ - { lamp::{LAMPConfig, LAMPScore}, - }, - monitoring::monitoring_alerting::MonitoringAlertingStackScore, + monitoring::monitoring_alerting::{ + AlertChannel, MonitoringAlertingStackScore, WebhookServiceType, + }, }, topology::{K8sAnywhereTopology, Url}, }; @@ -45,8 +45,15 @@ async fn main() { .await .unwrap(); + let url = url::Url::parse("https://discord.com/api/webhooks/dummy_channel/dummy_token") + .expect("invalid URL"); + let mut monitoring_stack_score = MonitoringAlertingStackScore::new(); 
     monitoring_stack_score.namespace = Some(lamp_stack.config.namespace.clone());
+    monitoring_stack_score.alert_channel = Some(AlertChannel::WebHookUrl {
+        url,
+        webhook_service_type: WebhookServiceType::Discord,
+    });
 
     maestro.register_all(vec![Box::new(lamp_stack), Box::new(monitoring_stack_score)]);
 
     // Here we bootstrap the CLI, this gives some nice features if you need them
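
Usage note: PATCH 3 also adds a Slack variant to AlertChannel, still marked
"TODO test slack notifications". A minimal sketch of selecting it, assuming the
same wiring as the Discord example in PATCH 4; the Slack webhook URL and the
standalone registration below are placeholders, not part of this series:

    // Sketch only: assumes a `maestro` set up as in examples/lamp and a
    // placeholder Slack webhook URL. The Slack path is untested (see the TODO
    // on WebhookServiceType::Slack), so routing may need follow-up work.
    let url = url::Url::parse("https://hooks.slack.com/services/T000/B000/XXXXXXXX")
        .expect("invalid URL");

    let mut monitoring_stack_score = MonitoringAlertingStackScore::new();
    monitoring_stack_score.namespace = Some("monitoring".to_string());
    monitoring_stack_score.alert_channel = Some(AlertChannel::WebHookUrl {
        url,
        webhook_service_type: WebhookServiceType::Slack,
    });

    maestro.register_all(vec![Box::new(monitoring_stack_score)]);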