From 819f4a32fd0fb11150731d69ee334177ba58483c Mon Sep 17 00:00:00 2001 From: Willem Date: Fri, 11 Jul 2025 16:01:52 -0400 Subject: [PATCH] wip: added an implementation of the AlertmanagerConfig CRD (alertmanagerconfigs) that can be used to add a Discord webhook receiver. Currently the namespace is hard-coded, there are a number of todo!() stubs that need to be cleaned up, and flags need to be added so that Alertmanager will automatically register the CRD --- examples/rust/src/main.rs | 19 ++- .../topology/oberservability/monitoring.rs | 1 + .../application/features/monitoring.rs | 35 ++-- .../alert_channel/discord_alert_channel.rs | 83 +++++++++ .../alert_channel/webhook_receiver.rs | 6 + .../kube_prometheus/alert_manager_config.rs | 160 ++++++++++++++++++ .../monitoring/kube_prometheus/helm/config.rs | 14 +- .../helm/kube_prometheus_helm_chart.rs | 15 +- .../helm_prometheus_application_alerting.rs | 76 +++++++++ .../modules/monitoring/kube_prometheus/mod.rs | 2 + .../monitoring/kube_prometheus/types.rs | 8 + 11 files changed, 383 insertions(+), 36 deletions(-) create mode 100644 harmony/src/modules/monitoring/kube_prometheus/alert_manager_config.rs create mode 100644 harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs diff --git a/examples/rust/src/main.rs b/examples/rust/src/main.rs index c44ce88..3f13371 100644 --- a/examples/rust/src/main.rs +++ b/examples/rust/src/main.rs @@ -3,10 +3,9 @@ use std::{path::PathBuf, sync::Arc}; use harmony::{ inventory::Inventory, maestro::Maestro, - modules::application::{ - ApplicationScore, RustWebFramework, RustWebapp, - features::{ContinuousDelivery, Monitoring}, - }, + modules::{application::{ + features::{ContinuousDelivery, PrometheusApplicationMonitoring}, ApplicationScore, RustWebFramework, RustWebapp + }, monitoring::alert_channel::discord_alert_channel::DiscordWebhook}, topology::{K8sAnywhereTopology, Url}, }; @@ -20,12 +19,20 @@ async fn main() { framework: Some(RustWebFramework::Leptos), }); + let
discord_receiver = DiscordWebhook { + name: "test-discord".to_string(), + url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()), + }; + let app = ApplicationScore { features: vec![ - Box::new(ContinuousDelivery { + // Box::new(ContinuousDelivery { + // application: application.clone(), + // }), + Box::new(PrometheusApplicationMonitoring { application: application.clone(), + alert_receiver: vec![Box::new(discord_receiver),] }), - Box::new(Monitoring {}), // TODO add monitoring, backups, multisite ha, etc ], application, diff --git a/harmony/src/domain/topology/oberservability/monitoring.rs b/harmony/src/domain/topology/oberservability/monitoring.rs index 6d60c7a..a215924 100644 --- a/harmony/src/domain/topology/oberservability/monitoring.rs +++ b/harmony/src/domain/topology/oberservability/monitoring.rs @@ -62,6 +62,7 @@ impl, T: Topology> Interpret for AlertingInte #[async_trait] pub trait AlertReceiver: std::fmt::Debug + Send + Sync { async fn install(&self, sender: &S) -> Result; + fn name(&self) -> String; fn clone_box(&self) -> Box>; } diff --git a/harmony/src/modules/application/features/monitoring.rs b/harmony/src/modules/application/features/monitoring.rs index 0a8d421..528e721 100644 --- a/harmony/src/modules/application/features/monitoring.rs +++ b/harmony/src/modules/application/features/monitoring.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use async_trait::async_trait; use log::info; @@ -5,34 +7,29 @@ use crate::{ inventory::Inventory, modules::{ application::{Application, ApplicationFeature}, - monitoring::{ - application_monitoring::k8s_application_monitoring_score::ApplicationPrometheusMonitoringScore, + monitoring:: kube_prometheus::{ - helm_prometheus_alert_score::HelmPrometheusAlertingScore, - types::{NamespaceSelector, ServiceMonitor}, - }, - }, + alert_manager_config::{CRDAlertManager, CRDAlertManagerReceiver}, helm_prometheus_application_alerting::HelmPrometheusApplicationAlertingScore + } + , }, score::Score, - 
topology::{HelmCommand, Topology, tenant::TenantManager}, + topology::{oberservability::monitoring::AlertReceiver, tenant::TenantManager, HelmCommand, K8sclient, Topology}, }; -#[derive(Debug, Default, Clone)] -pub struct Monitoring {} +#[derive(Debug, Clone)] +pub struct PrometheusApplicationMonitoring { + pub application: Arc, + pub alert_receiver: Vec>,} #[async_trait] -impl ApplicationFeature for Monitoring { +impl ApplicationFeature for PrometheusApplicationMonitoring { async fn ensure_installed(&self, topology: &T) -> Result<(), String> { info!("Ensuring monitoring is available for application"); - let mut service_monitor = ServiceMonitor::default(); - service_monitor.namespace_selector = Some(NamespaceSelector { - any: true, - match_names: vec![], - }); - let alerting_score = ApplicationPrometheusMonitoringScore { - receivers: vec![], - rules: vec![], - service_monitors: vec![service_monitor], + + let alerting_score = HelmPrometheusApplicationAlertingScore { + namespace: self.application.name().clone(), + receivers: self.alert_receiver.clone(), }; alerting_score diff --git a/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs b/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs index be8f0e3..b791b05 100644 --- a/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs +++ b/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs @@ -1,11 +1,15 @@ use async_trait::async_trait; +use kube::api::ObjectMeta; use serde::Serialize; +use serde_json::json; use serde_yaml::{Mapping, Value}; +use crate::modules::monitoring::kube_prometheus::alert_manager_config::AlertmanagerConfigSpec; use crate::{ interpret::{InterpretError, Outcome}, modules::monitoring::{ kube_prometheus::{ + alert_manager_config::{AlertmanagerConfig, CRDAlertManager, CRDAlertManagerReceiver}, prometheus::{KubePrometheus, KubePrometheusReceiver}, types::{AlertChannelConfig, AlertManagerChannelConfig}, }, @@ -20,11 +24,66 @@ pub struct 
DiscordWebhook { pub url: Url, } +#[async_trait] +impl CRDAlertManagerReceiver for DiscordWebhook { + fn name(&self) -> String { + self.name.clone() + } + + async fn configure_receiver(&self) -> AlertmanagerConfig { + let spec = AlertmanagerConfigSpec { + route: Some(json!({ + "group_by": ["alertname"], + "receiver": self.name, + })), + receivers: Some(json!([ + { + "name": self.name, + "webhook_configs": [ + { + "url": self.url + } + ] + } + ])), + }; + + AlertmanagerConfig { + metadata: ObjectMeta { + name: Some(self.name.clone()), + //TODO this cant be hardcoded + namespace: Some("monitoring".into()), + ..Default::default() + }, + spec, + } + } + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } +} + +#[async_trait] +impl AlertReceiver for DiscordWebhook { + async fn install(&self, sender: &CRDAlertManager) -> Result { + todo!() + } + fn name(&self) -> String { + "discord-webhook".to_string() + } + fn clone_box(&self) -> Box> { + Box::new(self.clone()) + } +} + #[async_trait] impl AlertReceiver for DiscordWebhook { async fn install(&self, sender: &Prometheus) -> Result { sender.install_receiver(self).await } + fn name(&self) -> String { + "discord-webhook".to_string() + } fn clone_box(&self) -> Box> { Box::new(self.clone()) } @@ -48,6 +107,9 @@ impl AlertReceiver for DiscordWebhook { fn clone_box(&self) -> Box> { Box::new(self.clone()) } + fn name(&self) -> String { + "discord-webhook".to_string() + } } #[async_trait] @@ -112,6 +174,27 @@ impl DiscordWebhook { } } +impl From for AlertmanagerConfigSpec { + fn from(dw: DiscordWebhook) -> Self { + AlertmanagerConfigSpec { + route: Some(json!({ + "group_by": ["alertname"], + "receiver": dw.name, + })), + receivers: Some(json!([ + { + "name": dw.name, + "webhook_configs": [ + { + "url": dw.url + } + ] + } + ])), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs b/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs 
index f844431..d02be82 100644 --- a/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs +++ b/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs @@ -25,6 +25,9 @@ impl AlertReceiver for WebhookReceiver { async fn install(&self, sender: &Prometheus) -> Result { sender.install_receiver(self).await } + fn name(&self) -> String { + "webhook-receiver".to_string() + } fn clone_box(&self) -> Box> { Box::new(self.clone()) } @@ -44,6 +47,9 @@ impl AlertReceiver for WebhookReceiver { async fn install(&self, sender: &KubePrometheus) -> Result { sender.install_receiver(self).await } + fn name(&self) -> String { + "webhook-receiver".to_string() + } fn clone_box(&self) -> Box> { Box::new(self.clone()) } diff --git a/harmony/src/modules/monitoring/kube_prometheus/alert_manager_config.rs b/harmony/src/modules/monitoring/kube_prometheus/alert_manager_config.rs new file mode 100644 index 0000000..b6bae51 --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/alert_manager_config.rs @@ -0,0 +1,160 @@ +use std::sync::Arc; + +use async_trait::async_trait; +// use k8s_openapi::{Metadata, NamespaceResourceScope}; +// use kube::Resource; +// use serde::Serialize; +// use strum::EnumString; +// +// use crate::topology::Url; +// +// +// pub trait CRDAlertManagerConfigs { +// fn get_crd_alert_manager_config(&self) -> CRDAlertManagerConfig;} +// +// #[derive(Debug, Clone, Serialize)] +// pub struct CRDAlertManagerConfig { +// pub name: String, +// pub namespace: String, +// pub receivers: Vec, +// pub matchers: Vec, +// } +// +// #[derive(Debug, Clone, Serialize)] +// pub struct Receiver { +// pub receiver_name: String, +// pub configs: Config, +// } +// +// #[derive(Debug, Clone, Serialize)] +// pub struct Config{ +// pub url: Url, +// } +// +// #[derive(Debug, Clone, Serialize)] +// pub struct Matchers{ +// pub name: String, +// pub r#type: MatchType, +// pub value: String, +// } +// +// #[derive(Debug, EnumString, Clone, Serialize)] +// pub enum 
MatchType{ +// #[strum(serialize = "=")] +// Equal, +// #[strum(serialize = "!=")] +// NotEqual, +// } +// +// impl Resource for CRDAlertManagerConfig { +// type DynamicType = (); +// +// type Scope = NamespaceResourceScope; +// +// fn kind(_: &Self::DynamicType) -> std::borrow::Cow<'_, str> { +// "AlertmanagerConfig".into() +// } +// +// fn group(_: &Self::DynamicType) -> std::borrow::Cow<'_, str> { +// "monitoring.coreos.com".into() +// } +// +// fn version(_: &Self::DynamicType) -> std::borrow::Cow<'_, str> { +// "v1alpha1".into() +// } +// +// fn plural(_: &Self::DynamicType) -> std::borrow::Cow<'_, str> { +// "alertmanagerconfigs".into() +// } +// +// fn meta(&self) -> &kube::api::ObjectMeta { +// &self.metadata) +// } +// +// fn meta_mut(&mut self) -> &mut kube::api::ObjectMeta { +// &mut self.metadata +// } +// } +use kube::{CustomResource, api::ObjectMeta}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::{ + interpret::{InterpretError, Outcome}, + inventory::Inventory, + topology::{ + HelmCommand, K8sclient, Topology, + installable::Installable, + k8s::K8sClient, + oberservability::monitoring::{AlertReceiver, AlertSender}, + tenant::TenantManager, + }, +}; + +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[kube( + group = "monitoring.coreos.com", + version = "v1alpha1", + kind = "AlertmanagerConfig", + plural = "alertmanagerconfigs", + namespaced +)] + +pub struct AlertmanagerConfigSpec { + // Define the spec fields here, or use serde's `flatten` if you want to store arbitrary data + // Example placeholder: + pub route: Option, + pub receivers: Option, +} + +#[derive(Debug, Clone)] +pub struct CRDAlertManager { + namespace: String, + client: K8sClient, +} + +impl AlertSender for CRDAlertManager { + fn name(&self) -> String { + "CRDAlertManager".to_string() + } +} + +impl Clone for Box> { + fn clone(&self) -> Self { + self.clone_box() + } +} + +impl Serialize for Box> { + fn serialize(&self, 
_serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +#[async_trait] +pub trait CRDAlertManagerReceiver: + AlertReceiver + Send + Sync + std::fmt::Debug +{ + fn name(&self) -> String; + async fn configure_receiver(&self) -> AlertmanagerConfig; + // This new method is for cloning the trait object + fn clone_box(&self) -> Box; +} + +impl Clone for Box { + fn clone(&self) -> Self { + CRDAlertManagerReceiver::clone_box(self.as_ref()) + } +} + +impl Serialize for Box { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs index 3f273c6..3c4fa37 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs @@ -35,18 +35,18 @@ impl KubePrometheusConfig { windows_monitoring: false, alert_manager: true, grafana: true, - node_exporter: false, + node_exporter: true, prometheus: true, kubernetes_service_monitors: true, - kubernetes_api_server: false, + kubernetes_api_server: true, kubelet: true, - kube_controller_manager: false, - kube_etcd: false, - kube_proxy: false, + kube_controller_manager: true, + kube_etcd: true, + kube_proxy: true, kube_state_metrics: true, prometheus_operator: true, - core_dns: false, - kube_scheduler: false, + core_dns: true, + kube_scheduler: true, alert_receiver_configs: vec![], alert_rules: vec![], additional_service_monitors: vec![], diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs index 14d9f5f..fe0f38c 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs @@ -12,8 +12,8 @@ use crate::modules::{ 
helm::chart::HelmChartScore, monitoring::kube_prometheus::types::{ AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig, - AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits, - PrometheusConfig, Requests, Resources, + AlertManagerConfigSelector, AlertManagerRoute, AlertManagerSpec, AlertManagerValues, + ConfigReloader, Limits, PrometheusConfig, Requests, Resources, }, }; @@ -70,7 +70,7 @@ pub fn kube_prometheus_helm_chart_score( r#" global: rbac: - create: false + create: true prometheus: enabled: {prometheus} prometheusSpec: @@ -245,7 +245,7 @@ prometheus-node-exporter: cpu: 200m memory: 250Mi prometheusOperator: - enabled: false + enabled: true resources: requests: cpu: 100m @@ -332,6 +332,11 @@ prometheusOperator: .push(receiver.channel_receiver.clone()); } + let mut labels = BTreeMap::new(); + labels.insert("alertmanagerConfig".to_string(), "enabled".to_string()); + let alert_manager_config_selector = AlertManagerConfigSelector { + match_labels: labels, + }; let alert_manager_values = AlertManagerValues { alertmanager: AlertManager { enabled: config.alert_manager, @@ -347,6 +352,8 @@ prometheusOperator: cpu: "100m".to_string(), }, }, + alert_manager_config_selector, + replicas: 2, }, init_config_reloader: ConfigReloader { resources: Resources { diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs new file mode 100644 index 0000000..6dd19a0 --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs @@ -0,0 +1,76 @@ +use async_trait::async_trait; +use kube::{Api, api::ObjectMeta}; +use log::debug; +use serde::Serialize; + +use crate::{ + data::{Id, Version}, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + score::Score, + topology::{K8sclient, Topology, 
oberservability::monitoring::AlertReceiver}, +}; + +use super::{ + alert_manager_config::{AlertmanagerConfig, AlertmanagerConfigSpec, CRDAlertManager, CRDAlertManagerReceiver}, + prometheus::KubePrometheus, +}; + +#[derive(Clone, Debug, Serialize)] +pub struct HelmPrometheusApplicationAlertingScore { + pub namespace: String, + pub receivers: Vec>, +} + +impl Score for HelmPrometheusApplicationAlertingScore { + fn create_interpret(&self) -> Box> { + Box::new(HelmPrometheusApplicationAlertingInterpret { + namespace: self.namespace.clone(), + receivers: self.receivers.clone(), + }) + } + + fn name(&self) -> String { + "HelmPrometheusApplicationAlertingScore".into() + } +} + +#[derive(Clone, Debug)] +pub struct HelmPrometheusApplicationAlertingInterpret { + pub namespace: String, + pub receivers: Vec>, +} + +#[async_trait] +impl Interpret for HelmPrometheusApplicationAlertingInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + for receiver in self.receivers.iter() { + let alertmanager_config: AlertmanagerConfig = receiver.configure_receiver().await; + let client = topology.k8s_client().await.unwrap(); + client + .apply(&alertmanager_config, Some(&self.namespace)) + .await?; + } + Ok(Outcome::success(format!("deployed alert channels"))) + } + + fn get_name(&self) -> InterpretName { + todo!() + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/mod.rs index 7c8233a..2a5974c 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/mod.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/mod.rs @@ -2,3 +2,5 @@ pub mod helm; pub mod helm_prometheus_alert_score; pub mod prometheus; pub mod types; +pub mod alert_manager_config; +pub mod helm_prometheus_application_alerting; diff --git 
a/harmony/src/modules/monitoring/kube_prometheus/types.rs b/harmony/src/modules/monitoring/kube_prometheus/types.rs index 33bfcc3..c9209dc 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/types.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/types.rs @@ -55,6 +55,14 @@ pub struct AlertManagerChannelConfig { #[serde(rename_all = "camelCase")] pub struct AlertManagerSpec { pub(crate) resources: Resources, + pub replicas: u32, + pub alert_manager_config_selector: AlertManagerConfigSelector, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct AlertManagerConfigSelector { + pub match_labels: BTreeMap, } #[derive(Debug, Clone, Serialize)]