From cefb65933a8ba8c41207a20dfd331fe75a7ffaa4 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Wed, 29 Oct 2025 17:26:21 -0400 Subject: [PATCH] wip: cluster monitoring score coming along, this simply edits OKD builtin alertmanager instance and adds a receiver --- .../topology/oberservability/monitoring.rs | 9 ++ .../alert_channel/discord_alert_channel.rs | 96 +++++++++--- .../alert_channel/webhook_receiver.rs | 14 +- .../monitoring/okd/cluster_monitoring.rs | 139 ++++++++++++++++++ harmony/src/modules/monitoring/okd/config.rs | 90 ++++++++++++ .../monitoring/okd/enable_user_workload.rs | 99 +------------ harmony/src/modules/monitoring/okd/mod.rs | 13 ++ 7 files changed, 346 insertions(+), 114 deletions(-) create mode 100644 harmony/src/modules/monitoring/okd/cluster_monitoring.rs create mode 100644 harmony/src/modules/monitoring/okd/config.rs diff --git a/harmony/src/domain/topology/oberservability/monitoring.rs b/harmony/src/domain/topology/oberservability/monitoring.rs index 6d7411c..1bad370 100644 --- a/harmony/src/domain/topology/oberservability/monitoring.rs +++ b/harmony/src/domain/topology/oberservability/monitoring.rs @@ -1,6 +1,7 @@ use std::any::Any; use async_trait::async_trait; +use kube::api::DynamicObject; use log::debug; use crate::{ @@ -76,6 +77,14 @@ pub trait AlertReceiver: std::fmt::Debug + Send + Sync { fn name(&self) -> String; fn clone_box(&self) -> Box>; fn as_any(&self) -> &dyn Any; + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver; +} + +#[derive(Debug)] +pub struct AlertManagerReceiver { + pub receiver_config: serde_json::Value, + // FIXME we should not leak k8s here. DynamicObject is k8s specific + pub additional_ressources: Vec, } #[async_trait] diff --git a/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs b/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs index 8bef793..550ce46 100644 --- a/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs +++ b/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs @@ -3,7 +3,8 @@ use std::collections::BTreeMap; use async_trait::async_trait; use k8s_openapi::api::core::v1::Secret; -use kube::api::ObjectMeta; +use kube::Resource; +use kube::api::{DynamicObject, ObjectMeta}; use log::debug; use serde::Serialize; use serde_json::json; @@ -13,6 +14,8 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{ AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus, }; use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability; +use crate::modules::monitoring::okd::OpenshiftClusterAlertSender; +use crate::topology::oberservability::monitoring::AlertManagerReceiver; use crate::{ interpret::{InterpretError, Outcome}, modules::monitoring::{ @@ -32,10 +35,8 @@ pub struct DiscordWebhook { pub url: Url, } -#[async_trait] -impl AlertReceiver for DiscordWebhook { - async fn install(&self, sender: &RHOBObservability) -> Result { - let ns = sender.namespace.clone(); +impl DiscordWebhook { + fn get_receiver_config(&self) -> AlertManagerReceiver { let secret_name = format!("{}-secret", self.name.clone()); let webhook_key = format!("{}", self.url.clone()); @@ -52,26 +53,74 @@ impl AlertReceiver for DiscordWebhook { ..Default::default() }; - let _ = sender.client.apply(&secret, Some(&ns)).await; + AlertManagerReceiver { + additional_ressources: vec![], + + receiver_config: json!({ + "name": self.name, + "discordConfigs": [ + { + "apiURL": { + "name": secret_name, + "key": "webhook-url", + }, + "title": "{{ template \"discord.default.title\" . }}", + "message": "{{ template \"discord.default.message\" . }}" + } + ] + }), + } + } +} + +#[async_trait] +impl AlertReceiver for DiscordWebhook { + async fn install( + &self, + sender: &OpenshiftClusterAlertSender, + ) -> Result { + todo!() + } + + fn name(&self) -> String { + todo!() + } + + fn clone_box(&self) -> Box> { + Box::new(self.clone()) + } + + fn as_any(&self) -> &dyn Any { + todo!() + } + + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + self.get_receiver_config() + } +} + +#[async_trait] +impl AlertReceiver for DiscordWebhook { + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + todo!() + } + + async fn install(&self, sender: &RHOBObservability) -> Result { + let ns = sender.namespace.clone(); + + let config = self.get_receiver_config(); + for resource in config.additional_ressources.iter() { + todo!("can I apply a dynamicresource"); + // sender.client.apply(resource, Some(&ns)).await; + } + let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec { data: json!({ "route": { "receiver": self.name, }, "receivers": [ - { - "name": self.name, - "discordConfigs": [ - { - "apiURL": { - "name": secret_name, - "key": "webhook-url", - }, - "title": "{{ template \"discord.default.title\" . }}", - "message": "{{ template \"discord.default.message\" . }}" - } - ] - } + config.receiver_config ] }), }; @@ -122,6 +171,9 @@ impl AlertReceiver for DiscordWebhook { #[async_trait] impl AlertReceiver for DiscordWebhook { + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + todo!() + } async fn install(&self, sender: &CRDPrometheus) -> Result { let ns = sender.namespace.clone(); let secret_name = format!("{}-secret", self.name.clone()); @@ -200,6 +252,9 @@ impl AlertReceiver for DiscordWebhook { #[async_trait] impl AlertReceiver for DiscordWebhook { + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + todo!() + } async fn install(&self, sender: &Prometheus) -> Result { sender.install_receiver(self).await } @@ -226,6 +281,9 @@ impl PrometheusReceiver for DiscordWebhook { #[async_trait] impl AlertReceiver for DiscordWebhook { + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + todo!() + } async fn install(&self, sender: &KubePrometheus) -> Result { sender.install_receiver(self).await } diff --git a/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs b/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs index 1b20df3..c1d32b2 100644 --- a/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs +++ b/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs @@ -19,7 +19,7 @@ use crate::{ }, prometheus::prometheus::{Prometheus, PrometheusReceiver}, }, - topology::oberservability::monitoring::AlertReceiver, + topology::oberservability::monitoring::{AlertManagerReceiver, AlertReceiver}, }; use harmony_types::net::Url; @@ -31,6 +31,9 @@ pub struct WebhookReceiver { #[async_trait] impl AlertReceiver for WebhookReceiver { + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + todo!() + } async fn install(&self, sender: &RHOBObservability) -> Result { let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec { data: json!({ @@ -97,6 +100,9 @@ impl AlertReceiver for WebhookReceiver { #[async_trait] impl AlertReceiver for WebhookReceiver { + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + todo!() + } async fn install(&self, sender: &CRDPrometheus) -> Result { let spec = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfigSpec { data: json!({ @@ -158,6 +164,9 @@ impl AlertReceiver for WebhookReceiver { #[async_trait] impl AlertReceiver for WebhookReceiver { + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + todo!() + } async fn install(&self, sender: &Prometheus) -> Result { sender.install_receiver(self).await } @@ -184,6 +193,9 @@ impl PrometheusReceiver for WebhookReceiver { #[async_trait] impl AlertReceiver for WebhookReceiver { + fn as_alertmanager_receiver(&self) -> AlertManagerReceiver { + todo!() + } async fn install(&self, sender: &KubePrometheus) -> Result { sender.install_receiver(self).await } diff --git a/harmony/src/modules/monitoring/okd/cluster_monitoring.rs b/harmony/src/modules/monitoring/okd/cluster_monitoring.rs new file mode 100644 index 0000000..05da341 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/cluster_monitoring.rs @@ -0,0 +1,139 @@ +use base64::prelude::*; +use std::sync::Arc; + +use async_trait::async_trait; +use harmony_types::id::Id; +use kube::api::DynamicObject; +use log::{debug, info, trace}; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::{ + application::Application, + monitoring::{ + grafana::grafana::Grafana, + kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus, + okd::OpenshiftClusterAlertSender, + }, + prometheus::prometheus::PrometheusMonitoring, + }, + score::Score, + topology::{ + K8sclient, Topology, + k8s::K8sClient, + oberservability::monitoring::{AlertReceiver, AlertingInterpret, ScrapeTarget}, + }, +}; + +impl Clone for Box> { + fn clone(&self) -> Self { + self.clone_box() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct OpenshiftClusterAlertScore { + pub receivers: Vec>>, +} + +impl Score for OpenshiftClusterAlertScore { + fn name(&self) -> String { + "ClusterAlertScore".to_string() + } + + #[doc(hidden)] + fn create_interpret(&self) -> Box> { + Box::new(OpenshiftClusterAlertInterpret { + receivers: self.receivers.clone(), + }) + } +} + +#[derive(Debug)] +pub struct OpenshiftClusterAlertInterpret { + receivers: Vec>>, +} + +#[async_trait] +impl Interpret for OpenshiftClusterAlertInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &T, + ) -> Result { + let client = topology.k8s_client().await?; + + let secret: DynamicObject = client + .get_secret_json_value("alertmanager-main", Some("openshift-monitoring")) + .await?; + trace!("Got secret {secret:?}"); + + let data: serde_json::Value = secret.data; + + // TODO : get config in base64 by drilling into the value + let config_b64 = match data.get("alertmanager.yaml") { + Some(value) => value.as_str().unwrap_or(""), + None => "", + }; + + // TODO : base64 decode it + let config_bytes = BASE64_STANDARD.decode(config_b64).unwrap_or_default(); + + // TODO : use serde_yaml to deserialize the string + let am_config: serde_yaml::Value = + serde_yaml::from_str(&String::from_utf8(config_bytes).unwrap_or_default()) + .unwrap_or_default(); + + // Merge current alert receivers from this config with self.receivers + if let Some(existing_receivers) = am_config.get("receivers") { + for receiver in existing_receivers.as_sequence().unwrap_or(&vec![]) { + match serde_json::to_string(receiver) { + Ok(yaml_str) => { + // TODO: validate that each receiver implements to_alertmanager_yaml() + // and compare with our receivers + info!("Found existing receiver config: {}", yaml_str); + } + Err(e) => debug!("Failed to serialize receiver: {}", e), + } + } + } + + for custom_receiver in &self.receivers { + trace!("Processing custom receiver"); + debug!( + "Custom receiver YAML output: {:?}", + custom_receiver.as_alertmanager_receiver() + ); + } + + Ok(Outcome::success(todo!("whats up"))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("OpenshiftClusterAlertInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/okd/config.rs b/harmony/src/modules/monitoring/okd/config.rs new file mode 100644 index 0000000..b86c5f0 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/config.rs @@ -0,0 +1,90 @@ +use std::{collections::BTreeMap, sync::Arc}; + +use crate::{ + interpret::{InterpretError, Outcome}, + topology::k8s::K8sClient, +}; +use k8s_openapi::api::core::v1::ConfigMap; +use kube::api::ObjectMeta; + +pub(crate) struct Config; + +impl Config { + pub async fn create_cluster_monitoring_config_cm( + client: &Arc, + ) -> Result { + let mut data = BTreeMap::new(); + data.insert( + "config.yaml".to_string(), + r#" +enableUserWorkload: true +alertmanagerMain: + enableUserAlertmanagerConfig: true +"# + .to_string(), + ); + + let cm = ConfigMap { + metadata: ObjectMeta { + name: Some("cluster-monitoring-config".to_string()), + namespace: Some("openshift-monitoring".to_string()), + ..Default::default() + }, + data: Some(data), + ..Default::default() + }; + client.apply(&cm, Some("openshift-monitoring")).await?; + + Ok(Outcome::success( + "updated cluster-monitoring-config-map".to_string(), + )) + } + + pub async fn create_user_workload_monitoring_config_cm( + client: &Arc, + ) -> Result { + let mut data = BTreeMap::new(); + data.insert( + "config.yaml".to_string(), + r#" +alertmanager: + enabled: true + enableAlertmanagerConfig: true +"# + .to_string(), + ); + let cm = ConfigMap { + metadata: ObjectMeta { + name: Some("user-workload-monitoring-config".to_string()), + namespace: Some("openshift-user-workload-monitoring".to_string()), + ..Default::default() + }, + data: Some(data), + ..Default::default() + }; + client + .apply(&cm, Some("openshift-user-workload-monitoring")) + .await?; + + Ok(Outcome::success( + "updated openshift-user-monitoring-config-map".to_string(), + )) + } + + pub async fn verify_user_workload(client: &Arc) -> Result { + let namespace = "openshift-user-workload-monitoring"; + let alertmanager_name = "alertmanager-user-workload-0"; + let prometheus_name = "prometheus-user-workload-0"; + client + .wait_for_pod_ready(alertmanager_name, Some(namespace)) + .await?; + client + .wait_for_pod_ready(prometheus_name, Some(namespace)) + .await?; + + Ok(Outcome::success(format!( + "pods: {}, {} ready in ns: {}", + alertmanager_name, prometheus_name, namespace + ))) + } +} diff --git a/harmony/src/modules/monitoring/okd/enable_user_workload.rs b/harmony/src/modules/monitoring/okd/enable_user_workload.rs index b322b4d..2ed0fa4 100644 --- a/harmony/src/modules/monitoring/okd/enable_user_workload.rs +++ b/harmony/src/modules/monitoring/okd/enable_user_workload.rs @@ -1,16 +1,13 @@ -use std::{collections::BTreeMap, sync::Arc}; - use crate::{ data::Version, interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, inventory::Inventory, + modules::monitoring::okd::config::Config, score::Score, - topology::{K8sclient, Topology, k8s::K8sClient}, + topology::{K8sclient, Topology}, }; use async_trait::async_trait; use harmony_types::id::Id; -use k8s_openapi::api::core::v1::ConfigMap; -use kube::api::ObjectMeta; use serde::Serialize; #[derive(Clone, Debug, Serialize)] @@ -37,10 +34,9 @@ impl Interpret for OpenshiftUserWorkloadMonitoringIn topology: &T, ) -> Result { let client = topology.k8s_client().await.unwrap(); - self.update_cluster_monitoring_config_cm(&client).await?; - self.update_user_workload_monitoring_config_cm(&client) - .await?; - self.verify_user_workload(&client).await?; + Config::create_cluster_monitoring_config_cm(&client).await?; + Config::create_user_workload_monitoring_config_cm(&client).await?; + Config::verify_user_workload(&client).await?; Ok(Outcome::success( "successfully enabled user-workload-monitoring".to_string(), )) @@ -62,88 +58,3 @@ impl Interpret for OpenshiftUserWorkloadMonitoringIn todo!() } } - -impl OpenshiftUserWorkloadMonitoringInterpret { - pub async fn update_cluster_monitoring_config_cm( - &self, - client: &Arc, - ) -> Result { - let mut data = BTreeMap::new(); - data.insert( - "config.yaml".to_string(), - r#" -enableUserWorkload: true -alertmanagerMain: - enableUserAlertmanagerConfig: true -"# - .to_string(), - ); - - let cm = ConfigMap { - metadata: ObjectMeta { - name: Some("cluster-monitoring-config".to_string()), - namespace: Some("openshift-monitoring".to_string()), - ..Default::default() - }, - data: Some(data), - ..Default::default() - }; - client.apply(&cm, Some("openshift-monitoring")).await?; - - Ok(Outcome::success( - "updated cluster-monitoring-config-map".to_string(), - )) - } - - pub async fn update_user_workload_monitoring_config_cm( - &self, - client: &Arc, - ) -> Result { - let mut data = BTreeMap::new(); - data.insert( - "config.yaml".to_string(), - r#" -alertmanager: - enabled: true - enableAlertmanagerConfig: true -"# - .to_string(), - ); - let cm = ConfigMap { - metadata: ObjectMeta { - name: Some("user-workload-monitoring-config".to_string()), - namespace: Some("openshift-user-workload-monitoring".to_string()), - ..Default::default() - }, - data: Some(data), - ..Default::default() - }; - client - .apply(&cm, Some("openshift-user-workload-monitoring")) - .await?; - - Ok(Outcome::success( - "updated openshift-user-monitoring-config-map".to_string(), - )) - } - - pub async fn verify_user_workload( - &self, - client: &Arc, - ) -> Result { - let namespace = "openshift-user-workload-monitoring"; - let alertmanager_name = "alertmanager-user-workload-0"; - let prometheus_name = "prometheus-user-workload-0"; - client - .wait_for_pod_ready(alertmanager_name, Some(namespace)) - .await?; - client - .wait_for_pod_ready(prometheus_name, Some(namespace)) - .await?; - - Ok(Outcome::success(format!( - "pods: {}, {} ready in ns: {}", - alertmanager_name, prometheus_name, namespace - ))) - } -} diff --git a/harmony/src/modules/monitoring/okd/mod.rs b/harmony/src/modules/monitoring/okd/mod.rs index 50339ba..ac246c5 100644 --- a/harmony/src/modules/monitoring/okd/mod.rs +++ b/harmony/src/modules/monitoring/okd/mod.rs @@ -1 +1,14 @@ +use crate::topology::oberservability::monitoring::AlertSender; + +pub mod cluster_monitoring; +pub(crate) mod config; pub mod enable_user_workload; + +#[derive(Debug)] +pub struct OpenshiftClusterAlertSender; + +impl AlertSender for OpenshiftClusterAlertSender { + fn name(&self) -> String { + "OpenshiftClusterAlertSender".to_string() + } +}