From 40cd765019ec93e2125a10dc92e7b30bf632bfe3 Mon Sep 17 00:00:00 2001 From: Willem Date: Mon, 28 Apr 2025 16:18:44 -0400 Subject: [PATCH 01/11] WIP: initial layout for MonitoringStackScore --- harmony/src/modules/mod.rs | 1 + harmony/src/modules/monitoring/mod.rs | 3 +++ .../modules/monitoring/monitoring_alerting.rs | 23 +++++++++++++++++++ .../modules/monitoring/resources/grafana.rs | 19 +++++++++++++++ .../src/modules/monitoring/resources/mod.rs | 3 +++ .../monitoring/resources/prometheus.rs | 19 +++++++++++++++ .../resources/prometheus_alert_manager.rs | 20 ++++++++++++++++ 7 files changed, 88 insertions(+) create mode 100644 harmony/src/modules/monitoring/mod.rs create mode 100644 harmony/src/modules/monitoring/monitoring_alerting.rs create mode 100644 harmony/src/modules/monitoring/resources/grafana.rs create mode 100644 harmony/src/modules/monitoring/resources/mod.rs create mode 100644 harmony/src/modules/monitoring/resources/prometheus.rs create mode 100644 harmony/src/modules/monitoring/resources/prometheus_alert_manager.rs diff --git a/harmony/src/modules/mod.rs b/harmony/src/modules/mod.rs index a578ada..9875302 100644 --- a/harmony/src/modules/mod.rs +++ b/harmony/src/modules/mod.rs @@ -10,3 +10,4 @@ pub mod load_balancer; pub mod okd; pub mod opnsense; pub mod tftp; +pub mod monitoring; diff --git a/harmony/src/modules/monitoring/mod.rs b/harmony/src/modules/monitoring/mod.rs new file mode 100644 index 0000000..dd2a70f --- /dev/null +++ b/harmony/src/modules/monitoring/mod.rs @@ -0,0 +1,3 @@ +mod monitoring_alerting; +mod resources; +pub use monitoring_alerting::*; diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs new file mode 100644 index 0000000..fd7772b --- /dev/null +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -0,0 +1,23 @@ +use serde::Serialize; + +use crate::{ + interpret::Interpret, maestro::Maestro, modules::helm::chart::HelmChartScore, score::Score, topology::{K8sclient, Topology} +}; + +#[derive(Debug, Clone, Serialize)] +pub struct MonitoringAlertingStackScore { + monitoring: HelmChartScore, + alerting: HelmChartScore, + alert_manager: HelmChartScore, +} + +impl Score for MonitoringAlertingStackScore { + + fn create_interpret(&self) -> Box> { + todo!() + } + + fn name(&self) -> String { + format!("Monitoring: {}\n, Alerting: {}\n, Alert Manager: {}\n", self.monitoring, self.alerting, self.alert_manager) + } +} diff --git a/harmony/src/modules/monitoring/resources/grafana.rs b/harmony/src/modules/monitoring/resources/grafana.rs new file mode 100644 index 0000000..87cc512 --- /dev/null +++ b/harmony/src/modules/monitoring/resources/grafana.rs @@ -0,0 +1,19 @@ +use std::str::FromStr; + +use non_blank_string_rs::NonBlankString; + +use crate::modules::helm::chart::HelmChartScore; + + +pub fn grafana_score(ns: &str) -> HelmChartScore { + HelmChartScore { + namespace: Some(NonBlankString::from_str(ns).unwrap()), + release_name: NonBlankString::from_str("es").unwrap(), + chart_name: NonBlankString::from_str( + "oci://registry-1.docker.io/bitnamicharts/grafana", + ) + .unwrap(), + chart_version: None, + values_overrides: None, + } +} diff --git a/harmony/src/modules/monitoring/resources/mod.rs b/harmony/src/modules/monitoring/resources/mod.rs new file mode 100644 index 0000000..4af87f6 --- /dev/null +++ b/harmony/src/modules/monitoring/resources/mod.rs @@ -0,0 +1,3 @@ +mod grafana; +mod prometheus; +mod prometheus_alert_manager; diff --git a/harmony/src/modules/monitoring/resources/prometheus.rs b/harmony/src/modules/monitoring/resources/prometheus.rs new file mode 100644 index 0000000..844e351 --- /dev/null +++ b/harmony/src/modules/monitoring/resources/prometheus.rs @@ -0,0 +1,19 @@ +use std::str::FromStr; + +use non_blank_string_rs::NonBlankString; + +use crate::modules::helm::chart::HelmChartScore; + + +pub fn elasticsearch_score(ns: &str) -> HelmChartScore { + HelmChartScore { + namespace: Some(NonBlankString::from_str(ns).unwrap()), + release_name: NonBlankString::from_str("es").unwrap(), + chart_name: NonBlankString::from_str( + todo()! + ) + .unwrap(), + chart_version: None, + values_overrides: None, + } +} diff --git a/harmony/src/modules/monitoring/resources/prometheus_alert_manager.rs b/harmony/src/modules/monitoring/resources/prometheus_alert_manager.rs new file mode 100644 index 0000000..4b2a583 --- /dev/null +++ b/harmony/src/modules/monitoring/resources/prometheus_alert_manager.rs @@ -0,0 +1,20 @@ +use std::str::FromStr; + +use non_blank_string_rs::NonBlankString; + +use crate::modules::helm::chart::HelmChartScore; + + +pub fn elasticsearch_score(ns: &str) -> HelmChartScore { + HelmChartScore { + namespace: Some(NonBlankString::from_str(ns).unwrap()), + release_name: NonBlankString::from_str("es").unwrap(), + chart_name: NonBlankString::from_str( + todo()! + ) + .unwrap(), + chart_version: None, + values_overrides: None, + } +} + -- 2.39.5 From 6c145f1100f75ac051b4405e5437d67fc591f91e Mon Sep 17 00:00:00 2001 From: Willem Date: Mon, 28 Apr 2025 16:31:22 -0400 Subject: [PATCH 02/11] wip: initial layout --- .../modules/monitoring/monitoring_alerting.rs | 4 +--- .../modules/monitoring/resources/grafana.rs | 19 ------------------ .../{prometheus.rs => kube_prometheus.rs} | 6 ++++-- .../src/modules/monitoring/resources/mod.rs | 4 +--- .../resources/prometheus_alert_manager.rs | 20 ------------------- 5 files changed, 6 insertions(+), 47 deletions(-) delete mode 100644 harmony/src/modules/monitoring/resources/grafana.rs rename harmony/src/modules/monitoring/resources/{prometheus.rs => kube_prometheus.rs} (70%) delete mode 100644 harmony/src/modules/monitoring/resources/prometheus_alert_manager.rs diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index fd7772b..47d7e50 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -7,8 +7,6 @@ use crate::{ #[derive(Debug, Clone, Serialize)] pub struct MonitoringAlertingStackScore { monitoring: HelmChartScore, - alerting: HelmChartScore, - alert_manager: HelmChartScore, } impl Score for MonitoringAlertingStackScore { @@ -18,6 +16,6 @@ impl Score for MonitoringAlertingStackScore { } fn name(&self) -> String { - format!("Monitoring: {}\n, Alerting: {}\n, Alert Manager: {}\n", self.monitoring, self.alerting, self.alert_manager) + todo!() } } diff --git a/harmony/src/modules/monitoring/resources/grafana.rs b/harmony/src/modules/monitoring/resources/grafana.rs deleted file mode 100644 index 87cc512..0000000 --- a/harmony/src/modules/monitoring/resources/grafana.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::str::FromStr; - -use non_blank_string_rs::NonBlankString; - -use crate::modules::helm::chart::HelmChartScore; - - -pub fn grafana_score(ns: &str) -> HelmChartScore { - HelmChartScore { - namespace: Some(NonBlankString::from_str(ns).unwrap()), - release_name: NonBlankString::from_str("es").unwrap(), - chart_name: NonBlankString::from_str( - "oci://registry-1.docker.io/bitnamicharts/grafana", - ) - .unwrap(), - chart_version: None, - values_overrides: None, - } -} diff --git a/harmony/src/modules/monitoring/resources/prometheus.rs b/harmony/src/modules/monitoring/resources/kube_prometheus.rs similarity index 70% rename from harmony/src/modules/monitoring/resources/prometheus.rs rename to harmony/src/modules/monitoring/resources/kube_prometheus.rs index 844e351..4be77b3 100644 --- a/harmony/src/modules/monitoring/resources/prometheus.rs +++ b/harmony/src/modules/monitoring/resources/kube_prometheus.rs @@ -1,3 +1,4 @@ + use std::str::FromStr; use non_blank_string_rs::NonBlankString; @@ -5,12 +6,13 @@ use non_blank_string_rs::NonBlankString; use crate::modules::helm::chart::HelmChartScore; -pub fn elasticsearch_score(ns: &str) -> HelmChartScore { +pub fn kube_prometheus_score(ns: &str) -> HelmChartScore { HelmChartScore { namespace: Some(NonBlankString::from_str(ns).unwrap()), release_name: NonBlankString::from_str("es").unwrap(), chart_name: NonBlankString::from_str( - todo()! + todo!() //use kube prometheus chart which includes grafana, prometheus, alert + //manager, etc ) .unwrap(), chart_version: None, diff --git a/harmony/src/modules/monitoring/resources/mod.rs b/harmony/src/modules/monitoring/resources/mod.rs index 4af87f6..fd3d57a 100644 --- a/harmony/src/modules/monitoring/resources/mod.rs +++ b/harmony/src/modules/monitoring/resources/mod.rs @@ -1,3 +1 @@ -mod grafana; -mod prometheus; -mod prometheus_alert_manager; +mod kube_prometheus; diff --git a/harmony/src/modules/monitoring/resources/prometheus_alert_manager.rs b/harmony/src/modules/monitoring/resources/prometheus_alert_manager.rs deleted file mode 100644 index 4b2a583..0000000 --- a/harmony/src/modules/monitoring/resources/prometheus_alert_manager.rs +++ /dev/null @@ -1,20 +0,0 @@ -use std::str::FromStr; - -use non_blank_string_rs::NonBlankString; - -use crate::modules::helm::chart::HelmChartScore; - - -pub fn elasticsearch_score(ns: &str) -> HelmChartScore { - HelmChartScore { - namespace: Some(NonBlankString::from_str(ns).unwrap()), - release_name: NonBlankString::from_str("es").unwrap(), - chart_name: NonBlankString::from_str( - todo()! - ) - .unwrap(), - chart_version: None, - values_overrides: None, - } -} - -- 2.39.5 From a6bcaade46679c326a3850cea486a20561e64c2b Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 29 Apr 2025 11:28:32 -0400 Subject: [PATCH 03/11] wip: alerting --- .../modules/monitoring/monitoring_alerting.rs | 58 ++++++++++++++++++- .../monitoring/resources/kube_prometheus.rs | 7 ++- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index 47d7e50..1379e29 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -1,7 +1,11 @@ use serde::Serialize; use crate::{ - interpret::Interpret, maestro::Maestro, modules::helm::chart::HelmChartScore, score::Score, topology::{K8sclient, Topology} + interpret::Interpret, + maestro::Maestro, + modules::helm::chart::HelmChartScore, + score::Score, + topology::{K8sclient, Topology}, }; #[derive(Debug, Clone, Serialize)] @@ -10,12 +14,60 @@ pub struct MonitoringAlertingStackScore { } impl Score for MonitoringAlertingStackScore { - fn create_interpret(&self) -> Box> { - todo!() + Box::new(MonitoringAlertingStackInterpret { + score: self.clone(), + }) } fn name(&self) -> String { + format!( + "{} {} MonitoringAlertingStackScore", + self.monitoring.chart_name, self.monitoring.release_name + ) + } +} + +#[derive(Debug, Serialize)] +struct MonitoringAlertingStackInterpret { + pub score: MonitoringAlertingStackScore, +} + +#[async_trait] +impl Interpret for MonitoringAlertingStackInterpret { + fn execute<'life0, 'life1, 'life2, 'async_trait>( + &'life0 self, + inventory: &'life1 Inventory, + topology: &'life2 T, + ) -> ::core::pin::Pin< + Box< + dyn ::core::future::Future> + + ::core::marker::Send + + 'async_trait, + >, + > + where + 'life0: 'async_trait, + 'life1: 'async_trait, + 'life2: 'async_trait, + Self: 'async_trait, + { + todo!() + } + + fn get_name(&self) -> InterpretName { + todo!() + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { todo!() } } diff --git a/harmony/src/modules/monitoring/resources/kube_prometheus.rs b/harmony/src/modules/monitoring/resources/kube_prometheus.rs index 4be77b3..620b27e 100644 --- a/harmony/src/modules/monitoring/resources/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/resources/kube_prometheus.rs @@ -9,10 +9,11 @@ use crate::modules::helm::chart::HelmChartScore; pub fn kube_prometheus_score(ns: &str) -> HelmChartScore { HelmChartScore { namespace: Some(NonBlankString::from_str(ns).unwrap()), - release_name: NonBlankString::from_str("es").unwrap(), + release_name: NonBlankString::from_str("kube-prometheus").unwrap(), chart_name: NonBlankString::from_str( - todo!() //use kube prometheus chart which includes grafana, prometheus, alert - //manager, etc + "https://prometheus-community.github.io/helm-charts" + //use kube prometheus chart which includes grafana, prometheus, alert + //manager, etc ) .unwrap(), chart_version: None, -- 2.39.5 From 2f8e150f41d921fd72fdfda95db321a78dbb8c38 Mon Sep 17 00:00:00 2001 From: Willem Date: Mon, 5 May 2025 12:49:28 -0400 Subject: [PATCH 04/11] feat: added Score and topology to create kube prometheus monitoring and alerting stack --- harmony/src/domain/topology/mod.rs | 1 + .../{resources => }/kube_prometheus.rs | 1 - harmony/src/modules/monitoring/mod.rs | 6 +- .../modules/monitoring/monitoring_alerting.rs | 105 +++++++++++++----- .../src/modules/monitoring/resources/mod.rs | 1 - 5 files changed, 79 insertions(+), 35 deletions(-) rename harmony/src/modules/monitoring/{resources => }/kube_prometheus.rs (99%) delete mode 100644 harmony/src/modules/monitoring/resources/mod.rs diff --git a/harmony/src/domain/topology/mod.rs b/harmony/src/domain/topology/mod.rs index 3d773ff..0cbfee6 100644 --- a/harmony/src/domain/topology/mod.rs +++ b/harmony/src/domain/topology/mod.rs @@ -1,3 +1,4 @@ +pub mod monitoring_alerting; mod ha_cluster; mod host_binding; mod http; diff --git a/harmony/src/modules/monitoring/resources/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs similarity index 99% rename from harmony/src/modules/monitoring/resources/kube_prometheus.rs rename to harmony/src/modules/monitoring/kube_prometheus.rs index 620b27e..6dbccae 100644 --- a/harmony/src/modules/monitoring/resources/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -1,4 +1,3 @@ - use std::str::FromStr; use non_blank_string_rs::NonBlankString; diff --git a/harmony/src/modules/monitoring/mod.rs b/harmony/src/modules/monitoring/mod.rs index dd2a70f..dd17cc1 100644 --- a/harmony/src/modules/monitoring/mod.rs +++ b/harmony/src/modules/monitoring/mod.rs @@ -1,3 +1,3 @@ -mod monitoring_alerting; -mod resources; -pub use monitoring_alerting::*; +pub mod monitoring_alerting; +mod kube_prometheus; + diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index 1379e29..1dd8233 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -1,58 +1,103 @@ +use async_trait::async_trait; use serde::Serialize; use crate::{ - interpret::Interpret, - maestro::Maestro, + data::{Id, Version}, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + maestro::{self, Maestro}, modules::helm::chart::HelmChartScore, - score::Score, - topology::{K8sclient, Topology}, + score::{CloneBoxScore, Score}, + topology::{K8sclient, Topology, monitoring_alerting::MonitoringAlertingTopology}, }; -#[derive(Debug, Clone, Serialize)] +use super::kube_prometheus::kube_prometheus_score; + + +#[derive(Debug)] pub struct MonitoringAlertingStackScore { - monitoring: HelmChartScore, + pub monitoring_stack: Vec>>, +} + +impl Default for MonitoringAlertingStackScore { + fn default() -> Self { + let ns = "monitoring"; + Self { + monitoring_stack: vec![ + Box::new(kube_prometheus_score(ns)) as Box>, + ], + } + } +} +impl Clone for MonitoringAlertingStackScore { + fn clone(&self) -> Self { + Self { + monitoring_stack: self.monitoring_stack.iter().map(|s| s.clone_box()).collect(), + } + } +} + +impl Serialize for MonitoringAlertingStackScore { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut s = serializer.serialize_struct("MonitoringAlertingStackScore", 1)?; + let monitoring_values: Vec<_> = self.monitoring_stack.iter().map(|m| m.serialize()).collect(); + s.serialize_field("monitoring", &monitoring_values)?; + s.end() + } } impl Score for MonitoringAlertingStackScore { fn create_interpret(&self) -> Box> { Box::new(MonitoringAlertingStackInterpret { - score: self.clone(), + score: MonitoringAlertingStackScore { + monitoring_stack: self.monitoring_stack.iter().map(|s| s.clone_box()).collect(), + }, }) } fn name(&self) -> String { - format!( - "{} {} MonitoringAlertingStackScore", - self.monitoring.chart_name, self.monitoring.release_name - ) + format!("MonitoringAlertingStackScore") } } -#[derive(Debug, Serialize)] +#[derive(Debug)] struct MonitoringAlertingStackInterpret { pub score: MonitoringAlertingStackScore, } +impl MonitoringAlertingStackInterpret { + pub async fn build_monitoring_stack( + &self, + monitoring_stack: MonitoringAlertingStackScore, + ) -> Result { + let inventory = Inventory::autoload(); + let topology = MonitoringAlertingTopology::new(); + let mut maestro = match Maestro::initialize(inventory, topology).await { + Ok(m) => m, + Err(e) => { + println!("failed to initialize Maestro: {}", e); + std::process::exit(1); + } + }; + maestro.register_all(monitoring_stack.monitoring_stack); + Ok(Outcome::success(format!( + "installed kube-prometheus monitoring and alerting stack" + ))) + } +} + #[async_trait] impl Interpret for MonitoringAlertingStackInterpret { - fn execute<'life0, 'life1, 'life2, 'async_trait>( - &'life0 self, - inventory: &'life1 Inventory, - topology: &'life2 T, - ) -> ::core::pin::Pin< - Box< - dyn ::core::future::Future> - + ::core::marker::Send - + 'async_trait, - >, - > - where - 'life0: 'async_trait, - 'life1: 'async_trait, - 'life2: 'async_trait, - Self: 'async_trait, - { - todo!() + async fn execute( + &self, + _inventory: &Inventory, + _topology: &T, + ) -> Result { + self.build_monitoring_stack(self.score.clone()).await } fn get_name(&self) -> InterpretName { diff --git a/harmony/src/modules/monitoring/resources/mod.rs b/harmony/src/modules/monitoring/resources/mod.rs deleted file mode 100644 index fd3d57a..0000000 --- a/harmony/src/modules/monitoring/resources/mod.rs +++ /dev/null @@ -1 +0,0 @@ -mod kube_prometheus; -- 2.39.5 From fbd466a85cce1a1acac2e1ca6088f94249e3fe6c Mon Sep 17 00:00:00 2001 From: Willem Date: Mon, 5 May 2025 13:40:32 -0400 Subject: [PATCH 05/11] added file --- .../domain/topology/monitoring_alerting.rs | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 harmony/src/domain/topology/monitoring_alerting.rs diff --git a/harmony/src/domain/topology/monitoring_alerting.rs b/harmony/src/domain/topology/monitoring_alerting.rs new file mode 100644 index 0000000..a5e4743 --- /dev/null +++ b/harmony/src/domain/topology/monitoring_alerting.rs @@ -0,0 +1,59 @@ +use serde::Serialize; +use tokio::sync::OnceCell; + +use async_trait::async_trait; + +use crate::interpret::{InterpretError, Outcome}; + +use super::{HelmCommand, Topology}; + +#[derive(Clone, Debug)] +struct MonitoringState { + message: String, +} + +#[derive(Clone, Debug)] +pub struct MonitoringAlertingTopology { + monitoring_state: OnceCell>, +} + + +impl MonitoringAlertingTopology { + pub fn new() -> Self { + Self { + monitoring_state: OnceCell::new(), + } + } + fn get_monitoring_state(&self) -> Result, InterpretError> { + let state = MonitoringState { + message: "monitoring stack not installed".to_string(), + }; + Ok(Some(state)) + } +} + +#[async_trait] +impl Topology for MonitoringAlertingTopology { + fn name(&self) -> &str { + "MonitoringAlertingTopology" + } + + async fn ensure_ready(&self) -> Result { + let monitoring_state = self + .monitoring_state + .get_or_try_init(|| async { self.get_monitoring_state() }) + .await?; + + if monitoring_state.is_some() { + Ok(Outcome::success( + "Monitoring stack already installed".to_string(), + )) + } else { + Ok(Outcome::success( + "Monitoring stack not installed".to_string(), + )) + } + } +} + +impl HelmCommand for MonitoringAlertingTopology {} -- 2.39.5 From e7cfbf914ae13169a37adb99c92673310cb2d5fe Mon Sep 17 00:00:00 2001 From: Willem Date: Mon, 5 May 2025 15:38:37 -0400 Subject: [PATCH 06/11] feat: added basic alert for pvc 95% full to kube-prometheus score --- .../src/modules/monitoring/kube_prometheus.rs | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs index 6dbccae..8cfbcf3 100644 --- a/harmony/src/modules/monitoring/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -6,6 +6,27 @@ use crate::modules::helm::chart::HelmChartScore; pub fn kube_prometheus_score(ns: &str) -> HelmChartScore { +//TODO this should be make into a rule with default formatting that can be easily passed as a vec +//to the overrides or something leaving the user to deal with formatting here seems bad + let values = r#" +additionalPrometheusRules: +- name: kubelet-alerts + groups: + - name: pvc-alerts + rules: + - alert: 'PVC Fill Over 95 Percent In 2 Days' + annotations: + description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ + $labels.namespace }} is predicted to fill over 95% in less than 2 days. + title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace + }} will fill over 95% in less than 2 days + expr: "(\n kubelet_volume_stats_used_bytes \n / \n kubelet_volume_stats_capacity_bytes\n) + > 0.95\nAND\npredict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 + * 60 * 60)\n/\nkubelet_volume_stats_capacity_bytes\n> 0.95\n" + for: 5m + labels: + severity: warning +"#; HelmChartScore { namespace: Some(NonBlankString::from_str(ns).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), -- 2.39.5 From 88270ece61145fc6996201abede33887d3c28a86 Mon Sep 17 00:00:00 2001 From: Willem Date: Mon, 5 May 2025 16:37:15 -0400 Subject: [PATCH 07/11] fix: refactor so that the topology installs the MonitoringAlertingStack depending on if it is already present in the cluster --- .../domain/topology/monitoring_alerting.rs | 96 +++++++++++++++---- .../modules/monitoring/monitoring_alerting.rs | 22 +---- 2 files changed, 79 insertions(+), 39 deletions(-) diff --git a/harmony/src/domain/topology/monitoring_alerting.rs b/harmony/src/domain/topology/monitoring_alerting.rs index a5e4743..d41c2da 100644 --- a/harmony/src/domain/topology/monitoring_alerting.rs +++ b/harmony/src/domain/topology/monitoring_alerting.rs @@ -1,9 +1,21 @@ +use log::warn; use serde::Serialize; use tokio::sync::OnceCell; +use k8s_openapi::api::core::v1::Pod; +use kube::{ + Client, + api::{Api, ListParams}, +}; + use async_trait::async_trait; -use crate::interpret::{InterpretError, Outcome}; +use crate::{ + interpret::{InterpretError, Outcome}, + inventory::Inventory, + maestro::Maestro, + modules::monitoring::monitoring_alerting::MonitoringAlertingStackScore, +}; use super::{HelmCommand, Topology}; @@ -17,21 +29,75 @@ pub struct MonitoringAlertingTopology { monitoring_state: OnceCell>, } - impl MonitoringAlertingTopology { pub fn new() -> Self { Self { monitoring_state: OnceCell::new(), } } - fn get_monitoring_state(&self) -> Result, InterpretError> { - let state = MonitoringState { - message: "monitoring stack not installed".to_string(), + + async fn get_monitoring_state(&self) -> Result, InterpretError> { + let client = Client::try_default() + .await + .map_err(|e| InterpretError::new(format!("Kubernetes client error: {}", e)))?; + + for ns in &["monitoring", "openshift-monitoring"] { + let pods: Api = Api::namespaced(client.clone(), ns); + let lp = ListParams::default().labels("app.kubernetes.io/name=prometheus"); + + match pods.list(&lp).await { + Ok(pod_list) => { + for p in pod_list.items { + if let Some(status) = p.status { + if let Some(conditions) = status.conditions { + if conditions + .iter() + .any(|c| c.type_ == "Ready" && c.status == "True") + { + return Ok(Some(MonitoringState { + message: format!( + "Prometheus is ready in namespace: {}", + ns + ), + })); + } + } + } + } + } + Err(e) => { + warn!("Failed to query pods in ns {}: {}", ns, e); + } + } + } + + Ok(None) + } + + async fn try_install_monitoring_stack( + &self, + ) -> Result, InterpretError> { + let inventory = Inventory::autoload(); + let topology = MonitoringAlertingTopology::new(); + let mut maestro = match Maestro::initialize(inventory, topology).await { + Ok(m) => m, + Err(e) => { + println!("failed to initialize Maestro: {}", e); + std::process::exit(1); + } + }; + maestro.register_all(vec![Box::new(MonitoringAlertingStackScore::default())]); + let state = match self.get_monitoring_state().await { + Ok(_) => MonitoringState { + message: "Monitoring Stack Ready".to_string(), + }, + Err(_) => todo!(), }; Ok(Some(state)) } } + #[async_trait] impl Topology for MonitoringAlertingTopology { fn name(&self) -> &str { @@ -39,20 +105,14 @@ impl Topology for MonitoringAlertingTopology { } async fn ensure_ready(&self) -> Result { - let monitoring_state = self - .monitoring_state - .get_or_try_init(|| async { self.get_monitoring_state() }) - .await?; - - if monitoring_state.is_some() { - Ok(Outcome::success( - "Monitoring stack already installed".to_string(), - )) + let state = if let Some(state) = self.get_monitoring_state().await? { + state } else { - Ok(Outcome::success( - "Monitoring stack not installed".to_string(), - )) - } + self.try_install_monitoring_stack().await? + .ok_or_else(|| InterpretError::new("Failed to install monitoring stack".into()))? + }; + + Ok(Outcome::success(state.message)) } } diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index 1dd8233..ca6accb 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -69,26 +69,6 @@ struct MonitoringAlertingStackInterpret { pub score: MonitoringAlertingStackScore, } -impl MonitoringAlertingStackInterpret { - pub async fn build_monitoring_stack( - &self, - monitoring_stack: MonitoringAlertingStackScore, - ) -> Result { - let inventory = Inventory::autoload(); - let topology = MonitoringAlertingTopology::new(); - let mut maestro = match Maestro::initialize(inventory, topology).await { - Ok(m) => m, - Err(e) => { - println!("failed to initialize Maestro: {}", e); - std::process::exit(1); - } - }; - maestro.register_all(monitoring_stack.monitoring_stack); - Ok(Outcome::success(format!( - "installed kube-prometheus monitoring and alerting stack" - ))) - } -} #[async_trait] impl Interpret for MonitoringAlertingStackInterpret { @@ -97,7 +77,7 @@ impl Interpret for MonitoringAlertingStackInterpret { _inventory: &Inventory, _topology: &T, ) -> Result { - self.build_monitoring_stack(self.score.clone()).await + todo!() } fn get_name(&self) -> InterpretName { -- 2.39.5 From 472a3c10514b6439e4b06809023d6fc9d5cd9f67 Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 6 May 2025 10:02:21 -0400 Subject: [PATCH 08/11] fix: correctly pass namespace and monitoring stack to topology so it can be used to init the maestro and exec the score --- .../domain/topology/monitoring_alerting.rs | 27 +++++++++--- .../modules/monitoring/monitoring_alerting.rs | 44 ++++++++++++++++--- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/harmony/src/domain/topology/monitoring_alerting.rs b/harmony/src/domain/topology/monitoring_alerting.rs index d41c2da..3dee18c 100644 --- a/harmony/src/domain/topology/monitoring_alerting.rs +++ b/harmony/src/domain/topology/monitoring_alerting.rs @@ -15,24 +15,32 @@ use crate::{ inventory::Inventory, maestro::Maestro, modules::monitoring::monitoring_alerting::MonitoringAlertingStackScore, + score::Score, }; -use super::{HelmCommand, Topology}; +use super::{HelmCommand, K8sAnywhereTopology, Topology}; #[derive(Clone, Debug)] struct MonitoringState { message: String, } -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct MonitoringAlertingTopology { monitoring_state: OnceCell>, + namespace: String, + monitoring_stack: Vec>>, } impl MonitoringAlertingTopology { - pub fn new() -> Self { + pub fn new( + namespace: String, + monitoring_stack: Vec>>, + ) -> Self { Self { monitoring_state: OnceCell::new(), + namespace, + monitoring_stack, } } @@ -78,7 +86,7 @@ impl MonitoringAlertingTopology { &self, ) -> Result, InterpretError> { let inventory = Inventory::autoload(); - let topology = MonitoringAlertingTopology::new(); + let topology = K8sAnywhereTopology::new(); let mut maestro = match Maestro::initialize(inventory, topology).await { Ok(m) => m, Err(e) => { @@ -86,7 +94,7 @@ impl MonitoringAlertingTopology { std::process::exit(1); } }; - maestro.register_all(vec![Box::new(MonitoringAlertingStackScore::default())]); + maestro.register_all(vec![Box::new(MonitoringAlertingStackScore::new(self.monitoring_stack.clone(), self.namespace.clone()))]); let state = match self.get_monitoring_state().await { Ok(_) => MonitoringState { message: "Monitoring Stack Ready".to_string(), @@ -98,6 +106,12 @@ impl MonitoringAlertingTopology { } +impl Clone for Box> { + fn clone(&self) -> Box> { + self.clone_box() + } +} + #[async_trait] impl Topology for MonitoringAlertingTopology { fn name(&self) -> &str { @@ -108,7 +122,8 @@ impl Topology for MonitoringAlertingTopology { let state = if let Some(state) = self.get_monitoring_state().await? { state } else { - self.try_install_monitoring_stack().await? + self.try_install_monitoring_stack() + .await? .ok_or_else(|| InterpretError::new("Failed to install monitoring stack".into()))? }; diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index ca6accb..2bc0cb4 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -13,10 +13,16 @@ use crate::{ use super::kube_prometheus::kube_prometheus_score; - #[derive(Debug)] pub struct MonitoringAlertingStackScore { pub monitoring_stack: Vec>>, + pub namespace: String, +} + +impl MonitoringAlertingStackScore { + pub fn new(monitoring_stack: Vec>>, namespace: String) -> Self { + Self { monitoring_stack, namespace } + } } impl Default for MonitoringAlertingStackScore { @@ -24,15 +30,21 @@ impl Default for MonitoringAlertingStackScore { let ns = "monitoring"; Self { monitoring_stack: vec![ - Box::new(kube_prometheus_score(ns)) as Box>, + Box::new(kube_prometheus_score(ns)) as Box> ], + namespace: ns.to_string() } } } impl Clone for MonitoringAlertingStackScore { fn clone(&self) -> Self { Self { - monitoring_stack: self.monitoring_stack.iter().map(|s| s.clone_box()).collect(), + monitoring_stack: self + .monitoring_stack + .iter() + .map(|s| s.clone_box()) + .collect(), + namespace: self.namespace.clone(), } } } @@ -44,7 +56,11 @@ impl Serialize for MonitoringAlertingStackScore { { use serde::ser::SerializeStruct; let mut s = serializer.serialize_struct("MonitoringAlertingStackScore", 1)?; - let monitoring_values: Vec<_> = self.monitoring_stack.iter().map(|m| m.serialize()).collect(); + let monitoring_values: Vec<_> = self + .monitoring_stack + .iter() + .map(|m| m.serialize()) + .collect(); s.serialize_field("monitoring", &monitoring_values)?; s.end() } @@ -54,7 +70,12 @@ impl Score for MonitoringAlertingStackScore { fn create_interpret(&self) -> Box> { Box::new(MonitoringAlertingStackInterpret { score: MonitoringAlertingStackScore { - monitoring_stack: self.monitoring_stack.iter().map(|s| s.clone_box()).collect(), + monitoring_stack: self + .monitoring_stack + .iter() + .map(|s| s.clone_box()) + .collect(), + namespace: self.namespace.clone(), }, }) } @@ -69,7 +90,6 @@ struct MonitoringAlertingStackInterpret { pub score: MonitoringAlertingStackScore, } - #[async_trait] impl Interpret for MonitoringAlertingStackInterpret { async fn execute( @@ -77,8 +97,18 @@ impl Interpret for MonitoringAlertingStackInterpret { _inventory: &Inventory, _topology: &T, ) -> Result { - todo!() + let inventory = Inventory::autoload(); + let topology = MonitoringAlertingTopology::new(self.score.namespace.clone(), self.score.monitoring_stack.clone()); + let mut maestro = match Maestro::initialize(inventory, topology).await { + Ok(m) => m, + Err(e) => { + println!("failed to initialize Maestro: {}", e); + std::process::exit(1); + } + }; + Ok(Outcome::success(format!("monitoring stack installed in {} namespace",self.score.namespace ))) } + fn get_name(&self) -> InterpretName { todo!() -- 2.39.5 From 2d74c66fc6f928bf600db14372b7967d96407706 Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 6 May 2025 11:54:10 -0400 Subject: [PATCH 09/11] wip: trying to get the kube-prometheus score to install --- .../domain/topology/monitoring_alerting.rs | 54 +++++-------------- .../src/modules/monitoring/kube_prometheus.rs | 3 ++ .../modules/monitoring/monitoring_alerting.rs | 34 +++++++----- 3 files changed, 36 insertions(+), 55 deletions(-) diff --git a/harmony/src/domain/topology/monitoring_alerting.rs b/harmony/src/domain/topology/monitoring_alerting.rs index 3dee18c..fa60f37 100644 --- a/harmony/src/domain/topology/monitoring_alerting.rs +++ b/harmony/src/domain/topology/monitoring_alerting.rs @@ -1,5 +1,6 @@ +use std::sync::Arc; + use log::warn; -use serde::Serialize; use tokio::sync::OnceCell; use k8s_openapi::api::core::v1::Pod; @@ -18,7 +19,7 @@ use crate::{ score::Score, }; -use super::{HelmCommand, K8sAnywhereTopology, Topology}; +use super::{HelmCommand, K8sAnywhereTopology, Topology, k8s::K8sClient}; #[derive(Clone, Debug)] struct MonitoringState { @@ -28,19 +29,12 @@ struct MonitoringState { #[derive(Debug)] pub struct MonitoringAlertingTopology { monitoring_state: OnceCell>, - namespace: String, - monitoring_stack: Vec>>, } impl MonitoringAlertingTopology { - pub fn new( - namespace: String, - monitoring_stack: Vec>>, - ) -> Self { + pub fn new() -> Self { Self { monitoring_state: OnceCell::new(), - namespace, - monitoring_stack, } } @@ -81,31 +75,8 @@ impl MonitoringAlertingTopology { Ok(None) } - - async fn try_install_monitoring_stack( - &self, - ) -> Result, InterpretError> { - let inventory = Inventory::autoload(); - let topology = K8sAnywhereTopology::new(); - let mut maestro = match Maestro::initialize(inventory, topology).await { - Ok(m) => m, - Err(e) => { - println!("failed to initialize Maestro: {}", e); - std::process::exit(1); - } - }; - maestro.register_all(vec![Box::new(MonitoringAlertingStackScore::new(self.monitoring_stack.clone(), self.namespace.clone()))]); - let state = match self.get_monitoring_state().await { - Ok(_) => MonitoringState { - message: "Monitoring Stack Ready".to_string(), - }, - Err(_) => todo!(), - }; - Ok(Some(state)) - } } - impl Clone for Box> { fn clone(&self) -> Box> { self.clone_box() @@ -119,15 +90,16 @@ impl Topology for MonitoringAlertingTopology { } async fn ensure_ready(&self) -> Result { - let state = if let Some(state) = self.get_monitoring_state().await? { - state - } else { - self.try_install_monitoring_stack() - .await? - .ok_or_else(|| InterpretError::new("Failed to install monitoring stack".into()))? - }; + if let Some(state) = self.get_monitoring_state().await? { + // Monitoring stack is already ready — stop app. + println!("{}", state.message); + std::process::exit(0); + } - Ok(Outcome::success(state.message)) + // Monitoring not found — proceed with installation. + Ok(Outcome::success( + "Monitoring stack installation started.".to_string(), + )) } } diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs index 8cfbcf3..52dcee9 100644 --- a/harmony/src/modules/monitoring/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -38,5 +38,8 @@ additionalPrometheusRules: .unwrap(), chart_version: None, values_overrides: None, + values_yaml: Some(values.to_string()), + create_namespace: true, + install_only: true, } } diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index 2bc0cb4..7622f86 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -5,10 +5,9 @@ use crate::{ data::{Id, Version}, interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, inventory::Inventory, - maestro::{self, Maestro}, - modules::helm::chart::HelmChartScore, + maestro::Maestro, score::{CloneBoxScore, Score}, - topology::{K8sclient, Topology, monitoring_alerting::MonitoringAlertingTopology}, + topology::{HelmCommand, Topology, monitoring_alerting::MonitoringAlertingTopology}, }; use super::kube_prometheus::kube_prometheus_score; @@ -20,8 +19,14 @@ pub struct MonitoringAlertingStackScore { } impl MonitoringAlertingStackScore { - pub fn new(monitoring_stack: Vec>>, namespace: String) -> Self { - Self { monitoring_stack, namespace } + pub fn new( + monitoring_stack: Vec>>, + namespace: String, + ) -> Self { + Self { + monitoring_stack, + namespace, + } } } @@ -29,10 +34,8 @@ impl Default for MonitoringAlertingStackScore { fn default() -> Self { let ns = "monitoring"; Self { - monitoring_stack: vec![ - Box::new(kube_prometheus_score(ns)) as Box> - ], - namespace: ns.to_string() + monitoring_stack: vec![Box::new(kube_prometheus_score(ns))], + namespace: ns.to_string(), } } } @@ -66,7 +69,7 @@ impl Serialize for MonitoringAlertingStackScore { } } -impl Score for MonitoringAlertingStackScore { +impl Score for MonitoringAlertingStackScore { fn create_interpret(&self) -> Box> { Box::new(MonitoringAlertingStackInterpret { score: MonitoringAlertingStackScore { @@ -91,14 +94,14 @@ struct MonitoringAlertingStackInterpret { } #[async_trait] -impl Interpret for MonitoringAlertingStackInterpret { +impl Interpret for MonitoringAlertingStackInterpret { async fn execute( &self, _inventory: &Inventory, _topology: &T, ) -> Result { let inventory = Inventory::autoload(); - let topology = MonitoringAlertingTopology::new(self.score.namespace.clone(), self.score.monitoring_stack.clone()); + let topology = MonitoringAlertingTopology::new(); let mut maestro = match Maestro::initialize(inventory, topology).await { Ok(m) => m, Err(e) => { @@ -106,9 +109,12 @@ impl Interpret for MonitoringAlertingStackInterpret { std::process::exit(1); } }; - Ok(Outcome::success(format!("monitoring stack installed in {} namespace",self.score.namespace ))) + maestro.register_all(self.score.monitoring_stack.clone()); + Ok(Outcome::success(format!( + "monitoring stack installed in {} namespace", + self.score.namespace + ))) } - fn get_name(&self) -> InterpretName { todo!() -- 2.39.5 From d9921b857b3083630550316eff45b3ee76dcb49c Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 6 May 2025 12:23:03 -0400 Subject: [PATCH 10/11] fix:installs helm chart --- harmony/src/domain/topology/monitoring_alerting.rs | 2 ++ harmony/src/modules/monitoring/kube_prometheus.rs | 2 +- .../src/modules/monitoring/monitoring_alerting.rs | 14 ++++++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/harmony/src/domain/topology/monitoring_alerting.rs b/harmony/src/domain/topology/monitoring_alerting.rs index fa60f37..4951333 100644 --- a/harmony/src/domain/topology/monitoring_alerting.rs +++ b/harmony/src/domain/topology/monitoring_alerting.rs @@ -45,6 +45,8 @@ impl MonitoringAlertingTopology { for ns in &["monitoring", "openshift-monitoring"] { let pods: Api = Api::namespaced(client.clone(), ns); + //TODO hardcoding the label is a problem + //check all pods are ready let lp = ListParams::default().labels("app.kubernetes.io/name=prometheus"); match pods.list(&lp).await { diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs index 52dcee9..fcc98fb 100644 --- a/harmony/src/modules/monitoring/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -31,7 +31,7 @@ additionalPrometheusRules: namespace: Some(NonBlankString::from_str(ns).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), chart_name: NonBlankString::from_str( - "https://prometheus-community.github.io/helm-charts" + "oci://registry-1.docker.io/bitnamicharts/kube-prometheus" //use kube prometheus chart which includes grafana, prometheus, alert //manager, etc ) diff --git a/harmony/src/modules/monitoring/monitoring_alerting.rs b/harmony/src/modules/monitoring/monitoring_alerting.rs index 7622f86..acc5969 100644 --- a/harmony/src/modules/monitoring/monitoring_alerting.rs +++ b/harmony/src/modules/monitoring/monitoring_alerting.rs @@ -1,4 +1,5 @@ use async_trait::async_trait; +use log::info; use serde::Serialize; use crate::{ @@ -14,6 +15,9 @@ use super::kube_prometheus::kube_prometheus_score; #[derive(Debug)] pub struct MonitoringAlertingStackScore { + //TODO add documenation to explain why its here + //keeps it open for the end user to specify which stack they want + //if it isnt default kube-prometheus pub monitoring_stack: Vec>>, pub namespace: String, } @@ -102,14 +106,20 @@ impl Interpret for MonitoringAlertingStackInterpret { ) -> Result { let inventory = Inventory::autoload(); let topology = MonitoringAlertingTopology::new(); - let mut maestro = match Maestro::initialize(inventory, topology).await { + let maestro = match Maestro::initialize(inventory, topology).await { Ok(m) => m, Err(e) => { println!("failed to initialize Maestro: {}", e); std::process::exit(1); } }; - maestro.register_all(self.score.monitoring_stack.clone()); + + let scores_vec = self.score.monitoring_stack.clone(); + for s in scores_vec{ + info!("Running: {}", s.name()); + maestro.interpret(s).await?; + } + Ok(Outcome::success(format!( "monitoring stack installed in {} namespace", self.score.namespace -- 2.39.5 From ef9c1cce77c0de623f4d6d076bfd101edddd70cd Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 6 May 2025 13:42:59 -0400 Subject: [PATCH 11/11] fix:yaml structure --- .../src/modules/monitoring/kube_prometheus.rs | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/harmony/src/modules/monitoring/kube_prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus.rs index fcc98fb..dd4b3f1 100644 --- a/harmony/src/modules/monitoring/kube_prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus.rs @@ -4,37 +4,41 @@ use non_blank_string_rs::NonBlankString; use crate::modules::helm::chart::HelmChartScore; - pub fn kube_prometheus_score(ns: &str) -> HelmChartScore { -//TODO this should be make into a rule with default formatting that can be easily passed as a vec -//to the overrides or something leaving the user to deal with formatting here seems bad + //TODO this should be make into a rule with default formatting that can be easily passed as a vec + //to the overrides or something leaving the user to deal with formatting here seems bad let values = r#" -additionalPrometheusRules: -- name: kubelet-alerts - groups: - - name: pvc-alerts - rules: - - alert: 'PVC Fill Over 95 Percent In 2 Days' - annotations: - description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ - $labels.namespace }} is predicted to fill over 95% in less than 2 days. - title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace - }} will fill over 95% in less than 2 days - expr: "(\n kubelet_volume_stats_used_bytes \n / \n kubelet_volume_stats_capacity_bytes\n) - > 0.95\nAND\npredict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 - * 60 * 60)\n/\nkubelet_volume_stats_capacity_bytes\n> 0.95\n" - for: 5m - labels: - severity: warning +additionalPrometheusRulesMap: + pvc-alerts: + groups: + - name: pvc-alerts + rules: + - alert: 'PVC Fill Over 95 Percent In 2 Days' + expr: | + ( + kubelet_volume_stats_used_bytes + / + kubelet_volume_stats_capacity_bytes + ) > 0.95 + AND + predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60) + / + kubelet_volume_stats_capacity_bytes + > 0.95 + for: 1m + labels: + severity: warning + annotations: + description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days. + title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days "#; HelmChartScore { namespace: Some(NonBlankString::from_str(ns).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), chart_name: NonBlankString::from_str( - "oci://registry-1.docker.io/bitnamicharts/kube-prometheus" - //use kube prometheus chart which includes grafana, prometheus, alert - //manager, etc - ) + "oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", //use kube prometheus chart which includes grafana, prometheus, alert + //manager, etc + ) .unwrap(), chart_version: None, values_overrides: None, -- 2.39.5