feat: add alert-rule trait and Prometheus implementation, plus preconfigured Dell server BMC alerts used in the monitoring example (#67)
| @ -3,7 +3,18 @@ use harmony::{ | ||||
|     maestro::Maestro, | ||||
|     modules::monitoring::{ | ||||
|         alert_channel::discord_alert_channel::DiscordWebhook, | ||||
|         kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore, | ||||
|         alert_rule::prometheus_alert_rule::{AlertManagerRuleGroup, PrometheusAlertRule}, | ||||
|         kube_prometheus::{ | ||||
|             alerts::{ | ||||
|                 dell_server::{ | ||||
|                     alert_global_storage_status_critical, | ||||
|                     alert_global_storage_status_non_recoverable, | ||||
|                     global_storage_status_degraded_non_critical, | ||||
|                 }, | ||||
|                 pvc::high_pvc_fill_rate_over_two_days, | ||||
|             }, | ||||
|             helm_prometheus_alert_score::HelmPrometheusAlertingScore, | ||||
|         }, | ||||
|     }, | ||||
|     topology::{K8sAnywhereTopology, Url}, | ||||
| }; | ||||
| @ -12,10 +23,28 @@ use harmony::{ | ||||
| async fn main() { | ||||
|     let discord_receiver = DiscordWebhook { | ||||
|         name: "test-discord".to_string(), | ||||
|         url: Url::Url(url::Url::parse("discord.doesnt.exist.com").unwrap()), | ||||
|         url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()), | ||||
|     }; | ||||
| 
 | ||||
|     let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days(); | ||||
|     let dell_system_storage_degraded = global_storage_status_degraded_non_critical(); | ||||
|     let alert_global_storage_status_critical = alert_global_storage_status_critical(); | ||||
|     let alert_global_storage_status_non_recoverable = alert_global_storage_status_non_recoverable(); | ||||
| 
 | ||||
|     let additional_rules = | ||||
|         AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]); | ||||
|     let additional_rules2 = AlertManagerRuleGroup::new( | ||||
|         "dell-server-alerts", | ||||
|         vec![ | ||||
|             dell_system_storage_degraded, | ||||
|             alert_global_storage_status_critical, | ||||
|             alert_global_storage_status_non_recoverable, | ||||
|         ], | ||||
|     ); | ||||
| 
 | ||||
|     let alerting_score = HelmPrometheusAlertingScore { | ||||
|         receivers: vec![Box::new(discord_receiver)], | ||||
|         rules: vec![Box::new(additional_rules), Box::new(additional_rules2)], | ||||
|     }; | ||||
|     let mut maestro = Maestro::<K8sAnywhereTopology>::initialize( | ||||
|         Inventory::autoload(), | ||||
|  | ||||
| @ -1,10 +1,11 @@ | ||||
| use async_trait::async_trait; | ||||
| use log::debug; | ||||
| 
 | ||||
| use crate::{ | ||||
|     data::{Id, Version}, | ||||
|     interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, | ||||
|     inventory::Inventory, | ||||
|     topology::{HelmCommand, Topology, installable::Installable}, | ||||
|     topology::{Topology, installable::Installable}, | ||||
| }; | ||||
| 
 | ||||
| #[async_trait] | ||||
| @ -16,6 +17,7 @@ pub trait AlertSender: Send + Sync + std::fmt::Debug { | ||||
/// Interpret that wires an alert sender together with its receivers and rules:
/// each receiver and rule is installed into the sender before the sender
/// itself is ensured installed (see `execute` below).
pub struct AlertingInterpret<S: AlertSender> {
    /// Backend that ultimately delivers alerts (e.g. Prometheus).
    pub sender: S,
    /// Notification channels to register on the sender.
    pub receivers: Vec<Box<dyn AlertReceiver<S>>>,
    /// Alert rules to install into the sender's configuration.
    pub rules: Vec<Box<dyn AlertRule<S>>>,
}
| 
 | ||||
| #[async_trait] | ||||
| @ -28,6 +30,10 @@ impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInte | ||||
|         for receiver in self.receivers.iter() { | ||||
|             receiver.install(&self.sender).await?; | ||||
|         } | ||||
|         for rule in self.rules.iter() { | ||||
|             debug!("installing rule: {:#?}", rule); | ||||
|             rule.install(&self.sender).await?; | ||||
|         } | ||||
|         self.sender.ensure_installed(inventory, topology).await?; | ||||
|         Ok(Outcome::success(format!( | ||||
|             "successfully installed alert sender {}", | ||||
| @ -59,8 +65,9 @@ pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync { | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| pub trait AlertRule<S: AlertSender> { | ||||
|     async fn install(&self, sender: &S) -> Result<(), InterpretError>; | ||||
| pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync { | ||||
|     async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>; | ||||
|     fn clone_box(&self) -> Box<dyn AlertRule<S>>; | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
|  | ||||
							
								
								
									
										1
									
								
								harmony/src/modules/monitoring/alert_rule/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								harmony/src/modules/monitoring/alert_rule/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| pub mod prometheus_alert_rule; | ||||
| @ -0,0 +1,99 @@ | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use serde::Serialize; | ||||
| 
 | ||||
| use crate::{ | ||||
|     interpret::{InterpretError, Outcome}, | ||||
|     modules::monitoring::kube_prometheus::{ | ||||
|         prometheus::{Prometheus, PrometheusRule}, | ||||
|         types::{AlertGroup, AlertManagerAdditionalPromRules}, | ||||
|     }, | ||||
|     topology::oberservability::monitoring::AlertRule, | ||||
| }; | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl AlertRule<Prometheus> for AlertManagerRuleGroup { | ||||
|     async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> { | ||||
|         sender.install_rule(&self).await | ||||
|     } | ||||
|     fn clone_box(&self) -> Box<dyn AlertRule<Prometheus>> { | ||||
|         Box::new(self.clone()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl PrometheusRule for AlertManagerRuleGroup { | ||||
|     fn name(&self) -> String { | ||||
|         self.name.clone() | ||||
|     } | ||||
|     async fn configure_rule(&self) -> AlertManagerAdditionalPromRules { | ||||
|         let mut additional_prom_rules = BTreeMap::new(); | ||||
| 
 | ||||
|         additional_prom_rules.insert( | ||||
|             self.name.clone(), | ||||
|             AlertGroup { | ||||
|                 groups: vec![self.clone()], | ||||
|             }, | ||||
|         ); | ||||
|         AlertManagerAdditionalPromRules { | ||||
|             rules: additional_prom_rules, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl AlertManagerRuleGroup { | ||||
|     pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup { | ||||
|         AlertManagerRuleGroup { | ||||
|             name: name.to_string().to_lowercase(), | ||||
|             rules, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
/// Logical group of Prometheus alert rules.
///
/// Serializes (via `PrometheusRule::configure_rule`) into the shape the
/// kube-prometheus chart expects:
///
/// ```yaml
/// <name>:
///   groups:
///   - name: <name>
///     rules: [<PrometheusAlertRule>, ...]
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerRuleGroup {
    /// Lowercased group name (see `AlertManagerRuleGroup::new`).
    pub name: String,
    /// Alerting rules evaluated as part of this group.
    pub rules: Vec<PrometheusAlertRule>,
}
| 
 | ||||
/// A single Prometheus alerting rule, serializable into the chart's
/// `rules:` list (alert / expr / for / labels / annotations).
#[derive(Debug, Clone, Serialize)]
pub struct PrometheusAlertRule {
    /// Alert name; must be a valid Prometheus identifier (no spaces).
    pub alert: String,
    /// PromQL expression; the alert fires while it yields results.
    pub expr: String,
    /// Optional `for:` pending duration; omitted from YAML when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#for: Option<String>,
    /// Labels attached to fired alerts (e.g. `severity`).
    pub labels: HashMap<String, String>,
    /// Annotations attached to fired alerts (e.g. `title`, `description`).
    pub annotations: HashMap<String, String>,
}
| 
 | ||||
| impl PrometheusAlertRule { | ||||
|     pub fn new(alert_name: &str, expr: &str) -> Self { | ||||
|         Self { | ||||
|             alert: alert_name.into(), | ||||
|             expr: expr.into(), | ||||
|             r#for: Some("1m".into()), | ||||
|             labels: HashMap::new(), | ||||
|             annotations: HashMap::new(), | ||||
|         } | ||||
|     } | ||||
|     pub fn for_duration(mut self, duration: &str) -> Self { | ||||
|         self.r#for = Some(duration.into()); | ||||
|         self | ||||
|     } | ||||
|     pub fn label(mut self, key: &str, value: &str) -> Self { | ||||
|         self.labels.insert(key.into(), value.into()); | ||||
|         self | ||||
|     } | ||||
| 
 | ||||
|     pub fn annotation(mut self, key: &str, value: &str) -> Self { | ||||
|         self.annotations.insert(key.into(), value.into()); | ||||
|         self | ||||
|     } | ||||
| } | ||||
| @ -0,0 +1,40 @@ | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; | ||||
| 
 | ||||
| pub fn global_storage_status_degraded_non_critical() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule::new("GlobalStorageStatusNonCritical", "globalStorageStatus == 4") | ||||
|         .for_duration("5m") | ||||
|         .label("severity", "warning") | ||||
|         .annotation( | ||||
|             "description", | ||||
|             "- **system**: {{ $labels.instance }}\n- **Status**: nonCritical\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}", | ||||
|         ) | ||||
|         .annotation("title", " System storage status is in degraded state") | ||||
| } | ||||
| 
 | ||||
| pub fn alert_global_storage_status_critical() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule::new( | ||||
|         "GlobalStorageStatus critical", | ||||
|         "globalStorageStatus == 5", | ||||
|     ) | ||||
|     .for_duration("5m") | ||||
|     .label("severity", "warning") | ||||
|     .annotation("title", "System storage status is critical at {{ $labels.instance }}") | ||||
|     .annotation( | ||||
|         "description", | ||||
|         "- **System**: {{ $labels.instance }}\n- **Status**: Critical\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}", | ||||
|     ) | ||||
| } | ||||
| 
 | ||||
| pub fn alert_global_storage_status_non_recoverable() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule::new( | ||||
|         "GlobalStorageStatus nonRecoverable", | ||||
|         "globalStorageStatus == 6", | ||||
|     ) | ||||
|     .for_duration("5m") | ||||
|     .label("severity", "warning") | ||||
|     .annotation("title", "System storage status is nonRecoverable at {{ $labels.instance }}") | ||||
|     .annotation( | ||||
|         "description", | ||||
|         "- **System**: {{ $labels.instance }}\n- **Status**: nonRecoverable\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}", | ||||
|     ) | ||||
| } | ||||
| @ -0,0 +1,2 @@ | ||||
| pub mod dell_server; | ||||
| pub mod pvc; | ||||
							
								
								
									
										11
									
								
								harmony/src/modules/monitoring/kube_prometheus/alerts/pvc.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								harmony/src/modules/monitoring/kube_prometheus/alerts/pvc.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,11 @@ | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; | ||||
| 
 | ||||
| pub fn high_pvc_fill_rate_over_two_days() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule::new( | ||||
|         "PVC Fill Over 95 Percent In 2 Days", | ||||
|         "(kubelet_volume_stats_used_bytes/kubelet_volume_stats_capacity_bytes) > 0.95 AND predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)/kubelet_volume_stats_capacity_bytes > 0.95",) | ||||
|         .for_duration("1m") | ||||
|         .label("severity", "warning") | ||||
|         .annotation("summary", "The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.") | ||||
|         .annotation("description", "PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days",) | ||||
| } | ||||
| @ -1,6 +1,9 @@ | ||||
| use serde::Serialize; | ||||
| 
 | ||||
| use crate::modules::monitoring::kube_prometheus::types::AlertManagerChannelConfig; | ||||
| use crate::modules::monitoring::{ | ||||
|     alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, | ||||
|     kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig}, | ||||
| }; | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub struct KubePrometheusConfig { | ||||
| @ -22,6 +25,7 @@ pub struct KubePrometheusConfig { | ||||
|     pub kube_state_metrics: bool, | ||||
|     pub prometheus_operator: bool, | ||||
|     pub alert_receiver_configs: Vec<AlertManagerChannelConfig>, | ||||
|     pub alert_rules: Vec<AlertManagerAdditionalPromRules>, | ||||
| } | ||||
| impl KubePrometheusConfig { | ||||
|     pub fn new() -> Self { | ||||
| @ -44,6 +48,7 @@ impl KubePrometheusConfig { | ||||
|             core_dns: false, | ||||
|             kube_scheduler: false, | ||||
|             alert_receiver_configs: vec![], | ||||
|             alert_rules: vec![], | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -3,6 +3,7 @@ use log::debug; | ||||
| use non_blank_string_rs::NonBlankString; | ||||
| use serde_yaml::{Mapping, Value}; | ||||
| use std::{ | ||||
|     collections::BTreeMap, | ||||
|     str::FromStr, | ||||
|     sync::{Arc, Mutex}, | ||||
| }; | ||||
| @ -10,7 +11,8 @@ use std::{ | ||||
| use crate::modules::{ | ||||
|     helm::chart::HelmChartScore, | ||||
|     monitoring::kube_prometheus::types::{ | ||||
|         AlertManager, AlertManagerConfig, AlertManagerRoute, AlertManagerValues, | ||||
|         AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig, | ||||
|         AlertManagerRoute, AlertManagerValues, | ||||
|     }, | ||||
| }; | ||||
| 
 | ||||
| @ -18,15 +20,13 @@ pub fn kube_prometheus_helm_chart_score( | ||||
|     config: Arc<Mutex<KubePrometheusConfig>>, | ||||
| ) -> HelmChartScore { | ||||
|     let config = config.lock().unwrap(); | ||||
|     //TODO this should be make into a rule with default formatting that can be easily passed as a vec
 | ||||
|     //to the overrides or something leaving the user to deal with formatting here seems bad
 | ||||
| 
 | ||||
|     let default_rules = config.default_rules.to_string(); | ||||
|     let windows_monitoring = config.windows_monitoring.to_string(); | ||||
|     let grafana = config.grafana.to_string(); | ||||
|     let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string(); | ||||
|     let kubernetes_api_server = config.kubernetes_api_server.to_string(); | ||||
|     let kubelet = config.kubelet.to_string(); | ||||
|     let alert_manager = config.alert_manager.to_string(); | ||||
|     let kube_controller_manager = config.kube_controller_manager.to_string(); | ||||
|     let core_dns = config.core_dns.to_string(); | ||||
|     let kube_etcd = config.kube_etcd.to_string(); | ||||
| @ -38,56 +38,6 @@ pub fn kube_prometheus_helm_chart_score( | ||||
|     let prometheus = config.prometheus.to_string(); | ||||
|     let mut values = format!( | ||||
|         r#" | ||||
| additionalPrometheusRulesMap: | ||||
|   pods-status-alerts: | ||||
|     groups: | ||||
|       - name: pods | ||||
|         rules: | ||||
|           - alert: "[CRIT] POD not healthy" | ||||
|             expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{{phase=~"Pending|Unknown|Failed"}})[15m:1m]) > 0 | ||||
|             for: 0m | ||||
|             labels: | ||||
|               severity: critical | ||||
|             annotations: | ||||
|               title: "[CRIT] POD not healthy : {{{{ $labels.pod }}}}" | ||||
|               description: | | ||||
|                A POD is in a non-ready state! | ||||
|                - **Pod**: {{{{ $labels.pod }}}} | ||||
|                - **Namespace**: {{{{ $labels.namespace }}}} | ||||
|           - alert: "[CRIT] POD crash looping" | ||||
|             expr: increase(kube_pod_container_status_restarts_total[5m]) > 3 | ||||
|             for: 0m | ||||
|             labels: | ||||
|               severity: critical | ||||
|             annotations: | ||||
|               title: "[CRIT] POD crash looping : {{{{ $labels.pod }}}}" | ||||
|               description: | | ||||
|                A POD is drowning in a crash loop! | ||||
|                - **Pod**: {{{{ $labels.pod }}}} | ||||
|                - **Namespace**: {{{{ $labels.namespace }}}} | ||||
|                - **Instance**: {{{{ $labels.instance }}}} | ||||
|   pvc-alerts: | ||||
|     groups: | ||||
|       - name: pvc-alerts | ||||
|         rules: | ||||
|           - alert: 'PVC Fill Over 95 Percent In 2 Days' | ||||
|             expr: | | ||||
|               ( | ||||
|                 kubelet_volume_stats_used_bytes | ||||
|                 / | ||||
|                 kubelet_volume_stats_capacity_bytes | ||||
|               ) > 0.95 | ||||
|               AND | ||||
|               predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60) | ||||
|               / | ||||
|               kubelet_volume_stats_capacity_bytes | ||||
|               > 0.95 | ||||
|             for: 1m | ||||
|             labels: | ||||
|               severity: warning | ||||
|             annotations: | ||||
|               description: The PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} is predicted to fill over 95% in less than 2 days. | ||||
|               title: PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} will fill over 95% in less than 2 days | ||||
| defaultRules: | ||||
|   create: {default_rules} | ||||
|   rules: | ||||
| @ -156,6 +106,7 @@ prometheus: | ||||
| "#,
 | ||||
|     ); | ||||
| 
 | ||||
|     // add required null receiver for prometheus alert manager
 | ||||
|     let mut null_receiver = Mapping::new(); | ||||
|     null_receiver.insert( | ||||
|         Value::String("receiver".to_string()), | ||||
| @ -167,6 +118,7 @@ prometheus: | ||||
|     ); | ||||
|     null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true)); | ||||
| 
 | ||||
|     //add alert channels
 | ||||
|     let mut alert_manager_channel_config = AlertManagerConfig { | ||||
|         global: Mapping::new(), | ||||
|         route: AlertManagerRoute { | ||||
| @ -200,7 +152,38 @@ prometheus: | ||||
|         serde_yaml::to_string(&alert_manager_values).expect("Failed to serialize YAML"); | ||||
|     debug!("serialized alert manager: \n {:#}", alert_manager_yaml); | ||||
|     values.push_str(&alert_manager_yaml); | ||||
| 
 | ||||
|     //format alert manager additional rules for helm chart
 | ||||
|     let mut merged_rules: BTreeMap<String, AlertGroup> = BTreeMap::new(); | ||||
| 
 | ||||
|     for additional_rule in config.alert_rules.clone() { | ||||
|         for (key, group) in additional_rule.rules { | ||||
|             merged_rules.insert(key, group); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     let merged_rules = AlertManagerAdditionalPromRules { | ||||
|         rules: merged_rules, | ||||
|     }; | ||||
| 
 | ||||
|     let mut alert_manager_additional_rules = serde_yaml::Mapping::new(); | ||||
|     let rules_value = serde_yaml::to_value(merged_rules).unwrap(); | ||||
| 
 | ||||
|     alert_manager_additional_rules.insert( | ||||
|         serde_yaml::Value::String("additionalPrometheusRulesMap".to_string()), | ||||
|         rules_value, | ||||
|     ); | ||||
| 
 | ||||
|     let alert_manager_additional_rules_yaml = | ||||
|         serde_yaml::to_string(&alert_manager_additional_rules).expect("Failed to serialize YAML"); | ||||
|     debug!( | ||||
|         "alert_rules_yaml:\n{:#}", | ||||
|         alert_manager_additional_rules_yaml | ||||
|     ); | ||||
| 
 | ||||
|     values.push_str(&alert_manager_additional_rules_yaml); | ||||
|     debug!("full values.yaml: \n {:#}", values); | ||||
| 
 | ||||
|     HelmChartScore { | ||||
|         namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), | ||||
|         release_name: NonBlankString::from_str("kube-prometheus").unwrap(), | ||||
|  | ||||
| @ -2,19 +2,19 @@ use std::sync::{Arc, Mutex}; | ||||
| 
 | ||||
| use serde::Serialize; | ||||
| 
 | ||||
| use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus}; | ||||
| use crate::{ | ||||
|     score::Score, | ||||
|     topology::{ | ||||
|         HelmCommand, Topology, | ||||
|         oberservability::monitoring::{AlertReceiver, AlertingInterpret}, | ||||
|         oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret}, | ||||
|     }, | ||||
| }; | ||||
| 
 | ||||
| use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus}; | ||||
| 
 | ||||
/// Score that installs Prometheus alerting via the kube-prometheus helm chart:
/// its `create_interpret` builds an `AlertingInterpret` over a fresh
/// `Prometheus` sender carrying these receivers and rules.
#[derive(Clone, Debug, Serialize)]
pub struct HelmPrometheusAlertingScore {
    /// Notification channels (e.g. Discord webhooks) to register.
    pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
    /// Alert rule groups to merge into the chart's values.
    pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
}
| 
 | ||||
| impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore { | ||||
| @ -24,24 +24,10 @@ impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore { | ||||
|                 config: Arc::new(Mutex::new(KubePrometheusConfig::new())), | ||||
|             }, | ||||
|             receivers: self.receivers.clone(), | ||||
|             rules: self.rules.clone(), | ||||
|         }) | ||||
|     } | ||||
|     fn name(&self) -> String { | ||||
|         "HelmPrometheusAlertingScore".to_string() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Serialize for Box<dyn AlertReceiver<Prometheus>> { | ||||
|     fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: serde::Serializer, | ||||
|     { | ||||
|         todo!() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Clone for Box<dyn AlertReceiver<Prometheus>> { | ||||
|     fn clone(&self) -> Self { | ||||
|         self.clone_box() | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -1,3 +1,4 @@ | ||||
| pub mod alerts; | ||||
| pub mod helm; | ||||
| pub mod helm_prometheus_alert_score; | ||||
| pub mod prometheus; | ||||
|  | ||||
| @ -2,13 +2,17 @@ use std::sync::{Arc, Mutex}; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use log::debug; | ||||
| use serde::Serialize; | ||||
| 
 | ||||
| use crate::{ | ||||
|     interpret::{InterpretError, Outcome}, | ||||
|     inventory::Inventory, | ||||
|     modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, | ||||
|     score, | ||||
|     topology::{ | ||||
|         HelmCommand, Topology, installable::Installable, oberservability::monitoring::AlertSender, | ||||
|         HelmCommand, Topology, | ||||
|         installable::Installable, | ||||
|         oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender}, | ||||
|     }, | ||||
| }; | ||||
| 
 | ||||
| @ -18,7 +22,7 @@ use super::{ | ||||
|     helm::{ | ||||
|         config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score, | ||||
|     }, | ||||
|     types::AlertManagerChannelConfig, | ||||
|     types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig}, | ||||
| }; | ||||
| 
 | ||||
| #[async_trait] | ||||
| @ -35,7 +39,6 @@ impl<T: Topology + HelmCommand> Installable<T> for Prometheus { | ||||
|         inventory: &Inventory, | ||||
|         topology: &T, | ||||
|     ) -> Result<(), InterpretError> { | ||||
|         //install_prometheus
 | ||||
|         self.install_prometheus(inventory, topology).await?; | ||||
|         Ok(()) | ||||
|     } | ||||
| @ -67,6 +70,20 @@ impl Prometheus { | ||||
|         ))) | ||||
|     } | ||||
| 
 | ||||
|     pub async fn install_rule( | ||||
|         &self, | ||||
|         prometheus_rule: &AlertManagerRuleGroup, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         let prometheus_rule = prometheus_rule.configure_rule().await; | ||||
|         let mut config = self.config.lock().unwrap(); | ||||
| 
 | ||||
|         config.alert_rules.push(prometheus_rule.clone()); | ||||
|         Ok(Outcome::success(format!( | ||||
|             "Successfully installed alert rule: {:#?},", | ||||
|             prometheus_rule | ||||
|         ))) | ||||
|     } | ||||
| 
 | ||||
|     pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>( | ||||
|         &self, | ||||
|         inventory: &Inventory, | ||||
| @ -84,3 +101,39 @@ pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug { | ||||
|     fn name(&self) -> String; | ||||
|     async fn configure_receiver(&self) -> AlertManagerChannelConfig; | ||||
| } | ||||
| 
 | ||||
| impl Serialize for Box<dyn AlertReceiver<Prometheus>> { | ||||
|     fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: serde::Serializer, | ||||
|     { | ||||
|         todo!() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Clone for Box<dyn AlertReceiver<Prometheus>> { | ||||
|     fn clone(&self) -> Self { | ||||
|         self.clone_box() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
/// Rule types that can be rendered into kube-prometheus helm values.
#[async_trait]
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
    /// Identifier for the rule group (used as a map key in the helm values).
    fn name(&self) -> String;
    /// Produces the `additionalPrometheusRulesMap` fragment for this rule.
    async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
}
| 
 | ||||
| impl Serialize for Box<dyn AlertRule<Prometheus>> { | ||||
|     fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: serde::Serializer, | ||||
|     { | ||||
|         todo!() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Clone for Box<dyn AlertRule<Prometheus>> { | ||||
|     fn clone(&self) -> Self { | ||||
|         self.clone_box() | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -1,7 +1,11 @@ | ||||
| use std::collections::BTreeMap; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use serde::Serialize; | ||||
| use serde_yaml::{Mapping, Sequence, Value}; | ||||
| 
 | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup; | ||||
| 
 | ||||
| #[async_trait] | ||||
| pub trait AlertChannelConfig { | ||||
|     async fn get_config(&self) -> AlertManagerChannelConfig; | ||||
| @ -38,3 +42,14 @@ pub struct AlertManagerChannelConfig { | ||||
|     pub channel_route: Value, | ||||
|     pub channel_receiver: Value, | ||||
| } | ||||
| 
 | ||||
/// Helm-values fragment for `additionalPrometheusRulesMap`: a map of rule-group
/// name -> `AlertGroup`, flattened so the group names serialize as top-level
/// YAML keys.
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerAdditionalPromRules {
    #[serde(flatten)]
    pub rules: BTreeMap<String, AlertGroup>,
}

/// `groups:` wrapper around rule groups, matching the chart's YAML shape.
#[derive(Debug, Clone, Serialize)]
pub struct AlertGroup {
    pub groups: Vec<AlertManagerRuleGroup>,
}
|  | ||||
| @ -1,2 +1,3 @@ | ||||
| pub mod alert_channel; | ||||
| pub mod alert_rule; | ||||
| pub mod kube_prometheus; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user