monitoringalerting #37
							
								
								
									
										10
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -936,6 +936,15 @@ dependencies = [ | ||||
|  "zeroize", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "email_address" | ||||
| version = "0.2.9" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449" | ||||
| dependencies = [ | ||||
|  "serde", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "encoding_rs" | ||||
| version = "0.8.35" | ||||
| @ -1396,6 +1405,7 @@ dependencies = [ | ||||
|  "derive-new", | ||||
|  "directories", | ||||
|  "dockerfile_builder", | ||||
|  "email_address", | ||||
|  "env_logger", | ||||
|  "harmony_macros", | ||||
|  "harmony_types", | ||||
|  | ||||
| @ -39,3 +39,4 @@ lazy_static = "1.5.0" | ||||
| dockerfile_builder = "0.1.5" | ||||
| temp-file = "0.1.9" | ||||
| convert_case.workspace = true | ||||
| email_address = "0.2.9" | ||||
|  | ||||
							
								
								
									
										47
									
								
								harmony/src/modules/monitoring/config.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @ -0,0 +1,47 @@ | ||||
| use email_address::EmailAddress; | ||||
| use serde::Serialize; | ||||
| use url::Url; | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub struct KubePrometheusConfig { | ||||
|     pub namespace: String, | ||||
|     pub node_exporter: bool, | ||||
|     pub alert_manager: bool, | ||||
|     pub prometheus: bool, | ||||
|     pub grafana: bool, | ||||
|     pub windows_monitoring: bool, | ||||
|     pub kubernetes_service_monitors: bool, | ||||
|     pub kubelet: bool, | ||||
|     pub kube_controller_manager: bool, | ||||
|     pub kube_etcd: bool, | ||||
|     pub kube_proxy: bool, | ||||
|     pub kube_state_metrics: bool, | ||||
|     pub prometheus_operator: bool, | ||||
|     pub webhook_url: Option<Url>, | ||||
|     pub webhook_service_name: Option<String>, | ||||
|     pub smpt_email_address: Option<EmailAddress>, | ||||
|     pub smtp_service_name: Option<String>, | ||||
| } | ||||
| impl KubePrometheusConfig { | ||||
|     pub fn new() -> Self { | ||||
|         Self { | ||||
|             namespace: "monitoring".into(), | ||||
|             node_exporter: false, | ||||
|             alert_manager: false, | ||||
|             prometheus: true, | ||||
|             grafana: true, | ||||
|             windows_monitoring: false, | ||||
|             kubernetes_service_monitors: true, | ||||
|             kubelet: true, | ||||
|             kube_controller_manager: true, | ||||
|             kube_etcd: true, | ||||
|             kube_proxy: true, | ||||
|             kube_state_metrics: true, | ||||
|             prometheus_operator: true, | ||||
|             webhook_url: None, | ||||
|             webhook_service_name: None, | ||||
|             smpt_email_address: None, | ||||
|             smtp_service_name: None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @ -1,10 +1,10 @@ | ||||
| use std::str::FromStr; | ||||
| 
 | ||||
| use super::config::KubePrometheusConfig; | ||||
| use non_blank_string_rs::NonBlankString; | ||||
| use std::{collections::HashMap, str::FromStr}; | ||||
| 
 | ||||
| use crate::modules::helm::chart::HelmChartScore; | ||||
| 
 | ||||
| pub fn kube_prometheus_score(ns: &str) -> HelmChartScore { | ||||
| pub fn kube_prometheus_score(config: &KubePrometheusConfig) -> HelmChartScore { | ||||
|     //TODO this should be make into a rule with default formatting that can be easily passed as a vec
 | ||||
|     //to the overrides or something leaving the user to deal with formatting here seems bad
 | ||||
|     let values = r#" | ||||
| @ -32,8 +32,40 @@ additionalPrometheusRulesMap: | ||||
|               description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days. | ||||
|               title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days | ||||
| "#;
 | ||||
|     let mut values_overrides: HashMap<NonBlankString, String> = HashMap::new(); | ||||
| 
 | ||||
|     macro_rules! insert_flag { | ||||
|         ($key:expr, $val:expr) => { | ||||
|             values_overrides.insert(NonBlankString::from_str($key).unwrap(), $val.to_string()); | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     insert_flag!("nodeExporter.enabled", config.node_exporter); | ||||
|     insert_flag!("windowsMonitoring.enabled", config.windows_monitoring); | ||||
|     insert_flag!("grafana.enabled", config.grafana); | ||||
|     insert_flag!("alertmanager.enabled", config.alert_manager); | ||||
|     insert_flag!("prometheus.enabled", config.prometheus); | ||||
|     insert_flag!( | ||||
|         "kubernetes_service_monitors.enabled", | ||||
|         config.kubernetes_service_monitors | ||||
|     ); | ||||
|     insert_flag!("kubelet.enabled", config.kubelet); | ||||
|     insert_flag!( | ||||
|         "kubeControllerManager.enabled", | ||||
|         config.kube_controller_manager | ||||
|     ); | ||||
|     insert_flag!("kubeProxy.enabled", config.kube_proxy); | ||||
|     insert_flag!("kubeEtcd.enabled", config.kube_etcd); | ||||
|     insert_flag!("kubeStateMetrics.enabled", config.kube_state_metrics); | ||||
|     insert_flag!("prometheusOperator.enabled", config.prometheus_operator); | ||||
|     
 | ||||
|     if let (Some(url), Some(name)) = (&config.webhook_url, &config.webhook_service_name) { | ||||
|         insert_flag!("alertmanager.config.receivers.webhook_configs.url", url.as_str()); | ||||
|         insert_flag!("alertmanager.config.receivers.name", name.as_str()); | ||||
|     } | ||||
| 
 | ||||
|     HelmChartScore { | ||||
|         namespace: Some(NonBlankString::from_str(ns).unwrap()), | ||||
|         namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), | ||||
|         release_name: NonBlankString::from_str("kube-prometheus").unwrap(), | ||||
|         chart_name: NonBlankString::from_str( | ||||
|             "oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", //use kube prometheus chart which includes grafana, prometheus, alert
 | ||||
| @ -41,7 +73,7 @@ additionalPrometheusRulesMap: | ||||
|         ) | ||||
|         .unwrap(), | ||||
|         chart_version: None, | ||||
|         values_overrides: None, | ||||
|         values_overrides: Some(values_overrides), | ||||
|         values_yaml: Some(values.to_string()), | ||||
|         create_namespace: true, | ||||
|         install_only: true, | ||||
|  | ||||
| @ -1,2 +1,3 @@ | ||||
| mod kube_prometheus; | ||||
| pub mod monitoring_alerting; | ||||
| mod config; | ||||
|  | ||||
| @ -1,44 +1,95 @@ | ||||
| use email_address::EmailAddress; | ||||
| 
 | ||||
| use serde::Serialize; | ||||
| use url::Url; | ||||
| 
 | ||||
| use crate::{ | ||||
|     interpret::Interpret, | ||||
|     modules::helm::chart::HelmChartScore, | ||||
|     score::Score, | ||||
|     topology::{HelmCommand, Topology}, | ||||
| }; | ||||
| 
 | ||||
| use super::kube_prometheus::kube_prometheus_score; | ||||
| use super::{config::KubePrometheusConfig, kube_prometheus::kube_prometheus_score}; | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub enum AlertChannel { | ||||
|     WebHookUrl { | ||||
|         url: Url, | ||||
|         service_name: String, | ||||
|     }, | ||||
|     Smpt { | ||||
|         email_address: EmailAddress, | ||||
|         service_name: String, | ||||
| 
					
					johnride marked this conversation as resolved
					
				 | ||||
|     }, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub enum Stack { | ||||
|     KubePrometheusStack, | ||||
|     OtherStack, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub struct MonitoringAlertingStackScore { | ||||
|     // TODO Support other components in our monitoring/alerting stack instead of a single helm
 | ||||
|     // chart
 | ||||
|     pub monitoring_stack: HelmChartScore, | ||||
|     pub namespace: String, | ||||
|     pub alert_channel: Option<AlertChannel>, | ||||
|     pub monitoring_stack: Stack, | ||||
| 
					
					johnride marked this conversation as resolved
					
				 
				
					
						johnride
						commented  No need for this  
 Another note, I'm realising now something that slipped by me yesterday : I think we should have two scores for monitoring : cluster monitoring and app monitoring. The one we want with LampScore is ApplicationMonitoringScore. It comes with quite a bit of boilerplate but at least it makes sense to deploy it anywhere. Then we also provide a ClusterMonitoringScore. Under the hood they could very well use the same Interpret and work together efficiently. No need for this `monitoring_stack: Stack` field for now. This is a case of YAGNI. We support only one type and I don't see in the very short term a use case that would force us.
> Ron Jeffries, a co-founder of XP, explained the philosophy: "Always implement things when you actually need them, never when you just foresee that you [will] need them."[8] John Carmack wrote "It is hard for less experienced developers to appreciate how rarely architecting for future requirements / applications turns out net-positive."[9] 
> https://en.wikipedia.org/wiki/You_aren%27t_gonna_need_it
---
Another note, I'm realising now something that slipped by me yesterday : 
I think we should have two scores for monitoring : cluster monitoring and app monitoring. The one we want with LampScore is ApplicationMonitoringScore. It comes with quite a bit of boilerplate but at least it makes sense to deploy it anywhere. Then we also provide a ClusterMonitoringScore. Under the hood they could very well use the same Interpret and work together efficiently. | ||||
| } | ||||
| 
 | ||||
| impl MonitoringAlertingStackScore { | ||||
|     pub fn new_with_ns(ns: &str) -> Self { | ||||
|         Self { | ||||
|             monitoring_stack: kube_prometheus_score(ns), | ||||
|             namespace: ns.to_string(), | ||||
|     fn match_alert_channel(&self, config: &mut KubePrometheusConfig) { | ||||
|         if let Some(alert_channel) = &self.alert_channel { | ||||
|             match alert_channel { | ||||
|                 //opt1
 | ||||
|                 AlertChannel::WebHookUrl { url, service_name } => { | ||||
|                     config.webhook_url = Some(url.clone()); | ||||
|                     config.webhook_service_name = Some(service_name.clone()); | ||||
|                 } | ||||
|                 //opt2
 | ||||
|                 AlertChannel::Smpt { | ||||
|                     email_address, | ||||
|                     service_name, | ||||
|                 } => { | ||||
|                     config.smpt_email_address = Some(email_address.clone()); | ||||
|                     config.smtp_service_name = Some(service_name.clone()); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| //    pub fn new_with_ns(ns: &str) -> Self {
 | ||||
| //        let mut config = KubePrometheusConfig::default();
 | ||||
| //        let namespace = ns.to_string();
 | ||||
| //        config.namespace = namespace;
 | ||||
| //        let score = kube_prometheus_score(&config);
 | ||||
| //        Self {
 | ||||
| //            alert_channel: None,
 | ||||
| //            monitoring_stack: Some(Stack::KubePrometheusStack),
 | ||||
| //        }
 | ||||
| //    }
 | ||||
| //}
 | ||||
| 
 | ||||
| impl Default for MonitoringAlertingStackScore { | ||||
|     fn default() -> Self { | ||||
|         let ns = "monitoring"; | ||||
|         Self { | ||||
|             monitoring_stack: kube_prometheus_score(ns), | ||||
|             namespace: ns.to_string(), | ||||
|             alert_channel: None, | ||||
|             monitoring_stack: Stack::KubePrometheusStack, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore { | ||||
|     fn create_interpret(&self) -> Box<dyn Interpret<T>> { | ||||
|         self.monitoring_stack.create_interpret() | ||||
|         match &self.monitoring_stack { | ||||
|             Stack::KubePrometheusStack => { | ||||
|                 let mut config = KubePrometheusConfig::new(); | ||||
|                 self.match_alert_channel(&mut config); | ||||
|                 let score = kube_prometheus_score(&config); | ||||
|                 score.create_interpret() | ||||
|             } | ||||
|             Stack::OtherStack => { | ||||
|                 todo!() | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn name(&self) -> String { | ||||
|  | ||||
What is service_name used for? As this is user facing, a bit of rust doc (with triple slashes ///) to describe how to use would be useful here.