cleanup unused k3d prometheus monitoring score & simplify design
This commit is contained in:
		
							parent
							
								
									a5deda647b
								
							
						
					
					
						commit
						961a300154
					
				| @ -12,16 +12,14 @@ use crate::{ | |||||||
|     inventory::Inventory, |     inventory::Inventory, | ||||||
|     maestro::Maestro, |     maestro::Maestro, | ||||||
|     modules::{ |     modules::{ | ||||||
|         application::Application, |  | ||||||
|         k3d::K3DInstallationScore, |         k3d::K3DInstallationScore, | ||||||
|         monitoring::kube_prometheus::crd::{ |         monitoring::kube_prometheus::crd::{ | ||||||
|             crd_alertmanager_config::{CRDAlertManagerReceiver, CRDPrometheus}, |             crd_alertmanager_config::{CRDAlertManagerReceiver, CRDPrometheus}, | ||||||
|             prometheus_operator::prometheus_operator_helm_chart_score, |             prometheus_operator::prometheus_operator_helm_chart_score, | ||||||
|         }, |         }, | ||||||
|         prometheus::{ |         prometheus::{ | ||||||
|             k3d_prometheus_alerting_score::K3dPrometheusCRDAlertingScore, |  | ||||||
|             k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore, |             k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore, | ||||||
|             prometheus::{PrometheusApplicationMonitoring, PrometheusMonitoring}, |             prometheus::PrometheusApplicationMonitoring, | ||||||
|         }, |         }, | ||||||
|     }, |     }, | ||||||
|     score::Score, |     score::Score, | ||||||
| @ -31,7 +29,7 @@ use crate::{ | |||||||
| use super::{ | use super::{ | ||||||
|     DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology, |     DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology, | ||||||
|     k8s::K8sClient, |     k8s::K8sClient, | ||||||
|     oberservability::monitoring::{AlertReceiver, AlertSender}, |     oberservability::monitoring::AlertReceiver, | ||||||
|     tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager}, |     tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager}, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| @ -72,47 +70,6 @@ impl K8sclient for K8sAnywhereTopology { | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #[async_trait] |  | ||||||
| impl PrometheusMonitoring<CRDPrometheus> for K8sAnywhereTopology { |  | ||||||
|     async fn ensure_prometheus_operator( |  | ||||||
|         &self, |  | ||||||
|         sender: &CRDPrometheus, |  | ||||||
|     ) -> Result<Outcome, InterpretError> { |  | ||||||
|         let output = Command::new("sh") |  | ||||||
|             .args(["-c", "kubectl get crd -A | grep -i prometheuses"]) |  | ||||||
|             .output() |  | ||||||
|             .map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e)))?; |  | ||||||
|         if output.status.success() && output.stdout.is_empty() { |  | ||||||
|             if let Some(Some(k8s_state)) = self.k8s_state.get() { |  | ||||||
|                 match k8s_state.source { |  | ||||||
|                     K8sSource::LocalK3d => { |  | ||||||
|                         debug!("installing prometheus operator"); |  | ||||||
|                         let op_score = |  | ||||||
|                             prometheus_operator_helm_chart_score(sender.namespace.clone()); |  | ||||||
|                         op_score |  | ||||||
|                             .create_interpret() |  | ||||||
|                             .execute(&Inventory::empty(), self) |  | ||||||
|                             .await?; |  | ||||||
|                         return Ok(Outcome::success( |  | ||||||
|                             "installed prometheus operator".to_string(), |  | ||||||
|                         )); |  | ||||||
|                     } |  | ||||||
|                     K8sSource::Kubeconfig => { |  | ||||||
|                         debug!("unable to install prometheus operator, contact cluster admin"); |  | ||||||
|                         return Ok(Outcome::noop()); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } else { |  | ||||||
|                 warn!("Unable to detect k8s_state. Skipping Prometheus Operator install."); |  | ||||||
|                 return Ok(Outcome::noop()); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         debug!("Prometheus operator is already present, skipping install"); |  | ||||||
|         Ok(Outcome::success( |  | ||||||
|             "prometheus operator present in cluster".to_string(), |  | ||||||
|         )) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| #[async_trait] | #[async_trait] | ||||||
| impl PrometheusApplicationMonitoring<CRDPrometheus> for K8sAnywhereTopology { | impl PrometheusApplicationMonitoring<CRDPrometheus> for K8sAnywhereTopology { | ||||||
|     async fn configure_receivers( |     async fn configure_receivers( | ||||||
| @ -125,6 +82,7 @@ impl PrometheusApplicationMonitoring<CRDPrometheus> for K8sAnywhereTopology { | |||||||
| 
 | 
 | ||||||
|         todo!() |         todo!() | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     async fn install_prometheus( |     async fn install_prometheus( | ||||||
|         &self, |         &self, | ||||||
|         sender: &CRDPrometheus, |         sender: &CRDPrometheus, | ||||||
| @ -322,6 +280,45 @@ impl K8sAnywhereTopology { | |||||||
|             )), |             )), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     async fn ensure_prometheus_operator( | ||||||
|  |         &self, | ||||||
|  |         sender: &CRDPrometheus, | ||||||
|  |     ) -> Result<Outcome, InterpretError> { | ||||||
|  |         let output = Command::new("sh") | ||||||
|  |             .args(["-c", "kubectl get crd -A | grep -i prometheuses"]) | ||||||
|  |             .output() | ||||||
|  |             .map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e)))?; | ||||||
|  |         if output.status.success() && output.stdout.is_empty() { | ||||||
|  |             if let Some(Some(k8s_state)) = self.k8s_state.get() { | ||||||
|  |                 match k8s_state.source { | ||||||
|  |                     K8sSource::LocalK3d => { | ||||||
|  |                         debug!("installing prometheus operator"); | ||||||
|  |                         let op_score = | ||||||
|  |                             prometheus_operator_helm_chart_score(sender.namespace.clone()); | ||||||
|  |                         op_score | ||||||
|  |                             .create_interpret() | ||||||
|  |                             .execute(&Inventory::empty(), self) | ||||||
|  |                             .await?; | ||||||
|  |                         return Ok(Outcome::success( | ||||||
|  |                             "installed prometheus operator".to_string(), | ||||||
|  |                         )); | ||||||
|  |                     } | ||||||
|  |                     K8sSource::Kubeconfig => { | ||||||
|  |                         debug!("unable to install prometheus operator, contact cluster admin"); | ||||||
|  |                         return Ok(Outcome::noop()); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } else { | ||||||
|  |                 warn!("Unable to detect k8s_state. Skipping Prometheus Operator install."); | ||||||
|  |                 return Ok(Outcome::noop()); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         debug!("Prometheus operator is already present, skipping install"); | ||||||
|  |         Ok(Outcome::success( | ||||||
|  |             "prometheus operator present in cluster".to_string(), | ||||||
|  |         )) | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #[derive(Clone, Debug)] | #[derive(Clone, Debug)] | ||||||
|  | |||||||
| @ -81,7 +81,7 @@ impl CRDAlertManagerReceiver for DiscordWebhook { | |||||||
|             }), |             }), | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         let am = AlertmanagerConfig { |         AlertmanagerConfig { | ||||||
|             metadata: ObjectMeta { |             metadata: ObjectMeta { | ||||||
|                 name: Some(self.name.clone()), |                 name: Some(self.name.clone()), | ||||||
|                 labels: Some(std::collections::BTreeMap::from([( |                 labels: Some(std::collections::BTreeMap::from([( | ||||||
| @ -92,9 +92,9 @@ impl CRDAlertManagerReceiver for DiscordWebhook { | |||||||
|                 ..Default::default() |                 ..Default::default() | ||||||
|             }, |             }, | ||||||
|             spec, |             spec, | ||||||
|         }; |  | ||||||
|         am |  | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     fn clone_box(&self) -> Box<dyn CRDAlertManagerReceiver> { |     fn clone_box(&self) -> Box<dyn CRDAlertManagerReceiver> { | ||||||
|         Box::new(self.clone()) |         Box::new(self.clone()) | ||||||
|     } |     } | ||||||
|  | |||||||
| @ -9,17 +9,11 @@ use crate::{ | |||||||
|     inventory::Inventory, |     inventory::Inventory, | ||||||
|     modules::{ |     modules::{ | ||||||
|         application::Application, |         application::Application, | ||||||
|         monitoring::kube_prometheus::crd::crd_alertmanager_config::{ |         monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus, | ||||||
|             CRDAlertManagerReceiver, CRDPrometheus, |         prometheus::prometheus::PrometheusApplicationMonitoring, | ||||||
|         }, |  | ||||||
|         prometheus::prometheus::{PrometheusApplicationMonitoring, PrometheusMonitoring}, |  | ||||||
|     }, |     }, | ||||||
|     score::Score, |     score::Score, | ||||||
|     topology::{ |     topology::{Topology, oberservability::monitoring::AlertReceiver}, | ||||||
|         Topology, |  | ||||||
|         oberservability::monitoring::{AlertReceiver, AlertSender}, |  | ||||||
|         tenant::TenantManager, |  | ||||||
|     }, |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #[derive(Debug, Clone, Serialize)] | #[derive(Debug, Clone, Serialize)] | ||||||
|  | |||||||
| @ -6,8 +6,6 @@ use schemars::JsonSchema; | |||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
| 
 | 
 | ||||||
| use crate::topology::{ | use crate::topology::{ | ||||||
|     Topology, |  | ||||||
|     installable::Installable, |  | ||||||
|     k8s::K8sClient, |     k8s::K8sClient, | ||||||
|     oberservability::monitoring::{AlertReceiver, AlertSender}, |     oberservability::monitoring::{AlertReceiver, AlertSender}, | ||||||
| }; | }; | ||||||
|  | |||||||
| @ -1,582 +0,0 @@ | |||||||
| use std::fs; |  | ||||||
| use std::{collections::BTreeMap, sync::Arc}; |  | ||||||
| use tempfile::tempdir; |  | ||||||
| 
 |  | ||||||
| use async_trait::async_trait; |  | ||||||
| use kube::api::ObjectMeta; |  | ||||||
| use log::{debug, info}; |  | ||||||
| use serde::Serialize; |  | ||||||
| use tokio::process::Command; |  | ||||||
| 
 |  | ||||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{ |  | ||||||
|     AlertmanagerConfig, CRDAlertManagerReceiver, CRDPrometheus, |  | ||||||
| }; |  | ||||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules; |  | ||||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_grafana::{ |  | ||||||
|     Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig, |  | ||||||
|     GrafanaDatasourceSpec, GrafanaSpec, |  | ||||||
| }; |  | ||||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{ |  | ||||||
|     PrometheusRule, PrometheusRuleSpec, RuleGroup, |  | ||||||
| }; |  | ||||||
| use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard; |  | ||||||
| use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{ |  | ||||||
|     ServiceMonitor, ServiceMonitorSpec, |  | ||||||
| }; |  | ||||||
| use crate::topology::{K8sclient, Topology, k8s::K8sClient}; |  | ||||||
| use crate::{ |  | ||||||
|     data::{Id, Version}, |  | ||||||
|     interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, |  | ||||||
|     inventory::Inventory, |  | ||||||
|     modules::monitoring::kube_prometheus::crd::{ |  | ||||||
|         crd_alertmanagers::{Alertmanager, AlertmanagerSpec}, |  | ||||||
|         crd_prometheuses::{ |  | ||||||
|             AlertmanagerEndpoints, LabelSelector, Prometheus, PrometheusSpec, |  | ||||||
|             PrometheusSpecAlerting, |  | ||||||
|         }, |  | ||||||
|         role::{build_prom_role, build_prom_rolebinding, build_prom_service_account}, |  | ||||||
|     }, |  | ||||||
|     score::Score, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| use super::prometheus::PrometheusMonitoring; |  | ||||||
| 
 |  | ||||||
| #[derive(Clone, Debug, Serialize)] |  | ||||||
| pub struct K3dPrometheusCRDAlertingScore { |  | ||||||
|     pub namespace: String, |  | ||||||
|     pub receivers: Vec<Box<dyn CRDAlertManagerReceiver>>, |  | ||||||
|     pub service_monitors: Vec<ServiceMonitor>, |  | ||||||
|     pub prometheus_rules: Vec<RuleGroup>, |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| impl<T: Topology + K8sclient + PrometheusMonitoring<CRDPrometheus>> Score<T> |  | ||||||
|     for K3dPrometheusCRDAlertingScore |  | ||||||
| { |  | ||||||
|     fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> { |  | ||||||
|         Box::new(K3dPrometheusCRDAlertingInterpret { |  | ||||||
|             namespace: self.namespace.clone(), |  | ||||||
|             receivers: self.receivers.clone(), |  | ||||||
|             service_monitors: self.service_monitors.clone(), |  | ||||||
|             prometheus_rules: self.prometheus_rules.clone(), |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     fn name(&self) -> String { |  | ||||||
|         "CRDApplicationAlertingScore".into() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #[derive(Clone, Debug)] |  | ||||||
| pub struct K3dPrometheusCRDAlertingInterpret { |  | ||||||
|     pub namespace: String, |  | ||||||
|     pub receivers: Vec<Box<dyn CRDAlertManagerReceiver>>, |  | ||||||
|     pub service_monitors: Vec<ServiceMonitor>, |  | ||||||
|     pub prometheus_rules: Vec<RuleGroup>, |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #[async_trait] |  | ||||||
| impl<T: Topology + K8sclient + PrometheusMonitoring<CRDPrometheus>> Interpret<T> |  | ||||||
|     for K3dPrometheusCRDAlertingInterpret |  | ||||||
| { |  | ||||||
|     async fn execute( |  | ||||||
|         &self, |  | ||||||
|         _inventory: &Inventory, |  | ||||||
|         topology: &T, |  | ||||||
|     ) -> Result<Outcome, InterpretError> { |  | ||||||
|         let client = topology.k8s_client().await.unwrap(); |  | ||||||
|         self.ensure_prometheus_operator().await?; |  | ||||||
|         self.ensure_grafana_operator().await?; |  | ||||||
|         self.install_prometheus(&client).await?; |  | ||||||
|         self.install_alert_manager(&client).await?; |  | ||||||
|         self.install_client_kube_metrics().await?; |  | ||||||
|         self.install_grafana(&client).await?; |  | ||||||
|         self.install_receivers(&self.receivers, &client).await?; |  | ||||||
|         self.install_rules(&self.prometheus_rules, &client).await?; |  | ||||||
|         self.install_monitors(self.service_monitors.clone(), &client) |  | ||||||
|             .await?; |  | ||||||
|         Ok(Outcome::success(format!( |  | ||||||
|             "deployed application monitoring composants" |  | ||||||
|         ))) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     fn get_name(&self) -> InterpretName { |  | ||||||
|         todo!() |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     fn get_version(&self) -> Version { |  | ||||||
|         todo!() |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     fn get_status(&self) -> InterpretStatus { |  | ||||||
|         todo!() |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     fn get_children(&self) -> Vec<Id> { |  | ||||||
|         todo!() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| impl K3dPrometheusCRDAlertingInterpret { |  | ||||||
|     async fn crd_exists(&self, crd: &str) -> bool { |  | ||||||
|         let output = Command::new("kubectl") |  | ||||||
|             .args(["get", "crd", crd]) |  | ||||||
|             .output() |  | ||||||
|             .await; |  | ||||||
| 
 |  | ||||||
|         matches!(output, Ok(o) if o.status.success()) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     async fn install_chart( |  | ||||||
|         &self, |  | ||||||
|         chart_path: String, |  | ||||||
|         chart_name: String, |  | ||||||
|     ) -> Result<(), InterpretError> { |  | ||||||
|         let temp_dir = |  | ||||||
|             tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?; |  | ||||||
|         let temp_path = temp_dir.path().to_path_buf(); |  | ||||||
|         debug!("Using temp directory: {}", temp_path.display()); |  | ||||||
|         let chart = format!("{}/{}", chart_path, chart_name); |  | ||||||
|         let pull_output = Command::new("helm") |  | ||||||
|             .args(&["pull", &chart, "--destination", temp_path.to_str().unwrap()]) |  | ||||||
|             .output() |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?; |  | ||||||
| 
 |  | ||||||
|         if !pull_output.status.success() { |  | ||||||
|             return Err(InterpretError::new(format!( |  | ||||||
|                 "Helm pull failed: {}", |  | ||||||
|                 String::from_utf8_lossy(&pull_output.stderr) |  | ||||||
|             ))); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         let tgz_path = fs::read_dir(&temp_path) |  | ||||||
|             .unwrap() |  | ||||||
|             .filter_map(|entry| { |  | ||||||
|                 let entry = entry.ok()?; |  | ||||||
|                 let path = entry.path(); |  | ||||||
|                 if path.extension()? == "tgz" { |  | ||||||
|                     Some(path) |  | ||||||
|                 } else { |  | ||||||
|                     None |  | ||||||
|                 } |  | ||||||
|             }) |  | ||||||
|             .next() |  | ||||||
|             .ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?; |  | ||||||
| 
 |  | ||||||
|         debug!("Installing chart from: {}", tgz_path.display()); |  | ||||||
| 
 |  | ||||||
|         let install_output = Command::new("helm") |  | ||||||
|             .args(&[ |  | ||||||
|                 "install", |  | ||||||
|                 &chart_name, |  | ||||||
|                 tgz_path.to_str().unwrap(), |  | ||||||
|                 "--namespace", |  | ||||||
|                 &self.namespace, |  | ||||||
|                 "--create-namespace", |  | ||||||
|                 "--wait", |  | ||||||
|                 "--atomic", |  | ||||||
|             ]) |  | ||||||
|             .output() |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?; |  | ||||||
| 
 |  | ||||||
|         if !install_output.status.success() { |  | ||||||
|             return Err(InterpretError::new(format!( |  | ||||||
|                 "Helm install failed: {}", |  | ||||||
|                 String::from_utf8_lossy(&install_output.stderr) |  | ||||||
|             ))); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         debug!( |  | ||||||
|             "Installed chart {}/{} in namespace: {}", |  | ||||||
|             &chart_path, &chart_name, self.namespace |  | ||||||
|         ); |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     async fn ensure_prometheus_operator(&self) -> Result<Outcome, InterpretError> { |  | ||||||
|         self.install_chart( |  | ||||||
|             "oci://hub.nationtech.io/harmony".to_string(), |  | ||||||
|             "nt-prometheus-operator".to_string(), |  | ||||||
|         ) |  | ||||||
|         .await?; |  | ||||||
|         Ok(Outcome::success(format!( |  | ||||||
|             "installed prometheus operator to ns {}", |  | ||||||
|             self.namespace.clone() |  | ||||||
|         ))) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> { |  | ||||||
|         if self.crd_exists("grafanas.grafana.integreatly.org").await { |  | ||||||
|             debug!("grafana CRDs already exist — skipping install."); |  | ||||||
|             return Ok(Outcome::success("Grafana CRDs already exist".to_string())); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         let _ = Command::new("helm") |  | ||||||
|             .args(&[ |  | ||||||
|                 "repo", |  | ||||||
|                 "add", |  | ||||||
|                 "grafana-operator", |  | ||||||
|                 "https://grafana.github.io/helm-charts", |  | ||||||
|             ]) |  | ||||||
|             .output() |  | ||||||
|             .await |  | ||||||
|             .unwrap(); |  | ||||||
| 
 |  | ||||||
|         let _ = Command::new("helm") |  | ||||||
|             .args(&["repo", "update"]) |  | ||||||
|             .output() |  | ||||||
|             .await |  | ||||||
|             .unwrap(); |  | ||||||
| 
 |  | ||||||
|         let output = Command::new("helm") |  | ||||||
|             .args(&[ |  | ||||||
|                 "install", |  | ||||||
|                 "grafana-operator", |  | ||||||
|                 "grafana-operator/grafana-operator", |  | ||||||
|                 "--namespace", |  | ||||||
|                 &self.namespace, |  | ||||||
|                 "--create-namespace", |  | ||||||
|                 "--set", |  | ||||||
|                 "namespaceScope=true", |  | ||||||
|             ]) |  | ||||||
|             .output() |  | ||||||
|             .await |  | ||||||
|             .unwrap(); |  | ||||||
| 
 |  | ||||||
|         if !output.status.success() { |  | ||||||
|             return Err(InterpretError::new(format!( |  | ||||||
|                 "helm install failed:\nstdout: {}\nstderr: {}", |  | ||||||
|                 String::from_utf8_lossy(&output.stdout), |  | ||||||
|                 String::from_utf8_lossy(&output.stderr) |  | ||||||
|             ))); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         Ok(Outcome::success(format!( |  | ||||||
|             "installed grafana operator in ns {}", |  | ||||||
|             self.namespace.clone() |  | ||||||
|         ))) |  | ||||||
|     } |  | ||||||
|     async fn install_prometheus(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> { |  | ||||||
|         debug!( |  | ||||||
|             "installing crd-prometheuses in namespace {}", |  | ||||||
|             self.namespace.clone() |  | ||||||
|         ); |  | ||||||
|         debug!("building role/rolebinding/serviceaccount for crd-prometheus"); |  | ||||||
|         let rolename = format!("{}-prom", self.namespace.clone()); |  | ||||||
|         let sa_name = format!("{}-prom-sa", self.namespace.clone()); |  | ||||||
|         let role = build_prom_role(rolename.clone(), self.namespace.clone()); |  | ||||||
|         let rolebinding = |  | ||||||
|             build_prom_rolebinding(rolename.clone(), self.namespace.clone(), sa_name.clone()); |  | ||||||
|         let sa = build_prom_service_account(sa_name.clone(), self.namespace.clone()); |  | ||||||
|         let prom_spec = PrometheusSpec { |  | ||||||
|             alerting: Some(PrometheusSpecAlerting { |  | ||||||
|                 alertmanagers: Some(vec![AlertmanagerEndpoints { |  | ||||||
|                     name: Some(format!("alertmanager-operated")), |  | ||||||
|                     namespace: Some(format!("{}", self.namespace.clone())), |  | ||||||
|                     port: Some("web".into()), |  | ||||||
|                     scheme: Some("http".into()), |  | ||||||
|                 }]), |  | ||||||
|             }), |  | ||||||
|             service_account_name: sa_name.clone(), |  | ||||||
|             service_monitor_namespace_selector: Some(LabelSelector { |  | ||||||
|                 match_labels: BTreeMap::from([( |  | ||||||
|                     "kubernetes.io/metadata.name".to_string(), |  | ||||||
|                     format!("{}", self.namespace.clone()), |  | ||||||
|                 )]), |  | ||||||
|                 match_expressions: vec![], |  | ||||||
|             }), |  | ||||||
|             service_monitor_selector: Some(LabelSelector { |  | ||||||
|                 match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }), |  | ||||||
| 
 |  | ||||||
|             service_discovery_role: Some("Endpoints".into()), |  | ||||||
| 
 |  | ||||||
|             pod_monitor_selector: Some(LabelSelector { |  | ||||||
|                 match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }), |  | ||||||
| 
 |  | ||||||
|             rule_selector: Some(LabelSelector { |  | ||||||
|                 match_labels: BTreeMap::from([("role".to_string(), "prometheus-rule".to_string())]), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }), |  | ||||||
| 
 |  | ||||||
|             rule_namespace_selector: Some(LabelSelector { |  | ||||||
|                 match_labels: BTreeMap::from([( |  | ||||||
|                     "kubernetes.io/metadata.name".to_string(), |  | ||||||
|                     format!("{}", self.namespace.clone()), |  | ||||||
|                 )]), |  | ||||||
|                 match_expressions: vec![], |  | ||||||
|             }), |  | ||||||
|         }; |  | ||||||
|         let prom = Prometheus { |  | ||||||
|             metadata: ObjectMeta { |  | ||||||
|                 name: Some(self.namespace.clone()), |  | ||||||
|                 labels: Some(std::collections::BTreeMap::from([ |  | ||||||
|                     ("alertmanagerConfig".to_string(), "enabled".to_string()), |  | ||||||
|                     ("client".to_string(), "prometheus".to_string()), |  | ||||||
|                 ])), |  | ||||||
|                 namespace: Some(self.namespace.clone()), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }, |  | ||||||
|             spec: prom_spec, |  | ||||||
|         }; |  | ||||||
|         client |  | ||||||
|             .apply(&role, Some(&self.namespace.clone())) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
|         info!( |  | ||||||
|             "installed prometheus role: {:#?} in ns {:#?}", |  | ||||||
|             role.metadata.name.unwrap(), |  | ||||||
|             role.metadata.namespace.unwrap() |  | ||||||
|         ); |  | ||||||
|         client |  | ||||||
|             .apply(&rolebinding, Some(&self.namespace.clone())) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
|         info!( |  | ||||||
|             "installed prometheus rolebinding: {:#?} in ns {:#?}", |  | ||||||
|             rolebinding.metadata.name.unwrap(), |  | ||||||
|             rolebinding.metadata.namespace.unwrap() |  | ||||||
|         ); |  | ||||||
|         client |  | ||||||
|             .apply(&sa, Some(&self.namespace.clone())) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
|         info!( |  | ||||||
|             "installed prometheus service account: {:#?} in ns {:#?}", |  | ||||||
|             sa.metadata.name.unwrap(), |  | ||||||
|             sa.metadata.namespace.unwrap() |  | ||||||
|         ); |  | ||||||
|         client |  | ||||||
|             .apply(&prom, Some(&self.namespace.clone())) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
|         info!( |  | ||||||
|             "installed prometheus: {:#?} in ns {:#?}", |  | ||||||
|             &prom.metadata.name.clone().unwrap(), |  | ||||||
|             &prom.metadata.namespace.clone().unwrap() |  | ||||||
|         ); |  | ||||||
| 
 |  | ||||||
|         Ok(Outcome::success(format!( |  | ||||||
|             "successfully deployed crd-prometheus {:#?}", |  | ||||||
|             prom |  | ||||||
|         ))) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     async fn install_alert_manager( |  | ||||||
|         &self, |  | ||||||
|         client: &Arc<K8sClient>, |  | ||||||
|     ) -> Result<Outcome, InterpretError> { |  | ||||||
|         let am = Alertmanager { |  | ||||||
|             metadata: ObjectMeta { |  | ||||||
|                 name: Some(self.namespace.clone()), |  | ||||||
|                 labels: Some(std::collections::BTreeMap::from([( |  | ||||||
|                     "alertmanagerConfig".to_string(), |  | ||||||
|                     "enabled".to_string(), |  | ||||||
|                 )])), |  | ||||||
|                 namespace: Some(self.namespace.clone()), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }, |  | ||||||
|             spec: AlertmanagerSpec::default(), |  | ||||||
|         }; |  | ||||||
|         client |  | ||||||
|             .apply(&am, Some(&self.namespace.clone())) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
|         Ok(Outcome::success(format!( |  | ||||||
|             "successfully deployed service monitor {:#?}", |  | ||||||
|             am.metadata.name |  | ||||||
|         ))) |  | ||||||
|     } |  | ||||||
|     async fn install_monitors( |  | ||||||
|         &self, |  | ||||||
|         mut monitors: Vec<ServiceMonitor>, |  | ||||||
|         client: &Arc<K8sClient>, |  | ||||||
|     ) -> Result<Outcome, InterpretError> { |  | ||||||
|         let default_service_monitor = ServiceMonitor { |  | ||||||
|             metadata: ObjectMeta { |  | ||||||
|                 name: Some(self.namespace.clone()), |  | ||||||
|                 labels: Some(std::collections::BTreeMap::from([ |  | ||||||
|                     ("alertmanagerConfig".to_string(), "enabled".to_string()), |  | ||||||
|                     ("client".to_string(), "prometheus".to_string()), |  | ||||||
|                     ( |  | ||||||
|                         "app.kubernetes.io/name".to_string(), |  | ||||||
|                         "kube-state-metrics".to_string(), |  | ||||||
|                     ), |  | ||||||
|                 ])), |  | ||||||
|                 namespace: Some(self.namespace.clone()), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }, |  | ||||||
|             spec: ServiceMonitorSpec::default(), |  | ||||||
|         }; |  | ||||||
|         monitors.push(default_service_monitor); |  | ||||||
|         for monitor in monitors.iter() { |  | ||||||
|             client |  | ||||||
|                 .apply(monitor, Some(&self.namespace.clone())) |  | ||||||
|                 .await |  | ||||||
|                 .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
|         } |  | ||||||
|         Ok(Outcome::success( |  | ||||||
|             "succesfully deployed service monitors".to_string(), |  | ||||||
|         )) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     async fn install_rules( |  | ||||||
|         &self, |  | ||||||
|         rules: &Vec<RuleGroup>, |  | ||||||
|         client: &Arc<K8sClient>, |  | ||||||
|     ) -> Result<Outcome, InterpretError> { |  | ||||||
|         let mut prom_rule_spec = PrometheusRuleSpec { |  | ||||||
|             groups: rules.clone(), |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         let default_rules_group = RuleGroup { |  | ||||||
|             name: format!("default-rules"), |  | ||||||
|             rules: build_default_application_rules(), |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         prom_rule_spec.groups.push(default_rules_group); |  | ||||||
|         let prom_rules = PrometheusRule { |  | ||||||
|             metadata: ObjectMeta { |  | ||||||
|                 name: Some(self.namespace.clone()), |  | ||||||
|                 labels: Some(std::collections::BTreeMap::from([ |  | ||||||
|                     ("alertmanagerConfig".to_string(), "enabled".to_string()), |  | ||||||
|                     ("role".to_string(), "prometheus-rule".to_string()), |  | ||||||
|                 ])), |  | ||||||
|                 namespace: Some(self.namespace.clone()), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }, |  | ||||||
|             spec: prom_rule_spec, |  | ||||||
|         }; |  | ||||||
|         client |  | ||||||
|             .apply(&prom_rules, Some(&self.namespace)) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
|         Ok(Outcome::success(format!( |  | ||||||
|             "successfully deployed rules {:#?}", |  | ||||||
|             prom_rules.metadata.name |  | ||||||
|         ))) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     async fn install_client_kube_metrics(&self) -> Result<Outcome, InterpretError> { |  | ||||||
|         self.install_chart( |  | ||||||
|             "oci://hub.nationtech.io/harmony".to_string(), |  | ||||||
|             "nt-kube-metrics".to_string(), |  | ||||||
|         ) |  | ||||||
|         .await?; |  | ||||||
|         Ok(Outcome::success(format!( |  | ||||||
|             "Installed client kube metrics in ns {}", |  | ||||||
|             &self.namespace |  | ||||||
|         ))) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> { |  | ||||||
|         let mut label = BTreeMap::new(); |  | ||||||
|         label.insert("dashboards".to_string(), "grafana".to_string()); |  | ||||||
|         let labels = LabelSelector { |  | ||||||
|             match_labels: label.clone(), |  | ||||||
|             match_expressions: vec![], |  | ||||||
|         }; |  | ||||||
|         let mut json_data = BTreeMap::new(); |  | ||||||
|         json_data.insert("timeInterval".to_string(), "5s".to_string()); |  | ||||||
|         let namespace = self.namespace.clone(); |  | ||||||
| 
 |  | ||||||
|         let json = build_default_dashboard(&namespace); |  | ||||||
| 
 |  | ||||||
|         let graf_data_source = GrafanaDatasource { |  | ||||||
|             metadata: ObjectMeta { |  | ||||||
|                 name: Some(format!("grafana-datasource-{}", self.namespace.clone())), |  | ||||||
|                 namespace: Some(self.namespace.clone()), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }, |  | ||||||
|             spec: GrafanaDatasourceSpec { |  | ||||||
|                 instance_selector: labels.clone(), |  | ||||||
|                 allow_cross_namespace_import: Some(false), |  | ||||||
|                 datasource: GrafanaDatasourceConfig { |  | ||||||
|                     access: "proxy".to_string(), |  | ||||||
|                     database: Some("prometheus".to_string()), |  | ||||||
|                     json_data: Some(json_data), |  | ||||||
|                     //TODO this is fragile
 |  | ||||||
|                     name: format!("prometheus-{}-0", self.namespace.clone()), |  | ||||||
|                     r#type: "prometheus".to_string(), |  | ||||||
|                     url: format!( |  | ||||||
|                         "http://prometheus-operated.{}.svc.cluster.local:9090", |  | ||||||
|                         self.namespace.clone() |  | ||||||
|                     ), |  | ||||||
|                 }, |  | ||||||
|             }, |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         client |  | ||||||
|             .apply(&graf_data_source, Some(&self.namespace)) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
| 
 |  | ||||||
|         let graf_dashboard = GrafanaDashboard { |  | ||||||
|             metadata: ObjectMeta { |  | ||||||
|                 name: Some(format!("grafana-dashboard-{}", self.namespace.clone())), |  | ||||||
|                 namespace: Some(self.namespace.clone()), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }, |  | ||||||
|             spec: GrafanaDashboardSpec { |  | ||||||
|                 resync_period: Some("30s".to_string()), |  | ||||||
|                 instance_selector: labels.clone(), |  | ||||||
|                 json, |  | ||||||
|             }, |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         client |  | ||||||
|             .apply(&graf_dashboard, Some(&self.namespace)) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
| 
 |  | ||||||
|         let grafana = Grafana { |  | ||||||
|             metadata: ObjectMeta { |  | ||||||
|                 name: Some(format!("grafana-{}", self.namespace.clone())), |  | ||||||
|                 namespace: Some(self.namespace.clone()), |  | ||||||
|                 labels: Some(label.clone()), |  | ||||||
|                 ..Default::default() |  | ||||||
|             }, |  | ||||||
|             spec: GrafanaSpec { |  | ||||||
|                 config: None, |  | ||||||
|                 admin_user: None, |  | ||||||
|                 admin_password: None, |  | ||||||
|                 ingress: None, |  | ||||||
|                 persistence: None, |  | ||||||
|                 resources: None, |  | ||||||
|             }, |  | ||||||
|         }; |  | ||||||
|         client |  | ||||||
|             .apply(&grafana, Some(&self.namespace)) |  | ||||||
|             .await |  | ||||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; |  | ||||||
|         Ok(Outcome::success(format!( |  | ||||||
|             "successfully deployed grafana instance {:#?}", |  | ||||||
|             grafana.metadata.name |  | ||||||
|         ))) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     async fn install_receivers( |  | ||||||
|         &self, |  | ||||||
|         receivers: &Vec<Box<dyn CRDAlertManagerReceiver>>, |  | ||||||
|         client: &Arc<K8sClient>, |  | ||||||
|     ) -> Result<Outcome, InterpretError> { |  | ||||||
|         for receiver in receivers.iter() { |  | ||||||
|             let alertmanager_config: AlertmanagerConfig = receiver |  | ||||||
|                 .configure_receiver(&client, self.namespace.clone()) |  | ||||||
|                 .await; |  | ||||||
|             let sender = CRDPrometheus { |  | ||||||
|                 alertmanager_configs: alertmanager_config, |  | ||||||
|                 namespace: self.namespace.clone(), |  | ||||||
|                 client: client.clone(), |  | ||||||
|             }; |  | ||||||
|             receiver.install(&sender).await.map_err(|err| { |  | ||||||
|                 InterpretError::new(format!("failed to install receiver: {}", err)) |  | ||||||
|             })?; |  | ||||||
|         } |  | ||||||
|         Ok(Outcome::success(format!("successfully deployed receivers"))) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @ -39,7 +39,7 @@ use crate::{ | |||||||
|     score::Score, |     score::Score, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| use super::prometheus::{PrometheusApplicationMonitoring, PrometheusMonitoring}; | use super::prometheus::PrometheusApplicationMonitoring; | ||||||
| 
 | 
 | ||||||
| #[derive(Clone, Debug, Serialize)] | #[derive(Clone, Debug, Serialize)] | ||||||
| pub struct K8sPrometheusCRDAlertingScore { | pub struct K8sPrometheusCRDAlertingScore { | ||||||
|  | |||||||
| @ -1,4 +1,3 @@ | |||||||
| pub mod alerts; | pub mod alerts; | ||||||
| pub mod k3d_prometheus_alerting_score; |  | ||||||
| pub mod k8s_prometheus_alerting_score; | pub mod k8s_prometheus_alerting_score; | ||||||
| pub mod prometheus; | pub mod prometheus; | ||||||
|  | |||||||
| @ -8,12 +8,7 @@ use crate::{ | |||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #[async_trait] | #[async_trait] | ||||||
| pub trait PrometheusMonitoring<S: AlertSender> { | pub trait PrometheusApplicationMonitoring<S: AlertSender> { | ||||||
|     async fn ensure_prometheus_operator(&self, sender: &S) -> Result<Outcome, InterpretError>; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #[async_trait] |  | ||||||
| pub trait PrometheusApplicationMonitoring<S: AlertSender>: PrometheusMonitoring<S> { |  | ||||||
|     async fn configure_receivers( |     async fn configure_receivers( | ||||||
|         &self, |         &self, | ||||||
|         receivers: Option<Vec<Box<dyn AlertReceiver<S>>>>, |         receivers: Option<Vec<Box<dyn AlertReceiver<S>>>>, | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user