forked from NationTech/harmony
		
	Compare commits
	
		
			22 Commits
		
	
	
		
			master
			...
			feat/crd-a
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 5cc93d3107 | ||
|  | 569839bf66 | ||
|  | e078f5c062 | ||
|  | a8394cda47 | ||
|  | 064f6d88ba | ||
|  | 9403581be5 | ||
|  | 056152a1e5 | ||
|  | c6b255d0bd | ||
|  | 4b6bebcaf1 | ||
|  | 961a300154 | ||
| a5deda647b | |||
| 0b965b6570 | |||
| d7bce37b69 | |||
| b56a30de3c | |||
| b9e208f4cf | |||
| 1d8b503bd2 | |||
| 114219385f | |||
| 1525ac2226 | |||
| 55a4e79ec4 | |||
| 7b91088828 | |||
| e61ec015ab | |||
| 819f4a32fd | 
							
								
								
									
										86
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										86
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @ -96,6 +96,12 @@ dependencies = [ | ||||
|  "libc", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "ansi_term" | ||||
| version = "0.10.2" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "anstream" | ||||
| version = "0.6.19" | ||||
| @ -1259,6 +1265,18 @@ dependencies = [ | ||||
| name = "example" | ||||
| version = "0.0.0" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "example-application-monitoring-with-tenant" | ||||
| version = "0.1.0" | ||||
| dependencies = [ | ||||
|  "env_logger", | ||||
|  "harmony", | ||||
|  "harmony_cli", | ||||
|  "logging", | ||||
|  "tokio", | ||||
|  "url", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "example-cli" | ||||
| version = "0.1.0" | ||||
| @ -1779,6 +1797,7 @@ dependencies = [ | ||||
|  "k3d-rs", | ||||
|  "k8s-openapi", | ||||
|  "kube", | ||||
|  "kube-derive", | ||||
|  "lazy_static", | ||||
|  "libredfish", | ||||
|  "log", | ||||
| @ -1791,6 +1810,7 @@ dependencies = [ | ||||
|  "reqwest 0.11.27", | ||||
|  "russh", | ||||
|  "rust-ipmi", | ||||
|  "schemars 0.8.22", | ||||
|  "semver", | ||||
|  "serde", | ||||
|  "serde-value", | ||||
| @ -2669,6 +2689,7 @@ dependencies = [ | ||||
|  "k8s-openapi", | ||||
|  "kube-client", | ||||
|  "kube-core", | ||||
|  "kube-derive", | ||||
|  "kube-runtime", | ||||
| ] | ||||
| 
 | ||||
| @ -2722,12 +2743,27 @@ dependencies = [ | ||||
|  "http 1.3.1", | ||||
|  "json-patch", | ||||
|  "k8s-openapi", | ||||
|  "schemars 0.8.22", | ||||
|  "serde", | ||||
|  "serde-value", | ||||
|  "serde_json", | ||||
|  "thiserror 2.0.12", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "kube-derive" | ||||
| version = "1.1.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "079fc8c1c397538628309cfdee20696ebdcc26745f9fb17f89b78782205bd995" | ||||
| dependencies = [ | ||||
|  "darling", | ||||
|  "proc-macro2", | ||||
|  "quote", | ||||
|  "serde", | ||||
|  "serde_json", | ||||
|  "syn", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "kube-runtime" | ||||
| version = "1.1.0" | ||||
| @ -2843,6 +2879,15 @@ dependencies = [ | ||||
|  "log", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "logging" | ||||
| version = "0.1.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "461a8beca676e8ab1bd468c92e9b4436d6368e11e96ae038209e520cfe665e46" | ||||
| dependencies = [ | ||||
|  "ansi_term", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "lru" | ||||
| version = "0.12.5" | ||||
| @ -4140,6 +4185,18 @@ dependencies = [ | ||||
|  "windows-sys 0.59.0", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "schemars" | ||||
| version = "0.8.22" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" | ||||
| dependencies = [ | ||||
|  "dyn-clone", | ||||
|  "schemars_derive", | ||||
|  "serde", | ||||
|  "serde_json", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "schemars" | ||||
| version = "0.9.0" | ||||
| @ -4154,9 +4211,9 @@ dependencies = [ | ||||
| 
 | ||||
| [[package]] | ||||
| name = "schemars" | ||||
| version = "1.0.3" | ||||
| version = "1.0.4" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "1375ba8ef45a6f15d83fa8748f1079428295d403d6ea991d09ab100155fbc06d" | ||||
| checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" | ||||
| dependencies = [ | ||||
|  "dyn-clone", | ||||
|  "ref-cast", | ||||
| @ -4164,6 +4221,18 @@ dependencies = [ | ||||
|  "serde_json", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "schemars_derive" | ||||
| version = "0.8.22" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" | ||||
| dependencies = [ | ||||
|  "proc-macro2", | ||||
|  "quote", | ||||
|  "serde_derive_internals", | ||||
|  "syn", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "scopeguard" | ||||
| version = "1.2.0" | ||||
| @ -4296,6 +4365,17 @@ dependencies = [ | ||||
|  "syn", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "serde_derive_internals" | ||||
| version = "0.29.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" | ||||
| dependencies = [ | ||||
|  "proc-macro2", | ||||
|  "quote", | ||||
|  "syn", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "serde_json" | ||||
| version = "1.0.140" | ||||
| @ -4374,7 +4454,7 @@ dependencies = [ | ||||
|  "indexmap 1.9.3", | ||||
|  "indexmap 2.10.0", | ||||
|  "schemars 0.9.0", | ||||
|  "schemars 1.0.3", | ||||
|  "schemars 1.0.4", | ||||
|  "serde", | ||||
|  "serde_derive", | ||||
|  "serde_json", | ||||
|  | ||||
							
								
								
									
										14
									
								
								examples/application_monitoring_with_tenant/Cargo.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								examples/application_monitoring_with_tenant/Cargo.toml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| [package] | ||||
| name = "example-application-monitoring-with-tenant" | ||||
| edition = "2024" | ||||
| version.workspace = true | ||||
| readme.workspace = true | ||||
| license.workspace = true | ||||
| 
 | ||||
| [dependencies] | ||||
| env_logger.workspace = true | ||||
| harmony = { version = "0.1.0", path = "../../harmony" } | ||||
| harmony_cli = { version = "0.1.0", path = "../../harmony_cli" } | ||||
| logging = "0.1.0" | ||||
| tokio.workspace = true | ||||
| url.workspace = true | ||||
							
								
								
									
										61
									
								
								examples/application_monitoring_with_tenant/src/main.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								examples/application_monitoring_with_tenant/src/main.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,61 @@ | ||||
| use std::{path::PathBuf, sync::Arc}; | ||||
| 
 | ||||
| use harmony::{ | ||||
|     data::Id, | ||||
|     inventory::Inventory, | ||||
|     maestro::Maestro, | ||||
|     modules::{ | ||||
|         application::{ | ||||
|             ApplicationScore, RustWebFramework, RustWebapp, | ||||
|             features::{ContinuousDelivery, Monitoring}, | ||||
|         }, | ||||
|         monitoring::alert_channel::{ | ||||
|             discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver, | ||||
|         }, | ||||
|         tenant::TenantScore, | ||||
|     }, | ||||
|     topology::{K8sAnywhereTopology, Url, tenant::TenantConfig}, | ||||
| }; | ||||
| 
 | ||||
| #[tokio::main] | ||||
| async fn main() { | ||||
|     // TODO: there is a bug where the application is deployed into the namespace matching the
 | ||||
|     // application name, while the tenant is created in the namespace matching the tenant name.
 | ||||
|     // For the application to be deployed in the tenant namespace, the application.name and
 | ||||
|     // the TenantConfig.name must match.
 | ||||
|     let tenant = TenantScore { | ||||
|         config: TenantConfig { | ||||
|             id: Id::from_str("test-tenant-id"), | ||||
|             name: "example-monitoring".to_string(), | ||||
|             ..Default::default() | ||||
|         }, | ||||
|     }; | ||||
|     let application = Arc::new(RustWebapp { | ||||
|         name: "example-monitoring".to_string(), | ||||
|         domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()), | ||||
|         project_root: PathBuf::from("./examples/rust/webapp"), | ||||
|         framework: Some(RustWebFramework::Leptos), | ||||
|     }); | ||||
| 
 | ||||
|     let webhook_receiver = WebhookReceiver { | ||||
|         name: "sample-webhook-receiver".to_string(), | ||||
|         url: Url::Url(url::Url::parse("https://webhook-doesnt-exist.com").unwrap()), | ||||
|     }; | ||||
| 
 | ||||
|     let app = ApplicationScore { | ||||
|         features: vec![Box::new(Monitoring { | ||||
|             alert_receiver: vec![Box::new(webhook_receiver)], | ||||
|             application: application.clone(), | ||||
|         })], | ||||
|         application, | ||||
|     }; | ||||
| 
 | ||||
|     harmony_cli::run( | ||||
|         Inventory::autoload(), | ||||
|         K8sAnywhereTopology::from_env(), | ||||
|         vec![Box::new(tenant), Box::new(app)], | ||||
|         None, | ||||
|     ) | ||||
|     .await | ||||
|     .unwrap(); | ||||
| } | ||||
| @ -50,8 +50,8 @@ async fn main() { | ||||
| 
 | ||||
|     let service_monitor_endpoint = ServiceMonitorEndpoint { | ||||
|         port: Some("80".to_string()), | ||||
|         path: "/metrics".to_string(), | ||||
|         scheme: HTTPScheme::HTTP, | ||||
|         path: Some("/metrics".to_string()), | ||||
|         scheme: Some(HTTPScheme::HTTP), | ||||
|         ..Default::default() | ||||
|     }; | ||||
| 
 | ||||
|  | ||||
| @ -53,8 +53,8 @@ async fn main() { | ||||
| 
 | ||||
|     let service_monitor_endpoint = ServiceMonitorEndpoint { | ||||
|         port: Some("80".to_string()), | ||||
|         path: "/metrics".to_string(), | ||||
|         scheme: HTTPScheme::HTTP, | ||||
|         path: Some("/metrics".to_string()), | ||||
|         scheme: Some(HTTPScheme::HTTP), | ||||
|         ..Default::default() | ||||
|     }; | ||||
| 
 | ||||
|  | ||||
| @ -2,9 +2,14 @@ use std::{path::PathBuf, sync::Arc}; | ||||
| 
 | ||||
| use harmony::{ | ||||
|     inventory::Inventory, | ||||
|     modules::application::{ | ||||
|         ApplicationScore, RustWebFramework, RustWebapp, | ||||
|         features::{ContinuousDelivery, Monitoring}, | ||||
|     modules::{ | ||||
|         application::{ | ||||
|             ApplicationScore, RustWebFramework, RustWebapp, | ||||
|             features::{ContinuousDelivery, Monitoring}, | ||||
|         }, | ||||
|         monitoring::alert_channel::{ | ||||
|             discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver, | ||||
|         }, | ||||
|     }, | ||||
|     topology::{K8sAnywhereTopology, Url}, | ||||
| }; | ||||
| @ -18,6 +23,16 @@ async fn main() { | ||||
|         framework: Some(RustWebFramework::Leptos), | ||||
|     }); | ||||
| 
 | ||||
|     let discord_receiver = DiscordWebhook { | ||||
|         name: "test-discord".to_string(), | ||||
|         url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()), | ||||
|     }; | ||||
| 
 | ||||
|     let webhook_receiver = WebhookReceiver { | ||||
|         name: "sample-webhook-receiver".to_string(), | ||||
|         url: Url::Url(url::Url::parse("https://webhook-doesnt-exist.com").unwrap()), | ||||
|     }; | ||||
| 
 | ||||
|     let app = ApplicationScore { | ||||
|         features: vec![ | ||||
|             Box::new(ContinuousDelivery { | ||||
| @ -25,7 +40,9 @@ async fn main() { | ||||
|             }), | ||||
|             Box::new(Monitoring { | ||||
|                 application: application.clone(), | ||||
|             }), // TODO: add backups, multisite ha, etc.
 | ||||
|                 alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)], | ||||
|             }), | ||||
|             // TODO add backups, multisite ha, etc
 | ||||
|         ], | ||||
|         application, | ||||
|     }; | ||||
|  | ||||
| @ -27,7 +27,7 @@ harmony_macros = { path = "../harmony_macros" } | ||||
| harmony_types = { path = "../harmony_types" } | ||||
| uuid.workspace = true | ||||
| url.workspace = true | ||||
| kube.workspace = true | ||||
| kube = { workspace = true, features = ["derive"] } | ||||
| k8s-openapi.workspace = true | ||||
| serde_yaml.workspace = true | ||||
| http.workspace = true | ||||
| @ -58,6 +58,8 @@ tokio-util = "0.7.15" | ||||
| strum = { version = "0.27.1", features = ["derive"] } | ||||
| tempfile = "3.20.0" | ||||
| serde_with = "3.14.0" | ||||
| schemars = "0.8.22" | ||||
| kube-derive = "1.1.0" | ||||
| bollard.workspace = true | ||||
| tar.workspace = true | ||||
| base64.workspace = true | ||||
|  | ||||
| @ -17,7 +17,7 @@ use kube::{ | ||||
|     runtime::wait::await_condition, | ||||
| }; | ||||
| use log::{debug, error, trace}; | ||||
| use serde::de::DeserializeOwned; | ||||
| use serde::{Serialize, de::DeserializeOwned}; | ||||
| use similar::{DiffableStr, TextDiff}; | ||||
| 
 | ||||
| #[derive(new, Clone)] | ||||
| @ -25,6 +25,15 @@ pub struct K8sClient { | ||||
|     client: Client, | ||||
| } | ||||
| 
 | ||||
| impl Serialize for K8sClient { | ||||
|     fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: serde::Serializer, | ||||
|     { | ||||
|         todo!() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl std::fmt::Debug for K8sClient { | ||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||
|         // This is a poor man's debug implementation for now as kube::Client does not provide much
 | ||||
|  | ||||
| @ -7,22 +7,33 @@ use tokio::sync::OnceCell; | ||||
| 
 | ||||
| use crate::{ | ||||
|     executors::ExecutorError, | ||||
|     interpret::{InterpretError, Outcome}, | ||||
|     interpret::{InterpretError, InterpretStatus, Outcome}, | ||||
|     inventory::Inventory, | ||||
|     modules::k3d::K3DInstallationScore, | ||||
|     modules::{ | ||||
|         k3d::K3DInstallationScore, | ||||
|         monitoring::kube_prometheus::crd::{ | ||||
|             crd_alertmanager_config::CRDPrometheus, | ||||
|             prometheus_operator::prometheus_operator_helm_chart_score, | ||||
|         }, | ||||
|         prometheus::{ | ||||
|             k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore, | ||||
|             prometheus::PrometheusApplicationMonitoring, | ||||
|         }, | ||||
|     }, | ||||
|     score::Score, | ||||
| }; | ||||
| 
 | ||||
| use super::{ | ||||
|     DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology, | ||||
|     k8s::K8sClient, | ||||
|     oberservability::monitoring::AlertReceiver, | ||||
|     tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager}, | ||||
| }; | ||||
| 
 | ||||
| #[derive(Clone, Debug)] | ||||
| struct K8sState { | ||||
|     client: Arc<K8sClient>, | ||||
|     _source: K8sSource, | ||||
|     source: K8sSource, | ||||
|     message: String, | ||||
| } | ||||
| 
 | ||||
| @ -56,8 +67,32 @@ impl K8sclient for K8sAnywhereTopology { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl PrometheusApplicationMonitoring<CRDPrometheus> for K8sAnywhereTopology { | ||||
|     async fn install_prometheus( | ||||
|         &self, | ||||
|         sender: &CRDPrometheus, | ||||
|         inventory: &Inventory, | ||||
|         receivers: Option<Vec<Box<dyn AlertReceiver<CRDPrometheus>>>>, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         let po_result = self.ensure_prometheus_operator(sender).await?; | ||||
| 
 | ||||
|         if po_result.status == InterpretStatus::NOOP { | ||||
|             debug!("Skipping Prometheus CR installation due to missing operator."); | ||||
|             return Ok(Outcome::noop()); | ||||
|         } | ||||
|         self.get_k8s_prometheus_application_score(sender.clone(), receivers) | ||||
|             .await | ||||
|             .create_interpret() | ||||
|             .execute(inventory, self) | ||||
|             .await?; | ||||
| 
 | ||||
|         Ok(Outcome::success(format!("No action, working on cluster  "))) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Serialize for K8sAnywhereTopology { | ||||
|     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||||
|     fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: serde::Serializer, | ||||
|     { | ||||
| @ -82,6 +117,19 @@ impl K8sAnywhereTopology { | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     async fn get_k8s_prometheus_application_score( | ||||
|         &self, | ||||
|         sender: CRDPrometheus, | ||||
|         receivers: Option<Vec<Box<dyn AlertReceiver<CRDPrometheus>>>>, | ||||
|     ) -> K8sPrometheusCRDAlertingScore { | ||||
|         K8sPrometheusCRDAlertingScore { | ||||
|             sender, | ||||
|             receivers: receivers.unwrap_or_else(Vec::new), | ||||
|             service_monitors: vec![], | ||||
|             prometheus_rules: vec![], | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn is_helm_available(&self) -> Result<(), String> { | ||||
|         let version_result = Command::new("helm") | ||||
|             .arg("version") | ||||
| @ -132,7 +180,7 @@ impl K8sAnywhereTopology { | ||||
|                     Some(client) => { | ||||
|                         return Ok(Some(K8sState { | ||||
|                             client: Arc::new(client), | ||||
|                             _source: K8sSource::Kubeconfig, | ||||
|                             source: K8sSource::Kubeconfig, | ||||
|                             message: format!("Loaded k8s client from kubeconfig {kubeconfig}"), | ||||
|                         })); | ||||
|                     } | ||||
| @ -174,7 +222,7 @@ impl K8sAnywhereTopology { | ||||
|         let state = match k3d.get_client().await { | ||||
|             Ok(client) => K8sState { | ||||
|                 client: Arc::new(K8sClient::new(client)), | ||||
|                 _source: K8sSource::LocalK3d, | ||||
|                 source: K8sSource::LocalK3d, | ||||
|                 message: "K8s client ready".to_string(), | ||||
|             }, | ||||
|             Err(_) => todo!(), | ||||
| @ -190,6 +238,7 @@ impl K8sAnywhereTopology { | ||||
| 
 | ||||
|         self.tenant_manager | ||||
|             .get_or_try_init(async || -> Result<K8sTenantManager, String> { | ||||
|                 // TODO: check whether to use the K8s or the K3d/s tenant manager (ref. issue https://git.nationtech.io/NationTech/harmony/issues/94)
 | ||||
|                 let k8s_client = self.k8s_client().await?; | ||||
|                 Ok(K8sTenantManager::new(k8s_client)) | ||||
|             }) | ||||
| @ -206,6 +255,48 @@ impl K8sAnywhereTopology { | ||||
|             )), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     async fn ensure_prometheus_operator( | ||||
|         &self, | ||||
|         sender: &CRDPrometheus, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         let status = Command::new("sh") | ||||
|             .args(["-c", "kubectl get crd -A | grep -i prometheuses"]) | ||||
|             .status() | ||||
|             .map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e)))?; | ||||
| 
 | ||||
|         if !status.success() { | ||||
|             if let Some(Some(k8s_state)) = self.k8s_state.get() { | ||||
|                 match k8s_state.source { | ||||
|                     K8sSource::LocalK3d => { | ||||
|                         debug!("installing prometheus operator"); | ||||
|                         let op_score = | ||||
|                             prometheus_operator_helm_chart_score(sender.namespace.clone()); | ||||
|                         op_score | ||||
|                             .create_interpret() | ||||
|                             .execute(&Inventory::empty(), self) | ||||
|                             .await?; | ||||
|                         return Ok(Outcome::success( | ||||
|                             "installed prometheus operator".to_string(), | ||||
|                         )); | ||||
|                     } | ||||
|                     K8sSource::Kubeconfig => { | ||||
|                         debug!("unable to install prometheus operator, contact cluster admin"); | ||||
|                         return Ok(Outcome::noop()); | ||||
|                     } | ||||
|                 } | ||||
|             } else { | ||||
|                 warn!("Unable to detect k8s_state. Skipping Prometheus Operator install."); | ||||
|                 return Ok(Outcome::noop()); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         debug!("Prometheus operator is already present, skipping install"); | ||||
| 
 | ||||
|         Ok(Outcome::success( | ||||
|             "prometheus operator present in cluster".to_string(), | ||||
|         )) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone, Debug)] | ||||
|  | ||||
| @ -1,3 +1,5 @@ | ||||
| use std::any::Any; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use log::debug; | ||||
| 
 | ||||
| @ -62,7 +64,9 @@ impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInte | ||||
| #[async_trait] | ||||
| pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync { | ||||
|     async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>; | ||||
|     fn name(&self) -> String; | ||||
|     fn clone_box(&self) -> Box<dyn AlertReceiver<S>>; | ||||
|     fn as_any(&self) -> &dyn Any; | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| @ -72,6 +76,6 @@ pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync { | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| pub trait ScrapeTarger<S: AlertSender> { | ||||
| pub trait ScrapeTarget<S: AlertSender> { | ||||
|     async fn install(&self, sender: &S) -> Result<(), InterpretError>; | ||||
| } | ||||
|  | ||||
| @ -231,8 +231,13 @@ impl K8sTenantManager { | ||||
|               { | ||||
|                 "to": [ | ||||
|                   { | ||||
|                       // TODO: this IP range is from the docker network that k3d is running on.
 | ||||
|                       // Since k3d does not deploy kube-api-server as a pod, it needs to have the
 | ||||
|                       // IP address opened up.
 | ||||
|                       // We need to find a way to automatically detect the IP address from the
 | ||||
|                       // docker network.
 | ||||
|                 "ipBlock": { | ||||
|                     "cidr": "172.23.0.0/16", | ||||
|                     "cidr": "172.24.0.0/16", | ||||
|                     } | ||||
|                   } | ||||
|                 ] | ||||
|  | ||||
| @ -1,45 +1,60 @@ | ||||
| use std::sync::Arc; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use base64::{Engine as _, engine::general_purpose}; | ||||
| use log::{debug, info}; | ||||
| use crate::modules::application::{Application, ApplicationFeature}; | ||||
| use crate::modules::monitoring::application_monitoring::application_monitoring_score::ApplicationMonitoringScore; | ||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus; | ||||
| 
 | ||||
| use crate::{ | ||||
|     inventory::Inventory, | ||||
|     modules::{ | ||||
|         application::{ApplicationFeature, OCICompliant}, | ||||
|         monitoring::{ | ||||
|             alert_channel::webhook_receiver::WebhookReceiver, | ||||
|             kube_prometheus::{ | ||||
|                 helm_prometheus_alert_score::HelmPrometheusAlertingScore, | ||||
|                 types::{NamespaceSelector, ServiceMonitor}, | ||||
|             }, | ||||
|             ntfy::ntfy::NtfyScore, | ||||
|         }, | ||||
|     modules::monitoring::{ | ||||
|         alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore, | ||||
|     }, | ||||
|     score::Score, | ||||
|     topology::{HelmCommand, K8sclient, Topology, Url, tenant::TenantManager}, | ||||
| }; | ||||
| use crate::{ | ||||
|     modules::prometheus::prometheus::PrometheusApplicationMonitoring, | ||||
|     topology::oberservability::monitoring::AlertReceiver, | ||||
| }; | ||||
| use async_trait::async_trait; | ||||
| use base64::{Engine as _, engine::general_purpose}; | ||||
| use log::{debug, info}; | ||||
| 
 | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct Monitoring { | ||||
|     pub application: Arc<dyn OCICompliant>, | ||||
|     pub application: Arc<dyn Application>, | ||||
|     pub alert_receiver: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>, | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl<T: Topology + HelmCommand + K8sclient + 'static + TenantManager> ApplicationFeature<T> | ||||
|     for Monitoring | ||||
| impl< | ||||
|     T: Topology | ||||
|         + HelmCommand | ||||
|         + 'static | ||||
|         + TenantManager | ||||
|         + K8sclient | ||||
|         + std::fmt::Debug | ||||
|         + PrometheusApplicationMonitoring<CRDPrometheus>, | ||||
| > ApplicationFeature<T> for Monitoring | ||||
| { | ||||
|     async fn ensure_installed(&self, topology: &T) -> Result<(), String> { | ||||
|         info!("Ensuring monitoring is available for application"); | ||||
|         let namespace = topology | ||||
|             .get_tenant_config() | ||||
|             .await | ||||
|             .map(|ns| ns.name.clone()) | ||||
|             .unwrap_or_else(|| self.application.name()); | ||||
| 
 | ||||
|         let mut alerting_score = ApplicationMonitoringScore { | ||||
|             sender: CRDPrometheus { | ||||
|                 namespace: namespace.clone(), | ||||
|                 client: topology.k8s_client().await.unwrap(), | ||||
|             }, | ||||
|             application: self.application.clone(), | ||||
|             receivers: self.alert_receiver.clone(), | ||||
|         }; | ||||
|         let ntfy = NtfyScore { | ||||
|             // namespace: topology
 | ||||
|             //     .get_tenant_config()
 | ||||
|             //     .await
 | ||||
|             //     .expect("couldn't get tenant config")
 | ||||
|             //     .name,
 | ||||
|             namespace: self.application.name(), | ||||
|             namespace: namespace.clone(), | ||||
|             host: "localhost".to_string(), | ||||
|         }; | ||||
|         ntfy.create_interpret() | ||||
| @ -70,7 +85,7 @@ impl<T: Topology + HelmCommand + K8sclient + 'static + TenantManager> Applicatio | ||||
|                 url::Url::parse( | ||||
|                     format!( | ||||
|                         "http://ntfy.{}.svc.cluster.local/rust-web-app?auth={ntfy_default_auth_param}", | ||||
|                         self.application.name() | ||||
|                         namespace.clone() | ||||
|                     ) | ||||
|                     .as_str(), | ||||
|                 ) | ||||
| @ -78,26 +93,7 @@ impl<T: Topology + HelmCommand + K8sclient + 'static + TenantManager> Applicatio | ||||
|             ), | ||||
|         }; | ||||
| 
 | ||||
|         let mut service_monitor = ServiceMonitor::default(); | ||||
|         service_monitor.namespace_selector = Some(NamespaceSelector { | ||||
|             any: true, | ||||
|             match_names: vec![], | ||||
|         }); | ||||
| 
 | ||||
|         service_monitor.name = "rust-webapp".to_string(); | ||||
| 
 | ||||
|         // let alerting_score = ApplicationPrometheusMonitoringScore {
 | ||||
|         //     receivers: vec![Box::new(ntfy_receiver)],
 | ||||
|         //     rules: vec![],
 | ||||
|         //     service_monitors: vec![service_monitor],
 | ||||
|         // };
 | ||||
| 
 | ||||
|         let alerting_score = HelmPrometheusAlertingScore { | ||||
|             receivers: vec![Box::new(ntfy_receiver)], | ||||
|             rules: vec![], | ||||
|             service_monitors: vec![service_monitor], | ||||
|         }; | ||||
| 
 | ||||
|         alerting_score.receivers.push(Box::new(ntfy_receiver)); | ||||
|         alerting_score | ||||
|             .create_interpret() | ||||
|             .execute(&Inventory::empty(), topology) | ||||
|  | ||||
| @ -10,6 +10,7 @@ pub use oci::*; | ||||
| pub use rust::*; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use serde::Serialize; | ||||
| 
 | ||||
| use crate::{ | ||||
|     data::{Id, Version}, | ||||
| @ -78,3 +79,12 @@ impl<A: Application, T: Topology + std::fmt::Debug> Interpret<T> for Application | ||||
|         todo!() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Serialize for dyn Application { | ||||
|     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: serde::Serializer, | ||||
|     { | ||||
|         todo!() | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -1,7 +1,16 @@ | ||||
| use std::any::Any; | ||||
| use std::collections::BTreeMap; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use k8s_openapi::api::core::v1::Secret; | ||||
| use kube::api::ObjectMeta; | ||||
| use serde::Serialize; | ||||
| use serde_json::json; | ||||
| use serde_yaml::{Mapping, Value}; | ||||
| 
 | ||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{ | ||||
|     AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus, | ||||
| }; | ||||
| use crate::{ | ||||
|     interpret::{InterpretError, Outcome}, | ||||
|     modules::monitoring::{ | ||||
| @ -20,14 +29,98 @@ pub struct DiscordWebhook { | ||||
|     pub url: Url, | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl AlertReceiver<CRDPrometheus> for DiscordWebhook { | ||||
|     /// Stores the webhook URL in a `<name>-secret` Secret, then applies an
 | ||||
|     /// `AlertmanagerConfig` whose Discord receiver reads the URL from that Secret.
 | ||||
|     ///
 | ||||
|     /// # Errors
 | ||||
|     /// Returns an error if applying the Secret or the `AlertmanagerConfig` fails.
 | ||||
|     async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> { | ||||
|         let ns = sender.namespace.clone(); | ||||
|         let secret_name = format!("{}-secret", self.name); | ||||
| 
 | ||||
|         let mut string_data = BTreeMap::new(); | ||||
|         string_data.insert("webhook-url".to_string(), self.url.to_string()); | ||||
| 
 | ||||
|         let secret = Secret { | ||||
|             metadata: kube::core::ObjectMeta { | ||||
|                 name: Some(secret_name.clone()), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             string_data: Some(string_data), | ||||
|             type_: Some("Opaque".to_string()), | ||||
|             ..Default::default() | ||||
|         }; | ||||
| 
 | ||||
|         // The AlertmanagerConfig below references this Secret, so a failed apply must
 | ||||
|         // abort the install instead of being silently discarded.
 | ||||
|         sender.client.apply(&secret, Some(&ns)).await?; | ||||
| 
 | ||||
|         let spec = AlertmanagerConfigSpec { | ||||
|             data: json!({ | ||||
|                 "route": { | ||||
|                     "receiver": self.name, | ||||
|                 }, | ||||
|                 "receivers": [ | ||||
|                     { | ||||
|                         "name": self.name, | ||||
|                         "discordConfigs": [ | ||||
|                             { | ||||
|                             "apiURL": { | ||||
|                                 "name": secret_name, | ||||
|                                 "key":  "webhook-url", | ||||
|                             }, | ||||
|                             "title": "{{ template \"discord.default.title\" . }}", | ||||
|                             "message": "{{ template \"discord.default.message\" . }}" | ||||
|                             } | ||||
|                         ] | ||||
|                     } | ||||
|                 ] | ||||
|             }), | ||||
|         }; | ||||
| 
 | ||||
|         let alertmanager_configs = AlertmanagerConfig { | ||||
|             metadata: ObjectMeta { | ||||
|                 name: Some(self.name.clone()), | ||||
|                 labels: Some(std::collections::BTreeMap::from([( | ||||
|                     "alertmanagerConfig".to_string(), | ||||
|                     "enabled".to_string(), | ||||
|                 )])), | ||||
|                 namespace: Some(ns), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             spec, | ||||
|         }; | ||||
| 
 | ||||
|         sender | ||||
|             .client | ||||
|             .apply(&alertmanager_configs, Some(&sender.namespace)) | ||||
|             .await?; | ||||
|         Ok(Outcome::success(format!( | ||||
|             "installed crd-alertmanagerconfigs for {}", | ||||
|             self.name | ||||
|         ))) | ||||
|     } | ||||
|     /// Fixed identifier of this receiver kind.
 | ||||
|     fn name(&self) -> String { | ||||
|         "discord-webhook".to_string() | ||||
|     } | ||||
|     /// Clones this receiver into a new boxed trait object.
 | ||||
|     fn clone_box(&self) -> Box<dyn AlertReceiver<CRDPrometheus>> { | ||||
|         Box::new(self.clone()) | ||||
|     } | ||||
|     /// Exposes the concrete receiver as `Any`.
 | ||||
|     fn as_any(&self) -> &dyn Any { | ||||
|         self | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl AlertReceiver<Prometheus> for DiscordWebhook { | ||||
|     /// Hands this receiver to the `Prometheus` sender for installation.
 | ||||
|     async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> { | ||||
|         let outcome = sender.install_receiver(self).await; | ||||
|         outcome | ||||
|     } | ||||
|     /// Fixed identifier of this receiver kind.
 | ||||
|     fn name(&self) -> String { | ||||
|         String::from("discord-webhook") | ||||
|     } | ||||
|     /// Clones this receiver into a new boxed trait object.
 | ||||
|     fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> { | ||||
|         let duplicate: Self = self.clone(); | ||||
|         Box::new(duplicate) | ||||
|     } | ||||
|     /// Exposes the concrete receiver as `Any`.
 | ||||
|     fn as_any(&self) -> &dyn Any { | ||||
|         self | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| @ -48,6 +141,12 @@ impl AlertReceiver<KubePrometheus> for DiscordWebhook { | ||||
|     fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> { | ||||
|         Box::new(self.clone()) | ||||
|     } | ||||
|     fn name(&self) -> String { | ||||
|         "discord-webhook".to_string() | ||||
|     } | ||||
|     fn as_any(&self) -> &dyn Any { | ||||
|         self | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
|  | ||||
| @ -1,11 +1,19 @@ | ||||
| use std::any::Any; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use kube::api::ObjectMeta; | ||||
| use log::debug; | ||||
| use serde::Serialize; | ||||
| use serde_json::json; | ||||
| use serde_yaml::{Mapping, Value}; | ||||
| 
 | ||||
| use crate::{ | ||||
|     interpret::{InterpretError, Outcome}, | ||||
|     modules::monitoring::{ | ||||
|         kube_prometheus::{ | ||||
|             crd::crd_alertmanager_config::{ | ||||
|                 AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus, | ||||
|             }, | ||||
|             prometheus::{KubePrometheus, KubePrometheusReceiver}, | ||||
|             types::{AlertChannelConfig, AlertManagerChannelConfig}, | ||||
|         }, | ||||
| @ -20,14 +28,81 @@ pub struct WebhookReceiver { | ||||
|     pub url: Url, | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl AlertReceiver<CRDPrometheus> for WebhookReceiver { | ||||
|     /// Applies an `AlertmanagerConfig` that routes alerts to this webhook URL.
 | ||||
|     async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> { | ||||
|         let namespace = &sender.namespace; | ||||
|         let receiver_name = &self.name; | ||||
| 
 | ||||
|         let routing = json!({ | ||||
|             "route": { | ||||
|                 "receiver": receiver_name, | ||||
|             }, | ||||
|             "receivers": [ | ||||
|                 { | ||||
|                     "name": receiver_name, | ||||
|                     "webhookConfigs": [ | ||||
|                         { | ||||
|                         "url": self.url, | ||||
|                         } | ||||
|                     ] | ||||
|                 } | ||||
|             ] | ||||
|         }); | ||||
| 
 | ||||
|         let labels = std::collections::BTreeMap::from([( | ||||
|             "alertmanagerConfig".to_string(), | ||||
|             "enabled".to_string(), | ||||
|         )]); | ||||
|         let metadata = ObjectMeta { | ||||
|             name: Some(receiver_name.clone()), | ||||
|             labels: Some(labels), | ||||
|             namespace: Some(namespace.clone()), | ||||
|             ..Default::default() | ||||
|         }; | ||||
|         let alertmanager_configs = AlertmanagerConfig { | ||||
|             metadata, | ||||
|             spec: AlertmanagerConfigSpec { data: routing }, | ||||
|         }; | ||||
|         debug!("alert manager configs: \n{:#?}", alertmanager_configs); | ||||
| 
 | ||||
|         sender | ||||
|             .client | ||||
|             .apply(&alertmanager_configs, Some(namespace)) | ||||
|             .await?; | ||||
| 
 | ||||
|         let message = format!("installed crd-alertmanagerconfigs for {}", receiver_name); | ||||
|         Ok(Outcome::success(message)) | ||||
|     } | ||||
| 
 | ||||
|     /// Fixed identifier of this receiver kind.
 | ||||
|     fn name(&self) -> String { | ||||
|         String::from("webhook-receiver") | ||||
|     } | ||||
| 
 | ||||
|     /// Clones this receiver into a new boxed trait object.
 | ||||
|     fn clone_box(&self) -> Box<dyn AlertReceiver<CRDPrometheus>> { | ||||
|         let duplicate: Self = self.clone(); | ||||
|         Box::new(duplicate) | ||||
|     } | ||||
| 
 | ||||
|     /// Exposes the concrete receiver as `Any`.
 | ||||
|     fn as_any(&self) -> &dyn Any { | ||||
|         self | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl AlertReceiver<Prometheus> for WebhookReceiver { | ||||
|     /// Hands this receiver to the `Prometheus` sender for installation.
 | ||||
|     async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> { | ||||
|         let outcome = sender.install_receiver(self).await; | ||||
|         outcome | ||||
|     } | ||||
|     /// Fixed identifier of this receiver kind.
 | ||||
|     fn name(&self) -> String { | ||||
|         String::from("webhook-receiver") | ||||
|     } | ||||
|     /// Clones this receiver into a new boxed trait object.
 | ||||
|     fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> { | ||||
|         let duplicate: Self = self.clone(); | ||||
|         Box::new(duplicate) | ||||
|     } | ||||
|     /// Exposes the concrete receiver as `Any`.
 | ||||
|     fn as_any(&self) -> &dyn Any { | ||||
|         self | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| @ -44,9 +119,15 @@ impl AlertReceiver<KubePrometheus> for WebhookReceiver { | ||||
|     async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> { | ||||
|         sender.install_receiver(self).await | ||||
|     } | ||||
|     fn name(&self) -> String { | ||||
|         "webhook-receiver".to_string() | ||||
|     } | ||||
|     fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> { | ||||
|         Box::new(self.clone()) | ||||
|     } | ||||
|     fn as_any(&self) -> &dyn Any { | ||||
|         self | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
|  | ||||
| @ -0,0 +1,78 @@ | ||||
| use std::sync::Arc; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use serde::Serialize; | ||||
| 
 | ||||
| use crate::{ | ||||
|     data::{Id, Version}, | ||||
|     interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, | ||||
|     inventory::Inventory, | ||||
|     modules::{ | ||||
|         application::Application, | ||||
|         monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus, | ||||
|         prometheus::prometheus::PrometheusApplicationMonitoring, | ||||
|     }, | ||||
|     score::Score, | ||||
|     topology::{Topology, oberservability::monitoring::AlertReceiver}, | ||||
| }; | ||||
| 
 | ||||
| /// Score bundling a `CRDPrometheus` sender, an application and the alert receivers
 | ||||
| /// that are passed to `install_prometheus` when the score is interpreted.
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub struct ApplicationMonitoringScore { | ||||
|     pub sender: CRDPrometheus, | ||||
|     pub application: Arc<dyn Application>, | ||||
|     pub receivers: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>, | ||||
| } | ||||
| 
 | ||||
| impl<T: Topology + PrometheusApplicationMonitoring<CRDPrometheus>> Score<T> | ||||
|     for ApplicationMonitoringScore | ||||
| { | ||||
|     /// Builds the interpret that carries its own copy of this score.
 | ||||
|     fn create_interpret(&self) -> Box<dyn Interpret<T>> { | ||||
|         let interpret = ApplicationMonitoringInterpret { | ||||
|             score: self.clone(), | ||||
|         }; | ||||
|         Box::new(interpret) | ||||
|     } | ||||
| 
 | ||||
|     /// Fixed display name of this score.
 | ||||
|     fn name(&self) -> String { | ||||
|         String::from("ApplicationMonitoringScore") | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /// Interpret created by `ApplicationMonitoringScore`; holds a clone of that score.
 | ||||
| #[derive(Debug)] | ||||
| pub struct ApplicationMonitoringInterpret { | ||||
|     score: ApplicationMonitoringScore, | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl<T: Topology + PrometheusApplicationMonitoring<CRDPrometheus>> Interpret<T> | ||||
|     for ApplicationMonitoringInterpret | ||||
| { | ||||
|     /// Delegates to the topology's `install_prometheus` with the score's sender and
 | ||||
|     /// a clone of its receivers.
 | ||||
|     async fn execute( | ||||
|         &self, | ||||
|         inventory: &Inventory, | ||||
|         topology: &T, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         let sender = &self.score.sender; | ||||
|         let receivers = Some(self.score.receivers.clone()); | ||||
|         topology | ||||
|             .install_prometheus(sender, inventory, receivers) | ||||
|             .await | ||||
|     } | ||||
| 
 | ||||
|     /// # Panics
 | ||||
|     /// Always panics: not implemented yet.
 | ||||
|     fn get_name(&self) -> InterpretName { | ||||
|         todo!() | ||||
|     } | ||||
| 
 | ||||
|     /// # Panics
 | ||||
|     /// Always panics: not implemented yet.
 | ||||
|     fn get_version(&self) -> Version { | ||||
|         todo!() | ||||
|     } | ||||
| 
 | ||||
|     /// # Panics
 | ||||
|     /// Always panics: not implemented yet.
 | ||||
|     fn get_status(&self) -> InterpretStatus { | ||||
|         todo!() | ||||
|     } | ||||
| 
 | ||||
|     /// # Panics
 | ||||
|     /// Always panics: not implemented yet.
 | ||||
|     fn get_children(&self) -> Vec<Id> { | ||||
|         todo!() | ||||
|     } | ||||
| } | ||||
| @ -1,44 +0,0 @@ | ||||
| use std::sync::{Arc, Mutex}; | ||||
| 
 | ||||
| use serde::Serialize; | ||||
| 
 | ||||
| use crate::{ | ||||
|     modules::monitoring::{ | ||||
|         kube_prometheus::types::ServiceMonitor, | ||||
|         prometheus::{prometheus::Prometheus, prometheus_config::PrometheusConfig}, | ||||
|     }, | ||||
|     score::Score, | ||||
|     topology::{ | ||||
|         HelmCommand, Topology, | ||||
|         oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret}, | ||||
|         tenant::TenantManager, | ||||
|     }, | ||||
| }; | ||||
| 
 | ||||
| #[derive(Clone, Debug, Serialize)] | ||||
| pub struct ApplicationPrometheusMonitoringScore { | ||||
|     pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>, | ||||
|     pub rules: Vec<Box<dyn AlertRule<Prometheus>>>, | ||||
|     pub service_monitors: Vec<ServiceMonitor>, | ||||
| } | ||||
| 
 | ||||
| impl<T: Topology + HelmCommand + TenantManager> Score<T> for ApplicationPrometheusMonitoringScore { | ||||
|     fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> { | ||||
|         let mut prom_config = PrometheusConfig::new(); | ||||
|         prom_config.alert_manager = true; | ||||
| 
 | ||||
|         let config = Arc::new(Mutex::new(prom_config)); | ||||
|         config | ||||
|             .try_lock() | ||||
|             .expect("couldn't lock config") | ||||
|             .additional_service_monitors = self.service_monitors.clone(); | ||||
|         Box::new(AlertingInterpret { | ||||
|             sender: Prometheus::new(), | ||||
|             receivers: self.receivers.clone(), | ||||
|             rules: self.rules.clone(), | ||||
|         }) | ||||
|     } | ||||
|     fn name(&self) -> String { | ||||
|         "ApplicationPrometheusMonitoringScore".to_string() | ||||
|     } | ||||
| } | ||||
| @ -1 +1 @@ | ||||
| pub mod k8s_application_monitoring_score; | ||||
| pub mod application_monitoring_score; | ||||
|  | ||||
| @ -0,0 +1,50 @@ | ||||
| use std::sync::Arc; | ||||
| 
 | ||||
| use kube::CustomResource; | ||||
| use schemars::JsonSchema; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| 
 | ||||
| use crate::topology::{ | ||||
|     k8s::K8sClient, | ||||
|     oberservability::monitoring::{AlertReceiver, AlertSender}, | ||||
| }; | ||||
| 
 | ||||
| /// Spec of the namespaced `AlertmanagerConfig` custom resource
 | ||||
| /// (`monitoring.coreos.com/v1alpha1`), carried as free-form JSON.
 | ||||
| #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[kube(
 | ||||
|     group = "monitoring.coreos.com", | ||||
|     version = "v1alpha1", | ||||
|     kind = "AlertmanagerConfig", | ||||
|     plural = "alertmanagerconfigs", | ||||
|     namespaced | ||||
| )] | ||||
| pub struct AlertmanagerConfigSpec { | ||||
|     /// Flattened, so the keys of this JSON value become the spec's own keys.
 | ||||
|     #[serde(flatten)] | ||||
|     pub data: serde_json::Value, | ||||
| } | ||||
| 
 | ||||
| /// Alert sender for the CRD-based stack: a target namespace plus a shared
 | ||||
| /// Kubernetes client used to apply resources.
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| pub struct CRDPrometheus { | ||||
|     pub namespace: String, | ||||
|     pub client: Arc<K8sClient>, | ||||
| } | ||||
| 
 | ||||
| impl AlertSender for CRDPrometheus { | ||||
|     /// Fixed identifier of this sender.
 | ||||
|     fn name(&self) -> String { | ||||
|         String::from("CRDAlertManager") | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Clone for Box<dyn AlertReceiver<CRDPrometheus>> { | ||||
|     /// Clones the boxed receiver through the trait's `clone_box` hook.
 | ||||
|     fn clone(&self) -> Self { | ||||
|         let duplicate: Self = (**self).clone_box(); | ||||
|         duplicate | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Serialize for Box<dyn AlertReceiver<CRDPrometheus>> { | ||||
|     /// Serializes the receiver as its `name()` string.
 | ||||
|     ///
 | ||||
|     /// The trait object has no structural representation available here, so the
 | ||||
|     /// name is emitted instead of panicking when a containing score is serialized.
 | ||||
|     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: serde::Serializer, | ||||
|     { | ||||
|         serializer.serialize_str(&(**self).name()) | ||||
|     } | ||||
| } | ||||
| @ -0,0 +1,53 @@ | ||||
| use kube::CustomResource; | ||||
| use schemars::JsonSchema; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use std::collections::BTreeMap; | ||||
| 
 | ||||
| use super::crd_prometheuses::LabelSelector; | ||||
| 
 | ||||
| /// Rust CRD for `Alertmanager` from Prometheus Operator
 | ||||
| #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[kube(
 | ||||
|     group = "monitoring.coreos.com", | ||||
|     version = "v1", | ||||
|     kind = "Alertmanager", | ||||
|     plural = "alertmanagers", | ||||
|     namespaced | ||||
| )] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct AlertmanagerSpec { | ||||
|     /// Number of replicas for HA
 | ||||
|     pub replicas: i32, | ||||
| 
 | ||||
|     /// Selectors for AlertmanagerConfig CRDs
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub alertmanager_config_selector: Option<LabelSelector>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub alertmanager_config_namespace_selector: Option<LabelSelector>, | ||||
| 
 | ||||
|     /// Optional pod metadata. NOTE(review): typed as `LabelSelector` here; confirm the intended shape.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub pod_metadata: Option<LabelSelector>, | ||||
| 
 | ||||
|     /// Optional version string; presumably the Alertmanager release to run (TODO confirm).
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub version: Option<String>, | ||||
| } | ||||
| 
 | ||||
| impl Default for AlertmanagerSpec { | ||||
|     /// One replica, an empty `AlertmanagerConfig` label selector, everything else unset.
 | ||||
|     fn default() -> Self { | ||||
|         // An empty selector matches every AlertmanagerConfig in the namespace.
 | ||||
|         let select_all_configs = LabelSelector::default(); | ||||
| 
 | ||||
|         Self { | ||||
|             replicas: 1, | ||||
|             alertmanager_config_selector: Some(select_all_configs), | ||||
|             // No namespace selector: configs are matched in the same namespace.
 | ||||
|             alertmanager_config_namespace_selector: None, | ||||
|             pod_metadata: None, | ||||
|             version: None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @ -0,0 +1,30 @@ | ||||
| use std::collections::BTreeMap; | ||||
| 
 | ||||
| use crate::modules::{ | ||||
|     monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule, | ||||
|     prometheus::alerts::k8s::{ | ||||
|         deployment::alert_deployment_unavailable, | ||||
|         pod::{alert_container_restarting, alert_pod_not_ready, pod_failed}, | ||||
|         pvc::high_pvc_fill_rate_over_two_days, | ||||
|         service::alert_service_down, | ||||
|     }, | ||||
| }; | ||||
| 
 | ||||
| use super::crd_prometheus_rules::Rule; | ||||
| 
 | ||||
| /// Returns the default application alert rules, each converted into a CRD `Rule`.
 | ||||
| pub fn build_default_application_rules() -> Vec<Rule> { | ||||
|     vec![ | ||||
|         pod_failed().into(), | ||||
|         alert_container_restarting().into(), | ||||
|         alert_pod_not_ready().into(), | ||||
|         alert_service_down().into(), | ||||
|         alert_deployment_unavailable().into(), | ||||
|         high_pvc_fill_rate_over_two_days().into(), | ||||
|     ] | ||||
| } | ||||
| @ -0,0 +1,153 @@ | ||||
| use std::collections::BTreeMap; | ||||
| 
 | ||||
| use kube::CustomResource; | ||||
| use schemars::JsonSchema; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| 
 | ||||
| use super::crd_prometheuses::LabelSelector; | ||||
| 
 | ||||
| /// Spec of the namespaced `Grafana` custom resource (`grafana.integreatly.org/v1beta1`).
 | ||||
| /// Every field is optional and omitted from the serialized output when `None`.
 | ||||
| #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[kube(
 | ||||
|     group = "grafana.integreatly.org", | ||||
|     version = "v1beta1", | ||||
|     kind = "Grafana", | ||||
|     plural = "grafanas", | ||||
|     namespaced | ||||
| )] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaSpec { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub config: Option<GrafanaConfig>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub admin_user: Option<String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub admin_password: Option<String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub ingress: Option<GrafanaIngress>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub persistence: Option<GrafanaPersistence>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub resources: Option<ResourceRequirements>, | ||||
| } | ||||
| 
 | ||||
| /// Optional `log` and `security` sections used by `GrafanaSpec::config`.
 | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaConfig { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub log: Option<GrafanaLogConfig>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub security: Option<GrafanaSecurityConfig>, | ||||
| } | ||||
| 
 | ||||
| /// Optional log `mode` and `level` strings used by `GrafanaConfig::log`.
 | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaLogConfig { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub mode: Option<String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub level: Option<String>, | ||||
| } | ||||
| 
 | ||||
| /// Optional admin credentials used by `GrafanaConfig::security`.
 | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaSecurityConfig { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub admin_user: Option<String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub admin_password: Option<String>, | ||||
| } | ||||
| 
 | ||||
| /// Optional ingress toggle and host list used by `GrafanaSpec::ingress`.
 | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaIngress { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub enabled: Option<bool>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub hosts: Option<Vec<String>>, | ||||
| } | ||||
| 
 | ||||
| /// Optional persistence settings used by `GrafanaSpec::persistence`.
 | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaPersistence { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub enabled: Option<bool>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub storage_class_name: Option<String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub size: Option<String>, | ||||
| } | ||||
| 
 | ||||
| // ------------------------------------------------------------------------------------------------
 | ||||
| 
 | ||||
| /// Spec of the namespaced `GrafanaDashboard` custom resource
 | ||||
| /// (`grafana.integreatly.org/v1beta1`).
 | ||||
| #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[kube(
 | ||||
|     group = "grafana.integreatly.org", | ||||
|     version = "v1beta1", | ||||
|     kind = "GrafanaDashboard", | ||||
|     plural = "grafanadashboards", | ||||
|     namespaced | ||||
| )] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaDashboardSpec { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub resync_period: Option<String>, | ||||
| 
 | ||||
|     pub instance_selector: LabelSelector, | ||||
| 
 | ||||
|     // Presumably the dashboard definition as a JSON document string (TODO confirm).
 | ||||
|     pub json: String, | ||||
| } | ||||
| 
 | ||||
| // ------------------------------------------------------------------------------------------------
 | ||||
| 
 | ||||
| /// Spec of the namespaced `GrafanaDatasource` custom resource
 | ||||
| /// (`grafana.integreatly.org/v1beta1`).
 | ||||
| #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[kube(
 | ||||
|     group = "grafana.integreatly.org", | ||||
|     version = "v1beta1", | ||||
|     kind = "GrafanaDatasource", | ||||
|     plural = "grafanadatasources", | ||||
|     namespaced | ||||
| )] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaDatasourceSpec { | ||||
|     pub instance_selector: LabelSelector, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub allow_cross_namespace_import: Option<bool>, | ||||
| 
 | ||||
|     pub datasource: GrafanaDatasourceConfig, | ||||
| } | ||||
| 
 | ||||
| /// Datasource definition embedded in `GrafanaDatasourceSpec::datasource`.
 | ||||
| ///
 | ||||
| /// Optional fields are omitted from the serialized output when `None`.
 | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct GrafanaDatasourceConfig { | ||||
|     pub access: String, | ||||
|     // Skipped when unset, like every other optional field in this file, instead of
 | ||||
|     // being emitted as an explicit `null`.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub database: Option<String>, | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub json_data: Option<BTreeMap<String, String>>, | ||||
|     pub name: String, | ||||
|     pub r#type: String, | ||||
|     pub url: String, | ||||
| } | ||||
| 
 | ||||
| // ------------------------------------------------------------------------------------------------
 | ||||
| 
 | ||||
| /// Resource `limits` and `requests` maps; an empty map is omitted when serializing.
 | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct ResourceRequirements { | ||||
|     #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] | ||||
|     pub limits: BTreeMap<String, String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] | ||||
|     pub requests: BTreeMap<String, String>, | ||||
| } | ||||
| @ -0,0 +1,59 @@ | ||||
| use std::collections::BTreeMap; | ||||
| 
 | ||||
| use kube::CustomResource; | ||||
| use schemars::JsonSchema; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| 
 | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; | ||||
| 
 | ||||
| use super::crd_default_rules::build_default_application_rules; | ||||
| 
 | ||||
| /// Spec of the namespaced `PrometheusRule` custom resource (`monitoring.coreos.com/v1`).
 | ||||
| #[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)] | ||||
| #[kube(
 | ||||
|     group = "monitoring.coreos.com", | ||||
|     version = "v1", | ||||
|     kind = "PrometheusRule", | ||||
|     plural = "prometheusrules", | ||||
|     namespaced | ||||
| )] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct PrometheusRuleSpec { | ||||
|     pub groups: Vec<RuleGroup>, | ||||
| } | ||||
| 
 | ||||
| /// A named list of rules inside a `PrometheusRuleSpec`.
 | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | ||||
| pub struct RuleGroup { | ||||
|     pub name: String, | ||||
|     pub rules: Vec<Rule>, | ||||
| } | ||||
| 
 | ||||
| /// A single rule entry of a `RuleGroup`; unset fields are omitted when serializing.
 | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Rule { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub alert: Option<String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub expr: Option<String>, | ||||
| 
 | ||||
|     // NOTE(review): relies on the camelCase rename mapping `for_` to the key `for`; confirm.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub for_: Option<String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub labels: Option<std::collections::BTreeMap<String, String>>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub annotations: Option<std::collections::BTreeMap<String, String>>, | ||||
| } | ||||
| 
 | ||||
| impl From<PrometheusAlertRule> for Rule { | ||||
|     /// Converts an alert rule into its CRD form, collecting labels and annotations
 | ||||
|     /// into `BTreeMap`s.
 | ||||
|     fn from(value: PrometheusAlertRule) -> Self { | ||||
|         let labels: BTreeMap<_, _> = value.labels.into_iter().collect(); | ||||
|         let annotations: BTreeMap<_, _> = value.annotations.into_iter().collect(); | ||||
| 
 | ||||
|         Self { | ||||
|             alert: Some(value.alert), | ||||
|             expr: Some(value.expr), | ||||
|             for_: value.r#for, | ||||
|             labels: Some(labels), | ||||
|             annotations: Some(annotations), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @ -0,0 +1,118 @@ | ||||
| use std::collections::BTreeMap; | ||||
| 
 | ||||
| use kube::CustomResource; | ||||
| use schemars::JsonSchema; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| 
 | ||||
| use crate::modules::monitoring::kube_prometheus::types::Operator; | ||||
| 
 | ||||
| /// Spec of the namespaced `Prometheus` custom resource (`monitoring.coreos.com/v1`).
 | ||||
| /// Optional fields are omitted from the serialized output when `None`.
 | ||||
| #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[kube(
 | ||||
|     group = "monitoring.coreos.com", | ||||
|     version = "v1", | ||||
|     kind = "Prometheus", | ||||
|     plural = "prometheuses", | ||||
|     namespaced | ||||
| )] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct PrometheusSpec { | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub alerting: Option<PrometheusSpecAlerting>, | ||||
| 
 | ||||
|     pub service_account_name: String, | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub service_monitor_namespace_selector: Option<LabelSelector>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub service_monitor_selector: Option<LabelSelector>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub service_discovery_role: Option<String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub pod_monitor_selector: Option<LabelSelector>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub rule_selector: Option<LabelSelector>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub rule_namespace_selector: Option<LabelSelector>, | ||||
| } | ||||
| 
 | ||||
| /// Selects namespaces by name via `matchNames`; an empty list is omitted when serializing.
 | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct NamespaceSelector { | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub match_names: Vec<String>, | ||||
| } | ||||
| 
 | ||||
| /// Alerting configuration for `PrometheusSpec`: the Alertmanager endpoints to use.
 | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] | ||||
| pub struct PrometheusSpecAlerting { | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub alertmanagers: Option<Vec<AlertmanagerEndpoints>>, | ||||
| } | ||||
| 
 | ||||
| /// One Alertmanager endpoint used by Prometheus; unset fields are omitted when serializing.
 | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] | ||||
| pub struct AlertmanagerEndpoints { | ||||
|     /// Name of the Alertmanager Service.
 | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub name: Option<String>, | ||||
| 
 | ||||
|     /// Namespace of the Alertmanager Service.
 | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub namespace: Option<String>, | ||||
| 
 | ||||
|     /// Port to access on the Alertmanager Service (e.g. "web").
 | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub port: Option<String>, | ||||
| 
 | ||||
|     /// Scheme to use for connecting (e.g. "http").
 | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub scheme: Option<String>, | ||||
|     // Other fields like `tls_config`, `path_prefix`, etc., can be added if needed.
 | ||||
| } | ||||
| 
 | ||||
| /// Label selector with `matchLabels` and `matchExpressions`; empty collections are
 | ||||
| /// omitted when serializing, so the default value serializes as an empty object.
 | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct LabelSelector { | ||||
|     #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] | ||||
|     pub match_labels: BTreeMap<String, String>, | ||||
| 
 | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub match_expressions: Vec<LabelSelectorRequirement>, | ||||
| } | ||||
| 
 | ||||
| /// One `matchExpressions` entry: a label key, an operator and an optional value list.
 | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct LabelSelectorRequirement { | ||||
|     pub key: String, | ||||
|     pub operator: Operator, | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub values: Vec<String>, | ||||
| } | ||||
| 
 | ||||
| impl Default for PrometheusSpec { | ||||
|     /// Defaults: `prometheus` service account, `Endpoints` discovery role, empty
 | ||||
|     /// ServiceMonitor and rule-namespace selectors, everything else unset.
 | ||||
|     fn default() -> Self { | ||||
|         Self { | ||||
|             service_account_name: String::from("prometheus"), | ||||
|             service_discovery_role: Some(String::from("Endpoints")), | ||||
| 
 | ||||
|             // Empty selector: match all ServiceMonitors in the namespace.
 | ||||
|             service_monitor_selector: Some(LabelSelector::default()), | ||||
|             // Unset (null): only this resource's own namespace.
 | ||||
|             service_monitor_namespace_selector: None, | ||||
| 
 | ||||
|             rule_namespace_selector: Some(LabelSelector::default()), | ||||
|             rule_selector: None, | ||||
| 
 | ||||
|             pod_monitor_selector: None, | ||||
|             alerting: None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @ -0,0 +1,203 @@ | ||||
/// Renders the default "Tenant Namespace Overview" Grafana dashboard as a JSON string.
///
/// The dashboard contains five panels (running pods, failed pods, deployment
/// health, container restarts per pod, firing alerts). Every PromQL expression
/// is filtered on the given `namespace`.
///
/// `namespace` is interpolated verbatim into the JSON/PromQL text; no escaping
/// is performed, so callers must pass a value that is safe inside a
/// double-quoted PromQL label matcher.
pub fn build_default_dashboard(namespace: &str) -> String {
    // Inside the template, `{{` / `}}` are literal JSON braces escaped for
    // `format!`; `{{{{pod}}}}` therefore renders as Grafana's `{{pod}}` legend
    // placeholder, and `{namespace}` is the only real substitution.
    format!(
        r#"{{
  "annotations": {{
    "list": []
  }},
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "iteration": 171105,
  "panels": [
    {{
      "datasource": "$datasource",
      "fieldConfig": {{
        "defaults": {{
          "unit": "short"
        }},
        "overrides": []
      }},
      "gridPos": {{
        "h": 6,
        "w": 6,
        "x": 0,
        "y": 0
      }},
      "id": 1,
      "options": {{
        "reduceOptions": {{
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        }}
      }},
      "pluginVersion": "9.0.0",
      "targets": [
        {{
          "expr": "sum(kube_pod_status_phase{{namespace=\"{namespace}\",  phase=\"Running\"}})",
          "legendFormat": "",
          "refId": "A"
        }}
      ],
      "title": "Pods in Namespace",
      "type": "stat"
    }},
    {{
      "datasource": "$datasource",
      "fieldConfig": {{
        "defaults": {{
          "unit": "short"
        }},
        "overrides": []
      }},
      "gridPos": {{
        "h": 6,
        "w": 6,
        "x": 6,
        "y": 0
      }},
      "id": 2,
      "options": {{
        "reduceOptions": {{
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        }}
      }},
      "pluginVersion": "9.0.0",
      "targets": [
        {{
          "expr": "sum(kube_pod_status_phase{{phase=\"Failed\", namespace=\"{namespace}\"}})",
          "legendFormat": "",
          "refId": "A"
        }}
      ],
      "title": "Pods in Failed State",
      "type": "stat"
    }},
    {{
      "datasource": "$datasource",
      "fieldConfig": {{
        "defaults": {{
          "unit": "percentunit"
        }},
        "overrides": []
      }},
      "gridPos": {{
        "h": 6,
        "w": 12,
        "x": 0,
        "y": 6
      }},
      "id": 3,
      "options": {{
        "reduceOptions": {{
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        }}
      }},
      "pluginVersion": "9.0.0",
      "targets": [
        {{
          "expr": "sum(kube_deployment_status_replicas_available{{namespace=\"{namespace}\"}}) / sum(kube_deployment_spec_replicas{{namespace=\"{namespace}\"}})",
          "legendFormat": "",
          "refId": "A"
        }}
      ],
      "title": "Deployment Health (Available / Desired)",
      "type": "stat"
    }},
    {{
      "datasource": "$datasource",
      "fieldConfig": {{
        "defaults": {{
          "unit": "short"
        }},
        "overrides": []
      }},
      "gridPos": {{
        "h": 6,
        "w": 12,
        "x": 0,
        "y": 12
      }},
      "id": 4,
      "options": {{
        "reduceOptions": {{
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        }}
      }},
      "pluginVersion": "9.0.0",
      "targets": [
        {{
          "expr": "sum by(pod) (rate(kube_pod_container_status_restarts_total{{namespace=\"{namespace}\"}}[5m]))",
          "legendFormat": "{{{{pod}}}}",
          "refId": "A"
        }}
      ],
      "title": "Container Restarts (per pod)",
      "type": "timeseries"
    }},
    {{
      "datasource": "$datasource",
      "fieldConfig": {{
        "defaults": {{
          "unit": "short"
        }},
        "overrides": []
      }},
      "gridPos": {{
        "h": 6,
        "w": 12,
        "x": 0,
        "y": 18
      }},
      "id": 5,
      "options": {{
        "reduceOptions": {{
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        }}
      }},
      "pluginVersion": "9.0.0",
      "targets": [
        {{
          "expr": "sum(ALERTS{{alertstate=\"firing\", namespace=\"{namespace}\"}}) or vector(0)",
          "legendFormat": "",
          "refId": "A"
        }}
      ],
      "title": "Firing Alerts in Namespace",
      "type": "stat"
    }}
  ],
  "schemaVersion": 36,
  "templating": {{
    "list": [
      {{
        "name": "datasource",
        "type": "datasource",
        "pluginId": "prometheus",
        "label": "Prometheus",
        "query": "prometheus",
        "refresh": 1,
        "hide": 0,
        "current": {{
          "selected": true,
          "text": "Prometheus",
          "value": "Prometheus"
        }}
      }}
    ]
  }},
  "title": "Tenant Namespace Overview",
  "version": 1
}}"#
    )
}
| @ -0,0 +1,20 @@ | ||||
| use std::str::FromStr; | ||||
| 
 | ||||
| use non_blank_string_rs::NonBlankString; | ||||
| 
 | ||||
| use crate::modules::helm::chart::HelmChartScore; | ||||
| 
 | ||||
| pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore { | ||||
|     HelmChartScore { | ||||
|         namespace: Some(NonBlankString::from_str(&ns).unwrap()), | ||||
|         release_name: NonBlankString::from_str("grafana_operator").unwrap(), | ||||
|         chart_name: NonBlankString::from_str("oci://ghcr.io/grafana/helm-charts/grafana-operator") | ||||
|             .unwrap(), | ||||
|         chart_version: None, | ||||
|         values_overrides: None, | ||||
|         values_yaml: None, | ||||
|         create_namespace: true, | ||||
|         install_only: true, | ||||
|         repository: None, | ||||
|     } | ||||
| } | ||||
							
								
								
									
										11
									
								
								harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,11 @@ | ||||
| pub mod crd_alertmanager_config; | ||||
| pub mod crd_alertmanagers; | ||||
| pub mod crd_default_rules; | ||||
| pub mod crd_grafana; | ||||
| pub mod crd_prometheus_rules; | ||||
| pub mod crd_prometheuses; | ||||
| pub mod grafana_default_dashboard; | ||||
| pub mod grafana_operator; | ||||
| pub mod prometheus_operator; | ||||
| pub mod role; | ||||
| pub mod service_monitor; | ||||
| @ -0,0 +1,22 @@ | ||||
| use std::str::FromStr; | ||||
| 
 | ||||
| use non_blank_string_rs::NonBlankString; | ||||
| 
 | ||||
| use crate::modules::helm::chart::HelmChartScore; | ||||
| 
 | ||||
| pub fn prometheus_operator_helm_chart_score(ns: String) -> HelmChartScore { | ||||
|     HelmChartScore { | ||||
|         namespace: Some(NonBlankString::from_str(&ns).unwrap()), | ||||
|         release_name: NonBlankString::from_str("prometheus-operator").unwrap(), | ||||
|         chart_name: NonBlankString::from_str( | ||||
|             "oci://hub.nationtech.io/harmony/nt-prometheus-operator", | ||||
|         ) | ||||
|         .unwrap(), | ||||
|         chart_version: None, | ||||
|         values_overrides: None, | ||||
|         values_yaml: None, | ||||
|         create_namespace: true, | ||||
|         install_only: true, | ||||
|         repository: None, | ||||
|     } | ||||
| } | ||||
							
								
								
									
										62
									
								
								harmony/src/modules/monitoring/kube_prometheus/crd/role.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								harmony/src/modules/monitoring/kube_prometheus/crd/role.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,62 @@ | ||||
| use k8s_openapi::api::{ | ||||
|     core::v1::ServiceAccount, | ||||
|     rbac::v1::{PolicyRule, Role, RoleBinding, RoleRef, Subject}, | ||||
| }; | ||||
| use kube::api::ObjectMeta; | ||||
| 
 | ||||
| pub fn build_prom_role(role_name: String, namespace: String) -> Role { | ||||
|     Role { | ||||
|         metadata: ObjectMeta { | ||||
|             name: Some(role_name), | ||||
|             namespace: Some(namespace), | ||||
|             ..Default::default() | ||||
|         }, | ||||
|         rules: Some(vec![PolicyRule { | ||||
|             api_groups: Some(vec!["".into()]), // core API group
 | ||||
|             resources: Some(vec!["services".into(), "endpoints".into(), "pods".into()]), | ||||
|             verbs: vec!["get".into(), "list".into(), "watch".into()], | ||||
|             ..Default::default() | ||||
|         }]), | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub fn build_prom_rolebinding( | ||||
|     role_name: String, | ||||
|     namespace: String, | ||||
|     service_account_name: String, | ||||
| ) -> RoleBinding { | ||||
|     RoleBinding { | ||||
|         metadata: ObjectMeta { | ||||
|             name: Some(format!("{}-rolebinding", role_name)), | ||||
|             namespace: Some(namespace.clone()), | ||||
|             ..Default::default() | ||||
|         }, | ||||
|         role_ref: RoleRef { | ||||
|             api_group: "rbac.authorization.k8s.io".into(), | ||||
|             kind: "Role".into(), | ||||
|             name: role_name, | ||||
|         }, | ||||
|         subjects: Some(vec![Subject { | ||||
|             kind: "ServiceAccount".into(), | ||||
|             name: service_account_name, | ||||
|             namespace: Some(namespace.clone()), | ||||
|             ..Default::default() | ||||
|         }]), | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub fn build_prom_service_account( | ||||
|     service_account_name: String, | ||||
|     namespace: String, | ||||
| ) -> ServiceAccount { | ||||
|     ServiceAccount { | ||||
|         automount_service_account_token: None, | ||||
|         image_pull_secrets: None, | ||||
|         metadata: ObjectMeta { | ||||
|             name: Some(service_account_name), | ||||
|             namespace: Some(namespace), | ||||
|             ..Default::default() | ||||
|         }, | ||||
|         secrets: None, | ||||
|     } | ||||
| } | ||||
| @ -0,0 +1,89 @@ | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| 
 | ||||
| use kube::{CustomResource, Resource, api::ObjectMeta}; | ||||
| use schemars::JsonSchema; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| 
 | ||||
| use crate::interpret::InterpretError; | ||||
| 
 | ||||
| use crate::modules::monitoring::kube_prometheus::types::{ | ||||
|     HTTPScheme, MatchExpression, NamespaceSelector, Operator, Selector, | ||||
|     ServiceMonitor as KubeServiceMonitor, ServiceMonitorEndpoint, | ||||
| }; | ||||
| 
 | ||||
/// Spec of the `monitoring.coreos.com/v1` `ServiceMonitor` custom resource.
///
/// `#[derive(CustomResource)]` generates the top-level `ServiceMonitor`
/// resource type (named after `kind`) that wraps this spec, together with its
/// `impl Resource`, so no boilerplate has to be written by hand.
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
#[kube(
    group = "monitoring.coreos.com",
    version = "v1",
    kind = "ServiceMonitor",
    plural = "servicemonitors",
    namespaced
)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorSpec {
    /// A label selector to select services to monitor.
    pub selector: Selector,

    /// A list of endpoints on the selected services to be monitored.
    pub endpoints: Vec<ServiceMonitorEndpoint>,

    /// Selector to select which namespaces the Kubernetes Endpoints objects
    /// are discovered from.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub namespace_selector: Option<NamespaceSelector>,

    /// The label to use to retrieve the job name from.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub job_label: Option<String>,

    /// Pod-based target labels to transfer from the Kubernetes Pod onto the target.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub pod_target_labels: Vec<String>,

    /// TargetLabels transfers labels on the Kubernetes Service object to the target.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub target_labels: Vec<String>,
}
| 
 | ||||
| impl Default for ServiceMonitorSpec { | ||||
|     fn default() -> Self { | ||||
|         let mut labels = HashMap::new(); | ||||
|         Self { | ||||
|             selector: Selector { | ||||
|                 match_labels: { labels }, | ||||
|                 match_expressions: vec![MatchExpression { | ||||
|                     key: "app.kubernetes.io/name".into(), | ||||
|                     operator: Operator::Exists, | ||||
|                     values: vec![], | ||||
|                 }], | ||||
|             }, | ||||
|             endpoints: vec![ServiceMonitorEndpoint { | ||||
|                 port: Some("http".to_string()), | ||||
|                 path: Some("/metrics".into()), | ||||
|                 interval: Some("30s".into()), | ||||
|                 scheme: Some(HTTPScheme::HTTP), | ||||
|                 ..Default::default() | ||||
|             }], | ||||
|             namespace_selector: None, // only the same namespace
 | ||||
|             job_label: Some("app".into()), | ||||
|             pod_target_labels: vec![], | ||||
|             target_labels: vec![], | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl From<KubeServiceMonitor> for ServiceMonitorSpec { | ||||
|     fn from(value: KubeServiceMonitor) -> Self { | ||||
|         Self { | ||||
|             selector: value.selector, | ||||
|             endpoints: value.endpoints, | ||||
|             namespace_selector: value.namespace_selector, | ||||
|             job_label: value.job_label, | ||||
|             pod_target_labels: value.pod_target_labels, | ||||
|             target_labels: value.target_labels, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @ -35,7 +35,7 @@ impl KubePrometheusConfig { | ||||
|             windows_monitoring: false, | ||||
|             alert_manager: true, | ||||
|             grafana: true, | ||||
|             node_exporter: false, | ||||
|             node_exporter: true, | ||||
|             prometheus: true, | ||||
|             kubernetes_service_monitors: true, | ||||
|             kubernetes_api_server: true, | ||||
|  | ||||
| @ -12,8 +12,8 @@ use crate::modules::{ | ||||
|     helm::chart::HelmChartScore, | ||||
|     monitoring::kube_prometheus::types::{ | ||||
|         AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig, | ||||
|         AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits, | ||||
|         PrometheusConfig, Requests, Resources, | ||||
|         AlertManagerConfigSelector, AlertManagerRoute, AlertManagerSpec, AlertManagerValues, | ||||
|         ConfigReloader, Limits, PrometheusConfig, Requests, Resources, | ||||
|     }, | ||||
| }; | ||||
| 
 | ||||
| @ -332,6 +332,11 @@ prometheusOperator: | ||||
|             .push(receiver.channel_receiver.clone()); | ||||
|     } | ||||
| 
 | ||||
|     let mut labels = BTreeMap::new(); | ||||
|     labels.insert("alertmanagerConfig".to_string(), "enabled".to_string()); | ||||
|     let alert_manager_config_selector = AlertManagerConfigSelector { | ||||
|         match_labels: labels, | ||||
|     }; | ||||
|     let alert_manager_values = AlertManagerValues { | ||||
|         alertmanager: AlertManager { | ||||
|             enabled: config.alert_manager, | ||||
| @ -347,6 +352,8 @@ prometheusOperator: | ||||
|                         cpu: "100m".to_string(), | ||||
|                     }, | ||||
|                 }, | ||||
|                 alert_manager_config_selector, | ||||
|                 replicas: 2, | ||||
|             }, | ||||
|             init_config_reloader: ConfigReloader { | ||||
|                 resources: Resources { | ||||
|  | ||||
| @ -1,3 +1,4 @@ | ||||
| pub mod crd; | ||||
| pub mod helm; | ||||
| pub mod helm_prometheus_alert_score; | ||||
| pub mod prometheus; | ||||
|  | ||||
| @ -1,7 +1,8 @@ | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use serde::Serialize; | ||||
| use schemars::JsonSchema; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde_yaml::{Mapping, Sequence, Value}; | ||||
| 
 | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup; | ||||
| @ -55,6 +56,14 @@ pub struct AlertManagerChannelConfig { | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct AlertManagerSpec { | ||||
|     pub(crate) resources: Resources, | ||||
|     pub replicas: u32, | ||||
|     pub alert_manager_config_selector: AlertManagerConfigSelector, | ||||
| } | ||||
| 
 | ||||
// Label selector used as the `alert_manager_config_selector` of
// `AlertManagerSpec`. Serialized with camelCase keys, i.e. `matchLabels`.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManagerConfigSelector {
    // Exact label key/value pairs to match.
    pub match_labels: BTreeMap<String, String>,
}
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| @ -86,7 +95,7 @@ pub struct AlertGroup { | ||||
|     pub groups: Vec<AlertManagerRuleGroup>, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | ||||
| pub enum HTTPScheme { | ||||
|     #[serde(rename = "http")] | ||||
|     HTTP, | ||||
| @ -94,7 +103,7 @@ pub enum HTTPScheme { | ||||
|     HTTPS, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | ||||
| pub enum Operator { | ||||
|     In, | ||||
|     NotIn, | ||||
| @ -139,74 +148,83 @@ pub struct ServiceMonitorTLSConfig { | ||||
|     pub server_name: Option<String>, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct ServiceMonitorEndpoint { | ||||
|     // ## Name of the endpoint's service port
 | ||||
|     // ## Mutually exclusive with targetPort
 | ||||
|     /// Name of the service port this endpoint refers to.
 | ||||
|     pub port: Option<String>, | ||||
| 
 | ||||
|     // ## Name or number of the endpoint's target port
 | ||||
|     // ## Mutually exclusive with port
 | ||||
|     pub target_port: Option<String>, | ||||
| 
 | ||||
|     // ## File containing bearer token to be used when scraping targets
 | ||||
|     // ##
 | ||||
|     pub bearer_token_file: Option<String>, | ||||
| 
 | ||||
|     // ## Interval at which metrics should be scraped
 | ||||
|     // ##
 | ||||
|     /// Interval at which metrics should be scraped.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub interval: Option<String>, | ||||
| 
 | ||||
|     // ## HTTP path to scrape for metrics
 | ||||
|     // ##
 | ||||
|     pub path: String, | ||||
|     /// The HTTP path to scrape for metrics.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub path: Option<String>, | ||||
| 
 | ||||
|     // ## HTTP scheme to use for scraping
 | ||||
|     // ##
 | ||||
|     pub scheme: HTTPScheme, | ||||
|     /// HTTP scheme to use for scraping.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub scheme: Option<HTTPScheme>, | ||||
| 
 | ||||
|     // ## TLS configuration to use when scraping the endpoint
 | ||||
|     // ##
 | ||||
|     pub tls_config: Option<ServiceMonitorTLSConfig>, | ||||
|     /// Relabelings to apply to samples before scraping.
 | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub relabelings: Vec<RelabelConfig>, | ||||
| 
 | ||||
|     // ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
 | ||||
|     // ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
 | ||||
|     // ##
 | ||||
|     // # - action: keep
 | ||||
|     // #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
 | ||||
|     // #   sourceLabels: [__name__]
 | ||||
|     pub metric_relabelings: Vec<Mapping>, | ||||
| 
 | ||||
|     // ## RelabelConfigs to apply to samples before scraping
 | ||||
|     // ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
 | ||||
|     // ##
 | ||||
|     // # - sourceLabels: [__meta_kubernetes_pod_node_name]
 | ||||
|     // #   separator: ;
 | ||||
|     // #   regex: ^(.*)$
 | ||||
|     // #   targetLabel: nodename
 | ||||
|     // #   replacement: $1
 | ||||
|     // #   action: replace
 | ||||
|     pub relabelings: Vec<Mapping>, | ||||
|     /// MetricRelabelings to apply to samples after scraping, but before ingestion.
 | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub metric_relabelings: Vec<RelabelConfig>, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct RelabelConfig { | ||||
|     /// The action to perform based on the regex matching.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub action: Option<String>, | ||||
| 
 | ||||
|     /// A list of labels from which to extract values.
 | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub source_labels: Vec<String>, | ||||
| 
 | ||||
|     /// Separator to be used when concatenating source_labels.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub separator: Option<String>, | ||||
| 
 | ||||
|     /// The label to which the resulting value is written.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub target_label: Option<String>, | ||||
| 
 | ||||
|     /// A regular expression to match against the concatenated source label values.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub regex: Option<String>, | ||||
| 
 | ||||
|     /// The replacement value to use.
 | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub replacement: Option<String>, | ||||
| } | ||||
| 
 | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct MatchExpression { | ||||
|     pub key: String, | ||||
|     pub operator: Operator, | ||||
|     pub operator: Operator, // "In", "NotIn", "Exists", "DoesNotExist"
 | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub values: Vec<String>, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Selector { | ||||
|     //   # label selector for services
 | ||||
|     /// A map of key-value pairs to match.
 | ||||
|     #[serde(default, skip_serializing_if = "HashMap::is_empty")] | ||||
|     pub match_labels: HashMap<String, String>, | ||||
| 
 | ||||
|     /// A list of label selector requirements.
 | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub match_expressions: Vec<MatchExpression>, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct ServiceMonitor { | ||||
|     pub name: String, | ||||
| @ -250,10 +268,15 @@ pub struct ServiceMonitor { | ||||
|     pub fallback_scrape_protocol: Option<String>, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct NamespaceSelector { | ||||
|     /// Select all namespaces.
 | ||||
|     #[serde(default, skip_serializing_if = "std::ops::Not::not")] | ||||
|     pub any: bool, | ||||
| 
 | ||||
|     /// List of namespace names to select from.
 | ||||
|     #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||||
|     pub match_names: Vec<String>, | ||||
| } | ||||
| 
 | ||||
| @ -275,19 +298,3 @@ impl Default for ServiceMonitor { | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Default for ServiceMonitorEndpoint { | ||||
|     fn default() -> Self { | ||||
|         Self { | ||||
|             port: Some("80".to_string()), | ||||
|             target_port: Default::default(), | ||||
|             bearer_token_file: Default::default(), | ||||
|             interval: Default::default(), | ||||
|             path: "/metrics".to_string(), | ||||
|             scheme: HTTPScheme::HTTP, | ||||
|             tls_config: Default::default(), | ||||
|             metric_relabelings: Default::default(), | ||||
|             relabelings: Default::default(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -58,6 +58,7 @@ config: | ||||
|     # web-root: "disable" | ||||
|     enable-signup: false | ||||
|     enable-login: "true" | ||||
|     enable-metrics: "true" | ||||
| 
 | ||||
| persistence: | ||||
|   enabled: true | ||||
|  | ||||
							
								
								
									
										23
									
								
								harmony/src/modules/prometheus/alerts/k8s/deployment.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								harmony/src/modules/prometheus/alerts/k8s/deployment.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | ||||
| use std::collections::HashMap; | ||||
| 
 | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; | ||||
| 
 | ||||
| pub fn alert_deployment_unavailable() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule { | ||||
|         alert: "DeploymentUnavailable".into(), | ||||
|         expr: "kube_deployment_status_replicas_unavailable > 0".into(), | ||||
|         r#for: Some("2m".into()), | ||||
|         labels: HashMap::from([("severity".into(), "warning".into())]), | ||||
|         annotations: HashMap::from([ | ||||
|             ( | ||||
|                 "summary".into(), | ||||
|                 "Deployment has unavailable replicas".into(), | ||||
|             ), | ||||
|             ( | ||||
|                 "description".into(), | ||||
|                 "A deployment in this namespace has unavailable replicas for over 2 minutes." | ||||
|                     .into(), | ||||
|             ), | ||||
|         ]), | ||||
|     } | ||||
| } | ||||
							
								
								
									
										37
									
								
								harmony/src/modules/prometheus/alerts/k8s/memory_usage.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								harmony/src/modules/prometheus/alerts/k8s/memory_usage.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,37 @@ | ||||
| use std::collections::HashMap; | ||||
| 
 | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; | ||||
| 
 | ||||
| pub fn alert_high_memory_usage() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule { | ||||
|         alert: "HighMemoryUsage".into(), | ||||
|         expr: "container_memory_working_set_bytes{container!=\"\",namespace!=\"\"} > 500000000" | ||||
|             .into(), | ||||
|         r#for: Some("2m".into()), | ||||
|         labels: HashMap::from([("severity".into(), "warning".into())]), | ||||
|         annotations: HashMap::from([ | ||||
|             ("summary".into(), "Pod is using high memory".into()), | ||||
|             ( | ||||
|                 "description".into(), | ||||
|                 "A pod is consuming more than 500Mi of memory.".into(), | ||||
|             ), | ||||
|         ]), | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub fn alert_high_cpu_usage() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule { | ||||
|         alert: "HighCPUUsage".into(), | ||||
|         expr: "rate(container_cpu_usage_seconds_total{container!=\"\",namespace!=\"\"}[1m]) > 0.9" | ||||
|             .into(), | ||||
|         r#for: Some("1m".into()), | ||||
|         labels: HashMap::from([("severity".into(), "warning".into())]), | ||||
|         annotations: HashMap::from([ | ||||
|             ("summary".into(), "Pod is using high CPU".into()), | ||||
|             ( | ||||
|                 "description".into(), | ||||
|                 "A pod is using more than 90% of a core over 1 minute.".into(), | ||||
|             ), | ||||
|         ]), | ||||
|     } | ||||
| } | ||||
| @ -1 +1,5 @@ | ||||
| pub mod deployment; | ||||
| pub mod memory_usage; | ||||
| pub mod pod; | ||||
| pub mod pvc; | ||||
| pub mod service; | ||||
|  | ||||
							
								
								
									
										55
									
								
								harmony/src/modules/prometheus/alerts/k8s/pod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								harmony/src/modules/prometheus/alerts/k8s/pod.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,55 @@ | ||||
| use std::collections::HashMap; | ||||
| 
 | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; | ||||
| 
 | ||||
| pub fn pod_failed() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule { | ||||
|         alert: "PodFailed".into(), | ||||
|         expr: "kube_pod_status_phase{phase=\"Failed\"} > 2".into(), | ||||
|         r#for: Some("2m".into()), | ||||
|         labels: HashMap::from([("severity".into(), "critical".into())]), | ||||
|         annotations: HashMap::from([ | ||||
|             ("summary".into(), "A pod has failed".into()), | ||||
|             ( | ||||
|                 "description".into(), | ||||
|                 "One or more pods are in Failed phase.".into(), | ||||
|             ), | ||||
|         ]), | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub fn alert_container_restarting() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule { | ||||
|         alert: "ContainerRestarting".into(), | ||||
|         expr: "increase(kube_pod_container_status_restarts_total[5m]) > 3".into(), | ||||
|         r#for: Some("5m".into()), | ||||
|         labels: HashMap::from([("severity".into(), "warning".into())]), | ||||
|         annotations: HashMap::from([ | ||||
|             ( | ||||
|                 "summary".into(), | ||||
|                 "Container is restarting frequently".into(), | ||||
|             ), | ||||
|             ( | ||||
|                 "description".into(), | ||||
|                 "A container in this namespace has restarted more than 3 times in 5 minutes." | ||||
|                     .into(), | ||||
|             ), | ||||
|         ]), | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub fn alert_pod_not_ready() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule { | ||||
|         alert: "PodNotReady".into(), | ||||
|         expr: "kube_pod_status_ready{condition=\"true\"} == 0".into(), | ||||
|         r#for: Some("2m".into()), | ||||
|         labels: HashMap::from([("severity".into(), "warning".into())]), | ||||
|         annotations: HashMap::from([ | ||||
|             ("summary".into(), "Pod is not ready".into()), | ||||
|             ( | ||||
|                 "description".into(), | ||||
|                 "A pod in the namespace is not reporting Ready status.".into(), | ||||
|             ), | ||||
|         ]), | ||||
|     } | ||||
| } | ||||
							
								
								
									
										19
									
								
								harmony/src/modules/prometheus/alerts/k8s/service.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								harmony/src/modules/prometheus/alerts/k8s/service.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | ||||
| use std::collections::HashMap; | ||||
| 
 | ||||
| use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; | ||||
| 
 | ||||
| pub fn alert_service_down() -> PrometheusAlertRule { | ||||
|     PrometheusAlertRule { | ||||
|         alert: "ServiceDown".into(), | ||||
|         expr: "up == 0".into(), | ||||
|         r#for: Some("1m".into()), | ||||
|         labels: HashMap::from([("severity".into(), "critical".into())]), | ||||
|         annotations: HashMap::from([ | ||||
|             ("summary".into(), "Service is down".into()), | ||||
|             ( | ||||
|                 "description".into(), | ||||
|                 "A target service in the namespace is not responding to Prometheus scrapes.".into(), | ||||
|             ), | ||||
|         ]), | ||||
|     } | ||||
| } | ||||
							
								
								
									
										569
									
								
								harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										569
									
								
								harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,569 @@ | ||||
| use std::fs; | ||||
| use std::{collections::BTreeMap, sync::Arc}; | ||||
| use tempfile::tempdir; | ||||
| 
 | ||||
| use async_trait::async_trait; | ||||
| use kube::api::ObjectMeta; | ||||
| use log::{debug, info}; | ||||
| use serde::Serialize; | ||||
| use std::process::Command; | ||||
| 
 | ||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus; | ||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules; | ||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_grafana::{ | ||||
|     Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig, | ||||
|     GrafanaDatasourceSpec, GrafanaSpec, | ||||
| }; | ||||
| use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{ | ||||
|     PrometheusRule, PrometheusRuleSpec, RuleGroup, | ||||
| }; | ||||
| use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard; | ||||
| use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{ | ||||
|     ServiceMonitor, ServiceMonitorSpec, | ||||
| }; | ||||
| use crate::topology::oberservability::monitoring::AlertReceiver; | ||||
| use crate::topology::{K8sclient, Topology, k8s::K8sClient}; | ||||
| use crate::{ | ||||
|     data::{Id, Version}, | ||||
|     interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, | ||||
|     inventory::Inventory, | ||||
|     modules::monitoring::kube_prometheus::crd::{ | ||||
|         crd_alertmanagers::{Alertmanager, AlertmanagerSpec}, | ||||
|         crd_prometheuses::{ | ||||
|             AlertmanagerEndpoints, LabelSelector, Prometheus, PrometheusSpec, | ||||
|             PrometheusSpecAlerting, | ||||
|         }, | ||||
|         role::{build_prom_role, build_prom_rolebinding, build_prom_service_account}, | ||||
|     }, | ||||
|     score::Score, | ||||
| }; | ||||
| 
 | ||||
| use super::prometheus::PrometheusApplicationMonitoring; | ||||
| 
 | ||||
| #[derive(Clone, Debug, Serialize)] | ||||
| pub struct K8sPrometheusCRDAlertingScore { | ||||
|     pub sender: CRDPrometheus, | ||||
|     pub receivers: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>, | ||||
|     pub service_monitors: Vec<ServiceMonitor>, | ||||
|     pub prometheus_rules: Vec<RuleGroup>, | ||||
| } | ||||
| 
 | ||||
| impl<T: Topology + K8sclient + PrometheusApplicationMonitoring<CRDPrometheus>> Score<T> | ||||
|     for K8sPrometheusCRDAlertingScore | ||||
| { | ||||
|     fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> { | ||||
|         Box::new(K8sPrometheusCRDAlertingInterpret { | ||||
|             sender: self.sender.clone(), | ||||
|             receivers: self.receivers.clone(), | ||||
|             service_monitors: self.service_monitors.clone(), | ||||
|             prometheus_rules: self.prometheus_rules.clone(), | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     fn name(&self) -> String { | ||||
|         "CRDApplicationAlertingScore".into() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone, Debug)] | ||||
| pub struct K8sPrometheusCRDAlertingInterpret { | ||||
|     pub sender: CRDPrometheus, | ||||
|     pub receivers: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>, | ||||
|     pub service_monitors: Vec<ServiceMonitor>, | ||||
|     pub prometheus_rules: Vec<RuleGroup>, | ||||
| } | ||||
| 
 | ||||
| #[async_trait] | ||||
| impl<T: Topology + K8sclient + PrometheusApplicationMonitoring<CRDPrometheus>> Interpret<T> | ||||
|     for K8sPrometheusCRDAlertingInterpret | ||||
| { | ||||
|     async fn execute( | ||||
|         &self, | ||||
|         _inventory: &Inventory, | ||||
|         topology: &T, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         let client = topology.k8s_client().await.unwrap(); | ||||
|         self.ensure_grafana_operator().await?; | ||||
|         self.install_prometheus(&client).await?; | ||||
|         self.install_alert_manager(&client).await?; | ||||
|         self.install_client_kube_metrics().await?; | ||||
|         self.install_grafana(&client).await?; | ||||
|         self.install_receivers(&self.sender, &self.receivers) | ||||
|             .await?; | ||||
|         self.install_rules(&self.prometheus_rules, &client).await?; | ||||
|         self.install_monitors(self.service_monitors.clone(), &client) | ||||
|             .await?; | ||||
|         Ok(Outcome::success(format!( | ||||
|             "deployed application monitoring composants" | ||||
|         ))) | ||||
|     } | ||||
| 
 | ||||
|     fn get_name(&self) -> InterpretName { | ||||
|         todo!() | ||||
|     } | ||||
| 
 | ||||
|     fn get_version(&self) -> Version { | ||||
|         todo!() | ||||
|     } | ||||
| 
 | ||||
|     fn get_status(&self) -> InterpretStatus { | ||||
|         todo!() | ||||
|     } | ||||
| 
 | ||||
|     fn get_children(&self) -> Vec<Id> { | ||||
|         todo!() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl K8sPrometheusCRDAlertingInterpret { | ||||
|     async fn crd_exists(&self, crd: &str) -> bool { | ||||
|         let status = Command::new("sh") | ||||
|             .args(["-c", "kubectl get crd -A | grep -i", crd]) | ||||
|             .status() | ||||
|             .map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e))) | ||||
|             .unwrap(); | ||||
| 
 | ||||
|         status.success() | ||||
|     } | ||||
| 
 | ||||
|     async fn install_chart( | ||||
|         &self, | ||||
|         chart_path: String, | ||||
|         chart_name: String, | ||||
|     ) -> Result<(), InterpretError> { | ||||
|         let temp_dir = | ||||
|             tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?; | ||||
|         let temp_path = temp_dir.path().to_path_buf(); | ||||
|         debug!("Using temp directory: {}", temp_path.display()); | ||||
|         let chart = format!("{}/{}", chart_path, chart_name); | ||||
|         let pull_output = Command::new("helm") | ||||
|             .args(["pull", &chart, "--destination", temp_path.to_str().unwrap()]) | ||||
|             .output() | ||||
|             .map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?; | ||||
| 
 | ||||
|         if !pull_output.status.success() { | ||||
|             return Err(InterpretError::new(format!( | ||||
|                 "Helm pull failed: {}", | ||||
|                 String::from_utf8_lossy(&pull_output.stderr) | ||||
|             ))); | ||||
|         } | ||||
| 
 | ||||
|         let tgz_path = fs::read_dir(&temp_path) | ||||
|             .unwrap() | ||||
|             .filter_map(|entry| { | ||||
|                 let entry = entry.ok()?; | ||||
|                 let path = entry.path(); | ||||
|                 if path.extension()? == "tgz" { | ||||
|                     Some(path) | ||||
|                 } else { | ||||
|                     None | ||||
|                 } | ||||
|             }) | ||||
|             .next() | ||||
|             .ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?; | ||||
| 
 | ||||
|         debug!("Installing chart from: {}", tgz_path.display()); | ||||
| 
 | ||||
|         let install_output = Command::new("helm") | ||||
|             .args([ | ||||
|                 "install", | ||||
|                 &chart_name, | ||||
|                 tgz_path.to_str().unwrap(), | ||||
|                 "--namespace", | ||||
|                 &self.sender.namespace.clone(), | ||||
|                 "--create-namespace", | ||||
|                 "--wait", | ||||
|                 "--atomic", | ||||
|             ]) | ||||
|             .output() | ||||
|             .map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?; | ||||
| 
 | ||||
|         if !install_output.status.success() { | ||||
|             return Err(InterpretError::new(format!( | ||||
|                 "Helm install failed: {}", | ||||
|                 String::from_utf8_lossy(&install_output.stderr) | ||||
|             ))); | ||||
|         } | ||||
| 
 | ||||
|         debug!( | ||||
|             "Installed chart {}/{} in namespace: {}", | ||||
|             &chart_path, | ||||
|             &chart_name, | ||||
|             self.sender.namespace.clone() | ||||
|         ); | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> { | ||||
|         if self.crd_exists("grafanas.grafana.integreatly.org").await { | ||||
|             debug!("grafana CRDs already exist — skipping install."); | ||||
|             return Ok(Outcome::success("Grafana CRDs already exist".to_string())); | ||||
|         } | ||||
| 
 | ||||
|         let _ = Command::new("helm") | ||||
|             .args([ | ||||
|                 "repo", | ||||
|                 "add", | ||||
|                 "grafana-operator", | ||||
|                 "https://grafana.github.io/helm-charts", | ||||
|             ]) | ||||
|             .output() | ||||
|             .unwrap(); | ||||
| 
 | ||||
|         let _ = Command::new("helm") | ||||
|             .args(["repo", "update"]) | ||||
|             .output() | ||||
|             .unwrap(); | ||||
| 
 | ||||
|         let output = Command::new("helm") | ||||
|             .args([ | ||||
|                 "install", | ||||
|                 "grafana-operator", | ||||
|                 "grafana-operator/grafana-operator", | ||||
|                 "--namespace", | ||||
|                 &self.sender.namespace.clone(), | ||||
|                 "--create-namespace", | ||||
|                 "--set", | ||||
|                 "namespaceScope=true", | ||||
|             ]) | ||||
|             .output() | ||||
|             .unwrap(); | ||||
| 
 | ||||
|         if !output.status.success() { | ||||
|             return Err(InterpretError::new(format!( | ||||
|                 "helm install failed:\nstdout: {}\nstderr: {}", | ||||
|                 String::from_utf8_lossy(&output.stdout), | ||||
|                 String::from_utf8_lossy(&output.stderr) | ||||
|             ))); | ||||
|         } | ||||
| 
 | ||||
|         Ok(Outcome::success(format!( | ||||
|             "installed grafana operator in ns {}", | ||||
|             self.sender.namespace.clone() | ||||
|         ))) | ||||
|     } | ||||
| 
 | ||||
|     async fn install_prometheus(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> { | ||||
|         debug!( | ||||
|             "installing crd-prometheuses in namespace {}", | ||||
|             self.sender.namespace.clone() | ||||
|         ); | ||||
|         debug!("building role/rolebinding/serviceaccount for crd-prometheus"); | ||||
|         let rolename = format!("{}-prom", self.sender.namespace.clone()); | ||||
|         let sa_name = format!("{}-prom-sa", self.sender.namespace.clone()); | ||||
|         let role = build_prom_role(rolename.clone(), self.sender.namespace.clone()); | ||||
|         let rolebinding = build_prom_rolebinding( | ||||
|             rolename.clone(), | ||||
|             self.sender.namespace.clone(), | ||||
|             sa_name.clone(), | ||||
|         ); | ||||
|         let sa = build_prom_service_account(sa_name.clone(), self.sender.namespace.clone()); | ||||
|         let prom_spec = PrometheusSpec { | ||||
|             alerting: Some(PrometheusSpecAlerting { | ||||
|                 alertmanagers: Some(vec![AlertmanagerEndpoints { | ||||
|                     name: Some("alertmanager-operated".into()), | ||||
|                     namespace: Some(self.sender.namespace.clone()), | ||||
|                     port: Some("web".into()), | ||||
|                     scheme: Some("http".into()), | ||||
|                 }]), | ||||
|             }), | ||||
|             service_account_name: sa_name.clone(), | ||||
|             service_monitor_namespace_selector: Some(LabelSelector { | ||||
|                 match_labels: BTreeMap::from([( | ||||
|                     "kubernetes.io/metadata.name".to_string(), | ||||
|                     self.sender.namespace.clone(), | ||||
|                 )]), | ||||
|                 match_expressions: vec![], | ||||
|             }), | ||||
|             service_monitor_selector: Some(LabelSelector { | ||||
|                 match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]), | ||||
|                 ..Default::default() | ||||
|             }), | ||||
| 
 | ||||
|             service_discovery_role: Some("Endpoints".into()), | ||||
| 
 | ||||
|             pod_monitor_selector: Some(LabelSelector { | ||||
|                 match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]), | ||||
|                 ..Default::default() | ||||
|             }), | ||||
| 
 | ||||
|             rule_selector: Some(LabelSelector { | ||||
|                 match_labels: BTreeMap::from([("role".to_string(), "prometheus-rule".to_string())]), | ||||
|                 ..Default::default() | ||||
|             }), | ||||
| 
 | ||||
|             rule_namespace_selector: Some(LabelSelector { | ||||
|                 match_labels: BTreeMap::from([( | ||||
|                     "kubernetes.io/metadata.name".to_string(), | ||||
|                     self.sender.namespace.clone(), | ||||
|                 )]), | ||||
|                 match_expressions: vec![], | ||||
|             }), | ||||
|         }; | ||||
|         let prom = Prometheus { | ||||
|             metadata: ObjectMeta { | ||||
|                 name: Some(self.sender.namespace.clone()), | ||||
|                 labels: Some(std::collections::BTreeMap::from([ | ||||
|                     ("alertmanagerConfig".to_string(), "enabled".to_string()), | ||||
|                     ("client".to_string(), "prometheus".to_string()), | ||||
|                 ])), | ||||
|                 namespace: Some(self.sender.namespace.clone()), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             spec: prom_spec, | ||||
|         }; | ||||
|         client | ||||
|             .apply(&role, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
|         info!( | ||||
|             "installed prometheus role: {:#?} in ns {:#?}", | ||||
|             role.metadata.name.unwrap(), | ||||
|             role.metadata.namespace.unwrap() | ||||
|         ); | ||||
|         client | ||||
|             .apply(&rolebinding, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
|         info!( | ||||
|             "installed prometheus rolebinding: {:#?} in ns {:#?}", | ||||
|             rolebinding.metadata.name.unwrap(), | ||||
|             rolebinding.metadata.namespace.unwrap() | ||||
|         ); | ||||
|         client | ||||
|             .apply(&sa, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
|         info!( | ||||
|             "installed prometheus service account: {:#?} in ns {:#?}", | ||||
|             sa.metadata.name.unwrap(), | ||||
|             sa.metadata.namespace.unwrap() | ||||
|         ); | ||||
|         client | ||||
|             .apply(&prom, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
|         info!( | ||||
|             "installed prometheus: {:#?} in ns {:#?}", | ||||
|             &prom.metadata.name.clone().unwrap(), | ||||
|             &prom.metadata.namespace.clone().unwrap() | ||||
|         ); | ||||
| 
 | ||||
|         Ok(Outcome::success(format!( | ||||
|             "successfully deployed crd-prometheus {:#?}", | ||||
|             prom | ||||
|         ))) | ||||
|     } | ||||
| 
 | ||||
|     async fn install_alert_manager( | ||||
|         &self, | ||||
|         client: &Arc<K8sClient>, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         let am = Alertmanager { | ||||
|             metadata: ObjectMeta { | ||||
|                 name: Some(self.sender.namespace.clone()), | ||||
|                 labels: Some(std::collections::BTreeMap::from([( | ||||
|                     "alertmanagerConfig".to_string(), | ||||
|                     "enabled".to_string(), | ||||
|                 )])), | ||||
|                 namespace: Some(self.sender.namespace.clone()), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             spec: AlertmanagerSpec::default(), | ||||
|         }; | ||||
|         client | ||||
|             .apply(&am, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
|         Ok(Outcome::success(format!( | ||||
|             "successfully deployed service monitor {:#?}", | ||||
|             am.metadata.name | ||||
|         ))) | ||||
|     } | ||||
|     async fn install_monitors( | ||||
|         &self, | ||||
|         mut monitors: Vec<ServiceMonitor>, | ||||
|         client: &Arc<K8sClient>, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         let default_service_monitor = ServiceMonitor { | ||||
|             metadata: ObjectMeta { | ||||
|                 name: Some(self.sender.namespace.clone()), | ||||
|                 labels: Some(std::collections::BTreeMap::from([ | ||||
|                     ("alertmanagerConfig".to_string(), "enabled".to_string()), | ||||
|                     ("client".to_string(), "prometheus".to_string()), | ||||
|                     ( | ||||
|                         "app.kubernetes.io/name".to_string(), | ||||
|                         "kube-state-metrics".to_string(), | ||||
|                     ), | ||||
|                 ])), | ||||
|                 namespace: Some(self.sender.namespace.clone()), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             spec: ServiceMonitorSpec::default(), | ||||
|         }; | ||||
|         monitors.push(default_service_monitor); | ||||
|         for monitor in monitors.iter() { | ||||
|             client | ||||
|                 .apply(monitor, Some(&self.sender.namespace.clone())) | ||||
|                 .await | ||||
|                 .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
|         } | ||||
|         Ok(Outcome::success( | ||||
|             "succesfully deployed service monitors".to_string(), | ||||
|         )) | ||||
|     } | ||||
| 
 | ||||
|     async fn install_rules( | ||||
|         &self, | ||||
|         rules: &Vec<RuleGroup>, | ||||
|         client: &Arc<K8sClient>, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         let mut prom_rule_spec = PrometheusRuleSpec { | ||||
|             groups: rules.clone(), | ||||
|         }; | ||||
| 
 | ||||
|         let default_rules_group = RuleGroup { | ||||
|             name: format!("default-rules"), | ||||
|             rules: build_default_application_rules(), | ||||
|         }; | ||||
| 
 | ||||
|         prom_rule_spec.groups.push(default_rules_group); | ||||
|         let prom_rules = PrometheusRule { | ||||
|             metadata: ObjectMeta { | ||||
|                 name: Some(self.sender.namespace.clone()), | ||||
|                 labels: Some(std::collections::BTreeMap::from([ | ||||
|                     ("alertmanagerConfig".to_string(), "enabled".to_string()), | ||||
|                     ("role".to_string(), "prometheus-rule".to_string()), | ||||
|                 ])), | ||||
|                 namespace: Some(self.sender.namespace.clone()), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             spec: prom_rule_spec, | ||||
|         }; | ||||
|         client | ||||
|             .apply(&prom_rules, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
|         Ok(Outcome::success(format!( | ||||
|             "successfully deployed rules {:#?}", | ||||
|             prom_rules.metadata.name | ||||
|         ))) | ||||
|     } | ||||
| 
 | ||||
|     async fn install_client_kube_metrics(&self) -> Result<Outcome, InterpretError> { | ||||
|         self.install_chart( | ||||
|             "oci://hub.nationtech.io/harmony".to_string(), | ||||
|             "nt-kube-metrics".to_string(), | ||||
|         ) | ||||
|         .await?; | ||||
|         Ok(Outcome::success(format!( | ||||
|             "Installed client kube metrics in ns {}", | ||||
|             &self.sender.namespace.clone() | ||||
|         ))) | ||||
|     } | ||||
| 
 | ||||
|     async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> { | ||||
|         let mut label = BTreeMap::new(); | ||||
|         label.insert("dashboards".to_string(), "grafana".to_string()); | ||||
|         let labels = LabelSelector { | ||||
|             match_labels: label.clone(), | ||||
|             match_expressions: vec![], | ||||
|         }; | ||||
|         let mut json_data = BTreeMap::new(); | ||||
|         json_data.insert("timeInterval".to_string(), "5s".to_string()); | ||||
|         let namespace = self.sender.namespace.clone(); | ||||
| 
 | ||||
|         let json = build_default_dashboard(&namespace); | ||||
| 
 | ||||
|         let graf_data_source = GrafanaDatasource { | ||||
|             metadata: ObjectMeta { | ||||
|                 name: Some(format!( | ||||
|                     "grafana-datasource-{}", | ||||
|                     self.sender.namespace.clone() | ||||
|                 )), | ||||
|                 namespace: Some(self.sender.namespace.clone()), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             spec: GrafanaDatasourceSpec { | ||||
|                 instance_selector: labels.clone(), | ||||
|                 allow_cross_namespace_import: Some(false), | ||||
|                 datasource: GrafanaDatasourceConfig { | ||||
|                     access: "proxy".to_string(), | ||||
|                     database: Some("prometheus".to_string()), | ||||
|                     json_data: Some(json_data), | ||||
|                     //this is fragile
 | ||||
|                     name: format!("prometheus-{}-0", self.sender.namespace.clone()), | ||||
|                     r#type: "prometheus".to_string(), | ||||
|                     url: format!( | ||||
|                         "http://prometheus-operated.{}.svc.cluster.local:9090", | ||||
|                         self.sender.namespace.clone() | ||||
|                     ), | ||||
|                 }, | ||||
|             }, | ||||
|         }; | ||||
| 
 | ||||
|         client | ||||
|             .apply(&graf_data_source, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
| 
 | ||||
|         let graf_dashboard = GrafanaDashboard { | ||||
|             metadata: ObjectMeta { | ||||
|                 name: Some(format!( | ||||
|                     "grafana-dashboard-{}", | ||||
|                     self.sender.namespace.clone() | ||||
|                 )), | ||||
|                 namespace: Some(self.sender.namespace.clone()), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             spec: GrafanaDashboardSpec { | ||||
|                 resync_period: Some("30s".to_string()), | ||||
|                 instance_selector: labels.clone(), | ||||
|                 json, | ||||
|             }, | ||||
|         }; | ||||
| 
 | ||||
|         client | ||||
|             .apply(&graf_dashboard, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
| 
 | ||||
|         let grafana = Grafana { | ||||
|             metadata: ObjectMeta { | ||||
|                 name: Some(format!("grafana-{}", self.sender.namespace.clone())), | ||||
|                 namespace: Some(self.sender.namespace.clone()), | ||||
|                 labels: Some(label.clone()), | ||||
|                 ..Default::default() | ||||
|             }, | ||||
|             spec: GrafanaSpec { | ||||
|                 config: None, | ||||
|                 admin_user: None, | ||||
|                 admin_password: None, | ||||
|                 ingress: None, | ||||
|                 persistence: None, | ||||
|                 resources: None, | ||||
|             }, | ||||
|         }; | ||||
|         client | ||||
|             .apply(&grafana, Some(&self.sender.namespace.clone())) | ||||
|             .await | ||||
|             .map_err(|e| InterpretError::new(e.to_string()))?; | ||||
|         Ok(Outcome::success(format!( | ||||
|             "successfully deployed grafana instance {:#?}", | ||||
|             grafana.metadata.name | ||||
|         ))) | ||||
|     } | ||||
| 
 | ||||
|     async fn install_receivers( | ||||
|         &self, | ||||
|         sender: &CRDPrometheus, | ||||
|         receivers: &Vec<Box<dyn AlertReceiver<CRDPrometheus>>>, | ||||
|     ) -> Result<Outcome, InterpretError> { | ||||
|         for receiver in receivers.iter() { | ||||
|             receiver.install(sender).await.map_err(|err| { | ||||
|                 InterpretError::new(format!("failed to install receiver: {}", err)) | ||||
|             })?; | ||||
|         } | ||||
|         Ok(Outcome::success("successfully deployed receivers".into())) | ||||
|     } | ||||
| } | ||||
| @ -1 +1,3 @@ | ||||
| pub mod alerts; | ||||
| pub mod k8s_prometheus_alerting_score; | ||||
| pub mod prometheus; | ||||
|  | ||||
							
								
								
									
										17
									
								
								harmony/src/modules/prometheus/prometheus.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								harmony/src/modules/prometheus/prometheus.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | ||||
| use async_trait::async_trait; | ||||
| 
 | ||||
| use crate::{ | ||||
|     interpret::{InterpretError, Outcome}, | ||||
|     inventory::Inventory, | ||||
|     topology::oberservability::monitoring::{AlertReceiver, AlertSender}, | ||||
| }; | ||||
| 
 | ||||
| #[async_trait] | ||||
| pub trait PrometheusApplicationMonitoring<S: AlertSender> { | ||||
|     async fn install_prometheus( | ||||
|         &self, | ||||
|         sender: &S, | ||||
|         inventory: &Inventory, | ||||
|         receivers: Option<Vec<Box<dyn AlertReceiver<S>>>>, | ||||
|     ) -> Result<Outcome, InterpretError>; | ||||
| } | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user