diff --git a/examples/monitoring/src/main.rs b/examples/monitoring/src/main.rs index 7037094d..d1304db7 100644 --- a/examples/monitoring/src/main.rs +++ b/examples/monitoring/src/main.rs @@ -1,37 +1,45 @@ -use std::collections::HashMap; +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, +}; use harmony::{ inventory::Inventory, - modules::{ - monitoring::{ - alert_channel::discord_alert_channel::DiscordWebhook, - alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, - kube_prometheus::{ - helm_prometheus_alert_score::HelmPrometheusAlertingScore, - types::{ - HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor, - ServiceMonitorEndpoint, + modules::monitoring::{ + alert_channel::discord_alert_channel::DiscordReceiver, + alert_rule::{ + alerts::{ + infra::dell_server::{ + alert_global_storage_status_critical, + alert_global_storage_status_non_recoverable, + global_storage_status_degraded_non_critical, }, + k8s::pvc::high_pvc_fill_rate_over_two_days, }, + prometheus_alert_rule::AlertManagerRuleGroup, }, - prometheus::alerts::{ - infra::dell_server::{ - alert_global_storage_status_critical, alert_global_storage_status_non_recoverable, - global_storage_status_degraded_non_critical, + kube_prometheus::{ + helm::config::KubePrometheusConfig, + kube_prometheus_alerting_score::KubePrometheusAlertingScore, + types::{ + HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor, + ServiceMonitorEndpoint, }, - k8s::pvc::high_pvc_fill_rate_over_two_days, }, }, - topology::K8sAnywhereTopology, + topology::{K8sAnywhereTopology, monitoring::AlertRoute}, }; use harmony_types::{k8s_name::K8sName, net::Url}; #[tokio::main] async fn main() { - let discord_receiver = DiscordWebhook { - name: K8sName("test-discord".to_string()), + let receiver_name = "test-discord".to_string(); + let discord_receiver = DiscordReceiver { + name: receiver_name.clone(), url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()), - selectors: vec![], + route: AlertRoute { + ..AlertRoute::default(receiver_name) + }, }; let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days(); @@ -70,10 +78,15 @@ async fn main() { endpoints: vec![service_monitor_endpoint], ..Default::default() }; - let alerting_score = HelmPrometheusAlertingScore { + + let config = Arc::new(Mutex::new(KubePrometheusConfig::new())); + + let alerting_score = KubePrometheusAlertingScore { receivers: vec![Box::new(discord_receiver)], rules: vec![Box::new(additional_rules), Box::new(additional_rules2)], service_monitors: vec![service_monitor], + scrape_targets: None, + config, }; harmony_cli::run( diff --git a/examples/monitoring_with_tenant/src/main.rs b/examples/monitoring_with_tenant/src/main.rs index f67f9d8a..6afc302c 100644 --- a/examples/monitoring_with_tenant/src/main.rs +++ b/examples/monitoring_with_tenant/src/main.rs @@ -1,24 +1,32 @@ -use std::{collections::HashMap, str::FromStr}; +use std::{ + collections::HashMap, + str::FromStr, + sync::{Arc, Mutex}, +}; use harmony::{ inventory::Inventory, modules::{ monitoring::{ - alert_channel::discord_alert_channel::DiscordWebhook, - alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, + alert_channel::discord_alert_channel::DiscordReceiver, + alert_rule::{ + alerts::k8s::pvc::high_pvc_fill_rate_over_two_days, + prometheus_alert_rule::AlertManagerRuleGroup, + }, kube_prometheus::{ - helm_prometheus_alert_score::HelmPrometheusAlertingScore, + helm::config::KubePrometheusConfig, + kube_prometheus_alerting_score::KubePrometheusAlertingScore, types::{ HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor, ServiceMonitorEndpoint, }, }, }, - prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days, tenant::TenantScore, }, topology::{ K8sAnywhereTopology, + monitoring::AlertRoute, tenant::{ResourceLimits, TenantConfig, TenantNetworkPolicy}, }, }; @@ -42,10 +50,13 @@ async fn main() { }, }; - let discord_receiver = DiscordWebhook { - name: K8sName("test-discord".to_string()), + let receiver_name = "test-discord".to_string(); + let discord_receiver = DiscordReceiver { + name: receiver_name.clone(), url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()), - selectors: vec![], + route: AlertRoute { + ..AlertRoute::default(receiver_name) + }, }; let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days(); @@ -74,10 +85,14 @@ async fn main() { ..Default::default() }; - let alerting_score = HelmPrometheusAlertingScore { + let config = Arc::new(Mutex::new(KubePrometheusConfig::new())); + + let alerting_score = KubePrometheusAlertingScore { receivers: vec![Box::new(discord_receiver)], rules: vec![Box::new(additional_rules)], service_monitors: vec![service_monitor], + scrape_targets: None, + config, }; harmony_cli::run( diff --git a/examples/okd_cluster_alerts/src/main.rs b/examples/okd_cluster_alerts/src/main.rs index 93dac3b4..f83e1281 100644 --- a/examples/okd_cluster_alerts/src/main.rs +++ b/examples/okd_cluster_alerts/src/main.rs @@ -1,35 +1,64 @@ -use std::collections::HashMap; - use harmony::{ inventory::Inventory, modules::monitoring::{ - alert_channel::discord_alert_channel::DiscordWebhook, - okd::cluster_monitoring::OpenshiftClusterAlertScore, + alert_channel::discord_alert_channel::DiscordReceiver, + alert_rule::{ + alerts::{ + infra::opnsense::high_http_error_rate, k8s::pvc::high_pvc_fill_rate_over_two_days, + }, + prometheus_alert_rule::AlertManagerRuleGroup, + }, + okd::openshift_cluster_alerting_score::OpenshiftClusterAlertScore, + scrape_target::prometheus_node_exporter::PrometheusNodeExporter, + }, + topology::{ + K8sAnywhereTopology, + monitoring::{AlertMatcher, AlertRoute, MatchOp}, }, - topology::K8sAnywhereTopology, }; -use harmony_macros::hurl; -use harmony_types::k8s_name::K8sName; + +use harmony_macros::{hurl, ip}; #[tokio::main] async fn main() { - let mut sel = HashMap::new(); - sel.insert( - "openshift_io_alert_source".to_string(), - "platform".to_string(), - ); - let mut sel2 = HashMap::new(); - sel2.insert("openshift_io_alert_source".to_string(), "".to_string()); - let selectors = vec![sel, sel2]; + let platform_matcher = AlertMatcher { + label: "prometheus".to_string(), + operator: MatchOp::Eq, + value: "openshift-monitoring/k8s".to_string(), + }; + let severity = AlertMatcher { + label: "severity".to_string(), + operator: MatchOp::Eq, + value: "critical".to_string(), + }; + + let high_http_error_rate = high_http_error_rate(); + + let additional_rules = AlertManagerRuleGroup::new("", vec![high_http_error_rate]); + + let scrape_target = PrometheusNodeExporter { + job_name: "firewall".to_string(), + metrics_path: "/metrics".to_string(), + listen_address: ip!("127.0.0.1"), + port: 9100, + ..Default::default() + }; + harmony_cli::run( Inventory::autoload(), K8sAnywhereTopology::from_env(), vec![Box::new(OpenshiftClusterAlertScore { - receivers: vec![Box::new(DiscordWebhook { - name: K8sName("wills-discord-webhook-example".to_string()), - url: hurl!("https://something.io"), - selectors: selectors, + receivers: vec![Box::new(DiscordReceiver { + name: "crit-wills-discord-channel-example".to_string(), + url: hurl!("https://test.io"), + route: AlertRoute { + matchers: vec![severity], + ..AlertRoute::default("crit-wills-discord-channel-example".to_string()) + }, })], + sender: harmony::modules::monitoring::okd::OpenshiftClusterAlertSender, + rules: vec![Box::new(additional_rules)], + scrape_targets: Some(vec![Box::new(scrape_target)]), })], None, ) diff --git a/examples/rhob_application_monitoring/src/main.rs b/examples/rhob_application_monitoring/src/main.rs index 6eeaea2c..5f0578ed 100644 --- a/examples/rhob_application_monitoring/src/main.rs +++ b/examples/rhob_application_monitoring/src/main.rs @@ -6,9 +6,9 @@ use harmony::{ application::{ ApplicationScore, RustWebFramework, RustWebapp, features::rhob_monitoring::Monitoring, }, - monitoring::alert_channel::discord_alert_channel::DiscordWebhook, + monitoring::alert_channel::discord_alert_channel::DiscordReceiver, }, - topology::K8sAnywhereTopology, + topology::{K8sAnywhereTopology, monitoring::AlertRoute}, }; use harmony_types::{k8s_name::K8sName, net::Url}; @@ -22,18 +22,21 @@ async fn main() { service_port: 3000, }); - let discord_receiver = DiscordWebhook { - name: K8sName("test-discord".to_string()), + let receiver_name = "test-discord".to_string(); + let discord_receiver = DiscordReceiver { + name: receiver_name.clone(), url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()), - selectors: vec![], + route: AlertRoute { + ..AlertRoute::default(receiver_name) + }, }; let app = ApplicationScore { features: vec![ - Box::new(Monitoring { - application: application.clone(), - alert_receiver: vec![Box::new(discord_receiver)], - }), + // Box::new(Monitoring { + // application: application.clone(), + // alert_receiver: vec![Box::new(discord_receiver)], + // }), // TODO add backups, multisite ha, etc ], application, diff --git a/examples/rust/src/main.rs b/examples/rust/src/main.rs index a1006068..0ab2bd8c 100644 --- a/examples/rust/src/main.rs +++ b/examples/rust/src/main.rs @@ -8,13 +8,13 @@ use harmony::{ features::{Monitoring, PackagingDeployment}, }, monitoring::alert_channel::{ - discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver, + discord_alert_channel::DiscordReceiver, webhook_receiver::WebhookReceiver, }, }, - topology::K8sAnywhereTopology, + topology::{K8sAnywhereTopology, monitoring::AlertRoute}, }; use harmony_macros::hurl; -use harmony_types::k8s_name::K8sName; +use harmony_types::{k8s_name::K8sName, net::Url}; #[tokio::main] async fn main() { @@ -26,10 +26,13 @@ async fn main() { service_port: 3000, }); - let discord_receiver = DiscordWebhook { - name: K8sName("test-discord".to_string()), - url: hurl!("https://discord.doesnt.exist.com"), - selectors: vec![], + let receiver_name = "test-discord".to_string(); + let discord_receiver = DiscordReceiver { + name: receiver_name.clone(), + url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()), + route: AlertRoute { + ..AlertRoute::default(receiver_name) + }, }; let webhook_receiver = WebhookReceiver { @@ -42,10 +45,10 @@ async fn main() { Box::new(PackagingDeployment { application: application.clone(), }), - Box::new(Monitoring { - application: application.clone(), - alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)], - }), + // Box::new(Monitoring { + // application: application.clone(), + // alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)], + // }), // TODO add backups, multisite ha, etc ], application, diff --git a/examples/try_rust_webapp/src/main.rs b/examples/try_rust_webapp/src/main.rs index d3c88b32..aa954f63 100644 --- a/examples/try_rust_webapp/src/main.rs +++ b/examples/try_rust_webapp/src/main.rs @@ -1,11 +1,8 @@ use harmony::{ inventory::Inventory, - modules::{ - application::{ - ApplicationScore, RustWebFramework, RustWebapp, - features::{Monitoring, PackagingDeployment}, - }, - monitoring::alert_channel::discord_alert_channel::DiscordWebhook, + modules::application::{ + ApplicationScore, RustWebFramework, RustWebapp, + features::{Monitoring, PackagingDeployment}, }, topology::K8sAnywhereTopology, }; @@ -30,14 +27,14 @@ async fn main() { Box::new(PackagingDeployment { application: application.clone(), }), - Box::new(Monitoring { - application: application.clone(), - alert_receiver: vec![Box::new(DiscordWebhook { - name: K8sName("test-discord".to_string()), - url: hurl!("https://discord.doesnt.exist.com"), - selectors: vec![], - })], - }), + // Box::new(Monitoring { + // application: application.clone(), + // alert_receiver: vec![Box::new(DiscordWebhook { + // name: K8sName("test-discord".to_string()), + // url: hurl!("https://discord.doesnt.exist.com"), + // selectors: vec![], + // })], + // }), ], application, }; diff --git a/harmony/src/domain/topology/k8s_anywhere/k8s_anywhere.rs b/harmony/src/domain/topology/k8s_anywhere/k8s_anywhere.rs index 55091d23..9cab8b90 100644 --- a/harmony/src/domain/topology/k8s_anywhere/k8s_anywhere.rs +++ b/harmony/src/domain/topology/k8s_anywhere/k8s_anywhere.rs @@ -1,13 +1,12 @@ -use std::{collections::BTreeMap, process::Command, sync::Arc, time::Duration}; +use std::{collections::BTreeMap, process::Command, sync::Arc}; use async_trait::async_trait; -use base64::{Engine, engine::general_purpose}; use harmony_types::rfc1123::Rfc1123Name; use k8s_openapi::api::{ core::v1::{Pod, Secret}, rbac::v1::{ClusterRoleBinding, RoleRef, Subject}, }; -use kube::api::{DynamicObject, GroupVersionKind, ObjectMeta}; +use kube::api::{GroupVersionKind, ObjectMeta}; use log::{debug, info, trace, warn}; use serde::Serialize; use tokio::sync::OnceCell; @@ -28,28 +27,7 @@ use crate::{ score_cert_management::CertificateManagementScore, }, k3d::K3DInstallationScore, - k8s::ingress::{K8sIngressScore, PathType}, - monitoring::{ - grafana::{grafana::Grafana, helm::helm_grafana::grafana_helm_chart_score}, - kube_prometheus::crd::{ - crd_alertmanager_config::CRDPrometheus, - crd_grafana::{ - Grafana as GrafanaCRD, GrafanaCom, GrafanaDashboard, - GrafanaDashboardDatasource, GrafanaDashboardSpec, GrafanaDatasource, - GrafanaDatasourceConfig, GrafanaDatasourceJsonData, - GrafanaDatasourceSecureJsonData, GrafanaDatasourceSpec, GrafanaSpec, - }, - crd_prometheuses::LabelSelector, - prometheus_operator::prometheus_operator_helm_chart_score, - rhob_alertmanager_config::RHOBObservability, - service_monitor::ServiceMonitor, - }, - }, okd::{crd::ingresses_config::Ingress as IngressResource, route::OKDTlsPassthroughScore}, - prometheus::{ - k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore, - prometheus::PrometheusMonitoring, rhob_alerting_score::RHOBAlertingScore, - }, }, score::Score, topology::{TlsRoute, TlsRouter, ingress::Ingress}, @@ -59,7 +37,6 @@ use super::super::{ DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, PreparationError, PreparationOutcome, Topology, k8s::K8sClient, - oberservability::monitoring::AlertReceiver, tenant::{ TenantConfig, TenantManager, k8s::K8sTenantManager, @@ -173,216 +150,6 @@ impl TlsRouter for K8sAnywhereTopology { } } -#[async_trait] -impl Grafana for K8sAnywhereTopology { - async fn ensure_grafana_operator( - &self, - inventory: &Inventory, - ) -> Result { - debug!("ensure grafana operator"); - let client = self.k8s_client().await.unwrap(); - let grafana_gvk = GroupVersionKind { - group: "grafana.integreatly.org".to_string(), - version: "v1beta1".to_string(), - kind: "Grafana".to_string(), - }; - let name = "grafanas.grafana.integreatly.org"; - let ns = "grafana"; - - let grafana_crd = client - .get_resource_json_value(name, Some(ns), &grafana_gvk) - .await; - match grafana_crd { - Ok(_) => { - return Ok(PreparationOutcome::Success { - details: "Found grafana CRDs in cluster".to_string(), - }); - } - - Err(_) => { - return self - .install_grafana_operator(inventory, Some("grafana")) - .await; - } - }; - } - async fn install_grafana(&self) -> Result { - let ns = "grafana"; - - let mut label = BTreeMap::new(); - - label.insert("dashboards".to_string(), "grafana".to_string()); - - let label_selector = LabelSelector { - match_labels: label.clone(), - match_expressions: vec![], - }; - - let client = self.k8s_client().await?; - - let grafana = self.build_grafana(ns, &label); - - client.apply(&grafana, Some(ns)).await?; - //TODO change this to a ensure ready or something better than just a timeout - client - .wait_until_deployment_ready( - "grafana-grafana-deployment", - Some("grafana"), - Some(Duration::from_secs(30)), - ) - .await?; - - let sa_name = "grafana-grafana-sa"; - let token_secret_name = "grafana-sa-token-secret"; - - let sa_token_secret = self.build_sa_token_secret(token_secret_name, sa_name, ns); - - client.apply(&sa_token_secret, Some(ns)).await?; - let secret_gvk = GroupVersionKind { - group: "".to_string(), - version: "v1".to_string(), - kind: "Secret".to_string(), - }; - - let secret = client - .get_resource_json_value(token_secret_name, Some(ns), &secret_gvk) - .await?; - - let token = format!( - "Bearer {}", - self.extract_and_normalize_token(&secret).unwrap() - ); - - debug!("creating grafana clusterrole binding"); - - let clusterrolebinding = - self.build_cluster_rolebinding(sa_name, "cluster-monitoring-view", ns); - - client.apply(&clusterrolebinding, Some(ns)).await?; - - debug!("creating grafana datasource crd"); - - let thanos_url = format!( - "https://{}", - self.get_domain("thanos-querier-openshift-monitoring") - .await - .unwrap() - ); - - let thanos_openshift_datasource = self.build_grafana_datasource( - "thanos-openshift-monitoring", - ns, - &label_selector, - &thanos_url, - &token, - ); - - client.apply(&thanos_openshift_datasource, Some(ns)).await?; - - debug!("creating grafana dashboard crd"); - let dashboard = self.build_grafana_dashboard(ns, &label_selector); - - client.apply(&dashboard, Some(ns)).await?; - debug!("creating grafana ingress"); - let grafana_ingress = self.build_grafana_ingress(ns).await; - - grafana_ingress - .interpret(&Inventory::empty(), self) - .await - .map_err(|e| PreparationError::new(e.to_string()))?; - - Ok(PreparationOutcome::Success { - details: "Installed grafana composants".to_string(), - }) - } -} - -#[async_trait] -impl PrometheusMonitoring for K8sAnywhereTopology { - async fn install_prometheus( - &self, - sender: &CRDPrometheus, - _inventory: &Inventory, - _receivers: Option>>>, - ) -> Result { - let client = self.k8s_client().await?; - - for monitor in sender.service_monitor.iter() { - client - .apply(monitor, Some(&sender.namespace)) - .await - .map_err(|e| PreparationError::new(e.to_string()))?; - } - Ok(PreparationOutcome::Success { - details: "successfuly installed prometheus components".to_string(), - }) - } - - async fn ensure_prometheus_operator( - &self, - sender: &CRDPrometheus, - _inventory: &Inventory, - ) -> Result { - let po_result = self.ensure_prometheus_operator(sender).await?; - - match po_result { - PreparationOutcome::Success { details: _ } => { - debug!("Detected prometheus crds operator present in cluster."); - return Ok(po_result); - } - PreparationOutcome::Noop => { - debug!("Skipping Prometheus CR installation due to missing operator."); - return Ok(po_result); - } - } - } -} - -#[async_trait] -impl PrometheusMonitoring for K8sAnywhereTopology { - async fn install_prometheus( - &self, - sender: &RHOBObservability, - inventory: &Inventory, - receivers: Option>>>, - ) -> Result { - let po_result = self.ensure_cluster_observability_operator(sender).await?; - - if po_result == PreparationOutcome::Noop { - debug!("Skipping Prometheus CR installation due to missing operator."); - return Ok(po_result); - } - - let result = self - .get_cluster_observability_operator_prometheus_application_score( - sender.clone(), - receivers, - ) - .await - .interpret(inventory, self) - .await; - - match result { - Ok(outcome) => match outcome.status { - InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success { - details: outcome.message, - }), - InterpretStatus::NOOP => Ok(PreparationOutcome::Noop), - _ => Err(PreparationError::new(outcome.message)), - }, - Err(err) => Err(PreparationError::new(err.to_string())), - } - } - - async fn ensure_prometheus_operator( - &self, - sender: &RHOBObservability, - inventory: &Inventory, - ) -> Result { - todo!() - } -} - impl Serialize for K8sAnywhereTopology { fn serialize(&self, _serializer: S) -> Result where @@ -587,23 +354,6 @@ impl K8sAnywhereTopology { } } - fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option { - let token_b64 = secret - .data - .get("token") - .or_else(|| secret.data.get("data").and_then(|d| d.get("token"))) - .and_then(|v| v.as_str())?; - - let bytes = general_purpose::STANDARD.decode(token_b64).ok()?; - - let s = String::from_utf8(bytes).ok()?; - - let cleaned = s - .trim_matches(|c: char| c.is_whitespace() || c == '\0') - .to_string(); - Some(cleaned) - } - pub async fn get_k8s_distribution(&self) -> Result { self.k8s_client() .await? @@ -663,141 +413,6 @@ impl K8sAnywhereTopology { } } - fn build_grafana_datasource( - &self, - name: &str, - ns: &str, - label_selector: &LabelSelector, - url: &str, - token: &str, - ) -> GrafanaDatasource { - let mut json_data = BTreeMap::new(); - json_data.insert("timeInterval".to_string(), "5s".to_string()); - - GrafanaDatasource { - metadata: ObjectMeta { - name: Some(name.to_string()), - namespace: Some(ns.to_string()), - ..Default::default() - }, - spec: GrafanaDatasourceSpec { - instance_selector: label_selector.clone(), - allow_cross_namespace_import: Some(true), - values_from: None, - datasource: GrafanaDatasourceConfig { - access: "proxy".to_string(), - name: name.to_string(), - r#type: "prometheus".to_string(), - url: url.to_string(), - database: None, - json_data: Some(GrafanaDatasourceJsonData { - time_interval: Some("60s".to_string()), - http_header_name1: Some("Authorization".to_string()), - tls_skip_verify: Some(true), - oauth_pass_thru: Some(true), - }), - secure_json_data: Some(GrafanaDatasourceSecureJsonData { - http_header_value1: Some(format!("Bearer {token}")), - }), - is_default: Some(false), - editable: Some(true), - }, - }, - } - } - - fn build_grafana_dashboard( - &self, - ns: &str, - label_selector: &LabelSelector, - ) -> GrafanaDashboard { - let graf_dashboard = GrafanaDashboard { - metadata: ObjectMeta { - name: Some(format!("grafana-dashboard-{}", ns)), - namespace: Some(ns.to_string()), - ..Default::default() - }, - spec: GrafanaDashboardSpec { - resync_period: Some("30s".to_string()), - instance_selector: label_selector.clone(), - datasources: Some(vec![GrafanaDashboardDatasource { - input_name: "DS_PROMETHEUS".to_string(), - datasource_name: "thanos-openshift-monitoring".to_string(), - }]), - json: None, - grafana_com: Some(GrafanaCom { - id: 17406, - revision: None, - }), - }, - }; - graf_dashboard - } - - fn build_grafana(&self, ns: &str, labels: &BTreeMap) -> GrafanaCRD { - let grafana = GrafanaCRD { - metadata: ObjectMeta { - name: Some(format!("grafana-{}", ns)), - namespace: Some(ns.to_string()), - labels: Some(labels.clone()), - ..Default::default() - }, - spec: GrafanaSpec { - config: None, - admin_user: None, - admin_password: None, - ingress: None, - persistence: None, - resources: None, - }, - }; - grafana - } - - async fn build_grafana_ingress(&self, ns: &str) -> K8sIngressScore { - let domain = self.get_domain(&format!("grafana-{}", ns)).await.unwrap(); - let name = format!("{}-grafana", ns); - let backend_service = format!("grafana-{}-service", ns); - - K8sIngressScore { - name: fqdn::fqdn!(&name), - host: fqdn::fqdn!(&domain), - backend_service: fqdn::fqdn!(&backend_service), - port: 3000, - path: Some("/".to_string()), - path_type: Some(PathType::Prefix), - namespace: Some(fqdn::fqdn!(&ns)), - ingress_class_name: Some("openshift-default".to_string()), - } - } - - async fn get_cluster_observability_operator_prometheus_application_score( - &self, - sender: RHOBObservability, - receivers: Option>>>, - ) -> RHOBAlertingScore { - RHOBAlertingScore { - sender, - receivers: receivers.unwrap_or_default(), - service_monitors: vec![], - prometheus_rules: vec![], - } - } - - async fn get_k8s_prometheus_application_score( - &self, - sender: CRDPrometheus, - receivers: Option>>>, - service_monitors: Option>, - ) -> K8sPrometheusCRDAlertingScore { - return K8sPrometheusCRDAlertingScore { - sender, - receivers: receivers.unwrap_or_default(), - service_monitors: service_monitors.unwrap_or_default(), - prometheus_rules: vec![], - }; - } - async fn openshift_ingress_operator_available(&self) -> Result<(), PreparationError> { let client = self.k8s_client().await?; let gvk = GroupVersionKind { @@ -963,137 +578,6 @@ impl K8sAnywhereTopology { )), } } - - async fn ensure_cluster_observability_operator( - &self, - sender: &RHOBObservability, - ) -> Result { - let status = Command::new("sh") - .args(["-c", "kubectl get crd -A | grep -i rhobs"]) - .status() - .map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?; - - if !status.success() { - if let Some(Some(k8s_state)) = self.k8s_state.get() { - match k8s_state.source { - K8sSource::LocalK3d => { - warn!( - "Installing observability operator is not supported on LocalK3d source" - ); - return Ok(PreparationOutcome::Noop); - debug!("installing cluster observability operator"); - todo!(); - let op_score = - prometheus_operator_helm_chart_score(sender.namespace.clone()); - let result = op_score.interpret(&Inventory::empty(), self).await; - - return match result { - Ok(outcome) => match outcome.status { - InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success { - details: "installed cluster observability operator".into(), - }), - InterpretStatus::NOOP => Ok(PreparationOutcome::Noop), - _ => Err(PreparationError::new( - "failed to install cluster observability operator (unknown error)".into(), - )), - }, - Err(err) => Err(PreparationError::new(err.to_string())), - }; - } - K8sSource::Kubeconfig => { - debug!( - "unable to install cluster observability operator, contact cluster admin" - ); - return Ok(PreparationOutcome::Noop); - } - } - } else { - warn!( - "Unable to detect k8s_state. Skipping Cluster Observability Operator install." - ); - return Ok(PreparationOutcome::Noop); - } - } - - debug!("Cluster Observability Operator is already present, skipping install"); - - Ok(PreparationOutcome::Success { - details: "cluster observability operator present in cluster".into(), - }) - } - - async fn ensure_prometheus_operator( - &self, - sender: &CRDPrometheus, - ) -> Result { - let status = Command::new("sh") - .args(["-c", "kubectl get crd -A | grep -i prometheuses"]) - .status() - .map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?; - - if !status.success() { - if let Some(Some(k8s_state)) = self.k8s_state.get() { - match k8s_state.source { - K8sSource::LocalK3d => { - debug!("installing prometheus operator"); - let op_score = - prometheus_operator_helm_chart_score(sender.namespace.clone()); - let result = op_score.interpret(&Inventory::empty(), self).await; - - return match result { - Ok(outcome) => match outcome.status { - InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success { - details: "installed prometheus operator".into(), - }), - InterpretStatus::NOOP => Ok(PreparationOutcome::Noop), - _ => Err(PreparationError::new( - "failed to install prometheus operator (unknown error)".into(), - )), - }, - Err(err) => Err(PreparationError::new(err.to_string())), - }; - } - K8sSource::Kubeconfig => { - debug!("unable to install prometheus operator, contact cluster admin"); - return Ok(PreparationOutcome::Noop); - } - } - } else { - warn!("Unable to detect k8s_state. Skipping Prometheus Operator install."); - return Ok(PreparationOutcome::Noop); - } - } - - debug!("Prometheus operator is already present, skipping install"); - - Ok(PreparationOutcome::Success { - details: "prometheus operator present in cluster".into(), - }) - } - - async fn install_grafana_operator( - &self, - inventory: &Inventory, - ns: Option<&str>, - ) -> Result { - let namespace = ns.unwrap_or("grafana"); - info!("installing grafana operator in ns {namespace}"); - let tenant = self.get_k8s_tenant_manager()?.get_tenant_config().await; - let mut namespace_scope = false; - if tenant.is_some() { - namespace_scope = true; - } - let _grafana_operator_score = grafana_helm_chart_score(namespace, namespace_scope) - .interpret(inventory, self) - .await - .map_err(|e| PreparationError::new(e.to_string())); - Ok(PreparationOutcome::Success { - details: format!( - "Successfully installed grafana operator in ns {}", - ns.unwrap() - ), - }) - } } #[derive(Clone, Debug)] diff --git a/harmony/src/domain/topology/k8s_anywhere/mod.rs b/harmony/src/domain/topology/k8s_anywhere/mod.rs index d14c28ba..a10dd38a 100644 --- a/harmony/src/domain/topology/k8s_anywhere/mod.rs +++ b/harmony/src/domain/topology/k8s_anywhere/mod.rs @@ -1,4 +1,5 @@ mod k8s_anywhere; pub mod nats; +pub mod observability; mod postgres; pub use k8s_anywhere::*; diff --git a/harmony/src/domain/topology/k8s_anywhere/observability/grafana.rs b/harmony/src/domain/topology/k8s_anywhere/observability/grafana.rs new file mode 100644 index 00000000..7f06a2af --- /dev/null +++ b/harmony/src/domain/topology/k8s_anywhere/observability/grafana.rs @@ -0,0 +1,147 @@ +use async_trait::async_trait; + +use crate::{ + inventory::Inventory, + modules::monitoring::grafana::{ + grafana::Grafana, + k8s::{ + score_ensure_grafana_ready::GrafanaK8sEnsureReadyScore, + score_grafana_alert_receiver::GrafanaK8sReceiverScore, + score_grafana_datasource::GrafanaK8sDatasourceScore, + score_grafana_rule::GrafanaK8sRuleScore, score_install_grafana::GrafanaK8sInstallScore, + }, + }, + score::Score, + topology::{ + K8sAnywhereTopology, PreparationError, PreparationOutcome, + monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget}, + }, +}; + +#[async_trait] +impl Observability for K8sAnywhereTopology { + async fn install_alert_sender( + &self, + sender: &Grafana, + inventory: &Inventory, + ) -> Result { + let score = GrafanaK8sInstallScore { + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Grafana not installed {}", e)))?; + Ok(PreparationOutcome::Success { + details: "Successfully installed grafana alert sender".to_string(), + }) + } + + async fn install_receivers( + &self, + sender: &Grafana, + inventory: &Inventory, + receivers: Option>>>, + ) -> Result { + let receivers = match receivers { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for receiver in receivers { + let score = GrafanaK8sReceiverScore { + receiver, + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All alert receivers installed successfully".to_string(), + }) + } + + async fn install_rules( + &self, + sender: &Grafana, + inventory: &Inventory, + rules: Option>>>, + ) -> Result { + let rules = match rules { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for rule in rules { + let score = GrafanaK8sRuleScore { + sender: sender.clone(), + rule, + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All alert rules installed successfully".to_string(), + }) + } + + async fn add_scrape_targets( + &self, + sender: &Grafana, + inventory: &Inventory, + scrape_targets: Option>>>, + ) -> Result { + let scrape_targets = match scrape_targets { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for scrape_target in scrape_targets { + let score = GrafanaK8sDatasourceScore { + scrape_target, + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to add DataSource: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All datasources installed successfully".to_string(), + }) + } + + async fn ensure_monitoring_installed( + &self, + sender: &Grafana, + inventory: &Inventory, + ) -> Result { + let score = GrafanaK8sEnsureReadyScore { + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Grafana not ready {}", e)))?; + Ok(PreparationOutcome::Success { + details: "Grafana Ready".to_string(), + }) + } +} diff --git a/harmony/src/domain/topology/k8s_anywhere/observability/kube_prometheus.rs b/harmony/src/domain/topology/k8s_anywhere/observability/kube_prometheus.rs new file mode 100644 index 00000000..05cd3f6b --- /dev/null +++ b/harmony/src/domain/topology/k8s_anywhere/observability/kube_prometheus.rs @@ -0,0 +1,142 @@ +use async_trait::async_trait; + +use crate::{ + inventory::Inventory, + modules::monitoring::kube_prometheus::{ + KubePrometheus, helm::kube_prometheus_helm_chart::kube_prometheus_helm_chart_score, + score_kube_prometheus_alert_receivers::KubePrometheusReceiverScore, + score_kube_prometheus_ensure_ready::KubePrometheusEnsureReadyScore, + score_kube_prometheus_rule::KubePrometheusRuleScore, + score_kube_prometheus_scrape_target::KubePrometheusScrapeTargetScore, + }, + score::Score, + topology::{ + K8sAnywhereTopology, PreparationError, PreparationOutcome, + monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget}, + }, +}; + +#[async_trait] +impl Observability for K8sAnywhereTopology { + async fn install_alert_sender( + &self, + sender: &KubePrometheus, + inventory: &Inventory, + ) -> Result { + kube_prometheus_helm_chart_score(sender.config.clone()) + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(e.to_string()))?; + + Ok(PreparationOutcome::Success { + details: "Successfully installed kubeprometheus alert sender".to_string(), + }) + } + + async fn install_receivers( + &self, + sender: &KubePrometheus, + inventory: &Inventory, + receivers: Option>>>, + ) -> Result { + let receivers = match receivers { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for receiver in receivers { + let score = KubePrometheusReceiverScore { + receiver, + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All alert receivers installed successfully".to_string(), + }) + } + + async fn install_rules( + &self, + sender: &KubePrometheus, + inventory: &Inventory, + rules: Option>>>, + ) -> Result { + let rules = match rules { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for rule in rules { + let score = KubePrometheusRuleScore { + sender: sender.clone(), + rule, + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All alert rules installed successfully".to_string(), + }) + } + + async fn add_scrape_targets( + &self, + sender: &KubePrometheus, + inventory: &Inventory, + scrape_targets: Option>>>, + ) -> Result { + let scrape_targets = match scrape_targets { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for scrape_target in scrape_targets { + let score = KubePrometheusScrapeTargetScore { + scrape_target, + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All scrap targets installed successfully".to_string(), + }) + } + + async fn ensure_monitoring_installed( + &self, + sender: &KubePrometheus, + inventory: &Inventory, + ) -> Result { + let score = KubePrometheusEnsureReadyScore { + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("KubePrometheus not ready {}", e)))?; + Ok(PreparationOutcome::Success { + details: "KubePrometheus Ready".to_string(), + }) + } +} diff --git a/harmony/src/domain/topology/k8s_anywhere/observability/mod.rs b/harmony/src/domain/topology/k8s_anywhere/observability/mod.rs new file mode 100644 index 00000000..6e105557 --- /dev/null +++ b/harmony/src/domain/topology/k8s_anywhere/observability/mod.rs @@ -0,0 +1,5 @@ +pub mod grafana; +pub mod kube_prometheus; +pub mod openshift_monitoring; +pub mod prometheus; +pub mod redhat_cluster_observability; diff --git a/harmony/src/domain/topology/k8s_anywhere/observability/openshift_monitoring.rs b/harmony/src/domain/topology/k8s_anywhere/observability/openshift_monitoring.rs new file mode 100644 index 00000000..aa42a220 --- /dev/null +++ b/harmony/src/domain/topology/k8s_anywhere/observability/openshift_monitoring.rs @@ -0,0 +1,142 @@ +use async_trait::async_trait; +use log::info; + +use crate::score::Score; +use crate::{ + inventory::Inventory, + modules::monitoring::okd::{ + OpenshiftClusterAlertSender, + score_enable_cluster_monitoring::OpenshiftEnableClusterMonitoringScore, + score_openshift_alert_rule::OpenshiftAlertRuleScore, + score_openshift_receiver::OpenshiftReceiverScore, + score_openshift_scrape_target::OpenshiftScrapeTargetScore, + score_user_workload::OpenshiftUserWorkloadMonitoring, + score_verify_user_workload_monitoring::VerifyUserWorkload, + }, + topology::{ + K8sAnywhereTopology, PreparationError, PreparationOutcome, + monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget}, + }, +}; + +#[async_trait] +impl Observability for K8sAnywhereTopology { + async fn install_alert_sender( + &self, + _sender: &OpenshiftClusterAlertSender, + inventory: &Inventory, + ) -> Result { + info!("enabling cluster monitoring"); + let cluster_monitoring_score = OpenshiftEnableClusterMonitoringScore {}; + cluster_monitoring_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError { msg: e.to_string() })?; + + info!("enabling user workload monitoring"); + let user_workload_score = OpenshiftUserWorkloadMonitoring {}; + user_workload_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError { msg: e.to_string() })?; + + Ok(PreparationOutcome::Success { + details: "Successfully configured cluster monitoring".to_string(), + }) + } + + async fn install_receivers( + &self, + _sender: &OpenshiftClusterAlertSender, + inventory: &Inventory, + receivers: Option>>>, + ) -> Result { + if let Some(receivers) = receivers { + for receiver in receivers { + info!("Installing receiver {}", receiver.name()); + let receiver_score = OpenshiftReceiverScore { receiver }; + receiver_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError { msg: e.to_string() })?; + } + Ok(PreparationOutcome::Success { + details: "Successfully installed receivers for OpenshiftClusterMonitoring" + .to_string(), + }) + } else { + Ok(PreparationOutcome::Noop) + } + } + + async fn install_rules( + &self, + _sender: &OpenshiftClusterAlertSender, + inventory: &Inventory, + rules: Option>>>, + ) -> Result { + if let Some(rules) = rules { + for rule in rules { + info!("Installing rule "); + let rule_score = OpenshiftAlertRuleScore { rule: rule }; + rule_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError { msg: e.to_string() })?; + } + Ok(PreparationOutcome::Success { + details: "Successfully installed rules for OpenshiftClusterMonitoring".to_string(), + }) + } else { + Ok(PreparationOutcome::Noop) + } + } + + async fn add_scrape_targets( + &self, + _sender: &OpenshiftClusterAlertSender, + inventory: &Inventory, + scrape_targets: Option>>>, + ) -> Result { + if let Some(scrape_targets) = scrape_targets { + for scrape_target in scrape_targets { + info!("Installing scrape target"); + let scrape_target_score = OpenshiftScrapeTargetScore { + scrape_target: scrape_target, + }; + scrape_target_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError { msg: e.to_string() })?; + } + Ok(PreparationOutcome::Success { + details: "Successfully added scrape targets for OpenshiftClusterMonitoring" + .to_string(), + }) + } else { + Ok(PreparationOutcome::Noop) + } + } + + async fn ensure_monitoring_installed( + &self, + _sender: &OpenshiftClusterAlertSender, + inventory: &Inventory, + ) -> Result { + let verify_monitoring_score = VerifyUserWorkload {}; + info!("Verifying user workload and cluster monitoring installed"); + verify_monitoring_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError { msg: e.to_string() })?; + Ok(PreparationOutcome::Success { + details: "OpenshiftClusterMonitoring ready".to_string(), + }) + } +} diff --git a/harmony/src/domain/topology/k8s_anywhere/observability/prometheus.rs b/harmony/src/domain/topology/k8s_anywhere/observability/prometheus.rs new file mode 100644 index 00000000..9085a607 --- /dev/null +++ b/harmony/src/domain/topology/k8s_anywhere/observability/prometheus.rs @@ -0,0 +1,147 @@ +use async_trait::async_trait; + +use crate::{ + inventory::Inventory, + modules::monitoring::prometheus::{ + Prometheus, score_prometheus_alert_receivers::PrometheusReceiverScore, + score_prometheus_ensure_ready::PrometheusEnsureReadyScore, + score_prometheus_install::PrometheusInstallScore, + score_prometheus_rule::PrometheusRuleScore, + score_prometheus_scrape_target::PrometheusScrapeTargetScore, + }, + score::Score, + topology::{ + K8sAnywhereTopology, PreparationError, PreparationOutcome, + monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget}, + }, +}; + +#[async_trait] +impl Observability for K8sAnywhereTopology { + async fn install_alert_sender( + &self, + sender: &Prometheus, + inventory: &Inventory, + ) -> Result { + let score = PrometheusInstallScore { + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Prometheus not installed {}", e)))?; + + Ok(PreparationOutcome::Success { + details: "Successfully installed kubeprometheus alert sender".to_string(), + }) + } + + async fn install_receivers( + &self, + sender: &Prometheus, + inventory: &Inventory, + receivers: Option>>>, + ) -> Result { + let receivers = match receivers { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for receiver in receivers { + let score = PrometheusReceiverScore { + receiver, + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All alert receivers installed successfully".to_string(), + }) + } + + async fn install_rules( + &self, + sender: &Prometheus, + inventory: &Inventory, + rules: Option>>>, + ) -> Result { + let rules = match rules { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for rule in rules { + let score = PrometheusRuleScore { + sender: sender.clone(), + rule, + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All alert rules installed successfully".to_string(), + }) + } + + async fn add_scrape_targets( + &self, + sender: &Prometheus, + inventory: &Inventory, + scrape_targets: Option>>>, + ) -> Result { + let scrape_targets = match scrape_targets { + Some(r) if !r.is_empty() => r, + _ => return Ok(PreparationOutcome::Noop), + }; + + for scrape_target in scrape_targets { + let score = PrometheusScrapeTargetScore { + scrape_target, + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?; + } + + Ok(PreparationOutcome::Success { + details: "All scrap targets installed successfully".to_string(), + }) + } + + async fn ensure_monitoring_installed( + &self, + sender: &Prometheus, + inventory: &Inventory, + ) -> Result { + let score = PrometheusEnsureReadyScore { + sender: sender.clone(), + }; + + score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(format!("Prometheus not ready {}", e)))?; + + Ok(PreparationOutcome::Success { + details: "Prometheus Ready".to_string(), + }) + } +} diff --git a/harmony/src/domain/topology/k8s_anywhere/observability/redhat_cluster_observability.rs b/harmony/src/domain/topology/k8s_anywhere/observability/redhat_cluster_observability.rs new file mode 100644 index 00000000..0a2ada47 --- /dev/null +++ b/harmony/src/domain/topology/k8s_anywhere/observability/redhat_cluster_observability.rs @@ -0,0 +1,114 @@ +use crate::{ + modules::monitoring::red_hat_cluster_observability::{ + score_alert_receiver::RedHatClusterObservabilityReceiverScore, + score_coo_monitoring_stack::RedHatClusterObservabilityMonitoringStackScore, + }, + score::Score, +}; +use async_trait::async_trait; +use log::info; + +use crate::{ + inventory::Inventory, + modules::monitoring::red_hat_cluster_observability::{ + RedHatClusterObservability, + score_redhat_cluster_observability_operator::RedHatClusterObservabilityOperatorScore, + }, + topology::{ + K8sAnywhereTopology, PreparationError, PreparationOutcome, + monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget}, + }, +}; + +#[async_trait] +impl Observability for K8sAnywhereTopology { + async fn install_alert_sender( + &self, + sender: &RedHatClusterObservability, + inventory: &Inventory, + ) -> Result { + info!("Verifying Redhat Cluster Observability Operator"); + + let coo_score = RedHatClusterObservabilityOperatorScore::default(); + + coo_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(e.to_string()))?; + + info!( + "Installing Cluster Observability Operator Monitoring Stack in ns {}", + sender.namespace.clone() + ); + + let coo_monitoring_stack_score = RedHatClusterObservabilityMonitoringStackScore { + namespace: sender.namespace.clone(), + resource_selector: sender.resource_selector.clone(), + }; + + coo_monitoring_stack_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(e.to_string()))?; + + Ok(PreparationOutcome::Success { + details: "Successfully installed RedHatClusterObservability Operator".to_string(), + }) + } + + async fn install_receivers( + &self, + sender: &RedHatClusterObservability, + inventory: &Inventory, + receivers: Option>>>, + ) -> Result { + if let Some(receivers) = receivers { + for receiver in receivers { + info!("Installing receiver {}", receiver.name()); + let receiver_score = RedHatClusterObservabilityReceiverScore { + receiver, + sender: sender.clone(), + }; + receiver_score + .create_interpret() + .execute(inventory, self) + .await + .map_err(|e| PreparationError::new(e.to_string()))?; + } + Ok(PreparationOutcome::Success { + details: "Successfully installed receivers for OpenshiftClusterMonitoring" + .to_string(), + }) + } else { + Ok(PreparationOutcome::Noop) + } + } + + async fn install_rules( + &self, + _sender: &RedHatClusterObservability, + _inventory: &Inventory, + _rules: Option>>>, + ) -> Result { + todo!() + } + + async fn add_scrape_targets( + &self, + _sender: &RedHatClusterObservability, + _inventory: &Inventory, + _scrape_targets: Option>>>, + ) -> Result { + todo!() + } + + async fn ensure_monitoring_installed( + &self, + _sender: &RedHatClusterObservability, + _inventory: &Inventory, + ) -> Result { + todo!() + } +} diff --git a/harmony/src/domain/topology/mod.rs b/harmony/src/domain/topology/mod.rs index 28161fcf..c21b2846 100644 --- a/harmony/src/domain/topology/mod.rs +++ b/harmony/src/domain/topology/mod.rs @@ -2,6 +2,7 @@ pub mod decentralized; mod failover; mod ha_cluster; pub mod ingress; +pub mod monitoring; pub mod node_exporter; pub mod opnsense; pub use failover::*; @@ -11,7 +12,6 @@ mod http; pub mod installable; mod k8s_anywhere; mod localhost; -pub mod oberservability; pub mod tenant; use derive_new::new; pub use k8s_anywhere::*; diff --git a/harmony/src/domain/topology/monitoring.rs b/harmony/src/domain/topology/monitoring.rs new file mode 100644 index 00000000..805dbcc9 --- /dev/null +++ b/harmony/src/domain/topology/monitoring.rs @@ -0,0 +1,234 @@ +use std::{ + any::Any, + collections::{BTreeMap, HashMap}, + net::IpAddr, +}; + +use async_trait::async_trait; +use kube::api::DynamicObject; +use log::{debug, info}; +use serde::{Deserialize, Serialize}; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + topology::{PreparationError, PreparationOutcome, Topology, installable::Installable}, +}; +use harmony_types::id::Id; + +/// Defines the application that sends the alerts to a receivers +/// for example prometheus +#[async_trait] +pub trait AlertSender: Send + Sync + std::fmt::Debug { + fn name(&self) -> String; +} + +/// Trait which defines how an alert sender is impleneted for a specific topology +#[async_trait] +pub trait Observability { + async fn install_alert_sender( + &self, + sender: &S, + inventory: &Inventory, + ) -> Result; + + async fn install_receivers( + &self, + sender: &S, + inventory: &Inventory, + receivers: Option>>>, + ) -> Result; + + async fn install_rules( + &self, + sender: &S, + inventory: &Inventory, + rules: Option>>>, + ) -> Result; + + async fn add_scrape_targets( + &self, + sender: &S, + inventory: &Inventory, + scrape_targets: Option>>>, + ) -> Result; + + async fn ensure_monitoring_installed( + &self, + sender: &S, + inventory: &Inventory, + ) -> Result; +} + +/// Defines the entity that receives the alerts from a sender. For example Discord, Slack, etc +/// +pub trait AlertReceiver: std::fmt::Debug + Send + Sync { + fn build_route(&self) -> Result; + fn build_receiver(&self) -> Result; + fn name(&self) -> String; + fn clone_box(&self) -> Box>; +} + +/// Defines a generic rule that can be applied to a sender, such as aprometheus alert rule +pub trait AlertRule: std::fmt::Debug + Send + Sync { + fn build_rule(&self) -> Result; + fn name(&self) -> String; + fn clone_box(&self) -> Box>; +} + +/// A generic scrape target that can be added to a sender to scrape metrics from, for example a +/// server outside of the cluster +pub trait ScrapeTarget: std::fmt::Debug + Send + Sync { + fn build_scrape_target(&self) -> Result; + fn name(&self) -> String; + fn clone_box(&self) -> Box>; +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExternalScrapeTarget { + pub ip: IpAddr, + pub port: i32, + pub interval: Option, + pub path: Option, + pub labels: Option>, +} + +/// Alerting interpret to install an alert sender on a given topology +#[derive(Debug)] +pub struct AlertingInterpret { + pub sender: S, + pub receivers: Vec>>, + pub rules: Vec>>, + pub scrape_targets: Option>>>, +} + +#[async_trait] +impl> Interpret for AlertingInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + info!("Configuring alert sender {}", self.sender.name()); + topology + .install_alert_sender(&self.sender, inventory) + .await?; + + info!("Installing receivers"); + topology + .install_receivers(&self.sender, inventory, Some(self.receivers.clone())) + .await?; + + info!("Installing rules"); + topology + .install_rules(&self.sender, inventory, Some(self.rules.clone())) + .await?; + + info!("Adding extra scrape targets"); + topology + .add_scrape_targets(&self.sender, inventory, self.scrape_targets.clone()) + .await?; + + info!("Ensuring alert sender {} is ready", self.sender.name()); + topology + .ensure_monitoring_installed(&self.sender, inventory) + .await?; + + Ok(Outcome::success(format!( + "successfully installed alert sender {}", + self.sender.name() + ))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Alerting + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} + +impl Clone for Box> { + fn clone(&self) -> Self { + self.clone_box() + } +} + +impl Clone for Box> { + fn clone(&self) -> Self { + self.clone_box() + } +} + +impl Clone for Box> { + fn clone(&self) -> Self { + self.clone_box() + } +} + +///Generic routing that can map to various alert sender backends +#[derive(Debug, Clone, Serialize)] +pub struct AlertRoute { + pub receiver: String, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub matchers: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub group_by: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub repeat_interval: Option, + #[serde(rename = "continue")] + pub continue_matching: bool, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub children: Vec, +} + +impl AlertRoute { + pub fn default(name: String) -> Self { + Self { + receiver: name, + matchers: vec![], + group_by: vec![], + repeat_interval: Some("30s".to_string()), + continue_matching: true, + children: vec![], + } + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct AlertMatcher { + pub label: String, + pub operator: MatchOp, + pub value: String, +} + +#[derive(Debug, Clone)] +pub enum MatchOp { + Eq, + NotEq, + Regex, +} + +impl Serialize for MatchOp { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let op = match self { + MatchOp::Eq => "=", + MatchOp::NotEq => "!=", + MatchOp::Regex => "=~", + }; + serializer.serialize_str(op) + } +} diff --git a/harmony/src/domain/topology/oberservability/mod.rs b/harmony/src/domain/topology/oberservability/mod.rs deleted file mode 100644 index 7f2ac952..00000000 --- a/harmony/src/domain/topology/oberservability/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod monitoring; diff --git a/harmony/src/domain/topology/oberservability/monitoring.rs b/harmony/src/domain/topology/oberservability/monitoring.rs deleted file mode 100644 index 78bb1419..00000000 --- a/harmony/src/domain/topology/oberservability/monitoring.rs +++ /dev/null @@ -1,101 +0,0 @@ -use std::{any::Any, collections::HashMap}; - -use async_trait::async_trait; -use kube::api::DynamicObject; -use log::debug; - -use crate::{ - data::Version, - interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, - inventory::Inventory, - topology::{Topology, installable::Installable}, -}; -use harmony_types::id::Id; - -#[async_trait] -pub trait AlertSender: Send + Sync + std::fmt::Debug { - fn name(&self) -> String; -} - -#[derive(Debug)] -pub struct AlertingInterpret { - pub sender: S, - pub receivers: Vec>>, - pub rules: Vec>>, - pub scrape_targets: Option>>>, -} - -#[async_trait] -impl, T: Topology> Interpret for AlertingInterpret { - async fn execute( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result { - debug!("hit sender configure for AlertingInterpret"); - self.sender.configure(inventory, topology).await?; - for receiver in self.receivers.iter() { - receiver.install(&self.sender).await?; - } - for rule in self.rules.iter() { - debug!("installing rule: {:#?}", rule); - rule.install(&self.sender).await?; - } - if let Some(targets) = &self.scrape_targets { - for target in targets.iter() { - debug!("installing scrape_target: {:#?}", target); - target.install(&self.sender).await?; - } - } - self.sender.ensure_installed(inventory, topology).await?; - Ok(Outcome::success(format!( - "successfully installed alert sender {}", - self.sender.name() - ))) - } - - fn get_name(&self) -> InterpretName { - InterpretName::Alerting - } - - fn get_version(&self) -> Version { - todo!() - } - - fn get_status(&self) -> InterpretStatus { - todo!() - } - - fn get_children(&self) -> Vec { - todo!() - } -} - -#[async_trait] -pub trait AlertReceiver: std::fmt::Debug + Send + Sync { - async fn install(&self, sender: &S) -> Result; - fn name(&self) -> String; - fn clone_box(&self) -> Box>; - fn as_any(&self) -> &dyn Any; - fn as_alertmanager_receiver(&self) -> Result; -} - -#[derive(Debug)] -pub struct AlertManagerReceiver { - pub receiver_config: serde_json::Value, - // FIXME we should not leak k8s here. DynamicObject is k8s specific - pub additional_ressources: Vec, - pub route_config: serde_json::Value, -} - -#[async_trait] -pub trait AlertRule: std::fmt::Debug + Send + Sync { - async fn install(&self, sender: &S) -> Result; - fn clone_box(&self) -> Box>; -} - -#[async_trait] -pub trait ScrapeTarget: std::fmt::Debug + Send + Sync { - async fn install(&self, sender: &S) -> Result; - fn clone_box(&self) -> Box>; -} diff --git a/harmony/src/modules/application/features/monitoring.rs b/harmony/src/modules/application/features/monitoring.rs index 6f5bb214..28186e4f 100644 --- a/harmony/src/modules/application/features/monitoring.rs +++ b/harmony/src/modules/application/features/monitoring.rs @@ -2,13 +2,15 @@ use crate::modules::application::{ Application, ApplicationFeature, InstallationError, InstallationOutcome, }; use crate::modules::monitoring::application_monitoring::application_monitoring_score::ApplicationMonitoringScore; -use crate::modules::monitoring::grafana::grafana::Grafana; -use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus; use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{ ServiceMonitor, ServiceMonitorSpec, }; +use crate::modules::monitoring::prometheus::Prometheus; +use crate::modules::monitoring::prometheus::helm::prometheus_config::PrometheusConfig; use crate::topology::MultiTargetTopology; use crate::topology::ingress::Ingress; +use crate::topology::monitoring::AlertReceiver; +use crate::topology::monitoring::Observability; use crate::{ inventory::Inventory, modules::monitoring::{ @@ -17,10 +19,6 @@ use crate::{ score::Score, topology::{HelmCommand, K8sclient, Topology, tenant::TenantManager}, }; -use crate::{ - modules::prometheus::prometheus::PrometheusMonitoring, - topology::oberservability::monitoring::AlertReceiver, -}; use async_trait::async_trait; use base64::{Engine as _, engine::general_purpose}; use harmony_secret::SecretManager; @@ -30,12 +28,12 @@ use kube::api::ObjectMeta; use log::{debug, info}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; #[derive(Debug, Clone)] pub struct Monitoring { pub application: Arc, - pub alert_receiver: Vec>>, + pub alert_receiver: Vec>>, } #[async_trait] @@ -46,8 +44,7 @@ impl< + TenantManager + K8sclient + MultiTargetTopology - + PrometheusMonitoring - + Grafana + + Observability + Ingress + std::fmt::Debug, > ApplicationFeature for Monitoring @@ -74,10 +71,8 @@ impl< }; let mut alerting_score = ApplicationMonitoringScore { - sender: CRDPrometheus { - namespace: namespace.clone(), - client: topology.k8s_client().await.unwrap(), - service_monitor: vec![app_service_monitor], + sender: Prometheus { + config: Arc::new(Mutex::new(PrometheusConfig::new())), }, application: self.application.clone(), receivers: self.alert_receiver.clone(), @@ -119,11 +114,12 @@ impl< ), }; - alerting_score.receivers.push(Box::new(ntfy_receiver)); - alerting_score - .interpret(&Inventory::empty(), topology) - .await - .map_err(|e| e.to_string())?; + todo!(); + // alerting_score.receivers.push(Box::new(ntfy_receiver)); + // alerting_score + // .interpret(&Inventory::empty(), topology) + // .await + // .map_err(|e| e.to_string())?; Ok(InstallationOutcome::success()) } diff --git a/harmony/src/modules/application/features/rhob_monitoring.rs b/harmony/src/modules/application/features/rhob_monitoring.rs index 60c7aca9..02150ef0 100644 --- a/harmony/src/modules/application/features/rhob_monitoring.rs +++ b/harmony/src/modules/application/features/rhob_monitoring.rs @@ -3,11 +3,13 @@ use std::sync::Arc; use crate::modules::application::{ Application, ApplicationFeature, InstallationError, InstallationOutcome, }; -use crate::modules::monitoring::application_monitoring::rhobs_application_monitoring_score::ApplicationRHOBMonitoringScore; -use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability; +use crate::modules::monitoring::red_hat_cluster_observability::RedHatClusterObservability; +use crate::modules::monitoring::red_hat_cluster_observability::redhat_cluster_observability::RedHatClusterObservabilityScore; use crate::topology::MultiTargetTopology; use crate::topology::ingress::Ingress; +use crate::topology::monitoring::AlertReceiver; +use crate::topology::monitoring::Observability; use crate::{ inventory::Inventory, modules::monitoring::{ @@ -16,10 +18,6 @@ use crate::{ score::Score, topology::{HelmCommand, K8sclient, Topology, tenant::TenantManager}, }; -use crate::{ - modules::prometheus::prometheus::PrometheusMonitoring, - topology::oberservability::monitoring::AlertReceiver, -}; use async_trait::async_trait; use base64::{Engine as _, engine::general_purpose}; use harmony_types::net::Url; @@ -28,9 +26,10 @@ use log::{debug, info}; #[derive(Debug, Clone)] pub struct Monitoring { pub application: Arc, - pub alert_receiver: Vec>>, + pub alert_receiver: Vec>>, } +///TODO TEST this #[async_trait] impl< T: Topology @@ -41,7 +40,7 @@ impl< + MultiTargetTopology + Ingress + std::fmt::Debug - + PrometheusMonitoring, + + Observability, > ApplicationFeature for Monitoring { async fn ensure_installed( @@ -55,13 +54,14 @@ impl< .map(|ns| ns.name.clone()) .unwrap_or_else(|| self.application.name()); - let mut alerting_score = ApplicationRHOBMonitoringScore { - sender: RHOBObservability { + let mut alerting_score = RedHatClusterObservabilityScore { + sender: RedHatClusterObservability { namespace: namespace.clone(), - client: topology.k8s_client().await.unwrap(), + resource_selector: todo!(), }, - application: self.application.clone(), receivers: self.alert_receiver.clone(), + rules: vec![], + scrape_targets: None, }; let domain = topology .get_domain("ntfy") diff --git a/harmony/src/modules/k8s/resource.rs b/harmony/src/modules/k8s/resource.rs index dde73390..9bbcd78b 100644 --- a/harmony/src/modules/k8s/resource.rs +++ b/harmony/src/modules/k8s/resource.rs @@ -9,7 +9,7 @@ use crate::{ interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, inventory::Inventory, score::Score, - topology::{K8sclient, Topology}, + topology::{K8sclient, Topology, k8s::ApplyStrategy}, }; use harmony_types::id::Id; @@ -29,7 +29,7 @@ impl K8sResourceScore { } impl< - K: Resource + K: Resource + std::fmt::Debug + Sync + DeserializeOwned @@ -42,6 +42,7 @@ impl< > Score for K8sResourceScore where ::DynamicType: Default, + ::Scope: ApplyStrategy, { fn create_interpret(&self) -> Box> { Box::new(K8sResourceInterpret { @@ -61,7 +62,7 @@ pub struct K8sResourceInterpret { #[async_trait] impl< - K: Resource + K: Resource + Clone + std::fmt::Debug + DeserializeOwned @@ -73,6 +74,7 @@ impl< > Interpret for K8sResourceInterpret where ::DynamicType: Default, + ::Scope: ApplyStrategy, { async fn execute( &self, diff --git a/harmony/src/modules/mod.rs b/harmony/src/modules/mod.rs index 3fa69469..21f2295c 100644 --- a/harmony/src/modules/mod.rs +++ b/harmony/src/modules/mod.rs @@ -18,7 +18,6 @@ pub mod network; pub mod okd; pub mod opnsense; pub mod postgresql; -pub mod prometheus; pub mod storage; pub mod tenant; pub mod tftp; diff --git a/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs b/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs index 94627896..ee3b2387 100644 --- a/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs +++ b/harmony/src/modules/monitoring/alert_channel/discord_alert_channel.rs @@ -1,98 +1,54 @@ -use std::any::Any; -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; -use async_trait::async_trait; -use harmony_types::k8s_name::K8sName; +use crate::modules::monitoring::kube_prometheus::KubePrometheus; +use crate::modules::monitoring::okd::OpenshiftClusterAlertSender; +use crate::modules::monitoring::red_hat_cluster_observability::RedHatClusterObservability; +use crate::topology::monitoring::{AlertRoute, MatchOp}; +use crate::{interpret::InterpretError, topology::monitoring::AlertReceiver}; +use harmony_types::net::Url; use k8s_openapi::api::core::v1::Secret; -use kube::Resource; -use kube::api::{DynamicObject, ObjectMeta}; -use log::{debug, trace}; use serde::Serialize; use serde_json::json; -use serde_yaml::{Mapping, Value}; - -use crate::infra::kube::kube_resource_to_dynamic; -use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{ - AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus, -}; -use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability; -use crate::modules::monitoring::okd::OpenshiftClusterAlertSender; -use crate::topology::oberservability::monitoring::AlertManagerReceiver; -use crate::{ - interpret::{InterpretError, Outcome}, - modules::monitoring::{ - kube_prometheus::{ - prometheus::{KubePrometheus, KubePrometheusReceiver}, - types::{AlertChannelConfig, AlertManagerChannelConfig}, - }, - prometheus::prometheus::{Prometheus, PrometheusReceiver}, - }, - topology::oberservability::monitoring::AlertReceiver, -}; -use harmony_types::net::Url; #[derive(Debug, Clone, Serialize)] -pub struct DiscordWebhook { - pub name: K8sName, +pub struct DiscordReceiver { + pub name: String, pub url: Url, - pub selectors: Vec>, + pub route: AlertRoute, } -impl DiscordWebhook { - fn get_receiver_config(&self) -> Result { - let secret_name = format!("{}-secret", self.name.clone()); - let webhook_key = format!("{}", self.url.clone()); +impl AlertReceiver for DiscordReceiver { + fn build_route(&self) -> Result { + let matchers: Vec = self + .route + .matchers + .iter() + .map(|m| match m.operator { + MatchOp::Eq => format!("{} = {}", m.label, m.value), + MatchOp::NotEq => format!("{} != {}", m.label, m.value), + MatchOp::Regex => format!("{} =~ {}", m.label, m.value), + }) + .collect(); - let mut string_data = BTreeMap::new(); - string_data.insert("webhook-url".to_string(), webhook_key.clone()); - - let secret = Secret { - metadata: kube::core::ObjectMeta { - name: Some(secret_name.clone()), - ..Default::default() - }, - string_data: Some(string_data), - type_: Some("Opaque".to_string()), - ..Default::default() - }; - - let mut matchers: Vec = Vec::new(); - for selector in &self.selectors { - trace!("selector: {:#?}", selector); - for (k, v) in selector { - matchers.push(format!("{} = {}", k, v)); - } - } - - Ok(AlertManagerReceiver { - additional_ressources: vec![kube_resource_to_dynamic(&secret)?], - - receiver_config: json!({ - "name": self.name, - "discord_configs": [ - { - "webhook_url": self.url.clone(), - "title": "{{ template \"discord.default.title\" . }}", - "message": "{{ template \"discord.default.message\" . }}" - } - ] - }), - route_config: json!({ - "receiver": self.name, - "matchers": matchers, - - }), - }) + let route_block = serde_yaml::to_value(json!({ + "receiver": self.name, + "matchers": matchers, + })) + .unwrap(); + Ok(route_block) } -} -#[async_trait] -impl AlertReceiver for DiscordWebhook { - async fn install( - &self, - sender: &OpenshiftClusterAlertSender, - ) -> Result { - todo!() + fn build_receiver(&self) -> Result { + let receiver_block = serde_yaml::to_value(json!({ + "name": self.name, + "discord_configs": [{ + "webhook_url": format!("{}", self.url), + "title": "{{ template \"discord.default.title\" . }}", + "message": "{{ template \"discord.default.message\" . }}" + }] + })) + .map_err(|e| InterpretError::new(e.to_string()))?; + Ok(receiver_block) } fn name(&self) -> String { @@ -102,93 +58,16 @@ impl AlertReceiver for DiscordWebhook { fn clone_box(&self) -> Box> { Box::new(self.clone()) } - - fn as_any(&self) -> &dyn Any { - todo!() - } - - fn as_alertmanager_receiver(&self) -> Result { - self.get_receiver_config() - } } -#[async_trait] -impl AlertReceiver for DiscordWebhook { - fn as_alertmanager_receiver(&self) -> Result { - todo!() +impl AlertReceiver for DiscordReceiver { + fn build_route(&self) -> Result { + serde_yaml::to_value(&self.route).map_err(|e| InterpretError::new(e.to_string())) } - async fn install(&self, sender: &RHOBObservability) -> Result { - let ns = sender.namespace.clone(); - - let config = self.get_receiver_config()?; - for resource in config.additional_ressources.iter() { - todo!("can I apply a dynamicresource"); - // sender.client.apply(resource, Some(&ns)).await; - } - - let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec { - data: json!({ - "route": { - "receiver": self.name, - }, - "receivers": [ - config.receiver_config - ] - }), - }; - - let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig { - metadata: ObjectMeta { - name: Some(self.name.clone().to_string()), - labels: Some(std::collections::BTreeMap::from([( - "alertmanagerConfig".to_string(), - "enabled".to_string(), - )])), - namespace: Some(sender.namespace.clone()), - ..Default::default() - }, - spec, - }; - debug!( - "alertmanager_configs yaml:\n{:#?}", - serde_yaml::to_string(&alertmanager_configs) - ); - debug!( - "alert manager configs: \n{:#?}", - alertmanager_configs.clone() - ); - - sender - .client - .apply(&alertmanager_configs, Some(&sender.namespace)) - .await?; - Ok(Outcome::success(format!( - "installed rhob-alertmanagerconfigs for {}", - self.name - ))) - } - - fn name(&self) -> String { - "webhook-receiver".to_string() - } - - fn clone_box(&self) -> Box> { - Box::new(self.clone()) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -#[async_trait] -impl AlertReceiver for DiscordWebhook { - fn as_alertmanager_receiver(&self) -> Result { - todo!() - } - async fn install(&self, sender: &CRDPrometheus) -> Result { - let ns = sender.namespace.clone(); + fn build_receiver(&self) -> Result { + //FIXME this secret needs to be applied so that the discord Configs for RedHatCO + //CRD AlertmanagerConfigs can access the URL let secret_name = format!("{}-secret", self.name.clone()); let webhook_key = format!("{}", self.url.clone()); @@ -205,206 +84,54 @@ impl AlertReceiver for DiscordWebhook { ..Default::default() }; - let _ = sender.client.apply(&secret, Some(&ns)).await; - - let spec = AlertmanagerConfigSpec { - data: json!({ - "route": { - "receiver": self.name, - }, - "receivers": [ - { - "name": self.name, - "discordConfigs": [ - { - "apiURL": { - "name": secret_name, - "key": "webhook-url", - }, - "title": "{{ template \"discord.default.title\" . }}", - "message": "{{ template \"discord.default.message\" . }}" - } - ] - } - ] - }), - }; - - let alertmanager_configs = AlertmanagerConfig { - metadata: ObjectMeta { - name: Some(self.name.clone().to_string()), - labels: Some(std::collections::BTreeMap::from([( - "alertmanagerConfig".to_string(), - "enabled".to_string(), - )])), - namespace: Some(ns), - ..Default::default() - }, - spec, - }; - - sender - .client - .apply(&alertmanager_configs, Some(&sender.namespace)) - .await?; - Ok(Outcome::success(format!( - "installed crd-alertmanagerconfigs for {}", - self.name - ))) + let receiver_config = json!({ + "name": self.name, + "discordConfigs": [ + { + "apiURL": { + "key": "webhook-url", + "name": format!("{}-secret", self.name) + }, + "title": "{{ template \"discord.default.title\" . }}", + "message": "{{ template \"discord.default.message\" . }}" + } + ] + }); + serde_yaml::to_value(receiver_config).map_err(|e| InterpretError::new(e.to_string())) } + fn name(&self) -> String { - "discord-webhook".to_string() + self.name.clone() } - fn clone_box(&self) -> Box> { + + fn clone_box(&self) -> Box> { Box::new(self.clone()) } - fn as_any(&self) -> &dyn Any { - self - } } -#[async_trait] -impl AlertReceiver for DiscordWebhook { - fn as_alertmanager_receiver(&self) -> Result { - todo!() +impl AlertReceiver for DiscordReceiver { + fn build_route(&self) -> Result { + serde_yaml::to_value(self.route.clone()).map_err(|e| InterpretError::new(e.to_string())) } - async fn install(&self, sender: &Prometheus) -> Result { - sender.install_receiver(self).await + + fn build_receiver(&self) -> Result { + let receiver_block = serde_yaml::to_value(json!({ + "name": self.name, + "discord_configs": [{ + "webhook_url": format!("{}", self.url), + "title": "{{ template \"discord.default.title\" . }}", + "message": "{{ template \"discord.default.message\" . }}" + }] + })) + .map_err(|e| InterpretError::new(e.to_string()))?; + Ok(receiver_block) } + fn name(&self) -> String { - "discord-webhook".to_string() + self.name.clone() } - fn clone_box(&self) -> Box> { - Box::new(self.clone()) - } - fn as_any(&self) -> &dyn Any { - self - } -} -#[async_trait] -impl PrometheusReceiver for DiscordWebhook { - fn name(&self) -> String { - self.name.clone().to_string() - } - async fn configure_receiver(&self) -> AlertManagerChannelConfig { - self.get_config().await - } -} - -#[async_trait] -impl AlertReceiver for DiscordWebhook { - fn as_alertmanager_receiver(&self) -> Result { - todo!() - } - async fn install(&self, sender: &KubePrometheus) -> Result { - sender.install_receiver(self).await - } fn clone_box(&self) -> Box> { Box::new(self.clone()) } - fn name(&self) -> String { - "discord-webhook".to_string() - } - fn as_any(&self) -> &dyn Any { - self - } -} - -#[async_trait] -impl KubePrometheusReceiver for DiscordWebhook { - fn name(&self) -> String { - self.name.clone().to_string() - } - async fn configure_receiver(&self) -> AlertManagerChannelConfig { - self.get_config().await - } -} - -#[async_trait] -impl AlertChannelConfig for DiscordWebhook { - async fn get_config(&self) -> AlertManagerChannelConfig { - let channel_global_config = None; - let channel_receiver = self.alert_channel_receiver().await; - let channel_route = self.alert_channel_route().await; - - AlertManagerChannelConfig { - channel_global_config, - channel_receiver, - channel_route, - } - } -} - -impl DiscordWebhook { - async fn alert_channel_route(&self) -> serde_yaml::Value { - let mut route = Mapping::new(); - route.insert( - Value::String("receiver".to_string()), - Value::String(self.name.clone().to_string()), - ); - route.insert( - Value::String("matchers".to_string()), - Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]), - ); - route.insert(Value::String("continue".to_string()), Value::Bool(true)); - Value::Mapping(route) - } - - async fn alert_channel_receiver(&self) -> serde_yaml::Value { - let mut receiver = Mapping::new(); - receiver.insert( - Value::String("name".to_string()), - Value::String(self.name.clone().to_string()), - ); - - let mut discord_config = Mapping::new(); - discord_config.insert( - Value::String("webhook_url".to_string()), - Value::String(self.url.to_string()), - ); - - receiver.insert( - Value::String("discord_configs".to_string()), - Value::Sequence(vec![Value::Mapping(discord_config)]), - ); - - Value::Mapping(receiver) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn discord_serialize_should_match() { - let discord_receiver = DiscordWebhook { - name: K8sName("test-discord".to_string()), - url: Url::Url(url::Url::parse("https://discord.i.dont.exist.com").unwrap()), - selectors: vec![], - }; - - let discord_receiver_receiver = - serde_yaml::to_string(&discord_receiver.alert_channel_receiver().await).unwrap(); - println!("receiver \n{:#}", discord_receiver_receiver); - let discord_receiver_receiver_yaml = r#"name: test-discord -discord_configs: -- webhook_url: https://discord.i.dont.exist.com/ -"# - .to_string(); - - let discord_receiver_route = - serde_yaml::to_string(&discord_receiver.alert_channel_route().await).unwrap(); - println!("route \n{:#}", discord_receiver_route); - let discord_receiver_route_yaml = r#"receiver: test-discord -matchers: -- alertname!=Watchdog -continue: true -"# - .to_string(); - - assert_eq!(discord_receiver_receiver, discord_receiver_receiver_yaml); - assert_eq!(discord_receiver_route, discord_receiver_route_yaml); - } } diff --git a/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs b/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs index a141df02..24a20496 100644 --- a/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs +++ b/harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs @@ -1,25 +1,13 @@ -use std::any::Any; - -use async_trait::async_trait; -use kube::api::ObjectMeta; -use log::debug; use serde::Serialize; use serde_json::json; -use serde_yaml::{Mapping, Value}; use crate::{ - interpret::{InterpretError, Outcome}, + interpret::InterpretError, modules::monitoring::{ - kube_prometheus::{ - crd::{ - crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability, - }, - prometheus::{KubePrometheus, KubePrometheusReceiver}, - types::{AlertChannelConfig, AlertManagerChannelConfig}, - }, - prometheus::prometheus::{Prometheus, PrometheusReceiver}, + kube_prometheus::KubePrometheus, okd::OpenshiftClusterAlertSender, prometheus::Prometheus, + red_hat_cluster_observability::RedHatClusterObservability, }, - topology::oberservability::monitoring::{AlertManagerReceiver, AlertReceiver}, + topology::monitoring::AlertReceiver, }; use harmony_types::net::Url; @@ -29,279 +17,104 @@ pub struct WebhookReceiver { pub url: Url, } -#[async_trait] -impl AlertReceiver for WebhookReceiver { - fn as_alertmanager_receiver(&self) -> Result { - todo!() - } - async fn install(&self, sender: &RHOBObservability) -> Result { - let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec { - data: json!({ - "route": { - "receiver": self.name, - }, - "receivers": [ - { - "name": self.name, - "webhookConfigs": [ - { - "url": self.url, - "httpConfig": { - "tlsConfig": { - "insecureSkipVerify": true - } - } - } - ] +impl WebhookReceiver { + fn build_receiver(&self) -> serde_json::Value { + json!({ + "name": self.name, + "webhookConfigs": [ + { + "url": self.url, + "httpConfig": { + "tlsConfig": { + "insecureSkipVerify": true } - ] - }), - }; - - let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig { - metadata: ObjectMeta { - name: Some(self.name.clone()), - labels: Some(std::collections::BTreeMap::from([( - "alertmanagerConfig".to_string(), - "enabled".to_string(), - )])), - namespace: Some(sender.namespace.clone()), - ..Default::default() - }, - spec, - }; - debug!( - "alert manager configs: \n{:#?}", - alertmanager_configs.clone() - ); - - sender - .client - .apply(&alertmanager_configs, Some(&sender.namespace)) - .await?; - Ok(Outcome::success(format!( - "installed rhob-alertmanagerconfigs for {}", - self.name - ))) + } + } + ]}) } - fn name(&self) -> String { - "webhook-receiver".to_string() - } - - fn clone_box(&self) -> Box> { - Box::new(self.clone()) - } - - fn as_any(&self) -> &dyn Any { - self + fn build_route(&self) -> serde_json::Value { + json!({ + "name": self.name}) } } -#[async_trait] -impl AlertReceiver for WebhookReceiver { - fn as_alertmanager_receiver(&self) -> Result { - todo!() - } - async fn install(&self, sender: &CRDPrometheus) -> Result { - let spec = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfigSpec { - data: json!({ - "route": { - "receiver": self.name, - }, - "receivers": [ - { - "name": self.name, - "webhookConfigs": [ - { - "url": self.url, - } - ] - } - ] - }), - }; - - let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfig { - metadata: ObjectMeta { - name: Some(self.name.clone()), - labels: Some(std::collections::BTreeMap::from([( - "alertmanagerConfig".to_string(), - "enabled".to_string(), - )])), - namespace: Some(sender.namespace.clone()), - ..Default::default() - }, - spec, - }; - debug!( - "alert manager configs: \n{:#?}", - alertmanager_configs.clone() - ); - - sender - .client - .apply(&alertmanager_configs, Some(&sender.namespace)) - .await?; - Ok(Outcome::success(format!( - "installed crd-alertmanagerconfigs for {}", - self.name - ))) +impl AlertReceiver for WebhookReceiver { + fn build_receiver(&self) -> Result { + let receiver = self.build_receiver(); + serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string())) } - fn name(&self) -> String { - "webhook-receiver".to_string() + fn build_route(&self) -> Result { + let route = self.build_route(); + serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string())) } - fn clone_box(&self) -> Box> { - Box::new(self.clone()) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -#[async_trait] -impl AlertReceiver for WebhookReceiver { - fn as_alertmanager_receiver(&self) -> Result { - todo!() - } - async fn install(&self, sender: &Prometheus) -> Result { - sender.install_receiver(self).await - } - fn name(&self) -> String { - "webhook-receiver".to_string() - } - fn clone_box(&self) -> Box> { - Box::new(self.clone()) - } - fn as_any(&self) -> &dyn Any { - self - } -} - -#[async_trait] -impl PrometheusReceiver for WebhookReceiver { fn name(&self) -> String { self.name.clone() } - async fn configure_receiver(&self) -> AlertManagerChannelConfig { - self.get_config().await + + fn clone_box(&self) -> Box> { + Box::new(self.clone()) } } -#[async_trait] -impl AlertReceiver for WebhookReceiver { - fn as_alertmanager_receiver(&self) -> Result { - todo!() +impl AlertReceiver for WebhookReceiver { + fn build_receiver(&self) -> Result { + let receiver = self.build_receiver(); + serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string())) } - async fn install(&self, sender: &KubePrometheus) -> Result { - sender.install_receiver(self).await + + fn build_route(&self) -> Result { + let route = self.build_route(); + serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string())) } + fn name(&self) -> String { - "webhook-receiver".to_string() + self.name.clone() } + + fn clone_box(&self) -> Box> { + Box::new(self.clone()) + } +} + +impl AlertReceiver for WebhookReceiver { + fn build_receiver(&self) -> Result { + let receiver = self.build_receiver(); + serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string())) + } + + fn build_route(&self) -> Result { + let route = self.build_route(); + serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string())) + } + + fn name(&self) -> String { + self.name.clone() + } + fn clone_box(&self) -> Box> { Box::new(self.clone()) } - fn as_any(&self) -> &dyn Any { - self - } } -#[async_trait] -impl KubePrometheusReceiver for WebhookReceiver { +impl AlertReceiver for WebhookReceiver { + fn build_receiver(&self) -> Result { + let receiver = self.build_receiver(); + serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string())) + } + + fn build_route(&self) -> Result { + let route = self.build_route(); + serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string())) + } + fn name(&self) -> String { self.name.clone() } - async fn configure_receiver(&self) -> AlertManagerChannelConfig { - self.get_config().await - } -} - -#[async_trait] -impl AlertChannelConfig for WebhookReceiver { - async fn get_config(&self) -> AlertManagerChannelConfig { - let channel_global_config = None; - let channel_receiver = self.alert_channel_receiver().await; - let channel_route = self.alert_channel_route().await; - - AlertManagerChannelConfig { - channel_global_config, - channel_receiver, - channel_route, - } - } -} - -impl WebhookReceiver { - async fn alert_channel_route(&self) -> serde_yaml::Value { - let mut route = Mapping::new(); - route.insert( - Value::String("receiver".to_string()), - Value::String(self.name.clone()), - ); - route.insert( - Value::String("matchers".to_string()), - Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]), - ); - route.insert(Value::String("continue".to_string()), Value::Bool(true)); - Value::Mapping(route) - } - - async fn alert_channel_receiver(&self) -> serde_yaml::Value { - let mut receiver = Mapping::new(); - receiver.insert( - Value::String("name".to_string()), - Value::String(self.name.clone()), - ); - - let mut webhook_config = Mapping::new(); - webhook_config.insert( - Value::String("url".to_string()), - Value::String(self.url.to_string()), - ); - - receiver.insert( - Value::String("webhook_configs".to_string()), - Value::Sequence(vec![Value::Mapping(webhook_config)]), - ); - - Value::Mapping(receiver) - } -} - -#[cfg(test)] -mod tests { - use super::*; - #[tokio::test] - async fn webhook_serialize_should_match() { - let webhook_receiver = WebhookReceiver { - name: "test-webhook".to_string(), - url: Url::Url(url::Url::parse("https://webhook.i.dont.exist.com").unwrap()), - }; - - let webhook_receiver_receiver = - serde_yaml::to_string(&webhook_receiver.alert_channel_receiver().await).unwrap(); - println!("receiver \n{:#}", webhook_receiver_receiver); - let webhook_receiver_receiver_yaml = r#"name: test-webhook -webhook_configs: -- url: https://webhook.i.dont.exist.com/ -"# - .to_string(); - - let webhook_receiver_route = - serde_yaml::to_string(&webhook_receiver.alert_channel_route().await).unwrap(); - println!("route \n{:#}", webhook_receiver_route); - let webhook_receiver_route_yaml = r#"receiver: test-webhook -matchers: -- alertname!=Watchdog -continue: true -"# - .to_string(); - - assert_eq!(webhook_receiver_receiver, webhook_receiver_receiver_yaml); - assert_eq!(webhook_receiver_route, webhook_receiver_route_yaml); + + fn clone_box(&self) -> Box> { + Box::new(self.clone()) } } diff --git a/harmony/src/modules/prometheus/alerts/infra/dell_server.rs b/harmony/src/modules/monitoring/alert_rule/alerts/infra/dell_server.rs similarity index 100% rename from harmony/src/modules/prometheus/alerts/infra/dell_server.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/infra/dell_server.rs diff --git a/harmony/src/modules/prometheus/alerts/infra/mod.rs b/harmony/src/modules/monitoring/alert_rule/alerts/infra/mod.rs similarity index 53% rename from harmony/src/modules/prometheus/alerts/infra/mod.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/infra/mod.rs index 47fbc2f1..2b11bded 100644 --- a/harmony/src/modules/prometheus/alerts/infra/mod.rs +++ b/harmony/src/modules/monitoring/alert_rule/alerts/infra/mod.rs @@ -1 +1,2 @@ pub mod dell_server; +pub mod opnsense; diff --git a/harmony/src/modules/monitoring/alert_rule/alerts/infra/opnsense.rs b/harmony/src/modules/monitoring/alert_rule/alerts/infra/opnsense.rs new file mode 100644 index 00000000..c163a76d --- /dev/null +++ b/harmony/src/modules/monitoring/alert_rule/alerts/infra/opnsense.rs @@ -0,0 +1,15 @@ +use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; + +pub fn high_http_error_rate() -> PrometheusAlertRule { + let expression = r#"( + sum(rate(http_requests_total{status=~"5.."}[5m])) by (job, route, service) + / + sum(rate(http_requests_total[5m])) by (job, route, service) +) > 0.05 and sum(rate(http_requests_total[5m])) by (job, route, service) > 10"#; + + PrometheusAlertRule::new("HighApplicationErrorRate", expression) + .for_duration("10m") + .label("severity", "warning") + .annotation("summary", "High HTTP error rate on {{ $labels.job }}") + .annotation("description", "Job {{ $labels.job }} (route {{ $labels.route }}) has an error rate > 5% over the last 10m.") +} diff --git a/harmony/src/modules/prometheus/alerts/k8s/deployment.rs b/harmony/src/modules/monitoring/alert_rule/alerts/k8s/deployment.rs similarity index 100% rename from harmony/src/modules/prometheus/alerts/k8s/deployment.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/k8s/deployment.rs diff --git a/harmony/src/modules/prometheus/alerts/k8s/memory_usage.rs b/harmony/src/modules/monitoring/alert_rule/alerts/k8s/memory_usage.rs similarity index 100% rename from harmony/src/modules/prometheus/alerts/k8s/memory_usage.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/k8s/memory_usage.rs diff --git a/harmony/src/modules/prometheus/alerts/k8s/mod.rs b/harmony/src/modules/monitoring/alert_rule/alerts/k8s/mod.rs similarity index 100% rename from harmony/src/modules/prometheus/alerts/k8s/mod.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/k8s/mod.rs diff --git a/harmony/src/modules/prometheus/alerts/k8s/pod.rs b/harmony/src/modules/monitoring/alert_rule/alerts/k8s/pod.rs similarity index 100% rename from harmony/src/modules/prometheus/alerts/k8s/pod.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/k8s/pod.rs diff --git a/harmony/src/modules/prometheus/alerts/k8s/pvc.rs b/harmony/src/modules/monitoring/alert_rule/alerts/k8s/pvc.rs similarity index 100% rename from harmony/src/modules/prometheus/alerts/k8s/pvc.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/k8s/pvc.rs diff --git a/harmony/src/modules/prometheus/alerts/k8s/service.rs b/harmony/src/modules/monitoring/alert_rule/alerts/k8s/service.rs similarity index 100% rename from harmony/src/modules/prometheus/alerts/k8s/service.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/k8s/service.rs diff --git a/harmony/src/modules/prometheus/alerts/mod.rs b/harmony/src/modules/monitoring/alert_rule/alerts/mod.rs similarity index 100% rename from harmony/src/modules/prometheus/alerts/mod.rs rename to harmony/src/modules/monitoring/alert_rule/alerts/mod.rs diff --git a/harmony/src/modules/monitoring/alert_rule/mod.rs b/harmony/src/modules/monitoring/alert_rule/mod.rs index 846c7696..cb7bba9e 100644 --- a/harmony/src/modules/monitoring/alert_rule/mod.rs +++ b/harmony/src/modules/monitoring/alert_rule/mod.rs @@ -1 +1,2 @@ +pub mod alerts; pub mod prometheus_alert_rule; diff --git a/harmony/src/modules/monitoring/alert_rule/prometheus_alert_rule.rs b/harmony/src/modules/monitoring/alert_rule/prometheus_alert_rule.rs index e001c68a..2cc7fceb 100644 --- a/harmony/src/modules/monitoring/alert_rule/prometheus_alert_rule.rs +++ b/harmony/src/modules/monitoring/alert_rule/prometheus_alert_rule.rs @@ -1,79 +1,13 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; -use async_trait::async_trait; use serde::Serialize; use crate::{ - interpret::{InterpretError, Outcome}, - modules::monitoring::{ - kube_prometheus::{ - prometheus::{KubePrometheus, KubePrometheusRule}, - types::{AlertGroup, AlertManagerAdditionalPromRules}, - }, - prometheus::prometheus::{Prometheus, PrometheusRule}, - }, - topology::oberservability::monitoring::AlertRule, + interpret::InterpretError, + modules::monitoring::{kube_prometheus::KubePrometheus, okd::OpenshiftClusterAlertSender}, + topology::monitoring::AlertRule, }; -#[async_trait] -impl AlertRule for AlertManagerRuleGroup { - async fn install(&self, sender: &KubePrometheus) -> Result { - sender.install_rule(self).await - } - fn clone_box(&self) -> Box> { - Box::new(self.clone()) - } -} - -#[async_trait] -impl AlertRule for AlertManagerRuleGroup { - async fn install(&self, sender: &Prometheus) -> Result { - sender.install_rule(self).await - } - fn clone_box(&self) -> Box> { - Box::new(self.clone()) - } -} - -#[async_trait] -impl PrometheusRule for AlertManagerRuleGroup { - fn name(&self) -> String { - self.name.clone() - } - async fn configure_rule(&self) -> AlertManagerAdditionalPromRules { - let mut additional_prom_rules = BTreeMap::new(); - - additional_prom_rules.insert( - self.name.clone(), - AlertGroup { - groups: vec![self.clone()], - }, - ); - AlertManagerAdditionalPromRules { - rules: additional_prom_rules, - } - } -} -#[async_trait] -impl KubePrometheusRule for AlertManagerRuleGroup { - fn name(&self) -> String { - self.name.clone() - } - async fn configure_rule(&self) -> AlertManagerAdditionalPromRules { - let mut additional_prom_rules = BTreeMap::new(); - - additional_prom_rules.insert( - self.name.clone(), - AlertGroup { - groups: vec![self.clone()], - }, - ); - AlertManagerAdditionalPromRules { - rules: additional_prom_rules, - } - } -} - impl AlertManagerRuleGroup { pub fn new(name: &str, rules: Vec) -> AlertManagerRuleGroup { AlertManagerRuleGroup { @@ -129,3 +63,55 @@ impl PrometheusAlertRule { self } } + +impl AlertRule for AlertManagerRuleGroup { + fn build_rule(&self) -> Result { + let name = self.name.clone(); + let mut rules: Vec = vec![]; + for rule in self.rules.clone() { + rules.push(rule.into()) + } + + let rule_groups = + vec![crate::modules::monitoring::okd::crd::alerting_rules::RuleGroup { name, rules }]; + + Ok(serde_json::to_value(rule_groups).map_err(|e| InterpretError::new(e.to_string()))?) + } + + fn name(&self) -> String { + self.name.clone() + } + + fn clone_box(&self) -> Box> { + Box::new(self.clone()) + } +} + +impl AlertRule for AlertManagerRuleGroup { + fn build_rule(&self) -> Result { + let name = self.name.clone(); + let mut rules: Vec< + crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::Rule, + > = vec![]; + for rule in self.rules.clone() { + rules.push(rule.into()) + } + + let rule_groups = vec![ + crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::RuleGroup { + name, + rules, + }, + ]; + + Ok(serde_json::to_value(rule_groups).map_err(|e| InterpretError::new(e.to_string()))?) + } + + fn name(&self) -> String { + self.name.clone() + } + + fn clone_box(&self) -> Box> { + Box::new(self.clone()) + } +} diff --git a/harmony/src/modules/monitoring/application_monitoring/application_monitoring_score.rs b/harmony/src/modules/monitoring/application_monitoring/application_monitoring_score.rs index 8f6b6245..b72e1f43 100644 --- a/harmony/src/modules/monitoring/application_monitoring/application_monitoring_score.rs +++ b/harmony/src/modules/monitoring/application_monitoring/application_monitoring_score.rs @@ -5,32 +5,26 @@ use serde::Serialize; use crate::{ interpret::Interpret, - modules::{ - application::Application, - monitoring::{ - grafana::grafana::Grafana, kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus, - }, - prometheus::prometheus::PrometheusMonitoring, - }, + modules::{application::Application, monitoring::prometheus::Prometheus}, score::Score, topology::{ K8sclient, Topology, - oberservability::monitoring::{AlertReceiver, AlertingInterpret, ScrapeTarget}, + monitoring::{AlertReceiver, AlertingInterpret, Observability, ScrapeTarget}, }, }; #[derive(Debug, Clone, Serialize)] pub struct ApplicationMonitoringScore { - pub sender: CRDPrometheus, + pub sender: Prometheus, pub application: Arc, - pub receivers: Vec>>, + pub receivers: Vec>>, } -impl + K8sclient + Grafana> Score - for ApplicationMonitoringScore -{ +impl + K8sclient> Score for ApplicationMonitoringScore { fn create_interpret(&self) -> Box> { debug!("creating alerting interpret"); + //TODO will need to use k8sclient to apply service monitors or find a way to pass + //them to the AlertingInterpret potentially via Sender Prometheus Box::new(AlertingInterpret { sender: self.sender.clone(), receivers: self.receivers.clone(), diff --git a/harmony/src/modules/monitoring/application_monitoring/rhobs_application_monitoring_score.rs b/harmony/src/modules/monitoring/application_monitoring/rhobs_application_monitoring_score.rs index 6f45c883..76aab06c 100644 --- a/harmony/src/modules/monitoring/application_monitoring/rhobs_application_monitoring_score.rs +++ b/harmony/src/modules/monitoring/application_monitoring/rhobs_application_monitoring_score.rs @@ -9,28 +9,27 @@ use crate::{ inventory::Inventory, modules::{ application::Application, - monitoring::kube_prometheus::crd::{ - crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability, - }, - prometheus::prometheus::PrometheusMonitoring, + monitoring::red_hat_cluster_observability::RedHatClusterObservability, }, score::Score, - topology::{PreparationOutcome, Topology, oberservability::monitoring::AlertReceiver}, + topology::{ + Topology, + monitoring::{AlertReceiver, AlertingInterpret, Observability}, + }, }; use harmony_types::id::Id; - #[derive(Debug, Clone, Serialize)] -pub struct ApplicationRHOBMonitoringScore { - pub sender: RHOBObservability, +pub struct ApplicationRedHatClusterMonitoringScore { + pub sender: RedHatClusterObservability, pub application: Arc, - pub receivers: Vec>>, + pub receivers: Vec>>, } -impl> Score - for ApplicationRHOBMonitoringScore +impl> Score + for ApplicationRedHatClusterMonitoringScore { fn create_interpret(&self) -> Box> { - Box::new(ApplicationRHOBMonitoringInterpret { + Box::new(ApplicationRedHatClusterMonitoringInterpret { score: self.clone(), }) } @@ -44,38 +43,28 @@ impl> Score } #[derive(Debug)] -pub struct ApplicationRHOBMonitoringInterpret { - score: ApplicationRHOBMonitoringScore, +pub struct ApplicationRedHatClusterMonitoringInterpret { + score: ApplicationRedHatClusterMonitoringScore, } #[async_trait] -impl> Interpret - for ApplicationRHOBMonitoringInterpret +impl> Interpret + for ApplicationRedHatClusterMonitoringInterpret { async fn execute( &self, inventory: &Inventory, topology: &T, ) -> Result { - let result = topology - .install_prometheus( - &self.score.sender, - inventory, - Some(self.score.receivers.clone()), - ) - .await; - - match result { - Ok(outcome) => match outcome { - PreparationOutcome::Success { details: _ } => { - Ok(Outcome::success("Prometheus installed".into())) - } - PreparationOutcome::Noop => { - Ok(Outcome::noop("Prometheus installation skipped".into())) - } - }, - Err(err) => Err(InterpretError::from(err)), - } + //TODO will need to use k8sclient to apply crd ServiceMonitor or find a way to pass + //them to the AlertingInterpret potentially via Sender RedHatClusterObservability + let alerting_interpret = AlertingInterpret { + sender: self.score.sender.clone(), + receivers: self.score.receivers.clone(), + rules: vec![], + scrape_targets: None, + }; + alerting_interpret.execute(inventory, topology).await } fn get_name(&self) -> InterpretName { diff --git a/harmony/src/modules/monitoring/grafana/grafana.rs b/harmony/src/modules/monitoring/grafana/grafana.rs index 5ab57c27..e28f61a3 100644 --- a/harmony/src/modules/monitoring/grafana/grafana.rs +++ b/harmony/src/modules/monitoring/grafana/grafana.rs @@ -1,17 +1,48 @@ use async_trait::async_trait; -use k8s_openapi::Resource; +use serde::Serialize; use crate::{ inventory::Inventory, - topology::{PreparationError, PreparationOutcome}, + topology::{ + PreparationError, PreparationOutcome, + monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget}, + }, }; -#[async_trait] -pub trait Grafana { - async fn ensure_grafana_operator( - &self, - inventory: &Inventory, - ) -> Result; - - async fn install_grafana(&self) -> Result; +#[derive(Debug, Clone, Serialize)] +pub struct Grafana { + pub namespace: String, +} + +impl AlertSender for Grafana { + fn name(&self) -> String { + "grafana".to_string() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } } diff --git a/harmony/src/modules/monitoring/grafana/grafana_alerting_score.rs b/harmony/src/modules/monitoring/grafana/grafana_alerting_score.rs new file mode 100644 index 00000000..b032ed89 --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/grafana_alerting_score.rs @@ -0,0 +1,32 @@ +use serde::Serialize; + +use crate::{ + modules::monitoring::grafana::grafana::Grafana, + score::Score, + topology::{ + HelmCommand, Topology, + monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget}, + }, +}; + +#[derive(Clone, Debug, Serialize)] +pub struct GrafanaAlertingScore { + pub receivers: Vec>>, + pub rules: Vec>>, + pub scrape_targets: Option>>>, + pub sender: Grafana, +} + +impl> Score for GrafanaAlertingScore { + fn create_interpret(&self) -> Box> { + Box::new(AlertingInterpret { + sender: self.sender.clone(), + receivers: self.receivers.clone(), + rules: self.rules.clone(), + scrape_targets: self.scrape_targets.clone(), + }) + } + fn name(&self) -> String { + "HelmPrometheusAlertingScore".to_string() + } +} diff --git a/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs b/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs deleted file mode 100644 index c9ccacb0..00000000 --- a/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs +++ /dev/null @@ -1,28 +0,0 @@ -use harmony_macros::hurl; -use non_blank_string_rs::NonBlankString; -use std::{collections::HashMap, str::FromStr}; - -use crate::modules::helm::chart::{HelmChartScore, HelmRepository}; - -pub fn grafana_helm_chart_score(ns: &str, namespace_scope: bool) -> HelmChartScore { - let mut values_overrides = HashMap::new(); - values_overrides.insert( - NonBlankString::from_str("namespaceScope").unwrap(), - namespace_scope.to_string(), - ); - HelmChartScore { - namespace: Some(NonBlankString::from_str(ns).unwrap()), - release_name: NonBlankString::from_str("grafana-operator").unwrap(), - chart_name: NonBlankString::from_str("grafana/grafana-operator").unwrap(), - chart_version: None, - values_overrides: Some(values_overrides), - values_yaml: None, - create_namespace: true, - install_only: true, - repository: Some(HelmRepository::new( - "grafana".to_string(), - hurl!("https://grafana.github.io/helm-charts"), - true, - )), - } -} diff --git a/harmony/src/modules/monitoring/grafana/helm/mod.rs b/harmony/src/modules/monitoring/grafana/helm/mod.rs deleted file mode 100644 index 1d35fdf0..00000000 --- a/harmony/src/modules/monitoring/grafana/helm/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod helm_grafana; diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs b/harmony/src/modules/monitoring/grafana/k8s/crd/crd_grafana.rs similarity index 98% rename from harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs rename to harmony/src/modules/monitoring/grafana/k8s/crd/crd_grafana.rs index 386890e0..0f02a844 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs +++ b/harmony/src/modules/monitoring/grafana/k8s/crd/crd_grafana.rs @@ -4,7 +4,7 @@ use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use super::crd_prometheuses::LabelSelector; +use crate::modules::monitoring::kube_prometheus::crd::crd_prometheuses::LabelSelector; #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] #[kube( diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_default_dashboard.rs b/harmony/src/modules/monitoring/grafana/k8s/crd/grafana_default_dashboard.rs similarity index 100% rename from harmony/src/modules/monitoring/kube_prometheus/crd/grafana_default_dashboard.rs rename to harmony/src/modules/monitoring/grafana/k8s/crd/grafana_default_dashboard.rs diff --git a/harmony/src/modules/monitoring/grafana/k8s/crd/mod.rs b/harmony/src/modules/monitoring/grafana/k8s/crd/mod.rs new file mode 100644 index 00000000..bc9ac6de --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/k8s/crd/mod.rs @@ -0,0 +1,3 @@ +pub mod crd_grafana; +pub mod grafana_default_dashboard; +pub mod rhob_grafana; diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_grafana.rs b/harmony/src/modules/monitoring/grafana/k8s/crd/rhob_grafana.rs similarity index 97% rename from harmony/src/modules/monitoring/kube_prometheus/crd/rhob_grafana.rs rename to harmony/src/modules/monitoring/grafana/k8s/crd/rhob_grafana.rs index 65efab9a..ce13f61d 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_grafana.rs +++ b/harmony/src/modules/monitoring/grafana/k8s/crd/rhob_grafana.rs @@ -4,7 +4,7 @@ use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector; +use crate::modules::monitoring::red_hat_cluster_observability::crd::rhob_prometheuses::LabelSelector; #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] #[kube( diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs b/harmony/src/modules/monitoring/grafana/k8s/helm/grafana_operator.rs similarity index 100% rename from harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs rename to harmony/src/modules/monitoring/grafana/k8s/helm/grafana_operator.rs diff --git a/harmony/src/modules/monitoring/grafana/k8s/helm/mod.rs b/harmony/src/modules/monitoring/grafana/k8s/helm/mod.rs new file mode 100644 index 00000000..97edab97 --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/k8s/helm/mod.rs @@ -0,0 +1 @@ +pub mod grafana_operator; diff --git a/harmony/src/modules/monitoring/grafana/k8s/mod.rs b/harmony/src/modules/monitoring/grafana/k8s/mod.rs new file mode 100644 index 00000000..107a5bd7 --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/k8s/mod.rs @@ -0,0 +1,7 @@ +pub mod crd; +pub mod helm; +pub mod score_ensure_grafana_ready; +pub mod score_grafana_alert_receiver; +pub mod score_grafana_datasource; +pub mod score_grafana_rule; +pub mod score_install_grafana; diff --git a/harmony/src/modules/monitoring/grafana/k8s/score_ensure_grafana_ready.rs b/harmony/src/modules/monitoring/grafana/k8s/score_ensure_grafana_ready.rs new file mode 100644 index 00000000..81c3d396 --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/k8s/score_ensure_grafana_ready.rs @@ -0,0 +1,54 @@ +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::monitoring::grafana::grafana::Grafana, + score::Score, + topology::{K8sclient, Topology}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct GrafanaK8sEnsureReadyScore { + pub sender: Grafana, +} + +impl Score for GrafanaK8sEnsureReadyScore { + fn name(&self) -> String { + "GrafanaK8sEnsureReadyScore".to_string() + } + + fn create_interpret(&self) -> Box> { + todo!() + } +} +// async fn ensure_ready( +// &self, +// inventory: &Inventory, +// ) -> Result { +// debug!("ensure grafana operator"); +// let client = self.k8s_client().await.unwrap(); +// let grafana_gvk = GroupVersionKind { +// group: "grafana.integreatly.org".to_string(), +// version: "v1beta1".to_string(), +// kind: "Grafana".to_string(), +// }; +// let name = "grafanas.grafana.integreatly.org"; +// let ns = "grafana"; +// +// let grafana_crd = client +// .get_resource_json_value(name, Some(ns), &grafana_gvk) +// .await; +// match grafana_crd { +// Ok(_) => { +// return Ok(PreparationOutcome::Success { +// details: "Found grafana CRDs in cluster".to_string(), +// }); +// } +// +// Err(_) => { +// return self +// .install_grafana_operator(inventory, Some("grafana")) +// .await; +// } +// }; +// } diff --git a/harmony/src/modules/monitoring/grafana/k8s/score_grafana_alert_receiver.rs b/harmony/src/modules/monitoring/grafana/k8s/score_grafana_alert_receiver.rs new file mode 100644 index 00000000..9e4d15fa --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/k8s/score_grafana_alert_receiver.rs @@ -0,0 +1,24 @@ +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::monitoring::grafana::grafana::Grafana, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertReceiver}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct GrafanaK8sReceiverScore { + pub sender: Grafana, + pub receiver: Box>, +} + +impl Score for GrafanaK8sReceiverScore { + fn name(&self) -> String { + "GrafanaK8sReceiverScore".to_string() + } + + fn create_interpret(&self) -> Box> { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/grafana/k8s/score_grafana_datasource.rs b/harmony/src/modules/monitoring/grafana/k8s/score_grafana_datasource.rs new file mode 100644 index 00000000..71b723b1 --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/k8s/score_grafana_datasource.rs @@ -0,0 +1,83 @@ +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::monitoring::grafana::grafana::Grafana, + score::Score, + topology::{K8sclient, Topology, monitoring::ScrapeTarget}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct GrafanaK8sDatasourceScore { + pub sender: Grafana, + pub scrape_target: Box>, +} + +impl Score for GrafanaK8sDatasourceScore { + fn name(&self) -> String { + "GrafanaK8sDatasourceScore".to_string() + } + + fn create_interpret(&self) -> Box> { + todo!() + } +} + +// fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option { +// let token_b64 = secret +// .data +// .get("token") +// .or_else(|| secret.data.get("data").and_then(|d| d.get("token"))) +// .and_then(|v| v.as_str())?; +// +// let bytes = general_purpose::STANDARD.decode(token_b64).ok()?; +// +// let s = String::from_utf8(bytes).ok()?; +// +// let cleaned = s +// .trim_matches(|c: char| c.is_whitespace() || c == '\0') +// .to_string(); +// Some(cleaned) +// } +// fn build_grafana_datasource( +// &self, +// name: &str, +// ns: &str, +// label_selector: &LabelSelector, +// url: &str, +// token: &str, +// ) -> GrafanaDatasource { +// let mut json_data = BTreeMap::new(); +// json_data.insert("timeInterval".to_string(), "5s".to_string()); +// +// GrafanaDatasource { +// metadata: ObjectMeta { +// name: Some(name.to_string()), +// namespace: Some(ns.to_string()), +// ..Default::default() +// }, +// spec: GrafanaDatasourceSpec { +// instance_selector: label_selector.clone(), +// allow_cross_namespace_import: Some(true), +// values_from: None, +// datasource: GrafanaDatasourceConfig { +// access: "proxy".to_string(), +// name: name.to_string(), +// rype: "prometheus".to_string(), +// url: url.to_string(), +// database: None, +// json_data: Some(GrafanaDatasourceJsonData { +// time_interval: Some("60s".to_string()), +// http_header_name1: Some("Authorization".to_string()), +// tls_skip_verify: Some(true), +// oauth_pass_thru: Some(true), +// }), +// secure_json_data: Some(GrafanaDatasourceSecureJsonData { +// http_header_value1: Some(format!("Bearer {token}")), +// }), +// is_default: Some(false), +// editable: Some(true), +// }, +// }, +// } +// } diff --git a/harmony/src/modules/monitoring/grafana/k8s/score_grafana_rule.rs b/harmony/src/modules/monitoring/grafana/k8s/score_grafana_rule.rs new file mode 100644 index 00000000..1a3c19d1 --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/k8s/score_grafana_rule.rs @@ -0,0 +1,67 @@ +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::monitoring::grafana::grafana::Grafana, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertRule}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct GrafanaK8sRuleScore { + pub sender: Grafana, + pub rule: Box>, +} + +impl Score for GrafanaK8sRuleScore { + fn name(&self) -> String { + "GrafanaK8sRuleScore".to_string() + } + + fn create_interpret(&self) -> Box> { + todo!() + } +} + +// kind: Secret +// apiVersion: v1 +// metadata: +// name: credentials +// namespace: grafana +// stringData: +// PROMETHEUS_USERNAME: root +// PROMETHEUS_PASSWORD: secret +// type: Opaque +// --- +// apiVersion: grafana.integreatly.org/v1beta1 +// kind: GrafanaDatasource +// metadata: +// name: grafanadatasource-sample +// spec: +// valuesFrom: +// - targetPath: "basicAuthUser" +// valueFrom: +// secretKeyRef: +// name: "credentials" +// key: "PROMETHEUS_USERNAME" +// - targetPath: "secureJsonData.basicAuthPassword" +// valueFrom: +// secretKeyRef: +// name: "credentials" +// key: "PROMETHEUS_PASSWORD" +// instanceSelector: +// matchLabels: +// dashboards: "grafana" +// datasource: +// name: prometheus +// type: prometheus +// access: proxy +// basicAuth: true +// url: http://prometheus-service:9090 +// isDefault: true +// basicAuthUser: ${PROMETHEUS_USERNAME} +// jsonData: +// "tlsSkipVerify": true +// "timeInterval": "5s" +// secureJsonData: +// "basicAuthPassword": ${PROMETHEUS_PASSWORD} # diff --git a/harmony/src/modules/monitoring/grafana/k8s/score_install_grafana.rs b/harmony/src/modules/monitoring/grafana/k8s/score_install_grafana.rs new file mode 100644 index 00000000..60c109e5 --- /dev/null +++ b/harmony/src/modules/monitoring/grafana/k8s/score_install_grafana.rs @@ -0,0 +1,189 @@ +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::monitoring::grafana::grafana::Grafana, + score::Score, + topology::{K8sclient, Topology}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct GrafanaK8sInstallScore { + pub sender: Grafana, +} + +impl Score for GrafanaK8sInstallScore { + fn name(&self) -> String { + "GrafanaK8sEnsureReadyScore".to_string() + } + + fn create_interpret(&self) -> Box> { + todo!() + } +} +// let score = grafana_operator_helm_chart_score(sender.namespace.clone()); +// +// score +// .create_interpret() +// .execute(inventory, self) +// .await +// .map_err(|e| PreparationError::new(e.to_string()))?; +// + +// +// fn build_grafana_dashboard( +// &self, +// ns: &str, +// label_selector: &LabelSelector, +// ) -> GrafanaDashboard { +// let graf_dashboard = GrafanaDashboard { +// metadata: ObjectMeta { +// name: Some(format!("grafana-dashboard-{}", ns)), +// namespace: Some(ns.to_string()), +// ..Default::default() +// }, +// spec: GrafanaDashboardSpec { +// resync_period: Some("30s".to_string()), +// instance_selector: label_selector.clone(), +// datasources: Some(vec![GrafanaDashboardDatasource { +// input_name: "DS_PROMETHEUS".to_string(), +// datasource_name: "thanos-openshift-monitoring".to_string(), +// }]), +// json: None, +// grafana_com: Some(GrafanaCom { +// id: 17406, +// revision: None, +// }), +// }, +// }; +// graf_dashboard +// } +// +// fn build_grafana(&self, ns: &str, labels: &BTreeMap) -> GrafanaCRD { +// let grafana = GrafanaCRD { +// metadata: ObjectMeta { +// name: Some(format!("grafana-{}", ns)), +// namespace: Some(ns.to_string()), +// labels: Some(labels.clone()), +// ..Default::default() +// }, +// spec: GrafanaSpec { +// config: None, +// admin_user: None, +// admin_password: None, +// ingress: None, +// persistence: None, +// resources: None, +// }, +// }; +// grafana +// } +// +// async fn build_grafana_ingress(&self, ns: &str) -> K8sIngressScore { +// let domain = self.get_domain(&format!("grafana-{}", ns)).await.unwrap(); +// let name = format!("{}-grafana", ns); +// let backend_service = format!("grafana-{}-service", ns); +// +// K8sIngressScore { +// name: fqdn::fqdn!(&name), +// host: fqdn::fqdn!(&domain), +// backend_service: fqdn::fqdn!(&backend_service), +// port: 3000, +// path: Some("/".to_string()), +// path_type: Some(PathType::Prefix), +// namespace: Some(fqdn::fqdn!(&ns)), +// ingress_class_name: Some("openshift-default".to_string()), +// } +// } +// #[async_trait] +// impl Grafana for K8sAnywhereTopology { +// async fn install_grafana(&self) -> Result { +// let ns = "grafana"; +// +// let mut label = BTreeMap::new(); +// +// label.insert("dashboards".to_string(), "grafana".to_string()); +// +// let label_selector = LabelSelector { +// match_labels: label.clone(), +// match_expressions: vec![], +// }; +// +// let client = self.k8s_client().await?; +// +// let grafana = self.build_grafana(ns, &label); +// +// client.apply(&grafana, Some(ns)).await?; +// //TODO change this to a ensure ready or something better than just a timeout +// client +// .wait_until_deployment_ready( +// "grafana-grafana-deployment", +// Some("grafana"), +// Some(Duration::from_secs(30)), +// ) +// .await?; +// +// let sa_name = "grafana-grafana-sa"; +// let token_secret_name = "grafana-sa-token-secret"; +// +// let sa_token_secret = self.build_sa_token_secret(token_secret_name, sa_name, ns); +// +// client.apply(&sa_token_secret, Some(ns)).await?; +// let secret_gvk = GroupVersionKind { +// group: "".to_string(), +// version: "v1".to_string(), +// kind: "Secret".to_string(), +// }; +// +// let secret = client +// .get_resource_json_value(token_secret_name, Some(ns), &secret_gvk) +// .await?; +// +// let token = format!( +// "Bearer {}", +// self.extract_and_normalize_token(&secret).unwrap() +// ); +// +// debug!("creating grafana clusterrole binding"); +// +// let clusterrolebinding = +// self.build_cluster_rolebinding(sa_name, "cluster-monitoring-view", ns); +// +// client.apply(&clusterrolebinding, Some(ns)).await?; +// +// debug!("creating grafana datasource crd"); +// +// let thanos_url = format!( +// "https://{}", +// self.get_domain("thanos-querier-openshift-monitoring") +// .await +// .unwrap() +// ); +// +// let thanos_openshift_datasource = self.build_grafana_datasource( +// "thanos-openshift-monitoring", +// ns, +// &label_selector, +// &thanos_url, +// &token, +// ); +// +// client.apply(&thanos_openshift_datasource, Some(ns)).await?; +// +// debug!("creating grafana dashboard crd"); +// let dashboard = self.build_grafana_dashboard(ns, &label_selector); +// +// client.apply(&dashboard, Some(ns)).await?; +// debug!("creating grafana ingress"); +// let grafana_ingress = self.build_grafana_ingress(ns).await; +// +// grafana_ingress +// .interpret(&Inventory::empty(), self) +// .await +// .map_err(|e| PreparationError::new(e.to_string()))?; +// +// Ok(PreparationOutcome::Success { +// details: "Installed grafana composants".to_string(), +// }) +// } +// } diff --git a/harmony/src/modules/monitoring/grafana/mod.rs b/harmony/src/modules/monitoring/grafana/mod.rs index 8dccab1d..3176b923 100644 --- a/harmony/src/modules/monitoring/grafana/mod.rs +++ b/harmony/src/modules/monitoring/grafana/mod.rs @@ -1,2 +1,3 @@ pub mod grafana; -pub mod helm; +pub mod grafana_alerting_score; +pub mod k8s; diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_alertmanager_config.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_alertmanager_config.rs index 88ec7454..f544b603 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_alertmanager_config.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_alertmanager_config.rs @@ -1,91 +1,17 @@ -use std::sync::Arc; - -use async_trait::async_trait; use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use crate::{ - interpret::{InterpretError, Outcome}, - inventory::Inventory, - modules::{ - monitoring::{ - grafana::grafana::Grafana, kube_prometheus::crd::service_monitor::ServiceMonitor, - }, - prometheus::prometheus::PrometheusMonitoring, - }, - topology::{ - K8sclient, Topology, - installable::Installable, - k8s::K8sClient, - oberservability::monitoring::{AlertReceiver, AlertSender, ScrapeTarget}, - }, -}; - -#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(CustomResource, Serialize, Deserialize, Default, Debug, Clone, JsonSchema)] #[kube( group = "monitoring.coreos.com", - version = "v1alpha1", + version = "v1", kind = "AlertmanagerConfig", plural = "alertmanagerconfigs", - namespaced + namespaced, + derive = "Default" )] pub struct AlertmanagerConfigSpec { #[serde(flatten)] pub data: serde_json::Value, } - -#[derive(Debug, Clone, Serialize)] -pub struct CRDPrometheus { - pub namespace: String, - pub client: Arc, - pub service_monitor: Vec, -} - -impl AlertSender for CRDPrometheus { - fn name(&self) -> String { - "CRDAlertManager".to_string() - } -} - -impl Clone for Box> { - fn clone(&self) -> Self { - self.clone_box() - } -} - -impl Clone for Box> { - fn clone(&self) -> Self { - self.clone_box() - } -} - -impl Serialize for Box> { - fn serialize(&self, _serializer: S) -> Result - where - S: serde::Serializer, - { - todo!() - } -} - -#[async_trait] -impl + Grafana> Installable - for CRDPrometheus -{ - async fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), InterpretError> { - topology.ensure_grafana_operator(inventory).await?; - topology.ensure_prometheus_operator(self, inventory).await?; - Ok(()) - } - - async fn ensure_installed( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result<(), InterpretError> { - topology.install_grafana().await?; - topology.install_prometheus(&self, inventory, None).await?; - Ok(()) - } -} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_default_rules.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_default_rules.rs index 014650cb..060dcc3a 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_default_rules.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_default_rules.rs @@ -1,4 +1,4 @@ -use crate::modules::prometheus::alerts::k8s::{ +use crate::modules::monitoring::alert_rule::alerts::k8s::{ deployment::alert_deployment_unavailable, pod::{alert_container_restarting, alert_pod_not_ready, pod_failed}, pvc::high_pvc_fill_rate_over_two_days, diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheus_rules.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheus_rules.rs index 6d08456b..f3202564 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheus_rules.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheus_rules.rs @@ -6,13 +6,14 @@ use serde::{Deserialize, Serialize}; use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; -#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)] +#[derive(CustomResource, Default, Debug, Serialize, Deserialize, Clone, JsonSchema)] #[kube( group = "monitoring.coreos.com", version = "v1", kind = "PrometheusRule", plural = "prometheusrules", - namespaced + namespaced, + derive = "Default" )] #[serde(rename_all = "camelCase")] pub struct PrometheusRuleSpec { diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_scrape_config.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_scrape_config.rs index 24a28330..0973e118 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_scrape_config.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_scrape_config.rs @@ -1,23 +1,18 @@ -use std::net::IpAddr; +use std::collections::BTreeMap; -use async_trait::async_trait; use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use crate::{ - modules::monitoring::kube_prometheus::crd::{ - crd_alertmanager_config::CRDPrometheus, crd_prometheuses::LabelSelector, - }, - topology::oberservability::monitoring::ScrapeTarget, -}; +use crate::modules::monitoring::kube_prometheus::crd::crd_prometheuses::LabelSelector; -#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(CustomResource, Default, Serialize, Deserialize, Debug, Clone, JsonSchema)] #[kube( group = "monitoring.coreos.com", version = "v1alpha1", kind = "ScrapeConfig", plural = "scrapeconfigs", + derive = "Default", namespaced )] #[serde(rename_all = "camelCase")] @@ -70,8 +65,8 @@ pub struct ScrapeConfigSpec { #[serde(rename_all = "camelCase")] pub struct StaticConfig { pub targets: Vec, - - pub labels: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub labels: Option>, } /// Relabeling configuration for target or metric relabeling. diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs index c8cb8546..ff6b2ee8 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs @@ -1,22 +1,9 @@ pub mod crd_alertmanager_config; pub mod crd_alertmanagers; pub mod crd_default_rules; -pub mod crd_grafana; pub mod crd_prometheus_rules; pub mod crd_prometheuses; pub mod crd_scrape_config; -pub mod grafana_default_dashboard; -pub mod grafana_operator; pub mod prometheus_operator; -pub mod rhob_alertmanager_config; -pub mod rhob_alertmanagers; -pub mod rhob_cluster_observability_operator; -pub mod rhob_default_rules; -pub mod rhob_grafana; -pub mod rhob_monitoring_stack; -pub mod rhob_prometheus_rules; -pub mod rhob_prometheuses; -pub mod rhob_role; -pub mod rhob_service_monitor; pub mod role; pub mod service_monitor; diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_alertmanager_config.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_alertmanager_config.rs deleted file mode 100644 index a53b24e3..00000000 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_alertmanager_config.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::sync::Arc; - -use kube::CustomResource; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -use crate::topology::{ - k8s::K8sClient, - oberservability::monitoring::{AlertReceiver, AlertSender}, -}; - -#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] -#[kube( - group = "monitoring.rhobs", - version = "v1alpha1", - kind = "AlertmanagerConfig", - plural = "alertmanagerconfigs", - namespaced -)] -pub struct AlertmanagerConfigSpec { - #[serde(flatten)] - pub data: serde_json::Value, -} - -#[derive(Debug, Clone, Serialize)] -pub struct RHOBObservability { - pub namespace: String, - pub client: Arc, -} - -impl AlertSender for RHOBObservability { - fn name(&self) -> String { - "RHOBAlertManager".to_string() - } -} - -impl Clone for Box> { - fn clone(&self) -> Self { - self.clone_box() - } -} - -impl Serialize for Box> { - fn serialize(&self, _serializer: S) -> Result - where - S: serde::Serializer, - { - todo!() - } -} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_cluster_observability_operator.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_cluster_observability_operator.rs deleted file mode 100644 index bc7ad9fe..00000000 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_cluster_observability_operator.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::str::FromStr; - -use non_blank_string_rs::NonBlankString; - -use crate::modules::helm::chart::HelmChartScore; -//TODO package chart or something for COO okd -pub fn rhob_cluster_observability_operator() -> HelmChartScore { - HelmChartScore { - namespace: None, - release_name: NonBlankString::from_str("").unwrap(), - chart_name: NonBlankString::from_str( - "oci://hub.nationtech.io/harmony/nt-prometheus-operator", - ) - .unwrap(), - chart_version: None, - values_overrides: None, - values_yaml: None, - create_namespace: true, - install_only: true, - repository: None, - } -} diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs index 7d6aec89..3f38f0b1 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs @@ -1,20 +1,13 @@ use super::config::KubePrometheusConfig; -use log::debug; use non_blank_string_rs::NonBlankString; -use serde_yaml::{Mapping, Value}; use std::{ - collections::BTreeMap, str::FromStr, sync::{Arc, Mutex}, }; use crate::modules::{ helm::chart::HelmChartScore, - monitoring::kube_prometheus::types::{ - AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig, - AlertManagerConfigSelector, AlertManagerRoute, AlertManagerSpec, AlertManagerValues, - ConfigReloader, Limits, PrometheusConfig, Requests, Resources, - }, + monitoring::kube_prometheus::types::{Limits, Requests, Resources}, }; pub fn kube_prometheus_helm_chart_score( @@ -66,7 +59,7 @@ pub fn kube_prometheus_helm_chart_score( } let _resource_section = resource_block(&resource_limit, 2); - let mut values = format!( + let values = format!( r#" global: rbac: @@ -281,131 +274,6 @@ prometheusOperator: "#, ); - let prometheus_config = - crate::modules::monitoring::kube_prometheus::types::PrometheusConfigValues { - prometheus: PrometheusConfig { - prometheus: bool::from_str(prometheus.as_str()).expect("couldn't parse bool"), - additional_service_monitors: config.additional_service_monitors.clone(), - }, - }; - let prometheus_config_yaml = - serde_yaml::to_string(&prometheus_config).expect("Failed to serialize YAML"); - - debug!( - "serialized prometheus config: \n {:#}", - prometheus_config_yaml - ); - values.push_str(&prometheus_config_yaml); - - // add required null receiver for prometheus alert manager - let mut null_receiver = Mapping::new(); - null_receiver.insert( - Value::String("receiver".to_string()), - Value::String("null".to_string()), - ); - null_receiver.insert( - Value::String("matchers".to_string()), - Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]), - ); - null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true)); - - //add alert channels - let mut alert_manager_channel_config = AlertManagerConfig { - global: Mapping::new(), - route: AlertManagerRoute { - routes: vec![Value::Mapping(null_receiver)], - }, - receivers: vec![serde_yaml::from_str("name: 'null'").unwrap()], - }; - for receiver in config.alert_receiver_configs.iter() { - if let Some(global) = receiver.channel_global_config.clone() { - alert_manager_channel_config - .global - .insert(global.0, global.1); - } - alert_manager_channel_config - .route - .routes - .push(receiver.channel_route.clone()); - alert_manager_channel_config - .receivers - .push(receiver.channel_receiver.clone()); - } - - let mut labels = BTreeMap::new(); - labels.insert("alertmanagerConfig".to_string(), "enabled".to_string()); - let alert_manager_config_selector = AlertManagerConfigSelector { - match_labels: labels, - }; - let alert_manager_values = AlertManagerValues { - alertmanager: AlertManager { - enabled: config.alert_manager, - config: alert_manager_channel_config, - alertmanager_spec: AlertManagerSpec { - resources: Resources { - limits: Limits { - memory: "100Mi".to_string(), - cpu: "100m".to_string(), - }, - requests: Requests { - memory: "100Mi".to_string(), - cpu: "100m".to_string(), - }, - }, - alert_manager_config_selector, - replicas: 2, - }, - init_config_reloader: ConfigReloader { - resources: Resources { - limits: Limits { - memory: "100Mi".to_string(), - cpu: "100m".to_string(), - }, - requests: Requests { - memory: "100Mi".to_string(), - cpu: "100m".to_string(), - }, - }, - }, - }, - }; - - let alert_manager_yaml = - serde_yaml::to_string(&alert_manager_values).expect("Failed to serialize YAML"); - debug!("serialized alert manager: \n {:#}", alert_manager_yaml); - values.push_str(&alert_manager_yaml); - - //format alert manager additional rules for helm chart - let mut merged_rules: BTreeMap = BTreeMap::new(); - - for additional_rule in config.alert_rules.clone() { - for (key, group) in additional_rule.rules { - merged_rules.insert(key, group); - } - } - - let merged_rules = AlertManagerAdditionalPromRules { - rules: merged_rules, - }; - - let mut alert_manager_additional_rules = serde_yaml::Mapping::new(); - let rules_value = serde_yaml::to_value(merged_rules).unwrap(); - - alert_manager_additional_rules.insert( - serde_yaml::Value::String("additionalPrometheusRulesMap".to_string()), - rules_value, - ); - - let alert_manager_additional_rules_yaml = - serde_yaml::to_string(&alert_manager_additional_rules).expect("Failed to serialize YAML"); - debug!( - "alert_rules_yaml:\n{:#}", - alert_manager_additional_rules_yaml - ); - - values.push_str(&alert_manager_additional_rules_yaml); - debug!("full values.yaml: \n {:#}", values); - HelmChartScore { namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs b/harmony/src/modules/monitoring/kube_prometheus/kube_prometheus_alerting_score.rs similarity index 50% rename from harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs rename to harmony/src/modules/monitoring/kube_prometheus/kube_prometheus_alerting_score.rs index 468d3088..97f12d03 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/kube_prometheus_alerting_score.rs @@ -2,36 +2,41 @@ use std::sync::{Arc, Mutex}; use serde::Serialize; -use super::{helm::config::KubePrometheusConfig, prometheus::KubePrometheus}; +use super::helm::config::KubePrometheusConfig; use crate::{ - modules::monitoring::kube_prometheus::types::ServiceMonitor, + modules::monitoring::kube_prometheus::{KubePrometheus, types::ServiceMonitor}, score::Score, topology::{ - HelmCommand, Topology, - oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret}, - tenant::TenantManager, + Topology, + monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget}, }, }; +//TODO untested #[derive(Clone, Debug, Serialize)] -pub struct HelmPrometheusAlertingScore { +pub struct KubePrometheusAlertingScore { pub receivers: Vec>>, pub rules: Vec>>, + pub scrape_targets: Option>>>, pub service_monitors: Vec, + pub config: Arc>, } -impl Score for HelmPrometheusAlertingScore { +impl> Score for KubePrometheusAlertingScore { fn create_interpret(&self) -> Box> { - let config = Arc::new(Mutex::new(KubePrometheusConfig::new())); - config + //TODO test that additional service monitor is added + self.config .try_lock() .expect("couldn't lock config") .additional_service_monitors = self.service_monitors.clone(); + Box::new(AlertingInterpret { - sender: KubePrometheus { config }, + sender: KubePrometheus { + config: self.config.clone(), + }, receivers: self.receivers.clone(), rules: self.rules.clone(), - scrape_targets: None, + scrape_targets: self.scrape_targets.clone(), }) } fn name(&self) -> String { diff --git a/harmony/src/modules/monitoring/kube_prometheus/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/mod.rs index 122e9396..7fed749f 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/mod.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/mod.rs @@ -1,5 +1,71 @@ +use std::sync::{Arc, Mutex}; + +use async_trait::async_trait; +use serde::Serialize; + +use crate::{ + modules::monitoring::kube_prometheus::helm::config::KubePrometheusConfig, + topology::monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget}, +}; + pub mod crd; pub mod helm; -pub mod helm_prometheus_alert_score; -pub mod prometheus; +pub mod kube_prometheus_alerting_score; +pub mod score_kube_prometheus_alert_receivers; +pub mod score_kube_prometheus_ensure_ready; +pub mod score_kube_prometheus_rule; +pub mod score_kube_prometheus_scrape_target; pub mod types; + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +#[async_trait] +impl AlertSender for KubePrometheus { + fn name(&self) -> String { + "HelmKubePrometheus".to_string() + } +} + +#[derive(Clone, Debug, Serialize)] +pub struct KubePrometheus { + pub config: Arc>, +} + +impl Default for KubePrometheus { + fn default() -> Self { + Self::new() + } +} + +impl KubePrometheus { + pub fn new() -> Self { + Self { + config: Arc::new(Mutex::new(KubePrometheusConfig::new())), + } + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs deleted file mode 100644 index 98970e6c..00000000 --- a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs +++ /dev/null @@ -1,167 +0,0 @@ -use std::sync::{Arc, Mutex}; - -use async_trait::async_trait; -use log::{debug, error}; -use serde::Serialize; - -use crate::{ - interpret::{InterpretError, Outcome}, - inventory::Inventory, - modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, - score, - topology::{ - HelmCommand, Topology, - installable::Installable, - oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender}, - tenant::TenantManager, - }, -}; - -use score::Score; - -use super::{ - helm::{ - config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score, - }, - types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig}, -}; - -#[async_trait] -impl AlertSender for KubePrometheus { - fn name(&self) -> String { - "HelmKubePrometheus".to_string() - } -} - -#[async_trait] -impl Installable for KubePrometheus { - async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> { - self.configure_with_topology(topology).await; - Ok(()) - } - - async fn ensure_installed( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result<(), InterpretError> { - self.install_prometheus(inventory, topology).await?; - Ok(()) - } -} - -#[derive(Debug)] -pub struct KubePrometheus { - pub config: Arc>, -} - -impl Default for KubePrometheus { - fn default() -> Self { - Self::new() - } -} - -impl KubePrometheus { - pub fn new() -> Self { - Self { - config: Arc::new(Mutex::new(KubePrometheusConfig::new())), - } - } - - pub async fn configure_with_topology(&self, topology: &T) { - let ns = topology - .get_tenant_config() - .await - .map(|cfg| cfg.name.clone()) - .unwrap_or_else(|| "monitoring".to_string()); - error!("This must be refactored, see comments in pr #74"); - debug!("NS: {}", ns); - self.config.lock().unwrap().namespace = Some(ns); - } - - pub async fn install_receiver( - &self, - prometheus_receiver: &dyn KubePrometheusReceiver, - ) -> Result { - let prom_receiver = prometheus_receiver.configure_receiver().await; - debug!( - "adding alert receiver to prometheus config: {:#?}", - &prom_receiver - ); - let mut config = self.config.lock().unwrap(); - - config.alert_receiver_configs.push(prom_receiver); - let prom_receiver_name = prometheus_receiver.name(); - debug!("installed alert receiver {}", &prom_receiver_name); - Ok(Outcome::success(format!( - "Sucessfully installed receiver {}", - prom_receiver_name - ))) - } - - pub async fn install_rule( - &self, - prometheus_rule: &AlertManagerRuleGroup, - ) -> Result { - let prometheus_rule = prometheus_rule.configure_rule().await; - let mut config = self.config.lock().unwrap(); - - config.alert_rules.push(prometheus_rule.clone()); - Ok(Outcome::success(format!( - "Successfully installed alert rule: {:#?},", - prometheus_rule - ))) - } - - pub async fn install_prometheus( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result { - kube_prometheus_helm_chart_score(self.config.clone()) - .interpret(inventory, topology) - .await - } -} - -#[async_trait] -pub trait KubePrometheusReceiver: Send + Sync + std::fmt::Debug { - fn name(&self) -> String; - async fn configure_receiver(&self) -> AlertManagerChannelConfig; -} - -impl Serialize for Box> { - fn serialize(&self, _serializer: S) -> Result - where - S: serde::Serializer, - { - todo!() - } -} - -impl Clone for Box> { - fn clone(&self) -> Self { - self.clone_box() - } -} - -#[async_trait] -pub trait KubePrometheusRule: Send + Sync + std::fmt::Debug { - fn name(&self) -> String; - async fn configure_rule(&self) -> AlertManagerAdditionalPromRules; -} - -impl Serialize for Box> { - fn serialize(&self, _serializer: S) -> Result - where - S: serde::Serializer, - { - todo!() - } -} - -impl Clone for Box> { - fn clone(&self) -> Self { - self.clone_box() - } -} diff --git a/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_alert_receivers.rs b/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_alert_receivers.rs new file mode 100644 index 00000000..09ebecc5 --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_alert_receivers.rs @@ -0,0 +1,56 @@ +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::kube_prometheus::{ + KubePrometheus, + crd::crd_alertmanager_config::{AlertmanagerConfig, AlertmanagerConfigSpec}, + }, + }, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertReceiver}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct KubePrometheusReceiverScore { + pub sender: KubePrometheus, + pub receiver: Box>, +} + +impl Score for KubePrometheusReceiverScore { + fn name(&self) -> String { + "KubePrometheusReceiverScore".to_string() + } + + fn create_interpret(&self) -> Box> { + let name = self.receiver.name(); + let namespace = self.sender.config.lock().unwrap().namespace.clone(); + let route = self.receiver.build_route().expect(&format!( + "failed to build route for receveiver {}", + name.clone() + )); + let receiver = self.receiver.build_receiver().expect(&format!( + "failed to build receiver path for receiver {}", + name.clone() + )); + + let data = serde_json::json!({ + "route": route, + "receivers": [receiver] + }); + + let alertmanager_config = AlertmanagerConfig { + metadata: ObjectMeta { + name: Some(name), + namespace: namespace.clone(), + ..Default::default() + }, + spec: AlertmanagerConfigSpec { data: data }, + }; + + K8sResourceScore::single(alertmanager_config, namespace).create_interpret() + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_ensure_ready.rs b/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_ensure_ready.rs new file mode 100644 index 00000000..1c3a6665 --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_ensure_ready.rs @@ -0,0 +1,80 @@ +use async_trait::async_trait; +use harmony_types::id::Id; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::monitoring::kube_prometheus::KubePrometheus, + score::Score, + topology::{K8sclient, Topology}, +}; + +#[derive(Clone, Debug, Serialize)] +pub struct KubePrometheusEnsureReadyScore { + pub sender: KubePrometheus, +} + +impl Score for KubePrometheusEnsureReadyScore { + fn name(&self) -> String { + "KubePrometheusEnsureReadyScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(KubePrometheusEnsureReadyInterpret { + sender: self.sender.clone(), + }) + } +} + +#[derive(Clone, Debug, Serialize)] +pub struct KubePrometheusEnsureReadyInterpret { + pub sender: KubePrometheus, +} + +#[async_trait] +impl Interpret for KubePrometheusEnsureReadyInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &T, + ) -> Result { + let client = topology.k8s_client().await?; + let namespace = self + .sender + .config + .lock() + .unwrap() + .namespace + .clone() + .unwrap_or("default".to_string()); + + let prometheus_name = "kube-prometheues-kube-prometheus-operator"; + + client + .wait_until_deployment_ready(prometheus_name, Some(&namespace), None) + .await?; + + Ok(Outcome::success(format!( + "deployment: {} ready in ns: {}", + prometheus_name, namespace + ))) + } + + fn get_name(&self) -> InterpretName { + todo!() + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_rule.rs b/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_rule.rs new file mode 100644 index 00000000..86a5b42a --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_rule.rs @@ -0,0 +1,46 @@ +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::kube_prometheus::{ + KubePrometheus, + crd::crd_prometheus_rules::{PrometheusRule, PrometheusRuleSpec, RuleGroup}, + }, + }, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertRule}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct KubePrometheusRuleScore { + pub sender: KubePrometheus, + pub rule: Box>, +} + +impl Score for KubePrometheusRuleScore { + fn name(&self) -> String { + "KubePrometheusRuleScore".to_string() + } + + fn create_interpret(&self) -> Box> { + let name = self.rule.name(); + let namespace = self.sender.config.lock().unwrap().namespace.clone(); + let groups: Vec = + serde_json::from_value(self.rule.build_rule().expect("failed to build alert rule")) + .expect("failed to serialize rule group"); + + let prometheus_rule = PrometheusRule { + metadata: ObjectMeta { + name: Some(name.clone()), + namespace: namespace.clone(), + ..Default::default() + }, + + spec: PrometheusRuleSpec { groups }, + }; + K8sResourceScore::single(prometheus_rule, namespace).create_interpret() + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_scrape_target.rs b/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_scrape_target.rs new file mode 100644 index 00000000..9e34fb0f --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/score_kube_prometheus_scrape_target.rs @@ -0,0 +1,61 @@ +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::kube_prometheus::{ + KubePrometheus, + crd::crd_scrape_config::{ScrapeConfig, ScrapeConfigSpec, StaticConfig}, + }, + }, + score::Score, + topology::{K8sclient, Topology, monitoring::ScrapeTarget}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct KubePrometheusScrapeTargetScore { + pub sender: KubePrometheus, + pub scrape_target: Box>, +} + +impl Score for KubePrometheusScrapeTargetScore { + fn name(&self) -> String { + "KubePrometheusScrapeTargetScore".to_string() + } + + fn create_interpret(&self) -> Box> { + let name = self.scrape_target.name(); + let namespace = self.sender.config.lock().unwrap().namespace.clone(); + + let external_target = self + .scrape_target + .build_scrape_target() + .expect("failed to build external scrape target"); + + //TODO this may need to modified to include a scrapeConfigSelector label from the + //prometheus operator + let labels = external_target.labels; + + let scrape_target = ScrapeConfig { + metadata: ObjectMeta { + name: Some(name.clone()), + namespace: namespace.clone(), + ..Default::default() + }, + spec: ScrapeConfigSpec { + static_configs: Some(vec![StaticConfig { + targets: vec![format!("{}:{}", external_target.ip, external_target.port)], + labels, + }]), + metrics_path: external_target.path, + scrape_interval: external_target.interval, + job_name: Some(name), + ..Default::default() + }, + }; + + K8sResourceScore::single(scrape_target, namespace).create_interpret() + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/types.rs b/harmony/src/modules/monitoring/kube_prometheus/types.rs index abe5896a..e697b50c 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/types.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/types.rs @@ -12,36 +12,6 @@ pub trait AlertChannelConfig { async fn get_config(&self) -> AlertManagerChannelConfig; } -#[derive(Debug, Clone, Serialize)] -pub struct AlertManagerValues { - pub alertmanager: AlertManager, -} -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct AlertManager { - pub enabled: bool, - pub config: AlertManagerConfig, - pub alertmanager_spec: AlertManagerSpec, - pub init_config_reloader: ConfigReloader, -} - -#[derive(Debug, Clone, Serialize)] -pub struct ConfigReloader { - pub resources: Resources, -} - -#[derive(Debug, Clone, Serialize)] -pub struct AlertManagerConfig { - pub global: Mapping, - pub route: AlertManagerRoute, - pub receivers: Sequence, -} - -#[derive(Debug, Clone, Serialize)] -pub struct AlertManagerRoute { - pub routes: Sequence, -} - #[derive(Debug, Clone, Serialize)] pub struct AlertManagerChannelConfig { ///expecting an option that contains two values @@ -52,20 +22,6 @@ pub struct AlertManagerChannelConfig { pub channel_receiver: Value, } -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct AlertManagerSpec { - pub(crate) resources: Resources, - pub replicas: u32, - pub alert_manager_config_selector: AlertManagerConfigSelector, -} - -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct AlertManagerConfigSelector { - pub match_labels: BTreeMap, -} - #[derive(Debug, Clone, Serialize)] pub struct Resources { pub limits: Limits, diff --git a/harmony/src/modules/monitoring/mod.rs b/harmony/src/modules/monitoring/mod.rs index 7f07d5af..a9ace9fc 100644 --- a/harmony/src/modules/monitoring/mod.rs +++ b/harmony/src/modules/monitoring/mod.rs @@ -6,4 +6,5 @@ pub mod kube_prometheus; pub mod ntfy; pub mod okd; pub mod prometheus; +pub mod red_hat_cluster_observability; pub mod scrape_target; diff --git a/harmony/src/modules/monitoring/okd/cluster_monitoring.rs b/harmony/src/modules/monitoring/okd/cluster_monitoring.rs deleted file mode 100644 index 56fb59e0..00000000 --- a/harmony/src/modules/monitoring/okd/cluster_monitoring.rs +++ /dev/null @@ -1,270 +0,0 @@ -use base64::prelude::*; - -use async_trait::async_trait; -use harmony_types::id::Id; -use kube::api::DynamicObject; -use log::{debug, info, trace}; -use serde::Serialize; - -use crate::{ - data::Version, - interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, - inventory::Inventory, - modules::monitoring::okd::OpenshiftClusterAlertSender, - score::Score, - topology::{K8sclient, Topology, oberservability::monitoring::AlertReceiver}, -}; - -impl Clone for Box> { - fn clone(&self) -> Self { - self.clone_box() - } -} - -impl Serialize for Box> { - fn serialize(&self, _serializer: S) -> Result - where - S: serde::Serializer, - { - todo!() - } -} - -#[derive(Debug, Clone, Serialize)] -pub struct OpenshiftClusterAlertScore { - pub receivers: Vec>>, -} - -impl Score for OpenshiftClusterAlertScore { - fn name(&self) -> String { - "ClusterAlertScore".to_string() - } - - #[doc(hidden)] - fn create_interpret(&self) -> Box> { - Box::new(OpenshiftClusterAlertInterpret { - receivers: self.receivers.clone(), - }) - } -} - -#[derive(Debug)] -pub struct OpenshiftClusterAlertInterpret { - receivers: Vec>>, -} - -#[async_trait] -impl Interpret for OpenshiftClusterAlertInterpret { - async fn execute( - &self, - _inventory: &Inventory, - topology: &T, - ) -> Result { - let client = topology.k8s_client().await?; - let openshift_monitoring_namespace = "openshift-monitoring"; - - let mut alertmanager_main_secret: DynamicObject = client - .get_secret_json_value("alertmanager-main", Some(openshift_monitoring_namespace)) - .await?; - trace!("Got secret {alertmanager_main_secret:#?}"); - - let data: &mut serde_json::Value = &mut alertmanager_main_secret.data; - trace!("Alertmanager-main secret data {data:#?}"); - let data_obj = data - .get_mut("data") - .ok_or(InterpretError::new( - "Missing 'data' field in alertmanager-main secret.".to_string(), - ))? - .as_object_mut() - .ok_or(InterpretError::new( - "'data' field in alertmanager-main secret is expected to be an object ." - .to_string(), - ))?; - - let config_b64 = data_obj - .get("alertmanager.yaml") - .ok_or(InterpretError::new( - "Missing 'alertmanager.yaml' in alertmanager-main secret data".to_string(), - ))? - .as_str() - .unwrap_or(""); - trace!("Config base64 {config_b64}"); - - let config_bytes = BASE64_STANDARD.decode(config_b64).unwrap_or_default(); - - let mut am_config: serde_yaml::Value = - serde_yaml::from_str(&String::from_utf8(config_bytes).unwrap_or_default()) - .unwrap_or_default(); - - debug!("Current alertmanager config {am_config:#?}"); - - let existing_receivers_sequence = if let Some(receivers) = am_config.get_mut("receivers") { - match receivers.as_sequence_mut() { - Some(seq) => seq, - None => { - return Err(InterpretError::new(format!( - "Expected alertmanager config receivers to be a sequence, got {:?}", - receivers - ))); - } - } - } else { - &mut serde_yaml::Sequence::default() - }; - - let mut additional_resources = vec![]; - - for custom_receiver in &self.receivers { - let name = custom_receiver.name(); - let alertmanager_receiver = custom_receiver.as_alertmanager_receiver()?; - - let receiver_json_value = alertmanager_receiver.receiver_config; - - let receiver_yaml_string = - serde_json::to_string(&receiver_json_value).map_err(|e| { - InterpretError::new(format!("Failed to serialize receiver config: {}", e)) - })?; - - let receiver_yaml_value: serde_yaml::Value = - serde_yaml::from_str(&receiver_yaml_string).map_err(|e| { - InterpretError::new(format!("Failed to parse receiver config as YAML: {}", e)) - })?; - - if let Some(idx) = existing_receivers_sequence.iter().position(|r| { - r.get("name") - .and_then(|n| n.as_str()) - .map_or(false, |n| n == name) - }) { - info!("Replacing existing AlertManager receiver: {}", name); - existing_receivers_sequence[idx] = receiver_yaml_value; - } else { - debug!("Adding new AlertManager receiver: {}", name); - existing_receivers_sequence.push(receiver_yaml_value); - } - - additional_resources.push(alertmanager_receiver.additional_ressources); - } - - let existing_route_mapping = if let Some(route) = am_config.get_mut("route") { - match route.as_mapping_mut() { - Some(map) => map, - None => { - return Err(InterpretError::new(format!( - "Expected alertmanager config route to be a mapping, got {:?}", - route - ))); - } - } - } else { - &mut serde_yaml::Mapping::default() - }; - - let existing_route_sequence = if let Some(routes) = existing_route_mapping.get_mut("routes") - { - match routes.as_sequence_mut() { - Some(seq) => seq, - None => { - return Err(InterpretError::new(format!( - "Expected alertmanager config routes to be a sequence, got {:?}", - routes - ))); - } - } - } else { - &mut serde_yaml::Sequence::default() - }; - - for custom_receiver in &self.receivers { - let name = custom_receiver.name(); - let alertmanager_receiver = custom_receiver.as_alertmanager_receiver()?; - - let route_json_value = alertmanager_receiver.route_config; - let route_yaml_string = serde_json::to_string(&route_json_value).map_err(|e| { - InterpretError::new(format!("Failed to serialize route config: {}", e)) - })?; - - let route_yaml_value: serde_yaml::Value = serde_yaml::from_str(&route_yaml_string) - .map_err(|e| { - InterpretError::new(format!("Failed to parse route config as YAML: {}", e)) - })?; - - if let Some(idy) = existing_route_sequence.iter().position(|r| { - r.get("receiver") - .and_then(|n| n.as_str()) - .map_or(false, |n| n == name) - }) { - info!("Replacing existing AlertManager receiver: {}", name); - existing_route_sequence[idy] = route_yaml_value; - } else { - debug!("Adding new AlertManager receiver: {}", name); - existing_route_sequence.push(route_yaml_value); - } - } - debug!("Current alertmanager config {am_config:#?}"); - // TODO - // - save new version of alertmanager config - // - write additional ressources to the cluster - let am_config = serde_yaml::to_string(&am_config).map_err(|e| { - InterpretError::new(format!( - "Failed to serialize new alertmanager config to string : {e}" - )) - })?; - - let mut am_config_b64 = String::new(); - BASE64_STANDARD.encode_string(am_config, &mut am_config_b64); - - // TODO put update configmap value and save new value - data_obj.insert( - "alertmanager.yaml".to_string(), - serde_json::Value::String(am_config_b64), - ); - - // https://kubernetes.io/docs/reference/using-api/server-side-apply/#field-management - alertmanager_main_secret.metadata.managed_fields = None; - - trace!("Applying new alertmanager_main_secret {alertmanager_main_secret:#?}"); - client - .apply_dynamic( - &alertmanager_main_secret, - Some(openshift_monitoring_namespace), - true, - ) - .await?; - - let additional_resources = additional_resources.concat(); - trace!("Applying additional ressources for alert receivers {additional_resources:#?}"); - client - .apply_dynamic_many( - &additional_resources, - Some(openshift_monitoring_namespace), - true, - ) - .await?; - - Ok(Outcome::success(format!( - "Successfully configured {} cluster alert receivers: {}", - self.receivers.len(), - self.receivers - .iter() - .map(|r| r.name()) - .collect::>() - .join(", ") - ))) - } - - fn get_name(&self) -> InterpretName { - InterpretName::Custom("OpenshiftClusterAlertInterpret") - } - - fn get_version(&self) -> Version { - todo!() - } - - fn get_status(&self) -> InterpretStatus { - todo!() - } - - fn get_children(&self) -> Vec { - todo!() - } -} diff --git a/harmony/src/modules/monitoring/okd/config.rs b/harmony/src/modules/monitoring/okd/config.rs deleted file mode 100644 index b86c5f02..00000000 --- a/harmony/src/modules/monitoring/okd/config.rs +++ /dev/null @@ -1,90 +0,0 @@ -use std::{collections::BTreeMap, sync::Arc}; - -use crate::{ - interpret::{InterpretError, Outcome}, - topology::k8s::K8sClient, -}; -use k8s_openapi::api::core::v1::ConfigMap; -use kube::api::ObjectMeta; - -pub(crate) struct Config; - -impl Config { - pub async fn create_cluster_monitoring_config_cm( - client: &Arc, - ) -> Result { - let mut data = BTreeMap::new(); - data.insert( - "config.yaml".to_string(), - r#" -enableUserWorkload: true -alertmanagerMain: - enableUserAlertmanagerConfig: true -"# - .to_string(), - ); - - let cm = ConfigMap { - metadata: ObjectMeta { - name: Some("cluster-monitoring-config".to_string()), - namespace: Some("openshift-monitoring".to_string()), - ..Default::default() - }, - data: Some(data), - ..Default::default() - }; - client.apply(&cm, Some("openshift-monitoring")).await?; - - Ok(Outcome::success( - "updated cluster-monitoring-config-map".to_string(), - )) - } - - pub async fn create_user_workload_monitoring_config_cm( - client: &Arc, - ) -> Result { - let mut data = BTreeMap::new(); - data.insert( - "config.yaml".to_string(), - r#" -alertmanager: - enabled: true - enableAlertmanagerConfig: true -"# - .to_string(), - ); - let cm = ConfigMap { - metadata: ObjectMeta { - name: Some("user-workload-monitoring-config".to_string()), - namespace: Some("openshift-user-workload-monitoring".to_string()), - ..Default::default() - }, - data: Some(data), - ..Default::default() - }; - client - .apply(&cm, Some("openshift-user-workload-monitoring")) - .await?; - - Ok(Outcome::success( - "updated openshift-user-monitoring-config-map".to_string(), - )) - } - - pub async fn verify_user_workload(client: &Arc) -> Result { - let namespace = "openshift-user-workload-monitoring"; - let alertmanager_name = "alertmanager-user-workload-0"; - let prometheus_name = "prometheus-user-workload-0"; - client - .wait_for_pod_ready(alertmanager_name, Some(namespace)) - .await?; - client - .wait_for_pod_ready(prometheus_name, Some(namespace)) - .await?; - - Ok(Outcome::success(format!( - "pods: {}, {} ready in ns: {}", - alertmanager_name, prometheus_name, namespace - ))) - } -} diff --git a/harmony/src/modules/monitoring/okd/crd/alerting_rules.rs b/harmony/src/modules/monitoring/okd/crd/alerting_rules.rs new file mode 100644 index 00000000..688198ea --- /dev/null +++ b/harmony/src/modules/monitoring/okd/crd/alerting_rules.rs @@ -0,0 +1,58 @@ +use std::collections::BTreeMap; + +use kube::CustomResource; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule; + +#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema, Default)] +#[kube( + group = "monitoring.openshift.io", + version = "v1", + kind = "AlertingRule", + plural = "alertingrules", + namespaced, + derive = "Default" +)] +#[serde(rename_all = "camelCase")] +pub struct AlertingRuleSpec { + pub groups: serde_json::Value, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct RuleGroup { + pub name: String, + pub rules: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Rule { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub alert: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub expr: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub for_: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub labels: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub annotations: Option>, +} + +impl From for Rule { + fn from(value: PrometheusAlertRule) -> Self { + Rule { + alert: Some(value.alert), + expr: Some(value.expr), + for_: value.r#for, + labels: Some(value.labels.into_iter().collect::>()), + annotations: Some(value.annotations.into_iter().collect::>()), + } + } +} diff --git a/harmony/src/modules/monitoring/okd/crd/mod.rs b/harmony/src/modules/monitoring/okd/crd/mod.rs new file mode 100644 index 00000000..a3b92bd0 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/crd/mod.rs @@ -0,0 +1,3 @@ +pub mod alerting_rules; +pub mod scrape_target; +pub mod service_monitor; diff --git a/harmony/src/modules/monitoring/okd/crd/scrape_target.rs b/harmony/src/modules/monitoring/okd/crd/scrape_target.rs new file mode 100644 index 00000000..6a77f003 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/crd/scrape_target.rs @@ -0,0 +1,72 @@ +use kube::CustomResource; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema, Default)] +#[kube( + group = "monitoring.coreos.com", + version = "v1alpha1", + kind = "ScrapeConfig", + plural = "scrapeconfigs", + namespaced, + derive = "Default" +)] +#[serde(rename_all = "camelCase")] +pub struct ScrapeConfigSpec { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub job_name: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metrics_path: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub scrape_interval: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub scrape_timeout: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub scheme: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub static_configs: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub relabelings: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metric_relabelings: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct StaticConfig { + /// targets: ["host:port"] + pub targets: Vec, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub labels: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct RelabelConfig { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub source_labels: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub separator: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub target_label: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub replacement: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub action: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub regex: Option, +} diff --git a/harmony/src/modules/monitoring/okd/crd/service_monitor.rs b/harmony/src/modules/monitoring/okd/crd/service_monitor.rs new file mode 100644 index 00000000..f60f1399 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/crd/service_monitor.rs @@ -0,0 +1,97 @@ +use kube::CustomResource; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema, Default)] +#[kube( + group = "monitoring.coreos.com", + version = "v1", + kind = "ServiceMonitor", + plural = "servicemonitors", + namespaced, + derive = "Default" +)] +#[serde(rename_all = "camelCase")] +pub struct ServiceMonitorSpec { + /// The label to use to retrieve the job name from. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub job_label: Option, + + /// TargetLabels transfers labels on the Kubernetes Service onto the target. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub target_labels: Option>, + + /// PodTargetLabels transfers labels on the Kubernetes Pod onto the target. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pod_target_labels: Option>, + + /// A list of endpoints allowed as part of this ServiceMonitor. + pub endpoints: Vec, + + /// Selector to select Endpoints objects. + pub selector: LabelSelector, + + /// Selector to select which namespaces the Kubernetes Endpoints objects are discovered from. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub namespace_selector: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct Endpoint { + /// Name of the service port this endpoint refers to. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub port: Option, + + /// HTTP path to scrape for metrics. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub path: Option, + + /// HTTP scheme to use for scraping. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub scheme: Option, + + /// Interval at which metrics should be scraped. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub interval: Option, + + /// Timeout after which the scrape is ended. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub scrape_timeout: Option, + + /// HonorLabels chooses the metric's labels on collisions with target labels. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub honor_labels: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct LabelSelector { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub match_labels: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub match_expressions: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct LabelSelectorRequirement { + pub key: String, + pub operator: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub values: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct NamespaceSelector { + /// Boolean describing whether all namespaces are selected in contrast to a list restricting them. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub any: Option, + + /// List of namespace names. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub match_names: Option>, +} diff --git a/harmony/src/modules/monitoring/okd/enable_user_workload.rs b/harmony/src/modules/monitoring/okd/enable_user_workload.rs deleted file mode 100644 index 2ed0fa41..00000000 --- a/harmony/src/modules/monitoring/okd/enable_user_workload.rs +++ /dev/null @@ -1,60 +0,0 @@ -use crate::{ - data::Version, - interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, - inventory::Inventory, - modules::monitoring::okd::config::Config, - score::Score, - topology::{K8sclient, Topology}, -}; -use async_trait::async_trait; -use harmony_types::id::Id; -use serde::Serialize; - -#[derive(Clone, Debug, Serialize)] -pub struct OpenshiftUserWorkloadMonitoring {} - -impl Score for OpenshiftUserWorkloadMonitoring { - fn name(&self) -> String { - "OpenshiftUserWorkloadMonitoringScore".to_string() - } - - fn create_interpret(&self) -> Box> { - Box::new(OpenshiftUserWorkloadMonitoringInterpret {}) - } -} - -#[derive(Clone, Debug, Serialize)] -pub struct OpenshiftUserWorkloadMonitoringInterpret {} - -#[async_trait] -impl Interpret for OpenshiftUserWorkloadMonitoringInterpret { - async fn execute( - &self, - _inventory: &Inventory, - topology: &T, - ) -> Result { - let client = topology.k8s_client().await.unwrap(); - Config::create_cluster_monitoring_config_cm(&client).await?; - Config::create_user_workload_monitoring_config_cm(&client).await?; - Config::verify_user_workload(&client).await?; - Ok(Outcome::success( - "successfully enabled user-workload-monitoring".to_string(), - )) - } - - fn get_name(&self) -> InterpretName { - InterpretName::Custom("OpenshiftUserWorkloadMonitoring") - } - - fn get_version(&self) -> Version { - todo!() - } - - fn get_status(&self) -> InterpretStatus { - todo!() - } - - fn get_children(&self) -> Vec { - todo!() - } -} diff --git a/harmony/src/modules/monitoring/okd/mod.rs b/harmony/src/modules/monitoring/okd/mod.rs index ac246c5f..62d00745 100644 --- a/harmony/src/modules/monitoring/okd/mod.rs +++ b/harmony/src/modules/monitoring/okd/mod.rs @@ -1,10 +1,17 @@ -use crate::topology::oberservability::monitoring::AlertSender; +use serde::Serialize; -pub mod cluster_monitoring; -pub(crate) mod config; -pub mod enable_user_workload; +use crate::topology::monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget}; -#[derive(Debug)] +pub mod crd; +pub mod openshift_cluster_alerting_score; +pub mod score_enable_cluster_monitoring; +pub mod score_openshift_alert_rule; +pub mod score_openshift_receiver; +pub mod score_openshift_scrape_target; +pub mod score_user_workload; +pub mod score_verify_user_workload_monitoring; + +#[derive(Debug, Clone, Serialize)] pub struct OpenshiftClusterAlertSender; impl AlertSender for OpenshiftClusterAlertSender { @@ -12,3 +19,30 @@ impl AlertSender for OpenshiftClusterAlertSender { "OpenshiftClusterAlertSender".to_string() } } + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/okd/openshift_cluster_alerting_score.rs b/harmony/src/modules/monitoring/okd/openshift_cluster_alerting_score.rs new file mode 100644 index 00000000..ee801bb8 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/openshift_cluster_alerting_score.rs @@ -0,0 +1,36 @@ +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::monitoring::okd::OpenshiftClusterAlertSender, + score::Score, + topology::{ + Topology, + monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget}, + }, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct OpenshiftClusterAlertScore { + pub sender: OpenshiftClusterAlertSender, + pub receivers: Vec>>, + pub rules: Vec>>, + pub scrape_targets: Option>>>, +} + +impl> Score + for OpenshiftClusterAlertScore +{ + fn name(&self) -> String { + "OpenshiftClusterAlertScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(AlertingInterpret { + sender: OpenshiftClusterAlertSender, + receivers: self.receivers.clone(), + rules: self.rules.clone(), + scrape_targets: self.scrape_targets.clone(), + }) + } +} diff --git a/harmony/src/modules/monitoring/okd/score_enable_cluster_monitoring.rs b/harmony/src/modules/monitoring/okd/score_enable_cluster_monitoring.rs new file mode 100644 index 00000000..5459c186 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/score_enable_cluster_monitoring.rs @@ -0,0 +1,149 @@ +use std::{collections::BTreeMap, sync::Arc}; + +use async_trait::async_trait; +use harmony_types::id::Id; +use k8s_openapi::api::core::v1::ConfigMap; +use kube::api::{GroupVersionKind, ObjectMeta}; +use log::debug; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::k8s::resource::K8sResourceScore, + score::Score, + topology::{K8sclient, Topology, k8s::K8sClient}, +}; + +#[derive(Clone, Debug, Serialize)] +pub struct OpenshiftEnableClusterMonitoringScore {} + +impl Score for OpenshiftEnableClusterMonitoringScore { + fn name(&self) -> String { + "OpenshiftClusterMonitoringScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OpenshiftEnableClusterMonitoringInterpret {}) + } +} + +#[derive(Clone, Debug, Serialize)] +pub struct OpenshiftEnableClusterMonitoringInterpret {} + +#[async_trait] +impl Interpret for OpenshiftEnableClusterMonitoringInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + let namespace = "openshift-monitoring".to_string(); + let name = "cluster-monitoring-config".to_string(); + let client = topology.k8s_client().await?; + let enabled = self + .check_cluster_monitoring_enabled(client, &name, &namespace) + .await + .map_err(|e| InterpretError::new(e))?; + + debug!("enabled {:#?}", enabled); + + match enabled { + true => Ok(Outcome::success( + "Openshift Cluster Monitoring already enabled".to_string(), + )), + false => { + let mut data = BTreeMap::new(); + data.insert( + "config.yaml".to_string(), + r#" +enableUserWorkload: true +alertmanagerMain: + enableUserAlertmanagerConfig: true +"# + .to_string(), + ); + + let cm = ConfigMap { + metadata: ObjectMeta { + name: Some(name), + namespace: Some(namespace.clone()), + ..Default::default() + }, + data: Some(data), + ..Default::default() + }; + K8sResourceScore::single(cm, Some(namespace)) + .create_interpret() + .execute(inventory, topology) + .await?; + + Ok(Outcome::success( + "Successfully enabled Openshift Cluster Monitoring".to_string(), + )) + } + } + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("OpenshiftEnableClusterMonitoringInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} + +impl OpenshiftEnableClusterMonitoringInterpret { + async fn check_cluster_monitoring_enabled( + &self, + client: Arc, + name: &str, + namespace: &str, + ) -> Result { + let gvk = GroupVersionKind { + group: "".to_string(), + version: "v1".to_string(), + kind: "ConfigMap".to_string(), + }; + + let cm = match client + .get_resource_json_value(name, Some(namespace), &gvk) + .await + { + Ok(obj) => obj, + Err(_) => return Ok(false), + }; + + debug!("{:#?}", cm.data.pointer("/data/config.yaml")); + let config_yaml_str = match cm + .data + .pointer("/data/config.yaml") + .and_then(|v| v.as_str()) + { + Some(s) => s, + None => return Ok(false), + }; + + debug!("{:#?}", config_yaml_str); + let parsed_config: serde_yaml::Value = serde_yaml::from_str(config_yaml_str) + .map_err(|e| format!("Failed to parse nested YAML: {}", e))?; + + let enabled = parsed_config + .get("enableUserWorkload") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + debug!("{:#?}", enabled); + Ok(enabled) + } +} diff --git a/harmony/src/modules/monitoring/okd/score_openshift_alert_rule.rs b/harmony/src/modules/monitoring/okd/score_openshift_alert_rule.rs new file mode 100644 index 00000000..9c902c66 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/score_openshift_alert_rule.rs @@ -0,0 +1,42 @@ +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::okd::{ + OpenshiftClusterAlertSender, + crd::alerting_rules::{AlertingRule, AlertingRuleSpec}, + }, + }, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertRule}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct OpenshiftAlertRuleScore { + pub rule: Box>, +} + +impl Score for OpenshiftAlertRuleScore { + fn name(&self) -> String { + "OpenshiftAlertingRuleScore".to_string() + } + + fn create_interpret(&self) -> Box> { + let namespace = "openshift-monitoring".to_string(); + let alerting_rule = AlertingRule { + metadata: ObjectMeta { + name: Some(self.rule.name()), + namespace: Some(namespace.clone()), + ..Default::default() + }, + spec: AlertingRuleSpec { + groups: self.rule.build_rule().unwrap(), + }, + }; + + K8sResourceScore::single(alerting_rule, Some(namespace)).create_interpret() + } +} diff --git a/harmony/src/modules/monitoring/okd/score_openshift_receiver.rs b/harmony/src/modules/monitoring/okd/score_openshift_receiver.rs new file mode 100644 index 00000000..6f7013c3 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/score_openshift_receiver.rs @@ -0,0 +1,144 @@ +use async_trait::async_trait; +use base64::{Engine as _, prelude::BASE64_STANDARD}; +use harmony_types::id::Id; +use kube::api::DynamicObject; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::monitoring::okd::OpenshiftClusterAlertSender, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertReceiver}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct OpenshiftReceiverScore { + pub receiver: Box>, +} + +impl Score for OpenshiftReceiverScore { + fn name(&self) -> String { + "OpenshiftAlertReceiverScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OpenshiftReceiverInterpret { + receiver: self.receiver.clone(), + }) + } +} + +#[derive(Debug)] +pub struct OpenshiftReceiverInterpret { + receiver: Box>, +} + +#[async_trait] +impl Interpret for OpenshiftReceiverInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &T, + ) -> Result { + let client = topology.k8s_client().await?; + let ns = "openshift-monitoring"; + + let mut am_secret: DynamicObject = client + .get_secret_json_value("alertmanager-main", Some(ns)) + .await?; + + let data = am_secret + .data + .get_mut("data") + .ok_or_else(|| { + InterpretError::new("Missing 'data' field in alertmanager-main secret".into()) + })? + .as_object_mut() + .ok_or_else(|| InterpretError::new("'data' field must be a JSON object".into()))?; + + let config_b64 = data + .get("alertmanager.yaml") + .and_then(|v| v.as_str()) + .unwrap_or_default(); + + let config_bytes = BASE64_STANDARD.decode(config_b64).unwrap_or_default(); + + let mut am_config: serde_yaml::Value = serde_yaml::from_slice(&config_bytes) + .unwrap_or_else(|_| serde_yaml::Value::Mapping(serde_yaml::Mapping::new())); + + let name = self.receiver.name(); + let receiver = self.receiver.build_receiver()?; + let route = self.receiver.build_route().unwrap(); + + if am_config.get("receivers").is_none() { + am_config["receivers"] = serde_yaml::Value::Sequence(vec![]); + } + if am_config.get("route").is_none() { + am_config["route"] = serde_yaml::Value::Mapping(serde_yaml::Mapping::new()); + } + if am_config["route"].get("routes").is_none() { + am_config["route"]["routes"] = serde_yaml::Value::Sequence(vec![]); + } + + { + let receivers_seq = am_config["receivers"].as_sequence_mut().unwrap(); + if let Some(idx) = receivers_seq + .iter() + .position(|r| r.get("name").and_then(|n| n.as_str()) == Some(&name)) + { + receivers_seq[idx] = receiver; + } else { + receivers_seq.push(receiver); + } + } + + { + let route_seq = am_config["route"]["routes"].as_sequence_mut().unwrap(); + if let Some(idx) = route_seq + .iter() + .position(|r| r.get("receiver").and_then(|n| n.as_str()) == Some(&name)) + { + route_seq[idx] = route; + } else { + route_seq.push(route); + } + } + + let yaml_str = + serde_yaml::to_string(&am_config).map_err(|e| InterpretError::new(e.to_string()))?; + + let mut yaml_b64 = String::new(); + + BASE64_STANDARD.encode_string(yaml_str, &mut yaml_b64); + data.insert( + "alertmanager.yaml".to_string(), + serde_json::Value::String(yaml_b64), + ); + am_secret.metadata.managed_fields = None; + + client.apply_dynamic(&am_secret, Some(ns), true).await?; + + Ok(Outcome::success(format!( + "Configured OpenShift cluster alert receiver: {}", + name + ))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("OpenshiftAlertReceiverInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/okd/score_openshift_scrape_target.rs b/harmony/src/modules/monitoring/okd/score_openshift_scrape_target.rs new file mode 100644 index 00000000..44b0106c --- /dev/null +++ b/harmony/src/modules/monitoring/okd/score_openshift_scrape_target.rs @@ -0,0 +1,188 @@ +use std::collections::BTreeMap; + +use async_trait::async_trait; +use harmony_types::id::Id; +use k8s_openapi::{ + api::core::v1::{ + EndpointAddress, EndpointPort, EndpointSubset, Endpoints, Service, ServicePort, ServiceSpec, + }, + apimachinery::pkg::util::intstr::IntOrString, +}; +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::okd::{ + OpenshiftClusterAlertSender, + crd::service_monitor::{Endpoint, LabelSelector, ServiceMonitor, ServiceMonitorSpec}, + }, + }, + score::Score, + topology::{ + K8sclient, Topology, + monitoring::{ExternalScrapeTarget, ScrapeTarget}, + }, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct OpenshiftScrapeTargetScore { + pub scrape_target: Box>, +} + +impl Score for OpenshiftScrapeTargetScore { + fn name(&self) -> String { + "OpenshiftAlertingRuleScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OpenshiftScrapeTargetInterpret { + scrape_target: self.scrape_target.clone(), + }) + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct OpenshiftScrapeTargetInterpret { + scrape_target: Box>, +} + +#[async_trait] +impl Interpret for OpenshiftScrapeTargetInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + let namespace = "openshift-monitoring".to_string(); + let name = self.scrape_target.name(); + let external_target = self + .scrape_target + .build_scrape_target() + .expect("failed to build scrape target ExternalScrapeTarget"); + + let (service, endpoints, service_monitor) = + self.to_k8s_resources(&name, &namespace, external_target); + + K8sResourceScore::single(service, Some(namespace.clone())) + .create_interpret() + .execute(inventory, topology) + .await?; + + K8sResourceScore::single(endpoints, Some(namespace.clone())) + .create_interpret() + .execute(inventory, topology) + .await?; + + K8sResourceScore::single(service_monitor, Some(namespace.clone())) + .create_interpret() + .execute(inventory, topology) + .await?; + + Ok(Outcome::success( + "Installed scrape target of Openshift".to_string(), + )) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("OpenshiftScrapeTargetInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} + +impl OpenshiftScrapeTargetInterpret { + /// Maps the generic intent into the 3 required Kubernetes objects + pub fn to_k8s_resources( + &self, + name: &str, + namespace: &str, + external_target: ExternalScrapeTarget, + ) -> (Service, Endpoints, ServiceMonitor) { + let mut labels = external_target.labels.clone().unwrap_or(BTreeMap::new()); + + labels.insert("harmony/target-name".to_string(), name.to_string().clone()); + + let service = Service { + metadata: ObjectMeta { + name: Some(name.to_string().clone()), + namespace: Some(namespace.to_string()), + labels: Some(labels.clone()), + ..Default::default() + }, + spec: Some(ServiceSpec { + cluster_ip: Some("None".to_string()), // Headless + ports: Some(vec![ServicePort { + name: Some("metrics".to_string()), + port: external_target.port.clone(), + target_port: Some(IntOrString::Int(external_target.port)), + ..Default::default() + }]), + ..Default::default() + }), + ..Default::default() + }; + + let endpoints = Endpoints { + metadata: ObjectMeta { + name: Some(name.to_string().clone()), + namespace: Some(namespace.to_string()), + labels: Some(labels.clone()), + ..Default::default() + }, + subsets: Some(vec![EndpointSubset { + addresses: Some(vec![EndpointAddress { + ip: external_target.ip.to_string().clone(), + ..Default::default() + }]), + ports: Some(vec![EndpointPort { + name: Some("metrics".to_string()), + port: external_target.port, + ..Default::default() + }]), + ..Default::default() + }]), + }; + + let service_monitor = ServiceMonitor { + metadata: ObjectMeta { + name: Some(name.to_string().clone()), + namespace: Some(namespace.to_string()), + ..Default::default() + }, + spec: ServiceMonitorSpec { + job_label: Some("harmony/target-name".to_string()), + endpoints: vec![Endpoint { + port: Some("metrics".to_string()), + interval: external_target.interval.clone(), + path: external_target.path.clone(), + ..Default::default() + }], + selector: LabelSelector { + match_labels: Some(BTreeMap::from([( + "harmony/target-name".to_string(), + name.to_string().clone(), + )])), + ..Default::default() + }, + ..Default::default() + }, + }; + + (service, endpoints, service_monitor) + } +} diff --git a/harmony/src/modules/monitoring/okd/score_user_workload.rs b/harmony/src/modules/monitoring/okd/score_user_workload.rs new file mode 100644 index 00000000..54553be7 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/score_user_workload.rs @@ -0,0 +1,157 @@ +use std::{collections::BTreeMap, sync::Arc}; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::k8s::resource::K8sResourceScore, + score::Score, + topology::{K8sclient, Topology, k8s::K8sClient}, +}; +use async_trait::async_trait; +use harmony_types::id::Id; +use k8s_openapi::api::core::v1::ConfigMap; +use kube::api::{GroupVersionKind, ObjectMeta}; +use log::debug; +use serde::Serialize; + +#[derive(Clone, Debug, Serialize)] +pub struct OpenshiftUserWorkloadMonitoring {} + +impl Score for OpenshiftUserWorkloadMonitoring { + fn name(&self) -> String { + "OpenshiftUserWorkloadMonitoringScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OpenshiftUserWorkloadMonitoringInterpret {}) + } +} + +#[derive(Clone, Debug, Serialize)] +pub struct OpenshiftUserWorkloadMonitoringInterpret {} + +#[async_trait] +impl Interpret for OpenshiftUserWorkloadMonitoringInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + let namespace = "openshift-user-workload-monitoring".to_string(); + let cm_name = "user-workload-monitoring-config".to_string(); + let client = topology.k8s_client().await?; + + let cm_enabled = self + .check_cluster_user_workload_monitoring_enabled(client, &cm_name, &namespace) + .await?; + + match cm_enabled { + true => Ok(Outcome::success( + "OpenshiftUserWorkloadMonitoringEnabled".to_string(), + )), + false => { + let mut data = BTreeMap::new(); + data.insert( + "config.yaml".to_string(), + r#" +alertmanager: + enabled: true + enableAlertmanagerConfig: true +"# + .to_string(), + ); + + let cm = ConfigMap { + metadata: ObjectMeta { + name: Some("user-workload-monitoring-config".to_string()), + namespace: Some(namespace.clone()), + ..Default::default() + }, + data: Some(data), + ..Default::default() + }; + + K8sResourceScore::single(cm, Some(namespace)) + .create_interpret() + .execute(inventory, topology) + .await + } + } + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("OpenshiftUserWorkloadMonitoringInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} + +impl OpenshiftUserWorkloadMonitoringInterpret { + async fn check_cluster_user_workload_monitoring_enabled( + &self, + client: Arc, + name: &str, + namespace: &str, + ) -> Result { + let gvk = GroupVersionKind { + group: "".to_string(), + version: "v1".to_string(), + kind: "ConfigMap".to_string(), + }; + + let cm = match client + .get_resource_json_value(name, Some(namespace), &gvk) + .await + { + Ok(obj) => obj, + Err(_) => return Ok(false), // CM doesn't exist? Treat as disabled. + }; + + debug!("{:#?}", cm.data.pointer("/data/config.yaml")); + let config_yaml_str = match cm + .data + .pointer("/data/config.yaml") + .and_then(|v| v.as_str()) + { + Some(s) => s, + None => return Ok(false), // Key missing? Treat as disabled. + }; + + debug!("{:#?}", config_yaml_str); + let parsed_config: serde_yaml::Value = serde_yaml::from_str(config_yaml_str) + .map_err(|e| format!("Failed to parse nested YAML: {}", e))?; + + let alert_manager_enabled = parsed_config + .get("alertmanager") + .and_then(|a| a.get("enableAlertmanagerConfig")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + debug!("alertmanagerenabled: {:#?}", alert_manager_enabled); + + let enabled = parsed_config + .get("alertmanager") + .and_then(|enabled| enabled.get("enabled")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + debug!("user workload monitoring enabled: {:#?}", enabled); + + if alert_manager_enabled && enabled == true { + Ok(true) + } else { + Ok(false) + } + } +} diff --git a/harmony/src/modules/monitoring/okd/score_verify_user_workload_monitoring.rs b/harmony/src/modules/monitoring/okd/score_verify_user_workload_monitoring.rs new file mode 100644 index 00000000..29786763 --- /dev/null +++ b/harmony/src/modules/monitoring/okd/score_verify_user_workload_monitoring.rs @@ -0,0 +1,70 @@ +use async_trait::async_trait; +use harmony_types::id::Id; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + score::Score, + topology::{K8sclient, Topology}, +}; + +#[derive(Clone, Debug, Serialize)] +pub struct VerifyUserWorkload {} + +impl Score for VerifyUserWorkload { + fn name(&self) -> String { + "VerifyUserWorkload".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(VerifyUserWorkloadInterpret {}) + } +} + +#[derive(Clone, Debug, Serialize)] +pub struct VerifyUserWorkloadInterpret {} + +#[async_trait] +impl Interpret for VerifyUserWorkloadInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &T, + ) -> Result { + let client = topology.k8s_client().await?; + let namespace = "openshift-user-workload-monitoring"; + let alertmanager_name = "alertmanager-user-workload-0"; + let prometheus_name = "prometheus-user-workload-0"; + + client + .wait_for_pod_ready(alertmanager_name, Some(namespace)) + .await?; + + client + .wait_for_pod_ready(prometheus_name, Some(namespace)) + .await?; + + Ok(Outcome::success(format!( + "pods: {}, {} ready in ns: {}", + alertmanager_name, prometheus_name, namespace + ))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("VerifyUserWorkloadInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/prometheus/helm/mod.rs b/harmony/src/modules/monitoring/prometheus/helm/mod.rs index 431fc6ca..99a01e51 100644 --- a/harmony/src/modules/monitoring/prometheus/helm/mod.rs +++ b/harmony/src/modules/monitoring/prometheus/helm/mod.rs @@ -1 +1,2 @@ +pub mod prometheus_config; pub mod prometheus_helm; diff --git a/harmony/src/modules/monitoring/prometheus/prometheus_config.rs b/harmony/src/modules/monitoring/prometheus/helm/prometheus_config.rs similarity index 95% rename from harmony/src/modules/monitoring/prometheus/prometheus_config.rs rename to harmony/src/modules/monitoring/prometheus/helm/prometheus_config.rs index 32c3fab5..031d1ac1 100644 --- a/harmony/src/modules/monitoring/prometheus/prometheus_config.rs +++ b/harmony/src/modules/monitoring/prometheus/helm/prometheus_config.rs @@ -1,8 +1,10 @@ +use serde::Serialize; + use crate::modules::monitoring::kube_prometheus::types::{ AlertManagerAdditionalPromRules, AlertManagerChannelConfig, ServiceMonitor, }; -#[derive(Debug)] +#[derive(Debug, Clone, Serialize)] pub struct PrometheusConfig { pub namespace: Option, pub default_rules: bool, diff --git a/harmony/src/modules/monitoring/prometheus/helm/prometheus_helm.rs b/harmony/src/modules/monitoring/prometheus/helm/prometheus_helm.rs index 611c500f..b0305a8d 100644 --- a/harmony/src/modules/monitoring/prometheus/helm/prometheus_helm.rs +++ b/harmony/src/modules/monitoring/prometheus/helm/prometheus_helm.rs @@ -3,9 +3,8 @@ use std::sync::{Arc, Mutex}; use non_blank_string_rs::NonBlankString; -use crate::modules::{ - helm::chart::HelmChartScore, monitoring::prometheus::prometheus_config::PrometheusConfig, -}; +use crate::modules::helm::chart::HelmChartScore; +use crate::modules::monitoring::prometheus::helm::prometheus_config::PrometheusConfig; pub fn prometheus_helm_chart_score(config: Arc>) -> HelmChartScore { let config = config.lock().unwrap(); @@ -30,6 +29,7 @@ server: fullnameOverride: prometheus-{ns} "# ); + HelmChartScore { namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()), release_name: NonBlankString::from_str("prometheus").unwrap(), diff --git a/harmony/src/modules/monitoring/prometheus/mod.rs b/harmony/src/modules/monitoring/prometheus/mod.rs index d1bda0e2..0cf651a6 100644 --- a/harmony/src/modules/monitoring/prometheus/mod.rs +++ b/harmony/src/modules/monitoring/prometheus/mod.rs @@ -1,4 +1,56 @@ +use std::sync::{Arc, Mutex}; + +use async_trait::async_trait; +use serde::Serialize; + +use crate::{ + modules::monitoring::prometheus::helm::prometheus_config::PrometheusConfig, + topology::monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget}, +}; + pub mod helm; -#[allow(clippy::module_inception)] -pub mod prometheus; -pub mod prometheus_config; +pub mod prometheus_alerting_score; +pub mod score_prometheus_alert_receivers; +pub mod score_prometheus_ensure_ready; +pub mod score_prometheus_install; +pub mod score_prometheus_rule; +pub mod score_prometheus_scrape_target; + +#[derive(Debug, Clone, Serialize)] +pub struct Prometheus { + pub config: Arc>, +} + +#[async_trait] +impl AlertSender for Prometheus { + fn name(&self) -> String { + "Prometheus".to_string() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/prometheus/prometheus.rs b/harmony/src/modules/monitoring/prometheus/prometheus.rs deleted file mode 100644 index 2fe0d06c..00000000 --- a/harmony/src/modules/monitoring/prometheus/prometheus.rs +++ /dev/null @@ -1,194 +0,0 @@ -use std::sync::{Arc, Mutex}; - -use async_trait::async_trait; -use log::{debug, error}; -use serde::Serialize; - -use crate::{ - interpret::{InterpretError, Outcome}, - inventory::Inventory, - modules::monitoring::{ - alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, - grafana::helm::helm_grafana::grafana_helm_chart_score, - kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig}, - }, - score::Score, - topology::{ - HelmCommand, Topology, - installable::Installable, - oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender}, - tenant::TenantManager, - }, -}; - -use super::{ - helm::prometheus_helm::prometheus_helm_chart_score, prometheus_config::PrometheusConfig, -}; - -#[derive(Debug)] -pub struct Prometheus { - pub config: Arc>, -} - -#[async_trait] -impl AlertSender for Prometheus { - fn name(&self) -> String { - "Prometheus".to_string() - } -} - -impl Default for Prometheus { - fn default() -> Self { - Self::new() - } -} - -impl Prometheus { - pub fn new() -> Self { - Self { - config: Arc::new(Mutex::new(PrometheusConfig::new())), - } - } - pub async fn configure_with_topology(&self, topology: &T) { - let ns = topology - .get_tenant_config() - .await - .map(|cfg| cfg.name.clone()) - .unwrap_or_else(|| "monitoring".to_string()); - error!("This must be refactored, see comments in pr #74"); - debug!("NS: {}", ns); - self.config.lock().unwrap().namespace = Some(ns); - } - - pub async fn install_receiver( - &self, - prometheus_receiver: &dyn PrometheusReceiver, - ) -> Result { - let prom_receiver = prometheus_receiver.configure_receiver().await; - debug!( - "adding alert receiver to prometheus config: {:#?}", - &prom_receiver - ); - let mut config = self.config.lock().unwrap(); - - config.alert_receiver_configs.push(prom_receiver); - let prom_receiver_name = prometheus_receiver.name(); - debug!("installed alert receiver {}", &prom_receiver_name); - Ok(Outcome::success(format!( - "Sucessfully installed receiver {}", - prom_receiver_name - ))) - } - - pub async fn install_rule( - &self, - prometheus_rule: &AlertManagerRuleGroup, - ) -> Result { - let prometheus_rule = prometheus_rule.configure_rule().await; - let mut config = self.config.lock().unwrap(); - - config.alert_rules.push(prometheus_rule.clone()); - Ok(Outcome::success(format!( - "Successfully installed alert rule: {:#?},", - prometheus_rule - ))) - } - - pub async fn install_prometheus( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result { - prometheus_helm_chart_score(self.config.clone()) - .interpret(inventory, topology) - .await - } - pub async fn install_grafana( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result { - let namespace = { - let config = self.config.lock().unwrap(); - config.namespace.clone() - }; - - if let Some(ns) = namespace.as_deref() { - grafana_helm_chart_score(ns, false) - .interpret(inventory, topology) - .await - } else { - Err(InterpretError::new( - "could not install grafana, missing namespace".to_string(), - )) - } - } -} -#[async_trait] -impl Installable for Prometheus { - async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> { - self.configure_with_topology(topology).await; - Ok(()) - } - - async fn ensure_installed( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result<(), InterpretError> { - self.install_prometheus(inventory, topology).await?; - - let install_grafana = { - let config = self.config.lock().unwrap(); - config.grafana - }; - - if install_grafana { - self.install_grafana(inventory, topology).await?; - } - - Ok(()) - } -} - -#[async_trait] -pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug { - fn name(&self) -> String; - async fn configure_receiver(&self) -> AlertManagerChannelConfig; -} - -impl Serialize for Box> { - fn serialize(&self, _serializer: S) -> Result - where - S: serde::Serializer, - { - todo!() - } -} - -impl Clone for Box> { - fn clone(&self) -> Self { - self.clone_box() - } -} - -#[async_trait] -pub trait PrometheusRule: Send + Sync + std::fmt::Debug { - fn name(&self) -> String; - async fn configure_rule(&self) -> AlertManagerAdditionalPromRules; -} - -impl Serialize for Box> { - fn serialize(&self, _serializer: S) -> Result - where - S: serde::Serializer, - { - todo!() - } -} - -impl Clone for Box> { - fn clone(&self) -> Self { - self.clone_box() - } -} diff --git a/harmony/src/modules/monitoring/prometheus/prometheus_alerting_score.rs b/harmony/src/modules/monitoring/prometheus/prometheus_alerting_score.rs new file mode 100644 index 00000000..52d9d8cc --- /dev/null +++ b/harmony/src/modules/monitoring/prometheus/prometheus_alerting_score.rs @@ -0,0 +1,48 @@ +use std::sync::{Arc, Mutex}; + +use serde::Serialize; + +use crate::{ + modules::monitoring::{ + kube_prometheus::types::ServiceMonitor, + prometheus::{Prometheus, helm::prometheus_config::PrometheusConfig}, + }, + score::Score, + topology::{ + Topology, + monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget}, + }, +}; + +//TODO untested +#[derive(Clone, Debug, Serialize)] +pub struct PrometheusAlertingScore { + pub receivers: Vec>>, + pub rules: Vec>>, + pub scrape_targets: Option>>>, + pub service_monitors: Vec, + pub config: Arc>, +} + +impl> Score for PrometheusAlertingScore { + fn create_interpret(&self) -> Box> { + //TODO test that additional service monitor is added + self.config + .try_lock() + .expect("couldn't lock config") + .additional_service_monitors = self.service_monitors.clone(); + + Box::new(AlertingInterpret { + sender: Prometheus { + config: self.config.clone(), + }, + receivers: self.receivers.clone(), + rules: self.rules.clone(), + scrape_targets: self.scrape_targets.clone(), + }) + } + + fn name(&self) -> String { + "HelmPrometheusAlertingScore".to_string() + } +} diff --git a/harmony/src/modules/monitoring/prometheus/score_prometheus_alert_receivers.rs b/harmony/src/modules/monitoring/prometheus/score_prometheus_alert_receivers.rs new file mode 100644 index 00000000..58acfdb9 --- /dev/null +++ b/harmony/src/modules/monitoring/prometheus/score_prometheus_alert_receivers.rs @@ -0,0 +1,59 @@ +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::{ + kube_prometheus::crd::crd_alertmanager_config::{ + AlertmanagerConfig, AlertmanagerConfigSpec, + }, + prometheus::Prometheus, + }, + }, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertReceiver}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct PrometheusReceiverScore { + pub sender: Prometheus, + pub receiver: Box>, +} + +impl Score for PrometheusReceiverScore { + fn name(&self) -> String { + "PrometheusReceiverScore".to_string() + } + + fn create_interpret(&self) -> Box> { + let name = self.receiver.name(); + let namespace = self.sender.config.lock().unwrap().namespace.clone(); + let route = self.receiver.build_route().expect(&format!( + "failed to build route for receveiver {}", + name.clone() + )); + + let receiver = self.receiver.build_receiver().expect(&format!( + "failed to build receiver path for receiver {}", + name.clone() + )); + + let data = serde_json::json!({ + "route": route, + "receivers": [receiver] + }); + + let alertmanager_config = AlertmanagerConfig { + metadata: ObjectMeta { + name: Some(name), + namespace: namespace.clone(), + ..Default::default() + }, + spec: AlertmanagerConfigSpec { data: data }, + }; + + K8sResourceScore::single(alertmanager_config, namespace).create_interpret() + } +} diff --git a/harmony/src/modules/monitoring/prometheus/score_prometheus_ensure_ready.rs b/harmony/src/modules/monitoring/prometheus/score_prometheus_ensure_ready.rs new file mode 100644 index 00000000..7bb14113 --- /dev/null +++ b/harmony/src/modules/monitoring/prometheus/score_prometheus_ensure_ready.rs @@ -0,0 +1,80 @@ +use async_trait::async_trait; +use harmony_types::id::Id; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::monitoring::prometheus::Prometheus, + score::Score, + topology::{K8sclient, Topology}, +}; + +#[derive(Clone, Debug, Serialize)] +pub struct PrometheusEnsureReadyScore { + pub sender: Prometheus, +} + +impl Score for PrometheusEnsureReadyScore { + fn name(&self) -> String { + "PrometheusEnsureReadyScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(PrometheusEnsureReadyInterpret { + sender: self.sender.clone(), + }) + } +} + +#[derive(Clone, Debug, Serialize)] +pub struct PrometheusEnsureReadyInterpret { + pub sender: Prometheus, +} + +#[async_trait] +impl Interpret for PrometheusEnsureReadyInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &T, + ) -> Result { + let client = topology.k8s_client().await?; + let namespace = self + .sender + .config + .lock() + .unwrap() + .namespace + .clone() + .unwrap_or("default".to_string()); + + let prometheus_name = "prometheues-prometheus-operator"; + + client + .wait_until_deployment_ready(prometheus_name, Some(&namespace), None) + .await?; + + Ok(Outcome::success(format!( + "deployment: {} ready in ns: {}", + prometheus_name, namespace + ))) + } + + fn get_name(&self) -> InterpretName { + todo!() + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/prometheus/score_prometheus_install.rs b/harmony/src/modules/monitoring/prometheus/score_prometheus_install.rs new file mode 100644 index 00000000..39eeef2c --- /dev/null +++ b/harmony/src/modules/monitoring/prometheus/score_prometheus_install.rs @@ -0,0 +1,65 @@ +use async_trait::async_trait; +use harmony_types::id::Id; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::monitoring::prometheus::{ + Prometheus, helm::prometheus_helm::prometheus_helm_chart_score, + }, + score::Score, + topology::{HelmCommand, K8sclient, Topology}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct PrometheusInstallScore { + pub sender: Prometheus, +} + +impl Score for PrometheusInstallScore { + fn name(&self) -> String { + "PrometheusInstallScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(PrometheusInstallInterpret { + score: self.clone(), + }) + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct PrometheusInstallInterpret { + score: PrometheusInstallScore, +} + +#[async_trait] +impl Interpret for PrometheusInstallInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + //TODO add interpret to install service monitors CRD from sender Prometheus + let score = prometheus_helm_chart_score(self.score.sender.config.clone()); + score.create_interpret().execute(inventory, topology).await + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("PrometheusInstallInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/prometheus/score_prometheus_rule.rs b/harmony/src/modules/monitoring/prometheus/score_prometheus_rule.rs new file mode 100644 index 00000000..c9840c16 --- /dev/null +++ b/harmony/src/modules/monitoring/prometheus/score_prometheus_rule.rs @@ -0,0 +1,48 @@ +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::{ + kube_prometheus::crd::crd_prometheus_rules::{ + PrometheusRule, PrometheusRuleSpec, RuleGroup, + }, + prometheus::Prometheus, + }, + }, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertRule}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct PrometheusRuleScore { + pub sender: Prometheus, + pub rule: Box>, +} + +impl Score for PrometheusRuleScore { + fn name(&self) -> String { + "PrometheusRuleScore".to_string() + } + + fn create_interpret(&self) -> Box> { + let name = self.rule.name(); + let namespace = self.sender.config.lock().unwrap().namespace.clone(); + let groups: Vec = + serde_json::from_value(self.rule.build_rule().expect("failed to build alert rule")) + .expect("failed to serialize rule group"); + + let prometheus_rule = PrometheusRule { + metadata: ObjectMeta { + name: Some(name.clone()), + namespace: namespace.clone(), + ..Default::default() + }, + + spec: PrometheusRuleSpec { groups }, + }; + K8sResourceScore::single(prometheus_rule, namespace).create_interpret() + } +} diff --git a/harmony/src/modules/monitoring/prometheus/score_prometheus_scrape_target.rs b/harmony/src/modules/monitoring/prometheus/score_prometheus_scrape_target.rs new file mode 100644 index 00000000..df4db9eb --- /dev/null +++ b/harmony/src/modules/monitoring/prometheus/score_prometheus_scrape_target.rs @@ -0,0 +1,64 @@ +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::topology::monitoring::AlertRule; +use crate::{ + interpret::Interpret, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::{ + kube_prometheus::crd::crd_scrape_config::{ + ScrapeConfig, ScrapeConfigSpec, StaticConfig, + }, + prometheus::Prometheus, + }, + }, + score::Score, + topology::{K8sclient, Topology, monitoring::ScrapeTarget}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct PrometheusScrapeTargetScore { + pub sender: Prometheus, + pub scrape_target: Box>, +} + +impl Score for PrometheusScrapeTargetScore { + fn name(&self) -> String { + "PrometheusScrapeTargetScore".to_string() + } + + fn create_interpret(&self) -> Box> { + let name = self.scrape_target.name(); + let namespace = self.sender.config.lock().unwrap().namespace.clone(); + + let external_target = self + .scrape_target + .build_scrape_target() + .expect("failed to build external scrape target"); + + //TODO this may need to modified to include a scrapeConfigSelector label from the + //prometheus operator + let labels = external_target.labels; + + let scrape_target = ScrapeConfig { + metadata: ObjectMeta { + name: Some(name.clone()), + namespace: namespace.clone(), + ..Default::default() + }, + spec: ScrapeConfigSpec { + static_configs: Some(vec![StaticConfig { + targets: vec![format!("{}:{}", external_target.ip, external_target.port)], + labels, + }]), + metrics_path: external_target.path, + scrape_interval: external_target.interval, + job_name: Some(name), + ..Default::default() + }, + }; + + K8sResourceScore::single(scrape_target, namespace).create_interpret() + } +} diff --git a/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/mod.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/mod.rs new file mode 100644 index 00000000..0842d910 --- /dev/null +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/mod.rs @@ -0,0 +1,8 @@ +pub mod rhob_alertmanager_config; +pub mod rhob_alertmanagers; +pub mod rhob_default_rules; +pub mod rhob_monitoring_stack; +pub mod rhob_prometheus_rules; +pub mod rhob_prometheuses; +pub mod rhob_role; +pub mod rhob_service_monitor; diff --git a/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_alertmanager_config.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_alertmanager_config.rs new file mode 100644 index 00000000..0d56dda1 --- /dev/null +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_alertmanager_config.rs @@ -0,0 +1,17 @@ +use kube::CustomResource; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] +#[kube( + group = "monitoring.rhobs", + version = "v1alpha1", + kind = "AlertmanagerConfig", + plural = "alertmanagerconfigs", + namespaced, + derive = "Default" +)] +pub struct AlertmanagerConfigSpec { + #[serde(flatten)] + pub data: serde_json::Value, +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_alertmanagers.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_alertmanagers.rs similarity index 93% rename from harmony/src/modules/monitoring/kube_prometheus/crd/rhob_alertmanagers.rs rename to harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_alertmanagers.rs index 44354673..22c3386a 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_alertmanagers.rs +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_alertmanagers.rs @@ -2,7 +2,7 @@ use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use super::crd_prometheuses::LabelSelector; +use crate::modules::monitoring::red_hat_cluster_observability::crd::rhob_prometheuses::LabelSelector; /// Rust CRD for `Alertmanager` from Prometheus Operator #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_default_rules.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_default_rules.rs similarity index 86% rename from harmony/src/modules/monitoring/kube_prometheus/crd/rhob_default_rules.rs rename to harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_default_rules.rs index 459bd3fb..5adea7e4 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_default_rules.rs +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_default_rules.rs @@ -1,11 +1,11 @@ -use crate::modules::{ - monitoring::kube_prometheus::crd::rhob_prometheus_rules::Rule, - prometheus::alerts::k8s::{ +use crate::modules::monitoring::{ + alert_rule::alerts::k8s::{ deployment::alert_deployment_unavailable, pod::{alert_container_restarting, alert_pod_not_ready, pod_failed}, pvc::high_pvc_fill_rate_over_two_days, service::alert_service_down, }, + red_hat_cluster_observability::crd::rhob_prometheus_rules::Rule, }; pub fn build_default_application_rules() -> Vec { diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_monitoring_stack.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_monitoring_stack.rs similarity index 89% rename from harmony/src/modules/monitoring/kube_prometheus/crd/rhob_monitoring_stack.rs rename to harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_monitoring_stack.rs index be9ccc0f..99e72460 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_monitoring_stack.rs +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_monitoring_stack.rs @@ -2,7 +2,7 @@ use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector; +use crate::modules::monitoring::red_hat_cluster_observability::crd::rhob_prometheuses::LabelSelector; /// MonitoringStack CRD for monitoring.rhobs/v1alpha1 #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] @@ -11,7 +11,8 @@ use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSe version = "v1alpha1", kind = "MonitoringStack", plural = "monitoringstacks", - namespaced + namespaced, + derive = "Default" )] #[serde(rename_all = "camelCase")] pub struct MonitoringStackSpec { diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_prometheus_rules.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_prometheus_rules.rs similarity index 100% rename from harmony/src/modules/monitoring/kube_prometheus/crd/rhob_prometheus_rules.rs rename to harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_prometheus_rules.rs diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_prometheuses.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_prometheuses.rs similarity index 96% rename from harmony/src/modules/monitoring/kube_prometheus/crd/rhob_prometheuses.rs rename to harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_prometheuses.rs index 18d3f57f..b93ecc76 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_prometheuses.rs +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_prometheuses.rs @@ -4,8 +4,6 @@ use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use crate::modules::monitoring::kube_prometheus::types::Operator; - #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] #[kube( group = "monitoring.rhobs", @@ -93,6 +91,14 @@ pub struct LabelSelectorRequirement { pub values: Vec, } +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub enum Operator { + In, + NotIn, + Exists, + DoesNotExist, +} + impl Default for PrometheusSpec { fn default() -> Self { PrometheusSpec { diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_role.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_role.rs similarity index 100% rename from harmony/src/modules/monitoring/kube_prometheus/crd/rhob_role.rs rename to harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_role.rs diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_service_monitor.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_service_monitor.rs similarity index 100% rename from harmony/src/modules/monitoring/kube_prometheus/crd/rhob_service_monitor.rs rename to harmony/src/modules/monitoring/red_hat_cluster_observability/crd/rhob_service_monitor.rs diff --git a/harmony/src/modules/monitoring/red_hat_cluster_observability/mod.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/mod.rs new file mode 100644 index 00000000..5c28b7a7 --- /dev/null +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/mod.rs @@ -0,0 +1,55 @@ +use serde::Serialize; + +use crate::{ + modules::monitoring::red_hat_cluster_observability::crd::rhob_prometheuses::LabelSelector, + topology::monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget}, +}; + +pub mod crd; +pub mod redhat_cluster_observability; +pub mod score_alert_receiver; +pub mod score_coo_monitoring_stack; +pub mod score_redhat_cluster_observability_operator; + +/// RedHat Cluster Observability Operator allows fine grained control of monitoring stack +/// solutions. This can be run in parallel with the default openshift-monitoring stack. +/// https://www.redhat.com/en/blog/introducing-cluster-observability-operator +#[derive(Debug, Clone, Serialize)] +pub struct RedHatClusterObservability { + ///namespace where the MonitoringStack CRD will be deployed + pub namespace: String, + /// Labels used to match resources the stack will monitor + pub resource_selector: Option, +} + +impl AlertSender for RedHatClusterObservability { + fn name(&self) -> String { + "RedHatClusterObs".to_string() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} +impl Serialize for Box> { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/red_hat_cluster_observability/redhat_cluster_observability.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/redhat_cluster_observability.rs new file mode 100644 index 00000000..c8417a58 --- /dev/null +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/redhat_cluster_observability.rs @@ -0,0 +1,37 @@ +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::monitoring::red_hat_cluster_observability::RedHatClusterObservability, + score::Score, + topology::{ + Topology, + monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget}, + }, +}; + +//TODO untested +#[derive(Debug, Clone, Serialize)] +pub struct RedHatClusterObservabilityScore { + pub sender: RedHatClusterObservability, + pub receivers: Vec>>, + pub rules: Vec>>, + pub scrape_targets: Option>>>, +} + +impl> Score + for RedHatClusterObservabilityScore +{ + fn name(&self) -> String { + "RedHatClusterObservabilityScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(AlertingInterpret { + sender: self.sender.clone(), + receivers: self.receivers.clone(), + rules: self.rules.clone(), + scrape_targets: self.scrape_targets.clone(), + }) + } +} diff --git a/harmony/src/modules/monitoring/red_hat_cluster_observability/score_alert_receiver.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/score_alert_receiver.rs new file mode 100644 index 00000000..1eebd8ec --- /dev/null +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/score_alert_receiver.rs @@ -0,0 +1,97 @@ +use async_trait::async_trait; +use harmony_types::id::Id; +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::red_hat_cluster_observability::{ + RedHatClusterObservability, + crd::rhob_alertmanager_config::{AlertmanagerConfig, AlertmanagerConfigSpec}, + }, + }, + score::Score, + topology::{K8sclient, Topology, monitoring::AlertReceiver}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct RedHatClusterObservabilityReceiverScore { + pub sender: RedHatClusterObservability, + pub receiver: Box>, +} + +impl Score for RedHatClusterObservabilityReceiverScore { + fn name(&self) -> String { + "RedHatClusterObservabilityReceiverScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(RedHatClusterObservabilityReceiverInterpret { + sender: self.sender.clone(), + receiver: self.receiver.clone(), + }) + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct RedHatClusterObservabilityReceiverInterpret { + sender: RedHatClusterObservability, + pub receiver: Box>, +} + +#[async_trait] +impl Interpret for RedHatClusterObservabilityReceiverInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + let name = self.receiver.name(); + let route = self.receiver.build_route()?; + let receiver = self.receiver.build_receiver()?; + + let data = serde_json::json!({ + "route": route, + "receivers": [receiver] + }); + + let spec = AlertmanagerConfigSpec { data }; + + let metadata = ObjectMeta { + name: Some(name), + namespace: Some(self.sender.namespace.clone()), + labels: Some(std::collections::BTreeMap::from([( + "alertmanagerConfig".to_string(), + "enabled".to_string(), + )])), + ..ObjectMeta::default() + }; + + let alert_manager_config = AlertmanagerConfig { metadata, spec }; + + K8sResourceScore::single(alert_manager_config, Some(self.sender.namespace.clone())) + .create_interpret() + .execute(inventory, topology) + .await + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("RedHatClusterObsReceiverInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/red_hat_cluster_observability/score_coo_monitoring_stack.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/score_coo_monitoring_stack.rs new file mode 100644 index 00000000..45caaf53 --- /dev/null +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/score_coo_monitoring_stack.rs @@ -0,0 +1,45 @@ +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + interpret::Interpret, + modules::{ + k8s::resource::K8sResourceScore, + monitoring::red_hat_cluster_observability::crd::{ + rhob_monitoring_stack::{MonitoringStack, MonitoringStackSpec}, + rhob_prometheuses::LabelSelector, + }, + }, + score::Score, + topology::{K8sclient, Topology}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct RedHatClusterObservabilityMonitoringStackScore { + pub namespace: String, + pub resource_selector: Option, +} + +impl Score for RedHatClusterObservabilityMonitoringStackScore { + fn name(&self) -> String { + "RedHatClusterObservabilityMonitoringStackScore".to_string() + } + + fn create_interpret(&self) -> Box> { + let metadata = ObjectMeta { + name: Some(format!("{}-monitoring", self.namespace.clone()).into()), + namespace: Some(self.namespace.clone()), + labels: Some([("monitoring-stack".into(), "true".into())].into()), + ..Default::default() + }; + let spec = MonitoringStackSpec { + log_level: Some("debug".into()), + retention: Some("1d".into()), + resource_selector: self.resource_selector.clone(), + }; + + let monitoring_stack = MonitoringStack { metadata, spec }; + + K8sResourceScore::single(monitoring_stack, Some(self.namespace.clone())).create_interpret() + } +} diff --git a/harmony/src/modules/monitoring/red_hat_cluster_observability/score_redhat_cluster_observability_operator.rs b/harmony/src/modules/monitoring/red_hat_cluster_observability/score_redhat_cluster_observability_operator.rs new file mode 100644 index 00000000..2c529e78 --- /dev/null +++ b/harmony/src/modules/monitoring/red_hat_cluster_observability/score_redhat_cluster_observability_operator.rs @@ -0,0 +1,113 @@ +use async_trait::async_trait; +use harmony_types::id::Id; +use kube::api::ObjectMeta; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::k8s::{ + apps::crd::{Subscription, SubscriptionSpec}, + resource::K8sResourceScore, + }, + score::Score, + topology::{K8sclient, Topology}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct RedHatClusterObservabilityOperatorScore { + pub namespace: String, + pub channel: String, + pub install_plan_approval: String, + pub source: String, + pub source_namespace: String, +} + +impl Default for RedHatClusterObservabilityOperatorScore { + fn default() -> Self { + Self { + namespace: "openshift-operators".to_string(), + channel: "stable".to_string(), + install_plan_approval: "Automatic".to_string(), + source: "community-operators".to_string(), + source_namespace: "openshift-marketplace".to_string(), + } + } +} + +impl RedHatClusterObservabilityOperatorScore { + pub fn new() -> Self { + Self { + ..Default::default() + } + } +} + +impl Score for RedHatClusterObservabilityOperatorScore { + fn name(&self) -> String { + "RedHatClusterObservabilityScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(RedHatClusterObservabilityOperatorInterpret { + score: self.clone(), + }) + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct RedHatClusterObservabilityOperatorInterpret { + score: RedHatClusterObservabilityOperatorScore, +} + +#[async_trait] +impl Interpret for RedHatClusterObservabilityOperatorInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + let metadata = ObjectMeta { + name: Some("observabiliy-operator".to_string()), + namespace: Some(self.score.namespace.clone()), + ..ObjectMeta::default() + }; + + let spec = SubscriptionSpec { + channel: Some("stable".to_string()), + config: None, + install_plan_approval: Some(self.score.install_plan_approval.clone()), + name: "observability-operator".to_string(), + source: self.score.source.clone(), + source_namespace: self.score.source_namespace.clone(), + starting_csv: None, + }; + + let subscription = Subscription { metadata, spec }; + K8sResourceScore::single(subscription, Some(self.score.namespace.clone())) + .create_interpret() + .execute(inventory, topology) + .await?; + + Ok(Outcome::success( + "Installed RedhatClusterObservabilityOperator".to_string(), + )) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("RedHatClusterObservabilityInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/scrape_target/mod.rs b/harmony/src/modules/monitoring/scrape_target/mod.rs index 74f47ad3..fe31ce9d 100644 --- a/harmony/src/modules/monitoring/scrape_target/mod.rs +++ b/harmony/src/modules/monitoring/scrape_target/mod.rs @@ -1 +1,2 @@ +pub mod prometheus_node_exporter; pub mod server; diff --git a/harmony/src/modules/monitoring/scrape_target/prometheus_node_exporter.rs b/harmony/src/modules/monitoring/scrape_target/prometheus_node_exporter.rs new file mode 100644 index 00000000..e82a9c5b --- /dev/null +++ b/harmony/src/modules/monitoring/scrape_target/prometheus_node_exporter.rs @@ -0,0 +1,70 @@ +use std::{collections::BTreeMap, net::IpAddr}; + +use harmony_macros::ip; +use serde::Serialize; + +use crate::{ + interpret::InterpretError, + modules::monitoring::okd::OpenshiftClusterAlertSender, + topology::monitoring::{ExternalScrapeTarget, ScrapeTarget}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct PrometheusNodeExporter { + pub job_name: String, + pub metrics_path: String, + pub scrape_interval: Option, + pub scrape_timeout: Option, + pub listen_address: IpAddr, + pub port: i32, + pub labels: Option>, +} + +impl Default for PrometheusNodeExporter { + fn default() -> Self { + Self { + job_name: "node-exporter".into(), + metrics_path: "/metrics".into(), + scrape_interval: None, + scrape_timeout: None, + listen_address: ip!("127.0.0.1"), + port: 9100, + labels: None, + } + } +} + +impl PrometheusNodeExporter { + fn new(job_name: String, metrics_path: String, listen_address: IpAddr, port: i32) -> Self { + Self { + job_name: job_name, + metrics_path: metrics_path, + scrape_interval: None, + scrape_timeout: None, + listen_address: listen_address, + port: port, + labels: None, + } + } +} + +impl ScrapeTarget for PrometheusNodeExporter { + fn build_scrape_target(&self) -> Result { + let external_target = ExternalScrapeTarget { + ip: self.listen_address.clone(), + port: self.port.clone(), + interval: self.scrape_interval.clone(), + path: Some(self.metrics_path.clone()), + labels: self.labels.clone(), + }; + Ok(external_target) + } + + fn name(&self) -> String { + self.job_name.clone() + } + + fn clone_box(&self) -> Box> { + Box::new(self.clone()) + } +} diff --git a/harmony/src/modules/monitoring/scrape_target/server.rs b/harmony/src/modules/monitoring/scrape_target/server.rs index 178e9140..b153e27e 100644 --- a/harmony/src/modules/monitoring/scrape_target/server.rs +++ b/harmony/src/modules/monitoring/scrape_target/server.rs @@ -1,16 +1,11 @@ use std::net::IpAddr; -use async_trait::async_trait; -use kube::api::ObjectMeta; use serde::Serialize; use crate::{ - interpret::{InterpretError, Outcome}, - modules::monitoring::kube_prometheus::crd::{ - crd_alertmanager_config::CRDPrometheus, - crd_scrape_config::{Params, RelabelConfig, ScrapeConfig, ScrapeConfigSpec, StaticConfig}, - }, - topology::oberservability::monitoring::ScrapeTarget, + interpret::InterpretError, + modules::monitoring::prometheus::Prometheus, + topology::monitoring::{ExternalScrapeTarget, ScrapeTarget}, }; #[derive(Debug, Clone, Serialize)] @@ -22,59 +17,56 @@ pub struct Server { pub domain: String, } -#[async_trait] -impl ScrapeTarget for Server { - async fn install(&self, sender: &CRDPrometheus) -> Result { - let scrape_config_spec = ScrapeConfigSpec { - static_configs: Some(vec![StaticConfig { - targets: vec![self.ip.to_string()], - labels: None, - }]), - scrape_interval: Some("2m".to_string()), - kubernetes_sd_configs: None, - http_sd_configs: None, - file_sd_configs: None, - dns_sd_configs: None, - params: Some(Params { - auth: Some(vec![self.auth.clone()]), - module: Some(vec![self.module.clone()]), - }), - consul_sd_configs: None, - relabel_configs: Some(vec![RelabelConfig { - action: None, - source_labels: Some(vec!["__address__".to_string()]), - separator: None, - target_label: Some("__param_target".to_string()), - regex: None, - replacement: Some(format!("snmp.{}:31080", self.domain.clone())), - modulus: None, - }]), - metric_relabel_configs: None, - metrics_path: Some("/snmp".to_string()), - scrape_timeout: Some("2m".to_string()), - job_name: Some(format!("snmp_exporter/cloud/{}", self.name.clone())), - scheme: None, - }; - - let scrape_config = ScrapeConfig { - metadata: ObjectMeta { - name: Some(self.name.clone()), - namespace: Some(sender.namespace.clone()), - ..Default::default() - }, - spec: scrape_config_spec, - }; - sender - .client - .apply(&scrape_config, Some(&sender.namespace.clone())) - .await?; - Ok(Outcome::success(format!( - "installed scrape target {}", - self.name.clone() - ))) +impl ScrapeTarget for Server { + fn build_scrape_target(&self) -> Result { + todo!() + // let scrape_config_spec = ScrapeConfigSpec { + // static_configs: Some(vec![StaticConfig { + // targets: vec![self.ip.to_string()], + // labels: None, + // }]), + // scrape_interval: Some("2m".to_string()), + // kubernetes_sd_configs: None, + // http_sd_configs: None, + // file_sd_configs: None, + // dns_sd_configs: None, + // params: Some(Params { + // auth: Some(vec![self.auth.clone()]), + // module: Some(vec![self.module.clone()]), + // }), + // consul_sd_configs: None, + // relabel_configs: Some(vec![RelabelConfig { + // action: None, + // source_labels: Some(vec!["__address__".to_string()]), + // separator: None, + // target_label: Some("__param_target".to_string()), + // regex: None, + // replacement: Some(format!("snmp.{}:31080", self.domain.clone())), + // modulus: None, + // }]), + // metric_relabel_configs: None, + // metrics_path: Some("/snmp".to_string()), + // scrape_timeout: Some("2m".to_string()), + // job_name: Some(format!("snmp_exporter/cloud/{}", self.name.clone())), + // scheme: None, + // }; + // + // let scrape_config = ScrapeConfig { + // metadata: ObjectMeta { + // name: Some(self.name.clone()), + // namespace: Some(sender.namespace.clone()), + // ..Default::default() + // }, + // spec: scrape_config_spec, + // }; + // Ok(serde_json::to_value(scrape_config).map_err(|e| InterpretError::new(e.to_string()))?) } - fn clone_box(&self) -> Box> { + fn clone_box(&self) -> Box> { Box::new(self.clone()) } + + fn name(&self) -> String { + todo!() + } } diff --git a/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs b/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs deleted file mode 100644 index 7093ee8b..00000000 --- a/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs +++ /dev/null @@ -1,576 +0,0 @@ -use std::fs; -use std::{collections::BTreeMap, sync::Arc}; -use tempfile::tempdir; - -use async_trait::async_trait; -use kube::api::ObjectMeta; -use log::{debug, info}; -use serde::Serialize; -use std::process::Command; - -use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus; -use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules; -use crate::modules::monitoring::kube_prometheus::crd::crd_grafana::{ - Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig, - GrafanaDatasourceJsonData, GrafanaDatasourceSpec, GrafanaSecretKeyRef, GrafanaSpec, - GrafanaValueFrom, GrafanaValueSource, -}; -use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{ - PrometheusRule, PrometheusRuleSpec, RuleGroup, -}; -use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard; -use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{ - ServiceMonitor, ServiceMonitorSpec, -}; -use crate::topology::oberservability::monitoring::AlertReceiver; -use crate::topology::{K8sclient, Topology, k8s::K8sClient}; -use crate::{ - data::Version, - interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, - inventory::Inventory, - modules::monitoring::kube_prometheus::crd::{ - crd_alertmanagers::{Alertmanager, AlertmanagerSpec}, - crd_prometheuses::{ - AlertmanagerEndpoints, LabelSelector, Prometheus, PrometheusSpec, - PrometheusSpecAlerting, - }, - role::{build_prom_role, build_prom_rolebinding, build_prom_service_account}, - }, - score::Score, -}; -use harmony_types::id::Id; - -use super::prometheus::PrometheusMonitoring; - -#[derive(Clone, Debug, Serialize)] -pub struct K8sPrometheusCRDAlertingScore { - pub sender: CRDPrometheus, - pub receivers: Vec>>, - pub service_monitors: Vec, - pub prometheus_rules: Vec, -} - -impl> Score - for K8sPrometheusCRDAlertingScore -{ - fn create_interpret(&self) -> Box> { - Box::new(K8sPrometheusCRDAlertingInterpret { - sender: self.sender.clone(), - receivers: self.receivers.clone(), - service_monitors: self.service_monitors.clone(), - prometheus_rules: self.prometheus_rules.clone(), - }) - } - - fn name(&self) -> String { - "prometheus alerting [CRDAlertingScore]".into() - } -} - -#[derive(Clone, Debug)] -pub struct K8sPrometheusCRDAlertingInterpret { - pub sender: CRDPrometheus, - pub receivers: Vec>>, - pub service_monitors: Vec, - pub prometheus_rules: Vec, -} - -#[async_trait] -impl> Interpret - for K8sPrometheusCRDAlertingInterpret -{ - async fn execute( - &self, - _inventory: &Inventory, - topology: &T, - ) -> Result { - let client = topology.k8s_client().await.unwrap(); - self.ensure_grafana_operator().await?; - self.install_prometheus(&client).await?; - self.install_alert_manager(&client).await?; - self.install_client_kube_metrics().await?; - self.install_grafana(&client).await?; - self.install_receivers(&self.sender, &self.receivers) - .await?; - self.install_rules(&self.prometheus_rules, &client).await?; - self.install_monitors(self.service_monitors.clone(), &client) - .await?; - Ok(Outcome::success( - "K8s monitoring components installed".to_string(), - )) - } - - fn get_name(&self) -> InterpretName { - InterpretName::K8sPrometheusCrdAlerting - } - - fn get_version(&self) -> Version { - todo!() - } - - fn get_status(&self) -> InterpretStatus { - todo!() - } - - fn get_children(&self) -> Vec { - todo!() - } -} - -impl K8sPrometheusCRDAlertingInterpret { - async fn crd_exists(&self, crd: &str) -> bool { - let status = Command::new("sh") - .args(["-c", &format!("kubectl get crd -A | grep -i {crd}")]) - .status() - .map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e))) - .unwrap(); - - status.success() - } - - async fn install_chart( - &self, - chart_path: String, - chart_name: String, - ) -> Result<(), InterpretError> { - let temp_dir = - tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?; - let temp_path = temp_dir.path().to_path_buf(); - debug!("Using temp directory: {}", temp_path.display()); - let chart = format!("{}/{}", chart_path, chart_name); - let pull_output = Command::new("helm") - .args(["pull", &chart, "--destination", temp_path.to_str().unwrap()]) - .output() - .map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?; - - if !pull_output.status.success() { - return Err(InterpretError::new(format!( - "Helm pull failed: {}", - String::from_utf8_lossy(&pull_output.stderr) - ))); - } - - let tgz_path = fs::read_dir(&temp_path) - .unwrap() - .filter_map(|entry| { - let entry = entry.ok()?; - let path = entry.path(); - if path.extension()? == "tgz" { - Some(path) - } else { - None - } - }) - .next() - .ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?; - - debug!("Installing chart from: {}", tgz_path.display()); - - let install_output = Command::new("helm") - .args([ - "upgrade", - "--install", - &chart_name, - tgz_path.to_str().unwrap(), - "--namespace", - &self.sender.namespace.clone(), - "--create-namespace", - "--wait", - "--atomic", - ]) - .output() - .map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?; - - if !install_output.status.success() { - return Err(InterpretError::new(format!( - "Helm install failed: {}", - String::from_utf8_lossy(&install_output.stderr) - ))); - } - - debug!( - "Installed chart {}/{} in namespace: {}", - &chart_path, - &chart_name, - self.sender.namespace.clone() - ); - Ok(()) - } - - async fn ensure_grafana_operator(&self) -> Result { - let _ = Command::new("helm") - .args([ - "repo", - "add", - "grafana-operator", - "https://grafana.github.io/helm-charts", - ]) - .output() - .unwrap(); - - let _ = Command::new("helm") - .args(["repo", "update"]) - .output() - .unwrap(); - - let output = Command::new("helm") - .args([ - "install", - "grafana-operator", - "grafana-operator/grafana-operator", - "--namespace", - &self.sender.namespace.clone(), - "--create-namespace", - "--set", - "namespaceScope=true", - ]) - .output() - .unwrap(); - - if !output.status.success() { - return Err(InterpretError::new(format!( - "helm install failed:\nstdout: {}\nstderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ))); - } - - Ok(Outcome::success(format!( - "installed grafana operator in ns {}", - self.sender.namespace.clone() - ))) - } - - async fn install_prometheus(&self, client: &Arc) -> Result { - debug!( - "installing crd-prometheuses in namespace {}", - self.sender.namespace.clone() - ); - debug!("building role/rolebinding/serviceaccount for crd-prometheus"); - let rolename = format!("{}-prom", self.sender.namespace.clone()); - let sa_name = format!("{}-prom-sa", self.sender.namespace.clone()); - let role = build_prom_role(rolename.clone(), self.sender.namespace.clone()); - let rolebinding = build_prom_rolebinding( - rolename.clone(), - self.sender.namespace.clone(), - sa_name.clone(), - ); - let sa = build_prom_service_account(sa_name.clone(), self.sender.namespace.clone()); - let prom_spec = PrometheusSpec { - alerting: Some(PrometheusSpecAlerting { - alertmanagers: Some(vec![AlertmanagerEndpoints { - name: Some("alertmanager-operated".into()), - namespace: Some(self.sender.namespace.clone()), - port: Some("web".into()), - scheme: Some("http".into()), - }]), - }), - service_account_name: sa_name.clone(), - service_monitor_namespace_selector: Some(LabelSelector { - match_labels: BTreeMap::from([( - "kubernetes.io/metadata.name".to_string(), - self.sender.namespace.clone(), - )]), - match_expressions: vec![], - }), - service_monitor_selector: Some(LabelSelector { - match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]), - ..Default::default() - }), - - service_discovery_role: Some("Endpoints".into()), - - pod_monitor_selector: Some(LabelSelector { - match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]), - ..Default::default() - }), - - rule_selector: Some(LabelSelector { - match_labels: BTreeMap::from([("role".to_string(), "prometheus-rule".to_string())]), - ..Default::default() - }), - - rule_namespace_selector: Some(LabelSelector { - match_labels: BTreeMap::from([( - "kubernetes.io/metadata.name".to_string(), - self.sender.namespace.clone(), - )]), - match_expressions: vec![], - }), - }; - let prom = Prometheus { - metadata: ObjectMeta { - name: Some(self.sender.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([ - ("alertmanagerConfig".to_string(), "enabled".to_string()), - ("client".to_string(), "prometheus".to_string()), - ])), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: prom_spec, - }; - client - .apply(&role, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - info!( - "installed prometheus role: {:#?} in ns {:#?}", - role.metadata.name.unwrap(), - role.metadata.namespace.unwrap() - ); - client - .apply(&rolebinding, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - info!( - "installed prometheus rolebinding: {:#?} in ns {:#?}", - rolebinding.metadata.name.unwrap(), - rolebinding.metadata.namespace.unwrap() - ); - client - .apply(&sa, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - info!( - "installed prometheus service account: {:#?} in ns {:#?}", - sa.metadata.name.unwrap(), - sa.metadata.namespace.unwrap() - ); - client - .apply(&prom, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - info!( - "installed prometheus: {:#?} in ns {:#?}", - &prom.metadata.name.clone().unwrap(), - &prom.metadata.namespace.clone().unwrap() - ); - - Ok(Outcome::success(format!( - "successfully deployed crd-prometheus {:#?}", - prom - ))) - } - - async fn install_alert_manager( - &self, - client: &Arc, - ) -> Result { - let am = Alertmanager { - metadata: ObjectMeta { - name: Some(self.sender.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([( - "alertmanagerConfig".to_string(), - "enabled".to_string(), - )])), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: AlertmanagerSpec::default(), - }; - client - .apply(&am, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - Ok(Outcome::success(format!( - "successfully deployed service monitor {:#?}", - am.metadata.name - ))) - } - async fn install_monitors( - &self, - mut monitors: Vec, - client: &Arc, - ) -> Result { - let default_service_monitor = ServiceMonitor { - metadata: ObjectMeta { - name: Some(self.sender.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([ - ("alertmanagerConfig".to_string(), "enabled".to_string()), - ("client".to_string(), "prometheus".to_string()), - ( - "app.kubernetes.io/name".to_string(), - "kube-state-metrics".to_string(), - ), - ])), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: ServiceMonitorSpec::default(), - }; - monitors.push(default_service_monitor); - for monitor in monitors.iter() { - client - .apply(monitor, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - } - Ok(Outcome::success( - "succesfully deployed service monitors".to_string(), - )) - } - - async fn install_rules( - &self, - #[allow(clippy::ptr_arg)] rules: &Vec, - client: &Arc, - ) -> Result { - let mut prom_rule_spec = PrometheusRuleSpec { - groups: rules.clone(), - }; - - let default_rules_group = RuleGroup { - name: "default-rules".to_string(), - rules: build_default_application_rules(), - }; - - prom_rule_spec.groups.push(default_rules_group); - let prom_rules = PrometheusRule { - metadata: ObjectMeta { - name: Some(self.sender.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([ - ("alertmanagerConfig".to_string(), "enabled".to_string()), - ("role".to_string(), "prometheus-rule".to_string()), - ])), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: prom_rule_spec, - }; - client - .apply(&prom_rules, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - Ok(Outcome::success(format!( - "successfully deployed rules {:#?}", - prom_rules.metadata.name - ))) - } - - async fn install_client_kube_metrics(&self) -> Result { - self.install_chart( - "oci://hub.nationtech.io/harmony".to_string(), - "nt-kube-metrics".to_string(), - ) - .await?; - Ok(Outcome::success(format!( - "Installed client kube metrics in ns {}", - &self.sender.namespace.clone() - ))) - } - - async fn install_grafana(&self, client: &Arc) -> Result { - let mut label = BTreeMap::new(); - label.insert("dashboards".to_string(), "grafana".to_string()); - let labels = LabelSelector { - match_labels: label.clone(), - match_expressions: vec![], - }; - let namespace = self.sender.namespace.clone(); - let json_data = GrafanaDatasourceJsonData { - time_interval: Some("5s".to_string()), - http_header_name1: None, - tls_skip_verify: Some(true), - oauth_pass_thru: Some(true), - }; - let json = build_default_dashboard(&namespace); - - let graf_data_source = GrafanaDatasource { - metadata: ObjectMeta { - name: Some(format!( - "grafana-datasource-{}", - self.sender.namespace.clone() - )), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: GrafanaDatasourceSpec { - instance_selector: labels.clone(), - allow_cross_namespace_import: Some(false), - datasource: GrafanaDatasourceConfig { - access: "proxy".to_string(), - database: Some("prometheus".to_string()), - json_data: Some(json_data), - //this is fragile - name: format!("prometheus-{}-0", self.sender.namespace.clone()), - r#type: "prometheus".to_string(), - url: format!( - "http://prometheus-operated.{}.svc.cluster.local:9090", - self.sender.namespace.clone() - ), - secure_json_data: None, - is_default: None, - editable: None, - }, - values_from: None, - }, - }; - - client - .apply(&graf_data_source, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - - let graf_dashboard = GrafanaDashboard { - metadata: ObjectMeta { - name: Some(format!( - "grafana-dashboard-{}", - self.sender.namespace.clone() - )), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: GrafanaDashboardSpec { - resync_period: Some("30s".to_string()), - instance_selector: labels.clone(), - json: Some(json), - grafana_com: None, - datasources: None, - }, - }; - - client - .apply(&graf_dashboard, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - - let grafana = Grafana { - metadata: ObjectMeta { - name: Some(format!("grafana-{}", self.sender.namespace.clone())), - namespace: Some(self.sender.namespace.clone()), - labels: Some(label.clone()), - ..Default::default() - }, - spec: GrafanaSpec { - config: None, - admin_user: None, - admin_password: None, - ingress: None, - persistence: None, - resources: None, - }, - }; - client - .apply(&grafana, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - Ok(Outcome::success(format!( - "successfully deployed grafana instance {:#?}", - grafana.metadata.name - ))) - } - - async fn install_receivers( - &self, - sender: &CRDPrometheus, - receivers: &Vec>>, - ) -> Result { - for receiver in receivers.iter() { - receiver.install(sender).await.map_err(|err| { - InterpretError::new(format!("failed to install receiver: {}", err)) - })?; - } - Ok(Outcome::success("successfully deployed receivers".into())) - } -} diff --git a/harmony/src/modules/prometheus/mod.rs b/harmony/src/modules/prometheus/mod.rs deleted file mode 100644 index c4f25ba2..00000000 --- a/harmony/src/modules/prometheus/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod alerts; -pub mod k8s_prometheus_alerting_score; -#[allow(clippy::module_inception)] -pub mod prometheus; -pub mod rhob_alerting_score; diff --git a/harmony/src/modules/prometheus/prometheus.rs b/harmony/src/modules/prometheus/prometheus.rs deleted file mode 100644 index efb89da8..00000000 --- a/harmony/src/modules/prometheus/prometheus.rs +++ /dev/null @@ -1,25 +0,0 @@ -use async_trait::async_trait; - -use crate::{ - inventory::Inventory, - topology::{ - PreparationError, PreparationOutcome, - oberservability::monitoring::{AlertReceiver, AlertSender}, - }, -}; - -#[async_trait] -pub trait PrometheusMonitoring { - async fn install_prometheus( - &self, - sender: &S, - inventory: &Inventory, - receivers: Option>>>, - ) -> Result; - - async fn ensure_prometheus_operator( - &self, - sender: &S, - inventory: &Inventory, - ) -> Result; -} diff --git a/harmony/src/modules/prometheus/rhob_alerting_score.rs b/harmony/src/modules/prometheus/rhob_alerting_score.rs deleted file mode 100644 index 644e6f95..00000000 --- a/harmony/src/modules/prometheus/rhob_alerting_score.rs +++ /dev/null @@ -1,527 +0,0 @@ -use fqdn::fqdn; -use std::fs; -use std::{collections::BTreeMap, sync::Arc}; -use tempfile::tempdir; - -use async_trait::async_trait; -use kube::api::ObjectMeta; -use log::{debug, info}; -use serde::Serialize; -use std::process::Command; - -use crate::modules::k8s::ingress::{K8sIngressScore, PathType}; -use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard; -use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability; -use crate::modules::monitoring::kube_prometheus::crd::rhob_grafana::{ - Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig, - GrafanaDatasourceSpec, GrafanaSpec, -}; -use crate::modules::monitoring::kube_prometheus::crd::rhob_monitoring_stack::{ - MonitoringStack, MonitoringStackSpec, -}; -use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheus_rules::{ - PrometheusRule, PrometheusRuleSpec, RuleGroup, -}; -use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector; - -use crate::modules::monitoring::kube_prometheus::crd::rhob_service_monitor::{ - ServiceMonitor, ServiceMonitorSpec, -}; -use crate::score::Score; -use crate::topology::ingress::Ingress; -use crate::topology::oberservability::monitoring::AlertReceiver; -use crate::topology::{K8sclient, Topology, k8s::K8sClient}; -use crate::{ - data::Version, - interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, - inventory::Inventory, -}; -use harmony_types::id::Id; - -use super::prometheus::PrometheusMonitoring; - -#[derive(Clone, Debug, Serialize)] -pub struct RHOBAlertingScore { - pub sender: RHOBObservability, - pub receivers: Vec>>, - pub service_monitors: Vec, - pub prometheus_rules: Vec, -} - -impl> Score - for RHOBAlertingScore -{ - fn create_interpret(&self) -> Box> { - Box::new(RHOBAlertingInterpret { - sender: self.sender.clone(), - receivers: self.receivers.clone(), - service_monitors: self.service_monitors.clone(), - prometheus_rules: self.prometheus_rules.clone(), - }) - } - - fn name(&self) -> String { - "RHOB alerting [RHOBAlertingScore]".into() - } -} - -#[derive(Clone, Debug)] -pub struct RHOBAlertingInterpret { - pub sender: RHOBObservability, - pub receivers: Vec>>, - pub service_monitors: Vec, - pub prometheus_rules: Vec, -} - -#[async_trait] -impl> Interpret - for RHOBAlertingInterpret -{ - async fn execute( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result { - let client = topology.k8s_client().await.unwrap(); - self.ensure_grafana_operator().await?; - self.install_prometheus(inventory, topology, &client) - .await?; - self.install_client_kube_metrics().await?; - self.install_grafana(inventory, topology, &client).await?; - self.install_receivers(&self.sender, &self.receivers) - .await?; - self.install_rules(&self.prometheus_rules, &client).await?; - self.install_monitors(self.service_monitors.clone(), &client) - .await?; - Ok(Outcome::success( - "K8s monitoring components installed".to_string(), - )) - } - - fn get_name(&self) -> InterpretName { - InterpretName::RHOBAlerting - } - - fn get_version(&self) -> Version { - todo!() - } - - fn get_status(&self) -> InterpretStatus { - todo!() - } - - fn get_children(&self) -> Vec { - todo!() - } -} - -impl RHOBAlertingInterpret { - async fn crd_exists(&self, crd: &str) -> bool { - let status = Command::new("sh") - .args(["-c", &format!("kubectl get crd -A | grep -i {crd}")]) - .status() - .map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e))) - .unwrap(); - - status.success() - } - - async fn install_chart( - &self, - chart_path: String, - chart_name: String, - ) -> Result<(), InterpretError> { - let temp_dir = - tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?; - let temp_path = temp_dir.path().to_path_buf(); - debug!("Using temp directory: {}", temp_path.display()); - let chart = format!("{}/{}", chart_path, chart_name); - let pull_output = Command::new("helm") - .args(["pull", &chart, "--destination", temp_path.to_str().unwrap()]) - .output() - .map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?; - - if !pull_output.status.success() { - return Err(InterpretError::new(format!( - "Helm pull failed: {}", - String::from_utf8_lossy(&pull_output.stderr) - ))); - } - - let tgz_path = fs::read_dir(&temp_path) - .unwrap() - .filter_map(|entry| { - let entry = entry.ok()?; - let path = entry.path(); - if path.extension()? == "tgz" { - Some(path) - } else { - None - } - }) - .next() - .ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?; - - debug!("Installing chart from: {}", tgz_path.display()); - - let install_output = Command::new("helm") - .args([ - "upgrade", - "--install", - &chart_name, - tgz_path.to_str().unwrap(), - "--namespace", - &self.sender.namespace.clone(), - "--create-namespace", - "--wait", - "--atomic", - ]) - .output() - .map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?; - - if !install_output.status.success() { - return Err(InterpretError::new(format!( - "Helm install failed: {}", - String::from_utf8_lossy(&install_output.stderr) - ))); - } - - debug!( - "Installed chart {}/{} in namespace: {}", - &chart_path, - &chart_name, - self.sender.namespace.clone() - ); - Ok(()) - } - - async fn ensure_grafana_operator(&self) -> Result { - let _ = Command::new("helm") - .args([ - "repo", - "add", - "grafana-operator", - "https://grafana.github.io/helm-charts", - ]) - .output() - .unwrap(); - - let _ = Command::new("helm") - .args(["repo", "update"]) - .output() - .unwrap(); - - let output = Command::new("helm") - .args([ - "upgrade", - "--install", - "grafana-operator", - "grafana-operator/grafana-operator", - "--namespace", - &self.sender.namespace.clone(), - "--create-namespace", - "--set", - "namespaceScope=true", - ]) - .output() - .unwrap(); - - if !output.status.success() { - return Err(InterpretError::new(format!( - "helm upgrade --install failed:\nstdout: {}\nstderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ))); - } - - Ok(Outcome::success(format!( - "installed grafana operator in ns {}", - self.sender.namespace.clone() - ))) - } - - async fn install_prometheus( - &self, - inventory: &Inventory, - topology: &T, - client: &Arc, - ) -> Result { - debug!( - "installing crd-prometheuses in namespace {}", - self.sender.namespace.clone() - ); - debug!("building role/rolebinding/serviceaccount for crd-prometheus"); - - let stack = MonitoringStack { - metadata: ObjectMeta { - name: Some(format!("{}-monitoring", self.sender.namespace.clone()).into()), - namespace: Some(self.sender.namespace.clone()), - labels: Some([("monitoring-stack".into(), "true".into())].into()), - ..Default::default() - }, - spec: MonitoringStackSpec { - log_level: Some("debug".into()), - retention: Some("1d".into()), - resource_selector: Some(LabelSelector { - match_labels: Default::default(), - match_expressions: vec![], - }), - }, - }; - - client - .apply(&stack, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - - let alert_manager_domain = topology - .get_domain(&format!("alert-manager-{}", self.sender.namespace.clone())) - .await?; - let name = format!("{}-alert-manager", self.sender.namespace.clone()); - let backend_service = format!("alertmanager-operated"); - let namespace = self.sender.namespace.clone(); - let alert_manager_ingress = K8sIngressScore { - name: fqdn!(&name), - host: fqdn!(&alert_manager_domain), - backend_service: fqdn!(&backend_service), - port: 9093, - path: Some("/".to_string()), - path_type: Some(PathType::Prefix), - namespace: Some(fqdn!(&namespace)), - ingress_class_name: Some("openshift-default".to_string()), - }; - - let prometheus_domain = topology - .get_domain(&format!("prometheus-{}", self.sender.namespace.clone())) - .await?; - let name = format!("{}-prometheus", self.sender.namespace.clone()); - let backend_service = format!("prometheus-operated"); - let prometheus_ingress = K8sIngressScore { - name: fqdn!(&name), - host: fqdn!(&prometheus_domain), - backend_service: fqdn!(&backend_service), - port: 9090, - path: Some("/".to_string()), - path_type: Some(PathType::Prefix), - namespace: Some(fqdn!(&namespace)), - ingress_class_name: Some("openshift-default".to_string()), - }; - - alert_manager_ingress.interpret(inventory, topology).await?; - prometheus_ingress.interpret(inventory, topology).await?; - info!("installed rhob monitoring stack",); - Ok(Outcome::success(format!( - "successfully deployed rhob-prometheus {:#?}", - stack - ))) - } - - async fn install_monitors( - &self, - mut monitors: Vec, - client: &Arc, - ) -> Result { - let default_service_monitor = ServiceMonitor { - metadata: ObjectMeta { - name: Some(self.sender.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([ - ("alertmanagerConfig".to_string(), "enabled".to_string()), - ("client".to_string(), "prometheus".to_string()), - ( - "app.kubernetes.io/name".to_string(), - "kube-state-metrics".to_string(), - ), - ])), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: ServiceMonitorSpec::default(), - }; - monitors.push(default_service_monitor); - for monitor in monitors.iter() { - client - .apply(monitor, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - } - Ok(Outcome::success( - "succesfully deployed service monitors".to_string(), - )) - } - - async fn install_rules( - &self, - #[allow(clippy::ptr_arg)] rules: &Vec, - client: &Arc, - ) -> Result { - let mut prom_rule_spec = PrometheusRuleSpec { - groups: rules.clone(), - }; - - let default_rules_group = RuleGroup { - name: "default-rules".to_string(), - rules: crate::modules::monitoring::kube_prometheus::crd::rhob_default_rules::build_default_application_rules(), - }; - - prom_rule_spec.groups.push(default_rules_group); - let prom_rules = PrometheusRule { - metadata: ObjectMeta { - name: Some(self.sender.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([ - ("alertmanagerConfig".to_string(), "enabled".to_string()), - ("role".to_string(), "prometheus-rule".to_string()), - ])), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: prom_rule_spec, - }; - client - .apply(&prom_rules, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - Ok(Outcome::success(format!( - "successfully deployed rules {:#?}", - prom_rules.metadata.name - ))) - } - - async fn install_client_kube_metrics(&self) -> Result { - self.install_chart( - "oci://hub.nationtech.io/harmony".to_string(), - "nt-kube-metrics".to_string(), - ) - .await?; - Ok(Outcome::success(format!( - "Installed client kube metrics in ns {}", - &self.sender.namespace.clone() - ))) - } - - async fn install_grafana( - &self, - inventory: &Inventory, - topology: &T, - client: &Arc, - ) -> Result { - let mut label = BTreeMap::new(); - label.insert("dashboards".to_string(), "grafana".to_string()); - let labels = LabelSelector { - match_labels: label.clone(), - match_expressions: vec![], - }; - let mut json_data = BTreeMap::new(); - json_data.insert("timeInterval".to_string(), "5s".to_string()); - let namespace = self.sender.namespace.clone(); - - let json = build_default_dashboard(&namespace); - - let graf_data_source = GrafanaDatasource { - metadata: ObjectMeta { - name: Some(format!( - "grafana-datasource-{}", - self.sender.namespace.clone() - )), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: GrafanaDatasourceSpec { - instance_selector: labels.clone(), - allow_cross_namespace_import: Some(false), - datasource: GrafanaDatasourceConfig { - access: "proxy".to_string(), - database: Some("prometheus".to_string()), - json_data: Some(json_data), - //this is fragile - name: format!("prometheus-{}-0", self.sender.namespace.clone()), - r#type: "prometheus".to_string(), - url: format!( - "http://prometheus-operated.{}.svc.cluster.local:9090", - self.sender.namespace.clone() - ), - }, - }, - }; - - client - .apply(&graf_data_source, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - - let graf_dashboard = GrafanaDashboard { - metadata: ObjectMeta { - name: Some(format!( - "grafana-dashboard-{}", - self.sender.namespace.clone() - )), - namespace: Some(self.sender.namespace.clone()), - ..Default::default() - }, - spec: GrafanaDashboardSpec { - resync_period: Some("30s".to_string()), - instance_selector: labels.clone(), - json, - }, - }; - - client - .apply(&graf_dashboard, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - - let grafana = Grafana { - metadata: ObjectMeta { - name: Some(format!("grafana-{}", self.sender.namespace.clone())), - namespace: Some(self.sender.namespace.clone()), - labels: Some(label.clone()), - ..Default::default() - }, - spec: GrafanaSpec { - config: None, - admin_user: None, - admin_password: None, - ingress: None, - persistence: None, - resources: None, - }, - }; - client - .apply(&grafana, Some(&self.sender.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - let domain = topology - .get_domain(&format!("grafana-{}", self.sender.namespace.clone())) - .await?; - let name = format!("{}-grafana", self.sender.namespace.clone()); - let backend_service = format!("grafana-{}-service", self.sender.namespace.clone()); - let grafana_ingress = K8sIngressScore { - name: fqdn!(&name), - host: fqdn!(&domain), - backend_service: fqdn!(&backend_service), - port: 3000, - path: Some("/".to_string()), - path_type: Some(PathType::Prefix), - namespace: Some(fqdn!(&namespace)), - ingress_class_name: Some("openshift-default".to_string()), - }; - - grafana_ingress.interpret(inventory, topology).await?; - Ok(Outcome::success(format!( - "successfully deployed grafana instance {:#?}", - grafana.metadata.name - ))) - } - - async fn install_receivers( - &self, - sender: &RHOBObservability, - receivers: &Vec>>, - ) -> Result { - for receiver in receivers.iter() { - receiver.install(sender).await.map_err(|err| { - InterpretError::new(format!("failed to install receiver: {}", err)) - })?; - } - Ok(Outcome::success("successfully deployed receivers".into())) - } -} diff --git a/harmony_agent/deploy/src/main.rs b/harmony_agent/deploy/src/main.rs index 8baab66b..548f3745 100644 --- a/harmony_agent/deploy/src/main.rs +++ b/harmony_agent/deploy/src/main.rs @@ -1,12 +1,9 @@ use harmony::{ inventory::Inventory, - modules::{ - application::{ - ApplicationScore, - backend_app::{BackendApp, BuildCommand}, - features::{Monitoring, PackagingDeployment}, - }, - monitoring::alert_channel::discord_alert_channel::DiscordWebhook, + modules::application::{ + ApplicationScore, + backend_app::{BackendApp, BuildCommand}, + features::{Monitoring, PackagingDeployment}, }, topology::K8sAnywhereTopology, }; @@ -40,14 +37,14 @@ async fn main() { Box::new(PackagingDeployment { application: application.clone(), }), - Box::new(Monitoring { - application: application.clone(), - alert_receiver: vec![Box::new(DiscordWebhook { - name: K8sName("test-discord".to_string()), - url: hurl!("https://discord.doesnt.exist.com"), - selectors: vec![], - })], - }), + // Box::new(Monitoring { + // application: application.clone(), + // alert_receiver: vec![Box::new(DiscordWebhook { + // name: K8sName("test-discord".to_string()), + // url: hurl!("https://discord.doesnt.exist.com"), + // selectors: vec![], + // })], + // }), ], application, };