diff --git a/harmony/src/domain/topology/oberservability/monitoring.rs b/harmony/src/domain/topology/oberservability/monitoring.rs index a215924..effc978 100644 --- a/harmony/src/domain/topology/oberservability/monitoring.rs +++ b/harmony/src/domain/topology/oberservability/monitoring.rs @@ -73,6 +73,6 @@ pub trait AlertRule: std::fmt::Debug + Send + Sync { } #[async_trait] -pub trait ScrapeTarger { +pub trait ScrapeTarget { async fn install(&self, sender: &S) -> Result<(), InterpretError>; } diff --git a/harmony/src/modules/application/features/monitoring.rs b/harmony/src/modules/application/features/monitoring.rs index ffd7783..8c01b2d 100644 --- a/harmony/src/modules/application/features/monitoring.rs +++ b/harmony/src/modules/application/features/monitoring.rs @@ -1,32 +1,23 @@ use std::sync::Arc; +use crate::modules::monitoring::application_monitoring::helm_prometheus_application_alerting::HelmPrometheusApplicationAlertingScore; +use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDAlertManagerReceiver; use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::{ build_rule_container_restarting, build_rule_pod_failed, }; use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::RuleGroup; -use crate::modules::monitoring::kube_prometheus::service_monitor::{ +use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{ ServiceMonitor, ServiceMonitorSpec, }; -use crate::modules::monitoring::kube_prometheus::types::{Selector, ServiceMonitorEndpoint}; +use crate::modules::monitoring::kube_prometheus::types::ServiceMonitorEndpoint; use crate::{ inventory::Inventory, modules::{ application::{Application, ApplicationFeature, OCICompliant}, - monitoring::{ - alert_channel::webhook_receiver::WebhookReceiver, - kube_prometheus::{ - alert_manager_config::{CRDAlertManager, CRDAlertManagerReceiver}, - helm_prometheus_application_alerting::HelmPrometheusApplicationAlertingScore, - types::{NamespaceSelector, ServiceMonitor as KubePrometheusServiceMonitor}, - }, - ntfy::ntfy::NtfyScore, - }, + monitoring::{alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore}, }, score::Score, - topology::{ - HelmCommand, K8sclient, Topology, Url, oberservability::monitoring::AlertReceiver, - tenant::TenantManager, - }, + topology::{HelmCommand, K8sclient, Topology, Url, tenant::TenantManager}, }; use async_trait::async_trait; use base64::{Engine as _, engine::general_purpose}; @@ -103,18 +94,32 @@ impl, ns: String) -> AlertmanagerConfig { - // let secret_name = format!("{}-secret", self.name.clone()); - // let webhook_key = format!("{}", self.url.clone()); - // - // let mut string_data = BTreeMap::new(); - // string_data.insert("webhook-url".to_string(), webhook_key.clone()); - // - // let secret = Secret { - // metadata: kube::core::ObjectMeta { - // name: Some(secret_name.clone()), - // ..Default::default() - // }, - // string_data: Some(string_data), - // type_: Some("Opaque".to_string()), - // ..Default::default() - // }; - // - // let _ = client.apply(&secret, Some(&ns)).await; - let spec = AlertmanagerConfigSpec { data: json!({ "route": { diff --git a/harmony/src/modules/monitoring/application_monitoring/helm_prometheus_application_alerting.rs b/harmony/src/modules/monitoring/application_monitoring/helm_prometheus_application_alerting.rs new file mode 100644 index 0000000..2753f44 --- /dev/null +++ b/harmony/src/modules/monitoring/application_monitoring/helm_prometheus_application_alerting.rs @@ -0,0 +1,556 @@ +use std::fs; +use std::{collections::BTreeMap, sync::Arc}; +use tempfile::tempdir; +use tokio::io::AsyncWriteExt; + +use async_trait::async_trait; +use kube::api::ObjectMeta; +use log::{debug, info}; +use serde::Serialize; +use tokio::process::Command; + +use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{ + AlertmanagerConfig, CRDAlertManager, CRDAlertManagerReceiver, +}; +use crate::modules::monitoring::kube_prometheus::crd::crd_grafana::{ + Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig, + GrafanaDatasourceSpec, GrafanaSpec, +}; +use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{ + PrometheusRule, PrometheusRuleSpec, RuleGroup, +}; +use crate::modules::monitoring::kube_prometheus::crd::service_monitor::ServiceMonitor; +use crate::topology::{K8sclient, Topology, k8s::K8sClient}; +use crate::{ + data::{Id, Version}, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::monitoring::kube_prometheus::crd::{ + crd_alertmanagers::{Alertmanager, AlertmanagerSpec}, + crd_prometheuses::{ + AlertmanagerEndpoints, LabelSelector, Prometheus, PrometheusSpec, + PrometheusSpecAlerting, + }, + role::{build_prom_role, build_prom_rolebinding, build_prom_service_account}, + }, + score::Score, +}; + +#[derive(Clone, Debug, Serialize)] +pub struct HelmPrometheusApplicationAlertingScore { + pub namespace: String, + pub receivers: Vec>, + pub service_monitors: Vec, + pub prometheus_rules: Vec, +} + +impl Score for HelmPrometheusApplicationAlertingScore { + fn create_interpret(&self) -> Box> { + Box::new(HelmPrometheusApplicationAlertingInterpret { + namespace: self.namespace.clone(), + receivers: self.receivers.clone(), + service_monitors: self.service_monitors.clone(), + prometheus_rules: self.prometheus_rules.clone(), + }) + } + + fn name(&self) -> String { + "HelmPrometheusApplicationAlertingScore".into() + } +} + +#[derive(Clone, Debug)] +pub struct HelmPrometheusApplicationAlertingInterpret { + pub namespace: String, + pub receivers: Vec>, + pub service_monitors: Vec, + pub prometheus_rules: Vec, +} + +#[async_trait] +impl Interpret for HelmPrometheusApplicationAlertingInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &T, + ) -> Result { + let client = topology.k8s_client().await.unwrap(); + self.ensure_prometheus_operator().await?; + self.ensure_grafana_operator().await?; + self.install_prometheus(&client).await?; + self.install_alert_manager(&client).await?; + self.install_grafana(&client).await?; + self.install_receivers(&self.receivers, &client).await?; + self.install_rules(&self.prometheus_rules, &client).await?; + self.install_monitors(self.service_monitors.clone(), &client) + .await?; + Ok(Outcome::success(format!( + "deployed application monitoring composants channels" + ))) + } + + fn get_name(&self) -> InterpretName { + todo!() + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} + +impl HelmPrometheusApplicationAlertingInterpret { + async fn crd_exists(&self, crd: &str) -> bool { + let output = Command::new("kubectl") + .args(["get", "crd", crd]) + .output() + .await; + + matches!(output, Ok(o) if o.status.success()) + } + + async fn ensure_prometheus_operator(&self) -> Result { + if self.crd_exists("prometheuses.monitoring.coreos.com").await { + debug!("Prometheus CRDs already exist — skipping install."); + return Ok(Outcome::success( + "Prometheus CRDs already exist".to_string(), + )); + } + + let temp_dir = + tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?; + let temp_path = temp_dir.path().to_path_buf(); + debug!("Using temp directory: {}", temp_path.display()); + + let pull_output = Command::new("helm") + .args(&[ + "pull", + "oci://hub.nationtech.io/harmony/nt-prometheus-operator", + "--destination", + temp_path.to_str().unwrap(), + ]) + .output() + .await + .map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?; + + if !pull_output.status.success() { + return Err(InterpretError::new(format!( + "Helm pull failed: {}", + String::from_utf8_lossy(&pull_output.stderr) + ))); + } + + let tgz_path = fs::read_dir(&temp_path) + .unwrap() + .filter_map(|entry| { + let entry = entry.ok()?; + let path = entry.path(); + if path.extension()? == "tgz" { + Some(path) + } else { + None + } + }) + .next() + .ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?; + + debug!("Installing chart from: {}", tgz_path.display()); + + let install_output = Command::new("helm") + .args(&[ + "install", + "nt-prometheus-operator", + tgz_path.to_str().unwrap(), + "--namespace", + &self.namespace, + "--create-namespace", + "--wait", + "--atomic", + ]) + .output() + .await + .map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?; + + if !install_output.status.success() { + return Err(InterpretError::new(format!( + "Helm install failed: {}", + String::from_utf8_lossy(&install_output.stderr) + ))); + } + + debug!( + "Installed prometheus operator in namespace: {}", + self.namespace + ); + Ok(Outcome::success(format!( + "Installed prometheus operator in namespace {}", + self.namespace + ))) + } + + async fn ensure_grafana_operator(&self) -> Result { + if self.crd_exists("grafanas.grafana.integreatly.org").await { + debug!("grafana CRDs already exist — skipping install."); + return Ok(Outcome::success("Grafana CRDs already exist".to_string())); + } + + let _ = Command::new("helm") + .args(&[ + "repo", + "add", + "grafana-operator", + "https://grafana.github.io/helm-charts", + ]) + .output() + .await + .unwrap(); + + let _ = Command::new("helm") + .args(&["repo", "update"]) + .output() + .await + .unwrap(); + + let _ = Command::new("helm") + .args(&[ + "install", + "grafana-operator", + "grafana-operator/grafana-operator", + "--namespace", + &self.namespace, + "--create-namespace", + ]) + .output() + .await + .unwrap(); + + Ok(Outcome::success(format!( + "installed grafana operator in ns {}", + self.namespace.clone() + ))) + } + async fn install_prometheus(&self, client: &Arc) -> Result { + debug!( + "installing crd-prometheuses in namespace {}", + self.namespace.clone() + ); + debug!("building role/rolebinding/serviceaccount for crd-prometheus"); + let rolename = format!("{}-prom", self.namespace.clone()); + let sa_name = format!("{}-prom-sa", self.namespace.clone()); + let role = build_prom_role(rolename.clone(), self.namespace.clone()); + let rolebinding = + build_prom_rolebinding(rolename.clone(), self.namespace.clone(), sa_name.clone()); + let sa = build_prom_service_account(sa_name.clone(), self.namespace.clone()); + let prom_spec = PrometheusSpec { + alerting: Some(PrometheusSpecAlerting { + alertmanagers: Some(vec![AlertmanagerEndpoints { + name: Some(format!("alertmanager-operated")), + namespace: Some(format!("{}", self.namespace.clone())), + port: Some("web".into()), + scheme: Some("http".into()), + }]), + }), + service_account_name: sa_name.clone(), + service_monitor_namespace_selector: Some(LabelSelector { + match_labels: BTreeMap::from([( + "kubernetes.io/metadata.name".to_string(), + format!("{}", self.namespace.clone()), + )]), + match_expressions: vec![], + }), + service_monitor_selector: Some(LabelSelector { + match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]), + ..Default::default() + }), + + service_discovery_role: Some("Endpoints".into()), + + pod_monitor_selector: Some(LabelSelector { + match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]), + ..Default::default() + }), + + rule_selector: Some(LabelSelector { + match_labels: BTreeMap::from([("role".to_string(), "prometheus-rule".to_string())]), + ..Default::default() + }), + + rule_namespace_selector: Some(LabelSelector { + match_labels: BTreeMap::from([( + "kubernetes.io/metadata.name".to_string(), + format!("{}", self.namespace.clone()), + )]), + match_expressions: vec![], + }), + }; + let prom = Prometheus { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + labels: Some(std::collections::BTreeMap::from([( + "alertmanagerConfig".to_string(), + "enabled".to_string(), + )])), + namespace: Some(self.namespace.clone()), + ..Default::default() + }, + spec: prom_spec, + }; + client + .apply(&role, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + info!( + "installed prometheus role: {:#?} in ns {:#?}", + role.metadata.name.unwrap(), + role.metadata.namespace.unwrap() + ); + client + .apply(&rolebinding, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + info!( + "installed prometheus rolebinding: {:#?} in ns {:#?}", + rolebinding.metadata.name.unwrap(), + rolebinding.metadata.namespace.unwrap() + ); + client + .apply(&sa, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + info!( + "installed prometheus service account: {:#?} in ns {:#?}", + sa.metadata.name.unwrap(), + sa.metadata.namespace.unwrap() + ); + client + .apply(&prom, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + info!( + "installed prometheus: {:#?} in ns {:#?}", + &prom.metadata.name.clone().unwrap(), + &prom.metadata.namespace.clone().unwrap() + ); + + Ok(Outcome::success(format!( + "successfully deployed crd-prometheus {:#?}", + prom + ))) + } + + async fn install_alert_manager( + &self, + client: &Arc, + ) -> Result { + let am = Alertmanager { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + labels: Some(std::collections::BTreeMap::from([( + "alertmanagerConfig".to_string(), + "enabled".to_string(), + )])), + namespace: Some(self.namespace.clone()), + ..Default::default() + }, + spec: AlertmanagerSpec::default(), + }; + client + .apply(&am, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + Ok(Outcome::success(format!( + "successfully deployed service monitor {:#?}", + am.metadata.name + ))) + } + async fn install_monitors( + &self, + monitors: Vec, + client: &Arc, + ) -> Result { + for monitor in monitors.iter() { + client + .apply(monitor, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + } + Ok(Outcome::success( + "succesfully deployed service monitors".to_string(), + )) + } + + async fn install_rules( + &self, + rules: &Vec, + client: &Arc, + ) -> Result { + let prom_rule_spec = PrometheusRuleSpec { + groups: rules.clone(), + }; + + let prom_rules = PrometheusRule { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + labels: Some(std::collections::BTreeMap::from([ + ("alertmanagerConfig".to_string(), "enabled".to_string()), + ("role".to_string(), "prometheus-rule".to_string()), + ])), + namespace: Some(self.namespace.clone()), + ..Default::default() + }, + spec: prom_rule_spec, + }; + client + .apply(&prom_rules, Some(&self.namespace)) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + Ok(Outcome::success(format!( + "successfully deployed rules {:#?}", + prom_rules.metadata.name + ))) + } + + async fn install_grafana(&self, client: &Arc) -> Result { + let mut label = BTreeMap::new(); + label.insert("dashboards".to_string(), "grafana".to_string()); + let labels = LabelSelector { + match_labels: label.clone(), + match_expressions: vec![], + }; + let mut json_data = BTreeMap::new(); + json_data.insert("timeInterval".to_string(), "5s".to_string()); + let namespace = self.namespace.clone(); + + let json = format!( + r#"{{ + "title": "UP Status Dashboard", + "timezone": "browser", + "panels": [ + {{ + "type": "table", + "title": "Service UP Status", + "gridPos": {{ "x": 0, "y": 0, "w": 24, "h": 10 }}, + "targets": [ + {{ + "expr": "up{{namespace=\"{namespace}\"}}", + "format": "table", + "refId": "A" + }} + ], + "options": {{ + "showHeader": true + }}, + "fieldConfig": {{ + "defaults": {{ + "custom": {{}} + }}, + "overrides": [] + }} + }} + ], + "schemaVersion": 30, + "version": 1 + }}"# + ); + + let graf_data_source = GrafanaDatasource { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + namespace: Some(self.namespace.clone()), + ..Default::default() + }, + spec: GrafanaDatasourceSpec { + instance_selector: labels.clone(), + allow_cross_namespace_import: Some(false), + datasource: GrafanaDatasourceConfig { + access: "proxy".to_string(), + database: Some("prometheus".to_string()), + json_data: Some(json_data), + //this is fragile + name: format!("prometheus-{}-0", self.namespace.clone()), + r#type: "prometheus".to_string(), + url: format!( + "http://prometheus-operated.{}.svc.cluster.local:9090", + self.namespace.clone() + ), + }, + }, + }; + + client + .apply(&graf_data_source, Some(&self.namespace)) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + + let graf_dashboard = GrafanaDashboard { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + namespace: Some(self.namespace.clone()), + ..Default::default() + }, + spec: GrafanaDashboardSpec { + resync_period: Some("30s".to_string()), + instance_selector: labels.clone(), + json, + }, + }; + + client + .apply(&graf_dashboard, Some(&self.namespace)) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + + let grafana = Grafana { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + namespace: Some(self.namespace.clone()), + labels: Some(label.clone()), + ..Default::default() + }, + spec: GrafanaSpec { + config: None, + admin_user: None, + admin_password: None, + ingress: None, + persistence: None, + resources: None, + }, + }; + client + .apply(&grafana, Some(&self.namespace)) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + Ok(Outcome::success(format!( + "successfully deployed grafana instance {:#?}", + grafana.metadata.name + ))) + } + + async fn install_receivers( + &self, + receivers: &Vec>, + client: &Arc, + ) -> Result { + for receiver in receivers.iter() { + let alertmanager_config: AlertmanagerConfig = receiver + .configure_receiver(&client, self.namespace.clone()) + .await; + let sender = CRDAlertManager { + alertmanager_configs: alertmanager_config, + namespace: self.namespace.clone(), + client: client.clone(), + }; + receiver.install(&sender).await.map_err(|err| { + InterpretError::new(format!("failed to install receiver: {}", err)) + })?; + } + Ok(Outcome::success(format!("successfully deployed receivers"))) + } +} diff --git a/harmony/src/modules/monitoring/application_monitoring/mod.rs b/harmony/src/modules/monitoring/application_monitoring/mod.rs index d9a313b..6274032 100644 --- a/harmony/src/modules/monitoring/application_monitoring/mod.rs +++ b/harmony/src/modules/monitoring/application_monitoring/mod.rs @@ -1 +1,2 @@ +pub mod helm_prometheus_application_alerting; pub mod k8s_application_monitoring_score; diff --git a/harmony/src/modules/monitoring/kube_prometheus/alert_manager_config.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_alertmanager_config.rs similarity index 83% rename from harmony/src/modules/monitoring/kube_prometheus/alert_manager_config.rs rename to harmony/src/modules/monitoring/kube_prometheus/crd/crd_alertmanager_config.rs index abedc48..260aaf8 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/alert_manager_config.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_alertmanager_config.rs @@ -1,20 +1,13 @@ use std::sync::Arc; use async_trait::async_trait; -use kube::{CustomResource, api::ObjectMeta}; +use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use crate::{ - interpret::{InterpretError, Outcome}, - inventory::Inventory, - topology::{ - HelmCommand, K8sclient, Topology, - installable::Installable, - k8s::K8sClient, - oberservability::monitoring::{AlertReceiver, AlertSender}, - tenant::TenantManager, - }, +use crate::topology::{ + k8s::K8sClient, + oberservability::monitoring::{AlertReceiver, AlertSender}, }; #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs index 18074b8..74e76fe 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs @@ -4,6 +4,10 @@ use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use crate::modules::monitoring::kube_prometheus::types::Operator; + +use super::crd_prometheuses::LabelSelector; + #[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] #[kube( group = "grafana.integreatly.org", @@ -132,7 +136,7 @@ pub struct GrafanaDatasourceConfig { pub access: String, pub database: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub json_data: Option>, + pub json_data: Option>, pub name: String, pub r#type: String, pub url: String, @@ -140,25 +144,6 @@ pub struct GrafanaDatasourceConfig { // ------------------------------------------------------------------------------------------------ -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct LabelSelector { - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] - pub match_labels: BTreeMap, - - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub match_expressions: Vec, -} - -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct LabelSelectorRequirement { - pub key: String, - pub operator: String, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub values: Vec, -} - #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] #[serde(rename_all = "camelCase")] pub struct ResourceRequirements { diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheuses.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheuses.rs index 0b9101f..90b2e8c 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheuses.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheuses.rs @@ -16,6 +16,9 @@ use crate::modules::monitoring::kube_prometheus::types::Operator; )] #[serde(rename_all = "camelCase")] pub struct PrometheusSpec { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub alerting: Option, + pub service_account_name: String, #[serde(default, skip_serializing_if = "Option::is_none")] pub service_monitor_namespace_selector: Option, @@ -36,6 +39,41 @@ pub struct PrometheusSpec { pub rule_namespace_selector: Option, } +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct NamespaceSelector { + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub match_names: Vec, +} + +/// Contains alerting configuration, specifically Alertmanager endpoints. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +pub struct PrometheusSpecAlerting { + #[serde(skip_serializing_if = "Option::is_none")] + pub alertmanagers: Option>, +} + +/// Represents an Alertmanager endpoint configuration used by Prometheus. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +pub struct AlertmanagerEndpoints { + /// Name of the Alertmanager Service. + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + + /// Namespace of the Alertmanager Service. + #[serde(skip_serializing_if = "Option::is_none")] + pub namespace: Option, + + /// Port to access on the Alertmanager Service (e.g. "web"). + #[serde(skip_serializing_if = "Option::is_none")] + pub port: Option, + + /// Scheme to use for connecting (e.g. "http"). + #[serde(skip_serializing_if = "Option::is_none")] + pub scheme: Option, + // Other fields like `tls_config`, `path_prefix`, etc., can be added if needed. +} + #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] #[serde(rename_all = "camelCase")] pub struct LabelSelector { @@ -58,6 +96,8 @@ pub struct LabelSelectorRequirement { impl Default for PrometheusSpec { fn default() -> Self { PrometheusSpec { + alerting: None, + service_account_name: "prometheus".into(), // null means "only my namespace" diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs index 00864cd..42d6e0a 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs @@ -7,7 +7,7 @@ use crate::modules::helm::chart::HelmChartScore; pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore { HelmChartScore { namespace: Some(NonBlankString::from_str(&ns).unwrap()), - release_name: NonBlankString::from_str("kube-prometheus").unwrap(), + release_name: NonBlankString::from_str("grafana_operator").unwrap(), chart_name: NonBlankString::from_str( "grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator", ) diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs index ca8a12d..85ddf35 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs @@ -1,7 +1,10 @@ +pub mod crd_alertmanager_config; pub mod crd_alertmanagers; pub mod crd_default_rules; pub mod crd_grafana; pub mod crd_prometheus_rules; pub mod crd_prometheuses; pub mod grafana_operator; +pub mod prometheus_operator; pub mod role; +pub mod service_monitor; diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/prometheus_operator.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/prometheus_operator.rs new file mode 100644 index 0000000..c3bc084 --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/prometheus_operator.rs @@ -0,0 +1,22 @@ +use std::str::FromStr; + +use non_blank_string_rs::NonBlankString; + +use crate::modules::helm::chart::HelmChartScore; + +pub fn prometheus_operator_helm_chart_score(ns: String) -> HelmChartScore { + HelmChartScore { + namespace: Some(NonBlankString::from_str(&ns).unwrap()), + release_name: NonBlankString::from_str("prometheus-operator").unwrap(), + chart_name: NonBlankString::from_str( + "grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator", + ) + .unwrap(), + chart_version: None, + values_overrides: None, + values_yaml: None, + create_namespace: true, + install_only: true, + repository: None, + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/service_monitor.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/service_monitor.rs similarity index 98% rename from harmony/src/modules/monitoring/kube_prometheus/service_monitor.rs rename to harmony/src/modules/monitoring/kube_prometheus/crd/service_monitor.rs index 049e32d..7c613e7 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/service_monitor.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/service_monitor.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use crate::interpret::InterpretError; -use super::types::{ +use crate::modules::monitoring::kube_prometheus::types::{ HTTPScheme, MatchExpression, NamespaceSelector, Operator, Selector, ServiceMonitor as KubeServiceMonitor, ServiceMonitorEndpoint, }; diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs deleted file mode 100644 index 1765a64..0000000 --- a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs +++ /dev/null @@ -1,253 +0,0 @@ -use std::sync::Arc; - -use async_trait::async_trait; -use kube::{Api, api::ObjectMeta}; -use log::{debug, info}; -use serde::Serialize; - -use crate::{ - data::{Id, Version}, - interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, - inventory::Inventory, - modules::monitoring::kube_prometheus::crd::{ - crd_alertmanagers::{Alertmanager, AlertmanagerSpec}, - crd_prometheuses::{Prometheus, PrometheusSpec}, - role::{build_prom_role, build_prom_rolebinding, build_prom_service_account}, - }, - score::Score, - topology::{K8sclient, Topology, k8s::K8sClient, oberservability::monitoring::AlertReceiver}, -}; - -use super::{ - alert_manager_config::{ - AlertmanagerConfig, AlertmanagerConfigSpec, CRDAlertManager, CRDAlertManagerReceiver, - }, - crd::crd_prometheus_rules::{PrometheusRule, PrometheusRuleSpec, RuleGroup}, - prometheus::KubePrometheus, -}; - -#[derive(Clone, Debug, Serialize)] -pub struct HelmPrometheusApplicationAlertingScore { - pub namespace: String, - pub receivers: Vec>, - pub service_monitors: Vec, - pub prometheus_rules: Vec, -} - -impl Score for HelmPrometheusApplicationAlertingScore { - fn create_interpret(&self) -> Box> { - Box::new(HelmPrometheusApplicationAlertingInterpret { - namespace: self.namespace.clone(), - receivers: self.receivers.clone(), - service_monitors: self.service_monitors.clone(), - prometheus_rules: self.prometheus_rules.clone(), - }) - } - - fn name(&self) -> String { - "HelmPrometheusApplicationAlertingScore".into() - } -} - -#[derive(Clone, Debug)] -pub struct HelmPrometheusApplicationAlertingInterpret { - pub namespace: String, - pub receivers: Vec>, - pub service_monitors: Vec, - pub prometheus_rules: Vec, -} - -#[async_trait] -impl Interpret for HelmPrometheusApplicationAlertingInterpret { - async fn execute( - &self, - inventory: &Inventory, - topology: &T, - ) -> Result { - let client = topology.k8s_client().await.unwrap(); - self.install_prometheus(&client).await?; - self.install_alert_manager(&client).await?; - for receiver in self.receivers.iter() { - let alertmanager_config: AlertmanagerConfig = receiver - .configure_receiver(&client, self.namespace.clone()) - .await; - let sender = CRDAlertManager { - alertmanager_configs: alertmanager_config, - namespace: self.namespace.clone(), - client: client.clone(), - }; - receiver.install(&sender).await.map_err(|err| { - InterpretError::new(format!("failed to install receiver: {}", err)) - })?; - } - self.install_rules(self.prometheus_rules.clone(), client.clone()) - .await - .map_err(|err| InterpretError::new(format!("failed to install rules: {}", err)))?; - - debug!("\n\n\n monitors: {:#?}", self.service_monitors.clone()); - for monitor in self.service_monitors.iter() { - self.install_monitor(monitor.clone(), client.clone()) - .await?; - } - Ok(Outcome::success(format!("deployed alert channels"))) - } - - fn get_name(&self) -> InterpretName { - todo!() - } - - fn get_version(&self) -> Version { - todo!() - } - - fn get_status(&self) -> InterpretStatus { - todo!() - } - - fn get_children(&self) -> Vec { - todo!() - } -} - -impl HelmPrometheusApplicationAlertingInterpret { - async fn install_prometheus(&self, client: &Arc) -> Result { - debug!( - "installing crd-prometheuses in namespace {}", - self.namespace.clone() - ); - debug!("building role/rolebinding/serviceaccount for crd-prometheus"); - let rolename = format!("{}-prom", self.namespace.clone()); - let sa_name = format!("{}-prom-sa", self.namespace.clone()); - let role = build_prom_role(rolename.clone(), self.namespace.clone()); - let rolebinding = - build_prom_rolebinding(rolename.clone(), self.namespace.clone(), sa_name.clone()); - let sa = build_prom_service_account(sa_name.clone(), self.namespace.clone()); - let mut prom_spec = PrometheusSpec::default(); - prom_spec.service_account_name = sa_name.clone(); - let prom = Prometheus { - metadata: ObjectMeta { - name: Some(self.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([( - "alertmanagerConfig".to_string(), - "enabled".to_string(), - )])), - namespace: Some(self.namespace.clone()), - ..Default::default() - }, - spec: prom_spec, - }; - client - .apply(&role, Some(&self.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - info!( - "installed prometheus role: {:#?} in ns {:#?}", - role.metadata.name.unwrap(), - role.metadata.namespace.unwrap() - ); - client - .apply(&rolebinding, Some(&self.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - info!( - "installed prometheus rolebinding: {:#?} in ns {:#?}", - rolebinding.metadata.name.unwrap(), - rolebinding.metadata.namespace.unwrap() - ); - client - .apply(&sa, Some(&self.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - info!( - "installed prometheus service account: {:#?} in ns {:#?}", - sa.metadata.name.unwrap(), - sa.metadata.namespace.unwrap() - ); - client - .apply(&prom, Some(&self.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - info!( - "installed prometheus: {:#?} in ns {:#?}", - &prom.metadata.name.clone().unwrap(), - &prom.metadata.namespace.clone().unwrap() - ); - - Ok(Outcome::success(format!( - "successfully deployed crd-prometheus {:#?}", - prom - ))) - } - - async fn install_alert_manager( - &self, - client: &Arc, - ) -> Result { - let am = Alertmanager { - metadata: ObjectMeta { - name: Some(self.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([( - "alertmanagerConfig".to_string(), - "enabled".to_string(), - )])), - namespace: Some(self.namespace.clone()), - ..Default::default() - }, - spec: AlertmanagerSpec::default(), - }; - client - .apply(&am, Some(&self.namespace.clone())) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - Ok(Outcome::success(format!( - "successfully deployed service monitor {:#?}", - am.metadata.name - ))) - } - - async fn install_monitor( - &self, - monitor: super::service_monitor::ServiceMonitor, - client: Arc, - ) -> Result { - debug!("service monitor: \n{:#?}", monitor.clone()); - let namespace = self.namespace.clone(); - client - .apply(&monitor, Some(&namespace)) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - Ok(Outcome::success(format!( - "successfully deployed service monitor {:#?}", - monitor.metadata.name - ))) - } - - async fn install_rules( - &self, - rules: Vec, - client: Arc, - ) -> Result { - let prom_rule_spec = PrometheusRuleSpec { groups: rules }; - - let prom_rules = PrometheusRule { - metadata: ObjectMeta { - name: Some(self.namespace.clone()), - labels: Some(std::collections::BTreeMap::from([( - "alertmanagerConfig".to_string(), - "enabled".to_string(), - )])), - namespace: Some(self.namespace.clone()), - ..Default::default() - }, - spec: prom_rule_spec, - }; - client - .apply(&prom_rules, Some(&self.namespace)) - .await - .map_err(|e| InterpretError::new(e.to_string()))?; - Ok(Outcome::success(format!( - "successfully deployed service monitor {:#?}", - prom_rules.metadata.name - ))) - } -} diff --git a/harmony/src/modules/monitoring/kube_prometheus/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/mod.rs index 5fc0fea..122e939 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/mod.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/mod.rs @@ -1,8 +1,5 @@ -pub mod alert_manager_config; pub mod crd; pub mod helm; pub mod helm_prometheus_alert_score; -pub mod helm_prometheus_application_alerting; pub mod prometheus; -pub mod service_monitor; pub mod types; diff --git a/harmony/src/modules/monitoring/kube_prometheus/types.rs b/harmony/src/modules/monitoring/kube_prometheus/types.rs index 2423e5d..abe5896 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/types.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/types.rs @@ -224,7 +224,7 @@ pub struct Selector { pub match_expressions: Vec, } -#[derive(Debug, Clone, Serialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ServiceMonitor { pub name: String, @@ -268,7 +268,7 @@ pub struct ServiceMonitor { pub fallback_scrape_protocol: Option, } -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] #[serde(rename_all = "camelCase")] pub struct NamespaceSelector { /// Select all namespaces.