From 8e857bc72a27bbc6babf1aaf6ac41fdbf25bf631 Mon Sep 17 00:00:00 2001 From: Willem Date: Thu, 26 Jun 2025 16:24:19 -0400 Subject: [PATCH 1/5] wip: using the name from tenant config as deployment namespace for kubeprometheus deployment or defaulting to monitoring if no tenant config exists --- harmony/src/domain/topology/installable.rs | 2 ++ harmony/src/domain/topology/k8s_anywhere.rs | 9 +++++++- .../topology/oberservability/monitoring.rs | 1 + harmony/src/domain/topology/tenant/k8s.rs | 4 ++++ harmony/src/domain/topology/tenant/manager.rs | 2 ++ .../monitoring/kube_prometheus/helm/config.rs | 11 +++++----- .../helm/kube_prometheus_helm_chart.rs | 3 ++- .../helm_prometheus_alert_score.rs | 7 +++--- .../monitoring/kube_prometheus/prometheus.rs | 22 +++++++++++++++++-- 9 files changed, 47 insertions(+), 14 deletions(-) diff --git a/harmony/src/domain/topology/installable.rs b/harmony/src/domain/topology/installable.rs index 8d8178c..0e81448 100644 --- a/harmony/src/domain/topology/installable.rs +++ b/harmony/src/domain/topology/installable.rs @@ -4,6 +4,8 @@ use crate::{interpret::InterpretError, inventory::Inventory}; #[async_trait] pub trait Installable: Send + Sync { + fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), InterpretError>; + async fn ensure_installed( &self, inventory: &Inventory, diff --git a/harmony/src/domain/topology/k8s_anywhere.rs b/harmony/src/domain/topology/k8s_anywhere.rs index 6742b5a..61a0642 100644 --- a/harmony/src/domain/topology/k8s_anywhere.rs +++ b/harmony/src/domain/topology/k8s_anywhere.rs @@ -35,6 +35,7 @@ enum K8sSource { pub struct K8sAnywhereTopology { k8s_state: OnceCell>, tenant_manager: OnceCell, + tenant_manager_config: OnceCell, config: K8sAnywhereConfig, } @@ -60,6 +61,7 @@ impl K8sAnywhereTopology { Self { k8s_state: OnceCell::new(), tenant_manager: OnceCell::new(), + tenant_manager_config: OnceCell::new(), config: K8sAnywhereConfig::from_env(), } } @@ -68,6 +70,7 @@ impl K8sAnywhereTopology { Self { k8s_state: OnceCell::new(), tenant_manager: OnceCell::new(), + tenant_manager_config: OnceCell::new(), config, } } @@ -182,7 +185,7 @@ impl K8sAnywhereTopology { self.tenant_manager .get_or_try_init(async || -> Result { let k8s_client = self.k8s_client().await?; - Ok(K8sTenantManager::new(k8s_client)) + Ok(K8sTenantManager::new(k8s_client, TenantConfig::default())) }) .await .unwrap(); @@ -272,4 +275,8 @@ impl TenantManager for K8sAnywhereTopology { .provision_tenant(config) .await } + + fn get_tenant_config(&self) -> Option { + self.tenant_manager_config.get().cloned() + } } diff --git a/harmony/src/domain/topology/oberservability/monitoring.rs b/harmony/src/domain/topology/oberservability/monitoring.rs index ed7e936..f65e159 100644 --- a/harmony/src/domain/topology/oberservability/monitoring.rs +++ b/harmony/src/domain/topology/oberservability/monitoring.rs @@ -27,6 +27,7 @@ impl, T: Topology> Interpret for AlertingInte inventory: &Inventory, topology: &T, ) -> Result { + self.sender.configure(inventory, topology)?; for receiver in self.receivers.iter() { receiver.install(&self.sender).await?; } diff --git a/harmony/src/domain/topology/tenant/k8s.rs b/harmony/src/domain/topology/tenant/k8s.rs index a03e8d7..45f4530 100644 --- a/harmony/src/domain/topology/tenant/k8s.rs +++ b/harmony/src/domain/topology/tenant/k8s.rs @@ -25,6 +25,7 @@ use super::{TenantConfig, TenantManager}; #[derive(new)] pub struct K8sTenantManager { k8s_client: Arc, + k8s_tenant_config: TenantConfig, } impl K8sTenantManager { @@ -324,4 +325,7 @@ impl 
TenantManager for K8sTenantManager { ); Ok(()) } + fn get_tenant_config(&self) -> Option { + Some(self.k8s_tenant_config.clone()) + } } diff --git a/harmony/src/domain/topology/tenant/manager.rs b/harmony/src/domain/topology/tenant/manager.rs index 0df380d..0e0d426 100644 --- a/harmony/src/domain/topology/tenant/manager.rs +++ b/harmony/src/domain/topology/tenant/manager.rs @@ -15,4 +15,6 @@ pub trait TenantManager { /// # Arguments /// * `config`: The desired configuration for the new tenant. async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError>; + + fn get_tenant_config(&self) -> Option; } diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs index ecbf8d8..3aede84 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs @@ -1,13 +1,12 @@ use serde::Serialize; -use crate::modules::monitoring::{ - alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, - kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig}, -}; +use crate::modules::monitoring:: + kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig} +; #[derive(Debug, Clone, Serialize)] pub struct KubePrometheusConfig { - pub namespace: String, + pub namespace: Option, pub default_rules: bool, pub windows_monitoring: bool, pub alert_manager: bool, @@ -30,7 +29,7 @@ pub struct KubePrometheusConfig { impl KubePrometheusConfig { pub fn new() -> Self { Self { - namespace: "monitoring".into(), + namespace: None, default_rules: true, windows_monitoring: false, alert_manager: true, diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs index 843a677..9616c4b 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs @@ -184,8 +184,9 @@ prometheus: values.push_str(&alert_manager_additional_rules_yaml); debug!("full values.yaml: \n {:#}", values); + HelmChartScore { - namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), + namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), chart_name: NonBlankString::from_str( "oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs index 8844309..a47fa92 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs @@ -8,6 +8,7 @@ use crate::{ topology::{ HelmCommand, Topology, oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret}, + tenant::TenantManager, }, }; @@ -17,12 +18,10 @@ pub struct HelmPrometheusAlertingScore { pub rules: Vec>>, } -impl Score for HelmPrometheusAlertingScore { +impl Score for HelmPrometheusAlertingScore { fn create_interpret(&self) -> Box> { Box::new(AlertingInterpret { - sender: Prometheus { - config: Arc::new(Mutex::new(KubePrometheusConfig::new())), - }, + sender: Prometheus::new() , receivers: self.receivers.clone(), rules: self.rules.clone(), }) 
diff --git a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs index 148f91c..acd9dae 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs @@ -10,9 +10,10 @@ use crate::{ modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, score, topology::{ - HelmCommand, Topology, + HelmCommand, K8sAnywhereTopology, Topology, installable::Installable, oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender}, + tenant::TenantManager, }, }; @@ -33,7 +34,12 @@ impl AlertSender for Prometheus { } #[async_trait] -impl Installable for Prometheus { +impl Installable for Prometheus { + fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> { + self.configure_with_topology(topology); + Ok(()) + } + async fn ensure_installed( &self, inventory: &Inventory, @@ -50,6 +56,18 @@ pub struct Prometheus { } impl Prometheus { + pub fn new() -> Self { + Self { + config: Arc::new(Mutex::new(KubePrometheusConfig::new())), + } + } + + pub fn configure_with_topology(&self, topology: &T) { + let ns = topology.get_tenant_config().map(|cfg| cfg.name.clone()) + .unwrap_or_else(|| "monitoring".to_string()); + self.config.lock().unwrap().namespace = Some(ns); + } + pub async fn install_receiver( &self, prometheus_receiver: &dyn PrometheusReceiver, From 460c8b59e1582fbd33567d5911af1413ea137194 Mon Sep 17 00:00:00 2001 From: Willem Date: Fri, 27 Jun 2025 14:47:28 -0400 Subject: [PATCH 2/5] wip: helm chart deploys to namespace with resource limits and requests, trying to fix connection refused to api error --- examples/monitoring_with_tenant/Cargo.toml | 13 +++ examples/monitoring_with_tenant/src/main.rs | 63 +++++++++++ harmony/src/domain/topology/k8s_anywhere.rs | 8 +- harmony/src/domain/topology/tenant/k8s.rs | 3 +- .../monitoring/kube_prometheus/helm/config.rs | 6 +- .../helm/kube_prometheus_helm_chart.rs | 104 +++++++++++++++++- .../helm_prometheus_alert_score.rs | 2 +- .../monitoring/kube_prometheus/prometheus.rs | 7 +- .../monitoring/kube_prometheus/types.rs | 53 +++++++++ 9 files changed, 246 insertions(+), 13 deletions(-) create mode 100644 examples/monitoring_with_tenant/Cargo.toml create mode 100644 examples/monitoring_with_tenant/src/main.rs diff --git a/examples/monitoring_with_tenant/Cargo.toml b/examples/monitoring_with_tenant/Cargo.toml new file mode 100644 index 0000000..27fd4dd --- /dev/null +++ b/examples/monitoring_with_tenant/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "example-monitoring-with-tenant" +edition = "2024" +version.workspace = true +readme.workspace = true +license.workspace = true + +[dependencies] +cidr.workspace = true +harmony = { version = "0.1.0", path = "../../harmony" } +harmony_cli = { version = "0.1.0", path = "../../harmony_cli" } +tokio.workspace = true +url.workspace = true diff --git a/examples/monitoring_with_tenant/src/main.rs b/examples/monitoring_with_tenant/src/main.rs new file mode 100644 index 0000000..eb15100 --- /dev/null +++ b/examples/monitoring_with_tenant/src/main.rs @@ -0,0 +1,63 @@ +use cidr::Ipv4Cidr; +use harmony::{ + data::Id, + inventory::Inventory, + maestro::Maestro, + modules::{ + monitoring::{ + alert_channel::discord_alert_channel::DiscordWebhook, + alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, + kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore, + }, + 
prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days, + tenant::TenantScore, + }, + topology::{ + K8sAnywhereTopology, Url, + tenant::{InternetEgressPolicy, ResourceLimits, TenantConfig, TenantNetworkPolicy}, + }, +}; +use std::net::Ipv4Addr; +use std::str::FromStr; + +#[tokio::main] +async fn main() { + let tenant = TenantScore { + config: TenantConfig { + id: Id::from_string("1234".to_string()), + name: "test-tenant".to_string(), + resource_limits: ResourceLimits { + cpu_request_cores: 4.0, + cpu_limit_cores: 4.0, + memory_request_gb: 4.0, + memory_limit_gb: 4.0, + storage_total_gb: 10.0, + }, + network_policy: TenantNetworkPolicy::default(), + }, + }; + + let discord_receiver = DiscordWebhook { + name: "test-discord".to_string(), + url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()), + }; + + let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days(); + + let additional_rules = + AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]); + + let alerting_score = HelmPrometheusAlertingScore { + receivers: vec![Box::new(discord_receiver)], + rules: vec![Box::new(additional_rules)], + }; + let mut maestro = Maestro::::initialize( + Inventory::autoload(), + K8sAnywhereTopology::from_env(), + ) + .await + .unwrap(); + + maestro.register_all(vec![Box::new(tenant), Box::new(alerting_score)]); + harmony_cli::init(maestro, None).await.unwrap(); +} diff --git a/harmony/src/domain/topology/k8s_anywhere.rs b/harmony/src/domain/topology/k8s_anywhere.rs index 61a0642..ea8b033 100644 --- a/harmony/src/domain/topology/k8s_anywhere.rs +++ b/harmony/src/domain/topology/k8s_anywhere.rs @@ -185,13 +185,18 @@ impl K8sAnywhereTopology { self.tenant_manager .get_or_try_init(async || -> Result { let k8s_client = self.k8s_client().await?; - Ok(K8sTenantManager::new(k8s_client, TenantConfig::default())) + Ok(K8sTenantManager::new(k8s_client)) }) .await .unwrap(); Ok(()) } + async fn store_tenant_config(&self, config: TenantConfig) { + self.tenant_manager_config + .get_or_init(|| async { config }) + .await; + } fn get_k8s_tenant_manager(&self) -> Result<&K8sTenantManager, ExecutorError> { match self.tenant_manager.get() { @@ -271,6 +276,7 @@ impl HelmCommand for K8sAnywhereTopology {} #[async_trait] impl TenantManager for K8sAnywhereTopology { async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> { + self.store_tenant_config(config.clone()).await; self.get_k8s_tenant_manager()? 
.provision_tenant(config) .await diff --git a/harmony/src/domain/topology/tenant/k8s.rs b/harmony/src/domain/topology/tenant/k8s.rs index 45f4530..4e603e8 100644 --- a/harmony/src/domain/topology/tenant/k8s.rs +++ b/harmony/src/domain/topology/tenant/k8s.rs @@ -25,7 +25,6 @@ use super::{TenantConfig, TenantManager}; #[derive(new)] pub struct K8sTenantManager { k8s_client: Arc, - k8s_tenant_config: TenantConfig, } impl K8sTenantManager { @@ -326,6 +325,6 @@ impl TenantManager for K8sTenantManager { Ok(()) } fn get_tenant_config(&self) -> Option { - Some(self.k8s_tenant_config.clone()) + todo!() } } diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs index 3aede84..6c56676 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs @@ -1,8 +1,8 @@ use serde::Serialize; -use crate::modules::monitoring:: - kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig} -; +use crate::modules::monitoring::kube_prometheus::types::{ + AlertManagerAdditionalPromRules, AlertManagerChannelConfig, +}; #[derive(Debug, Clone, Serialize)] pub struct KubePrometheusConfig { diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs index 9616c4b..7b7b993 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs @@ -12,7 +12,8 @@ use crate::modules::{ helm::chart::HelmChartScore, monitoring::kube_prometheus::types::{ AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig, - AlertManagerRoute, AlertManagerValues, + AlertManagerRoute, AlertManagerSpec, AlertManagerValues, Cpu, CpuUnit, Limits, Memory, + MemoryUnit, Requests, Resources, }, }; @@ -36,8 +37,53 @@ pub fn kube_prometheus_helm_chart_score( let node_exporter = config.node_exporter.to_string(); let prometheus_operator = config.prometheus_operator.to_string(); let prometheus = config.prometheus.to_string(); + let resource_limit = Resources { + limits: Limits { + memory: Memory { + value: 100, + unit: MemoryUnit::Mi, + }, + cpu: Cpu { + value: 100, + unit: CpuUnit::Milli, + }, + }, + requests: Requests { + memory: Memory { + value: 100, + unit: MemoryUnit::Mi, + }, + cpu: Cpu { + value: 100, + unit: CpuUnit::Milli, + }, + }, + }; + + fn indent_lines(s: &str, spaces: usize) -> String { + let pad = " ".repeat(spaces); + s.lines() + .map(|line| format!("{pad}{line}")) + .collect::>() + .join("\n") + } + + fn resource_block(resource: &Resources, indent_level: usize) -> String { + let yaml = serde_yaml::to_string(resource).unwrap(); + format!( + "{}resources:\n{}", + " ".repeat(indent_level), + indent_lines(&yaml, indent_level + 2) + ) + } + let resource_section = resource_block(&resource_limit, 2); + let mut values = format!( r#" +prometheus: + enabled: {prometheus} + prometheusSpec: + {resource_section} defaultRules: create: {default_rules} rules: @@ -77,32 +123,59 @@ defaultRules: windows: true windowsMonitoring: enabled: {windows_monitoring} +{resource_section} grafana: enabled: {grafana} +{resource_section} kubernetesServiceMonitors: enabled: {kubernetes_service_monitors} +{resource_section} kubeApiServer: enabled: {kubernetes_api_server} +{resource_section} kubelet: enabled: {kubelet} 
+{resource_section} kubeControllerManager: enabled: {kube_controller_manager} +{resource_section} coreDns: enabled: {core_dns} +{resource_section} kubeEtcd: enabled: {kube_etcd} +{resource_section} kubeScheduler: enabled: {kube_scheduler} +{resource_section} kubeProxy: enabled: {kube_proxy} +{resource_section} kubeStateMetrics: enabled: {kube_state_metrics} +{resource_section} nodeExporter: enabled: {node_exporter} +{resource_section} prometheusOperator: enabled: {prometheus_operator} -prometheus: - enabled: {prometheus} + admissionWebhooks: + deployment: + resources: + limits: + cpu: 10m + memory: 100Mi + requests: + cpu: 10m + memory: 100Mi + patch: + resources: + limits: + cpu: 10m + memory: 100Mi + requests: + cpu: 10m + memory: 100Mi "#, ); @@ -145,6 +218,30 @@ prometheus: alertmanager: AlertManager { enabled: config.alert_manager, config: alert_manager_channel_config, + alertManagerSpec: AlertManagerSpec { + resources: Resources { + limits: Limits { + memory: Memory { + value: 100, + unit: MemoryUnit::Mi, + }, + cpu: Cpu { + value: 100, + unit: CpuUnit::Milli, + }, + }, + requests: Requests { + memory: Memory { + value: 100, + unit: MemoryUnit::Mi, + }, + cpu: Cpu { + value: 100, + unit: CpuUnit::Milli, + }, + }, + }, + }, }, }; @@ -184,7 +281,6 @@ prometheus: values.push_str(&alert_manager_additional_rules_yaml); debug!("full values.yaml: \n {:#}", values); - HelmChartScore { namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()), release_name: NonBlankString::from_str("kube-prometheus").unwrap(), diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs index a47fa92..90a3022 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_alert_score.rs @@ -21,7 +21,7 @@ pub struct HelmPrometheusAlertingScore { impl Score for HelmPrometheusAlertingScore { fn create_interpret(&self) -> Box> { Box::new(AlertingInterpret { - sender: Prometheus::new() , + sender: Prometheus::new(), receivers: self.receivers.clone(), rules: self.rules.clone(), }) diff --git a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs index acd9dae..57e72f9 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs @@ -63,8 +63,11 @@ impl Prometheus { } pub fn configure_with_topology(&self, topology: &T) { - let ns = topology.get_tenant_config().map(|cfg| cfg.name.clone()) - .unwrap_or_else(|| "monitoring".to_string()); + let ns = topology + .get_tenant_config() + .map(|cfg| cfg.name.clone()) + .unwrap_or_else(|| "monitoring".to_string()); + debug!("NS: {}", ns); self.config.lock().unwrap().namespace = Some(ns); } diff --git a/harmony/src/modules/monitoring/kube_prometheus/types.rs b/harmony/src/modules/monitoring/kube_prometheus/types.rs index 878d527..e0ef5d8 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/types.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/types.rs @@ -19,6 +19,7 @@ pub struct AlertManagerValues { pub struct AlertManager { pub enabled: bool, pub config: AlertManagerConfig, + pub alertManagerSpec: AlertManagerSpec, } #[derive(Debug, Clone, Serialize)] @@ -43,6 +44,58 @@ pub struct AlertManagerChannelConfig { pub channel_receiver: Value, } +#[derive(Debug, Clone, Serialize)] +pub 
struct AlertManagerSpec { + pub(crate) resources: Resources, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Resources { + pub limits: Limits, + pub requests: Requests, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Limits { + pub memory: Memory, + pub cpu: Cpu, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Requests { + pub memory: Memory, + pub cpu: Cpu, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Memory { + pub value: u64, + pub unit: MemoryUnit, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Cpu { + pub value: u64, + pub unit: CpuUnit, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum MemoryUnit { + Ki, + Mi, + Gi, + Ti, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum CpuUnit { + // 1 = 1 core, m = millicore + Core, + Milli, +} + #[derive(Debug, Clone, Serialize)] pub struct AlertManagerAdditionalPromRules { #[serde(flatten)] From 7de9860dcf99acea4327a89a168b82e34c99a3c1 Mon Sep 17 00:00:00 2001 From: Willem Date: Wed, 2 Jul 2025 11:13:03 -0400 Subject: [PATCH 3/5] refactor: monitoring takes namespace from tenant --- Cargo.lock | 11 + examples/monitoring_with_tenant/src/main.rs | 7 +- harmony/src/domain/topology/tenant/k8s.rs | 132 +++++++----- .../monitoring/kube_prometheus/helm/config.rs | 18 +- .../helm/kube_prometheus_helm_chart.rs | 195 +++++++++++++----- .../monitoring/kube_prometheus/helm/mod.rs | 1 + .../monitoring/kube_prometheus/helm/types.rs | 142 +++++++++++++ .../monitoring/kube_prometheus/types.rs | 47 ++--- 8 files changed, 408 insertions(+), 145 deletions(-) create mode 100644 harmony/src/modules/monitoring/kube_prometheus/helm/types.rs diff --git a/Cargo.lock b/Cargo.lock index 18f8abf..4f0e1d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1164,6 +1164,17 @@ dependencies = [ "url", ] +[[package]] +name = "example-monitoring-with-tenant" +version = "0.1.0" +dependencies = [ + "cidr", + "harmony", + "harmony_cli", + "tokio", + "url", +] + [[package]] name = "example-nanodc" version = "0.1.0" diff --git a/examples/monitoring_with_tenant/src/main.rs b/examples/monitoring_with_tenant/src/main.rs index eb15100..080cea7 100644 --- a/examples/monitoring_with_tenant/src/main.rs +++ b/examples/monitoring_with_tenant/src/main.rs @@ -1,4 +1,3 @@ -use cidr::Ipv4Cidr; use harmony::{ data::Id, inventory::Inventory, @@ -14,11 +13,9 @@ use harmony::{ }, topology::{ K8sAnywhereTopology, Url, - tenant::{InternetEgressPolicy, ResourceLimits, TenantConfig, TenantNetworkPolicy}, + tenant::{ResourceLimits, TenantConfig, TenantNetworkPolicy}, }, }; -use std::net::Ipv4Addr; -use std::str::FromStr; #[tokio::main] async fn main() { @@ -27,7 +24,7 @@ async fn main() { id: Id::from_string("1234".to_string()), name: "test-tenant".to_string(), resource_limits: ResourceLimits { - cpu_request_cores: 4.0, + cpu_request_cores: 6.0, cpu_limit_cores: 4.0, memory_request_gb: 4.0, memory_limit_gb: 4.0, diff --git a/harmony/src/domain/topology/tenant/k8s.rs b/harmony/src/domain/topology/tenant/k8s.rs index 4e603e8..d0b3345 100644 --- a/harmony/src/domain/topology/tenant/k8s.rs +++ b/harmony/src/domain/topology/tenant/k8s.rs @@ -112,8 +112,8 @@ impl K8sTenantManager { "requests.storage": format!("{:.3}Gi", config.resource_limits.storage_total_gb), "pods": "20", "services": "10", - "configmaps": "30", - "secrets": "30", + "configmaps": "60", + "secrets": "60", "persistentvolumeclaims": "15", "services.loadbalancers": "2", "services.nodeports": "5", @@ -137,65 +137,99 @@ impl K8sTenantManager { 
"apiVersion": "networking.k8s.io/v1", "kind": "NetworkPolicy", "metadata": { - "name": format!("{}-network-policy", config.name), + "name": format!("{}-network-policy", config.name) }, "spec": { "podSelector": {}, "egress": [ - { "to": [ {"podSelector": {}}]}, - { "to": - [ - { - "podSelector": {}, - "namespaceSelector": { - "matchLabels": { - "kubernetes.io/metadata.name":"openshift-dns" - } - } - }, - ] - }, - { "to": [ - { - "ipBlock": { - - "cidr": "0.0.0.0/0", - // See https://en.wikipedia.org/wiki/Reserved_IP_addresses - "except": [ - "10.0.0.0/8", - "172.16.0.0/12", - "192.168.0.0/16", - "192.0.0.0/24", - "192.0.2.0/24", - "192.88.99.0/24", - "192.18.0.0/15", - "198.51.100.0/24", - "169.254.0.0/16", - "203.0.113.0/24", - "127.0.0.0/8", - - // Not sure we should block this one as it is - // used for multicast. But better block more than less. - "224.0.0.0/4", - "240.0.0.0/4", - "100.64.0.0/10", - "233.252.0.0/24", - "0.0.0.0/8", - ], - } + { + "to": [ + { "podSelector": {} } + ] + }, + { + "to": [ + { + "podSelector": {}, + "namespaceSelector": { + "matchLabels": { + "kubernetes.io/metadata.name": "kube-system" } - ] - }, + } + } + ] + }, + { + "to": [ + { + "podSelector": {}, + "namespaceSelector": { + "matchLabels": { + "kubernetes.io/metadata.name": "openshift-dns" + } + } + } + ] + }, + { + "to": [ + { + "ipBlock": { + "cidr": "10.43.0.1/32", + } + } + ] + }, + { + "to": [ + { + "ipBlock": { + "cidr": "172.23.0.0/16", + } + } + ] + }, + { + "to": [ + { + "ipBlock": { + "cidr": "0.0.0.0/0", + "except": [ + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", + "192.0.0.0/24", + "192.0.2.0/24", + "192.88.99.0/24", + "192.18.0.0/15", + "198.51.100.0/24", + "169.254.0.0/16", + "203.0.113.0/24", + "127.0.0.0/8", + "224.0.0.0/4", + "240.0.0.0/4", + "100.64.0.0/10", + "233.252.0.0/24", + "0.0.0.0/8" + ] + } + } + ] + } ], "ingress": [ - { "from": [ {"podSelector": {}}]} + { + "from": [ + { "podSelector": {} } + ] + } ], "policyTypes": [ - "Ingress", "Egress", + "Ingress", + "Egress" ] } }); - let mut network_policy: NetworkPolicy = serde_json::from_value(network_policy).map_err(|e| { ExecutorError::ConfigurationError(format!( diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs index 6c56676..62fd164 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs @@ -31,21 +31,21 @@ impl KubePrometheusConfig { Self { namespace: None, default_rules: true, - windows_monitoring: false, + windows_monitoring: true, alert_manager: true, grafana: true, - node_exporter: false, + node_exporter: true, prometheus: true, kubernetes_service_monitors: true, - kubernetes_api_server: false, - kubelet: false, - kube_controller_manager: false, - kube_etcd: false, - kube_proxy: false, + kubernetes_api_server: true, + kubelet: true, + kube_controller_manager: true, + kube_etcd: true, + kube_proxy: true, kube_state_metrics: true, prometheus_operator: true, - core_dns: false, - kube_scheduler: false, + core_dns: true, + kube_scheduler: true, alert_receiver_configs: vec![], alert_rules: vec![], } diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs index 7b7b993..5371080 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs +++ 
b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs @@ -12,8 +12,8 @@ use crate::modules::{ helm::chart::HelmChartScore, monitoring::kube_prometheus::types::{ AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig, - AlertManagerRoute, AlertManagerSpec, AlertManagerValues, Cpu, CpuUnit, Limits, Memory, - MemoryUnit, Requests, Resources, + AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits, Requests, + Resources, }, }; @@ -39,24 +39,12 @@ pub fn kube_prometheus_helm_chart_score( let prometheus = config.prometheus.to_string(); let resource_limit = Resources { limits: Limits { - memory: Memory { - value: 100, - unit: MemoryUnit::Mi, - }, - cpu: Cpu { - value: 100, - unit: CpuUnit::Milli, - }, + memory: "100Mi".to_string(), + cpu: "100m".to_string(), }, requests: Requests { - memory: Memory { - value: 100, - unit: MemoryUnit::Mi, - }, - cpu: Cpu { - value: 100, - unit: CpuUnit::Milli, - }, + memory: "100Mi".to_string(), + cpu: "100m".to_string(), }, }; @@ -83,7 +71,13 @@ pub fn kube_prometheus_helm_chart_score( prometheus: enabled: {prometheus} prometheusSpec: - {resource_section} + resources: + requests: + cpu: 100m + memory: 500Mi + limits: + cpu: 200m + memory: 1000Mi defaultRules: create: {default_rules} rules: @@ -123,42 +117,147 @@ defaultRules: windows: true windowsMonitoring: enabled: {windows_monitoring} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi grafana: enabled: {grafana} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi + initChownData: + resources: + requests: + cpu: 10m + memory: 50Mi + limits: + cpu: 50m + memory: 100Mi + sidecar: + resources: + requests: + cpu: 10m + memory: 50Mi + limits: + cpu: 50m + memory: 100Mi kubernetesServiceMonitors: enabled: {kubernetes_service_monitors} -{resource_section} kubeApiServer: enabled: {kubernetes_api_server} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi kubelet: enabled: {kubelet} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi kubeControllerManager: enabled: {kube_controller_manager} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi coreDns: enabled: {core_dns} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi kubeEtcd: enabled: {kube_etcd} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi kubeScheduler: enabled: {kube_scheduler} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi kubeProxy: enabled: {kube_proxy} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi kubeStateMetrics: enabled: {kube_state_metrics} -{resource_section} +kube-state-metrics: + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi nodeExporter: enabled: {node_exporter} -{resource_section} + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi +prometheus-node-exporter: + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 200m + memory: 250Mi prometheusOperator: enabled: {prometheus_operator} + resources: + requests: + cpu: 100m + memory: 150Mi + 
limits: + cpu: 100m + memory: 200Mi + prometheusConfigReloader: + resources: + requests: + cpu: 100m + memory: 150Mi + limits: + cpu: 100m + memory: 200Mi admissionWebhooks: deployment: resources: @@ -218,27 +317,27 @@ prometheusOperator: alertmanager: AlertManager { enabled: config.alert_manager, config: alert_manager_channel_config, - alertManagerSpec: AlertManagerSpec { + alertmanager_spec: AlertManagerSpec { resources: Resources { limits: Limits { - memory: Memory { - value: 100, - unit: MemoryUnit::Mi, - }, - cpu: Cpu { - value: 100, - unit: CpuUnit::Milli, - }, + memory: "100Mi".to_string(), + cpu: "100m".to_string(), }, requests: Requests { - memory: Memory { - value: 100, - unit: MemoryUnit::Mi, - }, - cpu: Cpu { - value: 100, - unit: CpuUnit::Milli, - }, + memory: "100Mi".to_string(), + cpu: "100m".to_string(), + }, + }, + }, + init_config_reloader: ConfigReloader { + resources: Resources { + limits: Limits { + memory: "100Mi".to_string(), + cpu: "100m".to_string(), + }, + requests: Requests { + memory: "100Mi".to_string(), + cpu: "100m".to_string(), }, }, }, diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/mod.rs index 4b07750..4184f48 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/mod.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/mod.rs @@ -1,2 +1,3 @@ pub mod config; pub mod kube_prometheus_helm_chart; +pub mod types; diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/types.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/types.rs new file mode 100644 index 0000000..ed59296 --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/types.rs @@ -0,0 +1,142 @@ +// // in your build_score function... +// +// // --- Step 1: Define the structs that match the ENTIRE values.yaml structure --- +// +// #[derive(Serialize, Debug)] +// #[serde(rename_all = "camelCase")] +// struct FullValues { +// // Top-level keys for each component +// prometheus: Prometheus, +// grafana: Grafana, +// alertmanager: Alertmanager, +// kube_state_metrics: KubeStateMetrics, +// prometheus_operator: PrometheusOperator, +// // Add other components like nodeExporter if you enable them +// +// // Key for your custom rules +// additional_prometheus_rules_map: AlertManagerAdditionalPromRules, +// } +// +// #[derive(Serialize, Debug)] +// #[serde(rename_all = "camelCase")] +// struct Prometheus { +// enabled: bool, +// prometheus_spec: PrometheusSpec, +// } +// +// #[derive(Serialize, Debug)] +// #[serde(rename_all = "camelCase")] +// struct PrometheusSpec { +// resources: K8sResources, +// } +// +// #[derive(Serialize, Debug)] +// #[serde(rename_all = "camelCase")] +// struct Grafana { +// enabled: bool, +// resources: K8sResources, +// sidecar: GrafanaSidecar, +// } +// +// #[derive(Serialize, Debug)] +// #[serde(rename_all = "camelCase")] +// struct GrafanaSidecar { +// resources: K8sResources, +// } +// +// #[derive(Serialize, Debug)] +// #[serde(rename_all = "camelCase")] +// struct Alertmanager { +// enabled: bool, +// config: AlertManagerConfig, // Your existing struct for this +// alert_manager_spec: AlertManagerSpec, +// } +// +// #[derive(Serialize, Debug)] +// #[serde(rename_all = "camelCase")] +// struct AlertManagerSpec { +// resources: K8sResources, +// // You will need to add a `config_reloader` field here for its resources +// } +// +// // Define KubeStateMetrics, PrometheusOperator, etc. in the same way +// // ... 
+// +// // Your K8sResources struct (flat, with strings) +// #[derive(Serialize, Debug)] +// struct K8sResources { +// requests: ResourceValues, +// limits: ResourceValues, +// } +// +// #[derive(Serialize, Debug)] +// struct ResourceValues { +// cpu: String, +// memory: String, +// } +// +// +// // --- Step 2: Populate the single, unified struct --- +// +// // Prepare your alertmanager config +// let mut alert_manager_channel_config = build_your_alert_manager_config(); // Your existing logic +// +// // Prepare your custom rules +// let merged_rules = build_your_prometheus_rules(); // Your existing logic +// +// // Define the resource profiles +// let heavy_res = K8sResources { /* ... */ }; +// let medium_res = K8sResources { /* ... */ }; +// let light_res = K8sResources { /* ... */ }; +// +// // Create the single source of truth for your values +// let full_values = FullValues { +// prometheus: Prometheus { +// enabled: config.prometheus, +// prometheus_spec: PrometheusSpec { +// resources: heavy_res, +// }, +// }, +// grafana: Grafana { +// enabled: config.grafana, +// resources: medium_res, +// sidecar: GrafanaSidecar { +// resources: light_res, +// }, +// }, +// alertmanager: Alertmanager { +// enabled: config.alert_manager, +// config: alert_manager_channel_config, +// alert_manager_spec: AlertManagerSpec { +// resources: light_res, +// // You'd add the config_reloader resources here +// }, +// }, +// kube_state_metrics: KubeStateMetrics { +// enabled: config.kube_state_metrics, +// resources: medium_res, +// }, +// prometheus_operator: PrometheusOperator { +// enabled: config.prometheus_operator, +// resources: light_res, +// // ... and so on for its sidecars +// }, +// additional_prometheus_rules_map: merged_rules, +// }; +// +// +// // --- Step 3: Serialize the single struct ONCE --- +// +// let final_values_yaml = serde_yaml::to_string(&full_values) +// .expect("Failed to serialize final values YAML"); +// +// debug!("full values.yaml: \n {:#}", final_values_yaml); +// +// +// // --- Step 4: Use the final string in your Helm score --- +// +// HelmChartScore { +// // ... +// values_yaml: Some(final_values_yaml), +// // ... 
+// } diff --git a/harmony/src/modules/monitoring/kube_prometheus/types.rs b/harmony/src/modules/monitoring/kube_prometheus/types.rs index e0ef5d8..3b6f0ff 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/types.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/types.rs @@ -16,10 +16,17 @@ pub struct AlertManagerValues { pub alertmanager: AlertManager, } #[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] pub struct AlertManager { pub enabled: bool, pub config: AlertManagerConfig, - pub alertManagerSpec: AlertManagerSpec, + pub alertmanager_spec: AlertManagerSpec, + pub init_config_reloader: ConfigReloader, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ConfigReloader { + pub resources: Resources, } #[derive(Debug, Clone, Serialize)] @@ -45,6 +52,7 @@ pub struct AlertManagerChannelConfig { } #[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] pub struct AlertManagerSpec { pub(crate) resources: Resources, } @@ -57,43 +65,14 @@ pub struct Resources { #[derive(Debug, Clone, Serialize)] pub struct Limits { - pub memory: Memory, - pub cpu: Cpu, + pub memory: String, + pub cpu: String, } #[derive(Debug, Clone, Serialize)] pub struct Requests { - pub memory: Memory, - pub cpu: Cpu, -} - -#[derive(Debug, Clone, Serialize)] -pub struct Memory { - pub value: u64, - pub unit: MemoryUnit, -} - -#[derive(Debug, Clone, Serialize)] -pub struct Cpu { - pub value: u64, - pub unit: CpuUnit, -} - -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "lowercase")] -pub enum MemoryUnit { - Ki, - Mi, - Gi, - Ti, -} - -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "lowercase")] -pub enum CpuUnit { - // 1 = 1 core, m = millicore - Core, - Milli, + pub memory: String, + pub cpu: String, } #[derive(Debug, Clone, Serialize)] From f2a350fae671ee6e02aa10acd8aafea68661cb3f Mon Sep 17 00:00:00 2001 From: Willem Date: Wed, 2 Jul 2025 13:35:20 -0400 Subject: [PATCH 4/5] fix: comments from pr --- .../monitoring/kube_prometheus/helm/config.rs | 16 +- .../monitoring/kube_prometheus/helm/mod.rs | 1 - .../monitoring/kube_prometheus/helm/types.rs | 142 ------------------ .../monitoring/kube_prometheus/prometheus.rs | 3 +- 4 files changed, 10 insertions(+), 152 deletions(-) delete mode 100644 harmony/src/modules/monitoring/kube_prometheus/helm/types.rs diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs index 62fd164..1784d6d 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/config.rs @@ -31,21 +31,21 @@ impl KubePrometheusConfig { Self { namespace: None, default_rules: true, - windows_monitoring: true, + windows_monitoring: false, alert_manager: true, grafana: true, - node_exporter: true, + node_exporter: false, prometheus: true, kubernetes_service_monitors: true, - kubernetes_api_server: true, + kubernetes_api_server: false, kubelet: true, - kube_controller_manager: true, - kube_etcd: true, - kube_proxy: true, + kube_controller_manager: false, + kube_etcd: false, + kube_proxy: false, kube_state_metrics: true, prometheus_operator: true, - core_dns: true, - kube_scheduler: true, + core_dns: false, + kube_scheduler: false, alert_receiver_configs: vec![], alert_rules: vec![], } diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/mod.rs index 4184f48..4b07750 100644 --- 
a/harmony/src/modules/monitoring/kube_prometheus/helm/mod.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/mod.rs @@ -1,3 +1,2 @@ pub mod config; pub mod kube_prometheus_helm_chart; -pub mod types; diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/types.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/types.rs deleted file mode 100644 index ed59296..0000000 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/types.rs +++ /dev/null @@ -1,142 +0,0 @@ -// // in your build_score function... -// -// // --- Step 1: Define the structs that match the ENTIRE values.yaml structure --- -// -// #[derive(Serialize, Debug)] -// #[serde(rename_all = "camelCase")] -// struct FullValues { -// // Top-level keys for each component -// prometheus: Prometheus, -// grafana: Grafana, -// alertmanager: Alertmanager, -// kube_state_metrics: KubeStateMetrics, -// prometheus_operator: PrometheusOperator, -// // Add other components like nodeExporter if you enable them -// -// // Key for your custom rules -// additional_prometheus_rules_map: AlertManagerAdditionalPromRules, -// } -// -// #[derive(Serialize, Debug)] -// #[serde(rename_all = "camelCase")] -// struct Prometheus { -// enabled: bool, -// prometheus_spec: PrometheusSpec, -// } -// -// #[derive(Serialize, Debug)] -// #[serde(rename_all = "camelCase")] -// struct PrometheusSpec { -// resources: K8sResources, -// } -// -// #[derive(Serialize, Debug)] -// #[serde(rename_all = "camelCase")] -// struct Grafana { -// enabled: bool, -// resources: K8sResources, -// sidecar: GrafanaSidecar, -// } -// -// #[derive(Serialize, Debug)] -// #[serde(rename_all = "camelCase")] -// struct GrafanaSidecar { -// resources: K8sResources, -// } -// -// #[derive(Serialize, Debug)] -// #[serde(rename_all = "camelCase")] -// struct Alertmanager { -// enabled: bool, -// config: AlertManagerConfig, // Your existing struct for this -// alert_manager_spec: AlertManagerSpec, -// } -// -// #[derive(Serialize, Debug)] -// #[serde(rename_all = "camelCase")] -// struct AlertManagerSpec { -// resources: K8sResources, -// // You will need to add a `config_reloader` field here for its resources -// } -// -// // Define KubeStateMetrics, PrometheusOperator, etc. in the same way -// // ... -// -// // Your K8sResources struct (flat, with strings) -// #[derive(Serialize, Debug)] -// struct K8sResources { -// requests: ResourceValues, -// limits: ResourceValues, -// } -// -// #[derive(Serialize, Debug)] -// struct ResourceValues { -// cpu: String, -// memory: String, -// } -// -// -// // --- Step 2: Populate the single, unified struct --- -// -// // Prepare your alertmanager config -// let mut alert_manager_channel_config = build_your_alert_manager_config(); // Your existing logic -// -// // Prepare your custom rules -// let merged_rules = build_your_prometheus_rules(); // Your existing logic -// -// // Define the resource profiles -// let heavy_res = K8sResources { /* ... */ }; -// let medium_res = K8sResources { /* ... */ }; -// let light_res = K8sResources { /* ... 
*/ }; -// -// // Create the single source of truth for your values -// let full_values = FullValues { -// prometheus: Prometheus { -// enabled: config.prometheus, -// prometheus_spec: PrometheusSpec { -// resources: heavy_res, -// }, -// }, -// grafana: Grafana { -// enabled: config.grafana, -// resources: medium_res, -// sidecar: GrafanaSidecar { -// resources: light_res, -// }, -// }, -// alertmanager: Alertmanager { -// enabled: config.alert_manager, -// config: alert_manager_channel_config, -// alert_manager_spec: AlertManagerSpec { -// resources: light_res, -// // You'd add the config_reloader resources here -// }, -// }, -// kube_state_metrics: KubeStateMetrics { -// enabled: config.kube_state_metrics, -// resources: medium_res, -// }, -// prometheus_operator: PrometheusOperator { -// enabled: config.prometheus_operator, -// resources: light_res, -// // ... and so on for its sidecars -// }, -// additional_prometheus_rules_map: merged_rules, -// }; -// -// -// // --- Step 3: Serialize the single struct ONCE --- -// -// let final_values_yaml = serde_yaml::to_string(&full_values) -// .expect("Failed to serialize final values YAML"); -// -// debug!("full values.yaml: \n {:#}", final_values_yaml); -// -// -// // --- Step 4: Use the final string in your Helm score --- -// -// HelmChartScore { -// // ... -// values_yaml: Some(final_values_yaml), -// // ... -// } diff --git a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs index 57e72f9..0216957 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs @@ -1,7 +1,7 @@ use std::sync::{Arc, Mutex}; use async_trait::async_trait; -use log::debug; +use log::{debug, error}; use serde::Serialize; use crate::{ @@ -67,6 +67,7 @@ impl Prometheus { .get_tenant_config() .map(|cfg| cfg.name.clone()) .unwrap_or_else(|| "monitoring".to_string()); + error!("This must be refactored, see comments in pr #74"); debug!("NS: {}", ns); self.config.lock().unwrap().namespace = Some(ns); } From 3eecc2f5901beaea484d78c383a13116a84cab80 Mon Sep 17 00:00:00 2001 From: Willem Date: Wed, 2 Jul 2025 15:50:13 -0400 Subject: [PATCH 5/5] fix: K8sTenantManager is responsible for concrete implementation. 
K8sAnywhere should delegate --- examples/monitoring/src/main.rs | 8 ++++- examples/monitoring_with_tenant/src/main.rs | 32 ++++++++++++++++++- harmony/src/domain/topology/installable.rs | 2 +- harmony/src/domain/topology/k8s_anywhere.rs | 19 ++++------- .../topology/oberservability/monitoring.rs | 2 +- harmony/src/domain/topology/tenant/k8s.rs | 24 +++++++++++--- harmony/src/domain/topology/tenant/manager.rs | 2 +- .../helm/kube_prometheus_helm_chart.rs | 4 ++- .../monitoring/kube_prometheus/prometheus.rs | 7 ++-- 9 files changed, 73 insertions(+), 27 deletions(-) diff --git a/examples/monitoring/src/main.rs b/examples/monitoring/src/main.rs index da9c1c1..989b1ec 100644 --- a/examples/monitoring/src/main.rs +++ b/examples/monitoring/src/main.rs @@ -7,7 +7,13 @@ use harmony::{ monitoring::{ alert_channel::discord_alert_channel::DiscordWebhook, alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, - kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore, + kube_prometheus::{ + helm_prometheus_alert_score::HelmPrometheusAlertingScore, + types::{ + HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor, + ServiceMonitorEndpoint, + }, + }, }, prometheus::alerts::{ infra::dell_server::{ diff --git a/examples/monitoring_with_tenant/src/main.rs b/examples/monitoring_with_tenant/src/main.rs index 080cea7..ec80542 100644 --- a/examples/monitoring_with_tenant/src/main.rs +++ b/examples/monitoring_with_tenant/src/main.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use harmony::{ data::Id, inventory::Inventory, @@ -6,7 +8,13 @@ use harmony::{ monitoring::{ alert_channel::discord_alert_channel::DiscordWebhook, alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, - kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore, + kube_prometheus::{ + helm_prometheus_alert_score::HelmPrometheusAlertingScore, + types::{ + HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor, + ServiceMonitorEndpoint, + }, + }, }, prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days, tenant::TenantScore, @@ -44,9 +52,31 @@ async fn main() { let additional_rules = AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]); + let service_monitor_endpoint = ServiceMonitorEndpoint { + port: Some("80".to_string()), + path: "/metrics".to_string(), + scheme: HTTPScheme::HTTP, + ..Default::default() + }; + + let service_monitor = ServiceMonitor { + name: "test-service-monitor".to_string(), + selector: Selector { + match_labels: HashMap::new(), + match_expressions: vec![MatchExpression { + key: "test".to_string(), + operator: Operator::In, + values: vec!["test-service".to_string()], + }], + }, + endpoints: vec![service_monitor_endpoint], + ..Default::default() + }; + let alerting_score = HelmPrometheusAlertingScore { receivers: vec![Box::new(discord_receiver)], rules: vec![Box::new(additional_rules)], + service_monitors: vec![service_monitor], }; let mut maestro = Maestro::::initialize( Inventory::autoload(), diff --git a/harmony/src/domain/topology/installable.rs b/harmony/src/domain/topology/installable.rs index 0e81448..72b7b31 100644 --- a/harmony/src/domain/topology/installable.rs +++ b/harmony/src/domain/topology/installable.rs @@ -4,7 +4,7 @@ use crate::{interpret::InterpretError, inventory::Inventory}; #[async_trait] pub trait Installable: Send + Sync { - fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), InterpretError>; + async fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), 
InterpretError>; async fn ensure_installed( &self, diff --git a/harmony/src/domain/topology/k8s_anywhere.rs b/harmony/src/domain/topology/k8s_anywhere.rs index cb5cd76..5eebd1d 100644 --- a/harmony/src/domain/topology/k8s_anywhere.rs +++ b/harmony/src/domain/topology/k8s_anywhere.rs @@ -39,7 +39,6 @@ pub struct K8sAnywhereTopology { k8s_state: Arc>>, tenant_manager: Arc>, config: Arc, - tenant_manager_config: OnceCell, } #[async_trait] @@ -74,7 +73,6 @@ impl K8sAnywhereTopology { k8s_state: Arc::new(OnceCell::new()), tenant_manager: Arc::new(OnceCell::new()), config: Arc::new(K8sAnywhereConfig::from_env()), - tenant_manager_config: OnceCell::new(), } } @@ -83,7 +81,6 @@ impl K8sAnywhereTopology { k8s_state: Arc::new(OnceCell::new()), tenant_manager: Arc::new(OnceCell::new()), config: Arc::new(config), - tenant_manager_config: OnceCell::new(), } } @@ -199,16 +196,10 @@ impl K8sAnywhereTopology { let k8s_client = self.k8s_client().await?; Ok(K8sTenantManager::new(k8s_client)) }) - .await - .unwrap(); + .await?; Ok(()) } - async fn store_tenant_config(&self, config: TenantConfig) { - self.tenant_manager_config - .get_or_init(|| async { config }) - .await; - } fn get_k8s_tenant_manager(&self) -> Result<&K8sTenantManager, ExecutorError> { match self.tenant_manager.get() { @@ -289,13 +280,15 @@ impl HelmCommand for K8sAnywhereTopology {} #[async_trait] impl TenantManager for K8sAnywhereTopology { async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> { - self.store_tenant_config(config.clone()).await; self.get_k8s_tenant_manager()? .provision_tenant(config) .await } - fn get_tenant_config(&self) -> Option { - self.tenant_manager_config.get().cloned() + async fn get_tenant_config(&self) -> Option { + self.get_k8s_tenant_manager() + .ok()? 
+ .get_tenant_config() + .await } } diff --git a/harmony/src/domain/topology/oberservability/monitoring.rs b/harmony/src/domain/topology/oberservability/monitoring.rs index f65e159..6d60c7a 100644 --- a/harmony/src/domain/topology/oberservability/monitoring.rs +++ b/harmony/src/domain/topology/oberservability/monitoring.rs @@ -27,7 +27,7 @@ impl, T: Topology> Interpret for AlertingInte inventory: &Inventory, topology: &T, ) -> Result { - self.sender.configure(inventory, topology)?; + self.sender.configure(inventory, topology).await?; for receiver in self.receivers.iter() { receiver.install(&self.sender).await?; } diff --git a/harmony/src/domain/topology/tenant/k8s.rs b/harmony/src/domain/topology/tenant/k8s.rs index 36cf9f0..723c0d9 100644 --- a/harmony/src/domain/topology/tenant/k8s.rs +++ b/harmony/src/domain/topology/tenant/k8s.rs @@ -5,7 +5,6 @@ use crate::{ topology::k8s::{ApplyStrategy, K8sClient}, }; use async_trait::async_trait; -use derive_new::new; use k8s_openapi::{ api::{ core::v1::{LimitRange, Namespace, ResourceQuota}, @@ -19,12 +18,23 @@ use kube::Resource; use log::{debug, info, warn}; use serde::de::DeserializeOwned; use serde_json::json; +use tokio::sync::OnceCell; use super::{TenantConfig, TenantManager}; -#[derive(new, Clone, Debug)] +#[derive(Clone, Debug)] pub struct K8sTenantManager { k8s_client: Arc, + k8s_tenant_config: Arc>, +} + +impl K8sTenantManager { + pub fn new(client: Arc) -> Self { + Self { + k8s_client: client, + k8s_tenant_config: Arc::new(OnceCell::new()), + } + } } impl K8sTenantManager { @@ -147,7 +157,7 @@ impl K8sTenantManager { "spec": { "limits": [ { - "type": "Container", + "type": "Container", "default": { "cpu": "500m", "memory": "500Mi" @@ -391,6 +401,9 @@ impl K8sTenantManager { Ok(network_policy) } + fn store_config(&self, config: &TenantConfig) { + let _ = self.k8s_tenant_config.set(config.clone()); + } } #[async_trait] @@ -419,9 +432,10 @@ impl TenantManager for K8sTenantManager { "Success provisionning K8s tenant id {} name {}", config.id, config.name ); + self.store_config(config); Ok(()) } - fn get_tenant_config(&self) -> Option { - todo!() + async fn get_tenant_config(&self) -> Option { + self.k8s_tenant_config.get().cloned() } } diff --git a/harmony/src/domain/topology/tenant/manager.rs b/harmony/src/domain/topology/tenant/manager.rs index 0e0d426..d7c75ce 100644 --- a/harmony/src/domain/topology/tenant/manager.rs +++ b/harmony/src/domain/topology/tenant/manager.rs @@ -16,5 +16,5 @@ pub trait TenantManager { /// * `config`: The desired configuration for the new tenant. 
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError>; - fn get_tenant_config(&self) -> Option; + async fn get_tenant_config(&self) -> Option; } diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs index 6158f5f..3fd773f 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm/kube_prometheus_helm_chart.rs @@ -11,7 +11,9 @@ use std::{ use crate::modules::{ helm::chart::HelmChartScore, monitoring::kube_prometheus::types::{ - AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig, AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits, PrometheusConfig, Requests, Resources + AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig, + AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits, + PrometheusConfig, Requests, Resources, }, }; diff --git a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs index 0216957..fdf2057 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/prometheus.rs @@ -35,8 +35,8 @@ impl AlertSender for Prometheus { #[async_trait] impl Installable for Prometheus { - fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> { - self.configure_with_topology(topology); + async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> { + self.configure_with_topology(topology).await; Ok(()) } @@ -62,9 +62,10 @@ impl Prometheus { } } - pub fn configure_with_topology(&self, topology: &T) { + pub async fn configure_with_topology(&self, topology: &T) { let ns = topology .get_tenant_config() + .await .map(|cfg| cfg.name.clone()) .unwrap_or_else(|| "monitoring".to_string()); error!("This must be refactored, see comments in pr #74");
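
Note on the behaviour this series introduces: the kube-prometheus Helm release is deployed into the namespace named after the provisioned tenant (`TenantManager::get_tenant_config()` is read in `Prometheus::configure_with_topology` and written into `KubePrometheusConfig.namespace`), falling back to "monitoring" when no tenant config exists. The standalone sketch below illustrates only that fallback logic; `TenantConfig`, `TenantManager`, `resolve_monitoring_namespace`, `DEFAULT_MONITORING_NAMESPACE`, and the two toy impls are simplified stand-ins for illustration, not the crate's real types, and it assumes `tokio` and `async-trait` as dependencies.

// Sketch of the namespace-resolution fallback added by this series.
// All names here are illustrative stand-ins, not harmony's actual API.
use async_trait::async_trait;

#[derive(Clone, Debug)]
struct TenantConfig {
    name: String,
}

#[async_trait]
trait TenantManager {
    // Returns the tenant config stored at provisioning time, if any.
    async fn get_tenant_config(&self) -> Option<TenantConfig>;
}

// Namespace used by kube-prometheus when no tenant has been provisioned.
const DEFAULT_MONITORING_NAMESPACE: &str = "monitoring";

// Resolve the deployment namespace: tenant name if present, else the default.
async fn resolve_monitoring_namespace<T: TenantManager>(topology: &T) -> String {
    topology
        .get_tenant_config()
        .await
        .map(|cfg| cfg.name)
        .unwrap_or_else(|| DEFAULT_MONITORING_NAMESPACE.to_string())
}

struct NoTenant;

#[async_trait]
impl TenantManager for NoTenant {
    async fn get_tenant_config(&self) -> Option<TenantConfig> {
        None
    }
}

struct WithTenant(TenantConfig);

#[async_trait]
impl TenantManager for WithTenant {
    async fn get_tenant_config(&self) -> Option<TenantConfig> {
        Some(self.0.clone())
    }
}

#[tokio::main]
async fn main() {
    // Falls back to "monitoring" when no tenant config exists.
    assert_eq!(resolve_monitoring_namespace(&NoTenant).await, "monitoring");

    // Uses the tenant name as the deployment namespace otherwise.
    let tenant = WithTenant(TenantConfig {
        name: "test-tenant".to_string(),
    });
    assert_eq!(resolve_monitoring_namespace(&tenant).await, "test-tenant");
}

In the series itself this resolution happens once per run, in `Prometheus::configure_with_topology`, before `kube_prometheus_helm_chart_score` builds the `HelmChartScore`, which is why the chart's `namespace` field became `Option<String>` in patch 1.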