From 2c208df1434b2c5b7e1f038f88644c30c941a288 Mon Sep 17 00:00:00 2001 From: Willem Date: Mon, 7 Jul 2025 13:24:21 -0400 Subject: [PATCH] fix: deploys by default in the application name namespace --- .../application/features/monitoring.rs | 17 +++++----- .../k8s_application_monitoring_score.rs | 8 ++++- .../monitoring/grafana/helm/helm_grafana.rs | 31 +++++++++++++++++-- .../prometheus/helm/prometheus_helm.rs | 12 ++++--- .../monitoring/prometheus/prometheus.rs | 13 ++++---- 5 files changed, 58 insertions(+), 23 deletions(-) diff --git a/harmony/src/modules/application/features/monitoring.rs b/harmony/src/modules/application/features/monitoring.rs index 0a8d421..dd16226 100644 --- a/harmony/src/modules/application/features/monitoring.rs +++ b/harmony/src/modules/application/features/monitoring.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use async_trait::async_trait; use log::info; @@ -7,29 +9,30 @@ use crate::{ application::{Application, ApplicationFeature}, monitoring::{ application_monitoring::k8s_application_monitoring_score::ApplicationPrometheusMonitoringScore, - kube_prometheus::{ - helm_prometheus_alert_score::HelmPrometheusAlertingScore, - types::{NamespaceSelector, ServiceMonitor}, - }, + kube_prometheus::types::{NamespaceSelector, ServiceMonitor}, }, }, score::Score, topology::{HelmCommand, Topology, tenant::TenantManager}, }; -#[derive(Debug, Default, Clone)] -pub struct Monitoring {} +#[derive(Debug, Clone)] +pub struct Monitoring { + pub application: Arc, +} #[async_trait] impl ApplicationFeature for Monitoring { async fn ensure_installed(&self, topology: &T) -> Result<(), String> { info!("Ensuring monitoring is available for application"); + let ns = self.application.name(); let mut service_monitor = ServiceMonitor::default(); service_monitor.namespace_selector = Some(NamespaceSelector { any: true, - match_names: vec![], + match_names: vec![ns.clone()], }); let alerting_score = ApplicationPrometheusMonitoringScore { + namespace: ns, receivers: vec![], rules: vec![], service_monitors: vec![service_monitor], diff --git a/harmony/src/modules/monitoring/application_monitoring/k8s_application_monitoring_score.rs b/harmony/src/modules/monitoring/application_monitoring/k8s_application_monitoring_score.rs index 29e2893..dc0b9d7 100644 --- a/harmony/src/modules/monitoring/application_monitoring/k8s_application_monitoring_score.rs +++ b/harmony/src/modules/monitoring/application_monitoring/k8s_application_monitoring_score.rs @@ -1,5 +1,6 @@ use std::sync::{Arc, Mutex}; +use log::debug; use serde::Serialize; use crate::{ @@ -17,6 +18,7 @@ use crate::{ #[derive(Clone, Debug, Serialize)] pub struct ApplicationPrometheusMonitoringScore { + pub namespace: String, pub receivers: Vec>>, pub rules: Vec>>, pub service_monitors: Vec, @@ -29,8 +31,12 @@ impl Score for ApplicationPromethe .try_lock() .expect("couldn't lock config") .additional_service_monitors = self.service_monitors.clone(); + let ns = self.namespace.clone(); + + config.try_lock().expect("couldn't lock config").namespace = Some(ns.clone()); + debug!("set namespace to {}", ns); Box::new(AlertingInterpret { - sender: Prometheus::new(), + sender: Prometheus { config }, receivers: self.receivers.clone(), rules: self.rules.clone(), }) diff --git a/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs b/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs index c53fe3f..7e24747 100644 --- a/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs +++ b/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs @@ -8,9 +8,34 @@ pub fn grafana_helm_chart_score(ns: &str) -> HelmChartScore { r#" rbac: namespaced: true -sidecar: +datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus-server.{ns}.svc.cluster.local + isDefault: true +downloadDashboards: dashboards: - enabled: true + - url: https://raw.githubusercontent.com/grafana/grafana/main/devenv/dev-dashboards/node-exporter-full_rev1.json + file: node-exporter-full.json + + - url: https://grafana.com/api/dashboards/7685/revisions/1/download + file: kubernetes-pvs-usage.json + + # Namespace resource usage vs quotas + - url: https://grafana.com/api/dashboards/17044/revisions/1/download + file: namespace-resources-vs-quotas.json + + # Kubernetes namespace resources (CPU, RAM, network) + - url: https://grafana.com/api/dashboards/9809/revisions/1/download + file: kubernetes-namespace-resources.json + + # Top 10 namespaces by memory usage + - url: https://grafana.com/api/dashboards/10678/revisions/1/download + file: top10-namespace-memory.json "# ); @@ -22,7 +47,7 @@ sidecar: values_overrides: None, values_yaml: Some(values.to_string()), create_namespace: true, - install_only: true, + install_only: false, repository: None, } } diff --git a/harmony/src/modules/monitoring/prometheus/helm/prometheus_helm.rs b/harmony/src/modules/monitoring/prometheus/helm/prometheus_helm.rs index 611c500..8dd8087 100644 --- a/harmony/src/modules/monitoring/prometheus/helm/prometheus_helm.rs +++ b/harmony/src/modules/monitoring/prometheus/helm/prometheus_helm.rs @@ -12,21 +12,23 @@ pub fn prometheus_helm_chart_score(config: Arc>) -> Helm let ns = config.namespace.clone().unwrap(); let values = format!( r#" +releaseNamespace: true rbac: create: true + namespaced: true kube-state-metrics: enabled: false -nodeExporter: +prometheus-node-exporter: enabled: false alertmanager: - enabled: false + enabled: true pushgateway: enabled: false server: serviceAccount: - create: false - rbac: create: true + rbac: + create: false fullnameOverride: prometheus-{ns} "# ); @@ -41,7 +43,7 @@ fullnameOverride: prometheus-{ns} values_overrides: None, values_yaml: Some(values.to_string()), create_namespace: true, - install_only: true, + install_only: false, repository: None, } } diff --git a/harmony/src/modules/monitoring/prometheus/prometheus.rs b/harmony/src/modules/monitoring/prometheus/prometheus.rs index 4e6a981..da955a3 100644 --- a/harmony/src/modules/monitoring/prometheus/prometheus.rs +++ b/harmony/src/modules/monitoring/prometheus/prometheus.rs @@ -44,14 +44,13 @@ impl Prometheus { } } pub async fn configure_with_topology(&self, topology: &T) { - let ns = topology - .get_tenant_config() - .await - .map(|cfg| cfg.name.clone()) - .unwrap_or_else(|| "monitoring".to_string()); + if let Some(cfg) = topology.get_tenant_config().await { + debug!("Overriding namespace with tenant config: {}", cfg.name); + self.config.lock().unwrap().namespace = Some(cfg.name.clone()); + } else { + debug!("No tenant config found; keeping existing namespace."); + } error!("This must be refactored, see comments in pr #74"); - debug!("NS: {}", ns); - self.config.lock().unwrap().namespace = Some(ns); } pub async fn install_receiver(