feat: add service monitors support to prom (#66)

Co-authored-by: tahahawa <tahahawa@gmail.com>
Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/66
Co-authored-by: taha <taha@noreply.git.nationtech.io>
Co-committed-by: taha <taha@noreply.git.nationtech.io>
parent 2ff3f4afa9
commit ab69a2c264

Cargo.lock (generated)
@@ -1160,6 +1160,7 @@ version = "0.1.0"
dependencies = [
 "harmony",
 "harmony_cli",
 "harmony_macros",
 "tokio",
 "url",
]
@@ -8,5 +8,6 @@ license.workspace = true

[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
harmony_macros = { version = "0.1.0", path = "../../harmony_macros" }
tokio.workspace = true
url.workspace = true
@@ -1,3 +1,5 @@
use std::collections::HashMap;

use harmony::{
    inventory::Inventory,
    maestro::Maestro,
@@ -41,9 +43,30 @@ async fn main() {
        ],
    );

    let service_monitor_endpoint = ServiceMonitorEndpoint {
        port: Some("80".to_string()),
        path: "/metrics".to_string(),
        scheme: HTTPScheme::HTTP,
        ..Default::default()
    };

    let service_monitor = ServiceMonitor {
        name: "test-service-monitor".to_string(),
        selector: Selector {
            match_labels: HashMap::new(),
            match_expressions: vec![MatchExpression {
                key: "test".to_string(),
                operator: Operator::In,
                values: vec!["test-service".to_string()],
            }],
        },
        endpoints: vec![service_monitor_endpoint],
        ..Default::default()
    };
    let alerting_score = HelmPrometheusAlertingScore {
        receivers: vec![Box::new(discord_receiver)],
        rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
        service_monitors: vec![service_monitor],
    };
    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
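For comparison with the matchExpressions-based example above, a plain label-based selector can be built with the same types. This is a minimal sketch, not part of this commit; the import path is assumed from the crate::modules::monitoring::kube_prometheus::types layout shown later in this diff.

use std::collections::HashMap;

// Assumed import path, inferred from the module layout in this diff.
use harmony::modules::monitoring::kube_prometheus::types::{
    HTTPScheme, Selector, ServiceMonitor, ServiceMonitorEndpoint,
};

fn monitor_by_label() -> ServiceMonitor {
    // Select services carrying the label app=test-service instead of a matchExpressions rule.
    let mut labels = HashMap::new();
    labels.insert("app".to_string(), "test-service".to_string());

    ServiceMonitor {
        name: "test-service-monitor".to_string(),
        selector: Selector {
            match_labels: labels,
            match_expressions: vec![],
        },
        endpoints: vec![ServiceMonitorEndpoint {
            port: Some("8080".to_string()),
            path: "/metrics".to_string(),
            scheme: HTTPScheme::HTTP,
            ..Default::default()
        }],
        ..Default::default()
    }
}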
@@ -26,6 +26,7 @@ pub struct KubePrometheusConfig {
    pub prometheus_operator: bool,
    pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
    pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
    pub additional_service_monitors: Vec<ServiceMonitor>,
}
impl KubePrometheusConfig {
    pub fn new() -> Self {
@@ -49,6 +50,7 @@ impl KubePrometheusConfig {
            kube_scheduler: false,
            alert_receiver_configs: vec![],
            alert_rules: vec![],
            additional_service_monitors: vec![],
        }
    }
}
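The new field starts empty on construction and is populated by callers afterwards. A minimal usage sketch, not part of this commit, with import paths assumed from the module layout in this diff:

// Assumed import paths.
use harmony::modules::monitoring::kube_prometheus::helm::config::KubePrometheusConfig;
use harmony::modules::monitoring::kube_prometheus::types::ServiceMonitor;

fn main() {
    let mut config = KubePrometheusConfig::new();
    // Append a monitor after construction; new() leaves the list empty.
    config
        .additional_service_monitors
        .push(ServiceMonitor::default());
    assert_eq!(config.additional_service_monitors.len(), 1);
}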
@@ -12,7 +12,7 @@ use crate::modules::{
    helm::chart::HelmChartScore,
    monitoring::kube_prometheus::types::{
        AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
-       AlertManagerRoute, AlertManagerValues,
+       AlertManagerRoute, AlertManagerValues, PrometheusConfig,
    },
};

@@ -101,11 +101,26 @@ nodeExporter:
  enabled: {node_exporter}
prometheusOperator:
  enabled: {prometheus_operator}
prometheus:
  enabled: {prometheus}

"#,
    );

    let prometheus_config =
        crate::modules::monitoring::kube_prometheus::types::PrometheusConfigValues {
            prometheus: PrometheusConfig {
                prometheus: bool::from_str(prometheus.as_str()).expect("couldn't parse bool"),
                additional_service_monitors: config.additional_service_monitors.clone(),
            },
        };
    let prometheus_config_yaml =
        serde_yaml::to_string(&prometheus_config).expect("Failed to serialize YAML");

    debug!(
        "serialized prometheus config: \n {:#}",
        prometheus_config_yaml
    );
    values.push_str(&prometheus_config_yaml);

    // add required null receiver for prometheus alert manager
    let mut null_receiver = Mapping::new();
    null_receiver.insert(
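The detour through PrometheusConfigValues exists so the monitors serialize under a top-level prometheus: key with camelCase field names, which is the shape appended to the chart values. A sketch of the serialized output, not part of this commit, assuming the types are reachable at the path used above:

// Assumed import path, matching the one used in the diff above.
use harmony::modules::monitoring::kube_prometheus::types::{
    PrometheusConfig, PrometheusConfigValues,
};

fn main() {
    let values = PrometheusConfigValues {
        prometheus: PrometheusConfig {
            prometheus: true,
            additional_service_monitors: vec![],
        },
    };

    // serde's rename_all = "camelCase" yields:
    //
    // prometheus:
    //   prometheus: true
    //   additionalServiceMonitors: []
    println!("{}", serde_yaml::to_string(&values).expect("Failed to serialize YAML"));
}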
@@ -4,6 +4,7 @@ use serde::Serialize;

use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
use crate::{
    modules::monitoring::kube_prometheus::types::ServiceMonitor,
    score::Score,
    topology::{
        HelmCommand, Topology,
@@ -15,14 +16,18 @@ use crate::{
pub struct HelmPrometheusAlertingScore {
    pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
    pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
    pub service_monitors: Vec<ServiceMonitor>,
}

impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
    fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
        let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
        config
            .try_lock()
            .expect("couldn't lock config")
            .additional_service_monitors = self.service_monitors.clone();
        Box::new(AlertingInterpret {
-           sender: Prometheus {
-               config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
-           },
+           sender: Prometheus { config },
            receivers: self.receivers.clone(),
            rules: self.rules.clone(),
        })
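The change above threads one shared config into the Prometheus sender instead of constructing a second, empty KubePrometheusConfig that would discard the service monitors. The sharing pattern in isolation, using only std types; the Config struct here is a stand-in, not the real type:

use std::sync::{Arc, Mutex};

// Stand-in for KubePrometheusConfig, only to illustrate the Arc<Mutex<_>> sharing.
#[derive(Default)]
struct Config {
    additional_service_monitors: Vec<String>,
}

fn main() {
    let config = Arc::new(Mutex::new(Config::default()));

    // No other clone of the Arc exists yet, so try_lock succeeds here.
    config
        .try_lock()
        .expect("couldn't lock config")
        .additional_service_monitors = vec!["test-service-monitor".to_string()];

    // The sender holds a clone of the same Arc, so it observes the mutation above.
    let sender_view = Arc::clone(&config);
    assert_eq!(sender_view.lock().unwrap().additional_service_monitors.len(), 1);
}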
@@ -53,3 +53,202 @@ pub struct AlertManagerAdditionalPromRules {
pub struct AlertGroup {
    pub groups: Vec<AlertManagerRuleGroup>,
}

#[derive(Debug, Clone, Serialize)]
pub enum HTTPScheme {
    #[serde(rename = "http")]
    HTTP,
    #[serde(rename = "https")]
    HTTPS,
}

#[derive(Debug, Clone, Serialize)]
pub enum Operator {
    In,
    NotIn,
    Exists,
    DoesNotExist,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfigValues {
    pub prometheus: PrometheusConfig,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfig {
    pub prometheus: bool,
    pub additional_service_monitors: Vec<ServiceMonitor>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorTLSConfig {
    // ## Path to the CA file
    // ##
    pub ca_file: Option<String>,

    // ## Path to client certificate file
    // ##
    pub cert_file: Option<String>,

    // ## Skip certificate verification
    // ##
    pub insecure_skip_verify: Option<bool>,

    // ## Path to client key file
    // ##
    pub key_file: Option<String>,

    // ## Server name used to verify host name
    // ##
    pub server_name: Option<String>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorEndpoint {
    // ## Name of the endpoint's service port
    // ## Mutually exclusive with targetPort
    pub port: Option<String>,

    // ## Name or number of the endpoint's target port
    // ## Mutually exclusive with port
    pub target_port: Option<String>,

    // ## File containing bearer token to be used when scraping targets
    // ##
    pub bearer_token_file: Option<String>,

    // ## Interval at which metrics should be scraped
    // ##
    pub interval: Option<String>,

    // ## HTTP path to scrape for metrics
    // ##
    pub path: String,

    // ## HTTP scheme to use for scraping
    // ##
    pub scheme: HTTPScheme,

    // ## TLS configuration to use when scraping the endpoint
    // ##
    pub tls_config: Option<ServiceMonitorTLSConfig>,

    // ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
    // ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    // ##
    // # - action: keep
    // #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    // #   sourceLabels: [__name__]
    pub metric_relabelings: Vec<Mapping>,

    // ## RelabelConfigs to apply to samples before scraping
    // ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    // ##
    // # - sourceLabels: [__meta_kubernetes_pod_node_name]
    // #   separator: ;
    // #   regex: ^(.*)$
    // #   targetLabel: nodename
    // #   replacement: $1
    // #   action: replace
    pub relabelings: Vec<Mapping>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct MatchExpression {
    pub key: String,
    pub operator: Operator,
    pub values: Vec<String>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Selector {
    // # label selector for services
    pub match_labels: HashMap<String, String>,
    pub match_expressions: Vec<MatchExpression>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitor {
    pub name: String,

    // # Additional labels to set, used for the ServiceMonitorSelector. Together with standard labels from the chart
    pub additional_labels: Option<Mapping>,

    // # Service label for use in assembling a job name of the form <label value>-<port>
    // # If no label is specified, the service name is used.
    pub job_label: Option<String>,

    // # Labels to transfer from the Kubernetes service to the target
    pub target_labels: Vec<String>,

    // # Labels to transfer from the Kubernetes pods to the target
    pub pod_target_labels: Vec<String>,

    // # Label selector for services to which this ServiceMonitor applies
    // # Example which selects all services to be monitored
    // # with label "monitoredby" with values any of "example-service-1" or "example-service-2"
    // matchExpressions:
    //   - key: "monitoredby"
    //     operator: In
    //     values:
    //       - example-service-1
    //       - example-service-2
    pub selector: Selector,

    // # Namespaces from which services are selected
    // # Match any namespace
    // any: bool,
    // # Explicit list of namespace names to select
    // matchNames: Vec,
    pub namespace_selector: Option<Mapping>,

    // # Endpoints of the selected service to be monitored
    pub endpoints: Vec<ServiceMonitorEndpoint>,

    // # Fallback scrape protocol used by Prometheus for scraping metrics
    // # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.ScrapeProtocol
    pub fallback_scrape_protocol: Option<String>,
}

impl Default for ServiceMonitor {
    fn default() -> Self {
        Self {
            name: Default::default(),
            additional_labels: Default::default(),
            job_label: Default::default(),
            target_labels: Default::default(),
            pod_target_labels: Default::default(),
            selector: Selector {
                match_labels: HashMap::new(),
                match_expressions: vec![],
            },
            namespace_selector: Default::default(),
            endpoints: Default::default(),
            fallback_scrape_protocol: Default::default(),
        }
    }
}

impl Default for ServiceMonitorEndpoint {
    fn default() -> Self {
        Self {
            port: Some("80".to_string()),
            target_port: Default::default(),
            bearer_token_file: Default::default(),
            interval: Default::default(),
            path: "/metrics".to_string(),
            scheme: HTTPScheme::HTTP,
            tls_config: Default::default(),
            metric_relabelings: Default::default(),
            relabelings: Default::default(),
        }
    }
}
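To make the commented monitoredby example above concrete: serializing a Selector built from these types produces the matchExpressions shape Prometheus Operator expects. A sketch, not part of this commit, with an assumed import path:

use std::collections::HashMap;

// Assumed import path, matching the module layout used elsewhere in this diff.
use harmony::modules::monitoring::kube_prometheus::types::{MatchExpression, Operator, Selector};

fn main() {
    let selector = Selector {
        match_labels: HashMap::new(),
        match_expressions: vec![MatchExpression {
            key: "monitoredby".to_string(),
            operator: Operator::In,
            values: vec![
                "example-service-1".to_string(),
                "example-service-2".to_string(),
            ],
        }],
    };

    // Serializes via serde_yaml to:
    //
    // matchLabels: {}
    // matchExpressions:
    // - key: monitoredby
    //   operator: In
    //   values:
    //   - example-service-1
    //   - example-service-2
    println!("{}", serde_yaml::to_string(&selector).expect("Failed to serialize YAML"));
}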