feat: add service monitors support to prom #66

Merged
johnride merged 9 commits from monitoring_servicemonitor into master 2025-07-02 15:29:23 +00:00
7 changed files with 252 additions and 6 deletions

1
Cargo.lock generated
View File

@ -1160,6 +1160,7 @@ version = "0.1.0"
dependencies = [
"harmony",
"harmony_cli",
"harmony_macros",
"tokio",
"url",
]

View File

@ -8,5 +8,6 @@ license.workspace = true
[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
harmony_macros = { version = "0.1.0", path = "../../harmony_macros" }
tokio.workspace = true
url.workspace = true

View File

@ -1,3 +1,5 @@
use std::collections::HashMap;
use harmony::{
inventory::Inventory,
maestro::Maestro,
@ -41,9 +43,30 @@ async fn main() {
],
);
let service_monitor_endpoint = ServiceMonitorEndpoint {
port: Some("80".to_string()),
path: "/metrics".to_string(),
scheme: HTTPScheme::HTTP,
..Default::default()
};
let service_monitor = ServiceMonitor {
name: "test-service-monitor".to_string(),
selector: Selector {
match_labels: HashMap::new(),
match_expressions: vec![MatchExpression {
key: "test".to_string(),
operator: Operator::In,
values: vec!["test-service".to_string()],
}],
},
endpoints: vec![service_monitor_endpoint],
..Default::default()
};
let alerting_score = HelmPrometheusAlertingScore {
receivers: vec![Box::new(discord_receiver)],
rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
service_monitors: vec![service_monitor],
};
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),

View File

@ -26,6 +26,7 @@ pub struct KubePrometheusConfig {
pub prometheus_operator: bool,
pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
pub additional_service_monitors: Vec<ServiceMonitor>,
}
impl KubePrometheusConfig {
pub fn new() -> Self {
@ -49,6 +50,7 @@ impl KubePrometheusConfig {
kube_scheduler: false,
alert_receiver_configs: vec![],
alert_rules: vec![],
additional_service_monitors: vec![],
}
}
}

View File

@ -12,7 +12,7 @@ use crate::modules::{
helm::chart::HelmChartScore,
monitoring::kube_prometheus::types::{
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
AlertManagerRoute, AlertManagerValues,
AlertManagerRoute, AlertManagerValues, PrometheusConfig,
},
};
@ -101,11 +101,26 @@ nodeExporter:
enabled: {node_exporter}
prometheusOperator:
enabled: {prometheus_operator}
prometheus:
enabled: {prometheus}
"#,
);
let prometheus_config =
crate::modules::monitoring::kube_prometheus::types::PrometheusConfigValues {
prometheus: PrometheusConfig {
prometheus: bool::from_str(prometheus.as_str()).expect("couldn't parse bool"),
additional_service_monitors: config.additional_service_monitors.clone(),
},
};
let prometheus_config_yaml =
serde_yaml::to_string(&prometheus_config).expect("Failed to serialize YAML");
debug!(
"serialized prometheus config: \n {:#}",
prometheus_config_yaml
);
values.push_str(&prometheus_config_yaml);
// add required null receiver for prometheus alert manager
let mut null_receiver = Mapping::new();
null_receiver.insert(

View File

@ -4,6 +4,7 @@ use serde::Serialize;
use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
use crate::{
modules::monitoring::kube_prometheus::types::ServiceMonitor,
score::Score,
topology::{
HelmCommand, Topology,
@ -15,14 +16,18 @@ use crate::{
pub struct HelmPrometheusAlertingScore {
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
pub service_monitors: Vec<ServiceMonitor>,
}
impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
config
.try_lock()
.expect("couldn't lock config")
.additional_service_monitors = self.service_monitors.clone();
Box::new(AlertingInterpret {
sender: Prometheus {
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
},
sender: Prometheus { config },
receivers: self.receivers.clone(),
rules: self.rules.clone(),
})

View File

@ -53,3 +53,202 @@ pub struct AlertManagerAdditionalPromRules {
pub struct AlertGroup {
pub groups: Vec<AlertManagerRuleGroup>,
}
#[derive(Debug, Clone, Serialize)]
pub enum HTTPScheme {
#[serde(rename = "http")]
HTTP,
#[serde(rename = "https")]
HTTPS,
johnride marked this conversation as resolved Outdated

Not a String, should be a path type of some sort. I think we already handle this somewhere else correctly.

You might have to implement Serialize for it or wrap it into another type that Serializes as a String but that's ok.

Not a String, should be a path type of some sort. I think we already handle this somewhere else correctly. You might have to implement Serialize for it or wrap it into another type that Serializes as a String but that's ok.
}
#[derive(Debug, Clone, Serialize)]
pub enum Operator {
In,
NotIn,
Exists,
DoesNotExist,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfigValues {
pub prometheus: PrometheusConfig,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfig {
pub prometheus: bool,
pub additional_service_monitors: Vec<ServiceMonitor>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorTLSConfig {
// ## Path to the CA file
// ##
pub ca_file: Option<String>,
// ## Path to client certificate file
// ##
pub cert_file: Option<String>,
// ## Skip certificate verification
// ##
pub insecure_skip_verify: Option<bool>,
// ## Path to client key file
// ##

This should be a specific type that validates the path

This should be a specific type that validates the path
pub key_file: Option<String>,
// ## Server name used to verify host name
// ##
taha marked this conversation as resolved Outdated

This should be an enum :

pub enum URLScheme {
 HTTP,
 HTTPS,
 // Maybe others such as :
 FILE,
 FTP,
 OTHER(String), // With this we are both usable with more frequent schemes and extensible 
}

impl Display for URLScheme {
 // TODO
}
 
This should be an enum : ```rust pub enum URLScheme { HTTP, HTTPS, // Maybe others such as : FILE, FTP, OTHER(String), // With this we are both usable with more frequent schemes and extensible } impl Display for URLScheme { // TODO }
pub server_name: Option<String>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorEndpoint {
// ## Name of the endpoint's service port
// ## Mutually exclusive with targetPort
pub port: Option<String>,
// ## Name or number of the endpoint's target port
// ## Mutually exclusive with port
pub target_port: Option<String>,
// ## File containing bearer token to be used when scraping targets
// ##
pub bearer_token_file: Option<String>,
// ## Interval at which metrics should be scraped
// ##
pub interval: Option<String>,
// ## HTTP path to scrape for metrics
// ##
pub path: String,
// ## HTTP scheme to use for scraping
// ##
pub scheme: HTTPScheme,
taha marked this conversation as resolved Outdated

I guess operator is not any string? Probably should be an enum too.

I guess operator is not any string? Probably should be an enum too.
// ## TLS configuration to use when scraping the endpoint
// ##
pub tls_config: Option<ServiceMonitorTLSConfig>,
// ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
// ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
// ##
// # - action: keep
// # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
// # sourceLabels: [__name__]
pub metric_relabelings: Vec<Mapping>,
// ## RelabelConfigs to apply to samples before scraping
// ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
// ##
// # - sourceLabels: [__meta_kubernetes_pod_node_name]
// # separator: ;
// # regex: ^(.*)$
// # targetLabel: nodename
// # replacement: $1
// # action: replace
pub relabelings: Vec<Mapping>,

I think we already do have a Label type somewhere. I think it would be more appropriate here than String. That's true for all the label related fields in this file.

This Label type might not be fully compatible in its current form/place but it is definitely a semantic that we will see very often in various use cases and implementations. I think it is worth for us to maintain a Label type which we can eventually provide very interesting functionnality for such as search, tracking, matching, versionning, etc.

I think we already do have a Label type somewhere. I think it would be more appropriate here than String. That's true for all the label related fields in this file. This Label type might not be fully compatible in its current form/place but it is definitely a semantic that we will see very often in various use cases and implementations. I think it is worth for us to maintain a Label type which we can eventually provide very interesting functionnality for such as search, tracking, matching, versionning, etc.
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct MatchExpression {
pub key: String,
pub operator: Operator,
pub values: Vec<String>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Selector {
// # label selector for services
pub match_labels: HashMap<String, String>,
pub match_expressions: Vec<MatchExpression>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitor {
pub name: String,
// # Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from the chart
pub additional_labels: Option<Mapping>,
// # Service label for use in assembling a job name of the form <label value>-<port>
// # If no label is specified, the service name is used.
pub job_label: Option<String>,
// # labels to transfer from the kubernetes service to the target
pub target_labels: Vec<String>,
// # labels to transfer from the kubernetes pods to the target
pub pod_target_labels: Vec<String>,
// # Label selector for services to which this ServiceMonitor applies
// # Example which selects all services to be monitored
// # with label "monitoredby" with values any of "example-service-1" or "example-service-2"
// matchExpressions:
// - key: "monitoredby"
// operator: In
// values:
// - example-service-1
// - example-service-2
pub selector: Selector,
// # Namespaces from which services are selected
// # Match any namespace
// any: bool,
// # Explicit list of namespace names to select
// matchNames: Vec,
pub namespace_selector: Option<Mapping>,
// # Endpoints of the selected service to be monitored
pub endpoints: Vec<ServiceMonitorEndpoint>,
// # Fallback scrape protocol used by Prometheus for scraping metrics
// # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.ScrapeProtocol
pub fallback_scrape_protocol: Option<String>,
}
impl Default for ServiceMonitor {
fn default() -> Self {
Self {
name: Default::default(),
additional_labels: Default::default(),
job_label: Default::default(),
target_labels: Default::default(),
pod_target_labels: Default::default(),
selector: Selector {
match_labels: HashMap::new(),
match_expressions: vec![],
},
namespace_selector: Default::default(),
endpoints: Default::default(),
fallback_scrape_protocol: Default::default(),
}
}
}
impl Default for ServiceMonitorEndpoint {
fn default() -> Self {
Self {
port: Some("80".to_string()),
target_port: Default::default(),
bearer_token_file: Default::default(),
interval: Default::default(),
path: "/metrics".to_string(),
scheme: HTTPScheme::HTTP,
tls_config: Default::default(),
metric_relabelings: Default::default(),
relabelings: Default::default(),
}
}
}