feat: add service monitors support to prom #66
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1160,6 +1160,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
@ -8,5 +8,6 @@ license.workspace = true
|
||||
[dependencies]
|
||||
harmony = { version = "0.1.0", path = "../../harmony" }
|
||||
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
|
||||
harmony_macros = { version = "0.1.0", path = "../../harmony_macros" }
|
||||
tokio.workspace = true
|
||||
url.workspace = true
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
maestro::Maestro,
|
||||
@ -41,9 +43,30 @@ async fn main() {
|
||||
],
|
||||
);
|
||||
|
||||
let service_monitor_endpoint = ServiceMonitorEndpoint {
|
||||
port: Some("80".to_string()),
|
||||
path: "/metrics".to_string(),
|
||||
scheme: HTTPScheme::HTTP,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let service_monitor = ServiceMonitor {
|
||||
name: "test-service-monitor".to_string(),
|
||||
selector: Selector {
|
||||
match_labels: HashMap::new(),
|
||||
match_expressions: vec![MatchExpression {
|
||||
key: "test".to_string(),
|
||||
operator: Operator::In,
|
||||
values: vec!["test-service".to_string()],
|
||||
}],
|
||||
},
|
||||
endpoints: vec![service_monitor_endpoint],
|
||||
..Default::default()
|
||||
};
|
||||
let alerting_score = HelmPrometheusAlertingScore {
|
||||
receivers: vec![Box::new(discord_receiver)],
|
||||
rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
|
||||
service_monitors: vec![service_monitor],
|
||||
};
|
||||
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
|
||||
Inventory::autoload(),
|
||||
|
||||
@ -26,6 +26,7 @@ pub struct KubePrometheusConfig {
|
||||
pub prometheus_operator: bool,
|
||||
pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
|
||||
pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
|
||||
pub additional_service_monitors: Vec<ServiceMonitor>,
|
||||
}
|
||||
impl KubePrometheusConfig {
|
||||
pub fn new() -> Self {
|
||||
@ -49,6 +50,7 @@ impl KubePrometheusConfig {
|
||||
kube_scheduler: false,
|
||||
alert_receiver_configs: vec![],
|
||||
alert_rules: vec![],
|
||||
additional_service_monitors: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -12,7 +12,7 @@ use crate::modules::{
|
||||
helm::chart::HelmChartScore,
|
||||
monitoring::kube_prometheus::types::{
|
||||
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
|
||||
AlertManagerRoute, AlertManagerValues,
|
||||
AlertManagerRoute, AlertManagerValues, PrometheusConfig,
|
||||
},
|
||||
};
|
||||
|
||||
@ -101,11 +101,26 @@ nodeExporter:
|
||||
enabled: {node_exporter}
|
||||
prometheusOperator:
|
||||
enabled: {prometheus_operator}
|
||||
prometheus:
|
||||
enabled: {prometheus}
|
||||
|
||||
"#,
|
||||
);
|
||||
|
||||
let prometheus_config =
|
||||
crate::modules::monitoring::kube_prometheus::types::PrometheusConfigValues {
|
||||
prometheus: PrometheusConfig {
|
||||
prometheus: bool::from_str(prometheus.as_str()).expect("couldn't parse bool"),
|
||||
additional_service_monitors: config.additional_service_monitors.clone(),
|
||||
},
|
||||
};
|
||||
let prometheus_config_yaml =
|
||||
serde_yaml::to_string(&prometheus_config).expect("Failed to serialize YAML");
|
||||
|
||||
debug!(
|
||||
"serialized prometheus config: \n {:#}",
|
||||
prometheus_config_yaml
|
||||
);
|
||||
values.push_str(&prometheus_config_yaml);
|
||||
|
||||
// add required null receiver for prometheus alert manager
|
||||
let mut null_receiver = Mapping::new();
|
||||
null_receiver.insert(
|
||||
|
||||
@ -4,6 +4,7 @@ use serde::Serialize;
|
||||
|
||||
use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
|
||||
use crate::{
|
||||
modules::monitoring::kube_prometheus::types::ServiceMonitor,
|
||||
score::Score,
|
||||
topology::{
|
||||
HelmCommand, Topology,
|
||||
@ -15,14 +16,18 @@ use crate::{
|
||||
pub struct HelmPrometheusAlertingScore {
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
|
||||
pub service_monitors: Vec<ServiceMonitor>,
|
||||
}
|
||||
|
||||
impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
|
||||
config
|
||||
.try_lock()
|
||||
.expect("couldn't lock config")
|
||||
.additional_service_monitors = self.service_monitors.clone();
|
||||
Box::new(AlertingInterpret {
|
||||
sender: Prometheus {
|
||||
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
|
||||
},
|
||||
sender: Prometheus { config },
|
||||
receivers: self.receivers.clone(),
|
||||
rules: self.rules.clone(),
|
||||
})
|
||||
|
||||
@ -53,3 +53,202 @@ pub struct AlertManagerAdditionalPromRules {
|
||||
pub struct AlertGroup {
|
||||
pub groups: Vec<AlertManagerRuleGroup>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub enum HTTPScheme {
|
||||
#[serde(rename = "http")]
|
||||
HTTP,
|
||||
#[serde(rename = "https")]
|
||||
HTTPS,
|
||||
|
johnride marked this conversation as resolved
Outdated
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub enum Operator {
|
||||
In,
|
||||
NotIn,
|
||||
Exists,
|
||||
DoesNotExist,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PrometheusConfigValues {
|
||||
pub prometheus: PrometheusConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PrometheusConfig {
|
||||
pub prometheus: bool,
|
||||
pub additional_service_monitors: Vec<ServiceMonitor>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ServiceMonitorTLSConfig {
|
||||
// ## Path to the CA file
|
||||
// ##
|
||||
pub ca_file: Option<String>,
|
||||
|
||||
// ## Path to client certificate file
|
||||
// ##
|
||||
pub cert_file: Option<String>,
|
||||
|
||||
// ## Skip certificate verification
|
||||
// ##
|
||||
pub insecure_skip_verify: Option<bool>,
|
||||
|
||||
// ## Path to client key file
|
||||
// ##
|
||||
|
johnride
commented
This should be a specific type that validates the path This should be a specific type that validates the path
|
||||
pub key_file: Option<String>,
|
||||
|
||||
// ## Server name used to verify host name
|
||||
// ##
|
||||
|
taha marked this conversation as resolved
Outdated
johnride
commented
This should be an enum :
This should be an enum :
```rust
pub enum URLScheme {
HTTP,
HTTPS,
// Maybe others such as :
FILE,
FTP,
OTHER(String), // With this we are both usable with more frequent schemes and extensible
}
impl Display for URLScheme {
// TODO
}
|
||||
pub server_name: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ServiceMonitorEndpoint {
|
||||
// ## Name of the endpoint's service port
|
||||
// ## Mutually exclusive with targetPort
|
||||
pub port: Option<String>,
|
||||
|
||||
// ## Name or number of the endpoint's target port
|
||||
// ## Mutually exclusive with port
|
||||
pub target_port: Option<String>,
|
||||
|
||||
// ## File containing bearer token to be used when scraping targets
|
||||
// ##
|
||||
pub bearer_token_file: Option<String>,
|
||||
|
||||
// ## Interval at which metrics should be scraped
|
||||
// ##
|
||||
pub interval: Option<String>,
|
||||
|
||||
// ## HTTP path to scrape for metrics
|
||||
// ##
|
||||
pub path: String,
|
||||
|
||||
// ## HTTP scheme to use for scraping
|
||||
// ##
|
||||
pub scheme: HTTPScheme,
|
||||
|
||||
|
taha marked this conversation as resolved
Outdated
johnride
commented
I guess operator is not any string? Probably should be an enum too. I guess operator is not any string? Probably should be an enum too.
|
||||
// ## TLS configuration to use when scraping the endpoint
|
||||
// ##
|
||||
pub tls_config: Option<ServiceMonitorTLSConfig>,
|
||||
|
||||
// ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
|
||||
// ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
|
||||
// ##
|
||||
// # - action: keep
|
||||
// # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
|
||||
// # sourceLabels: [__name__]
|
||||
pub metric_relabelings: Vec<Mapping>,
|
||||
|
||||
// ## RelabelConfigs to apply to samples before scraping
|
||||
// ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
|
||||
// ##
|
||||
// # - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
// # separator: ;
|
||||
// # regex: ^(.*)$
|
||||
// # targetLabel: nodename
|
||||
// # replacement: $1
|
||||
// # action: replace
|
||||
pub relabelings: Vec<Mapping>,
|
||||
|
johnride
commented
I think we already do have a Label type somewhere. I think it would be more appropriate here than String. That's true for all the label related fields in this file. This Label type might not be fully compatible in its current form/place but it is definitely a semantic that we will see very often in various use cases and implementations. I think it is worth for us to maintain a Label type which we can eventually provide very interesting functionnality for such as search, tracking, matching, versionning, etc. I think we already do have a Label type somewhere. I think it would be more appropriate here than String. That's true for all the label related fields in this file.
This Label type might not be fully compatible in its current form/place but it is definitely a semantic that we will see very often in various use cases and implementations. I think it is worth for us to maintain a Label type which we can eventually provide very interesting functionnality for such as search, tracking, matching, versionning, etc.
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MatchExpression {
|
||||
pub key: String,
|
||||
pub operator: Operator,
|
||||
pub values: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Selector {
|
||||
// # label selector for services
|
||||
pub match_labels: HashMap<String, String>,
|
||||
pub match_expressions: Vec<MatchExpression>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ServiceMonitor {
|
||||
pub name: String,
|
||||
|
||||
// # Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from the chart
|
||||
pub additional_labels: Option<Mapping>,
|
||||
|
||||
// # Service label for use in assembling a job name of the form <label value>-<port>
|
||||
// # If no label is specified, the service name is used.
|
||||
pub job_label: Option<String>,
|
||||
|
||||
// # labels to transfer from the kubernetes service to the target
|
||||
pub target_labels: Vec<String>,
|
||||
|
||||
// # labels to transfer from the kubernetes pods to the target
|
||||
pub pod_target_labels: Vec<String>,
|
||||
|
||||
// # Label selector for services to which this ServiceMonitor applies
|
||||
// # Example which selects all services to be monitored
|
||||
// # with label "monitoredby" with values any of "example-service-1" or "example-service-2"
|
||||
// matchExpressions:
|
||||
// - key: "monitoredby"
|
||||
// operator: In
|
||||
// values:
|
||||
// - example-service-1
|
||||
// - example-service-2
|
||||
pub selector: Selector,
|
||||
|
||||
// # Namespaces from which services are selected
|
||||
// # Match any namespace
|
||||
// any: bool,
|
||||
// # Explicit list of namespace names to select
|
||||
// matchNames: Vec,
|
||||
pub namespace_selector: Option<Mapping>,
|
||||
|
||||
// # Endpoints of the selected service to be monitored
|
||||
pub endpoints: Vec<ServiceMonitorEndpoint>,
|
||||
|
||||
// # Fallback scrape protocol used by Prometheus for scraping metrics
|
||||
// # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.ScrapeProtocol
|
||||
pub fallback_scrape_protocol: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for ServiceMonitor {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: Default::default(),
|
||||
additional_labels: Default::default(),
|
||||
job_label: Default::default(),
|
||||
target_labels: Default::default(),
|
||||
pod_target_labels: Default::default(),
|
||||
selector: Selector {
|
||||
match_labels: HashMap::new(),
|
||||
match_expressions: vec![],
|
||||
},
|
||||
namespace_selector: Default::default(),
|
||||
endpoints: Default::default(),
|
||||
fallback_scrape_protocol: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ServiceMonitorEndpoint {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
port: Some("80".to_string()),
|
||||
target_port: Default::default(),
|
||||
bearer_token_file: Default::default(),
|
||||
interval: Default::default(),
|
||||
path: "/metrics".to_string(),
|
||||
scheme: HTTPScheme::HTTP,
|
||||
tls_config: Default::default(),
|
||||
metric_relabelings: Default::default(),
|
||||
relabelings: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user
This should not be a String; it should be a path type of some sort. I think we already handle this correctly somewhere else.
You might have to implement Serialize for it, or wrap it in another type that serializes as a String, but that's OK.