feat: add service monitors support to prom (#66)

Co-authored-by: tahahawa <tahahawa@gmail.com>
Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/66
Co-authored-by: taha <taha@noreply.git.nationtech.io>
Co-committed-by: taha <taha@noreply.git.nationtech.io>
parent 2ff3f4afa9
commit ab69a2c264

Cargo.lock (generated)
@@ -1160,6 +1160,7 @@ version = "0.1.0"
dependencies = [
 "harmony",
 "harmony_cli",
 "harmony_macros",
 "tokio",
 "url",
]
@@ -8,5 +8,6 @@ license.workspace = true

[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
harmony_macros = { version = "0.1.0", path = "../../harmony_macros" }
tokio.workspace = true
url.workspace = true
@@ -1,3 +1,5 @@
use std::collections::HashMap;

use harmony::{
    inventory::Inventory,
    maestro::Maestro,
@@ -41,9 +43,30 @@ async fn main() {
        ],
    );

    let service_monitor_endpoint = ServiceMonitorEndpoint {
        port: Some("80".to_string()),
        path: "/metrics".to_string(),
        scheme: HTTPScheme::HTTP,
        ..Default::default()
    };

    let service_monitor = ServiceMonitor {
        name: "test-service-monitor".to_string(),
        selector: Selector {
            match_labels: HashMap::new(),
            match_expressions: vec![MatchExpression {
                key: "test".to_string(),
                operator: Operator::In,
                values: vec!["test-service".to_string()],
            }],
        },
        endpoints: vec![service_monitor_endpoint],
        ..Default::default()
    };
    let alerting_score = HelmPrometheusAlertingScore {
        receivers: vec![Box::new(discord_receiver)],
        rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
        service_monitors: vec![service_monitor],
    };
    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
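For comparison with the matchExpressions-based example above, a plain label-based selector can be built with the same types. This is a minimal sketch, not part of this commit; the import path is assumed from the crate::modules::monitoring::kube_prometheus::types layout shown later in this diff.

use std::collections::HashMap;

// Assumed import path, inferred from the module layout in this diff.
use harmony::modules::monitoring::kube_prometheus::types::{
    HTTPScheme, Selector, ServiceMonitor, ServiceMonitorEndpoint,
};

fn monitor_by_label() -> ServiceMonitor {
    // Select services carrying the label app=test-service instead of a matchExpressions rule.
    let mut labels = HashMap::new();
    labels.insert("app".to_string(), "test-service".to_string());

    ServiceMonitor {
        name: "test-service-monitor".to_string(),
        selector: Selector {
            match_labels: labels,
            match_expressions: vec![],
        },
        endpoints: vec![ServiceMonitorEndpoint {
            port: Some("8080".to_string()),
            path: "/metrics".to_string(),
            scheme: HTTPScheme::HTTP,
            ..Default::default()
        }],
        ..Default::default()
    }
}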
@@ -26,6 +26,7 @@ pub struct KubePrometheusConfig {
    pub prometheus_operator: bool,
    pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
    pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
    pub additional_service_monitors: Vec<ServiceMonitor>,
}
impl KubePrometheusConfig {
    pub fn new() -> Self {
@@ -49,6 +50,7 @@ impl KubePrometheusConfig {
            kube_scheduler: false,
            alert_receiver_configs: vec![],
            alert_rules: vec![],
            additional_service_monitors: vec![],
        }
    }
}
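The new field starts empty on construction and is populated by callers afterwards. A minimal usage sketch, not part of this commit, with import paths assumed from the module layout in this diff:

// Assumed import paths.
use harmony::modules::monitoring::kube_prometheus::helm::config::KubePrometheusConfig;
use harmony::modules::monitoring::kube_prometheus::types::ServiceMonitor;

fn main() {
    let mut config = KubePrometheusConfig::new();
    // Append a monitor after construction; new() leaves the list empty.
    config
        .additional_service_monitors
        .push(ServiceMonitor::default());
    assert_eq!(config.additional_service_monitors.len(), 1);
}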
@@ -12,7 +12,7 @@ use crate::modules::{
    helm::chart::HelmChartScore,
    monitoring::kube_prometheus::types::{
        AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
-       AlertManagerRoute, AlertManagerValues,
+       AlertManagerRoute, AlertManagerValues, PrometheusConfig,
    },
};

@@ -101,11 +101,26 @@ nodeExporter:
  enabled: {node_exporter}
prometheusOperator:
  enabled: {prometheus_operator}
prometheus:
  enabled: {prometheus}

"#,
    );

    let prometheus_config =
        crate::modules::monitoring::kube_prometheus::types::PrometheusConfigValues {
            prometheus: PrometheusConfig {
                prometheus: bool::from_str(prometheus.as_str()).expect("couldn't parse bool"),
                additional_service_monitors: config.additional_service_monitors.clone(),
            },
        };
    let prometheus_config_yaml =
        serde_yaml::to_string(&prometheus_config).expect("Failed to serialize YAML");

    debug!(
        "serialized prometheus config: \n {:#}",
        prometheus_config_yaml
    );
    values.push_str(&prometheus_config_yaml);

    // add required null receiver for prometheus alert manager
    let mut null_receiver = Mapping::new();
    null_receiver.insert(
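The detour through PrometheusConfigValues exists so the monitors serialize under a top-level prometheus: key with camelCase field names, which is the shape appended to the chart values. A sketch of the serialized output, not part of this commit, assuming the types are reachable at the path used above:

// Assumed import path, matching the one used in the diff above.
use harmony::modules::monitoring::kube_prometheus::types::{
    PrometheusConfig, PrometheusConfigValues,
};

fn main() {
    let values = PrometheusConfigValues {
        prometheus: PrometheusConfig {
            prometheus: true,
            additional_service_monitors: vec![],
        },
    };

    // serde's rename_all = "camelCase" yields:
    //
    // prometheus:
    //   prometheus: true
    //   additionalServiceMonitors: []
    println!("{}", serde_yaml::to_string(&values).expect("Failed to serialize YAML"));
}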
@@ -4,6 +4,7 @@ use serde::Serialize;

use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
use crate::{
    modules::monitoring::kube_prometheus::types::ServiceMonitor,
    score::Score,
    topology::{
        HelmCommand, Topology,
@@ -15,14 +16,18 @@ use crate::{
pub struct HelmPrometheusAlertingScore {
    pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
    pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
    pub service_monitors: Vec<ServiceMonitor>,
}

impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
    fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
        let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
        config
            .try_lock()
            .expect("couldn't lock config")
            .additional_service_monitors = self.service_monitors.clone();
        Box::new(AlertingInterpret {
-           sender: Prometheus {
-               config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
-           },
+           sender: Prometheus { config },
            receivers: self.receivers.clone(),
            rules: self.rules.clone(),
        })
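The change above threads one shared config into the Prometheus sender instead of constructing a second, empty KubePrometheusConfig that would discard the service monitors. The sharing pattern in isolation, using only std types; the Config struct here is a stand-in, not the real type:

use std::sync::{Arc, Mutex};

// Stand-in for KubePrometheusConfig, only to illustrate the Arc<Mutex<_>> sharing.
#[derive(Default)]
struct Config {
    additional_service_monitors: Vec<String>,
}

fn main() {
    let config = Arc::new(Mutex::new(Config::default()));

    // No other clone of the Arc exists yet, so try_lock succeeds here.
    config
        .try_lock()
        .expect("couldn't lock config")
        .additional_service_monitors = vec!["test-service-monitor".to_string()];

    // The sender holds a clone of the same Arc, so it observes the mutation above.
    let sender_view = Arc::clone(&config);
    assert_eq!(sender_view.lock().unwrap().additional_service_monitors.len(), 1);
}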
@@ -53,3 +53,202 @@ pub struct AlertManagerAdditionalPromRules {
pub struct AlertGroup {
    pub groups: Vec<AlertManagerRuleGroup>,
}

#[derive(Debug, Clone, Serialize)]
pub enum HTTPScheme {
    #[serde(rename = "http")]
    HTTP,
    #[serde(rename = "https")]
    HTTPS,
}

#[derive(Debug, Clone, Serialize)]
pub enum Operator {
    In,
    NotIn,
    Exists,
    DoesNotExist,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfigValues {
    pub prometheus: PrometheusConfig,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfig {
    pub prometheus: bool,
    pub additional_service_monitors: Vec<ServiceMonitor>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorTLSConfig {
    // ## Path to the CA file
    // ##
    pub ca_file: Option<String>,

    // ## Path to client certificate file
    // ##
    pub cert_file: Option<String>,

    // ## Skip certificate verification
    // ##
    pub insecure_skip_verify: Option<bool>,

    // ## Path to client key file
    // ##
    pub key_file: Option<String>,

    // ## Server name used to verify host name
    // ##
    pub server_name: Option<String>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorEndpoint {
    // ## Name of the endpoint's service port
    // ## Mutually exclusive with targetPort
    pub port: Option<String>,

    // ## Name or number of the endpoint's target port
    // ## Mutually exclusive with port
    pub target_port: Option<String>,

    // ## File containing bearer token to be used when scraping targets
    // ##
    pub bearer_token_file: Option<String>,

    // ## Interval at which metrics should be scraped
    // ##
    pub interval: Option<String>,

    // ## HTTP path to scrape for metrics
    // ##
    pub path: String,

    // ## HTTP scheme to use for scraping
    // ##
    pub scheme: HTTPScheme,

    // ## TLS configuration to use when scraping the endpoint
    // ##
    pub tls_config: Option<ServiceMonitorTLSConfig>,

    // ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
    // ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    // ##
    // # - action: keep
    // #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    // #   sourceLabels: [__name__]
    pub metric_relabelings: Vec<Mapping>,

    // ## RelabelConfigs to apply to samples before scraping
    // ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    // ##
    // # - sourceLabels: [__meta_kubernetes_pod_node_name]
    // #   separator: ;
    // #   regex: ^(.*)$
    // #   targetLabel: nodename
    // #   replacement: $1
    // #   action: replace
    pub relabelings: Vec<Mapping>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct MatchExpression {
    pub key: String,
    pub operator: Operator,
    pub values: Vec<String>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Selector {
    // # label selector for services
    pub match_labels: HashMap<String, String>,
    pub match_expressions: Vec<MatchExpression>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitor {
    pub name: String,

    // # Additional labels to set, used for the ServiceMonitorSelector. Together with standard labels from the chart
    pub additional_labels: Option<Mapping>,

    // # Service label for use in assembling a job name of the form <label value>-<port>
    // # If no label is specified, the service name is used.
    pub job_label: Option<String>,

    // # Labels to transfer from the Kubernetes service to the target
    pub target_labels: Vec<String>,

    // # Labels to transfer from the Kubernetes pods to the target
    pub pod_target_labels: Vec<String>,

    // # Label selector for services to which this ServiceMonitor applies
    // # Example which selects all services to be monitored
    // # with label "monitoredby" with values any of "example-service-1" or "example-service-2"
    // matchExpressions:
    //   - key: "monitoredby"
    //     operator: In
    //     values:
    //       - example-service-1
    //       - example-service-2
    pub selector: Selector,

    // # Namespaces from which services are selected
    // # Match any namespace
    // any: bool,
    // # Explicit list of namespace names to select
    // matchNames: Vec,
    pub namespace_selector: Option<Mapping>,

    // # Endpoints of the selected service to be monitored
    pub endpoints: Vec<ServiceMonitorEndpoint>,

    // # Fallback scrape protocol used by Prometheus for scraping metrics
    // # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.ScrapeProtocol
    pub fallback_scrape_protocol: Option<String>,
}

impl Default for ServiceMonitor {
    fn default() -> Self {
        Self {
            name: Default::default(),
            additional_labels: Default::default(),
            job_label: Default::default(),
            target_labels: Default::default(),
            pod_target_labels: Default::default(),
            selector: Selector {
                match_labels: HashMap::new(),
                match_expressions: vec![],
            },
            namespace_selector: Default::default(),
            endpoints: Default::default(),
            fallback_scrape_protocol: Default::default(),
        }
    }
}

impl Default for ServiceMonitorEndpoint {
    fn default() -> Self {
        Self {
            port: Some("80".to_string()),
            target_port: Default::default(),
            bearer_token_file: Default::default(),
            interval: Default::default(),
            path: "/metrics".to_string(),
            scheme: HTTPScheme::HTTP,
            tls_config: Default::default(),
            metric_relabelings: Default::default(),
            relabelings: Default::default(),
        }
    }
}
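To make the commented monitoredby example above concrete: serializing a Selector built from these types produces the matchExpressions shape Prometheus Operator expects. A sketch, not part of this commit, with an assumed import path:

use std::collections::HashMap;

// Assumed import path, matching the module layout used elsewhere in this diff.
use harmony::modules::monitoring::kube_prometheus::types::{MatchExpression, Operator, Selector};

fn main() {
    let selector = Selector {
        match_labels: HashMap::new(),
        match_expressions: vec![MatchExpression {
            key: "monitoredby".to_string(),
            operator: Operator::In,
            values: vec![
                "example-service-1".to_string(),
                "example-service-2".to_string(),
            ],
        }],
    };

    // Serializes via serde_yaml to:
    //
    // matchLabels: {}
    // matchExpressions:
    // - key: monitoredby
    //   operator: In
    //   values:
    //   - example-service-1
    //   - example-service-2
    println!("{}", serde_yaml::to_string(&selector).expect("Failed to serialize YAML"));
}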