diff --git a/examples/monitoring/src/main.rs b/examples/monitoring/src/main.rs index 989b1ec..6d3a5b0 100644 --- a/examples/monitoring/src/main.rs +++ b/examples/monitoring/src/main.rs @@ -51,8 +51,8 @@ async fn main() { let service_monitor_endpoint = ServiceMonitorEndpoint { port: Some("80".to_string()), - path: "/metrics".to_string(), - scheme: HTTPScheme::HTTP, + path: Some("/metrics".to_string()), + scheme: Some(HTTPScheme::HTTP), ..Default::default() }; diff --git a/examples/monitoring_with_tenant/src/main.rs b/examples/monitoring_with_tenant/src/main.rs index ec80542..035d071 100644 --- a/examples/monitoring_with_tenant/src/main.rs +++ b/examples/monitoring_with_tenant/src/main.rs @@ -54,8 +54,8 @@ async fn main() { let service_monitor_endpoint = ServiceMonitorEndpoint { port: Some("80".to_string()), - path: "/metrics".to_string(), - scheme: HTTPScheme::HTTP, + path: Some("/metrics".to_string()), + scheme: Some(HTTPScheme::HTTP), ..Default::default() }; diff --git a/examples/rust/src/main.rs b/examples/rust/src/main.rs index f39fb9f..d2ff2ff 100644 --- a/examples/rust/src/main.rs +++ b/examples/rust/src/main.rs @@ -6,7 +6,7 @@ use harmony::{ modules::{ application::{ ApplicationScore, RustWebFramework, RustWebapp, - features::{ContinuousDelivery, PrometheusApplicationMonitoring}, + features::{ContinuousDelivery, Monitoring}, }, monitoring::alert_channel::{ discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver, @@ -46,9 +46,11 @@ async fn main() { Box::new(ContinuousDelivery { application: application.clone(), }), // TODO add monitoring, backups, multisite ha, etc - Box::new(PrometheusApplicationMonitoring { + Box::new(Monitoring { application: application.clone(), alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)], + service_monitors: vec![], + alert_rules: vec![], }), // TODO add backups, multisite ha, etc ], diff --git a/harmony/src/modules/application/features/monitoring.rs 
b/harmony/src/modules/application/features/monitoring.rs index cef26e3..ffd7783 100644 --- a/harmony/src/modules/application/features/monitoring.rs +++ b/harmony/src/modules/application/features/monitoring.rs @@ -1,9 +1,13 @@ use std::sync::Arc; -use async_trait::async_trait; -use base64::{Engine as _, engine::general_purpose}; -use log::{debug, info}; - +use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::{ + build_rule_container_restarting, build_rule_pod_failed, +}; +use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::RuleGroup; +use crate::modules::monitoring::kube_prometheus::service_monitor::{ + ServiceMonitor, ServiceMonitorSpec, +}; +use crate::modules::monitoring::kube_prometheus::types::{Selector, ServiceMonitorEndpoint}; use crate::{ inventory::Inventory, modules::{ @@ -13,7 +17,7 @@ use crate::{ kube_prometheus::{ alert_manager_config::{CRDAlertManager, CRDAlertManagerReceiver}, helm_prometheus_application_alerting::HelmPrometheusApplicationAlertingScore, - types::{NamespaceSelector, ServiceMonitor}, + types::{NamespaceSelector, ServiceMonitor as KubePrometheusServiceMonitor}, }, ntfy::ntfy::NtfyScore, }, @@ -24,23 +28,31 @@ use crate::{ tenant::TenantManager, }, }; +use async_trait::async_trait; +use base64::{Engine as _, engine::general_purpose}; +use kube::api::ObjectMeta; +use log::{debug, info}; #[derive(Debug, Clone)] -pub struct PrometheusApplicationMonitoring { +pub struct Monitoring { pub application: Arc, pub alert_receiver: Vec>, + pub service_monitors: Vec, + pub alert_rules: Vec, } #[async_trait] impl - ApplicationFeature for PrometheusApplicationMonitoring + ApplicationFeature for Monitoring { async fn ensure_installed(&self, topology: &T) -> Result<(), String> { info!("Ensuring monitoring is available for application"); - + let namespace = self.application.name().clone(); let mut alerting_score = HelmPrometheusApplicationAlertingScore { - namespace: self.application.name().clone(), + namespace: 
namespace.clone(), receivers: self.alert_receiver.clone(), + service_monitors: self.service_monitors.clone(), + prometheus_rules: self.alert_rules.clone(), }; let ntfy = NtfyScore { // namespace: topology @@ -91,14 +103,27 @@ impl, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub alertmanager_config_namespace_selector: Option, + + /// Optional pod template metadata (annotations, labels) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pod_metadata: Option, + + /// Optional Alertmanager version the cluster should run + #[serde(default, skip_serializing_if = "Option::is_none")] + pub version: Option, +} + +impl Default for AlertmanagerSpec { + fn default() -> Self { + AlertmanagerSpec { + replicas: 1, + + // Match all AlertmanagerConfigs in the same namespace + alertmanager_config_namespace_selector: None, + + // Empty selector matches all AlertmanagerConfigs in that namespace + alertmanager_config_selector: Some(LabelSelector::default()), + + pod_metadata: None, + version: None, + } + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_default_rules.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_default_rules.rs new file mode 100644 index 0000000..1feb5ed --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_default_rules.rs @@ -0,0 +1,38 @@ +use std::collections::BTreeMap; + +use super::crd_prometheus_rules::Rule; + +pub fn build_rule_container_restarting() -> Rule { + Rule { + alert: Some("ContainerRestarting".into()), + expr: Some("increase(kube_pod_container_status_restarts_total[5m]) > 3".into()), + for_: Some("5m".into()), + labels: Some(BTreeMap::from([("severity".into(), "warning".into())])), + annotations: Some(BTreeMap::from([ + ( + "summary".into(), + "Container is restarting frequently".into(), + ), + ( + "description".into(), + "Container in this namespace is restarting more than 3 times in 5 minutes.".into(), + ), + ])), + } +} + +pub fn build_rule_pod_failed() -> Rule { + Rule { 
+ alert: Some("PodFailed".into()), + expr: Some("kube_pod_status_phase{phase=\"Failed\"} > 0".into()), + for_: Some("0m".into()), + labels: Some(BTreeMap::from([("severity".into(), "critical".into())])), + annotations: Some(BTreeMap::from([ + ("summary".into(), "A pod has failed".into()), + ( + "description".into(), + "One or more pods are in Failed phase.".into(), + ), + ])), + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs new file mode 100644 index 0000000..18074b8 --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs @@ -0,0 +1,170 @@ +use std::collections::BTreeMap; + +use kube::CustomResource; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[kube( + group = "grafana.integreatly.org", + version = "v1beta1", + kind = "Grafana", + plural = "grafanas", + namespaced +)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaSpec { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub config: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub admin_user: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub admin_password: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ingress: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub persistence: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub resources: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaConfig { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub log: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub security: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = 
"camelCase")] +pub struct GrafanaLogConfig { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub mode: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaSecurityConfig { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub admin_user: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub admin_password: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaIngress { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub enabled: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub hosts: Option>, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaPersistence { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub enabled: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub storage_class_name: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub size: Option, +} + +// ------------------------------------------------------------------------------------------------ + +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[kube( + group = "grafana.integreatly.org", + version = "v1beta1", + kind = "GrafanaDashboard", + plural = "grafanadashboards", + namespaced +)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaDashboardSpec { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub resync_period: Option, + + pub instance_selector: LabelSelector, + + pub json: String, +} + +// ------------------------------------------------------------------------------------------------ + +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[kube( + group = 
"grafana.integreatly.org", + version = "v1beta1", + kind = "GrafanaDatasource", + plural = "grafanadatasources", + namespaced +)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaDatasourceSpec { + pub instance_selector: LabelSelector, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub allow_cross_namespace_import: Option, + + pub datasource: GrafanaDatasourceConfig, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaDatasourceConfig { + pub access: String, + pub database: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub json_data: Option>, + pub name: String, + pub r#type: String, + pub url: String, +} + +// ------------------------------------------------------------------------------------------------ + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct LabelSelector { + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub match_labels: BTreeMap, + + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub match_expressions: Vec, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct LabelSelectorRequirement { + pub key: String, + pub operator: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub values: Vec, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct ResourceRequirements { + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub limits: BTreeMap, + + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub requests: BTreeMap, +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheus_rules.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheus_rules.rs new file mode 100644 index 0000000..49bee3b --- /dev/null +++ 
b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheus_rules.rs @@ -0,0 +1,54 @@ +use kube::CustomResource; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use super::crd_default_rules::{build_rule_container_restarting, build_rule_pod_failed}; + +#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)] +#[kube( + group = "monitoring.coreos.com", + version = "v1", + kind = "PrometheusRule", + plural = "prometheusrules", + namespaced +)] +#[serde(rename_all = "camelCase")] +pub struct PrometheusRuleSpec { + pub groups: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct RuleGroup { + pub name: String, + pub rules: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Rule { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub alert: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub expr: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub for_: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub labels: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub annotations: Option>, +} + +impl PrometheusRuleSpec { + pub fn with_default_rules() -> Self { + PrometheusRuleSpec { + groups: vec![RuleGroup { + name: "default.rules".into(), + rules: vec![build_rule_container_restarting(), build_rule_pod_failed()], + }], + } + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheuses.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheuses.rs new file mode 100644 index 0000000..0b9101f --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_prometheuses.rs @@ -0,0 +1,78 @@ +use std::collections::BTreeMap; + +use kube::CustomResource; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use 
crate::modules::monitoring::kube_prometheus::types::Operator; + +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[kube( + group = "monitoring.coreos.com", + version = "v1", + kind = "Prometheus", + plural = "prometheuses", + namespaced +)] +#[serde(rename_all = "camelCase")] +pub struct PrometheusSpec { + pub service_account_name: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub service_monitor_namespace_selector: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub service_monitor_selector: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub service_discovery_role: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pod_monitor_selector: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub rule_selector: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub rule_namespace_selector: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct LabelSelector { + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub match_labels: BTreeMap, + + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub match_expressions: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct LabelSelectorRequirement { + pub key: String, + pub operator: Operator, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub values: Vec, +} + +impl Default for PrometheusSpec { + fn default() -> Self { + PrometheusSpec { + service_account_name: "prometheus".into(), + + // null means "only my namespace" + service_monitor_namespace_selector: None, + + // empty selector means match all ServiceMonitors in that namespace + service_monitor_selector: Some(LabelSelector::default()), + + service_discovery_role: Some("Endpoints".into()), + + pod_monitor_selector: None, + + 
rule_selector: None, + + rule_namespace_selector: Some(LabelSelector::default()), + } + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs new file mode 100644 index 0000000..00864cd --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/grafana_operator.rs @@ -0,0 +1,22 @@ +use std::str::FromStr; + +use non_blank_string_rs::NonBlankString; + +use crate::modules::helm::chart::HelmChartScore; + +pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore { + HelmChartScore { + namespace: Some(NonBlankString::from_str(&ns).unwrap()), + release_name: NonBlankString::from_str("kube-prometheus").unwrap(), + chart_name: NonBlankString::from_str( + "grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator", + ) + .unwrap(), + chart_version: None, + values_overrides: None, + values_yaml: None, + create_namespace: true, + install_only: true, + repository: None, + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs new file mode 100644 index 0000000..ca8a12d --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs @@ -0,0 +1,7 @@ +pub mod crd_alertmanagers; +pub mod crd_default_rules; +pub mod crd_grafana; +pub mod crd_prometheus_rules; +pub mod crd_prometheuses; +pub mod grafana_operator; +pub mod role; diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/role.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/role.rs new file mode 100644 index 0000000..9add9a9 --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/role.rs @@ -0,0 +1,62 @@ +use k8s_openapi::api::{ + core::v1::ServiceAccount, + rbac::v1::{PolicyRule, Role, RoleBinding, RoleRef, Subject}, +}; +use kube::api::ObjectMeta; + +pub fn build_prom_role(role_name: String, namespace: String) -> Role { + Role { + metadata: ObjectMeta { + 
name: Some(role_name), + namespace: Some(namespace), + ..Default::default() + }, + rules: Some(vec![PolicyRule { + api_groups: Some(vec!["".into()]), // core API group + resources: Some(vec!["services".into(), "endpoints".into(), "pods".into()]), + verbs: vec!["get".into(), "list".into(), "watch".into()], + ..Default::default() + }]), + } +} + +pub fn build_prom_rolebinding( + role_name: String, + namespace: String, + service_account_name: String, +) -> RoleBinding { + RoleBinding { + metadata: ObjectMeta { + name: Some(format!("{}-rolebinding", role_name)), + namespace: Some(namespace.clone()), + ..Default::default() + }, + role_ref: RoleRef { + api_group: "rbac.authorization.k8s.io".into(), + kind: "Role".into(), + name: role_name, + }, + subjects: Some(vec![Subject { + kind: "ServiceAccount".into(), + name: service_account_name, + namespace: Some(namespace.clone()), + ..Default::default() + }]), + } +} + +pub fn build_prom_service_account( + service_account_name: String, + namespace: String, +) -> ServiceAccount { + ServiceAccount { + automount_service_account_token: None, + image_pull_secrets: None, + metadata: ObjectMeta { + name: Some(service_account_name), + namespace: Some(namespace), + ..Default::default() + }, + secrets: None, + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs index bbc59d7..1765a64 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/helm_prometheus_application_alerting.rs @@ -1,20 +1,28 @@ +use std::sync::Arc; + use async_trait::async_trait; use kube::{Api, api::ObjectMeta}; -use log::debug; +use log::{debug, info}; use serde::Serialize; use crate::{ data::{Id, Version}, interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, inventory::Inventory, + 
modules::monitoring::kube_prometheus::crd::{ + crd_alertmanagers::{Alertmanager, AlertmanagerSpec}, + crd_prometheuses::{Prometheus, PrometheusSpec}, + role::{build_prom_role, build_prom_rolebinding, build_prom_service_account}, + }, score::Score, - topology::{K8sclient, Topology, oberservability::monitoring::AlertReceiver}, + topology::{K8sclient, Topology, k8s::K8sClient, oberservability::monitoring::AlertReceiver}, }; use super::{ alert_manager_config::{ AlertmanagerConfig, AlertmanagerConfigSpec, CRDAlertManager, CRDAlertManagerReceiver, }, + crd::crd_prometheus_rules::{PrometheusRule, PrometheusRuleSpec, RuleGroup}, prometheus::KubePrometheus, }; @@ -22,6 +30,8 @@ use super::{ pub struct HelmPrometheusApplicationAlertingScore { pub namespace: String, pub receivers: Vec>, + pub service_monitors: Vec, + pub prometheus_rules: Vec, } impl Score for HelmPrometheusApplicationAlertingScore { @@ -29,6 +39,8 @@ impl Score for HelmPrometheusApplicationAlertingScor Box::new(HelmPrometheusApplicationAlertingInterpret { namespace: self.namespace.clone(), receivers: self.receivers.clone(), + service_monitors: self.service_monitors.clone(), + prometheus_rules: self.prometheus_rules.clone(), }) } @@ -41,6 +53,8 @@ impl Score for HelmPrometheusApplicationAlertingScor pub struct HelmPrometheusApplicationAlertingInterpret { pub namespace: String, pub receivers: Vec>, + pub service_monitors: Vec, + pub prometheus_rules: Vec, } #[async_trait] @@ -51,6 +65,8 @@ impl Interpret for HelmPrometheusApplicationAlerting topology: &T, ) -> Result { let client = topology.k8s_client().await.unwrap(); + self.install_prometheus(&client).await?; + self.install_alert_manager(&client).await?; for receiver in self.receivers.iter() { let alertmanager_config: AlertmanagerConfig = receiver .configure_receiver(&client, self.namespace.clone()) @@ -64,6 +80,15 @@ impl Interpret for HelmPrometheusApplicationAlerting InterpretError::new(format!("failed to install receiver: {}", err)) })?; } + 
self.install_rules(self.prometheus_rules.clone(), client.clone()) + .await + .map_err(|err| InterpretError::new(format!("failed to install rules: {}", err)))?; + + debug!("\n\n\n monitors: {:#?}", self.service_monitors.clone()); + for monitor in self.service_monitors.iter() { + self.install_monitor(monitor.clone(), client.clone()) + .await?; + } Ok(Outcome::success(format!("deployed alert channels"))) } @@ -83,3 +108,146 @@ impl Interpret for HelmPrometheusApplicationAlerting todo!() } } + +impl HelmPrometheusApplicationAlertingInterpret { + async fn install_prometheus(&self, client: &Arc) -> Result { + debug!( + "installing crd-prometheuses in namespace {}", + self.namespace.clone() + ); + debug!("building role/rolebinding/serviceaccount for crd-prometheus"); + let rolename = format!("{}-prom", self.namespace.clone()); + let sa_name = format!("{}-prom-sa", self.namespace.clone()); + let role = build_prom_role(rolename.clone(), self.namespace.clone()); + let rolebinding = + build_prom_rolebinding(rolename.clone(), self.namespace.clone(), sa_name.clone()); + let sa = build_prom_service_account(sa_name.clone(), self.namespace.clone()); + let mut prom_spec = PrometheusSpec::default(); + prom_spec.service_account_name = sa_name.clone(); + let prom = Prometheus { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + labels: Some(std::collections::BTreeMap::from([( + "alertmanagerConfig".to_string(), + "enabled".to_string(), + )])), + namespace: Some(self.namespace.clone()), + ..Default::default() + }, + spec: prom_spec, + }; + client + .apply(&role, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + info!( + "installed prometheus role: {:#?} in ns {:#?}", + role.metadata.name.unwrap(), + role.metadata.namespace.unwrap() + ); + client + .apply(&rolebinding, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + info!( + "installed prometheus rolebinding: {:#?} in ns {:#?}", + 
rolebinding.metadata.name.unwrap(), + rolebinding.metadata.namespace.unwrap() + ); + client + .apply(&sa, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + info!( + "installed prometheus service account: {:#?} in ns {:#?}", + sa.metadata.name.unwrap(), + sa.metadata.namespace.unwrap() + ); + client + .apply(&prom, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + info!( + "installed prometheus: {:#?} in ns {:#?}", + &prom.metadata.name.clone().unwrap(), + &prom.metadata.namespace.clone().unwrap() + ); + + Ok(Outcome::success(format!( + "successfully deployed crd-prometheus {:#?}", + prom + ))) + } + + async fn install_alert_manager( + &self, + client: &Arc, + ) -> Result { + let am = Alertmanager { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + labels: Some(std::collections::BTreeMap::from([( + "alertmanagerConfig".to_string(), + "enabled".to_string(), + )])), + namespace: Some(self.namespace.clone()), + ..Default::default() + }, + spec: AlertmanagerSpec::default(), + }; + client + .apply(&am, Some(&self.namespace.clone())) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + Ok(Outcome::success(format!( + "successfully deployed service monitor {:#?}", + am.metadata.name + ))) + } + + async fn install_monitor( + &self, + monitor: super::service_monitor::ServiceMonitor, + client: Arc, + ) -> Result { + debug!("service monitor: \n{:#?}", monitor.clone()); + let namespace = self.namespace.clone(); + client + .apply(&monitor, Some(&namespace)) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + Ok(Outcome::success(format!( + "successfully deployed service monitor {:#?}", + monitor.metadata.name + ))) + } + + async fn install_rules( + &self, + rules: Vec, + client: Arc, + ) -> Result { + let prom_rule_spec = PrometheusRuleSpec { groups: rules }; + + let prom_rules = PrometheusRule { + metadata: ObjectMeta { + name: 
Some(self.namespace.clone()), + labels: Some(std::collections::BTreeMap::from([( + "alertmanagerConfig".to_string(), + "enabled".to_string(), + )])), + namespace: Some(self.namespace.clone()), + ..Default::default() + }, + spec: prom_rule_spec, + }; + client + .apply(&prom_rules, Some(&self.namespace)) + .await + .map_err(|e| InterpretError::new(e.to_string()))?; + Ok(Outcome::success(format!( + "successfully deployed service monitor {:#?}", + prom_rules.metadata.name + ))) + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/mod.rs b/harmony/src/modules/monitoring/kube_prometheus/mod.rs index 4843509..5fc0fea 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/mod.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/mod.rs @@ -1,6 +1,8 @@ pub mod alert_manager_config; +pub mod crd; pub mod helm; pub mod helm_prometheus_alert_score; pub mod helm_prometheus_application_alerting; pub mod prometheus; +pub mod service_monitor; pub mod types; diff --git a/harmony/src/modules/monitoring/kube_prometheus/service_monitor.rs b/harmony/src/modules/monitoring/kube_prometheus/service_monitor.rs new file mode 100644 index 0000000..049e32d --- /dev/null +++ b/harmony/src/modules/monitoring/kube_prometheus/service_monitor.rs @@ -0,0 +1,89 @@ +use std::collections::{BTreeMap, HashMap}; + +use kube::{CustomResource, Resource, api::ObjectMeta}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::interpret::InterpretError; + +use super::types::{ + HTTPScheme, MatchExpression, NamespaceSelector, Operator, Selector, + ServiceMonitor as KubeServiceMonitor, ServiceMonitorEndpoint, +}; + +/// This is the top-level struct for the ServiceMonitor Custom Resource. +/// The `#[derive(CustomResource)]` macro handles all the boilerplate for you, +/// including the `impl Resource`. 
+#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[kube( + group = "monitoring.coreos.com", + version = "v1", + kind = "ServiceMonitor", + plural = "servicemonitors", + namespaced +)] +#[serde(rename_all = "camelCase")] +pub struct ServiceMonitorSpec { + /// A label selector to select services to monitor. + pub selector: Selector, + + /// A list of endpoints on the selected services to be monitored. + pub endpoints: Vec, + + /// Selector to select which namespaces the Kubernetes Endpoints objects + /// are discovered from. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub namespace_selector: Option, + + /// The label to use to retrieve the job name from. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub job_label: Option, + + /// Pod-based target labels to transfer from the Kubernetes Pod onto the target. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub pod_target_labels: Vec, + + /// TargetLabels transfers labels on the Kubernetes Service object to the target. 
+ #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub target_labels: Vec, +} + +impl Default for ServiceMonitorSpec { + fn default() -> Self { + let mut labels = HashMap::new(); + Self { + selector: Selector { + match_labels: { labels }, + match_expressions: vec![MatchExpression { + key: "app.kubernetes.io/name".into(), + operator: Operator::Exists, + values: vec![], + }], + }, + endpoints: vec![ServiceMonitorEndpoint { + port: Some("http".to_string()), + path: Some("/metrics".into()), + interval: Some("30s".into()), + scheme: Some(HTTPScheme::HTTP), + ..Default::default() + }], + namespace_selector: None, // only the same namespace + job_label: Some("app".into()), + pod_target_labels: vec![], + target_labels: vec![], + } + } +} + +impl From for ServiceMonitorSpec { + fn from(value: KubeServiceMonitor) -> Self { + Self { + selector: value.selector, + endpoints: value.endpoints, + namespace_selector: value.namespace_selector, + job_label: value.job_label, + pod_target_labels: value.pod_target_labels, + target_labels: value.target_labels, + } + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/types.rs b/harmony/src/modules/monitoring/kube_prometheus/types.rs index c9209dc..2423e5d 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/types.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/types.rs @@ -1,7 +1,8 @@ use std::collections::{BTreeMap, HashMap}; use async_trait::async_trait; -use serde::Serialize; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use serde_yaml::{Mapping, Sequence, Value}; use crate::modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup; @@ -94,7 +95,7 @@ pub struct AlertGroup { pub groups: Vec, } -#[derive(Debug, Clone, Serialize)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub enum HTTPScheme { #[serde(rename = "http")] HTTP, @@ -102,7 +103,7 @@ pub enum HTTPScheme { HTTPS, } -#[derive(Debug, Clone, Serialize)] +#[derive(Debug, Clone, 
Serialize, Deserialize, JsonSchema)] pub enum Operator { In, NotIn, @@ -147,70 +148,79 @@ pub struct ServiceMonitorTLSConfig { pub server_name: Option, } -#[derive(Debug, Clone, Serialize)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] #[serde(rename_all = "camelCase")] pub struct ServiceMonitorEndpoint { - // ## Name of the endpoint's service port - // ## Mutually exclusive with targetPort + /// Name of the service port this endpoint refers to. pub port: Option, - // ## Name or number of the endpoint's target port - // ## Mutually exclusive with port - pub target_port: Option, - - // ## File containing bearer token to be used when scraping targets - // ## - pub bearer_token_file: Option, - - // ## Interval at which metrics should be scraped - // ## + /// Interval at which metrics should be scraped. + #[serde(default, skip_serializing_if = "Option::is_none")] pub interval: Option, - // ## HTTP path to scrape for metrics - // ## - pub path: String, + /// The HTTP path to scrape for metrics. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub path: Option, - // ## HTTP scheme to use for scraping - // ## - pub scheme: HTTPScheme, + /// HTTP scheme to use for scraping. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub scheme: Option, - // ## TLS configuration to use when scraping the endpoint - // ## - pub tls_config: Option, + /// Relabelings to apply to samples before scraping. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub relabelings: Vec, - // ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. 
- // ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig - // ## - // # - action: keep - // # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' - // # sourceLabels: [__name__] - pub metric_relabelings: Vec, - - // ## RelabelConfigs to apply to samples before scraping - // ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig - // ## - // # - sourceLabels: [__meta_kubernetes_pod_node_name] - // # separator: ; - // # regex: ^(.*)$ - // # targetLabel: nodename - // # replacement: $1 - // # action: replace - pub relabelings: Vec, + /// MetricRelabelings to apply to samples after scraping, but before ingestion. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub metric_relabelings: Vec, } -#[derive(Debug, Clone, Serialize)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct RelabelConfig { + /// The action to perform based on the regex matching. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub action: Option, + + /// A list of labels from which to extract values. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub source_labels: Vec, + + /// Separator to be used when concatenating source_labels. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub separator: Option, + + /// The label to which the resulting value is written. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub target_label: Option, + + /// A regular expression to match against the concatenated source label values. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub regex: Option, + + /// The replacement value to use. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub replacement: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct MatchExpression { pub key: String, - pub operator: Operator, + pub operator: Operator, // "In", "NotIn", "Exists", "DoesNotExist" + #[serde(default, skip_serializing_if = "Vec::is_empty")] pub values: Vec, } -#[derive(Debug, Clone, Serialize)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)] #[serde(rename_all = "camelCase")] pub struct Selector { - // # label selector for services + /// A map of key-value pairs to match. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] pub match_labels: HashMap, + + /// A list of label selector requirements. + #[serde(default, skip_serializing_if = "Vec::is_empty")] pub match_expressions: Vec, } @@ -258,10 +268,15 @@ pub struct ServiceMonitor { pub fallback_scrape_protocol: Option, } -#[derive(Debug, Serialize, Clone)] +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct NamespaceSelector { + /// Select all namespaces. + #[serde(default, skip_serializing_if = "std::ops::Not::not")] pub any: bool, + + /// List of namespace names to select from. + #[serde(default, skip_serializing_if = "Vec::is_empty")] pub match_names: Vec, } @@ -283,19 +298,3 @@ impl Default for ServiceMonitor { } } } - -impl Default for ServiceMonitorEndpoint { - fn default() -> Self { - Self { - port: Some("80".to_string()), - target_port: Default::default(), - bearer_token_file: Default::default(), - interval: Default::default(), - path: "/metrics".to_string(), - scheme: HTTPScheme::HTTP, - tls_config: Default::default(), - metric_relabelings: Default::default(), - relabelings: Default::default(), - } - } -}