impl_monitoring_alerting_kube_prometheus (#64)

Co-authored-by: tahahawa <tahahawa@gmail.com>
Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/64
Co-authored-by: Willem <wrolleman@nationtech.io>
Co-committed-by: Willem <wrolleman@nationtech.io>
This commit is contained in:
Willem 2025-06-24 18:54:15 +00:00 committed by wjro
parent e06548ac44
commit f437c40428
12 changed files with 333 additions and 72 deletions

1
Cargo.lock generated
View File

@ -1161,6 +1161,7 @@ dependencies = [
"harmony", "harmony",
"harmony_cli", "harmony_cli",
"tokio", "tokio",
"url",
] ]
[[package]] [[package]]

View File

@ -2,10 +2,7 @@ use harmony::{
data::Version, data::Version,
inventory::Inventory, inventory::Inventory,
maestro::Maestro, maestro::Maestro,
modules::{ modules::lamp::{LAMPConfig, LAMPScore},
lamp::{LAMPConfig, LAMPScore},
monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
},
topology::{K8sAnywhereTopology, Url}, topology::{K8sAnywhereTopology, Url},
}; };
@ -46,7 +43,7 @@ async fn main() {
// K8sAnywhereTopology as it is the most automatic one that enables you to easily deploy // K8sAnywhereTopology as it is the most automatic one that enables you to easily deploy
// locally, to development environment from a CI, to staging, and to production with settings // locally, to development environment from a CI, to staging, and to production with settings
// that automatically adapt to each environment grade. // that automatically adapt to each environment grade.
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize( let maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(), Inventory::autoload(),
K8sAnywhereTopology::from_env(), K8sAnywhereTopology::from_env(),
) )

View File

@ -9,3 +9,4 @@ license.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" } harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" } harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true tokio.workspace = true
url.workspace = true

View File

@ -1,12 +1,22 @@
use harmony::{ use harmony::{
inventory::Inventory, maestro::Maestro, inventory::Inventory,
modules::monitoring::kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore, maestro::Maestro,
topology::K8sAnywhereTopology, modules::monitoring::{
alert_channel::discord_alert_channel::DiscordWebhook,
kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore,
},
topology::{K8sAnywhereTopology, Url},
}; };
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let alerting_score = HelmPrometheusAlertingScore { receivers: vec![] }; let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
url: Url::Url(url::Url::parse("discord.doesnt.exist.com").unwrap()),
};
let alerting_score = HelmPrometheusAlertingScore {
receivers: vec![Box::new(discord_receiver)],
};
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize( let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(), Inventory::autoload(),
K8sAnywhereTopology::from_env(), K8sAnywhereTopology::from_env(),
@ -14,12 +24,6 @@ async fn main() {
.await .await
.unwrap(); .unwrap();
//let monitoring = MonitoringAlertingScore {
// alert_receivers: vec![],
// alert_rules: vec![],
// scrape_targets: vec![],
//};
//maestro.register_all(vec![Box::new(monitoring)]);
maestro.register_all(vec![Box::new(alerting_score)]); maestro.register_all(vec![Box::new(alerting_score)]);
harmony_cli::init(maestro, None).await.unwrap(); harmony_cli::init(maestro, None).await.unwrap();
} }

View File

@ -1,8 +1,12 @@
use async_trait::async_trait; use async_trait::async_trait;
use crate::interpret::InterpretError; use crate::{interpret::InterpretError, inventory::Inventory};
#[async_trait] #[async_trait]
pub trait Installable { pub trait Installable<T>: Send + Sync {
async fn ensure_installed(&self) -> Result<(), InterpretError>; async fn ensure_installed(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<(), InterpretError>;
} }

View File

@ -4,10 +4,13 @@ use crate::{
data::{Id, Version}, data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory, inventory::Inventory,
topology::{Topology, installable::Installable}, topology::{HelmCommand, Topology, installable::Installable},
}; };
pub trait AlertSender: Send + Sync + std::fmt::Debug + Installable {} #[async_trait]
pub trait AlertSender: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
}
#[derive(Debug)] #[derive(Debug)]
pub struct AlertingInterpret<S: AlertSender> { pub struct AlertingInterpret<S: AlertSender> {
@ -16,7 +19,7 @@ pub struct AlertingInterpret<S: AlertSender> {
} }
#[async_trait] #[async_trait]
impl<S: AlertSender, T: Topology> Interpret<T> for AlertingInterpret<S> { impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInterpret<S> {
async fn execute( async fn execute(
&self, &self,
inventory: &Inventory, inventory: &Inventory,
@ -25,7 +28,11 @@ impl<S: AlertSender, T: Topology> Interpret<T> for AlertingInterpret<S> {
for receiver in self.receivers.iter() { for receiver in self.receivers.iter() {
receiver.install(&self.sender).await?; receiver.install(&self.sender).await?;
} }
todo!() self.sender.ensure_installed(inventory, topology).await?;
Ok(Outcome::success(format!(
"successfully installed alert sender {}",
self.sender.name()
)))
} }
fn get_name(&self) -> InterpretName { fn get_name(&self) -> InterpretName {
@ -47,7 +54,8 @@ impl<S: AlertSender, T: Topology> Interpret<T> for AlertingInterpret<S> {
#[async_trait] #[async_trait]
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync { pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
async fn install(&self, sender: &S) -> Result<(), InterpretError>; async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
} }
#[async_trait] #[async_trait]

View File

@ -1,12 +1,17 @@
use async_trait::async_trait; use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Value};
use crate::{ use crate::{
interpret::InterpretError, interpret::{InterpretError, Outcome},
modules::monitoring::kube_prometheus::prometheus::{Prometheus, PrometheusReceiver}, modules::monitoring::kube_prometheus::{
prometheus::{Prometheus, PrometheusReceiver},
types::{AlertChannelConfig, AlertManagerChannelConfig},
},
topology::{Url, oberservability::monitoring::AlertReceiver}, topology::{Url, oberservability::monitoring::AlertReceiver},
}; };
#[derive(Debug)] #[derive(Debug, Clone, Serialize)]
pub struct DiscordWebhook { pub struct DiscordWebhook {
pub name: String, pub name: String,
pub url: Url, pub url: Url,
@ -14,7 +19,107 @@ pub struct DiscordWebhook {
#[async_trait] #[async_trait]
impl AlertReceiver<Prometheus> for DiscordWebhook { impl AlertReceiver<Prometheus> for DiscordWebhook {
async fn install(&self, sender: &Prometheus) -> Result<(), InterpretError> { async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
sender.install_receiver(PrometheusReceiver {}).await sender.install_receiver(self).await
}
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
Box::new(self.clone())
}
}
#[async_trait]
impl PrometheusReceiver for DiscordWebhook {
fn name(&self) -> String {
self.name.clone()
}
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
self.get_config().await
}
}
#[async_trait]
impl AlertChannelConfig for DiscordWebhook {
    /// Assembles the channel configuration for this webhook.
    ///
    /// The receiver entry is built first, then the route entry. This channel
    /// contributes no global configuration, so `channel_global_config` is
    /// always `None`.
    async fn get_config(&self) -> AlertManagerChannelConfig {
        let receiver_entry = self.alert_channel_receiver().await;
        let route_entry = self.alert_channel_route().await;

        AlertManagerChannelConfig {
            channel_global_config: None,
            channel_receiver: receiver_entry,
            channel_route: route_entry,
        }
    }
}
impl DiscordWebhook {
    /// Builds the route entry for this webhook as a YAML mapping.
    ///
    /// Keys are inserted in this order: `receiver` (this webhook's name),
    /// `matchers` (a single `alertname!=Watchdog` matcher) and `continue`
    /// (`true`).
    async fn alert_channel_route(&self) -> serde_yaml::Value {
        // Small local helper so every key is built the same way.
        let key = |text: &str| Value::String(text.to_string());
        let matchers = vec![Value::String("alertname!=Watchdog".to_string())];

        let mut entry = Mapping::new();
        entry.insert(key("receiver"), Value::String(self.name.clone()));
        entry.insert(key("matchers"), Value::Sequence(matchers));
        entry.insert(key("continue"), Value::Bool(true));
        Value::Mapping(entry)
    }

    /// Builds the receiver entry for this webhook as a YAML mapping.
    ///
    /// The mapping holds `name` (this webhook's name) followed by
    /// `discord_configs`, a one-element sequence whose only item carries the
    /// `webhook_url` rendered from `self.url`.
    async fn alert_channel_receiver(&self) -> serde_yaml::Value {
        let key = |text: &str| Value::String(text.to_string());

        let mut webhook_settings = Mapping::new();
        webhook_settings.insert(key("webhook_url"), Value::String(self.url.to_string()));

        let mut entry = Mapping::new();
        entry.insert(key("name"), Value::String(self.name.clone()));
        entry.insert(
            key("discord_configs"),
            Value::Sequence(vec![Value::Mapping(webhook_settings)]),
        );
        Value::Mapping(entry)
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn discord_serialize_should_match() {
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
url: Url::Url(url::Url::parse("https://discord.i.dont.exist.com").unwrap()),
};
let discord_receiver_receiver =
serde_yaml::to_string(&discord_receiver.alert_channel_receiver().await).unwrap();
println!("receiver \n{:#}", discord_receiver_receiver);
let discord_receiver_receiver_yaml = r#"name: test-discord
discord_configs:
- webhook_url: https://discord.i.dont.exist.com/
"#
.to_string();
let discord_receiver_route =
serde_yaml::to_string(&discord_receiver.alert_channel_route().await).unwrap();
println!("route \n{:#}", discord_receiver_route);
let discord_receiver_route_yaml = r#"receiver: test-discord
matchers:
- alertname!=Watchdog
continue: true
"#
.to_string();
assert_eq!(discord_receiver_receiver, discord_receiver_receiver_yaml);
assert_eq!(discord_receiver_route, discord_receiver_route_yaml);
} }
} }

View File

@ -1,5 +1,7 @@
use serde::Serialize; use serde::Serialize;
use crate::modules::monitoring::kube_prometheus::types::AlertManagerChannelConfig;
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig { pub struct KubePrometheusConfig {
pub namespace: String, pub namespace: String,
@ -19,6 +21,7 @@ pub struct KubePrometheusConfig {
pub kube_proxy: bool, pub kube_proxy: bool,
pub kube_state_metrics: bool, pub kube_state_metrics: bool,
pub prometheus_operator: bool, pub prometheus_operator: bool,
pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
} }
impl KubePrometheusConfig { impl KubePrometheusConfig {
pub fn new() -> Self { pub fn new() -> Self {
@ -40,6 +43,7 @@ impl KubePrometheusConfig {
prometheus_operator: true, prometheus_operator: true,
core_dns: false, core_dns: false,
kube_scheduler: false, kube_scheduler: false,
alert_receiver_configs: vec![],
} }
} }
} }

View File

@ -1,21 +1,32 @@
use super::config::KubePrometheusConfig; use super::config::KubePrometheusConfig;
use log::debug;
use non_blank_string_rs::NonBlankString; use non_blank_string_rs::NonBlankString;
use std::str::FromStr; use serde_yaml::{Mapping, Value};
use std::{
str::FromStr,
sync::{Arc, Mutex},
};
use crate::modules::helm::chart::HelmChartScore; use crate::modules::{
helm::chart::HelmChartScore,
pub fn kube_prometheus_helm_chart_score() -> HelmChartScore { monitoring::kube_prometheus::types::{
let config = KubePrometheusConfig::new(); AlertManager, AlertManagerConfig, AlertManagerRoute, AlertManagerValues,
},
};
pub fn kube_prometheus_helm_chart_score(
config: Arc<Mutex<KubePrometheusConfig>>,
) -> HelmChartScore {
let config = config.lock().unwrap();
//TODO this should be made into a rule with default formatting that can be easily passed as a vec //TODO this should be made into a rule with default formatting that can be easily passed as a vec
//to the overrides or something; leaving the user to deal with formatting here seems bad //to the overrides or something; leaving the user to deal with formatting here seems bad
let default_rules = config.default_rules.to_string(); let default_rules = config.default_rules.to_string();
let windows_monitoring = config.windows_monitoring.to_string(); let windows_monitoring = config.windows_monitoring.to_string();
let alert_manager = config.alert_manager.to_string();
let grafana = config.grafana.to_string(); let grafana = config.grafana.to_string();
let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string(); let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
let kubernetes_api_server = config.kubernetes_api_server.to_string(); let kubernetes_api_server = config.kubernetes_api_server.to_string();
let kubelet = config.kubelet.to_string(); let kubelet = config.kubelet.to_string();
let alert_manager = config.alert_manager.to_string();
let kube_controller_manager = config.kube_controller_manager.to_string(); let kube_controller_manager = config.kube_controller_manager.to_string();
let core_dns = config.core_dns.to_string(); let core_dns = config.core_dns.to_string();
let kube_etcd = config.kube_etcd.to_string(); let kube_etcd = config.kube_etcd.to_string();
@ -25,7 +36,7 @@ pub fn kube_prometheus_helm_chart_score() -> HelmChartScore {
let node_exporter = config.node_exporter.to_string(); let node_exporter = config.node_exporter.to_string();
let prometheus_operator = config.prometheus_operator.to_string(); let prometheus_operator = config.prometheus_operator.to_string();
let prometheus = config.prometheus.to_string(); let prometheus = config.prometheus.to_string();
let values = format!( let mut values = format!(
r#" r#"
additionalPrometheusRulesMap: additionalPrometheusRulesMap:
pods-status-alerts: pods-status-alerts:
@ -62,14 +73,14 @@ additionalPrometheusRulesMap:
- alert: 'PVC Fill Over 95 Percent In 2 Days' - alert: 'PVC Fill Over 95 Percent In 2 Days'
expr: | expr: |
( (
kubelet_volume_stats_used_bytes kubelet_volume_stats_used_bytes
/ /
kubelet_volume_stats_capacity_bytes kubelet_volume_stats_capacity_bytes
) > 0.95 ) > 0.95
AND AND
predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60) predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)
/ /
kubelet_volume_stats_capacity_bytes kubelet_volume_stats_capacity_bytes
> 0.95 > 0.95
for: 1m for: 1m
labels: labels:
@ -144,6 +155,52 @@ prometheus:
enabled: {prometheus} enabled: {prometheus}
"#, "#,
); );
let mut null_receiver = Mapping::new();
null_receiver.insert(
Value::String("receiver".to_string()),
Value::String("null".to_string()),
);
null_receiver.insert(
Value::String("matchers".to_string()),
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
);
null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true));
let mut alert_manager_channel_config = AlertManagerConfig {
global: Mapping::new(),
route: AlertManagerRoute {
routes: vec![Value::Mapping(null_receiver)],
},
receivers: vec![serde_yaml::from_str("name: 'null'").unwrap()],
};
for receiver in config.alert_receiver_configs.iter() {
if let Some(global) = receiver.channel_global_config.clone() {
alert_manager_channel_config
.global
.insert(global.0, global.1);
}
alert_manager_channel_config
.route
.routes
.push(receiver.channel_route.clone());
alert_manager_channel_config
.receivers
.push(receiver.channel_receiver.clone());
}
let alert_manager_values = AlertManagerValues {
alertmanager: AlertManager {
enabled: config.alert_manager,
config: alert_manager_channel_config,
},
};
let alert_manager_yaml =
serde_yaml::to_string(&alert_manager_values).expect("Failed to serialize YAML");
debug!("serialized alert manager: \n {:#}", alert_manager_yaml);
values.push_str(&alert_manager_yaml);
debug!("full values.yaml: \n {:#}", values);
HelmChartScore { HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()), namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(), release_name: NonBlankString::from_str("kube-prometheus").unwrap(),

View File

@ -1,7 +1,8 @@
use std::sync::{Arc, Mutex};
use serde::Serialize; use serde::Serialize;
use crate::{ use crate::{
modules::monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
score::Score, score::Score,
topology::{ topology::{
HelmCommand, Topology, HelmCommand, Topology,
@ -9,7 +10,7 @@ use crate::{
}, },
}; };
use super::prometheus::Prometheus; use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
#[derive(Clone, Debug, Serialize)] #[derive(Clone, Debug, Serialize)]
pub struct HelmPrometheusAlertingScore { pub struct HelmPrometheusAlertingScore {
@ -19,14 +20,12 @@ pub struct HelmPrometheusAlertingScore {
impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore { impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> { fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
Box::new(AlertingInterpret { Box::new(AlertingInterpret {
sender: Prometheus {}, sender: Prometheus {
receivers: vec![Box::new(DiscordWebhook { config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
url: todo!(), },
name: todo!(), receivers: self.receivers.clone(),
})],
}) })
} }
fn name(&self) -> String { fn name(&self) -> String {
"HelmPrometheusAlertingScore".to_string() "HelmPrometheusAlertingScore".to_string()
} }
@ -40,8 +39,9 @@ impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
todo!() todo!()
} }
} }
impl Clone for Box<dyn AlertReceiver<Prometheus>> { impl Clone for Box<dyn AlertReceiver<Prometheus>> {
fn clone(&self) -> Self { fn clone(&self) -> Self {
todo!() self.clone_box()
} }
} }

View File

@ -1,34 +1,86 @@
use std::sync::{Arc, Mutex};
use async_trait::async_trait; use async_trait::async_trait;
use log::debug;
use crate::{ use crate::{
interpret::InterpretError, interpret::{InterpretError, Outcome},
topology::{installable::Installable, oberservability::monitoring::AlertSender}, inventory::Inventory,
score,
topology::{
HelmCommand, Topology, installable::Installable, oberservability::monitoring::AlertSender,
},
}; };
impl AlertSender for Prometheus {} use score::Score;
use super::{
helm::{
config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
},
types::AlertManagerChannelConfig,
};
#[async_trait] #[async_trait]
impl Installable for Prometheus { impl AlertSender for Prometheus {
async fn ensure_installed(&self) -> Result<(), InterpretError> { fn name(&self) -> String {
todo!() "HelmKubePrometheus".to_string()
} }
} }
#[async_trait]
impl<T: Topology + HelmCommand> Installable<T> for Prometheus {
    /// Ensures Prometheus is installed on the given topology.
    ///
    /// Delegates to `install_prometheus`; the `Outcome` it reports is
    /// discarded and only success or failure is passed on to the caller.
    async fn ensure_installed(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<(), InterpretError> {
        self.install_prometheus(inventory, topology)
            .await
            .map(|_outcome| ())
    }
}
#[derive(Debug)] #[derive(Debug)]
pub struct Prometheus; pub struct Prometheus {
pub config: Arc<Mutex<KubePrometheusConfig>>,
}
impl Prometheus { impl Prometheus {
pub async fn install_receiver( pub async fn install_receiver(
&self, &self,
prometheus_receiver: PrometheusReceiver, prometheus_receiver: &dyn PrometheusReceiver,
) -> Result<(), InterpretError> { ) -> Result<Outcome, InterpretError> {
todo!() let prom_receiver = prometheus_receiver.configure_receiver().await;
debug!(
"adding alert receiver to prometheus config: {:#?}",
&prom_receiver
);
let mut config = self.config.lock().unwrap();
config.alert_receiver_configs.push(prom_receiver);
let prom_receiver_name = prometheus_receiver.name();
debug!("installed alert receiver {}", &prom_receiver_name);
Ok(Outcome::success(format!(
"Sucessfully installed receiver {}",
prom_receiver_name
)))
}
pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
kube_prometheus_helm_chart_score(self.config.clone())
.create_interpret()
.execute(inventory, topology)
.await
} }
} }
pub struct PrometheusReceiver {} #[async_trait]
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
impl PrometheusReceiver { fn name(&self) -> String;
fn get_prometheus_receiver_config(&self) {} async fn configure_receiver(&self) -> AlertManagerChannelConfig;
} }
pub struct AlertChannelGlobalConfig {}

View File

@ -1,12 +1,40 @@
use async_trait::async_trait;
use serde::Serialize; use serde::Serialize;
use serde_yaml::{Mapping, Sequence, Value};
#[derive(Serialize)] #[async_trait]
pub struct AlertReceiverRoute { pub trait AlertChannelConfig {
pub receiver: String, async fn get_config(&self) -> AlertManagerChannelConfig;
pub matchers: Vec<String>,
#[serde(default)]
pub r#continue: bool,
} }
pub struct AlertChannelReceiver {
pub name: String, #[derive(Debug, Clone, Serialize)]
pub struct AlertManagerValues {
pub alertmanager: AlertManager,
}
/// Value stored under the `alertmanager` field of `AlertManagerValues`.
#[derive(Debug, Clone, Serialize)]
pub struct AlertManager {
/// Whether alertmanager is enabled; the kube-prometheus chart score fills
/// this from `KubePrometheusConfig::alert_manager`.
pub enabled: bool,
/// Alertmanager configuration (global settings, routes and receivers).
pub config: AlertManagerConfig,
}
/// Alertmanager configuration assembled from the registered alert channels.
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerConfig {
/// Global key/value settings; channels that provide a
/// `channel_global_config` pair have it inserted here.
pub global: Mapping,
/// Routing section holding the list of routes.
pub route: AlertManagerRoute,
/// Receiver entries, one YAML value per receiver.
pub receivers: Sequence,
}
/// Routing section of the alertmanager configuration.
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerRoute {
/// Route entries as raw YAML values; each channel's `channel_route` is
/// appended to this sequence.
pub routes: Sequence,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelConfig {
///expecting an option that contains two values
///if necessary for the alertchannel
///[ jira_api_url: <string> ]
pub channel_global_config: Option<(Value, Value)>,
pub channel_route: Value,
pub channel_receiver: Value,
} }