monitoringalerting #37

Merged
wjro merged 6 commits from monitoringalerting into master 2025-05-21 17:32:27 +00:00
6 changed files with 161 additions and 19 deletions
Showing only changes of commit b4c6848433 - Show all commits

10
Cargo.lock generated
View File

@ -936,6 +936,15 @@ dependencies = [
"zeroize",
]
[[package]]
name = "email_address"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449"
dependencies = [
"serde",
]
[[package]]
name = "encoding_rs"
version = "0.8.35"
@ -1396,6 +1405,7 @@ dependencies = [
"derive-new",
"directories",
"dockerfile_builder",
"email_address",
"env_logger",
"harmony_macros",
"harmony_types",

View File

@ -39,3 +39,4 @@ lazy_static = "1.5.0"
dockerfile_builder = "0.1.5"
temp-file = "0.1.9"
convert_case.workspace = true
email_address = "0.2.9"

View File

@ -0,0 +1,47 @@
use email_address::EmailAddress;
use serde::Serialize;
use url::Url;
#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {
pub namespace: String,
pub node_exporter: bool,
pub alert_manager: bool,
pub prometheus: bool,
pub grafana: bool,
pub windows_monitoring: bool,
pub kubernetes_service_monitors: bool,
pub kubelet: bool,
pub kube_controller_manager: bool,
pub kube_etcd: bool,
pub kube_proxy: bool,
pub kube_state_metrics: bool,
pub prometheus_operator: bool,
pub webhook_url: Option<Url>,
pub webhook_service_name: Option<String>,
pub smpt_email_address: Option<EmailAddress>,
pub smtp_service_name: Option<String>,
}
impl KubePrometheusConfig {
pub fn new() -> Self {
Self {
namespace: "monitoring".into(),
node_exporter: false,
alert_manager: false,
prometheus: true,
grafana: true,
windows_monitoring: false,
kubernetes_service_monitors: true,
kubelet: true,
kube_controller_manager: true,
kube_etcd: true,
kube_proxy: true,
kube_state_metrics: true,
prometheus_operator: true,
webhook_url: None,
webhook_service_name: None,
smpt_email_address: None,
smtp_service_name: None,
}
}
}

View File

@ -1,10 +1,10 @@
use std::str::FromStr;
use super::config::KubePrometheusConfig;
use non_blank_string_rs::NonBlankString;
use std::{collections::HashMap, str::FromStr};
use crate::modules::helm::chart::HelmChartScore;
pub fn kube_prometheus_score(ns: &str) -> HelmChartScore {
pub fn kube_prometheus_score(config: &KubePrometheusConfig) -> HelmChartScore {
//TODO this should be make into a rule with default formatting that can be easily passed as a vec
//to the overrides or something leaving the user to deal with formatting here seems bad
let values = r#"
@ -32,8 +32,40 @@ additionalPrometheusRulesMap:
description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.
title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days
"#;
let mut values_overrides: HashMap<NonBlankString, String> = HashMap::new();
macro_rules! insert_flag {

Using a macro here is weird at the very least.

It makes code not easily readable insert_flag is way less obvious to me what it does than values_overrides.insert(...) would

Use a function if you want to make the NonBlankString boilerplate more readable. And if you want to write a macro that does something valuable you should validate the NonBlankString at compile time like we do with out ip! and mac_address! macros.

Using a macro here is weird at the very least. It makes code not easily readable `insert_flag` is way less obvious to me what it does than `values_overrides.insert(...)` would Use a function if you want to make the NonBlankString boilerplate more readable. And if you want to write a macro that does something valuable you should validate the NonBlankString at compile time like we do with out ip! and mac_address! macros.
($key:expr, $val:expr) => {
values_overrides.insert(NonBlankString::from_str($key).unwrap(), $val.to_string());
};
}
insert_flag!("nodeExporter.enabled", config.node_exporter);
insert_flag!("windowsMonitoring.enabled", config.windows_monitoring);
insert_flag!("grafana.enabled", config.grafana);
insert_flag!("alertmanager.enabled", config.alert_manager);
insert_flag!("prometheus.enabled", config.prometheus);
insert_flag!(
"kubernetes_service_monitors.enabled",
config.kubernetes_service_monitors
);
insert_flag!("kubelet.enabled", config.kubelet);
insert_flag!(
"kubeControllerManager.enabled",
config.kube_controller_manager
);
insert_flag!("kubeProxy.enabled", config.kube_proxy);
insert_flag!("kubeEtcd.enabled", config.kube_etcd);
insert_flag!("kubeStateMetrics.enabled", config.kube_state_metrics);
insert_flag!("prometheusOperator.enabled", config.prometheus_operator);
if let (Some(url), Some(name)) = (&config.webhook_url, &config.webhook_service_name) {
insert_flag!("alertmanager.config.receivers.webhook_configs.url", url.as_str());
insert_flag!("alertmanager.config.receivers.name", name.as_str());
}
HelmChartScore {
namespace: Some(NonBlankString::from_str(ns).unwrap()),
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
chart_name: NonBlankString::from_str(
"oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", //use kube prometheus chart which includes grafana, prometheus, alert
@ -41,7 +73,7 @@ additionalPrometheusRulesMap:
)
.unwrap(),
chart_version: None,
values_overrides: None,
values_overrides: Some(values_overrides),
values_yaml: Some(values.to_string()),

Why use values_overrides here when there already is a values_yaml right above? Would make it a lot more readable I think.

Why use values_overrides here when there already is a values_yaml right above? Would make it a lot more readable I think.
create_namespace: true,
install_only: true,

View File

@ -1,2 +1,3 @@
mod kube_prometheus;
pub mod monitoring_alerting;
mod config;

View File

@ -1,44 +1,95 @@
use email_address::EmailAddress;
use serde::Serialize;
use url::Url;
use crate::{
interpret::Interpret,
modules::helm::chart::HelmChartScore,
score::Score,
topology::{HelmCommand, Topology},
};
use super::kube_prometheus::kube_prometheus_score;
use super::{config::KubePrometheusConfig, kube_prometheus::kube_prometheus_score};
#[derive(Debug, Clone, Serialize)]
pub enum AlertChannel {

Nice, but did you test smtp? If it's not working yet there should be a // TODO comment or something like that.

I would expect to see server, username, password or something like that here. Then there is a bajillion providers with no standards. Emails are not a simple topic. Maybe we should even only support Webhook for now.

Nice, but did you test smtp? If it's not working yet there should be a // TODO comment or something like that. I would expect to see server, username, password or something like that here. Then there is a bajillion providers with no standards. Emails are not a simple topic. Maybe we should even only support Webhook for now.
WebHookUrl {
url: Url,
service_name: String,
},
Smpt {
email_address: EmailAddress,
service_name: String,
johnride marked this conversation as resolved
Review

What is service_name used for? As this is user facing, a bit of rust doc (with triple slashes ///) to describe how to use would be useful here.

What is service_name used for? As this is user facing, a bit of rust doc (with triple slashes ///) to describe how to use would be useful here.
},
}
#[derive(Debug, Clone, Serialize)]
pub enum Stack {
KubePrometheusStack,
OtherStack,
}
#[derive(Debug, Clone, Serialize)]
pub struct MonitoringAlertingStackScore {
// TODO Support other components in our monitoring/alerting stack instead of a single helm
// chart
pub monitoring_stack: HelmChartScore,
pub namespace: String,
pub alert_channel: Option<AlertChannel>,
pub monitoring_stack: Stack,
johnride marked this conversation as resolved
Review

No need for this monitoring_stack: Stack field for now. This is a case of YAGNI. We support only one type and I don't see in the very short term a use case that would force us.

Ron Jeffries, a co-founder of XP, explained the philosophy: "Always implement things when you actually need them, never when you just foresee that you [will] need them."[8] John Carmack wrote "It is hard for less experienced developers to appreciate how rarely architecting for future requirements / applications turns out net-positive."[9]
https://en.wikipedia.org/wiki/You_aren%27t_gonna_need_it


Another note, I'm realising now something that slipped by me yesterday :

I think we should have two scores for monitoring : cluster monitoring and app monitoring. The one we want with LampScore is ApplicationMonitoringScore. It comes with quite a bit of boilerplate but at least it makes sense to deploy it anywhere. Then we also provide a ClusterMonitoringScore. Under the hood they could very well use the same Interpret and work together efficiently.

No need for this `monitoring_stack: Stack` field for now. This is a case of YAGNI. We support only one type and I don't see in the very short term a use case that would force us. > Ron Jeffries, a co-founder of XP, explained the philosophy: "Always implement things when you actually need them, never when you just foresee that you [will] need them."[8] John Carmack wrote "It is hard for less experienced developers to appreciate how rarely architecting for future requirements / applications turns out net-positive."[9] > https://en.wikipedia.org/wiki/You_aren%27t_gonna_need_it --- Another note, I'm realising now something that slipped by me yesterday : I think we should have two scores for monitoring : cluster monitoring and app monitoring. The one we want with LampScore is ApplicationMonitoringScore. It comes with quite a bit of boilerplate but at least it makes sense to deploy it anywhere. Then we also provide a ClusterMonitoringScore. Under the hood they could very well use the same Interpret and work together efficiently.
}
impl MonitoringAlertingStackScore {
pub fn new_with_ns(ns: &str) -> Self {
Self {
monitoring_stack: kube_prometheus_score(ns),
namespace: ns.to_string(),
fn match_alert_channel(&self, config: &mut KubePrometheusConfig) {
if let Some(alert_channel) = &self.alert_channel {
match alert_channel {
//opt1
AlertChannel::WebHookUrl { url, service_name } => {
config.webhook_url = Some(url.clone());
config.webhook_service_name = Some(service_name.clone());
}
//opt2

Poor function name. What's the use of this function?

Something like configure_alerts maybe?

Poor function name. What's the use of this function? Something like `configure_alerts` maybe?
AlertChannel::Smpt {
email_address,
service_name,

Useless comments

Useless comments
} => {
config.smpt_email_address = Some(email_address.clone());
config.smtp_service_name = Some(service_name.clone());
}
}
}
}
}
// pub fn new_with_ns(ns: &str) -> Self {
// let mut config = KubePrometheusConfig::default();
// let namespace = ns.to_string();
// config.namespace = namespace;
// let score = kube_prometheus_score(&config);
// Self {
// alert_channel: None,
// monitoring_stack: Some(Stack::KubePrometheusStack),
// }
// }
//}
impl Default for MonitoringAlertingStackScore {
fn default() -> Self {
let ns = "monitoring";
Self {
monitoring_stack: kube_prometheus_score(ns),
namespace: ns.to_string(),
alert_channel: None,
monitoring_stack: Stack::KubePrometheusStack,
}
}
}
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
self.monitoring_stack.create_interpret()
match &self.monitoring_stack {
Stack::KubePrometheusStack => {
let mut config = KubePrometheusConfig::new();
self.match_alert_channel(&mut config);
let score = kube_prometheus_score(&config);
score.create_interpret()
}
Stack::OtherStack => {
todo!()
}
}
}
fn name(&self) -> String {