monitoringalerting #37
10
Cargo.lock
generated
@ -936,6 +936,15 @@ dependencies = [
|
|||||||
"zeroize",
|
"zeroize",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "email_address"
|
||||||
|
version = "0.2.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encoding_rs"
|
name = "encoding_rs"
|
||||||
version = "0.8.35"
|
version = "0.8.35"
|
||||||
@ -1396,6 +1405,7 @@ dependencies = [
|
|||||||
"derive-new",
|
"derive-new",
|
||||||
"directories",
|
"directories",
|
||||||
"dockerfile_builder",
|
"dockerfile_builder",
|
||||||
|
"email_address",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"harmony_macros",
|
"harmony_macros",
|
||||||
"harmony_types",
|
"harmony_types",
|
||||||
|
|||||||
@ -39,3 +39,4 @@ lazy_static = "1.5.0"
|
|||||||
dockerfile_builder = "0.1.5"
|
dockerfile_builder = "0.1.5"
|
||||||
temp-file = "0.1.9"
|
temp-file = "0.1.9"
|
||||||
convert_case.workspace = true
|
convert_case.workspace = true
|
||||||
|
email_address = "0.2.9"
|
||||||
|
|||||||
47
harmony/src/modules/monitoring/config.rs
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
use email_address::EmailAddress;
|
||||||
|
use serde::Serialize;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub struct KubePrometheusConfig {
|
||||||
|
pub namespace: String,
|
||||||
|
pub node_exporter: bool,
|
||||||
|
pub alert_manager: bool,
|
||||||
|
pub prometheus: bool,
|
||||||
|
pub grafana: bool,
|
||||||
|
pub windows_monitoring: bool,
|
||||||
|
pub kubernetes_service_monitors: bool,
|
||||||
|
pub kubelet: bool,
|
||||||
|
pub kube_controller_manager: bool,
|
||||||
|
pub kube_etcd: bool,
|
||||||
|
pub kube_proxy: bool,
|
||||||
|
pub kube_state_metrics: bool,
|
||||||
|
pub prometheus_operator: bool,
|
||||||
|
pub webhook_url: Option<Url>,
|
||||||
|
pub webhook_service_name: Option<String>,
|
||||||
|
pub smpt_email_address: Option<EmailAddress>,
|
||||||
|
pub smtp_service_name: Option<String>,
|
||||||
|
}
|
||||||
|
impl KubePrometheusConfig {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
namespace: "monitoring".into(),
|
||||||
|
node_exporter: false,
|
||||||
|
alert_manager: false,
|
||||||
|
prometheus: true,
|
||||||
|
grafana: true,
|
||||||
|
windows_monitoring: false,
|
||||||
|
kubernetes_service_monitors: true,
|
||||||
|
kubelet: true,
|
||||||
|
kube_controller_manager: true,
|
||||||
|
kube_etcd: true,
|
||||||
|
kube_proxy: true,
|
||||||
|
kube_state_metrics: true,
|
||||||
|
prometheus_operator: true,
|
||||||
|
webhook_url: None,
|
||||||
|
webhook_service_name: None,
|
||||||
|
smpt_email_address: None,
|
||||||
|
smtp_service_name: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,10 +1,10 @@
|
|||||||
use std::str::FromStr;
|
use super::config::KubePrometheusConfig;
|
||||||
|
|
||||||
use non_blank_string_rs::NonBlankString;
|
use non_blank_string_rs::NonBlankString;
|
||||||
|
use std::{collections::HashMap, str::FromStr};
|
||||||
|
|
||||||
use crate::modules::helm::chart::HelmChartScore;
|
use crate::modules::helm::chart::HelmChartScore;
|
||||||
|
|
||||||
pub fn kube_prometheus_score(ns: &str) -> HelmChartScore {
|
pub fn kube_prometheus_score(config: &KubePrometheusConfig) -> HelmChartScore {
|
||||||
//TODO this should be made into a rule with default formatting that can be easily passed as a vec
|
//TODO this should be made into a rule with default formatting that can be easily passed as a vec
|
||||||
//to the overrides or something; leaving the user to deal with formatting here seems bad
|
//to the overrides or something; leaving the user to deal with formatting here seems bad
|
||||||
let values = r#"
|
let values = r#"
|
||||||
@ -32,8 +32,40 @@ additionalPrometheusRulesMap:
|
|||||||
description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.
|
description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.
|
||||||
title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days
|
title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days
|
||||||
"#;
|
"#;
|
||||||
|
let mut values_overrides: HashMap<NonBlankString, String> = HashMap::new();
|
||||||
|
|
||||||
|
macro_rules! insert_flag {
|
||||||
|
($key:expr, $val:expr) => {
|
||||||
|
values_overrides.insert(NonBlankString::from_str($key).unwrap(), $val.to_string());
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
insert_flag!("nodeExporter.enabled", config.node_exporter);
|
||||||
|
insert_flag!("windowsMonitoring.enabled", config.windows_monitoring);
|
||||||
|
insert_flag!("grafana.enabled", config.grafana);
|
||||||
|
insert_flag!("alertmanager.enabled", config.alert_manager);
|
||||||
|
insert_flag!("prometheus.enabled", config.prometheus);
|
||||||
|
insert_flag!(
|
||||||
|
"kubernetes_service_monitors.enabled",
|
||||||
|
config.kubernetes_service_monitors
|
||||||
|
);
|
||||||
|
insert_flag!("kubelet.enabled", config.kubelet);
|
||||||
|
insert_flag!(
|
||||||
|
"kubeControllerManager.enabled",
|
||||||
|
config.kube_controller_manager
|
||||||
|
);
|
||||||
|
insert_flag!("kubeProxy.enabled", config.kube_proxy);
|
||||||
|
insert_flag!("kubeEtcd.enabled", config.kube_etcd);
|
||||||
|
insert_flag!("kubeStateMetrics.enabled", config.kube_state_metrics);
|
||||||
|
insert_flag!("prometheusOperator.enabled", config.prometheus_operator);
|
||||||
|
|
||||||
|
if let (Some(url), Some(name)) = (&config.webhook_url, &config.webhook_service_name) {
|
||||||
|
insert_flag!("alertmanager.config.receivers.webhook_configs.url", url.as_str());
|
||||||
|
insert_flag!("alertmanager.config.receivers.name", name.as_str());
|
||||||
|
}
|
||||||
|
|
||||||
HelmChartScore {
|
HelmChartScore {
|
||||||
namespace: Some(NonBlankString::from_str(ns).unwrap()),
|
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
|
||||||
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
|
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
|
||||||
chart_name: NonBlankString::from_str(
|
chart_name: NonBlankString::from_str(
|
||||||
"oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", //use kube prometheus chart which includes grafana, prometheus, alert
|
"oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", //use kube prometheus chart which includes grafana, prometheus, alert
|
||||||
@ -41,7 +73,7 @@ additionalPrometheusRulesMap:
|
|||||||
)
|
)
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
chart_version: None,
|
chart_version: None,
|
||||||
values_overrides: None,
|
values_overrides: Some(values_overrides),
|
||||||
values_yaml: Some(values.to_string()),
|
values_yaml: Some(values.to_string()),
|
||||||
create_namespace: true,
|
create_namespace: true,
|
||||||
install_only: true,
|
install_only: true,
|
||||||
|
|||||||
@ -1,2 +1,3 @@
|
|||||||
mod kube_prometheus;
|
mod kube_prometheus;
|
||||||
pub mod monitoring_alerting;
|
pub mod monitoring_alerting;
|
||||||
|
mod config;
|
||||||
|
|||||||
@ -1,44 +1,95 @@
|
|||||||
|
use email_address::EmailAddress;
|
||||||
|
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
interpret::Interpret,
|
interpret::Interpret,
|
||||||
modules::helm::chart::HelmChartScore,
|
|
||||||
score::Score,
|
score::Score,
|
||||||
topology::{HelmCommand, Topology},
|
topology::{HelmCommand, Topology},
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::kube_prometheus::kube_prometheus_score;
|
use super::{config::KubePrometheusConfig, kube_prometheus::kube_prometheus_score};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub enum AlertChannel {
|
||||||
|
WebHookUrl {
|
||||||
|
url: Url,
|
||||||
|
service_name: String,
|
||||||
|
},
|
||||||
|
Smpt {
|
||||||
|
email_address: EmailAddress,
|
||||||
|
service_name: String,
|
||||||
|
johnride marked this conversation as resolved
|
|||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub enum Stack {
|
||||||
|
KubePrometheusStack,
|
||||||
|
OtherStack,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct MonitoringAlertingStackScore {
|
pub struct MonitoringAlertingStackScore {
|
||||||
// TODO Support other components in our monitoring/alerting stack instead of a single helm
|
pub alert_channel: Option<AlertChannel>,
|
||||||
// chart
|
pub monitoring_stack: Stack,
|
||||||
|
johnride marked this conversation as resolved
johnride
commented
No need for this
No need for this `monitoring_stack: Stack` field for now. This is a case of YAGNI: we support only one stack type, and I don't see a use case in the very short term that would force us to support another.
> Ron Jeffries, a co-founder of XP, explained the philosophy: "Always implement things when you actually need them, never when you just foresee that you [will] need them." John Carmack wrote: "It is hard for less experienced developers to appreciate how rarely architecting for future requirements / applications turns out net-positive."
> https://en.wikipedia.org/wiki/You_aren%27t_gonna_need_it
---
Another note: I'm now realising something that slipped by me yesterday.
I think we should have two scores for monitoring: cluster monitoring and app monitoring. The one we want with LampScore is ApplicationMonitoringScore. It comes with quite a bit of boilerplate, but at least it makes sense to deploy it anywhere. Then we also provide a ClusterMonitoringScore. Under the hood they could very well use the same Interpret and work together efficiently.
|
|||||||
pub monitoring_stack: HelmChartScore,
|
|
||||||
pub namespace: String,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MonitoringAlertingStackScore {
|
impl MonitoringAlertingStackScore {
|
||||||
pub fn new_with_ns(ns: &str) -> Self {
|
fn match_alert_channel(&self, config: &mut KubePrometheusConfig) {
|
||||||
Self {
|
if let Some(alert_channel) = &self.alert_channel {
|
||||||
monitoring_stack: kube_prometheus_score(ns),
|
match alert_channel {
|
||||||
namespace: ns.to_string(),
|
//opt1
|
||||||
|
AlertChannel::WebHookUrl { url, service_name } => {
|
||||||
|
config.webhook_url = Some(url.clone());
|
||||||
|
config.webhook_service_name = Some(service_name.clone());
|
||||||
|
}
|
||||||
|
//opt2
|
||||||
|
AlertChannel::Smpt {
|
||||||
|
email_address,
|
||||||
|
service_name,
|
||||||
|
} => {
|
||||||
|
config.smpt_email_address = Some(email_address.clone());
|
||||||
|
config.smtp_service_name = Some(service_name.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// pub fn new_with_ns(ns: &str) -> Self {
|
||||||
|
// let mut config = KubePrometheusConfig::default();
|
||||||
|
// let namespace = ns.to_string();
|
||||||
|
// config.namespace = namespace;
|
||||||
|
// let score = kube_prometheus_score(&config);
|
||||||
|
// Self {
|
||||||
|
// alert_channel: None,
|
||||||
|
// monitoring_stack: Some(Stack::KubePrometheusStack),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
impl Default for MonitoringAlertingStackScore {
|
impl Default for MonitoringAlertingStackScore {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
let ns = "monitoring";
|
|
||||||
Self {
|
Self {
|
||||||
monitoring_stack: kube_prometheus_score(ns),
|
alert_channel: None,
|
||||||
namespace: ns.to_string(),
|
monitoring_stack: Stack::KubePrometheusStack,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
|
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
|
||||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||||
self.monitoring_stack.create_interpret()
|
match &self.monitoring_stack {
|
||||||
|
Stack::KubePrometheusStack => {
|
||||||
|
let mut config = KubePrometheusConfig::new();
|
||||||
|
self.match_alert_channel(&mut config);
|
||||||
|
let score = kube_prometheus_score(&config);
|
||||||
|
score.create_interpret()
|
||||||
|
}
|
||||||
|
Stack::OtherStack => {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn name(&self) -> String {
|
fn name(&self) -> String {
|
||||||
|
|||||||
What is `service_name` used for? As this is user-facing, a bit of rustdoc (triple-slash `///` comments) describing how to use it would be useful here.