feat:added Slack notifications support #38
@ -1,8 +1,7 @@
|
|||||||
use email_address::EmailAddress;
|
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use url::Url;
|
|
||||||
|
|
||||||
use super::monitoring_alerting::WebhookServiceType;
|
use super::monitoring_alerting::AlertChannel;
|
||||||
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct KubePrometheusConfig {
|
pub struct KubePrometheusConfig {
|
||||||
@ -23,9 +22,7 @@ pub struct KubePrometheusConfig {
|
|||||||
pub kube_proxy: bool,
|
pub kube_proxy: bool,
|
||||||
pub kube_state_metrics: bool,
|
pub kube_state_metrics: bool,
|
||||||
pub prometheus_operator: bool,
|
pub prometheus_operator: bool,
|
||||||
pub webhook_url: Option<Url>,
|
pub alert_channel: Option<AlertChannel>,
|
||||||
pub webhook_service_type: Option<WebhookServiceType>,
|
|
||||||
pub discord_alert_manager_release_name: String,
|
|
||||||
}
|
}
|
||||||
impl KubePrometheusConfig {
|
impl KubePrometheusConfig {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
@ -34,8 +31,7 @@ impl KubePrometheusConfig {
|
|||||||
default_rules: true,
|
default_rules: true,
|
||||||
windows_monitoring: false,
|
windows_monitoring: false,
|
||||||
alert_manager: true,
|
alert_manager: true,
|
||||||
webhook_service_type: None,
|
alert_channel: None,
|
||||||
webhook_url: None,
|
|
||||||
grafana: true,
|
grafana: true,
|
||||||
node_exporter: false,
|
node_exporter: false,
|
||||||
prometheus: true,
|
prometheus: true,
|
||||||
@ -49,7 +45,6 @@ impl KubePrometheusConfig {
|
|||||||
prometheus_operator: true,
|
prometheus_operator: true,
|
||||||
core_dns: false,
|
core_dns: false,
|
||||||
kube_scheduler: false,
|
kube_scheduler: false,
|
||||||
discord_alert_manager_release_name: "discord-alert-manager".into(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,13 +4,14 @@ use non_blank_string_rs::NonBlankString;
|
|||||||
|
|
||||||
use crate::modules::helm::chart::HelmChartScore;
|
use crate::modules::helm::chart::HelmChartScore;
|
||||||
|
|
||||||
use super::config::KubePrometheusConfig;
|
use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel};
|
||||||
|
|
||||||
pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore {
|
pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore {
|
||||||
let url = if let Some(url) = &config.webhook_url {
|
let (url, release_name) = match &config.alert_channel {
|
||||||
|
|
|||||||
url.to_string()
|
Some(AlertChannel::Discord { webhook_url, name }) => {
|
||||||
} else {
|
(webhook_url.to_string(), name.to_string())
|
||||||
"None".to_string()
|
}
|
||||||
|
_ => panic!("Expected Discord alert channel configuration"),
|
||||||
};
|
};
|
||||||
|
|
||||||
let values = format!(
|
let values = format!(
|
||||||
@ -24,7 +25,7 @@ environment:
|
|||||||
|
|
||||||
HelmChartScore {
|
HelmChartScore {
|
||||||
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
|
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
|
||||||
release_name: NonBlankString::from_str(&config.discord_alert_manager_release_name).unwrap(),
|
release_name: NonBlankString::from_str(&release_name).unwrap(),
|
||||||
chart_name: NonBlankString::from_str("oci://hub.nationtech.io/nt/alertmanager-discord")
|
chart_name: NonBlankString::from_str("oci://hub.nationtech.io/nt/alertmanager-discord")
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
chart_version: None,
|
chart_version: None,
|
||||||
|
|||||||
@ -1,5 +1,7 @@
|
|||||||
use super::{config::KubePrometheusConfig, monitoring_alerting::WebhookServiceType};
|
use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel};
|
||||||
|
use log::info;
|
||||||
use non_blank_string_rs::NonBlankString;
|
use non_blank_string_rs::NonBlankString;
|
||||||
|
use url::Url;
|
||||||
use std::{collections::HashMap, str::FromStr};
|
use std::{collections::HashMap, str::FromStr};
|
||||||
|
|
||||||
use crate::modules::helm::chart::HelmChartScore;
|
use crate::modules::helm::chart::HelmChartScore;
|
||||||
@ -10,14 +12,6 @@ pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmCh
|
|||||||
let default_rules = config.default_rules.to_string();
|
let default_rules = config.default_rules.to_string();
|
||||||
let windows_monitoring = config.windows_monitoring.to_string();
|
let windows_monitoring = config.windows_monitoring.to_string();
|
||||||
let alert_manager = config.alert_manager.to_string();
|
let alert_manager = config.alert_manager.to_string();
|
||||||
let webhook_service_type = if let Some(service) = &config.webhook_service_type {
|
|
||||||
match service {
|
|
||||||
WebhookServiceType::Discord => "Discord".to_string(),
|
|
||||||
WebhookServiceType::Slack => "Slack".to_string(),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
"None".to_string()
|
|
||||||
};
|
|
||||||
let grafana = config.grafana.to_string();
|
let grafana = config.grafana.to_string();
|
||||||
let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
|
let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
|
||||||
let kubernetes_api_server = config.kubernetes_api_server.to_string();
|
let kubernetes_api_server = config.kubernetes_api_server.to_string();
|
||||||
@ -31,8 +25,7 @@ pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmCh
|
|||||||
let node_exporter = config.node_exporter.to_string();
|
let node_exporter = config.node_exporter.to_string();
|
||||||
let prometheus_operator = config.prometheus_operator.to_string();
|
let prometheus_operator = config.prometheus_operator.to_string();
|
||||||
let prometheus = config.prometheus.to_string();
|
let prometheus = config.prometheus.to_string();
|
||||||
let discord_alert_manager_release_name = config.discord_alert_manager_release_name.to_string();
|
let mut values = format!(
|
||||||
let values = format!(
|
|
||||||
r#"
|
r#"
|
||||||
additionalPrometheusRulesMap:
|
additionalPrometheusRulesMap:
|
||||||
pods-status-alerts:
|
pods-status-alerts:
|
||||||
@ -45,23 +38,23 @@ additionalPrometheusRulesMap:
|
|||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
title: "[CRIT] POD not healthy : {{ $labels.pod }}"
|
title: "[CRIT] POD not healthy : {{{{ $labels.pod }}}}"
|
||||||
description: |
|
description: |
|
||||||
A POD is in a non-ready state!
|
A POD is in a non-ready state!
|
||||||
- **Pod**: {{ $labels.pod }}
|
- **Pod**: {{{{ $labels.pod }}}}
|
||||||
- **Namespace**: {{ $labels.namespace }}
|
- **Namespace**: {{{{ $labels.namespace }}}}
|
||||||
- alert: "[CRIT] POD crash looping"
|
- alert: "[CRIT] POD crash looping"
|
||||||
expr: increase(kube_pod_container_status_restarts_total[5m]) > 3
|
expr: increase(kube_pod_container_status_restarts_total[5m]) > 3
|
||||||
for: 0m
|
for: 0m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
title: "[CRIT] POD crash looping : {{ $labels.pod }}"
|
title: "[CRIT] POD crash looping : {{{{ $labels.pod }}}}"
|
||||||
description: |
|
description: |
|
||||||
A POD is drowning in a crash loop!
|
A POD is drowning in a crash loop!
|
||||||
- **Pod**: {{ $labels.pod }}
|
- **Pod**: {{{{ $labels.pod }}}}
|
||||||
- **Namespace**: {{ $labels.namespace }}
|
- **Namespace**: {{{{ $labels.namespace }}}}
|
||||||
- **Instance**: {{ $labels.instance }}
|
- **Instance**: {{{{ $labels.instance }}}}
|
||||||
pvc-alerts:
|
pvc-alerts:
|
||||||
groups:
|
groups:
|
||||||
- name: pvc-alerts
|
- name: pvc-alerts
|
||||||
@ -82,8 +75,8 @@ additionalPrometheusRulesMap:
|
|||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.
|
description: The PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} is predicted to fill over 95% in less than 2 days.
|
||||||
title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days
|
title: PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} will fill over 95% in less than 2 days
|
||||||
defaultRules:
|
defaultRules:
|
||||||
create: {default_rules}
|
create: {default_rules}
|
||||||
rules:
|
rules:
|
||||||
@ -131,7 +124,6 @@ alertmanager:
|
|||||||
group_wait: 30s
|
group_wait: 30s
|
||||||
group_interval: 5m
|
group_interval: 5m
|
||||||
repeat_interval: 12h
|
repeat_interval: 12h
|
||||||
receiver: '{webhook_service_type}'
|
|
||||||
routes:
|
routes:
|
||||||
- receiver: 'null'
|
- receiver: 'null'
|
||||||
matchers:
|
matchers:
|
||||||
@ -139,10 +131,6 @@ alertmanager:
|
|||||||
continue: false
|
continue: false
|
||||||
receivers:
|
receivers:
|
||||||
- name: 'null'
|
- name: 'null'
|
||||||
- name: '{webhook_service_type}'
|
|
||||||
webhook_configs:
|
|
||||||
- url: 'http://{discord_alert_manager_release_name}-alertmanager-discord:9094'
|
|
||||||
send_resolved: true
|
|
||||||
grafana:
|
grafana:
|
||||||
enabled: {grafana}
|
enabled: {grafana}
|
||||||
kubernetesServiceMonitors:
|
kubernetesServiceMonitors:
|
||||||
@ -172,6 +160,16 @@ prometheus:
|
|||||||
"#,
|
"#,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if let Some(alert_channel) = &config.alert_channel {
|
||||||
|
match alert_channel {
|
||||||
|
AlertChannel::Discord { name, .. } => {
|
||||||
|
values.push_str(&discord_alert_builder(name).to_string());}
|
||||||
|
AlertChannel::Slack { slack_channel, webhook_url } => {
|
||||||
|
values.push_str(&slack_alert_builder(slack_channel, webhook_url).to_string());}
|
||||||
|
AlertChannel::Smpt { .. } => todo!(),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
HelmChartScore {
|
HelmChartScore {
|
||||||
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
|
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
|
||||||
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
|
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
|
||||||
@ -187,3 +185,44 @@ prometheus:
|
|||||||
repository: None,
|
repository: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn discord_alert_builder(release_name: &String) -> String {
|
||||||
|
let discord_alert_manager_release_name = release_name;
|
||||||
|
let discord_alert_values = format!(
|
||||||
|
r#"
|
||||||
|
alertmanager:
|
||||||
|
config:
|
||||||
|
route:
|
||||||
|
receiver: 'Discord'
|
||||||
|
receivers:
|
||||||
|
- name: 'null'
|
||||||
|
- name: 'Discord'
|
||||||
|
webhook_configs:
|
||||||
|
- url: 'http://{discord_alert_manager_release_name}-alertmanager-discord:9094'
|
||||||
|
send_resolved: true
|
||||||
|
"#,
|
||||||
|
);
|
||||||
|
discord_alert_values
|
||||||
|
}
|
||||||
|
|
||||||
|
fn slack_alert_builder(slack_channel: &String, webhook_url: &Url) -> String {
|
||||||
|
let slack_alert_values = format!(
|
||||||
|
r#"
|
||||||
|
alertmanager:
|
||||||
|
config:
|
||||||
|
global:
|
||||||
|
slack_api_url: {webhook_url}
|
||||||
|
route:
|
||||||
|
receiver: 'Slack'
|
||||||
|
receivers:
|
||||||
|
- name: 'null'
|
||||||
|
- name: 'Slack'
|
||||||
|
slack_configs:
|
||||||
|
- channel: '{slack_channel}'
|
||||||
|
send_resolved: true
|
||||||
|
title: '{{{{ .CommonAnnotations.title }}}}'
|
||||||
|
text: '{{{{ .CommonAnnotations.description }}}}'
|
||||||
|
johnride
commented
Yeah, these two functions here, `discord_alert_builder` and `slack_alert_builder`, should be implementations of a trait.
The fact that their names share the same semantics is a great giveaway.
Also, returning (String, String) is pretty weak. I guess there should be a type or you should build a custom type for that. Then this type can derive Serialize.
|
|||||||
|
"#,
|
||||||
|
);
|
||||||
|
slack_alert_values
|
||||||
|
}
|
||||||
|
|||||||
@ -20,9 +20,13 @@ use super::{
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub enum AlertChannel {
|
pub enum AlertChannel {
|
||||||
WebHookUrl {
|
Discord {
|
||||||
url: Url,
|
name: String,
|
||||||
webhook_service_type: WebhookServiceType,
|
webhook_url: Url,
|
||||||
|
},
|
||||||
|
Slack {
|
||||||
|
slack_channel: String,
|
||||||
|
webhook_url: Url,
|
||||||
},
|
},
|
||||||
//TODO test and implement in helm chart
|
//TODO test and implement in helm chart
|
||||||
//currently does not work
|
//currently does not work
|
||||||
@ -32,13 +36,6 @@ pub enum AlertChannel {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
|
||||||
pub enum WebhookServiceType {
|
|
||||||
Discord,
|
|
||||||
//TODO test slack notifications
|
|
||||||
Slack,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct MonitoringAlertingStackScore {
|
pub struct MonitoringAlertingStackScore {
|
||||||
pub alert_channel: Option<AlertChannel>,
|
pub alert_channel: Option<AlertChannel>,
|
||||||
@ -52,26 +49,6 @@ impl MonitoringAlertingStackScore {
|
|||||||
namespace: None,
|
namespace: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn set_alert_channel(&self, config: &mut KubePrometheusConfig) {
|
|
||||||
if let Some(alert_channel) = &self.alert_channel {
|
|
||||||
match alert_channel {
|
|
||||||
AlertChannel::WebHookUrl {
|
|
||||||
url,
|
|
||||||
webhook_service_type,
|
|
||||||
} => {
|
|
||||||
config.webhook_url = Some(url.clone());
|
|
||||||
config.webhook_service_type = Some(webhook_service_type.clone());
|
|
||||||
}
|
|
||||||
AlertChannel::Smpt {
|
|
||||||
//TODO setup smpt alerts
|
|
||||||
email_address,
|
|
||||||
service_name,
|
|
||||||
} => {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
|
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
|
||||||
@ -93,10 +70,10 @@ struct MonitoringAlertingStackInterpret {
|
|||||||
impl MonitoringAlertingStackInterpret {
|
impl MonitoringAlertingStackInterpret {
|
||||||
async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig {
|
async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig {
|
||||||
let mut config = KubePrometheusConfig::new();
|
let mut config = KubePrometheusConfig::new();
|
||||||
self.score.set_alert_channel(&mut config);
|
|
||||||
if let Some(ns) = &self.score.namespace {
|
if let Some(ns) = &self.score.namespace {
|
||||||
config.namespace = ns.clone();
|
config.namespace = ns.clone();
|
||||||
}
|
}
|
||||||
|
config.alert_channel = self.score.alert_channel.clone();
|
||||||
config
|
config
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -120,23 +97,20 @@ impl MonitoringAlertingStackInterpret {
|
|||||||
config: &KubePrometheusConfig,
|
config: &KubePrometheusConfig,
|
||||||
) -> Result<Outcome, InterpretError> {
|
) -> Result<Outcome, InterpretError> {
|
||||||
match &self.score.alert_channel {
|
match &self.score.alert_channel {
|
||||||
Some(AlertChannel::WebHookUrl {
|
Some(channel) => match channel {
|
||||||
webhook_service_type,
|
AlertChannel::Discord { .. } => {
|
||||||
..
|
|
||||||
}) => match webhook_service_type {
|
|
||||||
WebhookServiceType::Discord => {
|
|
||||||
discord_alert_manager_score(config)
|
discord_alert_manager_score(config)
|
||||||
.create_interpret()
|
.create_interpret()
|
||||||
.execute(inventory, topology)
|
.execute(inventory, topology)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
WebhookServiceType::Slack => Ok(Outcome::success(
|
AlertChannel::Slack { .. } => Ok(Outcome::success(
|
||||||
"No extra configs for slack alerting".to_string(),
|
"No extra configs for slack alerting".to_string(),
|
||||||
)),
|
)),
|
||||||
|
AlertChannel::Smpt { .. } => {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
},
|
},
|
||||||
Some(AlertChannel::Smpt { .. }) => {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
None => Ok(Outcome::success("No alert channel configured".to_string())),
|
None => Ok(Outcome::success("No alert channel configured".to_string())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -155,7 +129,8 @@ impl<T: Topology + HelmCommand> Interpret<T> for MonitoringAlertingStackInterpre
|
|||||||
self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config)
|
self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config)
|
||||||
.await?;
|
.await?;
|
||||||
info!("Installing alert channel service");
|
info!("Installing alert channel service");
|
||||||
self.deploy_alert_channel_service(inventory, topology, &config).await?;
|
self.deploy_alert_channel_service(inventory, topology, &config)
|
||||||
|
.await?;
|
||||||
Ok(Outcome::success(format!(
|
Ok(Outcome::success(format!(
|
||||||
"succesfully deployed monitoring and alerting stack"
|
"succesfully deployed monitoring and alerting stack"
|
||||||
)))
|
)))
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user
Since we're supporting a list, shouldn't we use filter_map instead so we can handle all the instances at once?
Also, this is a smell to me. All the handlers should implement the same trait. For example:
Ce serait une façon de simplifier la gestion de chacun des types ensuite. C'est plus type safe et moins error prone.