feat/teams-integration #40

Closed
wjro wants to merge 5 commits from feat/teams-integration into master
15 changed files with 5499 additions and 211 deletions

View File

@@ -16,3 +16,5 @@ harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
typetag = "0.2.20"
serde = "1.0.219"

View File

@@ -4,9 +4,7 @@ use harmony::{
maestro::Maestro,
modules::{
lamp::{LAMPConfig, LAMPScore},
monitoring::monitoring_alerting::{
AlertChannel, MonitoringAlertingStackScore, WebhookServiceType,
},
monitoring::{kube_prometheus::prometheus_alert_channel::{DiscordChannel, SlackChannel}, monitoring_alerting::MonitoringAlertingScore},
},
topology::{K8sAnywhereTopology, Url},
};
@@ -34,28 +32,42 @@ async fn main() {
},
};
// You can choose the type of Topology you want, we suggest starting with the
// K8sAnywhereTopology as it is the most automatic one that enables you to easily deploy
// locally, to development environment from a CI, to staging, and to production with settings
// that automatically adapt to each environment grade.
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize (
Inventory::autoload(),
K8sAnywhereTopology::new(),
)
.await
.unwrap();
let url = url::Url::parse("https://discord.com/api/webhooks/dummy_channel/dummy_token")
.expect("invalid URL");
let url = url::Url::parse(
"https://hooks.slack.com/services/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",
)
.expect("invalid URL");
let mut monitoring_stack_score = MonitoringAlertingStackScore::new();
let mut monitoring_stack_score = MonitoringAlertingScore::new();
monitoring_stack_score.namespace = Some(lamp_stack.config.namespace.clone());
monitoring_stack_score.alert_channel = Some(AlertChannel::WebHookUrl {
url: url,
webhook_service_type: WebhookServiceType::Discord,
});
monitoring_stack_score.alert_channels = vec![(Box::new(SlackChannel {
name: "alert-test".to_string(),
webhook_url: url,})),
(Box::new(DiscordChannel {
name: "discord".to_string(),
webhook_url: url::Url::parse("https://discord.com/api/webhooks/1372994201746276462/YRn4TA9pj8ve3lfmyj1j0Yx97i92gv4U_uavt4CV4_SSIVArYUqfDzMOmzSTic2d8XSL").expect("invalid URL"),}))];
maestro.register_all(vec![Box::new(lamp_stack), Box::new(monitoring_stack_score)]);
//TODO in process of testing
//webhook deprecated in MSTeams August 2025
//(AlertChannel::MSTeams {
// connector: "alert-test".to_string(),
// webhook_url: url::Url::parse("").expect("invalid URL"),
//}),
maestro.register_all(vec![Box::new(monitoring_stack_score)]);
// Here we bootstrap the CLI, this gives some nice features if you need them
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -0,0 +1,14 @@
[package]
johnride marked this conversation as resolved
Review

This should be behind a feature flag such as "msteams-deprecated"

This should be behind a feature flag such as "msteams-deprecated"
Review

Oh it's in the examples, forget this.

Oh it's in the examples, forget this.
name = "ms_teams_alert_channel"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
serde = "1.0.219"
tokio.workspace = true
typetag = "0.2.20"
url.workspace = true

View File

@@ -0,0 +1,65 @@
mod prometheus_msteams;
use harmony::{
interpret::InterpretError, inventory::Inventory, maestro::Maestro, modules::{helm::chart::HelmChartScore, monitoring::{kube_prometheus::{prometheus_alert_channel::PrometheusAlertChannel, types::{AlertChannelConfig, AlertChannelReceiver, AlertChannelRoute, WebhookConfig}}, monitoring_alerting::MonitoringAlertingScore}}, topology::K8sAnywhereTopology
};
use prometheus_msteams::prometheus_msteams_score;
use url::Url;
use serde::{Serialize, Deserialize};
/// Example entry point.
///
/// Registers a `MonitoringAlertingScore` carrying a single MS Teams alert
/// channel on a `K8sAnywhereTopology` maestro, then hands control to the
/// Harmony CLI.
#[tokio::main]
async fn main() {
    // Dummy webhook: replace with a real Teams incoming-webhook URL before use.
    let teams_channel = MSTeamsChannel {
        connector: String::from("teams-test"),
        webhook_url: url::Url::parse("https://msteams.com/services/dummy/dummy/dummy")
            .expect("invalid URL"),
    };
    let channels: Vec<Box<dyn PrometheusAlertChannel>> = vec![Box::new(teams_channel)];

    // `namespace: None` leaves the namespace choice to the score's own default.
    let score = MonitoringAlertingScore {
        alert_channels: channels,
        namespace: None,
    };

    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
        K8sAnywhereTopology::new(),
    )
    .await
    .unwrap();

    maestro.register_all(vec![Box::new(score)]);
    harmony_cli::init(maestro, None).await.unwrap();
}
/// Alert channel that routes Alertmanager notifications towards Microsoft Teams.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct MSTeamsChannel {
// Suffix of the receiver name (`MSTeams-<connector>`) and release name of the
// Helm chart returned by `get_dependency_score`.
connector: String,
// Teams webhook passed to `prometheus_msteams_score` for the chart values.
webhook_url: Url,
}
#[typetag::serde]
Review

typetag doesn't seem to be a good idea here. Why do you use it? Show me an actual use case that requires it versus having a trait bound + Serialize on the PrometheusAlertChannel trait.

typetag doesn't seem to be a good idea here. Why do you use it? Show me an actual use case that requires it versus having a trait bound `+ Serialize` on the PrometheusAlertChannel trait.
impl PrometheusAlertChannel for MSTeamsChannel {
    /// Builds the Alertmanager receiver and route for this Teams connector.
    ///
    /// The receiver is named `MSTeams-<connector>` and posts resolved and
    /// firing alerts to a fixed in-cluster webhook URL. The route matches every
    /// alert except `Watchdog` and sets `continue` so later routes still run.
    /// No global configuration is produced.
    fn get_alert_manager_config_contribution(&self) -> Result<AlertChannelConfig, InterpretError> {
        let receiver_name = format!("MSTeams-{}", self.connector);

        // NOTE(review): this host hardcodes a service name and the `monitoring`
        // namespace, while `get_dependency_score` installs the chart under the
        // release name `connector` in a caller-supplied namespace — confirm the
        // two actually line up for non-default values.
        let relay_url = url::Url::parse("http://prometheus-msteams-prometheus-msteams.monitoring.svc.cluster.local:2000/alertmanager").expect("invalid url");

        let receiver = AlertChannelReceiver {
            name: receiver_name.clone(),
            slack_configs: None,
            webhook_configs: Some(vec![WebhookConfig {
                url: relay_url,
                send_resolved: true,
            }]),
        };
        let route = AlertChannelRoute {
            receiver: receiver_name,
            matchers: vec![String::from("alertname!=Watchdog")],
            r#continue: true,
        };

        Ok(AlertChannelConfig {
            receiver,
            route,
            global_config: None,
        })
    }

    /// Returns the Helm chart score built by `prometheus_msteams_score` from
    /// this connector's name and webhook URL, to be installed into `ns`.
    fn get_dependency_score(&self, ns: String) -> Option<HelmChartScore> {
        let chart = prometheus_msteams_score(self.connector.clone(), self.webhook_url.clone(), ns);
        Some(chart)
    }
}

View File

@@ -0,0 +1,30 @@
use std::str::FromStr;
use harmony::modules::helm::chart::{HelmChartScore, NonBlankString};
use url::Url;
/// Builds the Helm chart score for the `prometheus-msteams` chart.
///
/// * `name` - Helm release name.
/// * `webhook_url` - URL written into the chart values as the `default` connector.
/// * `namespace` - namespace the release is installed into (created if missing).
///
/// The chart is pulled from `oci://hub.nationtech.io/library/prometheus-msteams`
/// without a pinned version and is marked install-only.
///
/// # Panics
///
/// Panics if `NonBlankString::from_str` rejects `name` or `namespace`.
pub fn prometheus_msteams_score(
    name: String,
    webhook_url: Url,
    namespace: String,
) -> HelmChartScore {
    let values_yaml = format!(
        r#"
connectors:
- default: "{webhook_url}"
"#
    );

    let target_namespace = NonBlankString::from_str(&namespace).unwrap();
    let release = NonBlankString::from_str(&name).unwrap();
    let chart =
        NonBlankString::from_str("oci://hub.nationtech.io/library/prometheus-msteams").unwrap();

    HelmChartScore {
        namespace: Some(target_namespace),
        release_name: release,
        chart_name: chart,
        chart_version: None,
        values_overrides: None,
        values_yaml: Some(values_yaml),
        create_namespace: true,
        install_only: true,
        repository: None,
    }
}

View File

@@ -49,3 +49,5 @@ fqdn = { version = "0.4.6", features = [
"serde",
] }
temp-dir = "0.1.14"
typetag = "0.2.20"
dyn-clone = "1.0.19"

View File

@@ -1,14 +1,16 @@
use serde::Serialize;
use super::monitoring_alerting::AlertChannel;
use super::kube_prometheus::{prometheus_alert_channel::PrometheusAlertChannel, types::AlertManagerValues};
#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {
pub struct KubePrometheusChartConfig {
pub namespace: String,
pub default_rules: bool,
pub windows_monitoring: bool,
pub alert_manager: bool,
pub alert_manager_values: AlertManagerValues,
pub node_exporter: bool,
pub prometheus: bool,
pub grafana: bool,
@@ -22,16 +24,17 @@ pub struct KubePrometheusConfig {
pub kube_proxy: bool,
pub kube_state_metrics: bool,
pub prometheus_operator: bool,
pub alert_channel: Vec<AlertChannel>,
pub alert_channels: Vec<Box<dyn PrometheusAlertChannel>>,
}
impl KubePrometheusConfig {
impl KubePrometheusChartConfig {
pub fn new() -> Self {
Self {
namespace: "monitoring".into(),
default_rules: true,
windows_monitoring: false,
alert_manager: true,
alert_channel: Vec::new(),
alert_manager_values: AlertManagerValues::default(),
alert_channels: Vec::new(),
grafana: true,
node_exporter: false,
prometheus: true,

View File

@@ -1,46 +1,32 @@
use std::str::FromStr;
use non_blank_string_rs::NonBlankString;
use url::Url;
use crate::modules::helm::chart::HelmChartScore;
use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel};
fn get_discord_alert_manager_score(config: &KubePrometheusConfig) -> Option<HelmChartScore> {
let (url, name) = config.alert_channel.iter().find_map(|channel| {
if let AlertChannel::Discord { webhook_url, name } = channel {
Some((webhook_url, name))
} else {
None
}
})?;
pub fn discord_alert_manager_score(name: String, webhook: Url, namespace: String) -> HelmChartScore {
let url = webhook;
let values = format!(
r#"
environment:
- name: "DISCORD_WEBHOOK"
value: "{url}"
"#,
environment:
- name: "DISCORD_WEBHOOK"
value: "{url}"
"#,
);
Some(HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
HelmChartScore {
namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
release_name: NonBlankString::from_str(&name).unwrap(),
chart_name: NonBlankString::from_str("oci://hub.nationtech.io/library/alertmanager-discord")
.unwrap(),
chart_name: NonBlankString::from_str(
"oci://hub.nationtech.io/library/alertmanager-discord",
)
.unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: Some(values.to_string()),
create_namespace: true,
install_only: true,
repository: None,
})
}
pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore {
if let Some(chart) = get_discord_alert_manager_score(config) {
chart
} else {
panic!("Expected discord alert manager helm chart");
}
}

View File

@@ -1,17 +1,14 @@
use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel};
use crate::modules::{helm::chart::HelmChartScore, monitoring::config::KubePrometheusChartConfig};
use log::info;
use non_blank_string_rs::NonBlankString;
use std::{collections::HashMap, str::FromStr};
use url::Url;
use serde_yaml::{self};
use std::str::FromStr;
use crate::modules::helm::chart::HelmChartScore;
pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmChartScore {
pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusChartConfig) -> HelmChartScore {
//TODO this should be make into a rule with default formatting that can be easily passed as a vec
//to the overrides or something leaving the user to deal with formatting here seems bad
let default_rules = config.default_rules.to_string();
let windows_monitoring = config.windows_monitoring.to_string();
let alert_manager = config.alert_manager.to_string();
let grafana = config.grafana.to_string();
let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
let kubernetes_api_server = config.kubernetes_api_server.to_string();
@@ -25,6 +22,7 @@ pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmCh
let node_exporter = config.node_exporter.to_string();
let prometheus_operator = config.prometheus_operator.to_string();
let prometheus = config.prometheus.to_string();
let alert_manager_values = config.alert_manager_values.clone();
let mut values = format!(
r#"
additionalPrometheusRulesMap:
@@ -142,68 +140,16 @@ prometheusOperator:
enabled: {prometheus_operator}
prometheus:
enabled: {prometheus}
prometheusSpec:
maximumStartupDurationSeconds: 240
"#,
);
let alertmanager_config = alert_manager_yaml_builder(&config);
values.push_str(&alertmanager_config);
let alert_manager_yaml = serde_yaml::to_string(&alert_manager_values).expect("Failed to serialize YAML");
values.push_str(&alert_manager_yaml);
fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String {
let mut receivers = String::new();
let mut routes = String::new();
let mut global_configs = String::new();
let alert_manager = config.alert_manager;
for alert_channel in &config.alert_channel {
match alert_channel {
AlertChannel::Discord { name, .. } => {
let (receiver, route) = discord_alert_builder(name);
info!("discord receiver: {} \nroute: {}", receiver, route);
receivers.push_str(&receiver);
routes.push_str(&route);
}
AlertChannel::Slack {
slack_channel,
webhook_url,
} => {
let (receiver, route) = slack_alert_builder(slack_channel);
info!("slack receiver: {} \nroute: {}", receiver, route);
receivers.push_str(&receiver);
routes.push_str(&route);
let global_config = format!(
r#"
global:
slack_api_url: {webhook_url}"#
);
global_configs.push_str(&global_config);
}
AlertChannel::Smpt { .. } => todo!(),
}
}
info!("after alert receiver: {}", receivers);
info!("after alert routes: {}", routes);
let alertmanager_config = format!(
r#"
alertmanager:
enabled: {alert_manager}
config: {global_configs}
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
routes:
{routes}
receivers:
- name: 'null'
{receivers}"#
);
info!("alert manager config: {}", alertmanager_config);
alertmanager_config
}
info!("{}", values);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
@@ -220,43 +166,3 @@ alertmanager:
repository: None,
}
}
fn discord_alert_builder(release_name: &String) -> (String, String) {
let discord_receiver_name = format!("Discord-{}", release_name);
let receiver = format!(
r#"
- name: '{discord_receiver_name}'
webhook_configs:
- url: 'http://{release_name}-alertmanager-discord:9094'
send_resolved: true"#,
);
let route = format!(
r#"
- receiver: '{discord_receiver_name}'
matchers:
- alertname!=Watchdog
continue: true"#,
);
(receiver, route)
}
fn slack_alert_builder(slack_channel: &String) -> (String, String) {
let slack_receiver_name = format!("Slack-{}", slack_channel);
let receiver = format!(
r#"
- name: '{slack_receiver_name}'
slack_configs:
- channel: '{slack_channel}'
send_resolved: true
title: '{{{{ .CommonAnnotations.title }}}}'
text: '{{{{ .CommonAnnotations.description }}}}'"#,
);
let route = format!(
r#"
- receiver: '{slack_receiver_name}'
matchers:
- alertname!=Watchdog
continue: true"#,
);
(receiver, route)
}

View File

@@ -0,0 +1,3 @@
pub mod kube_prometheus;
pub mod types;
pub mod prometheus_alert_channel;

View File

@@ -0,0 +1,140 @@
use crate::{
interpret::InterpretError,
modules::{
helm::chart::HelmChartScore,
monitoring::{
discord_alert_manager::discord_alert_manager_score,
kube_prometheus::types::{
AlertChannelConfig, AlertChannelGlobalConfig, AlertChannelReceiver,
AlertChannelRoute, SlackConfig, WebhookConfig,
},
},
},
};
use dyn_clone::DynClone;
use serde::{Deserialize, Serialize};
use std::fmt::Debug;
use url::Url;
#[typetag::serde(tag = "channel_type")]
Review

Yeah I don't think this will work. Show me the actual json that this produces and why you need to use this.

Always be extremely careful when introducing a new dependency in the project. If we need a new dependency it can mean two things :

  1. There is a new pattern / use case that emerged. This might be the case here but I don't think so, we already have a pattern for serializable traits that is binding the + Serialize requirement to the trait like we do with the Score trait.
  2. We did something wrong in the rest of the codebase and introducing the dependency means that we must refactor every other place in the codebase that does this thing wrong
Yeah I don't think this will work. Show me the actual json that this produces and why you need to use this. Always be extremely careful when introducing a new dependency in the project. If we need a new dependency it can mean two things : 1. There is a new pattern / use case that emerged. This might be the case here but I don't think so, we already have a pattern for serializable traits that is binding the `+ Serialize` requirement to the trait like we do with the Score trait. 2. We did something wrong in the rest of the codebase and introducing the dependency means that we must refactor every other place in the codebase that does this thing wrong
#[async_trait::async_trait]
pub trait PrometheusAlertChannel: DynClone + Debug + Send + Sync {
Review

I think PrometheusAlertChannel is a good name for the trait. However both function names feel off :

get_alert_manager_config_contribution : what does this mean? What is the word contribution doing here? I feel like the return type is correct or almost correct but the function name is weird. From my understanding here this trait's job is to provide a serializable alert channel yaml configuration that can be inserted in a list of prometheus alert channels.

get_dependency_score : this feels very wrong. I'm pretty sure this violates the Single Responsibility Principle as well as Harmony's architecture guidelines.

Please add a rustdoc explaining what is the purpose of this trait. Then review the SRP and make sure that it is followed here.

I think PrometheusAlertChannel is a good name for the trait. However both function names feel off : get_alert_manager_config_contribution : what does this mean? What is the word `contribution` doing here? I feel like the return type is correct or almost correct but the function name is weird. From my understanding here this trait's job is to provide a serializable alert channel yaml configuration that can be inserted in a list of prometheus alert channels. get_dependency_score : this feels very wrong. I'm pretty sure this violates the Single Responsibility Principle as well as Harmony's architecture guidelines. Please add a rustdoc explaining what is the purpose of this trait. Then review the SRP and make sure that it is followed here.
/// Returns this channel's Alertmanager pieces: one receiver, one route and an
/// optional global configuration, bundled as an `AlertChannelConfig`.
fn get_alert_manager_config_contribution(&self) -> Result<AlertChannelConfig, InterpretError>;
/// Returns a Helm chart score that the channel needs deployed in `namespace`,
/// or `None` when the channel requires no additional workload.
fn get_dependency_score(&self, namespace:String) -> Option<HelmChartScore>;
}
dyn_clone::clone_trait_object!(PrometheusAlertChannel);
/// Alert channel that delivers Alertmanager notifications to a Discord webhook.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscordChannel {
/// Used for the receiver name (`Discord-<name>`), the in-cluster relay host
/// (`<name>-alertmanager-discord`) and the Helm release name of the relay.
pub name: String,
/// Discord webhook URL handed to `discord_alert_manager_score`.
pub webhook_url: Url,
}
#[typetag::serde]
impl PrometheusAlertChannel for DiscordChannel {
    /// Builds the Alertmanager receiver and route for this Discord channel.
    ///
    /// The receiver `Discord-<name>` posts to the relay service at
    /// `http://<name>-alertmanager-discord:9094`; the route matches every alert
    /// except `Watchdog` and continues to later routes. No global configuration
    /// is produced.
    ///
    /// # Panics
    ///
    /// Panics with `invalid url` if the relay URL built from `name` does not parse.
    fn get_alert_manager_config_contribution(&self) -> Result<AlertChannelConfig, InterpretError> {
        let relay_address = format!("http://{}-alertmanager-discord:9094", &self.name);
        let receiver_name = format!("Discord-{}", self.name);

        let webhook = WebhookConfig {
            url: url::Url::parse(&relay_address).expect("invalid url"),
            send_resolved: true,
        };
        let receiver = AlertChannelReceiver {
            name: receiver_name.clone(),
            slack_configs: None,
            webhook_configs: Some(vec![webhook]),
        };
        let route = AlertChannelRoute {
            receiver: receiver_name,
            matchers: vec![String::from("alertname!=Watchdog")],
            r#continue: true,
        };

        Ok(AlertChannelConfig {
            receiver,
            route,
            global_config: None,
        })
    }

    /// Returns the Helm chart score for the Discord relay, released as `name`
    /// into `namespace`.
    fn get_dependency_score(&self, namespace: String) -> Option<HelmChartScore> {
        let relay_chart =
            discord_alert_manager_score(self.name.clone(), self.webhook_url.clone(), namespace);
        Some(relay_chart)
    }
}
/// Alert channel that delivers Alertmanager notifications to Slack.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlackChannel {
/// Used both as the Slack `channel` value and as the receiver name suffix
/// (`Slack-<name>`).
pub name: String,
/// Slack webhook URL, emitted as the global `slack_api_url`.
pub webhook_url: Url,
}
#[typetag::serde]
impl PrometheusAlertChannel for SlackChannel {
/// Builds the Alertmanager receiver, route and global config for this Slack channel.
///
/// The receiver `Slack-<name>` carries one Slack config targeting channel
/// `name`; the route matches every alert except `Watchdog` and continues to
/// later routes; the global config sets `slack_api_url` to `webhook_url`.
fn get_alert_manager_config_contribution(&self) -> Result<AlertChannelConfig, InterpretError> {
Ok(AlertChannelConfig {
receiver: AlertChannelReceiver {
name: format!("Slack-{}", self.name),
slack_configs: Some(vec![SlackConfig {
channel: self.name.clone(),
send_resolved: true,
// Alertmanager template placeholders are passed through verbatim.
title: "{{ .CommonAnnotations.title }}".to_string(),
// NOTE(review): the literal below starts with `>-`, which looks like a YAML
// block-scalar indicator left over from hand-written YAML; as part of a Rust
// string it would presumably end up in the message text itself — confirm.
text: ">-
*Alert:* {{ .CommonLabels.alertname }}
*Severity:* {{ .CommonLabels.severity }}
*Namespace:* {{ .CommonLabels.namespace }}
*Pod:* {{ .CommonLabels.pod }}
*ExternalURL:* {{ .ExternalURL }}
{{ range .Alerts }}
*Instance:* {{ .Labels.instance }}
*Summary:* {{ .Annotations.summary }}
*Description:* {{ .Annotations.description }}
*Starts At:* {{ .StartsAt }}
*Status:* {{ .Status }}
{{ end }}"
.to_string(),
}]),
webhook_configs: None,
},
route: AlertChannelRoute {
receiver: format!("Slack-{}", self.name),
matchers: vec!["alertname!=Watchdog".to_string()],
r#continue: true,
},
// The webhook is exposed through the global section rather than the receiver.
global_config: Some(AlertChannelGlobalConfig {
slack_api_url: Some(self.webhook_url.clone()),
}),
})
}
/// Slack needs no extra workload, so there is no dependency score.
fn get_dependency_score(&self, _namespace: String) -> Option<HelmChartScore> {
None
}
}
/// Stateless alert channel whose receiver is named `null` and carries no
/// notification configs; its route matches only the `Watchdog` alert.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NullReceiver {}

impl NullReceiver {
    /// Creates the null receiver.
    pub fn new() -> Self {
        Self {}
    }
}

/// `new()` takes no arguments, so the type also offers `Default` for generic
/// code and struct-update syntax.
impl Default for NullReceiver {
    fn default() -> Self {
        Self::new()
    }
}
#[typetag::serde]
impl PrometheusAlertChannel for NullReceiver {
    /// Builds the `null` receiver (no Slack or webhook configs) and a route that
    /// matches only the `Watchdog` alert without continuing to later routes.
    fn get_alert_manager_config_contribution(&self) -> Result<AlertChannelConfig, InterpretError> {
        const RECEIVER_NAME: &str = "null";

        let receiver = AlertChannelReceiver {
            name: RECEIVER_NAME.to_string(),
            slack_configs: None,
            webhook_configs: None,
        };
        let route = AlertChannelRoute {
            receiver: RECEIVER_NAME.to_string(),
            matchers: vec![String::from("alertname=Watchdog")],
            r#continue: false,
        };

        Ok(AlertChannelConfig {
            receiver,
            route,
            global_config: None,
        })
    }

    /// The null receiver deploys nothing.
    fn get_dependency_score(&self, _namespace: String) -> Option<HelmChartScore> {
        None
    }
}

View File

@@ -0,0 +1,94 @@
use serde::{Deserialize, Serialize};
Review

This entire file looks pretty good 👍

Almost all types seem to describe the proper abstractions of prometheus types.

However, the implementation specific types should be bundled with their respective implementations. Such as SlackConfig and WebhookConfig.

This entire file looks pretty good 👍 Almost all types seem to describe the proper abstractions of prometheus types. However, the implementation specific types should be bundled with their respective implementations. Such as SlackConfig and WebhookConfig.
use url::Url;
/// Root of the Alertmanager values fragment; serializes as a single top-level
/// `alertmanager` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertManagerValues {
pub alertmanager: AlertManager,
}
/// Contents of the `alertmanager` key: an enable switch plus the configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertManager {
pub enabled: bool,
pub config: AlertManagerConfig,
}
/// What a single alert channel provides: one receiver, the route pointing at
/// it, and optionally a global configuration.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AlertChannelConfig {
pub receiver: AlertChannelReceiver,
pub route: AlertChannelRoute,
pub global_config: Option<AlertChannelGlobalConfig>,
}
/// A named receiver. Each config list is omitted from the serialized output
/// when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertChannelReceiver {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub slack_configs: Option<Vec<SlackConfig>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub webhook_configs: Option<Vec<WebhookConfig>>,
}
/// Top-level route: grouping settings plus the list of per-channel child routes.
/// The timing fields are kept as strings (e.g. `30s`, `5m`, `12h`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertManagerRoute {
pub group_by: Vec<String>,
pub group_wait: String,
pub group_interval: String,
pub repeat_interval: String,
pub routes: Vec<AlertChannelRoute>,
}
/// Global settings a channel may supply; `slack_api_url` is omitted from the
/// serialized output when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertChannelGlobalConfig {
#[serde(skip_serializing_if = "Option::is_none")]
pub slack_api_url: Option<Url>,
}
/// One entry of a receiver's `slack_configs` list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlackConfig {
pub channel: String,
pub send_resolved: bool,
// `title` and `text` are plain strings; callers in this module fill them with
// `{{ ... }}` template placeholders.
pub title: String,
pub text: String,
}
/// One entry of a receiver's `webhook_configs` list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebhookConfig {
pub url: Url,
pub send_resolved: bool,
}
/// Child route that sends alerts satisfying `matchers` to `receiver`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertChannelRoute {
pub receiver: String,
pub matchers: Vec<String>,
// Raw identifier because `continue` is a Rust keyword; falls back to `false`
// when the key is missing on deserialization.
#[serde(default)]
pub r#continue: bool,
}
/// The Alertmanager configuration: optional global section, the routing tree
/// and the receivers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertManagerConfig {
// NOTE(review): unlike the other optional fields in this file, `global` has no
// `skip_serializing_if`, so `None` is presumably written out as an explicit
// null — confirm that is intended.
pub global: Option<AlertChannelGlobalConfig>,
pub route: AlertManagerRoute,
pub receivers: Vec<AlertChannelReceiver>,
}
/// Baseline Alertmanager values: enabled, no global section, alerts grouped by
/// `job` with 30s / 5m / 12h timings, and a single `null` receiver that
/// absorbs the `Watchdog` alert.
///
/// Implemented as the standard `Default` trait (rather than an inherent method
/// of the same name) so the type works with `..Default::default()` and generic
/// `T: Default` code; `AlertManagerValues::default()` keeps working unchanged.
impl Default for AlertManagerValues {
    fn default() -> Self {
        // Receiver without any notification config; paired with the route below.
        let null_receiver = AlertChannelReceiver {
            name: "null".to_string(),
            slack_configs: None,
            webhook_configs: None,
        };
        // Routes only the `Watchdog` alert to `null` and stops evaluation there.
        let watchdog_route = AlertChannelRoute {
            receiver: "null".to_string(),
            matchers: vec!["alertname=Watchdog".to_string()],
            r#continue: false,
        };

        Self {
            alertmanager: AlertManager {
                enabled: true,
                config: AlertManagerConfig {
                    global: None,
                    route: AlertManagerRoute {
                        group_by: vec!["job".to_string()],
                        group_wait: "30s".to_string(),
                        group_interval: "5m".to_string(),
                        repeat_interval: "12h".to_string(),
                        routes: vec![watchdog_route],
                    },
                    receivers: vec![null_receiver],
                },
            },
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,5 @@
mod kube_prometheus;
pub mod monitoring_alerting;
mod discord_alert_manager;
mod config;
mod discord_alert_manager;
pub mod kube_prometheus;
pub mod monitoring_alerting;

View File

@@ -1,59 +1,46 @@
use async_trait::async_trait;
use email_address::EmailAddress;
use log::info;
use serde::Serialize;
use url::Url;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
modules::monitoring::kube_prometheus::types::{
AlertManager, AlertManagerConfig, AlertManagerRoute,
},
score::Score,
topology::{HelmCommand, Topology},
};
use super::{
config::KubePrometheusConfig, discord_alert_manager::discord_alert_manager_score,
kube_prometheus::kube_prometheus_helm_chart_score,
config::KubePrometheusChartConfig,
kube_prometheus::{
kube_prometheus::kube_prometheus_helm_chart_score,
prometheus_alert_channel::{NullReceiver, PrometheusAlertChannel},
types::AlertManagerValues,
},
};
#[derive(Debug, Clone, Serialize)]
pub enum AlertChannel {
Discord {
name: String,
webhook_url: Url,
},
Slack {
slack_channel: String,
webhook_url: Url,
},
//TODO test and implement in helm chart
//currently does not work
Smpt {
email_address: EmailAddress,
service_name: String,
},
}
#[derive(Debug, Clone, Serialize)]
pub struct MonitoringAlertingStackScore {
pub alert_channel: Vec<AlertChannel>,
pub struct MonitoringAlertingScore {
pub alert_channels: Vec<Box<dyn PrometheusAlertChannel>>,
pub namespace: Option<String>,
}
impl MonitoringAlertingStackScore {
impl MonitoringAlertingScore {
pub fn new() -> Self {
Self {
alert_channel: Vec::new(),
alert_channels: Vec::new(),
namespace: None,
}
}
}
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(MonitoringAlertingStackInterpret {
Box::new(MonitoringAlertingInterpret {
score: self.clone(),
})
}
@@ -63,17 +50,61 @@ impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
}
#[derive(Debug, Clone, Serialize)]
struct MonitoringAlertingStackInterpret {
score: MonitoringAlertingStackScore,
struct MonitoringAlertingInterpret {
score: MonitoringAlertingScore,
}
impl MonitoringAlertingStackInterpret {
async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig {
let mut config = KubePrometheusConfig::new();
impl MonitoringAlertingInterpret {
async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusChartConfig {
let mut config = KubePrometheusChartConfig::new();
let mut receivers = Vec::new();
let mut routes = Vec::new();
let mut global_config = None;
if let Some(ns) = &self.score.namespace {
config.namespace = ns.clone();
};
let null_channel = NullReceiver::new();
Review

Is this really required? This feels wrong, at least it would deserve a small comment explaining why you are ALWAYS instantiating a hardcoded NullReceiver

Is this really required? This feels wrong, at least it would deserve a small comment explaining why you are ALWAYS instantiating a hardcoded NullReceiver
let null_channel = null_channel
.get_alert_manager_config_contribution()
.unwrap();
receivers.push(null_channel.receiver);
routes.push(null_channel.route);
for channel in self.score.alert_channels.clone() {
let alert_manager_config_contribution =
channel.get_alert_manager_config_contribution().unwrap();
receivers.push(alert_manager_config_contribution.receiver);
routes.push(alert_manager_config_contribution.route);
if let Some(global) = alert_manager_config_contribution.global_config {
global_config = Some(global);
}
}
config.alert_channel = self.score.alert_channel.clone();
info!("after alert receiver: {:#?}", receivers);
info!("after alert routes: {:#?}", routes);
let alert_manager_config = AlertManagerConfig {
global: global_config,
route: AlertManagerRoute {
Review

Why is all this config hardcoded? Shouldn't it be exposed in the Score's configuration? I think it should have defaults so that users are not forced to set everything by themselves but it should not be hardcoded.

Why is all this config hardcoded? Shouldn't it be exposed in the Score's configuration? I think it should have defaults so that users are not forced to set everything by themselves but it should not be hardcoded.
group_by: vec!["job".to_string()],
group_wait: "30s".to_string(),
group_interval: "5m".to_string(),
repeat_interval: "12h".to_string(),
routes,
},
receivers,
};
info!("alert manager config: {:?}", config);
config.alert_manager_values = AlertManagerValues {
alertmanager: AlertManager {
enabled: true,
config: alert_manager_config,
},
};
config
}
@@ -81,7 +112,7 @@ impl MonitoringAlertingStackInterpret {
&self,
inventory: &Inventory,
topology: &T,
config: &KubePrometheusConfig,
config: &KubePrometheusChartConfig,
) -> Result<Outcome, InterpretError> {
let helm_chart = kube_prometheus_helm_chart_score(config);
helm_chart
@@ -90,33 +121,29 @@ impl MonitoringAlertingStackInterpret {
.await
}
async fn deploy_alert_channel_service<T: Topology + HelmCommand>(
async fn deploy_alert_channel_dependencies<T: Topology + HelmCommand>(
Review

I am 99% sure this is wrong.

Dependencies should be handled at the Topology level as capabilities. But I'm not 100% sure because this has the drawback of not being "just in time" when the score is executed. This would be happening right at the Topology's initialization.

However, this implementation here has many drawbacks :

The PrometheusAlertChannel is handling dependencies in a unique way, that is not following the rest of Harmony's architecture.

Take the K8sAnywhereTopology for example : K8sResourceScore has a K8sClient dependency, which is provided by K8sAnywhere. Then, upon initialization, K8sAnywhere will make sure that it is fulfilling its contract by making sure that there is a K8sClient available, either by installing K3d locally or verifying that it is able to reach whatever cluster it is connected to.


This means that I would have expected this architecture :

trait PrometheusAlertChannel : Serialize {
  fn get_config(&self) -> Option<AlertChannelConfig>;
}

struct PrometheusAlertChannelInterpret {
  channel_config: AlertChannelConfig
}

#[derive(Serialize)]
struct DiscordAlertChannel<T: DiscordAlertSender> { // Use the generic here to specify the capability required by the DiscordAlertChannel. Then, in turn the PrometheusScore will be built with a `T` bound which will be the Topology in the end, and this Topology will be forced to implement the correct capability. Then it is guaranteed that when you deploy a DiscordAlertChannel on a given topology, this topology will manage the dependency.
//
// There might be some compilation detail I'm missing here, but this would be a lot more robust than the current implementation

}

impl PrometheusAlertChannel for DiscordAlertChannel {
 // TODO
}
I am 99% sure this is wrong. Dependencies should be handled at the Topology level as capabilities. But I'm not 100% sure because this has the drawback of not being "just in time" when the score is executed. This would be happening right at the Topology's initialization. However, this implementation here has many drawbacks : The PrometheusAlertChannel is handling dependencies in a unique way, that is not following the rest of Harmony's architecture. Take the K8sAnywhereTopology for example : K8sResourceScore has a K8sClient dependency, which is provided by K8sAnywhere. Then, upon initialization, K8sAnywhere will make sure that it is fulfilling is contract by making sure that there is a K8sClient available, either by installing K3d locally or verifying that it is able to reach whatever cluster it is connected to. --- This means that I would have expected this architecture : ```rust trait PrometheusAlertChannel : Serialize { fn get_config(&self) -> Option<AlertChannelConfig>; } struct PrometheusAlertChannelInterpret { channel_config: AlertChannelConfig } #[derive(Serialize)] struct DiscordAlertChannel<T: DiscordAlertSender> { // Use the generic here to specify the capability required by the DiscordAlertChannel. Then, in turn the PrometheusScore will be built with a `T` bound which will be the Topology in the end, and this Topology will be forced to implement the correct capability. Then it is guaranteed that when you deploy a DiscordAlertChannel on a given topology, this topology will manage the dependency. // // There might be some compilation detail I'm missing here, but this would be a lot more robust than the current implementation } impl PrometheusAlertChannel for DiscordAlertChannel { // TODO }
&self,
inventory: &Inventory,
topology: &T,
config: &KubePrometheusConfig,
config: &KubePrometheusChartConfig,
) -> Result<Outcome, InterpretError> {
let mut outcomes = vec![];
let mut outcomes = Vec::new();
for channel in &self.score.alert_channel {
let outcome = match channel {
AlertChannel::Discord { .. } => {
discord_alert_manager_score(config)
.create_interpret()
.execute(inventory, topology)
.await
for channel in &self.score.alert_channels {
let ns = config.namespace.clone();
if let Some(dependency_score) = channel.get_dependency_score(ns) {
match dependency_score
.create_interpret()
.execute(inventory, topology)
.await
{
Ok(outcome) => outcomes.push(outcome),
Err(e) => {
info!("failed to deploy dependency: {}", { &e });
return Err(e);
}
}
AlertChannel::Slack { .. } => Ok(Outcome::success(
"No extra configs for slack alerting".to_string(),
)),
AlertChannel::Smpt { .. } => {
todo!()
}
};
outcomes.push(outcome);
}
for result in outcomes {
result?;
}
}
Ok(Outcome::success("All alert channels deployed".to_string()))
@@ -124,22 +151,22 @@ impl MonitoringAlertingStackInterpret {
}
#[async_trait]
impl<T: Topology + HelmCommand> Interpret<T> for MonitoringAlertingStackInterpret {
impl<T: Topology + HelmCommand> Interpret<T> for MonitoringAlertingInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let config = self.build_kube_prometheus_helm_chart_config().await;
info!("Built kube prometheus config");
info!("Built kube prometheus config{:?}", config);
info!("Installing kube prometheus chart");
self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config)
.await?;
info!("Installing alert channel service");
self.deploy_alert_channel_service(inventory, topology, &config)
self.deploy_alert_channel_dependencies(inventory, topology, &config)
.await?;
Ok(Outcome::success(format!(
"succesfully deployed monitoring and alerting stack"
"succesfully deployed monitoring and alerting score"
)))
}