feat: Initial setup for monitoring and alerting #48
@@ -49,3 +49,4 @@ fqdn = { version = "0.4.6", features = [
|
||||
"serde",
|
||||
] }
|
||||
temp-dir = "0.1.14"
|
||||
dyn-clone = "1.0.19"
|
||||
|
||||
@@ -15,11 +15,9 @@ use crate::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
HelmCommand, K8sclient, Topology,
|
||||
k8s::K8sClient,
|
||||
tenant::{
|
||||
ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy, k8s::K8sTenantManager,
|
||||
},
|
||||
k8s::K8sClient, oberservability::notification_adapter_deployer::NotificationAdapterDeployer, tenant::{
|
||||
k8s::K8sTenantManager, ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy
|
||||
}, HelmCommand, K8sclient, Topology
|
||||
};
|
||||
|
||||
struct K8sState {
|
||||
@@ -263,3 +261,19 @@ impl TenantManager for K8sAnywhereTopology {
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl NotificationAdapterDeployer for K8sAnywhereTopology {
|
||||
fn deploy_notification_adapter(
|
||||
&self,
|
||||
_notification_adapter_id: &str,
|
||||
|
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
todo!()
|
||||
}
|
||||
fn remove_notification_adapter(
|
||||
&self,
|
||||
_notification_adapter_id: &str,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ mod http;
|
||||
mod k8s_anywhere;
|
||||
mod localhost;
|
||||
pub mod tenant;
|
||||
pub mod oberservability;
|
||||
pub use k8s_anywhere::*;
|
||||
pub use localhost::*;
|
||||
pub mod k8s;
|
||||
|
||||
16
harmony/src/domain/topology/oberservability/mod.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
use monitoring::AlertChannelConfig;
|
||||
|
||||
pub mod monitoring;
|
||||
pub mod notification_adapter_deployer;
|
||||
|
||||
pub enum MonitoringBackendType {
|
||||
|
johnride
commented
Violates OCP : we want to allow external contributors to provide new backend implementations (that is, if we want to introduce the concept of monitoring backend right now or wait until the needs arises?) Violates OCP : we want to allow external contributors to provide new backend implementations (that is, if we want to introduce the concept of monitoring backend right now or wait until the needs arises?)
|
||||
KubePrometheus,
|
||||
}
|
||||
|
||||
pub struct MonitorConfig {
|
||||
pub backend: MonitoringBackendType,
|
||||
pub alert_channels: Vec<Box<dyn AlertChannelConfig>>,
|
||||
}
|
||||
|
||||
|
||||
|
||||
60
harmony/src/domain/topology/oberservability/monitoring.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use std::fmt::Debug;
|
||||
use dyn_clone::DynClone;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::interpret::InterpretError;
|
||||
|
||||
use crate::{
|
||||
interpret::Outcome,
|
||||
topology::Topology,
|
||||
};
|
||||
|
johnride marked this conversation as resolved
Outdated
johnride
commented
A bit of rustdoc would be relevant here to explain the intended use of this trait. A bit of rustdoc would be relevant here to explain the intended use of this trait.
|
||||
|
||||
|
johnride
commented
This rustdoc is pretty good to explain the abstraction, but giving a concrete example such as Prometheus would make a complete understanding much easier. Even I still have a bit of trouble to figure it out as-is. This rustdoc is pretty good to explain the abstraction, but giving a concrete example such as Prometheus would make a complete understanding much easier. Even I still have a bit of trouble to figure it out as-is.
|
||||
use super::notification_adapter_deployer::NotificationAdapterDeployer;
|
||||
use super::{MonitorConfig, MonitoringBackendType};
|
||||
|
||||
#[async_trait]
|
||||
pub trait Monitor<T: Topology + NotificationAdapterDeployer> {
|
||||
async fn provision_monitor(
|
||||
&self,
|
||||
topology: &T,
|
||||
monitor_config: &MonitorConfig,
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
|
||||
async fn delete_monitor(
|
||||
&self,
|
||||
topolgy: &T,
|
||||
|
johnride marked this conversation as resolved
Outdated
johnride
commented
I think this trait is correct but the way it is implemented feels wrong. I understand that you currently only have the discord implementation and you interact directly with it. But I feel like there will be a use case where we will want to have simple getter and setters for common attributes of AlertChannelConfig. I don't see a 100% clear use case right now though. Food for thought. Which makes me think : is this trait required at all? Probably not. Each AlertChannel implementation will have its own config and either the Monitor or AlertChannel trait will provide standardization for identification and compilation safety. But it is probably not necessary to try to expose a config specific trait. I think this trait is correct but the way it is implemented feels wrong. I understand that you currently only have the discord implementation and you interact directly with it. But I feel like there will be a use case where we will want to have simple getter and setters for common attributes of AlertChannelConfig. I don't see a 100% clear use case right now though.
Food for thought.
Which makes me think : is this trait required at all? Probably not. Each AlertChannel implementation will have its own config and either the Monitor or AlertChannel trait will provide standardization for identification and compilation safety. But it is probably not necessary to try to expose a **config** specific trait.
|
||||
monitor_config: &MonitorConfig,
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
|
||||
|
johnride marked this conversation as resolved
Outdated
johnride
commented
I don't think webhook url deserves to be in this abstract trait. It is possible that the AlertChannel does not have a webhook, such as SMS, phone calls, SMTP. I don't think webhook url deserves to be in this abstract trait. It is possible that the AlertChannel does not have a webhook, such as SMS, phone calls, SMTP.
|
||||
async fn configure_alerting(
|
||||
&self,
|
||||
topology: &T,
|
||||
monitor_config: &MonitorConfig,
|
||||
|
johnride
commented
SRP problem SRP problem
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
|
||||
async fn ensure_alert_channel_dependencies(
|
||||
&self,
|
||||
topology: &T,
|
||||
monitor_config: &MonitorConfig,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
for channel in &monitor_config.alert_channels {
|
||||
if let Some(notification_adapter_id) =
|
||||
channel.requires_external_alert_channel_adapter(&monitor_config.backend)
|
||||
{
|
||||
topology.deploy_notification_adapter(
|
||||
¬ification_adapter_id.as_ref(),
|
||||
)?;
|
||||
}
|
||||
|
||||
}
|
||||
Ok(Outcome::success(format!("deployed alert channels {:?}", &monitor_config.alert_channels)))
|
||||
}
|
||||
|
johnride
commented
I think we can solve the dependency issue by introducing a T here : I think we can solve the dependency issue by introducing a T here :
```rust
pub trait AlertChannelConfig<T> {
// only alertChannel related stuff, be careful about SRP here, no dependency management at this level other than the T itself
}
impl <T: DiscordWebhookSender> AlertChannelConfig<T> for DiscordWebhookConfig { // The dependency on the DiscordWebHookSender is declared here
// implement functions
}
pub trait Monitor<T> {
config: Vec<Box<dyn AlertChannelConfig<T>>> // The T from alertChannelConfig here will force the Monitor to provide a T that implements DiscordWebhookSender
}
impl DiscordWebhookSender for K8sAnywhereTopology { // Satisfy here the trait bound on the DiscordWehbookConfig
// implement functions that make sure DiscordAlertManager is installed, use the same pattern as for k8sClient that autoinstalls k3d if no k8s cluster is available
}
```
|
||||
}
|
||||
|
||||
pub trait AlertChannelConfig: Debug + DynClone + Send + Sync {
|
||||
fn build_backend_integration_config(&self, backend: &MonitoringBackendType) -> Result<Value, InterpretError>;
|
||||
fn requires_external_alert_channel_adapter(&self, backend: &MonitoringBackendType) -> Option<String>;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
use crate::interpret::{InterpretError, Outcome};
|
||||
|
||||
pub trait NotificationAdapterDeployer {
|
||||
fn deploy_notification_adapter(
|
||||
&self,
|
||||
notification_adapter_id: &str,
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
fn remove_notification_adapter(
|
||||
&self,
|
||||
notication_adapter_id: &str,
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
}
|
||||
I feel like this is not a correct abstraction. NotificationAdapter has a responsibility and Deployer has another. This is a strong smell of broken Single Responsibility principle, probably interface segregation too.