Compare commits
4 Commits
feat/kube-
...
feat/disco
| Author | SHA1 | Date | |
|---|---|---|---|
| b5b77cf1ac | |||
| dda8e29843 | |||
| b33650e9d5 | |||
| ea4709a409 |
@@ -5,6 +5,7 @@ use inquire::Confirm;
|
||||
use log::{info, warn};
|
||||
use tokio::sync::{Mutex, OnceCell};
|
||||
|
||||
use crate::score::Score;
|
||||
use crate::{
|
||||
executors::ExecutorError,
|
||||
interpret::{InterpretError, Outcome},
|
||||
@@ -17,7 +18,7 @@ use crate::{
|
||||
use super::{
|
||||
HelmCommand, K8sclient, Topology,
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::AlertReceiver,
|
||||
oberservability::monitoring::{AlertReceiver, AlertReceiverProvision},
|
||||
tenant::{
|
||||
ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy, k8s::K8sTenantManager,
|
||||
},
|
||||
@@ -67,6 +68,25 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn initialize_alert_receiver<C>(
|
||||
&self,
|
||||
config: &C,
|
||||
inventory: &Inventory,
|
||||
) -> Result<AlertReceiver, InterpretError>
|
||||
where
|
||||
Self: Topology + HelmCommand,
|
||||
C: AlertReceiverProvision<Self> + Send + Sync,
|
||||
{
|
||||
let score = config.get_deployment_score();
|
||||
let interpret = score.create_interpret();
|
||||
interpret.execute(inventory, self).await?;
|
||||
|
||||
Ok(AlertReceiver {
|
||||
receiver_id: config.alert_receiver_id(),
|
||||
receiver_installed: true,
|
||||
})
|
||||
}
|
||||
|
||||
fn is_helm_available(&self) -> Result<(), String> {
|
||||
let version_result = Command::new("helm")
|
||||
.arg("version")
|
||||
|
||||
@@ -6,28 +6,50 @@ use std::fmt::Debug;
|
||||
|
||||
use crate::interpret::InterpretError;
|
||||
|
||||
use crate::inventory::Inventory;
|
||||
use crate::score::Score;
|
||||
use crate::topology::HelmCommand;
|
||||
use crate::{interpret::Outcome, topology::Topology};
|
||||
|
||||
/// Represents an entity responsible for collecting and organizing observability data
|
||||
/// from various telemetry sources
|
||||
/// from various telemetry sources such as Prometheus or Datadog
|
||||
/// A `Monitor` abstracts the logic required to scrape, aggregate, and structure
|
||||
/// monitoring data, enabling consistent processing regardless of the underlying data source.
|
||||
#[async_trait]
|
||||
pub trait Monitor<T: Topology>: Debug + Send + Sync {
|
||||
async fn deploy_monitor(&self, topology: &T) -> Result<Outcome, InterpretError>;
|
||||
async fn deploy_monitor(
|
||||
&self,
|
||||
topology: &T,
|
||||
alert_receivers: Vec<AlertReceiver>,
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
|
||||
async fn delete_monitor(&self, topolgy: &T) -> Result<Outcome, InterpretError>;
|
||||
async fn delete_monitor(
|
||||
&self,
|
||||
topolgy: &T,
|
||||
alert_receivers: Vec<AlertReceiver>,
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertReceiverDeployment<T: Topology>: Debug + DynClone + Send + Sync {
|
||||
async fn deploy_alert_receiver(&self, topology: &T) -> Result<Outcome, InterpretError>;
|
||||
pub trait EnsureAlertReceiver<T: Topology>: Debug + DynClone + Send + Sync {
|
||||
async fn ensure_alert_receiver(
|
||||
&self,
|
||||
inventory: Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
}
|
||||
|
||||
dyn_clone::clone_trait_object!(<T> AlertReceiverDeployment<T>);
|
||||
dyn_clone::clone_trait_object!(<T> EnsureAlertReceiver<T>);
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct AlertReceiver {
|
||||
pub receiver_id: String,
|
||||
pub receiver_installed: bool,
|
||||
}
|
||||
|
||||
/// Provides the ability to turn an alert config into an executable score
|
||||
/// for the topology
|
||||
pub trait AlertReceiverProvision<T: Topology + HelmCommand> {
|
||||
fn get_deployment_score(&self) -> Box<dyn Score<T>>;
|
||||
fn alert_receiver_id(&self) -> String;
|
||||
}
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use url::Url;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AlertManagerValues {
|
||||
pub alertmanager: AlertManager,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AlertManager {
|
||||
pub enabled: bool,
|
||||
pub config: AlertManagerConfig,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct AlertChannelConfig {
|
||||
pub receiver: AlertChannelReceiver,
|
||||
pub route: AlertChannelRoute,
|
||||
pub global_config: Option<AlertChannelGlobalConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AlertChannelReceiver {
|
||||
pub name: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub slack_configs: Option<Vec<SlackConfig>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub webhook_configs: Option<Vec<WebhookConfig>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AlertManagerRoute {
|
||||
pub group_by: Vec<String>,
|
||||
pub group_wait: String,
|
||||
pub group_interval: String,
|
||||
pub repeat_interval: String,
|
||||
pub routes: Vec<AlertChannelRoute>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AlertChannelGlobalConfig {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub slack_api_url: Option<Url>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SlackConfig {
|
||||
pub channel: String,
|
||||
pub send_resolved: bool,
|
||||
pub title: String,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WebhookConfig {
|
||||
pub url: Url,
|
||||
pub send_resolved: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AlertChannelRoute {
|
||||
pub receiver: String,
|
||||
pub matchers: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub r#continue: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AlertManagerConfig {
|
||||
pub global: Option<AlertChannelGlobalConfig>,
|
||||
pub route: AlertManagerRoute,
|
||||
pub receivers: Vec<AlertChannelReceiver>,
|
||||
}
|
||||
|
||||
impl AlertManagerValues {
|
||||
pub fn default() -> Self {
|
||||
Self {
|
||||
alertmanager: AlertManager {
|
||||
enabled: true,
|
||||
config: AlertManagerConfig {
|
||||
global: None,
|
||||
route: AlertManagerRoute {
|
||||
group_by: vec!["job".to_string()],
|
||||
group_wait: "30s".to_string(),
|
||||
group_interval: "5m".to_string(),
|
||||
repeat_interval: "12h".to_string(),
|
||||
routes: vec![AlertChannelRoute {
|
||||
receiver: "null".to_string(),
|
||||
matchers: vec!["alertname=Watchdog".to_string()],
|
||||
r#continue: false,
|
||||
}],
|
||||
},
|
||||
receivers: vec![AlertChannelReceiver {
|
||||
name: "null".to_string(),
|
||||
slack_configs: None,
|
||||
webhook_configs: None,
|
||||
}],
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,4 @@
|
||||
use super::{
|
||||
discord_alert_manager::discord_alert_manager_score, kube_prometheus_monitor::AlertManagerConfig,
|
||||
};
|
||||
use super::discord_alert_manager::discord_alert_manager_score;
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
use serde_yaml::Value;
|
||||
@@ -8,23 +6,15 @@ use tokio::sync::OnceCell;
|
||||
use url::Url;
|
||||
|
||||
use crate::{
|
||||
data::{Id, Version},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
interpret::{Interpret, InterpretError, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HelmCommand, K8sAnywhereTopology, Topology,
|
||||
oberservability::monitoring::{AlertReceiver, AlertReceiverDeployment},
|
||||
oberservability::monitoring::{AlertReceiverProvision, EnsureAlertReceiver},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + DiscordWebhookReceiver> AlertReceiverDeployment<T> for DiscordWebhookConfig {
|
||||
async fn deploy_alert_receiver(&self, topology: &T) -> Result<Outcome, InterpretError> {
|
||||
topology.deploy_discord_webhook_receiver(self.clone()).await
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct DiscordWebhookConfig {
|
||||
pub webhook_url: Url,
|
||||
@@ -33,9 +23,35 @@ pub struct DiscordWebhookConfig {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait DiscordWebhookReceiver {
|
||||
async fn deploy_discord_webhook_receiver(
|
||||
impl<T: Topology + DiscordWebhookReceiver> EnsureAlertReceiver<T> for DiscordWebhookConfig {
|
||||
async fn ensure_alert_receiver(
|
||||
&self,
|
||||
inventory: Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
topology
|
||||
.ensure_discord_webhook_receiver(&inventory, self.clone())
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Topology + HelmCommand> AlertReceiverProvision<T> for DiscordWebhookConfig {
|
||||
fn get_deployment_score(&self) -> Box<dyn Score<T>> {
|
||||
Box::new(DiscordWebhookReceiverScore {
|
||||
config: self.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn alert_receiver_id(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait DiscordWebhookReceiver {
|
||||
async fn ensure_discord_webhook_receiver(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
config: DiscordWebhookConfig,
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
fn delete_discord_webhook_receiver(
|
||||
@@ -44,17 +60,11 @@ pub trait DiscordWebhookReceiver {
|
||||
) -> Result<Outcome, InterpretError>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: DiscordWebhookReceiver> AlertManagerConfig<T> for DiscordWebhookConfig {
|
||||
async fn get_alert_manager_config(&self) -> Result<Value, InterpretError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl DiscordWebhookReceiver for K8sAnywhereTopology {
|
||||
async fn deploy_discord_webhook_receiver(
|
||||
async fn ensure_discord_webhook_receiver(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
config: DiscordWebhookConfig,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let receiver_key = config.name.clone();
|
||||
@@ -72,9 +82,7 @@ impl DiscordWebhookReceiver for K8sAnywhereTopology {
|
||||
}
|
||||
|
||||
let final_state = cell
|
||||
.get_or_try_init(|| async {
|
||||
initialize_discord_webhook_receiver(config.clone(), self).await
|
||||
})
|
||||
.get_or_try_init(|| async { self.initialize_alert_receiver(&config, inventory).await })
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
@@ -91,27 +99,17 @@ impl DiscordWebhookReceiver for K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
async fn initialize_discord_webhook_receiver(
|
||||
conf: DiscordWebhookConfig,
|
||||
topology: &K8sAnywhereTopology,
|
||||
) -> Result<AlertReceiver, InterpretError> {
|
||||
println!(
|
||||
"Attempting to initialize Discord adapter for: {}",
|
||||
conf.name
|
||||
);
|
||||
let score = DiscordWebhookReceiverScore {
|
||||
config: conf.clone(),
|
||||
};
|
||||
let inventory = Inventory::autoload();
|
||||
let interpret = score.create_interpret();
|
||||
|
||||
interpret.execute(&inventory, topology).await?;
|
||||
|
||||
Ok(AlertReceiver {
|
||||
receiver_id: conf.name,
|
||||
receiver_installed: true,
|
||||
})
|
||||
pub trait AlertManagerConfig<T> {
|
||||
fn get_alert_manager_config(&self) -> Result<Value, InterpretError>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: DiscordWebhookReceiver> AlertManagerConfig<T> for DiscordWebhookConfig {
|
||||
fn get_alert_manager_config(&self) -> Result<Value, InterpretError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
struct DiscordWebhookReceiverScore {
|
||||
config: DiscordWebhookConfig,
|
||||
@@ -119,50 +117,15 @@ struct DiscordWebhookReceiverScore {
|
||||
|
||||
impl<T: Topology + HelmCommand> Score<T> for DiscordWebhookReceiverScore {
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(DiscordWebhookReceiverScoreInterpret {
|
||||
config: self.config.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"DiscordWebhookReceiverScore".to_string()
|
||||
}
|
||||
}
|
||||
#[derive(Debug)]
|
||||
struct DiscordWebhookReceiverScoreInterpret {
|
||||
config: DiscordWebhookConfig,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + HelmCommand> Interpret<T> for DiscordWebhookReceiverScoreInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
discord_alert_manager_score(
|
||||
self.config.webhook_url.clone(),
|
||||
self.config.name.clone(),
|
||||
self.config.name.clone(),
|
||||
)
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
fn name(&self) -> String {
|
||||
"DiscordWebhookReceiverScore".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
use serde_yaml::Value;
|
||||
|
||||
use crate::{
|
||||
data::{Id, Version},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HelmCommand, Topology,
|
||||
oberservability::monitoring::{AlertReceiverDeployment, Monitor},
|
||||
},
|
||||
};
|
||||
|
||||
use super::{
|
||||
config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct KubePrometheus<T> {
|
||||
alert_receivers: Vec<Box<dyn AlertReceiverDeployment<T>>>,
|
||||
config: KubePrometheusConfig,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertManagerConfig<T> {
|
||||
async fn get_alert_manager_config(&self) -> Result<Value, InterpretError>;
|
||||
}
|
||||
|
||||
impl<T: Topology> KubePrometheus<T> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
alert_receivers: Vec::new(),
|
||||
config: KubePrometheusConfig::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + HelmCommand + std::fmt::Debug> Monitor<T> for KubePrometheus<T> {
|
||||
async fn deploy_monitor(&self, topology: &T) -> Result<Outcome, InterpretError> {
|
||||
for alert_receiver in &self.alert_receivers {
|
||||
alert_receiver.deploy_alert_receiver(topology).await?;
|
||||
}
|
||||
let score = KubePrometheusScore {
|
||||
config: self.config.clone(),
|
||||
};
|
||||
let inventory = Inventory::autoload();
|
||||
score.create_interpret().execute(&inventory, topology).await
|
||||
}
|
||||
|
||||
async fn delete_monitor(&self, _topolgy: &T) -> Result<Outcome, InterpretError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
struct KubePrometheusScore {
|
||||
config: KubePrometheusConfig,
|
||||
}
|
||||
|
||||
impl<T: Topology + HelmCommand> Score<T> for KubePrometheusScore {
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(KubePromethusScoreInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
struct KubePromethusScoreInterpret {
|
||||
score: KubePrometheusScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + HelmCommand> Interpret<T> for KubePromethusScoreInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
kube_prometheus_helm_chart_score(&self.score.config)
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,5 @@
|
||||
pub mod alertmanager_types;
|
||||
mod config;
|
||||
mod discord_alert_manager;
|
||||
pub mod discord_webhook_sender;
|
||||
mod kube_prometheus_helm_chart;
|
||||
pub mod kube_prometheus_monitor;
|
||||
mod kube_prometheus;
|
||||
pub mod monitoring_alerting;
|
||||
|
||||
@@ -14,7 +14,8 @@ use crate::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
|
||||
config::KubePrometheusConfig, discord_alert_manager::discord_alert_manager_score,
|
||||
kube_prometheus::kube_prometheus_helm_chart_score,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
|
||||
Reference in New Issue
Block a user