Compare commits

..

2 Commits

Author SHA1 Message Date
de3e7869f7 feat: added impl for kube prometheus monitor
All checks were successful
Run Check Script / check (push) Successful in 1m46s
2025-06-04 11:50:36 -04:00
57eabc9834 wip: added alert manager types for use with kube-prometheus alert manager 2025-06-04 09:31:17 -04:00
8 changed files with 306 additions and 100 deletions

View File

@@ -5,7 +5,6 @@ use inquire::Confirm;
use log::{info, warn};
use tokio::sync::{Mutex, OnceCell};
use crate::score::Score;
use crate::{
executors::ExecutorError,
interpret::{InterpretError, Outcome},
@@ -18,7 +17,7 @@ use crate::{
use super::{
HelmCommand, K8sclient, Topology,
k8s::K8sClient,
oberservability::monitoring::{AlertReceiver, AlertReceiverProvision},
oberservability::monitoring::AlertReceiver,
tenant::{
ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy, k8s::K8sTenantManager,
},
@@ -68,25 +67,6 @@ impl K8sAnywhereTopology {
}
}
/// Deploys the alert receiver described by `config` and reports it as installed.
///
/// The deployment score is obtained from `config`, turned into an interpret and
/// executed against this topology with the supplied `inventory`.
///
/// # Errors
/// Returns the `InterpretError` produced by the interpret's `execute` call.
pub async fn initialize_alert_receiver<C>(
&self,
config: &C,
inventory: &Inventory,
) -> Result<AlertReceiver, InterpretError>
where
Self: Topology + HelmCommand,
C: AlertReceiverProvision<Self> + Send + Sync,
{
// Ask the config for the score that knows how to deploy this receiver.
let score = config.get_deployment_score();
let interpret = score.create_interpret();
// The outcome value is discarded; only failure is propagated.
interpret.execute(inventory, self).await?;
// Reaching this point means execution succeeded, so the receiver is flagged as installed.
Ok(AlertReceiver {
receiver_id: config.alert_receiver_id(),
receiver_installed: true,
})
}
fn is_helm_available(&self) -> Result<(), String> {
let version_result = Command::new("helm")
.arg("version")

View File

@@ -6,50 +6,28 @@ use std::fmt::Debug;
use crate::interpret::InterpretError;
use crate::inventory::Inventory;
use crate::score::Score;
use crate::topology::HelmCommand;
use crate::{interpret::Outcome, topology::Topology};
/// Represents an entity responsible for collecting and organizing observability data
/// from various telemetry sources such as Prometheus or Datadog
/// from various telemetry sources
/// A `Monitor` abstracts the logic required to scrape, aggregate, and structure
/// monitoring data, enabling consistent processing regardless of the underlying data source.
#[async_trait]
pub trait Monitor<T: Topology>: Debug + Send + Sync {
async fn deploy_monitor(
&self,
topology: &T,
alert_receivers: Vec<AlertReceiver>,
) -> Result<Outcome, InterpretError>;
async fn deploy_monitor(&self, topology: &T) -> Result<Outcome, InterpretError>;
async fn delete_monitor(
&self,
topolgy: &T,
alert_receivers: Vec<AlertReceiver>,
) -> Result<Outcome, InterpretError>;
async fn delete_monitor(&self, topolgy: &T) -> Result<Outcome, InterpretError>;
}
#[async_trait]
pub trait EnsureAlertReceiver<T: Topology>: Debug + DynClone + Send + Sync {
async fn ensure_alert_receiver(
&self,
inventory: Inventory,
topology: &T,
) -> Result<Outcome, InterpretError>;
pub trait AlertReceiverDeployment<T: Topology>: Debug + DynClone + Send + Sync {
async fn deploy_alert_receiver(&self, topology: &T) -> Result<Outcome, InterpretError>;
}
dyn_clone::clone_trait_object!(<T> EnsureAlertReceiver<T>);
dyn_clone::clone_trait_object!(<T> AlertReceiverDeployment<T>);
/// Record describing an alert receiver and whether it has been installed.
#[derive(Debug, Clone, Serialize)]
pub struct AlertReceiver {
/// Identifier of the receiver.
pub receiver_id: String,
/// `true` once the receiver has been installed.
pub receiver_installed: bool,
}
/// Provides the ability to turn an alert config into an executable score
/// for the topology `T`, which must support both `Topology` and `HelmCommand`.
pub trait AlertReceiverProvision<T: Topology + HelmCommand> {
/// Returns the boxed score used to deploy this alert receiver on `T`.
fn get_deployment_score(&self) -> Box<dyn Score<T>>;
/// Returns the identifier under which this alert receiver is tracked.
fn alert_receiver_id(&self) -> String;
}

View File

@@ -0,0 +1,102 @@
use serde::{Deserialize, Serialize};
use url::Url;
/// Root values object holding a single `alertmanager` section.
///
/// NOTE(review): presumably serialized into kube-prometheus chart values — confirm against the caller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertManagerValues {
/// The `alertmanager` section: enable flag plus configuration.
pub alertmanager: AlertManager,
}
/// The `alertmanager` section: an on/off switch and its configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertManager {
/// Whether the alert manager is enabled.
pub enabled: bool,
/// Global settings, routing tree and receivers.
pub config: AlertManagerConfig,
}
/// Configuration contributed by one alert channel: its receiver, the route
/// that targets it, and optional global settings.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AlertChannelConfig {
/// Receiver definition for this channel.
pub receiver: AlertChannelReceiver,
/// Route entry for this channel.
pub route: AlertChannelRoute,
/// Optional global settings; unlike other optional fields in this file,
/// this one has no `skip_serializing_if`, so `None` is still serialized.
pub global_config: Option<AlertChannelGlobalConfig>,
}
/// A named receiver with optional Slack and webhook configurations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertChannelReceiver {
/// Receiver name; routes refer to a receiver by this string.
pub name: String,
/// Slack configurations; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub slack_configs: Option<Vec<SlackConfig>>,
/// Webhook configurations; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub webhook_configs: Option<Vec<WebhookConfig>>,
}
/// Root route: grouping parameters plus a list of child routes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertManagerRoute {
/// Labels used to group alerts.
pub group_by: Vec<String>,
/// Group wait, kept as a string (the default in this file uses `"30s"`).
pub group_wait: String,
/// Group interval, kept as a string (the default in this file uses `"5m"`).
pub group_interval: String,
/// Repeat interval, kept as a string (the default in this file uses `"12h"`).
pub repeat_interval: String,
/// Child routes, one per channel.
pub routes: Vec<AlertChannelRoute>,
}
/// Global settings shared across receivers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertChannelGlobalConfig {
/// Slack API URL; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub slack_api_url: Option<Url>,
}
/// One Slack notification configuration of a receiver.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlackConfig {
/// Target Slack channel.
pub channel: String,
/// Whether resolved notifications are sent as well.
pub send_resolved: bool,
/// Message title.
pub title: String,
/// Message text.
pub text: String,
}
/// One webhook notification configuration of a receiver.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebhookConfig {
/// Webhook endpoint URL.
pub url: Url,
/// Whether resolved notifications are sent as well.
pub send_resolved: bool,
}
/// A child route that sends alerts matching `matchers` to a named receiver.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertChannelRoute {
/// Name of the receiver this route targets.
pub receiver: String,
/// Matcher expressions as strings (the default in this file uses `"alertname=Watchdog"`).
pub matchers: Vec<String>,
/// The `continue` flag, written as a raw identifier because `continue` is a Rust keyword.
/// `#[serde(default)]` makes it `false` when the field is absent on deserialization.
#[serde(default)]
pub r#continue: bool,
}
/// Full alert manager configuration: optional global settings, the root route
/// and the list of receivers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertManagerConfig {
/// Optional global settings; has no `skip_serializing_if`, so `None` is still serialized.
pub global: Option<AlertChannelGlobalConfig>,
/// Root route of the routing tree.
pub route: AlertManagerRoute,
/// All receivers that routes may refer to.
pub receivers: Vec<AlertChannelReceiver>,
}
/// Baseline values, provided through the standard [`Default`] trait.
///
/// Existing `AlertManagerValues::default()` call sites keep working because
/// `Default` is in the prelude; the type additionally becomes usable with
/// `..Default::default()`, `unwrap_or_default()` and `#[serde(default)]`.
///
/// The baseline is:
/// - alert manager enabled, no global settings;
/// - root route grouped by `job`, with `30s` / `5m` / `12h` for group wait,
///   group interval and repeat interval;
/// - one child route sending `alertname=Watchdog` to the `null` receiver,
///   with `continue` set to `false`;
/// - one receiver named `null` with no Slack or webhook configuration.
impl Default for AlertManagerValues {
    fn default() -> Self {
        Self {
            alertmanager: AlertManager {
                enabled: true,
                config: AlertManagerConfig {
                    global: None,
                    route: AlertManagerRoute {
                        group_by: vec!["job".to_string()],
                        group_wait: "30s".to_string(),
                        group_interval: "5m".to_string(),
                        repeat_interval: "12h".to_string(),
                        routes: vec![AlertChannelRoute {
                            receiver: "null".to_string(),
                            matchers: vec!["alertname=Watchdog".to_string()],
                            r#continue: false,
                        }],
                    },
                    receivers: vec![AlertChannelReceiver {
                        name: "null".to_string(),
                        slack_configs: None,
                        webhook_configs: None,
                    }],
                },
            },
        }
    }
}

View File

@@ -1,4 +1,6 @@
use super::discord_alert_manager::discord_alert_manager_score;
use super::{
discord_alert_manager::discord_alert_manager_score, kube_prometheus_monitor::AlertManagerConfig,
};
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::Value;
@@ -6,15 +8,23 @@ use tokio::sync::OnceCell;
use url::Url;
use crate::{
interpret::{Interpret, InterpretError, Outcome},
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{
HelmCommand, K8sAnywhereTopology, Topology,
oberservability::monitoring::{AlertReceiverProvision, EnsureAlertReceiver},
oberservability::monitoring::{AlertReceiver, AlertReceiverDeployment},
},
};
#[async_trait]
impl<T: Topology + DiscordWebhookReceiver> AlertReceiverDeployment<T> for DiscordWebhookConfig {
    /// Deploys this Discord webhook receiver by handing an owned copy of the
    /// configuration to the topology and returning its result unchanged.
    async fn deploy_alert_receiver(&self, topology: &T) -> Result<Outcome, InterpretError> {
        let owned_config = self.clone();
        topology.deploy_discord_webhook_receiver(owned_config).await
    }
}
#[derive(Debug, Clone, Serialize)]
pub struct DiscordWebhookConfig {
pub webhook_url: Url,
@@ -22,36 +32,10 @@ pub struct DiscordWebhookConfig {
pub send_resolved_notifications: bool,
}
#[async_trait]
impl<T: Topology + DiscordWebhookReceiver> EnsureAlertReceiver<T> for DiscordWebhookConfig {
/// Ensures the Discord webhook receiver by delegating to the topology.
///
/// Passes a borrow of `inventory` and an owned clone of this configuration
/// to `ensure_discord_webhook_receiver` and returns its result unchanged.
async fn ensure_alert_receiver(
&self,
inventory: Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
topology
.ensure_discord_webhook_receiver(&inventory, self.clone())
.await
}
}
impl<T: Topology + HelmCommand> AlertReceiverProvision<T> for DiscordWebhookConfig {
/// Wraps a clone of this configuration in a boxed `DiscordWebhookReceiverScore`.
fn get_deployment_score(&self) -> Box<dyn Score<T>> {
Box::new(DiscordWebhookReceiverScore {
config: self.clone(),
})
}
/// Uses the configuration's `name` as the receiver identifier.
fn alert_receiver_id(&self) -> String {
self.name.clone()
}
}
#[async_trait]
pub trait DiscordWebhookReceiver {
async fn ensure_discord_webhook_receiver(
async fn deploy_discord_webhook_receiver(
&self,
inventory: &Inventory,
config: DiscordWebhookConfig,
) -> Result<Outcome, InterpretError>;
fn delete_discord_webhook_receiver(
@@ -60,11 +44,17 @@ pub trait DiscordWebhookReceiver {
) -> Result<Outcome, InterpretError>;
}
#[async_trait]
impl<T: DiscordWebhookReceiver> AlertManagerConfig<T> for DiscordWebhookConfig {
/// Not implemented yet: the body is `todo!()`, so calling this panics.
async fn get_alert_manager_config(&self) -> Result<Value, InterpretError> {
todo!()
}
}
#[async_trait]
impl DiscordWebhookReceiver for K8sAnywhereTopology {
async fn ensure_discord_webhook_receiver(
async fn deploy_discord_webhook_receiver(
&self,
inventory: &Inventory,
config: DiscordWebhookConfig,
) -> Result<Outcome, InterpretError> {
let receiver_key = config.name.clone();
@@ -82,7 +72,9 @@ impl DiscordWebhookReceiver for K8sAnywhereTopology {
}
let final_state = cell
.get_or_try_init(|| async { self.initialize_alert_receiver(&config, inventory).await })
.get_or_try_init(|| async {
initialize_discord_webhook_receiver(config.clone(), self).await
})
.await?;
Ok(Outcome::success(format!(
@@ -99,17 +91,27 @@ impl DiscordWebhookReceiver for K8sAnywhereTopology {
}
}
pub trait AlertManagerConfig<T> {
fn get_alert_manager_config(&self) -> Result<Value, InterpretError>;
}
async fn initialize_discord_webhook_receiver(
conf: DiscordWebhookConfig,
topology: &K8sAnywhereTopology,
) -> Result<AlertReceiver, InterpretError> {
println!(
"Attempting to initialize Discord adapter for: {}",
conf.name
);
let score = DiscordWebhookReceiverScore {
config: conf.clone(),
};
let inventory = Inventory::autoload();
let interpret = score.create_interpret();
#[async_trait]
impl<T: DiscordWebhookReceiver> AlertManagerConfig<T> for DiscordWebhookConfig {
fn get_alert_manager_config(&self) -> Result<Value, InterpretError> {
todo!()
}
}
interpret.execute(&inventory, topology).await?;
Ok(AlertReceiver {
receiver_id: conf.name,
receiver_installed: true,
})
}
#[derive(Debug, Clone, Serialize)]
struct DiscordWebhookReceiverScore {
config: DiscordWebhookConfig,
@@ -117,15 +119,50 @@ struct DiscordWebhookReceiverScore {
impl<T: Topology + HelmCommand> Score<T> for DiscordWebhookReceiverScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
discord_alert_manager_score(
self.config.webhook_url.clone(),
self.config.name.clone(),
self.config.name.clone(),
)
.create_interpret()
Box::new(DiscordWebhookReceiverScoreInterpret {
config: self.config.clone(),
})
}
fn name(&self) -> String {
"DiscordWebhookReceiverScore".to_string()
}
}
/// Interpret that carries the Discord webhook configuration it will deploy.
#[derive(Debug)]
struct DiscordWebhookReceiverScoreInterpret {
/// Configuration providing the webhook URL and the receiver name.
config: DiscordWebhookConfig,
}
#[async_trait]
impl<T: Topology + HelmCommand> Interpret<T> for DiscordWebhookReceiverScoreInterpret {
    /// Builds the Discord alert-manager score from the stored configuration
    /// and runs its interpret against `topology`.
    ///
    /// The configured name is passed for both name arguments of
    /// `discord_alert_manager_score`. The result of the inner interpret is
    /// returned unchanged.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let webhook_url = self.config.webhook_url.clone();
        let receiver_name = self.config.name.clone();
        let helm_score =
            discord_alert_manager_score(webhook_url, receiver_name.clone(), receiver_name);
        let inner: Box<dyn Interpret<T>> = helm_score.create_interpret();
        inner.execute(inventory, topology).await
    }

    /// Not implemented yet; panics via `todo!()`.
    fn get_name(&self) -> InterpretName {
        todo!()
    }

    /// Not implemented yet; panics via `todo!()`.
    fn get_version(&self) -> Version {
        todo!()
    }

    /// Not implemented yet; panics via `todo!()`.
    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    /// Not implemented yet; panics via `todo!()`.
    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}

View File

@@ -0,0 +1,108 @@
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::Value;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{
HelmCommand, Topology,
oberservability::monitoring::{AlertReceiverDeployment, Monitor},
},
};
use super::{
config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
};
/// Monitor made of a list of alert receivers to deploy and a kube-prometheus configuration.
#[derive(Debug, Clone)]
pub struct KubePrometheus<T> {
/// Receivers deployed, in order, before the monitor itself.
alert_receivers: Vec<Box<dyn AlertReceiverDeployment<T>>>,
/// Configuration handed to the kube-prometheus score.
config: KubePrometheusConfig,
}
/// Implemented by types that can produce an alert manager configuration as a YAML `Value`.
#[async_trait]
pub trait AlertManagerConfig<T> {
/// Returns the alert manager configuration, or an `InterpretError` on failure.
async fn get_alert_manager_config(&self) -> Result<Value, InterpretError>;
}
impl<T: Topology> KubePrometheus<T> {
    /// Creates a monitor with no alert receivers and a configuration obtained
    /// from `KubePrometheusConfig::new()`.
    pub fn new() -> Self {
        Self {
            alert_receivers: Vec::new(),
            config: KubePrometheusConfig::new(),
        }
    }
}

/// `Default` mirrors [`KubePrometheus::new`], so the type works with
/// `Default::default()` and `unwrap_or_default()` like other types that have
/// a zero-argument constructor.
impl<T: Topology> Default for KubePrometheus<T> {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl<T: Topology + HelmCommand + std::fmt::Debug> Monitor<T> for KubePrometheus<T> {
async fn deploy_monitor(&self, topology: &T) -> Result<Outcome, InterpretError> {
for alert_receiver in &self.alert_receivers {
alert_receiver.deploy_alert_receiver(topology).await?;
}
let score = KubePrometheusScore {
config: self.config.clone(),
};
let inventory = Inventory::autoload();
score.create_interpret().execute(&inventory, topology).await
}
async fn delete_monitor(&self, _topolgy: &T) -> Result<Outcome, InterpretError> {
todo!()
}
}
/// Score wrapping the kube-prometheus configuration to deploy.
#[derive(Debug, Clone, Serialize)]
struct KubePrometheusScore {
/// Configuration forwarded to the interpret created from this score.
config: KubePrometheusConfig,
}
impl<T: Topology + HelmCommand> Score<T> for KubePrometheusScore {
    /// Creates the interpret that deploys kube-prometheus from a clone of this score.
    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        Box::new(KubePromethusScoreInterpret {
            score: self.clone(),
        })
    }

    /// Returns the fixed name of this score.
    ///
    /// This was previously `todo!()`, which panics whenever the name is
    /// requested. It now returns the type name, as
    /// `DiscordWebhookReceiverScore::name` does.
    fn name(&self) -> String {
        "KubePrometheusScore".to_string()
    }
}
/// Interpret holding the `KubePrometheusScore` it was created from.
#[derive(Debug, Clone, Serialize)]
struct KubePromethusScoreInterpret {
/// Score whose `config` is used when executing.
score: KubePrometheusScore,
}
#[async_trait]
impl<T: Topology + HelmCommand> Interpret<T> for KubePromethusScoreInterpret {
    /// Builds the kube-prometheus helm chart score from the stored
    /// configuration and runs its interpret against `topology`, returning the
    /// inner result unchanged.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let chart_score = kube_prometheus_helm_chart_score(&self.score.config);
        let inner: Box<dyn Interpret<T>> = chart_score.create_interpret();
        inner.execute(inventory, topology).await
    }

    /// Not implemented yet; panics via `todo!()`.
    fn get_name(&self) -> InterpretName {
        todo!()
    }

    /// Not implemented yet; panics via `todo!()`.
    fn get_version(&self) -> Version {
        todo!()
    }

    /// Not implemented yet; panics via `todo!()`.
    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    /// Not implemented yet; panics via `todo!()`.
    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}

View File

@@ -1,5 +1,7 @@
pub mod alertmanager_types;
mod config;
mod discord_alert_manager;
pub mod discord_webhook_sender;
mod kube_prometheus;
mod kube_prometheus_helm_chart;
pub mod kube_prometheus_monitor;
pub mod monitoring_alerting;

View File

@@ -14,8 +14,7 @@ use crate::{
};
use super::{
config::KubePrometheusConfig, discord_alert_manager::discord_alert_manager_score,
kube_prometheus::kube_prometheus_helm_chart_score,
config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
};
#[derive(Debug, Clone, Serialize)]