fix: refactor so that the topology installs the MonitoringAlertingStack depending on whether it is already present in the cluster

This commit is contained in:
Willem 2025-05-05 16:37:15 -04:00
parent e7cfbf914a
commit 88270ece61
2 changed files with 79 additions and 39 deletions

View File

@ -1,9 +1,21 @@
use log::warn;
use serde::Serialize;
use tokio::sync::OnceCell;
use k8s_openapi::api::core::v1::Pod;
use kube::{
Client,
api::{Api, ListParams},
};
use async_trait::async_trait;
use crate::interpret::{InterpretError, Outcome};
use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
maestro::Maestro,
modules::monitoring::monitoring_alerting::MonitoringAlertingStackScore,
};
use super::{HelmCommand, Topology};
@ -17,21 +29,75 @@ pub struct MonitoringAlertingTopology {
monitoring_state: OnceCell<Option<MonitoringState>>,
}
impl MonitoringAlertingTopology {
/// Builds a topology whose `monitoring_state` cell starts out empty.
pub fn new() -> Self {
    let monitoring_state = OnceCell::new();
    Self { monitoring_state }
}
fn get_monitoring_state(&self) -> Result<Option<MonitoringState>, InterpretError> {
let state = MonitoringState {
message: "monitoring stack not installed".to_string(),
/// Probes the cluster for a Prometheus pod that reports `Ready == "True"`.
///
/// The `monitoring` and `openshift-monitoring` namespaces are checked in that
/// order, selecting pods by the `app.kubernetes.io/name=prometheus` label.
///
/// Returns `Ok(Some(_))` for the first namespace containing a ready pod and
/// `Ok(None)` when neither namespace has one.
///
/// # Errors
/// Fails only when the default Kubernetes client cannot be created. A failed
/// pod listing in a namespace is logged as a warning and that namespace is
/// skipped.
async fn get_monitoring_state(&self) -> Result<Option<MonitoringState>, InterpretError> {
    let client = Client::try_default()
        .await
        .map_err(|e| InterpretError::new(format!("Kubernetes client error: {}", e)))?;
    let selector = ListParams::default().labels("app.kubernetes.io/name=prometheus");

    for ns in &["monitoring", "openshift-monitoring"] {
        let pods: Api<Pod> = Api::namespaced(client.clone(), ns);

        // Best effort: an unreachable namespace is reported and skipped.
        let found = match pods.list(&selector).await {
            Ok(list) => list,
            Err(e) => {
                warn!("Failed to query pods in ns {}: {}", ns, e);
                continue;
            }
        };

        // A pod counts as ready when any of its conditions is Ready/True.
        let has_ready_pod = found.items.iter().any(|pod| {
            pod.status
                .as_ref()
                .and_then(|status| status.conditions.as_ref())
                .map_or(false, |conditions| {
                    conditions
                        .iter()
                        .any(|c| c.type_ == "Ready" && c.status == "True")
                })
        });

        if has_ready_pod {
            let message = format!("Prometheus is ready in namespace: {}", ns);
            return Ok(Some(MonitoringState { message }));
        }
    }

    Ok(None)
}
async fn try_install_monitoring_stack(
&self,
) -> Result<Option<MonitoringState>, InterpretError> {
let inventory = Inventory::autoload();
let topology = MonitoringAlertingTopology::new();
let mut maestro = match Maestro::initialize(inventory, topology).await {
Ok(m) => m,
Err(e) => {
println!("failed to initialize Maestro: {}", e);
std::process::exit(1);
}
};
maestro.register_all(vec![Box::new(MonitoringAlertingStackScore::default())]);
let state = match self.get_monitoring_state().await {
Ok(_) => MonitoringState {
message: "Monitoring Stack Ready".to_string(),
},
Err(_) => todo!(),
};
Ok(Some(state))
}
}
#[async_trait]
impl Topology for MonitoringAlertingTopology {
fn name(&self) -> &str {
@ -39,20 +105,14 @@ impl Topology for MonitoringAlertingTopology {
}
async fn ensure_ready(&self) -> Result<Outcome, InterpretError> {
let monitoring_state = self
.monitoring_state
.get_or_try_init(|| async { self.get_monitoring_state() })
.await?;
if monitoring_state.is_some() {
Ok(Outcome::success(
"Monitoring stack already installed".to_string(),
))
let state = if let Some(state) = self.get_monitoring_state().await? {
state
} else {
Ok(Outcome::success(
"Monitoring stack not installed".to_string(),
))
}
self.try_install_monitoring_stack().await?
.ok_or_else(|| InterpretError::new("Failed to install monitoring stack".into()))?
};
Ok(Outcome::success(state.message))
}
}

View File

@ -69,26 +69,6 @@ struct MonitoringAlertingStackInterpret {
pub score: MonitoringAlertingStackScore,
}
impl MonitoringAlertingStackInterpret {
pub async fn build_monitoring_stack(
&self,
monitoring_stack: MonitoringAlertingStackScore,
) -> Result<Outcome, InterpretError> {
let inventory = Inventory::autoload();
let topology = MonitoringAlertingTopology::new();
let mut maestro = match Maestro::initialize(inventory, topology).await {
Ok(m) => m,
Err(e) => {
println!("failed to initialize Maestro: {}", e);
std::process::exit(1);
}
};
maestro.register_all(monitoring_stack.monitoring_stack);
Ok(Outcome::success(format!(
"installed kube-prometheus monitoring and alerting stack"
)))
}
}
#[async_trait]
impl<T: Topology> Interpret<T> for MonitoringAlertingStackInterpret {
@ -97,7 +77,7 @@ impl<T: Topology> Interpret<T> for MonitoringAlertingStackInterpret {
_inventory: &Inventory,
_topology: &T,
) -> Result<Outcome, InterpretError> {
self.build_monitoring_stack(self.score.clone()).await
todo!()
}
fn get_name(&self) -> InterpretName {