feat(monitoring): tested and reworked `ensure_pod` into `wait_for_pod_ready`, which polls until the pod is running instead of failing the check immediately, giving the resource time to be created
All checks were successful
Run Check Script / check (pull_request) Successful in 49s

This commit is contained in:
Willem 2025-09-19 15:11:28 -04:00
parent 21e51b8d80
commit cdf75faa8f
2 changed files with 48 additions and 55 deletions

View File

@ -419,9 +419,12 @@ impl K8sClient {
.as_str()
.expect("couldn't get kind as str");
let split: Vec<&str> = api_version.splitn(2, "/").collect();
let g = split[0];
let v = split[1];
let mut it = api_version.splitn(2, '/');
let first = it.next().unwrap();
let (g, v) = match it.next() {
Some(second) => (first, second),
None => ("", first),
};
let gvk = GroupVersionKind::gvk(g, v, kind);
let api_resource = ApiResource::from_gvk(&gvk);

View File

@ -1,10 +1,11 @@
use std::sync::Arc;
use std::{sync::Arc, time::Duration};
use async_trait::async_trait;
use harmony_types::id::Id;
use k8s_openapi::api::core::v1::Pod;
use kube::api::GroupVersionKind;
use log::debug;
use serde::Serialize;
use tokio::time::sleep;
use crate::{
data::Version,
@ -69,8 +70,7 @@ impl OpenshiftUserWorkloadMonitoringInterpret {
&self,
client: &Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
let cm = format!(
r#"
let cm = r#"
apiVersion: v1
kind: ConfigMap
metadata:
@ -81,11 +81,9 @@ data:
enableUserWorkload: true
alertmanagerMain:
enableUserAlertmanagerConfig: true
"#
);
let cm_yaml = serde_yaml::to_value(cm).unwrap();
"#;
let cm_yaml: serde_yaml::Value = serde_yaml::from_str(&cm).unwrap();
debug!("{:#?}", cm_yaml);
client
.apply_yaml(&cm_yaml, Some("openshift-monitoring"))
.await?;
@ -97,8 +95,7 @@ data:
&self,
client: &Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
let cm = format!(
r#"
let cm = r#"
apiVersion: v1
kind: ConfigMap
metadata:
@ -109,10 +106,9 @@ data:
alertmanager:
enabled: true
enableAlertmanagerConfig: true
"#
);
"#;
let cm_yaml = serde_yaml::to_value(cm).unwrap();
let cm_yaml: serde_yaml::Value = serde_yaml::from_str(&cm).unwrap();
client
.apply_yaml(&cm_yaml, Some("openshift-user-workload-monitoring"))
@ -128,54 +124,48 @@ data:
) -> Result<Outcome, InterpretError> {
let namespace = "openshift-user-workload-monitoring";
let alertmanager_name = "alertmanager-user-workload-0";
let alertmanager = client
.get_pod(alertmanager_name, Some(namespace))
.await
.unwrap();
let prometheus_name = "prometheus-user-workload-0";
let prometheus = client
.get_pod(prometheus_name, Some(namespace))
.await
.unwrap();
self.ensure_pod(alertmanager, alertmanager_name, namespace)
self.wait_for_pod_ready(&client, alertmanager_name, namespace)
.await?;
self.ensure_pod(prometheus, prometheus_name, namespace)
self.wait_for_pod_ready(&client, prometheus_name, namespace)
.await
}
async fn ensure_pod(
async fn wait_for_pod_ready(
&self,
pod: Option<Pod>,
client: &Arc<K8sClient>,
pod_name: &str,
namespace: &str,
) -> Result<Outcome, InterpretError> {
match pod {
Some(pod) => {
if let Some(status) = pod.status {
let phase = status.phase.unwrap_or("failed".to_string());
if phase == "running" {
Ok(Outcome::success(format!(
let mut elapsed = 0;
let interval = 5; // seconds between checks
let timeout_secs = 120;
loop {
// Fetch the pod
let pod = client.get_pod(pod_name, Some(namespace)).await?;
if let Some(p) = pod {
if let Some(status) = p.status {
if let Some(phase) = status.phase {
if phase.to_lowercase() == "running" {
return Ok(Outcome::success(format!(
"'{}' is ready with status.phase '{}'.",
pod.metadata.name.unwrap(),
phase
)))
} else {
Err(InterpretError::new(format!(
"'{}' in namespace '{}' has status.phase '{}'.",
pod_name, namespace, phase
)))
}
} else {
Err(InterpretError::new(format!(
"{} not found in ns: {}",
pod_name, namespace
)))
pod_name, phase
)));
}
}
None => Err(InterpretError::new(format!(
"'{}' not found in namespace '{}'",
pod_name, namespace
))),
}
}
if elapsed >= timeout_secs {
return Err(InterpretError::new(format!(
"'{}' in ns '{}' did not become ready within {}s",
pod_name, namespace, timeout_secs
)));
}
sleep(Duration::from_secs(interval)).await;
elapsed += interval;
}
}
}