feat(monitoring): rework `ensure_pod` into `wait_for_pod_ready`, which polls until the pod is ready instead of failing immediately, giving the resource time to be created
All checks were successful
Run Check Script / check (pull_request) Successful in 49s

This commit is contained in:
Willem 2025-09-19 15:11:28 -04:00
parent 21e51b8d80
commit cdf75faa8f
2 changed files with 48 additions and 55 deletions

View File

@ -419,9 +419,12 @@ impl K8sClient {
.as_str() .as_str()
.expect("couldn't get kind as str"); .expect("couldn't get kind as str");
let split: Vec<&str> = api_version.splitn(2, "/").collect(); let mut it = api_version.splitn(2, '/');
let g = split[0]; let first = it.next().unwrap();
let v = split[1]; let (g, v) = match it.next() {
Some(second) => (first, second),
None => ("", first),
};
let gvk = GroupVersionKind::gvk(g, v, kind); let gvk = GroupVersionKind::gvk(g, v, kind);
let api_resource = ApiResource::from_gvk(&gvk); let api_resource = ApiResource::from_gvk(&gvk);

View File

@ -1,10 +1,11 @@
use std::sync::Arc; use std::{sync::Arc, time::Duration};
use async_trait::async_trait; use async_trait::async_trait;
use harmony_types::id::Id; use harmony_types::id::Id;
use k8s_openapi::api::core::v1::Pod; use k8s_openapi::api::core::v1::Pod;
use kube::api::GroupVersionKind; use log::debug;
use serde::Serialize; use serde::Serialize;
use tokio::time::sleep;
use crate::{ use crate::{
data::Version, data::Version,
@ -69,8 +70,7 @@ impl OpenshiftUserWorkloadMonitoringInterpret {
&self, &self,
client: &Arc<K8sClient>, client: &Arc<K8sClient>,
) -> Result<Outcome, InterpretError> { ) -> Result<Outcome, InterpretError> {
let cm = format!( let cm = r#"
r#"
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -81,11 +81,9 @@ data:
enableUserWorkload: true enableUserWorkload: true
alertmanagerMain: alertmanagerMain:
enableUserAlertmanagerConfig: true enableUserAlertmanagerConfig: true
"# "#;
); let cm_yaml: serde_yaml::Value = serde_yaml::from_str(&cm).unwrap();
debug!("{:#?}", cm_yaml);
let cm_yaml = serde_yaml::to_value(cm).unwrap();
client client
.apply_yaml(&cm_yaml, Some("openshift-monitoring")) .apply_yaml(&cm_yaml, Some("openshift-monitoring"))
.await?; .await?;
@ -97,8 +95,7 @@ data:
&self, &self,
client: &Arc<K8sClient>, client: &Arc<K8sClient>,
) -> Result<Outcome, InterpretError> { ) -> Result<Outcome, InterpretError> {
let cm = format!( let cm = r#"
r#"
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -109,10 +106,9 @@ data:
alertmanager: alertmanager:
enabled: true enabled: true
enableAlertmanagerConfig: true enableAlertmanagerConfig: true
"# "#;
);
let cm_yaml = serde_yaml::to_value(cm).unwrap(); let cm_yaml: serde_yaml::Value = serde_yaml::from_str(&cm).unwrap();
client client
.apply_yaml(&cm_yaml, Some("openshift-user-workload-monitoring")) .apply_yaml(&cm_yaml, Some("openshift-user-workload-monitoring"))
@ -128,54 +124,48 @@ data:
) -> Result<Outcome, InterpretError> { ) -> Result<Outcome, InterpretError> {
let namespace = "openshift-user-workload-monitoring"; let namespace = "openshift-user-workload-monitoring";
let alertmanager_name = "alertmanager-user-workload-0"; let alertmanager_name = "alertmanager-user-workload-0";
let alertmanager = client
.get_pod(alertmanager_name, Some(namespace))
.await
.unwrap();
let prometheus_name = "prometheus-user-workload-0"; let prometheus_name = "prometheus-user-workload-0";
let prometheus = client self.wait_for_pod_ready(&client, alertmanager_name, namespace)
.get_pod(prometheus_name, Some(namespace))
.await
.unwrap();
self.ensure_pod(alertmanager, alertmanager_name, namespace)
.await?; .await?;
self.ensure_pod(prometheus, prometheus_name, namespace) self.wait_for_pod_ready(&client, prometheus_name, namespace)
.await .await
} }
async fn ensure_pod( async fn wait_for_pod_ready(
&self, &self,
pod: Option<Pod>, client: &Arc<K8sClient>,
pod_name: &str, pod_name: &str,
namespace: &str, namespace: &str,
) -> Result<Outcome, InterpretError> { ) -> Result<Outcome, InterpretError> {
match pod { let mut elapsed = 0;
Some(pod) => { let interval = 5; // seconds between checks
if let Some(status) = pod.status { let timeout_secs = 120;
let phase = status.phase.unwrap_or("failed".to_string()); loop {
if phase == "running" { // Fetch the pod
Ok(Outcome::success(format!( let pod = client.get_pod(pod_name, Some(namespace)).await?;
"'{}' is ready with status.phase '{}'.",
pod.metadata.name.unwrap(), if let Some(p) = pod {
phase if let Some(status) = p.status {
))) if let Some(phase) = status.phase {
} else { if phase.to_lowercase() == "running" {
Err(InterpretError::new(format!( return Ok(Outcome::success(format!(
"'{}' in namespace '{}' has status.phase '{}'.", "'{}' is ready with status.phase '{}'.",
pod_name, namespace, phase pod_name, phase
))) )));
}
} }
} else {
Err(InterpretError::new(format!(
"{} not found in ns: {}",
pod_name, namespace
)))
} }
} }
None => Err(InterpretError::new(format!(
"'{}' not found in namespace '{}'", if elapsed >= timeout_secs {
pod_name, namespace return Err(InterpretError::new(format!(
))), "'{}' in ns '{}' did not become ready within {}s",
pod_name, namespace, timeout_secs
)));
}
sleep(Duration::from_secs(interval)).await;
elapsed += interval;
} }
} }
} }