feat(monitoring): tested and replaced the one-shot `ensure_pod` check with `wait_for_pod_ready`, which polls until the pod is running instead of failing immediately, giving the resource time to be created
All checks were successful
Run Check Script / check (pull_request) Successful in 49s
All checks were successful
Run Check Script / check (pull_request) Successful in 49s
This commit is contained in:
parent
21e51b8d80
commit
cdf75faa8f
@ -419,9 +419,12 @@ impl K8sClient {
|
||||
.as_str()
|
||||
.expect("couldn't get kind as str");
|
||||
|
||||
let split: Vec<&str> = api_version.splitn(2, "/").collect();
|
||||
let g = split[0];
|
||||
let v = split[1];
|
||||
let mut it = api_version.splitn(2, '/');
|
||||
let first = it.next().unwrap();
|
||||
let (g, v) = match it.next() {
|
||||
Some(second) => (first, second),
|
||||
None => ("", first),
|
||||
};
|
||||
|
||||
let gvk = GroupVersionKind::gvk(g, v, kind);
|
||||
let api_resource = ApiResource::from_gvk(&gvk);
|
||||
|
@ -1,10 +1,11 @@
|
||||
use std::sync::Arc;
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::core::v1::Pod;
|
||||
use kube::api::GroupVersionKind;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
@ -69,8 +70,7 @@ impl OpenshiftUserWorkloadMonitoringInterpret {
|
||||
&self,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let cm = format!(
|
||||
r#"
|
||||
let cm = r#"
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
@ -81,11 +81,9 @@ data:
|
||||
enableUserWorkload: true
|
||||
alertmanagerMain:
|
||||
enableUserAlertmanagerConfig: true
|
||||
"#
|
||||
);
|
||||
|
||||
let cm_yaml = serde_yaml::to_value(cm).unwrap();
|
||||
|
||||
"#;
|
||||
let cm_yaml: serde_yaml::Value = serde_yaml::from_str(&cm).unwrap();
|
||||
debug!("{:#?}", cm_yaml);
|
||||
client
|
||||
.apply_yaml(&cm_yaml, Some("openshift-monitoring"))
|
||||
.await?;
|
||||
@ -97,8 +95,7 @@ data:
|
||||
&self,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let cm = format!(
|
||||
r#"
|
||||
let cm = r#"
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
@ -107,12 +104,11 @@ metadata:
|
||||
data:
|
||||
config.yaml: |
|
||||
alertmanager:
|
||||
enabled: true
|
||||
enableAlertmanagerConfig: true
|
||||
"#
|
||||
);
|
||||
enabled: true
|
||||
enableAlertmanagerConfig: true
|
||||
"#;
|
||||
|
||||
let cm_yaml = serde_yaml::to_value(cm).unwrap();
|
||||
let cm_yaml: serde_yaml::Value = serde_yaml::from_str(&cm).unwrap();
|
||||
|
||||
client
|
||||
.apply_yaml(&cm_yaml, Some("openshift-user-workload-monitoring"))
|
||||
@ -128,54 +124,48 @@ data:
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let namespace = "openshift-user-workload-monitoring";
|
||||
let alertmanager_name = "alertmanager-user-workload-0";
|
||||
let alertmanager = client
|
||||
.get_pod(alertmanager_name, Some(namespace))
|
||||
.await
|
||||
.unwrap();
|
||||
let prometheus_name = "prometheus-user-workload-0";
|
||||
let prometheus = client
|
||||
.get_pod(prometheus_name, Some(namespace))
|
||||
.await
|
||||
.unwrap();
|
||||
self.ensure_pod(alertmanager, alertmanager_name, namespace)
|
||||
self.wait_for_pod_ready(&client, alertmanager_name, namespace)
|
||||
.await?;
|
||||
self.ensure_pod(prometheus, prometheus_name, namespace)
|
||||
self.wait_for_pod_ready(&client, prometheus_name, namespace)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn ensure_pod(
|
||||
async fn wait_for_pod_ready(
|
||||
&self,
|
||||
pod: Option<Pod>,
|
||||
client: &Arc<K8sClient>,
|
||||
pod_name: &str,
|
||||
namespace: &str,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
match pod {
|
||||
Some(pod) => {
|
||||
if let Some(status) = pod.status {
|
||||
let phase = status.phase.unwrap_or("failed".to_string());
|
||||
if phase == "running" {
|
||||
Ok(Outcome::success(format!(
|
||||
"'{}' is ready with status.phase '{}'.",
|
||||
pod.metadata.name.unwrap(),
|
||||
phase
|
||||
)))
|
||||
} else {
|
||||
Err(InterpretError::new(format!(
|
||||
"'{}' in namespace '{}' has status.phase '{}'.",
|
||||
pod_name, namespace, phase
|
||||
)))
|
||||
let mut elapsed = 0;
|
||||
let interval = 5; // seconds between checks
|
||||
let timeout_secs = 120;
|
||||
loop {
|
||||
// Fetch the pod
|
||||
let pod = client.get_pod(pod_name, Some(namespace)).await?;
|
||||
|
||||
if let Some(p) = pod {
|
||||
if let Some(status) = p.status {
|
||||
if let Some(phase) = status.phase {
|
||||
if phase.to_lowercase() == "running" {
|
||||
return Ok(Outcome::success(format!(
|
||||
"'{}' is ready with status.phase '{}'.",
|
||||
pod_name, phase
|
||||
)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Err(InterpretError::new(format!(
|
||||
"{} not found in ns: {}",
|
||||
pod_name, namespace
|
||||
)))
|
||||
}
|
||||
}
|
||||
None => Err(InterpretError::new(format!(
|
||||
"'{}' not found in namespace '{}'",
|
||||
pod_name, namespace
|
||||
))),
|
||||
|
||||
if elapsed >= timeout_secs {
|
||||
return Err(InterpretError::new(format!(
|
||||
"'{}' in ns '{}' did not become ready within {}s",
|
||||
pod_name, namespace, timeout_secs
|
||||
)));
|
||||
}
|
||||
|
||||
sleep(Duration::from_secs(interval)).await;
|
||||
elapsed += interval;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user