feat(monitoring): tested and reworked the pod readiness check (`ensure_pod` is now `wait_for_pod_ready`) so that it polls until the pod is running instead of failing immediately, which gives the resource time to be created
All checks were successful
Run Check Script / check (pull_request) Successful in 49s
All checks were successful
Run Check Script / check (pull_request) Successful in 49s
This commit is contained in:
parent
21e51b8d80
commit
cdf75faa8f
@ -419,9 +419,12 @@ impl K8sClient {
|
|||||||
.as_str()
|
.as_str()
|
||||||
.expect("couldn't get kind as str");
|
.expect("couldn't get kind as str");
|
||||||
|
|
||||||
let split: Vec<&str> = api_version.splitn(2, "/").collect();
|
let mut it = api_version.splitn(2, '/');
|
||||||
let g = split[0];
|
let first = it.next().unwrap();
|
||||||
let v = split[1];
|
let (g, v) = match it.next() {
|
||||||
|
Some(second) => (first, second),
|
||||||
|
None => ("", first),
|
||||||
|
};
|
||||||
|
|
||||||
let gvk = GroupVersionKind::gvk(g, v, kind);
|
let gvk = GroupVersionKind::gvk(g, v, kind);
|
||||||
let api_resource = ApiResource::from_gvk(&gvk);
|
let api_resource = ApiResource::from_gvk(&gvk);
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
use std::sync::Arc;
|
use std::{sync::Arc, time::Duration};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use harmony_types::id::Id;
|
use harmony_types::id::Id;
|
||||||
use k8s_openapi::api::core::v1::Pod;
|
use k8s_openapi::api::core::v1::Pod;
|
||||||
use kube::api::GroupVersionKind;
|
use log::debug;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
use tokio::time::sleep;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
data::Version,
|
data::Version,
|
||||||
@ -69,8 +70,7 @@ impl OpenshiftUserWorkloadMonitoringInterpret {
|
|||||||
&self,
|
&self,
|
||||||
client: &Arc<K8sClient>,
|
client: &Arc<K8sClient>,
|
||||||
) -> Result<Outcome, InterpretError> {
|
) -> Result<Outcome, InterpretError> {
|
||||||
let cm = format!(
|
let cm = r#"
|
||||||
r#"
|
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
@ -81,11 +81,9 @@ data:
|
|||||||
enableUserWorkload: true
|
enableUserWorkload: true
|
||||||
alertmanagerMain:
|
alertmanagerMain:
|
||||||
enableUserAlertmanagerConfig: true
|
enableUserAlertmanagerConfig: true
|
||||||
"#
|
"#;
|
||||||
);
|
let cm_yaml: serde_yaml::Value = serde_yaml::from_str(&cm).unwrap();
|
||||||
|
debug!("{:#?}", cm_yaml);
|
||||||
let cm_yaml = serde_yaml::to_value(cm).unwrap();
|
|
||||||
|
|
||||||
client
|
client
|
||||||
.apply_yaml(&cm_yaml, Some("openshift-monitoring"))
|
.apply_yaml(&cm_yaml, Some("openshift-monitoring"))
|
||||||
.await?;
|
.await?;
|
||||||
@ -97,8 +95,7 @@ data:
|
|||||||
&self,
|
&self,
|
||||||
client: &Arc<K8sClient>,
|
client: &Arc<K8sClient>,
|
||||||
) -> Result<Outcome, InterpretError> {
|
) -> Result<Outcome, InterpretError> {
|
||||||
let cm = format!(
|
let cm = r#"
|
||||||
r#"
|
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
@ -107,12 +104,11 @@ metadata:
|
|||||||
data:
|
data:
|
||||||
config.yaml: |
|
config.yaml: |
|
||||||
alertmanager:
|
alertmanager:
|
||||||
enabled: true
|
enabled: true
|
||||||
enableAlertmanagerConfig: true
|
enableAlertmanagerConfig: true
|
||||||
"#
|
"#;
|
||||||
);
|
|
||||||
|
|
||||||
let cm_yaml = serde_yaml::to_value(cm).unwrap();
|
let cm_yaml: serde_yaml::Value = serde_yaml::from_str(&cm).unwrap();
|
||||||
|
|
||||||
client
|
client
|
||||||
.apply_yaml(&cm_yaml, Some("openshift-user-workload-monitoring"))
|
.apply_yaml(&cm_yaml, Some("openshift-user-workload-monitoring"))
|
||||||
@ -128,54 +124,48 @@ data:
|
|||||||
) -> Result<Outcome, InterpretError> {
|
) -> Result<Outcome, InterpretError> {
|
||||||
let namespace = "openshift-user-workload-monitoring";
|
let namespace = "openshift-user-workload-monitoring";
|
||||||
let alertmanager_name = "alertmanager-user-workload-0";
|
let alertmanager_name = "alertmanager-user-workload-0";
|
||||||
let alertmanager = client
|
|
||||||
.get_pod(alertmanager_name, Some(namespace))
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
let prometheus_name = "prometheus-user-workload-0";
|
let prometheus_name = "prometheus-user-workload-0";
|
||||||
let prometheus = client
|
self.wait_for_pod_ready(&client, alertmanager_name, namespace)
|
||||||
.get_pod(prometheus_name, Some(namespace))
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
self.ensure_pod(alertmanager, alertmanager_name, namespace)
|
|
||||||
.await?;
|
.await?;
|
||||||
self.ensure_pod(prometheus, prometheus_name, namespace)
|
self.wait_for_pod_ready(&client, prometheus_name, namespace)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn ensure_pod(
|
async fn wait_for_pod_ready(
|
||||||
&self,
|
&self,
|
||||||
pod: Option<Pod>,
|
client: &Arc<K8sClient>,
|
||||||
pod_name: &str,
|
pod_name: &str,
|
||||||
namespace: &str,
|
namespace: &str,
|
||||||
) -> Result<Outcome, InterpretError> {
|
) -> Result<Outcome, InterpretError> {
|
||||||
match pod {
|
let mut elapsed = 0;
|
||||||
Some(pod) => {
|
let interval = 5; // seconds between checks
|
||||||
if let Some(status) = pod.status {
|
let timeout_secs = 120;
|
||||||
let phase = status.phase.unwrap_or("failed".to_string());
|
loop {
|
||||||
if phase == "running" {
|
// Fetch the pod
|
||||||
Ok(Outcome::success(format!(
|
let pod = client.get_pod(pod_name, Some(namespace)).await?;
|
||||||
"'{}' is ready with status.phase '{}'.",
|
|
||||||
pod.metadata.name.unwrap(),
|
if let Some(p) = pod {
|
||||||
phase
|
if let Some(status) = p.status {
|
||||||
)))
|
if let Some(phase) = status.phase {
|
||||||
} else {
|
if phase.to_lowercase() == "running" {
|
||||||
Err(InterpretError::new(format!(
|
return Ok(Outcome::success(format!(
|
||||||
"'{}' in namespace '{}' has status.phase '{}'.",
|
"'{}' is ready with status.phase '{}'.",
|
||||||
pod_name, namespace, phase
|
pod_name, phase
|
||||||
)))
|
)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
Err(InterpretError::new(format!(
|
|
||||||
"{} not found in ns: {}",
|
|
||||||
pod_name, namespace
|
|
||||||
)))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => Err(InterpretError::new(format!(
|
|
||||||
"'{}' not found in namespace '{}'",
|
if elapsed >= timeout_secs {
|
||||||
pod_name, namespace
|
return Err(InterpretError::new(format!(
|
||||||
))),
|
"'{}' in ns '{}' did not become ready within {}s",
|
||||||
|
pod_name, namespace, timeout_secs
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(Duration::from_secs(interval)).await;
|
||||||
|
elapsed += interval;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user