okd_enable_user_workload_monitoring #160

Closed
wjro wants to merge 4 commits from okd_enable_user_workload_monitoring into master
3 changed files with 183 additions and 0 deletions
Showing only changes of commit 21e51b8d80 - Show all commits

View File

@ -4,4 +4,5 @@ pub mod application_monitoring;
pub mod grafana;
pub mod kube_prometheus;
pub mod ntfy;
pub mod okd;
pub mod prometheus;

View File

@ -0,0 +1,181 @@
use std::sync::Arc;
use async_trait::async_trait;
use harmony_types::id::Id;
use k8s_openapi::api::core::v1::Pod;
use kube::api::GroupVersionKind;
use serde::Serialize;
use crate::{
data::Version,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{K8sclient, Topology, k8s::K8sClient},
};
#[derive(Clone, Debug, Serialize)]
pub struct OpenshiftUserWorkloadMonitoring {}

/// Score that turns on OpenShift user-workload monitoring for the target cluster.
impl<T: Topology + K8sclient> Score<T> for OpenshiftUserWorkloadMonitoring {
    /// Human-readable identifier used in logs and reports.
    fn name(&self) -> String {
        String::from("OpenshiftUserWorkloadMonitoringScore")
    }

    /// Produce the interpret that applies the monitoring configuration.
    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        let interpret = OpenshiftUserWorkloadMonitoringInterpret {};
        Box::new(interpret)
    }
}
#[derive(Clone, Debug, Serialize)]
pub struct OpenshiftUserWorkloadMonitoringInterpret {}
#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftUserWorkloadMonitoringInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let client = topology.k8s_client().await.unwrap();
self.update_cluster_monitoring_config_cm(&client).await?;
self.update_user_workload_monitoring_config_cm(&client)
.await?;
self.verify_user_workload(&client).await?;
Ok(Outcome::success(
"successfully enabled user-workload-monitoring".to_string(),
))
}
fn get_name(&self) -> InterpretName {
InterpretName::Custom("OpenshiftUserWorkloadMonitoring")
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}
impl OpenshiftUserWorkloadMonitoringInterpret {
pub async fn update_cluster_monitoring_config_cm(
&self,
client: &Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
let cm = format!(
r#"
apiVersion: v1
kind: ConfigMap
metadata:
name: cluster-monitoring-config
namespace: openshift-monitoring
data:
config.yaml: |
enableUserWorkload: true
alertmanagerMain:
enableUserAlertmanagerConfig: true
"#
);
let cm_yaml = serde_yaml::to_value(cm).unwrap();
client
.apply_yaml(&cm_yaml, Some("openshift-monitoring"))
.await?;
Ok(Outcome::success(
"updated cluster-monitoring-config-map".to_string(),
letian marked this conversation as resolved
Review

for better readability, please add a white line between function declarations to give a bit of space

on a side note, we should always make sure to check for unused imports and also to run cargo fmt and cargo clippy --fix before opening a PR to automatically fix some issues/warnings, the list of warnings (not related to this PR) is growing very quickly

for better readability, please add a white line between function declarations to give a bit of space on a side note, we should always make sure to check for unused imports and also to run `cargo fmt` and `cargo clippy --fix` before opening a PR to automatically fix some issues/warnings, the list of warnings (not related to this PR) is growing very quickly
))
}
pub async fn update_user_workload_monitoring_config_cm(
&self,
client: &Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
let cm = format!(
r#"
apiVersion: v1
kind: ConfigMap
metadata:
name: user-workload-monitoring-config
namespace: openshift-user-workload-monitoring
data:
config.yaml: |
alertmanager:
enabled: true
enableAlertmanagerConfig: true
"#
);
let cm_yaml = serde_yaml::to_value(cm).unwrap();
client
.apply_yaml(&cm_yaml, Some("openshift-user-workload-monitoring"))
.await?;
Ok(Outcome::success(
"updated openshift-user-monitoring-config-map".to_string(),
))
}
pub async fn verify_user_workload(
&self,
client: &Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
let namespace = "openshift-user-workload-monitoring";
let alertmanager_name = "alertmanager-user-workload-0";
let alertmanager = client
.get_pod(alertmanager_name, Some(namespace))
.await
.unwrap();

could it be interesting to move such function into the k8s_client so that others could use it easily? something like k8s_client.wait_for_pod(pod_name: &str, namespace: Option<&str>, status: PodStatus)

could it be interesting to move such function into the `k8s_client` so that others could use it easily? something like `k8s_client.wait_for_pod(pod_name: &str, namespace: Option<&str>, status: PodStatus)`
let prometheus_name = "prometheus-user-workload-0";
let prometheus = client
.get_pod(prometheus_name, Some(namespace))
.await
.unwrap();
self.ensure_pod(alertmanager, alertmanager_name, namespace)
.await?;
self.ensure_pod(prometheus, prometheus_name, namespace)
.await
}
async fn ensure_pod(
&self,
pod: Option<Pod>,
pod_name: &str,
namespace: &str,
) -> Result<Outcome, InterpretError> {
match pod {
Some(pod) => {
if let Some(status) = pod.status {
let phase = status.phase.unwrap_or("failed".to_string());
if phase == "running" {
Ok(Outcome::success(format!(
"'{}' is ready with status.phase '{}'.",
pod.metadata.name.unwrap(),
phase
)))
} else {
Err(InterpretError::new(format!(
"'{}' in namespace '{}' has status.phase '{}'.",
pod_name, namespace, phase
)))
}
} else {
Err(InterpretError::new(format!(
"{} not found in ns: {}",
pod_name, namespace
)))
}
}
None => Err(InterpretError::new(format!(
"'{}' not found in namespace '{}'",
pod_name, namespace
))),
}
}
}

View File

@ -0,0 +1 @@
pub mod enable_user_workload;