Compare commits


1 Commits

8 changed files with 120 additions and 256 deletions

View File

@@ -1,56 +0,0 @@
## **Remove Worker flag from OKD Control Planes**
### **Context**
On OKD user-provisioned infrastructure, the control plane nodes can carry the label node-role.kubernetes.io/worker, which allows non-critical workloads to be scheduled on the control planes.
### **Observed Symptoms**
- After adding the servers to the HAProxy backend, each backend appears down
- Traffic is redirected to the control planes instead of the workers
- The router-default pods are incorrectly scheduled on the control planes rather than on the workers
- Pods are being scheduled on the control planes, causing cluster instability
```
ss -tlnp | grep 80
```
- shows an haproxy process listening on 0.0.0.0:80 on the control planes
- the same problem exists for port 443
- In the rook-ceph namespace, certain pods are deployed on the control planes rather than on the worker nodes
### **Cause**
- When installing with UPI, the roles (master, worker) are not managed by the Machine Config Operator and the control planes are made schedulable by default.
### **Diagnostic**
Check the node labels:
```
oc get nodes --show-labels | grep control-plane
```
Inspect the kubelet configuration:
```
cat /etc/systemd/system/kubelet.service
```
Find the line:
```
--node-labels=node-role.kubernetes.io/control-plane,node-role.kubernetes.io/master,node-role.kubernetes.io/worker
```
→ the presence of the worker label confirms the problem.
Verify that the label does not come from the MCO:
```
oc get machineconfig | grep rendered-master
```
### **Solution**
To make the control planes non-schedulable, patch the cluster scheduler resource:
```
oc patch scheduler cluster --type merge -p '{"spec":{"mastersSchedulable":false}}'
```
After the patch is applied, the workloads can be moved off the control planes by draining the nodes:
```
oc adm cordon <cp-node>
oc adm drain <cp-node> --ignore-daemonsets --delete-emptydir-data
```
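Once the drain completes, the nodes can be uncordoned: with mastersSchedulable set to false, the scheduler no longer places regular workloads on the control planes. A hedged verification (exact output may vary by OKD version):
```
oc adm uncordon <cp-node>
oc get scheduler cluster -o jsonpath='{.spec.mastersSchedulable}'
```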

View File

@@ -194,3 +194,11 @@ impl From<String> for InterpretError {
        }
    }
}

impl From<serde_yaml::Error> for InterpretError {
    fn from(value: serde_yaml::Error) -> Self {
        Self {
            msg: format!("InterpretError : {value}"),
        }
    }
}
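With this conversion in place, `?` can propagate YAML errors directly as `InterpretError`. A minimal sketch (the helper and its input are illustrative, not part of the diff):

```rust
// Hypothetical helper: parse a raw manifest, letting `?` convert
// serde_yaml::Error into InterpretError via the new From impl.
fn parse_manifest(raw: &str) -> Result<serde_yaml::Value, InterpretError> {
    let value: serde_yaml::Value = serde_yaml::from_str(raw)?;
    Ok(value)
}
```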

View File

@@ -1,19 +1,13 @@
use std::time::Duration;

use derive_new::new;
use k8s_openapi::{
    ClusterResourceScope, NamespaceResourceScope,
    api::{
        apps::v1::Deployment,
        core::v1::{Pod, PodStatus},
    },
    api::{apps::v1::Deployment, core::v1::Pod},
};
use kube::{
    Client, Config, Error, Resource,
    api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt},
    config::{KubeConfigOptions, Kubeconfig},
    core::ErrorResponse,
    error::DiscoveryError,
    runtime::reflector::Lookup,
};
use kube::{api::DynamicObject, runtime::conditions};
@@ -25,7 +19,7 @@ use log::{debug, error, trace};
use serde::{Serialize, de::DeserializeOwned};
use serde_json::{Value, json};
use similar::TextDiff;
use tokio::{io::AsyncReadExt, time::sleep};
use tokio::io::AsyncReadExt;

#[derive(new, Clone)]
pub struct K8sClient {
@@ -159,41 +153,6 @@ impl K8sClient {
        }
    }

    pub async fn wait_for_pod_ready(
        &self,
        pod_name: &str,
        namespace: Option<&str>,
    ) -> Result<(), Error> {
        let mut elapsed = 0;
        let interval = 5; // seconds between checks
        let timeout_secs = 120;
        loop {
            let pod = self.get_pod(pod_name, namespace).await?;
            if let Some(p) = pod {
                if let Some(status) = p.status {
                    if let Some(phase) = status.phase {
                        if phase.to_lowercase() == "running" {
                            return Ok(());
                        }
                    }
                }
            }
            if elapsed >= timeout_secs {
                return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
                    "'{}' in ns '{}' did not become ready within {}s",
                    pod_name,
                    namespace.unwrap(),
                    timeout_secs
                ))));
            }
            sleep(Duration::from_secs(interval)).await;
            elapsed += interval;
        }
    }

    /// Executes a command in the first pod found that matches the specified label
    /// '{label}={name}'
    pub async fn exec_app_capture_output(
@@ -460,12 +419,9 @@ impl K8sClient {
            .as_str()
            .expect("couldn't get kind as str");
        let mut it = api_version.splitn(2, '/');
        let first = it.next().unwrap();
        let (g, v) = match it.next() {
            Some(second) => (first, second),
            None => ("", first),
        };
        let split: Vec<&str> = api_version.splitn(2, "/").collect();
        let g = split[0];
        let v = split[1];
        let gvk = GroupVersionKind::gvk(g, v, kind);
        let api_resource = ApiResource::from_gvk(&gvk);
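A note on the parsing above: indexing `split[1]` assumes `api_version` always contains a `/` (as in `apps/v1`), while core-group resources report a bare version such as `v1`. A hedged sketch of a slash-optional variant that keeps the empty-group fallback of the replaced `match`:

```rust
// Illustrative only: split "group/version", defaulting to the core group ("")
// when the apiVersion has no slash, e.g. "v1" -> ("", "v1").
fn split_api_version(api_version: &str) -> (&str, &str) {
    match api_version.split_once('/') {
        Some((group, version)) => (group, version),
        None => ("", api_version),
    }
}
```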

View File

@@ -0,0 +1,106 @@
use std::sync::Arc;

use async_trait::async_trait;
use harmony_types::id::Id;
use serde::Serialize;

use crate::{
    data::Version,
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
    inventory::Inventory,
    score::Score,
    topology::{K8sclient, Topology, k8s::K8sClient},
};
#[derive(Clone, Serialize, Debug)]
pub struct GenerateCaCertScore {
    cluster_issuer_name: String,
    dns_names: String,
    operator_namespace: String,
}

impl<T: Topology + K8sclient> Score<T> for GenerateCaCertScore {
    fn name(&self) -> String {
        "GenerateCaCertScore".to_string()
    }

    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        Box::new(GenerateCaCertInterpret {
            score: self.clone(),
        })
    }
}

#[derive(Clone, Serialize, Debug)]
pub struct GenerateCaCertInterpret {
    score: GenerateCaCertScore,
}
#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for GenerateCaCertInterpret {
    async fn execute(
        &self,
        _inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let client = topology.k8s_client().await.unwrap();
        let cert_yaml = self
            .build_cert_request_yaml(&self.score.cluster_issuer_name, &self.score.dns_names)
            .unwrap();
        self.apply_cert_request(&client, cert_yaml, &self.score.operator_namespace)
            .await?;
        Ok(Outcome::success("created ca cert".to_string()))
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("GenerateCaCertInterpret")
    }

    fn get_version(&self) -> Version {
        todo!()
    }

    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}
impl GenerateCaCertInterpret {
    pub fn build_cert_request_yaml(
        &self,
        cluster_issuer_name: &str,
        dns_names: &str,
    ) -> Result<serde_yaml::Value, InterpretError> {
        let cert_yaml = format!(
            r#"
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: ingress-cert
  namespace: openshift-ingress
spec:
  secretName: ingress-cert-tls
  issuerRef:
    name: {cluster_issuer_name}
    kind: ClusterIssuer
  dnsNames:
    - "*.{dns_names}"
"#
        );
        // Parse the rendered manifest into a YAML mapping; `to_value` would
        // only wrap the raw string as a YAML string scalar.
        Ok(serde_yaml::from_str(&cert_yaml)?)
    }

    pub async fn apply_cert_request(
        &self,
        client: &Arc<K8sClient>,
        cert_yaml: serde_yaml::Value,
        operator_namespace: &str,
    ) -> Result<(), InterpretError> {
        Ok(client
            .apply_yaml(&cert_yaml, Some(operator_namespace))
            .await?)
    }
}
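A minimal usage sketch (not part of the diff): a hypothetical in-module test checking that the template renders a parseable Certificate manifest for sample inputs.

```rust
#[cfg(test)]
mod tests {
    use super::*;

    // Hypothetical test, not in the commit: the sample issuer name and
    // domain are illustrative placeholders.
    #[test]
    fn renders_certificate_manifest() {
        let interpret = GenerateCaCertInterpret {
            score: GenerateCaCertScore {
                cluster_issuer_name: "ca-issuer".to_string(),
                dns_names: "apps.example.com".to_string(),
                operator_namespace: "cert-manager".to_string(),
            },
        };
        let manifest = interpret
            .build_cert_request_yaml("ca-issuer", "apps.example.com")
            .unwrap();
        // The rendered YAML should parse into a mapping, not a raw string.
        assert_eq!(manifest["kind"].as_str(), Some("Certificate"));
    }
}
```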

View File

@@ -1,2 +1,3 @@
mod gen_ca_cert;
mod helm;
pub use helm::*;

View File

@@ -4,5 +4,4 @@ pub mod application_monitoring;
pub mod grafana;
pub mod kube_prometheus;
pub mod ntfy;
pub mod okd;
pub mod prometheus;

View File

@@ -1,149 +0,0 @@
use std::{collections::BTreeMap, sync::Arc};

use crate::{
    data::Version,
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
    inventory::Inventory,
    score::Score,
    topology::{K8sclient, Topology, k8s::K8sClient},
};
use async_trait::async_trait;
use harmony_types::id::Id;
use k8s_openapi::api::core::v1::ConfigMap;
use kube::api::ObjectMeta;
use serde::Serialize;

#[derive(Clone, Debug, Serialize)]
pub struct OpenshiftUserWorkloadMonitoring {}

impl<T: Topology + K8sclient> Score<T> for OpenshiftUserWorkloadMonitoring {
    fn name(&self) -> String {
        "OpenshiftUserWorkloadMonitoringScore".to_string()
    }

    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        Box::new(OpenshiftUserWorkloadMonitoringInterpret {})
    }
}

#[derive(Clone, Debug, Serialize)]
pub struct OpenshiftUserWorkloadMonitoringInterpret {}
#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftUserWorkloadMonitoringInterpret {
    async fn execute(
        &self,
        _inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let client = topology.k8s_client().await.unwrap();
        self.update_cluster_monitoring_config_cm(&client).await?;
        self.update_user_workload_monitoring_config_cm(&client)
            .await?;
        self.verify_user_workload(&client).await?;
        Ok(Outcome::success(
            "successfully enabled user-workload-monitoring".to_string(),
        ))
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OpenshiftUserWorkloadMonitoring")
    }

    fn get_version(&self) -> Version {
        todo!()
    }

    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}
impl OpenshiftUserWorkloadMonitoringInterpret {
    pub async fn update_cluster_monitoring_config_cm(
        &self,
        client: &Arc<K8sClient>,
    ) -> Result<Outcome, InterpretError> {
        let mut data = BTreeMap::new();
        data.insert(
            "config.yaml".to_string(),
            r#"
enableUserWorkload: true
alertmanagerMain:
  enableUserAlertmanagerConfig: true
"#
            .to_string(),
        );
        let cm = ConfigMap {
            metadata: ObjectMeta {
                name: Some("cluster-monitoring-config".to_string()),
                namespace: Some("openshift-monitoring".to_string()),
                ..Default::default()
            },
            data: Some(data),
            ..Default::default()
        };
        client.apply(&cm, Some("openshift-monitoring")).await?;
        Ok(Outcome::success(
            "updated cluster-monitoring-config-map".to_string(),
        ))
    }

    pub async fn update_user_workload_monitoring_config_cm(
        &self,
        client: &Arc<K8sClient>,
    ) -> Result<Outcome, InterpretError> {
        let mut data = BTreeMap::new();
        data.insert(
            "config.yaml".to_string(),
            r#"
alertmanager:
  enabled: true
  enableAlertmanagerConfig: true
"#
            .to_string(),
        );
        let cm = ConfigMap {
            metadata: ObjectMeta {
                name: Some("user-workload-monitoring-config".to_string()),
                namespace: Some("openshift-user-workload-monitoring".to_string()),
                ..Default::default()
            },
            data: Some(data),
            ..Default::default()
        };
        client
            .apply(&cm, Some("openshift-user-workload-monitoring"))
            .await?;
        Ok(Outcome::success(
            "updated openshift-user-monitoring-config-map".to_string(),
        ))
    }

    pub async fn verify_user_workload(
        &self,
        client: &Arc<K8sClient>,
    ) -> Result<Outcome, InterpretError> {
        let namespace = "openshift-user-workload-monitoring";
        let alertmanager_name = "alertmanager-user-workload-0";
        let prometheus_name = "prometheus-user-workload-0";
        client
            .wait_for_pod_ready(alertmanager_name, Some(namespace))
            .await?;
        client
            .wait_for_pod_ready(prometheus_name, Some(namespace))
            .await?;
        Ok(Outcome::success(format!(
            "pods: {}, {} ready in ns: {}",
            alertmanager_name, prometheus_name, namespace
        )))
    }
}

View File

@@ -1 +0,0 @@
pub mod enable_user_workload;