diff --git a/Cargo.lock b/Cargo.lock index 4cf88dd..db2929d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1262,22 +1262,6 @@ dependencies = [ "url", ] -[[package]] -name = "brocade-switch-oricom-configuration" -version = "0.1.0" -dependencies = [ - "async-trait", - "brocade", - "env_logger", - "harmony", - "harmony_cli", - "harmony_macros", - "harmony_types", - "log", - "serde", - "tokio", -] - [[package]] name = "brotli" version = "8.0.2" @@ -2650,6 +2634,29 @@ dependencies = [ "url", ] +[[package]] +name = "example-cluster-dashboards" +version = "0.1.0" +dependencies = [ + "env_logger", + "harmony", + "harmony_cli", + "harmony_types", + "log", + "tokio", +] + +[[package]] +name = "example-grafana" +version = "0.1.0" +dependencies = [ + "harmony", + "harmony_cli", + "harmony_types", + "log", + "tokio", +] + [[package]] name = "example-harmony-sso" version = "0.1.0" diff --git a/examples/cluster_dashboards/Cargo.toml b/examples/cluster_dashboards/Cargo.toml new file mode 100644 index 0000000..7845145 --- /dev/null +++ b/examples/cluster_dashboards/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "example-cluster-dashboards" +edition = "2021" +version = "0.1.0" +license = "GNU AGPL v3" +publish = false + +[dependencies] +harmony = { path = "../../harmony" } +harmony_cli = { path = "../../harmony_cli" } +harmony_types = { path = "../../harmony_types" } +tokio = { version = "1.40", features = ["macros", "rt-multi-thread"] } +log = "0.4" +env_logger = "0.11" diff --git a/examples/cluster_dashboards/src/main.rs b/examples/cluster_dashboards/src/main.rs new file mode 100644 index 0000000..d587016 --- /dev/null +++ b/examples/cluster_dashboards/src/main.rs @@ -0,0 +1,20 @@ +use harmony::{ + inventory::Inventory, modules::monitoring::cluster_dashboards::ClusterDashboardsScore, + topology::K8sAnywhereTopology, +}; + +#[tokio::main] +async fn main() { + harmony_cli::cli_logger::init(); + + let cluster_dashboards_score = ClusterDashboardsScore::default(); + + harmony_cli::run( + 
Inventory::autoload(), + K8sAnywhereTopology::from_env(), + vec![Box::new(cluster_dashboards_score)], + None, + ) + .await + .unwrap(); +} diff --git a/examples/grafana/Cargo.toml b/examples/grafana/Cargo.toml new file mode 100644 index 0000000..0758351 --- /dev/null +++ b/examples/grafana/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "example-grafana" +edition = "2021" +version = "0.1.0" +license = "GNU AGPL v3" +publish = false + +[dependencies] +harmony = { path = "../../harmony" } +harmony_cli = { path = "../../harmony_cli" } +harmony_types = { path = "../../harmony_types" } +tokio = { version = "1.40", features = ["macros", "rt-multi-thread"] } +log = "0.4" diff --git a/examples/grafana/env.sh b/examples/grafana/env.sh new file mode 100644 index 0000000..5a58931 --- /dev/null +++ b/examples/grafana/env.sh @@ -0,0 +1,5 @@ +export HARMONY_SECRET_NAMESPACE=example-grafana +export HARMONY_SECRET_STORE=file +export HARMONY_DATABASE_URL=sqlite://harmony_grafana.sqlite +export RUST_LOG=harmony=debug +export HARMONY_USE_LOCAL_K3D=false diff --git a/examples/grafana/src/main.rs b/examples/grafana/src/main.rs new file mode 100644 index 0000000..58a374a --- /dev/null +++ b/examples/grafana/src/main.rs @@ -0,0 +1,31 @@ +use harmony::{ + inventory::Inventory, + modules::monitoring::{ + cluster_dashboards::ClusterDashboardsScore, + grafana::helm::helm_grafana::GrafanaOperatorScore, + }, + topology::K8sAnywhereTopology, +}; + +const GRAFANA_OPERATOR_CHART_VERSION: &str = "v5.22.2"; + +#[tokio::main] +async fn main() { + harmony_cli::cli_logger::init(); + + let grafana_operator = + GrafanaOperatorScore::new("grafana", Some(GRAFANA_OPERATOR_CHART_VERSION)); + let cluster_dashboards_score = ClusterDashboardsScore::default(); + + harmony_cli::run( + Inventory::autoload(), + K8sAnywhereTopology::from_env(), + vec![ + Box::new(grafana_operator), + Box::new(cluster_dashboards_score), + ], + None, + ) + .await + .unwrap(); +} diff --git a/harmony-k8s/src/domain.rs 
b/harmony-k8s/src/domain.rs new file mode 100644 index 0000000..d1115c5 --- /dev/null +++ b/harmony-k8s/src/domain.rs @@ -0,0 +1,117 @@ +use kube::Error; +use kube::api::GroupVersionKind; +use log::{debug, trace, warn}; + +use crate::client::K8sClient; +use crate::types::KubernetesDistribution; + +impl K8sClient { + /// Resolve an external hostname for the given service name by querying the + /// cluster's ingress infrastructure. + /// + /// Detection order: + /// 1. **OpenShift** — reads `status.domain` from the default + /// `IngressController` in `openshift-ingress-operator`. + /// 2. **NGINX Ingress Controller** — looks for well-known Services in + /// common namespaces and extracts the LoadBalancer hostname. + /// 3. **Fallback** — returns internal cluster DNS + /// (`{service}.default.svc.cluster.local`). + pub async fn get_domain(&self, service: &str) -> Result { + let distribution = self.get_k8s_distribution().await?; + + if matches!(distribution, KubernetesDistribution::OpenshiftFamily) { + if let Some(domain) = self.try_openshift_ingress_domain().await? { + return Ok(format!("{service}.{domain}")); + } + } + + if let Some(domain) = self.try_nginx_lb_domain().await? 
{ + return Ok(format!("{service}.{domain}")); + } + + warn!("Could not determine external ingress domain; falling back to internal-only DNS"); + Ok(format!("{service}.default.svc.cluster.local")) + } + + async fn try_openshift_ingress_domain(&self) -> Result, Error> { + let gvk = GroupVersionKind { + group: "operator.openshift.io".into(), + version: "v1".into(), + kind: "IngressController".into(), + }; + + let ic = match self + .get_resource_json_value("default", Some("openshift-ingress-operator"), &gvk) + .await + { + Ok(ic) => ic, + Err(e) => { + debug!("Could not fetch OpenShift IngressController: {e}"); + return Ok(None); + } + }; + + let replicas = ic.data["status"]["availableReplicas"].as_i64().unwrap_or(0); + if replicas < 1 { + debug!("OpenShift IngressController present but no available replicas"); + return Ok(None); + } + + if let Some(domain) = ic.data["status"]["domain"].as_str() { + trace!("OpenShift IngressController domain: {domain}"); + return Ok(Some(domain.to_string())); + } + + warn!("OpenShift IngressController present but no status.domain set"); + Ok(None) + } + + async fn try_nginx_lb_domain(&self) -> Result, Error> { + let svc_gvk = GroupVersionKind { + group: "".into(), + version: "v1".into(), + kind: "Service".into(), + }; + + let candidates = [ + ("ingress-nginx", "ingress-nginx-controller"), + ("ingress-nginx", "ingress-nginx-controller-internal"), + ("ingress-nginx", "ingress-nginx"), + ("kube-system", "ingress-nginx-controller"), + ]; + + for (ns, name) in candidates { + trace!("Checking NGINX Service {ns}/{name} for LoadBalancer hostname"); + if let Ok(svc) = self.get_resource_json_value(name, Some(ns), &svc_gvk).await { + let lb_hosts = svc.data["status"]["loadBalancer"]["ingress"] + .as_array() + .cloned() + .unwrap_or_default(); + for entry in lb_hosts { + if let Some(host) = entry.get("hostname").and_then(|v| v.as_str()) { + debug!("Found NGINX LB hostname: {host}"); + if let Some(domain) = extract_base_domain(host) { + return 
Ok(Some(domain)); + } else { + return Ok(Some(host.to_string())); + } + } + if let Some(ip) = entry.get("ip").and_then(|v| v.as_str()) { + debug!("NGINX LB exposes IP {ip} (no hostname); skipping"); + } + } + } + } + + Ok(None) + } +} + +fn extract_base_domain(host: &str) -> Option { + let parts: Vec<&str> = host.split('.').collect(); + if parts.len() >= 2 { + Some(parts[parts.len() - 2..].join(".")) + } else { + None + } +} diff --git a/harmony-k8s/src/lib.rs b/harmony-k8s/src/lib.rs index 2704c65..1943540 100644 --- a/harmony-k8s/src/lib.rs +++ b/harmony-k8s/src/lib.rs @@ -3,6 +3,7 @@ pub mod bundle; pub mod client; pub mod config; pub mod discovery; +pub mod domain; pub mod helper; pub mod node; pub mod pod; diff --git a/harmony/src/domain/topology/k8s_anywhere/k8s_anywhere.rs b/harmony/src/domain/topology/k8s_anywhere/k8s_anywhere.rs index f457610..75acc9a 100644 --- a/harmony/src/domain/topology/k8s_anywhere/k8s_anywhere.rs +++ b/harmony/src/domain/topology/k8s_anywhere/k8s_anywhere.rs @@ -742,18 +742,17 @@ impl K8sAnywhereTopology { labels: Some(labels.clone()), ..Default::default() }, - spec: GrafanaSpec { - config: None, - admin_user: None, - admin_password: None, - ingress: None, - persistence: None, - resources: None, - }, + spec: GrafanaSpec::default(), }; grafana } + // NOTE: This creates a harmony-owned Ingress resource, separate from the + // grafana-operator. The newer pattern (used in `ClusterDashboardsScore`) + // delegates Ingress creation to grafana-operator via `.spec.ingress` on + // the Grafana CR, using `K8sClient::get_domain()` for hostname + // resolution. This method is kept for backward compatibility with the + // `install_grafana()` flow. 
async fn build_grafana_ingress(&self, ns: &str) -> K8sIngressScore { let domain = self.get_domain(&format!("grafana-{}", ns)).await.unwrap(); let name = format!("{}-grafana", ns); @@ -1083,7 +1082,7 @@ impl K8sAnywhereTopology { if tenant.is_some() { namespace_scope = true; } - let _grafana_operator_score = grafana_helm_chart_score(namespace, namespace_scope) + let _grafana_operator_score = grafana_helm_chart_score(namespace, namespace_scope, None) .interpret(inventory, self) .await .map_err(|e| PreparationError::new(e.to_string())); @@ -1317,134 +1316,18 @@ impl TenantManager for K8sAnywhereTopology { #[async_trait] impl Ingress for K8sAnywhereTopology { async fn get_domain(&self, service: &str) -> Result { - use log::{trace, warn}; + // k3d local-dev shortcut (topology-specific state not available on K8sClient) + if let Some(Some(k8s_state)) = self.k8s_state.get() { + if matches!(k8s_state.source, K8sSource::LocalK3d) { + return Ok(format!("{service}.local.k3d")); + } + } let client = self.k8s_client().await?; - - if let Some(Some(k8s_state)) = self.k8s_state.get() { - match k8s_state.source { - K8sSource::LocalK3d => { - // Local developer UX - return Ok(format!("{service}.local.k3d")); - } - K8sSource::Kubeconfig => { - trace!("K8sSource is kubeconfig; attempting to detect domain"); - - // 1) Try OpenShift IngressController domain (backward compatible) - if self.openshift_ingress_operator_available().await.is_ok() { - trace!("OpenShift ingress operator detected; using IngressController"); - let gvk = GroupVersionKind { - group: "operator.openshift.io".into(), - version: "v1".into(), - kind: "IngressController".into(), - }; - let ic = client - .get_resource_json_value( - "default", - Some("openshift-ingress-operator"), - &gvk, - ) - .await - .map_err(|_| { - PreparationError::new( - "Failed to fetch IngressController".to_string(), - ) - })?; - - if let Some(domain) = ic.data["status"]["domain"].as_str() { - return Ok(format!("{service}.{domain}")); - } else { - 
warn!("OpenShift IngressController present but no status.domain set"); - } - } else { - trace!( - "OpenShift ingress operator not detected; trying generic Kubernetes" - ); - } - - // 2) Try NGINX Ingress Controller common setups - // 2.a) Well-known namespace/name for the controller Service - // - upstream default: namespace "ingress-nginx", service "ingress-nginx-controller" - // - some distros: "ingress-nginx-controller" svc in "ingress-nginx" ns - // If found with LoadBalancer ingress hostname, use its base domain. - if let Some(domain) = try_nginx_lb_domain(&client).await? { - return Ok(format!("{service}.{domain}")); - } - - // 3) Fallback: internal cluster DNS suffix (service.namespace.svc.cluster.local) - // We don't have tenant namespace here, so we fallback to 'default' with a warning. - warn!( - "Could not determine external ingress domain; falling back to internal-only DNS" - ); - let internal = format!("{service}.default.svc.cluster.local"); - Ok(internal) - } - } - } else { - Err(PreparationError::new( - "Cannot get domain: unable to detect K8s state".to_string(), - )) - } - } -} - -async fn try_nginx_lb_domain(client: &K8sClient) -> Result, PreparationError> { - use log::{debug, trace}; - - // Try common service path: svc/ingress-nginx-controller in ns/ingress-nginx - let svc_gvk = GroupVersionKind { - group: "".into(), // core - version: "v1".into(), - kind: "Service".into(), - }; - - let candidates = [ - ("ingress-nginx", "ingress-nginx-controller"), - ("ingress-nginx", "ingress-nginx-controller-internal"), - ("ingress-nginx", "ingress-nginx"), // some charts name the svc like this - ("kube-system", "ingress-nginx-controller"), // less common but seen - ]; - - for (ns, name) in candidates { - trace!("Checking NGINX Service {ns}/{name} for LoadBalancer hostname"); - if let Ok(svc) = client - .get_resource_json_value(ns, Some(name), &svc_gvk) + client + .get_domain(service) .await - { - let lb_hosts = svc.data["status"]["loadBalancer"]["ingress"] - 
.as_array() - .cloned() - .unwrap_or_default(); - for entry in lb_hosts { - if let Some(host) = entry.get("hostname").and_then(|v| v.as_str()) { - debug!("Found NGINX LB hostname: {host}"); - if let Some(domain) = extract_base_domain(host) { - return Ok(Some(domain.to_string())); - } else { - return Ok(Some(host.to_string())); // already a domain - } - } - if let Some(ip) = entry.get("ip").and_then(|v| v.as_str()) { - // If only an IP is exposed, we can't create a hostname; return None to keep searching - debug!("NGINX LB exposes IP {ip} (no hostname); skipping"); - } - } - } - } - - Ok(None) -} - -fn extract_base_domain(host: &str) -> Option { - // For a host like a1b2c3d4e5f6abcdef.elb.amazonaws.com -> base domain elb.amazonaws.com - // For a managed DNS like xyz.example.com -> base domain example.com (keep 2+ labels) - // Heuristic: keep last 2 labels by default; special-case known multi-label TLDs if needed. - let parts: Vec<&str> = host.split('.').collect(); - if parts.len() >= 2 { - // Very conservative: last 2 labels - Some(parts[parts.len() - 2..].join(".")) - } else { - None + .map_err(|e| PreparationError::new(e.to_string())) } } diff --git a/harmony/src/modules/helm/chart.rs b/harmony/src/modules/helm/chart.rs index d447126..cbdc7cb 100644 --- a/harmony/src/modules/helm/chart.rs +++ b/harmony/src/modules/helm/chart.rs @@ -60,7 +60,69 @@ impl Score for HelmChartScore { pub struct HelmChartInterpret { pub score: HelmChartScore, } +#[derive(serde::Deserialize)] +struct HelmListEntry { + name: String, + chart: String, +} + impl HelmChartInterpret { + fn find_installed_release( + &self, + topology: &T, + ns: &str, + ) -> Result, InterpretError> { + let release = self.score.release_name.to_string(); + let filter = format!("^{}$", release); + let args = vec!["list", "--namespace", ns, "--filter", &filter, "-o", "json"]; + let output = run_helm_command(topology, &args)?; + if !output.status.success() { + return Err(InterpretError::new(format!( + "helm list 
failed: {}", + String::from_utf8_lossy(&output.stderr) + ))); + } + let entries: Vec = serde_json::from_slice(&output.stdout) + .map_err(|e| InterpretError::new(format!("parse helm list output: {e}")))?; + Ok(entries + .into_iter() + .find(|e| e.name == release) + .map(|e| e.chart)) + } + + fn expected_chart_field(&self) -> Option { + let version = self.score.chart_version.as_ref()?.to_string(); + let short = self + .score + .chart_name + .to_string() + .rsplit('/') + .next() + .unwrap_or("") + .to_string(); + Some(format!( + "{short}-{}", + version.strip_prefix('v').unwrap_or(&version) + )) + } + + fn normalize_chart_field(s: &str) -> String { + // Helm strips a leading `v` from chart versions in the `chart` column + // (normalized to semver). Users often write `v5.22.2` on the score. + // Normalize both sides by dropping a `-v` → `-` before the version. + match s.rfind("-v") { + Some(i) + if s[i + 2..] + .chars() + .next() + .is_some_and(|c| c.is_ascii_digit()) => + { + format!("{}-{}", &s[..i], &s[i + 2..]) + } + _ => s.to_string(), + } + } + fn add_repo(&self, topology: &T) -> Result<(), InterpretError> { let repo = match &self.score.repository { Some(repo) => repo, @@ -142,6 +204,41 @@ impl Interpret for HelmChartInterpret { .as_ref() .unwrap_or_else(|| todo!("Get namespace from active kubernetes cluster")); + let ns_str = ns.to_string(); + if let Some(installed_chart) = self.find_installed_release(topology, &ns_str)? { + return match self.expected_chart_field() { + Some(expected) + if Self::normalize_chart_field(&expected) + == Self::normalize_chart_field(&installed_chart) => + { + warn!( + "Helm release '{}' already installed at desired version ('{}'); skipping.", + self.score.release_name, installed_chart + ); + Ok(Outcome::success(format!( + "Helm Chart {} already at desired version", + self.score.release_name + ))) + } + Some(expected) => Err(InterpretError::new(format!( + "Helm release '{}' already installed as '{}', but score requests '{}'. 
\ + Refusing to upgrade/downgrade; resolve manually.", + self.score.release_name, installed_chart, expected + ))), + None => { + warn!( + "Helm release '{}' already installed as '{}'; score has no pinned \ + chart_version so skipping re-install.", + self.score.release_name, installed_chart + ); + Ok(Outcome::success(format!( + "Helm Chart {} already installed (version not pinned)", + self.score.release_name + ))) + } + }; + } + self.add_repo(topology)?; let mut args = if self.score.install_only { diff --git a/harmony/src/modules/monitoring/cluster_dashboards/ceph_01-ServiceMonitor.yaml b/harmony/src/modules/monitoring/cluster_dashboards/ceph_01-ServiceMonitor.yaml new file mode 100644 index 0000000..b445dff --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/ceph_01-ServiceMonitor.yaml @@ -0,0 +1,49 @@ +# These are probably already created by rook-ceph operator, not sure, needs to validate. +# in fact, 100% sure for the second one (rook-ceph-exporter) +# i over-wrote the first one (rook-ceph-mgr) with what is here, it was probably already working +# all what was missing was a label on the rook-ceph namespace to tell prometheus to look for monitors in this namespace +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: rook-ceph-mgr + namespace: rook-ceph + labels: + # This specific label is what tells OKD's Prometheus to pick this up + openshift.io/cluster-monitoring: "true" +spec: + namespaceSelector: + matchNames: + - rook-ceph + selector: + matchLabels: + # This matches your 'rook-ceph-mgr' service + app: rook-ceph-mgr + endpoints: + - port: "" + # The port name in your service is empty/integers, so we use targetPort + targetPort: 9283 + path: /metrics + interval: 30s +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: rook-ceph-exporter + namespace: rook-ceph + labels: + # This label is required for OKD cluster-wide monitoring to pick it up + openshift.io/cluster-monitoring: "true" + 
team: rook +spec: + endpoints: + - honorLabels: true + interval: 10s + path: /metrics + port: ceph-exporter-http-metrics + namespaceSelector: + matchNames: + - rook-ceph + selector: + matchLabels: + app: rook-ceph-exporter + rook_cluster: rook-ceph diff --git a/harmony/src/modules/monitoring/cluster_dashboards/ceph_02-RBAC.yaml b/harmony/src/modules/monitoring/cluster_dashboards/ceph_02-RBAC.yaml new file mode 100644 index 0000000..0564fa8 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/ceph_02-RBAC.yaml @@ -0,0 +1,23 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: rook-ceph-metrics-viewer + namespace: rook-ceph +rules: +- apiGroups: [""] + resources: ["services", "endpoints", "pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: rook-ceph-metrics-viewer + namespace: rook-ceph +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: rook-ceph-metrics-viewer +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: openshift-monitoring diff --git a/harmony/src/modules/monitoring/cluster_dashboards/ceph_03-NamespaceLabel.yaml b/harmony/src/modules/monitoring/cluster_dashboards/ceph_03-NamespaceLabel.yaml new file mode 100644 index 0000000..1134ff7 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/ceph_03-NamespaceLabel.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: rook-ceph + labels: + # This is the critical label that allows OKD Prometheus to see the namespace + openshift.io/cluster-monitoring: "true" diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/alerts-events-problems.json b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/alerts-events-problems.json new file mode 100644 index 0000000..3132e92 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/alerts-events-problems.json @@ -0,0 +1,731 @@ +{ + "title": 
"Alerts & Events — Active Problems", + "uid": "okd-alerts-events", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-3h", "to": "now" }, + "tags": ["okd", "alerts", "events"], + "templating": { + "list": [ + { + "name": "severity", + "type": "custom", + "label": "Severity Filter", + "query": "critical,warning,info", + "current": { "selected": true, "text": "All", "value": "$__all" }, + "includeAll": true, + "allValue": "critical|warning|info", + "multi": false, + "options": [ + { "selected": true, "text": "All", "value": "$__all" }, + { "selected": false, "text": "Critical", "value": "critical" }, + { "selected": false, "text": "Warning", "value": "warning" }, + { "selected": false, "text": "Info", "value": "info" } + ] + }, + { + "name": "namespace", + "type": "query", + "label": "Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(ALERTS{alertstate=\"firing\"}, namespace)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "multi": true, + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "Critical Alerts Firing", + "description": "Alerting rule instances currently in the firing state with severity=\"critical\". Any non-zero value represents a breached SLO or infrastructure condition requiring immediate on-call response. 
The ALERTS metric is generated by Prometheus directly from your alerting rules — it reflects what Prometheus knows, before Alertmanager routing or silencing.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(ALERTS{alertstate=\"firing\",severity=\"critical\"}) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Warning Alerts Firing", + "description": "Firing alerts at severity=\"warning\". Warnings indicate a degraded or elevated-risk condition that has not yet crossed the critical threshold. 
A sustained or growing warning count often precedes a critical fire — treat them as early-warning signals, not background noise.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(ALERTS{alertstate=\"firing\",severity=\"warning\"}) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "orange", "value": 5 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "Info / Unclassified Alerts Firing", + "description": "Firing alerts with severity=\"info\" or no severity label. These are informational and do not normally require immediate action. 
A sudden large jump may reveal noisy alerting rules generating alert fatigue — rules worth reviewing for threshold tuning or adding inhibition rules.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(ALERTS{alertstate=\"firing\",severity!~\"critical|warning\"}) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "blue", "value": 1 }, + { "color": "blue", "value": 25 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "Alerts Silenced (Suppressed)", + "description": "Alerts currently matched by an active Alertmanager silence rule and therefore not routed to receivers. Silences are intentional during maintenance windows, but a large suppressed count outside of planned maintenance = an overly broad silence masking real problems. 
Zero silences when a maintenance window is active = the silence has expired or was misconfigured.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(alertmanager_alerts{state=\"suppressed\"}) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 20 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "CrashLoopBackOff Pods", + "description": "Container instances currently waiting in the CrashLoopBackOff state — the container crashed and Kubernetes is retrying with exponential back-off. Each instance is a pod that cannot stay running. 
Common root causes: OOM kill, bad entrypoint, missing Secret or ConfigMap, an unavailable init dependency, or a broken image layer.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "OOMKilled Containers", + "description": "Containers whose most recent termination reason was OOMKilled. This is a current-state snapshot: a container that was OOMKilled, restarted, and is now Running will still appear here until its next termination occurs for a different reason. 
Non-zero and stable = recurring OOM, likely a workload memory leak or under-provisioned memory limit.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "orange", "value": 1 }, + { "color": "red", "value": 5 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "NotReady Nodes", + "description": "Nodes where the Ready condition is currently not True (False or Unknown). A NotReady node stops receiving new pod scheduling and, after the node eviction timeout (~5 min default), pods on it will be evicted. Control plane nodes going NotReady simultaneously = potential quorum loss. 
Any non-zero value is a tier-1 incident signal.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\"} == 0) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "Degraded Cluster Operators (OKD)", + "description": "OKD ClusterOperators currently reporting Degraded=True. Each ClusterOperator owns a core platform component — authentication, networking, image-registry, monitoring, ingress, storage, etc. A degraded operator means its managed component is impaired or unavailable. 
Zero is the only acceptable steady-state value outside of an active upgrade.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(cluster_operator_conditions{condition=\"Degraded\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "Alert Overview", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, "type": "timeseries", "title": "Firing Alert Count by Severity Over Time", + "description": "Instantaneous count of firing ALERTS series grouped by severity over the selected window. A vertical rise = new alerting condition emerged. A horizontal plateau = a persistent, unresolved problem. A step-down = alert resolved or Prometheus rule evaluation stopped matching. 
Use the Severity Filter variable to narrow scope during triage.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "count by(severity)(ALERTS{alertstate=\"firing\",severity=~\"$severity\",namespace=~\"$namespace\"})", + "refId": "A", + "legendFormat": "{{severity}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "critical" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "warning" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "info" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max", "lastNotNull"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "timeseries", "title": "Alertmanager Notification Rate by Integration", + "description": "Rate of notification delivery attempts from Alertmanager per second, split by integration type (slack, pagerduty, email, webhook, etc.). Solid lines = successful deliveries; dashed red lines = failed deliveries. A drop to zero on all integrations = Alertmanager is not processing or the cluster is completely quiet. 
Persistent failures on one integration = check that receiver's credentials or endpoint availability.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(integration)(rate(alertmanager_notifications_total[5m]))", "refId": "A", "legendFormat": "✓ {{integration}}" }, + { "expr": "sum by(integration)(rate(alertmanager_notifications_failed_total[5m]))", "refId": "B", "legendFormat": "✗ {{integration}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { + "matcher": { "id": "byFrameRefID", "options": "B" }, + "properties": [ + { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }, + { "id": "custom.lineStyle", "value": { "dash": [6, 4], "fill": "dash" } }, + { "id": "custom.lineWidth", "value": 1 } + ] + } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 5 } + }, + + { + "id": 12, "type": "bargauge", "title": "Longest-Firing Active Alerts", + "description": "Duration (now - ALERTS_FOR_STATE timestamp) for each currently firing alert, sorted descending. Alerts at the top have been firing longest and are the most likely candidates for known-but-unresolved issues, stale firing conditions, or alerts that should have a silence applied. 
Red bars (> 2 hours) strongly suggest a problem that has been acknowledged but not resolved.", +      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, +      "targets": [{ +        "expr": "sort_desc(time() - (ALERTS_FOR_STATE{severity=~\"$severity\",namespace=~\"$namespace\"} and ignoring(alertstate) ALERTS{alertstate=\"firing\"}))", +        "refId": "A", +        "legendFormat": "{{alertname}} · {{severity}} · {{namespace}}" +      }], +      "fieldConfig": { +        "defaults": { +          "unit": "s", "min": 0, +          "color": { "mode": "thresholds" }, +          "thresholds": { "mode": "absolute", "steps": [ +            { "color": "green", "value": null }, +            { "color": "yellow", "value": 300 }, +            { "color": "orange", "value": 1800 }, +            { "color": "red", "value": 7200 } +          ]} +        } +      }, +      "options": { +        "orientation": "horizontal", +        "reduceOptions": { "calcs": ["lastNotNull"] }, +        "displayMode": "gradient", +        "showUnfilled": true, +        "valueMode": "color" +      }, +      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 5 } +    }, + +    { +      "id": 13, "type": "row", "title": "Active Firing Alerts — Full Detail", "collapsed": false, +      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 } +    }, + +    { +      "id": 14, "type": "table", "title": "All Firing Alerts", +      "description": "Instant-query table of every currently firing alert visible to Prometheus, filtered by the Namespace and Severity variables above. Each row is one alert instance (unique label combination). The value column is omitted — by definition every row here is firing. Use the built-in column filter (funnel icon) to further narrow to a specific alertname, pod, or node. 
Columns are sparse: labels not defined in a given alert rule will show '—'.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "ALERTS{alertstate=\"firing\",severity=~\"$severity\",namespace=~\"$namespace\"}", + "refId": "A", + "instant": true, + "legendFormat": "" + }], + "transformations": [ + { "id": "labelsToFields", "options": { "mode": "columns" } }, + { + "id": "organize", + "options": { + "excludeByName": { + "alertstate": true, + "__name__": true, + "Value": true, + "Time": true + }, + "renameByName": { + "alertname": "Alert Name", + "severity": "Severity", + "namespace": "Namespace", + "pod": "Pod", + "node": "Node", + "container": "Container", + "job": "Job", + "service": "Service", + "reason": "Reason", + "instance": "Instance" + }, + "indexByName": { + "severity": 0, + "alertname": 1, + "namespace": 2, + "pod": 3, + "node": 4, + "container": 5, + "job": 6, + "service": 7, + "reason": 8, + "instance": 9 + } + } + } + ], + "fieldConfig": { + "defaults": { + "custom": { "align": "left", "filterable": true }, + "noValue": "—" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Severity" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "custom.width", "value": 110 }, + { + "id": "mappings", + "value": [{ + "type": "value", + "options": { + "critical": { "text": "CRITICAL", "color": "dark-red", "index": 0 }, + "warning": { "text": "WARNING", "color": "dark-yellow", "index": 1 }, + "info": { "text": "INFO", "color": "dark-blue", "index": 2 } + } + }] + } + ] + }, + { "matcher": { "id": "byName", "options": "Alert Name" }, "properties": [{ "id": "custom.width", "value": 300 }] }, + { "matcher": { "id": "byName", "options": "Namespace" }, "properties": [{ "id": "custom.width", "value": 180 }] }, + { "matcher": { "id": "byName", "options": "Pod" }, "properties": [{ "id": "custom.width", "value": 200 }] }, + { "matcher": { "id": "byName", "options": 
"Node" }, "properties": [{ "id": "custom.width", "value": 200 }] } + ] + }, + "options": { + "sortBy": [{ "desc": false, "displayName": "Severity" }], + "footer": { "show": false } + }, + "gridPos": { "h": 12, "w": 24, "x": 0, "y": 14 } + }, + + { + "id": 15, "type": "row", "title": "Kubernetes Warning Events", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 } + }, + + { + "id": 16, "type": "timeseries", "title": "Warning Event Rate by Reason", + "description": "Rate of Kubernetes Warning-type events per second grouped by reason code. BackOff = container is CrashLooping. FailedScheduling = no node satisfies pod constraints. FailedMount = volume attachment or CSI failure. Evicted = kubelet evicted a pod due to memory or disk pressure. NodeNotReady = node lost contact. A spike in a single reason narrows the incident root-cause immediately without needing to read raw event logs. Requires kube-state-metrics with --resources=events.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(reason)(rate(kube_event_count{type=\"Warning\",namespace=~\"$namespace\"}[5m])))", + "refId": "A", + "legendFormat": "{{reason}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 27 } + }, + + { + "id": 17, "type": "bargauge", "title": "Warning Events — Top Namespaces (Accumulated Count)", + "description": "Total accumulated Warning event count (the count field on the Kubernetes Event object) per namespace, showing the top 15 most active. 
A namespace dominating this chart is generating significantly more abnormal conditions than its peers, useful for identifying noisy tenants, misconfigured deployments, or namespaces experiencing a persistent infrastructure problem. Note this is the raw Event.count field — it resets if the event object is deleted and recreated.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(15, sum by(namespace)(kube_event_count{type=\"Warning\"}))", + "refId": "A", + "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 10 }, + { "color": "orange", "value": 50 }, + { "color": "red", "value": 200 } + ]} + } + }, + "options": { + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"] }, + "displayMode": "gradient", + "showUnfilled": true + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 27 } + }, + + { + "id": 18, "type": "timeseries", "title": "Warning Events — Accumulated Count by Reason Over Time", + "description": "Raw accumulated event count gauge over time, split by reason. Unlike the rate panel this shows total volume and slope simultaneously. A line that climbs steeply = events are occurring frequently right now. A line that plateaus = the condition causing that reason has stopped. 
A line that drops to zero = the event object was deleted and recreated or the condition fully resolved.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(reason)(kube_event_count{type=\"Warning\",namespace=~\"$namespace\"}))", + "refId": "A", + "legendFormat": "{{reason}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 8, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 27 } + }, + + { + "id": 19, "type": "row", "title": "Pod Problems", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 35 } + }, + + { + "id": 20, "type": "timeseries", "title": "CrashLoopBackOff Pods by Namespace", + "description": "Count of container instances in CrashLoopBackOff waiting state over time, broken down by namespace. A sudden rise in one namespace = a workload deployment is failing. A persistent baseline across many namespaces = a shared dependency (Secret, ConfigMap, network policy, or an upstream service) has become unavailable. 
Unlike restart rate, this panel shows the steady-state count of pods currently stuck — not flapping.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\",namespace=~\"$namespace\"} == 1)", + "refId": "A", + "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 5 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 36 } + }, + + { + "id": 21, "type": "timeseries", "title": "Container Restart Rate by Namespace", + "description": "Rate of container restarts per second across all reasons (OOMKill, liveness probe failure, process exit) grouped by namespace. A namespace with a rising restart rate that has not yet entered CrashLoopBackOff is in the early failure window before the exponential back-off penalty kicks in. 
Cross-reference with the OOMKilled stat tile and the last-terminated-reason to separate crash types.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(namespace)(rate(kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}[5m])))", + "refId": "A", + "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 36 } + }, + + { + "id": 22, "type": "timeseries", "title": "Pods by Problem Phase (Failed / Pending / Unknown)", + "description": "Count of pods in Failed, Pending, or Unknown phase over time. Failed = container terminated with a non-zero exit code or was evicted and not rescheduled. Pending for more than a few minutes = scheduler unable to bind the pod (check FailedScheduling events, node capacity, and taint/toleration mismatches). 
Unknown = kubelet is not reporting to the apiserver, typically indicating a node network partition or kubelet crash.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(phase)(kube_pod_status_phase{phase=~\"Failed|Unknown\",namespace=~\"$namespace\"} == 1)", "refId": "A", "legendFormat": "{{phase}}" }, + { "expr": "sum(kube_pod_status_phase{phase=\"Pending\",namespace=~\"$namespace\"} == 1)", "refId": "B", "legendFormat": "Pending" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ]} + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Pending" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Unknown" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 36 } + }, + + { + "id": 23, "type": "row", "title": "Node & Cluster Operator Conditions", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 43 } + }, + + { + "id": 24, "type": "table", "title": "Node Condition Status Matrix", + "description": "Instant snapshot of every active node condition across all nodes. Each row is one (node, condition, status) triple where value=1, meaning that combination is currently true. 
Ready=true is the normal healthy state; MemoryPressure=true, DiskPressure=true, PIDPressure=true, and NetworkUnavailable=true all indicate problem states that will affect pod scheduling on that node. Use the column filter to show only conditions where status=\"true\" and condition != \"Ready\" to isolate problems quickly.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "kube_node_status_condition == 1", + "refId": "A", + "instant": true, + "legendFormat": "" + }], + "transformations": [ + { "id": "labelsToFields", "options": { "mode": "columns" } }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "endpoint": true, + "job": true, + "service": true, + "instance": true + }, + "renameByName": { + "node": "Node", + "condition": "Condition", + "status": "Status" + }, + "indexByName": { "node": 0, "condition": 1, "status": 2 } + } + } + ], + "fieldConfig": { + "defaults": { + "custom": { "align": "left", "filterable": true }, + "noValue": "—" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Status" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "custom.width", "value": 90 }, + { + "id": "mappings", + "value": [{ + "type": "value", + "options": { + "true": { "text": "true", "color": "green", "index": 0 }, + "false": { "text": "false", "color": "dark-red", "index": 1 }, + "unknown": { "text": "unknown", "color": "dark-orange", "index": 2 } + } + }] + } + ] + }, + { + "matcher": { "id": "byName", "options": "Condition" }, + "properties": [ + { "id": "custom.width", "value": 190 }, + { "id": "custom.displayMode", "value": "color-text" }, + { + "id": "mappings", + "value": [{ + "type": "value", + "options": { + "Ready": { "color": "green", "index": 0 }, + "MemoryPressure": { "color": "red", "index": 1 }, + "DiskPressure": { "color": "red", "index": 2 }, + "PIDPressure": { "color": "red", 
"index": 3 }, + "NetworkUnavailable": { "color": "red", "index": 4 } + } + }] + } + ] + }, + { "matcher": { "id": "byName", "options": "Node" }, "properties": [{ "id": "custom.width", "value": 230 }] } + ] + }, + "options": { + "sortBy": [{ "desc": false, "displayName": "Node" }], + "footer": { "show": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 44 } + }, + + { + "id": 25, "type": "table", "title": "Cluster Operator Conditions — Degraded & Progressing (OKD)", + "description": "Shows only ClusterOperator conditions that indicate a problem state: Degraded=True (operator has failed to achieve its desired state) or Progressing=True (operator is actively reconciling — normal during upgrades but alarming in steady state). Operators not appearing in this table are healthy. The reason column gives the operator's own explanation for the condition, which maps directly to the relevant operator log stream and OpenShift runbook.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "cluster_operator_conditions{condition=\"Degraded\"} == 1", + "refId": "A", + "instant": true, + "legendFormat": "" + }, + { + "expr": "cluster_operator_conditions{condition=\"Progressing\"} == 1", + "refId": "B", + "instant": true, + "legendFormat": "" + } + ], + "transformations": [ + { "id": "labelsToFields", "options": { "mode": "columns" } }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "endpoint": true, + "job": true, + "service": true, + "instance": true, + "namespace": true + }, + "renameByName": { + "name": "Operator", + "condition": "Condition", + "reason": "Reason" + }, + "indexByName": { "name": 0, "condition": 1, "reason": 2 } + } + } + ], + "fieldConfig": { + "defaults": { + "custom": { "align": "left", "filterable": true }, + "noValue": "—" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Condition" }, + "properties": [ + { "id": 
"custom.displayMode", "value": "color-background" }, + { "id": "custom.width", "value": 140 }, + { + "id": "mappings", + "value": [{ + "type": "value", + "options": { + "Degraded": { "text": "Degraded", "color": "dark-red", "index": 0 }, + "Progressing": { "text": "Progressing", "color": "dark-yellow", "index": 1 } + } + }] + } + ] + }, + { "matcher": { "id": "byName", "options": "Operator" }, "properties": [{ "id": "custom.width", "value": 240 }] }, + { "matcher": { "id": "byName", "options": "Reason" }, "properties": [{ "id": "custom.width", "value": 220 }] } + ] + }, + "options": { + "sortBy": [{ "desc": false, "displayName": "Condition" }], + "footer": { "show": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 44 } + } + + ] +} diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/cluster-overview.json b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/cluster-overview.json new file mode 100644 index 0000000..43079ce --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/cluster-overview.json @@ -0,0 +1,739 @@ +{ + "title": "Cluster Overview", + "uid": "okd-cluster-overview", + "schemaVersion": 36, + "version": 2, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "cluster", "overview"], + "panels": [ + { + "id": 1, + "type": "stat", + "title": "Ready Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\"} == 1)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": 
["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + { + "id": 2, + "type": "stat", + "title": "Not Ready Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"false\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + { + "id": 3, + "type": "stat", + "title": "Running Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Running\"} == 1)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + { + "id": 4, + "type": "stat", + "title": "Pending Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Pending\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + 
"defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 5 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + { + "id": 5, + "type": "stat", + "title": "Failed Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Failed\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + { + "id": 6, + "type": "stat", + "title": "CrashLoopBackOff", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], 
"fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + { + "id": 7, + "type": "stat", + "title": "Critical Alerts", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(ALERTS{alertstate=\"firing\",severity=\"critical\"}) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + { + "id": 8, + "type": "stat", + "title": "Warning Alerts", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(ALERTS{alertstate=\"firing\",severity=\"warning\"}) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 10 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + { + "id": 9, + "type": "gauge", + "title": "CPU Usage", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A", + "legendFormat": "CPU" + } + 
], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "auto" + }, + "gridPos": { "h": 6, "w": 5, "x": 0, "y": 4 } + }, + { + "id": 10, + "type": "gauge", + "title": "Memory Usage", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)))", + "refId": "A", + "legendFormat": "Memory" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 75 }, + { "color": "red", "value": 90 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "auto" + }, + "gridPos": { "h": 6, "w": 5, "x": 5, "y": 4 } + }, + { + "id": 11, + "type": "gauge", + "title": "Root Disk Usage", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (sum(node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"})))", + "refId": "A", + "legendFormat": "Disk" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 
70 }, + { "color": "red", "value": 85 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "auto" + }, + "gridPos": { "h": 6, "w": 4, "x": 10, "y": 4 } + }, + { + "id": 12, + "type": "stat", + "title": "etcd Has Leader", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "min(etcd_server_has_leader)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { "text": "NO LEADER", "color": "red" }, + "1": { "text": "LEADER OK", "color": "green" } + } + } + ], + "unit": "short", + "noValue": "?" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 3, "w": 5, "x": 14, "y": 4 } + }, + { + "id": 13, + "type": "stat", + "title": "API Servers Up", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(up{job=\"apiserver\"})", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 2 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 3, "w": 5, "x": 19, "y": 4 } + }, + { + 
"id": 14, + "type": "stat", + "title": "etcd Members Up", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(up{job=\"etcd\"})", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 2 }, + { "color": "green", "value": 3 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 3, "w": 5, "x": 14, "y": 7 } + }, + { + "id": 15, + "type": "stat", + "title": "Operators Degraded", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(cluster_operator_conditions{condition=\"Degraded\",status=\"True\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 3, "w": 5, "x": 19, "y": 7 } + }, + { + "id": 16, + "type": "timeseries", + "title": "CPU Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { 
"mode": "palette-classic" }, + "custom": { + "lineWidth": 2, + "fillOpacity": 10, + "spanNulls": false, + "showPoints": "never" + } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": ["mean", "max"] + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 10 } + }, + { + "id": 17, + "type": "timeseries", + "title": "Memory Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, + "fillOpacity": 10, + "spanNulls": false, + "showPoints": "never" + } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": ["mean", "max"] + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 10 } + }, + { + "id": 18, + "type": "timeseries", + "title": "Network Traffic — Cluster Total", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|tun.*|ovn.*|br-int|br-ex\"}[5m]))", + "refId": "A", + "legendFormat": "Receive" + }, + { + "expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|tun.*|ovn.*|br-int|br-ex\"}[5m]))", + "refId": "B", + "legendFormat": "Transmit" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, + "fillOpacity": 10, + "spanNulls": false, + "showPoints": "never" + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Receive" }, + "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } 
}] + }, + { + "matcher": { "id": "byName", "options": "Transmit" }, + "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] + } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "none" }, + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": ["mean", "max"] + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 18 } + }, + { + "id": 19, + "type": "timeseries", + "title": "Pod Phases Over Time", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Running\"} == 1)", + "refId": "A", + "legendFormat": "Running" + }, + { + "expr": "count(kube_pod_status_phase{phase=\"Pending\"} == 1) or vector(0)", + "refId": "B", + "legendFormat": "Pending" + }, + { + "expr": "count(kube_pod_status_phase{phase=\"Failed\"} == 1) or vector(0)", + "refId": "C", + "legendFormat": "Failed" + }, + { + "expr": "count(kube_pod_status_phase{phase=\"Unknown\"} == 1) or vector(0)", + "refId": "D", + "legendFormat": "Unknown" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "custom": { + "lineWidth": 2, + "fillOpacity": 15, + "spanNulls": false, + "showPoints": "never" + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Running" }, + "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Pending" }, + "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Failed" }, + "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Unknown" }, + "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] + } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "none" }, + "legend": { + "displayMode": "list", + "placement": "bottom", + 
"calcs": ["lastNotNull"] + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 18 } + } + ] +} diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/control-plane.json b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/control-plane.json new file mode 100644 index 0000000..921085d --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/control-plane.json @@ -0,0 +1,742 @@ +{ + "title": "Control Plane Health", + "uid": "okd-control-plane", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "control-plane"], + "templating": { + "list": [ + { + "name": "instance", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(apiserver_request_total, instance)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "API Server Instance", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "API Servers Up", + "description": "Number of kube-apiserver instances currently scraped and up. 
Healthy HA cluster = 3.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(up{job=~\".*apiserver.*\"} == 1)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Controller Managers Up", + "description": "kube-controller-manager instances up. In OKD only one holds the leader lease at a time; others are hot standbys.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(up{job=~\".*controller-manager.*\"} == 1)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "Schedulers Up", + "description": "kube-scheduler instances up. One holds the leader lease; rest are standbys. 
0 = no scheduling of new pods.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(up{job=~\".*scheduler.*\"} == 1)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "API 5xx Rate", + "description": "Server-side errors (5xx) across all apiserver instances per second. Any sustained non-zero value = apiserver internal fault.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(rate(apiserver_request_total{code=~\"5..\"}[5m]))", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.01 }, + { "color": "red", "value": 1 } + ]}, + "unit": "reqps", "noValue": "0", "decimals": 3 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "Inflight — Mutating", + "description": "Current in-flight mutating requests (POST/PUT/PATCH/DELETE). Default OKD limit is ~1000. 
Hitting the limit = 429 errors for writes.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(apiserver_current_inflight_requests{request_kind=\"mutating\"})", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 500 }, + { "color": "orange", "value": 750 }, + { "color": "red", "value": 900 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "Inflight — Read-Only", + "description": "Current in-flight non-mutating requests (GET/LIST/WATCH). Default OKD limit is ~3000. Hitting it = 429 for reads, impacting controllers and kubectl.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(apiserver_current_inflight_requests{request_kind=\"readOnly\"})", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1500 }, + { "color": "orange", "value": 2200 }, + { "color": "red", "value": 2700 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "API Request p99 (non-WATCH)", + "description": "Overall p99 latency for all non-streaming verbs. >1s = noticeable kubectl sluggishness. 
>10s = controllers timing out on LIST/GET.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"WATCH|CONNECT\"}[5m])) by (le))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "orange", "value": 1 }, + { "color": "red", "value": 5 } + ]}, + "unit": "s", "noValue": "0", "decimals": 3 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "APIServer → etcd p99", + "description": "p99 time apiserver spends waiting on etcd calls. Spike here while WAL fsync is healthy = serialization or large object overhead.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum(rate(apiserver_storage_request_duration_seconds_bucket[5m])) by (le))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.05 }, + { "color": "orange", "value": 0.2 }, + { "color": "red", "value": 0.5 } + ]}, + "unit": "s", "noValue": "0", "decimals": 4 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "API Server — Request Rates & Errors", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + 
{ + "id": 10, "type": "timeseries", "title": "Request Rate by Verb", + "description": "Non-streaming calls per second broken down by verb. GET/LIST = read load from controllers. POST/PUT/PATCH/DELETE = write throughput. A sudden LIST spike = controller cache resync storm.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(verb)(rate(apiserver_request_total{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m]))", + "refId": "A", "legendFormat": "{{verb}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "timeseries", "title": "Error Rate by HTTP Status Code", + "description": "4xx/5xx responses per second by code. 429 = inflight limit hit (throttling). 422 = admission rejection or invalid object. 
500/503 = internal apiserver fault or etcd unavailability.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(code)(rate(apiserver_request_total{instance=~\"$instance\",code=~\"[45]..\"}[5m]))", + "refId": "A", "legendFormat": "HTTP {{code}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 5 } + }, + + { + "id": 12, "type": "timeseries", "title": "In-Flight Requests — Mutating vs Read-Only", + "description": "Instantaneous count of requests being actively handled. The two series correspond to the two inflight limit buckets enforced by the apiserver's Priority and Fairness (APF) or legacy inflight settings.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(request_kind)(apiserver_current_inflight_requests{instance=~\"$instance\"})", "refId": "A", "legendFormat": "{{request_kind}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 20, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 5 } + }, + + { + "id": 13, "type": "row", "title": "API Server — Latency", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 } + }, + + { + "id": 14, "type": "timeseries", "title": "Request Latency — p50 / p95 / p99 (non-WATCH)", + "description": "Aggregated end-to-end request duration 
across all verbs except WATCH/CONNECT (which are unbounded streaming). A rising p99 without a matching rise in etcd latency = CPU saturation, admission webhook slowness, or serialization overhead.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum(rate(apiserver_request_duration_seconds_bucket{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum(rate(apiserver_request_duration_seconds_bucket{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 14 } + }, + + { + "id": 15, "type": "timeseries", "title": "Request p99 Latency by Verb", + "description": "p99 latency broken out per verb. LIST is inherently slower than GET due to serializing full collections. 
A POST/PUT spike = heavy admission webhook chain or large object writes. DELETE spikes are usually caused by cascading GC finalizer storms.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum by(verb,le)(rate(apiserver_request_duration_seconds_bucket{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m])))", + "refId": "A", "legendFormat": "{{verb}}" + }], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 14 } + }, + + { + "id": 16, "type": "timeseries", "title": "APIServer → etcd Latency by Operation", + "description": "Time apiserver spends waiting on etcd, split by operation type (get, list, create, update, delete, watch). Elevated get/list = etcd read pressure. 
Elevated create/update = write bottleneck, likely correlated with WAL fsync latency.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(operation,le)(rate(apiserver_storage_request_duration_seconds_bucket[5m])))", "refId": "A", "legendFormat": "p50 — {{operation}}" }, + { "expr": "histogram_quantile(0.99, sum by(operation,le)(rate(apiserver_storage_request_duration_seconds_bucket[5m])))", "refId": "B", "legendFormat": "p99 — {{operation}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 14 } + }, + + { + "id": 17, "type": "row", "title": "API Server — Watches & Long-Running Requests", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 } + }, + + { + "id": 18, "type": "timeseries", "title": "Active Long-Running Requests (Watches) by Resource", + "description": "Instantaneous count of open WATCH streams grouped by resource. Each controller typically holds one WATCH per resource type per apiserver instance. 
A sudden drop = controller restart; a runaway climb = operator creating watches without cleanup.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(resource)(apiserver_longrunning_requests{instance=~\"$instance\",verb=\"WATCH\"})", + "refId": "A", "legendFormat": "{{resource}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 23 } + }, + + { + "id": 19, "type": "timeseries", "title": "Watch Events Dispatched Rate by Kind", + "description": "Watch events sent to all active watchers per second, by object kind. Persistent high rate for a specific kind = that resource type is churning heavily, increasing etcd load and controller reconcile frequency.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(kind)(rate(apiserver_watch_events_total{instance=~\"$instance\"}[5m]))", + "refId": "A", "legendFormat": "{{kind}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 23 } + }, + + { + "id": 20, "type": "timeseries", "title": "Watch Event Size — p50 / p95 / p99 by Kind", + "description": "Size of individual watch events dispatched to clients. Large events (MiB-scale) for Secrets or ConfigMaps = objects being stored with oversized data. 
Contributes to apiserver memory pressure and network saturation.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(kind,le)(rate(apiserver_watch_events_sizes_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50 — {{kind}}" }, + { "expr": "histogram_quantile(0.99, sum by(kind,le)(rate(apiserver_watch_events_sizes_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p99 — {{kind}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 23 } + }, + + { + "id": 21, "type": "row", "title": "Admission Webhooks", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 } + }, + + { + "id": 22, "type": "timeseries", "title": "Webhook Call Rate by Name", + "description": "Mutating and validating admission webhook invocations per second by webhook name. 
A webhook invoked on every write (e.g., a mutating webhook with no object selector) can be a major source of write latency amplification.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(name,type)(rate(apiserver_admission_webhook_request_total{instance=~\"$instance\"}[5m]))", + "refId": "A", "legendFormat": "{{type}} — {{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 31 } + }, + + { + "id": 23, "type": "timeseries", "title": "Webhook Latency p99 by Name", + "description": "p99 round-trip time per webhook call (network + webhook server processing). Default apiserver timeout is 10s; a webhook consistently near that limit causes cascading write latency for all resources it intercepts.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum by(name,le)(rate(apiserver_admission_webhook_admission_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", + "refId": "A", "legendFormat": "{{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "red", "value": 2.0 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, 
"y": 31 } + }, + + { + "id": 24, "type": "timeseries", "title": "Webhook Rejection Rate by Name", + "description": "Rate of admission denials per webhook. A validating webhook rejecting requests is expected behaviour; a sudden surge indicates either a newly enforced policy or a misbehaving webhook rejecting valid objects.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(name,error_type)(rate(apiserver_admission_webhook_rejection_count{instance=~\"$instance\"}[5m]))", + "refId": "A", "legendFormat": "{{name}} ({{error_type}})" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 31 } + }, + + { + "id": 25, "type": "row", "title": "kube-controller-manager", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 38 } + }, + + { + "id": 26, "type": "timeseries", "title": "Work Queue Depth by Controller", + "description": "Items waiting to be reconciled in each controller's work queue. Persistent non-zero depth = controller cannot keep up with the event rate. 
Identifies which specific controller is the bottleneck during overload incidents.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(15, sum by(name)(workqueue_depth{job=~\".*controller-manager.*\"}))", + "refId": "A", "legendFormat": "{{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 10 }, + { "color": "red", "value": 50 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 39 } + }, + + { + "id": 27, "type": "timeseries", "title": "Work Queue Item Processing Duration p99 by Controller", + "description": "p99 time a work item spends being actively reconciled (inside the reconcile loop, excludes queue wait time). 
A slow reconcile = either the controller is doing expensive API calls or the etcd write path is slow.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum by(name,le)(rate(workqueue_work_duration_seconds_bucket{job=~\".*controller-manager.*\"}[5m])))", + "refId": "A", "legendFormat": "{{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 39 } + }, + + { + "id": 28, "type": "timeseries", "title": "Work Queue Retry Rate by Controller", + "description": "Rate of items being re-queued after a failed reconciliation. A persistently high retry rate for a controller = it is encountering recurring errors on the same objects (e.g., API permission errors, webhook rejections, or resource conflicts).", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(15, sum by(name)(rate(workqueue_retries_total{job=~\".*controller-manager.*\"}[5m])))", + "refId": "A", "legendFormat": "{{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 39 } + }, + + { + "id": 29, "type": "row", "title": "kube-scheduler", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 47 } + }, + + { + "id": 30, "type": 
"timeseries", "title": "Scheduling Attempt Rate by Result", + "description": "Outcomes of scheduling cycles per second. scheduled = pod successfully bound to a node. unschedulable = no node met the pod's constraints. error = scheduler internal failure (API error, timeout). Persistent unschedulable = cluster capacity or taints/affinity misconfiguration.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(result)(rate(scheduler_schedule_attempts_total[5m]))", + "refId": "A", "legendFormat": "{{result}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "scheduled" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "unschedulable" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "error" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 48 } + }, + + { + "id": 31, "type": "timeseries", "title": "Scheduling Latency — p50 / p95 / p99", + "description": "Time from when a pod enters the active queue to when a binding decision is made (does not include bind API call time). Includes filter, score, and reserve plugin execution time. 
Spike = expensive affinity rules, large number of nodes, or slow extender webhooks.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 48 } + }, + + { + "id": 32, "type": "timeseries", "title": "Pending Pods by Queue", + "description": "Pods waiting to be scheduled, split by internal queue. active = ready to be attempted now. backoff = recently failed, in exponential back-off. unschedulable = parked until cluster state changes. 
A growing unschedulable queue = systemic capacity or constraint problem.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(queue)(scheduler_pending_pods)", + "refId": "A", "legendFormat": "{{queue}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 10 }, + { "color": "red", "value": 50 } + ]} + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "unschedulable" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "backoff" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "active" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 48 } + }, + + { + "id": 33, "type": "row", "title": "Process Resources — All Control Plane Components", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 55 } + }, + + { + "id": 34, "type": "timeseries", "title": "CPU Usage by Component", + "description": "Rate of CPU seconds consumed by each control plane process. apiserver CPU spike = surge in request volume or list serialization. controller-manager CPU spike = reconcile storm. 
scheduler CPU spike = large node count with complex affinity.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(job)(rate(process_cpu_seconds_total{job=~\".*apiserver.*\"}[5m]))", "refId": "A", "legendFormat": "apiserver — {{job}}" }, + { "expr": "sum by(job)(rate(process_cpu_seconds_total{job=~\".*controller-manager.*\"}[5m]))", "refId": "B", "legendFormat": "controller-manager — {{job}}" }, + { "expr": "sum by(job)(rate(process_cpu_seconds_total{job=~\".*scheduler.*\"}[5m]))", "refId": "C", "legendFormat": "scheduler — {{job}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 56 } + }, + + { + "id": 35, "type": "timeseries", "title": "RSS Memory by Component", + "description": "Resident set size of each control plane process. apiserver memory is dominated by the watch cache size and serialisation buffers. controller-manager memory = informer caches. 
Monotonically growing RSS without restarts = memory leak or unbounded cache growth.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(job)(process_resident_memory_bytes{job=~\".*apiserver.*\"})", "refId": "A", "legendFormat": "apiserver — {{job}}" }, + { "expr": "sum by(job)(process_resident_memory_bytes{job=~\".*controller-manager.*\"})", "refId": "B", "legendFormat": "controller-manager — {{job}}" }, + { "expr": "sum by(job)(process_resident_memory_bytes{job=~\".*scheduler.*\"})", "refId": "C", "legendFormat": "scheduler — {{job}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 56 } + }, + + { + "id": 36, "type": "timeseries", "title": "Goroutines by Component", + "description": "Number of live goroutines in each control plane process. Gradual upward drift = goroutine leak (often tied to unclosed watch streams or context leaks). A step-down = process restart. 
apiserver typically runs 200–600 goroutines; spikes above 1000 warrant investigation.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(job)(go_goroutines{job=~\".*apiserver.*\"})", "refId": "A", "legendFormat": "apiserver — {{job}}" }, + { "expr": "sum by(job)(go_goroutines{job=~\".*controller-manager.*\"})", "refId": "B", "legendFormat": "controller-manager — {{job}}" }, + { "expr": "sum by(job)(go_goroutines{job=~\".*scheduler.*\"})", "refId": "C", "legendFormat": "scheduler — {{job}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 56 } + } + + ] +} diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/etcd.json b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/etcd.json new file mode 100644 index 0000000..93ac55e --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/etcd.json @@ -0,0 +1,734 @@ +{ + "title": "etcd", + "uid": "okd-etcd", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "etcd"], + "templating": { + "list": [ + { + "name": "instance", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(etcd_server_has_leader, instance)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "Instance", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "Cluster Members", + "description": "Total number of etcd members currently 
reporting metrics.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(etcd_server_has_leader)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Has Leader", + "description": "min() across all members. 0 = at least one member has no quorum — cluster is degraded.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "min(etcd_server_has_leader)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ]}, + "unit": "short", "noValue": "0", + "mappings": [ + { "type": "value", "options": { + "0": { "text": "NO LEADER", "color": "red" }, + "1": { "text": "OK", "color": "green" } + }} + ] + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "Leader Changes (1h)", + "description": "Number of leader elections in the last hour. 
≥3 indicates cluster instability.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(changes(etcd_server_leader_changes_seen_total[1h]))", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "DB Size (Max)", + "description": "Largest boltdb file size across all members. Default etcd quota is 8 GiB.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "max(etcd_mvcc_db_total_size_in_bytes)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 2147483648 }, + { "color": "orange", "value": 5368709120 }, + { "color": "red", "value": 7516192768 } + ]}, + "unit": "bytes", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "DB Fragmentation (Max)", + "description": "% of DB space that is allocated but unused. 
>50% → run etcdctl defrag.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "max((etcd_mvcc_db_total_size_in_bytes - etcd_mvcc_db_total_size_in_use_in_bytes) / etcd_mvcc_db_total_size_in_bytes * 100)", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 25 }, + { "color": "orange", "value": 50 }, + { "color": "red", "value": 75 } + ]}, + "unit": "percent", "noValue": "0", "decimals": 1 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "Failed Proposals/s", + "description": "Rate of rejected Raft proposals. Any sustained non-zero value = cluster health problem.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(rate(etcd_server_proposals_failed_total[5m]))", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 0.001 } + ]}, + "unit": "short", "noValue": "0", "decimals": 3 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "WAL Fsync p99", + "description": "99th percentile WAL flush-to-disk time. 
>10ms is concerning; >100ms = serious I/O bottleneck.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (le))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.01 }, + { "color": "orange", "value": 0.1 }, + { "color": "red", "value": 0.5 } + ]}, + "unit": "s", "noValue": "0", "decimals": 4 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "Backend Commit p99", + "description": "99th percentile boltdb commit time. >25ms = warning; >100ms = critical backend I/O pressure.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (le))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.025 }, + { "color": "orange", "value": 0.1 }, + { "color": "red", "value": 0.25 } + ]}, + "unit": "s", "noValue": "0", "decimals": 4 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "Cluster Health", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, "type": "timeseries", "title": "Has Leader per 
Instance", + "description": "1 = member has a leader; 0 = member lost quorum. A dip to 0 marks the exact moment of a leader election.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "etcd_server_has_leader{instance=~\"$instance\"}", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "max": 1.1, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 20, "showPoints": "never", "spanNulls": false }, + "mappings": [ + { "type": "value", "options": { + "0": { "text": "0 — no leader" }, + "1": { "text": "1 — ok" } + }} + ] + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "none" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": [] } + }, + "gridPos": { "h": 6, "w": 8, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "timeseries", "title": "Leader Changes (cumulative)", + "description": "Monotonically increasing counter per member. A step jump = one leader election. Correlated jumps across members = cluster-wide event.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "etcd_server_leader_changes_seen_total{instance=~\"$instance\"}", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "auto", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "none" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull"] } + }, + "gridPos": { "h": 6, "w": 8, "x": 8, "y": 5 } + }, + + { + "id": 12, "type": "timeseries", "title": "Slow Operations", + "description": "slow_apply: proposals applied slower than expected. slow_read_index: linearizable reads timing out. 
heartbeat_failures: Raft heartbeat send errors (network partition indicator).", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "rate(etcd_server_slow_apply_total{instance=~\"$instance\"}[5m])", "refId": "A", "legendFormat": "Slow Apply — {{instance}}" }, + { "expr": "rate(etcd_server_slow_read_indexes_total{instance=~\"$instance\"}[5m])", "refId": "B", "legendFormat": "Slow Read Index — {{instance}}" }, + { "expr": "rate(etcd_server_heartbeat_send_failures_total{instance=~\"$instance\"}[5m])", "refId": "C", "legendFormat": "Heartbeat Failures — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 6, "w": 8, "x": 16, "y": 5 } + }, + + { + "id": 13, "type": "row", "title": "gRPC Traffic", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 11 } + }, + + { + "id": 14, "type": "timeseries", "title": "gRPC Request Rate by Method", + "description": "Unary calls/s per RPC method. High Put/Txn = heavy write load. High Range = heavy read load. 
High Watch = many controller watchers.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(grpc_method)(rate(grpc_server_started_total{job=~\".*etcd.*\",grpc_type=\"unary\"}[5m]))", + "refId": "A", "legendFormat": "{{grpc_method}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 } + }, + + { + "id": 15, "type": "timeseries", "title": "gRPC Error Rate by Status Code", + "description": "Non-OK responses by gRPC status code. RESOURCE_EXHAUSTED = overloaded. UNAVAILABLE = leader election. DEADLINE_EXCEEDED = latency spike.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(grpc_code)(rate(grpc_server_handled_total{job=~\".*etcd.*\",grpc_code!=\"OK\"}[5m]))", + "refId": "A", "legendFormat": "{{grpc_code}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 } + }, + + { + "id": 16, "type": "timeseries", "title": "gRPC Request Latency (p50 / p95 / p99)", + "description": "Unary call handling duration. p99 > 100ms for Put/Txn indicates disk or CPU pressure. 
p99 > 500ms will cause kube-apiserver timeouts.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\",grpc_type=\"unary\"}[5m])) by (le))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\",grpc_type=\"unary\"}[5m])) by (le))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\",grpc_type=\"unary\"}[5m])) by (le))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 } + }, + + { + "id": 17, "type": "row", "title": "Raft Proposals", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 } + }, + + { + "id": 18, "type": "timeseries", "title": "Proposals Committed vs Applied", + "description": "Committed = agreed by Raft quorum. Applied = persisted to boltdb. 
A widening gap between the two = backend apply backlog (disk too slow to keep up).", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "rate(etcd_server_proposals_committed_total{instance=~\"$instance\"}[5m])", "refId": "A", "legendFormat": "Committed — {{instance}}" }, + { "expr": "rate(etcd_server_proposals_applied_total{instance=~\"$instance\"}[5m])", "refId": "B", "legendFormat": "Applied — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 21 } + }, + + { + "id": 19, "type": "timeseries", "title": "Proposals Pending", + "description": "In-flight Raft proposals not yet committed. 
Consistently high (>5) = cluster cannot keep up with write throughput.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "etcd_server_proposals_pending{instance=~\"$instance\"}", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line+area" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 5 }, + { "color": "red", "value": 10 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 21 } + }, + + { + "id": 20, "type": "timeseries", "title": "Failed Proposals Rate", + "description": "Raft proposals that were rejected. 
Root causes: quorum loss, leader timeout, network partition between members.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_server_proposals_failed_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 20, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 0.001 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 21 } + }, + + { + "id": 21, "type": "row", "title": "Disk I/O", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 28 } + }, + + { + "id": 22, "type": "timeseries", "title": "WAL Fsync Duration (p50 / p95 / p99) per Instance", + "description": "Time to flush the write-ahead log to disk. etcd is extremely sensitive to WAL latency. >10ms p99 = storage is the bottleneck. 
Correlates directly with Raft commit latency.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(le,instance)(rate(etcd_disk_wal_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50 — {{instance}}" }, + { "expr": "histogram_quantile(0.95, sum by(le,instance)(rate(etcd_disk_wal_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p95 — {{instance}}" }, + { "expr": "histogram_quantile(0.99, sum by(le,instance)(rate(etcd_disk_wal_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "C", "legendFormat": "p99 — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 29 } + }, + + { + "id": 23, "type": "timeseries", "title": "Backend Commit Duration (p50 / p95 / p99) per Instance", + "description": "Time for boltdb to commit a batch transaction. A spike here while WAL is healthy = backend I/O saturation or boltdb lock contention. 
Triggers apply backlog.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(le,instance)(rate(etcd_disk_backend_commit_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50 — {{instance}}" }, + { "expr": "histogram_quantile(0.95, sum by(le,instance)(rate(etcd_disk_backend_commit_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p95 — {{instance}}" }, + { "expr": "histogram_quantile(0.99, sum by(le,instance)(rate(etcd_disk_backend_commit_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "C", "legendFormat": "p99 — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 29 } + }, + + { + "id": 24, "type": "row", "title": "Network (Peer & Client)", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 37 } + }, + + { + "id": 25, "type": "timeseries", "title": "Peer RX Rate", + "description": "Bytes received from Raft peers (log replication + heartbeats). 
A burst during a quiet period = large snapshot being streamed to a recovering member.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_network_peer_received_bytes_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 6, "x": 0, "y": 38 } + }, + + { + "id": 26, "type": "timeseries", "title": "Peer TX Rate", + "description": "Bytes sent to Raft peers. Leader will have higher TX than followers (it replicates entries to all members).", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_network_peer_sent_bytes_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 6, "x": 6, "y": 38 } + }, + + { + "id": 27, "type": "timeseries", "title": "Client gRPC Received", + "description": "Bytes received from API clients (kube-apiserver, operators). 
Spike = large write burst from controllers or kubectl apply.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_network_client_grpc_received_bytes_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 6, "x": 12, "y": 38 } + }, + + { + "id": 28, "type": "timeseries", "title": "Client gRPC Sent", + "description": "Bytes sent to API clients (responses + watch events). Persistently high = many active Watch streams or large objects being served.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_network_client_grpc_sent_bytes_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 6, "x": 18, "y": 38 } + }, + + { + "id": 29, "type": "row", "title": "DB Size & Process Resources", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 } + }, + + { + "id": 30, "type": "timeseries", "title": "DB Total vs In-Use Size per Instance", + "description": "Total = allocated boltdb file size. In Use = live key data. The gap between them = fragmentation. 
Steady growth of Total = compaction not keeping up with key churn.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "etcd_mvcc_db_total_size_in_bytes{instance=~\"$instance\"}", "refId": "A", "legendFormat": "Total — {{instance}}" }, + { "expr": "etcd_mvcc_db_total_size_in_use_in_bytes{instance=~\"$instance\"}", "refId": "B", "legendFormat": "In Use — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 46 } + }, + + { + "id": 31, "type": "timeseries", "title": "Process Resident Memory (RSS)", + "description": "Physical RAM consumed by the etcd process. Monotonically growing RSS = memory leak or oversized watch cache. Typical healthy range: 500 MiB–2 GiB depending on cluster size.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "process_resident_memory_bytes{instance=~\"$instance\"}", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 46 } + }, + + { + "id": 32, "type": "timeseries", "title": "Open File Descriptors vs Limit", + "description": "Open FD count (solid) and process FD limit (dashed). 
Approaching the limit will cause WAL file creation and new client connections to fail.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "etcd_process_open_fds{instance=~\"$instance\"}", "refId": "A", "legendFormat": "Open — {{instance}}" }, + { "expr": "etcd_process_max_fds{instance=~\"$instance\"}", "refId": "B", "legendFormat": "Limit — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { + "matcher": { "id": "byRegexp", "options": "^Limit.*" }, + "properties": [ + { "id": "custom.lineWidth", "value": 1 }, + { "id": "custom.lineStyle", "value": { "fill": "dash", "dash": [6, 4] } }, + { "id": "custom.fillOpacity","value": 0 } + ] + } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 46 } + }, + + { + "id": 33, "type": "row", "title": "Snapshots", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 54 } + }, + + { + "id": 34, "type": "timeseries", "title": "Snapshot Save Duration (p50 / p95 / p99)", + "description": "Time to write a full snapshot of the boltdb to disk. 
Slow saves delay Raft log compaction, causing the WAL to grow unboundedly and members to fall further behind.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(le)(rate(etcd_debugging_snap_save_total_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum by(le)(rate(etcd_debugging_snap_save_total_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum by(le)(rate(etcd_debugging_snap_save_total_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 55 } + }, + + { + "id": 35, "type": "timeseries", "title": "Snapshot DB Fsync Duration (p50 / p95 / p99)", + "description": "Time to fsync the snapshot file itself. 
Distinct from WAL fsync: this is flushing the entire boltdb copy to disk after a snapshot is taken.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(le)(rate(etcd_snap_db_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum by(le)(rate(etcd_snap_db_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum by(le)(rate(etcd_snap_db_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 55 } + } + + ] +} diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/networking.json b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/networking.json new file mode 100644 index 0000000..88314d2 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/networking.json @@ -0,0 +1,945 @@ +{ + "title": "Networking", + "uid": "okd-networking", + "schemaVersion": 36, + 
"version": 1, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "networking"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(kube_pod_info, namespace)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "Namespace", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "Network RX Rate", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "Bps", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Network TX Rate", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "Bps", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "RX Errors/s", + "datasource": { "type": 
"prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_receive_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "pps", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "TX Errors/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_transmit_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "pps", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "RX Drops/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, + "unit": "pps", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": 
"background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "TX Drops/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, + "unit": "pps", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "DNS Queries/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(coredns_dns_requests_total[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "reqps", "noValue": "0", "decimals": 1 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "DNS Error %", + "description": "Percentage of DNS responses with non-NOERROR rcode over the last 5 minutes.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(coredns_dns_responses_total{rcode!=\"NOERROR\"}[5m])) / sum(rate(coredns_dns_responses_total[5m])) * 100", + "refId": "A", 
"legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 5 } + ]}, + "unit": "percent", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "Network I/O", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, "type": "timeseries", "title": "Receive Rate by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "timeseries", "title": "Transmit Rate by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": 
"multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 } + }, + + { + "id": 12, "type": "row", "title": "Top Pod Consumers", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 } + }, + + { + "id": 13, "type": "timeseries", "title": "Top 10 Pods — RX Rate", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(namespace,pod)(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m])))", + "refId": "A", "legendFormat": "{{namespace}} / {{pod}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "auto", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 } + }, + + { + "id": 14, "type": "timeseries", "title": "Top 10 Pods — TX Rate", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(namespace,pod)(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m])))", + "refId": "A", "legendFormat": "{{namespace}} / {{pod}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "auto", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 } + }, + + { + "id": 15, + "type": "table", + "title": "Pod Network I/O Summary", + "description": "Current RX/TX rates, errors and drops per pod. 
Sorted by RX rate descending.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,pod)(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "B", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_receive_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "C", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_transmit_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "D", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "E", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "F", "instant": true, "format": "table", "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { "include": { "names": ["namespace", "pod", "Value"] } } + }, + { + "id": "joinByField", + "options": { "byField": "pod", "mode": "outer" } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "namespace 1": true, + "namespace 2": true, + "namespace 3": true, + "namespace 4": true, + "namespace 5": true + }, + "renameByName": { + "namespace": "Namespace", + "pod": "Pod", + "Value": "RX Rate", + "Value 1": "TX Rate", + "Value 2": "RX Errors/s", + "Value 3": "TX Errors/s", + "Value 4": "RX Drops/s", + "Value 5": "TX Drops/s" + }, + "indexByName": { + 
"namespace": 0, + "pod": 1, + "Value": 2, + "Value 1": 3, + "Value 2": 4, + "Value 3": 5, + "Value 4": 6, + "Value 5": 7 + } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "RX Rate", "desc": true }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 160 }] + }, + { + "matcher": { "id": "byName", "options": "Pod" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 260 }] + }, + { + "matcher": { "id": "byRegexp", "options": "^RX Rate$|^TX Rate$" }, + "properties": [ + { "id": "unit", "value": "Bps" }, + { "id": "custom.displayMode", "value": "color-background-solid" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 10000000 }, + { "color": "orange", "value": 100000000 }, + { "color": "red", "value": 500000000 } + ]}} + ] + }, + { + "matcher": { "id": "byRegexp", "options": "^RX Errors/s$|^TX Errors/s$" }, + "properties": [ + { "id": "unit", "value": "pps" }, + { "id": "decimals", "value": 3 }, + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 0.001 } + ]}} + ] + }, + { + "matcher": { "id": "byRegexp", "options": "^RX Drops/s$|^TX Drops/s$" }, + "properties": [ + { "id": "unit", "value": "pps" }, + { "id": "decimals", "value": 3 }, + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "orange", "value": 0.001 } + ]}} + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 22 } + }, + + { + "id": 16, 
"type": "row", "title": "Errors & Packet Loss", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 } + }, + + { + "id": 17, "type": "timeseries", "title": "RX Errors by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_receive_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "pps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 31 } + }, + + { + "id": 18, "type": "timeseries", "title": "TX Errors by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_transmit_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "pps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 31 } + }, + + { + "id": 19, "type": "timeseries", "title": "RX Packet Drops by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum 
by(namespace)(rate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "pps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 38 } + }, + + { + "id": 20, "type": "timeseries", "title": "TX Packet Drops by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "pps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 38 } + }, + + { + "id": 21, "type": "row", "title": "DNS (CoreDNS)", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 } + }, + + { + "id": 22, "type": "timeseries", "title": "DNS Request Rate by Query Type", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(type)(rate(coredns_dns_requests_total[5m]))", + "refId": "A", "legendFormat": "{{type}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } 
+ } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 46 } + }, + + { + "id": 23, "type": "timeseries", "title": "DNS Response Rate by Rcode", + "description": "NOERROR = healthy. NXDOMAIN = name not found. SERVFAIL = upstream error.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(rcode)(rate(coredns_dns_responses_total[5m]))", + "refId": "A", "legendFormat": "{{rcode}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "NOERROR" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "NXDOMAIN" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "SERVFAIL" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "REFUSED" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 46 } + }, + + { + "id": 24, "type": "timeseries", "title": "DNS Request Latency (p50 / p95 / p99)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket[5m])) by (le))", + "refId": "A", "legendFormat": "p50" + }, + { + "expr": "histogram_quantile(0.95, 
sum(rate(coredns_dns_request_duration_seconds_bucket[5m])) by (le))", + "refId": "B", "legendFormat": "p95" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket[5m])) by (le))", + "refId": "C", "legendFormat": "p99" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 46 } + }, + + { + "id": 25, "type": "timeseries", "title": "DNS Cache Hit Ratio (%)", + "description": "High hit ratio = CoreDNS is serving responses from cache, reducing upstream load.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(coredns_cache_hits_total[5m])) / (sum(rate(coredns_cache_hits_total[5m])) + sum(rate(coredns_cache_misses_total[5m]))) * 100", + "refId": "A", "legendFormat": "Cache Hit %" + }], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "green", "value": 80 } + ]}, + "custom": { "lineWidth": 2, "fillOpacity": 20, "showPoints": "never", "spanNulls": false } + } + }, + 
"options": { + "tooltip": { "mode": "single" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "lastNotNull"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 54 } + }, + + { + "id": 26, "type": "timeseries", "title": "DNS Forward Request Rate", + "description": "Queries CoreDNS is forwarding upstream. Spike here with cache miss spike = upstream DNS pressure.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(rate(coredns_forward_requests_total[5m]))", + "refId": "A", "legendFormat": "Forward Requests/s" + }, + { + "expr": "sum(rate(coredns_forward_responses_duration_seconds_count[5m]))", + "refId": "B", "legendFormat": "Forward Responses/s" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 54 } + }, + + { + "id": 27, "type": "row", "title": "Services & Endpoints", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 61 } + }, + + { + "id": 28, "type": "stat", "title": "Total Services", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "count(kube_service_info{namespace=~\"$namespace\"})", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 8, "x": 0, "y": 62 } + }, + + { + "id": 29, 
"type": "stat", "title": "Endpoint Addresses Available", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(kube_endpoint_address_available{namespace=~\"$namespace\"})", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 8, "x": 8, "y": 62 } + }, + + { + "id": 30, "type": "stat", "title": "Endpoint Addresses Not Ready", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(kube_endpoint_address_not_ready{namespace=~\"$namespace\"}) or vector(0)", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 62 } + }, + + { + "id": 31, + "type": "table", + "title": "Endpoint Availability", + "description": "Per-endpoint available vs not-ready address counts. 
Red Not Ready = pods backing this service are unhealthy.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,endpoint)(kube_endpoint_address_available{namespace=~\"$namespace\"})", + "refId": "A", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,endpoint)(kube_endpoint_address_not_ready{namespace=~\"$namespace\"})", + "refId": "B", "instant": true, "format": "table", "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { "include": { "names": ["namespace", "endpoint", "Value"] } } + }, + { + "id": "joinByField", + "options": { "byField": "endpoint", "mode": "outer" } + }, + { + "id": "organize", + "options": { + "excludeByName": { "namespace 1": true }, + "renameByName": { + "namespace": "Namespace", + "endpoint": "Endpoint", + "Value": "Available", + "Value 1": "Not Ready" + }, + "indexByName": { + "namespace": 0, + "endpoint": 1, + "Value": 2, + "Value 1": 3 + } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Not Ready", "desc": true }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 180 }] + }, + { + "matcher": { "id": "byName", "options": "Endpoint" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 220 }] + }, + { + "matcher": { "id": "byName", "options": "Available" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } } + ] + }, + { + "matcher": { "id": "byName", "options": "Not Ready" }, + "properties": [ + { "id": "custom.displayMode", "value": 
"color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } } + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 66 } + }, + + { + "id": 32, "type": "row", "title": "OKD Router / Ingress (HAProxy)", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 74 } + }, + + { + "id": 33, "type": "timeseries", "title": "Router HTTP Request Rate by Code", + "description": "Requires HAProxy router metrics to be scraped (port 1936). OKD exposes these via the openshift-ingress ServiceMonitor.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(code)(rate(haproxy_backend_http_responses_total[5m]))", + "refId": "A", "legendFormat": "HTTP {{code}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "HTTP 2xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "HTTP 4xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "HTTP 5xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 75 } + }, + + { + "id": 34, "type": "timeseries", "title": "Router 4xx + 5xx Error Rate (%)", + "description": "Client error (4xx) and server error (5xx) rates as a percentage of all requests.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ 
+ { + "expr": "sum(rate(haproxy_backend_http_responses_total{code=\"4xx\"}[5m])) / sum(rate(haproxy_backend_http_responses_total[5m])) * 100", + "refId": "A", "legendFormat": "4xx %" + }, + { + "expr": "sum(rate(haproxy_backend_http_responses_total{code=\"5xx\"}[5m])) / sum(rate(haproxy_backend_http_responses_total[5m])) * 100", + "refId": "B", "legendFormat": "5xx %" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 5 } + ]} + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "4xx %" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "5xx %" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 75 } + }, + + { + "id": 35, "type": "timeseries", "title": "Router Bytes In / Out", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(rate(haproxy_frontend_bytes_in_total[5m]))", + "refId": "A", "legendFormat": "Bytes In" + }, + { + "expr": "sum(rate(haproxy_frontend_bytes_out_total[5m]))", + "refId": "B", "legendFormat": "Bytes Out" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Bytes In" }, "properties": [{ "id": "color", "value": { 
"fixedColor": "blue", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Bytes Out" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 83 } + }, + + { + "id": 36, + "type": "table", + "title": "Router Backend Server Status", + "description": "HAProxy backend servers (routes). Value 0 = DOWN, 1 = UP.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "haproxy_server_up", + "refId": "A", "instant": true, "format": "table", "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { "include": { "names": ["proxy", "server", "Value"] } } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "proxy": "Backend", + "server": "Server", + "Value": "Status" + }, + "indexByName": { "proxy": 0, "server": 1, "Value": 2 } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Status", "desc": false }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Backend" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 260 }] + }, + { + "matcher": { "id": "byName", "options": "Server" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 180 }] + }, + { + "matcher": { "id": "byName", "options": "Status" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "mappings", "value": [ + { "type": "value", "options": { "0": { "text": "DOWN", "color": "red" } } }, + { "type": "value", "options": { "1": { "text": "UP", "color": "green" } } } + ]}, + 
{ "id": "thresholds", "value": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ]}} + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 83 } + } + + ] +} diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/nodes-health.json b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/nodes-health.json new file mode 100644 index 0000000..0b2fe9d --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/nodes-health.json @@ -0,0 +1,627 @@ +{ + "title": "Node Health", + "uid": "okd-node-health", + "schemaVersion": 36, + "version": 2, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "node", "health"], + "templating": { + "list": [ + { + "name": "node", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(kube_node_info, node)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "Node", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, + "type": "stat", + "title": "Total Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_info{node=~\"$node\"})", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, + "type": "stat", + "title": "Ready Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": 
"count(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"$node\"} == 1)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, + "type": "stat", + "title": "Not Ready Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"false\",node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, + "type": "stat", + "title": "Memory Pressure", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\",node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": 
["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, + "type": "stat", + "title": "Disk Pressure", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"DiskPressure\",status=\"true\",node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, + "type": "stat", + "title": "PID Pressure", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"PIDPressure\",status=\"true\",node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, + "type": "stat", + "title": "Unschedulable", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_spec_unschedulable{node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ 
"color": "green", "value": null }, { "color": "yellow", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, + "type": "stat", + "title": "Kubelet Up", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(up{job=\"kubelet\",metrics_path=\"/metrics\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, + "type": "table", + "title": "Node Conditions", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(node) (kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"$node\"})", + "refId": "A", + "legendFormat": "{{node}}", + "instant": true + }, + { + "expr": "sum by(node) (kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\",node=~\"$node\"})", + "refId": "B", + "legendFormat": "{{node}}", + "instant": true + }, + { + "expr": "sum by(node) (kube_node_status_condition{condition=\"DiskPressure\",status=\"true\",node=~\"$node\"})", + "refId": "C", + "legendFormat": "{{node}}", + "instant": true + }, + { + "expr": "sum by(node) (kube_node_status_condition{condition=\"PIDPressure\",status=\"true\",node=~\"$node\"})", + "refId": "D", + "legendFormat": "{{node}}", + "instant": true + }, + { + "expr": "sum by(node) 
(kube_node_spec_unschedulable{node=~\"$node\"})", + "refId": "E", + "legendFormat": "{{node}}", + "instant": true + } + ], + "transformations": [ + { + "id": "labelsToFields", + "options": { "mode": "columns" } + }, + { + "id": "joinByField", + "options": { "byField": "node", "mode": "outer" } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "renameByName": { + "node": "Node", + "Value #A": "Ready", + "Value #B": "Mem Pressure", + "Value #C": "Disk Pressure", + "Value #D": "PID Pressure", + "Value #E": "Unschedulable" + }, + "indexByName": { + "node": 0, + "Value #A": 1, + "Value #B": 2, + "Value #C": 3, + "Value #D": 4, + "Value #E": 5 + } + } + } + ], + "fieldConfig": { + "defaults": { + "custom": { "displayMode": "color-background", "align": "center" } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Node" }, + "properties": [ + { "id": "custom.displayMode", "value": "auto" }, + { "id": "custom.align", "value": "left" }, + { "id": "custom.width", "value": 200 } + ] + }, + { + "matcher": { "id": "byName", "options": "Ready" }, + "properties": [ + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + }, + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { "text": "✗ Not Ready", "color": "red", "index": 0 }, + "1": { "text": "✓ Ready", "color": "green", "index": 1 } + } + } + ] + } + ] + }, + { + "matcher": { "id": "byRegexp", "options": ".*Pressure" }, + "properties": [ + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } + }, + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "mappings", + "value": [ + { + "type": "value", + 
"options": { + "0": { "text": "✓ OK", "color": "green", "index": 0 }, + "1": { "text": "⚠ Active", "color": "red", "index": 1 } + } + } + ] + } + ] + }, + { + "matcher": { "id": "byName", "options": "Unschedulable" }, + "properties": [ + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }] } + }, + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { "text": "✓ Schedulable", "color": "green", "index": 0 }, + "1": { "text": "⚠ Cordoned", "color": "yellow", "index": 1 } + } + } + ] + } + ] + } + ] + }, + "options": { "sortBy": [{ "displayName": "Node", "desc": false }] }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, + "type": "timeseries", + "title": "CPU Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 85 }] } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 12 } + }, + + { + "id": 11, + "type": "bargauge", + "title": "CPU Usage \u2014 Current", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A", + 
"legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 85 }] } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 } + }, + + { + "id": 12, + "type": "timeseries", + "title": "Memory Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 75 }, { "color": "red", "value": 90 }] } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 20 } + }, + + { + "id": 13, + "type": "bargauge", + "title": "Memory Usage \u2014 Current", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": 
"yellow", "value": 75 }, { "color": "red", "value": 90 }] } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 20 } + }, + + { + "id": 14, + "type": "timeseries", + "title": "Root Disk Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 85 }] } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 28 } + }, + + { + "id": 15, + "type": "bargauge", + "title": "Root Disk Usage \u2014 Current", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 85 }] } + } + }, + "options": { + "orientation": 
"horizontal", + "displayMode": "gradient", + "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 28 } + }, + + { + "id": 16, + "type": "timeseries", + "title": "Network Traffic per Node", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{device!~\"lo|veth.*|tun.*|ovn.*|br.*\"}[5m]))", + "refId": "A", + "legendFormat": "rx {{instance}}" + }, + { + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|tun.*|ovn.*|br.*\"}[5m]))", + "refId": "B", + "legendFormat": "tx {{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 36 } + }, + + { + "id": 17, + "type": "bargauge", + "title": "Pods per Node", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count by(node) (kube_pod_info{node=~\"$node\"})", + "refId": "A", + "legendFormat": "{{node}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "min": 0, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 100 }, + { "color": "red", "value": 200 } + ] + } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 36 } + }, + + { + "id": 18, + "type": "timeseries", + "title": "System 
Load Average (1m / 5m) per Node", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "node_load1", + "refId": "A", + "legendFormat": "1m \u2014 {{instance}}" + }, + { + "expr": "node_load5", + "refId": "B", + "legendFormat": "5m \u2014 {{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 44 } + }, + + { + "id": 19, + "type": "bargauge", + "title": "Node Uptime", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "time() - node_boot_time_seconds", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "min": 0, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 300 }, + { "color": "green", "value": 3600 } + ] + } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": false, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 44 } + } + + ] +} diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/storage.json b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/storage.json new file mode 100644 index 0000000..3c58184 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/storage.json @@ -0,0 +1,596 @@ +{ + "title": "Storage Health", + "uid": "storage-health", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + 
"panels": [ + + { + "type": "row", + "id": 1, + "title": "PVC / PV Status", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 } + }, + + { + "type": "stat", + "id": 2, + "title": "Bound PVCs", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}) or vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 0, "y": 1 } + }, + + { + "type": "stat", + "id": 3, + "title": "Pending PVCs", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Pending\"}) or vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 4, "y": 1 } + }, + + { + "type": "stat", + "id": 4, + "title": "Lost PVCs", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Lost\"}) or vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, 
+ "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 8, "y": 1 } + }, + + { + "type": "stat", + "id": 5, + "title": "Bound PVs / Available PVs", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolume_status_phase{phase=\"Bound\"}) or vector(0)", + "refId": "A", + "legendFormat": "Bound" + }, + { + "expr": "sum(kube_persistentvolume_status_phase{phase=\"Available\"}) or vector(0)", + "refId": "B", + "legendFormat": "Available" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "blue", "value": null }] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 12, "y": 1 } + }, + + { + "type": "stat", + "id": 6, + "title": "Ceph Cluster Health", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "ceph_health_status", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 2 } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { "text": "HEALTH_OK", "index": 0 }, + "1": { "text": "HEALTH_WARN", "index": 1 }, + "2": { "text": "HEALTH_ERR", "index": 2 } + } + } + ] + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "value" + }, + "gridPos": { "h": 5, "w": 4, "x": 16, "y": 1 } + }, + + { + "type": "stat", + "id": 7, + "title": "OSDs Up / Total", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(ceph_osd_up) or 
vector(0)", + "refId": "A", + "legendFormat": "Up" + }, + { + "expr": "count(ceph_osd_metadata) or vector(0)", + "refId": "B", + "legendFormat": "Total" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 20, "y": 1 } + }, + + { + "type": "row", + "id": 8, + "title": "Cluster Capacity", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 6 } + }, + + { + "type": "gauge", + "id": 9, + "title": "Ceph Cluster Used (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes) / ceph_cluster_total_bytes", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "showThresholdLabels": true, + "showThresholdMarkers": true + }, + "gridPos": { "h": 8, "w": 5, "x": 0, "y": 7 } + }, + + { + "type": "stat", + "id": 10, + "title": "Ceph Capacity — Total / Available", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "ceph_cluster_total_bytes", + "refId": "A", + "legendFormat": "Total" + }, + { + "expr": "ceph_cluster_total_bytes - (ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)", + "refId": "B", + "legendFormat": "Available" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "color": { "mode": "thresholds" }, + "thresholds": { + 
"mode": "absolute", + "steps": [{ "color": "blue", "value": null }] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "value", + "graphMode": "none", + "textMode": "auto", + "orientation": "vertical" + }, + "gridPos": { "h": 8, "w": 4, "x": 5, "y": 7 } + }, + + { + "type": "bargauge", + "id": 11, + "title": "PV Allocated Capacity by Storage Class (Bound)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by (storageclass) (\n kube_persistentvolume_capacity_bytes\n * on(persistentvolume) group_left(storageclass)\n kube_persistentvolume_status_phase{phase=\"Bound\"}\n)", + "refId": "A", + "legendFormat": "{{storageclass}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "color": { "mode": "palette-classic" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "blue", "value": null }] + } + } + }, + "options": { + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"] }, + "displayMode": "gradient", + "showUnfilled": true + }, + "gridPos": { "h": 8, "w": 7, "x": 9, "y": 7 } + }, + + { + "type": "piechart", + "id": 12, + "title": "PVC Phase Distribution", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}) or vector(0)", + "refId": "A", + "legendFormat": "Bound" + }, + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Pending\"}) or vector(0)", + "refId": "B", + "legendFormat": "Pending" + }, + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Lost\"}) or vector(0)", + "refId": "C", + "legendFormat": "Lost" + } + ], + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" } } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "pieType": "pie", + "legend": { + "displayMode": "table", + "placement": "right", + "values": ["value", "percent"] + } + }, + 
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 7 } + }, + + { + "type": "row", + "id": 13, + "title": "Ceph Performance", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 15 } + }, + + { + "type": "timeseries", + "id": 14, + "title": "Ceph Pool IOPS (Read / Write)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "rate(ceph_pool_rd[5m])", + "refId": "A", + "legendFormat": "Read — pool {{pool_id}}" + }, + { + "expr": "rate(ceph_pool_wr[5m])", + "refId": "B", + "legendFormat": "Write — pool {{pool_id}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 8 } + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 } + }, + + { + "type": "timeseries", + "id": 15, + "title": "Ceph Pool Throughput (Read / Write)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "rate(ceph_pool_rd_bytes[5m])", + "refId": "A", + "legendFormat": "Read — pool {{pool_id}}" + }, + { + "expr": "rate(ceph_pool_wr_bytes[5m])", + "refId": "B", + "legendFormat": "Write — pool {{pool_id}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 8 } + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 } + }, + + { + "type": "row", + "id": 16, + "title": "Ceph OSD & Pool Details", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 24 } + }, + + { + "type": "timeseries", + "id": 17, + "title": "Ceph Pool Space Used (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail)", + "refId": "A", + "legendFormat": "Pool {{pool_id}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "palette-classic" }, + 
"thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + }, + "custom": { "lineWidth": 2, "fillOpacity": 10 } + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 25 } + }, + + { + "type": "bargauge", + "id": 18, + "title": "OSD Status per Daemon (green = Up, red = Down)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "ceph_osd_up", + "refId": "A", + "legendFormat": "{{ceph_daemon}}" + } + ], + "fieldConfig": { + "defaults": { + "min": 0, + "max": 1, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { "text": "DOWN", "index": 0 }, + "1": { "text": "UP", "index": 1 } + } + } + ] + } + }, + "options": { + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"] }, + "displayMode": "basic", + "showUnfilled": true + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 25 } + }, + + { + "type": "row", + "id": 19, + "title": "Node Disk Usage", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 33 } + }, + + { + "type": "timeseries", + "id": 20, + "title": "Node Root Disk Usage Over Time (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} * 100)", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "palette-classic" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + }, + "custom": { 
"lineWidth": 2, "fillOpacity": 10 } + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 34 } + }, + + { + "type": "bargauge", + "id": 21, + "title": "Current Disk Usage — All Nodes & Mountpoints", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 - (node_filesystem_avail_bytes{fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{fstype!~\"tmpfs|overlay|squashfs\"} * 100)", + "refId": "A", + "legendFormat": "{{instance}} — {{mountpoint}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + } + } + }, + "options": { + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"] }, + "displayMode": "gradient", + "showUnfilled": true + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 34 } + } + + ] +} diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/workloads-health.json b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/workloads-health.json new file mode 100644 index 0000000..60219ae --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/workloads-health.json @@ -0,0 +1,773 @@ +{ + "title": "Workload Health", + "uid": "okd-workload-health", + "schemaVersion": 36, + "version": 3, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "workload", "health"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(kube_pod_info, namespace)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "Namespace", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + 
"id": 1, "type": "stat", "title": "Total Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_info{namespace=~\"$namespace\"})", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Running Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_status_phase{phase=\"Running\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "Pending Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_status_phase{phase=\"Pending\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", 
"reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "Failed Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_status_phase{phase=\"Failed\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "CrashLoopBackOff", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "OOMKilled", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + 
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "Deployments Available", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_deployment_status_condition{condition=\"Available\",status=\"true\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "Deployments Degraded", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\"} > 0) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "Deployments", "collapsed": false, + "gridPos": { "h": 1, "w": 24, 
"x": 0, "y": 4 } + }, + + { + "id": 10, + "type": "table", + "title": "Deployment Status", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,deployment)(kube_deployment_spec_replicas{namespace=~\"$namespace\"})", + "refId": "A", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,deployment)(kube_deployment_status_replicas_ready{namespace=~\"$namespace\"})", + "refId": "B", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,deployment)(kube_deployment_status_replicas_available{namespace=~\"$namespace\"})", + "refId": "C", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,deployment)(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\"})", + "refId": "D", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,deployment)(kube_deployment_status_replicas_updated{namespace=~\"$namespace\"})", + "refId": "E", + "instant": true, + "format": "table", + "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": ["namespace", "deployment", "Value"] + } + } + }, + { + "id": "joinByField", + "options": { + "byField": "deployment", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "namespace 1": true, + "namespace 2": true, + "namespace 3": true, + "namespace 4": true + }, + "renameByName": { + "namespace": "Namespace", + "deployment": "Deployment", + "Value": "Desired", + "Value 1": "Ready", + "Value 2": "Available", + "Value 3": "Unavailable", + "Value 4": "Up-to-date" + }, + "indexByName": { + "namespace": 0, + "deployment": 1, + "Value": 2, + "Value 1": 3, + "Value 2": 4, + "Value 3": 5, + "Value 4": 6 + } + } + }, + { + "id": "sortBy", + "options": { + "fields": [{ "displayName": "Namespace", "desc": 
false }] + } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 200 }] + }, + { + "matcher": { "id": "byName", "options": "Deployment" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 220 }] + }, + { + "matcher": { "id": "byName", "options": "Unavailable" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } + } + ] + }, + { + "matcher": { "id": "byName", "options": "Ready" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + } + ] + } + ] + }, + "options": { "sortBy": [{ "displayName": "Namespace", "desc": false }] }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "row", "title": "StatefulSets & DaemonSets", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 } + }, + + { + "id": 12, + "type": "table", + "title": "StatefulSet Status", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,statefulset)(kube_statefulset_replicas{namespace=~\"$namespace\"})", + "refId": "A", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,statefulset)(kube_statefulset_status_replicas_ready{namespace=~\"$namespace\"})", + "refId": "B", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,statefulset)(kube_statefulset_status_replicas_current{namespace=~\"$namespace\"})", + "refId": 
"C", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,statefulset)(kube_statefulset_status_replicas_updated{namespace=~\"$namespace\"})", + "refId": "D", + "instant": true, + "format": "table", + "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": ["namespace", "statefulset", "Value"] + } + } + }, + { + "id": "joinByField", + "options": { + "byField": "statefulset", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "namespace 1": true, + "namespace 2": true, + "namespace 3": true + }, + "renameByName": { + "namespace": "Namespace", + "statefulset": "StatefulSet", + "Value": "Desired", + "Value 1": "Ready", + "Value 2": "Current", + "Value 3": "Up-to-date" + }, + "indexByName": { + "namespace": 0, + "statefulset": 1, + "Value": 2, + "Value 1": 3, + "Value 2": 4, + "Value 3": 5 + } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Namespace", "desc": false }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 180 }] + }, + { + "matcher": { "id": "byName", "options": "StatefulSet" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 200 }] + }, + { + "matcher": { "id": "byName", "options": "Ready" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } } + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 } + }, + + { + "id": 13, + "type": "table", + "title": "DaemonSet Status", + "datasource": { "type": "prometheus", "uid": 
"Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,daemonset)(kube_daemonset_status_desired_number_scheduled{namespace=~\"$namespace\"})", + "refId": "A", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,daemonset)(kube_daemonset_status_number_ready{namespace=~\"$namespace\"})", + "refId": "B", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,daemonset)(kube_daemonset_status_number_unavailable{namespace=~\"$namespace\"})", + "refId": "C", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,daemonset)(kube_daemonset_status_number_misscheduled{namespace=~\"$namespace\"})", + "refId": "D", + "instant": true, + "format": "table", + "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": ["namespace", "daemonset", "Value"] + } + } + }, + { + "id": "joinByField", + "options": { + "byField": "daemonset", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "namespace 1": true, + "namespace 2": true, + "namespace 3": true + }, + "renameByName": { + "namespace": "Namespace", + "daemonset": "DaemonSet", + "Value": "Desired", + "Value 1": "Ready", + "Value 2": "Unavailable", + "Value 3": "Misscheduled" + }, + "indexByName": { + "namespace": 0, + "daemonset": 1, + "Value": 2, + "Value 1": 3, + "Value 2": 4, + "Value 3": 5 + } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Namespace", "desc": false }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 180 }] + }, + { + "matcher": { "id": "byName", "options": "DaemonSet" }, + "properties": [{ "id": "custom.align", 
"value": "left" }, { "id": "custom.width", "value": 200 }] + }, + { + "matcher": { "id": "byName", "options": "Ready" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } } + ] + }, + { + "matcher": { "id": "byName", "options": "Unavailable" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } } + ] + }, + { + "matcher": { "id": "byName", "options": "Misscheduled" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] } } + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 } + }, + + { + "id": 14, "type": "row", "title": "Pods", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 } + }, + + { + "id": 15, + "type": "timeseries", + "title": "Pod Phase over Time", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(phase)(kube_pod_status_phase{namespace=~\"$namespace\"})", + "refId": "A", "legendFormat": "{{phase}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Running" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Pending" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": 
"Failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Succeeded" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Unknown" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull"] } + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 23 } + }, + + { + "id": 16, + "type": "piechart", + "title": "Pod Phase — Now", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(phase)(kube_pod_status_phase{namespace=~\"$namespace\"})", + "refId": "A", "instant": true, "legendFormat": "{{phase}}" + } + ], + "fieldConfig": { + "defaults": { "unit": "short", "color": { "mode": "palette-classic" } }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Running" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Pending" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Succeeded" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Unknown" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "pieType": "donut", + "tooltip": { "mode": "single" }, + "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 23 } + 
}, + + { + "id": 17, + "type": "timeseries", + "title": "Container Restarts over Time (total counter, top 10)", + "description": "Absolute restart counter — each vertical step = a restart event. Flat line = healthy.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "topk(10,\n sum by(namespace, pod) (\n kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}\n ) > 0\n)", + "refId": "A", + "legendFormat": "{{namespace}} / {{pod}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "auto", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 31 } + }, + + { + "id": 18, + "type": "table", + "title": "Container Total Restarts (non-zero)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace, pod, container) (kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}) > 0", + "refId": "A", + "instant": true, + "format": "table", + "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { "names": ["namespace", "pod", "container", "Value"] } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "namespace": "Namespace", + "pod": "Pod", + "container": "Container", + "Value": "Total Restarts" + }, + "indexByName": { "namespace": 0, "pod": 1, "container": 2, "Value": 3 } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Total Restarts", "desc": true }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Namespace" }, 
"properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 160 }] }, + { "matcher": { "id": "byName", "options": "Pod" }, "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 260 }] }, + { "matcher": { "id": "byName", "options": "Container" }, "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 160 }] }, + { + "matcher": { "id": "byName", "options": "Total Restarts" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "yellow", "value": null }, { "color": "orange", "value": 5 }, { "color": "red", "value": 20 }] } } + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 31 } + }, + + { + "id": 19, "type": "row", "title": "Resource Usage", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 39 } + }, + + { + "id": 20, + "type": "timeseries", + "title": "CPU Usage by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace)(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "cores", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 40 } + }, + + { + "id": 21, + "type": "timeseries", + "title": "Memory Usage by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum 
by(namespace)(container_memory_working_set_bytes{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"})", + "refId": "A", "legendFormat": "{{namespace}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 40 } + }, + + { + "id": 22, + "type": "bargauge", + "title": "CPU — Actual vs Requested (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace)(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"}[5m]))\n/\nsum by(namespace)(kube_pod_container_resource_requests{resource=\"cpu\",namespace=~\"$namespace\",container!=\"\"})\n* 100", + "refId": "A", "legendFormat": "{{namespace}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 150, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 80 }, { "color": "red", "value": 100 }] } + } + }, + "options": { + "orientation": "horizontal", "displayMode": "gradient", "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 48 } + }, + + { + "id": 23, + "type": "bargauge", + "title": "Memory — Actual vs Requested (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace)(container_memory_working_set_bytes{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"})\n/\nsum 
by(namespace)(kube_pod_container_resource_requests{resource=\"memory\",namespace=~\"$namespace\",container!=\"\"})\n* 100", + "refId": "A", "legendFormat": "{{namespace}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 150, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 80 }, { "color": "red", "value": 100 }] } + } + }, + "options": { + "orientation": "horizontal", "displayMode": "gradient", "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 48 } + } + + ] +} diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/01-namespace.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/01-namespace.yaml new file mode 100644 index 0000000..a52fe20 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/01-namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: observability + labels: + openshift.io/cluster-monitoring: "true" diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/02-serviceaccount-rbac.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/02-serviceaccount-rbac.yaml new file mode 100644 index 0000000..cfaa8f0 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/02-serviceaccount-rbac.yaml @@ -0,0 +1,43 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cluster-grafana-sa + namespace: observability +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: grafana-prometheus-api-access +rules: +- apiGroups: + - monitoring.coreos.com + resources: + - prometheuses/api + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-prometheus-api-access-binding +roleRef: 
+ apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-prometheus-api-access +subjects: +- kind: ServiceAccount + name: cluster-grafana-sa + namespace: observability +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-cluster-monitoring-view +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-monitoring-view +subjects: +- kind: ServiceAccount + name: cluster-grafana-sa + namespace: observability diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/03-grafana.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/03-grafana.yaml new file mode 100644 index 0000000..f98bef1 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/03-grafana.yaml @@ -0,0 +1,43 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: Grafana +metadata: + name: cluster-grafana + namespace: observability + labels: + dashboards: "grafana" +spec: + serviceAccountName: cluster-grafana-sa + automountServiceAccountToken: true + + config: + log: + mode: console + + security: + admin_user: admin + admin_password: paul + + users: + viewers_can_edit: "false" + + auth: + disable_login_form: "false" + + auth.anonymous: + enabled: "true" + org_role: Viewer + + deployment: + spec: + replicas: 1 + template: + spec: + containers: + - name: grafana + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 1 + memory: 2Gi diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/03a-secret-token.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/03a-secret-token.yaml new file mode 100644 index 0000000..c57a142 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/03a-secret-token.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: grafana-prometheus-token + namespace: observability + annotations: + kubernetes.io/service-account.name: 
cluster-grafana-sa +type: kubernetes.io/service-account-token diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/04-datasource.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/04-datasource.yaml new file mode 100644 index 0000000..9a1ce74 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/04-datasource.yaml @@ -0,0 +1,27 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDatasource +metadata: + name: prometheus-cluster + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + valuesFrom: + - targetPath: "secureJsonData.httpHeaderValue1" + valueFrom: + secretKeyRef: + name: grafana-prometheus-token + key: token + datasource: + name: Prometheus-Cluster + type: prometheus + access: proxy + url: https://prometheus-k8s.openshift-monitoring.svc:9091 + isDefault: true + jsonData: + httpHeaderName1: "Authorization" + tlsSkipVerify: true + timeInterval: "30s" + secureJsonData: + httpHeaderValue1: "Bearer ${token}" diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/05-route.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/05-route.yaml new file mode 100644 index 0000000..9b86b5e --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/05-route.yaml @@ -0,0 +1,14 @@ +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: grafana + namespace: observability +spec: + to: + kind: Service + name: cluster-grafana-service + port: + targetPort: 3000 + tls: + termination: edge + insecureEdgeTerminationPolicy: Redirect diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/06-dashboard-cluster-overview.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/06-dashboard-cluster-overview.yaml new file mode 100644 index 0000000..6b55825 --- /dev/null +++ 
b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/06-dashboard-cluster-overview.yaml @@ -0,0 +1,97 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: cluster-overview + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + + json: | + { + "title": "Cluster Overview", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { + "from": "now-1h", + "to": "now" + }, + "panels": [ + { + "type": "stat", + "title": "Ready Nodes", + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "targets": [ + { + "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\"})", + "refId": "A" + } + ], + "gridPos": { "h": 6, "w": 6, "x": 0, "y": 0 } + }, + { + "type": "stat", + "title": "Running Pods", + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Running\"})", + "refId": "A" + } + ], + "gridPos": { "h": 6, "w": 6, "x": 6, "y": 0 } + }, + { + "type": "timeseries", + "title": "Cluster CPU Usage (%)", + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "targets": [ + { + "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100 + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 } + }, + { + "type": "timeseries", + "title": "Cluster Memory Usage (%)", + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "targets": [ + { + "expr": "100 * (1 - (sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100 + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 } + } + ] + } diff --git 
a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/07-dashboard-openshift-metrics.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/07-dashboard-openshift-metrics.yaml new file mode 100644 index 0000000..5b31d2e --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/07-dashboard-openshift-metrics.yaml @@ -0,0 +1,1015 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: openshift-metrics + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Metrics Dashboard for CRI-O OpenShift clusters", + "editable": true, + "gnetId": 5273, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "description": "The number of containers that start or restart over the last ten minutes.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": 
"rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(changes(container_last_seen{image!=\"\"}[10m]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Container Restarts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 19, + "panels": [], + "repeat": null, + "title": "File System Space", + "type": "row" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "description": "Percentage of usage of the root filesystem on each host.", + "fill": 1, + "gridPos": { + "h": 10, + "w": 10, + "x": 0, + "y": 3 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"} * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 80 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Root Filesystem % Used", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "fill": 1, + "gridPos": { + "h": 10, + "w": 10, + "x": 10, + "y": 3 + }, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_avail_bytes{mountpoint=\"/\",device!=\"rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",device!=\"rootfs\"} * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 80 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Root Filesystem % Used (rootfs)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": 
null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 10, + "w": 4, + "x": 20, + "y": 3 + }, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_filesystem_size_bytes{device!=\"rootfs\"}) - sum(node_filesystem_avail_bytes{device!=\"rootfs\"})) / sum(node_filesystem_size_bytes{device!=\"rootfs\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "0.8,0.9", + "title": "Cluster Disk Usage (non-rootfs)", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 20, + "panels": [], + "repeat": null, + "title": "Running Pods/Containers", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "fill": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 8, + "legend": { + 
"alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "container_last_seen{namespace!=\"\",container!=\"\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CRI-O Containers Running", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "fill": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "kubelet_running_pod_count", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": 
"A", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 35 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Pods Running", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 22, + "panels": [], + "repeat": null, + "title": "Cluster CPU", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "fill": 1, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 25 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (kubernetes_io_hostname,type) (rate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + 
"yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 25 + }, + "id": 25, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{id=\"/\"}[3m])) / sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.7,0.9", + "title": "Cluster CPU Percentage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 23, + "panels": [], + "repeat": null, + "title": "Cluster Memory", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": 
"Prometheus-Cluster" + }, + "fill": 1, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 33 + }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "((sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes) - sum(node_memory_Buffers_bytes) - sum(node_memory_Cached_bytes)) / sum(node_memory_MemTotal_bytes)) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 90 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": { + "type": "prometheus", + "uid": "Prometheus-Cluster" + }, + "decimals": null, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 33 + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 
1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "Value", + "targets": [ + { + "expr": "sum(container_memory_rss) / sum(machine_memory_bytes)", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": "0.75, 0.9", + "title": "Cluster Memory Use Percentage", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [], + "valueName": "current" + } + ], + "refresh": "5s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "kubernetes", + "cri-o", + "openshift" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "OpenShift Metrics (CRI-O)", + "uid": "jmfLePkmz", + "version": 5 + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08a-dashboard-cluster-overview.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08a-dashboard-cluster-overview.yaml new file mode 100644 index 0000000..24e5ef7 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08a-dashboard-cluster-overview.yaml @@ -0,0 +1,769 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: 
okd-cluster-overview + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "title": "Cluster Overview", + "uid": "okd-cluster-overview", + "schemaVersion": 36, + "version": 2, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "cluster", "overview"], + "panels": [ + + { + "id": 1, + "type": "stat", + "title": "Ready Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\"} == 1)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, + "type": "stat", + "title": "Not Ready Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"false\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, + "type": "stat", + "title": 
"Running Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Running\"} == 1)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, + "type": "stat", + "title": "Pending Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Pending\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 5 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, + "type": "stat", + "title": "Failed Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Failed\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", 
"value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, + "type": "stat", + "title": "CrashLoopBackOff", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, + "type": "stat", + "title": "Critical Alerts", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(ALERTS{alertstate=\"firing\",severity=\"critical\"}) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, + "type": "stat", + "title": "Warning Alerts", + 
"datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(ALERTS{alertstate=\"firing\",severity=\"warning\"}) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 10 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, + "type": "gauge", + "title": "CPU Usage", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A", + "legendFormat": "CPU" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "auto" + }, + "gridPos": { "h": 6, "w": 5, "x": 0, "y": 4 } + }, + + { + "id": 10, + "type": "gauge", + "title": "Memory Usage", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)))", + "refId": "A", + "legendFormat": "Memory" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" 
}, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 75 }, + { "color": "red", "value": 90 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "auto" + }, + "gridPos": { "h": 6, "w": 5, "x": 5, "y": 4 } + }, + + { + "id": 11, + "type": "gauge", + "title": "Root Disk Usage", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (sum(node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"})))", + "refId": "A", + "legendFormat": "Disk" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "auto" + }, + "gridPos": { "h": 6, "w": 4, "x": 10, "y": 4 } + }, + + { + "id": 12, + "type": "stat", + "title": "etcd Has Leader", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "min(etcd_server_has_leader)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { "text": "NO LEADER", "color": "red" }, + "1": { "text": "LEADER OK", "color": "green" } + } + } + ], + "unit": "short", + "noValue": "?" 
+ } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 3, "w": 5, "x": 14, "y": 4 } + }, + + { + "id": 13, + "type": "stat", + "title": "API Servers Up", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(up{job=\"apiserver\"})", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 2 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 3, "w": 5, "x": 19, "y": 4 } + }, + + { + "id": 14, + "type": "stat", + "title": "etcd Members Up", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(up{job=\"etcd\"})", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 2 }, + { "color": "green", "value": 3 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 3, "w": 5, "x": 14, "y": 7 } + }, + + { + "id": 15, + "type": "stat", + "title": "Operators Degraded", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": 
"count(cluster_operator_conditions{condition=\"Degraded\",status=\"True\"} == 1) or vector(0)", + "refId": "A", + "legendFormat": "" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short", + "noValue": "0" + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "gridPos": { "h": 3, "w": 5, "x": 19, "y": 7 } + }, + + { + "id": 16, + "type": "timeseries", + "title": "CPU Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, + "fillOpacity": 10, + "spanNulls": false, + "showPoints": "never" + } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": ["mean", "max"] + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 10 } + }, + + { + "id": 17, + "type": "timeseries", + "title": "Memory Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, + "fillOpacity": 10, + "spanNulls": false, + "showPoints": "never" + } + } + }, + "options": { + 
"tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": ["mean", "max"] + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 10 } + }, + + { + "id": 18, + "type": "timeseries", + "title": "Network Traffic — Cluster Total", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|tun.*|ovn.*|br-int|br-ex\"}[5m]))", + "refId": "A", + "legendFormat": "Receive" + }, + { + "expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|tun.*|ovn.*|br-int|br-ex\"}[5m]))", + "refId": "B", + "legendFormat": "Transmit" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, + "fillOpacity": 10, + "spanNulls": false, + "showPoints": "never" + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Receive" }, + "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Transmit" }, + "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] + } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "none" }, + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": ["mean", "max"] + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 18 } + }, + + { + "id": 19, + "type": "timeseries", + "title": "Pod Phases Over Time", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count(kube_pod_status_phase{phase=\"Running\"} == 1)", + "refId": "A", + "legendFormat": "Running" + }, + { + "expr": "count(kube_pod_status_phase{phase=\"Pending\"} == 1) or vector(0)", + "refId": "B", + "legendFormat": "Pending" + }, + { + "expr": "count(kube_pod_status_phase{phase=\"Failed\"} == 1) or vector(0)", + "refId": "C", + "legendFormat": 
"Failed" + }, + { + "expr": "count(kube_pod_status_phase{phase=\"Unknown\"} == 1) or vector(0)", + "refId": "D", + "legendFormat": "Unknown" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "custom": { + "lineWidth": 2, + "fillOpacity": 15, + "spanNulls": false, + "showPoints": "never" + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Running" }, + "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Pending" }, + "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Failed" }, + "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Unknown" }, + "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] + } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "none" }, + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": ["lastNotNull"] + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 18 } + } + + ] + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08b-dashboard-nodes-health.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08b-dashboard-nodes-health.yaml new file mode 100644 index 0000000..a8cc179 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08b-dashboard-nodes-health.yaml @@ -0,0 +1,637 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: okd-node-health + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "title": "Node Health", + "uid": "okd-node-health", + "schemaVersion": 36, + "version": 2, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "node", "health"], + "templating": { + "list": 
[ + { + "name": "node", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(kube_node_info, node)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "Node", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, + "type": "stat", + "title": "Total Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_info{node=~\"$node\"})", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, + "type": "stat", + "title": "Ready Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"$node\"} == 1)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, + "type": "stat", + "title": "Not Ready Nodes", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"false\",node=~\"$node\"} == 1) or 
vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, + "type": "stat", + "title": "Memory Pressure", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\",node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, + "type": "stat", + "title": "Disk Pressure", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"DiskPressure\",status=\"true\",node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 
0 } + }, + + { + "id": 6, + "type": "stat", + "title": "PID Pressure", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"PIDPressure\",status=\"true\",node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, + "type": "stat", + "title": "Unschedulable", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_spec_unschedulable{node=~\"$node\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, + "type": "stat", + "title": "Kubelet Up", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(up{job=\"kubelet\",metrics_path=\"/metrics\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { 
"colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, + "type": "table", + "title": "Node Conditions", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(node) (kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"$node\"})", + "refId": "A", + "legendFormat": "{{node}}", + "instant": true + }, + { + "expr": "sum by(node) (kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\",node=~\"$node\"})", + "refId": "B", + "legendFormat": "{{node}}", + "instant": true + }, + { + "expr": "sum by(node) (kube_node_status_condition{condition=\"DiskPressure\",status=\"true\",node=~\"$node\"})", + "refId": "C", + "legendFormat": "{{node}}", + "instant": true + }, + { + "expr": "sum by(node) (kube_node_status_condition{condition=\"PIDPressure\",status=\"true\",node=~\"$node\"})", + "refId": "D", + "legendFormat": "{{node}}", + "instant": true + }, + { + "expr": "sum by(node) (kube_node_spec_unschedulable{node=~\"$node\"})", + "refId": "E", + "legendFormat": "{{node}}", + "instant": true + } + ], + "transformations": [ + { + "id": "labelsToFields", + "options": { "mode": "columns" } + }, + { + "id": "joinByField", + "options": { "byField": "node", "mode": "outer" } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "renameByName": { + "node": "Node", + "Value #A": "Ready", + "Value #B": "Mem Pressure", + "Value #C": "Disk Pressure", + "Value #D": "PID Pressure", + "Value #E": "Unschedulable" + }, + "indexByName": { + "node": 0, + "Value #A": 1, + "Value #B": 2, + "Value #C": 3, + "Value #D": 4, + "Value #E": 5 + } + } + } + ], + "fieldConfig": { + "defaults": { + "custom": { "displayMode": "color-background", 
"align": "center" } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Node" }, + "properties": [ + { "id": "custom.displayMode", "value": "auto" }, + { "id": "custom.align", "value": "left" }, + { "id": "custom.width", "value": 200 } + ] + }, + { + "matcher": { "id": "byName", "options": "Ready" }, + "properties": [ + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + }, + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { "text": "✗ Not Ready", "color": "red", "index": 0 }, + "1": { "text": "✓ Ready", "color": "green", "index": 1 } + } + } + ] + } + ] + }, + { + "matcher": { "id": "byRegexp", "options": ".*Pressure" }, + "properties": [ + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } + }, + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { "text": "✓ OK", "color": "green", "index": 0 }, + "1": { "text": "⚠ Active", "color": "red", "index": 1 } + } + } + ] + } + ] + }, + { + "matcher": { "id": "byName", "options": "Unschedulable" }, + "properties": [ + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }] } + }, + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { "text": "✓ Schedulable", "color": "green", "index": 0 }, + "1": { "text": "⚠ Cordoned", "color": "yellow", "index": 1 } + } + } + ] + } + ] + } + ] + }, + "options": { "sortBy": [{ "displayName": "Node", "desc": false }] }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, + "type": "timeseries", + "title": "CPU Usage per Node 
(%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 85 }] } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 12 } + }, + + { + "id": 11, + "type": "bargauge", + "title": "CPU Usage \u2014 Current", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 85 }] } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 } + }, + + { + "id": 12, + "type": "timeseries", + "title": "Memory Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + 
"fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 75 }, { "color": "red", "value": 90 }] } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 20 } + }, + + { + "id": 13, + "type": "bargauge", + "title": "Memory Usage \u2014 Current", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 75 }, { "color": "red", "value": 90 }] } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 20 } + }, + + { + "id": 14, + "type": "timeseries", + "title": "Root Disk Usage per Node (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": 
"never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 85 }] } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 28 } + }, + + { + "id": 15, + "type": "bargauge", + "title": "Root Disk Usage \u2014 Current", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}))", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 85 }] } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 28 } + }, + + { + "id": 16, + "type": "timeseries", + "title": "Network Traffic per Node", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{device!~\"lo|veth.*|tun.*|ovn.*|br.*\"}[5m]))", + "refId": "A", + "legendFormat": "rx {{instance}}" + }, + { + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|tun.*|ovn.*|br.*\"}[5m]))", + "refId": "B", + "legendFormat": "tx {{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": 
"never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 36 } + }, + + { + "id": 17, + "type": "bargauge", + "title": "Pods per Node", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "count by(node) (kube_pod_info{node=~\"$node\"})", + "refId": "A", + "legendFormat": "{{node}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "min": 0, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 100 }, + { "color": "red", "value": 200 } + ] + } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 36 } + }, + + { + "id": 18, + "type": "timeseries", + "title": "System Load Average (1m) per Node", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "node_load1", + "refId": "A", + "legendFormat": "1m \u2014 {{instance}}" + }, + { + "expr": "node_load5", + "refId": "B", + "legendFormat": "5m \u2014 {{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 44 } + }, + + { + "id": 19, + "type": "bargauge", + "title": "Node Uptime", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": 
"time() - node_boot_time_seconds", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "min": 0, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 300 }, + { "color": "green", "value": 3600 } + ] + } + } + }, + "options": { + "orientation": "horizontal", + "displayMode": "gradient", + "showUnfilled": false, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 44 } + } + + ] + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08c-dashboard-workloads-health.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08c-dashboard-workloads-health.yaml new file mode 100644 index 0000000..871a292 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08c-dashboard-workloads-health.yaml @@ -0,0 +1,783 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: okd-workload-health + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "title": "Workload Health", + "uid": "okd-workload-health", + "schemaVersion": 36, + "version": 3, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "workload", "health"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(kube_pod_info, namespace)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "Namespace", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "Total Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": 
"count(kube_pod_info{namespace=~\"$namespace\"})", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Running Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_status_phase{phase=\"Running\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "Pending Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_status_phase{phase=\"Pending\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", 
"title": "Failed Pods", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_status_phase{phase=\"Failed\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "CrashLoopBackOff", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "OOMKilled", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, + "unit": "short", "noValue": "0" 
+ } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "Deployments Available", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_deployment_status_condition{condition=\"Available\",status=\"true\",namespace=~\"$namespace\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "Deployments Degraded", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\"} > 0) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "Deployments", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, + "type": "table", + "title": "Deployment Status", + "datasource": { "type": "prometheus", "uid": 
"Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,deployment)(kube_deployment_spec_replicas{namespace=~\"$namespace\"})", + "refId": "A", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,deployment)(kube_deployment_status_replicas_ready{namespace=~\"$namespace\"})", + "refId": "B", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,deployment)(kube_deployment_status_replicas_available{namespace=~\"$namespace\"})", + "refId": "C", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,deployment)(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\"})", + "refId": "D", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,deployment)(kube_deployment_status_replicas_updated{namespace=~\"$namespace\"})", + "refId": "E", + "instant": true, + "format": "table", + "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": ["namespace", "deployment", "Value"] + } + } + }, + { + "id": "joinByField", + "options": { + "byField": "deployment", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "namespace 1": true, + "namespace 2": true, + "namespace 3": true, + "namespace 4": true + }, + "renameByName": { + "namespace": "Namespace", + "deployment": "Deployment", + "Value": "Desired", + "Value 1": "Ready", + "Value 2": "Available", + "Value 3": "Unavailable", + "Value 4": "Up-to-date" + }, + "indexByName": { + "namespace": 0, + "deployment": 1, + "Value": 2, + "Value 1": 3, + "Value 2": 4, + "Value 3": 5, + "Value 4": 6 + } + } + }, + { + "id": "sortBy", + "options": { + "fields": [{ "displayName": "Namespace", "desc": false }] + } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + 
"matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 200 }] + }, + { + "matcher": { "id": "byName", "options": "Deployment" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 220 }] + }, + { + "matcher": { "id": "byName", "options": "Unavailable" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } + } + ] + }, + { + "matcher": { "id": "byName", "options": "Ready" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { + "id": "thresholds", + "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + } + ] + } + ] + }, + "options": { "sortBy": [{ "displayName": "Namespace", "desc": false }] }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "row", "title": "StatefulSets & DaemonSets", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 } + }, + + { + "id": 12, + "type": "table", + "title": "StatefulSet Status", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,statefulset)(kube_statefulset_replicas{namespace=~\"$namespace\"})", + "refId": "A", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,statefulset)(kube_statefulset_status_replicas_ready{namespace=~\"$namespace\"})", + "refId": "B", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,statefulset)(kube_statefulset_status_replicas_current{namespace=~\"$namespace\"})", + "refId": "C", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum 
by(namespace,statefulset)(kube_statefulset_status_replicas_updated{namespace=~\"$namespace\"})", + "refId": "D", + "instant": true, + "format": "table", + "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": ["namespace", "statefulset", "Value"] + } + } + }, + { + "id": "joinByField", + "options": { + "byField": "statefulset", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "namespace 1": true, + "namespace 2": true, + "namespace 3": true + }, + "renameByName": { + "namespace": "Namespace", + "statefulset": "StatefulSet", + "Value": "Desired", + "Value 1": "Ready", + "Value 2": "Current", + "Value 3": "Up-to-date" + }, + "indexByName": { + "namespace": 0, + "statefulset": 1, + "Value": 2, + "Value 1": 3, + "Value 2": 4, + "Value 3": 5 + } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Namespace", "desc": false }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 180 }] + }, + { + "matcher": { "id": "byName", "options": "StatefulSet" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 200 }] + }, + { + "matcher": { "id": "byName", "options": "Ready" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } } + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 } + }, + + { + "id": 13, + "type": "table", + "title": "DaemonSet Status", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum 
by(namespace,daemonset)(kube_daemonset_status_desired_number_scheduled{namespace=~\"$namespace\"})", + "refId": "A", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,daemonset)(kube_daemonset_status_number_ready{namespace=~\"$namespace\"})", + "refId": "B", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,daemonset)(kube_daemonset_status_number_unavailable{namespace=~\"$namespace\"})", + "refId": "C", + "instant": true, + "format": "table", + "legendFormat": "" + }, + { + "expr": "sum by(namespace,daemonset)(kube_daemonset_status_number_misscheduled{namespace=~\"$namespace\"})", + "refId": "D", + "instant": true, + "format": "table", + "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": ["namespace", "daemonset", "Value"] + } + } + }, + { + "id": "joinByField", + "options": { + "byField": "daemonset", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "namespace 1": true, + "namespace 2": true, + "namespace 3": true + }, + "renameByName": { + "namespace": "Namespace", + "daemonset": "DaemonSet", + "Value": "Desired", + "Value 1": "Ready", + "Value 2": "Unavailable", + "Value 3": "Misscheduled" + }, + "indexByName": { + "namespace": 0, + "daemonset": 1, + "Value": 2, + "Value 1": 3, + "Value 2": 4, + "Value 3": 5 + } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Namespace", "desc": false }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 180 }] + }, + { + "matcher": { "id": "byName", "options": "DaemonSet" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 200 }] + 
}, + { + "matcher": { "id": "byName", "options": "Ready" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } } + ] + }, + { + "matcher": { "id": "byName", "options": "Unavailable" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } } + ] + }, + { + "matcher": { "id": "byName", "options": "Misscheduled" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] } } + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 } + }, + + { + "id": 14, "type": "row", "title": "Pods", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 } + }, + + { + "id": 15, + "type": "timeseries", + "title": "Pod Phase over Time", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(phase)(kube_pod_status_phase{namespace=~\"$namespace\"})", + "refId": "A", "legendFormat": "{{phase}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Running" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Pending" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Failed" }, "properties": [{ "id": "color", "value": { 
"fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Succeeded" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Unknown" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull"] } + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 23 } + }, + + { + "id": 16, + "type": "piechart", + "title": "Pod Phase — Now", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(phase)(kube_pod_status_phase{namespace=~\"$namespace\"})", + "refId": "A", "instant": true, "legendFormat": "{{phase}}" + } + ], + "fieldConfig": { + "defaults": { "unit": "short", "color": { "mode": "palette-classic" } }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Running" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Pending" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Succeeded" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Unknown" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "pieType": "donut", + "tooltip": { "mode": "single" }, + "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 23 } + }, + + { + "id": 17, + "type": "timeseries", + "title": 
"Container Restarts over Time (total counter, top 10)", + "description": "Absolute restart counter — each vertical step = a restart event. Flat line = healthy.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "topk(10,\n sum by(namespace, pod) (\n kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}\n ) > 0\n)", + "refId": "A", + "legendFormat": "{{namespace}} / {{pod}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "auto", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 31 } + }, + + { + "id": 18, + "type": "table", + "title": "Container Total Restarts (non-zero)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace, pod, container) (kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}) > 0", + "refId": "A", + "instant": true, + "format": "table", + "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { "names": ["namespace", "pod", "container", "Value"] } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "namespace": "Namespace", + "pod": "Pod", + "container": "Container", + "Value": "Total Restarts" + }, + "indexByName": { "namespace": 0, "pod": 1, "container": 2, "Value": 3 } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Total Restarts", "desc": true }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Namespace" }, "properties": [{ "id": "custom.align", "value": "left" 
}, { "id": "custom.width", "value": 160 }] }, + { "matcher": { "id": "byName", "options": "Pod" }, "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 260 }] }, + { "matcher": { "id": "byName", "options": "Container" }, "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 160 }] }, + { + "matcher": { "id": "byName", "options": "Total Restarts" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "yellow", "value": null }, { "color": "orange", "value": 5 }, { "color": "red", "value": 20 }] } } + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 31 } + }, + + { + "id": 19, "type": "row", "title": "Resource Usage", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 39 } + }, + + { + "id": 20, + "type": "timeseries", + "title": "CPU Usage by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace)(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "cores", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 40 } + }, + + { + "id": 21, + "type": "timeseries", + "title": "Memory Usage by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace)(container_memory_working_set_bytes{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"})", + 
"refId": "A", "legendFormat": "{{namespace}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 40 } + }, + + { + "id": 22, + "type": "bargauge", + "title": "CPU — Actual vs Requested (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace)(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"}[5m]))\n/\nsum by(namespace)(kube_pod_container_resource_requests{resource=\"cpu\",namespace=~\"$namespace\",container!=\"\"})\n* 100", + "refId": "A", "legendFormat": "{{namespace}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 150, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 80 }, { "color": "red", "value": 100 }] } + } + }, + "options": { + "orientation": "horizontal", "displayMode": "gradient", "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 48 } + }, + + { + "id": 23, + "type": "bargauge", + "title": "Memory — Actual vs Requested (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace)(container_memory_working_set_bytes{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"})\n/\nsum by(namespace)(kube_pod_container_resource_requests{resource=\"memory\",namespace=~\"$namespace\",container!=\"\"})\n* 100", + "refId": "A", "legendFormat": "{{namespace}}" + } + ], + "fieldConfig": { 
+ "defaults": { + "unit": "percent", "min": 0, "max": 150, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 80 }, { "color": "red", "value": 100 }] } + } + }, + "options": { + "orientation": "horizontal", "displayMode": "gradient", "showUnfilled": true, + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 48 } + } + + ] + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08d-dashboard-networking.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08d-dashboard-networking.yaml new file mode 100644 index 0000000..66f8902 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08d-dashboard-networking.yaml @@ -0,0 +1,955 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: okd-networking + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "title": "Networking", + "uid": "okd-networking", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "networking"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(kube_pod_info, namespace)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "Namespace", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "Network RX Rate", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + 
"defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "Bps", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Network TX Rate", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "Bps", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "RX Errors/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_receive_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "pps", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "TX Errors/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": 
"sum(rate(container_network_transmit_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "pps", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "RX Drops/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, + "unit": "pps", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "TX Drops/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, + "unit": "pps", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", 
"reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "DNS Queries/s", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(coredns_dns_requests_total[5m]))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "reqps", "noValue": "0", "decimals": 1 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "DNS Error %", + "description": "Percentage of DNS responses with non-NOERROR rcode over the last 5 minutes.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(coredns_dns_responses_total{rcode!=\"NOERROR\"}[5m])) / sum(rate(coredns_dns_responses_total[5m])) * 100", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 5 } + ]}, + "unit": "percent", "noValue": "0", "decimals": 2 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "Network I/O", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, "type": "timeseries", "title": "Receive Rate by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": 
[{ + "expr": "sum by(namespace)(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "timeseries", "title": "Transmit Rate by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 } + }, + + { + "id": 12, "type": "row", "title": "Top Pod Consumers", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 } + }, + + { + "id": 13, "type": "timeseries", "title": "Top 10 Pods — RX Rate", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(namespace,pod)(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m])))", + "refId": "A", "legendFormat": "{{namespace}} / {{pod}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, 
"showPoints": "auto", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 } + }, + + { + "id": 14, "type": "timeseries", "title": "Top 10 Pods — TX Rate", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(namespace,pod)(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m])))", + "refId": "A", "legendFormat": "{{namespace}} / {{pod}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "auto", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 } + }, + + { + "id": 15, + "type": "table", + "title": "Pod Network I/O Summary", + "description": "Current RX/TX rates, errors and drops per pod. 
Sorted by RX rate descending.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,pod)(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "B", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_receive_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "C", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_transmit_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "D", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "E", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,pod)(rate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "F", "instant": true, "format": "table", "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { "include": { "names": ["namespace", "pod", "Value"] } } + }, + { + "id": "joinByField", + "options": { "byField": "pod", "mode": "outer" } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "namespace 1": true, + "namespace 2": true, + "namespace 3": true, + "namespace 4": true, + "namespace 5": true + }, + "renameByName": { + "namespace": "Namespace", + "pod": "Pod", + "Value": "RX Rate", + "Value 1": "TX Rate", + "Value 2": "RX Errors/s", + "Value 3": "TX Errors/s", + "Value 4": "RX Drops/s", + "Value 5": "TX Drops/s" + }, + "indexByName": { + 
"namespace": 0, + "pod": 1, + "Value": 2, + "Value 1": 3, + "Value 2": 4, + "Value 3": 5, + "Value 4": 6, + "Value 5": 7 + } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "RX Rate", "desc": true }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 160 }] + }, + { + "matcher": { "id": "byName", "options": "Pod" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 260 }] + }, + { + "matcher": { "id": "byRegexp", "options": "^RX Rate$|^TX Rate$" }, + "properties": [ + { "id": "unit", "value": "Bps" }, + { "id": "custom.displayMode", "value": "color-background-solid" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 10000000 }, + { "color": "orange", "value": 100000000 }, + { "color": "red", "value": 500000000 } + ]}} + ] + }, + { + "matcher": { "id": "byRegexp", "options": "^RX Errors/s$|^TX Errors/s$" }, + "properties": [ + { "id": "unit", "value": "pps" }, + { "id": "decimals", "value": 3 }, + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 0.001 } + ]}} + ] + }, + { + "matcher": { "id": "byRegexp", "options": "^RX Drops/s$|^TX Drops/s$" }, + "properties": [ + { "id": "unit", "value": "pps" }, + { "id": "decimals", "value": 3 }, + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "orange", "value": 0.001 } + ]}} + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 22 } + }, + + { + "id": 16, 
"type": "row", "title": "Errors & Packet Loss", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 } + }, + + { + "id": 17, "type": "timeseries", "title": "RX Errors by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_receive_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "pps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 31 } + }, + + { + "id": 18, "type": "timeseries", "title": "TX Errors by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_transmit_errors_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "pps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 31 } + }, + + { + "id": 19, "type": "timeseries", "title": "RX Packet Drops by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum 
by(namespace)(rate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "pps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 38 } + }, + + { + "id": 20, "type": "timeseries", "title": "TX Packet Drops by Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(rate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod!=\"\"}[5m]))", + "refId": "A", "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "pps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 38 } + }, + + { + "id": 21, "type": "row", "title": "DNS (CoreDNS)", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 } + }, + + { + "id": 22, "type": "timeseries", "title": "DNS Request Rate by Query Type", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(type)(rate(coredns_dns_requests_total[5m]))", + "refId": "A", "legendFormat": "{{type}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } 
+ } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 46 } + }, + + { + "id": 23, "type": "timeseries", "title": "DNS Response Rate by Rcode", + "description": "NOERROR = healthy. NXDOMAIN = name not found. SERVFAIL = upstream error.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(rcode)(rate(coredns_dns_responses_total[5m]))", + "refId": "A", "legendFormat": "{{rcode}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "NOERROR" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "NXDOMAIN" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "SERVFAIL" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "REFUSED" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 46 } + }, + + { + "id": 24, "type": "timeseries", "title": "DNS Request Latency (p50 / p95 / p99)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket[5m])) by (le))", + "refId": "A", "legendFormat": "p50" + }, + { + "expr": "histogram_quantile(0.95, 
sum(rate(coredns_dns_request_duration_seconds_bucket[5m])) by (le))", + "refId": "B", "legendFormat": "p95" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket[5m])) by (le))", + "refId": "C", "legendFormat": "p99" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 46 } + }, + + { + "id": 25, "type": "timeseries", "title": "DNS Cache Hit Ratio (%)", + "description": "High hit ratio = CoreDNS is serving responses from cache, reducing upstream load.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(rate(coredns_cache_hits_total[5m])) / (sum(rate(coredns_cache_hits_total[5m])) + sum(rate(coredns_cache_misses_total[5m]))) * 100", + "refId": "A", "legendFormat": "Cache Hit %" + }], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "green", "value": 80 } + ]}, + "custom": { "lineWidth": 2, "fillOpacity": 20, "showPoints": "never", "spanNulls": false } + } + }, + 
"options": { + "tooltip": { "mode": "single" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "lastNotNull"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 54 } + }, + + { + "id": 26, "type": "timeseries", "title": "DNS Forward Request Rate", + "description": "Queries CoreDNS is forwarding upstream. Spike here with cache miss spike = upstream DNS pressure.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(rate(coredns_forward_requests_total[5m]))", + "refId": "A", "legendFormat": "Forward Requests/s" + }, + { + "expr": "sum(rate(coredns_forward_responses_duration_seconds_count[5m]))", + "refId": "B", "legendFormat": "Forward Responses/s" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 54 } + }, + + { + "id": 27, "type": "row", "title": "Services & Endpoints", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 61 } + }, + + { + "id": 28, "type": "stat", "title": "Total Services", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "count(kube_service_info{namespace=~\"$namespace\"})", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 8, "x": 0, "y": 62 } + }, + + { + "id": 29, 
"type": "stat", "title": "Endpoint Addresses Available", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(kube_endpoint_address_available{namespace=~\"$namespace\"})", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 8, "x": 8, "y": 62 } + }, + + { + "id": 30, "type": "stat", "title": "Endpoint Addresses Not Ready", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum(kube_endpoint_address_not_ready{namespace=~\"$namespace\"}) or vector(0)", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 62 } + }, + + { + "id": 31, + "type": "table", + "title": "Endpoint Availability", + "description": "Per-endpoint available vs not-ready address counts. 
Red Not Ready = pods backing this service are unhealthy.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(namespace,endpoint)(kube_endpoint_address_available{namespace=~\"$namespace\"})", + "refId": "A", "instant": true, "format": "table", "legendFormat": "" + }, + { + "expr": "sum by(namespace,endpoint)(kube_endpoint_address_not_ready{namespace=~\"$namespace\"})", + "refId": "B", "instant": true, "format": "table", "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { "include": { "names": ["namespace", "endpoint", "Value"] } } + }, + { + "id": "joinByField", + "options": { "byField": "endpoint", "mode": "outer" } + }, + { + "id": "organize", + "options": { + "excludeByName": { "namespace 1": true }, + "renameByName": { + "namespace": "Namespace", + "endpoint": "Endpoint", + "Value": "Available", + "Value 1": "Not Ready" + }, + "indexByName": { + "namespace": 0, + "endpoint": 1, + "Value": 2, + "Value 1": 3 + } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Not Ready", "desc": true }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Namespace" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 180 }] + }, + { + "matcher": { "id": "byName", "options": "Endpoint" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 220 }] + }, + { + "matcher": { "id": "byName", "options": "Available" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } } + ] + }, + { + "matcher": { "id": "byName", "options": "Not Ready" }, + "properties": [ + { "id": "custom.displayMode", "value": 
"color-background" }, + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } } + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 66 } + }, + + { + "id": 32, "type": "row", "title": "OKD Router / Ingress (HAProxy)", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 74 } + }, + + { + "id": 33, "type": "timeseries", "title": "Router HTTP Request Rate by Code", + "description": "Requires HAProxy router metrics to be scraped (port 1936). OKD exposes these via the openshift-ingress ServiceMonitor.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by(code)(rate(haproxy_backend_http_responses_total[5m]))", + "refId": "A", "legendFormat": "HTTP {{code}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "HTTP 2xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "HTTP 4xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "HTTP 5xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 75 } + }, + + { + "id": 34, "type": "timeseries", "title": "Router 4xx + 5xx Error Rate (%)", + "description": "Client error (4xx) and server error (5xx) rates as a percentage of all requests.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ 
+ { + "expr": "sum(rate(haproxy_backend_http_responses_total{code=\"4xx\"}[5m])) / sum(rate(haproxy_backend_http_responses_total[5m])) * 100", + "refId": "A", "legendFormat": "4xx %" + }, + { + "expr": "sum(rate(haproxy_backend_http_responses_total{code=\"5xx\"}[5m])) / sum(rate(haproxy_backend_http_responses_total[5m])) * 100", + "refId": "B", "legendFormat": "5xx %" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 5 } + ]} + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "4xx %" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "5xx %" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 75 } + }, + + { + "id": 35, "type": "timeseries", "title": "Router Bytes In / Out", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(rate(haproxy_frontend_bytes_in_total[5m]))", + "refId": "A", "legendFormat": "Bytes In" + }, + { + "expr": "sum(rate(haproxy_frontend_bytes_out_total[5m]))", + "refId": "B", "legendFormat": "Bytes Out" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Bytes In" }, "properties": [{ "id": "color", "value": { 
"fixedColor": "blue", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Bytes Out" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 83 } + }, + + { + "id": 36, + "type": "table", + "title": "Router Backend Server Status", + "description": "HAProxy backend servers (routes). Value 0 = DOWN, 1 = UP.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "haproxy_server_up", + "refId": "A", "instant": true, "format": "table", "legendFormat": "" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { "include": { "names": ["proxy", "server", "Value"] } } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "proxy": "Backend", + "server": "Server", + "Value": "Status" + }, + "indexByName": { "proxy": 0, "server": 1, "Value": 2 } + } + }, + { + "id": "sortBy", + "options": { "fields": [{ "displayName": "Status", "desc": false }] } + } + ], + "fieldConfig": { + "defaults": { "custom": { "align": "center", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Backend" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 260 }] + }, + { + "matcher": { "id": "byName", "options": "Server" }, + "properties": [{ "id": "custom.align", "value": "left" }, { "id": "custom.width", "value": 180 }] + }, + { + "matcher": { "id": "byName", "options": "Status" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "mappings", "value": [ + { "type": "value", "options": { "0": { "text": "DOWN", "color": "red" } } }, + { "type": "value", "options": { "1": { "text": "UP", "color": "green" } } } + ]}, + 
{ "id": "thresholds", "value": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ]}} + ] + } + ] + }, + "options": {}, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 83 } + } + + ] + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08e-dashboard-storage.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08e-dashboard-storage.yaml new file mode 100644 index 0000000..5ae552f --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08e-dashboard-storage.yaml @@ -0,0 +1,607 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: storage-health + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + + json: | + { + "title": "Storage Health", + "uid": "storage-health", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "panels": [ + + { + "type": "row", + "id": 1, + "title": "PVC / PV Status", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 } + }, + + { + "type": "stat", + "id": 2, + "title": "Bound PVCs", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}) or vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 0, "y": 1 } + }, + + { + "type": "stat", + "id": 3, + "title": "Pending PVCs", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Pending\"}) 
or vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 4, "y": 1 } + }, + + { + "type": "stat", + "id": 4, + "title": "Lost PVCs", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Lost\"}) or vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 8, "y": 1 } + }, + + { + "type": "stat", + "id": 5, + "title": "Bound PVs / Available PVs", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolume_status_phase{phase=\"Bound\"}) or vector(0)", + "refId": "A", + "legendFormat": "Bound" + }, + { + "expr": "sum(kube_persistentvolume_status_phase{phase=\"Available\"}) or vector(0)", + "refId": "B", + "legendFormat": "Available" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "blue", "value": null }] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 12, "y": 1 } + }, + + { + "type": "stat", + "id": 6, + "title": "Ceph Cluster Health", 
+ "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "ceph_health_status", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 2 } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { "text": "HEALTH_OK", "index": 0 }, + "1": { "text": "HEALTH_WARN", "index": 1 }, + "2": { "text": "HEALTH_ERR", "index": 2 } + } + } + ] + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "value" + }, + "gridPos": { "h": 5, "w": 4, "x": 16, "y": 1 } + }, + + { + "type": "stat", + "id": 7, + "title": "OSDs Up / Total", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(ceph_osd_up) or vector(0)", + "refId": "A", + "legendFormat": "Up" + }, + { + "expr": "count(ceph_osd_metadata) or vector(0)", + "refId": "B", + "legendFormat": "Total" + } + ], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "background", + "graphMode": "none", + "textMode": "auto" + }, + "gridPos": { "h": 5, "w": 4, "x": 20, "y": 1 } + }, + + { + "type": "row", + "id": 8, + "title": "Cluster Capacity", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 6 } + }, + + { + "type": "gauge", + "id": 9, + "title": "Ceph Cluster Used (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * (ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes) / ceph_cluster_total_bytes", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": 
"percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "showThresholdLabels": true, + "showThresholdMarkers": true + }, + "gridPos": { "h": 8, "w": 5, "x": 0, "y": 7 } + }, + + { + "type": "stat", + "id": 10, + "title": "Ceph Capacity — Total / Available", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "ceph_cluster_total_bytes", + "refId": "A", + "legendFormat": "Total" + }, + { + "expr": "ceph_cluster_total_bytes - (ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)", + "refId": "B", + "legendFormat": "Available" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "blue", "value": null }] + } + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "colorMode": "value", + "graphMode": "none", + "textMode": "auto", + "orientation": "vertical" + }, + "gridPos": { "h": 8, "w": 4, "x": 5, "y": 7 } + }, + + { + "type": "bargauge", + "id": 11, + "title": "PV Allocated Capacity by Storage Class (Bound)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum by (storageclass) (\n kube_persistentvolume_capacity_bytes\n * on(persistentvolume) group_left(storageclass)\n kube_persistentvolume_status_phase{phase=\"Bound\"}\n)", + "refId": "A", + "legendFormat": "{{storageclass}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "color": { "mode": "palette-classic" }, + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "blue", "value": null }] + } + } + }, + "options": { + "orientation": "horizontal", + "reduceOptions": { "calcs": 
["lastNotNull"] }, + "displayMode": "gradient", + "showUnfilled": true + }, + "gridPos": { "h": 8, "w": 7, "x": 9, "y": 7 } + }, + + { + "type": "piechart", + "id": 12, + "title": "PVC Phase Distribution", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}) or vector(0)", + "refId": "A", + "legendFormat": "Bound" + }, + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Pending\"}) or vector(0)", + "refId": "B", + "legendFormat": "Pending" + }, + { + "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Lost\"}) or vector(0)", + "refId": "C", + "legendFormat": "Lost" + } + ], + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" } } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"] }, + "pieType": "pie", + "legend": { + "displayMode": "table", + "placement": "right", + "values": ["value", "percent"] + } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 7 } + }, + + { + "type": "row", + "id": 13, + "title": "Ceph Performance", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 15 } + }, + + { + "type": "timeseries", + "id": 14, + "title": "Ceph Pool IOPS (Read / Write)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "rate(ceph_pool_rd[5m])", + "refId": "A", + "legendFormat": "Read — pool {{pool_id}}" + }, + { + "expr": "rate(ceph_pool_wr[5m])", + "refId": "B", + "legendFormat": "Write — pool {{pool_id}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 8 } + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 } + }, + + { + "type": "timeseries", + "id": 15, + "title": "Ceph Pool Throughput (Read / Write)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": 
"rate(ceph_pool_rd_bytes[5m])", + "refId": "A", + "legendFormat": "Read — pool {{pool_id}}" + }, + { + "expr": "rate(ceph_pool_wr_bytes[5m])", + "refId": "B", + "legendFormat": "Write — pool {{pool_id}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 8 } + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 } + }, + + { + "type": "row", + "id": 16, + "title": "Ceph OSD & Pool Details", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 24 } + }, + + { + "type": "timeseries", + "id": 17, + "title": "Ceph Pool Space Used (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 * ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail)", + "refId": "A", + "legendFormat": "Pool {{pool_id}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "palette-classic" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + }, + "custom": { "lineWidth": 2, "fillOpacity": 10 } + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 25 } + }, + + { + "type": "bargauge", + "id": 18, + "title": "OSD Status per Daemon (green = Up, red = Down)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "ceph_osd_up", + "refId": "A", + "legendFormat": "{{ceph_daemon}}" + } + ], + "fieldConfig": { + "defaults": { + "min": 0, + "max": 1, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { "text": "DOWN", "index": 0 }, + "1": { "text": "UP", "index": 1 } + } + } + ] + } + }, + "options": { + "orientation": 
"horizontal", + "reduceOptions": { "calcs": ["lastNotNull"] }, + "displayMode": "basic", + "showUnfilled": true + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 25 } + }, + + { + "type": "row", + "id": 19, + "title": "Node Disk Usage", + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 33 } + }, + + { + "type": "timeseries", + "id": 20, + "title": "Node Root Disk Usage Over Time (%)", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} * 100)", + "refId": "A", + "legendFormat": "{{instance}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "palette-classic" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + }, + "custom": { "lineWidth": 2, "fillOpacity": 10 } + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 34 } + }, + + { + "type": "bargauge", + "id": 21, + "title": "Current Disk Usage — All Nodes & Mountpoints", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "100 - (node_filesystem_avail_bytes{fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{fstype!~\"tmpfs|overlay|squashfs\"} * 100)", + "refId": "A", + "legendFormat": "{{instance}} — {{mountpoint}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + } + } + }, + "options": { + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"] }, + "displayMode": "gradient", + "showUnfilled": true + }, + 
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 34 } + } + + ] + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08f-dashboard-etcd.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08f-dashboard-etcd.yaml new file mode 100644 index 0000000..6325e47 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08f-dashboard-etcd.yaml @@ -0,0 +1,744 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: okd-etcd + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "title": "etcd", + "uid": "okd-etcd", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "etcd"], + "templating": { + "list": [ + { + "name": "instance", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(etcd_server_has_leader, instance)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "Instance", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "Cluster Members", + "description": "Total number of etcd members currently reporting metrics.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(etcd_server_has_leader)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, 
+ + { + "id": 2, "type": "stat", "title": "Has Leader", + "description": "min() across all members. 0 = at least one member has no quorum — cluster is degraded.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "min(etcd_server_has_leader)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ]}, + "unit": "short", "noValue": "0", + "mappings": [ + { "type": "value", "options": { + "0": { "text": "NO LEADER", "color": "red" }, + "1": { "text": "OK", "color": "green" } + }} + ] + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "Leader Changes (1h)", + "description": "Number of leader elections in the last hour. ≥3 indicates cluster instability.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(changes(etcd_server_leader_changes_seen_total[1h]))", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "DB Size (Max)", + "description": "Largest boltdb file size across all members. 
Default etcd quota is 8 GiB.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "max(etcd_mvcc_db_total_size_in_bytes)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 2147483648 }, + { "color": "orange", "value": 5368709120 }, + { "color": "red", "value": 7516192768 } + ]}, + "unit": "bytes", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "DB Fragmentation (Max)", + "description": "% of DB space that is allocated but unused. >50% → run etcdctl defrag.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "max((etcd_mvcc_db_total_size_in_bytes - etcd_mvcc_db_total_size_in_use_in_bytes) / etcd_mvcc_db_total_size_in_bytes * 100)", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 25 }, + { "color": "orange", "value": 50 }, + { "color": "red", "value": 75 } + ]}, + "unit": "percent", "noValue": "0", "decimals": 1 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "Failed Proposals/s", + "description": "Rate of rejected Raft proposals. 
Any sustained non-zero value = cluster health problem.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(rate(etcd_server_proposals_failed_total[5m]))", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 0.001 } + ]}, + "unit": "short", "noValue": "0", "decimals": 3 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "WAL Fsync p99", + "description": "99th percentile WAL flush-to-disk time. >10ms is concerning; >100ms = serious I/O bottleneck.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (le))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.01 }, + { "color": "orange", "value": 0.1 }, + { "color": "red", "value": 0.5 } + ]}, + "unit": "s", "noValue": "0", "decimals": 4 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "Backend Commit p99", + "description": "99th percentile boltdb commit time. 
>25ms = warning; >100ms = critical backend I/O pressure.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (le))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.025 }, + { "color": "orange", "value": 0.1 }, + { "color": "red", "value": 0.25 } + ]}, + "unit": "s", "noValue": "0", "decimals": 4 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "Cluster Health", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, "type": "timeseries", "title": "Has Leader per Instance", + "description": "1 = member has a leader; 0 = member lost quorum. 
A dip to 0 marks the exact moment of a leader election.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "etcd_server_has_leader{instance=~\"$instance\"}", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "max": 1.1, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 20, "showPoints": "never", "spanNulls": false }, + "mappings": [ + { "type": "value", "options": { + "0": { "text": "0 — no leader" }, + "1": { "text": "1 — ok" } + }} + ] + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "none" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": [] } + }, + "gridPos": { "h": 6, "w": 8, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "timeseries", "title": "Leader Changes (cumulative)", + "description": "Monotonically increasing counter per member. A step jump = one leader election. Correlated jumps across members = cluster-wide event.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "etcd_server_leader_changes_seen_total{instance=~\"$instance\"}", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "auto", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "none" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull"] } + }, + "gridPos": { "h": 6, "w": 8, "x": 8, "y": 5 } + }, + + { + "id": 12, "type": "timeseries", "title": "Slow Operations", + "description": "slow_apply: proposals applied slower than expected. slow_read_index: linearizable reads timing out. 
heartbeat_failures: Raft heartbeat send errors (network partition indicator).", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "rate(etcd_server_slow_apply_total{instance=~\"$instance\"}[5m])", "refId": "A", "legendFormat": "Slow Apply — {{instance}}" }, + { "expr": "rate(etcd_server_slow_read_indexes_total{instance=~\"$instance\"}[5m])", "refId": "B", "legendFormat": "Slow Read Index — {{instance}}" }, + { "expr": "rate(etcd_server_heartbeat_send_failures_total{instance=~\"$instance\"}[5m])", "refId": "C", "legendFormat": "Heartbeat Failures — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 6, "w": 8, "x": 16, "y": 5 } + }, + + { + "id": 13, "type": "row", "title": "gRPC Traffic", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 11 } + }, + + { + "id": 14, "type": "timeseries", "title": "gRPC Request Rate by Method", + "description": "Unary calls/s per RPC method. High Put/Txn = heavy write load. High Range = heavy read load. 
 Watch is a streaming RPC and is excluded by the unary filter here.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(grpc_method)(rate(grpc_server_started_total{job=~\".*etcd.*\",grpc_type=\"unary\"}[5m]))", + "refId": "A", "legendFormat": "{{grpc_method}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 } + }, + + { + "id": 15, "type": "timeseries", "title": "gRPC Error Rate by Status Code", + "description": "Non-OK responses by gRPC status code. RESOURCE_EXHAUSTED = overloaded. UNAVAILABLE = leader election. DEADLINE_EXCEEDED = latency spike.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(grpc_code)(rate(grpc_server_handled_total{job=~\".*etcd.*\",grpc_code!=\"OK\"}[5m]))", + "refId": "A", "legendFormat": "{{grpc_code}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 } + }, + + { + "id": 16, "type": "timeseries", "title": "gRPC Request Latency (p50 / p95 / p99)", + "description": "Unary call handling duration. p99 > 100ms for Put/Txn indicates disk or CPU pressure. 
p99 > 500ms will cause kube-apiserver timeouts.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\",grpc_type=\"unary\"}[5m])) by (le))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\",grpc_type=\"unary\"}[5m])) by (le))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\",grpc_type=\"unary\"}[5m])) by (le))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 } + }, + + { + "id": 17, "type": "row", "title": "Raft Proposals", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 } + }, + + { + "id": 18, "type": "timeseries", "title": "Proposals Committed vs Applied", + "description": "Committed = agreed by Raft quorum. Applied = persisted to boltdb. 
A widening gap between the two = backend apply backlog (disk too slow to keep up).", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "rate(etcd_server_proposals_committed_total{instance=~\"$instance\"}[5m])", "refId": "A", "legendFormat": "Committed — {{instance}}" }, + { "expr": "rate(etcd_server_proposals_applied_total{instance=~\"$instance\"}[5m])", "refId": "B", "legendFormat": "Applied — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 21 } + }, + + { + "id": 19, "type": "timeseries", "title": "Proposals Pending", + "description": "In-flight Raft proposals not yet committed. 
Consistently high (>5) = cluster cannot keep up with write throughput.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "etcd_server_proposals_pending{instance=~\"$instance\"}", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line+area" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 5 }, + { "color": "red", "value": 10 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 21 } + }, + + { + "id": 20, "type": "timeseries", "title": "Failed Proposals Rate", + "description": "Raft proposals that were rejected. 
Root causes: quorum loss, leader timeout, network partition between members.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_server_proposals_failed_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 20, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 0.001 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 21 } + }, + + { + "id": 21, "type": "row", "title": "Disk I/O", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 28 } + }, + + { + "id": 22, "type": "timeseries", "title": "WAL Fsync Duration (p50 / p95 / p99) per Instance", + "description": "Time to flush the write-ahead log to disk. etcd is extremely sensitive to WAL latency. >10ms p99 = storage is the bottleneck. 
Correlates directly with Raft commit latency.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(le,instance)(rate(etcd_disk_wal_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50 — {{instance}}" }, + { "expr": "histogram_quantile(0.95, sum by(le,instance)(rate(etcd_disk_wal_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p95 — {{instance}}" }, + { "expr": "histogram_quantile(0.99, sum by(le,instance)(rate(etcd_disk_wal_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "C", "legendFormat": "p99 — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 29 } + }, + + { + "id": 23, "type": "timeseries", "title": "Backend Commit Duration (p50 / p95 / p99) per Instance", + "description": "Time for boltdb to commit a batch transaction. A spike here while WAL is healthy = backend I/O saturation or boltdb lock contention. 
Triggers apply backlog.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(le,instance)(rate(etcd_disk_backend_commit_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50 — {{instance}}" }, + { "expr": "histogram_quantile(0.95, sum by(le,instance)(rate(etcd_disk_backend_commit_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p95 — {{instance}}" }, + { "expr": "histogram_quantile(0.99, sum by(le,instance)(rate(etcd_disk_backend_commit_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "C", "legendFormat": "p99 — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 29 } + }, + + { + "id": 24, "type": "row", "title": "Network (Peer & Client)", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 37 } + }, + + { + "id": 25, "type": "timeseries", "title": "Peer RX Rate", + "description": "Bytes received from Raft peers (log replication + heartbeats). 
A burst during a quiet period = large snapshot being streamed to a recovering member.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_network_peer_received_bytes_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 6, "x": 0, "y": 38 } + }, + + { + "id": 26, "type": "timeseries", "title": "Peer TX Rate", + "description": "Bytes sent to Raft peers. Leader will have higher TX than followers (it replicates entries to all members).", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_network_peer_sent_bytes_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 6, "x": 6, "y": 38 } + }, + + { + "id": 27, "type": "timeseries", "title": "Client gRPC Received", + "description": "Bytes received from API clients (kube-apiserver, operators). 
Spike = large write burst from controllers or kubectl apply.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_network_client_grpc_received_bytes_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 6, "x": 12, "y": 38 } + }, + + { + "id": 28, "type": "timeseries", "title": "Client gRPC Sent", + "description": "Bytes sent to API clients (responses + watch events). Persistently high = many active Watch streams or large objects being served.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "rate(etcd_network_client_grpc_sent_bytes_total{instance=~\"$instance\"}[5m])", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "Bps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 6, "x": 18, "y": 38 } + }, + + { + "id": 29, "type": "row", "title": "DB Size & Process Resources", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 } + }, + + { + "id": 30, "type": "timeseries", "title": "DB Total vs In-Use Size per Instance", + "description": "Total = allocated boltdb file size. In Use = live key data. The gap between them = fragmentation. 
Steady growth of Total = compaction not keeping up with key churn.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "etcd_mvcc_db_total_size_in_bytes{instance=~\"$instance\"}", "refId": "A", "legendFormat": "Total — {{instance}}" }, + { "expr": "etcd_mvcc_db_total_size_in_use_in_bytes{instance=~\"$instance\"}", "refId": "B", "legendFormat": "In Use — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 46 } + }, + + { + "id": 31, "type": "timeseries", "title": "Process Resident Memory (RSS)", + "description": "Physical RAM consumed by the etcd process. Monotonically growing RSS = memory leak or oversized watch cache. Typical healthy range: 500 MiB–2 GiB depending on cluster size.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "etcd_process_resident_memory_bytes{instance=~\"$instance\"}", + "refId": "A", "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 46 } + }, + + { + "id": 32, "type": "timeseries", "title": "Open File Descriptors vs Limit", + "description": "Open FD count (solid) and process FD limit (dashed). 
Approaching the limit will cause WAL file creation and new client connections to fail.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "etcd_process_open_fds{instance=~\"$instance\"}", "refId": "A", "legendFormat": "Open — {{instance}}" }, + { "expr": "etcd_process_max_fds{instance=~\"$instance\"}", "refId": "B", "legendFormat": "Limit — {{instance}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { + "matcher": { "id": "byRegexp", "options": "^Limit.*" }, + "properties": [ + { "id": "custom.lineWidth", "value": 1 }, + { "id": "custom.lineStyle", "value": { "fill": "dash", "dash": [6, 4] } }, + { "id": "custom.fillOpacity","value": 0 } + ] + } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 46 } + }, + + { + "id": 33, "type": "row", "title": "Snapshots", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 54 } + }, + + { + "id": 34, "type": "timeseries", "title": "Snapshot Save Duration (p50 / p95 / p99)", + "description": "Time to write a full snapshot of the boltdb to disk. 
Slow saves delay Raft log compaction, causing the WAL to grow unboundedly and members to fall further behind.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(le)(rate(etcd_debugging_snap_save_total_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum by(le)(rate(etcd_debugging_snap_save_total_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum by(le)(rate(etcd_debugging_snap_save_total_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 55 } + }, + + { + "id": 35, "type": "timeseries", "title": "Snapshot DB Fsync Duration (p50 / p95 / p99)", + "description": "Time to fsync the snapshot file itself. 
Distinct from WAL fsync: this is flushing the entire boltdb copy to disk after a snapshot is taken.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(le)(rate(etcd_snap_db_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum by(le)(rate(etcd_snap_db_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum by(le)(rate(etcd_snap_db_fsync_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 55 } + } + + ] + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08g-dashboard-control-plane.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08g-dashboard-control-plane.yaml new file mode 100644 index 0000000..94c826e --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08g-dashboard-control-plane.yaml @@ -0,0 +1,752 @@ +apiVersion: 
grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: okd-control-plane-health + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "title": "Control Plane Health", + "uid": "okd-control-plane", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-1h", "to": "now" }, + "tags": ["okd", "control-plane"], + "templating": { + "list": [ + { + "name": "instance", + "type": "query", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(apiserver_request_total, instance)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "label": "API Server Instance", + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "API Servers Up", + "description": "Number of kube-apiserver instances currently scraped and up. Healthy HA cluster = 3.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(up{job=~\".*apiserver.*\"} == 1)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Controller Managers Up", + "description": "kube-controller-manager instances up. 
In OKD only one holds the leader lease at a time; others are hot standbys.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(up{job=~\".*controller-manager.*\"} == 1)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "Schedulers Up", + "description": "kube-scheduler instances up. One holds the leader lease; rest are standbys. 0 = no scheduling of new pods.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(up{job=~\".*scheduler.*\"} == 1)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "green", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "API 5xx Rate", + "description": "Server-side errors (5xx) across all apiserver instances per second. 
Any sustained non-zero value = apiserver internal fault.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(rate(apiserver_request_total{code=~\"5..\"}[5m]))", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.01 }, + { "color": "red", "value": 1 } + ]}, + "unit": "reqps", "noValue": "0", "decimals": 3 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "Inflight — Mutating", + "description": "Current in-flight mutating requests (POST/PUT/PATCH/DELETE). Default OKD limit is ~1000. Hitting the limit = 429 errors for writes.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(apiserver_current_inflight_requests{request_kind=\"mutating\"})", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 500 }, + { "color": "orange", "value": 750 }, + { "color": "red", "value": 900 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "Inflight — Read-Only", + "description": "Current in-flight non-mutating requests (GET/LIST/WATCH). Default OKD limit is ~3000. 
Hitting it = 429 for reads, impacting controllers and kubectl.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(apiserver_current_inflight_requests{request_kind=\"readOnly\"})", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1500 }, + { "color": "orange", "value": 2200 }, + { "color": "red", "value": 2700 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "API Request p99 (non-WATCH)", + "description": "Overall p99 latency for all non-streaming verbs. >1s = noticeable kubectl sluggishness. >10s = controllers timing out on LIST/GET.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"WATCH|CONNECT\"}[5m])) by (le))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "orange", "value": 1 }, + { "color": "red", "value": 5 } + ]}, + "unit": "s", "noValue": "0", "decimals": 3 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "APIServer → etcd p99", + "description": "p99 time apiserver spends waiting on etcd calls. 
Spike here while WAL fsync is healthy = serialization or large object overhead.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum(rate(apiserver_storage_request_duration_seconds_bucket[5m])) by (le))", + "refId": "A", "legendFormat": "" + }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.05 }, + { "color": "orange", "value": 0.2 }, + { "color": "red", "value": 0.5 } + ]}, + "unit": "s", "noValue": "0", "decimals": 4 + } + }, + "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "API Server — Request Rates & Errors", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, "type": "timeseries", "title": "Request Rate by Verb", + "description": "Non-streaming calls per second broken down by verb. GET/LIST = read load from controllers. POST/PUT/PATCH/DELETE = write throughput. 
A sudden LIST spike = controller cache resync storm.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(verb)(rate(apiserver_request_total{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m]))", + "refId": "A", "legendFormat": "{{verb}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "timeseries", "title": "Error Rate by HTTP Status Code", + "description": "4xx/5xx responses per second by code. 429 = inflight limit hit (throttling). 422 = admission rejection or invalid object. 500/503 = internal apiserver fault or etcd unavailability.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(code)(rate(apiserver_request_total{instance=~\"$instance\",code=~\"[45]..\"}[5m]))", + "refId": "A", "legendFormat": "HTTP {{code}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 5 } + }, + + { + "id": 12, "type": "timeseries", "title": "In-Flight Requests — Mutating vs Read-Only", + "description": "Instantaneous count of requests being actively handled. 
The two series correspond to the two inflight limit buckets enforced by the apiserver's Priority and Fairness (APF) or legacy inflight settings.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(request_kind)(apiserver_current_inflight_requests{instance=~\"$instance\"})", "refId": "A", "legendFormat": "{{request_kind}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 20, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 5 } + }, + + { + "id": 13, "type": "row", "title": "API Server — Latency", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 } + }, + + { + "id": 14, "type": "timeseries", "title": "Request Latency — p50 / p95 / p99 (non-WATCH)", + "description": "Aggregated end-to-end request duration across all verbs except WATCH/CONNECT (which are unbounded streaming). 
A rising p99 without a matching rise in etcd latency = CPU saturation, admission webhook slowness, or serialization overhead.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum(rate(apiserver_request_duration_seconds_bucket{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum(rate(apiserver_request_duration_seconds_bucket{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 14 } + }, + + { + "id": 15, "type": "timeseries", "title": "Request p99 Latency by Verb", + "description": "p99 latency broken out per verb. LIST is inherently slower than GET due to serializing full collections. A POST/PUT spike = heavy admission webhook chain or large object writes. 
DELETE spikes are usually caused by cascading GC finalizer storms.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum by(verb,le)(rate(apiserver_request_duration_seconds_bucket{instance=~\"$instance\",verb!~\"WATCH|CONNECT\"}[5m])))", + "refId": "A", "legendFormat": "{{verb}}" + }], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 14 } + }, + + { + "id": 16, "type": "timeseries", "title": "APIServer → etcd Latency by Operation", + "description": "Time apiserver spends waiting on etcd, split by operation type (get, list, create, update, delete, watch). Elevated get/list = etcd read pressure. 
Elevated create/update = write bottleneck, likely correlated with WAL fsync latency.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(operation,le)(rate(apiserver_storage_request_duration_seconds_bucket[5m])))", "refId": "A", "legendFormat": "p50 — {{operation}}" }, + { "expr": "histogram_quantile(0.99, sum by(operation,le)(rate(apiserver_storage_request_duration_seconds_bucket[5m])))", "refId": "B", "legendFormat": "p99 — {{operation}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 14 } + }, + + { + "id": 17, "type": "row", "title": "API Server — Watches & Long-Running Requests", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 } + }, + + { + "id": 18, "type": "timeseries", "title": "Active Long-Running Requests (Watches) by Resource", + "description": "Instantaneous count of open WATCH streams grouped by resource. Each controller typically holds one WATCH per resource type per apiserver instance. 
A sudden drop = controller restart; a runaway climb = operator creating watches without cleanup.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(resource)(apiserver_longrunning_requests{instance=~\"$instance\",verb=\"WATCH\"})", + "refId": "A", "legendFormat": "{{resource}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 23 } + }, + + { + "id": 19, "type": "timeseries", "title": "Watch Events Dispatched Rate by Kind", + "description": "Watch events sent to all active watchers per second, by object kind. Persistent high rate for a specific kind = that resource type is churning heavily, increasing etcd load and controller reconcile frequency.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(kind)(rate(apiserver_watch_events_total{instance=~\"$instance\"}[5m]))", + "refId": "A", "legendFormat": "{{kind}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 23 } + }, + + { + "id": 20, "type": "timeseries", "title": "Watch Event Size — p50 / p95 / p99 by Kind", + "description": "Size of individual watch events dispatched to clients. Large events (MiB-scale) for Secrets or ConfigMaps = objects being stored with oversized data. 
Contributes to apiserver memory pressure and network saturation.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum by(kind,le)(rate(apiserver_watch_events_sizes_bucket{instance=~\"$instance\"}[5m])))", "refId": "A", "legendFormat": "p50 — {{kind}}" }, + { "expr": "histogram_quantile(0.99, sum by(kind,le)(rate(apiserver_watch_events_sizes_bucket{instance=~\"$instance\"}[5m])))", "refId": "B", "legendFormat": "p99 — {{kind}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 23 } + }, + + { + "id": 21, "type": "row", "title": "Admission Webhooks", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 } + }, + + { + "id": 22, "type": "timeseries", "title": "Webhook Call Rate by Name", + "description": "Mutating and validating admission webhook invocations per second by webhook name. 
A webhook invoked on every write (e.g., a mutating webhook with no object selector) can be a major source of write latency amplification.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(name,type)(rate(apiserver_admission_webhook_request_total{instance=~\"$instance\"}[5m]))", + "refId": "A", "legendFormat": "{{type}} — {{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 31 } + }, + + { + "id": 23, "type": "timeseries", "title": "Webhook Latency p99 by Name", + "description": "p99 round-trip time per webhook call (network + webhook server processing). Default apiserver timeout is 10s; a webhook consistently near that limit causes cascading write latency for all resources it intercepts.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum by(name,le)(rate(apiserver_admission_webhook_admission_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", + "refId": "A", "legendFormat": "{{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "red", "value": 2.0 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, 
"y": 31 } + }, + + { + "id": 24, "type": "timeseries", "title": "Webhook Rejection Rate by Name", + "description": "Rate of admission denials per webhook. A validating webhook rejecting requests is expected behaviour; a sudden surge indicates either a newly enforced policy or a misbehaving webhook rejecting valid objects.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(name,error_type)(rate(apiserver_admission_webhook_rejection_count{instance=~\"$instance\"}[5m]))", + "refId": "A", "legendFormat": "{{name}} ({{error_type}})" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 31 } + }, + + { + "id": 25, "type": "row", "title": "kube-controller-manager", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 38 } + }, + + { + "id": 26, "type": "timeseries", "title": "Work Queue Depth by Controller", + "description": "Items waiting to be reconciled in each controller's work queue. Persistent non-zero depth = controller cannot keep up with the event rate. 
Identifies which specific controller is the bottleneck during overload incidents.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(15, sum by(name)(workqueue_depth{job=~\".*controller-manager.*\"}))", + "refId": "A", "legendFormat": "{{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 10 }, + { "color": "red", "value": 50 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 39 } + }, + + { + "id": 27, "type": "timeseries", "title": "Work Queue Item Processing Duration p99 by Controller", + "description": "p99 time a work item spends being actively reconciled (inside the reconcile loop, excludes queue wait time). 
A slow reconcile = either the controller is doing expensive API calls or the etcd write path is slow.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "histogram_quantile(0.99, sum by(name,le)(rate(workqueue_work_duration_seconds_bucket{job=~\".*controller-manager.*\"}[5m])))", + "refId": "A", "legendFormat": "{{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 39 } + }, + + { + "id": 28, "type": "timeseries", "title": "Work Queue Retry Rate by Controller", + "description": "Rate of items being re-queued after a failed reconciliation. A persistently high retry rate for a controller = it is encountering recurring errors on the same objects (e.g., API permission errors, webhook rejections, or resource conflicts).", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(15, sum by(name)(rate(workqueue_retries_total{job=~\".*controller-manager.*\"}[5m])))", + "refId": "A", "legendFormat": "{{name}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 39 } + }, + + { + "id": 29, "type": "row", "title": "kube-scheduler", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 47 } + }, + + { + "id": 30, "type": 
"timeseries", "title": "Scheduling Attempt Rate by Result", + "description": "Outcomes of scheduling cycles per second. scheduled = pod successfully bound to a node. unschedulable = no node met the pod's constraints. error = scheduler internal failure (API error, timeout). Persistent unschedulable = cluster capacity or taints/affinity misconfiguration.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(result)(rate(scheduler_schedule_attempts_total[5m]))", + "refId": "A", "legendFormat": "{{result}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "scheduled" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "unschedulable" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "error" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 48 } + }, + + { + "id": 31, "type": "timeseries", "title": "Scheduling Latency — p50 / p95 / p99", + "description": "Time from when a pod enters the active queue to when a binding decision is made (does not include bind API call time). Includes filter, score, and reserve plugin execution time. 
Spike = expensive affinity rules, large number of nodes, or slow extender webhooks.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "histogram_quantile(0.50, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "A", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "B", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "C", "legendFormat": "p99" } + ], + "fieldConfig": { + "defaults": { + "unit": "s", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "p50" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p95" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "p99" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 48 } + }, + + { + "id": 32, "type": "timeseries", "title": "Pending Pods by Queue", + "description": "Pods waiting to be scheduled, split by internal queue. active = ready to be attempted now. backoff = recently failed, in exponential back-off. unschedulable = parked until cluster state changes. 
A growing unschedulable queue = systemic capacity or constraint problem.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(queue)(scheduler_pending_pods)", + "refId": "A", "legendFormat": "{{queue}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 10 }, + { "color": "red", "value": 50 } + ]} + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "unschedulable" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "backoff" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "active" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 48 } + }, + + { + "id": 33, "type": "row", "title": "Process Resources — All Control Plane Components", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 55 } + }, + + { + "id": 34, "type": "timeseries", "title": "CPU Usage by Component", + "description": "Rate of CPU seconds consumed by each control plane process. apiserver CPU spike = surge in request volume or list serialization. controller-manager CPU spike = reconcile storm. 
scheduler CPU spike = large node count with complex affinity.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(job)(rate(process_cpu_seconds_total{job=~\".*apiserver.*\"}[5m]))", "refId": "A", "legendFormat": "apiserver — {{job}}" }, + { "expr": "sum by(job)(rate(process_cpu_seconds_total{job=~\".*controller-manager.*\"}[5m]))", "refId": "B", "legendFormat": "controller-manager — {{job}}" }, + { "expr": "sum by(job)(rate(process_cpu_seconds_total{job=~\".*scheduler.*\"}[5m]))", "refId": "C", "legendFormat": "scheduler — {{job}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 56 } + }, + + { + "id": 35, "type": "timeseries", "title": "RSS Memory by Component", + "description": "Resident set size of each control plane process. apiserver memory is dominated by the watch cache size and serialisation buffers. controller-manager memory = informer caches. 
Monotonically growing RSS without restarts = memory leak or unbounded cache growth.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(job)(process_resident_memory_bytes{job=~\".*apiserver.*\"})", "refId": "A", "legendFormat": "apiserver — {{job}}" }, + { "expr": "sum by(job)(process_resident_memory_bytes{job=~\".*controller-manager.*\"})", "refId": "B", "legendFormat": "controller-manager — {{job}}" }, + { "expr": "sum by(job)(process_resident_memory_bytes{job=~\".*scheduler.*\"})", "refId": "C", "legendFormat": "scheduler — {{job}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 56 } + }, + + { + "id": 36, "type": "timeseries", "title": "Goroutines by Component", + "description": "Number of live goroutines in each control plane process. Gradual upward drift = goroutine leak (often tied to unclosed watch streams or context leaks). A step-down = process restart. 
apiserver typically runs 200–600 goroutines; spikes above 1000 warrant investigation.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(job)(go_goroutines{job=~\".*apiserver.*\"})", "refId": "A", "legendFormat": "apiserver — {{job}}" }, + { "expr": "sum by(job)(go_goroutines{job=~\".*controller-manager.*\"})", "refId": "B", "legendFormat": "controller-manager — {{job}}" }, + { "expr": "sum by(job)(go_goroutines{job=~\".*scheduler.*\"})", "refId": "C", "legendFormat": "scheduler — {{job}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 56 } + } + + ] + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08h-dashboard-alerts-events-problems.yaml b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08h-dashboard-alerts-events-problems.yaml new file mode 100644 index 0000000..5c9bee4 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/dev_test_yamls/08h-dashboard-alerts-events-problems.yaml @@ -0,0 +1,741 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: okd-alerts-events + namespace: observability +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "title": "Alerts & Events — Active Problems", + "uid": "okd-alerts-events", + "schemaVersion": 36, + "version": 1, + "refresh": "30s", + "time": { "from": "now-3h", "to": "now" }, + "tags": ["okd", "alerts", "events"], + "templating": { + "list": [ + { + "name": "severity", + "type": "custom", + "label": "Severity Filter", + "query": "critical,warning,info", + 
"current": { "selected": true, "text": "All", "value": "$__all" }, + "includeAll": true, + "allValue": "critical|warning|info", + "multi": false, + "options": [ + { "selected": true, "text": "All", "value": "$__all" }, + { "selected": false, "text": "Critical", "value": "critical" }, + { "selected": false, "text": "Warning", "value": "warning" }, + { "selected": false, "text": "Info", "value": "info" } + ] + }, + { + "name": "namespace", + "type": "query", + "label": "Namespace", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "query": { "query": "label_values(ALERTS{alertstate=\"firing\"}, namespace)", "refId": "A" }, + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "multi": true, + "sort": 1, + "current": {}, + "options": [] + } + ] + }, + "panels": [ + + { + "id": 1, "type": "stat", "title": "Critical Alerts Firing", + "description": "Alerting rule instances currently in the firing state with severity=\"critical\". Any non-zero value represents a breached SLO or infrastructure condition requiring immediate on-call response. 
The ALERTS metric is generated by Prometheus directly from your alerting rules — it reflects what Prometheus knows, before Alertmanager routing or silencing.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(ALERTS{alertstate=\"firing\",severity=\"critical\"}) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 0 } + }, + + { + "id": 2, "type": "stat", "title": "Warning Alerts Firing", + "description": "Firing alerts at severity=\"warning\". Warnings indicate a degraded or elevated-risk condition that has not yet crossed the critical threshold. 
A sustained or growing warning count often precedes a critical fire — treat them as early-warning signals, not background noise.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(ALERTS{alertstate=\"firing\",severity=\"warning\"}) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "orange", "value": 5 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 0 } + }, + + { + "id": 3, "type": "stat", "title": "Info / Unclassified Alerts Firing", + "description": "Firing alerts with severity=\"info\" or no severity label. These are informational and do not normally require immediate action. 
A sudden large jump may reveal noisy alerting rules generating alert fatigue — rules worth reviewing for threshold tuning or adding inhibition rules.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(ALERTS{alertstate=\"firing\",severity!~\"critical|warning\"}) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "blue", "value": 1 }, + { "color": "blue", "value": 25 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 0 } + }, + + { + "id": 4, "type": "stat", "title": "Alerts Silenced (Suppressed)", + "description": "Alerts currently matched by an active Alertmanager silence rule and therefore not routed to receivers. Silences are intentional during maintenance windows, but a large suppressed count outside of planned maintenance = an overly broad silence masking real problems. 
Zero silences when a maintenance window is active = the silence has expired or was misconfigured.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "sum(alertmanager_alerts{state=\"suppressed\"}) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 20 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 0 } + }, + + { + "id": 5, "type": "stat", "title": "CrashLoopBackOff Pods", + "description": "Container instances currently waiting in the CrashLoopBackOff state — the container crashed and Kubernetes is retrying with exponential back-off. Each instance is a pod that cannot stay running. 
Common root causes: OOM kill, bad entrypoint, missing Secret or ConfigMap, an unavailable init dependency, or a broken image layer.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 3 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 0 } + }, + + { + "id": 6, "type": "stat", "title": "OOMKilled Containers", + "description": "Containers whose most recent termination reason was OOMKilled. This is a current-state snapshot: a container that was OOMKilled, restarted, and is now Running will still appear here until its next termination occurs for a different reason. 
Non-zero and stable = recurring OOM, likely a workload memory leak or under-provisioned memory limit.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "orange", "value": 1 }, + { "color": "red", "value": 5 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 0 } + }, + + { + "id": 7, "type": "stat", "title": "NotReady Nodes", + "description": "Nodes where the Ready condition is currently not True (False or Unknown). A NotReady node stops receiving new pod scheduling and, after the node eviction timeout (~5 min default), pods on it will be evicted. Control plane nodes going NotReady simultaneously = potential quorum loss. 
Any non-zero value is a tier-1 incident signal.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\"} == 0) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 0 } + }, + + { + "id": 8, "type": "stat", "title": "Degraded Cluster Operators (OKD)", + "description": "OKD ClusterOperators currently reporting Degraded=True. Each ClusterOperator owns a core platform component — authentication, networking, image-registry, monitoring, ingress, storage, etc. A degraded operator means its managed component is impaired or unavailable. 
Zero is the only acceptable steady-state value outside of an active upgrade.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ "expr": "count(cluster_operator_conditions{condition=\"Degraded\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }], + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ]}, + "unit": "short", "noValue": "0" + } + }, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 0 } + }, + + { + "id": 9, "type": "row", "title": "Alert Overview", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 } + }, + + { + "id": 10, "type": "timeseries", "title": "Firing Alert Count by Severity Over Time", + "description": "Instantaneous count of firing ALERTS series grouped by severity over the selected window. A vertical rise = new alerting condition emerged. A horizontal plateau = a persistent, unresolved problem. A step-down = alert resolved or Prometheus rule evaluation stopped matching. 
Use the Severity Filter variable to narrow scope during triage.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "count by(severity)(ALERTS{alertstate=\"firing\",severity=~\"$severity\",namespace=~\"$namespace\"})", + "refId": "A", + "legendFormat": "{{severity}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "critical" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "warning" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "info" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max", "lastNotNull"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 5 } + }, + + { + "id": 11, "type": "timeseries", "title": "Alertmanager Notification Rate by Integration", + "description": "Rate of notification delivery attempts from Alertmanager per second, split by integration type (slack, pagerduty, email, webhook, etc.). Solid lines = successful deliveries; dashed red lines = failed deliveries. A drop to zero on all integrations = Alertmanager is not processing or the cluster is completely quiet. 
Persistent failures on one integration = check that receiver's credentials or endpoint availability.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(integration)(rate(alertmanager_notifications_total[5m]))", "refId": "A", "legendFormat": "✓ {{integration}}" }, + { "expr": "sum by(integration)(rate(alertmanager_notifications_failed_total[5m]))", "refId": "B", "legendFormat": "✗ {{integration}}" } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, "decimals": 3, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + }, + "overrides": [ + { + "matcher": { "id": "byFrameRefID", "options": "B" }, + "properties": [ + { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }, + { "id": "custom.lineStyle", "value": { "dash": [6, 4], "fill": "dash" } }, + { "id": "custom.lineWidth", "value": 1 } + ] + } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 5 } + }, + + { + "id": 12, "type": "bargauge", "title": "Longest-Firing Active Alerts", + "description": "Duration (now - ALERTS_FOR_STATE timestamp) for each currently firing alert, sorted descending. Alerts at the top have been firing longest and are the most likely candidates for known-but-unresolved issues, stale firing conditions, or alerts that should have a silence applied. 
"Red bars (> 2 hours) strongly suggest a problem that has been acknowledged but not resolved.", +        "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, +        "targets": [{ +          "expr": "sort_desc(time() - (ALERTS_FOR_STATE{severity=~\"$severity\",namespace=~\"$namespace\"} and ignoring(alertstate) ALERTS{alertstate=\"firing\"}))", +          "refId": "A", +          "legendFormat": "{{alertname}} · {{severity}} · {{namespace}}" +        }], +        "fieldConfig": { +          "defaults": { +            "unit": "s", "min": 0, +            "color": { "mode": "thresholds" }, +            "thresholds": { "mode": "absolute", "steps": [ +              { "color": "green", "value": null }, +              { "color": "yellow", "value": 300 }, +              { "color": "orange", "value": 1800 }, +              { "color": "red", "value": 7200 } +            ]} +          } +        }, +        "options": { +          "orientation": "horizontal", +          "reduceOptions": { "calcs": ["lastNotNull"] }, +          "displayMode": "gradient", +          "showUnfilled": true, +          "valueMode": "color" +        }, +        "gridPos": { "h": 8, "w": 8, "x": 16, "y": 5 } +      }, + +      { +        "id": 13, "type": "row", "title": "Active Firing Alerts — Full Detail", "collapsed": false, +        "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 } +      }, + +      { +        "id": 14, "type": "table", "title": "All Firing Alerts", +        "description": "Instant-query table of every currently firing alert visible to Prometheus, filtered by the Namespace and Severity variables above. Each row is one alert instance (unique label combination). The value column is omitted — by definition every row here is firing. Use the built-in column filter (funnel icon) to further narrow to a specific alertname, pod, or node. 
Columns are sparse: labels not defined in a given alert rule will show '—'.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "ALERTS{alertstate=\"firing\",severity=~\"$severity\",namespace=~\"$namespace\"}", + "refId": "A", + "instant": true, + "legendFormat": "" + }], + "transformations": [ + { "id": "labelsToFields", "options": { "mode": "columns" } }, + { + "id": "organize", + "options": { + "excludeByName": { + "alertstate": true, + "__name__": true, + "Value": true, + "Time": true + }, + "renameByName": { + "alertname": "Alert Name", + "severity": "Severity", + "namespace": "Namespace", + "pod": "Pod", + "node": "Node", + "container": "Container", + "job": "Job", + "service": "Service", + "reason": "Reason", + "instance": "Instance" + }, + "indexByName": { + "severity": 0, + "alertname": 1, + "namespace": 2, + "pod": 3, + "node": 4, + "container": 5, + "job": 6, + "service": 7, + "reason": 8, + "instance": 9 + } + } + } + ], + "fieldConfig": { + "defaults": { + "custom": { "align": "left", "filterable": true }, + "noValue": "—" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Severity" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "custom.width", "value": 110 }, + { + "id": "mappings", + "value": [{ + "type": "value", + "options": { + "critical": { "text": "CRITICAL", "color": "dark-red", "index": 0 }, + "warning": { "text": "WARNING", "color": "dark-yellow", "index": 1 }, + "info": { "text": "INFO", "color": "dark-blue", "index": 2 } + } + }] + } + ] + }, + { "matcher": { "id": "byName", "options": "Alert Name" }, "properties": [{ "id": "custom.width", "value": 300 }] }, + { "matcher": { "id": "byName", "options": "Namespace" }, "properties": [{ "id": "custom.width", "value": 180 }] }, + { "matcher": { "id": "byName", "options": "Pod" }, "properties": [{ "id": "custom.width", "value": 200 }] }, + { "matcher": { "id": "byName", "options": 
"Node" }, "properties": [{ "id": "custom.width", "value": 200 }] } + ] + }, + "options": { + "sortBy": [{ "desc": false, "displayName": "Severity" }], + "footer": { "show": false } + }, + "gridPos": { "h": 12, "w": 24, "x": 0, "y": 14 } + }, + + { + "id": 15, "type": "row", "title": "Kubernetes Warning Events", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 } + }, + + { + "id": 16, "type": "timeseries", "title": "Warning Event Rate by Reason", + "description": "Rate of Kubernetes Warning-type events per second grouped by reason code. BackOff = container is CrashLooping. FailedScheduling = no node satisfies pod constraints. FailedMount = volume attachment or CSI failure. Evicted = kubelet evicted a pod due to memory or disk pressure. NodeNotReady = node lost contact. A spike in a single reason narrows the incident root-cause immediately without needing to read raw event logs. Requires kube-state-metrics with --resources=events.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(reason)(rate(kube_event_count{type=\"Warning\",namespace=~\"$namespace\"}[5m])))", + "refId": "A", + "legendFormat": "{{reason}}" + }], + "fieldConfig": { + "defaults": { + "unit": "reqps", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 27 } + }, + + { + "id": 17, "type": "bargauge", "title": "Warning Events — Top Namespaces (Accumulated Count)", + "description": "Total accumulated Warning event count (the count field on the Kubernetes Event object) per namespace, showing the top 15 most active. 
A namespace dominating this chart is generating significantly more abnormal conditions than its peers, useful for identifying noisy tenants, misconfigured deployments, or namespaces experiencing a persistent infrastructure problem. Note this is the raw Event.count field — it resets if the event object is deleted and recreated.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(15, sum by(namespace)(kube_event_count{type=\"Warning\"}))", + "refId": "A", + "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 10 }, + { "color": "orange", "value": 50 }, + { "color": "red", "value": 200 } + ]} + } + }, + "options": { + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"] }, + "displayMode": "gradient", + "showUnfilled": true + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 27 } + }, + + { + "id": 18, "type": "timeseries", "title": "Warning Events — Accumulated Count by Reason Over Time", + "description": "Raw accumulated event count gauge over time, split by reason. Unlike the rate panel this shows total volume and slope simultaneously. A line that climbs steeply = events are occurring frequently right now. A line that plateaus = the condition causing that reason has stopped. 
A line that drops to zero = the event object was deleted and recreated or the condition fully resolved.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(reason)(kube_event_count{type=\"Warning\",namespace=~\"$namespace\"}))", + "refId": "A", + "legendFormat": "{{reason}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 8, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 27 } + }, + + { + "id": 19, "type": "row", "title": "Pod Problems", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 35 } + }, + + { + "id": 20, "type": "timeseries", "title": "CrashLoopBackOff Pods by Namespace", + "description": "Count of container instances in CrashLoopBackOff waiting state over time, broken down by namespace. A sudden rise in one namespace = a workload deployment is failing. A persistent baseline across many namespaces = a shared dependency (Secret, ConfigMap, network policy, or an upstream service) has become unavailable. 
Unlike restart rate, this panel shows the steady-state count of pods currently stuck — not flapping.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "sum by(namespace)(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\",namespace=~\"$namespace\"} == 1)", + "refId": "A", + "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 5 } + ]} + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 36 } + }, + + { + "id": 21, "type": "timeseries", "title": "Container Restart Rate by Namespace", + "description": "Rate of container restarts per second across all reasons (OOMKill, liveness probe failure, process exit) grouped by namespace. A namespace with a rising restart rate that has not yet entered CrashLoopBackOff is in the early failure window before the exponential back-off penalty kicks in. 
Cross-reference with the OOMKilled stat tile and the last-terminated-reason to separate crash types.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "topk(10, sum by(namespace)(rate(kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}[5m])))", + "refId": "A", + "legendFormat": "{{namespace}}" + }], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, "decimals": 4, + "color": { "mode": "palette-classic" }, + "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false } + } + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 36 } + }, + + { + "id": 22, "type": "timeseries", "title": "Pods by Problem Phase (Failed / Pending / Unknown)", + "description": "Count of pods in Failed, Pending, or Unknown phase over time. Failed = container terminated with a non-zero exit code or was evicted and not rescheduled. Pending for more than a few minutes = scheduler unable to bind the pod (check FailedScheduling events, node capacity, and taint/toleration mismatches). 
Unknown = kubelet is not reporting to the apiserver, typically indicating a node network partition or kubelet crash.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { "expr": "sum by(phase)(kube_pod_status_phase{phase=~\"Failed|Unknown\",namespace=~\"$namespace\"} == 1)", "refId": "A", "legendFormat": "{{phase}}" }, + { "expr": "sum(kube_pod_status_phase{phase=\"Pending\",namespace=~\"$namespace\"} == 1)", "refId": "B", "legendFormat": "Pending" } + ], + "fieldConfig": { + "defaults": { + "unit": "short", "min": 0, + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { "mode": "absolute", "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ]} + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Pending" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Unknown" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["lastNotNull", "max"] } + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 36 } + }, + + { + "id": 23, "type": "row", "title": "Node & Cluster Operator Conditions", "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 43 } + }, + + { + "id": 24, "type": "table", "title": "Node Condition Status Matrix", + "description": "Instant snapshot of every active node condition across all nodes. Each row is one (node, condition, status) triple where value=1, meaning that combination is currently true. 
Ready=true is the normal healthy state; MemoryPressure=true, DiskPressure=true, PIDPressure=true, and NetworkUnavailable=true all indicate problem states that will affect pod scheduling on that node. Use the column filter to show only conditions where status=\"true\" and condition != \"Ready\" to isolate problems quickly.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [{ + "expr": "kube_node_status_condition == 1", + "refId": "A", + "instant": true, + "legendFormat": "" + }], + "transformations": [ + { "id": "labelsToFields", "options": { "mode": "columns" } }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "endpoint": true, + "job": true, + "service": true, + "instance": true + }, + "renameByName": { + "node": "Node", + "condition": "Condition", + "status": "Status" + }, + "indexByName": { "node": 0, "condition": 1, "status": 2 } + } + } + ], + "fieldConfig": { + "defaults": { + "custom": { "align": "left", "filterable": true }, + "noValue": "—" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Status" }, + "properties": [ + { "id": "custom.displayMode", "value": "color-background" }, + { "id": "custom.width", "value": 90 }, + { + "id": "mappings", + "value": [{ + "type": "value", + "options": { + "true": { "text": "true", "color": "green", "index": 0 }, + "false": { "text": "false", "color": "dark-red", "index": 1 }, + "unknown": { "text": "unknown", "color": "dark-orange", "index": 2 } + } + }] + } + ] + }, + { + "matcher": { "id": "byName", "options": "Condition" }, + "properties": [ + { "id": "custom.width", "value": 190 }, + { "id": "custom.displayMode", "value": "color-text" }, + { + "id": "mappings", + "value": [{ + "type": "value", + "options": { + "Ready": { "color": "green", "index": 0 }, + "MemoryPressure": { "color": "red", "index": 1 }, + "DiskPressure": { "color": "red", "index": 2 }, + "PIDPressure": { "color": "red", 
"index": 3 }, + "NetworkUnavailable": { "color": "red", "index": 4 } + } + }] + } + ] + }, + { "matcher": { "id": "byName", "options": "Node" }, "properties": [{ "id": "custom.width", "value": 230 }] } + ] + }, + "options": { + "sortBy": [{ "desc": false, "displayName": "Node" }], + "footer": { "show": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 44 } + }, + + { + "id": 25, "type": "table", "title": "Cluster Operator Conditions — Degraded & Progressing (OKD)", + "description": "Shows only ClusterOperator conditions that indicate a problem state: Degraded=True (operator has failed to achieve its desired state) or Progressing=True (operator is actively reconciling — normal during upgrades but alarming in steady state). Operators not appearing in this table are healthy. The reason column gives the operator's own explanation for the condition, which maps directly to the relevant operator log stream and OpenShift runbook.", + "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" }, + "targets": [ + { + "expr": "cluster_operator_conditions{condition=\"Degraded\"} == 1", + "refId": "A", + "instant": true, + "legendFormat": "" + }, + { + "expr": "cluster_operator_conditions{condition=\"Progressing\"} == 1", + "refId": "B", + "instant": true, + "legendFormat": "" + } + ], + "transformations": [ + { "id": "labelsToFields", "options": { "mode": "columns" } }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "endpoint": true, + "job": true, + "service": true, + "instance": true, + "namespace": true + }, + "renameByName": { + "name": "Operator", + "condition": "Condition", + "reason": "Reason" + }, + "indexByName": { "name": 0, "condition": 1, "reason": 2 } + } + } + ], + "fieldConfig": { + "defaults": { + "custom": { "align": "left", "filterable": true }, + "noValue": "—" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Condition" }, + "properties": [ + { "id": 
"custom.displayMode", "value": "color-background" }, + { "id": "custom.width", "value": 140 }, + { + "id": "mappings", + "value": [{ + "type": "value", + "options": { + "Degraded": { "text": "Degraded", "color": "dark-red", "index": 0 }, + "Progressing": { "text": "Progressing", "color": "dark-yellow", "index": 1 } + } + }] + } + ] + }, + { "matcher": { "id": "byName", "options": "Operator" }, "properties": [{ "id": "custom.width", "value": 240 }] }, + { "matcher": { "id": "byName", "options": "Reason" }, "properties": [{ "id": "custom.width", "value": 220 }] } + ] + }, + "options": { + "sortBy": [{ "desc": false, "displayName": "Condition" }], + "footer": { "show": false } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 44 } + } + + ] + } diff --git a/harmony/src/modules/monitoring/cluster_dashboards/mod.rs b/harmony/src/modules/monitoring/cluster_dashboards/mod.rs new file mode 100644 index 0000000..ec14778 --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/mod.rs @@ -0,0 +1,2 @@ +mod score; +pub use score::ClusterDashboardsScore; diff --git a/harmony/src/modules/monitoring/cluster_dashboards/score.rs b/harmony/src/modules/monitoring/cluster_dashboards/score.rs new file mode 100644 index 0000000..22f916d --- /dev/null +++ b/harmony/src/modules/monitoring/cluster_dashboards/score.rs @@ -0,0 +1,557 @@ +use async_trait::async_trait; +use harmony_types::id::Id; +use k8s_openapi::api::core::v1::{Namespace, Secret}; +use kube::api::ObjectMeta; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +use harmony_k8s::KubernetesDistribution; +use log::debug; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::k8s::resource::K8sResourceScore, + modules::monitoring::kube_prometheus::crd::crd_grafana::{ + Grafana, GrafanaContainer, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, + GrafanaDatasourceConfig, 
GrafanaDatasourceJsonData, GrafanaDatasourceSecureJsonData, + GrafanaDatasourceSpec, GrafanaDeployment, GrafanaDeploymentSpec, GrafanaIngress, + GrafanaIngressBackend, GrafanaIngressBackendService, GrafanaIngressPath, + GrafanaIngressRule, GrafanaIngressRuleHttp, GrafanaIngressServicePort, GrafanaIngressSpec, + GrafanaPodSpec, GrafanaPodTemplate, GrafanaRoute, GrafanaRoutePort, GrafanaRouteSpec, + GrafanaRouteTarget, GrafanaRouteTls, GrafanaSecretKeyRef, GrafanaSpec, GrafanaValueFrom, + GrafanaValueSource, ResourceRequirements, + }, + modules::monitoring::kube_prometheus::crd::crd_prometheuses::LabelSelector, + score::Score, + topology::{K8sclient, Topology}, +}; + +#[derive(Clone, Debug, Serialize)] +pub struct ClusterDashboardsScore { + pub namespace: String, + pub grafana_admin_user: String, + pub grafana_admin_password: String, +} + +impl Default for ClusterDashboardsScore { + fn default() -> Self { + Self { + namespace: "harmony-observability".to_string(), + grafana_admin_user: "admin".to_string(), + grafana_admin_password: "password".to_string(), + } + } +} + +impl ClusterDashboardsScore { + pub fn new(namespace: &str) -> Self { + Self { + namespace: namespace.to_string(), + grafana_admin_user: "admin".to_string(), + grafana_admin_password: "password".to_string(), + } + } + + pub fn with_credentials(namespace: &str, admin_user: &str, admin_password: &str) -> Self { + Self { + namespace: namespace.to_string(), + grafana_admin_user: admin_user.to_string(), + grafana_admin_password: admin_password.to_string(), + } + } +} + +impl Score for ClusterDashboardsScore { + fn name(&self) -> String { + format!("ClusterDashboardsScore({})", self.namespace) + } + + #[doc(hidden)] + fn create_interpret(&self) -> Box> { + Box::new(ClusterDashboardsInterpret { + namespace: self.namespace.clone(), + grafana_admin_user: self.grafana_admin_user.clone(), + grafana_admin_password: self.grafana_admin_password.clone(), + }) + } +} + +#[derive(Debug, Clone)] +pub struct 
ClusterDashboardsInterpret { + namespace: String, + grafana_admin_user: String, + grafana_admin_password: String, +} + +#[async_trait] +impl Interpret for ClusterDashboardsInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + self.create_namespace(inventory, topology).await?; + self.create_rbac_resources(inventory, topology).await?; + self.create_secret(inventory, topology).await?; + self.create_grafana(inventory, topology).await?; + self.create_datasource(inventory, topology).await?; + self.create_dashboards(inventory, topology).await?; + + Ok(Outcome::success(format!( + "Cluster dashboards resources in namespace '{}' with {} dashboards successfully created", + self.namespace, 8 + ))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("ClusterDashboards") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} + +impl ClusterDashboardsInterpret { + async fn create_namespace( + &self, + inventory: &Inventory, + topology: &(impl Topology + K8sclient), + ) -> Result<(), InterpretError> { + let mut labels = BTreeMap::new(); + labels.insert( + "openshift.io/cluster-monitoring".to_string(), + "true".to_string(), + ); + + let namespace = Namespace { + metadata: ObjectMeta { + name: Some(self.namespace.clone()), + labels: Some(labels), + ..ObjectMeta::default() + }, + ..Namespace::default() + }; + + K8sResourceScore::single(namespace, None) + .interpret(inventory, topology) + .await?; + + Ok(()) + } + + async fn create_rbac_resources( + &self, + inventory: &Inventory, + topology: &(impl Topology + K8sclient), + ) -> Result<(), InterpretError> { + let service_account_name = "grafana-prometheus-datasource-sa".to_string(); + let rbac_namespace = self.namespace.clone(); + + let service_account = { + use k8s_openapi::api::core::v1::ServiceAccount; + ServiceAccount { + metadata: ObjectMeta { + name: 
Some(service_account_name.clone()), + namespace: Some(rbac_namespace.clone()), + ..ObjectMeta::default() + }, + ..ServiceAccount::default() + } + }; + + let cluster_role = { + use k8s_openapi::api::rbac::v1::{ClusterRole, PolicyRule}; + ClusterRole { + metadata: ObjectMeta { + name: Some("grafana-prometheus-api-access".to_string()), + ..ObjectMeta::default() + }, + rules: Some(vec![PolicyRule { + api_groups: Some(vec!["monitoring.coreos.com".to_string()]), + resources: Some(vec!["prometheuses/api".to_string()]), + verbs: vec!["get".to_string()], + ..PolicyRule::default() + }]), + ..ClusterRole::default() + } + }; + + let cluster_role_binding = { + use k8s_openapi::api::rbac::v1::{ClusterRoleBinding, RoleRef, Subject}; + ClusterRoleBinding { + metadata: ObjectMeta { + name: Some("grafana-prometheus-api-access-binding".to_string()), + ..ObjectMeta::default() + }, + subjects: Some(vec![Subject { + kind: "ServiceAccount".to_string(), + name: service_account_name.clone(), + namespace: Some(rbac_namespace.clone()), + ..Subject::default() + }]), + role_ref: RoleRef { + api_group: "rbac.authorization.k8s.io".to_string(), + kind: "ClusterRole".to_string(), + name: "grafana-prometheus-api-access".to_string(), + }, + } + }; + + let cluster_role_binding_cluster_monitoring = { + use k8s_openapi::api::rbac::v1::{ClusterRoleBinding, RoleRef, Subject}; + ClusterRoleBinding { + metadata: ObjectMeta { + name: Some("grafana-cluster-monitoring-view".to_string()), + ..ObjectMeta::default() + }, + subjects: Some(vec![Subject { + kind: "ServiceAccount".to_string(), + name: service_account_name.clone(), + namespace: Some(rbac_namespace.clone()), + ..Subject::default() + }]), + role_ref: RoleRef { + api_group: "rbac.authorization.k8s.io".to_string(), + kind: "ClusterRole".to_string(), + name: "cluster-monitoring-view".to_string(), + }, + } + }; + + K8sResourceScore::single(service_account, Some(rbac_namespace.clone())) + .interpret(inventory, topology) + .await?; + 
K8sResourceScore::single(cluster_role, None) + .interpret(inventory, topology) + .await?; + K8sResourceScore::single(cluster_role_binding, None) + .interpret(inventory, topology) + .await?; + K8sResourceScore::single(cluster_role_binding_cluster_monitoring, None) + .interpret(inventory, topology) + .await?; + + Ok(()) + } + + async fn create_secret( + &self, + inventory: &Inventory, + topology: &(impl Topology + K8sclient), + ) -> Result<(), InterpretError> { + let service_account_name = "grafana-prometheus-datasource-sa".to_string(); + let secret_name = "grafana-prometheus-token".to_string(); + let secret_namespace = self.namespace.clone(); + + let secret = Secret { + metadata: ObjectMeta { + name: Some(secret_name), + namespace: Some(secret_namespace), + annotations: Some({ + let mut ann = BTreeMap::new(); + ann.insert( + "kubernetes.io/service-account.name".to_string(), + service_account_name, + ); + ann + }), + ..ObjectMeta::default() + }, + type_: Some("kubernetes.io/service-account-token".to_string()), + ..Secret::default() + }; + + K8sResourceScore::single(secret, Some(self.namespace.clone())) + .interpret(inventory, topology) + .await?; + + Ok(()) + } + + async fn create_grafana( + &self, + inventory: &Inventory, + topology: &(impl Topology + K8sclient), + ) -> Result<(), InterpretError> { + let labels: BTreeMap = + [("dashboards".to_string(), "grafana".to_string())].into(); + + let mut config: BTreeMap> = BTreeMap::new(); + config.insert("log".into(), [("mode".into(), "console".into())].into()); + config.insert( + "security".into(), + [ + ("admin_user".into(), self.grafana_admin_user.clone()), + ("admin_password".into(), self.grafana_admin_password.clone()), + ] + .into(), + ); + config.insert( + "users".into(), + [("viewers_can_edit".into(), "false".into())].into(), + ); + config.insert( + "auth".into(), + [("disable_login_form".into(), "false".into())].into(), + ); + config.insert( + "auth.anonymous".into(), + [ + ("enabled".into(), "true".into()), + 
("org_role".into(), "Viewer".into()), + ] + .into(), + ); + + let resources = ResourceRequirements { + requests: [ + ("cpu".into(), "500m".into()), + ("memory".into(), "1Gi".into()), + ] + .into(), + limits: [("cpu".into(), "1".into()), ("memory".into(), "2Gi".into())].into(), + }; + + let client = topology + .k8s_client() + .await + .map_err(|e| InterpretError::new(format!("Failed to get k8s client: {e}")))?; + let distribution = client + .get_k8s_distribution() + .await + .map_err(|e| InterpretError::new(format!("Failed to detect k8s distribution: {e}")))?; + + // OpenShift → Route (operator-managed); plain k8s → Ingress (operator-managed). + let (route, ingress) = if matches!(distribution, KubernetesDistribution::OpenshiftFamily) { + debug!("OpenShift detected; Grafana CR will use .spec.route"); + let route = GrafanaRoute { + spec: Some(GrafanaRouteSpec { + port: Some(GrafanaRoutePort { target_port: 3000 }), + tls: Some(GrafanaRouteTls { + termination: Some("edge".to_string()), + insecure_edge_termination_policy: Some("Redirect".to_string()), + }), + to: Some(GrafanaRouteTarget { + kind: "Service".to_string(), + name: "cluster-grafana-service".to_string(), + weight: Some(100), + }), + }), + }; + (Some(route), None) + } else { + let hostname = client + .get_domain("cluster-grafana") + .await + .map_err(|e| InterpretError::new(format!("Failed to resolve domain: {e}")))?; + debug!("Non-OpenShift detected; Grafana CR will use .spec.ingress (host: {hostname})"); + let ingress = GrafanaIngress { + spec: Some(GrafanaIngressSpec { + ingress_class_name: None, + rules: Some(vec![GrafanaIngressRule { + host: Some(hostname), + http: Some(GrafanaIngressRuleHttp { + paths: vec![GrafanaIngressPath { + path: "/".to_string(), + path_type: "Prefix".to_string(), + backend: GrafanaIngressBackend { + service: GrafanaIngressBackendService { + name: "cluster-grafana-service".to_string(), + port: GrafanaIngressServicePort { number: 3000 }, + }, + }, + }], + }), + }]), + }), + }; + 
(None, Some(ingress)) + }; + + let grafana = Grafana { + metadata: ObjectMeta { + name: Some("cluster-grafana".to_string()), + namespace: Some(self.namespace.clone()), + labels: Some(labels), + ..ObjectMeta::default() + }, + spec: GrafanaSpec { + config: Some(config), + deployment: Some(GrafanaDeployment { + spec: Some(GrafanaDeploymentSpec { + replicas: Some(1), + template: Some(GrafanaPodTemplate { + spec: Some(GrafanaPodSpec { + containers: vec![GrafanaContainer { + name: "grafana".to_string(), + resources: Some(resources), + }], + }), + }), + }), + }), + route, + ingress, + }, + }; + + K8sResourceScore::single(grafana, Some(self.namespace.clone())) + .interpret(inventory, topology) + .await?; + + Ok(()) + } + + async fn create_datasource( + &self, + inventory: &Inventory, + topology: &(impl Topology + K8sclient), + ) -> Result<(), InterpretError> { + let labels: BTreeMap = + [("datasource".to_string(), "prometheus".to_string())].into(); + + let instance_selector = LabelSelector { + match_labels: [("dashboards".to_string(), "grafana".to_string())].into(), + match_expressions: vec![], + }; + + let datasource = GrafanaDatasource { + metadata: ObjectMeta { + name: Some("prometheus-cluster".to_string()), + namespace: Some(self.namespace.clone()), + labels: Some(labels), + ..ObjectMeta::default() + }, + spec: GrafanaDatasourceSpec { + instance_selector, + allow_cross_namespace_import: None, + datasource: GrafanaDatasourceConfig { + name: "Prometheus-Cluster".to_string(), + r#type: "prometheus".to_string(), + access: "proxy".to_string(), + url: "https://prometheus-k8s.openshift-monitoring.svc:9091".to_string(), + database: None, + is_default: Some(true), + editable: None, + json_data: Some(GrafanaDatasourceJsonData { + http_header_name1: Some("Authorization".to_string()), + tls_skip_verify: Some(true), + time_interval: Some("30s".to_string()), + oauth_pass_thru: None, + }), + secure_json_data: Some(GrafanaDatasourceSecureJsonData { + // Placeholder; real value comes 
from `values_from` at + // reconcile time (see below). + http_header_value1: Some("Bearer ${token}".to_string()), + }), + }, + values_from: Some(vec![GrafanaValueFrom { + target_path: "secureJsonData.httpHeaderValue1".to_string(), + value_from: GrafanaValueSource { + secret_key_ref: GrafanaSecretKeyRef { + name: "grafana-prometheus-token".to_string(), + key: "token".to_string(), + }, + }, + }]), + }, + }; + + K8sResourceScore::single(datasource, Some(self.namespace.clone())) + .interpret(inventory, topology) + .await?; + + Ok(()) + } + + async fn create_dashboards( + &self, + inventory: &Inventory, + topology: &(impl Topology + K8sclient), + ) -> Result<(), InterpretError> { + let dashboards: &[(&str, &str)] = &[ + ( + "okd-cluster-overview", + include_str!("dashboards/cluster-overview.json"), + ), + ( + "okd-node-health", + include_str!("dashboards/nodes-health.json"), + ), + ( + "okd-workload-health", + include_str!("dashboards/workloads-health.json"), + ), + ("okd-networking", include_str!("dashboards/networking.json")), + ("storage-health", include_str!("dashboards/storage.json")), + ("okd-etcd", include_str!("dashboards/etcd.json")), + ( + "okd-control-plane", + include_str!("dashboards/control-plane.json"), + ), + ( + "okd-alerts-events", + include_str!("dashboards/alerts-events-problems.json"), + ), + ]; + + for (dashboard_name, json_content) in dashboards { + let labels: BTreeMap = + [("dashboard".to_string(), dashboard_name.to_string())].into(); + + let instance_selector = LabelSelector { + match_labels: [("dashboards".to_string(), "grafana".to_string())].into(), + match_expressions: vec![], + }; + + let dashboard = GrafanaDashboard { + metadata: ObjectMeta { + name: Some(dashboard_name.to_string()), + namespace: Some(self.namespace.clone()), + labels: Some(labels), + ..ObjectMeta::default() + }, + spec: GrafanaDashboardSpec { + instance_selector, + json: Some(json_content.to_string()), + resync_period: None, + datasources: None, + grafana_com: None, + }, 
+ }; + + K8sResourceScore::single(dashboard, Some(self.namespace.clone())) + .interpret(inventory, topology) + .await?; + } + + Ok(()) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("ClusterDashboards") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs b/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs index c9ccacb..4c26851 100644 --- a/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs +++ b/harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs @@ -1,10 +1,29 @@ +use async_trait::async_trait; +use harmony_k8s::KubernetesDistribution; use harmony_macros::hurl; +use harmony_types::id::Id; +use k8s_openapi::api::rbac::v1::{ClusterRole, ClusterRoleBinding, PolicyRule, RoleRef, Subject}; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; +use log::debug; use non_blank_string_rs::NonBlankString; +use serde::Serialize; use std::{collections::HashMap, str::FromStr}; -use crate::modules::helm::chart::{HelmChartScore, HelmRepository}; +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::helm::chart::{HelmChartScore, HelmRepository}, + modules::k8s::resource::K8sResourceScore, + score::Score, + topology::{HelmCommand, K8sclient, Topology}, +}; -pub fn grafana_helm_chart_score(ns: &str, namespace_scope: bool) -> HelmChartScore { +pub fn grafana_helm_chart_score( + ns: &str, + namespace_scope: bool, + chart_version: Option<&str>, +) -> HelmChartScore { let mut values_overrides = HashMap::new(); values_overrides.insert( NonBlankString::from_str("namespaceScope").unwrap(), @@ -14,7 +33,7 @@ pub fn grafana_helm_chart_score(ns: &str, namespace_scope: bool) -> HelmChartSco namespace: Some(NonBlankString::from_str(ns).unwrap()), 
release_name: NonBlankString::from_str("grafana-operator").unwrap(), chart_name: NonBlankString::from_str("grafana/grafana-operator").unwrap(), - chart_version: None, + chart_version: chart_version.map(|v| NonBlankString::from_str(v).unwrap()), values_overrides: Some(values_overrides), values_yaml: None, create_namespace: true, @@ -26,3 +45,173 @@ pub fn grafana_helm_chart_score(ns: &str, namespace_scope: bool) -> HelmChartSco )), } } + +/// Cluster-scoped RBAC so grafana-operator can watch `route.openshift.io/v1.Route`. +/// The upstream chart's ClusterRole doesn't include these verbs and the chart +/// exposes no values key to extend it, so we apply them separately. +/// +/// Safe on non-OpenShift clusters: Kubernetes accepts a `ClusterRole` +/// referencing a missing API group — the rule is simply never matched — but +/// `GrafanaOperatorScore` only applies these on detected OpenShift clusters. +pub fn grafana_operator_openshift_route_rbac_scores( + ns: &str, +) -> ( + K8sResourceScore, + K8sResourceScore, +) { + let cluster_role_name = "harmony-grafana-operator-openshift-routes".to_string(); + let cluster_role_binding_name = "harmony-grafana-operator-openshift-routes-binding".to_string(); + let operator_sa_name = "grafana-operator".to_string(); + + let cluster_role = ClusterRole { + metadata: ObjectMeta { + name: Some(cluster_role_name.clone()), + ..ObjectMeta::default() + }, + rules: Some(vec![PolicyRule { + api_groups: Some(vec!["route.openshift.io".to_string()]), + resources: Some(vec!["routes".to_string(), "routes/custom-host".to_string()]), + verbs: vec![ + "get".to_string(), + "list".to_string(), + "watch".to_string(), + "create".to_string(), + "update".to_string(), + "patch".to_string(), + "delete".to_string(), + ], + ..PolicyRule::default() + }]), + ..ClusterRole::default() + }; + + let cluster_role_binding = ClusterRoleBinding { + metadata: ObjectMeta { + name: Some(cluster_role_binding_name), + ..ObjectMeta::default() + }, + subjects: 
Some(vec![Subject { + kind: "ServiceAccount".to_string(), + name: operator_sa_name, + namespace: Some(ns.to_string()), + ..Subject::default() + }]), + role_ref: RoleRef { + api_group: "rbac.authorization.k8s.io".to_string(), + kind: "ClusterRole".to_string(), + name: cluster_role_name, + }, + }; + + ( + K8sResourceScore::single(cluster_role, None), + K8sResourceScore::single(cluster_role_binding, None), + ) +} + +/// Composite score: installs grafana-operator via Helm, and on OpenShift-family +/// clusters also applies the `route.openshift.io` RBAC the operator needs to +/// reconcile Routes. Distribution is detected at interpret time via the +/// cluster's API discovery — no flag needed at call time. +#[derive(Debug, Clone, Serialize)] +pub struct GrafanaOperatorScore { + pub namespace: String, + pub namespace_scope: bool, + pub chart_version: Option, +} + +impl GrafanaOperatorScore { + pub fn new(namespace: &str, chart_version: Option<&str>) -> Self { + Self { + namespace: namespace.to_string(), + namespace_scope: false, + chart_version: chart_version.map(|v| v.to_string()), + } + } +} + +impl Score for GrafanaOperatorScore { + fn create_interpret(&self) -> Box> { + Box::new(GrafanaOperatorInterpret { + namespace: self.namespace.clone(), + namespace_scope: self.namespace_scope, + chart_version: self.chart_version.clone(), + }) + } + + fn name(&self) -> String { + format!("GrafanaOperatorScore({})", self.namespace) + } +} + +#[derive(Debug, Clone)] +struct GrafanaOperatorInterpret { + namespace: String, + namespace_scope: bool, + chart_version: Option, +} + +#[async_trait] +impl Interpret for GrafanaOperatorInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + let client = topology + .k8s_client() + .await + .map_err(|e| InterpretError::new(format!("Failed to get k8s client: {e}")))?; + + let distribution = client + .get_k8s_distribution() + .await + .map_err(|e| InterpretError::new(format!("Failed to detect k8s 
distribution: {e}")))?; + + if matches!(distribution, KubernetesDistribution::OpenshiftFamily) { + debug!( + "OpenShift detected; applying grafana-operator Route RBAC in namespace {}", + self.namespace + ); + let (cr, crb) = grafana_operator_openshift_route_rbac_scores(&self.namespace); + cr.create_interpret().execute(inventory, topology).await?; + crb.create_interpret().execute(inventory, topology).await?; + } else { + debug!( + "Non-OpenShift distribution ({:?}); skipping Route RBAC", + distribution + ); + } + + let helm_score = grafana_helm_chart_score( + &self.namespace, + self.namespace_scope, + self.chart_version.as_deref(), + ); + helm_score + .create_interpret() + .execute(inventory, topology) + .await?; + + Ok(Outcome::success(format!( + "grafana-operator installed in namespace '{}' (distribution: {:?})", + self.namespace, distribution + ))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("GrafanaOperator") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs index 386890e..1b197ce 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs @@ -6,7 +6,13 @@ use serde::{Deserialize, Serialize}; use super::crd_prometheuses::LabelSelector; -#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +/// `Grafana` CR binding — audited against +/// `grafanas.grafana.integreatly.org/v1beta1` on grafana-operator v5.22. +/// Only the fields actively consumed by harmony callers are modeled. 
+/// `.spec.config` is `map[string]map[string]string` upstream (grafana.ini +/// sections); it is modeled as a nested `BTreeMap` rather than a struct to +/// avoid losing sections like `auth.anonymous` (dotted keys). +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] #[kube( group = "grafana.integreatly.org", version = "v1beta1", @@ -16,81 +22,177 @@ use super::crd_prometheuses::LabelSelector; )] #[serde(rename_all = "camelCase")] pub struct GrafanaSpec { + /// `grafana.ini` content. Outer map key = section name (e.g. `security`, + /// `auth.anonymous`); inner map = key/value pairs in that section. #[serde(default, skip_serializing_if = "Option::is_none")] - pub config: Option, + pub config: Option>>, #[serde(default, skip_serializing_if = "Option::is_none")] - pub admin_user: Option, + pub deployment: Option, + /// OpenShift-only: reconciled by grafana-operator when the + /// `route.openshift.io` CRD is present. #[serde(default, skip_serializing_if = "Option::is_none")] - pub admin_password: Option, + pub route: Option, + /// Standard k8s Ingress: reconciled by grafana-operator on non-OpenShift + /// clusters. Mutually exclusive with `route` in practice. 
#[serde(default, skip_serializing_if = "Option::is_none")] pub ingress: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaDeployment { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub spec: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaDeploymentSpec { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub replicas: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub persistence: Option, + pub template: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaPodTemplate { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub spec: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaPodSpec { + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub containers: Vec, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaContainer { + pub name: String, #[serde(default, skip_serializing_if = "Option::is_none")] pub resources: Option, } -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] #[serde(rename_all = "camelCase")] -pub struct GrafanaConfig { +pub struct GrafanaRoute { #[serde(default, skip_serializing_if = "Option::is_none")] - pub log: Option, + pub spec: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaRouteSpec { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub port: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub security: Option, + pub tls: Option, + + 
#[serde(default, skip_serializing_if = "Option::is_none")] + pub to: Option, } #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] #[serde(rename_all = "camelCase")] -pub struct GrafanaLogConfig { - #[serde(default, skip_serializing_if = "Option::is_none")] - pub mode: Option, - - #[serde(default, skip_serializing_if = "Option::is_none")] - pub level: Option, +pub struct GrafanaRoutePort { + /// Upstream schema is int-or-string; we only use integer. + pub target_port: i32, } -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] #[serde(rename_all = "camelCase")] -pub struct GrafanaSecurityConfig { +pub struct GrafanaRouteTls { #[serde(default, skip_serializing_if = "Option::is_none")] - pub admin_user: Option, + pub termination: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub admin_password: Option, + pub insecure_edge_termination_policy: Option, } -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaRouteTarget { + pub kind: String, + pub name: String, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub weight: Option, +} + +// ---- Ingress types (mirrors standard k8s IngressSpec, narrow subset) ---- + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct GrafanaIngress { #[serde(default, skip_serializing_if = "Option::is_none")] - pub enabled: Option, - - #[serde(default, skip_serializing_if = "Option::is_none")] - pub hosts: Option>, + pub spec: Option, } -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] #[serde(rename_all = "camelCase")] -pub struct GrafanaPersistence { +pub struct GrafanaIngressSpec { #[serde(default, skip_serializing_if = "Option::is_none")] - pub 
enabled: Option, + pub ingress_class_name: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub storage_class_name: Option, + pub rules: Option>, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaIngressRule { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub host: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub size: Option, + pub http: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaIngressRuleHttp { + pub paths: Vec, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaIngressPath { + pub path: String, + pub path_type: String, + pub backend: GrafanaIngressBackend, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaIngressBackend { + pub service: GrafanaIngressBackendService, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaIngressBackendService { + pub name: String, + pub port: GrafanaIngressServicePort, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct GrafanaIngressServicePort { + pub number: i32, } // ------------------------------------------------------------------------------------------------ -#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] #[kube( group = "grafana.integreatly.org", version = "v1beta1", @@ -135,7 +237,7 @@ pub struct GrafanaCom { // ------------------------------------------------------------------------------------------------ -#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, 
JsonSchema)] +#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] #[kube( group = "grafana.integreatly.org", version = "v1beta1", @@ -176,7 +278,7 @@ pub struct GrafanaSecretKeyRef { pub key: String, } -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, Default, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct GrafanaDatasourceConfig { pub access: String, @@ -235,3 +337,23 @@ pub struct ResourceRequirements { #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] pub requests: BTreeMap, } + +// `Default` impls on the `CustomResource`-generated wrappers so they satisfy +// the `K: Default` bound on `K8sResourceScore`. +impl Default for Grafana { + fn default() -> Self { + Grafana::new("", GrafanaSpec::default()) + } +} + +impl Default for GrafanaDashboard { + fn default() -> Self { + GrafanaDashboard::new("", GrafanaDashboardSpec::default()) + } +} + +impl Default for GrafanaDatasource { + fn default() -> Self { + GrafanaDatasource::new("", GrafanaDatasourceSpec::default()) + } +} diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_grafana.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_grafana.rs index 65efab9..8020950 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_grafana.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/rhob_grafana.rs @@ -1,3 +1,34 @@ +//! ⚠️ **STALE DUPLICATE — DO NOT COPY FROM** +//! +//! This file is a near-identical duplicate of `crd_grafana.rs` from before +//! that file was audited against the upstream +//! `grafanas.grafana.integreatly.org/v1beta1` schema (grafana-operator +//! v5.22). Fields defined below are known to be **wrong** relative to +//! upstream, in particular: +//! +//! - `GrafanaSpec.admin_user` / `admin_password` — do not exist at +//! `.spec` top-level upstream; the real location is +//! `.spec.config.security.admin_user/admin_password`. +//! 
- `GrafanaSpec.persistence` — upstream key is `persistentVolumeClaim`, +//! so writes here are silently dropped. +//! - `GrafanaSpec.resources` — there is no `.spec.resources` upstream at +//! all (container resources belong under +//! `.spec.deployment.spec.template.spec.containers[].resources`). +//! - `GrafanaSpec.ingress` — upstream `ingress` is `{ metadata, spec }`, +//! not `{ enabled, hosts }` as modeled here. +//! - `GrafanaConfig` as a typed struct — upstream `.spec.config` is +//! `map[string]map[string]string` (grafana.ini sections). The struct +//! form here cannot express sections like `auth.anonymous` (dotted +//! keys) and loses anything beyond `log`/`security`. +//! +//! This file is kept only because `rhob_alerting_score.rs` still builds +//! against it, and that caller happens to construct `GrafanaSpec` with +//! every field set to `None` — so the bugs are latent, not active. +//! +//! If you need a correct binding, use `crd_grafana.rs`. If you extend this +//! file, port the changes to `crd_grafana.rs` first, then dedupe — don't +//! spread the rot. 
+ use std::collections::BTreeMap; use kube::CustomResource; diff --git a/harmony/src/modules/monitoring/mod.rs b/harmony/src/modules/monitoring/mod.rs index 7f07d5a..aa08e7a 100644 --- a/harmony/src/modules/monitoring/mod.rs +++ b/harmony/src/modules/monitoring/mod.rs @@ -1,6 +1,7 @@ pub mod alert_channel; pub mod alert_rule; pub mod application_monitoring; +pub mod cluster_dashboards; pub mod grafana; pub mod kube_prometheus; pub mod ntfy; diff --git a/harmony/src/modules/monitoring/prometheus/prometheus.rs b/harmony/src/modules/monitoring/prometheus/prometheus.rs index 2fe0d06..4904b4d 100644 --- a/harmony/src/modules/monitoring/prometheus/prometheus.rs +++ b/harmony/src/modules/monitoring/prometheus/prometheus.rs @@ -114,7 +114,7 @@ impl Prometheus { }; if let Some(ns) = namespace.as_deref() { - grafana_helm_chart_score(ns, false) + grafana_helm_chart_score(ns, false, None) .interpret(inventory, topology) .await } else { diff --git a/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs b/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs index 586029b..136e1a2 100644 --- a/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs +++ b/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs @@ -542,14 +542,7 @@ impl K8sPrometheusCRDAlertingInterpret { labels: Some(label.clone()), ..Default::default() }, - spec: GrafanaSpec { - config: None, - admin_user: None, - admin_password: None, - ingress: None, - persistence: None, - resources: None, - }, + spec: GrafanaSpec::default(), }; client .apply(&grafana, Some(&self.sender.namespace.clone())) diff --git a/harmony/src/modules/prometheus/rhob_alerting_score.rs b/harmony/src/modules/prometheus/rhob_alerting_score.rs index 8a85d1b..1d31a71 100644 --- a/harmony/src/modules/prometheus/rhob_alerting_score.rs +++ b/harmony/src/modules/prometheus/rhob_alerting_score.rs @@ -12,6 +12,9 @@ use std::process::Command; use crate::modules::k8s::ingress::{K8sIngressScore, PathType}; use 
crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard; use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability; +// NOTE: `rhob_grafana` is a stale, incorrect duplicate of `crd_grafana`. +// See the warning at the top of `rhob_grafana.rs`. Prefer `crd_grafana` +// for any new work. use crate::modules::monitoring::kube_prometheus::crd::rhob_grafana::{ Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig, GrafanaDatasourceSpec, GrafanaSpec,