Compare commits

..

3 Commits

Author SHA1 Message Date
a9fe4ab267 fix: cargo fmt
All checks were successful
Run Check Script / check (pull_request) Successful in 1m0s
2025-08-25 13:33:36 -04:00
65cc9befeb mod.rs
Some checks failed
Run Check Script / check (pull_request) Failing after 20s
2025-08-25 13:31:39 -04:00
d456a1f9ee feat: score to validate whether the ceph cluster is healthy 2025-08-25 13:30:32 -04:00
9 changed files with 171 additions and 155 deletions

View File

@@ -1,4 +1,4 @@
FROM docker.io/rust:1.89.0 AS build
FROM docker.io/rust:1.87.0 AS build
WORKDIR /app
@@ -6,7 +6,7 @@ COPY . .
RUN cargo build --release --bin harmony_composer
FROM docker.io/rust:1.89.0
FROM docker.io/rust:1.87.0
WORKDIR /app

View File

@@ -0,0 +1,11 @@
[package]
name = "example_validate_ceph_cluster_health"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true

View File

@@ -0,0 +1,18 @@
use harmony::{
inventory::Inventory,
modules::storage::ceph::ceph_validate_health_score::CephVerifyClusterHealth,
topology::K8sAnywhereTopology,
};
#[tokio::main]
async fn main() {
let ceph_health_score = CephVerifyClusterHealth {
rook_ceph_namespace: "rook-ceph".to_string(),
};
let topology = K8sAnywhereTopology::from_env();
let inventory = Inventory::autoload();
harmony_cli::run(inventory, topology, vec![Box::new(ceph_health_score)], None)
.await
.unwrap();
}

View File

@@ -32,6 +32,7 @@ pub enum InterpretName {
Lamp,
ApplicationMonitoring,
K8sPrometheusCrdAlerting,
CephClusterHealth,
}
impl std::fmt::Display for InterpretName {
@@ -58,6 +59,7 @@ impl std::fmt::Display for InterpretName {
InterpretName::Lamp => f.write_str("LAMP"),
InterpretName::ApplicationMonitoring => f.write_str("ApplicationMonitoring"),
InterpretName::K8sPrometheusCrdAlerting => f.write_str("K8sPrometheusCrdAlerting"),
InterpretName::CephClusterHealth => f.write_str("CephClusterHealth"),
}
}
}

View File

@@ -0,0 +1,136 @@
use std::{sync::Arc, time::Duration};
use async_trait::async_trait;
use log::debug;
use serde::Serialize;
use tokio::time::Instant;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{K8sclient, Topology, k8s::K8sClient},
};
#[derive(Clone, Debug, Serialize)]
pub struct CephVerifyClusterHealth {
pub rook_ceph_namespace: String,
}
impl<T: Topology + K8sclient> Score<T> for CephVerifyClusterHealth {
fn name(&self) -> String {
format!("CephValidateClusterHealth")
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(CephVerifyClusterHealthInterpret {
score: self.clone(),
})
}
}
#[derive(Clone, Debug)]
pub struct CephVerifyClusterHealthInterpret {
score: CephVerifyClusterHealth,
}
#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for CephVerifyClusterHealthInterpret {
async fn execute(
&self,
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let client = topology.k8s_client().await.unwrap();
self.verify_ceph_toolbox_exists(client.clone()).await?;
self.validate_ceph_cluster_health(client.clone()).await?;
Ok(Outcome::success("Ceph cluster healthy".to_string()))
}
fn get_name(&self) -> InterpretName {
InterpretName::CephClusterHealth
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}
impl CephVerifyClusterHealthInterpret {
pub async fn verify_ceph_toolbox_exists(
&self,
client: Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
let toolbox_dep = "rook-ceph-tools".to_string();
match client
.get_deployment(&toolbox_dep, Some(&self.score.rook_ceph_namespace))
.await
{
Ok(Some(deployment)) => {
if let Some(status) = deployment.status {
let ready_count = status.ready_replicas.unwrap_or(0);
if ready_count >= 1 {
return Ok(Outcome::success(format!(
"'{}' is ready with {} replica(s).",
&toolbox_dep, ready_count
)));
} else {
return Err(InterpretError::new(
"ceph-tool-box not ready in cluster".to_string(),
));
}
} else {
Err(InterpretError::new(format!(
"failed to get deployment status {}",
&toolbox_dep
)))
}
}
Ok(None) => Err(InterpretError::new(format!(
"Deployment '{}' not found in namespace '{}'.",
&toolbox_dep, self.score.rook_ceph_namespace
))),
Err(e) => Err(InterpretError::new(format!(
"Failed to query for deployment '{}': {}",
&toolbox_dep, e
))),
}
}
pub async fn validate_ceph_cluster_health(
&self,
client: Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
debug!("Verifying ceph cluster is in healthy state");
let health = client
.exec_app_capture_output(
"rook-ceph-tools".to_string(),
"app".to_string(),
Some(&self.score.rook_ceph_namespace),
vec!["sh", "-c", "ceph health"],
)
.await?;
if health.contains("HEALTH_OK") {
return Ok(Outcome::success(
"Ceph Cluster in healthy state".to_string(),
));
} else {
Err(InterpretError::new(format!(
"Ceph cluster unhealthy {}",
health
)))
}
}
}

View File

@@ -1,4 +1,2 @@
pub mod ceph_remove_osd_score;
pub mod rook_ceph_helm_chart_score;
pub mod rook_ceph_cluster_helm_chart_score;
pub mod rook_ceph_install_score;
pub mod ceph_osd_replacement_score;
pub mod ceph_validate_health_score;

View File

@@ -1,44 +0,0 @@
use std::str::FromStr;
use non_blank_string_rs::NonBlankString;
use crate::modules::helm::chart::HelmChartScore;
pub fn rook_ceph_cluster_helm_chart(ns: &str) -> HelmChartScore {
let values = r#"
monitoring:
enabled: true
createPrometheusRules: true
cephClusterSpec:
placement:
all:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: storage-node
operator: In
values:
- "true"
dashboard:
ssl: false
prometheusEndpoint: http://prometheus-operated:9090
prometheusEndpointSSLVerify: false
toolbox:
enabled: true
"#
.to_string();
HelmChartScore {
namespace: Some(NonBlankString::from_str(ns).unwrap()),
release_name: NonBlankString::from_str("rook-ceph").unwrap(),
chart_name: NonBlankString::from_str("https://charts.rook.io/release/rook-release/rook-ceph-cluster").unwrap(),
chart_version: todo!(),
values_overrides: todo!(),
values_yaml: Some(values.to_string()),
create_namespace: todo!(),
install_only: todo!(),
repository: todo!(),
}
}

View File

@@ -1,24 +0,0 @@
use std::str::FromStr;
use non_blank_string_rs::NonBlankString;
use crate::modules::helm::chart::HelmChartScore;
pub fn rook_ceph_helm_chart(ns: &str) -> HelmChartScore {
let values = r#"
monitoring:
enabled: true
"#
.to_string();
HelmChartScore {
namespace: Some(NonBlankString::from_str(ns).unwrap()),
release_name: NonBlankString::from_str("rook-ceph").unwrap(),
chart_name: NonBlankString::from_str("https://charts.rook.io/release/rook-release/rook-ceph").unwrap(),
chart_version: todo!(),
values_overrides: todo!(),
values_yaml: Some(values.to_string()),
create_namespace: todo!(),
install_only: todo!(),
repository: todo!(),
}
}

View File

@@ -1,81 +0,0 @@
use serde::Serialize;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{HelmCommand, Topology},
};
#[derive(Debug, Clone, Serialize)]
pub struct RookCephInstall {
namespace: String,
}
impl<T: Topology + HelmCommand> Score<T> for RookCephInstall {
fn name(&self) -> String {
"RookCephInstall".to_string()
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(RookCephInstallInterpret {
score: self.score.clone(),
})
}
}
#[derive(Debug, Clone)]
pub struct RookCephInstallInterpret {
score: RookCephInstall,
}
#[async_trait]
impl<T: Topology + HelmCommand> Interpret<T> for RookCephInstallInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<InterpretError, Outcome> {
self.label_nodes();
self.install_rook_helm_chart(self.score.namespace);
self.install_rook_cluster_helm_chart(self.score.namespace);
//TODO I think we will need to add a capability OCClient to interact with the okd
//cli tool
self.add_oc_adm_policy(self.score.namespace);
}
fn get_name(&self) -> InterpretName {
todo!()
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}
impl RookCephInstallInterpret {
fn label_nodes(&self) -> _ {
todo!()
}
fn install_rook_helm_chart(&self, namespace: String) -> _ {
todo!()
}
fn install_rook_cluster_helm_chart(&self, namespace: String) -> _ {
todo!()
}
fn add_oc_adm_policy(&self, namespace: String) -> _ {
todo!()
}
}