Compare commits

..

6 Commits

Author SHA1 Message Date
a9fe4ab267 fix: cargo fmt
All checks were successful
Run Check Script / check (pull_request) Successful in 1m0s
2025-08-25 13:33:36 -04:00
65cc9befeb mod.rs
Some checks failed
Run Check Script / check (pull_request) Failing after 20s
2025-08-25 13:31:39 -04:00
d456a1f9ee feat: score to validate whether the ceph cluster is healthy 2025-08-25 13:30:32 -04:00
d36c574590 Merge pull request 'feat/inventory_agent' (#119) from feat/inventory_agent into master
Some checks failed
Run Check Script / check (push) Failing after 38s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 5m48s
Reviewed-on: #119
2025-08-22 01:55:52 +00:00
72fb05b5cc fix(inventory_agent) : Agent now retreives correct dmidecode fields, fixed uuid generation which is unacceptable, fixed storage drive parsing, much better error handling, much more strict behavior which also leads to more complete output as missing fields will raise errors unless explicitely optional 2025-08-19 17:56:06 -04:00
6685b05cc5 wip(inventory_agent): Refactoring for better error handling in progress 2025-08-19 17:05:23 -04:00
9 changed files with 742 additions and 314 deletions

5
Cargo.lock generated
View File

@@ -105,7 +105,7 @@ dependencies = [
"futures-core",
"futures-util",
"mio 1.0.4",
"socket2",
"socket2 0.5.10",
"tokio",
"tracing",
]
@@ -167,7 +167,7 @@ dependencies = [
"serde_json",
"serde_urlencoded",
"smallvec",
"socket2",
"socket2 0.5.10",
"time",
"tracing",
"url",
@@ -2178,7 +2178,6 @@ dependencies = [
"serde",
"serde_json",
"sysinfo",
"uuid",
]
[[package]]

View File

@@ -0,0 +1,11 @@
[package]
name = "example_validate_ceph_cluster_health"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true

View File

@@ -0,0 +1,18 @@
use harmony::{
inventory::Inventory,
modules::storage::ceph::ceph_validate_health_score::CephVerifyClusterHealth,
topology::K8sAnywhereTopology,
};
#[tokio::main]
async fn main() {
let ceph_health_score = CephVerifyClusterHealth {
rook_ceph_namespace: "rook-ceph".to_string(),
};
let topology = K8sAnywhereTopology::from_env();
let inventory = Inventory::autoload();
harmony_cli::run(inventory, topology, vec![Box::new(ceph_health_score)], None)
.await
.unwrap();
}

View File

@@ -32,6 +32,7 @@ pub enum InterpretName {
Lamp,
ApplicationMonitoring,
K8sPrometheusCrdAlerting,
CephClusterHealth,
}
impl std::fmt::Display for InterpretName {
@@ -58,6 +59,7 @@ impl std::fmt::Display for InterpretName {
InterpretName::Lamp => f.write_str("LAMP"),
InterpretName::ApplicationMonitoring => f.write_str("ApplicationMonitoring"),
InterpretName::K8sPrometheusCrdAlerting => f.write_str("K8sPrometheusCrdAlerting"),
InterpretName::CephClusterHealth => f.write_str("CephClusterHealth"),
}
}
}

View File

@@ -0,0 +1,136 @@
use std::{sync::Arc, time::Duration};
use async_trait::async_trait;
use log::debug;
use serde::Serialize;
use tokio::time::Instant;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{K8sclient, Topology, k8s::K8sClient},
};
#[derive(Clone, Debug, Serialize)]
pub struct CephVerifyClusterHealth {
pub rook_ceph_namespace: String,
}
impl<T: Topology + K8sclient> Score<T> for CephVerifyClusterHealth {
fn name(&self) -> String {
format!("CephValidateClusterHealth")
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(CephVerifyClusterHealthInterpret {
score: self.clone(),
})
}
}
#[derive(Clone, Debug)]
pub struct CephVerifyClusterHealthInterpret {
score: CephVerifyClusterHealth,
}
#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for CephVerifyClusterHealthInterpret {
async fn execute(
&self,
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let client = topology.k8s_client().await.unwrap();
self.verify_ceph_toolbox_exists(client.clone()).await?;
self.validate_ceph_cluster_health(client.clone()).await?;
Ok(Outcome::success("Ceph cluster healthy".to_string()))
}
fn get_name(&self) -> InterpretName {
InterpretName::CephClusterHealth
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}
impl CephVerifyClusterHealthInterpret {
pub async fn verify_ceph_toolbox_exists(
&self,
client: Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
let toolbox_dep = "rook-ceph-tools".to_string();
match client
.get_deployment(&toolbox_dep, Some(&self.score.rook_ceph_namespace))
.await
{
Ok(Some(deployment)) => {
if let Some(status) = deployment.status {
let ready_count = status.ready_replicas.unwrap_or(0);
if ready_count >= 1 {
return Ok(Outcome::success(format!(
"'{}' is ready with {} replica(s).",
&toolbox_dep, ready_count
)));
} else {
return Err(InterpretError::new(
"ceph-tool-box not ready in cluster".to_string(),
));
}
} else {
Err(InterpretError::new(format!(
"failed to get deployment status {}",
&toolbox_dep
)))
}
}
Ok(None) => Err(InterpretError::new(format!(
"Deployment '{}' not found in namespace '{}'.",
&toolbox_dep, self.score.rook_ceph_namespace
))),
Err(e) => Err(InterpretError::new(format!(
"Failed to query for deployment '{}': {}",
&toolbox_dep, e
))),
}
}
pub async fn validate_ceph_cluster_health(
&self,
client: Arc<K8sClient>,
) -> Result<Outcome, InterpretError> {
debug!("Verifying ceph cluster is in healthy state");
let health = client
.exec_app_capture_output(
"rook-ceph-tools".to_string(),
"app".to_string(),
Some(&self.score.rook_ceph_namespace),
vec!["sh", "-c", "ceph health"],
)
.await?;
if health.contains("HEALTH_OK") {
return Ok(Outcome::success(
"Ceph Cluster in healthy state".to_string(),
));
} else {
Err(InterpretError::new(format!(
"Ceph cluster unhealthy {}",
health
)))
}
}
}

View File

@@ -1 +1,2 @@
pub mod ceph_osd_replacement_score;
pub mod ceph_validate_health_score;

View File

@@ -10,4 +10,3 @@ serde.workspace = true
serde_json.workspace = true
log.workspace = true
env_logger.workspace = true
uuid.workspace = true

File diff suppressed because it is too large Load Diff

View File

@@ -9,8 +9,16 @@ mod hwinfo;
async fn inventory() -> impl Responder {
log::info!("Received inventory request");
let host = PhysicalHost::gather();
log::info!("Inventory data gathered successfully");
actix_web::HttpResponse::Ok().json(host)
match host {
Ok(host) => {
log::info!("Inventory data gathered successfully");
actix_web::HttpResponse::Ok().json(host)
}
Err(error) => {
log::error!("Inventory data gathering FAILED");
actix_web::HttpResponse::InternalServerError().json(error)
}
}
}
#[actix_web::main]