From 52c258704eea8d3beb46c9c1a033c762e1cc77c1 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Thu, 21 May 2026 09:22:48 -0400 Subject: [PATCH 01/19] feat(storage/ceph): add typed Rook-Ceph v1 CRDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce kube::CustomResource-derived Rust types for the four ceph.rook.io/v1 kinds we'll need to drive a Ceph install end-to-end: CephCluster, CephBlockPool, CephFilesystem, CephObjectStore. Shared spec primitives (PoolSpec, ReplicatedSpec, ErasureCodedSpec, FailureDomain, MetadataServerSpec, PlacementSpec, VolumeClaimTemplate) live in crd/shared.rs. These are stand-alone data types with no Score impl yet — follow-up commits will add the operator-install and cluster-apply Scores that consume them. Mirrors the typed-CRD pattern from postgresql/cnpg/crd.rs. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../modules/storage/ceph/crd/block_pool.rs | 53 ++++ .../src/modules/storage/ceph/crd/cluster.rs | 258 ++++++++++++++++++ .../modules/storage/ceph/crd/filesystem.rs | 54 ++++ harmony/src/modules/storage/ceph/crd/mod.rs | 11 + .../modules/storage/ceph/crd/object_store.rs | 74 +++++ .../src/modules/storage/ceph/crd/shared.rs | 117 ++++++++ harmony/src/modules/storage/ceph/mod.rs | 1 + 7 files changed, 568 insertions(+) create mode 100644 harmony/src/modules/storage/ceph/crd/block_pool.rs create mode 100644 harmony/src/modules/storage/ceph/crd/cluster.rs create mode 100644 harmony/src/modules/storage/ceph/crd/filesystem.rs create mode 100644 harmony/src/modules/storage/ceph/crd/mod.rs create mode 100644 harmony/src/modules/storage/ceph/crd/object_store.rs create mode 100644 harmony/src/modules/storage/ceph/crd/shared.rs diff --git a/harmony/src/modules/storage/ceph/crd/block_pool.rs b/harmony/src/modules/storage/ceph/crd/block_pool.rs new file mode 100644 index 00000000..397436a0 --- /dev/null +++ b/harmony/src/modules/storage/ceph/crd/block_pool.rs @@ -0,0 +1,53 @@ +use 
std::collections::BTreeMap; + +use kube::{CustomResource, api::ObjectMeta}; +use serde::{Deserialize, Serialize}; + +use super::shared::{ErasureCodedSpec, FailureDomain, ReplicatedSpec}; + +#[derive(CustomResource, Deserialize, Serialize, Clone, Debug)] +#[kube( + group = "ceph.rook.io", + version = "v1", + kind = "CephBlockPool", + plural = "cephblockpools", + namespaced = true, + schema = "disabled" +)] +#[serde(rename_all = "camelCase")] +pub struct CephBlockPoolSpec { + #[serde(skip_serializing_if = "Option::is_none")] + pub replicated: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub erasure_coded: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub failure_domain: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub device_class: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub parameters: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub enable_rbd_stats: Option, +} + +impl Default for CephBlockPool { + fn default() -> Self { + Self { + metadata: ObjectMeta::default(), + spec: CephBlockPoolSpec::default(), + } + } +} + +impl Default for CephBlockPoolSpec { + fn default() -> Self { + Self { + replicated: Some(ReplicatedSpec::default()), + erasure_coded: None, + failure_domain: Some(FailureDomain::Host), + device_class: None, + parameters: None, + enable_rbd_stats: None, + } + } +} diff --git a/harmony/src/modules/storage/ceph/crd/cluster.rs b/harmony/src/modules/storage/ceph/crd/cluster.rs new file mode 100644 index 00000000..ec8e29f4 --- /dev/null +++ b/harmony/src/modules/storage/ceph/crd/cluster.rs @@ -0,0 +1,258 @@ +use std::collections::BTreeMap; + +use kube::{CustomResource, api::ObjectMeta}; +use serde::{Deserialize, Serialize}; + +use super::shared::{PlacementSpec, VolumeClaimTemplate}; + +#[derive(CustomResource, Deserialize, Serialize, Clone, Debug)] +#[kube( + group = "ceph.rook.io", + version = "v1", + kind = "CephCluster", + plural = "cephclusters", + namespaced = true, 
+ schema = "disabled" +)] +#[serde(rename_all = "camelCase")] +pub struct CephClusterSpec { + pub ceph_version: CephVersionSpec, + pub data_dir_host_path: String, + pub mon: MonSpec, + pub mgr: MgrSpec, + pub dashboard: DashboardSpec, + pub storage: StorageSpec, + #[serde(skip_serializing_if = "Option::is_none")] + pub network: Option, + #[serde(skip_serializing_if = "BTreeMap::is_empty", default)] + pub placement: BTreeMap, + #[serde(skip_serializing_if = "Option::is_none")] + pub crash_collector: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub log_collector: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub remove_osds_if_out_and_safe_to_remove: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub continue_upgrade_after_checks_even_if_not_healthy: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub skip_upgrade_checks: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub wait_timeout_for_healthy_osd_in_minutes: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub disruption_management: Option, +} + +impl Default for CephCluster { + fn default() -> Self { + Self { + metadata: ObjectMeta::default(), + spec: CephClusterSpec::default(), + } + } +} + +impl Default for CephClusterSpec { + fn default() -> Self { + Self { + ceph_version: CephVersionSpec::default(), + data_dir_host_path: "/var/lib/rook".to_string(), + mon: MonSpec::default(), + mgr: MgrSpec::default(), + dashboard: DashboardSpec::default(), + storage: StorageSpec::default(), + network: None, + placement: BTreeMap::new(), + crash_collector: None, + log_collector: None, + remove_osds_if_out_and_safe_to_remove: None, + continue_upgrade_after_checks_even_if_not_healthy: None, + skip_upgrade_checks: None, + wait_timeout_for_healthy_osd_in_minutes: None, + disruption_management: None, + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct CephVersionSpec { + pub image: 
String, + #[serde(skip_serializing_if = "Option::is_none")] + pub allow_unsupported: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub image_pull_policy: Option, +} + +impl Default for CephVersionSpec { + fn default() -> Self { + Self { + image: "quay.io/ceph/ceph:v19.2.3".to_string(), + allow_unsupported: Some(false), + image_pull_policy: Some("IfNotPresent".to_string()), + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct MonSpec { + pub count: u32, + pub allow_multiple_per_node: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub volume_claim_template: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub failure_domain_label: Option, +} + +impl Default for MonSpec { + fn default() -> Self { + Self { + count: 3, + allow_multiple_per_node: false, + volume_claim_template: None, + failure_domain_label: None, + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct MgrSpec { + pub count: u32, + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub modules: Vec, +} + +impl Default for MgrSpec { + fn default() -> Self { + Self { + count: 2, + modules: vec![ModuleSpec { + name: "pg_autoscaler".to_string(), + enabled: true, + }], + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct ModuleSpec { + pub name: String, + pub enabled: bool, +} + +#[derive(Deserialize, Serialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct DashboardSpec { + pub enabled: bool, + pub ssl: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub port: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub url_prefix: Option, +} + +impl Default for DashboardSpec { + fn default() -> Self { + Self { + enabled: true, + ssl: true, + port: Some(8443), + url_prefix: None, + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] 
+#[serde(rename_all = "camelCase")] +pub struct StorageSpec { + pub use_all_nodes: bool, + pub use_all_devices: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub device_filter: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub device_path_filter: Option, + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub nodes: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub config: Option>, + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub storage_class_device_sets: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub allow_device_class_update: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub allow_osd_crush_weight_update: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct NodeSpec { + pub name: String, + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub devices: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub config: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct DeviceSpec { + pub name: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub config: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct StorageClassDeviceSet { + pub name: String, + pub count: u32, + pub portable: bool, + pub volume_claim_templates: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub placement: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct NetworkSpec { + #[serde(skip_serializing_if = "Option::is_none")] + pub provider: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub host_network: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub dual_stack: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ipv4: Option, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub ipv6: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct CrashCollectorSpec { + pub disable: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub days_to_retain: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct LogCollectorSpec { + pub enabled: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub periodicity: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub max_log_size: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct DisruptionManagementSpec { + pub manage_pod_budgets: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub osd_maintenance_timeout: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub pg_health_check_timeout: Option, +} diff --git a/harmony/src/modules/storage/ceph/crd/filesystem.rs b/harmony/src/modules/storage/ceph/crd/filesystem.rs new file mode 100644 index 00000000..5f87a7f7 --- /dev/null +++ b/harmony/src/modules/storage/ceph/crd/filesystem.rs @@ -0,0 +1,54 @@ +use kube::{CustomResource, api::ObjectMeta}; +use serde::{Deserialize, Serialize}; + +use super::shared::{MetadataServerSpec, NamedPoolSpec, PoolSpec, ReplicatedSpec}; + +#[derive(CustomResource, Deserialize, Serialize, Clone, Debug)] +#[kube( + group = "ceph.rook.io", + version = "v1", + kind = "CephFilesystem", + plural = "cephfilesystems", + namespaced = true, + schema = "disabled" +)] +#[serde(rename_all = "camelCase")] +pub struct CephFilesystemSpec { + pub metadata_pool: PoolSpec, + pub data_pools: Vec, + pub metadata_server: MetadataServerSpec, + #[serde(skip_serializing_if = "Option::is_none")] + pub preserve_filesystem_on_delete: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub preserve_pools_on_delete: Option, +} + +impl Default for 
CephFilesystem { + fn default() -> Self { + Self { + metadata: ObjectMeta::default(), + spec: CephFilesystemSpec::default(), + } + } +} + +impl Default for CephFilesystemSpec { + fn default() -> Self { + Self { + metadata_pool: PoolSpec { + replicated: Some(ReplicatedSpec::default()), + ..PoolSpec::default() + }, + data_pools: vec![NamedPoolSpec { + name: "data0".to_string(), + spec: PoolSpec { + replicated: Some(ReplicatedSpec::default()), + ..PoolSpec::default() + }, + }], + metadata_server: MetadataServerSpec::default(), + preserve_filesystem_on_delete: Some(true), + preserve_pools_on_delete: Some(false), + } + } +} diff --git a/harmony/src/modules/storage/ceph/crd/mod.rs b/harmony/src/modules/storage/ceph/crd/mod.rs new file mode 100644 index 00000000..5d49edcf --- /dev/null +++ b/harmony/src/modules/storage/ceph/crd/mod.rs @@ -0,0 +1,11 @@ +pub mod block_pool; +pub mod cluster; +pub mod filesystem; +pub mod object_store; +pub mod shared; + +pub use block_pool::*; +pub use cluster::*; +pub use filesystem::*; +pub use object_store::*; +pub use shared::*; diff --git a/harmony/src/modules/storage/ceph/crd/object_store.rs b/harmony/src/modules/storage/ceph/crd/object_store.rs new file mode 100644 index 00000000..fc805099 --- /dev/null +++ b/harmony/src/modules/storage/ceph/crd/object_store.rs @@ -0,0 +1,74 @@ +use kube::{CustomResource, api::ObjectMeta}; +use serde::{Deserialize, Serialize}; + +use super::shared::{PlacementSpec, PoolSpec, ReplicatedSpec}; + +#[derive(CustomResource, Deserialize, Serialize, Clone, Debug)] +#[kube( + group = "ceph.rook.io", + version = "v1", + kind = "CephObjectStore", + plural = "cephobjectstores", + namespaced = true, + schema = "disabled" +)] +#[serde(rename_all = "camelCase")] +pub struct CephObjectStoreSpec { + pub metadata_pool: PoolSpec, + pub data_pool: PoolSpec, + pub gateway: GatewaySpec, + #[serde(skip_serializing_if = "Option::is_none")] + pub preserve_pools_on_delete: Option, +} + +impl Default for CephObjectStore { + fn 
default() -> Self { + Self { + metadata: ObjectMeta::default(), + spec: CephObjectStoreSpec::default(), + } + } +} + +impl Default for CephObjectStoreSpec { + fn default() -> Self { + Self { + metadata_pool: PoolSpec { + replicated: Some(ReplicatedSpec::default()), + ..PoolSpec::default() + }, + data_pool: PoolSpec { + replicated: Some(ReplicatedSpec::default()), + ..PoolSpec::default() + }, + gateway: GatewaySpec::default(), + preserve_pools_on_delete: Some(false), + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct GatewaySpec { + /// Non-secure gateway port. Defaults to 8080 to dodge OKD's <1024 bind restriction. + pub port: u16, + #[serde(skip_serializing_if = "Option::is_none")] + pub secure_port: Option, + pub instances: u32, + #[serde(skip_serializing_if = "Option::is_none")] + pub placement: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ssl_certificate_ref: Option, +} + +impl Default for GatewaySpec { + fn default() -> Self { + Self { + port: 8080, + secure_port: None, + instances: 1, + placement: None, + ssl_certificate_ref: None, + } + } +} diff --git a/harmony/src/modules/storage/ceph/crd/shared.rs b/harmony/src/modules/storage/ceph/crd/shared.rs new file mode 100644 index 00000000..dd6995c6 --- /dev/null +++ b/harmony/src/modules/storage/ceph/crd/shared.rs @@ -0,0 +1,117 @@ +use std::collections::BTreeMap; + +use k8s_openapi::api::core::v1::{PersistentVolumeClaim, Toleration}; +use serde::{Deserialize, Serialize}; + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct PoolSpec { + #[serde(skip_serializing_if = "Option::is_none")] + pub replicated: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub erasure_coded: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub failure_domain: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub device_class: Option, + #[serde(skip_serializing_if = 
"Option::is_none")] + pub parameters: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct ReplicatedSpec { + pub size: u32, + #[serde(skip_serializing_if = "Option::is_none")] + pub require_safe_replica_size: Option, +} + +impl Default for ReplicatedSpec { + fn default() -> Self { + Self { + size: 3, + require_safe_replica_size: Some(true), + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct ErasureCodedSpec { + pub data_chunks: u32, + pub coding_chunks: u32, +} + +#[derive(Deserialize, Serialize, Clone, Debug)] +#[serde(rename_all = "lowercase")] +pub enum FailureDomain { + Osd, + Host, + Rack, + Zone, + Region, +} + +impl Default for FailureDomain { + fn default() -> Self { + Self::Host + } +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct NamedPoolSpec { + pub name: String, + #[serde(flatten)] + pub spec: PoolSpec, +} + +#[derive(Deserialize, Serialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct MetadataServerSpec { + pub active_count: u32, + pub active_standby: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub placement: Option, +} + +impl Default for MetadataServerSpec { + fn default() -> Self { + Self { + active_count: 1, + active_standby: true, + placement: None, + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct PlacementSpec { + #[serde(skip_serializing_if = "Option::is_none")] + pub node_affinity: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub pod_affinity: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub pod_anti_affinity: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub tolerations: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub topology_spread_constraints: Option, +} + +#[derive(Deserialize, 
Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct VolumeClaimTemplate { + pub metadata: k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta, + pub spec: k8s_openapi::api::core::v1::PersistentVolumeClaimSpec, +} + +impl From for VolumeClaimTemplate { + fn from(pvc: PersistentVolumeClaim) -> Self { + Self { + metadata: pvc.metadata, + spec: pvc.spec.unwrap_or_default(), + } + } +} diff --git a/harmony/src/modules/storage/ceph/mod.rs b/harmony/src/modules/storage/ceph/mod.rs index 0a3dcecf..e513d447 100644 --- a/harmony/src/modules/storage/ceph/mod.rs +++ b/harmony/src/modules/storage/ceph/mod.rs @@ -1,2 +1,3 @@ pub mod ceph_remove_osd_score; pub mod ceph_validate_health_score; +pub mod crd; -- 2.39.5 From 2fec85229442c5d4c3a21ed7ff6600fd392005fb Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Thu, 21 May 2026 09:23:31 -0400 Subject: [PATCH 02/19] feat(storage/ceph): add RookCephOperator + RookCephCluster install Scores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the install gap on the Ceph module: previously the storage/ceph/ module only contained Day-2 Scores (CephVerifyClusterHealth, CephRemoveOsd) which assumed a Rook-Ceph cluster already existed in the rook-ceph namespace. There was no Score that actually installed one. Adds two Scores, mirroring the CNPG split-architecture: - RookCephOperatorScore wraps HelmChartScore against the upstream charts.rook.io/release repo. Rook's docs explicitly recommend Helm on OpenShift because the chart auto-creates the SecurityContextConstraints OKD requires. default_okd() sets hostpathRequiresPrivileged=true (mandatory under OKD's SELinux restricted policy) and enables both the RBD and CephFS CSI drivers. 
- RookCephClusterScore applies the typed CephCluster, CephBlockPool, CephFilesystem, and CephObjectStore CRs added in the previous commit via K8sResourceScore::single, plus auto-generates the matching rook-ceph-block-* (RBD) and rook-cephfs-* (CephFS) StorageClass resources. default_okd() ships a 3-mon / 2-mgr / SSL-dashboard / size=3-replicated topology with useAllNodes+useAllDevices. ODF (ocs.openshift.io/StorageCluster) was rejected: it requires the registry.redhat.io pull-secret and isn't supported on OKD. The CRDs themselves are installed transparently by the operator's Helm chart — re-typing those in Rust would mean maintaining two sources of truth for the same OpenAPI schema, so we don't. Type-safety lives at the user-facing layer (CephCluster/Block/FS/Object specs), which is where the value sits. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/storage/ceph/cluster_score.rs | 304 ++++++++++++++++++ harmony/src/modules/storage/ceph/mod.rs | 5 + .../modules/storage/ceph/operator_score.rs | 120 +++++++ 3 files changed, 429 insertions(+) create mode 100644 harmony/src/modules/storage/ceph/cluster_score.rs create mode 100644 harmony/src/modules/storage/ceph/operator_score.rs diff --git a/harmony/src/modules/storage/ceph/cluster_score.rs b/harmony/src/modules/storage/ceph/cluster_score.rs new file mode 100644 index 00000000..7f35cd35 --- /dev/null +++ b/harmony/src/modules/storage/ceph/cluster_score.rs @@ -0,0 +1,304 @@ +use std::collections::BTreeMap; + +use async_trait::async_trait; +use k8s_openapi::api::storage::v1::StorageClass; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; +use log::info; +use serde::Serialize; + +use crate::data::Version; +use crate::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}; +use crate::inventory::Inventory; +use crate::modules::k8s::resource::K8sResourceScore; +use crate::score::Score; +use crate::topology::{K8sclient, Topology}; +use harmony_types::id::Id; + +use 
super::crd::{ + CephBlockPool, CephBlockPoolSpec, CephCluster, CephFilesystem, CephObjectStore, FailureDomain, + ReplicatedSpec, +}; + +/// Deploys a typed Rook-Ceph cluster: `CephCluster` + pools + filesystems + +/// object stores + their consumer `StorageClass`es. Assumes the Rook operator +/// is already installed (use [`super::RookCephOperatorScore`]). +/// +/// Each Custom Resource is a real Rust type (`kube::CustomResource`-derived), +/// applied via `K8sResourceScore::single` — the same apply path used by CNPG. +/// +/// # Ordering +/// CRs are applied in this order: +/// 1. `CephCluster` (operator must reconcile this to Ready before pools register) +/// 2. `CephBlockPool` resources + their RBD `StorageClass`es +/// 3. `CephFilesystem` resources + their CephFS `StorageClass`es +/// 4. `CephObjectStore` resources +/// +/// The operator reconciles each in the background; the apply itself succeeds +/// as soon as the CR is accepted by the API server. +#[derive(Debug, Clone, Serialize)] +pub struct RookCephClusterScore { + pub namespace: String, + pub cluster: CephCluster, + pub block_pools: Vec, + pub filesystems: Vec, + pub object_stores: Vec, + /// Mark the first block-pool StorageClass as the cluster default. + pub default_block_pool_storage_class: bool, +} + +impl RookCephClusterScore { + /// OKD-friendly defaults: 3-mon, 2-mgr, dashboard on 8443/ssl, replicated + /// pools size=3, useAllNodes + useAllDevices, dataDirHostPath=/var/lib/rook, + /// one CephBlockPool named "replicapool", no filesystem or object store. 
+ pub fn default_okd(namespace: impl Into) -> Self { + let ns = namespace.into(); + + let mut cluster = CephCluster::default(); + cluster.metadata.name = Some(ns.clone()); + cluster.metadata.namespace = Some(ns.clone()); + cluster.spec.storage.use_all_nodes = true; + cluster.spec.storage.use_all_devices = true; + + let block_pool = CephBlockPool { + metadata: ObjectMeta { + name: Some("replicapool".to_string()), + namespace: Some(ns.clone()), + ..ObjectMeta::default() + }, + spec: CephBlockPoolSpec { + replicated: Some(ReplicatedSpec::default()), + failure_domain: Some(FailureDomain::Host), + ..CephBlockPoolSpec::default() + }, + }; + + Self { + namespace: ns, + cluster, + block_pools: vec![block_pool], + filesystems: vec![], + object_stores: vec![], + default_block_pool_storage_class: true, + } + } +} + +impl Score for RookCephClusterScore { + fn create_interpret(&self) -> Box> { + Box::new(RookCephClusterInterpret { + score: self.clone(), + }) + } + + fn name(&self) -> String { + format!("RookCephClusterScore({})", self.namespace) + } +} + +#[derive(Debug)] +struct RookCephClusterInterpret { + score: RookCephClusterScore, +} + +#[async_trait] +impl Interpret for RookCephClusterInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + let ns = self.score.namespace.clone(); + + info!("[Rook-Ceph] Applying CephCluster '{}'", ns); + K8sResourceScore::single(self.score.cluster.clone(), Some(ns.clone())) + .interpret(inventory, topology) + .await?; + + for (idx, pool) in self.score.block_pools.iter().enumerate() { + let pool_name = pool + .metadata + .name + .clone() + .unwrap_or_else(|| format!("pool-{idx}")); + info!("[Rook-Ceph] Applying CephBlockPool '{}'", pool_name); + K8sResourceScore::single(pool.clone(), Some(ns.clone())) + .interpret(inventory, topology) + .await?; + + let sc = rbd_storage_class( + &pool_name, + &ns, + idx == 0 && self.score.default_block_pool_storage_class, + ); + info!( + "[Rook-Ceph] Applying 
StorageClass '{}' (RBD on pool '{}')", + sc.metadata.name.as_deref().unwrap_or("?"), + pool_name + ); + K8sResourceScore::single(sc, None) + .interpret(inventory, topology) + .await?; + } + + for fs in self.score.filesystems.iter() { + let fs_name = fs + .metadata + .name + .clone() + .unwrap_or_else(|| "cephfs".to_string()); + info!("[Rook-Ceph] Applying CephFilesystem '{}'", fs_name); + K8sResourceScore::single(fs.clone(), Some(ns.clone())) + .interpret(inventory, topology) + .await?; + + let sc = cephfs_storage_class(&fs_name, &ns); + info!( + "[Rook-Ceph] Applying StorageClass '{}' (CephFS on filesystem '{}')", + sc.metadata.name.as_deref().unwrap_or("?"), + fs_name + ); + K8sResourceScore::single(sc, None) + .interpret(inventory, topology) + .await?; + } + + for store in self.score.object_stores.iter() { + let store_name = store + .metadata + .name + .clone() + .unwrap_or_else(|| "object-store".to_string()); + info!("[Rook-Ceph] Applying CephObjectStore '{}'", store_name); + K8sResourceScore::single(store.clone(), Some(ns.clone())) + .interpret(inventory, topology) + .await?; + } + + Ok(Outcome::success(format!( + "Applied Rook-Ceph cluster '{}' ({} block-pool(s), {} filesystem(s), {} object-store(s))", + ns, + self.score.block_pools.len(), + self.score.filesystems.len(), + self.score.object_stores.len(), + ))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("RookCephClusterInterpret") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} + +fn rbd_storage_class(pool_name: &str, ns: &str, is_default: bool) -> StorageClass { + let mut params = BTreeMap::new(); + params.insert("clusterID".to_string(), ns.to_string()); + params.insert("pool".to_string(), pool_name.to_string()); + params.insert("imageFormat".to_string(), "2".to_string()); + params.insert("imageFeatures".to_string(), "layering".to_string()); + params.insert( + 
"csi.storage.k8s.io/provisioner-secret-name".to_string(), + "rook-csi-rbd-provisioner".to_string(), + ); + params.insert( + "csi.storage.k8s.io/provisioner-secret-namespace".to_string(), + ns.to_string(), + ); + params.insert( + "csi.storage.k8s.io/controller-expand-secret-name".to_string(), + "rook-csi-rbd-provisioner".to_string(), + ); + params.insert( + "csi.storage.k8s.io/controller-expand-secret-namespace".to_string(), + ns.to_string(), + ); + params.insert( + "csi.storage.k8s.io/node-stage-secret-name".to_string(), + "rook-csi-rbd-node".to_string(), + ); + params.insert( + "csi.storage.k8s.io/node-stage-secret-namespace".to_string(), + ns.to_string(), + ); + params.insert("csi.storage.k8s.io/fstype".to_string(), "ext4".to_string()); + + let mut annotations = BTreeMap::new(); + if is_default { + annotations.insert( + "storageclass.kubernetes.io/is-default-class".to_string(), + "true".to_string(), + ); + } + + StorageClass { + metadata: ObjectMeta { + name: Some(format!("rook-ceph-block-{pool_name}")), + annotations: if annotations.is_empty() { + None + } else { + Some(annotations) + }, + ..ObjectMeta::default() + }, + provisioner: format!("{ns}.rbd.csi.ceph.com"), + parameters: Some(params), + reclaim_policy: Some("Delete".to_string()), + allow_volume_expansion: Some(true), + volume_binding_mode: Some("Immediate".to_string()), + ..StorageClass::default() + } +} + +fn cephfs_storage_class(fs_name: &str, ns: &str) -> StorageClass { + let mut params = BTreeMap::new(); + params.insert("clusterID".to_string(), ns.to_string()); + params.insert("fsName".to_string(), fs_name.to_string()); + params.insert("pool".to_string(), format!("{fs_name}-data0")); + params.insert( + "csi.storage.k8s.io/provisioner-secret-name".to_string(), + "rook-csi-cephfs-provisioner".to_string(), + ); + params.insert( + "csi.storage.k8s.io/provisioner-secret-namespace".to_string(), + ns.to_string(), + ); + params.insert( + "csi.storage.k8s.io/controller-expand-secret-name".to_string(), + 
"rook-csi-cephfs-provisioner".to_string(), + ); + params.insert( + "csi.storage.k8s.io/controller-expand-secret-namespace".to_string(), + ns.to_string(), + ); + params.insert( + "csi.storage.k8s.io/node-stage-secret-name".to_string(), + "rook-csi-cephfs-node".to_string(), + ); + params.insert( + "csi.storage.k8s.io/node-stage-secret-namespace".to_string(), + ns.to_string(), + ); + + StorageClass { + metadata: ObjectMeta { + name: Some(format!("rook-cephfs-{fs_name}")), + ..ObjectMeta::default() + }, + provisioner: format!("{ns}.cephfs.csi.ceph.com"), + parameters: Some(params), + reclaim_policy: Some("Delete".to_string()), + allow_volume_expansion: Some(true), + volume_binding_mode: Some("Immediate".to_string()), + ..StorageClass::default() + } +} diff --git a/harmony/src/modules/storage/ceph/mod.rs b/harmony/src/modules/storage/ceph/mod.rs index e513d447..e7653d9a 100644 --- a/harmony/src/modules/storage/ceph/mod.rs +++ b/harmony/src/modules/storage/ceph/mod.rs @@ -1,3 +1,8 @@ pub mod ceph_remove_osd_score; pub mod ceph_validate_health_score; +pub mod cluster_score; pub mod crd; +pub mod operator_score; + +pub use cluster_score::*; +pub use operator_score::*; diff --git a/harmony/src/modules/storage/ceph/operator_score.rs b/harmony/src/modules/storage/ceph/operator_score.rs new file mode 100644 index 00000000..566b0eef --- /dev/null +++ b/harmony/src/modules/storage/ceph/operator_score.rs @@ -0,0 +1,120 @@ +use std::collections::HashMap; +use std::str::FromStr; + +use non_blank_string_rs::NonBlankString; +use serde::Serialize; + +use crate::interpret::Interpret; +use crate::modules::helm::chart::{HelmChartScore, HelmRepository}; +use crate::score::Score; +use crate::topology::{HelmCommand, Topology}; +use harmony_macros::hurl; + +/// Install the Rook-Ceph operator via its upstream Helm chart. +/// +/// The Rook docs explicitly recommend Helm on OpenShift/OKD because the chart +/// automatically creates the `SecurityContextConstraints` resources OKD requires. 
+/// This Score wraps `HelmChartScore` against `https://charts.rook.io/release`. +/// +/// The chart installs: +/// - The `rook-ceph` operator Deployment +/// - All `ceph.rook.io/v1` CRDs (CephCluster, CephBlockPool, CephFilesystem, CephObjectStore, ...) +/// - RBAC (ServiceAccounts, Roles, ClusterRoles, RoleBindings, ClusterRoleBindings) +/// - OpenShift SCC bindings when `hostpath_requires_privileged` is true +/// +/// The CRs that consume these CRDs are deployed separately by `RookCephClusterScore` +/// (typed Rust structs applied via `K8sResourceScore`), preserving compile-time +/// type-safety on the user-facing surface. +/// +/// # OKD requirements +/// - `hostpath_requires_privileged` must be `true` — OpenShift's SELinux +/// restricted-set blocks hostPath writes otherwise. +/// +/// # Usage +/// ```ignore +/// use harmony::modules::storage::ceph::RookCephOperatorScore; +/// let score = RookCephOperatorScore::default_okd(); +/// ``` +#[derive(Debug, Clone, Serialize)] +pub struct RookCephOperatorScore { + pub namespace: String, + pub chart_version: Option, + pub hostpath_requires_privileged: bool, + pub enable_rbd_driver: bool, + pub enable_cephfs_driver: bool, +} + +impl RookCephOperatorScore { + /// OKD-friendly defaults: `rook-ceph` namespace, both CSI drivers enabled, + /// hostPath privileged mode on. Chart version unpinned (uses latest). + pub fn default_okd() -> Self { + Self { + namespace: "rook-ceph".to_string(), + chart_version: None, + hostpath_requires_privileged: true, + enable_rbd_driver: true, + enable_cephfs_driver: true, + } + } + + /// K3s / vanilla-K8s defaults — same as OKD but without the privileged flag. 
+ pub fn default_k8s() -> Self { + Self { + hostpath_requires_privileged: false, + ..Self::default_okd() + } + } +} + +impl Default for RookCephOperatorScore { + fn default() -> Self { + Self::default_okd() + } +} + +impl<T: Topology + HelmCommand> Score<T> for RookCephOperatorScore { + fn create_interpret(&self) -> Box<dyn Interpret<T>> { + let mut values: HashMap<NonBlankString, String> = HashMap::new(); + values.insert( + NonBlankString::from_str("csi.enableRbdDriver").unwrap(), + self.enable_rbd_driver.to_string(), + ); + values.insert( + NonBlankString::from_str("csi.enableCephfsDriver").unwrap(), + self.enable_cephfs_driver.to_string(), + ); + if self.hostpath_requires_privileged { + values.insert( + NonBlankString::from_str("hostpathRequiresPrivileged").unwrap(), + "true".to_string(), + ); + } + + let chart_version = self + .chart_version + .as_ref() + .map(|v| NonBlankString::from_str(v).expect("chart_version must be non-blank")); + + let helm_score = HelmChartScore { + namespace: Some(NonBlankString::from_str(&self.namespace).unwrap()), + release_name: NonBlankString::from_str("rook-ceph").unwrap(), + chart_name: NonBlankString::from_str("rook-release/rook-ceph").unwrap(), + chart_version, + values_overrides: Some(values), + values_yaml: None, + create_namespace: true, + install_only: true, + repository: Some(HelmRepository::new( + "rook-release".to_string(), + hurl!("https://charts.rook.io/release"), + true, + )), + }; + + helm_score.create_interpret() + } + + fn name(&self) -> String { + format!("RookCephOperatorScore({})", self.namespace) + } +} -- 2.39.5 From 8a3a6e71071b9ed00abb789f423cc81a00a922a7 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Thu, 21 May 2026 09:23:43 -0400 Subject: [PATCH 03/19] feat(examples): add install_rook_ceph end-to-end example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minimal example wiring the new RookCephOperatorScore + RookCephClusterScore against K8sAnywhereTopology, then chaining the existing CephVerifyClusterHealth Day-2 Score to 
close the install→verify loop. Picked up automatically by the examples/* workspace wildcard. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 9 +++++++++ examples/install_rook_ceph/Cargo.toml | 12 ++++++++++++ examples/install_rook_ceph/src/main.rs | 27 ++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 examples/install_rook_ceph/Cargo.toml create mode 100644 examples/install_rook_ceph/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 97904ddf..93a7555c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2685,6 +2685,15 @@ dependencies = [ "url", ] +[[package]] +name = "example-install-rook-ceph" +version = "0.1.0" +dependencies = [ + "harmony", + "harmony_cli", + "tokio", +] + [[package]] name = "example-k8s-drain-node" version = "0.1.0" diff --git a/examples/install_rook_ceph/Cargo.toml b/examples/install_rook_ceph/Cargo.toml new file mode 100644 index 00000000..a49d2f6c --- /dev/null +++ b/examples/install_rook_ceph/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "example-install-rook-ceph" +edition = "2024" +version.workspace = true +readme.workspace = true +license.workspace = true +publish = false + +[dependencies] +harmony = { path = "../../harmony" } +harmony_cli = { path = "../../harmony_cli" } +tokio = { workspace = true } diff --git a/examples/install_rook_ceph/src/main.rs b/examples/install_rook_ceph/src/main.rs new file mode 100644 index 00000000..3a08b578 --- /dev/null +++ b/examples/install_rook_ceph/src/main.rs @@ -0,0 +1,27 @@ +use harmony::{ + inventory::Inventory, + modules::storage::ceph::{ + RookCephClusterScore, RookCephOperatorScore, + ceph_validate_health_score::CephVerifyClusterHealth, + }, + score::Score, + topology::K8sAnywhereTopology, +}; + +#[tokio::main] +async fn main() { + let topology = K8sAnywhereTopology::from_env(); + let inventory = Inventory::autoload(); + + let scores: Vec>> = vec![ + Box::new(RookCephOperatorScore::default_okd()), + 
Box::new(RookCephClusterScore::default_okd("rook-ceph")), + Box::new(CephVerifyClusterHealth { + rook_ceph_namespace: "rook-ceph".to_string(), + }), + ]; + + harmony_cli::run(inventory, topology, scores, None) + .await + .unwrap(); +} -- 2.39.5 From d7315812ca12c91e4bdefa20a24ac5b2894ae2cc Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 06:55:12 -0400 Subject: [PATCH 04/19] feat(examples): deploy CephObjectStore (S3) in install_rook_ceph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the example to stand up a 2-instance RGW gateway alongside the block pool. The CephObjectStore CR uses the default replicated metadata and data pools (size=3) and Rook's port 8080 to dodge OKD's <1024 bind restriction. The operator-created Service exposes the S3 endpoint at rook-ceph-rgw-ceph-objectstore.rook-ceph.svc.cluster.local:8080. Adds k8s-openapi to the example's deps for ObjectMeta — needed now that the example builds a CR directly instead of relying solely on default_okd(). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + examples/install_rook_ceph/Cargo.toml | 1 + examples/install_rook_ceph/src/main.rs | 26 ++++++++++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 93a7555c..11476bec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2691,6 +2691,7 @@ version = "0.1.0" dependencies = [ "harmony", "harmony_cli", + "k8s-openapi", "tokio", ] diff --git a/examples/install_rook_ceph/Cargo.toml b/examples/install_rook_ceph/Cargo.toml index a49d2f6c..b79d88b6 100644 --- a/examples/install_rook_ceph/Cargo.toml +++ b/examples/install_rook_ceph/Cargo.toml @@ -9,4 +9,5 @@ publish = false [dependencies] harmony = { path = "../../harmony" } harmony_cli = { path = "../../harmony_cli" } +k8s-openapi = { workspace = true } tokio = { workspace = true } diff --git a/examples/install_rook_ceph/src/main.rs b/examples/install_rook_ceph/src/main.rs index 3a08b578..f23fc65a 100644 --- a/examples/install_rook_ceph/src/main.rs +++ b/examples/install_rook_ceph/src/main.rs @@ -3,21 +3,43 @@ use harmony::{ modules::storage::ceph::{ RookCephClusterScore, RookCephOperatorScore, ceph_validate_health_score::CephVerifyClusterHealth, + crd::{CephObjectStore, CephObjectStoreSpec, GatewaySpec}, }, score::Score, topology::K8sAnywhereTopology, }; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; + +const NAMESPACE: &str = "rook-ceph"; #[tokio::main] async fn main() { let topology = K8sAnywhereTopology::from_env(); let inventory = Inventory::autoload(); + let object_store = CephObjectStore { + metadata: ObjectMeta { + name: Some("ceph-objectstore".to_string()), + namespace: Some(NAMESPACE.to_string()), + ..ObjectMeta::default() + }, + spec: CephObjectStoreSpec { + gateway: GatewaySpec { + instances: 2, + ..GatewaySpec::default() + }, + ..CephObjectStoreSpec::default() + }, + }; + + let mut cluster = RookCephClusterScore::default_okd(NAMESPACE); + cluster.object_stores = 
vec![object_store]; + let scores: Vec>> = vec![ Box::new(RookCephOperatorScore::default_okd()), - Box::new(RookCephClusterScore::default_okd("rook-ceph")), + Box::new(cluster), Box::new(CephVerifyClusterHealth { - rook_ceph_namespace: "rook-ceph".to_string(), + rook_ceph_namespace: NAMESPACE.to_string(), }), ]; -- 2.39.5 From 9b0481ad08417784951d1881248825df6ec4cbb6 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:00:43 -0400 Subject: [PATCH 05/19] feat(examples): expose S3 endpoint via edge-TLS Ingress in install_rook_ceph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a Kubernetes Ingress in front of the RGW Service (port 8080) with TLS terminated at the edge — the backend RGW stays on HTTP, only reachable intra-cluster. Hostname and TLS Secret name are configurable via the S3_HOSTNAME and S3_TLS_SECRET consts at the top of main.rs. The TLS Secret must exist in the rook-ceph namespace before running the example (e.g. created by cert-manager or a manual `kubectl create secret tls`). The example does not create it — cert material can't be shipped in a repo. Built as a raw k8s_openapi::Ingress applied via K8sResourceScore::single because harmony's K8sIngressScore currently emits HTTP-only Ingresses and doesn't expose a TLS field. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/install_rook_ceph/src/main.rs | 69 ++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/examples/install_rook_ceph/src/main.rs b/examples/install_rook_ceph/src/main.rs index f23fc65a..69d0f5d0 100644 --- a/examples/install_rook_ceph/src/main.rs +++ b/examples/install_rook_ceph/src/main.rs @@ -1,16 +1,34 @@ use harmony::{ inventory::Inventory, - modules::storage::ceph::{ - RookCephClusterScore, RookCephOperatorScore, - ceph_validate_health_score::CephVerifyClusterHealth, - crd::{CephObjectStore, CephObjectStoreSpec, GatewaySpec}, + modules::{ + k8s::resource::K8sResourceScore, + storage::ceph::{ + RookCephClusterScore, RookCephOperatorScore, + ceph_validate_health_score::CephVerifyClusterHealth, + crd::{CephObjectStore, CephObjectStoreSpec, GatewaySpec}, + }, }, score::Score, topology::K8sAnywhereTopology, }; +use k8s_openapi::api::networking::v1::{ + HTTPIngressPath, HTTPIngressRuleValue, Ingress, IngressBackend, IngressRule, + IngressServiceBackend, IngressSpec, IngressTLS, ServiceBackendPort, +}; use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; const NAMESPACE: &str = "rook-ceph"; +const OBJECTSTORE_NAME: &str = "ceph-objectstore"; + +/// External S3 hostname for the Ingress. Edit this to match the DNS name you'll +/// point at your cluster's ingress LB. +const S3_HOSTNAME: &str = "s3.example.com"; + +/// TLS Secret holding the cert+key for `S3_HOSTNAME`. The Secret must exist in +/// `NAMESPACE` *before* this example runs (e.g. via cert-manager, or a manual +/// `kubectl create secret tls`). The example does not create it — real cert +/// material can't be shipped in a repo. 
+const S3_TLS_SECRET: &str = "ceph-objectstore-tls"; #[tokio::main] async fn main() { @@ -19,7 +37,7 @@ async fn main() { let object_store = CephObjectStore { metadata: ObjectMeta { - name: Some("ceph-objectstore".to_string()), + name: Some(OBJECTSTORE_NAME.to_string()), namespace: Some(NAMESPACE.to_string()), ..ObjectMeta::default() }, @@ -35,12 +53,53 @@ async fn main() { let mut cluster = RookCephClusterScore::default_okd(NAMESPACE); cluster.object_stores = vec![object_store]; + // Edge-TLS Ingress in front of the RGW Service. Rook creates the Service + // as `rook-ceph-rgw-` on the gateway port (8080 here). + let s3_ingress = Ingress { + metadata: ObjectMeta { + name: Some(format!("{OBJECTSTORE_NAME}-s3")), + namespace: Some(NAMESPACE.to_string()), + ..ObjectMeta::default() + }, + spec: Some(IngressSpec { + rules: Some(vec![IngressRule { + host: Some(S3_HOSTNAME.to_string()), + http: Some(HTTPIngressRuleValue { + paths: vec![HTTPIngressPath { + path: Some("/".to_string()), + path_type: "Prefix".to_string(), + backend: IngressBackend { + service: Some(IngressServiceBackend { + name: format!("rook-ceph-rgw-{OBJECTSTORE_NAME}"), + port: Some(ServiceBackendPort { + number: Some(8080), + ..ServiceBackendPort::default() + }), + }), + ..IngressBackend::default() + }, + }], + }), + }]), + tls: Some(vec![IngressTLS { + hosts: Some(vec![S3_HOSTNAME.to_string()]), + secret_name: Some(S3_TLS_SECRET.to_string()), + }]), + ..IngressSpec::default() + }), + ..Ingress::default() + }; + let scores: Vec>> = vec![ Box::new(RookCephOperatorScore::default_okd()), Box::new(cluster), Box::new(CephVerifyClusterHealth { rook_ceph_namespace: NAMESPACE.to_string(), }), + Box::new(K8sResourceScore::single( + s3_ingress, + Some(NAMESPACE.to_string()), + )), ]; harmony_cli::run(inventory, topology, scores, None) -- 2.39.5 From 96ebbd5f3e8be37bb80fdd3331f4d73574324758 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:08:41 -0400 Subject: [PATCH 06/19] chore(examples): 
comment out S3 Ingress in install_rook_ceph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the edge-TLS Ingress block in the source as a reference but disable it by default — running the example shouldn't require a pre-provisioned TLS Secret or an ingress controller. Uncomment the import lines, the const declarations, the Ingress construction, and the K8sResourceScore entry in the `scores` vec to re-enable. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/install_rook_ceph/src/main.rs | 115 +++++++++++++------------ 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/examples/install_rook_ceph/src/main.rs b/examples/install_rook_ceph/src/main.rs index 69d0f5d0..ac9f9aa5 100644 --- a/examples/install_rook_ceph/src/main.rs +++ b/examples/install_rook_ceph/src/main.rs @@ -1,34 +1,34 @@ use harmony::{ inventory::Inventory, - modules::{ - k8s::resource::K8sResourceScore, - storage::ceph::{ - RookCephClusterScore, RookCephOperatorScore, - ceph_validate_health_score::CephVerifyClusterHealth, - crd::{CephObjectStore, CephObjectStoreSpec, GatewaySpec}, - }, + modules::storage::ceph::{ + RookCephClusterScore, RookCephOperatorScore, + ceph_validate_health_score::CephVerifyClusterHealth, + crd::{CephObjectStore, CephObjectStoreSpec, GatewaySpec}, }, score::Score, topology::K8sAnywhereTopology, }; -use k8s_openapi::api::networking::v1::{ - HTTPIngressPath, HTTPIngressRuleValue, Ingress, IngressBackend, IngressRule, - IngressServiceBackend, IngressSpec, IngressTLS, ServiceBackendPort, -}; use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; +// Uncomment to enable the edge-TLS Ingress for the S3 endpoint. 
+// use harmony::modules::k8s::resource::K8sResourceScore; +// use k8s_openapi::api::networking::v1::{ +// HTTPIngressPath, HTTPIngressRuleValue, Ingress, IngressBackend, IngressRule, +// IngressServiceBackend, IngressSpec, IngressTLS, ServiceBackendPort, +// }; + const NAMESPACE: &str = "rook-ceph"; const OBJECTSTORE_NAME: &str = "ceph-objectstore"; -/// External S3 hostname for the Ingress. Edit this to match the DNS name you'll -/// point at your cluster's ingress LB. -const S3_HOSTNAME: &str = "s3.example.com"; +// External S3 hostname for the Ingress. Edit this to match the DNS name +// you'll point at your cluster's ingress LB. +// const S3_HOSTNAME: &str = "s3.example.com"; -/// TLS Secret holding the cert+key for `S3_HOSTNAME`. The Secret must exist in -/// `NAMESPACE` *before* this example runs (e.g. via cert-manager, or a manual -/// `kubectl create secret tls`). The example does not create it — real cert -/// material can't be shipped in a repo. -const S3_TLS_SECRET: &str = "ceph-objectstore-tls"; +// TLS Secret holding the cert+key for `S3_HOSTNAME`. The Secret must exist +// in `NAMESPACE` *before* this example runs (e.g. via cert-manager, or a +// manual `kubectl create secret tls`). The example does not create it — +// real cert material can't be shipped in a repo. +// const S3_TLS_SECRET: &str = "ceph-objectstore-tls"; #[tokio::main] async fn main() { @@ -55,40 +55,44 @@ async fn main() { // Edge-TLS Ingress in front of the RGW Service. Rook creates the Service // as `rook-ceph-rgw-` on the gateway port (8080 here). 
- let s3_ingress = Ingress { - metadata: ObjectMeta { - name: Some(format!("{OBJECTSTORE_NAME}-s3")), - namespace: Some(NAMESPACE.to_string()), - ..ObjectMeta::default() - }, - spec: Some(IngressSpec { - rules: Some(vec![IngressRule { - host: Some(S3_HOSTNAME.to_string()), - http: Some(HTTPIngressRuleValue { - paths: vec![HTTPIngressPath { - path: Some("/".to_string()), - path_type: "Prefix".to_string(), - backend: IngressBackend { - service: Some(IngressServiceBackend { - name: format!("rook-ceph-rgw-{OBJECTSTORE_NAME}"), - port: Some(ServiceBackendPort { - number: Some(8080), - ..ServiceBackendPort::default() - }), - }), - ..IngressBackend::default() - }, - }], - }), - }]), - tls: Some(vec![IngressTLS { - hosts: Some(vec![S3_HOSTNAME.to_string()]), - secret_name: Some(S3_TLS_SECRET.to_string()), - }]), - ..IngressSpec::default() - }), - ..Ingress::default() - }; + // Uncomment this block (and its imports + consts above) to enable, then + // add the Box::new(K8sResourceScore::single(...)) entry to the `scores` + // vec below. 
+ // + // let s3_ingress = Ingress { + // metadata: ObjectMeta { + // name: Some(format!("{OBJECTSTORE_NAME}-s3")), + // namespace: Some(NAMESPACE.to_string()), + // ..ObjectMeta::default() + // }, + // spec: Some(IngressSpec { + // rules: Some(vec![IngressRule { + // host: Some(S3_HOSTNAME.to_string()), + // http: Some(HTTPIngressRuleValue { + // paths: vec![HTTPIngressPath { + // path: Some("/".to_string()), + // path_type: "Prefix".to_string(), + // backend: IngressBackend { + // service: Some(IngressServiceBackend { + // name: format!("rook-ceph-rgw-{OBJECTSTORE_NAME}"), + // port: Some(ServiceBackendPort { + // number: Some(8080), + // ..ServiceBackendPort::default() + // }), + // }), + // ..IngressBackend::default() + // }, + // }], + // }), + // }]), + // tls: Some(vec![IngressTLS { + // hosts: Some(vec![S3_HOSTNAME.to_string()]), + // secret_name: Some(S3_TLS_SECRET.to_string()), + // }]), + // ..IngressSpec::default() + // }), + // ..Ingress::default() + // }; let scores: Vec>> = vec![ Box::new(RookCephOperatorScore::default_okd()), @@ -96,10 +100,7 @@ async fn main() { Box::new(CephVerifyClusterHealth { rook_ceph_namespace: NAMESPACE.to_string(), }), - Box::new(K8sResourceScore::single( - s3_ingress, - Some(NAMESPACE.to_string()), - )), + // Box::new(K8sResourceScore::single(s3_ingress, Some(NAMESPACE.to_string()))), ]; harmony_cli::run(inventory, topology, scores, None) -- 2.39.5 From 56bf08b3bc049a8fc3c21831f354a16285e9683a Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:20:19 -0400 Subject: [PATCH 07/19] feat(storage/ceph): typed CephObjectStoreUser CRD + apply via cluster Score Adds the missing piece for actually using the S3 endpoint Rook stands up: a typed CephObjectStoreUser with full spec coverage (capabilities, quotas, cluster_namespace) and a convenience `for_store` constructor. 
Rook materializes the user's S3 access credentials into a Secret named `rook-ceph-object-user-<store>-<user>` with base64-encoded `AccessKey` / `SecretKey` data keys. The new `credentials_secret_name()` helper returns that name programmatically so callers don't have to assemble the string. Threads through RookCephClusterScore as a new `object_store_users: Vec<CephObjectStoreUser>` field, applied as step 5 of the CR sequence (after the object stores they depend on). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/storage/ceph/cluster_score.rs | 29 +++- harmony/src/modules/storage/ceph/crd/mod.rs | 2 + .../storage/ceph/crd/object_store_user.rs | 128 ++++++++++++++++++ 3 files changed, 156 insertions(+), 3 deletions(-) create mode 100644 harmony/src/modules/storage/ceph/crd/object_store_user.rs diff --git a/harmony/src/modules/storage/ceph/cluster_score.rs b/harmony/src/modules/storage/ceph/cluster_score.rs index 7f35cd35..73abec18 100644 --- a/harmony/src/modules/storage/ceph/cluster_score.rs +++ b/harmony/src/modules/storage/ceph/cluster_score.rs @@ -15,8 +15,8 @@ use crate::topology::{K8sclient, Topology}; use harmony_types::id::Id; use super::crd::{ - CephBlockPool, CephBlockPoolSpec, CephCluster, CephFilesystem, CephObjectStore, FailureDomain, - ReplicatedSpec, + CephBlockPool, CephBlockPoolSpec, CephCluster, CephFilesystem, CephObjectStore, + CephObjectStoreUser, FailureDomain, ReplicatedSpec, }; /// Deploys a typed Rook-Ceph cluster: `CephCluster` + pools + filesystems + @@ -32,6 +32,8 @@ use super::crd::{ /// 2. `CephBlockPool` resources + their RBD `StorageClass`es /// 3. `CephFilesystem` resources + their CephFS `StorageClass`es /// 4. `CephObjectStore` resources +/// 5. `CephObjectStoreUser` resources (Rook materializes their S3 credentials +/// into a `rook-ceph-object-user-<store>-<user>` Secret per user) /// /// The operator reconciles each in the background; the apply itself succeeds /// as soon as the CR is accepted by the API server. 
@@ -42,6 +44,7 @@ pub struct RookCephClusterScore { pub block_pools: Vec, pub filesystems: Vec, pub object_stores: Vec, + pub object_store_users: Vec, /// Mark the first block-pool StorageClass as the cluster default. pub default_block_pool_storage_class: bool, } @@ -78,6 +81,7 @@ impl RookCephClusterScore { block_pools: vec![block_pool], filesystems: vec![], object_stores: vec![], + object_store_users: vec![], default_block_pool_storage_class: true, } } @@ -174,12 +178,31 @@ impl Interpret for RookCephClusterInterpre .await?; } + for user in self.score.object_store_users.iter() { + let user_name = user + .metadata + .name + .clone() + .unwrap_or_else(|| "object-store-user".to_string()); + info!( + "[Rook-Ceph] Applying CephObjectStoreUser '{}' (store='{}'); \ + credentials will appear in Secret '{}'", + user_name, + user.spec.store, + user.credentials_secret_name(), + ); + K8sResourceScore::single(user.clone(), Some(ns.clone())) + .interpret(inventory, topology) + .await?; + } + Ok(Outcome::success(format!( - "Applied Rook-Ceph cluster '{}' ({} block-pool(s), {} filesystem(s), {} object-store(s))", + "Applied Rook-Ceph cluster '{}' ({} block-pool(s), {} filesystem(s), {} object-store(s), {} object-store-user(s))", ns, self.score.block_pools.len(), self.score.filesystems.len(), self.score.object_stores.len(), + self.score.object_store_users.len(), ))) } diff --git a/harmony/src/modules/storage/ceph/crd/mod.rs b/harmony/src/modules/storage/ceph/crd/mod.rs index 5d49edcf..5a0b96aa 100644 --- a/harmony/src/modules/storage/ceph/crd/mod.rs +++ b/harmony/src/modules/storage/ceph/crd/mod.rs @@ -2,10 +2,12 @@ pub mod block_pool; pub mod cluster; pub mod filesystem; pub mod object_store; +pub mod object_store_user; pub mod shared; pub use block_pool::*; pub use cluster::*; pub use filesystem::*; pub use object_store::*; +pub use object_store_user::*; pub use shared::*; diff --git a/harmony/src/modules/storage/ceph/crd/object_store_user.rs 
b/harmony/src/modules/storage/ceph/crd/object_store_user.rs new file mode 100644 index 00000000..b599f116 --- /dev/null +++ b/harmony/src/modules/storage/ceph/crd/object_store_user.rs @@ -0,0 +1,128 @@ +use kube::{CustomResource, api::ObjectMeta}; +use serde::{Deserialize, Serialize}; + +/// `CephObjectStoreUser` — provisions an RGW S3 user against a `CephObjectStore`. +/// +/// Rook auto-creates a Kubernetes Secret named +/// `rook-ceph-object-user-<store>-<user>` containing the user's access +/// credentials, with these base64-encoded keys: +/// - `AccessKey` — S3 access key ID (20 bytes) +/// - `SecretKey` — S3 secret access key (40 bytes) +/// +/// The Secret lives in the same namespace as the user. +#[derive(CustomResource, Deserialize, Serialize, Clone, Debug)] +#[kube( + group = "ceph.rook.io", + version = "v1", + kind = "CephObjectStoreUser", + plural = "cephobjectstoreusers", + namespaced = true, + schema = "disabled" +)] +#[serde(rename_all = "camelCase")] +pub struct CephObjectStoreUserSpec { + /// Name of the `CephObjectStore` this user belongs to. 
+ pub store: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub display_name: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub cluster_namespace: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub quotas: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub capabilities: Option, +} + +impl Default for CephObjectStoreUser { + fn default() -> Self { + Self { + metadata: ObjectMeta::default(), + spec: CephObjectStoreUserSpec::default(), + } + } +} + +impl Default for CephObjectStoreUserSpec { + fn default() -> Self { + Self { + store: String::new(), + display_name: None, + cluster_namespace: None, + quotas: None, + capabilities: None, + } + } +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct UserQuotas { + #[serde(skip_serializing_if = "Option::is_none")] + pub max_buckets: Option, + /// Resource-quantity string, e.g. `"10G"`, `"500M"`. + #[serde(skip_serializing_if = "Option::is_none")] + pub max_size: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub max_objects: Option, +} + +/// RGW capability strings — each accepts `"read"`, `"write"`, `"read, write"`, +/// or `"*"`. 
+#[derive(Deserialize, Serialize, Clone, Debug, Default)] +#[serde(rename_all = "camelCase")] +pub struct UserCapabilities { + #[serde(skip_serializing_if = "Option::is_none")] + pub user: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub bucket: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub usage: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub zone: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub roles: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub info: Option<String>, + #[serde(rename = "amz-cache", skip_serializing_if = "Option::is_none")] + pub amz_cache: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub bilog: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub mdlog: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub datalog: Option<String>, + #[serde(rename = "user-policy", skip_serializing_if = "Option::is_none")] + pub user_policy: Option<String>, + #[serde(rename = "oidc-provider", skip_serializing_if = "Option::is_none")] + pub odic_provider: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub ratelimit: Option<String>, +} + +impl CephObjectStoreUser { + /// Convenience constructor: a user attached to `store` with the given + /// display name. Capabilities and quotas left unset (RGW defaults apply). + pub fn for_store(name: &str, namespace: &str, store: &str, display_name: &str) -> Self { + Self { + metadata: ObjectMeta { + name: Some(name.to_string()), + namespace: Some(namespace.to_string()), + ..ObjectMeta::default() + }, + spec: CephObjectStoreUserSpec { + store: store.to_string(), + display_name: Some(display_name.to_string()), + ..CephObjectStoreUserSpec::default() + }, + } + } + + /// Name of the auto-generated Secret carrying this user's `AccessKey` / + /// `SecretKey` (base64-encoded). 
+ pub fn credentials_secret_name(&self) -> String { + let user = self.metadata.name.as_deref().unwrap_or(""); + format!("rook-ceph-object-user-{}-{}", self.spec.store, user) + } +} -- 2.39.5 From 521781798c3b0aa5fd1588585c58687faf87d2e3 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:20:26 -0400 Subject: [PATCH 08/19] feat(examples): create default S3 user in install_rook_ceph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The example now provisions a CephObjectStoreUser named "harmony-default-user" attached to the ceph-objectstore. After the run, the user's S3 credentials are available in the rook-ceph-object-user-ceph-objectstore-harmony-default-user Secret in the rook-ceph namespace — no manual radosgw-admin or YAML steps required. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/install_rook_ceph/src/main.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/examples/install_rook_ceph/src/main.rs b/examples/install_rook_ceph/src/main.rs index ac9f9aa5..c370cb48 100644 --- a/examples/install_rook_ceph/src/main.rs +++ b/examples/install_rook_ceph/src/main.rs @@ -3,7 +3,7 @@ use harmony::{ modules::storage::ceph::{ RookCephClusterScore, RookCephOperatorScore, ceph_validate_health_score::CephVerifyClusterHealth, - crd::{CephObjectStore, CephObjectStoreSpec, GatewaySpec}, + crd::{CephObjectStore, CephObjectStoreSpec, CephObjectStoreUser, GatewaySpec}, }, score::Score, topology::K8sAnywhereTopology, @@ -19,6 +19,7 @@ use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; const NAMESPACE: &str = "rook-ceph"; const OBJECTSTORE_NAME: &str = "ceph-objectstore"; +const S3_USER_NAME: &str = "harmony-default-user"; // External S3 hostname for the Ingress. Edit this to match the DNS name // you'll point at your cluster's ingress LB. 
@@ -50,8 +51,16 @@ async fn main() { }, }; + let s3_user = CephObjectStoreUser::for_store( + S3_USER_NAME, + NAMESPACE, + OBJECTSTORE_NAME, + "Harmony default S3 user", + ); + let mut cluster = RookCephClusterScore::default_okd(NAMESPACE); cluster.object_stores = vec![object_store]; + cluster.object_store_users = vec![s3_user]; // Edge-TLS Ingress in front of the RGW Service. Rook creates the Service // as `rook-ceph-rgw-<store>` on the gateway port (8080 here). -- 2.39.5 From fc37b08a6fee5e879dd47abb44c5ac4ee0d1b63b Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:27:26 -0400 Subject: [PATCH 09/19] feat(storage/ceph): typed cephConfig field + set_config builder on CephClusterSpec Adds the missing surface to drive Rook's declarative centralized config without dropping out to imperative `ceph config set` calls in the toolbox. The new `ceph_config: Option<BTreeMap<String, BTreeMap<String, String>>>` field on CephClusterSpec mirrors the Rook v1.18 `spec.cephConfig` shape: outer key is the Ceph "WHO" target ("global", "osd.*", "mon.*", "mgr.*", "client.rgw.<name>", "osd.0", ...), inner is `option-name -> value`. All values are strings per Rook (Ceph parses them). Rook applies these after MONs reach quorum and re-applies on drift. `CephClusterSpec::set_config(who, key, value)` is a chainable helper that lazily allocates the maps so callers can write `.set_config("osd.*", "osd_max_backfills", "1")` instead of building the nested BTreeMaps by hand.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/storage/ceph/crd/cluster.rs | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/harmony/src/modules/storage/ceph/crd/cluster.rs b/harmony/src/modules/storage/ceph/crd/cluster.rs index ec8e29f4..71cda132 100644 --- a/harmony/src/modules/storage/ceph/crd/cluster.rs +++ b/harmony/src/modules/storage/ceph/crd/cluster.rs @@ -40,6 +40,38 @@ pub struct CephClusterSpec { pub wait_timeout_for_healthy_osd_in_minutes: Option, #[serde(skip_serializing_if = "Option::is_none")] pub disruption_management: Option, + /// Centralized Ceph config applied by the operator after MONs reach quorum. + /// Outer key is the "WHO" target (e.g. `"global"`, `"osd.*"`, `"mgr.*"`, + /// `"client.rgw.<name>"`, or a specific daemon like `"osd.0"`). Inner map + /// is `option-name -> string-value` — Rook calls `ceph config set + /// <who> <key> <value>` for each entry. Values must be strings even for numerics + /// and booleans; Ceph parses them. Rook does not validate the keys. + /// + /// Prefer [`CephClusterSpec::set_config`] for ergonomic insertion. + #[serde(skip_serializing_if = "Option::is_none")] + pub ceph_config: Option>>, +} + +impl CephClusterSpec { + /// Set a single centralized Ceph config entry (`ceph config set + /// <who> <key> <value>`). Chainable. Creates the `ceph_config` map and the + /// per-WHO sub-map on demand. + /// + /// Common `who` values: `"global"`, `"osd.*"`, `"mon.*"`, `"mgr.*"`, + /// `"client.rgw.<name>"`, or a specific daemon like `"osd.0"`.
+ pub fn set_config( + &mut self, + who: impl Into, + key: impl Into, + value: impl Into, + ) -> &mut Self { + self.ceph_config + .get_or_insert_with(BTreeMap::new) + .entry(who.into()) + .or_default() + .insert(key.into(), value.into()); + self + } } impl Default for CephCluster { @@ -69,6 +101,7 @@ impl Default for CephClusterSpec { skip_upgrade_checks: None, wait_timeout_for_healthy_osd_in_minutes: None, disruption_management: None, + ceph_config: None, } } } -- 2.39.5 From db398f0dc95eeb60288cae3eaaf576c4e3d7712e Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:27:36 -0400 Subject: [PATCH 10/19] feat(examples): apply pico-OKD OSD tuning via cephConfig in install_rook_ceph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires four conservative OSD config keys into the cluster spec so that on a small pico OKD, recovery storms don't starve client I/O: osd_max_backfills=1, osd_recovery_max_active=1, osd_recovery_op_priority=1, osd_mclock_profile=high_client_ops Leaves a commented-out alternative for disabling mclock entirely (osd_op_queue=wpq) if the mclock scheduler turns out to be the culprit. These are NOT defaults baked into RookCephClusterScore — they live in the example because they're specific to the pico use case. Adjust or remove per your hardware. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/install_rook_ceph/src/main.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/examples/install_rook_ceph/src/main.rs b/examples/install_rook_ceph/src/main.rs index c370cb48..b206c936 100644 --- a/examples/install_rook_ceph/src/main.rs +++ b/examples/install_rook_ceph/src/main.rs @@ -62,6 +62,24 @@ async fn main() { cluster.object_stores = vec![object_store]; cluster.object_store_users = vec![s3_user]; + // Pico-OKD tuning: relax OSD backfill/recovery on a small cluster so + // recovery storms don't starve client I/O. 
These are equivalent to running + // `ceph config set osd <option> <value>` against the live cluster, but + // declarative — Rook reconciles them after the MONs reach quorum and + // re-applies on drift. Values are strings (Ceph parses them). + // + // Adjust or remove per your hardware. Not opinionated defaults — only + // wired here because the user's first target is a pico OKD. + cluster + .cluster + .spec + .set_config("osd.*", "osd_max_backfills", "1") + .set_config("osd.*", "osd_recovery_max_active", "1") + .set_config("osd.*", "osd_recovery_op_priority", "1") + .set_config("osd.*", "osd_mclock_profile", "high_client_ops"); + // Alternative — disable mclock entirely if you suspect it's the problem: + // cluster.cluster.spec.set_config("osd.*", "osd_op_queue", "wpq"); + // Edge-TLS Ingress in front of the RGW Service. Rook creates the Service // as `rook-ceph-rgw-<store>` on the gateway port (8080 here). // Uncomment this block (and its imports + consts above) to enable, then -- 2.39.5 From b1bbc78331c03c81776e66f3d3138b46482bf0e4 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:42:57 -0400 Subject: [PATCH 11/19] fix(storage/ceph): enable rook-ceph-tools by default in RookCephOperatorScore The Rook operator Helm chart ships toolbox.enabled=false by default, so the rook-ceph-tools Deployment is never created. That breaks two downstream consumers: - CephVerifyClusterHealth, which looks up the Deployment and execs `ceph health` inside it - RookCephClusterScore's new post-apply readiness wait (next commit), which polls the same path Add an `enable_toolbox: bool` field on RookCephOperatorScore (default true via both default_okd() and default_k8s()) that sets the Helm value `toolbox.enabled` to the requested string. Users who genuinely don't want the toolbox can opt out, but the typical Harmony flow needs it.
Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony/src/modules/storage/ceph/operator_score.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/harmony/src/modules/storage/ceph/operator_score.rs b/harmony/src/modules/storage/ceph/operator_score.rs index 566b0eef..c8eb52c8 100644 --- a/harmony/src/modules/storage/ceph/operator_score.rs +++ b/harmony/src/modules/storage/ceph/operator_score.rs @@ -42,11 +42,17 @@ pub struct RookCephOperatorScore { pub hostpath_requires_privileged: bool, pub enable_rbd_driver: bool, pub enable_cephfs_driver: bool, + /// Deploy the `rook-ceph-tools` pod. Required by `CephVerifyClusterHealth` + /// and by `RookCephClusterScore`'s post-apply readiness check, which exec + /// `ceph health` inside it. Defaults to `true` — the chart default is + /// `false`, but Harmony's flow assumes the toolbox is present. + pub enable_toolbox: bool, } impl RookCephOperatorScore { /// OKD-friendly defaults: `rook-ceph` namespace, both CSI drivers enabled, - /// hostPath privileged mode on. Chart version unpinned (uses latest). + /// hostPath privileged mode on, toolbox enabled. Chart version unpinned + /// (uses latest).
pub fn default_okd() -> Self { Self { namespace: "rook-ceph".to_string(), @@ -54,6 +60,7 @@ impl RookCephOperatorScore { hostpath_requires_privileged: true, enable_rbd_driver: true, enable_cephfs_driver: true, + enable_toolbox: true, } } @@ -89,6 +96,10 @@ impl Score for RookCephOperatorScore { "true".to_string(), ); } + values.insert( + NonBlankString::from_str("toolbox.enabled").unwrap(), + self.enable_toolbox.to_string(), + ); let chart_version = self .chart_version -- 2.39.5 From 94614f874c81072d2f1c6b53cd7b0b8e6685ede6 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:43:18 -0400 Subject: [PATCH 12/19] fix(storage/ceph): wait for toolbox + HEALTH_OK in RookCephClusterScore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit K8sResourceScore returns once the API server has accepted a CR — not once the operator has reconciled it. So previously, RookCephClusterScore's interpret() returned in ~5 seconds while the actual cluster was still 2-15 minutes from being usable, causing the immediately-following CephVerifyClusterHealth to fail with "rook-ceph-tools not found" or HEALTH_WARN ≠ HEALTH_OK on a single-shot check. After applying the CephCluster CR, the Score now waits for: 1. The rook-ceph-tools Deployment to have ≥1 ready replica (10 min timeout). Gating exec on this is mandatory because exec_app_capture_output panics (`.expect("No matching pod")`) if called when no toolbox pod exists yet. 2. `ceph health` to return HEALTH_OK (20 min timeout). Fresh clusters sit in HEALTH_WARN for a few minutes while mons reach quorum, mgrs come up, and OSDs bootstrap their PGs. The wait logs every status transition so the user can tell what's happening. Only after both waits succeed does the Score apply the dependent CRs (block pools, filesystems, object stores, users). Failing fast at the cluster stage is better than applying CRs the operator can never reconcile. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/storage/ceph/cluster_score.rs | 143 +++++++++++++++++- 1 file changed, 135 insertions(+), 8 deletions(-) diff --git a/harmony/src/modules/storage/ceph/cluster_score.rs b/harmony/src/modules/storage/ceph/cluster_score.rs index 73abec18..909ad479 100644 --- a/harmony/src/modules/storage/ceph/cluster_score.rs +++ b/harmony/src/modules/storage/ceph/cluster_score.rs @@ -1,9 +1,12 @@ use std::collections::BTreeMap; +use std::sync::Arc; +use std::time::{Duration, Instant}; use async_trait::async_trait; +use harmony_k8s::K8sClient; use k8s_openapi::api::storage::v1::StorageClass; use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; -use log::info; +use log::{debug, info, warn}; use serde::Serialize; use crate::data::Version; @@ -14,6 +17,18 @@ use crate::score::Score; use crate::topology::{K8sclient, Topology}; use harmony_types::id::Id; +/// Total time we'll wait for the toolbox Deployment to come up before bailing. +const TOOLBOX_READY_TIMEOUT: Duration = Duration::from_secs(600); + +/// Total time we'll wait for the Ceph cluster to reach `HEALTH_OK` after the +/// toolbox is up. Mons + mgrs + OSDs + initial PG peering on a small cluster +/// typically lands in 5–15 min; 20 covers slower hardware. +const HEALTH_OK_TIMEOUT: Duration = Duration::from_secs(1200); + +/// Poll cadence for both waits. Cluster transitions on the order of seconds, +/// not subsecond — no benefit to a tighter loop. +const POLL_INTERVAL: Duration = Duration::from_secs(10); + use super::crd::{ CephBlockPool, CephBlockPoolSpec, CephCluster, CephFilesystem, CephObjectStore, CephObjectStoreUser, FailureDomain, ReplicatedSpec, @@ -28,15 +43,22 @@ use super::crd::{ /// /// # Ordering /// CRs are applied in this order: -/// 1. `CephCluster` (operator must reconcile this to Ready before pools register) -/// 2. `CephBlockPool` resources + their RBD `StorageClass`es -/// 3. 
`CephFilesystem` resources + their CephFS `StorageClass`es -/// 4. `CephObjectStore` resources -/// 5. `CephObjectStoreUser` resources (Rook materializes their S3 credentials +/// 1. `CephCluster` +/// 2. **Wait** for the `rook-ceph-tools` Deployment to come up (requires +/// `RookCephOperatorScore::enable_toolbox = true`, which is the Harmony +/// default). +/// 3. **Wait** for `ceph health` to return `HEALTH_OK` — mons in quorum, mgrs +/// up, OSDs bootstrapped, initial PGs peered. Typically 5–15 min on a +/// small cluster; capped at 20 min. +/// 4. `CephBlockPool` resources + their RBD `StorageClass`es +/// 5. `CephFilesystem` resources + their CephFS `StorageClass`es +/// 6. `CephObjectStore` resources +/// 7. `CephObjectStoreUser` resources (Rook materializes their S3 credentials /// into a `rook-ceph-object-user--` Secret per user) /// -/// The operator reconciles each in the background; the apply itself succeeds -/// as soon as the CR is accepted by the API server. +/// The waits in steps 2–3 mean this Score takes minutes, not seconds, to +/// return — but downstream Scores like `CephVerifyClusterHealth` can rely on +/// the cluster actually being ready when this returns. #[derive(Debug, Clone, Serialize)] pub struct RookCephClusterScore { pub namespace: String, @@ -118,6 +140,14 @@ impl Interpret for RookCephClusterInterpre .interpret(inventory, topology) .await?; + let k8s = topology + .k8s_client() + .await + .map_err(|e| InterpretError::new(format!("Failed to get k8s client: {e}")))?; + + wait_for_toolbox_ready(&k8s, &ns).await?; + wait_for_health_ok(&k8s, &ns).await?; + for (idx, pool) in self.score.block_pools.iter().enumerate() { let pool_name = pool .metadata @@ -223,6 +253,103 @@ impl Interpret for RookCephClusterInterpre } } +/// Poll the `rook-ceph-tools` Deployment until it has ≥1 ready replica. 
This +/// is a prerequisite for any `ceph` CLI call against the cluster — including +/// the HEALTH_OK wait below, and the existing `CephVerifyClusterHealth` Score. +/// +/// `exec_app_capture_output` panics (`.expect("No matching pod")`) if called +/// when no toolbox pod exists yet — so we must gate exec on this first. +async fn wait_for_toolbox_ready(client: &Arc, ns: &str) -> Result<(), InterpretError> { + let toolbox = "rook-ceph-tools"; + info!( + "[Rook-Ceph] Waiting for '{}' deployment in '{}' (up to {}s)", + toolbox, + ns, + TOOLBOX_READY_TIMEOUT.as_secs() + ); + let start = Instant::now(); + loop { + match client.get_deployment(toolbox, Some(ns)).await { + Ok(Some(dep)) => { + let ready = dep + .status + .as_ref() + .and_then(|s| s.ready_replicas) + .unwrap_or(0); + if ready >= 1 { + info!("[Rook-Ceph] '{}' is ready ({} replica(s))", toolbox, ready); + return Ok(()); + } + debug!("[Rook-Ceph] '{}' present but 0 ready replicas", toolbox); + } + Ok(None) => debug!("[Rook-Ceph] '{}' deployment not yet created", toolbox), + Err(e) => debug!("[Rook-Ceph] error checking '{}': {e}", toolbox), + } + if start.elapsed() > TOOLBOX_READY_TIMEOUT { + return Err(InterpretError::new(format!( + "Timed out after {}s waiting for '{}' deployment to be ready in '{}'. \ + Is the operator running with toolbox.enabled=true?", + TOOLBOX_READY_TIMEOUT.as_secs(), + toolbox, + ns, + ))); + } + tokio::time::sleep(POLL_INTERVAL).await; + } +} + +/// Poll `ceph health` via the toolbox until it returns `HEALTH_OK`. Fresh +/// clusters typically sit in `HEALTH_WARN` for a few minutes while mons reach +/// quorum, mgrs come up, and OSDs bootstrap their PGs. Returning before +/// `HEALTH_OK` would race with the subsequent block-pool / object-store / +/// user applies and with `CephVerifyClusterHealth`. 
+async fn wait_for_health_ok(client: &Arc, ns: &str) -> Result<(), InterpretError> { + info!( + "[Rook-Ceph] Waiting for cluster to reach HEALTH_OK in '{}' (up to {}s)", + ns, + HEALTH_OK_TIMEOUT.as_secs() + ); + let start = Instant::now(); + let mut last_status = String::new(); + loop { + match client + .exec_app_capture_output( + "rook-ceph-tools".to_string(), + "app".to_string(), + Some(ns), + vec!["sh", "-c", "ceph health"], + ) + .await + { + Ok(out) => { + let trimmed = out.trim().to_string(); + if trimmed.starts_with("HEALTH_OK") { + info!("[Rook-Ceph] Cluster reached HEALTH_OK"); + return Ok(()); + } + if trimmed != last_status { + info!("[Rook-Ceph] ceph health: {}", trimmed); + last_status = trimmed; + } + } + Err(e) => debug!("[Rook-Ceph] ceph health exec failed: {e}"), + } + if start.elapsed() > HEALTH_OK_TIMEOUT { + warn!( + "[Rook-Ceph] Last ceph health output before timeout: {}", + last_status + ); + return Err(InterpretError::new(format!( + "Timed out after {}s waiting for HEALTH_OK in '{}'. Last status: '{}'", + HEALTH_OK_TIMEOUT.as_secs(), + ns, + last_status, + ))); + } + tokio::time::sleep(POLL_INTERVAL).await; + } +} + fn rbd_storage_class(pool_name: &str, ns: &str, is_default: bool) -> StorageClass { let mut params = BTreeMap::new(); params.insert("clusterID".to_string(), ns.to_string()); -- 2.39.5 From 914d39824509db3703985f2632613f370e1eef1e Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:45:13 -0400 Subject: [PATCH 13/19] chore(examples): adopt 'harmony-s3' as default S3 user name in install_rook_ceph Rename the CephObjectStoreUser const from "harmony-default-user" to the shorter "harmony-s3" that the user prefers as the default. Affects the auto-generated credentials Secret name, which becomes `rook-ceph-object-user-ceph-objectstore-harmony-s3`. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/install_rook_ceph/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/install_rook_ceph/src/main.rs b/examples/install_rook_ceph/src/main.rs index b206c936..2f945382 100644 --- a/examples/install_rook_ceph/src/main.rs +++ b/examples/install_rook_ceph/src/main.rs @@ -19,7 +19,7 @@ use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; const NAMESPACE: &str = "rook-ceph"; const OBJECTSTORE_NAME: &str = "ceph-objectstore"; -const S3_USER_NAME: &str = "harmony-default-user"; +const S3_USER_NAME: &str = "harmony-s3"; // External S3 hostname for the Ingress. Edit this to match the DNS name // you'll point at your cluster's ingress LB. -- 2.39.5 From f748e6ba48499b24ff642744e8f016f93dacd98c Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:45:32 -0400 Subject: [PATCH 14/19] feat(examples): add env.sh for install_rook_ceph (existing-cluster mode) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The example is meant to target a real pico OKD / external cluster, not a throwaway local K3D. Default `K8sAnywhereConfig::from_env()` reads HARMONY_USE_LOCAL_K3D and treats unset/true as "spin up a local K3D" — which is the wrong behavior here. env.sh sets: - HARMONY_USE_LOCAL_K3D=false force external cluster - HARMONY_PROFILE=staging required when use_local_k3d=false (current_target() panics otherwise) - HARMONY_USE_SYSTEM_KUBECONFIG=true use $HOME/.kube/config - HARMONY_SECRET_NAMESPACE/STORE/DATABASE_URL per-example state - RUST_LOG=harmony=debug to see the wait-loop progress Leaves KUBECONFIG and HARMONY_K8S_CONTEXT commented-out as overrides the user can uncomment when their kubeconfig isn't in the default location. 
Usage: source env.sh && cargo run -p example-install-rook-ceph Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/install_rook_ceph/env.sh | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 examples/install_rook_ceph/env.sh diff --git a/examples/install_rook_ceph/env.sh b/examples/install_rook_ceph/env.sh new file mode 100644 index 00000000..a90aa663 --- /dev/null +++ b/examples/install_rook_ceph/env.sh @@ -0,0 +1,33 @@ +# shellcheck shell=bash +# Source this file before running the example: +# source env.sh && cargo run -p example-install-rook-ceph + +# ---------- Target an existing cluster (no local K3D) ------------------------ +# This example is meant to drive a real cluster (pico OKD, vanilla K8s with an +# ingress, etc.) — not a throwaway local one. Force K8sAnywhereTopology to use +# whatever cluster the kubeconfig points at. +export HARMONY_USE_LOCAL_K3D=false + +# Pick "staging" while you're iterating; switch to "production" once you trust +# the deploy. Required whenever HARMONY_USE_LOCAL_K3D=false. +export HARMONY_PROFILE=staging + +# Use $HOME/.kube/config (or whatever KUBECONFIG points at). Set to false if +# you'd rather Harmony manage the kubeconfig itself. +export HARMONY_USE_SYSTEM_KUBECONFIG=true + +# If your kubeconfig isn't in the default place, point at it explicitly. +# export KUBECONFIG=/path/to/your/kubeconfig + +# Pin to a specific kubectl context if your kubeconfig has more than one. +# export HARMONY_K8S_CONTEXT=my-pico-okd + +# ---------- Harmony secret + state storage ----------------------------------- +export HARMONY_SECRET_NAMESPACE=install-rook-ceph +export HARMONY_SECRET_STORE=file +export HARMONY_DATABASE_URL=sqlite://harmony_install_rook_ceph.sqlite + +# ---------- Logging ---------------------------------------------------------- +# The RookCephClusterScore wait loops log every `ceph health` transition at +# info level; debug shows polling cadence. 
+export RUST_LOG=harmony=debug -- 2.39.5 From a661b1d0786d562c8a20b98d4aecd02bf80a7fa9 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:49:16 -0400 Subject: [PATCH 15/19] fix(examples): drop unneeded env vars from install_rook_ceph/env.sh Two vars in the previous env.sh were either dead code or actively broken for this example's flow: HARMONY_PROFILE Only read by K8sAnywhereTopology::current_target(), which is only invoked by Scores implementing MultiTargetTopology (ntfy, application packaging, app monitoring). None of the Scores in install_rook_ceph require that trait, so the value is never read and the panic case is never reached. Removed. HARMONY_USE_SYSTEM_KUBECONFIG=true Setting this to true is actively worse: try_get_or_install_k8s_client hits `todo!()` at k8s_anywhere.rs:900 when this branch is taken. The correct way to point at an existing kubeconfig is the standard KUBECONFIG env var (or the default $HOME/.kube/config fallback in get_kube_config_path()). Removed. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/install_rook_ceph/env.sh | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/examples/install_rook_ceph/env.sh b/examples/install_rook_ceph/env.sh index a90aa663..e07420e8 100644 --- a/examples/install_rook_ceph/env.sh +++ b/examples/install_rook_ceph/env.sh @@ -8,21 +8,19 @@ # whatever cluster the kubeconfig points at. export HARMONY_USE_LOCAL_K3D=false -# Pick "staging" while you're iterating; switch to "production" once you trust -# the deploy. Required whenever HARMONY_USE_LOCAL_K3D=false. -export HARMONY_PROFILE=staging - -# Use $HOME/.kube/config (or whatever KUBECONFIG points at). Set to false if -# you'd rather Harmony manage the kubeconfig itself. -export HARMONY_USE_SYSTEM_KUBECONFIG=true - -# If your kubeconfig isn't in the default place, point at it explicitly. +# K8sAnywhereTopology resolves the kubeconfig via: +# 1. KUBECONFIG env var +# 2. 
fallback: $HOME/.kube/config +# Uncomment if your config isn't in the default location. # export KUBECONFIG=/path/to/your/kubeconfig # Pin to a specific kubectl context if your kubeconfig has more than one. # export HARMONY_K8S_CONTEXT=my-pico-okd -# ---------- Harmony secret + state storage ----------------------------------- +# ---------- Harmony state storage (per-example boilerplate) ------------------ +# This example doesn't actually touch harmony_secret or the SQLite DB, but +# other examples in the workspace use this same boilerplate and these vars +# don't cost anything when unread. export HARMONY_SECRET_NAMESPACE=install-rook-ceph export HARMONY_SECRET_STORE=file export HARMONY_DATABASE_URL=sqlite://harmony_install_rook_ceph.sqlite -- 2.39.5 From e3e07e739d1534aa397d4fc6d98f7e3995f86368 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:55:20 -0400 Subject: [PATCH 16/19] fix(storage/ceph): wait for operator + CRD discovery before applying CephCluster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit helm install returns once chart resources are created — not once the operator Deployment is Ready, and not once the API server's discovery cache has picked up the Rook CRDs. The kube-rs client that K8sAnywhereTopology hands out is shared and OnceCell-initialized, so its own discovery cache was populated before the chart added any ceph.rook.io/v1 resources. Applying CephCluster immediately after the operator install therefore tended to fail with "Cannot resolve GVK ceph.rook.io/v1/CephCluster". This is the same race CNPG handles at postgresql/score_k8s.rs:180-203 via wait_until_deployment_ready + wait_for_crd + invalidate_discovery. RookCephClusterScore now does the same dance, at the top of execute(), before any CR apply: 1. wait_until_deployment_ready("rook-ceph-operator", 300s) 2. wait_for_crd("cephclusters.ceph.rook.io", 60s) 3. invalidate_discovery() 4. Apply CephCluster 5. 
(existing) wait for toolbox ready 6. (existing) wait for HEALTH_OK 7. Apply pools / fs / object stores / users The subsequent pool/fs/object-store/user CRD applies happen many minutes later (after HEALTH_OK), by which point the discovery cache has long since refreshed — no per-apply invalidation needed. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/storage/ceph/cluster_score.rs | 73 ++++++++++++++----- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/harmony/src/modules/storage/ceph/cluster_score.rs b/harmony/src/modules/storage/ceph/cluster_score.rs index 909ad479..ad68b2c1 100644 --- a/harmony/src/modules/storage/ceph/cluster_score.rs +++ b/harmony/src/modules/storage/ceph/cluster_score.rs @@ -17,6 +17,15 @@ use crate::score::Score; use crate::topology::{K8sclient, Topology}; use harmony_types::id::Id; +/// Total time we'll wait for the `rook-ceph-operator` Deployment to be ready +/// after `helm install` returns. Image pulls dominate the first run. +const OPERATOR_READY_TIMEOUT: Duration = Duration::from_secs(300); + +/// Total time we'll wait for the `cephclusters.ceph.rook.io` CRD to be +/// registered in the API server's discovery surface. Helm 3 applies CRDs +/// before other resources but the cache can lag. +const CRD_READY_TIMEOUT: Duration = Duration::from_secs(60); + /// Total time we'll wait for the toolbox Deployment to come up before bailing. const TOOLBOX_READY_TIMEOUT: Duration = Duration::from_secs(600); @@ -25,8 +34,8 @@ const TOOLBOX_READY_TIMEOUT: Duration = Duration::from_secs(600); /// typically lands in 5–15 min; 20 covers slower hardware. const HEALTH_OK_TIMEOUT: Duration = Duration::from_secs(1200); -/// Poll cadence for both waits. Cluster transitions on the order of seconds, -/// not subsecond — no benefit to a tighter loop. +/// Poll cadence for the toolbox + health waits. Cluster transitions on the +/// order of seconds, not subsecond — no benefit to a tighter loop. 
const POLL_INTERVAL: Duration = Duration::from_secs(10); use super::crd::{ @@ -43,22 +52,26 @@ use super::crd::{ /// /// # Ordering /// CRs are applied in this order: -/// 1. `CephCluster` -/// 2. **Wait** for the `rook-ceph-tools` Deployment to come up (requires +/// 1. **Wait** for the `rook-ceph-operator` Deployment to be Ready (Helm +/// install returns before pods are up). +/// 2. **Wait** for `cephclusters.ceph.rook.io` CRD to be discoverable and +/// invalidate the kube-rs discovery cache. +/// 3. `CephCluster` +/// 4. **Wait** for the `rook-ceph-tools` Deployment to come up (requires /// `RookCephOperatorScore::enable_toolbox = true`, which is the Harmony /// default). -/// 3. **Wait** for `ceph health` to return `HEALTH_OK` — mons in quorum, mgrs +/// 5. **Wait** for `ceph health` to return `HEALTH_OK` — mons in quorum, mgrs /// up, OSDs bootstrapped, initial PGs peered. Typically 5–15 min on a /// small cluster; capped at 20 min. -/// 4. `CephBlockPool` resources + their RBD `StorageClass`es -/// 5. `CephFilesystem` resources + their CephFS `StorageClass`es -/// 6. `CephObjectStore` resources -/// 7. `CephObjectStoreUser` resources (Rook materializes their S3 credentials +/// 6. `CephBlockPool` resources + their RBD `StorageClass`es +/// 7. `CephFilesystem` resources + their CephFS `StorageClass`es +/// 8. `CephObjectStore` resources +/// 9. `CephObjectStoreUser` resources (Rook materializes their S3 credentials /// into a `rook-ceph-object-user--` Secret per user) /// -/// The waits in steps 2–3 mean this Score takes minutes, not seconds, to -/// return — but downstream Scores like `CephVerifyClusterHealth` can rely on -/// the cluster actually being ready when this returns. +/// The waits in steps 1, 2, 4, and 5 mean this Score takes minutes, not +/// seconds, to return — but downstream Scores like `CephVerifyClusterHealth` +/// can rely on the cluster actually being ready when this returns. 
#[derive(Debug, Clone, Serialize)] pub struct RookCephClusterScore { pub namespace: String, @@ -135,16 +148,42 @@ impl Interpret for RookCephClusterInterpre ) -> Result { let ns = self.score.namespace.clone(); - info!("[Rook-Ceph] Applying CephCluster '{}'", ns); - K8sResourceScore::single(self.score.cluster.clone(), Some(ns.clone())) - .interpret(inventory, topology) - .await?; - let k8s = topology .k8s_client() .await .map_err(|e| InterpretError::new(format!("Failed to get k8s client: {e}")))?; + // `helm install` (in RookCephOperatorScore) returns once resources are + // created — not once the operator Deployment is Ready or the CRDs are + // registered in the API server's discovery cache. Without these waits, + // the CephCluster apply below races and typically fails with + // "Cannot resolve GVK ceph.rook.io/v1/CephCluster". + info!( + "[Rook-Ceph] Waiting for rook-ceph-operator deployment in '{}'", + ns + ); + k8s.wait_until_deployment_ready( + "rook-ceph-operator", + Some(&ns), + Some(OPERATOR_READY_TIMEOUT), + ) + .await + .map_err(|e| InterpretError::new(format!("rook-ceph-operator not ready: {e}")))?; + + info!("[Rook-Ceph] Waiting for cephclusters.ceph.rook.io CRD registration"); + k8s.wait_for_crd("cephclusters.ceph.rook.io", Some(CRD_READY_TIMEOUT)) + .await + .map_err(|e| InterpretError::new(format!("CephCluster CRD not registered: {e}")))?; + // The discovery cache on our shared K8sClient was populated before the + // operator install added the Rook CRDs. Force a refresh so the next + // apply can resolve the new GVK. 
+ k8s.invalidate_discovery().await; + + info!("[Rook-Ceph] Applying CephCluster '{}'", ns); + K8sResourceScore::single(self.score.cluster.clone(), Some(ns.clone())) + .interpret(inventory, topology) + .await?; + wait_for_toolbox_ready(&k8s, &ns).await?; wait_for_health_ok(&k8s, &ns).await?; -- 2.39.5 From 28bcedf9d027cfe11ef7cdeefbe7803022e82945 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 07:59:23 -0400 Subject: [PATCH 17/19] fix(storage/ceph): pin Rook chart + Ceph image to canonical 1.19.5 pairing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous defaults shipped an unpinned Helm chart_version and a guessed Ceph image tag ("v19.2.3" — no build suffix, no source-of-truth reference). Both are unacceptable for a production install path. Replaced with the official pairing per the Rook 1.19 documentation: - RookCephOperatorScore::default_okd().chart_version = Some("v1.19.5") Latest stable release of the rook-ceph Helm chart at https://charts.rook.io/release as of 2026-05; verified via the chart repo's index.yaml. - CephVersionSpec::default().image = "quay.io/ceph/ceph:v19.2.3-20250717" The full version+build tag the Rook 1.19 upgrade docs explicitly recommend for production at https://rook.io/docs/rook/v1.19/Upgrade/ceph-upgrade/, with the date-stamped suffix that pins an immutable container image. Pinning here means heterogeneous-daemon-version scenarios are impossible by construction, and upgrades become a deliberate change to this code rather than an unobservable container pull side-effect. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony/src/modules/storage/ceph/crd/cluster.rs | 6 +++++- harmony/src/modules/storage/ceph/operator_score.rs | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/harmony/src/modules/storage/ceph/crd/cluster.rs b/harmony/src/modules/storage/ceph/crd/cluster.rs index 71cda132..9bab64be 100644 --- a/harmony/src/modules/storage/ceph/crd/cluster.rs +++ b/harmony/src/modules/storage/ceph/crd/cluster.rs @@ -118,8 +118,12 @@ pub struct CephVersionSpec { impl Default for CephVersionSpec { fn default() -> Self { + // Pinned to the full version+build tag explicitly recommended for + // production by the Rook 1.19 docs (Upgrade/ceph-upgrade). The + // date-stamped suffix locks the exact container image so the cluster + // can't go heterogeneous across daemon restarts. Self { - image: "quay.io/ceph/ceph:v19.2.3".to_string(), + image: "quay.io/ceph/ceph:v19.2.3-20250717".to_string(), allow_unsupported: Some(false), image_pull_policy: Some("IfNotPresent".to_string()), } diff --git a/harmony/src/modules/storage/ceph/operator_score.rs b/harmony/src/modules/storage/ceph/operator_score.rs index c8eb52c8..356c4cee 100644 --- a/harmony/src/modules/storage/ceph/operator_score.rs +++ b/harmony/src/modules/storage/ceph/operator_score.rs @@ -51,12 +51,14 @@ pub struct RookCephOperatorScore { impl RookCephOperatorScore { /// OKD-friendly defaults: `rook-ceph` namespace, both CSI drivers enabled, - /// hostPath privileged mode on, toolbox enabled. Chart version unpinned - /// (uses latest). + /// hostPath privileged mode on, toolbox enabled. Pinned to Rook 1.19.5 — + /// the latest stable release at the time this Score was written, and the + /// pair tested against the Ceph image default in + /// [`super::crd::CephVersionSpec::default`]. 
pub fn default_okd() -> Self { Self { namespace: "rook-ceph".to_string(), - chart_version: None, + chart_version: Some("v1.19.5".to_string()), hostpath_requires_privileged: true, enable_rbd_driver: true, enable_cephfs_driver: true, -- 2.39.5 From 2ffa717a643f640ab55f115d258c7cb854107093 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 08:26:54 -0400 Subject: [PATCH 18/19] fix(storage/ceph): ship rook-ceph-tools as a typed Deployment, not via Helm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix passed --set toolbox.enabled=true to the rook-ceph operator chart. That was wrong: in Rook v1.19 the toolbox is no longer part of the operator chart — it's a standalone manifest at deploy/examples/toolbox.yaml. The Helm value was silently ignored, so the rook-ceph-tools Deployment was never created. Symptom on a real install: cluster came up healthy (mons + mgrs + OSDs all Running) but `oc -n rook-ceph get deploy/rook-ceph-tools` returned NotFound, and RookCephClusterScore's wait_for_toolbox_ready timed out after 10 minutes. This commit: - Adds a new toolbox.rs module that ports the canonical rook/rook@v1.19.5 toolbox.yaml verbatim to a typed k8s_openapi::Deployment, including the inline toolbox.sh bash script (~50 lines) that re-renders /etc/ceph/ceph.conf when mon endpoints change. - Sources the container image from the CephCluster spec's cephVersion.image so the toolbox stays in lockstep with the cluster's Ceph version automatically — no second pin to keep in sync. - Has RookCephClusterScore apply the typed Deployment via K8sResourceScore::single immediately after applying CephCluster, then waits for it to be Ready as before. - Removes the now-dead enable_toolbox field and toolbox.enabled Helm value from RookCephOperatorScore, plus the misleading doc claim that the chart deploys the toolbox. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/storage/ceph/cluster_score.rs | 43 ++- harmony/src/modules/storage/ceph/mod.rs | 1 + .../modules/storage/ceph/operator_score.rs | 27 +- harmony/src/modules/storage/ceph/toolbox.rs | 255 ++++++++++++++++++ 4 files changed, 298 insertions(+), 28 deletions(-) create mode 100644 harmony/src/modules/storage/ceph/toolbox.rs diff --git a/harmony/src/modules/storage/ceph/cluster_score.rs b/harmony/src/modules/storage/ceph/cluster_score.rs index ad68b2c1..8aa96b0a 100644 --- a/harmony/src/modules/storage/ceph/cluster_score.rs +++ b/harmony/src/modules/storage/ceph/cluster_score.rs @@ -42,6 +42,7 @@ use super::crd::{ CephBlockPool, CephBlockPoolSpec, CephCluster, CephFilesystem, CephObjectStore, CephObjectStoreUser, FailureDomain, ReplicatedSpec, }; +use super::toolbox::toolbox_deployment; /// Deploys a typed Rook-Ceph cluster: `CephCluster` + pools + filesystems + /// object stores + their consumer `StorageClass`es. Assumes the Rook operator @@ -51,25 +52,27 @@ use super::crd::{ /// applied via `K8sResourceScore::single` — the same apply path used by CNPG. /// /// # Ordering -/// CRs are applied in this order: +/// Steps applied in this order: /// 1. **Wait** for the `rook-ceph-operator` Deployment to be Ready (Helm /// install returns before pods are up). /// 2. **Wait** for `cephclusters.ceph.rook.io` CRD to be discoverable and /// invalidate the kube-rs discovery cache. -/// 3. `CephCluster` -/// 4. **Wait** for the `rook-ceph-tools` Deployment to come up (requires -/// `RookCephOperatorScore::enable_toolbox = true`, which is the Harmony -/// default). -/// 5. **Wait** for `ceph health` to return `HEALTH_OK` — mons in quorum, mgrs +/// 3. Apply `CephCluster`. +/// 4. Apply the `rook-ceph-tools` Deployment (typed, ported from the upstream +/// `deploy/examples/toolbox.yaml`). Image is read from the CephCluster +/// spec so it always matches the daemons. +/// 5. 
**Wait** for the `rook-ceph-tools` Deployment to be Ready. +/// 6. **Wait** for `ceph health` to return `HEALTH_OK` — mons in quorum, mgrs /// up, OSDs bootstrapped, initial PGs peered. Typically 5–15 min on a /// small cluster; capped at 20 min. -/// 6. `CephBlockPool` resources + their RBD `StorageClass`es -/// 7. `CephFilesystem` resources + their CephFS `StorageClass`es -/// 8. `CephObjectStore` resources -/// 9. `CephObjectStoreUser` resources (Rook materializes their S3 credentials -/// into a `rook-ceph-object-user--` Secret per user) +/// 7. Apply `CephBlockPool` resources + their RBD `StorageClass`es. +/// 8. Apply `CephFilesystem` resources + their CephFS `StorageClass`es. +/// 9. Apply `CephObjectStore` resources. +/// 10. Apply `CephObjectStoreUser` resources (Rook materializes their S3 +/// credentials into a `rook-ceph-object-user--` Secret per +/// user). /// -/// The waits in steps 1, 2, 4, and 5 mean this Score takes minutes, not +/// The waits in steps 1, 2, 5, and 6 mean this Score takes minutes, not /// seconds, to return — but downstream Scores like `CephVerifyClusterHealth` /// can rely on the cluster actually being ready when this returns. #[derive(Debug, Clone, Serialize)] @@ -184,6 +187,22 @@ impl Interpret for RookCephClusterInterpre .interpret(inventory, topology) .await?; + // Rook v1.19 no longer deploys the toolbox via the operator Helm chart; + // we ship it as a typed Deployment ported from the upstream + // `deploy/examples/toolbox.yaml`. Image follows whatever CephCluster + // is configured with — keeps the toolbox in lockstep with the daemons. 
+ let toolbox_image = &self.score.cluster.spec.ceph_version.image; + info!( + "[Rook-Ceph] Applying rook-ceph-tools Deployment (image='{}')", + toolbox_image + ); + K8sResourceScore::single( + toolbox_deployment(&ns, toolbox_image), + Some(ns.clone()), + ) + .interpret(inventory, topology) + .await?; + wait_for_toolbox_ready(&k8s, &ns).await?; wait_for_health_ok(&k8s, &ns).await?; diff --git a/harmony/src/modules/storage/ceph/mod.rs b/harmony/src/modules/storage/ceph/mod.rs index e7653d9a..cddb8428 100644 --- a/harmony/src/modules/storage/ceph/mod.rs +++ b/harmony/src/modules/storage/ceph/mod.rs @@ -3,6 +3,7 @@ pub mod ceph_validate_health_score; pub mod cluster_score; pub mod crd; pub mod operator_score; +pub mod toolbox; pub use cluster_score::*; pub use operator_score::*; diff --git a/harmony/src/modules/storage/ceph/operator_score.rs b/harmony/src/modules/storage/ceph/operator_score.rs index 356c4cee..fafc0e4c 100644 --- a/harmony/src/modules/storage/ceph/operator_score.rs +++ b/harmony/src/modules/storage/ceph/operator_score.rs @@ -22,9 +22,14 @@ use harmony_macros::hurl; /// - RBAC (ServiceAccounts, Roles, ClusterRoles, RoleBindings, ClusterRoleBindings) /// - OpenShift SCC bindings when `hostpath_requires_privileged` is true /// -/// The CRs that consume these CRDs are deployed separately by `RookCephClusterScore` -/// (typed Rust structs applied via `K8sResourceScore`), preserving compile-time -/// type-safety on the user-facing surface. +/// The chart does **not** install the `rook-ceph-tools` (toolbox) pod in +/// Rook v1.19 — that's now a standalone manifest in `deploy/examples/`. +/// `RookCephClusterScore` deploys it as a typed Rust Deployment via +/// [`super::toolbox::toolbox_deployment`]. +/// +/// The CRs that consume these CRDs are deployed separately by +/// `RookCephClusterScore` (typed Rust structs applied via `K8sResourceScore`), +/// preserving compile-time type-safety on the user-facing surface. 
/// /// # OKD requirements /// - `hostpath_requires_privileged` must be `true` — OpenShift's SELinux @@ -42,18 +47,13 @@ pub struct RookCephOperatorScore { pub hostpath_requires_privileged: bool, pub enable_rbd_driver: bool, pub enable_cephfs_driver: bool, - /// Deploy the `rook-ceph-tools` pod. Required by `CephVerifyClusterHealth` - /// and by `RookCephClusterScore`'s post-apply readiness check, which exec - /// `ceph status` inside it. Defaults to `true` — the chart default is - /// `false`, but Harmony's flow assumes the toolbox is present. - pub enable_toolbox: bool, } impl RookCephOperatorScore { /// OKD-friendly defaults: `rook-ceph` namespace, both CSI drivers enabled, - /// hostPath privileged mode on, toolbox enabled. Pinned to Rook 1.19.5 — - /// the latest stable release at the time this Score was written, and the - /// pair tested against the Ceph image default in + /// hostPath privileged mode on. Pinned to Rook 1.19.5 — the latest stable + /// release at the time this Score was written, and the pair tested + /// against the Ceph image default in /// [`super::crd::CephVersionSpec::default`]. pub fn default_okd() -> Self { Self { @@ -62,7 +62,6 @@ impl RookCephOperatorScore { hostpath_requires_privileged: true, enable_rbd_driver: true, enable_cephfs_driver: true, - enable_toolbox: true, } } @@ -98,10 +97,6 @@ impl Score for RookCephOperatorScore { "true".to_string(), ); } - values.insert( - NonBlankString::from_str("toolbox.enabled").unwrap(), - self.enable_toolbox.to_string(), - ); let chart_version = self .chart_version diff --git a/harmony/src/modules/storage/ceph/toolbox.rs b/harmony/src/modules/storage/ceph/toolbox.rs new file mode 100644 index 00000000..d58917e3 --- /dev/null +++ b/harmony/src/modules/storage/ceph/toolbox.rs @@ -0,0 +1,255 @@ +//! Typed `rook-ceph-tools` Deployment. +//! +//! In Rook v1.19 the toolbox is *not* installed by the operator Helm chart — +//! it's a standalone manifest at `deploy/examples/toolbox.yaml` in the rook +//! 
repo. This module ports that manifest to a typed `k8s_openapi::Deployment` +//! so [`super::RookCephClusterScore`] can apply it via `K8sResourceScore` +//! without dropping back to raw YAML. +//! +//! The container image is sourced from the CephCluster spec's `cephVersion` +//! so the toolbox stays in lockstep with the cluster's Ceph version. + +use std::collections::BTreeMap; + +use k8s_openapi::api::apps::v1::{Deployment, DeploymentSpec}; +use k8s_openapi::api::core::v1::{ + Capabilities, ConfigMapVolumeSource, Container, EmptyDirVolumeSource, EnvVar, EnvVarSource, + KeyToPath, PodSpec, PodTemplateSpec, SecretKeySelector, SecretVolumeSource, SecurityContext, + Toleration, Volume, VolumeMount, +}; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; + +/// Inline replication of Rook's `toolbox.sh` — re-renders `/etc/ceph/ceph.conf` +/// and keyring on mon-endpoint changes so the bundled `quay.io/ceph/ceph` +/// image (which lacks the rook tooling) can be used directly. +/// +/// Verbatim from `deploy/examples/toolbox.yaml` in rook/rook@v1.19.5. 
+const TOOLBOX_SCRIPT: &str = r#"# Replicate the script from toolbox.sh inline so the ceph image +# can be run directly, instead of requiring the rook toolbox +CEPH_CONFIG="/etc/ceph/ceph.conf" +MON_CONFIG="/etc/rook/mon-endpoints" +KEYRING_FILE="/etc/ceph/keyring" +CONFIG_OVERRIDE="/etc/rook-config-override/config" + +# create a ceph config file in its default location so ceph/rados tools can be used +# without specifying any arguments +write_endpoints() { + endpoints=$(cat ${MON_CONFIG}) + + # filter out the mon names + # external cluster can have numbers or hyphens in mon names, handling them in regex + # shellcheck disable=SC2001 + mon_endpoints=$(echo "${endpoints}"| sed 's/[a-z0-9_-]\+=//g') + + DATE=$(date) + echo "$DATE writing mon endpoints to ${CEPH_CONFIG}: ${endpoints}" + cat <<EOF > ${CEPH_CONFIG} +[global] +mon_host = ${mon_endpoints} + +[client.admin] +keyring = ${KEYRING_FILE} +EOF + + # Merge the config override if it exists and is not empty + if [ -f "${CONFIG_OVERRIDE}" ] && [ -s "${CONFIG_OVERRIDE}" ]; then + echo "$DATE merging config override from ${CONFIG_OVERRIDE}" + echo "" >> ${CEPH_CONFIG} + cat ${CONFIG_OVERRIDE} >> ${CEPH_CONFIG} + fi +} + +# watch the endpoints config file and update if the mon endpoints ever change +watch_endpoints() { + # get the timestamp for the target of the soft link + real_path=$(realpath ${MON_CONFIG}) + initial_time=$(stat -c %Z "${real_path}") + while true; do + real_path=$(realpath ${MON_CONFIG}) + latest_time=$(stat -c %Z "${real_path}") + + if [[ "${latest_time}" != "${initial_time}" ]]; then + write_endpoints + initial_time=${latest_time} + fi + + sleep 10 + done +} + +# read the secret from an env var (for backward compatibility), or from the secret file +ceph_secret=${ROOK_CEPH_SECRET} +if [[ "$ceph_secret" == "" ]]; then + ceph_secret=$(cat /var/lib/rook-ceph-mon/secret.keyring) +fi + +# create the keyring file +cat <<EOF > ${KEYRING_FILE} +[${ROOK_CEPH_USERNAME}] +key = ${ceph_secret} +EOF + +# write the initial
config file +write_endpoints + +# continuously update the mon endpoints if they fail over +watch_endpoints +"#; + +const TOOLBOX_NAME: &str = "rook-ceph-tools"; +const TOOLBOX_LABEL: &str = "rook-ceph-tools"; + +/// Build the canonical `rook-ceph-tools` Deployment for the given namespace +/// and Ceph container image. Ported verbatim from rook/rook@v1.19.5's +/// `deploy/examples/toolbox.yaml`. +pub fn toolbox_deployment(namespace: &str, image: &str) -> Deployment { + let labels = { + let mut m = BTreeMap::new(); + m.insert("app".to_string(), TOOLBOX_LABEL.to_string()); + m + }; + + let container = Container { + name: TOOLBOX_NAME.to_string(), + image: Some(image.to_string()), + image_pull_policy: Some("IfNotPresent".to_string()), + command: Some(vec![ + "/bin/bash".to_string(), + "-c".to_string(), + TOOLBOX_SCRIPT.to_string(), + ]), + tty: Some(true), + security_context: Some(SecurityContext { + run_as_non_root: Some(true), + run_as_user: Some(2016), + run_as_group: Some(2016), + capabilities: Some(Capabilities { + drop: Some(vec!["ALL".to_string()]), + ..Capabilities::default() + }), + ..SecurityContext::default() + }), + env: Some(vec![EnvVar { + name: "ROOK_CEPH_USERNAME".to_string(), + value_from: Some(EnvVarSource { + secret_key_ref: Some(SecretKeySelector { + name: "rook-ceph-mon".to_string(), + key: "ceph-username".to_string(), + optional: None, + }), + ..EnvVarSource::default() + }), + ..EnvVar::default() + }]), + volume_mounts: Some(vec![ + VolumeMount { + name: "ceph-config".to_string(), + mount_path: "/etc/ceph".to_string(), + ..VolumeMount::default() + }, + VolumeMount { + name: "mon-endpoint-volume".to_string(), + mount_path: "/etc/rook".to_string(), + ..VolumeMount::default() + }, + VolumeMount { + name: "ceph-admin-secret".to_string(), + mount_path: "/var/lib/rook-ceph-mon".to_string(), + read_only: Some(true), + ..VolumeMount::default() + }, + VolumeMount { + name: "rook-config-override".to_string(), + mount_path: 
"/etc/rook-config-override".to_string(), + read_only: Some(true), + ..VolumeMount::default() + }, + ]), + ..Container::default() + }; + + let volumes = vec![ + Volume { + name: "ceph-admin-secret".to_string(), + secret: Some(SecretVolumeSource { + secret_name: Some("rook-ceph-mon".to_string()), + optional: Some(false), + items: Some(vec![KeyToPath { + key: "ceph-secret".to_string(), + path: "secret.keyring".to_string(), + ..KeyToPath::default() + }]), + ..SecretVolumeSource::default() + }), + ..Volume::default() + }, + Volume { + name: "mon-endpoint-volume".to_string(), + config_map: Some(ConfigMapVolumeSource { + name: "rook-ceph-mon-endpoints".to_string(), + items: Some(vec![KeyToPath { + key: "data".to_string(), + path: "mon-endpoints".to_string(), + ..KeyToPath::default() + }]), + ..ConfigMapVolumeSource::default() + }), + ..Volume::default() + }, + Volume { + name: "rook-config-override".to_string(), + config_map: Some(ConfigMapVolumeSource { + name: "rook-config-override".to_string(), + optional: Some(true), + ..ConfigMapVolumeSource::default() + }), + ..Volume::default() + }, + Volume { + name: "ceph-config".to_string(), + empty_dir: Some(EmptyDirVolumeSource::default()), + ..Volume::default() + }, + ]; + + let pod_spec = PodSpec { + dns_policy: Some("ClusterFirstWithHostNet".to_string()), + service_account_name: Some("rook-ceph-default".to_string()), + containers: vec![container], + volumes: Some(volumes), + tolerations: Some(vec![Toleration { + key: Some("node.kubernetes.io/unreachable".to_string()), + operator: Some("Exists".to_string()), + effect: Some("NoExecute".to_string()), + toleration_seconds: Some(5), + ..Toleration::default() + }]), + ..PodSpec::default() + }; + + Deployment { + metadata: ObjectMeta { + name: Some(TOOLBOX_NAME.to_string()), + namespace: Some(namespace.to_string()), + labels: Some(labels.clone()), + ..ObjectMeta::default() + }, + spec: Some(DeploymentSpec { + replicas: Some(1), + selector: LabelSelector { + match_labels: 
Some(labels.clone()), + ..LabelSelector::default() + }, + template: PodTemplateSpec { + metadata: Some(ObjectMeta { + labels: Some(labels), + ..ObjectMeta::default() + }), + spec: Some(pod_spec), + }, + ..DeploymentSpec::default() + }), + ..Deployment::default() + } +} -- 2.39.5 From 14786fc03e58fa0ca531a62a994cc67cee5c89f7 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 22 May 2026 08:47:19 -0400 Subject: [PATCH 19/19] fix(k8s+storage/ceph): force-conflicts on Rook CR applies for re-run safety MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Server-Side Apply rejects re-applies of a resource whose fields another field manager has taken ownership of. Rook is exactly such a manager: after reconciling a CephCluster, it claims ownership of fields like .spec.mgr.modules (the operator dynamically toggles modules) and likely .spec.storage.* (discovered nodes), .spec.dashboard.* (port assignments), etc. Re-running the example against an existing cluster therefore failed: ApiError: Apply failed with 1 conflict: conflict with "rook" using ceph.rook.io/v1: .spec.mgr.modules The kube-rs apply flow used by K8sResourceScore was hardcoding `PatchParams::apply(FIELD_MANAGER)` without `.force`. apply_dynamic_many already supports force_conflicts but the typed path didn't expose it. Changes: - K8sResourceScore gains a `force_conflicts: bool` field (default false, so all existing call sites keep their semantics) plus a chainable builder `with_force_conflicts(true)`. When set, execute() round-trips each typed resource through serde_json to DynamicObject and routes via apply_dynamic_many with force=true. - RookCephClusterScore opts in via with_force_conflicts(true) on every Rook CR apply (CephCluster, CephBlockPool, CephFilesystem, CephObjectStore, CephObjectStoreUser). The toolbox Deployment and auto-generated StorageClasses keep the default (no force) — they're only managed by Harmony, no other field manager to conflict with. 
For declarative IaC this is the correct semantic: Harmony's declared state is authoritative; any operator-side mutations to fields we set get overridden on the next reconcile. Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony/src/modules/k8s/resource.rs | 58 +++++++++++++++++-- .../src/modules/storage/ceph/cluster_score.rs | 5 ++ 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/harmony/src/modules/k8s/resource.rs b/harmony/src/modules/k8s/resource.rs index bff8183f..1cbc1582 100644 --- a/harmony/src/modules/k8s/resource.rs +++ b/harmony/src/modules/k8s/resource.rs @@ -1,6 +1,7 @@ use async_trait::async_trait; use k8s_openapi::ResourceScope; use kube::Resource; +use kube::api::DynamicObject; use log::info; use serde::{Serialize, de::DeserializeOwned}; @@ -17,6 +18,12 @@ use harmony_types::id::Id; pub struct K8sResourceScore { pub resource: Vec, pub namespace: Option, + /// When `true`, server-side apply is performed with `force=true` + /// (equivalent to `kubectl apply --force-conflicts`). Required when + /// applying a resource whose fields an operator has taken ownership of + /// (typical for CRs managed by reconciling operators like Rook). For + /// generic Kubernetes resources owned only by Harmony, leave `false`. + pub force_conflicts: bool, } impl K8sResourceScore { @@ -24,8 +31,16 @@ impl K8sResourceScore { Self { resource: vec![resource], namespace, + force_conflicts: false, } } + + /// Consume `self` and return it with `force_conflicts` set. Chainable + /// after `single`: `K8sResourceScore::single(r, ns).with_force_conflicts(true)`. 
+ pub fn with_force_conflicts(mut self, force: bool) -> Self { + self.force_conflicts = force; + self + } } impl< @@ -102,16 +117,47 @@ where .collect(); info!( - "Applying {} resources : {}", + "Applying {} resources : {} (force_conflicts={})", resource_names.len(), - resource_names.join(", ") + resource_names.join(", "), + self.score.force_conflicts ); - topology + let k8s = topology .k8s_client() .await - .map_err(|e| InterpretError::new(format!("Failed to get k8s client : {e}")))? - .apply_many(&self.score.resource, self.score.namespace.as_deref()) - .await?; + .map_err(|e| InterpretError::new(format!("Failed to get k8s client : {e}")))?; + + if self.score.force_conflicts { + // apply_dynamic_many exposes the force_conflicts flag on + // PatchParams that the typed apply path does not. Round-trip each + // typed resource through JSON to land on a DynamicObject without + // duplicating the harmony-k8s private `to_dynamic` helper. + let dyn_objects: Vec<DynamicObject> = self + .score + .resource + .iter() + .map(|r| { + serde_json::to_value(r) + .map_err(|e| { + InterpretError::new(format!( + "Failed to serialize resource for force-apply: {e}" + )) + }) + .and_then(|v| { + serde_json::from_value::<DynamicObject>(v).map_err(|e| { + InterpretError::new(format!( + "Failed to convert resource to DynamicObject: {e}" + )) + }) + }) + }) + .collect::<Result<Vec<_>, _>>()?; + k8s.apply_dynamic_many(&dyn_objects, self.score.namespace.as_deref(), true) + .await?; + } else { + k8s.apply_many(&self.score.resource, self.score.namespace.as_deref()) + .await?; + } Ok(Outcome::success( "Successfully applied resource".to_string(), diff --git a/harmony/src/modules/storage/ceph/cluster_score.rs b/harmony/src/modules/storage/ceph/cluster_score.rs index 8aa96b0a..460d70dc 100644 --- a/harmony/src/modules/storage/ceph/cluster_score.rs +++ b/harmony/src/modules/storage/ceph/cluster_score.rs @@ -184,6 +184,7 @@ impl Interpret for RookCephClusterInterpre info!("[Rook-Ceph] Applying CephCluster '{}'", ns);
K8sResourceScore::single(self.score.cluster.clone(), Some(ns.clone())) + .with_force_conflicts(true) .interpret(inventory, topology) .await?; @@ -214,6 +215,7 @@ impl Interpret for RookCephClusterInterpre .unwrap_or_else(|| format!("pool-{idx}")); info!("[Rook-Ceph] Applying CephBlockPool '{}'", pool_name); K8sResourceScore::single(pool.clone(), Some(ns.clone())) + .with_force_conflicts(true) .interpret(inventory, topology) .await?; @@ -240,6 +242,7 @@ impl Interpret for RookCephClusterInterpre .unwrap_or_else(|| "cephfs".to_string()); info!("[Rook-Ceph] Applying CephFilesystem '{}'", fs_name); K8sResourceScore::single(fs.clone(), Some(ns.clone())) + .with_force_conflicts(true) .interpret(inventory, topology) .await?; @@ -262,6 +265,7 @@ impl Interpret for RookCephClusterInterpre .unwrap_or_else(|| "object-store".to_string()); info!("[Rook-Ceph] Applying CephObjectStore '{}'", store_name); K8sResourceScore::single(store.clone(), Some(ns.clone())) + .with_force_conflicts(true) .interpret(inventory, topology) .await?; } @@ -280,6 +284,7 @@ impl Interpret for RookCephClusterInterpre user.credentials_secret_name(), ); K8sResourceScore::single(user.clone(), Some(ns.clone())) + .with_force_conflicts(true) .interpret(inventory, topology) .await?; } -- 2.39.5