diff --git a/Cargo.lock b/Cargo.lock index 7d9cdcf..ab3b102 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6049,6 +6049,21 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" +[[package]] +name = "test-score" +version = "0.1.0" +dependencies = [ + "base64 0.22.1", + "env_logger", + "harmony", + "harmony_cli", + "harmony_macros", + "harmony_types", + "log", + "tokio", + "url", +] + [[package]] name = "thiserror" version = "1.0.69" diff --git a/harmony/src/domain/inventory/mod.rs b/harmony/src/domain/inventory/mod.rs index 7d160d7..f7cc1ef 100644 --- a/harmony/src/domain/inventory/mod.rs +++ b/harmony/src/domain/inventory/mod.rs @@ -1,4 +1,6 @@ mod repository; +use std::fmt; + pub use repository::*; #[derive(Debug, new, Clone)] @@ -71,3 +73,14 @@ pub enum HostRole { Worker, Storage, } + +impl fmt::Display for HostRole { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + HostRole::Bootstrap => write!(f, "Bootstrap"), + HostRole::ControlPlane => write!(f, "ControlPlane"), + HostRole::Worker => write!(f, "Worker"), + HostRole::Storage => write!(f, "Storage"), + } + } +} diff --git a/harmony/src/modules/okd/bootstrap_03_control_plane.rs b/harmony/src/modules/okd/bootstrap_03_control_plane.rs index 5abe848..7e882ab 100644 --- a/harmony/src/modules/okd/bootstrap_03_control_plane.rs +++ b/harmony/src/modules/okd/bootstrap_03_control_plane.rs @@ -5,8 +5,10 @@ use crate::{ interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, inventory::{HostRole, Inventory}, modules::{ - dhcp::DhcpHostBindingScore, http::IPxeMacBootFileScore, - inventory::DiscoverHostForRoleScore, okd::templates::BootstrapIpxeTpl, + dhcp::DhcpHostBindingScore, + http::IPxeMacBootFileScore, + inventory::DiscoverHostForRoleScore, + okd::{bootstrap_okd_node::OKDNodeInterpret, templates::BootstrapIpxeTpl}, }, score::Score, topology::{HAClusterTopology, 
HostBinding}, @@ -50,159 +52,6 @@ impl OKDSetup03ControlPlaneInterpret { status: InterpretStatus::QUEUED, } } - - /// Ensures that three physical hosts are discovered and available for the ControlPlane role. - /// It will trigger discovery if not enough hosts are found. - async fn get_nodes( - &self, - inventory: &Inventory, - topology: &HAClusterTopology, - ) -> Result, InterpretError> { - const REQUIRED_HOSTS: usize = 3; - let repo = InventoryRepositoryFactory::build().await?; - let mut control_plane_hosts = repo.get_host_for_role(&HostRole::ControlPlane).await?; - - while control_plane_hosts.len() < REQUIRED_HOSTS { - info!( - "Discovery of {} control plane hosts in progress, current number {}", - REQUIRED_HOSTS, - control_plane_hosts.len() - ); - // This score triggers the discovery agent for a specific role. - DiscoverHostForRoleScore { - role: HostRole::ControlPlane, - } - .interpret(inventory, topology) - .await?; - control_plane_hosts = repo.get_host_for_role(&HostRole::ControlPlane).await?; - } - - if control_plane_hosts.len() < REQUIRED_HOSTS { - Err(InterpretError::new(format!( - "OKD Requires at least {} control plane hosts, but only found {}. Cannot proceed.", - REQUIRED_HOSTS, - control_plane_hosts.len() - ))) - } else { - // Take exactly the number of required hosts to ensure consistency. - Ok(control_plane_hosts - .into_iter() - .take(REQUIRED_HOSTS) - .collect()) - } - } - - /// Configures DHCP host bindings for all control plane nodes. - async fn configure_host_binding( - &self, - inventory: &Inventory, - topology: &HAClusterTopology, - nodes: &Vec, - ) -> Result<(), InterpretError> { - info!("[ControlPlane] Configuring host bindings for control plane nodes."); - - // Ensure the topology definition matches the number of physical nodes found. 
- if topology.control_plane.len() != nodes.len() { - return Err(InterpretError::new(format!( - "Mismatch between logical control plane hosts defined in topology ({}) and physical nodes found ({}).", - topology.control_plane.len(), - nodes.len() - ))); - } - - // Create a binding for each physical host to its corresponding logical host. - let bindings: Vec = topology - .control_plane - .iter() - .zip(nodes.iter()) - .map(|(logical_host, physical_host)| { - info!( - "Creating binding: Logical Host '{}' -> Physical Host ID '{}'", - logical_host.name, physical_host.id - ); - HostBinding { - logical_host: logical_host.clone(), - physical_host: physical_host.clone(), - } - }) - .collect(); - - DhcpHostBindingScore { - host_binding: bindings, - domain: Some(topology.domain_name.clone()), - } - .interpret(inventory, topology) - .await?; - - Ok(()) - } - - /// Renders and deploys a per-MAC iPXE boot file for each control plane node. - async fn configure_ipxe( - &self, - inventory: &Inventory, - topology: &HAClusterTopology, - nodes: &Vec, - ) -> Result<(), InterpretError> { - info!("[ControlPlane] Rendering per-MAC iPXE configurations."); - - // The iPXE script content is the same for all control plane nodes, - // pointing to the 'master.ign' ignition file. - let content = BootstrapIpxeTpl { - http_ip: &topology.http_server.get_ip().to_string(), - scos_path: "scos", - ignition_http_path: "okd_ignition_files", - installation_device: "/dev/sda", // This might need to be configurable per-host in the future - ignition_file_name: "master.ign", // Control plane nodes use the master ignition file - } - .to_string(); - - debug!("[ControlPlane] iPXE content template:\n{content}"); - - // Create and apply an iPXE boot file for each node. 
- for node in nodes { - let mac_address = node.get_mac_address(); - if mac_address.is_empty() { - return Err(InterpretError::new(format!( - "Physical host with ID '{}' has no MAC addresses defined.", - node.id - ))); - } - info!( - "[ControlPlane] Applying iPXE config for node ID '{}' with MACs: {:?}", - node.id, mac_address - ); - - IPxeMacBootFileScore { - mac_address, - content: content.clone(), - } - .interpret(inventory, topology) - .await?; - } - - Ok(()) - } - - /// Prompts the user to reboot the target control plane nodes. - async fn reboot_targets(&self, nodes: &Vec) -> Result<(), InterpretError> { - let node_ids: Vec = nodes.iter().map(|n| n.id.to_string()).collect(); - info!("[ControlPlane] Requesting reboot for control plane nodes: {node_ids:?}",); - - let confirmation = inquire::Confirm::new( - &format!("Please reboot the {} control plane nodes ({}) to apply their PXE configuration. Press enter when ready.", nodes.len(), node_ids.join(", ")), - ) - .prompt() - .map_err(|e| InterpretError::new(format!("User prompt failed: {e}")))?; - - if !confirmation { - return Err(InterpretError::new( - "User aborted the operation.".to_string(), - )); - } - - Ok(()) - } } #[async_trait] @@ -228,19 +77,10 @@ impl Interpret for OKDSetup03ControlPlaneInterpret { inventory: &Inventory, topology: &HAClusterTopology, ) -> Result { - // 1. Ensure we have 3 physical hosts for the control plane. - let nodes = self.get_nodes(inventory, topology).await?; - - // 2. Create DHCP reservations for the control plane nodes. - self.configure_host_binding(inventory, topology, &nodes) + OKDNodeInterpret::new(HostRole::ControlPlane) + .execute(inventory, topology) .await?; - // 3. Create iPXE files for each control plane node to boot from the master ignition. - self.configure_ipxe(inventory, topology, &nodes).await?; - - // 4. Reboot the nodes to start the OS installation. 
- self.reboot_targets(&nodes).await?; - // TODO: Implement a step to wait for the control plane nodes to join the cluster // and for the cluster operators to become available. This would be similar to // the `wait-for bootstrap-complete` command. diff --git a/harmony/src/modules/okd/bootstrap_04_workers.rs b/harmony/src/modules/okd/bootstrap_04_workers.rs index 461cab9..62bf2ad 100644 --- a/harmony/src/modules/okd/bootstrap_04_workers.rs +++ b/harmony/src/modules/okd/bootstrap_04_workers.rs @@ -1,13 +1,13 @@ use async_trait::async_trait; use derive_new::new; use harmony_types::id::Id; -use log::info; use serde::Serialize; use crate::{ data::Version, interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, - inventory::Inventory, + inventory::{HostRole, Inventory}, + modules::okd::bootstrap_okd_node::OKDNodeInterpret, score::Score, topology::HAClusterTopology, }; @@ -23,7 +23,7 @@ pub struct OKDSetup04WorkersScore {} impl Score for OKDSetup04WorkersScore { fn create_interpret(&self) -> Box> { - Box::new(OKDSetup04WorkersInterpret::new(self.clone())) + Box::new(OKDSetup04WorkersInterpret::new()) } fn name(&self) -> String { @@ -33,25 +33,18 @@ impl Score for OKDSetup04WorkersScore { #[derive(Debug, Clone)] pub struct OKDSetup04WorkersInterpret { - score: OKDSetup04WorkersScore, version: Version, status: InterpretStatus, } impl OKDSetup04WorkersInterpret { - pub fn new(score: OKDSetup04WorkersScore) -> Self { + pub fn new() -> Self { let version = Version::from("1.0.0").unwrap(); Self { version, - score, status: InterpretStatus::QUEUED, } } - - async fn render_and_reboot(&self) -> Result<(), InterpretError> { - info!("[Workers] Rendering per-MAC PXE for workers and rebooting"); - Ok(()) - } } #[async_trait] @@ -74,10 +67,12 @@ impl Interpret for OKDSetup04WorkersInterpret { async fn execute( &self, - _inventory: &Inventory, - _topology: &HAClusterTopology, + inventory: &Inventory, + topology: &HAClusterTopology, ) -> Result { - 
self.render_and_reboot().await?; + OKDNodeInterpret::new(HostRole::Worker) + .execute(inventory, topology) + .await?; Ok(Outcome::success("Workers provisioned".into())) } } diff --git a/harmony/src/modules/okd/bootstrap_okd_node.rs b/harmony/src/modules/okd/bootstrap_okd_node.rs new file mode 100644 index 0000000..a5eb7c2 --- /dev/null +++ b/harmony/src/modules/okd/bootstrap_okd_node.rs @@ -0,0 +1,296 @@ +use async_trait::async_trait; +use derive_new::new; +use harmony_types::id::Id; +use log::{debug, info}; +use serde::Serialize; + +use crate::{ + data::Version, + hardware::PhysicalHost, + infra::inventory::InventoryRepositoryFactory, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::{HostRole, Inventory}, + modules::{ + dhcp::DhcpHostBindingScore, + http::IPxeMacBootFileScore, + inventory::DiscoverHostForRoleScore, + okd::{ + okd_node::{ + BootstrapRole, ControlPlaneRole, OKDRoleProperties, StorageRole, WorkerRole, + }, + templates::BootstrapIpxeTpl, + }, + }, + score::Score, + topology::{HAClusterTopology, HostBinding, LogicalHost}, +}; + +#[derive(Debug, Clone, Serialize, new)] +pub struct OKDNodeScore { + host_role: HostRole, +} + +impl Score for OKDNodeScore { + fn name(&self) -> String { + "OKDNodeScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OKDNodeInterpret::new(self.host_role.clone())) + } +} + +#[derive(Debug, Clone)] +pub struct OKDNodeInterpret { + host_role: HostRole, +} + +impl OKDNodeInterpret { + pub fn new(host_role: HostRole) -> Self { + Self { host_role } + } + + fn okd_role_properties(&self, role: &HostRole) -> &'static dyn OKDRoleProperties { + match role { + HostRole::Bootstrap => &BootstrapRole, + HostRole::ControlPlane => &ControlPlaneRole, + HostRole::Worker => &WorkerRole, + HostRole::Storage => &StorageRole, + } + } + + async fn get_nodes( + &self, + inventory: &Inventory, + topology: &HAClusterTopology, + ) -> Result, InterpretError> { + let repo = 
InventoryRepositoryFactory::build().await?; + + let mut hosts = repo.get_host_for_role(&self.host_role).await?; + + let okd_host_properties = self.okd_role_properties(&self.host_role); + + let required_hosts: usize = okd_host_properties.required_hosts(); + + while hosts.len() < required_hosts { + info!( + "Discovery of {} {} hosts in progress, current number {}", + required_hosts, + self.host_role, + hosts.len() + ); + // This score triggers the discovery agent for a specific role. + DiscoverHostForRoleScore { + role: self.host_role.clone(), + } + .interpret(inventory, topology) + .await?; + hosts = repo.get_host_for_role(&self.host_role).await?; + } + + if hosts.len() < required_hosts { + Err(InterpretError::new(format!( + "OKD Requires at least {} {} hosts, but only found {}. Cannot proceed.", + required_hosts, + self.host_role, + hosts.len() + ))) + } else { + // Take exactly the number of required hosts to ensure consistency. + Ok(hosts.into_iter().take(required_hosts).collect()) + } + } + + /// Configures DHCP host bindings for all nodes. + async fn configure_host_binding( + &self, + inventory: &Inventory, + topology: &HAClusterTopology, + nodes: &Vec, + ) -> Result<(), InterpretError> { + info!( + "[{}] Configuring host bindings for {} plane nodes.", + self.host_role, self.host_role, + ); + + let host_properties = self.okd_role_properties(&self.host_role); + + self.validate_host_node_match(nodes, host_properties.logical_hosts(topology))?; + + let bindings: Vec = + self.host_bindings(nodes, host_properties.logical_hosts(topology)); + + DhcpHostBindingScore { + host_binding: bindings, + domain: Some(topology.domain_name.clone()), + } + .interpret(inventory, topology) + .await?; + + Ok(()) + } + + // Ensure the topology definition matches the number of physical nodes found. 
+ fn validate_host_node_match( + &self, + nodes: &Vec, + hosts: &Vec, + ) -> Result<(), InterpretError> { + if hosts.len() != nodes.len() { + return Err(InterpretError::new(format!( + "Mismatch between logical hosts defined in topology ({}) and physical nodes found ({}).", + hosts.len(), + nodes.len() + ))); + } + Ok(()) + } + + // Create a binding for each physical host to its corresponding logical host. + fn host_bindings( + &self, + nodes: &Vec, + hosts: &Vec, + ) -> Vec { + hosts + .iter() + .zip(nodes.iter()) + .map(|(logical_host, physical_host)| { + info!( + "Creating binding: Logical Host '{}' -> Physical Host ID '{}'", + logical_host.name, physical_host.id + ); + HostBinding { + logical_host: logical_host.clone(), + physical_host: physical_host.clone(), + } + }) + .collect() + } + + /// Renders and deploys a per-MAC iPXE boot file for each node. + async fn configure_ipxe( + &self, + inventory: &Inventory, + topology: &HAClusterTopology, + nodes: &Vec, + ) -> Result<(), InterpretError> { + info!( + "[{}] Rendering per-MAC iPXE configurations.", + self.host_role + ); + + let okd_role_properties = self.okd_role_properties(&self.host_role); + // The iPXE script content is the same for all nodes of a given role, + // pointing to that role's ignition file. + let content = BootstrapIpxeTpl { + http_ip: &topology.http_server.get_ip().to_string(), + scos_path: "scos", + ignition_http_path: "okd_ignition_files", + //TODO must be refactored to not only use /dev/sda + installation_device: "/dev/sda", // This might need to be configurable per-host in the future + ignition_file_name: okd_role_properties.ignition_file(), + } + .to_string(); + + debug!("[{}] iPXE content template:\n{content}", self.host_role); + + // Create and apply an iPXE boot file for each node. 
+ for node in nodes { + let mac_address = node.get_mac_address(); + if mac_address.is_empty() { + return Err(InterpretError::new(format!( + "Physical host with ID '{}' has no MAC addresses defined.", + node.id + ))); + } + info!( + "[{}] Applying iPXE config for node ID '{}' with MACs: {:?}", + self.host_role, node.id, mac_address + ); + + IPxeMacBootFileScore { + mac_address, + content: content.clone(), + } + .interpret(inventory, topology) + .await?; + } + + Ok(()) + } + + /// Prompts the user to reboot the target nodes for this role. + async fn reboot_targets(&self, nodes: &Vec) -> Result<(), InterpretError> { + let node_ids: Vec = nodes.iter().map(|n| n.id.to_string()).collect(); + info!( + "[{}] Requesting reboot for control plane nodes: {node_ids:?}", + self.host_role + ); + + let confirmation = inquire::Confirm::new( + &format!("Please reboot the {} {} nodes ({}) to apply their PXE configuration. Press enter when ready.", nodes.len(), self.host_role, node_ids.join(", ")), + ) + .prompt() + .map_err(|e| InterpretError::new(format!("User prompt failed: {e}")))?; + + if !confirmation { + return Err(InterpretError::new( + "User aborted the operation.".to_string(), + )); + } + + Ok(()) + } +} + +#[async_trait] +impl Interpret for OKDNodeInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &HAClusterTopology, + ) -> Result { + // 1. Ensure we have the specified number of physical hosts. + let nodes = self.get_nodes(inventory, topology).await?; + + // 2. Create DHCP reservations for the nodes. + self.configure_host_binding(inventory, topology, &nodes) + .await?; + + // 3. Create iPXE files for each node to boot from the ignition. + self.configure_ipxe(inventory, topology, &nodes).await?; + + // 4. Reboot the nodes to start the OS installation. + self.reboot_targets(&nodes).await?; + + // TODO: Implement a step to wait for the nodes to join the cluster + // and for the cluster operators to become available. 
This would be similar to + // the `wait-for bootstrap-complete` command. + info!( + "[{}] Provisioning initiated. Monitor the cluster convergence manually.", + self.host_role + ); + + Ok(Outcome::success(format!( + "{} provisioning has been successfully initiated.", + self.host_role + ))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("OKDNodeSetup".into()) + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/harmony/src/modules/okd/mod.rs b/harmony/src/modules/okd/mod.rs index 8bb85ef..1cf66bc 100644 --- a/harmony/src/modules/okd/mod.rs +++ b/harmony/src/modules/okd/mod.rs @@ -6,12 +6,14 @@ mod bootstrap_05_sanity_check; mod bootstrap_06_installation_report; pub mod bootstrap_dhcp; pub mod bootstrap_load_balancer; +pub mod bootstrap_okd_node; mod bootstrap_persist_network_bond; pub mod dhcp; pub mod dns; pub mod installation; pub mod ipxe; pub mod load_balancer; +pub mod okd_node; pub mod templates; pub mod upgrade; pub use bootstrap_01_prepare::*; diff --git a/harmony/src/modules/okd/okd_node.rs b/harmony/src/modules/okd/okd_node.rs new file mode 100644 index 0000000..687ae5a --- /dev/null +++ b/harmony/src/modules/okd/okd_node.rs @@ -0,0 +1,69 @@ +use crate::topology::{HAClusterTopology, LogicalHost}; + +pub trait OKDRoleProperties { + fn ignition_file(&self) -> &'static str; + fn required_hosts(&self) -> usize; + fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec; +} + +pub struct BootstrapRole; +pub struct ControlPlaneRole; +pub struct WorkerRole; +pub struct StorageRole; + +impl OKDRoleProperties for BootstrapRole { + fn ignition_file(&self) -> &'static str { + "bootstrap.ign" + } + + fn required_hosts(&self) -> usize { + 1 + } + + fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec { + todo!() + } +} + +impl OKDRoleProperties for ControlPlaneRole { + fn ignition_file(&self) -> 
&'static str { + "master.ign" + } + + fn required_hosts(&self) -> usize { + 3 + } + + fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec { + &t.control_plane + } +} + +impl OKDRoleProperties for WorkerRole { + fn ignition_file(&self) -> &'static str { + "worker.ign" + } + + fn required_hosts(&self) -> usize { + 2 + } + + fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec { + &t.workers + } +} + +//TODO unsure if this is to be implemented here or not +impl OKDRoleProperties for StorageRole { + fn ignition_file(&self) -> &'static str { + todo!() + } + + fn required_hosts(&self) -> usize { + todo!() + } + + fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec { + todo!() + } +}