From 4f4510d7ba721b75fc2405e957ad2ebbb7866b68 Mon Sep 17 00:00:00 2001
From: Willem
Date: Mon, 24 Nov 2025 14:56:37 -0500
Subject: [PATCH] feat: add steps to install worker nodes during bootstrap

---
 .../src/modules/okd/bootstrap_04_workers.rs | 182 +++++++++++++++++-
 1 file changed, 177 insertions(+), 5 deletions(-)

diff --git a/harmony/src/modules/okd/bootstrap_04_workers.rs b/harmony/src/modules/okd/bootstrap_04_workers.rs
index 461cab9..bdbb8ea 100644
--- a/harmony/src/modules/okd/bootstrap_04_workers.rs
+++ b/harmony/src/modules/okd/bootstrap_04_workers.rs
@@ -1,15 +1,21 @@
 use async_trait::async_trait;
 use derive_new::new;
 use harmony_types::id::Id;
-use log::info;
+use log::{debug, info};
 use serde::Serialize;
 
 use crate::{
     data::Version,
+    hardware::PhysicalHost,
+    infra::inventory::InventoryRepositoryFactory,
     interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
-    inventory::Inventory,
+    inventory::{HostRole, Inventory},
+    modules::{
+        dhcp::DhcpHostBindingScore, http::IPxeMacBootFileScore,
+        inventory::DiscoverHostForRoleScore, okd::templates::BootstrapIpxeTpl,
+    },
     score::Score,
-    topology::HAClusterTopology,
+    topology::{HAClusterTopology, HostBinding},
 };
 
 // -------------------------------------------------------------------------------------------------
@@ -52,6 +58,159 @@ impl OKDSetup04WorkersInterpret {
         info!("[Workers] Rendering per-MAC PXE for workers and rebooting");
         Ok(())
     }
+
+    /// Ensures that two physical hosts are discovered and available for the Worker role.
+    /// It will trigger discovery if not enough hosts are found.
+    async fn get_nodes(
+        &self,
+        inventory: &Inventory,
+        topology: &HAClusterTopology,
+    ) -> Result<Vec<PhysicalHost>, InterpretError> {
+        const REQUIRED_HOSTS: usize = 2;
+        let repo = InventoryRepositoryFactory::build().await?;
+        let mut worker_hosts = repo.get_host_for_role(&HostRole::Worker).await?;
+
+        while worker_hosts.len() < REQUIRED_HOSTS {
+            info!(
+                "Discovery of {} worker hosts in progress, currently found {}",
+                REQUIRED_HOSTS,
+                worker_hosts.len()
+            );
+            // This score triggers the discovery agent for a specific role.
+            DiscoverHostForRoleScore {
+                role: HostRole::Worker,
+            }
+            .interpret(inventory, topology)
+            .await?;
+            worker_hosts = repo.get_host_for_role(&HostRole::Worker).await?;
+        }
+
+        if worker_hosts.len() < REQUIRED_HOSTS {
+            Err(InterpretError::new(format!(
+                "OKD requires at least {} worker hosts, but only found {}. Cannot proceed.",
+                REQUIRED_HOSTS,
+                worker_hosts.len()
+            )))
+        } else {
+            // Take exactly the number of required hosts to ensure consistency.
+            Ok(worker_hosts
+                .into_iter()
+                .take(REQUIRED_HOSTS)
+                .collect())
+        }
+    }
+
+    /// Configures DHCP host bindings for all worker nodes.
+    async fn configure_host_binding(
+        &self,
+        inventory: &Inventory,
+        topology: &HAClusterTopology,
+        nodes: &Vec<PhysicalHost>,
+    ) -> Result<(), InterpretError> {
+        info!("[Worker] Configuring host bindings for worker nodes.");
+
+        // Ensure the topology definition matches the number of physical nodes found.
+        if topology.control_plane.len() != nodes.len() {
+            return Err(InterpretError::new(format!(
+                "Mismatch between logical hosts defined in the topology ({}) and physical worker nodes found ({}).",
+                topology.control_plane.len(),
+                nodes.len()
+            )));
+        }
+
+        // Create a binding for each physical host to its corresponding logical host.
+        let bindings: Vec<HostBinding> = topology
+            .control_plane
+            .iter()
+            .zip(nodes.iter())
+            .map(|(logical_host, physical_host)| {
+                info!(
+                    "Creating binding: Logical Host '{}' -> Physical Host ID '{}'",
+                    logical_host.name, physical_host.id
+                );
+                HostBinding {
+                    logical_host: logical_host.clone(),
+                    physical_host: physical_host.clone(),
+                }
+            })
+            .collect();
+
+        DhcpHostBindingScore {
+            host_binding: bindings,
+            domain: Some(topology.domain_name.clone()),
+        }
+        .interpret(inventory, topology)
+        .await?;
+
+        Ok(())
+    }
+
+    /// Renders and deploys a per-MAC iPXE boot file for each worker node.
+    async fn configure_ipxe(
+        &self,
+        inventory: &Inventory,
+        topology: &HAClusterTopology,
+        nodes: &Vec<PhysicalHost>,
+    ) -> Result<(), InterpretError> {
+        info!("[Worker] Rendering per-MAC iPXE configurations.");
+
+        // The iPXE script content is the same for all worker nodes,
+        // pointing to the 'worker.ign' ignition file.
+        let content = BootstrapIpxeTpl {
+            http_ip: &topology.http_server.get_ip().to_string(),
+            scos_path: "scos",
+            ignition_http_path: "okd_ignition_files",
+            installation_device: "/dev/sda", // This might need to be configurable per-host in the future
+            ignition_file_name: "worker.ign", // Worker nodes use the worker ignition file
+        }
+        .to_string();
+
+        debug!("[Worker] iPXE content template:\n{content}");
+
+        // Create and apply an iPXE boot file for each node.
+        for node in nodes {
+            let mac_address = node.get_mac_address();
+            if mac_address.is_empty() {
+                return Err(InterpretError::new(format!(
+                    "Physical host with ID '{}' has no MAC addresses defined.",
+                    node.id
+                )));
+            }
+            info!(
+                "[Worker] Applying iPXE config for node ID '{}' with MACs: {:?}",
+                node.id, mac_address
+            );
+
+            IPxeMacBootFileScore {
+                mac_address,
+                content: content.clone(),
+            }
+            .interpret(inventory, topology)
+            .await?;
+        }
+
+        Ok(())
+    }
+
+    /// Prompts the user to reboot the target worker nodes.
+    async fn reboot_targets(&self, nodes: &Vec<PhysicalHost>) -> Result<(), InterpretError> {
+        let node_ids: Vec<String> = nodes.iter().map(|n| n.id.to_string()).collect();
+        info!("[Worker] Requesting reboot for worker nodes: {node_ids:?}");
+
+        let confirmation = inquire::Confirm::new(
+            &format!("Please reboot the {} worker nodes ({}) so they pick up their PXE configuration. Confirm once the reboot has been triggered.", nodes.len(), node_ids.join(", ")),
+        )
+        .prompt()
+        .map_err(|e| InterpretError::new(format!("User prompt failed: {e}")))?;
+
+        if !confirmation {
+            return Err(InterpretError::new(
+                "User aborted the operation.".to_string(),
+            ));
+        }
+
+        Ok(())
+    }
 }
 
 #[async_trait]
@@ -74,10 +233,23 @@ impl Interpret for OKDSetup04WorkersInterpret {
 
     async fn execute(
         &self,
-        _inventory: &Inventory,
-        _topology: &HAClusterTopology,
+        inventory: &Inventory,
+        topology: &HAClusterTopology,
    ) -> Result<Outcome, InterpretError> {
         self.render_and_reboot().await?;
+        // 1. Ensure we have 2 physical hosts for the worker nodes.
+        let nodes = self.get_nodes(inventory, topology).await?;
+
+        // 2. Create DHCP reservations for the worker nodes.
+        self.configure_host_binding(inventory, topology, &nodes)
+            .await?;
+
+        // 3. Create iPXE files for each worker node to boot from the worker ignition.
+        self.configure_ipxe(inventory, topology, &nodes).await?;
+
+        // 4. Reboot the nodes to start the OS installation.
+        self.reboot_targets(&nodes).await?;
+
         Ok(Outcome::success("Workers provisioned".into()))
     }
 }
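Note on the discovery loop in get_nodes above: as written, the while loop re-runs DiscoverHostForRoleScore indefinitely while fewer than REQUIRED_HOSTS workers are registered, and the length check that follows the loop can never fail. Below is a minimal sketch of a bounded variant, assuming the same repo, inventory, topology and score types used in the patch; MAX_DISCOVERY_ATTEMPTS is a hypothetical constant introduced only for illustration.

    // Sketch only: cap the number of discovery rounds instead of looping forever.
    // Assumes the same `repo`, `inventory`, `topology`, REQUIRED_HOSTS and score
    // types as the patch; MAX_DISCOVERY_ATTEMPTS is a hypothetical tunable.
    const MAX_DISCOVERY_ATTEMPTS: usize = 10;

    let mut worker_hosts = repo.get_host_for_role(&HostRole::Worker).await?;
    for attempt in 1..=MAX_DISCOVERY_ATTEMPTS {
        if worker_hosts.len() >= REQUIRED_HOSTS {
            break;
        }
        info!(
            "Discovery attempt {attempt}: found {} of {} worker hosts",
            worker_hosts.len(),
            REQUIRED_HOSTS
        );
        DiscoverHostForRoleScore {
            role: HostRole::Worker,
        }
        .interpret(inventory, topology)
        .await?;
        worker_hosts = repo.get_host_for_role(&HostRole::Worker).await?;
    }
    if worker_hosts.len() < REQUIRED_HOSTS {
        return Err(InterpretError::new(format!(
            "Gave up waiting for {REQUIRED_HOSTS} worker hosts after {MAX_DISCOVERY_ATTEMPTS} discovery attempts."
        )));
    }

This keeps a stalled discovery agent from blocking the bootstrap run indefinitely and surfaces a clear error instead of hanging.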