From 902185daa45cb01d957755f760fff457686d2ecd Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Thu, 4 Sep 2025 17:49:06 -0400 Subject: [PATCH] feat: Control plane bootstraping logic implemented, next step is testing it! --- .../modules/okd/bootstrap_03_control_plane.rs | 246 ++++++++++++------ 1 file changed, 161 insertions(+), 85 deletions(-) diff --git a/harmony/src/modules/okd/bootstrap_03_control_plane.rs b/harmony/src/modules/okd/bootstrap_03_control_plane.rs index 1739aea..412a71a 100644 --- a/harmony/src/modules/okd/bootstrap_03_control_plane.rs +++ b/harmony/src/modules/okd/bootstrap_03_control_plane.rs @@ -2,31 +2,24 @@ use std::{fmt::Write, path::PathBuf}; use async_trait::async_trait; use derive_new::new; -use harmony_secret::SecretManager; use harmony_types::id::Id; -use log::{debug, error, info, warn}; -use serde::{Deserialize, Serialize}; -use tokio::{fs::File, io::AsyncWriteExt, process::Command}; +use log::{debug, info}; +use serde::Serialize; use crate::{ - config::secret::{RedhatSecret, SshKeyPair}, - data::{FileContent, FilePath, Version}, hardware::PhysicalHost, infra::inventory::InventoryRepositoryFactory, - instrumentation::{HarmonyEvent, instrument}, interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, inventory::{HostRole, Inventory}, modules::{ dhcp::DhcpHostBindingScore, - http::{IPxeMacBootFileScore, StaticFilesHttpScore}, - inventory::{DiscoverHostForRoleScore, LaunchDiscoverInventoryAgentScore}, - okd::{ - bootstrap_load_balancer::OKDBootstrapLoadBalancerScore, - templates::{BootstrapIpxeTpl, InstallConfigYaml}, - }, + http::IPxeMacBootFileScore, + inventory::DiscoverHostForRoleScore, + okd::templates::BootstrapIpxeTpl, }, score::Score, topology::{HAClusterTopology, HostBinding}, + data::Version, }; // ------------------------------------------------------------------------------------------------- // Step 03: Control Plane @@ -64,84 +57,24 @@ impl OKDSetup03ControlPlaneInterpret { } } - async fn configure_host_binding( - &self, - inventory: &Inventory, - topology: &HAClusterTopology, - nodes: &Vec, - ) -> Result<(), InterpretError> { - let binding = HostBinding { - logical_host: topology.bootstrap_host.clone(), - physical_host: self.get_bootstrap_node().await?, - }; - info!("Configuring host binding for bootstrap node {binding:?}"); - - DhcpHostBindingScore { - host_binding: vec![binding], - domain: Some(topology.domain_name.clone()), - } - .interpret(inventory, topology) - .await?; - Ok(()) - } - - async fn configure_ipxe( - &self, - inventory: &Inventory, - topology: &HAClusterTopology, - nodes: &Vec, - ) -> Result<(), InterpretError> { - info!("[ControlPlane] Rendering per-MAC PXE"); - let content = BootstrapIpxeTpl { - http_ip: &topology.http_server.get_ip().to_string(), - scos_path: "scos", // TODO use some constant - ignition_http_path: "okd_ignition_files", // TODO use proper variable - installation_device: "/dev/sda", - ignition_file_name: "bootstrap.ign", - } - .to_string(); - - let bootstrap_node = self.get_nodes().await?; - let mac_address = bootstrap_node.get_mac_address(); - - info!("[Bootstrap] Rendering per-MAC PXE for bootstrap node"); - debug!("bootstrap ipxe content : {content}"); - debug!("bootstrap mac addresses : {mac_address:?}"); - - IPxeMacBootFileScore { - mac_address, - content, - } - .interpret(inventory, topology) - .await?; - Ok(()) - } - - async fn persist_network_bond(&self) -> Result<(), InterpretError> { - // Generate MC or NNCP from inventory NIC data; apply via ignition or post-join. - info!("[ControlPlane] Ensuring persistent bonding via MachineConfig/NNCP"); - inquire::Confirm::new( - "Network configuration for control plane nodes is not automated yet, configure it manually now.", - ) - .prompt() - .expect("Unexpected prompt error"); - - Ok(()) - } - + /// Ensures that three physical hosts are discovered and available for the ControlPlane role. + /// It will trigger discovery if not enough hosts are found. async fn get_nodes( &self, inventory: &Inventory, topology: &HAClusterTopology, ) -> Result, InterpretError> { + const REQUIRED_HOSTS: usize = 3; let repo = InventoryRepositoryFactory::build().await?; let mut control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?; - while control_plane_hosts.len() < 3 { + while control_plane_hosts.len() < REQUIRED_HOSTS { info!( - "Discovery of 3 control plane hosts in progress, current number {}", + "Discovery of {} control plane hosts in progress, current number {}", + REQUIRED_HOSTS, control_plane_hosts.len() ); + // This score triggers the discovery agent for a specific role. DiscoverHostForRoleScore { role: HostRole::ControlPlane, } @@ -150,15 +83,141 @@ impl OKDSetup03ControlPlaneInterpret { control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?; } - if control_plane_hosts.len() < 3 { + if control_plane_hosts.len() < REQUIRED_HOSTS { Err(InterpretError::new(format!( - "OKD Requires at least 3 hosts, got {}, cannot proceed", + "OKD Requires at least {} control plane hosts, but only found {}. Cannot proceed.", + REQUIRED_HOSTS, control_plane_hosts.len() ))) } else { - Ok(control_plane_hosts) + // Take exactly the number of required hosts to ensure consistency. + Ok(control_plane_hosts.into_iter().take(REQUIRED_HOSTS).collect()) } } + + /// Configures DHCP host bindings for all control plane nodes. + async fn configure_host_binding( + &self, + inventory: &Inventory, + topology: &HAClusterTopology, + nodes: &Vec, + ) -> Result<(), InterpretError> { + info!("[ControlPlane] Configuring host bindings for control plane nodes."); + + // Ensure the topology definition matches the number of physical nodes found. + if topology.control_plane_hosts.len() != nodes.len() { + return Err(InterpretError::new(format!( + "Mismatch between logical control plane hosts defined in topology ({}) and physical nodes found ({}).", + topology.control_plane_hosts.len(), + nodes.len() + ))); + } + + // Create a binding for each physical host to its corresponding logical host. + let bindings: Vec = topology + .control_plane_hosts + .iter() + .zip(nodes.iter()) + .map(|(logical_host, physical_host)| { + info!( + "Creating binding: Logical Host '{}' -> Physical Host ID '{}'", + logical_host.get_hostname(), + physical_host.id + ); + HostBinding { + logical_host: logical_host.clone(), + physical_host: physical_host.clone(), + } + }) + .collect(); + + DhcpHostBindingScore { + host_binding: bindings, + domain: Some(topology.domain_name.clone()), + } + .interpret(inventory, topology) + .await?; + + Ok(()) + } + + /// Renders and deploys a per-MAC iPXE boot file for each control plane node. + async fn configure_ipxe( + &self, + inventory: &Inventory, + topology: &HAClusterTopology, + nodes: &Vec, + ) -> Result<(), InterpretError> { + info!("[ControlPlane] Rendering per-MAC iPXE configurations."); + + // The iPXE script content is the same for all control plane nodes, + // pointing to the 'master.ign' ignition file. + let content = BootstrapIpxeTpl { + http_ip: &topology.http_server.get_ip().to_string(), + scos_path: "scos", + ignition_http_path: "okd_ignition_files", + installation_device: "/dev/sda", // This might need to be configurable per-host in the future + ignition_file_name: "master.ign", // Control plane nodes use the master ignition file + } + .to_string(); + + debug!("[ControlPlane] iPXE content template:\n{}", content); + + // Create and apply an iPXE boot file for each node. + for node in nodes { + let mac_address = node.get_mac_address(); + if mac_address.is_empty() { + return Err(InterpretError::new(format!( + "Physical host with ID '{}' has no MAC addresses defined.", + node.id + ))); + } + info!( + "[ControlPlane] Applying iPXE config for node ID '{}' with MACs: {:?}", + node.id, mac_address + ); + + IPxeMacBootFileScore { + mac_address, + content: content.clone(), + } + .interpret(inventory, topology) + .await?; + } + + Ok(()) + } + + /// Prompts the user to reboot the target control plane nodes. + async fn reboot_targets(&self, nodes: &Vec) -> Result<(), InterpretError> { + let node_ids: Vec = nodes.iter().map(|n| n.id.to_string()).collect(); + info!("[ControlPlane] Requesting reboot for control plane nodes: {:?}", node_ids); + + let confirmation = inquire::Confirm::new( + &format!("Please reboot the {} control plane nodes ({}) to apply their PXE configuration. Press enter when ready.", nodes.len(), node_ids.join(", ")), + ) + .prompt() + .map_err(|e| InterpretError::new(format!("User prompt failed: {}", e)))?; + + if !confirmation { + return Err(InterpretError::new("User aborted the operation.".to_string())); + } + + Ok(()) + } + + /// Placeholder for automating network bonding configuration. + async fn persist_network_bond(&self) -> Result<(), InterpretError> { + // Generate MC or NNCP from inventory NIC data; apply via ignition or post-join. + info!("[ControlPlane] Ensuring persistent bonding via MachineConfig/NNCP"); + inquire::Confirm::new( + "Network configuration for control plane nodes is not automated yet. Configure it manually if needed.", + ) + .prompt() + .map_err(|e| InterpretError::new(format!("User prompt failed: {}", e)))?; + + Ok(()) + } } #[async_trait] @@ -184,12 +243,29 @@ impl Interpret for OKDSetup03ControlPlaneInterpret { inventory: &Inventory, topology: &HAClusterTopology, ) -> Result { + // 1. Ensure we have 3 physical hosts for the control plane. let nodes = self.get_nodes(inventory, topology).await?; - // TODO add relevant methods here + + // 2. Create DHCP reservations for the control plane nodes. + self.configure_host_binding(inventory, topology, &nodes).await?; + + // 3. Create iPXE files for each control plane node to boot from the master ignition. + self.configure_ipxe(inventory, topology, &nodes).await?; + + // 4. Reboot the nodes to start the OS installation. + self.reboot_targets(&nodes).await?; + + // 5. Placeholder for post-boot network configuration (e.g., bonding). self.persist_network_bond().await?; + + // TODO: Implement a step to wait for the control plane nodes to join the cluster + // and for the cluster operators to become available. This would be similar to + // the `wait-for bootstrap-complete` command. + info!("[ControlPlane] Provisioning initiated. Monitor the cluster convergence manually."); + Ok(Outcome::new( InterpretStatus::SUCCESS, - "Control plane provisioned".into(), + "Control plane provisioning has been successfully initiated.".into(), )) } }