feat: Control plane bootstrapping logic implemented, next step is testing it!
Some checks failed
Run Check Script / check (pull_request) Failing after 18s

This commit is contained in:
Jean-Gabriel Gill-Couture 2025-09-04 17:49:06 -04:00
parent 8bcade27a1
commit 902185daa4

View File

@ -2,31 +2,24 @@ use std::{fmt::Write, path::PathBuf};
use async_trait::async_trait; use async_trait::async_trait;
use derive_new::new; use derive_new::new;
use harmony_secret::SecretManager;
use harmony_types::id::Id; use harmony_types::id::Id;
use log::{debug, error, info, warn}; use log::{debug, info};
use serde::{Deserialize, Serialize}; use serde::Serialize;
use tokio::{fs::File, io::AsyncWriteExt, process::Command};
use crate::{ use crate::{
config::secret::{RedhatSecret, SshKeyPair},
data::{FileContent, FilePath, Version},
hardware::PhysicalHost, hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory, infra::inventory::InventoryRepositoryFactory,
instrumentation::{HarmonyEvent, instrument},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory}, inventory::{HostRole, Inventory},
modules::{ modules::{
dhcp::DhcpHostBindingScore, dhcp::DhcpHostBindingScore,
http::{IPxeMacBootFileScore, StaticFilesHttpScore}, http::IPxeMacBootFileScore,
inventory::{DiscoverHostForRoleScore, LaunchDiscoverInventoryAgentScore}, inventory::DiscoverHostForRoleScore,
okd::{ okd::templates::BootstrapIpxeTpl,
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore,
templates::{BootstrapIpxeTpl, InstallConfigYaml},
},
}, },
score::Score, score::Score,
topology::{HAClusterTopology, HostBinding}, topology::{HAClusterTopology, HostBinding},
data::Version,
}; };
// ------------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------
// Step 03: Control Plane // Step 03: Control Plane
@ -64,84 +57,24 @@ impl OKDSetup03ControlPlaneInterpret {
} }
} }
async fn configure_host_binding( /// Ensures that three physical hosts are discovered and available for the ControlPlane role.
&self, /// It will trigger discovery if not enough hosts are found.
inventory: &Inventory,
topology: &HAClusterTopology,
nodes: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
let binding = HostBinding {
logical_host: topology.bootstrap_host.clone(),
physical_host: self.get_bootstrap_node().await?,
};
info!("Configuring host binding for bootstrap node {binding:?}");
DhcpHostBindingScore {
host_binding: vec![binding],
domain: Some(topology.domain_name.clone()),
}
.interpret(inventory, topology)
.await?;
Ok(())
}
/// Renders one iPXE script from `BootstrapIpxeTpl` (ignition file `bootstrap.ign`) and submits
/// it through a single `IPxeMacBootFileScore`.
///
/// Returns `Ok(())` once the score has been interpreted; errors from `get_nodes` or from the
/// score interpretation are propagated with `?`.
///
/// NOTE(review): the `nodes` parameter is never read in this body.
/// NOTE(review): this listing also contains a later `configure_ipxe` that loops over `nodes`
/// and uses `master.ign`; this variant looks like the pre-change side of the diff — confirm
/// which one is live before relying on it.
async fn configure_ipxe(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
nodes: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
info!("[ControlPlane] Rendering per-MAC PXE");
// Only the HTTP server IP comes from the topology; every other template field is a literal.
let content = BootstrapIpxeTpl {
http_ip: &topology.http_server.get_ip().to_string(),
scos_path: "scos", // TODO use some constant
ignition_http_path: "okd_ignition_files", // TODO use proper variable
installation_device: "/dev/sda",
ignition_file_name: "bootstrap.ign",
}
.to_string();
// NOTE(review): `get_nodes` is declared in this listing with `inventory` and `topology`
// parameters and a `Vec<PhysicalHost>` return type; this zero-argument call and the
// `get_mac_address()` call on its result do not match that declaration — verify.
let bootstrap_node = self.get_nodes().await?;
let mac_address = bootstrap_node.get_mac_address();
info!("[Bootstrap] Rendering per-MAC PXE for bootstrap node");
debug!("bootstrap ipxe content : {content}");
debug!("bootstrap mac addresses : {mac_address:?}");
// The rendered script is moved into the score together with the MAC address value.
IPxeMacBootFileScore {
mac_address,
content,
}
.interpret(inventory, topology)
.await?;
Ok(())
}
/// Logs the bonding step and blocks on an interactive confirmation asking the operator to
/// configure control plane networking manually.
///
/// The operator's yes/no answer is discarded; the function returns `Ok(())` whenever the
/// prompt itself succeeds.
///
/// # Panics
/// Panics via `expect` if the prompt returns an error.
async fn persist_network_bond(&self) -> Result<(), InterpretError> {
// Generate MC or NNCP from inventory NIC data; apply via ignition or post-join.
info!("[ControlPlane] Ensuring persistent bonding via MachineConfig/NNCP");
inquire::Confirm::new(
"Network configuration for control plane nodes is not automated yet, configure it manually now.",
)
.prompt()
// A prompt failure aborts the process here instead of surfacing as an `InterpretError`.
.expect("Unexpected prompt error");
Ok(())
}
async fn get_nodes( async fn get_nodes(
&self, &self,
inventory: &Inventory, inventory: &Inventory,
topology: &HAClusterTopology, topology: &HAClusterTopology,
) -> Result<Vec<PhysicalHost>, InterpretError> { ) -> Result<Vec<PhysicalHost>, InterpretError> {
const REQUIRED_HOSTS: usize = 3;
let repo = InventoryRepositoryFactory::build().await?; let repo = InventoryRepositoryFactory::build().await?;
let mut control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?; let mut control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?;
while control_plane_hosts.len() < 3 { while control_plane_hosts.len() < REQUIRED_HOSTS {
info!( info!(
"Discovery of 3 control plane hosts in progress, current number {}", "Discovery of {} control plane hosts in progress, current number {}",
REQUIRED_HOSTS,
control_plane_hosts.len() control_plane_hosts.len()
); );
// This score triggers the discovery agent for a specific role.
DiscoverHostForRoleScore { DiscoverHostForRoleScore {
role: HostRole::ControlPlane, role: HostRole::ControlPlane,
} }
@ -150,15 +83,141 @@ impl OKDSetup03ControlPlaneInterpret {
control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?; control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?;
} }
if control_plane_hosts.len() < 3 { if control_plane_hosts.len() < REQUIRED_HOSTS {
Err(InterpretError::new(format!( Err(InterpretError::new(format!(
"OKD Requires at least 3 hosts, got {}, cannot proceed", "OKD Requires at least {} control plane hosts, but only found {}. Cannot proceed.",
REQUIRED_HOSTS,
control_plane_hosts.len() control_plane_hosts.len()
))) )))
} else { } else {
Ok(control_plane_hosts) // Take exactly the number of required hosts to ensure consistency.
Ok(control_plane_hosts.into_iter().take(REQUIRED_HOSTS).collect())
} }
} }
/// Pairs each logical control plane host from the topology with a discovered physical host
/// and submits all pairs as one `DhcpHostBindingScore`.
///
/// Pairing is positional: the i-th entry of `topology.control_plane_hosts` is bound to the
/// i-th entry of `nodes`.
///
/// # Errors
/// Returns an `InterpretError` when the number of logical hosts differs from `nodes.len()`,
/// and propagates any error raised while interpreting the score.
async fn configure_host_binding(
    &self,
    inventory: &Inventory,
    topology: &HAClusterTopology,
    nodes: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
    info!("[ControlPlane] Configuring host bindings for control plane nodes.");

    // Positional pairing only makes sense when both sides have the same length.
    let logical_hosts = &topology.control_plane_hosts;
    let (logical_count, physical_count) = (logical_hosts.len(), nodes.len());
    if logical_count != physical_count {
        return Err(InterpretError::new(format!(
            "Mismatch between logical control plane hosts defined in topology ({}) and physical nodes found ({}).",
            logical_count, physical_count
        )));
    }

    // Build the bindings one pair at a time, logging each pair as it is created.
    let mut host_binding = Vec::with_capacity(physical_count);
    for (logical_host, physical_host) in logical_hosts.iter().zip(nodes) {
        info!(
            "Creating binding: Logical Host '{}' -> Physical Host ID '{}'",
            logical_host.get_hostname(),
            physical_host.id
        );
        host_binding.push(HostBinding {
            logical_host: logical_host.clone(),
            physical_host: physical_host.clone(),
        });
    }

    let score = DhcpHostBindingScore {
        host_binding,
        domain: Some(topology.domain_name.clone()),
    };
    score.interpret(inventory, topology).await?;

    Ok(())
}
/// Renders the control plane iPXE script once and interprets an `IPxeMacBootFileScore` with
/// that script for every node in `nodes`.
///
/// All nodes receive the same script content, which references the `master.ign` ignition
/// file and installs to `/dev/sda`.
///
/// # Errors
/// Returns an `InterpretError` as soon as a node reports no MAC addresses, and propagates
/// any error raised while interpreting a node's score. Nodes processed before the failure
/// have already had their score interpreted.
async fn configure_ipxe(
    &self,
    inventory: &Inventory,
    topology: &HAClusterTopology,
    nodes: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
    info!("[ControlPlane] Rendering per-MAC iPXE configurations.");

    // The template borrows the IP as a string, so keep the rendered address in a local.
    let http_ip = topology.http_server.get_ip().to_string();
    let content = BootstrapIpxeTpl {
        http_ip: &http_ip,
        scos_path: "scos",
        ignition_http_path: "okd_ignition_files",
        // Hard-coded for now; may need to become configurable per host.
        installation_device: "/dev/sda",
        // Control plane nodes boot with the master ignition file.
        ignition_file_name: "master.ign",
    }
    .to_string();
    debug!("[ControlPlane] iPXE content template:\n{}", content);

    for node in nodes.iter() {
        let mac_address = node.get_mac_address();

        // A host without any MAC address cannot be given a per-MAC boot file.
        if mac_address.is_empty() {
            return Err(InterpretError::new(format!(
                "Physical host with ID '{}' has no MAC addresses defined.",
                node.id
            )));
        }

        info!(
            "[ControlPlane] Applying iPXE config for node ID '{}' with MACs: {:?}",
            node.id, mac_address
        );

        // Each score owns its own copy of the shared script.
        let score = IPxeMacBootFileScore {
            mac_address,
            content: content.clone(),
        };
        score.interpret(inventory, topology).await?;
    }

    Ok(())
}
/// Asks the operator to reboot the given control plane nodes and waits for confirmation.
///
/// The prompt lists the node count and the node IDs. It defaults to "yes", so pressing
/// Enter — as the prompt text instructs — confirms; answering "no" aborts.
///
/// # Errors
/// Returns an `InterpretError` when the prompt itself fails or when the operator declines.
async fn reboot_targets(&self, nodes: &Vec<PhysicalHost>) -> Result<(), InterpretError> {
    let node_ids: Vec<String> = nodes.iter().map(|n| n.id.to_string()).collect();
    info!(
        "[ControlPlane] Requesting reboot for control plane nodes: {:?}",
        node_ids
    );

    let prompt_message = format!(
        "Please reboot the {} control plane nodes ({}) to apply their PXE configuration. Press enter when ready.",
        nodes.len(),
        node_ids.join(", ")
    );

    // Without a default, `Confirm` rejects an empty answer, which contradicts the
    // "Press enter when ready" instruction in the message.
    let confirmation = inquire::Confirm::new(&prompt_message)
        .with_default(true)
        .prompt()
        .map_err(|e| InterpretError::new(format!("User prompt failed: {}", e)))?;

    if !confirmation {
        return Err(InterpretError::new(
            "User aborted the operation.".to_string(),
        ));
    }

    Ok(())
}
/// Placeholder step for persistent network bonding on control plane nodes.
///
/// Nothing is automated yet: the function logs the step and shows an interactive
/// confirmation reminding the operator to configure networking manually.
///
/// # Errors
/// Returns an `InterpretError` when the prompt itself fails. The operator's yes/no answer
/// is not inspected.
async fn persist_network_bond(&self) -> Result<(), InterpretError> {
    // Generate MC or NNCP from inventory NIC data; apply via ignition or post-join.
    info!("[ControlPlane] Ensuring persistent bonding via MachineConfig/NNCP");

    let reminder = inquire::Confirm::new(
        "Network configuration for control plane nodes is not automated yet. Configure it manually if needed.",
    );

    match reminder.prompt() {
        // Either answer lets the flow continue; only a prompt failure is an error.
        Ok(_) => Ok(()),
        Err(e) => Err(InterpretError::new(format!("User prompt failed: {}", e))),
    }
}
} }
#[async_trait] #[async_trait]
@ -184,12 +243,29 @@ impl Interpret<HAClusterTopology> for OKDSetup03ControlPlaneInterpret {
inventory: &Inventory, inventory: &Inventory,
topology: &HAClusterTopology, topology: &HAClusterTopology,
) -> Result<Outcome, InterpretError> { ) -> Result<Outcome, InterpretError> {
// 1. Ensure we have 3 physical hosts for the control plane.
let nodes = self.get_nodes(inventory, topology).await?; let nodes = self.get_nodes(inventory, topology).await?;
// TODO add relevant methods here
// 2. Create DHCP reservations for the control plane nodes.
self.configure_host_binding(inventory, topology, &nodes).await?;
// 3. Create iPXE files for each control plane node to boot from the master ignition.
self.configure_ipxe(inventory, topology, &nodes).await?;
// 4. Reboot the nodes to start the OS installation.
self.reboot_targets(&nodes).await?;
// 5. Placeholder for post-boot network configuration (e.g., bonding).
self.persist_network_bond().await?; self.persist_network_bond().await?;
// TODO: Implement a step to wait for the control plane nodes to join the cluster
// and for the cluster operators to become available. This would be similar to
// the `wait-for bootstrap-complete` command.
info!("[ControlPlane] Provisioning initiated. Monitor the cluster convergence manually.");
Ok(Outcome::new( Ok(Outcome::new(
InterpretStatus::SUCCESS, InterpretStatus::SUCCESS,
"Control plane provisioned".into(), "Control plane provisioning has been successfully initiated.".into(),
)) ))
} }
} }