feat: Control plane bootstrapping logic implemented, next step is testing it!
Some checks failed
Run Check Script / check (pull_request) Failing after 18s
Some checks failed
Run Check Script / check (pull_request) Failing after 18s
This commit is contained in:
parent
8bcade27a1
commit
902185daa4
@ -2,31 +2,24 @@ use std::{fmt::Write, path::PathBuf};
|
|||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use derive_new::new;
|
use derive_new::new;
|
||||||
use harmony_secret::SecretManager;
|
|
||||||
use harmony_types::id::Id;
|
use harmony_types::id::Id;
|
||||||
use log::{debug, error, info, warn};
|
use log::{debug, info};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::Serialize;
|
||||||
use tokio::{fs::File, io::AsyncWriteExt, process::Command};
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
config::secret::{RedhatSecret, SshKeyPair},
|
|
||||||
data::{FileContent, FilePath, Version},
|
|
||||||
hardware::PhysicalHost,
|
hardware::PhysicalHost,
|
||||||
infra::inventory::InventoryRepositoryFactory,
|
infra::inventory::InventoryRepositoryFactory,
|
||||||
instrumentation::{HarmonyEvent, instrument},
|
|
||||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||||
inventory::{HostRole, Inventory},
|
inventory::{HostRole, Inventory},
|
||||||
modules::{
|
modules::{
|
||||||
dhcp::DhcpHostBindingScore,
|
dhcp::DhcpHostBindingScore,
|
||||||
http::{IPxeMacBootFileScore, StaticFilesHttpScore},
|
http::IPxeMacBootFileScore,
|
||||||
inventory::{DiscoverHostForRoleScore, LaunchDiscoverInventoryAgentScore},
|
inventory::DiscoverHostForRoleScore,
|
||||||
okd::{
|
okd::templates::BootstrapIpxeTpl,
|
||||||
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore,
|
|
||||||
templates::{BootstrapIpxeTpl, InstallConfigYaml},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
score::Score,
|
score::Score,
|
||||||
topology::{HAClusterTopology, HostBinding},
|
topology::{HAClusterTopology, HostBinding},
|
||||||
|
data::Version,
|
||||||
};
|
};
|
||||||
// -------------------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------------------
|
||||||
// Step 03: Control Plane
|
// Step 03: Control Plane
|
||||||
@ -64,84 +57,24 @@ impl OKDSetup03ControlPlaneInterpret {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn configure_host_binding(
|
/// Ensures that three physical hosts are discovered and available for the ControlPlane role.
|
||||||
&self,
|
/// It will trigger discovery if not enough hosts are found.
|
||||||
inventory: &Inventory,
|
|
||||||
topology: &HAClusterTopology,
|
|
||||||
nodes: &Vec<PhysicalHost>,
|
|
||||||
) -> Result<(), InterpretError> {
|
|
||||||
let binding = HostBinding {
|
|
||||||
logical_host: topology.bootstrap_host.clone(),
|
|
||||||
physical_host: self.get_bootstrap_node().await?,
|
|
||||||
};
|
|
||||||
info!("Configuring host binding for bootstrap node {binding:?}");
|
|
||||||
|
|
||||||
DhcpHostBindingScore {
|
|
||||||
host_binding: vec![binding],
|
|
||||||
domain: Some(topology.domain_name.clone()),
|
|
||||||
}
|
|
||||||
.interpret(inventory, topology)
|
|
||||||
.await?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn configure_ipxe(
|
|
||||||
&self,
|
|
||||||
inventory: &Inventory,
|
|
||||||
topology: &HAClusterTopology,
|
|
||||||
nodes: &Vec<PhysicalHost>,
|
|
||||||
) -> Result<(), InterpretError> {
|
|
||||||
info!("[ControlPlane] Rendering per-MAC PXE");
|
|
||||||
let content = BootstrapIpxeTpl {
|
|
||||||
http_ip: &topology.http_server.get_ip().to_string(),
|
|
||||||
scos_path: "scos", // TODO use some constant
|
|
||||||
ignition_http_path: "okd_ignition_files", // TODO use proper variable
|
|
||||||
installation_device: "/dev/sda",
|
|
||||||
ignition_file_name: "bootstrap.ign",
|
|
||||||
}
|
|
||||||
.to_string();
|
|
||||||
|
|
||||||
let bootstrap_node = self.get_nodes().await?;
|
|
||||||
let mac_address = bootstrap_node.get_mac_address();
|
|
||||||
|
|
||||||
info!("[Bootstrap] Rendering per-MAC PXE for bootstrap node");
|
|
||||||
debug!("bootstrap ipxe content : {content}");
|
|
||||||
debug!("bootstrap mac addresses : {mac_address:?}");
|
|
||||||
|
|
||||||
IPxeMacBootFileScore {
|
|
||||||
mac_address,
|
|
||||||
content,
|
|
||||||
}
|
|
||||||
.interpret(inventory, topology)
|
|
||||||
.await?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn persist_network_bond(&self) -> Result<(), InterpretError> {
|
|
||||||
// Generate MC or NNCP from inventory NIC data; apply via ignition or post-join.
|
|
||||||
info!("[ControlPlane] Ensuring persistent bonding via MachineConfig/NNCP");
|
|
||||||
inquire::Confirm::new(
|
|
||||||
"Network configuration for control plane nodes is not automated yet, configure it manually now.",
|
|
||||||
)
|
|
||||||
.prompt()
|
|
||||||
.expect("Unexpected prompt error");
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_nodes(
|
async fn get_nodes(
|
||||||
&self,
|
&self,
|
||||||
inventory: &Inventory,
|
inventory: &Inventory,
|
||||||
topology: &HAClusterTopology,
|
topology: &HAClusterTopology,
|
||||||
) -> Result<Vec<PhysicalHost>, InterpretError> {
|
) -> Result<Vec<PhysicalHost>, InterpretError> {
|
||||||
|
const REQUIRED_HOSTS: usize = 3;
|
||||||
let repo = InventoryRepositoryFactory::build().await?;
|
let repo = InventoryRepositoryFactory::build().await?;
|
||||||
let mut control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?;
|
let mut control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?;
|
||||||
|
|
||||||
while control_plane_hosts.len() < 3 {
|
while control_plane_hosts.len() < REQUIRED_HOSTS {
|
||||||
info!(
|
info!(
|
||||||
"Discovery of 3 control plane hosts in progress, current number {}",
|
"Discovery of {} control plane hosts in progress, current number {}",
|
||||||
|
REQUIRED_HOSTS,
|
||||||
control_plane_hosts.len()
|
control_plane_hosts.len()
|
||||||
);
|
);
|
||||||
|
// This score triggers the discovery agent for a specific role.
|
||||||
DiscoverHostForRoleScore {
|
DiscoverHostForRoleScore {
|
||||||
role: HostRole::ControlPlane,
|
role: HostRole::ControlPlane,
|
||||||
}
|
}
|
||||||
@ -150,15 +83,141 @@ impl OKDSetup03ControlPlaneInterpret {
|
|||||||
control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?;
|
control_plane_hosts = repo.get_host_for_role(HostRole::ControlPlane).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if control_plane_hosts.len() < 3 {
|
if control_plane_hosts.len() < REQUIRED_HOSTS {
|
||||||
Err(InterpretError::new(format!(
|
Err(InterpretError::new(format!(
|
||||||
"OKD Requires at least 3 hosts, got {}, cannot proceed",
|
"OKD Requires at least {} control plane hosts, but only found {}. Cannot proceed.",
|
||||||
|
REQUIRED_HOSTS,
|
||||||
control_plane_hosts.len()
|
control_plane_hosts.len()
|
||||||
)))
|
)))
|
||||||
} else {
|
} else {
|
||||||
Ok(control_plane_hosts)
|
// Take exactly the number of required hosts to ensure consistency.
|
||||||
|
Ok(control_plane_hosts.into_iter().take(REQUIRED_HOSTS).collect())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Configures DHCP host bindings for all control plane nodes.
|
||||||
|
async fn configure_host_binding(
|
||||||
|
&self,
|
||||||
|
inventory: &Inventory,
|
||||||
|
topology: &HAClusterTopology,
|
||||||
|
nodes: &Vec<PhysicalHost>,
|
||||||
|
) -> Result<(), InterpretError> {
|
||||||
|
info!("[ControlPlane] Configuring host bindings for control plane nodes.");
|
||||||
|
|
||||||
|
// Ensure the topology definition matches the number of physical nodes found.
|
||||||
|
if topology.control_plane_hosts.len() != nodes.len() {
|
||||||
|
return Err(InterpretError::new(format!(
|
||||||
|
"Mismatch between logical control plane hosts defined in topology ({}) and physical nodes found ({}).",
|
||||||
|
topology.control_plane_hosts.len(),
|
||||||
|
nodes.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a binding for each physical host to its corresponding logical host.
|
||||||
|
let bindings: Vec<HostBinding> = topology
|
||||||
|
.control_plane_hosts
|
||||||
|
.iter()
|
||||||
|
.zip(nodes.iter())
|
||||||
|
.map(|(logical_host, physical_host)| {
|
||||||
|
info!(
|
||||||
|
"Creating binding: Logical Host '{}' -> Physical Host ID '{}'",
|
||||||
|
logical_host.get_hostname(),
|
||||||
|
physical_host.id
|
||||||
|
);
|
||||||
|
HostBinding {
|
||||||
|
logical_host: logical_host.clone(),
|
||||||
|
physical_host: physical_host.clone(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
DhcpHostBindingScore {
|
||||||
|
host_binding: bindings,
|
||||||
|
domain: Some(topology.domain_name.clone()),
|
||||||
|
}
|
||||||
|
.interpret(inventory, topology)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Renders and deploys a per-MAC iPXE boot file for each control plane node.
|
||||||
|
async fn configure_ipxe(
|
||||||
|
&self,
|
||||||
|
inventory: &Inventory,
|
||||||
|
topology: &HAClusterTopology,
|
||||||
|
nodes: &Vec<PhysicalHost>,
|
||||||
|
) -> Result<(), InterpretError> {
|
||||||
|
info!("[ControlPlane] Rendering per-MAC iPXE configurations.");
|
||||||
|
|
||||||
|
// The iPXE script content is the same for all control plane nodes,
|
||||||
|
// pointing to the 'master.ign' ignition file.
|
||||||
|
let content = BootstrapIpxeTpl {
|
||||||
|
http_ip: &topology.http_server.get_ip().to_string(),
|
||||||
|
scos_path: "scos",
|
||||||
|
ignition_http_path: "okd_ignition_files",
|
||||||
|
installation_device: "/dev/sda", // This might need to be configurable per-host in the future
|
||||||
|
ignition_file_name: "master.ign", // Control plane nodes use the master ignition file
|
||||||
|
}
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
debug!("[ControlPlane] iPXE content template:\n{}", content);
|
||||||
|
|
||||||
|
// Create and apply an iPXE boot file for each node.
|
||||||
|
for node in nodes {
|
||||||
|
let mac_address = node.get_mac_address();
|
||||||
|
if mac_address.is_empty() {
|
||||||
|
return Err(InterpretError::new(format!(
|
||||||
|
"Physical host with ID '{}' has no MAC addresses defined.",
|
||||||
|
node.id
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
info!(
|
||||||
|
"[ControlPlane] Applying iPXE config for node ID '{}' with MACs: {:?}",
|
||||||
|
node.id, mac_address
|
||||||
|
);
|
||||||
|
|
||||||
|
IPxeMacBootFileScore {
|
||||||
|
mac_address,
|
||||||
|
content: content.clone(),
|
||||||
|
}
|
||||||
|
.interpret(inventory, topology)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prompts the user to reboot the target control plane nodes.
|
||||||
|
async fn reboot_targets(&self, nodes: &Vec<PhysicalHost>) -> Result<(), InterpretError> {
|
||||||
|
let node_ids: Vec<String> = nodes.iter().map(|n| n.id.to_string()).collect();
|
||||||
|
info!("[ControlPlane] Requesting reboot for control plane nodes: {:?}", node_ids);
|
||||||
|
|
||||||
|
let confirmation = inquire::Confirm::new(
|
||||||
|
&format!("Please reboot the {} control plane nodes ({}) to apply their PXE configuration. Press enter when ready.", nodes.len(), node_ids.join(", ")),
|
||||||
|
)
|
||||||
|
.prompt()
|
||||||
|
.map_err(|e| InterpretError::new(format!("User prompt failed: {}", e)))?;
|
||||||
|
|
||||||
|
if !confirmation {
|
||||||
|
return Err(InterpretError::new("User aborted the operation.".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Placeholder for automating network bonding configuration.
|
||||||
|
async fn persist_network_bond(&self) -> Result<(), InterpretError> {
|
||||||
|
// Generate MC or NNCP from inventory NIC data; apply via ignition or post-join.
|
||||||
|
info!("[ControlPlane] Ensuring persistent bonding via MachineConfig/NNCP");
|
||||||
|
inquire::Confirm::new(
|
||||||
|
"Network configuration for control plane nodes is not automated yet. Configure it manually if needed.",
|
||||||
|
)
|
||||||
|
.prompt()
|
||||||
|
.map_err(|e| InterpretError::new(format!("User prompt failed: {}", e)))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
@ -184,12 +243,29 @@ impl Interpret<HAClusterTopology> for OKDSetup03ControlPlaneInterpret {
|
|||||||
inventory: &Inventory,
|
inventory: &Inventory,
|
||||||
topology: &HAClusterTopology,
|
topology: &HAClusterTopology,
|
||||||
) -> Result<Outcome, InterpretError> {
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
// 1. Ensure we have 3 physical hosts for the control plane.
|
||||||
let nodes = self.get_nodes(inventory, topology).await?;
|
let nodes = self.get_nodes(inventory, topology).await?;
|
||||||
// TODO add relevant methods here
|
|
||||||
|
// 2. Create DHCP reservations for the control plane nodes.
|
||||||
|
self.configure_host_binding(inventory, topology, &nodes).await?;
|
||||||
|
|
||||||
|
// 3. Create iPXE files for each control plane node to boot from the master ignition.
|
||||||
|
self.configure_ipxe(inventory, topology, &nodes).await?;
|
||||||
|
|
||||||
|
// 4. Reboot the nodes to start the OS installation.
|
||||||
|
self.reboot_targets(&nodes).await?;
|
||||||
|
|
||||||
|
// 5. Placeholder for post-boot network configuration (e.g., bonding).
|
||||||
self.persist_network_bond().await?;
|
self.persist_network_bond().await?;
|
||||||
|
|
||||||
|
// TODO: Implement a step to wait for the control plane nodes to join the cluster
|
||||||
|
// and for the cluster operators to become available. This would be similar to
|
||||||
|
// the `wait-for bootstrap-complete` command.
|
||||||
|
info!("[ControlPlane] Provisioning initiated. Monitor the cluster convergence manually.");
|
||||||
|
|
||||||
Ok(Outcome::new(
|
Ok(Outcome::new(
|
||||||
InterpretStatus::SUCCESS,
|
InterpretStatus::SUCCESS,
|
||||||
"Control plane provisioned".into(),
|
"Control plane provisioning has been successfully initiated.".into(),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user