Merge remote-tracking branch 'origin/master' into feat/postgresqlScore
Some checks failed
Run Check Script / check (pull_request) Failing after 1s

This commit is contained in:
2026-01-06 13:44:50 -05:00
72 changed files with 1899 additions and 481 deletions

View File

@@ -152,10 +152,10 @@ impl PhysicalHost {
pub fn parts_list(&self) -> String {
let PhysicalHost {
id,
category,
category: _,
network,
storage,
labels,
labels: _,
memory_modules,
cpus,
} = self;
@@ -226,8 +226,8 @@ impl PhysicalHost {
speed_mhz,
manufacturer,
part_number,
serial_number,
rank,
serial_number: _,
rank: _,
} = mem;
parts_list.push_str(&format!(
"\n{}Gb, {}Mhz, Manufacturer ({}), Part Number ({})",

View File

@@ -4,6 +4,8 @@ use std::error::Error;
use async_trait::async_trait;
use derive_new::new;
use crate::inventory::HostRole;
use super::{
data::Version, executors::ExecutorError, inventory::Inventory, topology::PreparationError,
};

View File

@@ -1,4 +1,6 @@
mod repository;
use std::fmt;
pub use repository::*;
#[derive(Debug, new, Clone)]
@@ -69,5 +71,14 @@ pub enum HostRole {
Bootstrap,
ControlPlane,
Worker,
Storage,
}
impl fmt::Display for HostRole {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
HostRole::Bootstrap => write!(f, "Bootstrap"),
HostRole::ControlPlane => write!(f, "ControlPlane"),
HostRole::Worker => write!(f, "Worker"),
}
}
}

View File

@@ -1,4 +1,5 @@
use async_trait::async_trait;
use brocade::PortOperatingMode;
use harmony_macros::ip;
use harmony_types::{
id::Id,
@@ -8,9 +9,9 @@ use harmony_types::{
use log::debug;
use log::info;
use crate::infra::network_manager::OpenShiftNmStateNetworkManager;
use crate::topology::PxeOptions;
use crate::{data::FileContent, executors::ExecutorError};
use crate::{infra::network_manager::OpenShiftNmStateNetworkManager, topology::PortConfig};
use crate::{modules::inventory::HarmonyDiscoveryStrategy, topology::PxeOptions};
use super::{
DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig,
@@ -298,6 +299,13 @@ impl Switch for HAClusterTopology {
Ok(())
}
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
todo!()
}
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
todo!()
}
}
#[async_trait]
@@ -521,4 +529,10 @@ impl SwitchClient for DummyInfra {
) -> Result<u8, SwitchError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
todo!()
}
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
todo!()
}
}

View File

@@ -7,6 +7,7 @@ use std::{
};
use async_trait::async_trait;
use brocade::PortOperatingMode;
use derive_new::new;
use harmony_types::{
id::Id,
@@ -214,6 +215,8 @@ impl From<String> for NetworkError {
}
}
pub type PortConfig = (PortLocation, PortOperatingMode);
#[async_trait]
pub trait Switch: Send + Sync {
async fn setup_switch(&self) -> Result<(), SwitchError>;
@@ -224,6 +227,8 @@ pub trait Switch: Send + Sync {
) -> Result<Option<PortLocation>, SwitchError>;
async fn configure_port_channel(&self, config: &HostNetworkConfig) -> Result<(), SwitchError>;
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError>;
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError>;
}
#[derive(Clone, Debug, PartialEq)]
@@ -283,6 +288,9 @@ pub trait SwitchClient: Debug + Send + Sync {
channel_name: &str,
switch_ports: Vec<PortLocation>,
) -> Result<u8, SwitchError>;
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError>;
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError>;
}
#[cfg(test)]

View File

@@ -14,7 +14,7 @@ use k8s_openapi::{
},
apimachinery::pkg::util::intstr::IntOrString,
};
use kube::Resource;
use kube::{Resource, api::DynamicObject};
use log::debug;
use serde::de::DeserializeOwned;
use serde_json::json;

View File

@@ -1,12 +1,13 @@
use async_trait::async_trait;
use brocade::{BrocadeClient, BrocadeOptions, InterSwitchLink, InterfaceStatus, PortOperatingMode};
use harmony_types::{
id::Id,
net::{IpAddress, MacAddress},
switch::{PortDeclaration, PortLocation},
};
use option_ext::OptionExt;
use crate::topology::{SwitchClient, SwitchError};
use crate::topology::{PortConfig, SwitchClient, SwitchError};
#[derive(Debug)]
pub struct BrocadeSwitchClient {
@@ -18,9 +19,9 @@ impl BrocadeSwitchClient {
ip_addresses: &[IpAddress],
username: &str,
password: &str,
options: Option<BrocadeOptions>,
options: BrocadeOptions,
) -> Result<Self, brocade::Error> {
let brocade = brocade::init(ip_addresses, 22, username, password, options).await?;
let brocade = brocade::init(ip_addresses, username, password, options).await?;
Ok(Self { brocade })
}
}
@@ -59,7 +60,7 @@ impl SwitchClient for BrocadeSwitchClient {
}
self.brocade
.configure_interfaces(interfaces)
.configure_interfaces(&interfaces)
.await
.map_err(|e| SwitchError::new(e.to_string()))?;
@@ -111,6 +112,27 @@ impl SwitchClient for BrocadeSwitchClient {
Ok(channel_id)
}
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
for i in ids {
self.brocade
.clear_port_channel(&i.to_string())
.await
.map_err(|e| SwitchError::new(e.to_string()))?;
}
Ok(())
}
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
// FIXME hardcoded TenGigabitEthernet = bad
let ports = ports
.iter()
.map(|p| (format!("TenGigabitEthernet {}", p.0), p.1.clone()))
.collect();
self.brocade
.configure_interfaces(&ports)
.await
.map_err(|e| SwitchError::new(e.to_string()))?;
Ok(())
}
}
#[cfg(test)]
@@ -121,7 +143,7 @@ mod tests {
use async_trait::async_trait;
use brocade::{
BrocadeClient, BrocadeInfo, Error, InterSwitchLink, InterfaceInfo, InterfaceStatus,
InterfaceType, MacAddressEntry, PortChannelId, PortOperatingMode,
InterfaceType, MacAddressEntry, PortChannelId, PortOperatingMode, SecurityLevel,
};
use harmony_types::switch::PortLocation;
@@ -145,6 +167,7 @@ mod tests {
client.setup().await.unwrap();
//TODO not sure about this
let configured_interfaces = brocade.configured_interfaces.lock().unwrap();
assert_that!(*configured_interfaces).contains_exactly(vec![
(first_interface.name.clone(), PortOperatingMode::Access),
@@ -255,10 +278,10 @@ mod tests {
async fn configure_interfaces(
&self,
interfaces: Vec<(String, PortOperatingMode)>,
interfaces: &Vec<(String, PortOperatingMode)>,
) -> Result<(), Error> {
let mut configured_interfaces = self.configured_interfaces.lock().unwrap();
*configured_interfaces = interfaces;
*configured_interfaces = interfaces.clone();
Ok(())
}
@@ -279,6 +302,10 @@ mod tests {
async fn clear_port_channel(&self, _channel_name: &str) -> Result<(), Error> {
todo!()
}
async fn enable_snmp(&self, user_name: &str, auth: &str, des: &str) -> Result<(), Error> {
todo!()
}
}
impl FakeBrocadeClient {

View File

@@ -121,7 +121,7 @@ mod test {
#[test]
fn deployment_to_dynamic_roundtrip() {
// Create a sample Deployment with nested structures
let mut deployment = Deployment {
let deployment = Deployment {
metadata: ObjectMeta {
name: Some("my-deployment".to_string()),
labels: Some({

View File

@@ -8,7 +8,6 @@ mod tftp;
use std::sync::Arc;
pub use management::*;
use opnsense_config_xml::Host;
use tokio::sync::RwLock;
use crate::{executors::ExecutorError, topology::LogicalHost};

View File

@@ -0,0 +1,117 @@
use std::net::{IpAddr, Ipv4Addr};
use async_trait::async_trait;
use brocade::BrocadeOptions;
use harmony_secret::{Secret, SecretManager};
use harmony_types::id::Id;
use serde::{Deserialize, Serialize};
use crate::{
data::Version,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::Topology,
};
#[derive(Debug, Clone, Serialize)]
pub struct BrocadeEnableSnmpScore {
pub switch_ips: Vec<IpAddr>,
pub dry_run: bool,
}
impl<T: Topology> Score<T> for BrocadeEnableSnmpScore {
fn name(&self) -> String {
"BrocadeEnableSnmpScore".to_string()
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(BrocadeEnableSnmpInterpret {
score: self.clone(),
})
}
}
#[derive(Debug, Clone, Serialize)]
pub struct BrocadeEnableSnmpInterpret {
score: BrocadeEnableSnmpScore,
}
#[derive(Secret, Clone, Debug, Serialize, Deserialize)]
struct BrocadeSwitchAuth {
username: String,
password: String,
}
#[derive(Secret, Clone, Debug, Serialize, Deserialize)]
struct BrocadeSnmpAuth {
username: String,
auth_password: String,
des_password: String,
}
#[async_trait]
impl<T: Topology> Interpret<T> for BrocadeEnableSnmpInterpret {
async fn execute(
&self,
_inventory: &Inventory,
_topology: &T,
) -> Result<Outcome, InterpretError> {
let switch_addresses = &self.score.switch_ips;
let snmp_auth = SecretManager::get_or_prompt::<BrocadeSnmpAuth>()
.await
.unwrap();
let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
.await
.unwrap();
let brocade = brocade::init(
&switch_addresses,
22,
&config.username,
&config.password,
Some(BrocadeOptions {
dry_run: self.score.dry_run,
..Default::default()
}),
)
.await
.expect("Brocade client failed to connect");
brocade
.enable_snmp(
&snmp_auth.username,
&snmp_auth.auth_password,
&snmp_auth.des_password,
)
.await
.map_err(|e| InterpretError::new(e.to_string()))?;
Ok(Outcome::success(format!(
"Activated snmp server for Brocade at {}",
switch_addresses
.iter()
.map(|s| s.to_string())
.collect::<Vec<_>>()
.join(", ")
)))
}
fn get_name(&self) -> InterpretName {
InterpretName::Custom("BrocadeEnableSnmpInterpret")
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}

View File

@@ -19,8 +19,11 @@ pub struct DhcpScore {
pub host_binding: Vec<HostBinding>,
pub next_server: Option<IpAddress>,
pub boot_filename: Option<String>,
/// Boot filename to be provided to PXE clients identifying as BIOS
pub filename: Option<String>,
/// Boot filename to be provided to PXE clients identifying as uefi but NOT iPXE
pub filename64: Option<String>,
/// Boot filename to be provided to PXE clients identifying as iPXE
pub filenameipxe: Option<String>,
pub dhcp_range: (IpAddress, IpAddress),
pub domain: Option<String>,

View File

@@ -5,11 +5,10 @@ use serde::{Deserialize, Serialize};
use crate::{
data::Version,
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::inventory::LaunchDiscoverInventoryAgentScore,
modules::inventory::{HarmonyDiscoveryStrategy, LaunchDiscoverInventoryAgentScore},
score::Score,
topology::Topology,
};
@@ -17,11 +16,13 @@ use crate::{
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoverHostForRoleScore {
pub role: HostRole,
pub number_desired_hosts: i16,
pub discovery_strategy: HarmonyDiscoveryStrategy,
}
impl<T: Topology> Score<T> for DiscoverHostForRoleScore {
fn name(&self) -> String {
"DiscoverInventoryAgentScore".to_string()
format!("DiscoverHostForRoleScore({:?})", self.role)
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
@@ -48,13 +49,15 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
);
LaunchDiscoverInventoryAgentScore {
discovery_timeout: None,
discovery_strategy: self.score.discovery_strategy.clone(),
}
.interpret(inventory, topology)
.await?;
let host: PhysicalHost;
let mut chosen_hosts = vec![];
let host_repo = InventoryRepositoryFactory::build().await?;
let mut assigned_hosts = 0;
loop {
let all_hosts = host_repo.get_all_hosts().await?;
@@ -75,15 +78,24 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
match ans {
Ok(choice) => {
info!(
"Selected {} as the {:?} node.",
choice.summary(),
self.score.role
"Assigned role {:?} for node {}",
self.score.role,
choice.summary()
);
host_repo
.save_role_mapping(&self.score.role, &choice)
.await?;
host = choice;
break;
chosen_hosts.push(choice);
assigned_hosts += 1;
info!(
"Found {assigned_hosts} hosts for role {:?}",
self.score.role
);
if assigned_hosts == self.score.number_desired_hosts {
break;
}
}
Err(inquire::InquireError::OperationCanceled) => {
info!("Refresh requested. Fetching list of discovered hosts again...");
@@ -100,8 +112,13 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
}
Ok(Outcome::success(format!(
"Successfully discovered host {} for role {:?}",
host.summary(),
"Successfully discovered {} hosts {} for role {:?}",
self.score.number_desired_hosts,
chosen_hosts
.iter()
.map(|h| h.summary())
.collect::<Vec<String>>()
.join(", "),
self.score.role
)))
}

View File

@@ -1,6 +1,10 @@
mod discovery;
pub mod inspect;
use std::net::Ipv4Addr;
use cidr::{Ipv4Cidr, Ipv4Inet};
pub use discovery::*;
use tokio::time::{Duration, timeout};
use async_trait::async_trait;
use harmony_inventory_agent::local_presence::DiscoveryEvent;
@@ -24,6 +28,7 @@ use harmony_types::id::Id;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LaunchDiscoverInventoryAgentScore {
pub discovery_timeout: Option<u64>,
pub discovery_strategy: HarmonyDiscoveryStrategy,
}
impl<T: Topology> Score<T> for LaunchDiscoverInventoryAgentScore {
@@ -43,6 +48,12 @@ struct DiscoverInventoryAgentInterpret {
score: LaunchDiscoverInventoryAgentScore,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HarmonyDiscoveryStrategy {
MDNS,
SUBNET { cidr: cidr::Ipv4Cidr, port: u16 },
}
#[async_trait]
impl<T: Topology> Interpret<T> for DiscoverInventoryAgentInterpret {
async fn execute(
@@ -57,6 +68,37 @@ impl<T: Topology> Interpret<T> for DiscoverInventoryAgentInterpret {
),
};
match self.score.discovery_strategy {
HarmonyDiscoveryStrategy::MDNS => self.launch_mdns_discovery().await,
HarmonyDiscoveryStrategy::SUBNET { cidr, port } => {
self.launch_cidr_discovery(&cidr, port).await
}
};
Ok(Outcome::success(
"Discovery process completed successfully".to_string(),
))
}
fn get_name(&self) -> InterpretName {
InterpretName::DiscoverInventoryAgent
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}
impl DiscoverInventoryAgentInterpret {
async fn launch_mdns_discovery(&self) {
harmony_inventory_agent::local_presence::discover_agents(
self.score.discovery_timeout,
|event: DiscoveryEvent| -> Result<(), String> {
@@ -112,6 +154,8 @@ impl<T: Topology> Interpret<T> for DiscoverInventoryAgentInterpret {
cpus,
};
// FIXME only save the host when it is new or something changed in it.
// we currently are saving the host every time it is discovered.
let repo = InventoryRepositoryFactory::build()
.await
.map_err(|e| format!("Could not build repository : {e}"))
@@ -132,25 +176,111 @@ impl<T: Topology> Interpret<T> for DiscoverInventoryAgentInterpret {
Ok(())
},
)
.await;
Ok(Outcome::success(
"Discovery process completed successfully".to_string(),
))
.await
}
fn get_name(&self) -> InterpretName {
InterpretName::DiscoverInventoryAgent
}
// async fn launch_cidr_discovery(&self, cidr : &Ipv4Cidr, port: u16) {
// todo!("launnch cidr discovery for {cidr} : {port}
// - Iterate over all possible addresses in cidr
// - make calls in batches of 20 attempting to reach harmony inventory agent on <addr, port> using same as above harmony_inventory_agent::client::get_host_inventory(&address, port)
// - Log warn when response is 404, it means the port was used by something else unexpected
// - Log error when response is 5xx
// - Log debug when no response (timeout 15 seconds)
// - Log info when found and response is 2xx
// ");
// }
async fn launch_cidr_discovery(&self, cidr: &Ipv4Cidr, port: u16) {
let addrs: Vec<Ipv4Inet> = cidr.iter().collect();
let total = addrs.len();
info!(
"Starting CIDR discovery for {} hosts on {}/{} (port {})",
total,
cidr.network_length(),
cidr,
port
);
fn get_version(&self) -> Version {
todo!()
}
let batch_size: usize = 20;
let timeout_secs = 5;
let request_timeout = Duration::from_secs(timeout_secs);
fn get_status(&self) -> InterpretStatus {
todo!()
}
let mut current_batch = 0;
let num_batches = addrs.len() / batch_size;
fn get_children(&self) -> Vec<Id> {
todo!()
for batch in addrs.chunks(batch_size) {
current_batch += 1;
info!("Starting query batch {current_batch} of {num_batches}, timeout {timeout_secs}");
let mut tasks = Vec::with_capacity(batch.len());
for addr in batch {
let addr = addr.address().to_string();
let port = port;
let task = tokio::spawn(async move {
match timeout(
request_timeout,
harmony_inventory_agent::client::get_host_inventory(&addr, port),
)
.await
{
Ok(Ok(host)) => {
info!("Found and response is 2xx for {addr}:{port}");
// Reuse the same conversion to PhysicalHost as MDNS flow
let harmony_inventory_agent::hwinfo::PhysicalHost {
storage_drives,
storage_controller,
memory_modules,
cpus,
chipset,
network_interfaces,
management_interface,
host_uuid,
} = host;
let host = PhysicalHost {
id: Id::from(host_uuid),
category: HostCategory::Server,
network: network_interfaces,
storage: storage_drives,
labels: vec![Label {
name: "discovered-by".to_string(),
value: "harmony-inventory-agent".to_string(),
}],
memory_modules,
cpus,
};
// Save host to inventory
let repo = InventoryRepositoryFactory::build()
.await
.map_err(|e| format!("Could not build repository : {e}"))
.unwrap();
if let Err(e) = repo.save(&host).await {
log::debug!("Failed to save host {}: {e}", host.id);
} else {
info!("Saved host id {}, summary : {}", host.id, host.summary());
}
}
Ok(Err(e)) => {
log::info!("Error querying inventory agent on {addr}:{port} : {e}");
}
Err(_) => {
// Timeout for this host
log::debug!("No response (timeout) for {addr}:{port}");
}
}
});
tasks.push(task);
}
// Wait for this batch to complete
for t in tasks {
let _ = t.await;
}
}
info!("CIDR discovery completed");
}
}

View File

@@ -1,4 +1,5 @@
pub mod application;
pub mod brocade;
pub mod cert_manager;
pub mod dhcp;
pub mod dns;

View File

@@ -4,7 +4,7 @@ use crate::{
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::inventory::DiscoverHostForRoleScore,
modules::inventory::{DiscoverHostForRoleScore, HarmonyDiscoveryStrategy},
score::Score,
topology::HAClusterTopology,
};
@@ -104,6 +104,8 @@ When you can dig them, confirm to continue.
bootstrap_host = hosts.into_iter().next().to_owned();
DiscoverHostForRoleScore {
role: HostRole::Bootstrap,
number_desired_hosts: 1,
discovery_strategy: HarmonyDiscoveryStrategy::MDNS,
}
.interpret(inventory, topology)
.await?;

View File

@@ -1,20 +1,10 @@
use crate::{
data::Version,
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::{
dhcp::DhcpHostBindingScore, http::IPxeMacBootFileScore,
inventory::DiscoverHostForRoleScore, okd::templates::BootstrapIpxeTpl,
},
interpret::Interpret,
inventory::HostRole,
modules::{inventory::HarmonyDiscoveryStrategy, okd::bootstrap_okd_node::OKDNodeInterpret},
score::Score,
topology::{HAClusterTopology, HostBinding},
topology::HAClusterTopology,
};
use async_trait::async_trait;
use derive_new::new;
use harmony_types::id::Id;
use log::{debug, info};
use serde::Serialize;
// -------------------------------------------------------------------------------------------------
@@ -23,231 +13,23 @@ use serde::Serialize;
// - Persist bonding via MachineConfigs (or NNCP) once SCOS is active.
// -------------------------------------------------------------------------------------------------
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetup03ControlPlaneScore {}
#[derive(Debug, Clone, Serialize)]
pub struct OKDSetup03ControlPlaneScore {
pub discovery_strategy: HarmonyDiscoveryStrategy,
}
impl Score<HAClusterTopology> for OKDSetup03ControlPlaneScore {
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
Box::new(OKDSetup03ControlPlaneInterpret::new())
// TODO: Implement a step to wait for the control plane nodes to join the cluster
// and for the cluster operators to become available. This would be similar to
// the `wait-for bootstrap-complete` command.
Box::new(OKDNodeInterpret::new(
HostRole::ControlPlane,
self.discovery_strategy.clone(),
))
}
fn name(&self) -> String {
"OKDSetup03ControlPlaneScore".to_string()
}
}
#[derive(Debug, Clone)]
pub struct OKDSetup03ControlPlaneInterpret {
version: Version,
status: InterpretStatus,
}
impl OKDSetup03ControlPlaneInterpret {
pub fn new() -> Self {
let version = Version::from("1.0.0").unwrap();
Self {
version,
status: InterpretStatus::QUEUED,
}
}
/// Ensures that three physical hosts are discovered and available for the ControlPlane role.
/// It will trigger discovery if not enough hosts are found.
async fn get_nodes(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
) -> Result<Vec<PhysicalHost>, InterpretError> {
const REQUIRED_HOSTS: usize = 3;
let repo = InventoryRepositoryFactory::build().await?;
let mut control_plane_hosts = repo.get_host_for_role(&HostRole::ControlPlane).await?;
while control_plane_hosts.len() < REQUIRED_HOSTS {
info!(
"Discovery of {} control plane hosts in progress, current number {}",
REQUIRED_HOSTS,
control_plane_hosts.len()
);
// This score triggers the discovery agent for a specific role.
DiscoverHostForRoleScore {
role: HostRole::ControlPlane,
}
.interpret(inventory, topology)
.await?;
control_plane_hosts = repo.get_host_for_role(&HostRole::ControlPlane).await?;
}
if control_plane_hosts.len() < REQUIRED_HOSTS {
Err(InterpretError::new(format!(
"OKD Requires at least {} control plane hosts, but only found {}. Cannot proceed.",
REQUIRED_HOSTS,
control_plane_hosts.len()
)))
} else {
// Take exactly the number of required hosts to ensure consistency.
Ok(control_plane_hosts
.into_iter()
.take(REQUIRED_HOSTS)
.collect())
}
}
/// Configures DHCP host bindings for all control plane nodes.
async fn configure_host_binding(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
nodes: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
info!("[ControlPlane] Configuring host bindings for control plane nodes.");
// Ensure the topology definition matches the number of physical nodes found.
if topology.control_plane.len() != nodes.len() {
return Err(InterpretError::new(format!(
"Mismatch between logical control plane hosts defined in topology ({}) and physical nodes found ({}).",
topology.control_plane.len(),
nodes.len()
)));
}
// Create a binding for each physical host to its corresponding logical host.
let bindings: Vec<HostBinding> = topology
.control_plane
.iter()
.zip(nodes.iter())
.map(|(logical_host, physical_host)| {
info!(
"Creating binding: Logical Host '{}' -> Physical Host ID '{}'",
logical_host.name, physical_host.id
);
HostBinding {
logical_host: logical_host.clone(),
physical_host: physical_host.clone(),
}
})
.collect();
DhcpHostBindingScore {
host_binding: bindings,
domain: Some(topology.domain_name.clone()),
}
.interpret(inventory, topology)
.await?;
Ok(())
}
/// Renders and deploys a per-MAC iPXE boot file for each control plane node.
async fn configure_ipxe(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
nodes: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
info!("[ControlPlane] Rendering per-MAC iPXE configurations.");
// The iPXE script content is the same for all control plane nodes,
// pointing to the 'master.ign' ignition file.
let content = BootstrapIpxeTpl {
http_ip: &topology.http_server.get_ip().to_string(),
scos_path: "scos",
ignition_http_path: "okd_ignition_files",
installation_device: "/dev/sda", // This might need to be configurable per-host in the future
ignition_file_name: "master.ign", // Control plane nodes use the master ignition file
}
.to_string();
debug!("[ControlPlane] iPXE content template:\n{content}");
// Create and apply an iPXE boot file for each node.
for node in nodes {
let mac_address = node.get_mac_address();
if mac_address.is_empty() {
return Err(InterpretError::new(format!(
"Physical host with ID '{}' has no MAC addresses defined.",
node.id
)));
}
info!(
"[ControlPlane] Applying iPXE config for node ID '{}' with MACs: {:?}",
node.id, mac_address
);
IPxeMacBootFileScore {
mac_address,
content: content.clone(),
}
.interpret(inventory, topology)
.await?;
}
Ok(())
}
/// Prompts the user to reboot the target control plane nodes.
async fn reboot_targets(&self, nodes: &Vec<PhysicalHost>) -> Result<(), InterpretError> {
let node_ids: Vec<String> = nodes.iter().map(|n| n.id.to_string()).collect();
info!("[ControlPlane] Requesting reboot for control plane nodes: {node_ids:?}",);
let confirmation = inquire::Confirm::new(
&format!("Please reboot the {} control plane nodes ({}) to apply their PXE configuration. Press enter when ready.", nodes.len(), node_ids.join(", ")),
)
.prompt()
.map_err(|e| InterpretError::new(format!("User prompt failed: {e}")))?;
if !confirmation {
return Err(InterpretError::new(
"User aborted the operation.".to_string(),
));
}
Ok(())
}
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetup03ControlPlaneInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::Custom("OKDSetup03ControlPlane")
}
fn get_version(&self) -> Version {
self.version.clone()
}
fn get_status(&self) -> InterpretStatus {
self.status.clone()
}
fn get_children(&self) -> Vec<Id> {
vec![]
}
async fn execute(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
) -> Result<Outcome, InterpretError> {
// 1. Ensure we have 3 physical hosts for the control plane.
let nodes = self.get_nodes(inventory, topology).await?;
// 2. Create DHCP reservations for the control plane nodes.
self.configure_host_binding(inventory, topology, &nodes)
.await?;
// 3. Create iPXE files for each control plane node to boot from the master ignition.
self.configure_ipxe(inventory, topology, &nodes).await?;
// 4. Reboot the nodes to start the OS installation.
self.reboot_targets(&nodes).await?;
// TODO: Implement a step to wait for the control plane nodes to join the cluster
// and for the cluster operators to become available. This would be similar to
// the `wait-for bootstrap-complete` command.
info!("[ControlPlane] Provisioning initiated. Monitor the cluster convergence manually.");
Ok(Outcome::success(
"Control plane provisioning has been successfully initiated.".into(),
))
}
}

View File

@@ -1,13 +1,9 @@
use async_trait::async_trait;
use derive_new::new;
use harmony_types::id::Id;
use log::info;
use serde::Serialize;
use crate::{
data::Version,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
interpret::Interpret,
inventory::HostRole,
modules::{inventory::HarmonyDiscoveryStrategy, okd::bootstrap_okd_node::OKDNodeInterpret},
score::Score,
topology::HAClusterTopology,
};
@@ -18,66 +14,20 @@ use crate::{
// - Persist bonding via MC/NNCP as required (same approach as masters).
// -------------------------------------------------------------------------------------------------
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetup04WorkersScore {}
#[derive(Debug, Clone, Serialize)]
pub struct OKDSetup04WorkersScore {
pub discovery_strategy: HarmonyDiscoveryStrategy,
}
impl Score<HAClusterTopology> for OKDSetup04WorkersScore {
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
Box::new(OKDSetup04WorkersInterpret::new(self.clone()))
Box::new(OKDNodeInterpret::new(
HostRole::ControlPlane,
self.discovery_strategy.clone(),
))
}
fn name(&self) -> String {
"OKDSetup04WorkersScore".to_string()
}
}
#[derive(Debug, Clone)]
pub struct OKDSetup04WorkersInterpret {
score: OKDSetup04WorkersScore,
version: Version,
status: InterpretStatus,
}
impl OKDSetup04WorkersInterpret {
pub fn new(score: OKDSetup04WorkersScore) -> Self {
let version = Version::from("1.0.0").unwrap();
Self {
version,
score,
status: InterpretStatus::QUEUED,
}
}
async fn render_and_reboot(&self) -> Result<(), InterpretError> {
info!("[Workers] Rendering per-MAC PXE for workers and rebooting");
Ok(())
}
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetup04WorkersInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::Custom("OKDSetup04Workers")
}
fn get_version(&self) -> Version {
self.version.clone()
}
fn get_status(&self) -> InterpretStatus {
self.status.clone()
}
fn get_children(&self) -> Vec<Id> {
vec![]
}
async fn execute(
&self,
_inventory: &Inventory,
_topology: &HAClusterTopology,
) -> Result<Outcome, InterpretError> {
self.render_and_reboot().await?;
Ok(Outcome::success("Workers provisioned".into()))
}
}

View File

@@ -0,0 +1,313 @@
use async_trait::async_trait;
use derive_new::new;
use harmony_types::id::Id;
use log::{debug, info};
use serde::Serialize;
use crate::{
data::Version,
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::{
dhcp::DhcpHostBindingScore,
http::IPxeMacBootFileScore,
inventory::{DiscoverHostForRoleScore, HarmonyDiscoveryStrategy},
okd::{
okd_node::{BootstrapRole, ControlPlaneRole, OKDRoleProperties, WorkerRole},
templates::BootstrapIpxeTpl,
},
},
score::Score,
topology::{HAClusterTopology, HostBinding, LogicalHost},
};
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDNodeInstallationScore {
host_role: HostRole,
discovery_strategy: HarmonyDiscoveryStrategy,
}
impl Score<HAClusterTopology> for OKDNodeInstallationScore {
fn name(&self) -> String {
"OKDNodeScore".to_string()
}
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
Box::new(OKDNodeInterpret::new(
self.host_role.clone(),
self.discovery_strategy.clone(),
))
}
}
#[derive(Debug, Clone)]
pub struct OKDNodeInterpret {
host_role: HostRole,
discovery_strategy: HarmonyDiscoveryStrategy,
}
impl OKDNodeInterpret {
pub fn new(host_role: HostRole, discovery_strategy: HarmonyDiscoveryStrategy) -> Self {
Self {
host_role,
discovery_strategy,
}
}
fn okd_role_properties(&self, role: &HostRole) -> &'static dyn OKDRoleProperties {
match role {
HostRole::Bootstrap => &BootstrapRole,
HostRole::ControlPlane => &ControlPlaneRole,
HostRole::Worker => &WorkerRole,
}
}
async fn get_nodes(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
) -> Result<Vec<PhysicalHost>, InterpretError> {
let repo = InventoryRepositoryFactory::build().await?;
let mut hosts = repo.get_host_for_role(&self.host_role).await?;
let okd_host_properties = self.okd_role_properties(&self.host_role);
let required_hosts: i16 = okd_host_properties.required_hosts();
info!(
"Discovery of {} {} hosts in progress, current number {}",
required_hosts,
self.host_role,
hosts.len()
);
// This score triggers the discovery agent for a specific role.
DiscoverHostForRoleScore {
role: self.host_role.clone(),
number_desired_hosts: required_hosts,
discovery_strategy: self.discovery_strategy.clone(),
}
.interpret(inventory, topology)
.await?;
hosts = repo.get_host_for_role(&self.host_role).await?;
if hosts.len() < required_hosts.try_into().unwrap_or(0) {
Err(InterpretError::new(format!(
"OKD Requires at least {} {} hosts, but only found {}. Cannot proceed.",
required_hosts,
self.host_role,
hosts.len()
)))
} else {
// Take exactly the number of required hosts to ensure consistency.
Ok(hosts
.into_iter()
.take(required_hosts.try_into().unwrap())
.collect())
}
}
/// Configures DHCP host bindings for all nodes.
async fn configure_host_binding(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
nodes: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
info!(
"[{}] Configuring host bindings for {} plane nodes.",
self.host_role, self.host_role,
);
let host_properties = self.okd_role_properties(&self.host_role);
self.validate_host_node_match(nodes, host_properties.logical_hosts(topology))?;
let bindings: Vec<HostBinding> =
self.host_bindings(nodes, host_properties.logical_hosts(topology));
DhcpHostBindingScore {
host_binding: bindings,
domain: Some(topology.domain_name.clone()),
}
.interpret(inventory, topology)
.await?;
Ok(())
}
// Ensure the topology definition matches the number of physical nodes found.
fn validate_host_node_match(
&self,
nodes: &Vec<PhysicalHost>,
hosts: &Vec<LogicalHost>,
) -> Result<(), InterpretError> {
if hosts.len() != nodes.len() {
return Err(InterpretError::new(format!(
"Mismatch between logical hosts defined in topology ({}) and physical nodes found ({}).",
hosts.len(),
nodes.len()
)));
}
Ok(())
}
// Create a binding for each physical host to its corresponding logical host.
fn host_bindings(
&self,
nodes: &Vec<PhysicalHost>,
hosts: &Vec<LogicalHost>,
) -> Vec<HostBinding> {
hosts
.iter()
.zip(nodes.iter())
.map(|(logical_host, physical_host)| {
info!(
"Creating binding: Logical Host '{}' -> Physical Host ID '{}'",
logical_host.name, physical_host.id
);
HostBinding {
logical_host: logical_host.clone(),
physical_host: physical_host.clone(),
}
})
.collect()
}
/// Renders and deploys a per-MAC iPXE boot file for each node.
async fn configure_ipxe(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
nodes: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
info!(
"[{}] Rendering per-MAC iPXE configurations.",
self.host_role
);
let okd_role_properties = self.okd_role_properties(&self.host_role);
// The iPXE script content is the same for all control plane nodes,
// pointing to the 'master.ign' ignition file.
let content = BootstrapIpxeTpl {
http_ip: &topology.http_server.get_ip().to_string(),
scos_path: "scos",
ignition_http_path: "okd_ignition_files",
//TODO must be refactored to not only use /dev/sda
installation_device: "/dev/sda", // This might need to be configurable per-host in the future
ignition_file_name: okd_role_properties.ignition_file(),
}
.to_string();
debug!("[{}] iPXE content template:\n{content}", self.host_role);
// Create and apply an iPXE boot file for each node.
for node in nodes {
let mac_address = node.get_mac_address();
if mac_address.is_empty() {
return Err(InterpretError::new(format!(
"Physical host with ID '{}' has no MAC addresses defined.",
node.id
)));
}
info!(
"[{}] Applying iPXE config for node ID '{}' with MACs: {:?}",
self.host_role, node.id, mac_address
);
IPxeMacBootFileScore {
mac_address,
content: content.clone(),
}
.interpret(inventory, topology)
.await?;
}
Ok(())
}
/// Prompts the user to reboot the target control plane nodes.
async fn reboot_targets(&self, nodes: &Vec<PhysicalHost>) -> Result<(), InterpretError> {
let node_ids: Vec<String> = nodes.iter().map(|n| n.id.to_string()).collect();
info!(
"[{}] Requesting reboot for control plane nodes: {node_ids:?}",
self.host_role
);
let confirmation = inquire::Confirm::new(
&format!("Please reboot the {} {} nodes ({}) to apply their PXE configuration. Press enter when ready.", nodes.len(), self.host_role, node_ids.join(", ")),
)
.prompt()
.map_err(|e| InterpretError::new(format!("User prompt failed: {e}")))?;
if !confirmation {
return Err(InterpretError::new(
"User aborted the operation.".to_string(),
));
}
Ok(())
}
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDNodeInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
) -> Result<Outcome, InterpretError> {
// 1. Ensure we have the specfied number of physical hosts.
let nodes = self.get_nodes(inventory, topology).await?;
// 2. Create DHCP reservations for the nodes.
self.configure_host_binding(inventory, topology, &nodes)
.await?;
// 3. Create iPXE files for each node to boot from the ignition.
self.configure_ipxe(inventory, topology, &nodes).await?;
// 4. Reboot the nodes to start the OS installation.
self.reboot_targets(&nodes).await?;
// TODO: Implement a step to validate that the installation of the nodes is
// complete and for the cluster operators to become available.
//
// The OpenShift installer only provides two wait commands which currently need to be
// run manually:
// - `openshift-install wait-for bootstrap-complete`
// - `openshift-install wait-for install-complete`
//
// There is no installer command that waits specifically for worker node
// provisioning. Worker nodes join asynchronously (via ignition + CSR approval),
// and the cluster becomes fully functional only once all nodes are Ready and the
// cluster operators report Available=True.
info!(
"[{}] Provisioning initiated. Monitor the cluster convergence manually.",
self.host_role
);
Ok(Outcome::success(format!(
"{} provisioning has been successfully initiated.",
self.host_role
)))
}
fn get_name(&self) -> InterpretName {
InterpretName::Custom("OKDNodeSetup".into())
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}

View File

@@ -251,14 +251,15 @@ impl<T: Topology + NetworkManager + Switch> Interpret<T> for HostNetworkConfigur
#[cfg(test)]
mod tests {
use assertor::*;
use brocade::PortOperatingMode;
use harmony_types::{net::MacAddress, switch::PortLocation};
use lazy_static::lazy_static;
use crate::{
hardware::HostCategory,
topology::{
HostNetworkConfig, NetworkError, PreparationError, PreparationOutcome, SwitchError,
SwitchPort,
HostNetworkConfig, NetworkError, PortConfig, PreparationError, PreparationOutcome,
SwitchError, SwitchPort,
},
};
use std::{
@@ -692,5 +693,14 @@ mod tests {
Ok(())
}
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
todo!()
}
async fn configure_interface(
&self,
port_config: &Vec<PortConfig>,
) -> Result<(), SwitchError> {
todo!()
}
}
}

View File

@@ -48,10 +48,13 @@
//! - internal_domain: Internal cluster domain (e.g., cluster.local or harmony.mcd).
use crate::{
modules::okd::{
OKDSetup01InventoryScore, OKDSetup02BootstrapScore, OKDSetup03ControlPlaneScore,
OKDSetup04WorkersScore, OKDSetup05SanityCheckScore, OKDSetupPersistNetworkBondScore,
bootstrap_06_installation_report::OKDSetup06InstallationReportScore,
modules::{
inventory::HarmonyDiscoveryStrategy,
okd::{
OKDSetup01InventoryScore, OKDSetup02BootstrapScore, OKDSetup03ControlPlaneScore,
OKDSetup04WorkersScore, OKDSetup05SanityCheckScore, OKDSetupPersistNetworkBondScore,
bootstrap_06_installation_report::OKDSetup06InstallationReportScore,
},
},
score::Score,
topology::HAClusterTopology,
@@ -60,13 +63,19 @@ use crate::{
pub struct OKDInstallationPipeline;
impl OKDInstallationPipeline {
pub async fn get_all_scores() -> Vec<Box<dyn Score<HAClusterTopology>>> {
pub async fn get_all_scores(
discovery_strategy: HarmonyDiscoveryStrategy,
) -> Vec<Box<dyn Score<HAClusterTopology>>> {
vec![
Box::new(OKDSetup01InventoryScore::new()),
Box::new(OKDSetup02BootstrapScore::new()),
Box::new(OKDSetup03ControlPlaneScore::new()),
Box::new(OKDSetup03ControlPlaneScore {
discovery_strategy: discovery_strategy.clone(),
}),
Box::new(OKDSetupPersistNetworkBondScore::new()),
Box::new(OKDSetup04WorkersScore::new()),
Box::new(OKDSetup04WorkersScore {
discovery_strategy: discovery_strategy.clone(),
}),
Box::new(OKDSetup05SanityCheckScore::new()),
Box::new(OKDSetup06InstallationReportScore::new()),
]

View File

@@ -6,6 +6,7 @@ mod bootstrap_05_sanity_check;
mod bootstrap_06_installation_report;
pub mod bootstrap_dhcp;
pub mod bootstrap_load_balancer;
pub mod bootstrap_okd_node;
mod bootstrap_persist_network_bond;
pub mod dhcp;
pub mod dns;
@@ -13,6 +14,7 @@ pub mod installation;
pub mod ipxe;
pub mod load_balancer;
pub mod route;
pub mod okd_node;
pub mod templates;
pub mod upgrade;
pub use bootstrap_01_prepare::*;

View File

@@ -0,0 +1,54 @@
use crate::topology::{HAClusterTopology, LogicalHost};
pub trait OKDRoleProperties {
fn ignition_file(&self) -> &'static str;
fn required_hosts(&self) -> i16;
fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec<LogicalHost>;
}
pub struct BootstrapRole;
pub struct ControlPlaneRole;
pub struct WorkerRole;
pub struct StorageRole;
impl OKDRoleProperties for BootstrapRole {
fn ignition_file(&self) -> &'static str {
"bootstrap.ign"
}
fn required_hosts(&self) -> i16 {
1
}
fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec<LogicalHost> {
todo!()
}
}
impl OKDRoleProperties for ControlPlaneRole {
fn ignition_file(&self) -> &'static str {
"master.ign"
}
fn required_hosts(&self) -> i16 {
3
}
fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec<LogicalHost> {
&t.control_plane
}
}
impl OKDRoleProperties for WorkerRole {
fn ignition_file(&self) -> &'static str {
"worker.ign"
}
fn required_hosts(&self) -> i16 {
2
}
fn logical_hosts<'a>(&self, t: &'a HAClusterTopology) -> &'a Vec<LogicalHost> {
&t.workers
}
}