Some checks failed
Run Check Script / check (pull_request) Failing after 1m8s
1054 lines
35 KiB
Rust
1054 lines
35 KiB
Rust
//! OKDInstallationScore
|
||
//!
|
||
//! Overview
|
||
//! --------
|
||
//! OKDInstallationScore orchestrates an end-to-end, bare-metal OKD (OpenShift/OKD 4.19)
//! installation. It follows a “discovery-first, then provision” strategy with strict ordering,
//! observable progress, and minimal assumptions about the underlying network.
|
||
//!
|
||
//! High-level flow
|
||
//! 1) OKDSetup01Inventory
|
||
//! - Serve default iPXE + Kickstart (in-RAM CentOS Stream 9) for discovery only.
|
||
//! - Enable SSH with the cluster’s pubkey, start a Rust inventory agent.
|
||
//! - Harmony discovers nodes by scraping the agent endpoint and collects MACs/NICs.
|
||
//!
|
||
//! 2) OKDSetup02Bootstrap
|
||
//! - User selects which discovered node becomes bootstrap.
|
||
//! - Prepare the OKD cluster installation files
|
||
//! - Render per-MAC iPXE for bootstrap with OKD 4.19 SCOS live assets + ignition.
|
||
//! - Reboot node via SSH; install bootstrap; wait for bootstrap-complete.
|
||
//!
|
||
//! 3) OKDSetup03ControlPlane
|
||
//! - Render per-MAC iPXE for cp0/cp1/cp2 with ignition. Reboot via SSH, join masters.
|
||
//! - Configure network bond (where relevant) using OKD NMState MachineConfig
|
||
//!
|
||
//! 4) OKDSetup04Workers
|
||
//! - Render per-MAC iPXE for worker set; join workers.
|
||
//! - Configure network bond (where relevant) using OKD NMState MachineConfig
|
||
//!
|
||
//! 5) OKDSetup05SanityCheck
|
||
//! - Validate API/ingress/clusteroperators; ensure healthy control plane and SDN.
|
||
//!
|
||
//! 6) OKDSetup06InstallationReport
|
||
//! - Produce a concise, machine-readable report (JSON) and a human summary.
|
||
//!
|
||
//! Network notes
|
||
//! - During Inventory: ports must be simple access (no LACP). DHCP succeeds; iPXE
|
||
//! loads CentOS Stream live with Kickstart and starts the inventory endpoint.
|
||
//! - During Provisioning: only after SCOS is on disk and Ignition/MC can be applied
|
||
//! do we set the bond persistently. If early bonding is truly required on a host,
|
||
//! use kernel args selectively in the per-MAC PXE for that host, but never for the
|
||
//! generic discovery path.
|
||
//! - The bond is deferred because of the inherent race condition between PXE, which cannot
//! perform its DHCP recovery process on a bonded network, and the bond configuration itself,
//! which must be configured on host AND switch to connect properly.
|
||
//!
|
||
//! Configuration knobs
|
||
//! - public_domain: External wildcard/apps domain (e.g., apps.example.com).
|
||
//! - internal_domain: Internal cluster domain (e.g., cluster.local or harmony.mcd).
|
||
|
||
use std::{fmt::Write, path::PathBuf, process::ExitStatus};
|
||
|
||
use async_trait::async_trait;
|
||
use derive_new::new;
|
||
use harmony_secret::SecretManager;
|
||
use harmony_types::{id::Id, net::Url};
|
||
use log::{debug, error, info, warn};
|
||
use serde::{Deserialize, Serialize};
|
||
use tokio::{
|
||
fs::File,
|
||
io::{AsyncReadExt, AsyncWriteExt},
|
||
process::Command,
|
||
};
|
||
|
||
use crate::{
|
||
config::secret::{RedhatSecret, SshKeyPair},
|
||
data::{FileContent, FilePath, Version},
|
||
hardware::PhysicalHost,
|
||
infra::inventory::InventoryRepositoryFactory,
|
||
instrumentation::{HarmonyEvent, instrument},
|
||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||
inventory::{HostRole, Inventory},
|
||
modules::{
|
||
dhcp::DhcpHostBindingScore,
|
||
http::{IPxeMacBootFileScore, StaticFilesHttpScore},
|
||
inventory::LaunchDiscoverInventoryAgentScore,
|
||
okd::{
|
||
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore,
|
||
dns::OKDDnsScore,
|
||
templates::{BootstrapIpxeTpl, InstallConfigYaml},
|
||
},
|
||
},
|
||
score::Score,
|
||
topology::{HAClusterTopology, HostBinding},
|
||
};
|
||
|
||
// -------------------------------------------------------------------------------------------------
|
||
// Public Orchestrator Score
|
||
// -------------------------------------------------------------------------------------------------
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, new)]
|
||
pub struct OKDInstallationScore {}
|
||
|
||
impl Score<HAClusterTopology> for OKDInstallationScore {
|
||
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
|
||
Box::new(OKDInstallationInterpret::new(self.clone()))
|
||
}
|
||
|
||
fn name(&self) -> String {
|
||
"OKDInstallationScore".to_string()
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------------------------
|
||
// Orchestrator Interpret
|
||
// -------------------------------------------------------------------------------------------------
|
||
|
||
#[derive(Debug, Clone)]
|
||
pub struct OKDInstallationInterpret {
|
||
score: OKDInstallationScore,
|
||
version: Version,
|
||
status: InterpretStatus,
|
||
}
|
||
|
||
impl OKDInstallationInterpret {
|
||
pub fn new(score: OKDInstallationScore) -> Self {
|
||
let version = Version::from("0.1.0").expect("valid version");
|
||
Self {
|
||
score,
|
||
version,
|
||
status: InterpretStatus::QUEUED,
|
||
}
|
||
}
|
||
|
||
async fn run_inventory_phase(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
OKDSetup01InventoryScore::new()
|
||
.interpret(inventory, topology)
|
||
.await?;
|
||
Ok(())
|
||
}
|
||
|
||
async fn run_bootstrap_phase(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
OKDSetup02BootstrapScore::new()
|
||
.interpret(inventory, topology)
|
||
.await?;
|
||
Ok(())
|
||
}
|
||
|
||
async fn run_control_plane_phase(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
let control_plane_score = OKDSetup03ControlPlaneScore::new();
|
||
control_plane_score.interpret(inventory, topology).await?;
|
||
Ok(())
|
||
}
|
||
|
||
async fn run_workers_phase(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
let workers_score = OKDSetup04WorkersScore::new();
|
||
workers_score.interpret(inventory, topology).await?;
|
||
Ok(())
|
||
}
|
||
|
||
async fn run_sanity_phase(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
let sanity_score = OKDSetup05SanityCheckScore::new();
|
||
sanity_score.interpret(inventory, topology).await?;
|
||
Ok(())
|
||
}
|
||
|
||
async fn run_report_phase(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
let report_score = OKDSetup06InstallationReportScore::new();
|
||
report_score.interpret(inventory, topology).await?;
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
#[async_trait]
|
||
impl Interpret<HAClusterTopology> for OKDInstallationInterpret {
|
||
fn get_name(&self) -> InterpretName {
|
||
InterpretName::Custom("OKDInstallationInterpret")
|
||
}
|
||
|
||
fn get_version(&self) -> Version {
|
||
self.version.clone()
|
||
}
|
||
|
||
fn get_status(&self) -> InterpretStatus {
|
||
self.status.clone()
|
||
}
|
||
|
||
fn get_children(&self) -> Vec<Id> {
|
||
vec![]
|
||
}
|
||
|
||
async fn execute(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<Outcome, InterpretError> {
|
||
instrument(HarmonyEvent::HarmonyStarted).ok();
|
||
|
||
info!("Starting OKD installation pipeline",);
|
||
|
||
self.run_inventory_phase(inventory, topology).await?;
|
||
|
||
self.run_bootstrap_phase(inventory, topology).await?;
|
||
|
||
self.run_control_plane_phase(inventory, topology).await?;
|
||
|
||
self.run_workers_phase(inventory, topology).await?;
|
||
|
||
self.run_sanity_phase(inventory, topology).await?;
|
||
|
||
self.run_report_phase(inventory, topology).await?;
|
||
|
||
instrument(HarmonyEvent::HarmonyFinished).ok();
|
||
|
||
Ok(Outcome::new(
|
||
InterpretStatus::SUCCESS,
|
||
"OKD installation pipeline completed".into(),
|
||
))
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------------------------
|
||
// Step 01: Inventory (default PXE + Kickstart in RAM + Rust agent)
|
||
// - This score exposes/ensures the default inventory assets and waits for discoveries.
|
||
// - No early bonding. Simple access DHCP.
|
||
// -------------------------------------------------------------------------------------------------
|
||
|
||
#[derive(Debug, Clone, Serialize, new)]
|
||
struct OKDSetup01InventoryScore {}
|
||
|
||
impl Score<HAClusterTopology> for OKDSetup01InventoryScore {
|
||
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
|
||
Box::new(OKDSetup01InventoryInterpret::new(self.clone()))
|
||
}
|
||
|
||
fn name(&self) -> String {
|
||
"OKDSetup01InventoryScore".to_string()
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone)]
|
||
struct OKDSetup01InventoryInterpret {
|
||
score: OKDSetup01InventoryScore,
|
||
version: Version,
|
||
status: InterpretStatus,
|
||
}
|
||
|
||
impl OKDSetup01InventoryInterpret {
|
||
pub fn new(score: OKDSetup01InventoryScore) -> Self {
|
||
let version = Version::from("1.0.0").unwrap();
|
||
Self {
|
||
version,
|
||
score,
|
||
status: InterpretStatus::QUEUED,
|
||
}
|
||
}
|
||
}
|
||
|
||
#[async_trait]
|
||
impl Interpret<HAClusterTopology> for OKDSetup01InventoryInterpret {
|
||
fn get_name(&self) -> InterpretName {
|
||
InterpretName::Custom("OKDSetup01Inventory")
|
||
}
|
||
|
||
fn get_version(&self) -> Version {
|
||
self.version.clone()
|
||
}
|
||
|
||
fn get_status(&self) -> InterpretStatus {
|
||
self.status.clone()
|
||
}
|
||
|
||
fn get_children(&self) -> Vec<Id> {
|
||
vec![]
|
||
}
|
||
|
||
async fn execute(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<Outcome, InterpretError> {
|
||
info!("Setting up base DNS config for OKD");
|
||
let cluster_domain = &topology.domain_name;
|
||
let load_balancer_ip = &topology.load_balancer.get_ip();
|
||
inquire::Confirm::new(&format!(
|
||
"Set hostnames manually in your opnsense dnsmasq config :
|
||
*.apps.{cluster_domain} -> {load_balancer_ip}
|
||
api.{cluster_domain} -> {load_balancer_ip}
|
||
api-int.{cluster_domain} -> {load_balancer_ip}
|
||
|
||
When you can dig them, confirm to continue.
|
||
"
|
||
))
|
||
.prompt()
|
||
.expect("Prompt error");
|
||
// TODO reactivate automatic dns config when migration from unbound to dnsmasq is done
|
||
// OKDDnsScore::new(topology)
|
||
// .interpret(inventory, topology)
|
||
// .await?;
|
||
|
||
info!(
|
||
"Launching discovery agent, make sure that your nodes are successfully PXE booted and running inventory agent. They should answer on `http://<node_ip>:8080/inventory`"
|
||
);
|
||
LaunchDiscoverInventoryAgentScore {
|
||
discovery_timeout: None,
|
||
}
|
||
.interpret(inventory, topology)
|
||
.await?;
|
||
|
||
let bootstrap_host: PhysicalHost;
|
||
let host_repo = InventoryRepositoryFactory::build().await?;
|
||
|
||
loop {
|
||
let all_hosts = host_repo.get_all_hosts().await?;
|
||
|
||
if all_hosts.is_empty() {
|
||
warn!("No discovered hosts found yet. Waiting for hosts to appear...");
|
||
// Sleep to avoid spamming the user and logs while waiting for nodes.
|
||
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
|
||
continue;
|
||
}
|
||
|
||
let ans = inquire::Select::new(
|
||
"Select the node to be used as the bootstrap node:",
|
||
all_hosts,
|
||
)
|
||
.with_help_message("Press Esc to refresh the list of discovered hosts")
|
||
.prompt();
|
||
|
||
match ans {
|
||
Ok(choice) => {
|
||
info!("Selected {} as the bootstrap node.", choice.summary());
|
||
host_repo
|
||
.save_role_mapping(&HostRole::Bootstrap, &choice)
|
||
.await?;
|
||
bootstrap_host = choice;
|
||
break;
|
||
}
|
||
Err(inquire::InquireError::OperationCanceled) => {
|
||
info!("Refresh requested. Fetching list of discovered hosts again...");
|
||
continue;
|
||
}
|
||
Err(e) => {
|
||
error!("Failed to select bootstrap node: {}", e);
|
||
return Err(InterpretError::new(format!(
|
||
"Could not select host : {}",
|
||
e.to_string()
|
||
)));
|
||
}
|
||
}
|
||
}
|
||
|
||
Ok(Outcome::new(
|
||
InterpretStatus::SUCCESS,
|
||
format!(
|
||
"Found and assigned bootstrap node: {}",
|
||
bootstrap_host.summary()
|
||
),
|
||
))
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------------------------
|
||
// Step 02: Bootstrap
|
||
// - Select bootstrap node (from discovered set).
|
||
// - Render per-MAC iPXE pointing to OKD 4.19 SCOS live assets + bootstrap ignition.
|
||
// - Reboot the host via SSH and wait for bootstrap-complete.
|
||
// - No bonding at this stage unless absolutely required; prefer persistence via MC later.
|
||
// -------------------------------------------------------------------------------------------------
|
||
|
||
#[derive(Debug, Clone, Serialize, new)]
|
||
struct OKDSetup02BootstrapScore {}
|
||
|
||
impl Score<HAClusterTopology> for OKDSetup02BootstrapScore {
|
||
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
|
||
Box::new(OKDSetup02BootstrapInterpret::new())
|
||
}
|
||
|
||
fn name(&self) -> String {
|
||
"OKDSetup02BootstrapScore".to_string()
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone)]
|
||
struct OKDSetup02BootstrapInterpret {
|
||
version: Version,
|
||
status: InterpretStatus,
|
||
}
|
||
|
||
impl OKDSetup02BootstrapInterpret {
|
||
pub fn new() -> Self {
|
||
let version = Version::from("1.0.0").unwrap();
|
||
Self {
|
||
version,
|
||
status: InterpretStatus::QUEUED,
|
||
}
|
||
}
|
||
|
||
async fn get_bootstrap_node(&self) -> Result<PhysicalHost, InterpretError> {
|
||
let repo = InventoryRepositoryFactory::build().await?;
|
||
match repo
|
||
.get_host_for_role(HostRole::Bootstrap)
|
||
.await?
|
||
.into_iter()
|
||
.next()
|
||
{
|
||
Some(host) => Ok(host),
|
||
None => Err(InterpretError::new(
|
||
"No bootstrap node available".to_string(),
|
||
)),
|
||
}
|
||
}
|
||
|
||
async fn prepare_ignition_files(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
let okd_bin_path = PathBuf::from("./data/okd/bin");
|
||
let okd_installation_path_str = "./data/okd/installation_files";
|
||
let okd_images_path = &PathBuf::from("./data/okd/installer_image/");
|
||
let okd_installation_path = &PathBuf::from(okd_installation_path_str);
|
||
|
||
let exit_status = Command::new("mkdir")
|
||
.arg("-p")
|
||
.arg(okd_installation_path)
|
||
.spawn()
|
||
.expect("Command failed to start")
|
||
.wait()
|
||
.await
|
||
.map_err(|e| {
|
||
InterpretError::new(format!("Failed to create okd installation directory : {e}"))
|
||
})?;
|
||
if !exit_status.success() {
|
||
return Err(InterpretError::new(format!(
|
||
"Failed to create okd installation directory"
|
||
)));
|
||
} else {
|
||
info!(
|
||
"Created OKD installation directory {}",
|
||
okd_installation_path.to_string_lossy()
|
||
);
|
||
}
|
||
|
||
let redhat_secret = SecretManager::get::<RedhatSecret>().await?;
|
||
let ssh_key = SecretManager::get::<SshKeyPair>().await?;
|
||
|
||
let install_config_yaml = InstallConfigYaml {
|
||
cluster_name: &topology.get_cluster_name(),
|
||
cluster_domain: &topology.get_cluster_base_domain(),
|
||
pull_secret: &redhat_secret.pull_secret,
|
||
ssh_public_key: &ssh_key.public,
|
||
}
|
||
.to_string();
|
||
|
||
let install_config_file_path = &okd_installation_path.join("install-config.yaml");
|
||
|
||
self.create_file(install_config_file_path, install_config_yaml.as_bytes())
|
||
.await?;
|
||
|
||
let install_config_backup_extension = install_config_file_path
|
||
.extension()
|
||
.map(|e| format!("{}.bak", e.to_string_lossy()))
|
||
.unwrap_or("bak".to_string());
|
||
|
||
let mut install_config_backup = install_config_file_path.clone();
|
||
install_config_backup.set_extension(install_config_backup_extension);
|
||
|
||
self.create_file(&install_config_backup, install_config_yaml.as_bytes())
|
||
.await?;
|
||
|
||
info!("Creating manifest files with openshift-install");
|
||
let output = Command::new(okd_bin_path.join("openshift-install"))
|
||
.args([
|
||
"create",
|
||
"manifests",
|
||
"--dir",
|
||
okd_installation_path.to_str().unwrap(),
|
||
])
|
||
.output()
|
||
.await
|
||
.map_err(|e| InterpretError::new(format!("Failed to create okd manifest : {e}")))?;
|
||
let stdout = String::from_utf8(output.stdout).unwrap();
|
||
info!("openshift-install stdout :\n\n{}", stdout);
|
||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||
info!("openshift-install stderr :\n\n{}", stderr);
|
||
info!("openshift-install exit status : {}", output.status);
|
||
if !output.status.success() {
|
||
return Err(InterpretError::new(format!(
|
||
"Failed to create okd manifest, exit code {} : {}",
|
||
output.status, stderr
|
||
)));
|
||
}
|
||
|
||
info!("Creating ignition files with openshift-install");
|
||
let output = Command::new(okd_bin_path.join("openshift-install"))
|
||
.args([
|
||
"create",
|
||
"ignition-configs",
|
||
"--dir",
|
||
okd_installation_path.to_str().unwrap(),
|
||
])
|
||
.output()
|
||
.await
|
||
.map_err(|e| {
|
||
InterpretError::new(format!("Failed to create okd ignition config : {e}"))
|
||
})?;
|
||
let stdout = String::from_utf8(output.stdout).unwrap();
|
||
info!("openshift-install stdout :\n\n{}", stdout);
|
||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||
info!("openshift-install stderr :\n\n{}", stderr);
|
||
info!("openshift-install exit status : {}", output.status);
|
||
if !output.status.success() {
|
||
return Err(InterpretError::new(format!(
|
||
"Failed to create okd manifest, exit code {} : {}",
|
||
output.status, stderr
|
||
)));
|
||
}
|
||
|
||
let ignition_files_http_path = PathBuf::from("okd_ignition_files");
|
||
let prepare_file_content = async |filename: &str| -> Result<FileContent, InterpretError> {
|
||
let local_path = okd_installation_path.join(filename);
|
||
let remote_path = ignition_files_http_path.join(filename);
|
||
|
||
info!(
|
||
"Preparing file content for local file : {} to remote : {}",
|
||
local_path.to_string_lossy(),
|
||
remote_path.to_string_lossy()
|
||
);
|
||
|
||
let content = tokio::fs::read_to_string(&local_path).await.map_err(|e| {
|
||
InterpretError::new(format!(
|
||
"Could not read file content {} : {e}",
|
||
local_path.to_string_lossy()
|
||
))
|
||
})?;
|
||
|
||
Ok(FileContent {
|
||
path: FilePath::Relative(remote_path.to_string_lossy().to_string()),
|
||
content,
|
||
})
|
||
};
|
||
|
||
StaticFilesHttpScore {
|
||
remote_path: None,
|
||
folder_to_serve: None,
|
||
files: vec![
|
||
prepare_file_content("bootstrap.ign").await?,
|
||
prepare_file_content("master.ign").await?,
|
||
prepare_file_content("worker.ign").await?,
|
||
prepare_file_content("metadata.json").await?,
|
||
],
|
||
}
|
||
.interpret(inventory, topology)
|
||
.await?;
|
||
|
||
let run_command =
|
||
async |cmd: &str, args: Vec<&str>| -> Result<std::process::Output, InterpretError> {
|
||
let output = Command::new(cmd).args(&args).output().await.map_err(|e| {
|
||
InterpretError::new(format!("Failed to launch command {cmd} : {e}"))
|
||
})?;
|
||
let stdout = String::from_utf8(output.stdout.clone()).unwrap();
|
||
info!("{cmd} stdout :\n\n{}", stdout);
|
||
let stderr = String::from_utf8(output.stderr.clone()).unwrap();
|
||
info!("{cmd} stderr :\n\n{}", stderr);
|
||
info!("{cmd} exit status : {}", output.status);
|
||
if !output.status.success() {
|
||
return Err(InterpretError::new(format!(
|
||
"Command execution failed, exit code {} : {} {}",
|
||
output.status,
|
||
cmd,
|
||
args.join(" ")
|
||
)));
|
||
}
|
||
Ok(output)
|
||
};
|
||
|
||
info!("Successfully prepared ignition files for OKD installation");
|
||
// ignition_files_http_path // = PathBuf::from("okd_ignition_files");
|
||
info!(
|
||
r#"Uploading images, they can be refreshed with a command similar to this one: openshift-install coreos print-stream-json | grep -Eo '"https.*(kernel.|initramfs.|rootfs.)\w+(\.img)?"' | grep x86_64 | xargs -n 1 curl -LO"#
|
||
);
|
||
|
||
warn!(
|
||
"TODO push installer image files with `scp -r data/okd/installer_image/* root@192.168.1.1:/usr/local/http/scos/` until performance issue is resolved"
|
||
);
|
||
inquire::Confirm::new(
|
||
"push installer image files with `scp -r data/okd/installer_image/* root@192.168.1.1:/usr/local/http/scos/` until performance issue is resolved").prompt().expect("Prompt error");
|
||
|
||
// let scos_http_path = PathBuf::from("scos");
|
||
// StaticFilesHttpScore {
|
||
// folder_to_serve: Some(Url::LocalFolder(
|
||
// okd_images_path.to_string_lossy().to_string(),
|
||
// )),
|
||
// remote_path: Some(scos_http_path.to_string_lossy().to_string()),
|
||
// files: vec![],
|
||
// }
|
||
// .interpret(inventory, topology)
|
||
// .await?;
|
||
|
||
Ok(())
|
||
}
|
||
|
||
async fn configure_host_binding(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
let binding = HostBinding {
|
||
logical_host: topology.bootstrap_host.clone(),
|
||
physical_host: self.get_bootstrap_node().await?,
|
||
};
|
||
info!("Configuring host binding for bootstrap node {binding:?}");
|
||
|
||
DhcpHostBindingScore {
|
||
host_binding: vec![binding],
|
||
domain: Some(topology.domain_name.clone()),
|
||
}
|
||
.interpret(inventory, topology)
|
||
.await?;
|
||
Ok(())
|
||
}
|
||
|
||
async fn render_per_mac_pxe(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
let content = BootstrapIpxeTpl {
|
||
http_ip: &topology.http_server.get_ip().to_string(),
|
||
scos_path: "scos", // TODO use some constant
|
||
installation_device: "/dev/sda", // TODO do something smart based on the host drives
|
||
// topology. Something like use the smallest device
|
||
// above 200G that is an ssd
|
||
}
|
||
.to_string();
|
||
|
||
let bootstrap_node = self.get_bootstrap_node().await?;
|
||
let mac_address = bootstrap_node.get_mac_address();
|
||
|
||
info!("[Bootstrap] Rendering per-MAC PXE for bootstrap node");
|
||
debug!("bootstrap ipxe content : {content}");
|
||
debug!("bootstrap mac addresses : {mac_address:?}");
|
||
|
||
IPxeMacBootFileScore {
|
||
mac_address,
|
||
content,
|
||
}
|
||
.interpret(inventory, topology)
|
||
.await?;
|
||
Ok(())
|
||
}
|
||
|
||
async fn setup_bootstrap_load_balancer(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<(), InterpretError> {
|
||
let outcome = OKDBootstrapLoadBalancerScore::new(topology)
|
||
.interpret(inventory, topology)
|
||
.await?;
|
||
info!("Successfully executed OKDBootstrapLoadBalancerScore : {outcome:?}");
|
||
Ok(())
|
||
}
|
||
|
||
async fn reboot_target(&self) -> Result<(), InterpretError> {
|
||
// Placeholder: ssh reboot using the inventory ephemeral key
|
||
info!("[Bootstrap] Rebooting bootstrap node via SSH");
|
||
// TODO reboot programatically, there are some logical checks and refactoring to do such as
|
||
// accessing the bootstrap node config (ip address) from the inventory
|
||
let confirmation = inquire::Confirm::new(
|
||
"Now reboot the bootstrap node so it picks up its pxe boot file. Press enter when ready.",
|
||
)
|
||
.with_default(true)
|
||
.prompt()
|
||
.expect("Unexpected prompt error");
|
||
Ok(())
|
||
}
|
||
|
||
async fn wait_for_bootstrap_complete(&self) -> Result<(), InterpretError> {
|
||
// Placeholder: wait-for bootstrap-complete
|
||
info!("[Bootstrap] Waiting for bootstrap-complete …");
|
||
todo!("[Bootstrap] Waiting for bootstrap-complete …")
|
||
}
|
||
|
||
async fn create_file(&self, path: &PathBuf, content: &[u8]) -> Result<(), InterpretError> {
|
||
let mut install_config_file = File::create(path).await.map_err(|e| {
|
||
InterpretError::new(format!(
|
||
"Could not create file {} : {e}",
|
||
path.to_string_lossy()
|
||
))
|
||
})?;
|
||
install_config_file.write(content).await.map_err(|e| {
|
||
InterpretError::new(format!(
|
||
"Could not write file {} : {e}",
|
||
path.to_string_lossy()
|
||
))
|
||
})?;
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
#[async_trait]
|
||
impl Interpret<HAClusterTopology> for OKDSetup02BootstrapInterpret {
|
||
fn get_name(&self) -> InterpretName {
|
||
InterpretName::Custom("OKDSetup02Bootstrap")
|
||
}
|
||
|
||
fn get_version(&self) -> Version {
|
||
self.version.clone()
|
||
}
|
||
|
||
fn get_status(&self) -> InterpretStatus {
|
||
self.status.clone()
|
||
}
|
||
|
||
fn get_children(&self) -> Vec<Id> {
|
||
vec![]
|
||
}
|
||
|
||
async fn execute(
|
||
&self,
|
||
inventory: &Inventory,
|
||
topology: &HAClusterTopology,
|
||
) -> Result<Outcome, InterpretError> {
|
||
self.configure_host_binding(inventory, topology).await?;
|
||
self.prepare_ignition_files(inventory, topology).await?;
|
||
self.render_per_mac_pxe(inventory, topology).await?;
|
||
self.setup_bootstrap_load_balancer(inventory, topology)
|
||
.await?;
|
||
|
||
// TODO https://docs.okd.io/latest/installing/installing_bare_metal/upi/installing-bare-metal.html#installation-user-provisioned-validating-dns_installing-bare-metal
|
||
// self.validate_dns_config(inventory, topology).await?;
|
||
|
||
self.reboot_target().await?;
|
||
self.wait_for_bootstrap_complete().await?;
|
||
|
||
Ok(Outcome::new(
|
||
InterpretStatus::SUCCESS,
|
||
"Bootstrap phase complete".into(),
|
||
))
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------------------------
|
||
// Step 03: Control Plane
|
||
// - Render per-MAC PXE & ignition for cp0/cp1/cp2.
|
||
// - Persist bonding via MachineConfigs (or NNCP) once SCOS is active.
|
||
// -------------------------------------------------------------------------------------------------
|
||
|
||
#[derive(Debug, Clone, Serialize, new)]
|
||
struct OKDSetup03ControlPlaneScore {}
|
||
|
||
impl Score<HAClusterTopology> for OKDSetup03ControlPlaneScore {
|
||
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
|
||
Box::new(OKDSetup03ControlPlaneInterpret::new(self.clone()))
|
||
}
|
||
|
||
fn name(&self) -> String {
|
||
"OKDSetup03ControlPlaneScore".to_string()
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone)]
|
||
struct OKDSetup03ControlPlaneInterpret {
|
||
score: OKDSetup03ControlPlaneScore,
|
||
version: Version,
|
||
status: InterpretStatus,
|
||
}
|
||
|
||
impl OKDSetup03ControlPlaneInterpret {
|
||
pub fn new(score: OKDSetup03ControlPlaneScore) -> Self {
|
||
let version = Version::from("1.0.0").unwrap();
|
||
Self {
|
||
version,
|
||
score,
|
||
status: InterpretStatus::QUEUED,
|
||
}
|
||
}
|
||
|
||
async fn render_and_reboot(&self) -> Result<(), InterpretError> {
|
||
info!("[ControlPlane] Rendering per-MAC PXE for masters and rebooting");
|
||
Ok(())
|
||
}
|
||
|
||
async fn persist_network_bond(&self) -> Result<(), InterpretError> {
|
||
// Generate MC or NNCP from inventory NIC data; apply via ignition or post-join.
|
||
info!("[ControlPlane] Ensuring persistent bonding via MachineConfig/NNCP");
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
#[async_trait]
|
||
impl Interpret<HAClusterTopology> for OKDSetup03ControlPlaneInterpret {
|
||
fn get_name(&self) -> InterpretName {
|
||
InterpretName::Custom("OKDSetup03ControlPlane")
|
||
}
|
||
|
||
fn get_version(&self) -> Version {
|
||
self.version.clone()
|
||
}
|
||
|
||
fn get_status(&self) -> InterpretStatus {
|
||
self.status.clone()
|
||
}
|
||
|
||
fn get_children(&self) -> Vec<Id> {
|
||
vec![]
|
||
}
|
||
|
||
async fn execute(
|
||
&self,
|
||
_inventory: &Inventory,
|
||
_topology: &HAClusterTopology,
|
||
) -> Result<Outcome, InterpretError> {
|
||
self.render_and_reboot().await?;
|
||
self.persist_network_bond().await?;
|
||
Ok(Outcome::new(
|
||
InterpretStatus::SUCCESS,
|
||
"Control plane provisioned".into(),
|
||
))
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------------------------
|
||
// Step 04: Workers
|
||
// - Render per-MAC PXE & ignition for workers; join nodes.
|
||
// - Persist bonding via MC/NNCP as required (same approach as masters).
|
||
// -------------------------------------------------------------------------------------------------
|
||
|
||
#[derive(Debug, Clone, Serialize, new)]
|
||
struct OKDSetup04WorkersScore {}
|
||
|
||
impl Score<HAClusterTopology> for OKDSetup04WorkersScore {
|
||
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
|
||
Box::new(OKDSetup04WorkersInterpret::new(self.clone()))
|
||
}
|
||
|
||
fn name(&self) -> String {
|
||
"OKDSetup04WorkersScore".to_string()
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone)]
|
||
struct OKDSetup04WorkersInterpret {
|
||
score: OKDSetup04WorkersScore,
|
||
version: Version,
|
||
status: InterpretStatus,
|
||
}
|
||
|
||
impl OKDSetup04WorkersInterpret {
|
||
pub fn new(score: OKDSetup04WorkersScore) -> Self {
|
||
let version = Version::from("1.0.0").unwrap();
|
||
Self {
|
||
version,
|
||
score,
|
||
status: InterpretStatus::QUEUED,
|
||
}
|
||
}
|
||
|
||
async fn render_and_reboot(&self) -> Result<(), InterpretError> {
|
||
info!("[Workers] Rendering per-MAC PXE for workers and rebooting");
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
#[async_trait]
|
||
impl Interpret<HAClusterTopology> for OKDSetup04WorkersInterpret {
|
||
fn get_name(&self) -> InterpretName {
|
||
InterpretName::Custom("OKDSetup04Workers")
|
||
}
|
||
|
||
fn get_version(&self) -> Version {
|
||
self.version.clone()
|
||
}
|
||
|
||
fn get_status(&self) -> InterpretStatus {
|
||
self.status.clone()
|
||
}
|
||
|
||
fn get_children(&self) -> Vec<Id> {
|
||
vec![]
|
||
}
|
||
|
||
async fn execute(
|
||
&self,
|
||
_inventory: &Inventory,
|
||
_topology: &HAClusterTopology,
|
||
) -> Result<Outcome, InterpretError> {
|
||
self.render_and_reboot().await?;
|
||
Ok(Outcome::new(
|
||
InterpretStatus::SUCCESS,
|
||
"Workers provisioned".into(),
|
||
))
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------------------------
|
||
// Step 05: Sanity Check
|
||
// - Validate API reachability, ClusterOperators, ingress, and SDN status.
|
||
// -------------------------------------------------------------------------------------------------
|
||
|
||
#[derive(Debug, Clone, Serialize, new)]
|
||
struct OKDSetup05SanityCheckScore {}
|
||
|
||
impl Score<HAClusterTopology> for OKDSetup05SanityCheckScore {
|
||
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
|
||
Box::new(OKDSetup05SanityCheckInterpret::new(self.clone()))
|
||
}
|
||
|
||
fn name(&self) -> String {
|
||
"OKDSetup05SanityCheckScore".to_string()
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone)]
|
||
struct OKDSetup05SanityCheckInterpret {
|
||
score: OKDSetup05SanityCheckScore,
|
||
version: Version,
|
||
status: InterpretStatus,
|
||
}
|
||
|
||
impl OKDSetup05SanityCheckInterpret {
|
||
pub fn new(score: OKDSetup05SanityCheckScore) -> Self {
|
||
let version = Version::from("1.0.0").unwrap();
|
||
Self {
|
||
version,
|
||
score,
|
||
status: InterpretStatus::QUEUED,
|
||
}
|
||
}
|
||
|
||
async fn run_checks(&self) -> Result<(), InterpretError> {
|
||
info!("[Sanity] Checking API, COs, Ingress, and SDN health …");
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
#[async_trait]
|
||
impl Interpret<HAClusterTopology> for OKDSetup05SanityCheckInterpret {
|
||
fn get_name(&self) -> InterpretName {
|
||
InterpretName::Custom("OKDSetup05SanityCheck")
|
||
}
|
||
|
||
fn get_version(&self) -> Version {
|
||
self.version.clone()
|
||
}
|
||
|
||
fn get_status(&self) -> InterpretStatus {
|
||
self.status.clone()
|
||
}
|
||
|
||
fn get_children(&self) -> Vec<Id> {
|
||
vec![]
|
||
}
|
||
|
||
async fn execute(
|
||
&self,
|
||
_inventory: &Inventory,
|
||
_topology: &HAClusterTopology,
|
||
) -> Result<Outcome, InterpretError> {
|
||
self.run_checks().await?;
|
||
Ok(Outcome::new(
|
||
InterpretStatus::SUCCESS,
|
||
"Sanity checks passed".into(),
|
||
))
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------------------------
|
||
// Step 06: Installation Report
|
||
// - Emit JSON and concise human summary of nodes, roles, versions, and health.
|
||
// -------------------------------------------------------------------------------------------------
|
||
|
||
#[derive(Debug, Clone, Serialize, new)]
|
||
struct OKDSetup06InstallationReportScore {}
|
||
|
||
impl Score<HAClusterTopology> for OKDSetup06InstallationReportScore {
|
||
fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
|
||
Box::new(OKDSetup06InstallationReportInterpret::new(self.clone()))
|
||
}
|
||
|
||
fn name(&self) -> String {
|
||
"OKDSetup06InstallationReportScore".to_string()
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone)]
|
||
struct OKDSetup06InstallationReportInterpret {
|
||
score: OKDSetup06InstallationReportScore,
|
||
version: Version,
|
||
status: InterpretStatus,
|
||
}
|
||
|
||
impl OKDSetup06InstallationReportInterpret {
|
||
pub fn new(score: OKDSetup06InstallationReportScore) -> Self {
|
||
let version = Version::from("1.0.0").unwrap();
|
||
Self {
|
||
version,
|
||
score,
|
||
status: InterpretStatus::QUEUED,
|
||
}
|
||
}
|
||
|
||
async fn generate(&self) -> Result<(), InterpretError> {
|
||
info!("[Report] Generating OKD installation report",);
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
#[async_trait]
|
||
impl Interpret<HAClusterTopology> for OKDSetup06InstallationReportInterpret {
|
||
fn get_name(&self) -> InterpretName {
|
||
InterpretName::Custom("OKDSetup06InstallationReport")
|
||
}
|
||
|
||
fn get_version(&self) -> Version {
|
||
self.version.clone()
|
||
}
|
||
|
||
fn get_status(&self) -> InterpretStatus {
|
||
self.status.clone()
|
||
}
|
||
|
||
fn get_children(&self) -> Vec<Id> {
|
||
vec![]
|
||
}
|
||
|
||
async fn execute(
|
||
&self,
|
||
_inventory: &Inventory,
|
||
_topology: &HAClusterTopology,
|
||
) -> Result<Outcome, InterpretError> {
|
||
self.generate().await?;
|
||
Ok(Outcome::new(
|
||
InterpretStatus::SUCCESS,
|
||
"Installation report generated".into(),
|
||
))
|
||
}
|
||
}
|