Merge branch 'master' into fix/ingress

This commit is contained in:
2025-09-09 08:11:21 -04:00
94 changed files with 5166 additions and 687 deletions

View File

@@ -68,9 +68,11 @@ thiserror.workspace = true
once_cell = "1.21.3"
walkdir = "2.5.0"
harmony_inventory_agent = { path = "../harmony_inventory_agent" }
harmony_secret_derive = { version = "0.1.0", path = "../harmony_secret_derive" }
harmony_secret_derive = { path = "../harmony_secret_derive" }
harmony_secret = { path = "../harmony_secret" }
askama.workspace = true
sqlx.workspace = true
inquire.workspace = true
[dev-dependencies]
pretty_assertions.workspace = true

View File

@@ -1,3 +1,5 @@
pub mod secret;
use lazy_static::lazy_static;
use std::path::PathBuf;

View File

@@ -0,0 +1,20 @@
use harmony_secret_derive::Secret;
use serde::{Deserialize, Serialize};
/// Credentials used to authenticate against an OPNsense firewall API/UI.
#[derive(Secret, Serialize, Deserialize, Debug, PartialEq)]
pub struct OPNSenseFirewallCredentials {
/// Login username for the OPNsense instance.
pub username: String,
/// Password paired with `username`; persisted through the `Secret` derive's store.
pub password: String,
}
// TODO we need a better way to handle multiple "instances" of the same secret structure.
/// An SSH keypair stored as a secret (both halves as raw strings).
#[derive(Secret, Serialize, Deserialize, Debug, PartialEq)]
pub struct SshKeyPair {
/// Private key material.
pub private: String,
/// Public key material.
pub public: String,
}
/// Red Hat registry/installer credentials.
#[derive(Secret, Serialize, Deserialize, Debug, PartialEq)]
pub struct RedhatSecret {
// Pull secret as obtained from the Red Hat console (JSON blob kept opaque here).
pub pull_secret: String,
}

View File

@@ -1,5 +1,3 @@
use std::sync::Arc;
use derive_new::new;
use harmony_inventory_agent::hwinfo::{CPU, MemoryModule, NetworkInterface, StorageDrive};
use harmony_types::net::MacAddress;
@@ -10,7 +8,7 @@ pub type HostGroup = Vec<PhysicalHost>;
pub type SwitchGroup = Vec<Switch>;
pub type FirewallGroup = Vec<PhysicalHost>;
#[derive(Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PhysicalHost {
pub id: Id,
pub category: HostCategory,
@@ -151,6 +149,98 @@ impl PhysicalHost {
parts.join(" | ")
}
/// Renders a multi-line, human-readable hardware inventory for this host:
/// CPUs, network interfaces, storage drives and memory modules, each section
/// under a banner line with a count.
pub fn parts_list(&self) -> String {
// Exhaustive destructure: adding a field to PhysicalHost becomes a
// compile error here, forcing this report to be revisited.
let PhysicalHost {
id,
category,
network,
storage,
labels,
memory_modules,
cpus,
} = self;
let mut parts_list = String::new();
parts_list.push_str("\n\n=====================");
parts_list.push_str(&format!("\nHost ID {id}"));
parts_list.push_str("\n=====================");
parts_list.push_str("\n\n=====================");
parts_list.push_str(&format!("\nCPU count {}", cpus.len()));
parts_list.push_str("\n=====================");
cpus.iter().for_each(|c| {
let CPU {
model,
vendor,
cores,
threads,
frequency_mhz,
} = c;
// MHz -> GHz for display.
parts_list.push_str(&format!(
"\n{vendor} {model}, {cores}/{threads} {}Ghz",
*frequency_mhz as f64 / 1000.0
));
});
parts_list.push_str("\n\n=====================");
parts_list.push_str(&format!("\nNetwork Interfaces count {}", network.len()));
parts_list.push_str("\n=====================");
network.iter().for_each(|nic| {
// Unknown link speed renders as 0 Gbps.
parts_list.push_str(&format!(
"\nNic({} {}Gbps mac({}) ipv4({}), ipv6({})",
nic.name,
nic.speed_mbps.unwrap_or(0) / 1000,
nic.mac_address,
nic.ipv4_addresses.join(","),
nic.ipv6_addresses.join(",")
));
});
parts_list.push_str("\n\n=====================");
parts_list.push_str(&format!("\nStorage drives count {}", storage.len()));
parts_list.push_str("\n=====================");
storage.iter().for_each(|drive| {
let StorageDrive {
name,
model,
serial,
size_bytes,
logical_block_size: _,
physical_block_size: _,
rotational: _,
wwn: _,
interface_type,
smart_status,
} = drive;
// Decimal gigabytes (1000^3), despite the "Gb" label.
parts_list.push_str(&format!(
"\n{name} {}Gb {model} {interface_type} smart({smart_status:?}) {serial}",
size_bytes / 1000 / 1000 / 1000
));
});
parts_list.push_str("\n\n=====================");
parts_list.push_str(&format!("\nMemory modules count {}", memory_modules.len()));
parts_list.push_str("\n=====================");
memory_modules.iter().for_each(|mem| {
let MemoryModule {
size_bytes,
speed_mhz,
manufacturer,
part_number,
serial_number,
rank,
} = mem;
// Missing manufacturer/part-number render as empty strings.
parts_list.push_str(&format!(
"\n{}Gb, {}Mhz, Manufacturer ({}), Part Number ({})",
size_bytes / 1000 / 1000 / 1000,
speed_mhz.unwrap_or(0),
manufacturer.as_ref().unwrap_or(&String::new()),
part_number.as_ref().unwrap_or(&String::new()),
));
});
parts_list
}
pub fn cluster_mac(&self) -> MacAddress {
self.network
.first()
@@ -173,6 +263,10 @@ impl PhysicalHost {
self
}
/// Returns the MAC address of every network interface on this host.
pub fn get_mac_address(&self) -> Vec<MacAddress> {
self.network.iter().map(|nic| nic.mac_address).collect()
}
/// Builder-style helper: attaches a `name`/`value` label and returns `self`.
pub fn label(mut self, name: String, value: String) -> Self {
self.labels.push(Label { name, value });
self
}
@@ -221,15 +315,6 @@ impl PhysicalHost {
// }
// }
impl<'de> Deserialize<'de> for PhysicalHost {
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
todo!()
}
}
#[derive(new, Serialize)]
pub struct ManualManagementInterface;
@@ -273,16 +358,13 @@ where
}
}
#[derive(Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HostCategory {
Server,
Firewall,
Switch,
}
#[cfg(test)]
use harmony_macros::mac_address;
use harmony_types::id::Id;
#[derive(Debug, Clone, Serialize)]
@@ -291,7 +373,7 @@ pub struct Switch {
_management_interface: NetworkInterface,
}
#[derive(Debug, new, Clone, Serialize)]
#[derive(Debug, new, Clone, Serialize, Deserialize)]
pub struct Label {
pub name: String,
pub value: String,

View File

@@ -32,6 +32,7 @@ pub enum InterpretName {
K8sPrometheusCrdAlerting,
DiscoverInventoryAgent,
CephClusterHealth,
Custom(&'static str),
RHOBAlerting,
}
@@ -61,6 +62,7 @@ impl std::fmt::Display for InterpretName {
InterpretName::K8sPrometheusCrdAlerting => f.write_str("K8sPrometheusCrdAlerting"),
InterpretName::DiscoverInventoryAgent => f.write_str("DiscoverInventoryAgent"),
InterpretName::CephClusterHealth => f.write_str("CephClusterHealth"),
InterpretName::Custom(name) => f.write_str(name),
InterpretName::RHOBAlerting => f.write_str("RHOBAlerting"),
}
}
@@ -142,6 +144,12 @@ impl From<PreparationError> for InterpretError {
}
}
/// Lets `?` lift secret-store failures into `InterpretError` inside interprets.
impl From<harmony_secret::SecretStoreError> for InterpretError {
fn from(value: harmony_secret::SecretStoreError) -> Self {
InterpretError::new(format!("Interpret error : {value}"))
}
}
impl From<ExecutorError> for InterpretError {
fn from(value: ExecutorError) -> Self {
Self {

View File

@@ -17,12 +17,14 @@ impl InventoryFilter {
use derive_new::new;
use log::info;
use serde::{Deserialize, Serialize};
use strum::EnumIter;
use crate::hardware::{ManagementInterface, ManualManagementInterface};
use super::{
filter::Filter,
hardware::{FirewallGroup, HostGroup, Location, SwitchGroup},
hardware::{HostGroup, Location, SwitchGroup},
};
#[derive(Debug)]
@@ -61,3 +63,11 @@ impl Inventory {
}
}
}
/// Role a physical host fulfils in the cluster. Stored in the inventory DB
/// (`sqlx::Type`) and iterable via `strum::EnumIter` for inspection flows.
#[derive(Debug, Serialize, Deserialize, sqlx::Type, Clone, EnumIter)]
pub enum HostRole {
/// Temporary node used to bring the cluster up.
Bootstrap,
/// Control-plane / master node.
ControlPlane,
/// Workload node.
Worker,
/// Dedicated storage node.
Storage,
}

View File

@@ -1,6 +1,6 @@
use async_trait::async_trait;
use crate::hardware::PhysicalHost;
use crate::{hardware::PhysicalHost, interpret::InterpretError, inventory::HostRole};
/// Errors that can occur within the repository layer.
#[derive(thiserror::Error, Debug)]
@@ -15,6 +15,12 @@ pub enum RepoError {
ConnectionFailed(String),
}
/// Lets `?` lift repository failures into `InterpretError` inside interprets.
// NOTE(review): the message prefix says "Interpret error" although the source
// is a repository error — consider "Repository error" for clearer logs.
impl From<RepoError> for InterpretError {
fn from(value: RepoError) -> Self {
InterpretError::new(format!("Interpret error : {value}"))
}
}
// --- Trait and Implementation ---
/// Defines the contract for inventory persistence.
@@ -22,4 +28,11 @@ pub enum RepoError {
pub trait InventoryRepository: Send + Sync + 'static {
/// Persists a new version of `host`.
async fn save(&self, host: &PhysicalHost) -> Result<(), RepoError>;
/// Latest stored version of the host with `host_id`, if any.
async fn get_latest_by_id(&self, host_id: &str) -> Result<Option<PhysicalHost>, RepoError>;
/// Latest version of every known host.
async fn get_all_hosts(&self) -> Result<Vec<PhysicalHost>, RepoError>;
/// All hosts currently mapped to `role`.
async fn get_host_for_role(&self, role: &HostRole) -> Result<Vec<PhysicalHost>, RepoError>;
/// Records that `host` fulfils `role`.
async fn save_role_mapping(
&self,
role: &HostRole,
host: &PhysicalHost,
) -> Result<(), RepoError>;
}

View File

@@ -69,6 +69,26 @@ impl K8sclient for HAClusterTopology {
}
impl HAClusterTopology {
// TODO this is a hack to avoid refactoring
/// Derives the cluster name as the first dot-separated label of
/// `domain_name` (e.g. `"cluster"` for `"cluster.example.com"`).
///
/// NOTE(review): `split(".").next()` is always `Some` — even for an empty
/// string it yields `Some("")` — so this `expect` can never actually fire.
pub fn get_cluster_name(&self) -> String {
self.domain_name
.split(".")
.next()
.expect("Cluster domain name must not be empty")
.to_string()
}
/// Derives the base domain by stripping the cluster name (and its trailing
/// dot) from `domain_name` (e.g. `"example.com"` for `"cluster.example.com"`).
///
/// Panics if `domain_name` does not start with the value returned by
/// `get_cluster_name()` (cannot happen while both read the same field).
pub fn get_cluster_base_domain(&self) -> String {
let base_domain = self
.domain_name
.strip_prefix(&self.get_cluster_name())
.expect("cluster domain must start with cluster name");
// Drop the separating dot if present (absent when domain_name has no dot).
base_domain
.strip_prefix(".")
.unwrap_or(base_domain)
.to_string()
}
pub fn autoload() -> Self {
let dummy_infra = Arc::new(DummyInfra {});
let dummy_host = LogicalHost {
@@ -161,6 +181,14 @@ impl DhcpServer for HAClusterTopology {
self.dhcp_server.set_pxe_options(options).await
}
/// Delegates DHCP address-pool configuration to the wrapped `dhcp_server`.
async fn set_dhcp_range(
&self,
start: &IpAddress,
end: &IpAddress,
) -> Result<(), ExecutorError> {
self.dhcp_server.set_dhcp_range(start, end).await
}
fn get_ip(&self) -> IpAddress {
self.dhcp_server.get_ip()
}
@@ -209,8 +237,12 @@ impl Router for HAClusterTopology {
#[async_trait]
impl HttpServer for HAClusterTopology {
async fn serve_files(&self, url: &Url) -> Result<(), ExecutorError> {
self.http_server.serve_files(url).await
async fn serve_files(
&self,
url: &Url,
remote_path: &Option<String>,
) -> Result<(), ExecutorError> {
self.http_server.serve_files(url, remote_path).await
}
async fn serve_file_content(&self, file: &FileContent) -> Result<(), ExecutorError> {
@@ -298,6 +330,13 @@ impl DhcpServer for DummyInfra {
async fn set_pxe_options(&self, _options: PxeOptions) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
/// Stub: DummyInfra provides no DHCP server; always panics.
// NOTE(review): `start`/`end` are unused — prefixing them with `_` (as the
// sibling dummy methods do) would silence unused-variable warnings.
async fn set_dhcp_range(
&self,
start: &IpAddress,
end: &IpAddress,
) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_ip(&self) -> IpAddress {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
@@ -362,7 +401,11 @@ impl TftpServer for DummyInfra {
#[async_trait]
impl HttpServer for DummyInfra {
async fn serve_files(&self, _url: &Url) -> Result<(), ExecutorError> {
async fn serve_files(
&self,
_url: &Url,
_remote_path: &Option<String>,
) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn serve_file_content(&self, _file: &FileContent) -> Result<(), ExecutorError> {

View File

@@ -5,7 +5,11 @@ use harmony_types::net::IpAddress;
use harmony_types::net::Url;
#[async_trait]
pub trait HttpServer: Send + Sync {
async fn serve_files(&self, url: &Url) -> Result<(), ExecutorError>;
async fn serve_files(
&self,
url: &Url,
remote_path: &Option<String>,
) -> Result<(), ExecutorError>;
async fn serve_file_content(&self, file: &FileContent) -> Result<(), ExecutorError>;
fn get_ip(&self) -> IpAddress;

View File

@@ -102,8 +102,17 @@ pub enum HttpStatusCode {
ServerError5xx,
}
/// SSL behaviour of an HTTP health check, mirroring the HAProxy/OPNsense
/// `ssl` health-check field ("ssl" / "sslni" / "nossl" / empty).
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum SSL {
/// Force SSL on the health-check connection ("ssl").
SSL,
/// Explicitly disable SSL ("nossl").
Disabled,
/// Leave the backend default (empty config value).
Default,
/// SSL with SNI ("sslni").
SNI,
/// Any unrecognised raw value, preserved verbatim.
Other(String),
}
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum HealthCheck {
HTTP(String, HttpMethod, HttpStatusCode),
HTTP(String, HttpMethod, HttpStatusCode, SSL),
TCP(Option<u16>),
}

View File

@@ -11,15 +11,21 @@ use super::{LogicalHost, k8s::K8sClient};
#[derive(Debug)]
pub struct DHCPStaticEntry {
pub name: String,
pub mac: MacAddress,
pub mac: Vec<MacAddress>,
pub ip: Ipv4Addr,
}
impl std::fmt::Display for DHCPStaticEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mac = self
.mac
.iter()
.map(|m| m.to_string())
.collect::<Vec<String>>()
.join(",");
f.write_fmt(format_args!(
"DHCPStaticEntry : name {}, mac {}, ip {}",
self.name, self.mac, self.ip
self.name, mac, self.ip
))
}
}
@@ -41,6 +47,7 @@ impl std::fmt::Debug for dyn Firewall {
pub struct NetworkDomain {
pub name: String,
}
#[async_trait]
pub trait K8sclient: Send + Sync {
async fn k8s_client(&self) -> Result<Arc<K8sClient>, String>;
@@ -59,6 +66,8 @@ pub trait DhcpServer: Send + Sync + std::fmt::Debug {
async fn remove_static_mapping(&self, mac: &MacAddress) -> Result<(), ExecutorError>;
async fn list_static_mappings(&self) -> Vec<(MacAddress, IpAddress)>;
async fn set_pxe_options(&self, pxe_options: PxeOptions) -> Result<(), ExecutorError>;
async fn set_dhcp_range(&self, start: &IpAddress, end: &IpAddress)
-> Result<(), ExecutorError>;
fn get_ip(&self) -> IpAddress;
fn get_host(&self) -> LogicalHost;
async fn commit_config(&self) -> Result<(), ExecutorError>;

View File

@@ -1,6 +1,6 @@
use crate::{
hardware::PhysicalHost,
inventory::{InventoryRepository, RepoError},
inventory::{HostRole, InventoryRepository, RepoError},
};
use async_trait::async_trait;
use harmony_types::id::Id;
@@ -46,20 +46,104 @@ impl InventoryRepository for SqliteInventoryRepository {
}
async fn get_latest_by_id(&self, host_id: &str) -> Result<Option<PhysicalHost>, RepoError> {
let _row = sqlx::query_as!(
let row = sqlx::query_as!(
DbHost,
r#"SELECT id, version_id, data as "data: Json<PhysicalHost>" FROM physical_hosts WHERE id = ? ORDER BY version_id DESC LIMIT 1"#,
host_id
)
.fetch_optional(&self.pool)
.await?;
todo!()
Ok(row.map(|r| r.data.0))
}
/// Returns the latest stored version of every host.
///
/// The inner join keeps, for each host id, only the row whose `version_id`
/// equals that id's maximum.
// NOTE(review): if `version_id` is TEXT, `MAX(version_id)` compares
// lexicographically — confirm that version ids sort correctly as strings.
async fn get_all_hosts(&self) -> Result<Vec<PhysicalHost>, RepoError> {
let db_hosts = sqlx::query_as!(
DbHost,
r#"
SELECT
p1.id,
p1.version_id,
p1.data as "data: Json<PhysicalHost>"
FROM
physical_hosts p1
INNER JOIN (
SELECT
id,
MAX(version_id) AS max_version
FROM
physical_hosts
GROUP BY
id
) p2 ON p1.id = p2.id AND p1.version_id = p2.max_version
"#
)
.fetch_all(&self.pool)
.await?;
// Unwrap the sqlx Json wrapper to plain PhysicalHost values.
let hosts = db_hosts.into_iter().map(|row| row.data.0).collect();
Ok(hosts)
}
/// Inserts a (host_id, role) row into `host_role_mapping`.
///
/// `role` is bound via its `sqlx::Type` derive; `get_host_for_role` looks it
/// up with a `Debug`-formatted string.
// NOTE(review): verify the sqlx::Type encoding matches the Debug string used
// for lookups — a mismatch would make saved mappings unfindable.
async fn save_role_mapping(
&self,
role: &HostRole,
host: &PhysicalHost,
) -> Result<(), RepoError> {
let host_id = host.id.to_string();
sqlx::query!(
r#"
INSERT INTO host_role_mapping (host_id, role)
VALUES (?, ?)
"#,
host_id,
role
)
.execute(&self.pool)
.await?;
info!("Saved role mapping for host '{}' as '{:?}'", host.id, role);
Ok(())
}
/// Fetches all hosts mapped to `role`: reads the host ids from
/// `host_role_mapping`, then resolves each to its latest stored version.
/// Dangling mappings (id with no host row) are logged and skipped.
async fn get_host_for_role(&self, role: &HostRole) -> Result<Vec<PhysicalHost>, RepoError> {
// Local row type for the single-column query below.
struct HostIdRow {
host_id: String,
}
// Role is matched by its Debug representation (e.g. "Worker").
let role_str = format!("{:?}", role);
let host_id_rows = sqlx::query_as!(
HostIdRow,
"SELECT host_id FROM host_role_mapping WHERE role = ?",
role_str
)
.fetch_all(&self.pool)
.await?;
let mut hosts = Vec::with_capacity(host_id_rows.len());
for row in host_id_rows {
match self.get_latest_by_id(&row.host_id).await? {
Some(host) => hosts.push(host),
None => {
log::warn!(
"Found a role mapping for host_id '{}', but the host does not exist in the physical_hosts table. This may indicate a data integrity issue.",
row.host_id
);
}
}
}
Ok(hosts)
}
}
use sqlx::types::Json;
struct DbHost {
data: Json<PhysicalHost>,
id: Id,
version_id: Id,
id: String,
version_id: String,
}

View File

@@ -17,13 +17,13 @@ impl DhcpServer for OPNSenseFirewall {
}
async fn add_static_mapping(&self, entry: &DHCPStaticEntry) -> Result<(), ExecutorError> {
let mac: String = String::from(&entry.mac);
let mac: Vec<String> = entry.mac.iter().map(MacAddress::to_string).collect();
{
let mut writable_opnsense = self.opnsense_config.write().await;
writable_opnsense
.dhcp()
.add_static_mapping(&mac, entry.ip, &entry.name)
.add_static_mapping(&mac, &entry.ip, &entry.name)
.unwrap();
}
@@ -68,4 +68,19 @@ impl DhcpServer for OPNSenseFirewall {
ExecutorError::UnexpectedError(format!("Failed to set_pxe_options : {dhcp_error}"))
})
}
/// Writes the DHCP pool range (`start`..`end`) into the OPNsense config,
/// mapping any config-layer failure to `ExecutorError::UnexpectedError`.
async fn set_dhcp_range(
&self,
start: &IpAddress,
end: &IpAddress,
) -> Result<(), ExecutorError> {
// Exclusive write lock over the shared OPNsense config.
let mut writable_opnsense = self.opnsense_config.write().await;
writable_opnsense
.dhcp()
.set_dhcp_range(&start.to_string(), &end.to_string())
.await
.map_err(|dhcp_error| {
ExecutorError::UnexpectedError(format!("Failed to set_dhcp_range : {dhcp_error}"))
})
}
}

View File

@@ -1,4 +1,3 @@
use crate::infra::opnsense::Host;
use crate::infra::opnsense::LogicalHost;
use crate::{
executors::ExecutorError,
@@ -12,21 +11,22 @@ use super::OPNSenseFirewall;
#[async_trait]
impl DnsServer for OPNSenseFirewall {
async fn register_hosts(&self, hosts: Vec<DnsRecord>) -> Result<(), ExecutorError> {
let mut writable_opnsense = self.opnsense_config.write().await;
let mut dns = writable_opnsense.dns();
let hosts = hosts
.iter()
.map(|h| {
Host::new(
h.host.clone(),
h.domain.clone(),
h.record_type.to_string(),
h.value.to_string(),
)
})
.collect();
dns.register_hosts(hosts);
Ok(())
todo!("Refactor this to use dnsmasq")
// let mut writable_opnsense = self.opnsense_config.write().await;
// let mut dns = writable_opnsense.dns();
// let hosts = hosts
// .iter()
// .map(|h| {
// Host::new(
// h.host.clone(),
// h.domain.clone(),
// h.record_type.to_string(),
// h.value.to_string(),
// )
// })
// .collect();
// dns.add_static_mapping(hosts);
// Ok(())
}
fn remove_record(
@@ -38,25 +38,26 @@ impl DnsServer for OPNSenseFirewall {
}
async fn list_records(&self) -> Vec<crate::topology::DnsRecord> {
self.opnsense_config
.write()
.await
.dns()
.get_hosts()
.iter()
.map(|h| DnsRecord {
host: h.hostname.clone(),
domain: h.domain.clone(),
record_type: h
.rr
.parse()
.expect("received invalid record type {h.rr} from opnsense"),
value: h
.server
.parse()
.expect("received invalid ipv4 record from opnsense {h.server}"),
})
.collect()
todo!("Refactor this to use dnsmasq")
// self.opnsense_config
// .write()
// .await
// .dns()
// .get_hosts()
// .iter()
// .map(|h| DnsRecord {
// host: h.hostname.clone(),
// domain: h.domain.clone(),
// record_type: h
// .rr
// .parse()
// .expect("received invalid record type {h.rr} from opnsense"),
// value: h
// .server
// .parse()
// .expect("received invalid ipv4 record from opnsense {h.server}"),
// })
// .collect()
}
fn get_ip(&self) -> IpAddress {
@@ -68,11 +69,12 @@ impl DnsServer for OPNSenseFirewall {
}
async fn register_dhcp_leases(&self, register: bool) -> Result<(), ExecutorError> {
let mut writable_opnsense = self.opnsense_config.write().await;
let mut dns = writable_opnsense.dns();
dns.register_dhcp_leases(register);
Ok(())
todo!("Refactor this to use dnsmasq")
// let mut writable_opnsense = self.opnsense_config.write().await;
// let mut dns = writable_opnsense.dns();
// dns.register_dhcp_leases(register);
//
// Ok(())
}
async fn commit_config(&self) -> Result<(), ExecutorError> {

View File

@@ -10,13 +10,21 @@ const OPNSENSE_HTTP_ROOT_PATH: &str = "/usr/local/http";
#[async_trait]
impl HttpServer for OPNSenseFirewall {
async fn serve_files(&self, url: &Url) -> Result<(), ExecutorError> {
async fn serve_files(
&self,
url: &Url,
remote_path: &Option<String>,
) -> Result<(), ExecutorError> {
let config = self.opnsense_config.read().await;
info!("Uploading files from url {url} to {OPNSENSE_HTTP_ROOT_PATH}");
let remote_upload_path = remote_path
.clone()
.map(|r| format!("{OPNSENSE_HTTP_ROOT_PATH}/{r}"))
.unwrap_or(OPNSENSE_HTTP_ROOT_PATH.to_string());
match url {
Url::LocalFolder(path) => {
config
.upload_files(path, OPNSENSE_HTTP_ROOT_PATH)
.upload_files(path, &remote_upload_path)
.await
.map_err(|e| ExecutorError::UnexpectedError(e.to_string()))?;
}

View File

@@ -1,13 +1,15 @@
use async_trait::async_trait;
use log::{debug, info, warn};
use opnsense_config_xml::{Frontend, HAProxy, HAProxyBackend, HAProxyHealthCheck, HAProxyServer};
use log::{debug, error, info, warn};
use opnsense_config_xml::{
Frontend, HAProxy, HAProxyBackend, HAProxyHealthCheck, HAProxyServer, MaybeString,
};
use uuid::Uuid;
use crate::{
executors::ExecutorError,
topology::{
BackendServer, HealthCheck, HttpMethod, HttpStatusCode, LoadBalancer, LoadBalancerService,
LogicalHost,
LogicalHost, SSL,
},
};
use harmony_types::net::IpAddress;
@@ -206,7 +208,22 @@ pub(crate) fn get_health_check_for_backend(
.unwrap_or_default()
.into();
let status_code: HttpStatusCode = HttpStatusCode::Success2xx;
Some(HealthCheck::HTTP(path, method, status_code))
let ssl = match haproxy_health_check
.ssl
.content_string()
.to_uppercase()
.as_str()
{
"SSL" => SSL::SSL,
"SSLNI" => SSL::SNI,
"NOSSL" => SSL::Disabled,
"" => SSL::Default,
other => {
error!("Unknown haproxy health check ssl config {other}");
SSL::Other(other.to_string())
}
};
Some(HealthCheck::HTTP(path, method, status_code, ssl))
}
_ => panic!("Received unsupported health check type {}", uppercase),
}
@@ -241,7 +258,14 @@ pub(crate) fn harmony_load_balancer_service_to_haproxy_xml(
// frontend points to backend
let healthcheck = if let Some(health_check) = &service.health_check {
match health_check {
HealthCheck::HTTP(path, http_method, _http_status_code) => {
HealthCheck::HTTP(path, http_method, _http_status_code, ssl) => {
let ssl: MaybeString = match ssl {
SSL::SSL => "ssl".into(),
SSL::SNI => "sslni".into(),
SSL::Disabled => "nossl".into(),
SSL::Default => "".into(),
SSL::Other(other) => other.as_str().into(),
};
let haproxy_check = HAProxyHealthCheck {
name: format!("HTTP_{http_method}_{path}"),
uuid: Uuid::new_v4().to_string(),
@@ -249,6 +273,7 @@ pub(crate) fn harmony_load_balancer_service_to_haproxy_xml(
health_check_type: "http".to_string(),
http_uri: path.clone().into(),
interval: "2s".to_string(),
ssl,
..Default::default()
};

View File

@@ -1,7 +1,7 @@
use async_trait::async_trait;
use derive_new::new;
use harmony_types::id::Id;
use log::info;
use log::{info, trace};
use serde::Serialize;
use crate::{
@@ -22,6 +22,8 @@ pub struct DhcpScore {
pub filename: Option<String>,
pub filename64: Option<String>,
pub filenameipxe: Option<String>,
pub dhcp_range: (IpAddress, IpAddress),
pub domain: Option<String>,
}
impl<T: Topology + DhcpServer> Score<T> for DhcpScore {
@@ -52,48 +54,6 @@ impl DhcpInterpret {
status: InterpretStatus::QUEUED,
}
}
async fn add_static_entries<D: DhcpServer>(
&self,
_inventory: &Inventory,
dhcp_server: &D,
) -> Result<Outcome, InterpretError> {
let dhcp_entries: Vec<DHCPStaticEntry> = self
.score
.host_binding
.iter()
.map(|binding| {
let ip = match binding.logical_host.ip {
std::net::IpAddr::V4(ipv4) => ipv4,
std::net::IpAddr::V6(_) => {
unimplemented!("DHCPStaticEntry only supports ipv4 at the moment")
}
};
DHCPStaticEntry {
name: binding.logical_host.name.clone(),
mac: binding.physical_host.cluster_mac(),
ip,
}
})
.collect();
info!("DHCPStaticEntry : {:?}", dhcp_entries);
info!("DHCP server : {:?}", dhcp_server);
let number_new_entries = dhcp_entries.len();
for entry in dhcp_entries.into_iter() {
match dhcp_server.add_static_mapping(&entry).await {
Ok(_) => info!("Successfully registered DHCPStaticEntry {}", entry),
Err(_) => todo!(),
}
}
Ok(Outcome::new(
InterpretStatus::SUCCESS,
format!("Dhcp Interpret registered {} entries", number_new_entries),
))
}
async fn set_pxe_options<D: DhcpServer>(
&self,
@@ -124,7 +84,7 @@ impl DhcpInterpret {
}
#[async_trait]
impl<T: DhcpServer> Interpret<T> for DhcpInterpret {
impl<T: Topology + DhcpServer> Interpret<T> for DhcpInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::OPNSenseDHCP
}
@@ -149,8 +109,16 @@ impl<T: DhcpServer> Interpret<T> for DhcpInterpret {
info!("Executing DhcpInterpret on inventory {inventory:?}");
self.set_pxe_options(inventory, topology).await?;
topology
.set_dhcp_range(&self.score.dhcp_range.0, &self.score.dhcp_range.1)
.await?;
self.add_static_entries(inventory, topology).await?;
DhcpHostBindingScore {
host_binding: self.score.host_binding.clone(),
domain: self.score.domain.clone(),
}
.interpret(inventory, topology)
.await?;
topology.commit_config().await?;
@@ -160,3 +128,120 @@ impl<T: DhcpServer> Interpret<T> for DhcpInterpret {
))
}
}
/// Score describing static DHCP host bindings (logical host -> physical MACs),
/// optionally qualifying each hostname with `domain`.
#[derive(Debug, new, Clone, Serialize)]
pub struct DhcpHostBindingScore {
/// Logical-to-physical host pairs to register as static leases.
pub host_binding: Vec<HostBinding>,
/// When set, entries are registered as "<name>.<domain>".
pub domain: Option<String>,
}
impl<T: Topology + DhcpServer> Score<T> for DhcpHostBindingScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(DhcpHostBindingInterpret {
score: self.clone(),
})
}
fn name(&self) -> String {
"DhcpHostBindingScore".to_string()
}
}
// https://docs.opnsense.org/manual/dhcp.html#advanced-settings
/// Interpret that registers one static DHCP mapping per host binding.
#[derive(Debug, Clone)]
pub struct DhcpHostBindingInterpret {
// The score this interpret was created from.
score: DhcpHostBindingScore,
}
impl DhcpHostBindingInterpret {
/// Builds a `DHCPStaticEntry` per binding (all of the physical host's MACs
/// mapped to the logical host's IPv4) and registers each with `dhcp_server`.
///
/// Panics (`unimplemented!`) on IPv6 logical hosts and (`todo!`) if a
/// registration fails — both are explicit gaps, not silent failures.
async fn add_static_entries<D: DhcpServer>(
&self,
_inventory: &Inventory,
dhcp_server: &D,
) -> Result<Outcome, InterpretError> {
let dhcp_entries: Vec<DHCPStaticEntry> = self
.score
.host_binding
.iter()
.map(|binding| {
let ip = match binding.logical_host.ip {
std::net::IpAddr::V4(ipv4) => ipv4,
std::net::IpAddr::V6(_) => {
unimplemented!("DHCPStaticEntry only supports ipv4 at the moment")
}
};
// Fully-qualify the name when a domain is configured.
let name = if let Some(domain) = self.score.domain.as_ref() {
format!("{}.{}", binding.logical_host.name, domain)
} else {
binding.logical_host.name.clone()
};
DHCPStaticEntry {
name,
mac: binding.physical_host.get_mac_address(),
ip,
}
})
.collect();
info!("DHCPStaticEntry : {:?}", dhcp_entries);
trace!("DHCP server : {:?}", dhcp_server);
let number_new_entries = dhcp_entries.len();
for entry in dhcp_entries.into_iter() {
match dhcp_server.add_static_mapping(&entry).await {
Ok(_) => info!("Successfully registered DHCPStaticEntry {}", entry),
// NOTE(review): failure path unimplemented — panics instead of
// returning an InterpretError.
Err(_) => todo!(),
}
}
Ok(Outcome::new(
InterpretStatus::SUCCESS,
format!("Dhcp Interpret registered {} entries", number_new_entries),
))
}
}
#[async_trait]
impl<T: DhcpServer> Interpret<T> for DhcpHostBindingInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::Custom("DhcpHostBindingInterpret")
}
fn get_version(&self) -> crate::domain::data::Version {
Version::from("1.0.0").unwrap()
}
// NOTE(review): panics if called — status tracking not implemented yet.
fn get_status(&self) -> InterpretStatus {
todo!()
}
// NOTE(review): panics if called — child tracking not implemented yet.
fn get_children(&self) -> Vec<Id> {
todo!()
}
/// Registers all static entries, then commits the DHCP server config.
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
info!(
"Executing DhcpHostBindingInterpret on {} bindings",
self.score.host_binding.len()
);
self.add_static_entries(inventory, topology).await?;
topology.commit_config().await?;
Ok(Outcome::new(
InterpretStatus::SUCCESS,
format!(
"Dhcp Host Binding Interpret execution successful on {} hosts",
self.score.host_binding.len()
),
))
}
}

View File

@@ -3,14 +3,14 @@ use derive_new::new;
use serde::Serialize;
use crate::{
data::{FileContent, Version},
data::{FileContent, FilePath, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{HttpServer, Topology},
};
use harmony_types::id::Id;
use harmony_types::net::Url;
use harmony_types::{id::Id, net::MacAddress};
/// Configure an HTTP server that is provided by the Topology
///
@@ -25,8 +25,11 @@ use harmony_types::net::Url;
/// ```
/// Score describing static content for the topology's HTTP server: an
/// optional local folder to mirror plus individual in-memory files.
#[derive(Debug, new, Clone, Serialize)]
pub struct StaticFilesHttpScore {
// TODO this should be split in two scores, one for folder and
// other for files
/// Local folder whose contents are uploaded to the server, if any.
pub folder_to_serve: Option<Url>,
/// Individual files (path + content) to publish.
pub files: Vec<FileContent>,
/// Optional subdirectory under the server's HTTP root for the folder upload.
pub remote_path: Option<String>,
}
impl<T: Topology + HttpServer> Score<T> for StaticFilesHttpScore {
@@ -54,7 +57,9 @@ impl<T: Topology + HttpServer> Interpret<T> for StaticFilesHttpInterpret {
http_server.ensure_initialized().await?;
// http_server.set_ip(topology.router.get_gateway()).await?;
if let Some(folder) = self.score.folder_to_serve.as_ref() {
http_server.serve_files(folder).await?;
http_server
.serve_files(folder, &self.score.remote_path)
.await?;
}
for f in self.score.files.iter() {
@@ -91,3 +96,34 @@ impl<T: Topology + HttpServer> Interpret<T> for StaticFilesHttpInterpret {
todo!()
}
}
/// Score that publishes the same iPXE boot script for a set of MAC
/// addresses, one file per MAC, by delegating to `StaticFilesHttpScore`.
#[derive(Debug, new, Clone, Serialize)]
pub struct IPxeMacBootFileScore {
/// iPXE script body written identically for every MAC.
pub content: String,
/// MACs that should receive this boot file.
pub mac_address: Vec<MacAddress>,
}
impl<T: Topology + HttpServer> Score<T> for IPxeMacBootFileScore {
fn name(&self) -> String {
"IPxeMacBootFileScore".to_string()
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
// Expand to one file per MAC named "byMAC/01-aa-bb-cc-dd-ee-ff.ipxe";
// the "01-" prefix is presumably the PXE hardware-type prefix for
// Ethernet — confirm against the iPXE boot-URL convention.
StaticFilesHttpScore {
remote_path: None,
folder_to_serve: None,
files: self
.mac_address
.iter()
.map(|mac| FileContent {
path: FilePath::Relative(format!(
"byMAC/01-{}.ipxe",
mac.to_string().replace(":", "-")
)),
content: self.content.clone(),
})
.collect(),
}
.create_interpret()
}
}

View File

@@ -0,0 +1,122 @@
use async_trait::async_trait;
use harmony_types::id::Id;
use log::{error, info};
use serde::{Deserialize, Serialize};
use crate::{
data::Version,
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::inventory::LaunchDiscoverInventoryAgentScore,
score::Score,
topology::Topology,
};
/// Score that discovers inventory hosts and interactively assigns one to
/// the given cluster role.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoverHostForRoleScore {
/// Role the selected host will be mapped to.
pub role: HostRole,
}
impl<T: Topology> Score<T> for DiscoverHostForRoleScore {
/// Human-readable score name.
///
/// Fixed: previously returned "DiscoverInventoryAgentScore", a copy-paste
/// from the launcher score that made the two scores indistinguishable.
fn name(&self) -> String {
"DiscoverHostForRoleScore".to_string()
}
/// Wraps this score in its interpret for execution.
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(DiscoverHostForRoleInterpret {
score: self.clone(),
})
}
}
/// Interpret backing `DiscoverHostForRoleScore`.
#[derive(Debug)]
pub struct DiscoverHostForRoleInterpret {
// The score this interpret was created from.
score: DiscoverHostForRoleScore,
}
#[async_trait]
impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
/// Launches the discovery agent, then loops prompting the operator to pick
/// a discovered host for `score.role`; persists the chosen mapping.
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
info!(
"Launching discovery agent, make sure that your nodes are successfully PXE booted and running inventory agent. They should answer on `http://<node_ip>:8080/inventory`"
);
// Background discovery with no timeout: keeps collecting hosts while
// the operator is prompted below.
LaunchDiscoverInventoryAgentScore {
discovery_timeout: None,
}
.interpret(inventory, topology)
.await?;
let host: PhysicalHost;
let host_repo = InventoryRepositoryFactory::build().await?;
loop {
let all_hosts = host_repo.get_all_hosts().await?;
if all_hosts.is_empty() {
info!("No discovered hosts found yet. Waiting for hosts to appear...");
// Sleep to avoid spamming the user and logs while waiting for nodes.
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
continue;
}
let ans = inquire::Select::new(
&format!("Select the node to be used for role {:?}:", self.score.role),
all_hosts,
)
.with_help_message("Press Esc to refresh the list of discovered hosts")
.prompt();
match ans {
Ok(choice) => {
info!("Selected {} as the bootstrap node.", choice.summary());
host_repo
.save_role_mapping(&self.score.role, &choice)
.await?;
host = choice;
break;
}
// Esc is repurposed as "refresh the host list", not a cancel.
Err(inquire::InquireError::OperationCanceled) => {
info!("Refresh requested. Fetching list of discovered hosts again...");
continue;
}
Err(e) => {
error!(
"Failed to select node for role {:?} : {}",
self.score.role, e
);
return Err(InterpretError::new(format!(
"Could not select host : {}",
e.to_string()
)));
}
}
}
Ok(Outcome::success(format!(
"Successfully discovered host {} for role {:?}",
host.summary(),
self.score.role
)))
}
fn get_name(&self) -> InterpretName {
InterpretName::Custom("DiscoverHostForRoleScore")
}
// NOTE(review): the three getters below panic if called — not implemented.
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}

View File

@@ -0,0 +1,72 @@
use async_trait::async_trait;
use harmony_types::id::Id;
use log::info;
use serde::{Deserialize, Serialize};
use strum::IntoEnumIterator;
use crate::{
data::Version,
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
score::Score,
topology::Topology,
};
/// Score that logs a full inventory report: every role and the hosts
/// assigned to it. Read-only; no state is modified.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct InspectInventoryScore {}
impl<T: Topology> Score<T> for InspectInventoryScore {
fn name(&self) -> String {
"InspectInventoryScore".to_string()
}
#[doc(hidden)]
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(InspectInventoryInterpret {})
}
}
/// Interpret backing `InspectInventoryScore`; stateless.
#[derive(Debug)]
pub struct InspectInventoryInterpret;
#[async_trait]
impl<T: Topology> Interpret<T> for InspectInventoryInterpret {
/// Iterates every `HostRole` variant and logs a summary plus full parts
/// list for each host mapped to that role.
async fn execute(
&self,
_inventory: &Inventory,
_topology: &T,
) -> Result<Outcome, InterpretError> {
let repo = InventoryRepositoryFactory::build().await?;
// HostRole::iter() comes from strum's EnumIter derive.
for role in HostRole::iter() {
info!("Inspecting hosts for role {role:?}");
let hosts = repo.get_host_for_role(&role).await?;
info!("Hosts with role {role:?} : {}", hosts.len());
hosts.iter().enumerate().for_each(|(idx, h)| {
info!(
"Found host index {idx} with role {role:?} => \n{}\n{}",
h.summary(),
h.parts_list()
)
});
}
Ok(Outcome::success(
"Inventory inspection complete".to_string(),
))
}
fn get_name(&self) -> InterpretName {
InterpretName::Custom("InspectInventoryInterpret")
}
// NOTE(review): the three getters below panic if called — not implemented.
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}

View File

@@ -1,3 +1,7 @@
mod discovery;
pub mod inspect;
pub use discovery::*;
use async_trait::async_trait;
use harmony_inventory_agent::local_presence::DiscoveryEvent;
use log::{debug, info, trace};
@@ -18,11 +22,11 @@ use harmony_types::id::Id;
/// This will allow us to register/update hosts running harmony_inventory_agent
/// from LAN in the Harmony inventory
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoverInventoryAgentScore {
pub struct LaunchDiscoverInventoryAgentScore {
pub discovery_timeout: Option<u64>,
}
impl<T: Topology> Score<T> for DiscoverInventoryAgentScore {
impl<T: Topology> Score<T> for LaunchDiscoverInventoryAgentScore {
fn name(&self) -> String {
"DiscoverInventoryAgentScore".to_string()
}
@@ -36,7 +40,7 @@ impl<T: Topology> Score<T> for DiscoverInventoryAgentScore {
#[derive(Debug)]
struct DiscoverInventoryAgentInterpret {
score: DiscoverInventoryAgentScore,
score: LaunchDiscoverInventoryAgentScore,
}
#[async_trait]
@@ -46,6 +50,13 @@ impl<T: Topology> Interpret<T> for DiscoverInventoryAgentInterpret {
_inventory: &Inventory,
_topology: &T,
) -> Result<Outcome, InterpretError> {
match self.score.discovery_timeout {
Some(timeout) => info!("Discovery agent will wait for {timeout} seconds"),
None => info!(
"Discovery agent will wait forever in the background, go on and enjoy this delicious inventory."
),
};
harmony_inventory_agent::local_presence::discover_agents(
self.score.discovery_timeout,
|event: DiscoveryEvent| -> Result<(), String> {

View File

@@ -1,67 +0,0 @@
use async_trait::async_trait;
use derive_new::new;
use serde::Serialize;
use crate::{
data::Version,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::Topology,
};
use harmony_types::id::Id;
#[derive(Debug, new, Clone, Serialize)]
pub struct IpxeScore {
//files_to_serve: Url,
}
impl<T: Topology> Score<T> for IpxeScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(IpxeInterpret::new(self.clone()))
}
fn name(&self) -> String {
"IpxeScore".to_string()
}
}
#[derive(Debug, new, Clone)]
pub struct IpxeInterpret {
_score: IpxeScore,
}
#[async_trait]
impl<T: Topology> Interpret<T> for IpxeInterpret {
async fn execute(
&self,
_inventory: &Inventory,
_topology: &T,
) -> Result<Outcome, InterpretError> {
/*
let http_server = &topology.http_server;
http_server.ensure_initialized().await?;
Ok(Outcome::success(format!(
"Http Server running and serving files from {}",
self.score.files_to_serve
)))
*/
todo!();
}
fn get_name(&self) -> InterpretName {
InterpretName::Ipxe
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}

View File

@@ -6,7 +6,6 @@ pub mod dummy;
pub mod helm;
pub mod http;
pub mod inventory;
pub mod ipxe;
pub mod k3d;
pub mod k8s;
pub mod lamp;

View File

@@ -0,0 +1,120 @@
use async_trait::async_trait;
use derive_new::new;
use harmony_types::id::Id;
use log::{error, info, warn};
use serde::Serialize;
use crate::{
data::Version,
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::inventory::{DiscoverHostForRoleScore, LaunchDiscoverInventoryAgentScore},
score::Score,
topology::HAClusterTopology,
};
// -------------------------------------------------------------------------------------------------
// Step 01: Inventory (default PXE + Kickstart in RAM + Rust agent)
// - This score exposes/ensures the default inventory assets and waits for discoveries.
// - No early bonding. Simple access DHCP.
// -------------------------------------------------------------------------------------------------
/// Step 01 score: ensure default inventory assets (PXE + kickstart + agent)
/// and wait until a bootstrap host has been discovered.
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetup01InventoryScore {}
impl Score<HAClusterTopology> for OKDSetup01InventoryScore {
    fn name(&self) -> String {
        String::from("OKDSetup01InventoryScore")
    }

    fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
        Box::new(OKDSetup01InventoryInterpret::new(self.clone()))
    }
}
/// Interpret state for step 01: the originating score plus version/status
/// bookkeeping reported through the `Interpret` trait getters.
#[derive(Debug, Clone)]
pub struct OKDSetup01InventoryInterpret {
    score: OKDSetup01InventoryScore,
    version: Version,
    status: InterpretStatus,
}
impl OKDSetup01InventoryInterpret {
    /// Builds a queued interpret for the given score at version 1.0.0.
    pub fn new(score: OKDSetup01InventoryScore) -> Self {
        Self {
            score,
            version: Version::from("1.0.0").unwrap(),
            status: InterpretStatus::QUEUED,
        }
    }
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetup01InventoryInterpret {
    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OKDSetup01Inventory")
    }
    fn get_version(&self) -> Version {
        self.version.clone()
    }
    fn get_status(&self) -> InterpretStatus {
        self.status.clone()
    }
    fn get_children(&self) -> Vec<Id> {
        vec![]
    }

    /// Prompts the operator to configure the cluster DNS records manually, then
    /// waits until a physical host has been discovered for the Bootstrap role.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &HAClusterTopology,
    ) -> Result<Outcome, InterpretError> {
        info!("Setting up base DNS config for OKD");
        let cluster_domain = &topology.domain_name;
        let load_balancer_ip = &topology.load_balancer.get_ip();
        inquire::Confirm::new(&format!(
            "Set hostnames manually in your opnsense dnsmasq config :
*.apps.{cluster_domain} -> {load_balancer_ip}
api.{cluster_domain} -> {load_balancer_ip}
api-int.{cluster_domain} -> {load_balancer_ip}
When you can dig them, confirm to continue.
"
        ))
        .prompt()
        .expect("Prompt error");
        // TODO reactivate automatic dns config when migration from unbound to dnsmasq is done
        // OKDDnsScore::new(topology)
        //     .interpret(inventory, topology)
        //     .await?;
        // TODO refactor this section into a function discover_hosts_for_role(...) that can be used
        // from anywhere in the project, not a member of this struct
        let repo = InventoryRepositoryFactory::build().await?;
        let bootstrap_host: PhysicalHost = loop {
            // BUGFIX: the previous version ran one extra discovery round even
            // after a bootstrap host was already present, and unwrapped the
            // Option at the end. Break out as soon as a host is known.
            if let Some(host) = repo
                .get_host_for_role(&HostRole::Bootstrap)
                .await?
                .into_iter()
                .next()
            {
                break host;
            }
            DiscoverHostForRoleScore {
                role: HostRole::Bootstrap,
            }
            .interpret(inventory, topology)
            .await?;
        };
        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            format!(
                "Found and assigned bootstrap node: {}",
                bootstrap_host.summary()
            ),
        ))
    }
}

View File

@@ -0,0 +1,387 @@
use std::{fmt::Write, path::PathBuf};
use async_trait::async_trait;
use derive_new::new;
use harmony_secret::SecretManager;
use harmony_types::id::Id;
use log::{debug, error, info, warn};
use serde::{Deserialize, Serialize};
use tokio::{fs::File, io::AsyncWriteExt, process::Command};
use crate::{
config::secret::{RedhatSecret, SshKeyPair},
data::{FileContent, FilePath, Version},
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
instrumentation::{HarmonyEvent, instrument},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::{
dhcp::DhcpHostBindingScore,
http::{IPxeMacBootFileScore, StaticFilesHttpScore},
inventory::LaunchDiscoverInventoryAgentScore,
okd::{
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore,
templates::{BootstrapIpxeTpl, InstallConfigYaml},
},
},
score::Score,
topology::{HAClusterTopology, HostBinding},
};
// -------------------------------------------------------------------------------------------------
// Step 02: Bootstrap
// - Select bootstrap node (from discovered set).
// - Render per-MAC iPXE pointing to OKD 4.19 SCOS live assets + bootstrap ignition.
// - Reboot the host via SSH and wait for bootstrap-complete.
// - No bonding at this stage unless absolutely required; prefer persistence via MC later.
// -------------------------------------------------------------------------------------------------
/// Step 02 score: provision the bootstrap node (per-MAC iPXE, ignition files,
/// bootstrap load balancer, reboot).
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetup02BootstrapScore {}
impl Score<HAClusterTopology> for OKDSetup02BootstrapScore {
    fn name(&self) -> String {
        String::from("OKDSetup02BootstrapScore")
    }

    fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
        Box::new(OKDSetup02BootstrapInterpret::new())
    }
}
/// Interpret state for step 02; unlike the other steps it keeps no score,
/// only version/status bookkeeping for the `Interpret` trait getters.
#[derive(Debug, Clone)]
pub struct OKDSetup02BootstrapInterpret {
    version: Version,
    status: InterpretStatus,
}
impl OKDSetup02BootstrapInterpret {
pub fn new() -> Self {
let version = Version::from("1.0.0").unwrap();
Self {
version,
status: InterpretStatus::QUEUED,
}
}
async fn get_bootstrap_node(&self) -> Result<PhysicalHost, InterpretError> {
let repo = InventoryRepositoryFactory::build().await?;
match repo
.get_host_for_role(&HostRole::Bootstrap)
.await?
.into_iter()
.next()
{
Some(host) => Ok(host),
None => Err(InterpretError::new(
"No bootstrap node available".to_string(),
)),
}
}
async fn prepare_ignition_files(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
) -> Result<(), InterpretError> {
let okd_bin_path = PathBuf::from("./data/okd/bin");
let okd_installation_path_str =
format!("./data/okd/installation_files_{}", inventory.location.name);
let okd_images_path = &PathBuf::from("./data/okd/installer_image/");
let okd_installation_path = &PathBuf::from(okd_installation_path_str);
let exit_status = Command::new("mkdir")
.arg("-p")
.arg(okd_installation_path)
.spawn()
.expect("Command failed to start")
.wait()
.await
.map_err(|e| {
InterpretError::new(format!("Failed to create okd installation directory : {e}"))
})?;
if !exit_status.success() {
return Err(InterpretError::new(format!(
"Failed to create okd installation directory"
)));
} else {
info!(
"Created OKD installation directory {}",
okd_installation_path.to_string_lossy()
);
}
let redhat_secret = SecretManager::get_or_prompt::<RedhatSecret>().await?;
let ssh_key = SecretManager::get_or_prompt::<SshKeyPair>().await?;
let install_config_yaml = InstallConfigYaml {
cluster_name: &topology.get_cluster_name(),
cluster_domain: &topology.get_cluster_base_domain(),
pull_secret: &redhat_secret.pull_secret,
ssh_public_key: &ssh_key.public,
}
.to_string();
let install_config_file_path = &okd_installation_path.join("install-config.yaml");
self.create_file(install_config_file_path, install_config_yaml.as_bytes())
.await?;
let install_config_backup_extension = install_config_file_path
.extension()
.map(|e| format!("{}.bak", e.to_string_lossy()))
.unwrap_or("bak".to_string());
let mut install_config_backup = install_config_file_path.clone();
install_config_backup.set_extension(install_config_backup_extension);
self.create_file(&install_config_backup, install_config_yaml.as_bytes())
.await?;
info!("Creating manifest files with openshift-install");
let output = Command::new(okd_bin_path.join("openshift-install"))
.args([
"create",
"manifests",
"--dir",
okd_installation_path.to_str().unwrap(),
])
.output()
.await
.map_err(|e| InterpretError::new(format!("Failed to create okd manifest : {e}")))?;
let stdout = String::from_utf8(output.stdout).unwrap();
info!("openshift-install stdout :\n\n{}", stdout);
let stderr = String::from_utf8(output.stderr).unwrap();
info!("openshift-install stderr :\n\n{}", stderr);
info!("openshift-install exit status : {}", output.status);
if !output.status.success() {
return Err(InterpretError::new(format!(
"Failed to create okd manifest, exit code {} : {}",
output.status, stderr
)));
}
info!("Creating ignition files with openshift-install");
let output = Command::new(okd_bin_path.join("openshift-install"))
.args([
"create",
"ignition-configs",
"--dir",
okd_installation_path.to_str().unwrap(),
])
.output()
.await
.map_err(|e| {
InterpretError::new(format!("Failed to create okd ignition config : {e}"))
})?;
let stdout = String::from_utf8(output.stdout).unwrap();
info!("openshift-install stdout :\n\n{}", stdout);
let stderr = String::from_utf8(output.stderr).unwrap();
info!("openshift-install stderr :\n\n{}", stderr);
info!("openshift-install exit status : {}", output.status);
if !output.status.success() {
return Err(InterpretError::new(format!(
"Failed to create okd manifest, exit code {} : {}",
output.status, stderr
)));
}
let ignition_files_http_path = PathBuf::from("okd_ignition_files");
let prepare_file_content = async |filename: &str| -> Result<FileContent, InterpretError> {
let local_path = okd_installation_path.join(filename);
let remote_path = ignition_files_http_path.join(filename);
info!(
"Preparing file content for local file : {} to remote : {}",
local_path.to_string_lossy(),
remote_path.to_string_lossy()
);
let content = tokio::fs::read_to_string(&local_path).await.map_err(|e| {
InterpretError::new(format!(
"Could not read file content {} : {e}",
local_path.to_string_lossy()
))
})?;
Ok(FileContent {
path: FilePath::Relative(remote_path.to_string_lossy().to_string()),
content,
})
};
StaticFilesHttpScore {
remote_path: None,
folder_to_serve: None,
files: vec![
prepare_file_content("bootstrap.ign").await?,
prepare_file_content("master.ign").await?,
prepare_file_content("worker.ign").await?,
prepare_file_content("metadata.json").await?,
],
}
.interpret(inventory, topology)
.await?;
info!("Successfully prepared ignition files for OKD installation");
// ignition_files_http_path // = PathBuf::from("okd_ignition_files");
info!(
r#"Uploading images, they can be refreshed with a command similar to this one: openshift-install coreos print-stream-json | grep -Eo '"https.*(kernel.|initramfs.|rootfs.)\w+(\.img)?"' | grep x86_64 | xargs -n 1 curl -LO"#
);
inquire::Confirm::new(
&format!("push installer image files with `scp -r {}/* root@{}:/usr/local/http/scos/` until performance issue is resolved", okd_images_path.to_string_lossy(), topology.http_server.get_ip())).prompt().expect("Prompt error");
// let scos_http_path = PathBuf::from("scos");
// StaticFilesHttpScore {
// folder_to_serve: Some(Url::LocalFolder(
// okd_images_path.to_string_lossy().to_string(),
// )),
// remote_path: Some(scos_http_path.to_string_lossy().to_string()),
// files: vec![],
// }
// .interpret(inventory, topology)
// .await?;
Ok(())
}
async fn configure_host_binding(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
) -> Result<(), InterpretError> {
let binding = HostBinding {
logical_host: topology.bootstrap_host.clone(),
physical_host: self.get_bootstrap_node().await?,
};
info!("Configuring host binding for bootstrap node {binding:?}");
DhcpHostBindingScore {
host_binding: vec![binding],
domain: Some(topology.domain_name.clone()),
}
.interpret(inventory, topology)
.await?;
Ok(())
}
async fn render_per_mac_pxe(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
) -> Result<(), InterpretError> {
let content = BootstrapIpxeTpl {
http_ip: &topology.http_server.get_ip().to_string(),
scos_path: "scos", // TODO use some constant
ignition_http_path: "okd_ignition_files", // TODO use proper variable
installation_device: "/dev/sda",
ignition_file_name: "bootstrap.ign",
}
.to_string();
let bootstrap_node = self.get_bootstrap_node().await?;
let mac_address = bootstrap_node.get_mac_address();
info!("[Bootstrap] Rendering per-MAC PXE for bootstrap node");
debug!("bootstrap ipxe content : {content}");
debug!("bootstrap mac addresses : {mac_address:?}");
IPxeMacBootFileScore {
mac_address,
content,
}
.interpret(inventory, topology)
.await?;
Ok(())
}
async fn setup_bootstrap_load_balancer(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
) -> Result<(), InterpretError> {
let outcome = OKDBootstrapLoadBalancerScore::new(topology)
.interpret(inventory, topology)
.await?;
info!("Successfully executed OKDBootstrapLoadBalancerScore : {outcome:?}");
Ok(())
}
async fn reboot_target(&self) -> Result<(), InterpretError> {
// Placeholder: ssh reboot using the inventory ephemeral key
info!("[Bootstrap] Rebooting bootstrap node via SSH");
// TODO reboot programatically, there are some logical checks and refactoring to do such as
// accessing the bootstrap node config (ip address) from the inventory
let confirmation = inquire::Confirm::new(
"Now reboot the bootstrap node so it picks up its pxe boot file. Press enter when ready.",
)
.prompt()
.expect("Unexpected prompt error");
Ok(())
}
async fn wait_for_bootstrap_complete(&self) -> Result<(), InterpretError> {
// Placeholder: wait-for bootstrap-complete
info!("[Bootstrap] Waiting for bootstrap-complete …");
todo!("[Bootstrap] Waiting for bootstrap-complete …")
}
async fn create_file(&self, path: &PathBuf, content: &[u8]) -> Result<(), InterpretError> {
let mut install_config_file = File::create(path).await.map_err(|e| {
InterpretError::new(format!(
"Could not create file {} : {e}",
path.to_string_lossy()
))
})?;
install_config_file.write(content).await.map_err(|e| {
InterpretError::new(format!(
"Could not write file {} : {e}",
path.to_string_lossy()
))
})?;
Ok(())
}
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetup02BootstrapInterpret {
    /// Runs the full bootstrap sequence: DHCP binding, ignition generation,
    /// per-MAC PXE, bootstrap load balancer, reboot, then wait for completion.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &HAClusterTopology,
    ) -> Result<Outcome, InterpretError> {
        self.configure_host_binding(inventory, topology).await?;
        self.prepare_ignition_files(inventory, topology).await?;
        self.render_per_mac_pxe(inventory, topology).await?;
        self.setup_bootstrap_load_balancer(inventory, topology)
            .await?;
        // TODO https://docs.okd.io/latest/installing/installing_bare_metal/upi/installing-bare-metal.html#installation-user-provisioned-validating-dns_installing-bare-metal
        // self.validate_dns_config(inventory, topology).await?;
        self.reboot_target().await?;
        self.wait_for_bootstrap_complete().await?;
        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            String::from("Bootstrap phase complete"),
        ))
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OKDSetup02Bootstrap")
    }

    fn get_version(&self) -> Version {
        self.version.clone()
    }

    fn get_status(&self) -> InterpretStatus {
        self.status.clone()
    }

    fn get_children(&self) -> Vec<Id> {
        Vec::new()
    }
}

View File

@@ -0,0 +1,277 @@
use std::{fmt::Write, path::PathBuf};
use async_trait::async_trait;
use derive_new::new;
use harmony_types::id::Id;
use log::{debug, info};
use serde::Serialize;
use crate::{
data::Version,
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::{
dhcp::DhcpHostBindingScore, http::IPxeMacBootFileScore,
inventory::DiscoverHostForRoleScore, okd::templates::BootstrapIpxeTpl,
},
score::Score,
topology::{HAClusterTopology, HostBinding},
};
// -------------------------------------------------------------------------------------------------
// Step 03: Control Plane
// - Render per-MAC PXE & ignition for cp0/cp1/cp2.
// - Persist bonding via MachineConfigs (or NNCP) once SCOS is active.
// -------------------------------------------------------------------------------------------------
/// Step 03 score: provision the three control-plane nodes (DHCP, iPXE, reboot).
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetup03ControlPlaneScore {}
impl Score<HAClusterTopology> for OKDSetup03ControlPlaneScore {
    fn name(&self) -> String {
        String::from("OKDSetup03ControlPlaneScore")
    }

    fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
        Box::new(OKDSetup03ControlPlaneInterpret::new(self.clone()))
    }
}
/// Interpret state for step 03: the originating score plus version/status
/// bookkeeping reported through the `Interpret` trait getters.
#[derive(Debug, Clone)]
pub struct OKDSetup03ControlPlaneInterpret {
    score: OKDSetup03ControlPlaneScore,
    version: Version,
    status: InterpretStatus,
}
impl OKDSetup03ControlPlaneInterpret {
    /// Builds a queued interpret for the given score at version 1.0.0.
    pub fn new(score: OKDSetup03ControlPlaneScore) -> Self {
        Self {
            version: Version::from("1.0.0").unwrap(),
            score,
            status: InterpretStatus::QUEUED,
        }
    }

    /// Ensures that three physical hosts are discovered and available for the
    /// ControlPlane role. It will trigger discovery rounds until enough hosts
    /// are found.
    async fn get_nodes(
        &self,
        inventory: &Inventory,
        topology: &HAClusterTopology,
    ) -> Result<Vec<PhysicalHost>, InterpretError> {
        const REQUIRED_HOSTS: usize = 3;
        let repo = InventoryRepositoryFactory::build().await?;
        let mut control_plane_hosts = repo.get_host_for_role(&HostRole::ControlPlane).await?;
        while control_plane_hosts.len() < REQUIRED_HOSTS {
            info!(
                "Discovery of {} control plane hosts in progress, current number {}",
                REQUIRED_HOSTS,
                control_plane_hosts.len()
            );
            // This score triggers the discovery agent for a specific role.
            DiscoverHostForRoleScore {
                role: HostRole::ControlPlane,
            }
            .interpret(inventory, topology)
            .await?;
            control_plane_hosts = repo.get_host_for_role(&HostRole::ControlPlane).await?;
        }
        // BUGFIX: the `len() < REQUIRED_HOSTS` error branch that used to follow
        // was dead code — the loop above only exits once enough hosts exist.
        // Take exactly the number of required hosts to ensure consistency.
        Ok(control_plane_hosts
            .into_iter()
            .take(REQUIRED_HOSTS)
            .collect())
    }

    /// Configures DHCP host bindings for all control plane nodes.
    async fn configure_host_binding(
        &self,
        inventory: &Inventory,
        topology: &HAClusterTopology,
        nodes: &Vec<PhysicalHost>,
    ) -> Result<(), InterpretError> {
        info!("[ControlPlane] Configuring host bindings for control plane nodes.");
        // Ensure the topology definition matches the number of physical nodes found.
        if topology.control_plane.len() != nodes.len() {
            return Err(InterpretError::new(format!(
                "Mismatch between logical control plane hosts defined in topology ({}) and physical nodes found ({}).",
                topology.control_plane.len(),
                nodes.len()
            )));
        }
        // Create a binding for each physical host to its corresponding logical host.
        let bindings: Vec<HostBinding> = topology
            .control_plane
            .iter()
            .zip(nodes.iter())
            .map(|(logical_host, physical_host)| {
                info!(
                    "Creating binding: Logical Host '{}' -> Physical Host ID '{}'",
                    logical_host.name, physical_host.id
                );
                HostBinding {
                    logical_host: logical_host.clone(),
                    physical_host: physical_host.clone(),
                }
            })
            .collect();
        DhcpHostBindingScore {
            host_binding: bindings,
            domain: Some(topology.domain_name.clone()),
        }
        .interpret(inventory, topology)
        .await?;
        Ok(())
    }

    /// Renders and deploys a per-MAC iPXE boot file for each control plane node.
    async fn configure_ipxe(
        &self,
        inventory: &Inventory,
        topology: &HAClusterTopology,
        nodes: &Vec<PhysicalHost>,
    ) -> Result<(), InterpretError> {
        info!("[ControlPlane] Rendering per-MAC iPXE configurations.");
        // The iPXE script content is the same for all control plane nodes,
        // pointing to the 'master.ign' ignition file.
        let content = BootstrapIpxeTpl {
            http_ip: &topology.http_server.get_ip().to_string(),
            scos_path: "scos",
            ignition_http_path: "okd_ignition_files",
            installation_device: "/dev/sda", // This might need to be configurable per-host in the future
            ignition_file_name: "master.ign", // Control plane nodes use the master ignition file
        }
        .to_string();
        debug!("[ControlPlane] iPXE content template:\n{}", content);
        // Create and apply an iPXE boot file for each node.
        for node in nodes {
            let mac_address = node.get_mac_address();
            if mac_address.is_empty() {
                return Err(InterpretError::new(format!(
                    "Physical host with ID '{}' has no MAC addresses defined.",
                    node.id
                )));
            }
            info!(
                "[ControlPlane] Applying iPXE config for node ID '{}' with MACs: {:?}",
                node.id, mac_address
            );
            IPxeMacBootFileScore {
                mac_address,
                content: content.clone(),
            }
            .interpret(inventory, topology)
            .await?;
        }
        Ok(())
    }

    /// Prompts the user to reboot the target control plane nodes.
    async fn reboot_targets(&self, nodes: &Vec<PhysicalHost>) -> Result<(), InterpretError> {
        let node_ids: Vec<String> = nodes.iter().map(|n| n.id.to_string()).collect();
        info!(
            "[ControlPlane] Requesting reboot for control plane nodes: {:?}",
            node_ids
        );
        let confirmation = inquire::Confirm::new(
            &format!("Please reboot the {} control plane nodes ({}) to apply their PXE configuration. Press enter when ready.", nodes.len(), node_ids.join(", ")),
        )
        .prompt()
        .map_err(|e| InterpretError::new(format!("User prompt failed: {}", e)))?;
        if !confirmation {
            return Err(InterpretError::new(
                "User aborted the operation.".to_string(),
            ));
        }
        Ok(())
    }

    /// Placeholder for automating network bonding configuration.
    async fn persist_network_bond(&self) -> Result<(), InterpretError> {
        // Generate MC or NNCP from inventory NIC data; apply via ignition or post-join.
        info!("[ControlPlane] Ensuring persistent bonding via MachineConfig/NNCP");
        inquire::Confirm::new(
            "Network configuration for control plane nodes is not automated yet. Configure it manually if needed.",
        )
        .prompt()
        .map_err(|e| InterpretError::new(format!("User prompt failed: {}", e)))?;
        Ok(())
    }
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetup03ControlPlaneInterpret {
    /// Drives the control-plane provisioning sequence end to end.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &HAClusterTopology,
    ) -> Result<Outcome, InterpretError> {
        // Gather the three physical hosts required for the control plane.
        let nodes = self.get_nodes(inventory, topology).await?;
        // Reserve their addresses in DHCP.
        self.configure_host_binding(inventory, topology, &nodes)
            .await?;
        // Point each node's iPXE boot file at the master ignition.
        self.configure_ipxe(inventory, topology, &nodes).await?;
        // Power-cycle the nodes to kick off the OS installation.
        self.reboot_targets(&nodes).await?;
        // Post-boot network configuration (bonding) is still manual.
        self.persist_network_bond().await?;
        // TODO: Implement a step to wait for the control plane nodes to join the cluster
        // and for the cluster operators to become available. This would be similar to
        // the `wait-for bootstrap-complete` command.
        info!("[ControlPlane] Provisioning initiated. Monitor the cluster convergence manually.");
        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            String::from("Control plane provisioning has been successfully initiated."),
        ))
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OKDSetup03ControlPlane")
    }

    fn get_version(&self) -> Version {
        self.version.clone()
    }

    fn get_status(&self) -> InterpretStatus {
        self.status.clone()
    }

    fn get_children(&self) -> Vec<Id> {
        Vec::new()
    }
}

View File

@@ -0,0 +1,102 @@
use std::{fmt::Write, path::PathBuf};
use async_trait::async_trait;
use derive_new::new;
use harmony_secret::SecretManager;
use harmony_types::id::Id;
use log::{debug, error, info, warn};
use serde::{Deserialize, Serialize};
use tokio::{fs::File, io::AsyncWriteExt, process::Command};
use crate::{
config::secret::{RedhatSecret, SshKeyPair},
data::{FileContent, FilePath, Version},
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
instrumentation::{HarmonyEvent, instrument},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::{
dhcp::DhcpHostBindingScore,
http::{IPxeMacBootFileScore, StaticFilesHttpScore},
inventory::LaunchDiscoverInventoryAgentScore,
okd::{
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore,
templates::{BootstrapIpxeTpl, InstallConfigYaml},
},
},
score::Score,
topology::{HAClusterTopology, HostBinding},
};
// -------------------------------------------------------------------------------------------------
// Step 04: Workers
// - Render per-MAC PXE & ignition for workers; join nodes.
// - Persist bonding via MC/NNCP as required (same approach as masters).
// -------------------------------------------------------------------------------------------------
/// Step 04 score: provision the OKD worker nodes (currently a stub).
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetup04WorkersScore {}
impl Score<HAClusterTopology> for OKDSetup04WorkersScore {
    fn name(&self) -> String {
        String::from("OKDSetup04WorkersScore")
    }

    fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
        Box::new(OKDSetup04WorkersInterpret::new(self.clone()))
    }
}
/// Interpret state for step 04: the originating score plus version/status
/// bookkeeping reported through the `Interpret` trait getters.
#[derive(Debug, Clone)]
pub struct OKDSetup04WorkersInterpret {
    score: OKDSetup04WorkersScore,
    version: Version,
    status: InterpretStatus,
}
impl OKDSetup04WorkersInterpret {
    /// Builds a queued interpret for the given score at version 1.0.0.
    pub fn new(score: OKDSetup04WorkersScore) -> Self {
        Self {
            version: Version::from("1.0.0").unwrap(),
            score,
            status: InterpretStatus::QUEUED,
        }
    }

    /// Placeholder: will render per-MAC PXE files for the workers and trigger
    /// their reboot; currently only logs its intent.
    async fn render_and_reboot(&self) -> Result<(), InterpretError> {
        info!("[Workers] Rendering per-MAC PXE for workers and rebooting");
        Ok(())
    }
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetup04WorkersInterpret {
    /// Provisions the worker nodes (stubbed: delegates to `render_and_reboot`).
    async fn execute(
        &self,
        _inventory: &Inventory,
        _topology: &HAClusterTopology,
    ) -> Result<Outcome, InterpretError> {
        self.render_and_reboot().await?;
        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            String::from("Workers provisioned"),
        ))
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OKDSetup04Workers")
    }

    fn get_version(&self) -> Version {
        self.version.clone()
    }

    fn get_status(&self) -> InterpretStatus {
        self.status.clone()
    }

    fn get_children(&self) -> Vec<Id> {
        Vec::new()
    }
}

View File

@@ -0,0 +1,101 @@
use std::{fmt::Write, path::PathBuf};
use async_trait::async_trait;
use derive_new::new;
use harmony_secret::SecretManager;
use harmony_types::id::Id;
use log::{debug, error, info, warn};
use serde::{Deserialize, Serialize};
use tokio::{fs::File, io::AsyncWriteExt, process::Command};
use crate::{
config::secret::{RedhatSecret, SshKeyPair},
data::{FileContent, FilePath, Version},
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
instrumentation::{HarmonyEvent, instrument},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::{
dhcp::DhcpHostBindingScore,
http::{IPxeMacBootFileScore, StaticFilesHttpScore},
inventory::LaunchDiscoverInventoryAgentScore,
okd::{
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore,
templates::{BootstrapIpxeTpl, InstallConfigYaml},
},
},
score::Score,
topology::{HAClusterTopology, HostBinding},
};
// -------------------------------------------------------------------------------------------------
// Step 05: Sanity Check
// - Validate API reachability, ClusterOperators, ingress, and SDN status.
// -------------------------------------------------------------------------------------------------
/// Step 05 score: validate API reachability, ClusterOperators, ingress and SDN.
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetup05SanityCheckScore {}
impl Score<HAClusterTopology> for OKDSetup05SanityCheckScore {
    fn name(&self) -> String {
        String::from("OKDSetup05SanityCheckScore")
    }

    fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
        Box::new(OKDSetup05SanityCheckInterpret::new(self.clone()))
    }
}
/// Interpret state for step 05: the originating score plus version/status
/// bookkeeping reported through the `Interpret` trait getters.
#[derive(Debug, Clone)]
pub struct OKDSetup05SanityCheckInterpret {
    score: OKDSetup05SanityCheckScore,
    version: Version,
    status: InterpretStatus,
}
impl OKDSetup05SanityCheckInterpret {
    /// Builds a queued interpret for the given score at version 1.0.0.
    pub fn new(score: OKDSetup05SanityCheckScore) -> Self {
        Self {
            version: Version::from("1.0.0").unwrap(),
            score,
            status: InterpretStatus::QUEUED,
        }
    }

    /// Placeholder for the actual health checks (API, ClusterOperators,
    /// ingress, SDN); currently only logs its intent.
    async fn run_checks(&self) -> Result<(), InterpretError> {
        info!("[Sanity] Checking API, COs, Ingress, and SDN health …");
        Ok(())
    }
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetup05SanityCheckInterpret {
    /// Runs the post-install sanity checks (stubbed: delegates to `run_checks`).
    async fn execute(
        &self,
        _inventory: &Inventory,
        _topology: &HAClusterTopology,
    ) -> Result<Outcome, InterpretError> {
        self.run_checks().await?;
        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            String::from("Sanity checks passed"),
        ))
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OKDSetup05SanityCheck")
    }

    fn get_version(&self) -> Version {
        self.version.clone()
    }

    fn get_status(&self) -> InterpretStatus {
        self.status.clone()
    }

    fn get_children(&self) -> Vec<Id> {
        Vec::new()
    }
}

View File

@@ -0,0 +1,101 @@
// -------------------------------------------------------------------------------------------------
use async_trait::async_trait;
use derive_new::new;
use harmony_secret::SecretManager;
use harmony_types::id::Id;
use log::{debug, error, info, warn};
use serde::{Deserialize, Serialize};
use std::{fmt::Write, path::PathBuf};
use tokio::{fs::File, io::AsyncWriteExt, process::Command};
use crate::{
config::secret::{RedhatSecret, SshKeyPair},
data::{FileContent, FilePath, Version},
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
instrumentation::{HarmonyEvent, instrument},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::{
dhcp::DhcpHostBindingScore,
http::{IPxeMacBootFileScore, StaticFilesHttpScore},
inventory::LaunchDiscoverInventoryAgentScore,
okd::{
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore,
templates::{BootstrapIpxeTpl, InstallConfigYaml},
},
},
score::Score,
topology::{HAClusterTopology, HostBinding},
};
// Step 06: Installation Report
// - Emit JSON and concise human summary of nodes, roles, versions, and health.
// -------------------------------------------------------------------------------------------------
/// Score for step 06 of the OKD installation pipeline: producing the
/// final installation report. Carries no configuration of its own.
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetup06InstallationReportScore {}
impl Score<HAClusterTopology> for OKDSetup06InstallationReportScore {
    /// Wraps this score into the interpret that performs the actual work.
    fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
        let interpret = OKDSetup06InstallationReportInterpret::new(self.clone());
        Box::new(interpret)
    }

    /// Human-readable score identifier used in logs and reports.
    fn name(&self) -> String {
        String::from("OKDSetup06InstallationReportScore")
    }
}
/// Interpret that generates the step-06 installation report.
#[derive(Debug, Clone)]
pub struct OKDSetup06InstallationReportInterpret {
    // Score this interpret was created from (currently carries no config).
    score: OKDSetup06InstallationReportScore,
    // Interpret version, fixed at construction.
    version: Version,
    // Lifecycle status; starts as QUEUED.
    status: InterpretStatus,
}
impl OKDSetup06InstallationReportInterpret {
    /// Builds a queued report interpret for the given score.
    ///
    /// `Version::from("1.0.0")` is infallible for this literal, so the
    /// `unwrap` cannot panic.
    pub fn new(score: OKDSetup06InstallationReportScore) -> Self {
        let version = Version::from("1.0.0").unwrap();
        Self {
            version,
            score,
            status: InterpretStatus::QUEUED,
        }
    }

    /// Generates the installation report.
    ///
    /// Currently a stub: logs intent and returns `Ok(())`. Per the step-06
    /// module docs, the real implementation should emit a machine-readable
    /// JSON report plus a concise human summary of nodes, roles, versions,
    /// and health.
    async fn generate(&self) -> Result<(), InterpretError> {
        // Dropped the dangling trailing comma that was left in the macro
        // call (`info!("…",)`) — it suggested a format argument had been
        // removed at some point.
        info!("[Report] Generating OKD installation report");
        Ok(())
    }
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetup06InstallationReportInterpret {
    /// Stable identifier used in logs and event reporting.
    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OKDSetup06InstallationReport")
    }

    /// Version fixed at construction time.
    fn get_version(&self) -> Version {
        self.version.clone()
    }

    /// Current lifecycle status (starts as `QUEUED`).
    fn get_status(&self) -> InterpretStatus {
        self.status.clone()
    }

    /// This interpret spawns no child interprets.
    fn get_children(&self) -> Vec<Id> {
        Vec::new()
    }

    /// Delegates to `generate`; any failure propagates via `?`.
    async fn execute(
        &self,
        _inventory: &Inventory,
        _topology: &HAClusterTopology,
    ) -> Result<Outcome, InterpretError> {
        self.generate().await?;
        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            "Installation report generated".into(),
        ))
    }
}

View File

@@ -37,21 +37,23 @@ impl OKDBootstrapDhcpScore {
.clone(),
});
// TODO refactor this so it is not copy pasted from dhcp.rs
Self {
dhcp_score: DhcpScore::new(
host_binding,
// TODO : we should add a tftp server to the topology instead of relying on the
// router address, this is leaking implementation details
Some(topology.router.get_gateway()),
None, // To allow UEFI boot we cannot provide a legacy file
Some("undionly.kpxe".to_string()),
Some("ipxe.efi".to_string()),
Some(format!(
"http://{}:8080/boot.ipxe",
topology.router.get_gateway()
)),
),
}
todo!("Add dhcp range")
// Self {
// dhcp_score: DhcpScore::new(
// host_binding,
// // TODO : we should add a tftp server to the topology instead of relying on the
// // router address, this is leaking implementation details
// Some(topology.router.get_gateway()),
// None, // To allow UEFI boot we cannot provide a legacy file
// Some("undionly.kpxe".to_string()),
// Some("ipxe.efi".to_string()),
// Some(format!(
// "http://{}:8080/boot.ipxe",
// topology.router.get_gateway()
// )),
// (self.),
// ),
// }
}
}

View File

@@ -8,7 +8,7 @@ use crate::{
score::Score,
topology::{
BackendServer, HAClusterTopology, HealthCheck, HttpMethod, HttpStatusCode, LoadBalancer,
LoadBalancerService, Topology,
LoadBalancerService, SSL, Topology,
},
};
@@ -44,6 +44,7 @@ impl OKDBootstrapLoadBalancerScore {
"/readyz".to_string(),
HttpMethod::GET,
HttpStatusCode::Success2xx,
SSL::SSL,
)),
},
];
@@ -54,6 +55,7 @@ impl OKDBootstrapLoadBalancerScore {
},
}
}
fn topology_to_backend_server(topology: &HAClusterTopology, port: u16) -> Vec<BackendServer> {
let mut backend: Vec<_> = topology
.control_plane
@@ -63,6 +65,14 @@ impl OKDBootstrapLoadBalancerScore {
port,
})
.collect();
topology.workers.iter().for_each(|worker| {
backend.push(BackendServer {
address: worker.ip.to_string(),
port,
})
});
backend.push(BackendServer {
address: topology.bootstrap_host.ip.to_string(),
port,

View File

@@ -1,3 +1,6 @@
use std::net::Ipv4Addr;
use harmony_types::net::IpAddress;
use serde::Serialize;
use crate::{
@@ -44,6 +47,16 @@ impl OKDDhcpScore {
})
});
let dhcp_server_ip = match topology.dhcp_server.get_ip() {
std::net::IpAddr::V4(ipv4_addr) => ipv4_addr,
std::net::IpAddr::V6(_ipv6_addr) => todo!("Support ipv6 someday"),
};
// TODO this could overflow, we should use proper subnet maths here instead of an ip
// address and guessing the subnet size from there
let start = Ipv4Addr::from(u32::from(dhcp_server_ip) + 100);
let end = Ipv4Addr::from(u32::from(dhcp_server_ip) + 150);
Self {
// TODO : we should add a tftp server to the topology instead of relying on the
// router address, this is leaking implementation details
@@ -57,6 +70,8 @@ impl OKDDhcpScore {
"http://{}:8080/boot.ipxe",
topology.router.get_gateway()
)),
dhcp_range: (IpAddress::from(start), IpAddress::from(end)),
domain: Some(topology.domain_name.clone()),
},
}
}

View File

@@ -0,0 +1,73 @@
//! OKDInstallationScore
//!
//! Overview
//! --------
//! OKDInstallationScore orchestrates an end-to-end, bare-metal OKD (OpenShift/OKD 4.19) installation.
//! It follows a “discovery-first, then provision” strategy with strict ordering,
//! observable progress, and minimal assumptions about the underlying network.
//!
//! High-level flow
//! 1) OKDSetup01Inventory
//! - Serve default iPXE + Kickstart (in-RAM CentOS Stream 9) for discovery only.
//! - Enable SSH with the clusters pubkey, start a Rust inventory agent.
//! - Harmony discovers nodes by scraping the agent endpoint and collects MACs/NICs.
//!
//! 2) OKDSetup02Bootstrap
//! - User selects which discovered node becomes bootstrap.
//! - Prepare the OKD cluster installation files
//! - Render per-MAC iPXE for bootstrap with OKD 4.19 SCOS live assets + ignition.
//! - Reboot node via SSH; install bootstrap; wait for bootstrap-complete.
//!
//! 3) OKDSetup03ControlPlane
//! - Render per-MAC iPXE for cp0/cp1/cp2 with ignition. Reboot via SSH, join masters.
//! - Configure network bond (where relevant) using OKD NMState MachineConfig
//!
//! 4) OKDSetup04Workers
//! - Render per-MAC iPXE for worker set; join workers.
//! - Configure network bond (where relevant) using OKD NMState MachineConfig
//!
//! 5) OKDSetup05SanityCheck
//! - Validate API/ingress/clusteroperators; ensure healthy control plane and SDN.
//!
//! 6) OKDSetup06InstallationReport
//! - Produce a concise, machine-readable report (JSON) and a human summary.
//!
//! Network notes
//! - During Inventory: ports must be simple access (no LACP). DHCP succeeds; iPXE
//! loads CentOS Stream live with Kickstart and starts the inventory endpoint.
//! - During Provisioning: only after SCOS is on disk and Ignition/MC can be applied
//! do we set the bond persistently. If early bonding is truly required on a host,
//! use kernel args selectively in the per-MAC PXE for that host, but never for the
//! generic discovery path.
//! - This is caused by the inherent race condition between PXE, which cannot perform
//! its DHCP recovery process on a bonded network, and the bond configuration itself,
//! which must be configured on host AND switch to connect properly.
//!
//! Configuration knobs
//! - public_domain: External wildcard/apps domain (e.g., apps.example.com).
//! - internal_domain: Internal cluster domain (e.g., cluster.local or harmony.mcd).
use crate::{
modules::okd::{
OKDSetup01InventoryScore, OKDSetup02BootstrapScore, OKDSetup03ControlPlaneScore,
OKDSetup04WorkersScore, OKDSetup05SanityCheckScore,
bootstrap_06_installation_report::OKDSetup06InstallationReportScore,
},
score::Score,
topology::HAClusterTopology,
};
/// Entry point that assembles the ordered list of OKD installation steps.
pub struct OKDInstallationPipeline;

impl OKDInstallationPipeline {
    /// Returns the six installation step scores in execution order.
    ///
    /// Ordering is significant: inventory → bootstrap → control plane →
    /// workers → sanity check → installation report.
    pub async fn get_all_scores() -> Vec<Box<dyn Score<HAClusterTopology>>> {
        let steps: Vec<Box<dyn Score<HAClusterTopology>>> = vec![
            Box::new(OKDSetup01InventoryScore::new()),
            Box::new(OKDSetup02BootstrapScore::new()),
            Box::new(OKDSetup03ControlPlaneScore::new()),
            Box::new(OKDSetup04WorkersScore::new()),
            Box::new(OKDSetup05SanityCheckScore::new()),
            Box::new(OKDSetup06InstallationReportScore::new()),
        ];
        steps
    }
}

View File

@@ -1,9 +1,9 @@
use askama::Template;
use async_trait::async_trait;
use derive_new::new;
use harmony_types::net::Url;
use harmony_types::net::{IpAddress, Url};
use serde::Serialize;
use std::net::IpAddr;
use std::net::{IpAddr, Ipv4Addr};
use crate::{
data::{FileContent, FilePath, Version},
@@ -16,29 +16,31 @@ use crate::{
use harmony_types::id::Id;
#[derive(Debug, new, Clone, Serialize)]
pub struct OkdIpxeScore {
pub struct OKDIpxeScore {
pub kickstart_filename: String,
pub harmony_inventory_agent: String,
pub cluster_pubkey_filename: String,
pub cluster_pubkey: FileContent,
}
impl<T: Topology + DhcpServer + TftpServer + HttpServer + Router> Score<T> for OkdIpxeScore {
impl<T: Topology + DhcpServer + TftpServer + HttpServer + Router> Score<T> for OKDIpxeScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(IpxeInterpret::new(self.clone()))
Box::new(OKDIpxeInterpret::new(self.clone()))
}
fn name(&self) -> String {
"OkdIpxeScore".to_string()
"OKDipxeScore".to_string()
}
}
#[derive(Debug, new, Clone)]
pub struct IpxeInterpret {
score: OkdIpxeScore,
pub struct OKDIpxeInterpret {
score: OKDIpxeScore,
}
#[async_trait]
impl<T: Topology + DhcpServer + TftpServer + HttpServer + Router> Interpret<T> for IpxeInterpret {
impl<T: Topology + DhcpServer + TftpServer + HttpServer + Router> Interpret<T>
for OKDIpxeInterpret
{
async fn execute(
&self,
inventory: &Inventory,
@@ -46,19 +48,32 @@ impl<T: Topology + DhcpServer + TftpServer + HttpServer + Router> Interpret<T> f
) -> Result<Outcome, InterpretError> {
let gateway_ip = topology.get_gateway();
let dhcp_server_ip = match DhcpServer::get_ip(topology) {
std::net::IpAddr::V4(ipv4_addr) => ipv4_addr,
std::net::IpAddr::V6(_ipv6_addr) => todo!("Support ipv6 someday"),
};
// TODO this could overflow, we should use proper subnet maths here instead of an ip
// address and guessing the subnet size from there
let start = Ipv4Addr::from(u32::from(dhcp_server_ip) + 100);
let end = Ipv4Addr::from(u32::from(dhcp_server_ip) + 150);
let scores: Vec<Box<dyn Score<T>>> = vec![
Box::new(DhcpScore {
host_binding: vec![],
domain: None,
next_server: Some(topology.get_gateway()),
boot_filename: None,
filename: Some("undionly.kpxe".to_string()),
filename64: Some("ipxe.efi".to_string()),
filenameipxe: Some(format!("http://{gateway_ip}:8080/boot.ipxe").to_string()),
dhcp_range: (IpAddress::from(start), IpAddress::from(end)),
}),
Box::new(TftpScore {
files_to_serve: Url::LocalFolder("./data/pxe/okd/tftpboot/".to_string()),
}),
Box::new(StaticFilesHttpScore {
remote_path: None,
// TODO The current russh based copy is way too slow, check for a lib update or use scp
// when available
//
@@ -80,7 +95,7 @@ impl<T: Topology + DhcpServer + TftpServer + HttpServer + Router> Interpret<T> f
content: InventoryKickstartTpl {
gateway_ip: &gateway_ip,
harmony_inventory_agent: &self.score.harmony_inventory_agent,
cluster_pubkey_filename: &self.score.cluster_pubkey_filename,
cluster_pubkey_filename: &self.score.cluster_pubkey.path.to_string(),
}
.to_string(),
},
@@ -92,6 +107,7 @@ impl<T: Topology + DhcpServer + TftpServer + HttpServer + Router> Interpret<T> f
}
.to_string(),
},
self.score.cluster_pubkey.clone(),
],
}),
];
@@ -107,6 +123,7 @@ impl<T: Topology + DhcpServer + TftpServer + HttpServer + Router> Interpret<T> f
Err(e) => return Err(e),
};
}
inquire::Confirm::new(&format!("Execute the copy : `scp -r data/pxe/okd/http_files/* root@{}:/usr/local/http/` and confirm when done to continue", HttpServer::get_ip(topology))).prompt().expect("Prompt error");
Ok(Outcome::success("Ipxe installed".to_string()))
}

View File

@@ -8,7 +8,7 @@ use crate::{
score::Score,
topology::{
BackendServer, HAClusterTopology, HealthCheck, HttpMethod, HttpStatusCode, LoadBalancer,
LoadBalancerService, Topology,
LoadBalancerService, SSL, Topology,
},
};
@@ -62,6 +62,7 @@ impl OKDLoadBalancerScore {
"/readyz".to_string(),
HttpMethod::GET,
HttpStatusCode::Success2xx,
SSL::SSL,
)),
},
];

View File

@@ -1,7 +1,21 @@
// Installation steps 01–06: private modules whose contents are re-exported
// wholesale below, so callers use the step scores without knowing file layout.
mod bootstrap_01_prepare;
mod bootstrap_02_bootstrap;
mod bootstrap_03_control_plane;
mod bootstrap_04_workers;
mod bootstrap_05_sanity_check;
mod bootstrap_06_installation_report;

// Supporting modules for the OKD install flow, addressed by full path.
pub mod bootstrap_dhcp;
pub mod bootstrap_load_balancer;
pub mod dhcp;
pub mod dns;
pub mod installation;
pub mod ipxe;
pub mod load_balancer;
pub mod templates;
pub mod upgrade;

// Flatten the step modules into the crate's `okd` namespace.
pub use bootstrap_01_prepare::*;
pub use bootstrap_02_bootstrap::*;
pub use bootstrap_03_control_plane::*;
pub use bootstrap_04_workers::*;
pub use bootstrap_05_sanity_check::*;
pub use bootstrap_06_installation_report::*;

View File

@@ -0,0 +1,20 @@
use askama::Template;
/// Askama template for OKD's `install-config.yaml` (UPI bare metal).
/// Fields map 1:1 to placeholders in `okd/install-config.yaml.j2`.
#[derive(Template)]
#[template(path = "okd/install-config.yaml.j2")]
pub struct InstallConfigYaml<'a> {
    // Rendered into `baseDomain`.
    pub cluster_domain: &'a str,
    // Rendered into `pullSecret`; a JSON blob, injected with `|safe`.
    pub pull_secret: &'a str,
    // Rendered into `sshKey`.
    pub ssh_public_key: &'a str,
    // Rendered into `metadata.name`.
    pub cluster_name: &'a str,
}
/// Askama template for the per-MAC bootstrap iPXE script.
/// Fields map 1:1 to placeholders in `okd/bootstrap.ipxe.j2`.
#[derive(Template)]
#[template(path = "okd/bootstrap.ipxe.j2")]
pub struct BootstrapIpxeTpl<'a> {
    // HTTP server IP; the template builds `http://{http_ip}:8080` from it.
    pub http_ip: &'a str,
    // Path (under the HTTP root) holding the SCOS live kernel/initramfs/rootfs.
    pub scos_path: &'a str,
    // Target disk for `coreos.inst.install_dev` (e.g. /dev/sda) — TODO confirm.
    pub installation_device: &'a str,
    // Path (under the HTTP root) holding the ignition file.
    pub ignition_http_path: &'a str,
    // Ignition file name appended to `ignition_http_path`.
    pub ignition_file_name: &'static str,
}

View File

@@ -1,4 +1,4 @@
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
use async_trait::async_trait;
use log::debug;

View File

@@ -1,6 +1,63 @@
#!ipxe
# iPXE Chainloading Script
#
# Attempts to load a host-specific configuration file. If that fails,
# it logs the failure, waits for a few seconds, and then attempts to
# load a generic fallback configuration.

# --- Configuration ---
set base-url http://{{ gateway_ip }}:8080
set hostfile ${base-url}/byMAC/01-${mac:hexhyp}.ipxe
set fallbackfile ${base-url}/fallback.ipxe
# NOTE(review): removed the bare `chain ${hostfile} || chain ${fallbackfile}`
# that sat here — it fired immediately, so the structured logging and
# fallback logic below was dead code whenever the first chain succeeded.

# --- Script Logic ---
echo
echo "========================================"
echo " iPXE Network Boot Initiated"
echo "========================================"
echo "Client MAC Address: ${mac}"
echo "Boot Server URL: ${base-url}"
echo

# --- Primary Boot Attempt ---
echo "--> Attempting to load host-specific script..."
echo " Location: ${hostfile}"
sleep 2
# The "&& exit ||" pattern works as follows:
# 1. iPXE attempts to 'chain' the hostfile.
# 2. If successful (returns 0), the "&& exit" part is executed, and this script terminates.
# 3. If it fails (returns non-zero), the "||" part is triggered, and execution continues below.
chain --autofree --replace ${hostfile} && exit ||

# --- Fallback Boot Attempt ---
# This part of the script is only reached if the 'chain ${hostfile}' command above failed.
echo
echo "--> Host-specific script not found or failed to load."
echo
echo
echo "--> Attempting to load fallback script..."
echo " Location: ${fallbackfile}"
sleep 8
chain --autofree --replace ${fallbackfile} && exit ||

# --- Final Failure ---
# This part is only reached if BOTH chain commands have failed.
echo
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
echo " FATAL: All boot scripts failed!"
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
echo "Could not load either the host-specific script or the fallback script."
echo "Dropping to iPXE shell for manual troubleshooting in 10 seconds."
sleep 8
shell
# A final exit is good practice, though 'shell' is a blocking command.
exit

View File

@@ -0,0 +1,52 @@
#!ipxe
# ==================================================================
# MAC-Specific Boot Script for CoreOS/FCOS Installation
# ==================================================================

# --- Configuration ---
# Values are rendered by the BootstrapIpxeTpl Askama template.
set http_ip {{ http_ip }}
set scos_path {{ scos_path }}
set inst_dev {{ installation_device }}
set ign_path {{ ignition_http_path }}
set ign_file {{ ignition_file_name }}

# --- Derived Variables ---
set base-url http://${http_ip}:8080
set scos-base-url ${base-url}/${scos_path}
set ignition-url ${base-url}/${ign_path}/${ign_file}

# --- Pre-boot Logging & Verification ---
echo
echo "Starting MAC-specific installation..."
echo "--------------------------------------------------"
echo " Installation Device: ${inst_dev}"
echo " CoreOS Kernel URL: ${scos-base-url}/scos-live-kernel.x86_64"
echo " Ignition URL: ${ignition-url}"
echo "--------------------------------------------------"
echo "Waiting for 3 seconds before loading boot assets..."
sleep 3

# --- Load Boot Assets with Failure Checks ---
# The '|| goto failure' pattern provides a clean exit if any asset fails to load.
echo "Loading kernel..."
# Note: `initrd=main` must match the `--name main` given to the initrd below.
kernel ${scos-base-url}/scos-live-kernel.x86_64 initrd=main coreos.live.rootfs_url=${scos-base-url}/scos-live-rootfs.x86_64.img coreos.inst.install_dev=${inst_dev} coreos.inst.ignition_url=${ignition-url} || goto failure
echo "Loading initramfs..."
initrd --name main ${scos-base-url}/scos-live-initramfs.x86_64.img || goto failure

# --- Boot ---
echo "All assets loaded successfully. Starting boot process..."
boot || goto failure
# This part is never reached on successful boot.

# --- Failure Handling ---
:failure
echo
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
echo " ERROR: A boot component failed to load."
echo " Dropping to iPXE shell for manual debugging."
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
sleep 10
shell

View File

@@ -0,0 +1,24 @@
# Built from https://docs.okd.io/latest/installing/installing_bare_metal/upi/installing-bare-metal.html#installation-bare-metal-config-yaml_installing-bare-metal
apiVersion: v1
baseDomain: {{ cluster_domain }}
compute:
- hyperthreading: Enabled
  name: worker
  # 0 worker replicas is the documented pattern for bare-metal UPI installs:
  # workers join after the control plane is up — TODO confirm join flow.
  replicas: 0
controlPlane:
  hyperthreading: Enabled
  name: master
  replicas: 3
metadata:
  # Becomes the cluster name prefix in generated DNS records.
  name: {{ cluster_name }}
networking:
  clusterNetwork:
  - cidr: 10.128.0.0/14
    hostPrefix: 23
  networkType: OVNKubernetes
  serviceNetwork:
  - 172.30.0.0/16
platform:
  # 'none' selects user-provisioned infrastructure (no cloud integration).
  none: {}
# Rendered with |safe because the pull secret is a JSON blob containing
# quotes — presumably already a single line; verify upstream escaping.
pullSecret: '{{ pull_secret|safe }}'
sshKey: '{{ ssh_public_key }}'