harmony/harmony/src/domain/topology/ha_cluster.rs
Ian Letourneau 7368184917
All checks were successful
Run Check Script / check (pull_request) Successful in 1m30s
fix(ha_cluster): inject switch client for better testability
2025-10-22 15:12:53 -04:00

701 lines
23 KiB
Rust

use async_trait::async_trait;
use harmony_macros::ip;
use harmony_types::{
net::{MacAddress, Url},
switch::PortLocation,
};
use k8s_openapi::api::core::v1::Namespace;
use kube::api::ObjectMeta;
use log::debug;
use log::info;
use crate::data::FileContent;
use crate::executors::ExecutorError;
use crate::hardware::PhysicalHost;
use crate::modules::okd::crd::{
InstallPlanApproval, OperatorGroup, OperatorGroupSpec, Subscription, SubscriptionSpec,
nmstate::{self, NMState, NodeNetworkConfigurationPolicy, NodeNetworkConfigurationPolicySpec},
};
use crate::topology::PxeOptions;
use super::{
DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig,
HttpServer, IpAddress, K8sclient, LoadBalancer, LoadBalancerService, LogicalHost,
PreparationError, PreparationOutcome, Router, Switch, SwitchClient, SwitchError, TftpServer,
Topology, k8s::K8sClient,
};
use std::collections::BTreeMap;
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct HAClusterTopology {
pub domain_name: String,
pub router: Arc<dyn Router>,
pub load_balancer: Arc<dyn LoadBalancer>,
pub firewall: Arc<dyn Firewall>,
pub dhcp_server: Arc<dyn DhcpServer>,
pub tftp_server: Arc<dyn TftpServer>,
pub http_server: Arc<dyn HttpServer>,
pub dns_server: Arc<dyn DnsServer>,
pub switch_client: Arc<dyn SwitchClient>,
pub bootstrap_host: LogicalHost,
pub control_plane: Vec<LogicalHost>,
pub workers: Vec<LogicalHost>,
}
#[async_trait]
impl Topology for HAClusterTopology {
fn name(&self) -> &str {
"HAClusterTopology"
}
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
debug!(
"ensure_ready, not entirely sure what it should do here, probably something like verify that the hosts are reachable and all services are up and ready."
);
Ok(PreparationOutcome::Noop)
}
}
#[async_trait]
impl K8sclient for HAClusterTopology {
async fn k8s_client(&self) -> Result<Arc<K8sClient>, String> {
Ok(Arc::new(
K8sClient::try_default().await.map_err(|e| e.to_string())?,
))
}
}
impl HAClusterTopology {
// TODO this is a hack to avoid refactoring
pub fn get_cluster_name(&self) -> String {
self.domain_name
.split(".")
.next()
.expect("Cluster domain name must not be empty")
.to_string()
}
pub fn get_cluster_base_domain(&self) -> String {
let base_domain = self
.domain_name
.strip_prefix(&self.get_cluster_name())
.expect("cluster domain must start with cluster name");
base_domain
.strip_prefix(".")
.unwrap_or(base_domain)
.to_string()
}
async fn ensure_nmstate_operator_installed(&self) -> Result<(), String> {
// FIXME: Find a way to check nmstate is already available (get pod -n openshift-nmstate)
debug!("Installing NMState operator...");
let k8s_client = self.k8s_client().await?;
let nmstate_namespace = Namespace {
metadata: ObjectMeta {
name: Some("openshift-nmstate".to_string()),
finalizers: Some(vec!["kubernetes".to_string()]),
..Default::default()
},
..Default::default()
};
debug!("Creating NMState namespace: {nmstate_namespace:#?}");
k8s_client
.apply(&nmstate_namespace, None)
.await
.map_err(|e| e.to_string())?;
let nmstate_operator_group = OperatorGroup {
metadata: ObjectMeta {
name: Some("openshift-nmstate".to_string()),
namespace: Some("openshift-nmstate".to_string()),
..Default::default()
},
spec: OperatorGroupSpec {
target_namespaces: vec!["openshift-nmstate".to_string()],
},
};
debug!("Creating NMState operator group: {nmstate_operator_group:#?}");
k8s_client
.apply(&nmstate_operator_group, None)
.await
.map_err(|e| e.to_string())?;
let nmstate_subscription = Subscription {
metadata: ObjectMeta {
name: Some("kubernetes-nmstate-operator".to_string()),
namespace: Some("openshift-nmstate".to_string()),
..Default::default()
},
spec: SubscriptionSpec {
channel: Some("stable".to_string()),
install_plan_approval: Some(InstallPlanApproval::Automatic),
name: "kubernetes-nmstate-operator".to_string(),
source: "redhat-operators".to_string(),
source_namespace: "openshift-marketplace".to_string(),
},
};
debug!("Subscribing to NMState Operator: {nmstate_subscription:#?}");
k8s_client
.apply(&nmstate_subscription, None)
.await
.map_err(|e| e.to_string())?;
let nmstate = NMState {
metadata: ObjectMeta {
name: Some("nmstate".to_string()),
..Default::default()
},
..Default::default()
};
debug!("Creating NMState: {nmstate:#?}");
k8s_client
.apply(&nmstate, None)
.await
.map_err(|e| e.to_string())?;
Ok(())
}
fn get_next_bond_id(&self) -> u8 {
42 // FIXME: Find a better way to declare the bond id
}
async fn configure_bond(
&self,
host: &PhysicalHost,
config: &HostNetworkConfig,
) -> Result<(), SwitchError> {
self.ensure_nmstate_operator_installed()
.await
.map_err(|e| {
SwitchError::new(format!(
"Can't configure bond, NMState operator not available: {e}"
))
})?;
let bond_config = self.create_bond_configuration(host, config);
debug!("Configuring bond for host {host:?}: {bond_config:#?}");
self.k8s_client()
.await
.unwrap()
.apply(&bond_config, None)
.await
.unwrap();
todo!()
}
fn create_bond_configuration(
&self,
host: &PhysicalHost,
config: &HostNetworkConfig,
) -> NodeNetworkConfigurationPolicy {
let host_name = host.id.clone();
let bond_id = self.get_next_bond_id();
let bond_name = format!("bond{bond_id}");
let mut bond_mtu: Option<u32> = None;
let mut bond_mac_address: Option<String> = None;
let mut bond_ports = Vec::new();
let mut interfaces: Vec<nmstate::InterfaceSpec> = Vec::new();
for switch_port in &config.switch_ports {
let interface_name = switch_port.interface.name.clone();
interfaces.push(nmstate::InterfaceSpec {
name: interface_name.clone(),
description: Some(format!("Member of bond {bond_name}")),
r#type: "ethernet".to_string(),
state: "up".to_string(),
mtu: Some(switch_port.interface.mtu),
mac_address: Some(switch_port.interface.mac_address.to_string()),
ipv4: Some(nmstate::IpStackSpec {
enabled: Some(false),
..Default::default()
}),
ipv6: Some(nmstate::IpStackSpec {
enabled: Some(false),
..Default::default()
}),
link_aggregation: None,
..Default::default()
});
bond_ports.push(interface_name);
// Use the first port's details for the bond mtu and mac address
if bond_mtu.is_none() {
bond_mtu = Some(switch_port.interface.mtu);
}
if bond_mac_address.is_none() {
bond_mac_address = Some(switch_port.interface.mac_address.to_string());
}
}
interfaces.push(nmstate::InterfaceSpec {
name: bond_name.clone(),
description: Some(format!("Network bond for host {host_name}")),
r#type: "bond".to_string(),
state: "up".to_string(),
mtu: bond_mtu,
mac_address: bond_mac_address,
ipv4: Some(nmstate::IpStackSpec {
dhcp: Some(true),
enabled: Some(true),
..Default::default()
}),
ipv6: Some(nmstate::IpStackSpec {
dhcp: Some(true),
autoconf: Some(true),
enabled: Some(true),
..Default::default()
}),
link_aggregation: Some(nmstate::BondSpec {
mode: "802.3ad".to_string(),
ports: bond_ports,
..Default::default()
}),
..Default::default()
});
NodeNetworkConfigurationPolicy {
metadata: ObjectMeta {
name: Some(format!("{host_name}-bond-config")),
..Default::default()
},
spec: NodeNetworkConfigurationPolicySpec {
node_selector: Some(BTreeMap::from([(
"kubernetes.io/hostname".to_string(),
host_name.to_string(),
)])),
desired_state: nmstate::DesiredStateSpec { interfaces },
},
}
}
async fn configure_port_channel(
&self,
host: &PhysicalHost,
config: &HostNetworkConfig,
) -> Result<(), SwitchError> {
debug!("Configuring port channel: {config:#?}");
let switch_ports = config.switch_ports.iter().map(|s| s.port.clone()).collect();
self.switch_client
.configure_port_channel(&format!("Harmony_{}", host.id), switch_ports)
.await
.map_err(|e| SwitchError::new(format!("Failed to configure switch: {e}")))?;
Ok(())
}
pub fn autoload() -> Self {
let dummy_infra = Arc::new(DummyInfra {});
let dummy_host = LogicalHost {
ip: ip!("0.0.0.0"),
name: "dummyhost".to_string(),
};
Self {
domain_name: "DummyTopology".to_string(),
router: dummy_infra.clone(),
load_balancer: dummy_infra.clone(),
firewall: dummy_infra.clone(),
dhcp_server: dummy_infra.clone(),
tftp_server: dummy_infra.clone(),
http_server: dummy_infra.clone(),
dns_server: dummy_infra.clone(),
switch_client: dummy_infra.clone(),
bootstrap_host: dummy_host,
control_plane: vec![],
workers: vec![],
}
}
}
#[async_trait]
impl DnsServer for HAClusterTopology {
async fn register_dhcp_leases(&self, register: bool) -> Result<(), ExecutorError> {
self.dns_server.register_dhcp_leases(register).await
}
async fn register_hosts(&self, hosts: Vec<DnsRecord>) -> Result<(), ExecutorError> {
self.dns_server.register_hosts(hosts).await
}
fn remove_record(&self, name: &str, record_type: DnsRecordType) -> Result<(), ExecutorError> {
self.dns_server.remove_record(name, record_type)
}
async fn list_records(&self) -> Vec<DnsRecord> {
self.dns_server.list_records().await
}
fn get_ip(&self) -> IpAddress {
self.dns_server.get_ip()
}
fn get_host(&self) -> LogicalHost {
self.dns_server.get_host()
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
self.dns_server.commit_config().await
}
}
#[async_trait]
impl LoadBalancer for HAClusterTopology {
fn get_ip(&self) -> IpAddress {
self.load_balancer.get_ip()
}
fn get_host(&self) -> LogicalHost {
self.load_balancer.get_host()
}
async fn add_service(&self, service: &LoadBalancerService) -> Result<(), ExecutorError> {
self.load_balancer.add_service(service).await
}
async fn remove_service(&self, service: &LoadBalancerService) -> Result<(), ExecutorError> {
self.load_balancer.remove_service(service).await
}
async fn list_services(&self) -> Vec<LoadBalancerService> {
self.load_balancer.list_services().await
}
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
self.load_balancer.ensure_initialized().await
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
self.load_balancer.commit_config().await
}
async fn reload_restart(&self) -> Result<(), ExecutorError> {
self.load_balancer.reload_restart().await
}
}
#[async_trait]
impl DhcpServer for HAClusterTopology {
async fn add_static_mapping(&self, entry: &DHCPStaticEntry) -> Result<(), ExecutorError> {
self.dhcp_server.add_static_mapping(entry).await
}
async fn remove_static_mapping(&self, mac: &MacAddress) -> Result<(), ExecutorError> {
self.dhcp_server.remove_static_mapping(mac).await
}
async fn list_static_mappings(&self) -> Vec<(MacAddress, IpAddress)> {
self.dhcp_server.list_static_mappings().await
}
async fn set_pxe_options(&self, options: PxeOptions) -> Result<(), ExecutorError> {
self.dhcp_server.set_pxe_options(options).await
}
async fn set_dhcp_range(
&self,
start: &IpAddress,
end: &IpAddress,
) -> Result<(), ExecutorError> {
self.dhcp_server.set_dhcp_range(start, end).await
}
fn get_ip(&self) -> IpAddress {
self.dhcp_server.get_ip()
}
fn get_host(&self) -> LogicalHost {
self.dhcp_server.get_host()
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
self.dhcp_server.commit_config().await
}
}
#[async_trait]
impl TftpServer for HAClusterTopology {
async fn serve_files(&self, url: &Url) -> Result<(), ExecutorError> {
self.tftp_server.serve_files(url).await
}
fn get_ip(&self) -> IpAddress {
self.tftp_server.get_ip()
}
async fn set_ip(&self, ip: IpAddress) -> Result<(), ExecutorError> {
self.tftp_server.set_ip(ip).await
}
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
self.tftp_server.ensure_initialized().await
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
self.tftp_server.commit_config().await
}
async fn reload_restart(&self) -> Result<(), ExecutorError> {
self.tftp_server.reload_restart().await
}
}
impl Router for HAClusterTopology {
fn get_gateway(&self) -> super::IpAddress {
self.router.get_gateway()
}
fn get_cidr(&self) -> cidr::Ipv4Cidr {
self.router.get_cidr()
}
fn get_host(&self) -> LogicalHost {
self.router.get_host()
}
}
#[async_trait]
impl HttpServer for HAClusterTopology {
async fn serve_files(
&self,
url: &Url,
remote_path: &Option<String>,
) -> Result<(), ExecutorError> {
self.http_server.serve_files(url, remote_path).await
}
async fn serve_file_content(&self, file: &FileContent) -> Result<(), ExecutorError> {
self.http_server.serve_file_content(file).await
}
fn get_ip(&self) -> IpAddress {
self.http_server.get_ip()
}
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
self.http_server.ensure_initialized().await
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
self.http_server.commit_config().await
}
async fn reload_restart(&self) -> Result<(), ExecutorError> {
self.http_server.reload_restart().await
}
}
#[async_trait]
impl Switch for HAClusterTopology {
async fn setup_switch(&self) -> Result<(), SwitchError> {
self.switch_client.setup().await?;
Ok(())
}
async fn get_port_for_mac_address(
&self,
mac_address: &MacAddress,
) -> Result<Option<PortLocation>, SwitchError> {
let port = self.switch_client.find_port(mac_address).await?;
Ok(port)
}
async fn configure_host_network(
&self,
host: &PhysicalHost,
config: HostNetworkConfig,
) -> Result<(), SwitchError> {
self.configure_bond(host, &config).await?;
self.configure_port_channel(host, &config).await
}
}
#[derive(Debug)]
pub struct DummyInfra;
#[async_trait]
impl Topology for DummyInfra {
fn name(&self) -> &str {
"DummyInfra"
}
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
let dummy_msg = "This is a dummy infrastructure that does nothing";
info!("{dummy_msg}");
Ok(PreparationOutcome::Success {
details: dummy_msg.into(),
})
}
}
const UNIMPLEMENTED_DUMMY_INFRA: &str = "This is a dummy infrastructure, no operation is supported";
impl Router for DummyInfra {
fn get_gateway(&self) -> super::IpAddress {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_cidr(&self) -> cidr::Ipv4Cidr {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_host(&self) -> LogicalHost {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
}
impl Firewall for DummyInfra {
fn add_rule(
&mut self,
_rule: super::FirewallRule,
) -> Result<(), crate::executors::ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn remove_rule(&mut self, _rule_id: &str) -> Result<(), crate::executors::ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn list_rules(&self) -> Vec<super::FirewallRule> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_ip(&self) -> super::IpAddress {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_host(&self) -> LogicalHost {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
}
#[async_trait]
impl DhcpServer for DummyInfra {
async fn add_static_mapping(&self, _entry: &DHCPStaticEntry) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn remove_static_mapping(&self, _mac: &MacAddress) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn list_static_mappings(&self) -> Vec<(MacAddress, IpAddress)> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn set_pxe_options(&self, _options: PxeOptions) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn set_dhcp_range(
&self,
_start: &IpAddress,
_end: &IpAddress,
) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_ip(&self) -> IpAddress {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_host(&self) -> LogicalHost {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
}
#[async_trait]
impl LoadBalancer for DummyInfra {
fn get_ip(&self) -> IpAddress {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_host(&self) -> LogicalHost {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn add_service(&self, _service: &LoadBalancerService) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn remove_service(&self, _service: &LoadBalancerService) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn list_services(&self) -> Vec<LoadBalancerService> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn reload_restart(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
}
#[async_trait]
impl TftpServer for DummyInfra {
async fn serve_files(&self, _url: &Url) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_ip(&self) -> IpAddress {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn set_ip(&self, _ip: IpAddress) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn reload_restart(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
}
#[async_trait]
impl HttpServer for DummyInfra {
async fn serve_files(
&self,
_url: &Url,
_remote_path: &Option<String>,
) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn serve_file_content(&self, _file: &FileContent) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_ip(&self) -> IpAddress {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn reload_restart(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
}
#[async_trait]
impl DnsServer for DummyInfra {
async fn register_dhcp_leases(&self, _register: bool) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn register_hosts(&self, _hosts: Vec<DnsRecord>) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn remove_record(&self, _name: &str, _record_type: DnsRecordType) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn list_records(&self) -> Vec<DnsRecord> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_ip(&self) -> IpAddress {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
fn get_host(&self) -> LogicalHost {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
}
#[async_trait]
impl SwitchClient for DummyInfra {
async fn setup(&self) -> Result<(), SwitchError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn find_port(
&self,
_mac_address: &MacAddress,
) -> Result<Option<PortLocation>, SwitchError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
async fn configure_port_channel(
&self,
_channel_name: &str,
_switch_ports: Vec<PortLocation>,
) -> Result<u8, SwitchError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
}