From adc14c052d05b2ba6dd4a70d96f206f57ab9661a Mon Sep 17 00:00:00 2001 From: Ian Letourneau Date: Tue, 4 Nov 2025 13:38:28 -0500 Subject: [PATCH] fix(host_network): retrieve proper hostname and next available bond id --- harmony/src/domain/topology/ha_cluster.rs | 105 ++++++--- harmony/src/domain/topology/k8s.rs | 59 +++++- harmony/src/modules/inventory/discovery.rs | 11 +- harmony/src/modules/okd/crd/nmstate.rs | 234 +++++++++++++++++++-- opnsense-config-xml/src/data/interfaces.rs | 2 +- 5 files changed, 356 insertions(+), 55 deletions(-) diff --git a/harmony/src/domain/topology/ha_cluster.rs b/harmony/src/domain/topology/ha_cluster.rs index d65d9aa..88045ae 100644 --- a/harmony/src/domain/topology/ha_cluster.rs +++ b/harmony/src/domain/topology/ha_cluster.rs @@ -1,14 +1,19 @@ use async_trait::async_trait; use harmony_macros::ip; use harmony_types::{ + id::Id, net::{MacAddress, Url}, switch::PortLocation, }; -use kube::api::ObjectMeta; +use k8s_openapi::api::core::v1::Node; +use kube::{ + ResourceExt, + api::{ObjectList, ObjectMeta}, +}; use log::debug; use log::info; -use crate::modules::okd::crd::nmstate::{self, NodeNetworkConfigurationPolicy}; +use crate::modules::okd::crd::nmstate::{self, NodeNetworkConfigurationPolicy, NodeNetworkState}; use crate::topology::PxeOptions; use crate::{data::FileContent, modules::okd::crd::nmstate::NMState}; use crate::{ @@ -63,7 +68,7 @@ impl K8sclient for HAClusterTopology { K8sClient::try_default().await.map_err(|e| e.to_string())?, )), Some(kubeconfig) => { - let Some(client) = K8sClient::from_kubeconfig(&kubeconfig).await else { + let Some(client) = K8sClient::from_kubeconfig(kubeconfig).await else { return Err("Failed to create k8s client".to_string()); }; Ok(Arc::new(client)) @@ -152,8 +157,19 @@ impl HAClusterTopology { Ok(()) } - fn get_next_bond_id(&self) -> u8 { - 42 // FIXME: Find a better way to declare the bond id + async fn get_next_bond_id(&self, hostname: &str) -> Result { + let network_state: Option = self + .k8s_client() + .await + .unwrap() + .get_resource(hostname, None) + .await + .map_err(|e| format!("Failed to list nodes: {e}"))?; + + println!("HELLLOOOO NETWORK STATE: {network_state:#?}"); + + let bond_id = 42; // FIXME: Find a better way to declare the bond id + Ok(format!("bond{bond_id}")) } async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> { @@ -165,7 +181,20 @@ impl HAClusterTopology { )) })?; - let bond_config = self.create_bond_configuration(config); + let hostname = self.get_hostname(&config.host_id).await.map_err(|e| { + SwitchError::new(format!( + "Can't configure bond, can't get hostname for host '{}': {e}", + config.host_id + )) + })?; + let bond_id = self.get_next_bond_id(&hostname).await.map_err(|e| { + SwitchError::new(format!( + "Can't configure bond, can't get an available bond id for host '{}': {e}", + config.host_id + )) + })?; + let bond_config = self.create_bond_configuration(&hostname, &bond_id, config); + debug!( "Applying NMState bond config for host {}: {bond_config:#?}", config.host_id @@ -182,23 +211,21 @@ impl HAClusterTopology { fn create_bond_configuration( &self, + host: &str, + bond_name: &str, config: &HostNetworkConfig, ) -> NodeNetworkConfigurationPolicy { - let host_name = &config.host_id; - let bond_id = self.get_next_bond_id(); - let bond_name = format!("bond{bond_id}"); - - info!("Configuring bond '{bond_name}' for host '{host_name}'..."); + info!("Configuring bond '{bond_name}' for host '{host}'..."); let mut bond_mtu: Option = None; let mut copy_mac_from: Option = None; let mut bond_ports = Vec::new(); - let mut interfaces: Vec = Vec::new(); + let mut interfaces: Vec = Vec::new(); for switch_port in &config.switch_ports { let interface_name = switch_port.interface.name.clone(); - interfaces.push(nmstate::InterfaceSpec { + interfaces.push(nmstate::Interface { name: interface_name.clone(), description: Some(format!("Member of bond {bond_name}")), r#type: "ethernet".to_string(), @@ -228,9 +255,9 @@ impl HAClusterTopology { } } - interfaces.push(nmstate::InterfaceSpec { - name: bond_name.clone(), - description: Some(format!("Network bond for host {host_name}")), + interfaces.push(nmstate::Interface { + name: bond_name.to_string(), + description: Some(format!("Network bond for host {host}")), r#type: "bond".to_string(), state: "up".to_string(), copy_mac_from, @@ -255,19 +282,49 @@ impl HAClusterTopology { NodeNetworkConfigurationPolicy { metadata: ObjectMeta { - name: Some(format!("{host_name}-bond-config")), + name: Some(format!("{host}-bond-config")), ..Default::default() }, spec: NodeNetworkConfigurationPolicySpec { node_selector: Some(BTreeMap::from([( "kubernetes.io/hostname".to_string(), - host_name.to_string(), + host.to_string(), )])), - desired_state: nmstate::DesiredStateSpec { interfaces }, + desired_state: nmstate::NetworkState { + interfaces, + ..Default::default() + }, }, } } + async fn get_hostname(&self, host_id: &Id) -> Result { + let nodes: ObjectList = self + .k8s_client() + .await + .unwrap() + .list_resources(None, None) + .await + .map_err(|e| format!("Failed to list nodes: {e}"))?; + + let Some(node) = nodes.iter().find(|n| { + n.status + .as_ref() + .and_then(|s| s.node_info.as_ref()) + .map(|i| i.system_uuid == host_id.to_string()) + .unwrap_or(false) + }) else { + return Err(format!("No node found for host '{host_id}'")); + }; + + node.labels() + .get("kubernetes.io/hostname") + .ok_or(format!( + "Node '{host_id}' has no kubernetes.io/hostname label" + )) + .cloned() + } + async fn configure_port_channel(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> { debug!("Configuring port channel: {config:#?}"); let switch_ports = config.switch_ports.iter().map(|s| s.port.clone()).collect(); @@ -458,21 +515,19 @@ impl HttpServer for HAClusterTopology { #[async_trait] impl Switch for HAClusterTopology { async fn setup_switch(&self) -> Result<(), SwitchError> { - self.switch_client.setup().await?; - Ok(()) + self.switch_client.setup().await.map(|_| ()) } async fn get_port_for_mac_address( &self, mac_address: &MacAddress, ) -> Result, SwitchError> { - let port = self.switch_client.find_port(mac_address).await?; - Ok(port) + self.switch_client.find_port(mac_address).await } async fn configure_host_network(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> { - self.configure_bond(config).await?; - self.configure_port_channel(config).await + self.configure_bond(config).await + // self.configure_port_channel(config).await } } diff --git a/harmony/src/domain/topology/k8s.rs b/harmony/src/domain/topology/k8s.rs index 71129e1..03c59e1 100644 --- a/harmony/src/domain/topology/k8s.rs +++ b/harmony/src/domain/topology/k8s.rs @@ -5,13 +5,15 @@ use k8s_openapi::{ ClusterResourceScope, NamespaceResourceScope, api::{ apps::v1::Deployment, - core::v1::{Pod, ServiceAccount}, + core::v1::{Node, Pod, ServiceAccount}, }, apimachinery::pkg::version::Info, }; use kube::{ Client, Config, Discovery, Error, Resource, - api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt}, + api::{ + Api, AttachParams, DeleteParams, ListParams, ObjectList, Patch, PatchParams, ResourceExt, + }, config::{KubeConfigOptions, Kubeconfig}, core::ErrorResponse, discovery::{ApiCapabilities, Scope}, @@ -564,7 +566,58 @@ impl K8sClient { Ok(()) } - pub(crate) async fn from_kubeconfig(path: &str) -> Option { + /// Gets a single named resource of a specific type `K`. + /// + /// This function uses the `ApplyStrategy` trait to correctly determine + /// whether to look in a specific namespace or in the entire cluster. + /// + /// Returns `Ok(None)` if the resource is not found (404). + pub async fn get_resource( + &self, + name: &str, + namespace: Option<&str>, + ) -> Result, Error> + where + K: Resource + Clone + std::fmt::Debug + DeserializeOwned, + ::Scope: ApplyStrategy, + ::DynamicType: Default, + { + let api: Api = + <::Scope as ApplyStrategy>::get_api(&self.client, namespace); + + api.get_opt(name).await + } + + /// Lists all resources of a specific type `K`. + /// + /// This function uses the `ApplyStrategy` trait to correctly determine + /// whether to list from a specific namespace or from the entire cluster. + pub async fn list_resources( + &self, + namespace: Option<&str>, + list_params: Option, + ) -> Result, Error> + where + K: Resource + Clone + std::fmt::Debug + DeserializeOwned, + ::Scope: ApplyStrategy, + ::DynamicType: Default, + { + let api: Api = + <::Scope as ApplyStrategy>::get_api(&self.client, namespace); + + let list_params = list_params.unwrap_or_default(); + api.list(&list_params).await + } + + /// Fetches a list of all Nodes in the cluster. + pub async fn get_nodes( + &self, + list_params: Option, + ) -> Result, Error> { + self.list_resources(None, list_params).await + } + + pub async fn from_kubeconfig(path: &str) -> Option { let k = match Kubeconfig::read_from(path) { Ok(k) => k, Err(e) => { diff --git a/harmony/src/modules/inventory/discovery.rs b/harmony/src/modules/inventory/discovery.rs index 143c56a..b02078b 100644 --- a/harmony/src/modules/inventory/discovery.rs +++ b/harmony/src/modules/inventory/discovery.rs @@ -74,7 +74,11 @@ impl Interpret for DiscoverHostForRoleInterpret { match ans { Ok(choice) => { - info!("Selected {} as the bootstrap node.", choice.summary()); + info!( + "Selected {} as the {:?} node.", + choice.summary(), + self.score.role + ); host_repo .save_role_mapping(&self.score.role, &choice) .await?; @@ -90,10 +94,7 @@ impl Interpret for DiscoverHostForRoleInterpret { "Failed to select node for role {:?} : {}", self.score.role, e ); - return Err(InterpretError::new(format!( - "Could not select host : {}", - e.to_string() - ))); + return Err(InterpretError::new(format!("Could not select host : {e}"))); } } } diff --git a/harmony/src/modules/okd/crd/nmstate.rs b/harmony/src/modules/okd/crd/nmstate.rs index 9f986e5..3c4955e 100644 --- a/harmony/src/modules/okd/crd/nmstate.rs +++ b/harmony/src/modules/okd/crd/nmstate.rs @@ -53,18 +53,193 @@ pub struct ProbeDns { pub struct NodeNetworkConfigurationPolicySpec { #[serde(skip_serializing_if = "Option::is_none")] pub node_selector: Option>, - pub desired_state: DesiredStateSpec, + pub desired_state: NetworkState, +} + +#[derive(CustomResource, Deserialize, Serialize, Clone, Debug, JsonSchema)] +#[kube( + group = "nmstate.io", + version = "v1beta1", + kind = "NodeNetworkState", + plural = "nodenetworkstates", + namespaced = false, + status = "NodeNetworkStateStatus" +)] +#[serde(rename_all = "camelCase")] +pub struct NodeNetworkStateSpec { + // This resource is read-only and has no spec. } #[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct NodeNetworkStateStatus { + #[serde(skip_serializing_if = "Option::is_none")] + pub current_state: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub handler_nmstate_version: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub host_network_manager_version: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub last_successful_update_time: Option, +} + +/// The NetworkState is the top-level struct, representing the entire +/// desired or current network state. +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] #[serde(rename_all = "kebab-case")] -pub struct DesiredStateSpec { - pub interfaces: Vec, +#[serde(deny_unknown_fields)] +pub struct NetworkState { + #[serde(skip_serializing_if = "Option::is_none")] + pub hostname: Option, + #[serde(rename = "dns-resolver", skip_serializing_if = "Option::is_none")] + pub dns: Option, + #[serde(rename = "route-rules", skip_serializing_if = "Option::is_none")] + pub rules: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub routes: Option, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub interfaces: Vec, + #[serde(rename = "ovs-db", skip_serializing_if = "Option::is_none")] + pub ovsdb: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ovn: Option, } #[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] #[serde(rename_all = "kebab-case")] -pub struct InterfaceSpec { +pub struct HostNameState { + #[serde(skip_serializing_if = "Option::is_none")] + pub running: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub config: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct DnsState { + #[serde(skip_serializing_if = "Option::is_none")] + pub running: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub config: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct DnsResolverConfig { + #[serde(skip_serializing_if = "Option::is_none")] + pub search: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub server: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct RouteRuleState { + #[serde(skip_serializing_if = "Option::is_none")] + pub config: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub running: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct RouteState { + #[serde(skip_serializing_if = "Option::is_none")] + pub config: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub running: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct RouteRule { + #[serde(rename = "ip-from", skip_serializing_if = "Option::is_none")] + pub ip_from: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub priority: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub route_table: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct Route { + #[serde(skip_serializing_if = "Option::is_none")] + pub destination: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub metric: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub next_hop_address: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub next_hop_interface: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub table_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub mtu: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct OvsDbGlobalConfig { + #[serde(skip_serializing_if = "Option::is_none")] + pub external_ids: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub other_config: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct OvnConfiguration { + #[serde(skip_serializing_if = "Option::is_none")] + pub bridge_mappings: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct OvnBridgeMapping { + #[serde(skip_serializing_if = "Option::is_none")] + pub localnet: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub bridge: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)] +#[serde(untagged)] +#[serde(rename_all = "kebab-case")] +pub enum StpSpec { + Bool(bool), + Options(StpOptions), +} + +#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct LldpState { + #[serde(skip_serializing_if = "Option::is_none")] + pub enabled: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct OvsDb { + #[serde(skip_serializing_if = "Option::is_none")] + pub external_ids: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub other_config: Option>, +} + +#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct PatchState { + #[serde(skip_serializing_if = "Option::is_none")] + pub peer: Option, +} + +#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub struct Interface { pub name: String, #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, @@ -98,10 +273,38 @@ pub struct InterfaceSpec { pub infiniband: Option, #[serde(skip_serializing_if = "Option::is_none")] pub linux_bridge: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(alias = "bridge")] pub ovs_bridge: Option, + #[serde(skip_serializing_if = "Option::is_none")] - pub ethtool: Option, + pub ethtool: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub accept_all_mac_addresses: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub identifier: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub lldp: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub permanent_mac_address: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub max_mtu: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub min_mtu: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub mptcp: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub profile_name: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub wait_ip: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ovs_db: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub driver: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub patch: Option, } #[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] @@ -287,11 +490,15 @@ pub struct OvsBridgeSpec { #[serde(rename_all = "kebab-case")] pub struct OvsBridgeOptions { #[serde(skip_serializing_if = "Option::is_none")] - pub stp: Option, + pub stp: Option, #[serde(skip_serializing_if = "Option::is_none")] pub rstp: Option, #[serde(skip_serializing_if = "Option::is_none")] pub mcast_snooping_enable: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub datapath: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub fail_mode: Option, } #[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] @@ -305,18 +512,3 @@ pub struct OvsPortSpec { #[serde(skip_serializing_if = "Option::is_none")] pub r#type: Option, } - -#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] -#[serde(rename_all = "kebab-case")] -pub struct EthtoolSpec { - // TODO: Properly describe this spec (https://nmstate.io/devel/yaml_api.html#ethtool) -} - -#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)] -#[serde(rename_all = "kebab-case")] -pub struct EthtoolFecSpec { - #[serde(skip_serializing_if = "Option::is_none")] - pub auto: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub mode: Option, -} diff --git a/opnsense-config-xml/src/data/interfaces.rs b/opnsense-config-xml/src/data/interfaces.rs index b06f392..fb49a4d 100644 --- a/opnsense-config-xml/src/data/interfaces.rs +++ b/opnsense-config-xml/src/data/interfaces.rs @@ -9,7 +9,7 @@ pub struct Interface { pub physical_interface_name: String, pub descr: Option, pub mtu: Option, - pub enable: MaybeString, + pub enable: Option, pub lock: Option, #[yaserde(rename = "spoofmac")] pub spoof_mac: Option,