fix(host_network): retrieve proper hostname and next available bond id

This commit is contained in:
Ian Letourneau 2025-11-04 13:38:28 -05:00
parent c80ede706b
commit adc14c052d
5 changed files with 356 additions and 55 deletions

View File

@ -1,14 +1,19 @@
use async_trait::async_trait;
use harmony_macros::ip;
use harmony_types::{
id::Id,
net::{MacAddress, Url},
switch::PortLocation,
};
use kube::api::ObjectMeta;
use k8s_openapi::api::core::v1::Node;
use kube::{
ResourceExt,
api::{ObjectList, ObjectMeta},
};
use log::debug;
use log::info;
use crate::modules::okd::crd::nmstate::{self, NodeNetworkConfigurationPolicy};
use crate::modules::okd::crd::nmstate::{self, NodeNetworkConfigurationPolicy, NodeNetworkState};
use crate::topology::PxeOptions;
use crate::{data::FileContent, modules::okd::crd::nmstate::NMState};
use crate::{
@ -63,7 +68,7 @@ impl K8sclient for HAClusterTopology {
K8sClient::try_default().await.map_err(|e| e.to_string())?,
)),
Some(kubeconfig) => {
let Some(client) = K8sClient::from_kubeconfig(&kubeconfig).await else {
let Some(client) = K8sClient::from_kubeconfig(kubeconfig).await else {
return Err("Failed to create k8s client".to_string());
};
Ok(Arc::new(client))
@ -152,8 +157,19 @@ impl HAClusterTopology {
Ok(())
}
fn get_next_bond_id(&self) -> u8 {
42 // FIXME: Find a better way to declare the bond id
async fn get_next_bond_id(&self, hostname: &str) -> Result<String, String> {
let network_state: Option<nmstate::NodeNetworkState> = self
.k8s_client()
.await
.unwrap()
.get_resource(hostname, None)
.await
.map_err(|e| format!("Failed to list nodes: {e}"))?;
println!("HELLLOOOO NETWORK STATE: {network_state:#?}");
let bond_id = 42; // FIXME: Find a better way to declare the bond id
Ok(format!("bond{bond_id}"))
}
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> {
@ -165,7 +181,20 @@ impl HAClusterTopology {
))
})?;
let bond_config = self.create_bond_configuration(config);
let hostname = self.get_hostname(&config.host_id).await.map_err(|e| {
SwitchError::new(format!(
"Can't configure bond, can't get hostname for host '{}': {e}",
config.host_id
))
})?;
let bond_id = self.get_next_bond_id(&hostname).await.map_err(|e| {
SwitchError::new(format!(
"Can't configure bond, can't get an available bond id for host '{}': {e}",
config.host_id
))
})?;
let bond_config = self.create_bond_configuration(&hostname, &bond_id, config);
debug!(
"Applying NMState bond config for host {}: {bond_config:#?}",
config.host_id
@ -182,23 +211,21 @@ impl HAClusterTopology {
fn create_bond_configuration(
&self,
host: &str,
bond_name: &str,
config: &HostNetworkConfig,
) -> NodeNetworkConfigurationPolicy {
let host_name = &config.host_id;
let bond_id = self.get_next_bond_id();
let bond_name = format!("bond{bond_id}");
info!("Configuring bond '{bond_name}' for host '{host_name}'...");
info!("Configuring bond '{bond_name}' for host '{host}'...");
let mut bond_mtu: Option<u32> = None;
let mut copy_mac_from: Option<String> = None;
let mut bond_ports = Vec::new();
let mut interfaces: Vec<nmstate::InterfaceSpec> = Vec::new();
let mut interfaces: Vec<nmstate::Interface> = Vec::new();
for switch_port in &config.switch_ports {
let interface_name = switch_port.interface.name.clone();
interfaces.push(nmstate::InterfaceSpec {
interfaces.push(nmstate::Interface {
name: interface_name.clone(),
description: Some(format!("Member of bond {bond_name}")),
r#type: "ethernet".to_string(),
@ -228,9 +255,9 @@ impl HAClusterTopology {
}
}
interfaces.push(nmstate::InterfaceSpec {
name: bond_name.clone(),
description: Some(format!("Network bond for host {host_name}")),
interfaces.push(nmstate::Interface {
name: bond_name.to_string(),
description: Some(format!("Network bond for host {host}")),
r#type: "bond".to_string(),
state: "up".to_string(),
copy_mac_from,
@ -255,19 +282,49 @@ impl HAClusterTopology {
NodeNetworkConfigurationPolicy {
metadata: ObjectMeta {
name: Some(format!("{host_name}-bond-config")),
name: Some(format!("{host}-bond-config")),
..Default::default()
},
spec: NodeNetworkConfigurationPolicySpec {
node_selector: Some(BTreeMap::from([(
"kubernetes.io/hostname".to_string(),
host_name.to_string(),
host.to_string(),
)])),
desired_state: nmstate::DesiredStateSpec { interfaces },
desired_state: nmstate::NetworkState {
interfaces,
..Default::default()
},
},
}
}
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
let nodes: ObjectList<Node> = self
.k8s_client()
.await
.unwrap()
.list_resources(None, None)
.await
.map_err(|e| format!("Failed to list nodes: {e}"))?;
let Some(node) = nodes.iter().find(|n| {
n.status
.as_ref()
.and_then(|s| s.node_info.as_ref())
.map(|i| i.system_uuid == host_id.to_string())
.unwrap_or(false)
}) else {
return Err(format!("No node found for host '{host_id}'"));
};
node.labels()
.get("kubernetes.io/hostname")
.ok_or(format!(
"Node '{host_id}' has no kubernetes.io/hostname label"
))
.cloned()
}
async fn configure_port_channel(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> {
debug!("Configuring port channel: {config:#?}");
let switch_ports = config.switch_ports.iter().map(|s| s.port.clone()).collect();
@ -458,21 +515,19 @@ impl HttpServer for HAClusterTopology {
#[async_trait]
impl Switch for HAClusterTopology {
async fn setup_switch(&self) -> Result<(), SwitchError> {
self.switch_client.setup().await?;
Ok(())
self.switch_client.setup().await.map(|_| ())
}
async fn get_port_for_mac_address(
&self,
mac_address: &MacAddress,
) -> Result<Option<PortLocation>, SwitchError> {
let port = self.switch_client.find_port(mac_address).await?;
Ok(port)
self.switch_client.find_port(mac_address).await
}
async fn configure_host_network(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> {
self.configure_bond(config).await?;
self.configure_port_channel(config).await
self.configure_bond(config).await
// self.configure_port_channel(config).await
}
}

View File

@ -5,13 +5,15 @@ use k8s_openapi::{
ClusterResourceScope, NamespaceResourceScope,
api::{
apps::v1::Deployment,
core::v1::{Pod, ServiceAccount},
core::v1::{Node, Pod, ServiceAccount},
},
apimachinery::pkg::version::Info,
};
use kube::{
Client, Config, Discovery, Error, Resource,
api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt},
api::{
Api, AttachParams, DeleteParams, ListParams, ObjectList, Patch, PatchParams, ResourceExt,
},
config::{KubeConfigOptions, Kubeconfig},
core::ErrorResponse,
discovery::{ApiCapabilities, Scope},
@ -564,7 +566,58 @@ impl K8sClient {
Ok(())
}
pub(crate) async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
/// Gets a single named resource of a specific type `K`.
///
/// This function uses the `ApplyStrategy` trait to correctly determine
/// whether to look in a specific namespace or in the entire cluster.
///
/// Returns `Ok(None)` if the resource is not found (404).
pub async fn get_resource<K>(
&self,
name: &str,
namespace: Option<&str>,
) -> Result<Option<K>, Error>
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as kube::Resource>::DynamicType: Default,
{
let api: Api<K> =
<<K as Resource>::Scope as ApplyStrategy<K>>::get_api(&self.client, namespace);
api.get_opt(name).await
}
/// Lists all resources of a specific type `K`.
///
/// This function uses the `ApplyStrategy` trait to correctly determine
/// whether to list from a specific namespace or from the entire cluster.
pub async fn list_resources<K>(
&self,
namespace: Option<&str>,
list_params: Option<ListParams>,
) -> Result<ObjectList<K>, Error>
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as kube::Resource>::DynamicType: Default,
{
let api: Api<K> =
<<K as Resource>::Scope as ApplyStrategy<K>>::get_api(&self.client, namespace);
let list_params = list_params.unwrap_or_default();
api.list(&list_params).await
}
/// Fetches a list of all Nodes in the cluster.
pub async fn get_nodes(
&self,
list_params: Option<ListParams>,
) -> Result<ObjectList<Node>, Error> {
self.list_resources(None, list_params).await
}
pub async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
let k = match Kubeconfig::read_from(path) {
Ok(k) => k,
Err(e) => {

View File

@ -74,7 +74,11 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
match ans {
Ok(choice) => {
info!("Selected {} as the bootstrap node.", choice.summary());
info!(
"Selected {} as the {:?} node.",
choice.summary(),
self.score.role
);
host_repo
.save_role_mapping(&self.score.role, &choice)
.await?;
@ -90,10 +94,7 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
"Failed to select node for role {:?} : {}",
self.score.role, e
);
return Err(InterpretError::new(format!(
"Could not select host : {}",
e.to_string()
)));
return Err(InterpretError::new(format!("Could not select host : {e}")));
}
}
}

View File

@ -53,18 +53,193 @@ pub struct ProbeDns {
pub struct NodeNetworkConfigurationPolicySpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub node_selector: Option<BTreeMap<String, String>>,
pub desired_state: DesiredStateSpec,
pub desired_state: NetworkState,
}
#[derive(CustomResource, Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[kube(
group = "nmstate.io",
version = "v1beta1",
kind = "NodeNetworkState",
plural = "nodenetworkstates",
namespaced = false,
status = "NodeNetworkStateStatus"
)]
#[serde(rename_all = "camelCase")]
pub struct NodeNetworkStateSpec {
// This resource is read-only and has no spec.
}
#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct NodeNetworkStateStatus {
#[serde(skip_serializing_if = "Option::is_none")]
pub current_state: Option<NetworkState>,
#[serde(skip_serializing_if = "Option::is_none")]
pub handler_nmstate_version: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub host_network_manager_version: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub last_successful_update_time: Option<String>,
}
/// The NetworkState is the top-level struct, representing the entire
/// desired or current network state.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct DesiredStateSpec {
pub interfaces: Vec<InterfaceSpec>,
#[serde(deny_unknown_fields)]
pub struct NetworkState {
#[serde(skip_serializing_if = "Option::is_none")]
pub hostname: Option<HostNameState>,
#[serde(rename = "dns-resolver", skip_serializing_if = "Option::is_none")]
pub dns: Option<DnsState>,
#[serde(rename = "route-rules", skip_serializing_if = "Option::is_none")]
pub rules: Option<RouteRuleState>,
#[serde(skip_serializing_if = "Option::is_none")]
pub routes: Option<RouteState>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub interfaces: Vec<Interface>,
#[serde(rename = "ovs-db", skip_serializing_if = "Option::is_none")]
pub ovsdb: Option<OvsDbGlobalConfig>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ovn: Option<OvnConfiguration>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct InterfaceSpec {
pub struct HostNameState {
#[serde(skip_serializing_if = "Option::is_none")]
pub running: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub config: Option<String>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct DnsState {
#[serde(skip_serializing_if = "Option::is_none")]
pub running: Option<DnsResolverConfig>,
#[serde(skip_serializing_if = "Option::is_none")]
pub config: Option<DnsResolverConfig>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct DnsResolverConfig {
#[serde(skip_serializing_if = "Option::is_none")]
pub search: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub server: Option<Vec<String>>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct RouteRuleState {
#[serde(skip_serializing_if = "Option::is_none")]
pub config: Option<Vec<RouteRule>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub running: Option<Vec<RouteRule>>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct RouteState {
#[serde(skip_serializing_if = "Option::is_none")]
pub config: Option<Vec<Route>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub running: Option<Vec<Route>>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct RouteRule {
#[serde(rename = "ip-from", skip_serializing_if = "Option::is_none")]
pub ip_from: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub priority: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub route_table: Option<u32>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct Route {
#[serde(skip_serializing_if = "Option::is_none")]
pub destination: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metric: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub next_hop_address: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub next_hop_interface: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub table_id: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mtu: Option<u32>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct OvsDbGlobalConfig {
#[serde(skip_serializing_if = "Option::is_none")]
pub external_ids: Option<BTreeMap<String, String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub other_config: Option<BTreeMap<String, String>>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct OvnConfiguration {
#[serde(skip_serializing_if = "Option::is_none")]
pub bridge_mappings: Option<Vec<OvnBridgeMapping>>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct OvnBridgeMapping {
#[serde(skip_serializing_if = "Option::is_none")]
pub localnet: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bridge: Option<String>,
}
#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[serde(untagged)]
#[serde(rename_all = "kebab-case")]
pub enum StpSpec {
Bool(bool),
Options(StpOptions),
}
#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct LldpState {
#[serde(skip_serializing_if = "Option::is_none")]
pub enabled: Option<bool>,
}
#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct OvsDb {
#[serde(skip_serializing_if = "Option::is_none")]
pub external_ids: Option<BTreeMap<String, String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub other_config: Option<BTreeMap<String, String>>,
}
#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct PatchState {
#[serde(skip_serializing_if = "Option::is_none")]
pub peer: Option<String>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct Interface {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
@ -98,10 +273,38 @@ pub struct InterfaceSpec {
pub infiniband: Option<InfinibandSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub linux_bridge: Option<LinuxBridgeSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(alias = "bridge")]
pub ovs_bridge: Option<OvsBridgeSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ethtool: Option<EthtoolSpec>,
pub ethtool: Option<Value>,
#[serde(skip_serializing_if = "Option::is_none")]
pub accept_all_mac_addresses: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub identifier: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub lldp: Option<LldpState>,
#[serde(skip_serializing_if = "Option::is_none")]
pub permanent_mac_address: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_mtu: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub min_mtu: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mptcp: Option<Value>,
#[serde(skip_serializing_if = "Option::is_none")]
pub profile_name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub wait_ip: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ovs_db: Option<OvsDb>,
#[serde(skip_serializing_if = "Option::is_none")]
pub driver: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub patch: Option<PatchState>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
@ -287,11 +490,15 @@ pub struct OvsBridgeSpec {
#[serde(rename_all = "kebab-case")]
pub struct OvsBridgeOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub stp: Option<bool>,
pub stp: Option<StpSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub rstp: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mcast_snooping_enable: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub datapath: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub fail_mode: Option<String>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
@ -305,18 +512,3 @@ pub struct OvsPortSpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub r#type: Option<String>,
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct EthtoolSpec {
// TODO: Properly describe this spec (https://nmstate.io/devel/yaml_api.html#ethtool)
}
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct EthtoolFecSpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub auto: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mode: Option<String>,
}

View File

@ -9,7 +9,7 @@ pub struct Interface {
pub physical_interface_name: String,
pub descr: Option<MaybeString>,
pub mtu: Option<MaybeString>,
pub enable: MaybeString,
pub enable: Option<MaybeString>,
pub lock: Option<MaybeString>,
#[yaserde(rename = "spoofmac")]
pub spoof_mac: Option<MaybeString>,