Compare commits

...

6 Commits

Author SHA1 Message Date
759a9287d3 Merge remote-tracking branch 'origin/master' into feat/cluster_monitoring
Some checks failed
Run Check Script / check (pull_request) Failing after 19s
2025-11-05 17:02:10 -05:00
24922321b1 fix: webhook name must be k8s field compliant, add a FIXME note 2025-11-05 16:59:48 -05:00
4ff57062ae Merge pull request 'feat(kube): Convert kube_openapi Resource to DynamicObject' (#180) from feat/kube_convert_dynamic_resource into master
Some checks failed
Run Check Script / check (push) Successful in 1m19s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m23s
Reviewed-on: #180
Reviewed-by: Ian Letourneau <ian@noma.to>
2025-11-05 21:48:32 +00:00
50ce54ea66 Merge pull request 'fix(opnsense-config): mark Interface::enable as optional' (#181) from fix-opnsense-config into master
Some checks failed
Run Check Script / check (push) Successful in 1m12s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m27s
Reviewed-on: #181
2025-11-05 17:13:29 +00:00
Ian Letourneau
827a49e56b fix(opnsense-config): mark Interface::enable as optional
All checks were successful
Run Check Script / check (pull_request) Successful in 1m11s
2025-11-04 17:25:30 -05:00
c80ede706b fix(host_network): adjust bond & port-channel configuration (partial) (#175)
Some checks failed
Run Check Script / check (push) Successful in 1m20s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m21s
## Description
* Replace the CatalogSource approach to installing the OperatorHub.io catalog with a simpler, more straightforward way to install NMState
* Improve logging
* Add report summarizing the host network configuration that was applied (which host, bonds, port-channels)
* Fix command to find next available port channel id

## Extra info
Using the `apply_url` approach to install the NMState operator isn't the best approach: it's harder to maintain and upgrade. But it helps us achieve what we wanted for now: install the NMState Operator to configure bonds on a host.

The preferred approach, installing an operator from the OperatorHub.io catalog, didn't work for now. We had a timeout error with DeadlineExceeded probably caused by an insufficient CPU/Memory allocation to query such a big catalog, even though we tweaked the RAM allocation (we couldn't find a way to do it for CPU).

Spent too much time on this so we stopped these efforts for now. It would be good to get back to it when we need to install something else from a custom catalog.

Reviewed-on: #175
2025-10-29 17:09:16 +00:00
28 changed files with 847 additions and 295 deletions

19
Cargo.lock generated
View File

@@ -1804,6 +1804,25 @@ dependencies = [
"url",
]
[[package]]
name = "example-okd-cluster-alerts"
version = "0.1.0"
dependencies = [
"brocade",
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_secret",
"harmony_secret_derive",
"harmony_types",
"log",
"serde",
"tokio",
"url",
]
[[package]]
name = "example-okd-install"
version = "0.1.0"

View File

@@ -31,6 +31,7 @@ pub struct BrocadeOptions {
/// Time limits used while talking to a Brocade switch.
pub struct TimeoutConfig {
// NOTE(review): presumably the maximum wait for the remote shell to become usable — confirm.
pub shell_ready: Duration,
/// Maximum time granted to a command; some commands (e.g. `deploy` for a LAG) can take a while.
pub command_execution: Duration,
/// Delay after which the session starts logging that it is waiting for command output.
pub command_output: Duration,
// NOTE(review): presumably the time allowed for closing the session — confirm.
pub cleanup: Duration,
// NOTE(review): presumably the polling delay between message reads — confirm.
pub message_wait: Duration,
}
@@ -40,6 +41,7 @@ impl Default for TimeoutConfig {
Self {
shell_ready: Duration::from_secs(10),
command_execution: Duration::from_secs(60), // Commands like `deploy` (for a LAG) can take a while
command_output: Duration::from_secs(5), // Delay to start logging "waiting for command output"
cleanup: Duration::from_secs(10),
message_wait: Duration::from_millis(500),
}

View File

@@ -3,6 +3,7 @@ use std::str::FromStr;
use async_trait::async_trait;
use harmony_types::switch::{PortDeclaration, PortLocation};
use log::{debug, info};
use regex::Regex;
use crate::{
BrocadeClient, BrocadeInfo, Error, ExecutionMode, InterSwitchLink, InterfaceInfo,
@@ -103,13 +104,37 @@ impl NetworkOperatingSystemClient {
};
Some(Ok(InterfaceInfo {
name: format!("{} {}", interface_type, port_location),
name: format!("{interface_type} {port_location}"),
port_location,
interface_type,
operating_mode,
status,
}))
}
fn map_configure_interfaces_error(&self, err: Error) -> Error {
debug!("[Brocade] {err}");
if let Error::CommandError(message) = &err {
if message.contains("switchport")
&& message.contains("Cannot configure aggregator member")
{
let re = Regex::new(r"\(conf-if-([a-zA-Z]+)-([\d/]+)\)#").unwrap();
if let Some(caps) = re.captures(message) {
let interface_type = &caps[1];
let port_location = &caps[2];
let interface = format!("{interface_type} {port_location}");
return Error::CommandError(format!(
"Cannot configure interface '{interface}', it is a member of a port-channel (LAG)"
));
}
}
}
err
}
}
#[async_trait]
@@ -197,11 +222,10 @@ impl BrocadeClient for NetworkOperatingSystemClient {
commands.push("exit".into());
}
commands.push("write memory".into());
self.shell
.run_commands(commands, ExecutionMode::Regular)
.await?;
.await
.map_err(|err| self.map_configure_interfaces_error(err))?;
info!("[Brocade] Interfaces configured.");
@@ -213,7 +237,7 @@ impl BrocadeClient for NetworkOperatingSystemClient {
let output = self
.shell
.run_command("show port-channel", ExecutionMode::Regular)
.run_command("show port-channel summary", ExecutionMode::Regular)
.await?;
let used_ids: Vec<u8> = output
@@ -248,7 +272,12 @@ impl BrocadeClient for NetworkOperatingSystemClient {
ports: &[PortLocation],
) -> Result<(), Error> {
info!(
"[Brocade] Configuring port-channel '{channel_name} {channel_id}' with ports: {ports:?}"
"[Brocade] Configuring port-channel '{channel_id} {channel_name}' with ports: {}",
ports
.iter()
.map(|p| format!("{p}"))
.collect::<Vec<String>>()
.join(", ")
);
let interfaces = self.get_interfaces().await?;
@@ -276,8 +305,6 @@ impl BrocadeClient for NetworkOperatingSystemClient {
commands.push("exit".into());
}
commands.push("write memory".into());
self.shell
.run_commands(commands, ExecutionMode::Regular)
.await?;
@@ -294,7 +321,6 @@ impl BrocadeClient for NetworkOperatingSystemClient {
"configure terminal".into(),
format!("no interface port-channel {}", channel_name),
"exit".into(),
"write memory".into(),
];
self.shell

View File

@@ -211,7 +211,7 @@ impl BrocadeSession {
let mut output = Vec::new();
let start = Instant::now();
let read_timeout = Duration::from_millis(500);
let log_interval = Duration::from_secs(3);
let log_interval = Duration::from_secs(5);
let mut last_log = Instant::now();
loop {
@@ -221,7 +221,9 @@ impl BrocadeSession {
));
}
if start.elapsed() > Duration::from_secs(5) && last_log.elapsed() > log_interval {
if start.elapsed() > self.options.timeouts.command_output
&& last_log.elapsed() > log_interval
{
info!("[Brocade] Waiting for command output...");
last_log = Instant::now();
}
@@ -276,7 +278,7 @@ impl BrocadeSession {
let output_lower = output.to_lowercase();
if ERROR_PATTERNS.iter().any(|&p| output_lower.contains(p)) {
return Err(Error::CommandError(format!(
"Command '{command}' failed: {}",
"Command error: {}",
output.trim()
)));
}

View File

@@ -61,6 +61,7 @@ async fn main() {
let gateway_ipv4 = Ipv4Addr::new(192, 168, 33, 1);
let gateway_ip = IpAddr::V4(gateway_ipv4);
let topology = harmony::topology::HAClusterTopology {
kubeconfig: None,
domain_name: "ncd0.harmony.mcd".to_string(), // TODO this must be set manually correctly
// when setting up the opnsense firewall
router: Arc::new(UnmanagedRouter::new(

View File

@@ -15,7 +15,7 @@ async fn main() {
K8sAnywhereTopology::from_env(),
vec![Box::new(OpenshiftClusterAlertScore {
receivers: vec![Box::new(DiscordWebhook {
name: "Webhook example".to_string(),
name: "discord-webhook-example".to_string(),
url: hurl!("http://something.o"),
})],
})],

View File

@@ -59,6 +59,7 @@ pub async fn get_topology() -> HAClusterTopology {
let gateway_ipv4 = ipv4!("192.168.1.1");
let gateway_ip = IpAddr::V4(gateway_ipv4);
harmony::topology::HAClusterTopology {
kubeconfig: None,
domain_name: "demo.harmony.mcd".to_string(),
router: Arc::new(UnmanagedRouter::new(
gateway_ip,

View File

@@ -54,6 +54,7 @@ pub async fn get_topology() -> HAClusterTopology {
let gateway_ipv4 = ipv4!("192.168.1.1");
let gateway_ip = IpAddr::V4(gateway_ipv4);
harmony::topology::HAClusterTopology {
kubeconfig: None,
domain_name: "demo.harmony.mcd".to_string(),
router: Arc::new(UnmanagedRouter::new(
gateway_ip,

View File

@@ -57,6 +57,7 @@ async fn main() {
let gateway_ipv4 = Ipv4Addr::new(10, 100, 8, 1);
let gateway_ip = IpAddr::V4(gateway_ipv4);
let topology = harmony::topology::HAClusterTopology {
kubeconfig: None,
domain_name: "demo.harmony.mcd".to_string(),
router: Arc::new(UnmanagedRouter::new(
gateway_ip,

View File

@@ -4,19 +4,16 @@ use harmony_types::{
net::{MacAddress, Url},
switch::PortLocation,
};
use k8s_openapi::api::core::v1::Namespace;
use kube::api::ObjectMeta;
use log::debug;
use log::info;
use crate::data::FileContent;
use crate::executors::ExecutorError;
use crate::hardware::PhysicalHost;
use crate::modules::okd::crd::{
InstallPlanApproval, OperatorGroup, OperatorGroupSpec, Subscription, SubscriptionSpec,
nmstate::{self, NMState, NodeNetworkConfigurationPolicy, NodeNetworkConfigurationPolicySpec},
};
use crate::modules::okd::crd::nmstate::{self, NodeNetworkConfigurationPolicy};
use crate::topology::PxeOptions;
use crate::{data::FileContent, modules::okd::crd::nmstate::NMState};
use crate::{
executors::ExecutorError, modules::okd::crd::nmstate::NodeNetworkConfigurationPolicySpec,
};
use super::{
DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig,
@@ -42,6 +39,7 @@ pub struct HAClusterTopology {
pub bootstrap_host: LogicalHost,
pub control_plane: Vec<LogicalHost>,
pub workers: Vec<LogicalHost>,
pub kubeconfig: Option<String>,
}
#[async_trait]
@@ -60,9 +58,17 @@ impl Topology for HAClusterTopology {
#[async_trait]
impl K8sclient for HAClusterTopology {
async fn k8s_client(&self) -> Result<Arc<K8sClient>, String> {
Ok(Arc::new(
K8sClient::try_default().await.map_err(|e| e.to_string())?,
))
match &self.kubeconfig {
None => Ok(Arc::new(
K8sClient::try_default().await.map_err(|e| e.to_string())?,
)),
Some(kubeconfig) => {
let Some(client) = K8sClient::from_kubeconfig(&kubeconfig).await else {
return Err("Failed to create k8s client".to_string());
};
Ok(Arc::new(client))
}
}
}
}
@@ -88,60 +94,48 @@ impl HAClusterTopology {
}
async fn ensure_nmstate_operator_installed(&self) -> Result<(), String> {
// FIXME: Find a way to check nmstate is already available (get pod -n openshift-nmstate)
debug!("Installing NMState operator...");
let k8s_client = self.k8s_client().await?;
let nmstate_namespace = Namespace {
metadata: ObjectMeta {
name: Some("openshift-nmstate".to_string()),
finalizers: Some(vec!["kubernetes".to_string()]),
..Default::default()
},
..Default::default()
};
debug!("Creating NMState namespace: {nmstate_namespace:#?}");
k8s_client
.apply(&nmstate_namespace, None)
debug!("Installing NMState controller...");
k8s_client.apply_url(url::Url::parse("https://github.com/nmstate/kubernetes-nmstate/releases/download/v0.84.0/nmstate.io_nmstates.yaml
").unwrap(), Some("nmstate"))
.await
.map_err(|e| e.to_string())?;
let nmstate_operator_group = OperatorGroup {
metadata: ObjectMeta {
name: Some("openshift-nmstate".to_string()),
namespace: Some("openshift-nmstate".to_string()),
..Default::default()
},
spec: OperatorGroupSpec {
target_namespaces: vec!["openshift-nmstate".to_string()],
},
};
debug!("Creating NMState operator group: {nmstate_operator_group:#?}");
k8s_client
.apply(&nmstate_operator_group, None)
debug!("Creating NMState namespace...");
k8s_client.apply_url(url::Url::parse("https://github.com/nmstate/kubernetes-nmstate/releases/download/v0.84.0/namespace.yaml
").unwrap(), Some("nmstate"))
.await
.map_err(|e| e.to_string())?;
let nmstate_subscription = Subscription {
metadata: ObjectMeta {
name: Some("kubernetes-nmstate-operator".to_string()),
namespace: Some("openshift-nmstate".to_string()),
..Default::default()
},
spec: SubscriptionSpec {
channel: Some("stable".to_string()),
install_plan_approval: Some(InstallPlanApproval::Automatic),
name: "kubernetes-nmstate-operator".to_string(),
source: "redhat-operators".to_string(),
source_namespace: "openshift-marketplace".to_string(),
},
};
debug!("Subscribing to NMState Operator: {nmstate_subscription:#?}");
k8s_client
.apply(&nmstate_subscription, None)
debug!("Creating NMState service account...");
k8s_client.apply_url(url::Url::parse("https://github.com/nmstate/kubernetes-nmstate/releases/download/v0.84.0/service_account.yaml
").unwrap(), Some("nmstate"))
.await
.map_err(|e| e.to_string())?;
debug!("Creating NMState role...");
k8s_client.apply_url(url::Url::parse("https://github.com/nmstate/kubernetes-nmstate/releases/download/v0.84.0/role.yaml
").unwrap(), Some("nmstate"))
.await
.map_err(|e| e.to_string())?;
debug!("Creating NMState role binding...");
k8s_client.apply_url(url::Url::parse("https://github.com/nmstate/kubernetes-nmstate/releases/download/v0.84.0/role_binding.yaml
").unwrap(), Some("nmstate"))
.await
.map_err(|e| e.to_string())?;
debug!("Creating NMState operator...");
k8s_client.apply_url(url::Url::parse("https://github.com/nmstate/kubernetes-nmstate/releases/download/v0.84.0/operator.yaml
").unwrap(), Some("nmstate"))
.await
.map_err(|e| e.to_string())?;
k8s_client
.wait_until_deployment_ready("nmstate-operator", Some("nmstate"), None)
.await?;
let nmstate = NMState {
metadata: ObjectMeta {
name: Some("nmstate".to_string()),
@@ -162,11 +156,7 @@ impl HAClusterTopology {
42 // FIXME: Find a better way to declare the bond id
}
async fn configure_bond(
&self,
host: &PhysicalHost,
config: &HostNetworkConfig,
) -> Result<(), SwitchError> {
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> {
self.ensure_nmstate_operator_installed()
.await
.map_err(|e| {
@@ -175,29 +165,33 @@ impl HAClusterTopology {
))
})?;
let bond_config = self.create_bond_configuration(host, config);
debug!("Configuring bond for host {host:?}: {bond_config:#?}");
let bond_config = self.create_bond_configuration(config);
debug!(
"Applying NMState bond config for host {}: {bond_config:#?}",
config.host_id
);
self.k8s_client()
.await
.unwrap()
.apply(&bond_config, None)
.await
.unwrap();
.map_err(|e| SwitchError::new(format!("Failed to configure bond: {e}")))?;
todo!()
Ok(())
}
fn create_bond_configuration(
&self,
host: &PhysicalHost,
config: &HostNetworkConfig,
) -> NodeNetworkConfigurationPolicy {
let host_name = host.id.clone();
let host_name = &config.host_id;
let bond_id = self.get_next_bond_id();
let bond_name = format!("bond{bond_id}");
info!("Configuring bond '{bond_name}' for host '{host_name}'...");
let mut bond_mtu: Option<u32> = None;
let mut bond_mac_address: Option<String> = None;
let mut copy_mac_from: Option<String> = None;
let mut bond_ports = Vec::new();
let mut interfaces: Vec<nmstate::InterfaceSpec> = Vec::new();
@@ -223,14 +217,14 @@ impl HAClusterTopology {
..Default::default()
});
bond_ports.push(interface_name);
bond_ports.push(interface_name.clone());
// Use the first port's details for the bond mtu and mac address
if bond_mtu.is_none() {
bond_mtu = Some(switch_port.interface.mtu);
}
if bond_mac_address.is_none() {
bond_mac_address = Some(switch_port.interface.mac_address.to_string());
if copy_mac_from.is_none() {
copy_mac_from = Some(interface_name);
}
}
@@ -239,8 +233,7 @@ impl HAClusterTopology {
description: Some(format!("Network bond for host {host_name}")),
r#type: "bond".to_string(),
state: "up".to_string(),
mtu: bond_mtu,
mac_address: bond_mac_address,
copy_mac_from,
ipv4: Some(nmstate::IpStackSpec {
dhcp: Some(true),
enabled: Some(true),
@@ -275,16 +268,12 @@ impl HAClusterTopology {
}
}
async fn configure_port_channel(
&self,
host: &PhysicalHost,
config: &HostNetworkConfig,
) -> Result<(), SwitchError> {
async fn configure_port_channel(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> {
debug!("Configuring port channel: {config:#?}");
let switch_ports = config.switch_ports.iter().map(|s| s.port.clone()).collect();
self.switch_client
.configure_port_channel(&format!("Harmony_{}", host.id), switch_ports)
.configure_port_channel(&format!("Harmony_{}", config.host_id), switch_ports)
.await
.map_err(|e| SwitchError::new(format!("Failed to configure switch: {e}")))?;
@@ -299,6 +288,7 @@ impl HAClusterTopology {
};
Self {
kubeconfig: None,
domain_name: "DummyTopology".to_string(),
router: dummy_infra.clone(),
load_balancer: dummy_infra.clone(),
@@ -480,13 +470,9 @@ impl Switch for HAClusterTopology {
Ok(port)
}
async fn configure_host_network(
&self,
host: &PhysicalHost,
config: HostNetworkConfig,
) -> Result<(), SwitchError> {
self.configure_bond(host, &config).await?;
self.configure_port_channel(host, &config).await
async fn configure_host_network(&self, config: &HostNetworkConfig) -> Result<(), SwitchError> {
self.configure_bond(config).await?;
self.configure_port_channel(config).await
}
}

View File

@@ -13,7 +13,8 @@ use kube::{
Client, Config, Discovery, Error, Resource,
api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt},
config::{KubeConfigOptions, Kubeconfig},
core::ErrorResponse,
core::{DynamicResourceScope, ErrorResponse},
discovery::{ApiCapabilities, Scope},
error::DiscoveryError,
runtime::reflector::Lookup,
};
@@ -22,11 +23,12 @@ use kube::{
api::{ApiResource, GroupVersionKind},
runtime::wait::await_condition,
};
use log::{debug, error, info, trace};
use log::{debug, error, info, trace, warn};
use serde::{Serialize, de::DeserializeOwned};
use serde_json::json;
use similar::TextDiff;
use tokio::{io::AsyncReadExt, time::sleep};
use url::Url;
#[derive(new, Clone)]
pub struct K8sClient {
@@ -88,7 +90,8 @@ impl K8sClient {
} else {
Api::default_namespaced_with(self.client.clone(), &gvk)
};
Ok(resource.get(name).await?)
resource.get(name).await
}
pub async fn get_secret_json_value(
@@ -120,8 +123,9 @@ impl K8sClient {
debug!("getting default namespace deployment");
Api::default_namespaced(self.client.clone())
};
debug!("getting deployment {} in ns {}", name, namespace.unwrap());
Ok(deps.get_opt(name).await?)
deps.get_opt(name).await
}
pub async fn get_pod(&self, name: &str, namespace: Option<&str>) -> Result<Option<Pod>, Error> {
@@ -130,7 +134,8 @@ impl K8sClient {
} else {
Api::default_namespaced(self.client.clone())
};
Ok(pods.get_opt(name).await?)
pods.get_opt(name).await
}
pub async fn scale_deployment(
@@ -173,9 +178,9 @@ impl K8sClient {
pub async fn wait_until_deployment_ready(
&self,
name: String,
name: &str,
namespace: Option<&str>,
timeout: Option<u64>,
timeout: Option<Duration>,
) -> Result<(), String> {
let api: Api<Deployment>;
@@ -185,9 +190,9 @@ impl K8sClient {
api = Api::default_namespaced(self.client.clone());
}
let establish = await_condition(api, name.as_str(), conditions::is_deployment_completed());
let t = timeout.unwrap_or(300);
let res = tokio::time::timeout(std::time::Duration::from_secs(t), establish).await;
let establish = await_condition(api, name, conditions::is_deployment_completed());
let timeout = timeout.unwrap_or(Duration::from_secs(120));
let res = tokio::time::timeout(timeout, establish).await;
if res.is_ok() {
Ok(())
@@ -277,7 +282,7 @@ impl K8sClient {
if let Some(s) = status.status {
let mut stdout_buf = String::new();
if let Some(mut stdout) = process.stdout().take() {
if let Some(mut stdout) = process.stdout() {
stdout
.read_to_string(&mut stdout_buf)
.await
@@ -349,6 +354,169 @@ impl K8sClient {
}
}
fn get_api_for_dynamic_object(
&self,
object: &DynamicObject,
ns: Option<&str>,
) -> Result<Api<DynamicObject>, Error> {
let api_resource = object
.types
.as_ref()
.and_then(|t| {
let parts: Vec<&str> = t.api_version.split('/').collect();
match parts.as_slice() {
[version] => Some(ApiResource::from_gvk(&GroupVersionKind::gvk(
"", version, &t.kind,
))),
[group, version] => Some(ApiResource::from_gvk(&GroupVersionKind::gvk(
group, version, &t.kind,
))),
_ => None,
}
})
.ok_or_else(|| {
Error::BuildRequest(kube::core::request::Error::Validation(
"Invalid apiVersion in DynamicObject {object:#?}".to_string(),
))
})?;
match ns {
Some(ns) => Ok(Api::namespaced_with(self.client.clone(), ns, &api_resource)),
None => Ok(Api::default_namespaced_with(
self.client.clone(),
&api_resource,
)),
}
}
/// Applies every object of `resource` in order through `apply_dynamic`.
///
/// Objects are applied one after the other; the first failure aborts the run and is returned,
/// leaving the remaining objects untouched. On success the applied objects are returned in the
/// same order as the input.
pub async fn apply_dynamic_many(
    &self,
    resource: &[DynamicObject],
    namespace: Option<&str>,
    force_conflicts: bool,
) -> Result<Vec<DynamicObject>, Error> {
    let mut applied = Vec::new();
    for object in resource {
        let outcome = self
            .apply_dynamic(object, namespace, force_conflicts)
            .await?;
        applied.push(outcome);
    }
    Ok(applied)
}
/// Apply DynamicObject resource to the cluster
///
/// The object is sent as a server-side apply patch using the `harmony` field manager;
/// `force_conflicts` is copied into the patch parameters' `force` flag.
///
/// When `crate::config::DRY_RUN` is set nothing is patched. The current object is fetched and a
/// line diff between its YAML (without the top-level `status` entry) and the YAML of `resource`
/// is printed to stdout. The current object is returned when both are identical; `resource` is
/// returned (cloned) when they differ or when the object does not exist yet (HTTP 404).
///
/// # Errors
///
/// Returns a validation error when `resource` has no `metadata.name`, and propagates errors
/// from building the API handle, from the dry-run `get` (other than 404) and from the patch.
pub async fn apply_dynamic(
&self,
resource: &DynamicObject,
namespace: Option<&str>,
force_conflicts: bool,
) -> Result<DynamicObject, Error> {
// Build API for this dynamic object
let api = self.get_api_for_dynamic_object(resource, namespace)?;
// A name is mandatory: both the dry-run lookup and the patch address the object by name.
let name = resource
.metadata
.name
.as_ref()
.ok_or_else(|| {
Error::BuildRequest(kube::core::request::Error::Validation(
"DynamicObject must have metadata.name".to_string(),
))
})?
.as_str();
debug!(
"Applying dynamic resource kind={:?} apiVersion={:?} name='{}' ns={:?}",
resource.types.as_ref().map(|t| &t.kind),
resource.types.as_ref().map(|t| &t.api_version),
name,
namespace
);
// A payload that cannot be serialized is logged as `null` rather than failing the apply.
trace!(
"Dynamic resource payload:\n{:#}",
serde_json::to_value(resource).unwrap_or(serde_json::Value::Null)
);
// Using same field manager as in apply()
let mut patch_params = PatchParams::apply("harmony");
patch_params.force = force_conflicts;
if *crate::config::DRY_RUN {
// Dry-run path: fetch current, show diff, and return appropriate object
match api.get(name).await {
Ok(current) => {
trace!("Received current dynamic value {current:#?}");
println!("\nPerforming dry-run for resource: '{}'", name);
// Serialize current and new, and strip status from current if present
let mut current_yaml =
serde_yaml::to_value(&current).unwrap_or_else(|_| serde_yaml::Value::Null);
if let Some(map) = current_yaml.as_mapping_mut() {
if map.contains_key(&serde_yaml::Value::String("status".to_string())) {
let removed =
map.remove(&serde_yaml::Value::String("status".to_string()));
trace!("Removed status from current dynamic object: {:?}", removed);
} else {
trace!(
"Did not find status entry for current dynamic object {}/{}",
current.metadata.namespace.as_deref().unwrap_or(""),
current.metadata.name.as_deref().unwrap_or("")
);
}
}
// Serialization failures fall back to a fixed placeholder text, so they show up in the
// printed diff instead of aborting the dry-run.
let current_yaml = serde_yaml::to_string(&current_yaml)
.unwrap_or_else(|_| "Failed to serialize current resource".to_string());
let new_yaml = serde_yaml::to_string(resource)
.unwrap_or_else(|_| "Failed to serialize new resource".to_string());
if current_yaml == new_yaml {
println!("No changes detected.");
return Ok(current);
}
println!("Changes detected:");
let diff = TextDiff::from_lines(&current_yaml, &new_yaml);
for change in diff.iter_all_changes() {
let sign = match change.tag() {
similar::ChangeTag::Delete => "-",
similar::ChangeTag::Insert => "+",
similar::ChangeTag::Equal => " ",
};
print!("{}{}", sign, change);
}
// Return the incoming resource as the would-be applied state
Ok(resource.clone())
}
// A 404 means the object does not exist yet: the whole resource is shown as an addition.
Err(Error::Api(ErrorResponse { code: 404, .. })) => {
println!("\nPerforming dry-run for new resource: '{}'", name);
println!(
"Resource does not exist. It would be created with the following content:"
);
let new_yaml = serde_yaml::to_string(resource)
.unwrap_or_else(|_| "Failed to serialize new resource".to_string());
for line in new_yaml.lines() {
println!("+{}", line);
}
Ok(resource.clone())
}
Err(e) => {
error!("Failed to get dynamic resource '{}': {}", name, e);
Err(e)
}
}
} else {
// Real apply via server-side apply
debug!("Patching (server-side apply) dynamic resource '{}'", name);
api.patch(name, &patch_params, &Patch::Apply(resource))
.await
.map_err(|e| {
error!("Failed to apply dynamic resource '{}': {}", name, e);
e
})
}
}
/// Apply a resource in namespace
///
/// See `kubectl apply` for more information on the expected behavior of this function
@@ -383,14 +551,14 @@ impl K8sClient {
Ok(current) => {
trace!("Received current value {current:#?}");
// The resource exists, so we calculate and display a diff.
println!("\nPerforming dry-run for resource: '{}'", name);
println!("\nPerforming dry-run for resource: '{name}'");
let mut current_yaml = serde_yaml::to_value(&current).unwrap_or_else(|_| {
panic!("Could not serialize current value : {current:#?}")
});
if current_yaml.is_mapping() && current_yaml.get("status").is_some() {
let map = current_yaml.as_mapping_mut().unwrap();
let removed = map.remove_entry("status");
trace!("Removed status {:?}", removed);
trace!("Removed status {removed:?}");
} else {
trace!(
"Did not find status entry for current object {}/{}",
@@ -419,14 +587,14 @@ impl K8sClient {
similar::ChangeTag::Insert => "+",
similar::ChangeTag::Equal => " ",
};
print!("{}{}", sign, change);
print!("{sign}{change}");
}
// In a dry run, we return the new resource state that would have been applied.
Ok(resource.clone())
}
Err(Error::Api(ErrorResponse { code: 404, .. })) => {
// The resource does not exist, so the "diff" is the entire new resource.
println!("\nPerforming dry-run for new resource: '{}'", name);
println!("\nPerforming dry-run for new resource: '{name}'");
println!(
"Resource does not exist. It would be created with the following content:"
);
@@ -435,14 +603,14 @@ impl K8sClient {
// Print each line of the new resource with a '+' prefix.
for line in new_yaml.lines() {
println!("+{}", line);
println!("+{line}");
}
// In a dry run, we return the new resource state that would have been created.
Ok(resource.clone())
}
Err(e) => {
// Another API error occurred.
error!("Failed to get resource '{}': {}", name, e);
error!("Failed to get resource '{name}': {e}");
Err(e)
}
}
@@ -457,7 +625,7 @@ impl K8sClient {
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as kube::Resource>::DynamicType: Default,
<K as Resource>::DynamicType: Default,
{
let mut result = Vec::new();
for r in resource.iter() {
@@ -522,10 +690,7 @@ impl K8sClient {
// 6. Apply the object to the cluster using Server-Side Apply.
// This will create the resource if it doesn't exist, or update it if it does.
println!(
"Applying Argo Application '{}' in namespace '{}'...",
name, namespace
);
println!("Applying '{name}' in namespace '{namespace}'...",);
let patch_params = PatchParams::apply("harmony"); // Use a unique field manager name
let result = api.patch(name, &patch_params, &Patch::Apply(&obj)).await?;
@@ -534,6 +699,51 @@ impl K8sClient {
Ok(())
}
/// Apply a resource from a URL
///
/// It is the equivalent of `kubectl apply -f <url>`
pub async fn apply_url(&self, url: Url, ns: Option<&str>) -> Result<(), Error> {
let patch_params = PatchParams::apply("harmony");
let discovery = kube::Discovery::new(self.client.clone()).run().await?;
let yaml = reqwest::get(url)
.await
.expect("Could not get URL")
.text()
.await
.expect("Could not get content from URL");
for doc in multidoc_deserialize(&yaml).expect("failed to parse YAML from file") {
let obj: DynamicObject =
serde_yaml::from_value(doc).expect("cannot apply without valid YAML");
let namespace = obj.metadata.namespace.as_deref().or(ns);
let type_meta = obj
.types
.as_ref()
.expect("cannot apply object without valid TypeMeta");
let gvk = GroupVersionKind::try_from(type_meta)
.expect("cannot apply object without valid GroupVersionKind");
let name = obj.name_any();
if let Some((ar, caps)) = discovery.resolve_gvk(&gvk) {
let api = get_dynamic_api(ar, caps, self.client.clone(), namespace, false);
trace!(
"Applying {}: \n{}",
gvk.kind,
serde_yaml::to_string(&obj).expect("Failed to serialize YAML")
);
let data: serde_json::Value =
serde_json::to_value(&obj).expect("Failed to serialize JSON");
let _r = api.patch(&name, &patch_params, &Patch::Apply(data)).await?;
debug!("applied {} {}", gvk.kind, name);
} else {
warn!("Cannot apply document for unknown {gvk:?}");
}
}
Ok(())
}
pub(crate) async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
let k = match Kubeconfig::read_from(path) {
Ok(k) => k,
@@ -553,6 +763,31 @@ impl K8sClient {
}
}
/// Picks the `Api<DynamicObject>` flavour matching the resource scope.
///
/// A cluster-scoped resource, or `all == true`, yields a handle spanning every namespace.
/// Otherwise the handle targets `ns` when provided and the client's default namespace when not.
fn get_dynamic_api(
    resource: ApiResource,
    capabilities: ApiCapabilities,
    client: Client,
    ns: Option<&str>,
    all: bool,
) -> Api<DynamicObject> {
    let cluster_wide = capabilities.scope == Scope::Cluster || all;
    match ns {
        _ if cluster_wide => Api::all_with(client, &resource),
        Some(namespace) => Api::namespaced_with(client, namespace, &resource),
        None => Api::default_namespaced_with(client, &resource),
    }
}
/// Splits a multi-document YAML string into one `serde_yaml::Value` per document.
///
/// Parsing stops at the first document that fails to deserialize and that error is returned.
fn multidoc_deserialize(data: &str) -> Result<Vec<serde_yaml::Value>, serde_yaml::Error> {
    use serde::Deserialize;

    serde_yaml::Deserializer::from_str(data)
        .map(|document| serde_yaml::Value::deserialize(document))
        .collect()
}
pub trait ApplyStrategy<K: Resource> {
fn get_api(client: &Client, ns: Option<&str>) -> Api<K>;
}

View File

@@ -1,4 +1,4 @@
use std::{collections::BTreeMap, process::Command, sync::Arc};
use std::{collections::BTreeMap, process::Command, sync::Arc, time::Duration};
use async_trait::async_trait;
use base64::{Engine, engine::general_purpose};
@@ -155,9 +155,9 @@ impl Grafana for K8sAnywhereTopology {
//TODO change this to a ensure ready or something better than just a timeout
client
.wait_until_deployment_ready(
"grafana-grafana-deployment".to_string(),
"grafana-grafana-deployment",
Some("grafana"),
Some(30),
Some(Duration::from_secs(30)),
)
.await?;

View File

@@ -9,6 +9,7 @@ use std::{
use async_trait::async_trait;
use derive_new::new;
use harmony_types::{
id::Id,
net::{IpAddress, MacAddress},
switch::PortLocation,
};
@@ -191,15 +192,12 @@ pub trait Switch: Send + Sync {
mac_address: &MacAddress,
) -> Result<Option<PortLocation>, SwitchError>;
async fn configure_host_network(
&self,
host: &PhysicalHost,
config: HostNetworkConfig,
) -> Result<(), SwitchError>;
async fn configure_host_network(&self, config: &HostNetworkConfig) -> Result<(), SwitchError>;
}
/// Network configuration to apply for a single host, handed to `Switch::configure_host_network`.
#[derive(Clone, Debug, PartialEq)]
pub struct HostNetworkConfig {
/// Identifier of the host this configuration belongs to.
pub host_id: Id,
/// Switch ports the host is attached to.
pub switch_ports: Vec<SwitchPort>,
}

View File

@@ -14,7 +14,7 @@ use k8s_openapi::{
},
apimachinery::pkg::util::intstr::IntOrString,
};
use kube::Resource;
use kube::{api::DynamicObject, Resource};
use log::debug;
use serde::de::DeserializeOwned;
use serde_json::json;

View File

@@ -11,7 +11,7 @@ pub struct InventoryRepositoryFactory;
impl InventoryRepositoryFactory {
pub async fn build() -> Result<Box<dyn InventoryRepository>, RepoError> {
Ok(Box::new(
SqliteInventoryRepository::new(&(*DATABASE_URL)).await?,
SqliteInventoryRepository::new(&DATABASE_URL).await?,
))
}
}

View File

@@ -38,13 +38,15 @@ impl<
+ 'static
+ Send
+ Clone,
T: Topology,
T: Topology + K8sclient,
> Score<T> for K8sResourceScore<K>
where
<K as kube::Resource>::DynamicType: Default,
{
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
todo!()
Box::new(K8sResourceInterpret {
score: self.clone(),
})
}
fn name(&self) -> String {

View File

@@ -32,6 +32,19 @@ use harmony_types::net::Url;
/// Alert receiver described by a name and a webhook URL.
#[derive(Debug, Clone, Serialize)]
pub struct DiscordWebhook {
// FIXME use a stricter type as this is used as a k8s resource name. It could also be converted
// to remove whitespace and other invalid characters, but this is a potential bug that is not
// very easy to figure out for beginners.
//
// It gives out error messages like this :
//
// [2025-10-30 15:10:49 ERROR harmony::domain::topology::k8s] Failed to get dynamic resource 'Webhook example-secret': Failed to build request: failed to build request: invalid uri character
// [2025-10-30 15:10:49 ERROR harmony_cli::cli_logger] ⚠️ InterpretError : Failed to build request: failed to build request: invalid uri character
// [2025-10-30 15:10:49 DEBUG harmony::domain::maestro] Got result Err(InterpretError { msg: "InterpretError : Failed to build request: failed to build request: invalid uri character" })
// [2025-10-30 15:10:49 INFO harmony_cli::cli_logger] 🎼 Harmony completed
//
// thread 'main' panicked at examples/okd_cluster_alerts/src/main.rs:25:6:
// called `Result::unwrap()` on an `Err` value: InterpretError { msg: "InterpretError : Failed to build request: failed to build request: invalid uri character" }
/// Receiver name; it is used as a k8s resource name, so it must be a valid one
/// (e.g. `discord-webhook-example`, not `Webhook example`).
pub name: String,
// NOTE(review): presumably the Discord webhook endpoint alerts are sent to — confirm.
pub url: Url,
}
@@ -84,7 +97,7 @@ impl AlertReceiver<OpenshiftClusterAlertSender> for DiscordWebhook {
}
fn name(&self) -> String {
todo!()
self.name.clone()
}
fn clone_box(&self) -> Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {

View File

@@ -100,11 +100,7 @@ impl<T: Topology + HelmCommand + K8sclient + MultiTargetTopology> Interpret<T> f
info!("deploying ntfy...");
client
.wait_until_deployment_ready(
"ntfy".to_string(),
Some(self.score.namespace.as_str()),
None,
)
.wait_until_deployment_ready("ntfy", Some(self.score.namespace.as_str()), None)
.await?;
info!("ntfy deployed");

View File

@@ -1,5 +1,4 @@
use base64::prelude::*;
use std::sync::Arc;
use async_trait::async_trait;
use harmony_types::id::Id;
@@ -11,21 +10,9 @@ use crate::{
data::Version,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
modules::{
application::Application,
monitoring::{
grafana::grafana::Grafana,
kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus,
okd::OpenshiftClusterAlertSender,
},
prometheus::prometheus::PrometheusMonitoring,
},
modules::monitoring::okd::OpenshiftClusterAlertSender,
score::Score,
topology::{
K8sclient, Topology,
k8s::K8sClient,
oberservability::monitoring::{AlertReceiver, AlertingInterpret, ScrapeTarget},
},
topology::{K8sclient, Topology, oberservability::monitoring::AlertReceiver},
};
impl Clone for Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {
@@ -74,31 +61,33 @@ impl<T: Topology + K8sclient> Interpret<T> for OpenshiftClusterAlertInterpret {
topology: &T,
) -> Result<Outcome, InterpretError> {
let client = topology.k8s_client().await?;
let openshift_monitoring_namespace = "openshift-monitoring";
let secret: DynamicObject = client
.get_secret_json_value("alertmanager-main", Some("openshift-monitoring"))
let mut alertmanager_main_secret: DynamicObject = client
.get_secret_json_value("alertmanager-main", Some(openshift_monitoring_namespace))
.await?;
trace!("Got secret {secret:?}");
trace!("Got secret {alertmanager_main_secret:#?}");
let data: serde_json::Value = secret.data;
let data: &mut serde_json::Value = &mut alertmanager_main_secret.data;
trace!("Alertmanager-main secret data {data:#?}");
let data_obj = data
.get_mut("data")
.ok_or(InterpretError::new(
"Missing 'data' field in alertmanager-main secret.".to_string(),
))?
.as_object_mut()
.ok_or(InterpretError::new(
"'data' field in alertmanager-main secret is expected to be an object ."
.to_string(),
))?;
// TODO fix this unwrap, handle the option gracefully
let config_b64 = match data.get("data") {
Some(data_value) => match data_value.get("alertmanager.yaml") {
Some(value) => value.as_str().unwrap_or(""),
None => {
return Err(InterpretError::new(
"Missing 'alertmanager.yaml' in alertmanager-main secret".to_string(),
));
}
},
None => {
return Err(InterpretError::new(
"Missing 'data' field in alertmanager-main secret.".to_string(),
));
}
};
let config_b64 = data_obj
.get("alertmanager.yaml")
.ok_or(InterpretError::new(
"Missing 'alertmanager.yaml' in alertmanager-main secret data".to_string(),
))?
.as_str()
.unwrap_or("");
trace!("Config base64 {config_b64}");
let config_bytes = BASE64_STANDARD.decode(config_b64).unwrap_or_default();
@@ -109,34 +98,28 @@ impl<T: Topology + K8sclient> Interpret<T> for OpenshiftClusterAlertInterpret {
debug!("Current alertmanager config {am_config:#?}");
let existing_receivers = if let Some(receivers) = am_config.get_mut("receivers") {
match receivers.as_mapping_mut() {
Some(recv) => recv,
let existing_receivers_sequence = if let Some(receivers) = am_config.get_mut("receivers") {
match receivers.as_sequence_mut() {
Some(seq) => seq,
None => {
return Err(InterpretError::new(format!(
"Expected alertmanager config receivers to be a mapping, got {receivers:?}"
"Expected alertmanager config receivers to be a sequence, got {:?}",
receivers
)));
}
}
} else {
&mut serde_yaml::mapping::Mapping::default()
&mut serde_yaml::Sequence::default()
};
trace!("Existing receivers : {existing_receivers:#?}");
let mut additional_resources = vec![];
for custom_receiver in &self.receivers {
let name = &custom_receiver.name();
if let Some(recv) = existing_receivers.get(name) {
info!(
"AlertManager receiver {name} already exists and will be overwritten : {recv:#?}"
);
}
debug!(
"Custom receiver YAML output: {:?}",
custom_receiver.as_alertmanager_receiver()
);
let name = custom_receiver.name();
let alertmanager_receiver = custom_receiver.as_alertmanager_receiver()?;
let json_value = alertmanager_receiver.receiver_config;
let json_value = custom_receiver.as_alertmanager_receiver()?.receiver_config;
let yaml_string = serde_json::to_string(&json_value).map_err(|e| {
InterpretError::new(format!("Failed to serialize receiver config: {}", e))
})?;
@@ -146,12 +129,71 @@ impl<T: Topology + K8sclient> Interpret<T> for OpenshiftClusterAlertInterpret {
InterpretError::new(format!("Failed to parse receiver config as YAML: {}", e))
})?;
existing_receivers.insert(serde_yaml::Value::from(name.as_str()), yaml_value);
if let Some(idx) = existing_receivers_sequence.iter().position(|r| {
r.get("name")
.and_then(|n| n.as_str())
.map_or(false, |n| n == name)
}) {
info!("Replacing existing AlertManager receiver: {}", name);
existing_receivers_sequence[idx] = yaml_value;
} else {
debug!("Adding new AlertManager receiver: {}", name);
existing_receivers_sequence.push(yaml_value);
}
additional_resources.push(alertmanager_receiver.additional_ressources);
}
debug!("Current alertmanager config {am_config:#?}");
// TODO
// - save new version of alertmanager config
// - write additional ressources to the cluster
let am_config = serde_yaml::to_string(&am_config).map_err(|e| {
InterpretError::new(format!(
"Failed to serialize new alertmanager config to string : {e}"
))
})?;
Ok(Outcome::success(todo!("whats up")))
let mut am_config_b64 = String::new();
BASE64_STANDARD.encode_string(am_config, &mut am_config_b64);
// TODO put update configmap value and save new value
data_obj.insert(
"alertmanager.yaml".to_string(),
serde_json::Value::String(am_config_b64),
);
// https://kubernetes.io/docs/reference/using-api/server-side-apply/#field-management
alertmanager_main_secret.metadata.managed_fields = None;
trace!("Applying new alertmanager_main_secret {alertmanager_main_secret:#?}");
client
.apply_dynamic(
&alertmanager_main_secret,
Some(openshift_monitoring_namespace),
true,
)
.await?;
let additional_resources = additional_resources.concat();
trace!("Applying additional ressources for alert receivers {additional_resources:#?}");
client
.apply_dynamic_many(
&additional_resources,
Some(openshift_monitoring_namespace),
true,
)
.await?;
Ok(Outcome::success(format!(
"Successfully configured {} cluster alert receivers: {}",
self.receivers.len(),
self.receivers
.iter()
.map(|r| r.name())
.collect::<Vec<_>>()
.join(", ")
)))
}
fn get_name(&self) -> InterpretName {

View File

@@ -5,10 +5,8 @@ use crate::{
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::{
dhcp::DhcpHostBindingScore,
http::IPxeMacBootFileScore,
inventory::DiscoverHostForRoleScore,
okd::{host_network::HostNetworkConfigurationScore, templates::BootstrapIpxeTpl},
dhcp::DhcpHostBindingScore, http::IPxeMacBootFileScore,
inventory::DiscoverHostForRoleScore, okd::templates::BootstrapIpxeTpl,
},
score::Score,
topology::{HAClusterTopology, HostBinding},
@@ -205,28 +203,6 @@ impl OKDSetup03ControlPlaneInterpret {
Ok(())
}
/// Placeholder for automating network bonding configuration.
async fn persist_network_bond(
&self,
inventory: &Inventory,
topology: &HAClusterTopology,
hosts: &Vec<PhysicalHost>,
) -> Result<(), InterpretError> {
info!("[ControlPlane] Ensuring persistent bonding");
let score = HostNetworkConfigurationScore {
hosts: hosts.clone(),
};
score.interpret(inventory, topology).await?;
inquire::Confirm::new(
"Network configuration for control plane nodes is not automated yet. Configure it manually if needed.",
)
.prompt()
.map_err(|e| InterpretError::new(format!("User prompt failed: {e}")))?;
Ok(())
}
}
#[async_trait]
@@ -265,10 +241,6 @@ impl Interpret<HAClusterTopology> for OKDSetup03ControlPlaneInterpret {
// 4. Reboot the nodes to start the OS installation.
self.reboot_targets(&nodes).await?;
// 5. Placeholder for post-boot network configuration (e.g., bonding).
self.persist_network_bond(inventory, topology, &nodes)
.await?;
// TODO: Implement a step to wait for the control plane nodes to join the cluster
// and for the cluster operators to become available. This would be similar to
// the `wait-for bootstrap-complete` command.

View File

@@ -0,0 +1,130 @@
use crate::{
data::Version,
hardware::PhysicalHost,
infra::inventory::InventoryRepositoryFactory,
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::{HostRole, Inventory},
modules::okd::host_network::HostNetworkConfigurationScore,
score::Score,
topology::HAClusterTopology,
};
use async_trait::async_trait;
use derive_new::new;
use harmony_types::id::Id;
use log::info;
use serde::Serialize;
// -------------------------------------------------------------------------------------------------
// Persist Network Bond
// - Persist bonding via NMState
// - Persist port channels on the Switch
// -------------------------------------------------------------------------------------------------
/// Score for the "persist network bond" step of the OKD installation.
///
/// It carries no configuration of its own; the `new` derive provides the `new()` constructor.
#[derive(Debug, Clone, Serialize, new)]
pub struct OKDSetupPersistNetworkBondScore {}
/// Connects the score to its interpret and exposes the score's display name.
impl Score<HAClusterTopology> for OKDSetupPersistNetworkBondScore {
    /// Builds a fresh interpret for this score.
    fn create_interpret(&self) -> Box<dyn Interpret<HAClusterTopology>> {
        let interpret = OKDSetupPersistNetworkBondInterpet::new();
        Box::new(interpret)
    }

    /// Name under which this score is reported.
    fn name(&self) -> String {
        String::from("OKDSetupPersistNetworkBondScore")
    }
}
/// Interpret that persists the network bond configuration of the control plane hosts.
#[derive(Debug, Clone)]
pub struct OKDSetupPersistNetworkBondInterpet {
// Version returned by `get_version`.
version: Version,
// Status returned by `get_status`; `new` initialises it to `InterpretStatus::QUEUED`.
status: InterpretStatus,
}
impl OKDSetupPersistNetworkBondInterpet {
    /// Creates an interpret at version `1.0.0` with the `QUEUED` status.
    pub fn new() -> Self {
        // The literal is fixed in the source, so failing to parse it is a programming error.
        let version = Version::from("1.0.0").expect("\"1.0.0\" must be a valid version");
        Self {
            version,
            status: InterpretStatus::QUEUED,
        }
    }

    /// Returns exactly three hosts that hold the `ControlPlane` role.
    ///
    /// The hosts are read from the inventory repository. No discovery is triggered here:
    /// when too few hosts are registered, an error is returned instead.
    ///
    /// # Errors
    ///
    /// Returns an [`InterpretError`] when the repository cannot be built or queried, or when
    /// fewer than three control plane hosts are found.
    async fn get_nodes(
        &self,
        _inventory: &Inventory,
        _topology: &HAClusterTopology,
    ) -> Result<Vec<PhysicalHost>, InterpretError> {
        const REQUIRED_HOSTS: usize = 3;

        let repo = InventoryRepositoryFactory::build().await?;
        let control_plane_hosts = repo.get_host_for_role(&HostRole::ControlPlane).await?;

        if control_plane_hosts.len() < REQUIRED_HOSTS {
            return Err(InterpretError::new(format!(
                "OKD Requires at least {} control plane hosts, but only found {}. Cannot proceed.",
                REQUIRED_HOSTS,
                control_plane_hosts.len()
            )));
        }

        // Take exactly the number of required hosts to ensure consistency.
        Ok(control_plane_hosts
            .into_iter()
            .take(REQUIRED_HOSTS)
            .collect())
    }

    /// Interprets a [`HostNetworkConfigurationScore`] built from `hosts`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while interpreting the host network configuration score.
    async fn persist_network_bond(
        &self,
        inventory: &Inventory,
        topology: &HAClusterTopology,
        hosts: &[PhysicalHost],
    ) -> Result<(), InterpretError> {
        info!("Ensuring persistent bonding");

        // The score owns its host list, so the borrowed slice is copied into it.
        let score = HostNetworkConfigurationScore {
            hosts: hosts.to_vec(),
        };
        score.interpret(inventory, topology).await?;

        Ok(())
    }
}
#[async_trait]
impl Interpret<HAClusterTopology> for OKDSetupPersistNetworkBondInterpet {
    /// Name under which this interpret is reported.
    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OKDSetupPersistNetworkBondInterpet")
    }

    /// Version set at construction time.
    fn get_version(&self) -> Version {
        self.version.clone()
    }

    /// Current status of the interpret.
    fn get_status(&self) -> InterpretStatus {
        self.status.clone()
    }

    /// This interpret has no children.
    fn get_children(&self) -> Vec<Id> {
        vec![]
    }

    /// Fetches the control plane hosts and persists their network bond configuration.
    ///
    /// # Errors
    ///
    /// Returns an [`InterpretError`] when the hosts cannot be retrieved, or when persisting
    /// the bond fails. In the latter case the underlying error is included in the message so
    /// the root cause is not lost.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &HAClusterTopology,
    ) -> Result<Outcome, InterpretError> {
        let nodes = self.get_nodes(inventory, topology).await?;

        self.persist_network_bond(inventory, topology, &nodes)
            .await
            .map_err(|e| {
                InterpretError::new(format!("Failed to persist network bond: {e:?}"))
            })?;

        Ok(Outcome::success(
            "Network bond successfully persisted".into(),
        ))
    }
}

View File

@@ -1,41 +1 @@
use kube::CustomResource;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
pub mod nmstate;
#[derive(CustomResource, Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[kube(
group = "operators.coreos.com",
version = "v1",
kind = "OperatorGroup",
namespaced
)]
#[serde(rename_all = "camelCase")]
pub struct OperatorGroupSpec {
pub target_namespaces: Vec<String>,
}
#[derive(CustomResource, Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[kube(
group = "operators.coreos.com",
version = "v1alpha1",
kind = "Subscription",
namespaced
)]
#[serde(rename_all = "camelCase")]
pub struct SubscriptionSpec {
pub name: String,
pub source: String,
pub source_namespace: String,
pub channel: Option<String>,
pub install_plan_approval: Option<InstallPlanApproval>,
}
#[derive(Deserialize, Serialize, Clone, Debug, JsonSchema)]
pub enum InstallPlanApproval {
#[serde(rename = "Automatic")]
Automatic,
#[serde(rename = "Manual")]
Manual,
}

View File

@@ -6,9 +6,16 @@ use serde::{Deserialize, Serialize};
use serde_json::Value;
#[derive(CustomResource, Deserialize, Serialize, Clone, Debug, JsonSchema)]
#[kube(group = "nmstate.io", version = "v1", kind = "NMState", namespaced)]
#[kube(
group = "nmstate.io",
version = "v1",
kind = "NMState",
plural = "nmstates",
namespaced = false
)]
#[serde(rename_all = "camelCase")]
pub struct NMStateSpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub probe_configuration: Option<ProbeConfig>,
}
@@ -44,6 +51,7 @@ pub struct ProbeDns {
)]
#[serde(rename_all = "camelCase")]
pub struct NodeNetworkConfigurationPolicySpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub node_selector: Option<BTreeMap<String, String>>,
pub desired_state: DesiredStateSpec,
}
@@ -58,37 +66,64 @@ pub struct DesiredStateSpec {
#[serde(rename_all = "kebab-case")]
pub struct InterfaceSpec {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
pub r#type: String,
pub state: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub mac_address: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub copy_mac_from: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mtu: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub controller: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ipv4: Option<IpStackSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ipv6: Option<IpStackSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ethernet: Option<EthernetSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub link_aggregation: Option<BondSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub vlan: Option<VlanSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub vxlan: Option<VxlanSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mac_vtap: Option<MacVtapSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mac_vlan: Option<MacVlanSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub infiniband: Option<InfinibandSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub linux_bridge: Option<LinuxBridgeSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ovs_bridge: Option<OvsBridgeSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ethtool: Option<EthtoolSpec>,
}
/// IP stack settings of an interface; the type of both `InterfaceSpec::ipv4` and
/// `InterfaceSpec::ipv6`.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct IpStackSpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub enabled: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dhcp: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub autoconf: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub address: Option<Vec<IpAddressSpec>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub auto_dns: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub auto_gateway: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub auto_routes: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dhcp_client_id: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dhcp_duid: Option<String>,
}
@@ -102,8 +137,11 @@ pub struct IpAddressSpec {
/// Ethernet settings of an interface; the type of `InterfaceSpec::ethernet`.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct EthernetSpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub speed: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub duplex: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub auto_negotiation: Option<bool>,
}
@@ -112,6 +150,7 @@ pub struct EthernetSpec {
pub struct BondSpec {
pub mode: String,
pub ports: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub options: Option<BTreeMap<String, Value>>,
}
@@ -120,6 +159,7 @@ pub struct BondSpec {
pub struct VlanSpec {
pub base_iface: String,
pub id: u16,
#[serde(skip_serializing_if = "Option::is_none")]
pub protocol: Option<String>,
}
@@ -129,8 +169,11 @@ pub struct VxlanSpec {
pub base_iface: String,
pub id: u32,
pub remote: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub local: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub learning: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub destination_port: Option<u16>,
}
@@ -139,6 +182,7 @@ pub struct VxlanSpec {
pub struct MacVtapSpec {
pub base_iface: String,
pub mode: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub promiscuous: Option<bool>,
}
@@ -147,6 +191,7 @@ pub struct MacVtapSpec {
pub struct MacVlanSpec {
pub base_iface: String,
pub mode: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub promiscuous: Option<bool>,
}
@@ -161,25 +206,35 @@ pub struct InfinibandSpec {
/// Linux bridge settings of an interface; the type of `InterfaceSpec::linux_bridge`.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct LinuxBridgeSpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub options: Option<LinuxBridgeOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ports: Option<Vec<LinuxBridgePort>>,
}
/// Options of a Linux bridge; the type of `LinuxBridgeSpec::options`.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct LinuxBridgeOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub mac_ageing_time: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub multicast_snooping: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub stp: Option<StpOptions>,
}
/// STP settings of a Linux bridge; the type of `LinuxBridgeOptions::stp`.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct StpOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub enabled: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub forward_delay: Option<u16>,
#[serde(skip_serializing_if = "Option::is_none")]
pub hello_time: Option<u16>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_age: Option<u16>,
#[serde(skip_serializing_if = "Option::is_none")]
pub priority: Option<u16>,
}
@@ -187,15 +242,20 @@ pub struct StpOptions {
#[serde(rename_all = "kebab-case")]
pub struct LinuxBridgePort {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub vlan: Option<LinuxBridgePortVlan>,
}
/// VLAN settings of a bridge port; the type of `LinuxBridgePort::vlan` and `OvsPortSpec::vlan`.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct LinuxBridgePortVlan {
#[serde(skip_serializing_if = "Option::is_none")]
pub mode: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub trunk_tags: Option<Vec<VlanTag>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tag: Option<u16>,
#[serde(skip_serializing_if = "Option::is_none")]
pub enable_native: Option<bool>,
}
@@ -203,6 +263,7 @@ pub struct LinuxBridgePortVlan {
#[serde(rename_all = "kebab-case")]
pub struct VlanTag {
pub id: u16,
#[serde(skip_serializing_if = "Option::is_none")]
pub id_range: Option<VlanIdRange>,
}
@@ -216,15 +277,20 @@ pub struct VlanIdRange {
/// OVS bridge settings of an interface; the type of `InterfaceSpec::ovs_bridge`.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct OvsBridgeSpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub options: Option<OvsBridgeOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
pub ports: Option<Vec<OvsPortSpec>>,
}
/// Options of an OVS bridge; the type of `OvsBridgeSpec::options`.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct OvsBridgeOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub stp: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub rstp: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mcast_snooping_enable: Option<bool>,
}
@@ -232,8 +298,11 @@ pub struct OvsBridgeOptions {
#[serde(rename_all = "kebab-case")]
pub struct OvsPortSpec {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub link_aggregation: Option<BondSpec>,
#[serde(skip_serializing_if = "Option::is_none")]
pub vlan: Option<LinuxBridgePortVlan>,
#[serde(skip_serializing_if = "Option::is_none")]
pub r#type: Option<String>,
}
@@ -246,6 +315,8 @@ pub struct EthtoolSpec {
/// FEC-related ethtool settings.
///
/// Keys are (de)serialized in kebab-case, and every field left as `None` is omitted from the
/// serialized output.
// NOTE(review): presumably referenced from `EthtoolSpec`; its usage site is not visible here — confirm.
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub struct EthtoolFecSpec {
#[serde(skip_serializing_if = "Option::is_none")]
pub auto: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mode: Option<String>,
}

View File

@@ -39,30 +39,70 @@ impl HostNetworkConfigurationInterpret {
&self,
topology: &T,
host: &PhysicalHost,
) -> Result<(), InterpretError> {
let switch_ports = self.collect_switch_ports_for_host(topology, host).await?;
if !switch_ports.is_empty() {
topology
.configure_host_network(host, HostNetworkConfig { switch_ports })
.await
.map_err(|e| InterpretError::new(format!("Failed to configure host: {e}")))?;
current_host: &usize,
total_hosts: &usize,
) -> Result<HostNetworkConfig, InterpretError> {
if host.network.is_empty() {
info!("[Host {current_host}/{total_hosts}] No interfaces to configure, skipping");
return Ok(HostNetworkConfig {
host_id: host.id.clone(),
switch_ports: vec![],
});
}
Ok(())
let switch_ports = self
.collect_switch_ports_for_host(topology, host, current_host, total_hosts)
.await?;
let config = HostNetworkConfig {
host_id: host.id.clone(),
switch_ports,
};
if !config.switch_ports.is_empty() {
info!(
"[Host {current_host}/{total_hosts}] Found {} ports for {} interfaces",
config.switch_ports.len(),
host.network.len()
);
info!("[Host {current_host}/{total_hosts}] Configuring host network...");
topology
.configure_host_network(&config)
.await
.map_err(|e| InterpretError::new(format!("Failed to configure host: {e}")))?;
} else {
info!(
"[Host {current_host}/{total_hosts}] No ports found for {} interfaces, skipping",
host.network.len()
);
}
Ok(config)
}
async fn collect_switch_ports_for_host<T: Topology + Switch>(
&self,
topology: &T,
host: &PhysicalHost,
current_host: &usize,
total_hosts: &usize,
) -> Result<Vec<SwitchPort>, InterpretError> {
let mut switch_ports = vec![];
if host.network.is_empty() {
return Ok(switch_ports);
}
info!("[Host {current_host}/{total_hosts}] Collecting ports on switch...");
for network_interface in &host.network {
let mac_address = network_interface.mac_address;
match topology.get_port_for_mac_address(&mac_address).await {
Ok(Some(port)) => {
info!(
"[Host {current_host}/{total_hosts}] Found port '{port}' for '{mac_address}'"
);
switch_ports.push(SwitchPort {
interface: NetworkInterface {
name: network_interface.name.clone(),
@@ -73,7 +113,7 @@ impl HostNetworkConfigurationInterpret {
port,
});
}
Ok(None) => debug!("No port found for host '{}', skipping", host.id),
Ok(None) => debug!("No port found for '{mac_address}', skipping"),
Err(e) => {
return Err(InterpretError::new(format!(
"Failed to get port for host '{}': {}",
@@ -85,6 +125,47 @@ impl HostNetworkConfigurationInterpret {
Ok(switch_ports)
}
/// Builds the network configuration report, one entry per host configuration.
///
/// The report starts with a title and a separator line and ends with a separator line.
/// A host whose configuration has no switch ports is reported as skipped; otherwise its
/// `interface -> port` mappings are listed.
fn format_host_configuration(&self, configs: Vec<HostNetworkConfig>) -> Vec<String> {
    const SEPARATOR: &str = "------------------------------------------------------------------";

    let mut report = vec![
        "Network Configuration Report".to_string(),
        SEPARATOR.to_string(),
    ];

    for config in configs {
        // The host is only needed for the print below. A configuration whose host is not
        // part of the score is still reported instead of panicking on the failed lookup.
        if let Some(host) = self.score.hosts.iter().find(|h| h.id == config.host_id) {
            println!("[Host] {host}");
        }

        if config.switch_ports.is_empty() {
            report.push(format!(
                "⏭️ Host {}: SKIPPED (No matching switch ports found)",
                config.host_id
            ));
        } else {
            let mappings: Vec<String> = config
                .switch_ports
                .iter()
                .map(|p| format!("[{} -> {}]", p.interface.name, p.port))
                .collect();

            report.push(format!(
                "✅ Host {}: Bonded {} port(s) {}",
                config.host_id,
                config.switch_ports.len(),
                mappings.join(", ")
            ));
        }
    }

    report.push(SEPARATOR.to_string());
    report
}
}
#[async_trait]
@@ -114,27 +195,38 @@ impl<T: Topology + Switch> Interpret<T> for HostNetworkConfigurationInterpret {
return Ok(Outcome::noop("No hosts to configure".into()));
}
info!(
"Started network configuration for {} host(s)...",
self.score.hosts.len()
);
let host_count = self.score.hosts.len();
info!("Started network configuration for {host_count} host(s)...",);
info!("Setting up switch with sane defaults...");
topology
.setup_switch()
.await
.map_err(|e| InterpretError::new(format!("Switch setup failed: {e}")))?;
info!("Switch ready");
let mut current_host = 1;
let mut host_configurations = vec![];
let mut configured_host_count = 0;
for host in &self.score.hosts {
self.configure_network_for_host(topology, host).await?;
configured_host_count += 1;
}
let host_configuration = self
.configure_network_for_host(topology, host, &current_host, &host_count)
.await?;
if configured_host_count > 0 {
Ok(Outcome::success(format!(
"Configured {configured_host_count}/{} host(s)",
self.score.hosts.len()
)))
host_configurations.push(host_configuration);
current_host += 1;
}
if current_host > 1 {
let details = self.format_host_configuration(host_configurations);
Ok(Outcome::success_with_details(
format!(
"Configured {}/{} host(s)",
current_host - 1,
self.score.hosts.len()
),
details,
))
} else {
Ok(Outcome::noop("No hosts configured".into()))
}
@@ -209,6 +301,7 @@ mod tests {
assert_that!(*configured_host_networks).contains_exactly(vec![(
HOST_ID.clone(),
HostNetworkConfig {
host_id: HOST_ID.clone(),
switch_ports: vec![SwitchPort {
interface: EXISTING_INTERFACE.clone(),
port: PORT.clone(),
@@ -234,6 +327,7 @@ mod tests {
assert_that!(*configured_host_networks).contains_exactly(vec![(
HOST_ID.clone(),
HostNetworkConfig {
host_id: HOST_ID.clone(),
switch_ports: vec![
SwitchPort {
interface: EXISTING_INTERFACE.clone(),
@@ -263,6 +357,7 @@ mod tests {
(
HOST_ID.clone(),
HostNetworkConfig {
host_id: HOST_ID.clone(),
switch_ports: vec![SwitchPort {
interface: EXISTING_INTERFACE.clone(),
port: PORT.clone(),
@@ -272,6 +367,7 @@ mod tests {
(
ANOTHER_HOST_ID.clone(),
HostNetworkConfig {
host_id: ANOTHER_HOST_ID.clone(),
switch_ports: vec![SwitchPort {
interface: ANOTHER_EXISTING_INTERFACE.clone(),
port: ANOTHER_PORT.clone(),
@@ -382,11 +478,10 @@ mod tests {
async fn configure_host_network(
&self,
host: &PhysicalHost,
config: HostNetworkConfig,
config: &HostNetworkConfig,
) -> Result<(), SwitchError> {
let mut configured_host_networks = self.configured_host_networks.lock().unwrap();
configured_host_networks.push((host.id.clone(), config.clone()));
configured_host_networks.push((config.host_id.clone(), config.clone()));
Ok(())
}

View File

@@ -50,7 +50,7 @@
use crate::{
modules::okd::{
OKDSetup01InventoryScore, OKDSetup02BootstrapScore, OKDSetup03ControlPlaneScore,
OKDSetup04WorkersScore, OKDSetup05SanityCheckScore,
OKDSetup04WorkersScore, OKDSetup05SanityCheckScore, OKDSetupPersistNetworkBondScore,
bootstrap_06_installation_report::OKDSetup06InstallationReportScore,
},
score::Score,
@@ -65,6 +65,7 @@ impl OKDInstallationPipeline {
Box::new(OKDSetup01InventoryScore::new()),
Box::new(OKDSetup02BootstrapScore::new()),
Box::new(OKDSetup03ControlPlaneScore::new()),
Box::new(OKDSetupPersistNetworkBondScore::new()),
Box::new(OKDSetup04WorkersScore::new()),
Box::new(OKDSetup05SanityCheckScore::new()),
Box::new(OKDSetup06InstallationReportScore::new()),

View File

@@ -6,6 +6,7 @@ mod bootstrap_05_sanity_check;
mod bootstrap_06_installation_report;
pub mod bootstrap_dhcp;
pub mod bootstrap_load_balancer;
mod bootstrap_persist_network_bond;
pub mod dhcp;
pub mod dns;
pub mod installation;
@@ -19,5 +20,6 @@ pub use bootstrap_03_control_plane::*;
pub use bootstrap_04_workers::*;
pub use bootstrap_05_sanity_check::*;
pub use bootstrap_06_installation_report::*;
pub use bootstrap_persist_network_bond::*;
pub mod crd;
pub mod host_network;

View File

@@ -40,7 +40,7 @@ pub fn init() {
HarmonyEvent::HarmonyFinished => {
if !details.is_empty() {
println!(
"\n{} All done! Here's what's next for you:",
"\n{} All done! Here's a few info for you:",
theme::EMOJI_SUMMARY
);
for detail in details.iter() {

View File

@@ -9,7 +9,7 @@ pub struct Interface {
pub physical_interface_name: String,
pub descr: Option<MaybeString>,
pub mtu: Option<MaybeString>,
pub enable: MaybeString,
pub enable: Option<MaybeString>,
pub lock: Option<MaybeString>,
#[yaserde(rename = "spoofmac")]
pub spoof_mac: Option<MaybeString>,
@@ -134,19 +134,15 @@ mod test {
<interfaces>
<paul>
<if></if>
<enable/>
</paul>
<anotherpaul>
<if></if>
<enable/>
</anotherpaul>
<thirdone>
<if></if>
<enable/>
</thirdone>
<andgofor4>
<if></if>
<enable/>
</andgofor4>
</interfaces>
<bar>foo</bar>