From 981529751a249672c5fb65e0766ef1928fdd8267 Mon Sep 17 00:00:00 2001 From: Ian Letourneau Date: Tue, 28 Oct 2025 18:32:25 -0400 Subject: [PATCH] improve more logs, better error messages when switch setup failed --- brocade/src/lib.rs | 2 ++ brocade/src/network_operating_system.rs | 28 +++++++++++++++- brocade/src/shell.rs | 8 +++-- harmony/src/domain/topology/ha_cluster.rs | 6 ++-- .../okd/bootstrap_persist_network_bond.rs | 14 +++++--- harmony/src/modules/okd/host_network.rs | 32 ++++++++++++++++--- harmony_cli/src/cli_reporter.rs | 2 +- 7 files changed, 76 insertions(+), 16 deletions(-) diff --git a/brocade/src/lib.rs b/brocade/src/lib.rs index 57b464a..c0b5b70 100644 --- a/brocade/src/lib.rs +++ b/brocade/src/lib.rs @@ -31,6 +31,7 @@ pub struct BrocadeOptions { pub struct TimeoutConfig { pub shell_ready: Duration, pub command_execution: Duration, + pub command_output: Duration, pub cleanup: Duration, pub message_wait: Duration, } @@ -40,6 +41,7 @@ impl Default for TimeoutConfig { Self { shell_ready: Duration::from_secs(10), command_execution: Duration::from_secs(60), // Commands like `deploy` (for a LAG) can take a while + command_output: Duration::from_secs(5), // Delay to start logging "waiting for command output" cleanup: Duration::from_secs(10), message_wait: Duration::from_millis(500), } diff --git a/brocade/src/network_operating_system.rs b/brocade/src/network_operating_system.rs index a6f824a..f4db713 100644 --- a/brocade/src/network_operating_system.rs +++ b/brocade/src/network_operating_system.rs @@ -3,6 +3,7 @@ use std::str::FromStr; use async_trait::async_trait; use harmony_types::switch::{PortDeclaration, PortLocation}; use log::{debug, info}; +use regex::Regex; use crate::{ BrocadeClient, BrocadeInfo, Error, ExecutionMode, InterSwitchLink, InterfaceInfo, @@ -110,6 +111,30 @@ impl NetworkOperatingSystemClient { status, })) } + + fn map_configure_interfaces_error(&self, err: Error) -> Error { + debug!("[Brocade] {err}"); + + if let Error::CommandError(message) = &err { + if message.contains("switchport") + && message.contains("Cannot configure aggregator member") + { + let re = Regex::new(r"\(conf-if-([a-zA-Z]+)-([\d/]+)\)#").unwrap(); + + if let Some(caps) = re.captures(message) { + let interface_type = &caps[1]; + let port_location = &caps[2]; + let interface = format!("{interface_type} {port_location}"); + + return Error::CommandError(format!( + "Cannot configure interface '{interface}', it is a member of a port-channel (LAG)" + )); + } + } + } + + err + } } #[async_trait] @@ -199,7 +224,8 @@ impl BrocadeClient for NetworkOperatingSystemClient { self.shell .run_commands(commands, ExecutionMode::Regular) - .await?; + .await + .map_err(|err| self.map_configure_interfaces_error(err))?; info!("[Brocade] Interfaces configured."); diff --git a/brocade/src/shell.rs b/brocade/src/shell.rs index 28eceb8..9cd94a9 100644 --- a/brocade/src/shell.rs +++ b/brocade/src/shell.rs @@ -211,7 +211,7 @@ impl BrocadeSession { let mut output = Vec::new(); let start = Instant::now(); let read_timeout = Duration::from_millis(500); - let log_interval = Duration::from_secs(3); + let log_interval = Duration::from_secs(5); let mut last_log = Instant::now(); loop { @@ -221,7 +221,9 @@ impl BrocadeSession { )); } - if start.elapsed() > Duration::from_secs(5) && last_log.elapsed() > log_interval { + if start.elapsed() > self.options.timeouts.command_output + && last_log.elapsed() > log_interval + { info!("[Brocade] Waiting for command output..."); last_log = Instant::now(); } @@ -276,7 +278,7 @@ impl BrocadeSession { let output_lower = output.to_lowercase(); if ERROR_PATTERNS.iter().any(|&p| output_lower.contains(p)) { return Err(Error::CommandError(format!( - "Command '{command}' failed: {}", + "Command error: {}", output.trim() ))); } diff --git a/harmony/src/domain/topology/ha_cluster.rs b/harmony/src/domain/topology/ha_cluster.rs index ee5f274..e16b6d2 100644 --- a/harmony/src/domain/topology/ha_cluster.rs +++ b/harmony/src/domain/topology/ha_cluster.rs @@ -167,7 +167,7 @@ impl HAClusterTopology { let bond_config = self.create_bond_configuration(config); debug!( - "Configuring bond for host {}: {bond_config:#?}", + "Applying NMState bond config for host {}: {bond_config:#?}", config.host_id ); self.k8s_client() @@ -185,9 +185,11 @@ impl HAClusterTopology { config: &HostNetworkConfig, ) -> NodeNetworkConfigurationPolicy { let host_name = &config.host_id; - let bond_id = self.get_next_bond_id(); let bond_name = format!("bond{bond_id}"); + + info!("Configuring bond '{bond_name}' for host '{host_name}'..."); + let mut bond_mtu: Option = None; let mut bond_mac_address: Option = None; let mut bond_ports = Vec::new(); diff --git a/harmony/src/modules/okd/bootstrap_persist_network_bond.rs b/harmony/src/modules/okd/bootstrap_persist_network_bond.rs index 788f2c8..4aa47b6 100644 --- a/harmony/src/modules/okd/bootstrap_persist_network_bond.rs +++ b/harmony/src/modules/okd/bootstrap_persist_network_bond.rs @@ -116,11 +116,15 @@ impl Interpret for OKDSetupPersistNetworkBondInterpet { ) -> Result { let nodes = self.get_nodes(inventory, topology).await?; - self.persist_network_bond(inventory, topology, &nodes) - .await?; + let res = self.persist_network_bond(inventory, topology, &nodes).await; - Ok(Outcome::success( - "Network bond successfully persisted".into(), - )) + match res { + Ok(_) => Ok(Outcome::success( + "Network bond successfully persisted".into(), + )), + Err(_) => Err(InterpretError::new( + "Failed to persist network bond".to_string(), + )), + } } } diff --git a/harmony/src/modules/okd/host_network.rs b/harmony/src/modules/okd/host_network.rs index 81ce4b5..39fb027 100644 --- a/harmony/src/modules/okd/host_network.rs +++ b/harmony/src/modules/okd/host_network.rs @@ -42,8 +42,17 @@ impl HostNetworkConfigurationInterpret { current_host: &usize, total_hosts: &usize, ) -> Result { - info!("[Host {current_host}/{total_hosts}] Collecting ports on switch..."); - let switch_ports = self.collect_switch_ports_for_host(topology, host).await?; + if host.network.is_empty() { + info!("[Host {current_host}/{total_hosts}] No interfaces to configure, skipping"); + return Ok(HostNetworkConfig { + host_id: host.id.clone(), + switch_ports: vec![], + }); + } + + let switch_ports = self + .collect_switch_ports_for_host(topology, host, current_host, total_hosts) + .await?; let config = HostNetworkConfig { host_id: host.id.clone(), @@ -63,7 +72,10 @@ impl HostNetworkConfigurationInterpret { .await .map_err(|e| InterpretError::new(format!("Failed to configure host: {e}")))?; } else { - info!("[Host {current_host}/{total_hosts}] No ports found"); + info!( + "[Host {current_host}/{total_hosts}] No ports found for {} interfaces, skipping", + host.network.len() + ); } Ok(config) @@ -73,14 +85,24 @@ impl HostNetworkConfigurationInterpret { &self, topology: &T, host: &PhysicalHost, + current_host: &usize, + total_hosts: &usize, ) -> Result, InterpretError> { let mut switch_ports = vec![]; + if host.network.is_empty() { + return Ok(switch_ports); + } + + info!("[Host {current_host}/{total_hosts}] Collecting ports on switch..."); for network_interface in &host.network { let mac_address = network_interface.mac_address; match topology.get_port_for_mac_address(&mac_address).await { Ok(Some(port)) => { + info!( + "[Host {current_host}/{total_hosts}] Found port '{port}' for '{mac_address}'" + ); switch_ports.push(SwitchPort { interface: NetworkInterface { name: network_interface.name.clone(), @@ -91,7 +113,7 @@ impl HostNetworkConfigurationInterpret { port, }); } - Ok(None) => debug!("No port found for host '{}', skipping", host.id), + Ok(None) => debug!("No port found for '{mac_address}', skipping"), Err(e) => { return Err(InterpretError::new(format!( "Failed to get port for host '{}': {}", @@ -176,10 +198,12 @@ impl Interpret for HostNetworkConfigurationInterpret { let host_count = self.score.hosts.len(); info!("Started network configuration for {host_count} host(s)...",); + info!("Setting up switch with sane defaults..."); topology .setup_switch() .await .map_err(|e| InterpretError::new(format!("Switch setup failed: {e}")))?; + info!("Switch ready"); let mut current_host = 1; let mut host_configurations = vec![]; diff --git a/harmony_cli/src/cli_reporter.rs b/harmony_cli/src/cli_reporter.rs index f6095cc..6c9823b 100644 --- a/harmony_cli/src/cli_reporter.rs +++ b/harmony_cli/src/cli_reporter.rs @@ -40,7 +40,7 @@ pub fn init() { HarmonyEvent::HarmonyFinished => { if !details.is_empty() { println!( - "\n{} All done! Here's what's next for you:", + "\n{} All done! Here's a few info for you:", theme::EMOJI_SUMMARY ); for detail in details.iter() {