From 8126b233d8eed3fb6260c4d463585e9308de9422 Mon Sep 17 00:00:00 2001 From: Willem Date: Wed, 22 Oct 2025 11:27:28 -0400 Subject: [PATCH 1/9] feat: implementation for opnsense os-node_exporter --- harmony/src/domain/topology/mod.rs | 1 + harmony/src/infra/opnsense/mod.rs | 1 + harmony/src/infra/opnsense/node_exporter.rs | 44 ++++++++++++ harmony/src/modules/opnsense/mod.rs | 1 + harmony/src/modules/opnsense/node_exporter.rs | 70 +++++++++++++++++++ opnsense-config-xml/src/data/opnsense.rs | 20 +++++- opnsense-config/src/config/config.rs | 7 +- opnsense-config/src/modules/mod.rs | 1 + opnsense-config/src/modules/node_exporter.rs | 55 +++++++++++++++ 9 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 harmony/src/infra/opnsense/node_exporter.rs create mode 100644 harmony/src/modules/opnsense/node_exporter.rs create mode 100644 opnsense-config/src/modules/node_exporter.rs diff --git a/harmony/src/domain/topology/mod.rs b/harmony/src/domain/topology/mod.rs index 85e57d7..08c1c15 100644 --- a/harmony/src/domain/topology/mod.rs +++ b/harmony/src/domain/topology/mod.rs @@ -1,5 +1,6 @@ mod ha_cluster; pub mod ingress; +pub mod node_exporter; use harmony_types::net::IpAddress; mod host_binding; mod http; diff --git a/harmony/src/infra/opnsense/mod.rs b/harmony/src/infra/opnsense/mod.rs index 3878cfc..102d2b6 100644 --- a/harmony/src/infra/opnsense/mod.rs +++ b/harmony/src/infra/opnsense/mod.rs @@ -4,6 +4,7 @@ mod firewall; mod http; mod load_balancer; mod management; +pub mod node_exporter; mod tftp; use std::sync::Arc; diff --git a/harmony/src/infra/opnsense/node_exporter.rs b/harmony/src/infra/opnsense/node_exporter.rs new file mode 100644 index 0000000..2c27b26 --- /dev/null +++ b/harmony/src/infra/opnsense/node_exporter.rs @@ -0,0 +1,44 @@ +use async_trait::async_trait; +use log::debug; + +use crate::{ + executors::ExecutorError, infra::opnsense::OPNSenseFirewall, + topology::node_exporter::NodeExporter, +}; + +#[async_trait] +impl NodeExporter for 
OPNSenseFirewall { + async fn ensure_initialized(&self) -> Result<(), ExecutorError> { + let mut config = self.opnsense_config.write().await; + let node_exporter = config.node_exporter(); + if let Some(config) = node_exporter.get_full_config() { + debug!( + "Node exporter available in opnsense config, assuming it is already installed. {config:?}" + ); + } else { + config + .install_package("os-node_exporter") + .await + .map_err(|e| { + ExecutorError::UnexpectedError(format!("Executor failed when trying to install os-node_exporter package with error {e:?}" + )) + })?; + } + + config.node_exporter().enable(true); + Ok(()) + } + async fn commit_config(&self) -> Result<(), ExecutorError> { + OPNSenseFirewall::commit_config(self).await + } + + async fn reload_restart(&self) -> Result<(), ExecutorError> { + self.opnsense_config + .write() + .await + .node_exporter() + .reload_restart() + .await + .map_err(|e| ExecutorError::UnexpectedError(e.to_string())) + } +} diff --git a/harmony/src/modules/opnsense/mod.rs b/harmony/src/modules/opnsense/mod.rs index 28b52cf..8988205 100644 --- a/harmony/src/modules/opnsense/mod.rs +++ b/harmony/src/modules/opnsense/mod.rs @@ -1,3 +1,4 @@ +pub mod node_exporter; mod shell; mod upgrade; pub use shell::*; diff --git a/harmony/src/modules/opnsense/node_exporter.rs b/harmony/src/modules/opnsense/node_exporter.rs new file mode 100644 index 0000000..d17f67a --- /dev/null +++ b/harmony/src/modules/opnsense/node_exporter.rs @@ -0,0 +1,70 @@ +use async_trait::async_trait; +use harmony_types::id::Id; +use log::info; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + score::Score, + topology::{Topology, node_exporter::NodeExporter}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct NodeExporterScore {} + +impl Score for NodeExporterScore { + fn name(&self) -> String { + "NodeExporterScore".to_string() + } + + fn 
create_interpret(&self) -> Box> { + Box::new(NodeExporterInterpret {}) + } +} + +#[derive(Debug)] +pub struct NodeExporterInterpret {} + +#[async_trait] +impl Interpret for NodeExporterInterpret { + async fn execute( + &self, + _inventory: &Inventory, + node_exporter: &T, + ) -> Result { + info!( + "Making sure node exporter is initiailized: {:?}", + node_exporter.ensure_initialized().await? + ); + + info!("Applying Node Exporter configuration"); + + node_exporter.commit_config().await?; + + info!("Reloading and restarting Node Exporter"); + + node_exporter.reload_restart().await?; + + Ok(Outcome::success(format!( + "NodeExporter successfully configured" + ))) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("NodeExporter") + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} diff --git a/opnsense-config-xml/src/data/opnsense.rs b/opnsense-config-xml/src/data/opnsense.rs index fa5f985..4b384d4 100644 --- a/opnsense-config-xml/src/data/opnsense.rs +++ b/opnsense-config-xml/src/data/opnsense.rs @@ -433,7 +433,7 @@ pub struct OPNsenseXmlSection { #[yaserde(rename = "Interfaces")] pub interfaces: Option, #[yaserde(rename = "NodeExporter")] - pub node_exporter: Option, + pub node_exporter: Option, #[yaserde(rename = "Kea")] pub kea: Option, pub monit: Option, @@ -1595,3 +1595,21 @@ pub struct Ifgroups { #[yaserde(attribute = true)] pub version: String, } + +#[derive(Default, PartialEq, Debug, YaSerialize, YaDeserialize)] +pub struct NodeExporter { + pub enabled: u8, + pub listenaddress: Option, + pub listenport: u16, + pub cpu: u8, + pub exec: u8, + pub filesystem: u8, + pub loadavg: u8, + pub meminfo: u8, + pub netdev: u8, + pub time: u8, + pub devstat: u8, + pub interrupts: u8, + pub ntp: u8, + pub zfs: u8, +} diff --git a/opnsense-config/src/config/config.rs b/opnsense-config/src/config/config.rs index c2d0f60..30240e4 100644 --- 
a/opnsense-config/src/config/config.rs +++ b/opnsense-config/src/config/config.rs @@ -5,7 +5,8 @@ use crate::{ error::Error, modules::{ caddy::CaddyConfig, dhcp_legacy::DhcpConfigLegacyISC, dns::UnboundDnsConfig, - dnsmasq::DhcpConfigDnsMasq, load_balancer::LoadBalancerConfig, tftp::TftpConfig, + dnsmasq::DhcpConfigDnsMasq, load_balancer::LoadBalancerConfig, + node_exporter::NodeExporterConfig, tftp::TftpConfig, }, }; use log::{debug, info, trace, warn}; @@ -71,6 +72,10 @@ impl Config { LoadBalancerConfig::new(&mut self.opnsense, self.shell.clone()) } + pub fn node_exporter(&mut self) -> NodeExporterConfig<'_> { + NodeExporterConfig::new(&mut self.opnsense, self.shell.clone()) + } + pub async fn upload_files(&self, source: &str, destination: &str) -> Result { self.shell.upload_folder(source, destination).await } diff --git a/opnsense-config/src/modules/mod.rs b/opnsense-config/src/modules/mod.rs index 3448075..eec16a2 100644 --- a/opnsense-config/src/modules/mod.rs +++ b/opnsense-config/src/modules/mod.rs @@ -4,4 +4,5 @@ pub mod dhcp_legacy; pub mod dns; pub mod dnsmasq; pub mod load_balancer; +pub mod node_exporter; pub mod tftp; diff --git a/opnsense-config/src/modules/node_exporter.rs b/opnsense-config/src/modules/node_exporter.rs new file mode 100644 index 0000000..9a44876 --- /dev/null +++ b/opnsense-config/src/modules/node_exporter.rs @@ -0,0 +1,55 @@ +use std::sync::Arc; + +use opnsense_config_xml::{NodeExporter, OPNsense}; + +use crate::{config::OPNsenseShell, Error}; + +pub struct NodeExporterConfig<'a> { + opnsense: &'a mut OPNsense, + opnsense_shell: Arc, +} + +impl<'a> NodeExporterConfig<'a> { + pub fn new(opnsense: &'a mut OPNsense, opnsense_shell: Arc) -> Self { + Self { + opnsense, + opnsense_shell, + } + } + + pub fn get_full_config(&self) -> &Option { + &self.opnsense.opnsense.node_exporter + } + fn with_node_exporter(&mut self, f: F) -> R + where + F: FnOnce(&mut NodeExporter) -> R, + { + match &mut self.opnsense.opnsense.node_exporter.as_mut() { 
+ Some(node_exporter) => f(node_exporter), + None => unimplemented!( + " + node exporter is not yet installed" + ), + } + } + + pub fn enable(&mut self, enabled: bool) { + self.with_node_exporter(|node_exporter| node_exporter.enabled = enabled as u8) + } + + pub async fn reload_restart(&self) -> Result<(), Error> { + self.opnsense_shell + .exec("configctl node_exporter stop") + .await?; + self.opnsense_shell + .exec("configctl template reload OPNsense/NodeExporter") + .await?; + self.opnsense_shell + .exec("configctl node_exporter configtest") + .await?; + self.opnsense_shell + .exec("configctl node_exporter start") + .await?; + Ok(()) + } +} From 5af13800b7774311fd73efd9682371dcb4373aa1 Mon Sep 17 00:00:00 2001 From: Willem Date: Wed, 22 Oct 2025 11:51:22 -0400 Subject: [PATCH 2/9] fix: removed unimplemented macro and returned Err instead some formatting error --- opnsense-config/src/modules/node_exporter.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/opnsense-config/src/modules/node_exporter.rs b/opnsense-config/src/modules/node_exporter.rs index 9a44876..fd7ee5c 100644 --- a/opnsense-config/src/modules/node_exporter.rs +++ b/opnsense-config/src/modules/node_exporter.rs @@ -20,21 +20,20 @@ impl<'a> NodeExporterConfig<'a> { pub fn get_full_config(&self) -> &Option { &self.opnsense.opnsense.node_exporter } - fn with_node_exporter(&mut self, f: F) -> R + + fn with_node_exporter(&mut self, f: F) -> Result where F: FnOnce(&mut NodeExporter) -> R, { match &mut self.opnsense.opnsense.node_exporter.as_mut() { - Some(node_exporter) => f(node_exporter), - None => unimplemented!( - " - node exporter is not yet installed" - ), + Some(node_exporter) => Ok(f(node_exporter)), + None => Err("node exporter is not yet installed"), } } - pub fn enable(&mut self, enabled: bool) { + pub fn enable(&mut self, enabled: bool) -> Result<(), &'static str> { self.with_node_exporter(|node_exporter| node_exporter.enabled = enabled as u8) + .map(|_| ()) } pub
async fn reload_restart(&self) -> Result<(), Error> { From 5ab58f025330488e60f89d4a40976815059ce24a Mon Sep 17 00:00:00 2001 From: Willem Date: Wed, 22 Oct 2025 14:39:12 -0400 Subject: [PATCH 3/9] fix: added impl node exporter for hacluster topology and dummy infra --- examples/nanodc/src/main.rs | 4 +- examples/okd_installation/src/topology.rs | 1 + examples/okd_pxe/src/topology.rs | 1 + examples/opnsense/src/main.rs | 1 + examples/opnsense_node_exporter/Cargo.toml | 20 ++++ examples/opnsense_node_exporter/src/main.rs | 110 +++++++++++++++++++ harmony/src/domain/topology/ha_cluster.rs | 36 +++++- harmony/src/domain/topology/node_exporter.rs | 17 +++ 8 files changed, 187 insertions(+), 3 deletions(-) create mode 100644 examples/opnsense_node_exporter/Cargo.toml create mode 100644 examples/opnsense_node_exporter/src/main.rs create mode 100644 harmony/src/domain/topology/node_exporter.rs diff --git a/examples/nanodc/src/main.rs b/examples/nanodc/src/main.rs index 57574d2..95b16a6 100644 --- a/examples/nanodc/src/main.rs +++ b/examples/nanodc/src/main.rs @@ -39,8 +39,7 @@ async fn main() { let gateway_ipv4 = Ipv4Addr::new(192, 168, 33, 1); let gateway_ip = IpAddr::V4(gateway_ipv4); let topology = harmony::topology::HAClusterTopology { - domain_name: "ncd0.harmony.mcd".to_string(), // TODO this must be set manually correctly - // when setting up the opnsense firewall + domain_name: "ncd0.harmony.mcd".to_string(), router: Arc::new(UnmanagedRouter::new( gateway_ip, Ipv4Cidr::new(lan_subnet, 24).unwrap(), @@ -84,6 +83,7 @@ async fn main() { }, ], switch: vec![], + node_exporter: opnsense.clone(), }; let inventory = Inventory { diff --git a/examples/okd_installation/src/topology.rs b/examples/okd_installation/src/topology.rs index 31062f5..4df6ab5 100644 --- a/examples/okd_installation/src/topology.rs +++ b/examples/okd_installation/src/topology.rs @@ -59,6 +59,7 @@ pub async fn get_topology() -> HAClusterTopology { }, workers: vec![], switch: vec![], + node_exporter: 
opnsense.clone(), } } diff --git a/examples/okd_pxe/src/topology.rs b/examples/okd_pxe/src/topology.rs index 707969a..63e3613 100644 --- a/examples/okd_pxe/src/topology.rs +++ b/examples/okd_pxe/src/topology.rs @@ -53,6 +53,7 @@ pub async fn get_topology() -> HAClusterTopology { }, workers: vec![], switch: vec![], + node_exporter: opnsense.clone(), } } diff --git a/examples/opnsense/src/main.rs b/examples/opnsense/src/main.rs index fcfaf09..8f4039d 100644 --- a/examples/opnsense/src/main.rs +++ b/examples/opnsense/src/main.rs @@ -55,6 +55,7 @@ async fn main() { }, workers: vec![], switch: vec![], + node_exporter: opnsense.clone(), }; let inventory = Inventory { diff --git a/examples/opnsense_node_exporter/Cargo.toml b/examples/opnsense_node_exporter/Cargo.toml new file mode 100644 index 0000000..957bdd9 --- /dev/null +++ b/examples/opnsense_node_exporter/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "example-opnsense-node-exporter" +edition = "2024" +version.workspace = true +readme.workspace = true +license.workspace = true + +[dependencies] +harmony = { path = "../../harmony" } +harmony_cli = { path = "../../harmony_cli" } +harmony_types = { path = "../../harmony_types" } +harmony_secret = { path = "../../harmony_secret" } +harmony_secret_derive = { path = "../../harmony_secret_derive" } +cidr = { workspace = true } +tokio = { workspace = true } +harmony_macros = { path = "../../harmony_macros" } +log = { workspace = true } +env_logger = { workspace = true } +url = { workspace = true } +serde.workspace = true diff --git a/examples/opnsense_node_exporter/src/main.rs b/examples/opnsense_node_exporter/src/main.rs new file mode 100644 index 0000000..4f1219d --- /dev/null +++ b/examples/opnsense_node_exporter/src/main.rs @@ -0,0 +1,110 @@ +use std::{ + net::{IpAddr, Ipv4Addr}, + sync::Arc, +}; + +use cidr::Ipv4Cidr; +use harmony::{ + hardware::{HostCategory, Location, PhysicalHost, SwitchGroup}, + infra::opnsense::OPNSenseManagementInterface, + inventory::Inventory, 
+ modules::opnsense::node_exporter::NodeExporterScore, + topology::{HAClusterTopology, LogicalHost, UnmanagedRouter}, +}; +use harmony_macros::{ip, ipv4, mac_address}; + +#[tokio::main] +async fn main() { + let firewall = harmony::topology::LogicalHost { + ip: ip!("192.168.33.1"), + name: String::from("fw0"), + }; + + let opnsense = Arc::new( + harmony::infra::opnsense::OPNSenseFirewall::new(firewall, None, "root", "opnsense").await, + ); + let lan_subnet = Ipv4Addr::new(192, 168, 33, 0); + let gateway_ipv4 = Ipv4Addr::new(192, 168, 33, 1); + let gateway_ip = IpAddr::V4(gateway_ipv4); + let topology = harmony::topology::HAClusterTopology { + domain_name: "ncd0.harmony.mcd".to_string(), + router: Arc::new(UnmanagedRouter::new( + gateway_ip, + Ipv4Cidr::new(lan_subnet, 24).unwrap(), + )), + load_balancer: opnsense.clone(), + firewall: opnsense.clone(), + tftp_server: opnsense.clone(), + http_server: opnsense.clone(), + dhcp_server: opnsense.clone(), + dns_server: opnsense.clone(), + control_plane: vec![ + LogicalHost { + ip: ip!("192.168.33.20"), + name: "cp0".to_string(), + }, + LogicalHost { + ip: ip!("192.168.33.21"), + name: "cp1".to_string(), + }, + LogicalHost { + ip: ip!("192.168.33.22"), + name: "cp2".to_string(), + }, + ], + bootstrap_host: LogicalHost { + ip: ip!("192.168.33.66"), + name: "bootstrap".to_string(), + }, + workers: vec![ + LogicalHost { + ip: ip!("192.168.33.30"), + name: "wk0".to_string(), + }, + LogicalHost { + ip: ip!("192.168.33.31"), + name: "wk1".to_string(), + }, + LogicalHost { + ip: ip!("192.168.33.32"), + name: "wk2".to_string(), + }, + ], + switch: vec![], + node_exporter: opnsense.clone(), + }; + + let inventory = Inventory { + location: Location::new("I am mobile".to_string(), "earth".to_string()), + switch: SwitchGroup::from([]), + firewall_mgmt: Box::new(OPNSenseManagementInterface::new()), + storage_host: vec![], + worker_host: vec![ + PhysicalHost::empty(HostCategory::Server) + .mac_address(mac_address!("C4:62:37:02:61:0F")), 
+ PhysicalHost::empty(HostCategory::Server) + .mac_address(mac_address!("C4:62:37:02:61:26")), + PhysicalHost::empty(HostCategory::Server) + .mac_address(mac_address!("C4:62:37:02:61:70")), + ], + control_plane_host: vec![ + PhysicalHost::empty(HostCategory::Server) + .mac_address(mac_address!("C4:62:37:02:60:FA")), + PhysicalHost::empty(HostCategory::Server) + .mac_address(mac_address!("C4:62:37:02:61:1A")), + PhysicalHost::empty(HostCategory::Server) + .mac_address(mac_address!("C4:62:37:01:BC:68")), + ], + }; + + let node_exporter_score = NodeExporterScore {}; + + harmony_cli::run( + inventory, + topology, + vec![Box::new(node_exporter_score)], + None, + ) + .await + .unwrap(); +} diff --git a/harmony/src/domain/topology/ha_cluster.rs b/harmony/src/domain/topology/ha_cluster.rs index 7be2725..a3e650d 100644 --- a/harmony/src/domain/topology/ha_cluster.rs +++ b/harmony/src/domain/topology/ha_cluster.rs @@ -11,7 +11,6 @@ use kube::api::ObjectMeta; use log::debug; use log::info; -use crate::data::FileContent; use crate::executors::ExecutorError; use crate::hardware::PhysicalHost; use crate::infra::brocade::BrocadeSwitchAuth; @@ -21,6 +20,7 @@ use crate::modules::okd::crd::{ nmstate::{self, NMState, NodeNetworkConfigurationPolicy, NodeNetworkConfigurationPolicySpec}, }; use crate::topology::PxeOptions; +use crate::{data::FileContent, topology::node_exporter::NodeExporter}; use super::{ DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig, @@ -43,6 +43,7 @@ pub struct HAClusterTopology { pub tftp_server: Arc, pub http_server: Arc, pub dns_server: Arc, + pub node_exporter: Arc, pub bootstrap_host: LogicalHost, pub control_plane: Vec, pub workers: Vec, @@ -333,6 +334,7 @@ impl HAClusterTopology { tftp_server: dummy_infra.clone(), http_server: dummy_infra.clone(), dns_server: dummy_infra.clone(), + node_exporter: dummy_infra.clone(), bootstrap_host: dummy_host, control_plane: vec![], workers: vec![], @@ -516,6 +518,23 @@ impl 
Switch for HAClusterTopology { self.configure_bond(host, &config).await?; self.configure_port_channel(host, &config).await } + + //TODO add snmp here +} + +#[async_trait] +impl NodeExporter for HAClusterTopology { + async fn ensure_initialized(&self) -> Result<(), ExecutorError> { + self.node_exporter.ensure_initialized().await + } + + async fn commit_config(&self) -> Result<(), ExecutorError> { + self.node_exporter.commit_config().await + } + + async fn reload_restart(&self) -> Result<(), ExecutorError> { + self.node_exporter.reload_restart().await + } } #[derive(Debug)] @@ -704,3 +723,18 @@ impl DnsServer for DummyInfra { unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA) } } + +#[async_trait] +impl NodeExporter for DummyInfra { + async fn ensure_initialized(&self) -> Result<(), ExecutorError> { + unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA) + } + + async fn commit_config(&self) -> Result<(), ExecutorError> { + unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA) + } + + async fn reload_restart(&self) -> Result<(), ExecutorError> { + unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA) + } +} diff --git a/harmony/src/domain/topology/node_exporter.rs b/harmony/src/domain/topology/node_exporter.rs new file mode 100644 index 0000000..88e3cc9 --- /dev/null +++ b/harmony/src/domain/topology/node_exporter.rs @@ -0,0 +1,17 @@ +use async_trait::async_trait; + +use crate::executors::ExecutorError; + +#[async_trait] +pub trait NodeExporter: Send + Sync { + async fn ensure_initialized(&self) -> Result<(), ExecutorError>; + async fn commit_config(&self) -> Result<(), ExecutorError>; + async fn reload_restart(&self) -> Result<(), ExecutorError>; +} + +//TODO complete this impl +impl std::fmt::Debug for dyn NodeExporter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("NodeExporter ",)) + } +} From 44bf21718c3223f68c1d60f8dc75019102be2322 Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 28 Oct 2025 14:41:15 -0400 Subject: [PATCH 4/9] 
wip: example score with impl topology for opnsense topology --- examples/opnsense_node_exporter/Cargo.toml | 1 + examples/opnsense_node_exporter/src/main.rs | 111 +++++++------------- harmony/src/infra/opnsense/node_exporter.rs | 2 +- opnsense-config-xml/src/data/opnsense.rs | 34 +++--- 4 files changed, 58 insertions(+), 90 deletions(-) diff --git a/examples/opnsense_node_exporter/Cargo.toml b/examples/opnsense_node_exporter/Cargo.toml index 957bdd9..5cc5c10 100644 --- a/examples/opnsense_node_exporter/Cargo.toml +++ b/examples/opnsense_node_exporter/Cargo.toml @@ -18,3 +18,4 @@ log = { workspace = true } env_logger = { workspace = true } url = { workspace = true } serde.workspace = true +async-trait.workspace = true diff --git a/examples/opnsense_node_exporter/src/main.rs b/examples/opnsense_node_exporter/src/main.rs index 4f1219d..15664ab 100644 --- a/examples/opnsense_node_exporter/src/main.rs +++ b/examples/opnsense_node_exporter/src/main.rs @@ -3,99 +3,66 @@ use std::{ sync::Arc, }; +use async_trait::async_trait; use cidr::Ipv4Cidr; use harmony::{ + executors::ExecutorError, hardware::{HostCategory, Location, PhysicalHost, SwitchGroup}, infra::opnsense::OPNSenseManagementInterface, inventory::Inventory, modules::opnsense::node_exporter::NodeExporterScore, - topology::{HAClusterTopology, LogicalHost, UnmanagedRouter}, + topology::{ + HAClusterTopology, LogicalHost, PreparationError, PreparationOutcome, Topology, + UnmanagedRouter, node_exporter::NodeExporter, + }, }; use harmony_macros::{ip, ipv4, mac_address}; +struct OpnSenseTopology { + node_exporter: Arc, +} + +#[async_trait] +impl Topology for OpnSenseTopology { + async fn ensure_ready(&self) -> Result { + Ok(PreparationOutcome::Success{ details: "Success".to_string() }) + } + fn name(&self) -> &str { + "OpnsenseTopology" + } +} + +#[async_trait] +impl NodeExporter for OpnSenseTopology { + async fn ensure_initialized(&self) -> Result<(), ExecutorError> { + self.node_exporter.ensure_initialized().await + } +
+ async fn commit_config(&self) -> Result<(), ExecutorError> { + self.node_exporter.commit_config().await + } + + async fn reload_restart(&self) -> Result<(), ExecutorError> { + self.node_exporter.reload_restart().await + } +} + #[tokio::main] async fn main() { let firewall = harmony::topology::LogicalHost { - ip: ip!("192.168.33.1"), + ip: ip!("192.168.1.1"), name: String::from("fw0"), }; let opnsense = Arc::new( harmony::infra::opnsense::OPNSenseFirewall::new(firewall, None, "root", "opnsense").await, ); - let lan_subnet = Ipv4Addr::new(192, 168, 33, 0); - let gateway_ipv4 = Ipv4Addr::new(192, 168, 33, 1); - let gateway_ip = IpAddr::V4(gateway_ipv4); - let topology = harmony::topology::HAClusterTopology { - domain_name: "ncd0.harmony.mcd".to_string(), - router: Arc::new(UnmanagedRouter::new( - gateway_ip, - Ipv4Cidr::new(lan_subnet, 24).unwrap(), - )), - load_balancer: opnsense.clone(), - firewall: opnsense.clone(), - tftp_server: opnsense.clone(), - http_server: opnsense.clone(), - dhcp_server: opnsense.clone(), - dns_server: opnsense.clone(), - control_plane: vec![ - LogicalHost { - ip: ip!("192.168.33.20"), - name: "cp0".to_string(), - }, - LogicalHost { - ip: ip!("192.168.33.21"), - name: "cp1".to_string(), - }, - LogicalHost { - ip: ip!("192.168.33.22"), - name: "cp2".to_string(), - }, - ], - bootstrap_host: LogicalHost { - ip: ip!("192.168.33.66"), - name: "bootstrap".to_string(), - }, - workers: vec![ - LogicalHost { - ip: ip!("192.168.33.30"), - name: "wk0".to_string(), - }, - LogicalHost { - ip: ip!("192.168.33.31"), - name: "wk1".to_string(), - }, - LogicalHost { - ip: ip!("192.168.33.32"), - name: "wk2".to_string(), - }, - ], - switch: vec![], + + let topology = OpnSenseTopology { node_exporter: opnsense.clone(), }; - let inventory = Inventory { - location: Location::new("I am mobile".to_string(), "earth".to_string()), - switch: SwitchGroup::from([]), - firewall_mgmt: Box::new(OPNSenseManagementInterface::new()), - storage_host: vec![], - worker_host: 
vec![ - PhysicalHost::empty(HostCategory::Server) - .mac_address(mac_address!("C4:62:37:02:61:0F")), - PhysicalHost::empty(HostCategory::Server) - .mac_address(mac_address!("C4:62:37:02:61:26")), - PhysicalHost::empty(HostCategory::Server) - .mac_address(mac_address!("C4:62:37:02:61:70")), - ], - control_plane_host: vec![ - PhysicalHost::empty(HostCategory::Server) - .mac_address(mac_address!("C4:62:37:02:60:FA")), - PhysicalHost::empty(HostCategory::Server) - .mac_address(mac_address!("C4:62:37:02:61:1A")), - PhysicalHost::empty(HostCategory::Server) - .mac_address(mac_address!("C4:62:37:01:BC:68")), - ], - }; + let inventory = Inventory::empty(); let node_exporter_score = NodeExporterScore {}; diff --git a/harmony/src/infra/opnsense/node_exporter.rs b/harmony/src/infra/opnsense/node_exporter.rs index 2c27b26..3a16ffc 100644 --- a/harmony/src/infra/opnsense/node_exporter.rs +++ b/harmony/src/infra/opnsense/node_exporter.rs @@ -25,7 +25,7 @@ impl NodeExporter for OPNSenseFirewall { })?; } - config.node_exporter().enable(true); + config.node_exporter().enable(true).map_err(|e|ExecutorError::UnexpectedError(e.to_string()))?; Ok(()) } async fn commit_config(&self) -> Result<(), ExecutorError> { diff --git a/opnsense-config-xml/src/data/opnsense.rs b/opnsense-config-xml/src/data/opnsense.rs index 4b384d4..a4bba5e 100644 --- a/opnsense-config-xml/src/data/opnsense.rs +++ b/opnsense-config-xml/src/data/opnsense.rs @@ -17,7 +17,7 @@ pub struct OPNsense { pub interfaces: NamedList, pub dhcpd: NamedList, pub snmpd: Snmpd, - pub syslog: Syslog, + pub syslog: Option, pub nat: Nat, pub filter: Filters, pub load_balancer: Option, @@ -190,7 +190,7 @@ pub struct System { pub webgui: WebGui, pub usevirtualterminal: u8, pub disablenatreflection: Option, - pub disableconsolemenu: u8, + pub disableconsolemenu: Option, pub disablevlanhwfilter: u8, pub disablechecksumoffloading: u8, pub disablesegmentationoffloading: u8, @@ -216,7 +216,7 @@ pub struct System { pub maximumfrags: Option, 
pub aliasesresolveinterval: Option, pub maximumtableentries: Option, - pub language: String, + pub language: Option, pub dnsserver: Option, pub dns1gw: Option, pub dns2gw: Option, @@ -233,16 +233,16 @@ pub struct System { #[derive(Default, PartialEq, Debug, YaSerialize, YaDeserialize)] pub struct Ssh { pub group: String, - pub noauto: u8, - pub interfaces: MaybeString, - pub kex: MaybeString, - pub ciphers: MaybeString, - pub macs: MaybeString, - pub keys: MaybeString, - pub enabled: String, - pub passwordauth: u8, - pub keysig: MaybeString, - pub permitrootlogin: u8, + pub noauto: Option, + pub interfaces: Option, + pub kex: Option, + pub ciphers: Option, + pub macs: Option, + pub keys: Option, + pub enabled: Option, + pub passwordauth: Option, + pub keysig: Option, + pub permitrootlogin: Option, pub rekeylimit: Option, } @@ -306,11 +306,11 @@ pub struct WebGui { pub protocol: String, #[yaserde(rename = "ssl-certref")] pub ssl_certref: String, - pub port: MaybeString, + pub port: Option, #[yaserde(rename = "ssl-ciphers")] - pub ssl_ciphers: MaybeString, - pub interfaces: MaybeString, - pub compression: MaybeString, + pub ssl_ciphers: Option, + pub interfaces: Option, + pub compression: Option, pub nohttpreferercheck: Option, } From 9ba939bde1db671e7caf6e78be500cb4f847f510 Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 28 Oct 2025 15:45:02 -0400 Subject: [PATCH 5/9] wip: cargo fmt --- examples/opnsense_node_exporter/src/main.rs | 4 +++- harmony/src/infra/opnsense/node_exporter.rs | 5 ++++- opnsense-config/src/config/config.rs | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/opnsense_node_exporter/src/main.rs b/examples/opnsense_node_exporter/src/main.rs index 15664ab..4b16841 100644 --- a/examples/opnsense_node_exporter/src/main.rs +++ b/examples/opnsense_node_exporter/src/main.rs @@ -25,7 +25,9 @@ struct OpnSenseTopology { #[async_trait] impl Topology for OpnSenseTopology { async fn ensure_ready(&self) -> Result { - 
Ok(PreparationOutcome::Success{ details: "Success".to_string() }) + Ok(PreparationOutcome::Success { + details: "Success".to_string(), + }) } fn name(&self) -> &str { "OpnsenseTopology" diff --git a/harmony/src/infra/opnsense/node_exporter.rs b/harmony/src/infra/opnsense/node_exporter.rs index 3a16ffc..97d2a09 100644 --- a/harmony/src/infra/opnsense/node_exporter.rs +++ b/harmony/src/infra/opnsense/node_exporter.rs @@ -25,7 +25,10 @@ impl NodeExporter for OPNSenseFirewall { })?; } - config.node_exporter().enable(true).map_err(|e|ExecutorError::UnexpectedError(e.to_string()))?; + config + .node_exporter() + .enable(true) + .map_err(|e| ExecutorError::UnexpectedError(e.to_string()))?; Ok(()) } async fn commit_config(&self) -> Result<(), ExecutorError> { diff --git a/opnsense-config/src/config/config.rs b/opnsense-config/src/config/config.rs index 30240e4..236a89e 100644 --- a/opnsense-config/src/config/config.rs +++ b/opnsense-config/src/config/config.rs @@ -14,6 +14,7 @@ use opnsense_config_xml::OPNsense; use russh::client; use serde::Serialize; use sha2::Digest; +use tokio::time::{sleep, Duration}; use super::{ConfigManager, OPNsenseShell}; From 5f147fa67206e1e6d497fb9a4ab765de1cf7e912 Mon Sep 17 00:00:00 2001 From: Willem Date: Wed, 29 Oct 2025 13:24:56 -0400 Subject: [PATCH 6/9] fix: opnsense-config reload_config() returns live config.xml rather than dropping it, allows function is_package_installed() to read live state after package installation rather than old config before installation --- opnsense-config/src/config/config.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opnsense-config/src/config/config.rs b/opnsense-config/src/config/config.rs index 236a89e..7c292c8 100644 --- a/opnsense-config/src/config/config.rs +++ b/opnsense-config/src/config/config.rs @@ -156,7 +156,8 @@ impl Config { async fn reload_config(&mut self) -> Result<(), Error> { info!("Reloading opnsense live config"); - let (opnsense, sha2) = 
Self::get_opnsense_instance(self.repository.clone()).await?; + let (opnsense, _sha2) = Self::get_opnsense_instance(self.repository.clone()).await?; + self.opnsense = opnsense; Ok(()) } From c2fa4f1869c9d6cef0f50930c0fd98a603f3bf55 Mon Sep 17 00:00:00 2001 From: Willem Date: Wed, 29 Oct 2025 13:53:58 -0400 Subject: [PATCH 7/9] fix:cargo fmt --- harmony/src/domain/topology/ha_cluster.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/harmony/src/domain/topology/ha_cluster.rs b/harmony/src/domain/topology/ha_cluster.rs index 3411a90..f6c3a02 100644 --- a/harmony/src/domain/topology/ha_cluster.rs +++ b/harmony/src/domain/topology/ha_cluster.rs @@ -8,12 +8,15 @@ use kube::api::ObjectMeta; use log::debug; use log::info; -use crate::{modules::okd::crd::nmstate::{self, NodeNetworkConfigurationPolicy}, topology::node_exporter::NodeExporter}; use crate::topology::PxeOptions; use crate::{data::FileContent, modules::okd::crd::nmstate::NMState}; use crate::{ executors::ExecutorError, modules::okd::crd::nmstate::NodeNetworkConfigurationPolicySpec, }; +use crate::{ + modules::okd::crd::nmstate::{self, NodeNetworkConfigurationPolicy}, + topology::node_exporter::NodeExporter, +}; use super::{ DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig, From 5cce9f8e7467bd19d6d45f9f00c2c222468ea434 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Fri, 19 Dec 2025 10:12:44 -0500 Subject: [PATCH 8/9] adr: draft ADR proposing harmony agent and nats-jetstream for decentralized workload management --- ...h-For-Decentralized-Workload-Management.md | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 adr/016-Harmony-Agent-And-Global-Mesh-For-Decentralized-Workload-Management.md diff --git a/adr/016-Harmony-Agent-And-Global-Mesh-For-Decentralized-Workload-Management.md b/adr/016-Harmony-Agent-And-Global-Mesh-For-Decentralized-Workload-Management.md new file mode 100644 index 0000000..5c99aec ---
/dev/null +++ b/adr/016-Harmony-Agent-And-Global-Mesh-For-Decentralized-Workload-Management.md @@ -0,0 +1,90 @@ +# Architecture Decision Record: Global Orchestration Mesh & The Harmony Agent + +**Status:** Proposed +**Date:** 2025-12-19 + +## Context + +Harmony is designed to enable a truly decentralized infrastructure where independent clusters—owned by different organizations or running on diverse hardware—can collaborate reliably. This vision combines the decentralization of Web3 with the performance and capabilities of Web2. + +Currently, Harmony operates as a stateless CLI tool, invoked manually or via CI runners. While effective for deployment, this model presents a critical limitation: **a CLI cannot react to real-time events.** + +To achieve automated failover and dynamic workload management, we need a system that is "always on." Relying on manual intervention or scheduled CI jobs to recover from a cluster failure creates unacceptable latency and prevents us from scaling to thousands of nodes. + +Furthermore, we face a challenge in serving diverse workloads: +* **Financial workloads** require absolute consistency (CP - Consistency/Partition Tolerance). +* **AI/Inference workloads** require maximum availability (AP - Availability/Partition Tolerance). + +There are many more use cases, but those are the two extremes. + +We need a unified architecture that automates cluster coordination and supports both consistency models without requiring a complete re-architecture in the future. + +## Decision + +We propose a fundamental architectural evolution. It has been clear since the start of Harmony that it would be necessary to transition Harmony from a purely ephemeral CLI tool to a system that includes a persistent **Harmony Agent**. This Agent will connect to a **Global Orchestration Mesh** based on a strongly consistent protocol. + +The proposal consists of four key pillars: + +### 1. 
The Harmony Agent (New Component) +We will develop a long-running process (Daemon/Agent) to be deployed alongside workloads. +* **Shift from CLI:** Unlike the CLI, which applies configuration and exits, the Agent maintains a persistent connection to the mesh. +* **Responsibility:** It actively monitors cluster health, participates in consensus, and executes lifecycle commands (start/stop/fence) instantly when the mesh dictates a state change. + +### 2. The Technology: NATS JetStream +We will utilize **NATS JetStream** as the underlying transport and consensus layer for the Agent and the Mesh. +* **Why not raw Raft?** Adopting a raw Raft library would require us to build and maintain the transport layer, log compaction, snapshotting, and peer discovery ourselves. NATS JetStream provides a battle-tested, distributed log and Key-Value store (based on Raft) out of the box, along with a high-performance pub/sub system for event propagation. +* **Role:** It will act as the "source of truth" for the cluster state. + +### 3. Strong Consistency at the Mesh Layer +The mesh will operate with **Strong Consistency** by default. +* All critical cluster state changes (topology updates, lease acquisitions, leadership elections) will require consensus among the Agents. +* This ensures that in the event of a network partition, we have a mathematical guarantee of which side holds the valid state, preventing data corruption. + +### 4. Public UX: The `FailoverStrategy` Abstraction +To keep the user experience stable and simple, we will hide the complexity of the mesh behind a high-level configuration API, tentatively called `FailoverStrategy`. + +The user defines the *intent* in their config, and the Harmony Agent automates the *execution*: + +* **`FailoverStrategy::AbsoluteConsistency`**: + * *Use Case:* Banking, Transactional DBs. + * *Behavior:* If the mesh detects a partition, the Agent on the minority side immediately halts workloads. No split-brain is ever allowed.
+* **`FailoverStrategy::SplitBrainAllowed`**: + * *Use Case:* LLM Inference, Stateless Web Servers. + * *Behavior:* If a partition occurs, the Agent keeps workloads running to maximize uptime. State is reconciled when connectivity returns. + +## Rationale + +**The Necessity of an Agent** +You cannot automate what you do not monitor. Moving to an Agent-based model is the only way to achieve sub-second reaction times to infrastructure failures. It transforms Harmony from a deployment tool into a self-healing platform. + +**Scaling & Decentralization** +To allow independent clusters to collaborate, they need a shared language. A strongly consistent mesh allows Cluster A (Organization X) and Cluster B (Organization Y) to agree on workload placement without a central authority. + +**Why Strong Consistency First?** +It is technically feasible to relax a strongly consistent system to allow for "Split Brain" behavior (AP) when the user requests it. However, it is nearly impossible to take an eventually consistent system and force it to be strongly consistent (CP) later. By starting with strict constraints, we cover the hardest use cases (Finance) immediately. + +**Future Topologies** +While our immediate need is `FailoverTopology` (Multi-site), this architecture supports any future topology logic: +* **`CostTopology`**: Agents negotiate to route workloads to the cluster with the cheapest spot instances. +* **`HorizontalTopology`**: Spreading a single workload across 100 clusters for massive scale. +* **`GeoTopology`**: Ensuring data stays within specific legal jurisdictions. + +The mesh provides the *capability* (consensus and messaging); the topology provides the *logic*. + +## Consequences + +**Positive** +* **Automation:** Eliminates manual failover, enabling massive scale. +* **Reliability:** Guarantees data safety for critical workloads by default. +* **Flexibility:** A single codebase serves both high-frequency trading and AI inference. 
+* **Stability:** The public API remains abstract, allowing us to optimize the mesh internals without breaking user code. + +**Negative** +* **Deployment Complexity:** Users must now deploy and maintain a running service (the Agent) rather than just downloading a binary. +* **Engineering Complexity:** Integrating NATS JetStream and handling distributed state machines is significantly more complex than the current CLI logic. + +## Implementation Plan (Short Term) +1. **Agent Bootstrap:** Create the initial scaffold for the Harmony Agent (daemon). +2. **Mesh Integration:** Prototype NATS JetStream embedding within the Agent. +3. **Strategy Implementation:** Add `FailoverStrategy` to the configuration schema and implement the logic in the Agent to read and act on it. +4. **Migration:** Transition the current manual failover scripts into event-driven logic handled by the Agent. From fdf1dfaa30803d2031628e0335e6d9c9ae6717a8 Mon Sep 17 00:00:00 2001 From: wjro Date: Tue, 6 Jan 2026 14:17:04 -0500 Subject: [PATCH 9/9] fix: leave implementers to define their Debug, so removed impl Debug for dyn NodeExporter --- Cargo.lock | 52 ++++++++++++++------ examples/opnsense_node_exporter/src/main.rs | 1 + harmony/src/domain/topology/node_exporter.rs | 14 +++--- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65f2d8b..5c45111 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -690,6 +690,24 @@ dependencies = [ "tokio", ] +[[package]] +name = "brocade-snmp-server" +version = "0.1.0" +dependencies = [ + "base64 0.22.1", + "brocade", + "env_logger", + "harmony", + "harmony_cli", + "harmony_macros", + "harmony_secret", + "harmony_types", + "log", + "serde", + "tokio", + "url", +] + [[package]] name = "brocade-switch" version = "0.1.0" @@ -1885,6 +1903,25 @@ dependencies = [ "url", ] +[[package]] +name = "example-opnsense-node-exporter" +version = "0.1.0" +dependencies = [ + "async-trait", + "cidr", + "env_logger", + "harmony", + "harmony_cli", + 
"harmony_macros", + "harmony_secret", + "harmony_secret_derive", + "harmony_types", + "log", + "serde", + "tokio", + "url", +] + [[package]] name = "example-pxe" version = "0.1.0" @@ -6095,21 +6132,6 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" -[[package]] -name = "test-score" -version = "0.1.0" -dependencies = [ - "base64 0.22.1", - "env_logger", - "harmony", - "harmony_cli", - "harmony_macros", - "harmony_types", - "log", - "tokio", - "url", -] - [[package]] name = "thiserror" version = "1.0.69" diff --git a/examples/opnsense_node_exporter/src/main.rs b/examples/opnsense_node_exporter/src/main.rs index 4b16841..d71d2ed 100644 --- a/examples/opnsense_node_exporter/src/main.rs +++ b/examples/opnsense_node_exporter/src/main.rs @@ -18,6 +18,7 @@ use harmony::{ }; use harmony_macros::{ip, ipv4, mac_address}; +#[derive(Debug)] struct OpnSenseTopology { node_exporter: Arc, } diff --git a/harmony/src/domain/topology/node_exporter.rs b/harmony/src/domain/topology/node_exporter.rs index 88e3cc9..1e6ef67 100644 --- a/harmony/src/domain/topology/node_exporter.rs +++ b/harmony/src/domain/topology/node_exporter.rs @@ -3,15 +3,15 @@ use async_trait::async_trait; use crate::executors::ExecutorError; #[async_trait] -pub trait NodeExporter: Send + Sync { +pub trait NodeExporter: Send + Sync + std::fmt::Debug { async fn ensure_initialized(&self) -> Result<(), ExecutorError>; async fn commit_config(&self) -> Result<(), ExecutorError>; async fn reload_restart(&self) -> Result<(), ExecutorError>; } -//TODO complete this impl -impl std::fmt::Debug for dyn NodeExporter { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("NodeExporter ",)) - } -} +// //TODO complete this impl +// impl std::fmt::Debug for dyn NodeExporter { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// 
f.write_fmt(format_args!("NodeExporter ",)) +// } +// }