diff --git a/Cargo.lock b/Cargo.lock index 97904ddf..95407b9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5552,6 +5552,7 @@ dependencies = [ "harmony_cli", "harmony_inventory_agent", "harmony_macros", + "harmony_secret", "harmony_types", "log", "opnsense-api", diff --git a/data/opnsense/ethname.LICENSE b/data/opnsense/ethname.LICENSE new file mode 100644 index 00000000..853b46db --- /dev/null +++ b/data/opnsense/ethname.LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/data/opnsense/ethname.sh b/data/opnsense/ethname.sh new file mode 100644 index 00000000..7489b690 --- /dev/null +++ b/data/opnsense/ethname.sh @@ -0,0 +1,280 @@ +#!/bin/sh +# +# * Copyright (c) 2016-2019 Eric Borisch +# * All rights reserved. +# +# Self-contained rc.d script for re-naming devices based on their MAC address. 
+# Renaming is performed before interface bring-up -- netif -- so all +# configurations of the devices can be done with the new names. +# +# USAGE: +# 1) Add the following to rc.conf: +# ethname_enable="YES" +# ethname_external_mac="aa:bb:cc:dd:ee:00" +# ethname_private_mac="aa:bb:cc:dd:ee:01" +# 1a) You can optionally restrict handling to a set of defined names with: +# ethname_names="external private" +# otherwise all defined ethname_*_mac="" values are used +# 2) Make sure any interfaces you want to rename have their drivers loaded or +# compiled in. If ue0 is on axe0, for example, add 'if_load_axe="YES"' to +# /boot/loader.conf. See the man page for your device (eg 'man axe') for +# particulars. +# 3) That's it. Use ifconfig_NAME="" settings with the new names. +# +# All other devices are untouched. +# +# Optional rc.conf settings: +# ethname_timeout : Maximum wait time for devices to appear. [default=30] +# +# PROVIDE: ethname +# REQUIRE: FILESYSTEMS +# BEFORE: netif +# KEYWORD: nojail + +# ethname version 2.0 + +. /etc/rc.subr + +name=ethname +rcvar=ethname_enable +extra_commands="check" +check_cmd="en_check" + +start_cmd="${name}_start" +stop_cmd=":" + +load_rc_config ${name} +: ${ethname_names:=""} +: ${ethname_enable:=no} +: ${ethname_timeout:="30"} + +en_str="" + +# Will fill with mac interface [mac interface] [...] +en_map="" + +# Will fill with original device names that match a managed mac address. +en_orig="" + +# Total wait timeout; won't wait n*timeout for n devices, just timeout +en_waited=0 + +known_mac() +{ + echo "${en_map}" | grep -qi "$1" +} + +to_lower() +{ + echo "$*" | tr "[:upper:]" "[:lower:]" +} + + +kv_lookup() +{ + # Called with $1=K, the key we want to find the value for, and $2:$3 + # $4:$5 ... forming pairs of key:value mappings + local _K _key _value + + _K=$(to_lower "$1") + [ -z "${_K}" ] && err 1 "Called kv_lookup() with missing args." 
+ shift + while [ $# -ge 2 ]; do + _key=$(to_lower "$1") + _value=$2 + shift 2 + # Only supports non-zero-length keys/values + [ -z "${_key}" -o -z "${_value}" ] && err 1 "Zero length values passed?" + [ "${_key}" == "${_K}" ] && echo "${_value}" && return 0 + done + return 1 +} + +good_mac() { + echo "$1" | egrep -qi '^([0-9a-z]{2}:){5}[0-9a-z]{2}$' || \ + err 1 "Invalid MAC address defined: [$1]" + return 0 +} + +good_devname() { + echo "$1" | egrep -qi '^[a-z][a-z0-9_]+$' || \ + err 1 "Invalid device name defined: [$1]" + return 0 +} + +breakout_map () { + # This takes a single ethname_map variable (old interface) and breaks it + # into the new interface (ethname_names and ethname_NAME_mac vars.) + local _mac _name + while [ $# -gt 0 ]; do + _mac=$1 + _name=$2 + good_mac "${_mac}" + good_devname "${_name}" + shift 2 + # Params checked for validity above + eval ethname_${_name}_mac="${_mac}" + ethname_names="${ethname_names} ${_name}" + done +} + +en_prep() +{ + local _mac _name _dev _found + local _compat=0 + + if [ -z "${ethname_names}" ]; then + # Compatibility code + if [ ! -z "${ethname_map}" -a ! -z "${ethname_devices}" ]; then + ethname_names="" + warn "ethname: Using old interface. Please see documentation." + breakout_map ${ethname_map} + _compat=1 + else + # Detect set ethname_*_mac names + ethname_names=$(set | sed -En '/^ethname_([^=]+)_mac=.*/s//\1/p') + fi + fi + + # Transforms set of ethname_NAME_mac="" values into en_map="MAC NAME ..." + # and en_orig="EXISTINGDEV ..."; a map of desired MAC:name mappings + # and the devices with those MACs, respectively. + + for _name in ${ethname_names}; do + # Make sure ${_name} is good before eval call + good_devname "${_name}" + eval _mac=\$ethname_${_name}_mac + + [ -z "${_mac}" -a ${_compat} -eq 0 ] && \ + warn "ethname_${_name}_mac is not set in rc.conf!" 
&& continue + + good_mac "${_mac}" + + # Enable ctrl-c for wait loop + trap break SIGINT + + _found=0 + while [ ${en_waited} -lt ${ethname_timeout} ]; do + for _dev in $(ifconfig -l ether); do + if ifconfig ${_dev} | grep -qi "${_mac}"; then + en_map="${en_map} ${_mac} ${_name}" + en_orig="${en_orig} ${_dev}" + _found=1 + break + fi + done + [ ${_found} -eq 1 ] && break + sleep 1 + warn "Waiting for a device with MAC [${_mac}] to appear..." + en_waited=$((en_waited + 1)) + done + + trap - SIGINT + + [ ${_found} -eq 0 ] && \ + warn "Unable to locate device to rename [${_name}]!" + done +} + +en_check() { + local _mac _name _orig + local _n=1 + en_prep + # Piping into a while loop, but we don't need any results from this loop to + # be visible in this shell, so it's not an issue. + echo "${en_map}" | xargs -n 2 echo | while read _mac _name; do + _orig=$(echo "${en_orig}" | awk "{print \$${_n}}") + if [ "${_orig}" = "${_name}" ]; then + printf "Device with MAC [%s] already named '%s'\n" \ + "${_mac}" "${_name}" + else + printf "Will rename [%s] to [%s] with MAC [%s]\n" \ + "${_orig}" "${_name}" "${_mac}" + fi + _n=$((_n + 1)) + done +} + +fix_name() +{ + # Can be called with or without a second argument (which is used as the new + # name if provided.) If only one argument, lookup desired name in map. + dev=$1 + name=$2 + + # Make sure the device exists as an ifconfig device + if ! ifconfig -l ether | grep -q "${dev}"; then + en_str="could not find device." + return 1 + fi + + # Grab MAC address + mac=$(ifconfig ${dev} | awk '/ether/{print tolower($2)}') + + if [ ${#mac} -eq 0 ]; then + en_str="unable to get MAC address" + return 1 + fi + + # Make sure the MAC for this device is in our rename table. + if ! known_mac "${mac}"; then + en_str="no maching MAC in ethname__mac params." + return 1 + fi + + # Find name from MAC -> dev_name table in map + dname=$(kv_lookup ${mac} ${en_map}) + if [ "${dname}" == "${dev}" ]; then + en_str="already has desired name." 
+ return 1 + fi + + # Use name from MAC -> dev_name table in map if $2 was empty + : ${name:=${dname}} + + # We have everything we need. Now actual rename of the device. + if ! ifconfig ${dev} name ${name} > /dev/null ; then + en_str="return code: $?" + return 2 + fi +} + +ethname_start() +{ + local _n _m _prefix _x + # Build the map of "mac name [mac name] [...]" + en_prep + + # Don't report any other errors if we haven't been asked to do anything. + if [ ${#en_orig} -eq 0 ]; then + warn "Unable to locate any of the specified ethname_\*_mac addresses." + exit 0 + fi + + # Rename interfaces; first into en_tmp_$_n with _n = 0, 1, ... to avoid any + # possible collision with the desired names. (ex. ue0 -> ue1; ue1 -> ue0 + # renaming.) + _prefix=en_$$_ + _n=0 + for _x in ${en_orig}; do + if fix_name ${_x} ${_prefix}${_n}; then + _n=$((_n+1)) + elif [ $? -eq 1 ]; then + info "Skipping rename of [${_x}]: ${en_str}" + else + warn "Error during rename of [${_x}]: ${en_str}" + fi + done + + # Loop back over renamed devices and lookup their desired names. + _m=0 + while [ ${_m} -lt ${_n} ]; do + fix_name ${_prefix}${_m} || \ + warn "Error during renaming process. Stranded [${_prefix}${_m}]." 
+ _m=$((_m+1)) + done +} + +run_rc_command "$1" + +# vim: et:ts=4:sw=4 diff --git a/examples/opnsense_pair_integration/src/main.rs b/examples/opnsense_pair_integration/src/main.rs index cefd071b..307bf9ac 100644 --- a/examples/opnsense_pair_integration/src/main.rs +++ b/examples/opnsense_pair_integration/src/main.rs @@ -20,7 +20,6 @@ use std::net::IpAddr; use std::path::{Path, PathBuf}; -use std::sync::Arc; use harmony::config::secret::{OPNSenseApiCredentials, OPNSenseFirewallCredentials}; use harmony::infra::opnsense::OPNSenseFirewall; @@ -29,7 +28,9 @@ use harmony::modules::kvm::config::init_executor; use harmony::modules::kvm::{ BootDevice, ForwardMode, KvmExecutor, NetworkConfig, NetworkRef, VmConfig, }; -use harmony::modules::opnsense::bootstrap::OPNsenseBootstrap; +use harmony::modules::opnsense::bootstrap::{ + OPNsenseBootstrap, change_lan_ip_via_ssh, create_api_key_ssh, +}; use harmony::modules::opnsense::firewall::{FilterRuleDef, FirewallRuleScore}; use harmony::modules::opnsense::vip::VipDef; use harmony::modules::opnsense::vlan::{VlanDef, VlanScore}; @@ -158,7 +159,7 @@ async fn boot_pair( // Step 3: Change primary's LAN IP from .1 to .2 via API info!("Changing primary LAN IP to {PRIMARY_IP}..."); - change_lan_ip_via_ssh(BOOT_IP, PRIMARY_IP, 24).await?; + change_lan_ip_via_ssh(BOOT_IP, PRIMARY_IP, 24, "root", "opnsense").await?; // Step 4: Wait for primary to come back on new IP info!("Waiting for primary on new IP {PRIMARY_IP}:{API_PORT}..."); @@ -184,7 +185,7 @@ async fn boot_pair( // Step 7: Change backup's LAN IP from .1 to .3 via API info!("Changing backup LAN IP to {BACKUP_IP}..."); - change_lan_ip_via_ssh(BOOT_IP, BACKUP_IP, 24).await?; + change_lan_ip_via_ssh(BOOT_IP, BACKUP_IP, 24, "root", "opnsense").await?; // Step 8: Re-enable primary's LAN NIC info!("Re-enabling primary LAN NIC..."); @@ -219,6 +220,18 @@ async fn boot_pair( async fn bootstrap_vm(role: &str, ip: &str) -> Result<(), Box> { info!("Bootstrapping {role} firewall at {ip}..."); + 
// TODO: migrate this example to compose `OPNsenseBootstrapScore` + // against `OPNsenseBootstrapTopology`, mirroring the + // `opnsense_vm_integration` refactor. That replaces this whole + // procedural dance (login → abort_wizard → enable_ssh → + // set_webgui_port → wait_for_ready → mint API key via SSH) with + // a single `harmony_cli::run_cli` invocation of the Score. The + // dual-firewall scenario will need per-instance secret keys + // (tracked at `harmony/src/domain/config/secret.rs:17`); migrate + // after that lands. Until then the `abort_wizard()` call below + // continues to 403 + WARN (same reason it was dropped from + // `OPNsenseBootstrapScore` in commit 27f18d60) — known-noisy, + // doesn't block any subsequent step. let bootstrap = OPNsenseBootstrap::new(&format!("https://{ip}")); bootstrap.login("root", "opnsense").await?; bootstrap.abort_wizard().await?; @@ -247,48 +260,6 @@ async fn bootstrap_vm(role: &str, ip: &str) -> Result<(), Box Result<(), Box> { - use opnsense_config::config::{OPNsenseShell, SshCredentials, SshOPNSenseShell}; - - let ssh_config = Arc::new(russh::client::Config { - inactivity_timeout: None, - ..<_>::default() - }); - let credentials = SshCredentials::Password { - username: "root".to_string(), - password: "opnsense".to_string(), - }; - let ip: IpAddr = current_ip.parse()?; - let shell = SshOPNSenseShell::new((ip, 22), credentials, ssh_config); - - // Use a PHP script to update config.xml and apply - let php_script = format!( - r#"object()->interfaces->lan->ipaddr = '{new_ip}'; -$config->object()->interfaces->lan->subnet = '{subnet}'; -$config->save(); -echo "OK\n"; -"# - ); - - shell - .write_content_to_file(&php_script, "/tmp/change_ip.php") - .await?; - let output = shell - .exec("php /tmp/change_ip.php && rm /tmp/change_ip.php && configctl interface reconfigure lan") - .await?; - info!("IP change result: {}", output.trim()); - - Ok(()) -} - // ── Phase 2: Pair integration test ───────────────────────────────── async fn 
run_pair_test() -> Result<(), Box> { @@ -306,8 +277,8 @@ async fn run_pair_test() -> Result<(), Box> { info!("Creating API keys..."); let primary_ip: IpAddr = PRIMARY_IP.parse()?; let backup_ip: IpAddr = BACKUP_IP.parse()?; - let (primary_key, primary_secret) = create_api_key_ssh(&primary_ip).await?; - let (backup_key, backup_secret) = create_api_key_ssh(&backup_ip).await?; + let (primary_key, primary_secret) = create_api_key_ssh(&primary_ip, "root", "opnsense").await?; + let (backup_key, backup_secret) = create_api_key_ssh(&backup_ip, "root", "opnsense").await?; info!("API keys created for both firewalls"); // Build FirewallPairTopology @@ -641,50 +612,3 @@ async fn check_tcp_port(ip: &str, port: u16) -> bool { .map(|r| r.is_ok()) .unwrap_or(false) } - -async fn create_api_key_ssh(ip: &IpAddr) -> Result<(String, String), Box> { - use opnsense_config::config::{OPNsenseShell, SshCredentials, SshOPNSenseShell}; - - let ssh_config = Arc::new(russh::client::Config { - inactivity_timeout: None, - ..<_>::default() - }); - let credentials = SshCredentials::Password { - username: "root".to_string(), - password: "opnsense".to_string(), - }; - let shell = SshOPNSenseShell::new((*ip, 22), credentials, ssh_config); - - let php_script = r#"object()->system->user as $user) { - if ((string)$user->name === 'root') { - if (!isset($user->apikeys)) { $user->addChild('apikeys'); } - $item = $user->apikeys->addChild('item'); - $item->addChild('key', $key); - $item->addChild('secret', crypt($secret, '$6$' . bin2hex(random_bytes(8)) . '$')); - $config->save(); - echo $key . "\n" . $secret . 
"\n"; - exit(0); - } -} -echo "ERROR: root user not found\n"; -exit(1); -"#; - - shell - .write_content_to_file(php_script, "/tmp/create_api_key.php") - .await?; - let output = shell - .exec("php /tmp/create_api_key.php && rm /tmp/create_api_key.php") - .await?; - let lines: Vec<&str> = output.trim().lines().collect(); - if lines.len() >= 2 && !lines[0].starts_with("ERROR") { - Ok((lines[0].to_string(), lines[1].to_string())) - } else { - Err(format!("API key creation failed: {output}").into()) - } -} diff --git a/examples/opnsense_vm_integration/Cargo.toml b/examples/opnsense_vm_integration/Cargo.toml index ac4ec1a5..7ec3fd4a 100644 --- a/examples/opnsense_vm_integration/Cargo.toml +++ b/examples/opnsense_vm_integration/Cargo.toml @@ -13,6 +13,7 @@ harmony = { path = "../../harmony" } harmony_cli = { path = "../../harmony_cli" } harmony_inventory_agent = { path = "../../harmony_inventory_agent" } harmony_macros = { path = "../../harmony_macros" } +harmony_secret = { path = "../../harmony_secret" } harmony_types = { path = "../../harmony_types" } opnsense-api = { path = "../../opnsense-api" } opnsense-config = { path = "../../opnsense-config" } diff --git a/examples/opnsense_vm_integration/src/main.rs b/examples/opnsense_vm_integration/src/main.rs index ed43581a..344b983d 100644 --- a/examples/opnsense_vm_integration/src/main.rs +++ b/examples/opnsense_vm_integration/src/main.rs @@ -2,25 +2,34 @@ //! //! Fully unattended workflow — no manual browser interaction required: //! -//! 1. `--boot` — creates a KVM VM, waits for web UI, bootstraps SSH + webgui port -//! 2. (default run) — creates API key via SSH, installs packages, runs Scores -//! 3. `--full` — does both in a single invocation (CI-friendly) +//! 1. `--boot` — provisions a KVM VM (image inject, network, qcow2, +//! `virsh` define + start), then dispatches `OPNsenseBootstrapScore`: +//! login → SSH enable → web GUI port move to 9443 → API key mint → +//! 
persist `OPNSenseApiCredentials` + `OPNSenseFirewallCredentials` +//! to `harmony_secret::SecretManager`. +//! 2. (default run) — reads the stored credentials, runs the integration +//! Score pipeline against `OPNSenseFirewall`: +//! `OPNsenseFirmwareUpgradeScore` (brings firmware current) → +//! `OPNsensePackageInstallScore { os-haproxy }` → the config Scores +//! (web GUI port, load balancer, DHCP, TFTP, node exporter, VLAN, +//! firewall rules, SNAT/BINAT/VIP/DNAT, LAGG) → idempotency-rerun +//! of the same pipeline → entity-count assertions. +//! 3. `--full` — does both in a single invocation (CI-friendly). //! //! # Usage //! //! ```bash //! cargo run -p opnsense-vm-integration -- --check # verify prerequisites //! cargo run -p opnsense-vm-integration -- --download # download OPNsense image -//! cargo run -p opnsense-vm-integration -- --boot # create VM + automated bootstrap -//! cargo run -p opnsense-vm-integration # run integration test -//! cargo run -p opnsense-vm-integration -- --full # boot + bootstrap + test (CI mode) +//! cargo run -p opnsense-vm-integration -- --boot # create VM + run OPNsenseBootstrapScore +//! cargo run -p opnsense-vm-integration # run integration-test Score pipeline +//! cargo run -p opnsense-vm-integration -- --full # boot + bootstrap + pipeline (CI mode) //! cargo run -p opnsense-vm-integration -- --status # check VM state //! cargo run -p opnsense-vm-integration -- --clean # tear down everything //! 
``` use std::net::IpAddr; use std::path::{Path, PathBuf}; -use std::sync::Arc; use harmony::config::secret::{OPNSenseApiCredentials, OPNSenseFirewallCredentials}; use harmony::hardware::{HostCategory, PhysicalHost}; @@ -32,28 +41,35 @@ use harmony::modules::kvm::{ BootDevice, ForwardMode, KvmExecutor, NetworkConfig, NetworkRef, VmConfig, }; use harmony::modules::load_balancer::LoadBalancerScore; -use harmony::modules::opnsense::bootstrap::OPNsenseBootstrap; +use harmony::modules::opnsense::bootstrap_score::OPNsenseBootstrapScore; use harmony::modules::opnsense::dnat::{DnatRuleDef, DnatScore}; use harmony::modules::opnsense::firewall::{ BinatRuleDef, BinatScore, FilterRuleDef, FirewallRuleScore, OutboundNatScore, SnatRuleDef, }; +use harmony::modules::opnsense::firmware_upgrade::{ + FirmwareUpgradeMode, OPNsenseFirmwareUpgradeScore, +}; use harmony::modules::opnsense::lagg::{LaggDef, LaggScore}; +use harmony::modules::opnsense::lan_bridge::{LanBridgeParams, OPNsenseLanBridgeScore}; use harmony::modules::opnsense::node_exporter::NodeExporterScore; +use harmony::modules::opnsense::package_install::OPNsensePackageInstallScore; use harmony::modules::opnsense::vip::{VipDef, VipScore}; use harmony::modules::opnsense::vlan::{VlanDef, VlanScore}; use harmony::modules::tftp::TftpScore; use harmony::score::Score; use harmony::topology::{ BackendServer, HealthCheck, HostBinding, HostConfig, LoadBalancerService, LogicalHost, + OPNsenseBootstrapTopology, }; use harmony_inventory_agent::hwinfo::NetworkInterface; use harmony_macros::ip; +use harmony_secret::SecretManager; use harmony_types::firewall::{ Direction, FirewallAction, IpProtocol, LaggProtocol, NetworkProtocol, VipMode, }; use harmony_types::id::Id; use harmony_types::net::{MacAddress, Url}; -use log::{info, warn}; +use log::info; const OPNSENSE_IMG_URL: &str = "https://mirror.ams1.nl.leaseweb.net/opnsense/releases/26.1/OPNsense-26.1-nano-amd64.img.bz2"; @@ -71,6 +87,24 @@ const OPN_API_PORT: u16 = 9443; #[tokio::main] 
async fn main() -> Result<(), Box> { + // `SecretManager` panics if HARMONY_SECRET_NAMESPACE is unset, and + // defaults to the Infisical backend if HARMONY_SECRET_STORE is unset + // (see `harmony_secret::config` and `init_secret_manager` in + // `harmony_secret::lib`). Default both so `cargo run -p + // opnsense-vm-integration` works without sourcing an env.sh. + if std::env::var("HARMONY_SECRET_NAMESPACE").is_err() { + // SAFETY: single-threaded at this point, no other reads/writes to env. + unsafe { + std::env::set_var("HARMONY_SECRET_NAMESPACE", "opnsense-vm-integration"); + } + } + if std::env::var("HARMONY_SECRET_STORE").is_err() { + // SAFETY: same as above. + unsafe { + std::env::set_var("HARMONY_SECRET_STORE", "file"); + } + } + harmony_cli::cli_logger::init(); let args: Vec = std::env::args().collect(); @@ -176,46 +210,44 @@ async fn boot_vm( wait_for_https(OPN_LAN_IP, 443).await?; - // ── Automated bootstrap (replaces manual browser interaction) ─── - info!("Bootstrapping OPNsense: login, abort wizard, enable SSH, set webgui port..."); - let bootstrap = OPNsenseBootstrap::new(&format!("https://{OPN_LAN_IP}")); - bootstrap.login("root", "opnsense").await?; - bootstrap.abort_wizard().await?; - bootstrap.enable_ssh(true, true).await?; - bootstrap - .set_webgui_port(OPN_API_PORT, OPN_LAN_IP, false) - .await?; - - // Wait for the web UI to come back on the new port - info!("Waiting for web UI on new port {OPN_API_PORT}..."); - if let Err(e) = OPNsenseBootstrap::wait_for_ready( - &format!("https://{OPN_LAN_IP}:{OPN_API_PORT}"), - std::time::Duration::from_secs(120), + // ── Hand off to OPNsenseBootstrapScore ────────────────────────── + // The Score owns the full dance: login → SSH → port + // move → API key mint → persist OPNSenseApiCredentials and + // OPNSenseFirewallCredentials to SecretManager. It's idempotent: a + // re-run against an already-bootstrapped firewall NOOPs. 
+ let bootstrap_topology = OPNsenseBootstrapTopology { + vanilla_ip: ip!("192.168.1.1"), + default_username: "root".to_string(), + default_password: "opnsense".to_string(), + }; + let bootstrap_scores: Vec>> = + vec![Box::new(OPNsenseBootstrapScore { + target_api_port: OPN_API_PORT, + // The VM image is a known firmware version, and the + // integration-test Score pipeline (see `build_all_scores`) + // already runs `OPNsenseFirmwareUpgradeScore` explicitly + // before plugin installs. So we skip the bootstrap-time + // upgrade to avoid doing it twice. Operators can swap to + // `Auto` / `AutoMinor` / `Prompt` locally when testing the + // bootstrap upgrade beat specifically. + firmware_upgrade: FirmwareUpgradeMode::Disabled, + ..Default::default() + })]; + let bootstrap_args = harmony_cli::Args { + yes: true, + filter: None, + interactive: false, + all: true, + number: 0, + list: false, + }; + harmony_cli::run_cli( + Inventory::autoload(), + bootstrap_topology, + bootstrap_scores, + bootstrap_args, ) - .await - { - warn!("Web UI did not come up on port {OPN_API_PORT}: {e}"); - info!("Running diagnostics via SSH..."); - match OPNsenseBootstrap::diagnose_via_ssh(OPN_LAN_IP).await { - Ok(report) => { - info!("Diagnostic report:\n{}", report); - } - Err(diag_err) => warn!("Diagnostics failed: {diag_err}"), - } - return Err(e.into()); - } - - // Verify SSH is reachable - info!("Verifying SSH is reachable..."); - for _ in 0..30 { - if check_tcp_port(OPN_LAN_IP, 22).await { - break; - } - tokio::time::sleep(std::time::Duration::from_secs(2)).await; - } - if !check_tcp_port(OPN_LAN_IP, 22).await { - return Err("SSH did not become reachable after bootstrap".into()); - } + .await?; println!(); println!("OPNsense VM is running and fully bootstrapped:"); @@ -246,74 +278,27 @@ async fn run_integration() -> Result<(), Box> { } info!("SSH is reachable"); - // Create API key - info!("Creating API key via SSH..."); - let (api_key, api_secret) = create_api_key_ssh(&vm_ip).await?; - 
info!("API key created: {}...", &api_key[..api_key.len().min(12)]); + // Load API + SSH credentials from SecretManager. OPNsenseBootstrapScore + // (run by --boot or --full) is what writes them; if they're missing, + // the operator hasn't bootstrapped the VM yet. + let api_creds = SecretManager::get::().await?; + let ssh_creds = SecretManager::get::().await?; // Build topology let firewall_host = LogicalHost { - ip: vm_ip.into(), + ip: vm_ip, name: VM_NAME.to_string(), }; - let api_creds = OPNSenseApiCredentials { - key: api_key.clone(), - secret: api_secret.clone(), - }; - let ssh_creds = OPNSenseFirewallCredentials { - username: "root".to_string(), - password: "opnsense".to_string(), - }; let opnsense = OPNSenseFirewall::with_api_port(firewall_host, None, OPN_API_PORT, &api_creds, &ssh_creds) .await; - // Install packages - let config = opnsense.get_opnsense_config(); - if !config.is_package_installed("os-haproxy").await { - info!("Installing os-haproxy (may need firmware update first)..."); - match config.install_package("os-haproxy").await { - Ok(()) => info!("os-haproxy installed"), - Err(e) => { - warn!("os-haproxy install failed: {e}"); - info!("Attempting firmware update..."); - // Trigger firmware update then retry - let _: serde_json::Value = config - .client() - .post_typed("core", "firmware", "update", None::<&()>) - .await - .map_err(|e| format!("firmware update failed: {e}"))?; - // Poll for completion - for _ in 0..120 { - tokio::time::sleep(std::time::Duration::from_secs(5)).await; - let status: serde_json::Value = match config - .client() - .get_typed("core", "firmware", "upgradestatus") - .await - { - Ok(s) => s, - Err(_) => continue, // VM may be rebooting - }; - if status["status"].as_str() == Some("done") - || status["status"].as_str() == Some("reboot") - { - break; - } - } - info!("Firmware updated, retrying package install..."); - // Wait for API to come back — try configured port first - // (config.xml persists across reboots, so port stays 
at 9443) - wait_for_https(OPN_LAN_IP, OPN_API_PORT).await?; - // Extra settle time — web UI responds before API backend is ready - tokio::time::sleep(std::time::Duration::from_secs(10)).await; - config.install_package("os-haproxy").await?; - } - } - } else { - info!("os-haproxy already installed"); - } - // ── Build and run all Scores ────────────────────────────────────── + // Pipeline starts with the firmware upgrade Score (brings the + // freshly-bootstrapped image current) and the package-install Score + // (installs os-haproxy now that the repo metadata is current). + // Everything downstream is configuration scores that depend on the + // plugin being installed. No imperative install/retry glue. info!("Running all Scores (run 1)..."); let scores = build_all_scores()?; let args = harmony_cli::Args { @@ -330,7 +315,7 @@ async fn run_integration() -> Result<(), Box> { info!("Verifying all Scores via typed API..."); let client = opnsense_api::OpnsenseClient::builder() .base_url(format!("https://{OPN_LAN_IP}:{OPN_API_PORT}/api")) - .auth_from_key_secret(&api_key, &api_secret) + .auth_from_key_secret(&api_creds.key, &api_creds.secret) .skip_tls_verify() .timeout_secs(60) .build()?; @@ -343,7 +328,7 @@ async fn run_integration() -> Result<(), Box> { info!("=== IDEMPOTENCY TEST: Running all Scores a SECOND time ==="); let scores_round2 = build_all_scores()?; let firewall_host2 = LogicalHost { - ip: vm_ip.into(), + ip: vm_ip, name: VM_NAME.to_string(), }; let opnsense2 = @@ -404,6 +389,71 @@ async fn run_integration() -> Result<(), Box> { "LAGGs changed after 2nd run! {} -> {}", state1.lagg_count, state2.lagg_count ); + assert_eq!( + state1.bridge_count, state2.bridge_count, + "Bridges changed after 2nd run! {} -> {}", + state1.bridge_count, state2.bridge_count + ); + assert_eq!( + state1.bridge_sysctls, state2.bridge_sysctls, + "net.link.bridge.* sysctl count changed after 2nd run! 
{} -> {}", + state1.bridge_sysctls, state2.bridge_sysctls + ); + assert_eq!( + state1.lan_if, state2.lan_if, + "interfaces.lan.if changed after 2nd run! {} -> {}", + state1.lan_if, state2.lan_if + ); + + // ── Reachability assertion ───────────────────────────────────── + // The bridge step re-points `interfaces.lan.if` at bridge0; a + // wrong sysctl ordering, missing service restart, or bad MAC + // inheritance can break individual services without taking down + // the whole stack. Verify BOTH HTTPS (lighttpd) AND SSH (sshd) + // come back up: HTTPS uses the webgui-port settings (own restart + // path) while SSH binds per-interface and needs `configctl sshd + // restart` after LAN's `if` moves to bridge0 — if that step is + // missing, HTTPS stays green but SSH-based Scores time out on + // any rerun. Generous timeouts because the detached configctl + // chain takes a beat to fully settle. + info!("Verifying firewall HTTPS reachability post-run on {OPN_LAN_IP}:{OPN_API_PORT}..."); + wait_for_https(OPN_LAN_IP, OPN_API_PORT) + .await + .map_err(|e| -> Box { + format!( + "Firewall HTTPS at {OPN_LAN_IP}:{OPN_API_PORT} is unreachable after the Score \ + pipeline: {e}. The bridge / LAN reassignment likely broke L2 (check MAC \ + inheritance via net.link.bridge.inherit_mac=1 BEFORE bridge member is added, \ + and confirm `` ended at `bridge0`)." + ) + .into() + })?; + info!("HTTPS reachable at https://{OPN_LAN_IP}:{OPN_API_PORT}"); + + info!("Verifying firewall SSH reachability post-run on {OPN_LAN_IP}:22..."); + let ssh_ok = tokio::time::timeout( + std::time::Duration::from_secs(30), + async { + loop { + if check_tcp_port(OPN_LAN_IP, 22).await { + return true; + } + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + }, + ) + .await + .unwrap_or(false); + if !ssh_ok { + return Err(format!( + "Firewall SSH at {OPN_LAN_IP}:22 is unreachable after the Score pipeline. 
\ + HTTPS is up but sshd is bound to a stale interface — the detached configctl \ + chain after the LAN-bridge reassignment must include `configctl sshd restart` \ + so sshd re-binds to the new lan interface (bridge0)." + ) + .into()); + } + info!("SSH reachable at {OPN_LAN_IP}:22"); // Clean up temp files let _ = std::fs::remove_dir_all(std::env::temp_dir().join("harmony-tftp-test")); @@ -412,6 +462,7 @@ async fn run_integration() -> Result<(), Box> { println!("PASSED — All OPNsense integration tests successful:"); println!(" Run 1: all entities created correctly"); println!(" Run 2: idempotency verified — zero duplicates"); + println!(" Firewall reachable end-to-end after LAN-bridge reassignment"); println!(); println!("VM is running at {OPN_LAN_IP}. Use --clean to tear down."); Ok(()) @@ -431,6 +482,15 @@ struct StateSnapshot { vip_count: usize, dnat_rules: usize, lagg_count: usize, + bridge_count: usize, + bridge_sysctls: usize, + /// All `net.link.bridge.*` tunables with their current values + /// (post-Score). Lets the assertion check the four we care about + /// by key+value without disturbing other pre-existing entries. 
+ bridge_sysctl_values: std::collections::HashMap, + tso_disabled: bool, + lro_disabled: bool, + lan_if: String, } impl StateSnapshot { @@ -445,6 +505,16 @@ impl StateSnapshot { info!(" VIPs: {}", self.vip_count); info!(" DNat rules: {}", self.dnat_rules); info!(" LAGGs: {}", self.lagg_count); + info!(" Bridges: {}", self.bridge_count); + info!( + " Bridge sysctls (net.link.bridge.*): {}", + self.bridge_sysctls + ); + info!( + " Hardware offload disabled: TSO={}, LRO={}", + self.tso_disabled, self.lro_disabled + ); + info!(" interfaces.lan.if: {}", self.lan_if); } fn assert_minimum_counts(&self) { @@ -488,6 +558,47 @@ impl StateSnapshot { "Expected >= 1 LAGG, got {}", self.lagg_count ); + assert!( + self.bridge_count >= 1, + "Expected >= 1 bridge, got {}", + self.bridge_count + ); + // The Score doesn't claim exclusive ownership of the + // `net.link.bridge.*` namespace; only that the 4 it cares + // about exist with the expected values. Pre-existing sysctls + // make the count >= 4 — that's fine. 
+ assert!( + self.bridge_sysctls >= 4, + "Expected at least 4 net.link.bridge.* sysctls, got {}", + self.bridge_sysctls + ); + let expected: &[(&str, &str)] = &[ + ("net.link.bridge.pfil_member", "0"), + ("net.link.bridge.pfil_bridge", "1"), + ("net.link.bridge.pfil_local_phys", "0"), + ("net.link.bridge.inherit_mac", "1"), + ]; + for (key, want) in expected { + let got = self.bridge_sysctl_values.get(*key).map(String::as_str); + assert_eq!( + got, + Some(*want), + "Expected {key}={want}, got {got:?} from net.link.bridge.* tunables", + ); + } + assert!( + self.tso_disabled, + "Expected segmentation offloading to be disabled after OPNsenseLanBridgeScore", + ); + assert!( + self.lro_disabled, + "Expected large-receive offloading to be disabled after OPNsenseLanBridgeScore", + ); + assert_eq!( + self.lan_if, "bridge0", + "Expected to be reassigned to bridge0 (was {})", + self.lan_if + ); } } @@ -549,6 +660,51 @@ async fn verify_state( .map(|m| m.len()) .unwrap_or(0); + let bridges: serde_json::Value = client + .get_typed("interfaces", "bridge_settings", "get") + .await?; + let bridge_count = bridges["bridge"]["bridged"] + .as_object() + .map(|m| m.len()) + .unwrap_or(0); + + // Capture all net.link.bridge.* entries with their values so the + // assertion can check the four we care about by name + value while + // tolerating any extras left over from manual probing. + let tunables: serde_json::Value = client + .post_typed( + "core", + "tunables", + "searchItem", + Some(&serde_json::json!({ "searchPhrase": "net.link.bridge." 
})), + ) + .await?; + let bridge_sysctl_values: std::collections::HashMap = tunables["rows"] + .as_array() + .map(|rows| { + rows.iter() + .filter_map(|r| { + let tunable = r["tunable"].as_str()?; + if !tunable.starts_with("net.link.bridge.") { + return None; + } + let value = r["value"].as_str()?; + Some((tunable.to_string(), value.to_string())) + }) + .collect() + }) + .unwrap_or_default(); + let bridge_sysctls = bridge_sysctl_values.len(); + + let iface_settings: serde_json::Value = + client.get_typed("interfaces", "settings", "get").await?; + let tso_disabled = + iface_settings["settings"]["disablesegmentationoffloading"].as_str() == Some("1"); + let lro_disabled = + iface_settings["settings"]["disablelargereceiveoffloading"].as_str() == Some("1"); + + let lan_if = ssh_read_lan_if().await.unwrap_or_default(); + Ok(StateSnapshot { haproxy_frontends, dnsmasq_hosts, @@ -559,9 +715,44 @@ async fn verify_state( vip_count, dnat_rules, lagg_count, + bridge_count, + bridge_sysctls, + bridge_sysctl_values, + tso_disabled, + lro_disabled, + lan_if, }) } +/// Read `interfaces.lan.if` over SSH (no REST endpoint for the legacy +/// interfaces tree). Errors are returned to the caller; `verify_state` +/// maps a failure to an empty string, so the `assert_minimum_counts` +/// check then fails with a clear message. +async fn ssh_read_lan_if() -> Result> { + use opnsense_config::config::{OPNsenseShell, SshCredentials, SshOPNSenseShell}; + let ssh_creds = SecretManager::get::().await?; + let ip: std::net::IpAddr = OPN_LAN_IP.parse()?; + let ssh_config = std::sync::Arc::new(russh::client::Config { + inactivity_timeout: None, + ..<_>::default() + }); + let credentials = SshCredentials::Password { + username: ssh_creds.username.clone(), + password: ssh_creds.password.clone(), + }; + let shell = SshOPNSenseShell::new((ip, 22), credentials, ssh_config); + // Shell single-quotes preserve backslashes literally, so a SINGLE + // `\` in the Rust source reaches PHP as a single backslash and forms + // a valid namespace separator. 
`\\` in source would reach PHP as + // `\\` and trigger a parse error (silently empty stdout). + let out = shell + .exec( + "php -r 'require \"/usr/local/etc/inc/config.inc\"; \ + echo (string)OPNsense\\Core\\Config::getInstance()->object()->interfaces->lan->if;'", + ) + .await?; + Ok(out.trim().to_string()) +} + /// Build all test Scores — extracted so we can call it for both run 1 and run 2. fn build_all_scores() -> Result>>, Box> { let lb_score = LoadBalancerScore { @@ -723,6 +914,27 @@ fn build_all_scores() -> Result>>, Box` + // from vtnet0 to bridge0 — host-to-VM management survives because + // bridge0 inherits vtnet0's MAC (perf_tunables sets + // inherit_mac=1). Single-member is degenerate but exercises every + // code path; multi-member would need extra virtio NICs. + let lan_bridge_score = OPNsenseLanBridgeScore { + params: LanBridgeParams { + members: Some(vec!["vtnet0".to_string()]), + description: "harmony-test-lan-bridge".to_string(), + mtu: None, + enable_stp: false, + reassign_lan: true, + perf_tunables: true, + }, + }; + // WebGuiConfigScore runs first: moves webgui to 9443 so HAProxy can bind 443. // This is an explicit Score (not hidden in bootstrap) — see docs/architecture-challenges.md // for discussion of Score ordering/dependency. 
@@ -731,7 +943,22 @@ fn build_all_scores() -> Result>>, Box Result>>, Box HostBinding { }; HostBinding::new(logical, physical, HostConfig::new(None)) } - -async fn create_api_key_ssh(ip: &IpAddr) -> Result<(String, String), Box> { - use opnsense_config::config::{OPNsenseShell, SshCredentials, SshOPNSenseShell}; - - let ssh_config = Arc::new(russh::client::Config { - inactivity_timeout: None, - ..<_>::default() - }); - let credentials = SshCredentials::Password { - username: "root".to_string(), - password: "opnsense".to_string(), - }; - let shell = SshOPNSenseShell::new((*ip, 22), credentials, ssh_config); - - let php_script = r#"object()->system->user as $user) { - if ((string)$user->name === 'root') { - if (!isset($user->apikeys)) { $user->addChild('apikeys'); } - $item = $user->apikeys->addChild('item'); - $item->addChild('key', $key); - $item->addChild('secret', crypt($secret, '$6$' . bin2hex(random_bytes(8)) . '$')); - $config->save(); - echo $key . "\n" . $secret . "\n"; - exit(0); - } -} -echo "ERROR: root user not found\n"; -exit(1); -"#; - - info!("Writing API key script..."); - shell - .write_content_to_file(php_script, "/tmp/create_api_key.php") - .await?; - - info!("Executing API key generation..."); - let output = shell - .exec("php /tmp/create_api_key.php && rm /tmp/create_api_key.php") - .await?; - - let lines: Vec<&str> = output.trim().lines().collect(); - if lines.len() >= 2 && !lines[0].starts_with("ERROR") { - Ok((lines[0].to_string(), lines[1].to_string())) - } else { - Err(format!("API key creation failed: {output}").into()) - } -} diff --git a/harmony/src/domain/interpret/mod.rs b/harmony/src/domain/interpret/mod.rs index ec3ad1d0..f992cf93 100644 --- a/harmony/src/domain/interpret/mod.rs +++ b/harmony/src/domain/interpret/mod.rs @@ -11,6 +11,11 @@ use super::{ pub enum InterpretName { OPNSenseDHCP, OPNSenseDns, + OPNsenseBootstrap, + OPNsenseFirmwareUpgrade, + OPNsensePackageInstall, + OPNsensePinNicNames, + OPNsenseLanBridge, LoadBalancer, 
Tftp, Http, @@ -44,6 +49,11 @@ impl std::fmt::Display for InterpretName { match self { InterpretName::OPNSenseDHCP => f.write_str("OPNSenseDHCP"), InterpretName::OPNSenseDns => f.write_str("OPNSenseDns"), + InterpretName::OPNsenseBootstrap => f.write_str("OPNsenseBootstrap"), + InterpretName::OPNsenseFirmwareUpgrade => f.write_str("OPNsenseFirmwareUpgrade"), + InterpretName::OPNsensePackageInstall => f.write_str("OPNsensePackageInstall"), + InterpretName::OPNsensePinNicNames => f.write_str("OPNsensePinNicNames"), + InterpretName::OPNsenseLanBridge => f.write_str("OPNsenseLanBridge"), InterpretName::LoadBalancer => f.write_str("LoadBalancer"), InterpretName::Tftp => f.write_str("Tftp"), InterpretName::Http => f.write_str("Http"), diff --git a/harmony/src/domain/topology/mod.rs b/harmony/src/domain/topology/mod.rs index 44c35b4e..e38a2b45 100644 --- a/harmony/src/domain/topology/mod.rs +++ b/harmony/src/domain/topology/mod.rs @@ -5,9 +5,11 @@ mod ha_cluster; pub mod ingress; pub mod node_exporter; pub mod opnsense; +pub mod opnsense_bootstrap; pub use failover::*; pub use firewall_pair::*; use harmony_types::net::IpAddress; +pub use opnsense_bootstrap::*; mod host_binding; mod http; pub mod installable; diff --git a/harmony/src/domain/topology/opnsense_bootstrap.rs b/harmony/src/domain/topology/opnsense_bootstrap.rs new file mode 100644 index 00000000..784fc268 --- /dev/null +++ b/harmony/src/domain/topology/opnsense_bootstrap.rs @@ -0,0 +1,89 @@ +//! Minimal topology representing a factory-fresh OPNsense firewall. +//! +//! [`OPNsenseBootstrapTopology`] holds the connection info needed to talk to +//! an OPNsense that has just been installed from ISO and is reachable at its +//! default LAN IP with the install-time credentials. It exists so that the +//! `OPNsenseBootstrapScore` (in `harmony::modules::opnsense::bootstrap_score`) +//! can fit the standard `Score` pattern while the firewall is +//! still pre-API-credentials. +//! +//! 
Once the bootstrap Score runs, callers construct an +//! [`OPNSenseFirewall`](crate::infra::opnsense::OPNSenseFirewall) instead and +//! run their production-phase Scores against that. + +use async_trait::async_trait; +use serde::Serialize; + +use crate::{ + modules::opnsense::bootstrap::probe_https, + topology::{PreparationError, PreparationOutcome, Topology}, +}; +use harmony_types::net::IpAddress; + +/// A factory-fresh OPNsense firewall awaiting first-time configuration. +/// +/// The struct is intentionally tiny — it carries only what's needed to +/// reach the firewall and authenticate with the install-time defaults. +/// All "where do you want to end up" configuration (target API port, +/// optional LAN rebind, timeouts) belongs on the Score, not here. +#[derive(Debug, Clone, Serialize)] +pub struct OPNsenseBootstrapTopology { + /// LAN IP the OPNsense was configured with at install time + /// (typically `192.168.1.1`). + pub vanilla_ip: IpAddress, + /// Install-time username (typically `root`). + pub default_username: String, + /// Install-time password (typically `opnsense`). + pub default_password: String, +} + +#[async_trait] +impl Topology for OPNsenseBootstrapTopology { + fn name(&self) -> &str { + "OPNsenseBootstrapTopology" + } + + /// Probe the vanilla address on TCP 443. If unreachable, return a + /// `PreparationError` whose message points the operator at the + /// typical recovery paths (install from ISO, leave LAN at default, + /// or — if the firewall is already past first-boot — run the + /// bootstrap Score's idempotency check from the target subnet). 
+ async fn ensure_ready(&self) -> Result { + let ip_str = self.vanilla_ip.to_string(); + if probe_https(&ip_str, 443, std::time::Duration::from_secs(3)).await { + Ok(PreparationOutcome::Success { + details: format!("Factory-fresh OPNsense reachable at https://{ip_str}"), + }) + } else { + Err(PreparationError::new(format!( + "Could not reach factory-fresh OPNsense at https://{ip_str}:443 within 3s. \ + Verify it is installed from ISO, sitting at its default LAN IP, and the dev \ + machine is on the same subnet. If you've already bootstrapped this firewall \ + once, you don't need to rerun the bootstrap Score from here — its idempotency \ + check expects the target subnet instead." + ))) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn ensure_ready_errors_when_endpoint_is_unreachable() { + // 127.0.0.1:1 is the conventional "nothing listens here" target. + // Note: the probe targets port 443, not the IP's literal port, + // so this exercises the same code path even if something is on :1. 
+ let topology = OPNsenseBootstrapTopology { + vanilla_ip: "127.0.0.1".parse().unwrap(), + default_username: "root".into(), + default_password: "opnsense".into(), + }; + let result = topology.ensure_ready().await; + assert!( + result.is_err(), + "expected ensure_ready to fail against an unreachable endpoint, got Ok({result:?})" + ); + } +} diff --git a/harmony/src/modules/okd/mod.rs b/harmony/src/modules/okd/mod.rs index 6fd48e7a..6a5ecc2f 100644 --- a/harmony/src/modules/okd/mod.rs +++ b/harmony/src/modules/okd/mod.rs @@ -30,7 +30,9 @@ pub mod disable_dad_score; pub mod host_network; pub mod node_file_score; pub mod os_artifacts; +pub mod reapply_from_inventory; pub mod system_reserved_score; pub use add_node::*; pub use os_artifacts::*; +pub use reapply_from_inventory::*; diff --git a/harmony/src/modules/okd/reapply_from_inventory.rs b/harmony/src/modules/okd/reapply_from_inventory.rs new file mode 100644 index 00000000..42a2ab57 --- /dev/null +++ b/harmony/src/modules/okd/reapply_from_inventory.rs @@ -0,0 +1,268 @@ +//! Re-apply firewall config for already-discovered nodes. +//! +//! Recovery tool: when the OPNsense firewall has been reinstalled but the +//! harmony inventory database still has the discovered physical hosts, +//! this Score re-creates the bits that live on the firewall — without +//! running discovery, prompting for reboot, or otherwise touching the +//! installed cluster. +//! +//! What it (re-)writes per selected role: +//! 1. dnsmasq Host entries (DHCP reservation + A record), via +//! `DhcpHostBindingScore` → `DhcpConfigDnsMasq::add_static_mapping`. +//! 2. Per-MAC iPXE boot files (`byMAC/01-.ipxe`) served over +//! HTTP, via `IPxeMacBootFileScore`. Uses the same `BootstrapIpxeTpl` +//! stages 02/03/04 use, parameterized by the role's ignition file +//! (`bootstrap.ign` / `master.ign` / `worker.ign`). +//! +//! Pick which roles to re-apply via: +//! - `OKDReapplyFromInventoryScore::interactive()` — prompts via inquire +//! 
- `OKDReapplyFromInventoryScore::for_roles(vec![...])` — explicit set +//! - `OKDReapplyFromInventoryScore::all_roles()` — bootstrap + CP + worker +//! +//! Skips roles with no DB hosts. Errors when DB count and topology slot +//! count diverge for a role the user explicitly asked for, or when a +//! host has no installation_device / MAC recorded in the DB. + +use async_trait::async_trait; +use harmony_types::id::Id; +use log::{info, warn}; +use serde::Serialize; + +use crate::{ + data::Version, + hardware::PhysicalHost, + infra::inventory::InventoryRepositoryFactory, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::{HostRole, Inventory}, + modules::{ + dhcp::DhcpHostBindingScore, http::IPxeMacBootFileScore, + okd::templates::BootstrapIpxeTpl, + }, + score::Score, + topology::{HAClusterTopology, HostBinding, HostConfig, LogicalHost}, +}; + +#[derive(Debug, Clone, Serialize)] +pub struct OKDReapplyFromInventoryScore { + /// Which roles to re-apply for. `None` triggers an interactive + /// multi-select prompt at execute time. 
+ pub roles: Option>, +} + +impl OKDReapplyFromInventoryScore { + pub fn interactive() -> Self { + Self { roles: None } + } + + pub fn for_roles(roles: Vec) -> Self { + Self { roles: Some(roles) } + } + + pub fn all_roles() -> Self { + Self { + roles: Some(vec![ + HostRole::Bootstrap, + HostRole::ControlPlane, + HostRole::Worker, + ]), + } + } +} + +impl Score for OKDReapplyFromInventoryScore { + fn create_interpret(&self) -> Box> { + Box::new(OKDReapplyFromInventoryInterpret { + score: self.clone(), + }) + } + + fn name(&self) -> String { + "OKDReapplyFromInventoryScore".to_string() + } +} + +#[derive(Debug)] +struct OKDReapplyFromInventoryInterpret { + score: OKDReapplyFromInventoryScore, +} + +#[async_trait] +impl Interpret for OKDReapplyFromInventoryInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &HAClusterTopology, + ) -> Result { + let roles = match &self.score.roles { + Some(r) => r.clone(), + None => prompt_roles()?, + }; + + if roles.is_empty() { + return Ok(Outcome::success("No roles selected; nothing to do".into())); + } + + let repo = InventoryRepositoryFactory::build().await?; + let mut details: Vec = Vec::new(); + let http_ip = topology.http_server.get_ip().to_string(); + + for role in roles { + let hosts = repo.get_hosts_for_role(&role).await?; + let logical = role_logical_hosts(&role, topology); + + if hosts.is_empty() { + warn!("[{role}] no hosts in inventory DB, skipping"); + details.push(format!("[{role}] skipped (no DB hosts)")); + continue; + } + if logical.is_empty() { + warn!("[{role}] topology has no slot for this role, skipping"); + details.push(format!("[{role}] skipped (no topology slot)")); + continue; + } + if logical.len() != hosts.len() { + return Err(InterpretError::new(format!( + "[{role}] topology defines {} logical host(s) but inventory DB has {} \ + physical — refusing to re-apply with a mismatched count", + logical.len(), + hosts.len() + ))); + } + + // 1. 
DHCP / dnsmasq host entries (DHCP reservation + A record). + let bindings = build_bindings(&hosts, &logical); + info!( + "[{role}] re-applying {} DHCP binding(s) from inventory DB", + bindings.len() + ); + DhcpHostBindingScore { + host_binding: bindings, + domain: Some(topology.domain_name.clone()), + } + .interpret(inventory, topology) + .await?; + + // 2. Per-MAC iPXE boot files (byMAC/01-.ipxe over HTTP). + let ignition_file_name = role_ignition_file(&role); + for (physical, host_config) in &hosts { + let installation_device = + host_config.installation_device.as_deref().ok_or_else(|| { + InterpretError::new(format!( + "[{role}] host {} has no installation_device in DB; \ + cannot render iPXE template", + physical.summary() + )) + })?; + + let content = BootstrapIpxeTpl { + http_ip: &http_ip, + scos_path: "scos", + ignition_http_path: "okd_ignition_files", + installation_device, + ignition_file_name, + } + .to_string(); + + let mac_address = physical.get_mac_address(); + if mac_address.is_empty() { + return Err(InterpretError::new(format!( + "[{role}] host {} has no MAC in DB; cannot write byMAC file", + physical.summary() + ))); + } + + IPxeMacBootFileScore { + mac_address, + content, + } + .interpret(inventory, topology) + .await?; + } + info!( + "[{role}] re-applied {} byMAC iPXE file(s) from inventory DB", + hosts.len() + ); + + details.push(format!( + "[{role}] re-applied {} DHCP binding(s) + {} byMAC iPXE file(s)", + hosts.len(), + hosts.len() + )); + } + + Ok(Outcome::success_with_details( + "Firewall config re-applied from inventory database".to_string(), + details, + )) + } + + fn get_name(&self) -> InterpretName { + InterpretName::Custom("OKDReapplyFromInventory".into()) + } + + fn get_version(&self) -> Version { + todo!() + } + + fn get_status(&self) -> InterpretStatus { + todo!() + } + + fn get_children(&self) -> Vec { + todo!() + } +} + +fn role_logical_hosts(role: &HostRole, t: &HAClusterTopology) -> Vec { + match role { + HostRole::Bootstrap => 
vec![t.bootstrap_host.clone()], + HostRole::ControlPlane => t.control_plane.clone(), + HostRole::Worker => t.workers.clone(), + } +} + +fn role_ignition_file(role: &HostRole) -> &'static str { + match role { + HostRole::Bootstrap => "bootstrap.ign", + HostRole::ControlPlane => "master.ign", + HostRole::Worker => "worker.ign", + } +} + +fn build_bindings( + nodes: &[(PhysicalHost, HostConfig)], + hosts: &[LogicalHost], +) -> Vec { + hosts + .iter() + .zip(nodes.iter()) + .map(|(logical, (physical, host_config))| HostBinding { + logical_host: logical.clone(), + physical_host: physical.clone(), + host_config: host_config.clone(), + }) + .collect() +} + +fn prompt_roles() -> Result, InterpretError> { + let options = vec![ + HostRole::Bootstrap, + HostRole::ControlPlane, + HostRole::Worker, + ]; + let labels: Vec = options.iter().map(|r| r.to_string()).collect(); + + let chosen = inquire::MultiSelect::new( + "Which host roles should have their firewall config re-applied from the inventory DB?", + labels.clone(), + ) + .prompt() + .map_err(|e| InterpretError::new(format!("interactive role prompt failed: {e}")))?; + + Ok(options + .into_iter() + .zip(labels) + .filter(|(_, label)| chosen.contains(label)) + .map(|(role, _)| role) + .collect()) +} diff --git a/harmony/src/modules/opnsense/bootstrap.rs b/harmony/src/modules/opnsense/bootstrap.rs index 6402df8e..12d7f742 100644 --- a/harmony/src/modules/opnsense/bootstrap.rs +++ b/harmony/src/modules/opnsense/bootstrap.rs @@ -438,6 +438,1139 @@ impl OPNsenseBootstrap { } } +/// Reject strings that could break out of a single-quoted PHP string literal. +/// +/// Allows everything except single and double quotes, backslash, newline, +/// carriage return, and NUL — the characters that close, escape, or inject +/// across a `'...'` PHP literal. Empty strings are also rejected. 
+fn validate_php_safe(value: &str, field: &str) -> Result<(), BootstrapError> { + if value.is_empty() + || value + .chars() + .any(|c| matches!(c, '\'' | '"' | '\\' | '\n' | '\r' | '\0')) + { + return Err(BootstrapError::UnexpectedResponse(format!( + "Invalid {field}: empty or contains quote/backslash/newline" + ))); + } + Ok(()) +} + +/// One-shot HTTPS reachability probe with a short timeout. +/// +/// Returns `true` if the server responds within `timeout` (any HTTP status +/// counts as "reachable" — we accept self-signed certs since fresh OPNsense +/// uses one). Used by callers (e.g. the bootstrap Score) to decide whether +/// the firewall is on the vanilla address, the target address, both, or +/// neither. +pub async fn probe_https(host: &str, port: u16, timeout: std::time::Duration) -> bool { + let url = format!("https://{host}:{port}/"); + let client = match reqwest::Client::builder() + .danger_accept_invalid_certs(true) + .timeout(timeout) + .build() + { + Ok(c) => c, + Err(_) => return false, + }; + client.get(&url).send().await.is_ok() +} + +/// Build an `SshOPNSenseShell` against `(ip, 22)` using password authentication. +pub(crate) fn opnsense_ssh_shell( + ip: std::net::IpAddr, + username: &str, + password: &str, +) -> opnsense_config::config::SshOPNSenseShell { + use opnsense_config::config::{SshCredentials, SshOPNSenseShell}; + let ssh_config = std::sync::Arc::new(russh::client::Config { + inactivity_timeout: None, + ..<_>::default() + }); + let credentials = SshCredentials::Password { + username: username.to_string(), + password: password.to_string(), + }; + SshOPNSenseShell::new((ip, 22), credentials, ssh_config) +} + +/// Mint a fresh API key + secret on the OPNsense root user via SSH. +/// +/// SFTPs a short PHP script that appends an `<item>` to the root user's +/// `<apikeys>` element in `config.xml`, executes it with the firewall's +/// `php` binary, then deletes the script. Returns `(key, secret)`. 
+/// +/// The PHP script uses `random_bytes` for both fields and `crypt` with a +/// SHA-512 salt for the stored secret — same scheme OPNsense uses when +/// keys are created via the web UI. +/// +/// Designed for fresh installs where the only known credentials are the +/// install-time defaults (`root` / `opnsense`); accepts arbitrary +/// credentials so the helper can be reused after a password rotation. +pub async fn create_api_key_ssh( + ip: &std::net::IpAddr, + username: &str, + password: &str, +) -> Result<(String, String), BootstrapError> { + use opnsense_config::config::OPNsenseShell; + + validate_php_safe(username, "username")?; + validate_php_safe(password, "password")?; + + let shell = opnsense_ssh_shell(*ip, username, password); + let php = r#"object()->system->user as $user) { + if ((string)$user->name === 'root') { + if (!isset($user->apikeys)) { $user->addChild('apikeys'); } + $item = $user->apikeys->addChild('item'); + $item->addChild('key', $key); + $item->addChild('secret', crypt($secret, '$6$' . bin2hex(random_bytes(8)) . '$')); + $config->save(); + echo $key . "\n" . $secret . "\n"; + exit(0); + } +} +echo "ERROR: root user not found\n"; +exit(1); +"#; + shell + .write_content_to_file(php, "/tmp/create_api_key.php") + .await + .map_err(|e| BootstrapError::UnexpectedResponse(format!("SFTP upload failed: {e}")))?; + let output = shell + .exec("php /tmp/create_api_key.php && rm /tmp/create_api_key.php") + .await + .map_err(|e| BootstrapError::UnexpectedResponse(format!("SSH exec failed: {e}")))?; + let lines: Vec<&str> = output.trim().lines().collect(); + if lines.len() >= 2 && !lines[0].starts_with("ERROR") { + Ok((lines[0].to_string(), lines[1].to_string())) + } else { + Err(BootstrapError::UnexpectedResponse(format!( + "API key creation failed on firewall: {output}" + ))) + } +} + +/// Update the LAN's DHCP range via OPNsense's REST API. 
+/// +/// Wraps `opnsense_config::modules::dnsmasq::DhcpConfigDnsMasq::set_dhcp_range` +/// — the same code path harmony already uses elsewhere for DHCP range +/// edits. It calls the OPNsense REST API to add or update the dnsmasq +/// range bound to `interface == "lan"`, then asks OPNsense to +/// reconfigure dnsmasq. OPNsense's own model classes handle +/// validation and any dependent service restarts. +/// +/// Call this **before** [`change_lan_ip_via_ssh`] when the LAN move +/// shifts the firewall to a new subnet: the API endpoint sits on the +/// firewall's current LAN IP, so it must be hit before that IP flips +/// and our connection drops. +/// +/// **DHCP backend assumption.** OPNsense 26.x's default DHCP backend +/// is dnsmasq. If a future firewall uses Kea or ISC dhcpd instead, +/// a sibling helper using their respective API endpoints would be +/// needed. We default to the dnsmasq path because that's what every +/// fresh OPNsense install in this stack uses today. +/// +/// `start` / `end` are the new pool's first/last addresses (e.g. +/// `"192.168.200.100"` / `"192.168.200.199"`). 
+pub async fn set_lan_dhcp_range_via_api( + api_ip: &std::net::IpAddr, + api_port: u16, + api_key: &str, + api_secret: &str, + ssh_username: &str, + ssh_password: &str, + start: &str, + end: &str, +) -> Result<(), BootstrapError> { + use opnsense_config::config::OPNsenseShell; + use opnsense_config::modules::dnsmasq::DhcpConfigDnsMasq; + + let client = opnsense_api::OpnsenseClient::builder() + .base_url(format!("https://{api_ip}:{api_port}/api")) + .auth_from_key_secret(api_key, api_secret) + .skip_tls_verify() + .timeout_secs(60) + .build() + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "Failed to build OPNsense API client for DHCP range update: {e}" + )) + })?; + + // DhcpConfigDnsMasq holds an SSH shell for the few operations that + // have no REST equivalent (file uploads for PXE configs); we don't + // hit those here, but the constructor demands one. + let shell: std::sync::Arc = + std::sync::Arc::new(opnsense_ssh_shell(*api_ip, ssh_username, ssh_password)); + + let dhcp = DhcpConfigDnsMasq::new(client, shell); + dhcp.set_dhcp_range(start, end).await.map_err(|e| { + BootstrapError::UnexpectedResponse(format!("DhcpConfigDnsMasq::set_dhcp_range failed: {e}")) + })?; + + info!("LAN DHCP range set via OPNsense API: {start}-{end}"); + Ok(()) +} + +/// POST `core/firmware/reboot` and wait for the firewall to come back at +/// `final_ip:final_api_port`. +/// +/// Fire-and-forget POST — OPNsense tears down the TCP connection while +/// replying. Mirrors the reboot path inside `perform_firmware_upgrade`. +/// The waiter is shared with `firmware_upgrade::wait_for_reboot_cycle` +/// (unreachable-window probe → recovery probe → settle delay). +/// +/// Used as the terminal step of [`OPNsenseBootstrapScore`] to guarantee +/// the running state matches what was persisted after firmware upgrade, +/// optional LAN bridge, and optional LAN-IP rebind. 
+pub async fn reboot_and_verify_via_api( + final_ip: &str, + final_api_port: u16, + api_key: &str, + api_secret: &str, + tag: &str, +) -> Result<(), BootstrapError> { + let client = opnsense_api::OpnsenseClient::builder() + .base_url(format!("https://{final_ip}:{final_api_port}/api")) + .auth_from_key_secret(api_key, api_secret) + .skip_tls_verify() + .timeout_secs(60) + .build() + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "Failed to build OPNsense API client for terminal reboot: {e}" + )) + })?; + + info!("{tag} POST core/firmware/reboot ..."); + let _ = client + .post_typed::("core", "firmware", "reboot", None) + .await; + + super::firmware_upgrade::wait_for_reboot_cycle(final_ip, final_api_port, tag) + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "Reboot triggered but firewall did not return cleanly: {e}" + )) + }) +} + +/// Move the LAN interface to a new IP / subnet at runtime via SSH. +/// +/// SFTPs a PHP script that rewrites `interfaces.lan.ipaddr` and +/// `interfaces.lan.subnet` via OPNsense's `Config` singleton, then +/// schedules a detached `configctl interface reconfigure lan` plus +/// the matching service reloads (`dhcpd`/`dnsmasq`/`kea`/`unbound`/ +/// `dns`/`filter`) so our SSH session can close cleanly before the +/// kernel drops it from the IP flip. Running `configctl` synchronously +/// would hang russh forever on a connection the kernel has already +/// torn down. +/// +/// **DHCP range is not touched here** — call +/// [`set_lan_dhcp_range_via_api`] first if the LAN move shifts to a +/// new subnet. That helper goes through OPNsense's REST API so the +/// DHCP backend (dnsmasq / Kea / ISC dhcpd) and its dependent +/// services get reconfigured by OPNsense's own model classes. 
+/// +/// **Connectivity warning:** if the caller is on the LAN side of the +/// firewall, this call will sever their connection to the firewall +/// before the apply completes — they need to reattach into the new +/// subnet to verify. This helper does not (and cannot) assist with +/// that. +/// +/// `new_ip` is strictly parsed as an `IpAddr` before interpolation; +/// `username` / `password` are validated against PHP-injection-safe +/// characters. +pub async fn change_lan_ip_via_ssh( + current_ip: &str, + new_ip: &str, + subnet: u8, + username: &str, + password: &str, +) -> Result<(), BootstrapError> { + use opnsense_config::config::OPNsenseShell; + + validate_php_safe(username, "username")?; + validate_php_safe(password, "password")?; + let _: std::net::IpAddr = new_ip + .parse() + .map_err(|e| BootstrapError::UnexpectedResponse(format!("Invalid new LAN IP: {e}")))?; + + let ip: std::net::IpAddr = current_ip + .parse() + .map_err(|e| BootstrapError::UnexpectedResponse(format!("Invalid current SSH IP: {e}")))?; + let shell = opnsense_ssh_shell(ip, username, password); + + // PHP: update LAN interface ipaddr/subnet only. DHCP range updates + // happen via the OPNsense REST API in `set_lan_dhcp_range_via_api`, + // called by `OPNsenseBootstrapScore` before this function. That + // route goes through OPNsense's proper model classes and handles + // schema differences between dnsmasq / Kea / ISC dhcpd cleanly. + let php = format!( + r#"object(); +$root->interfaces->lan->ipaddr = '{new_ip}'; +$root->interfaces->lan->subnet = '{subnet}'; +$config->save(); +echo "OK\n"; +"#, + ); + + shell + .write_content_to_file(&php, "/tmp/change_ip.php") + .await + .map_err(|e| BootstrapError::UnexpectedResponse(format!("SFTP upload failed: {e}")))?; + + // ── Step 1: synchronously rewrite config.xml's . + // Fast, no connectivity disruption. 
+ info!("LAN rebind: rewriting interfaces.lan in config.xml"); + let out = shell + .exec("php /tmp/change_ip.php && rm /tmp/change_ip.php") + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "SSH exec failed during config rewrite: {e}" + )) + })?; + if out.trim() != "OK" { + return Err(BootstrapError::UnexpectedResponse(format!( + "Config rewrite via PHP did not report OK; output was: {}", + out.trim() + ))); + } + info!("LAN rebind: interfaces.lan saved; scheduling detached apply"); + + // ── Step 2: apply via configctl, but DETACHED. The + // `configctl interface reconfigure lan` call kills our SSH + // connection as soon as the IP flips. Running it inline would + // hang russh waiting on a channel close that never comes; running + // it in `nohup … &` lets the outer shell exit immediately, SSH + // sees EOF on the channel, our exec returns, and the firewall + // applies the change a fraction of a second later (the sleep is + // there to give SSH time to disconnect first). + // + // The configctl chain is best-effort: each action is separated by + // `;` (not `&&`) so a missing action on a given OPNsense version + // doesn't abort the whole reload. Output goes to /tmp/lan_flip.log + // for forensics. + // + // The outer `sh -c '...'` is there because OPNsense's root login + // shell is tcsh, where `2>&1` is a syntax error. Bourne semantics + // are required for the redirect. + // Action names are taken from OPNsense's actions.d templates as used + // elsewhere in harmony (see `opnsense-config/src/modules/dnsmasq.rs` + // for the `dnsmasq restart` precedent). They differ from the operator- + // intuitive `reload` form because configd's allowed verbs are + // service-specific. We try every backend's restart action and ignore + // the ones that don't apply on this firewall (separator is `;`, not + // `&&`). 
+ let apply_cmd = "sh -c 'nohup sh -c \ + \"sleep 1 && \ + configctl interface reconfigure lan; \ + configctl dhcpd restart; \ + configctl dnsmasq restart; \ + configctl kea restart; \ + configctl unbound restart; \ + configctl dns reload; \ + configctl filter reload\" \ + > /tmp/lan_flip.log 2>&1 < /dev/null &'"; + shell.exec(apply_cmd).await.map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "SSH exec failed scheduling detached apply: {e}" + )) + })?; + info!( + "LAN rebind: detached apply scheduled; firewall should answer at {new_ip} in a few seconds. \ + Per-step log on the firewall at /tmp/lan_flip.log." + ); + + Ok(()) +} + +/// Re-point the LAN logical interface at a different physical / bridge +/// interface (e.g. `bridge0`) by rewriting `` (and +/// optionally ``) via PHP-on-SSH, then applying via +/// **detached** configctl. +/// +/// Why this exists: when a `Score` builds an `if_bridge` spanning several +/// physical NICs, the firewall's LAN services (DHCP, firewall rules, +/// management IP) only see the broadcast domain of *one* NIC unless the +/// LAN logical interface is moved off that raw NIC and onto the bridge. +/// OPNsense's REST API for `interfaces/bridge_settings` creates the +/// bridge device but does NOT touch `` — that's a +/// legacy-config edit. We do it via the supported `Config::getInstance()` +/// SimpleXML path (NOT a raw `file_put_contents` — see the rule in +/// `feedback_opnsense_no_manual_config_xml`). +/// +/// Mirror of [`change_lan_ip_via_ssh`]: same SimpleXML write pattern, +/// same `nohup sh -c "..." &` detach for the configctl chain — without +/// the detach, russh deadlocks because the LAN reconfigure tears down +/// our own SSH channel before the exec returns. 
+pub async fn set_lan_member_via_ssh( + current_ip: &str, + new_if: &str, + mtu: Option, + username: &str, + password: &str, +) -> Result<(), BootstrapError> { + use opnsense_config::config::OPNsenseShell; + + validate_php_safe(username, "username")?; + validate_php_safe(password, "password")?; + validate_php_safe(new_if, "new_if")?; + + let ip: std::net::IpAddr = current_ip + .parse() + .map_err(|e| BootstrapError::UnexpectedResponse(format!("Invalid current SSH IP: {e}")))?; + let shell = opnsense_ssh_shell(ip, username, password); + + let mtu_line = match mtu { + Some(m) => format!("$root->interfaces->lan->mtu = '{m}';\n"), + None => String::new(), + }; + let php = format!( + r#"object(); +$root->interfaces->lan->if = '{new_if}'; +{mtu_line}$config->save(); +echo "OK\n"; +"#, + ); + + shell + .write_content_to_file(&php, "/tmp/lan_member.php") + .await + .map_err(|e| BootstrapError::UnexpectedResponse(format!("SFTP upload failed: {e}")))?; + + info!("LAN member rewrite via SSH: interfaces.lan.if -> {new_if}"); + let out = shell + .exec("php /tmp/lan_member.php && rm /tmp/lan_member.php") + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "SSH exec failed during interfaces.lan.if rewrite: {e}" + )) + })?; + if out.trim() != "OK" { + return Err(BootstrapError::UnexpectedResponse(format!( + "interfaces.lan.if rewrite via PHP did not report OK; output was: {}", + out.trim() + ))); + } + info!("LAN member rewrite: interfaces.lan saved; scheduling detached apply"); + + // Same detach pattern as `change_lan_ip_via_ssh`: configctl interface + // reconfigure lan kills our SSH channel mid-call, so we nohup the + // chain and let the outer exec return immediately. 
+ let apply_cmd = "sh -c 'nohup sh -c \ + \"sleep 1 && \ + configctl interface reconfigure lan; \ + configctl filter reload\" \ + > /tmp/lan_member.log 2>&1 < /dev/null &'"; + shell.exec(apply_cmd).await.map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "SSH exec failed scheduling detached apply: {e}" + )) + })?; + info!( + "LAN member rewrite: detached apply scheduled. Per-step log on the firewall at \ + /tmp/lan_member.log." + ); + + Ok(()) +} + +/// Outcome of the atomic LAN-bridge ensure step. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AtomicBridgeOutcome { + Created(String), + Updated(String), +} + +impl AtomicBridgeOutcome { + pub fn bridgeif(&self) -> &str { + match self { + AtomicBridgeOutcome::Created(s) | AtomicBridgeOutcome::Updated(s) => s.as_str(), + } + } + pub fn was_created(&self) -> bool { + matches!(self, AtomicBridgeOutcome::Created(_)) + } +} + +/// Ensure a LAN-bridge entry exists AND (when requested) `` +/// is repointed at it — in a **single atomic config save** on the firewall, +/// followed by a **detached** `configctl` chain that brings kernel state in +/// line. +/// +/// Members are passed as **physical NIC names** (`vtnet0`, `igc1`, …). The +/// PHP script resolves each one to a logical interface in the same save: +/// +/// * Already assigned to an OPT (`opt1`, `opt2`, …) → reuse that name. +/// * Unassigned → create a new `` entry (next-free `N`). +/// * Currently `` **and** `reassign_lan=true` → move +/// it to a new OPT entry. Otherwise it stays as `lan`, but a circular +/// reference (bridge has `lan` as a member while `lan.if=bridge0`) means +/// `_interfaces_bridge_configure` resolves the member back to bridge0 and +/// refuses to add it as its own member — the bridge ends up with **zero +/// kernel-level members** and the LAN goes dark. The transfer breaks the +/// circle by giving the physical NIC its own logical handle. 
+/// +/// Why a single atomic save: any non-atomic ordering leaves a window where +/// either the bridge has no IP (vtnet0 is already in bridge0 kernel-level +/// but `` still says vtnet0) or has no members (bridge created +/// with `members="lan"` while `lan.if=bridge0`). Both windows are +/// unreachable from the LAN. +/// +/// Returns `(action, bridgeif, new_opt_names)`. `action` indicates whether +/// the bridge entry was created or updated; `new_opt_names` lists any OPT +/// entries the PHP script just created so the caller's detached configctl +/// chain can bring them up. +pub async fn ensure_lan_bridge_atomic_via_ssh( + ssh_ip: &std::net::IpAddr, + username: &str, + password: &str, + physical_members: &[String], + description: &str, + enable_stp: bool, + reassign_lan: bool, + mtu: Option, +) -> Result { + use opnsense_config::config::OPNsenseShell; + + validate_php_safe(username, "username")?; + validate_php_safe(password, "password")?; + validate_php_safe(description, "description")?; + for m in physical_members { + validate_php_safe(m, "bridge member")?; + } + let physical_csv = physical_members.join(","); + let enable_stp_str = if enable_stp { "1" } else { "0" }; + let reassign_lan_str = if reassign_lan { "1" } else { "0" }; + let mtu_str = mtu.map(|m| m.to_string()).unwrap_or_default(); + + let shell = opnsense_ssh_shell(*ssh_ip, username, password); + + // Single PHP script: resolve every physical NIC to a logical name + // (creating new OPT entries / moving lan's NIC to a new OPT when + // needed), then write the bridge entry and (if requested) the + // `=bridgeN` reassignment in one `Config::save()`. Output + // format: `ACTION BRIDGEIF NEW_OPTS_CSV` (NEW_OPTS_CSV may be empty). + let php = format!( + r#"object(); +$descr = '{description}'; +$physical_csv = '{physical_csv}'; +$enable_stp = '{enable_stp_str}'; +$reassign_lan = '{reassign_lan_str}'; +$mtu = '{mtu_str}'; + +// Map current physical → logical (and back). 
+$phys_to_logical = []; +$logical_to_phys = []; +foreach ($root->interfaces->children() as $name => $iface) {{ + $p = (string)$iface->if; + if ($p !== '') {{ + $phys_to_logical[$p] = $name; + $logical_to_phys[$name] = $p; + }} +}} + +$wan_phys = $logical_to_phys['wan'] ?? ''; +$physical_list = array_filter(explode(',', $physical_csv), function ($p) {{ return $p !== ''; }}); +foreach ($physical_list as $p) {{ + if ($wan_phys !== '' && $p === $wan_phys) {{ + echo "ERROR WAN_INCLUDED $p\n"; + exit(1); + }} +}} + +function next_opt($interfaces) {{ + $next = 1; + foreach ($interfaces->children() as $name => $_ignored) {{ + if (preg_match('/^opt(\d+)$/', $name, $m)) {{ + $n = (int)$m[1]; + if ($n >= $next) $next = $n + 1; + }} + }} + return 'opt' . $next; +}} + +function add_opt(&$root, $name, $phys, $descr) {{ + $entry = $root->interfaces->addChild($name); + $entry->addChild('if', $phys); + $entry->addChild('descr', $descr); + $entry->addChild('enable', '1'); +}} + +$new_opts = []; +$resolved = []; +foreach ($physical_list as $phys) {{ + $current = $phys_to_logical[$phys] ?? null; + if ($current === null) {{ + // Unassigned NIC → new OPT. + $name = next_opt($root->interfaces); + add_opt($root, $name, $phys, "harmony bridge member ($phys)"); + $phys_to_logical[$phys] = $name; + $new_opts[] = $name; + $resolved[] = $name; + }} elseif ($current === 'lan' && $reassign_lan === '1') {{ + // Currently LAN AND we're moving LAN to the bridge — create a + // dedicated OPT so the bridge can reference the physical NIC + // without resolving back to itself via lan. + $name = next_opt($root->interfaces); + add_opt($root, $name, $phys, "harmony bridge member ($phys, was lan)"); + $phys_to_logical[$phys] = $name; + $new_opts[] = $name; + $resolved[] = $name; + }} else {{ + // Already has a non-circular logical assignment (opt, etc.). 
+ $resolved[] = $current; + }} +}} +$members_csv = implode(',', $resolved); + +if (!isset($root->bridges)) {{ + $root->addChild('bridges'); +}} + +$found = null; +foreach ($root->bridges->bridged ?? [] as $b) {{ + if ((string)$b->descr === $descr) {{ $found = $b; break; }} +}} + +if ($found !== null) {{ + $bridgeif = (string)$found->bridgeif; + $found->members = $members_csv; + $found->enablestp = $enable_stp; + if (empty((string)$found->proto)) {{ $found->proto = 'rstp'; }} + $action = 'UPDATED'; +}} else {{ + $next = 0; + foreach ($root->bridges->bridged ?? [] as $b) {{ + if (preg_match('/^bridge(\d+)$/', (string)$b->bridgeif, $m)) {{ + $n = (int)$m[1]; + if ($n >= $next) $next = $n + 1; + }} + }} + $bridgeif = "bridge$next"; + $new = $root->bridges->addChild('bridged'); + $new->addChild('bridgeif', $bridgeif); + $new->addChild('members', $members_csv); + $new->addChild('descr', $descr); + $new->addChild('enablestp', $enable_stp); + $new->addChild('proto', 'rstp'); + $action = 'CREATED'; +}} + +if ($reassign_lan === '1') {{ + if ((string)$root->interfaces->lan->if !== $bridgeif) {{ + $root->interfaces->lan->if = $bridgeif; + }} + if ($mtu !== '') {{ $root->interfaces->lan->mtu = $mtu; }} +}} + +$cfg_obj->save(); +echo "$action $bridgeif " . implode(',', $new_opts) . "\n"; +"# + ); + + shell + .write_content_to_file(&php, "/tmp/lan_bridge.php") + .await + .map_err(|e| BootstrapError::UnexpectedResponse(format!("SFTP upload failed: {e}")))?; + + let out = shell + .exec("php /tmp/lan_bridge.php && rm /tmp/lan_bridge.php") + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "SSH exec failed during atomic LAN-bridge save: {e}" + )) + })?; + let out = out.trim(); + if let Some(rest) = out.strip_prefix("ERROR ") { + return Err(BootstrapError::UnexpectedResponse(format!( + "Atomic LAN-bridge script aborted: {rest}" + ))); + } + // Expected: `ACTION BRIDGEIF NEW_OPTS_CSV` (NEW_OPTS_CSV may be empty). 
+ let parts: Vec<&str> = out.splitn(3, ' ').collect(); + let (action, bridgeif, new_opts_csv) = match parts.as_slice() { + [a, b, c] if !a.is_empty() && !b.is_empty() => (*a, b.to_string(), c.to_string()), + [a, b] if !a.is_empty() && !b.is_empty() => (*a, b.to_string(), String::new()), + _ => { + return Err(BootstrapError::UnexpectedResponse(format!( + "Atomic LAN-bridge script did not report ACTION BRIDGEIF [NEW_OPTS_CSV]; got: {out}" + ))); + } + }; + validate_php_safe(&bridgeif, "returned bridgeif")?; + let new_opts: Vec = if new_opts_csv.is_empty() { + Vec::new() + } else { + new_opts_csv + .split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + .collect() + }; + for o in &new_opts { + validate_php_safe(o, "new opt name")?; + } + info!( + "LAN-bridge atomic save: {action} {bridgeif} (new opts: {new_opts:?}); scheduling \ + detached apply" + ); + + // Detached configctl chain. Order matters: + // 1. For each new OPT entry: `interface configure ` so the + // logical interface comes up before the bridge tries to use it. + // 2. `interface bridge configure` — create bridge0 in the kernel + // and attach members (`get_real_interface()` resolves to + // the underlying physical NIC). + // 3. `interface reconfigure lan` — move LAN's IP onto bridge0. + // 4. `sshd restart` — sshd was bound to lan's previous interface + // device; without an explicit restart it stays bound to the + // old (now IP-less) NIC and new TCP connects time out. HTTPS + // (lighttpd) doesn't have this problem because the WebUI port + // configuration triggers its own restart elsewhere. + // 5. `filter reload` — reapply pf rules against the new interface + // topology, including the anti-lockout rule for LAN. + // Each step separated by `;` (not `&&`) so a missing action on a + // given OPNsense version doesn't abort the rest. `sleep 1` gives + // our SSH channel time to close cleanly before the LAN may briefly + // drop during step 2. 
+ let mut chain = String::from("sleep 1"); + for o in &new_opts { + chain.push_str(&format!(" && configctl interface configure {o} || true")); + } + chain.push_str( + " ; configctl interface bridge configure; \ + configctl interface reconfigure lan; \ + configctl sshd restart; \ + configctl filter reload", + ); + let apply = format!( + "sh -c 'nohup sh -c \"{chain}\" > /tmp/lan_bridge.log 2>&1 < /dev/null &'", + ); + shell.exec(&apply).await.map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "SSH exec failed scheduling detached apply: {e}" + )) + })?; + info!( + "LAN-bridge atomic apply scheduled. Per-step log on the firewall at /tmp/lan_bridge.log." + ); + + Ok(match action { + "CREATED" => AtomicBridgeOutcome::Created(bridgeif), + _ => AtomicBridgeOutcome::Updated(bridgeif), + }) +} + +/// Ensure a physical NIC is registered as a logical interface. Returns +/// the logical name (`lan`, `opt1`, `opt2`, …) it is bound to. +/// +/// Idempotent — if `physical_nic` already appears as `/*/if`, +/// returns that logical name with no changes (no write, no reconfigure). +/// Otherwise writes a new `` entry with the next free +/// `optN` number, `{physical_nic}`, `1`, and +/// `{description}` (default: `"harmony bridge member +/// ()"`). Then runs `configctl interface configure {optN}` so +/// OPNsense brings the new device up. +/// +/// Why this exists: OPNsense's bridge model only accepts logical +/// interface names. A pico-DC operator selecting raw NICs (e.g. +/// `igc2`, `igc3`) needs each one to be assigned to an OPT slot first. +/// This helper closes that gap for callers like +/// [`crate::modules::opnsense::lan_bridge::OPNsenseLanBridgeScore`]. +/// +/// No detached-configctl trick here — configuring a fresh OPT does +/// not tear down our SSH channel (we're talking over LAN, the new +/// device is untouched). 
+pub async fn ensure_physical_nic_assigned_via_ssh( + ip: &std::net::IpAddr, + username: &str, + password: &str, + physical_nic: &str, + description: Option<&str>, +) -> Result { + use opnsense_config::config::OPNsenseShell; + + validate_php_safe(username, "username")?; + validate_php_safe(password, "password")?; + validate_php_safe(physical_nic, "physical_nic")?; + let descr = description + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("harmony bridge member ({physical_nic})")); + validate_php_safe(&descr, "description")?; + + let shell = opnsense_ssh_shell(*ip, username, password); + + // The script: find the existing logical name for $nic, or assign it + // to the next free optN. Prints `EXISTS ` or `ASSIGNED ` + // so the Rust side can decide whether to reconfigure. + let php = format!( + r#"object(); +$nic = '{physical_nic}'; +$descr = '{descr}'; + +// 1. Look for an existing assignment of $nic. +foreach ($root->interfaces->children() as $name => $iface) {{ + if ((string)$iface->if === $nic) {{ + echo "EXISTS " . $name . "\n"; + exit(0); + }} +}} + +// 2. Find the next free optN. +$next = 1; +foreach ($root->interfaces->children() as $name => $_iface) {{ + if (preg_match('/^opt(\d+)$/', $name, $m)) {{ + $n = intval($m[1]); + if ($n >= $next) {{ $next = $n + 1; }} + }} +}} +$new = 'opt' . $next; + +// 3. Add the new entry with minimum WebUI-equivalent schema. +$entry = $root->interfaces->addChild($new); +$entry->addChild('if', $nic); +$entry->addChild('descr', $descr); +$entry->addChild('enable', '1'); + +$config->save(); +echo "ASSIGNED " . $new . 
"\n"; +"# + ); + + shell + .write_content_to_file(&php, "/tmp/assign_nic.php") + .await + .map_err(|e| BootstrapError::UnexpectedResponse(format!("SFTP upload failed: {e}")))?; + + let out = shell + .exec("php /tmp/assign_nic.php && rm /tmp/assign_nic.php") + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "SSH exec failed during NIC assignment: {e}" + )) + })?; + let out = out.trim(); + if let Some(name) = out.strip_prefix("EXISTS ") { + let name = name.trim().to_string(); + info!("NIC assignment: {physical_nic} already = {name} (NOOP)"); + return Ok(name); + } + let new = out + .strip_prefix("ASSIGNED ") + .ok_or_else(|| { + BootstrapError::UnexpectedResponse(format!( + "Unexpected output from NIC-assignment script: {out}" + )) + })? + .trim() + .to_string(); + validate_php_safe(&new, "assigned logical name")?; + info!("NIC assignment: {physical_nic} → {new}; running configctl interface configure"); + let apply = format!("configctl interface configure {new}"); + shell.exec(&apply).await.map_err(|e| { + BootstrapError::UnexpectedResponse(format!( + "configctl interface configure {new} failed: {e}" + )) + })?; + Ok(new) +} + +/// The vendored upstream `ethname` rc.d script (MIT, © Eric Borisch +/// 2016–2019, frozen since v2.0.1 in March 2020). The Score +/// `OPNsenseBootstrapScore` SFTPs this onto every firewall it +/// bootstraps so that NIC names get pinned to MAC addresses before +/// any reboot. +/// +/// Upstream: . The script's +/// own header keeps full usage docs and Eric Borisch's copyright +/// notice; license text mirrored alongside at +/// `harmony/data/opnsense/ethname.LICENSE`. +/// +/// Why vendored instead of `pkg install`'d: on a fresh ISO `pkg +/// install` often fails because the firmware lags the live pkg +/// repo, and the firmware-upgrade reboot is precisely the boot we +/// need to defend against — we cannot run firmware upgrade first. 
+/// +/// See [`crate::modules::opnsense::pin_nic_names`] for the full +/// problem statement and the OPNsense forum thread #27023 link. +pub const ETHNAME_SCRIPT: &str = include_str!("../../../../data/opnsense/ethname.sh"); + +/// Driver names whose interfaces are physical NICs worth pinning. +/// Pseudo-interfaces (`lagg`, `vlan`, `bridge`, `pflog`, ...) are +/// excluded by *not* appearing here. Names from `ifconfig -l ether` +/// are matched against this list after stripping the trailing numeric +/// suffix, so `igc0`/`igc1`/`igc2` all match `"igc"`. +pub const DEFAULT_PHYSICAL_DRIVER_PREFIXES: &[&str] = &[ + // PCIe Intel + "igb", + "igc", + "em", + "ix", + "ixl", + "ice", + // PCIe Realtek / Broadcom / SysKonnect / Intel legacy + "re", + "bge", + "msk", + "fxp", + // Virtio / VMware / Hyper-V + "vtnet", + "vmx", + // USB ethernet (works only if the driver loads in early boot; + // see ethname forum thread for `if_*_load=YES` workaround.) + "axge", + "axe", + "aue", + // Mellanox / Amazon ENA + "mlx5_core", + "ena", +]; + +/// Enumerate physical NICs (name, MAC) over SSH, filtered by driver prefix. +/// +/// Runs `ifconfig -l ether` on the firewall to list ethernet-typed +/// interfaces, then for each candidate, parses the `ether` line out of +/// `ifconfig `. The MAC is normalised to lowercase. +/// +/// The driver-prefix filter keeps physical NICs (igb, igc, em, ...) and +/// drops pseudo-interfaces (lagg, vlan, bridge, vlan-tagged children, +/// pflog, etc.). Matching is on the interface name with trailing digits +/// stripped, so `igc0`/`igc1`/`igc2` all match `"igc"` exactly. 
+pub async fn list_physical_nics_via_ssh( + ip: &std::net::IpAddr, + username: &str, + password: &str, + driver_prefixes: &[&str], +) -> Result, BootstrapError> { + use opnsense_config::config::OPNsenseShell; + + let shell = opnsense_ssh_shell(*ip, username, password); + + let names_out = shell.exec("ifconfig -l ether").await.map_err(|e| { + BootstrapError::UnexpectedResponse(format!("ifconfig -l ether failed: {e}")) + })?; + + let mut pairs: Vec<(String, String)> = Vec::new(); + for name in names_out.split_whitespace() { + let driver = name.trim_end_matches(|c: char| c.is_ascii_digit()); + if !driver_prefixes.iter().any(|p| *p == driver) { + continue; + } + + let out = shell.exec(&format!("ifconfig {name}")).await.map_err(|e| { + BootstrapError::UnexpectedResponse(format!("ifconfig {name} failed: {e}")) + })?; + + let mac = out.lines().find_map(|line| { + let trimmed = line.trim_start(); + trimmed + .strip_prefix("ether ") + .and_then(|rest| rest.split_whitespace().next()) + .map(|m| m.to_lowercase()) + }); + + match mac { + Some(m) if !m.is_empty() => pairs.push((name.to_string(), m)), + _ => warn!("ifconfig {name}: no ether line; skipping"), + } + } + + Ok(pairs) +} + +/// Read `/etc/rc.conf.d/ethname` and return the set of MAC addresses pinned in it. +/// +/// Returns `Ok(None)` if the file does not exist (fresh firewall, never +/// pinned). Returns `Ok(Some(set))` if it does — the set contains every +/// MAC referenced by an `ethname__mac="..."` line, lowercased. +/// +/// Used by `OPNsenseBootstrapScore` to NOOP the pin step when the +/// file's MAC set already equals the live MAC set discovered via +/// [`list_physical_nics_via_ssh`]. +pub async fn read_ethname_mac_set_via_ssh( + ip: &std::net::IpAddr, + username: &str, + password: &str, +) -> Result>, BootstrapError> { + use opnsense_config::config::OPNsenseShell; + + let shell = opnsense_ssh_shell(*ip, username, password); + + // Use a sentinel so we can distinguish "file missing" from "exec error". 
+ // + // Wrapped in `sh -c '...'` because OPNsense's root login shell is + // `/bin/csh` (tcsh) — Bourne `if/then/else/fi` is a syntax error + // there. Simple `&&`/`||` chains work in tcsh, but full conditionals + // need an explicit /bin/sh. + let out = shell + .exec( + "sh -c 'if [ -f /etc/rc.conf.d/ethname ]; then \ + cat /etc/rc.conf.d/ethname; \ + else \ + echo __ETHNAME_FILE_MISSING__; \ + fi'", + ) + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!("read /etc/rc.conf.d/ethname failed: {e}")) + })?; + + if out.trim() == "__ETHNAME_FILE_MISSING__" { + return Ok(None); + } + + let mut macs = std::collections::BTreeSet::new(); + for line in out.lines() { + let line = line.trim(); + let Some(rest) = line.strip_prefix("ethname_") else { + continue; + }; + let Some(eq) = rest.find('=') else { continue }; + let key = &rest[..eq]; + if !key.ends_with("_mac") { + continue; + } + let val = rest[eq + 1..].trim().trim_matches('"'); + if !val.is_empty() { + macs.insert(val.to_lowercase()); + } + } + + Ok(Some(macs)) +} + +/// SFTP the vendored ethname rc.d script onto the firewall and write the +/// matching `/etc/rc.conf.d/ethname` + early syshook so it activates on +/// next boot. +/// +/// `ethname_script` is the verbatim 280-line shell script (vendored in +/// `harmony/data/opnsense/ethname.sh`, MIT, © Eric Borisch 2016–2019), +/// embedded into the caller via `include_str!`. `pairs` is the list of +/// `(name, MAC)` to pin — typically the output of +/// [`list_physical_nics_via_ssh`]. +/// +/// Three files land on the firewall: +/// +/// * `/usr/local/etc/rc.d/ethname` (0755) — the rename script. +/// * `/etc/rc.conf.d/ethname` — `ethname_enable="NO"` + one +/// `ethname__mac="..."` line per pin. +/// * `/usr/local/etc/rc.syshook.d/early/02-ethname` (0755) — early hook +/// that calls `ethname onestart` before `netif` (the upstream rc.d +/// ordering runs ethname too late for OPNsense — it needs to happen +/// before any interface comes up). 
+/// +/// `ethname_enable="NO"` is intentional: the early syshook calls +/// `onestart` explicitly, so the regular rc.d enable would cause double +/// execution and a confusing second pass. +pub async fn install_ethname_via_ssh( + ip: &std::net::IpAddr, + username: &str, + password: &str, + ethname_script: &str, + pairs: &[(String, String)], +) -> Result<(), BootstrapError> { + use opnsense_config::config::OPNsenseShell; + + let shell = opnsense_ssh_shell(*ip, username, password); + + // 1. The script itself. + info!( + "ethname: uploading rc.d script to /usr/local/etc/rc.d/ethname \ + ({} bytes, MIT, vendored upstream)", + ethname_script.len() + ); + shell + .write_content_to_file(ethname_script, "/usr/local/etc/rc.d/ethname") + .await + .map_err(|e| BootstrapError::UnexpectedResponse(format!("SFTP ethname.sh failed: {e}")))?; + shell + .exec("chmod 0755 /usr/local/etc/rc.d/ethname") + .await + .map_err(|e| BootstrapError::UnexpectedResponse(format!("chmod ethname.sh failed: {e}")))?; + + // 2. The mapping file. + info!( + "ethname: writing /etc/rc.conf.d/ethname ({} mapping(s))", + pairs.len() + ); + let mut conf = String::from("ethname_enable=\"NO\"\nethname_timeout=30\n"); + for (name, mac) in pairs { + // Both fields come from this firewall's own `ifconfig` output + // moments earlier — trusted. A defensive sanity check guards + // against pathological output (spaces, quotes, command injection + // via $(...) in a name) that would corrupt the conf file. 
+ if !name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') { + return Err(BootstrapError::UnexpectedResponse(format!( + "Refusing to write rc.conf.d/ethname: NIC name {name:?} contains \ + unexpected characters" + ))); + } + if !mac.chars().all(|c| c.is_ascii_hexdigit() || c == ':') { + return Err(BootstrapError::UnexpectedResponse(format!( + "Refusing to write rc.conf.d/ethname: MAC {mac:?} not in xx:xx:xx:xx:xx:xx form" + ))); + } + conf.push_str(&format!("ethname_{name}_mac=\"{mac}\"\n")); + } + shell + .write_content_to_file(&conf, "/etc/rc.conf.d/ethname") + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!("SFTP /etc/rc.conf.d/ethname failed: {e}")) + })?; + + // 3. The early syshook. + info!( + "ethname: writing early-boot syshook \ + /usr/local/etc/rc.syshook.d/early/02-ethname" + ); + shell + .exec("mkdir -p /usr/local/etc/rc.syshook.d/early") + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!("mkdir rc.syshook.d/early failed: {e}")) + })?; + let hook = "#!/bin/sh\n/usr/local/etc/rc.d/ethname onestart\n"; + shell + .write_content_to_file(hook, "/usr/local/etc/rc.syshook.d/early/02-ethname") + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!("SFTP 02-ethname syshook failed: {e}")) + })?; + shell + .exec("chmod 0755 /usr/local/etc/rc.syshook.d/early/02-ethname") + .await + .map_err(|e| { + BootstrapError::UnexpectedResponse(format!("chmod 02-ethname syshook failed: {e}")) + })?; + + Ok(()) +} + /// Extract the CSRF token field name and value from an OPNsense HTML page. /// /// OPNsense embeds CSRF tokens as hidden inputs with a dynamic field name. diff --git a/harmony/src/modules/opnsense/bootstrap_score.rs b/harmony/src/modules/opnsense/bootstrap_score.rs new file mode 100644 index 00000000..d1bbd45c --- /dev/null +++ b/harmony/src/modules/opnsense/bootstrap_score.rs @@ -0,0 +1,710 @@ +//! `OPNsenseBootstrapScore` — declarative wrapper around the OPNsense +//! first-boot procedure. +//! +//! 
Targets the minimal [`OPNsenseBootstrapTopology`], which represents a +//! factory-fresh OPNsense reachable at its default LAN IP with the +//! install-time root password. Running this Score: +//! +//! 1. Logs into the web UI, aborts the initial setup wizard, enables SSH. +//! 2. Moves the web GUI from port 443 to `target_api_port`. +//! 3. **Pins physical NIC names to MAC addresses** via the vendored +//! `ethname` rc.d script (MIT). Mandatory step — without it, the +//! firmware-upgrade reboot below can shuffle `igc0/igc1/...` and +//! silently re-point wan/lan at the wrong cables. Idempotent and +//! harmless on single-NIC VMs. +//! 4. SSHes in, mints an API key + secret on the root user, and persists +//! both `OPNSenseApiCredentials` and `OPNSenseFirewallCredentials` to +//! `harmony_secret::SecretManager`. +//! 5. (Default-on, via `firmware_upgrade`) Brings the firewall up to the +//! latest firmware/package level using the same logic as +//! [`OPNsenseFirmwareUpgradeScore`](crate::modules::opnsense::firmware_upgrade::OPNsenseFirmwareUpgradeScore). +//! Configurable via `FirmwareUpgradeMode` (Auto / AutoMinor / Prompt / +//! Disabled). +//! 6. **(Optional, via `lan_bridge`)** Creates an `if_bridge` spanning +//! the selected physical NICs and re-points `` +//! at it. Shares +//! [`ensure_lan_bridge_step`](crate::modules::opnsense::lan_bridge::ensure_lan_bridge_step) +//! with the standalone +//! [`OPNsenseLanBridgeScore`](crate::modules::opnsense::lan_bridge::OPNsenseLanBridgeScore). +//! Runs AFTER firmware upgrade (so the bridge lives in the final +//! firmware's config schema) and BEFORE the optional LAN-IP rebind. +//! 7. Optionally rebinds the LAN to a new IP/subnet. +//! +//! After it runs, callers construct a normal +//! [`OPNSenseFirewall`](crate::infra::opnsense::OPNSenseFirewall) from the +//! now-stored credentials and run `Score` composition +//! against it — that's where production configuration lives. +//! +//! # Side effects +//! +//! 
This Score writes to `SecretManager`. That's an acknowledged exception +//! to Score purity: the credentials *are* the Score's output, and they +//! must live somewhere durable so the second-phase topology can read them +//! back. It's the same model `SecretManager::get_or_prompt` already uses. + +use async_trait::async_trait; +use harmony_secret::SecretManager; +use harmony_types::id::Id; +use harmony_types::net::IpAddress; +use log::{info, warn}; +use serde::Serialize; + +use crate::{ + config::secret::{OPNSenseApiCredentials, OPNSenseFirewallCredentials}, + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::opnsense::bootstrap::{ + DEFAULT_PHYSICAL_DRIVER_PREFIXES, OPNsenseBootstrap, change_lan_ip_via_ssh, + create_api_key_ssh, probe_https, reboot_and_verify_via_api, + set_lan_dhcp_range_via_api, + }, + modules::opnsense::firmware_upgrade::{FirmwareUpgradeMode, perform_firmware_upgrade}, + modules::opnsense::lan_bridge::{LanBridgeParams, ensure_lan_bridge_step}, + modules::opnsense::pin_nic_names::pin_nic_names_step, + score::Score, + topology::OPNsenseBootstrapTopology, +}; + +/// New LAN address to apply at the end of the bootstrap. +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct LanRebind { + pub new_ip: IpAddress, + pub prefix: u8, +} + +/// Bring a factory-fresh OPNsense to a Harmony-driveable state, ending it +/// at a known port and (optionally) a new LAN address. +#[derive(Debug, Clone, Serialize)] +pub struct OPNsenseBootstrapScore { + /// HTTPS port the web GUI / API will end up on (typically `9443`). + pub target_api_port: u16, + /// If `Some`, the LAN interface is rebound to this address as the + /// final dance step. If `None`, the LAN stays where it was. + pub target_lan: Option, + /// How long to wait for the web GUI to come up on `target_api_port` + /// after the port move (default: 120s). 
+ pub webgui_ready_timeout: std::time::Duration, + /// Disable OPNsense's automatic HTTP→HTTPS redirect on port 80. + /// Required when something else needs to bind `0.0.0.0:80` (e.g. + /// HAProxy on a CARP VIP). + pub disable_http_redirect: bool, + /// How aggressively to apply pending firmware updates immediately after + /// credentials are persisted and before any optional LAN rebind. + /// + /// Defaults to `FirmwareUpgradeMode::Auto` (apply everything). Use + /// `AutoMinor` to skip major-series upgrades, `Prompt` to ask the + /// operator for each pending update, or `Disabled` to skip the upgrade + /// step entirely (e.g. for VM integration tests, air-gapped + /// environments, or pinned-version deployments). The underlying logic + /// lives in + /// [`crate::modules::opnsense::firmware_upgrade::perform_firmware_upgrade`]. + pub firmware_upgrade: FirmwareUpgradeMode, + /// Optional `if_bridge` step. When `Some(_)`, creates a bridge with + /// the given members AFTER firmware upgrade and BEFORE the optional + /// LAN-IP rebind below. Re-points `` at the + /// bridge so the rebind (if any) targets the bridge interface. + /// When `None`, the bridge step is skipped entirely. Shares + /// [`ensure_lan_bridge_step`](crate::modules::opnsense::lan_bridge::ensure_lan_bridge_step) + /// with the standalone + /// [`OPNsenseLanBridgeScore`](crate::modules::opnsense::lan_bridge::OPNsenseLanBridgeScore). 
+ pub lan_bridge: Option, +} + +impl Default for OPNsenseBootstrapScore { + fn default() -> Self { + Self { + target_api_port: 9443, + target_lan: None, + webgui_ready_timeout: std::time::Duration::from_secs(120), + disable_http_redirect: false, + firmware_upgrade: FirmwareUpgradeMode::Auto, + lan_bridge: None, + } + } +} + +impl Score for OPNsenseBootstrapScore { + fn name(&self) -> String { + "OPNsenseBootstrapScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OPNsenseBootstrapInterpret { + score: self.clone(), + }) + } +} + +#[derive(Debug)] +struct OPNsenseBootstrapInterpret { + score: OPNsenseBootstrapScore, +} + +/// The three terminal branches of the idempotency check. +#[derive(Debug, Clone, PartialEq, Eq)] +enum Decision { + Noop, + Dance, + Failure(String), +} + +/// Decide what the Interpret should do given the current observed state. +/// +/// Pure function over the four booleans so the matrix is unit-testable +/// without touching the network or the secret store. +fn decide( + api_creds_exist: bool, + ssh_creds_exist: bool, + vanilla_reachable: bool, + target_reachable: bool, +) -> Decision { + match ( + api_creds_exist, + ssh_creds_exist, + vanilla_reachable, + target_reachable, + ) { + // Already bootstrapped: vanilla gone, target up, both cred sets present. + (true, true, false, true) => Decision::Noop, + // Vanilla still answering — clean first run or mid-flight resume. + // The dance's individual steps are idempotent on the firewall side, + // so re-running a 90%-done bootstrap is cheap. + (_, _, true, _) => Decision::Dance, + // Vanilla gone, target up, but at least one cred set is missing — + // partial bootstrap that lost its secrets. + (false, _, false, true) | (_, false, false, true) => Decision::Failure( + "Detected a partial bootstrap: OPNsense answers at the target address but at least \ + one of OPNSenseApiCredentials / OPNSenseFirewallCredentials is missing from the \ + secret store. 
The factory-fresh state at the vanilla address is gone, so a fresh \ + key cannot be minted. Factory-reset the firewall (console menu option 4) and \ + re-run, or restore the lost credentials from your backup." + .to_string(), + ), + // Catch-all: nothing reachable anywhere. + _ => Decision::Failure( + "Firewall not reachable at either the vanilla address or the target address. \ + Check power, network cables, and dev-machine subnet membership." + .to_string(), + ), + } +} + +#[async_trait] +impl Interpret for OPNsenseBootstrapInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &OPNsenseBootstrapTopology, + ) -> Result { + let vanilla_ip = topology.vanilla_ip.to_string(); + let tag = format!("[OPNsenseBootstrap/{vanilla_ip}]"); + let probe_timeout = std::time::Duration::from_secs(3); + + // ── Step 1: idempotency probe ──────────────────────────────── + let target_ip_str = match &self.score.target_lan { + Some(rebind) => rebind.new_ip.to_string(), + None => vanilla_ip.clone(), + }; + let target_reachable = + probe_https(&target_ip_str, self.score.target_api_port, probe_timeout).await; + let vanilla_reachable = probe_https(&vanilla_ip, 443, probe_timeout).await; + let api_creds_exist = SecretManager::get::().await.is_ok(); + let ssh_creds_exist = SecretManager::get::() + .await + .is_ok(); + + info!( + "{tag} Idempotency probe: vanilla_reachable={vanilla_reachable}, \ + target_reachable={target_reachable}, api_creds_exist={api_creds_exist}, \ + ssh_creds_exist={ssh_creds_exist}" + ); + + match decide( + api_creds_exist, + ssh_creds_exist, + vanilla_reachable, + target_reachable, + ) { + Decision::Noop => { + info!( + "{tag} NOOP — firewall already bootstrapped and reachable at \ + https://{target_ip_str}:{}", + self.score.target_api_port + ); + return Ok(Outcome::noop(format!( + "OPNsense already bootstrapped at {target_ip_str}:{}; nothing to do", + self.score.target_api_port + ))); + } + Decision::Failure(reason) => { + return 
Err(InterpretError::new(reason)); + } + Decision::Dance => { + if api_creds_exist && ssh_creds_exist { + info!("{tag} DANCE — resuming from partial state (creds present)"); + } else { + info!("{tag} DANCE — starting fresh bootstrap from vanilla state"); + } + } + } + + // ── Step 2: web UI bootstrap dance ─────────────────────────── + let base_url = format!("https://{vanilla_ip}"); + let bootstrap = OPNsenseBootstrap::new(&base_url); + + bootstrap + .login(&topology.default_username, &topology.default_password) + .await + .map_err(|e| { + InterpretError::new(format!( + "Failed to log in to OPNsense web UI at {base_url}: {e}. Confirm the \ + firewall is at the install-time defaults — root password unchanged, \ + wizard not completed, web GUI still on 443." + )) + })?; + info!("{tag} Logged in to web UI as {}", topology.default_username); + + // Wizard-abort skipped: `POST /api/core/initial_setup/abort` + // requires a session-CSRF token we don't fetch (it returns 403 + // without it), AND empirically the wizard flag doesn't block any + // of the subsequent steps (SSH enable, port change, API key mint, + // LAN rebind). The only observable effect of leaving it set is + // that a human operator who later opens the WebUI manually will + // see the wizard prompt once. The helper `OPNsenseBootstrap:: + // abort_wizard()` is still available if a future caller wants to + // do it properly with CSRF. 
+ + bootstrap + .enable_ssh(true, true) + .await + .map_err(|e| InterpretError::new(format!("Failed to enable SSH: {e}")))?; + info!("{tag} Enabled SSH (root login, password auth)"); + + bootstrap + .set_webgui_port( + self.score.target_api_port, + &vanilla_ip, + self.score.disable_http_redirect, + ) + .await + .map_err(|e| InterpretError::new(format!("Failed to move web GUI port: {e}")))?; + info!( + "{tag} Moved web GUI port 443 -> {}", + self.score.target_api_port + ); + + let new_url = format!("https://{vanilla_ip}:{}", self.score.target_api_port); + OPNsenseBootstrap::wait_for_ready(&new_url, self.score.webgui_ready_timeout) + .await + .map_err(|e| { + InterpretError::new(format!( + "Web UI did not respond on {new_url} within {:?}: {e}", + self.score.webgui_ready_timeout + )) + })?; + info!("{tag} Web UI ready at {new_url}"); + + // ── Step 2.5: pin NIC names to MAC addresses ───────────────── + // Mandatory built-in step. Shared with the standalone + // `OPNsensePinNicNamesScore` via `pin_nic_names_step`. Pins + // every physical NIC's name to its MAC *before* the + // firmware-upgrade reboot below — that's the first reboot + // the pinning has to defend against. Harmless on single-NIC + // VMs (one pin, no shuffle ever). + let _ = pin_nic_names_step( + &topology.vanilla_ip, + &topology.default_username, + &topology.default_password, + DEFAULT_PHYSICAL_DRIVER_PREFIXES, + &tag, + ) + .await?; + + // ── Step 3: mint API key & persist secrets ─────────────────── + // Persist BEFORE the LAN flip — if the LAN flip fails mid-execution, + // the operator can re-run; the dance branch picks up at "creds present, + // vanilla still reachable" and retries the rebind. 
+ let (key, secret) = create_api_key_ssh( + &topology.vanilla_ip, + &topology.default_username, + &topology.default_password, + ) + .await + .map_err(|e| InterpretError::new(format!("Failed to mint API key over SSH: {e}")))?; + let key_prefix = &key[..key.len().min(12)]; + info!("{tag} Minted API key (key={key_prefix}…)"); + + SecretManager::set(&OPNSenseApiCredentials { + key: key.clone(), + secret: secret.clone(), + }) + .await?; + SecretManager::set(&OPNSenseFirewallCredentials { + username: topology.default_username.clone(), + password: topology.default_password.clone(), + }) + .await?; + info!("{tag} Persisted OPNSenseApiCredentials + OPNSenseFirewallCredentials"); + + // ── Step 4 (optional): firmware upgrade ────────────────────── + // Runs BEFORE the LAN rebind so the upgrade (which may reboot) + // happens against `vanilla_ip` — known reachable from here. The + // firewall will come back at `vanilla_ip:target_api_port`, then + // the rebind moves it onward. + if self.score.firmware_upgrade != FirmwareUpgradeMode::Disabled { + info!( + "{tag} Running firmware upgrade (mode={:?}) before optional LAN rebind ...", + self.score.firmware_upgrade + ); + let client = opnsense_api::OpnsenseClient::builder() + .base_url(format!( + "https://{vanilla_ip}:{}/api", + self.score.target_api_port + )) + .auth_from_key_secret(&key, &secret) + .skip_tls_verify() + .timeout_secs(60) + .build() + .map_err(|e| { + InterpretError::new(format!( + "Failed to build OPNsense client for firmware upgrade: {e}" + )) + })?; + let outcome = perform_firmware_upgrade( + &client, + &vanilla_ip, + self.score.target_api_port, + self.score.firmware_upgrade, + &tag, + ) + .await?; + info!("{tag} Firmware upgrade outcome: {}", outcome.message); + } else { + info!("{tag} firmware_upgrade=Disabled; skipping firmware upgrade"); + } + + // ── Step 4.5: optional LAN bridge ──────────────────────────── + // Shares `ensure_lan_bridge_step` with the standalone + // `OPNsenseLanBridgeScore`. 
Runs AFTER firmware upgrade (so the + // bridge lives in the final firmware's config schema) and + // BEFORE the LAN-IP rebind below (so the rebind targets the + // bridge, not the raw LAN NIC). + if let Some(params) = self.score.lan_bridge.clone() { + info!( + "{tag} LAN bridge step — members={:?}, reassign_lan={}, perf_tunables={}", + params.members, params.reassign_lan, params.perf_tunables + ); + let bridge_config = opnsense_config::Config::from_credentials_with_api_port( + topology.vanilla_ip, + None, + self.score.target_api_port, + &key, + &secret, + &topology.default_username, + &topology.default_password, + ) + .await + .map_err(|e| { + InterpretError::new(format!( + "Failed to build OPNsense Config for LAN bridge step: {e}" + )) + })?; + ensure_lan_bridge_step( + &bridge_config, + &topology.vanilla_ip, + &topology.default_username, + &topology.default_password, + &params, + &tag, + ) + .await?; + } + + // ── Step 5: optional LAN rebind ────────────────────────────── + if let Some(rebind) = &self.score.target_lan { + info!( + "{tag} LAN rebind {vanilla_ip} -> {}/{}", + rebind.new_ip, rebind.prefix + ); + + // 5a. Update DHCP pool via API *before* flipping the LAN IP. + // The API endpoint lives on the firewall's current LAN IP, so + // it has to be hit before that IP changes. The new pool is the + // OPNsense-default `.100`–`.199` for the target + // subnet — operators who want a different range can resize + // via the WebUI / API after bootstrap. 
+ let new_ip_v4 = match rebind.new_ip { + std::net::IpAddr::V4(v) => v, + _ => { + return Err(InterpretError::new( + "Target LAN must be IPv4 (IPv6 LAN rebind not yet supported)".into(), + )); + } + }; + let o = new_ip_v4.octets(); + let pool_from = format!("{}.{}.{}.100", o[0], o[1], o[2]); + let pool_to = format!("{}.{}.{}.199", o[0], o[1], o[2]); + + set_lan_dhcp_range_via_api( + &topology.vanilla_ip, + self.score.target_api_port, + &key, + &secret, + &topology.default_username, + &topology.default_password, + &pool_from, + &pool_to, + ) + .await + .map_err(|e| { + InterpretError::new(format!( + "Failed to update DHCP range to {pool_from}-{pool_to} via OPNsense API: {e}. \ + The LAN IP has NOT been changed yet — re-running this Score will retry." + )) + })?; + info!( + "{tag} DHCP range moved to {pool_from}-{pool_to} via OPNsense API \ + (dnsmasq reconfigured)" + ); + + // 5b. Flip the LAN IP itself. This is the step that severs the + // SSH/HTTP connection — everything before must be done. + change_lan_ip_via_ssh( + &vanilla_ip, + &rebind.new_ip.to_string(), + rebind.prefix, + &topology.default_username, + &topology.default_password, + ) + .await + .map_err(|e| { + InterpretError::new(format!( + "Persisted credentials successfully but the LAN-rebind step failed: {e}. \ + The firewall is still reachable at {vanilla_ip}; re-running this Score \ + will pick up at the rebind step (idempotency: creds present, vanilla up)." + )) + })?; + + // Best-effort post-flip probe. Connectivity from the dev machine to + // the new subnet is a physical concern outside this Score's control. + let post_url = rebind.new_ip.to_string(); + let post_probe = probe_https( + &post_url, + self.score.target_api_port, + std::time::Duration::from_secs(5), + ) + .await; + if !post_probe { + warn!( + "{tag} Could not confirm reachability at https://{post_url}:{} after the \ + LAN rebind. 
The firewall may need a few seconds to settle, or your dev \ + machine is no longer on the firewall's subnet — reconnect and verify \ + manually.", + self.score.target_api_port + ); + } + } + + // ── Step 5.5: pause for operator network reconnect ────────── + // The LAN rebind above severed the dev machine's connection to + // the firewall. The terminal reboot below needs the firewall + // reachable from this process. Pause and ask the operator to + // reconnect into the new subnet before proceeding. + if let Some(rebind) = &self.score.target_lan { + let new_addr = format!( + "https://{}:{}", + rebind.new_ip, self.score.target_api_port + ); + println!(); + println!("───────────────────────────────────────────────────────────"); + println!(" LAN rebind applied. The firewall is now at {new_addr}."); + println!(" Your machine is no longer on its subnet."); + println!(); + println!(" → Reconnect to the new LAN now:"); + println!(" • renew DHCP, or"); + println!( + " • set a static address in {}/{}.", + rebind.new_ip, rebind.prefix + ); + println!(); + println!(" Once your machine can reach {new_addr}, confirm below"); + println!(" to trigger the final reboot + verify step."); + println!("───────────────────────────────────────────────────────────"); + + let proceed = inquire::Confirm::new("Continue with the reboot?") + .with_default(true) + .prompt() + .map_err(|e| { + InterpretError::new(format!( + "Failed to read confirmation prompt: {e}. Re-run the Score \ + to retry (the dance will resume at the reboot step)." + )) + })?; + if !proceed { + return Err(InterpretError::new(format!( + "Aborted by operator after LAN rebind. The firewall is at \ + {new_addr} but has not been rebooted yet. Re-run the Score \ + after reconnecting to the new LAN to finish the bootstrap." + ))); + } + } + + // ── Step 6: terminal reboot + verify ──────────────────────── + // The dance has touched firmware, the optional LAN bridge, the + // DHCP pool, and the LAN IP itself. 
A clean reboot guarantees + // the running kernel/config matches what was persisted. Hard + // fails if the firewall does not reappear at the expected + // address within the recovery window. + let final_ip = match &self.score.target_lan { + Some(rebind) => rebind.new_ip.to_string(), + None => vanilla_ip.clone(), + }; + info!( + "{tag} Step 6: rebooting and verifying https://{final_ip}:{} comes back ...", + self.score.target_api_port + ); + reboot_and_verify_via_api( + &final_ip, + self.score.target_api_port, + &key, + &secret, + &tag, + ) + .await + .map_err(|e| { + InterpretError::new(format!( + "Persisted credentials and applied all config changes, but the final \ + reboot/verify step failed: {e}. On-disk firewall state should be \ + correct — investigate and reboot manually if needed." + )) + })?; + + // ── Build the success Outcome (runbook-shaped details) ─────── + let lan_line = match &self.score.target_lan { + Some(rebind) => format!( + " Final IP: {}/{} (LAN rebind applied)", + rebind.new_ip, rebind.prefix + ), + None => format!(" Final IP: {vanilla_ip} (no LAN rebind)"), + }; + + let mut details = vec![ + "OPNsense bootstrap complete".to_string(), + String::new(), + format!(" Vanilla IP: {vanilla_ip}"), + lan_line, + format!( + " Web UI: https://{final_ip}:{}", + self.score.target_api_port + ), + format!(" SSH: {}@{final_ip}", topology.default_username), + " API creds: stored as OPNSenseApiCredentials in SecretManager".to_string(), + " SSH creds: stored as OPNSenseFirewallCredentials in SecretManager".to_string(), + " Reboot: triggered and reachability verified at the final address".to_string(), + ]; + if self.score.target_lan.is_some() { + details.push(String::new()); + details.push("NEXT STEPS (manual):".to_string()); + details.push( + " The dev machine that ran this Score is no longer on the firewall's".to_string(), + ); + details.push( + " subnet. 
Reconnect into the new LAN (renew DHCP or set a static IP)".to_string(), + ); + details.push(" before running the next Score against this firewall.".to_string()); + } + + Ok(Outcome::success_with_details( + format!( + "OPNsense bootstrapped — web UI at https://{final_ip}:{}", + self.score.target_api_port + ), + details, + )) + } + + fn get_name(&self) -> InterpretName { + InterpretName::OPNsenseBootstrap + } + + fn get_version(&self) -> Version { + Version::from("1.0.0").unwrap() + } + + fn get_status(&self) -> InterpretStatus { + InterpretStatus::QUEUED + } + + fn get_children(&self) -> Vec { + vec![] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_score_name() { + let s = OPNsenseBootstrapScore::default(); + assert_eq!( + >::name(&s), + "OPNsenseBootstrapScore" + ); + } + + #[test] + fn test_score_serializes() { + let s = OPNsenseBootstrapScore::default(); + let _: serde_value::Value = + serde_value::to_value(&s).expect("OPNsenseBootstrapScore should serialize"); + } + + #[test] + fn test_decide_noop_when_target_up_creds_present_vanilla_gone() { + assert_eq!(decide(true, true, false, true), Decision::Noop); + } + + #[test] + fn test_decide_dance_on_clean_first_run() { + // No creds yet, vanilla reachable. + assert_eq!(decide(false, false, true, false), Decision::Dance); + assert_eq!(decide(false, false, true, true), Decision::Dance); + } + + #[test] + fn test_decide_dance_when_resuming_with_creds() { + // Creds present, vanilla still answering → LAN rebind didn't happen. 
+ assert_eq!(decide(true, true, true, true), Decision::Dance); + assert_eq!(decide(true, true, true, false), Decision::Dance); + } + + #[test] + fn test_decide_failure_on_partial_creds_lost() { + for (api, ssh) in [(false, true), (true, false), (false, false)] { + match decide(api, ssh, false, true) { + Decision::Failure(m) => assert!( + m.contains("partial bootstrap"), + "expected 'partial bootstrap' in: {m}" + ), + d => panic!("expected Failure for ({api},{ssh},false,true), got {d:?}"), + } + } + } + + #[test] + fn test_decide_failure_when_nothing_reachable() { + for (api, ssh) in [(false, false), (true, true), (true, false), (false, true)] { + match decide(api, ssh, false, false) { + Decision::Failure(m) => { + assert!( + m.contains("not reachable"), + "expected 'not reachable' in: {m}" + ) + } + d => panic!("expected Failure for ({api},{ssh},false,false), got {d:?}"), + } + } + } +} diff --git a/harmony/src/modules/opnsense/firmware_upgrade.rs b/harmony/src/modules/opnsense/firmware_upgrade.rs new file mode 100644 index 00000000..de14eaf9 --- /dev/null +++ b/harmony/src/modules/opnsense/firmware_upgrade.rs @@ -0,0 +1,872 @@ +//! `OPNsenseFirmwareUpgradeScore` — bring an OPNsense firewall to the latest +//! firmware/package level via the REST API. +//! +//! The flow mirrors what OPNsense's web UI does when an operator clicks +//! "Check for updates", then "Update": kick `firmware/check` (async), poll +//! `firmware/upgradestatus` until the check reports `"done"`, read +//! `firmware/status` to see what's actionable, kick `firmware/update` or +//! `firmware/upgrade` (also async), poll `upgradestatus` until done, trigger +//! `firmware/reboot` if `status_reboot == "1"`, verify the version actually +//! moved, and loop in case the upgrade revealed further pending updates. +//! +//! The core logic is a free function ([`perform_firmware_upgrade`]) so it +//! can be reused from elsewhere in the framework — notably from +//! 
[`OPNsenseBootstrapScore`](crate::modules::opnsense::bootstrap_score::OPNsenseBootstrapScore) +//! when its `firmware_upgrade` mode is not `Disabled`. +//! +//! Idempotent: when nothing is pending on the first iteration, the helper +//! returns `UpgradeOutcome { upgraded: false, .. }` with the same version +//! before and after. + +use std::time::{Duration, Instant}; + +use async_trait::async_trait; +use harmony_types::id::Id; +use log::{debug, info}; +use opnsense_api::OpnsenseClient; +use serde::Serialize; +use thiserror::Error; + +use crate::{ + data::Version, + infra::opnsense::OPNSenseFirewall, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::opnsense::bootstrap::probe_https, + score::Score, +}; + +/// Maximum loop iterations. A single upgrade can sometimes reveal more +/// pending packages (e.g. the kernel upgrade unlocks new plugin versions), +/// so the helper loops; 5 is a sanity ceiling. +const MAX_UPGRADE_ITERATIONS: u32 = 5; + +/// How long to wait for an async firmware task to report `"done"`. +/// Empirically 20 min covers a full 26.1 → 26.1.x upgrade including +/// package download, install, and reboot on a 2-vCPU / 2 GiB VM. +const TASK_DONE_TIMEOUT: Duration = Duration::from_secs(1200); + +/// How long to wait for the API to come back after a reboot. 10 min is +/// the same ceiling OPNsense's own WebUI uses. +const REBOOT_RECOVERY_TIMEOUT: Duration = Duration::from_secs(600); + +/// How long to wait for the metadata-refresh `firmware/check` task to +/// reach `done`. Distinct from the upgrade timeout: the check itself +/// is fast (download + parse the package index), 5 min is plenty. +const CHECK_TASK_TIMEOUT: Duration = Duration::from_secs(300); + +/// Time to let an async task spin up after we trigger it, before we +/// start polling status. 
Without this, the first poll often catches +/// `status == "none"` from the prior state (the new task hasn't +/// registered yet) and we mistakenly conclude there's nothing to do. +const POST_TRIGGER_SETTLE: Duration = Duration::from_secs(3); + +/// Interval between polls of `firmware/upgradestatus` and friends. +const POLL_INTERVAL: Duration = Duration::from_secs(5); + +/// Time the firewall is given to become unreachable after we kick +/// an explicit `firmware/reboot`. Tight on purpose — the reboot was +/// just triggered; if the API stays up beyond this, something's wrong. +const REBOOT_UNREACHABLE_TIMEOUT: Duration = Duration::from_secs(60); + +/// Brief HTTPS probe timeout used inside the wait/probe loops. +const PROBE_TIMEOUT: Duration = Duration::from_secs(2); + +/// After the firewall comes back from a reboot the TLS handshake is +/// answering but `configd` and the MVC backend are still spinning up. +/// 30 s is empirically enough on a 2-vCPU VM. +const POST_REBOOT_SETTLE: Duration = Duration::from_secs(30); + +/// How the firmware-upgrade helper decides whether (and how) to apply a +/// pending update. +/// +/// OPNsense's `firmware/status` endpoint returns the kind of pending change +/// in its `status` field: +/// +/// - `status == "update"` — in-series package update (e.g. 26.1 → 26.1.8). +/// Considered **minor**. +/// - `status == "upgrade"` — major-series upgrade (e.g. 26.1 → 26.7). +/// Considered **major**. +/// +/// This enum gates which kinds get applied automatically vs. require the +/// operator's explicit approval. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +pub enum FirmwareUpgradeMode { + /// Apply every pending update and upgrade automatically. Latest version + /// always wins. + Auto, + /// Apply in-series updates (`status == "update"`) automatically but + /// skip major-series upgrades (`status == "upgrade"`). The Score + /// returns success without applying the major; rerun with `Auto` or + /// `Prompt` to pick it up. 
+ AutoMinor, + /// For each pending update, print a summary and ask the operator + /// `[Y/n]` via stdin. Fails with a clear error if there is no TTY + /// (CI/headless contexts must pick `Auto`, `AutoMinor`, or `Disabled` + /// explicitly). + Prompt, + /// Skip firmware upgrades entirely. + Disabled, +} + +impl Default for FirmwareUpgradeMode { + fn default() -> Self { + FirmwareUpgradeMode::Auto + } +} + +/// Errors the firmware-upgrade helper may surface. +#[derive(Debug, Error)] +pub enum FirmwareUpgradeError { + #[error("OPNsense API error during {phase}: {msg}")] + Api { phase: &'static str, msg: String }, + #[error("Timed out: {0}")] + Timeout(String), + #[error("Firmware status reports error: {0}")] + FirmwareErrorState(String), + #[error("Unexpected firmware status: {0}")] + UnexpectedStatus(String), + #[error("Reached max upgrade iterations ({0}); firmware may have further pending updates")] + TooManyIterations(u32), + #[error( + "FirmwareUpgradeMode::Prompt requires an interactive TTY. \ + Run in a terminal, or pick FirmwareUpgradeMode::Auto / AutoMinor / Disabled \ + for headless/CI contexts." + )] + PromptRequiresTty, + #[error("Operator declined the firmware update via interactive prompt")] + DeclinedByOperator, +} + +impl From for InterpretError { + fn from(e: FirmwareUpgradeError) -> Self { + InterpretError::new(format!("Firmware upgrade failed: {e}")) + } +} + +/// What [`perform_firmware_upgrade`] actually did. +#[derive(Debug, Clone)] +pub struct UpgradeOutcome { + /// `true` if at least one update/upgrade was applied. + pub upgraded: bool, + /// `true` if the firewall rebooted at least once during the upgrade. + pub rebooted: bool, + /// Version reported by `firmware/info` before the first check. + pub initial_version: String, + /// Version reported by `firmware/info` after the last upgrade cycle. + pub final_version: String, + /// How many check/upgrade iterations the helper ran. 
+ pub iterations: u32, + /// Human-readable summary suitable for log lines / Score `Outcome`. + pub message: String, +} + +/// Bring an OPNsense firewall to the latest firmware/package level. +/// +/// `mode` gates whether and how each pending update is applied (see +/// [`FirmwareUpgradeMode`]). `firewall_ip` and `api_port` are needed for +/// post-reboot reachability probes — the `OpnsenseClient` already knows +/// them but doesn't expose them. `tag` is a short identifier (typically +/// an IP) used as a log prefix so this helper can be called from +/// multiple contexts without making log lines ambiguous. +/// +/// See module-level docs for the algorithm. +pub async fn perform_firmware_upgrade( + client: &OpnsenseClient, + firewall_ip: &str, + api_port: u16, + mode: FirmwareUpgradeMode, + tag: &str, +) -> Result { + // ── Disabled short-circuit ────────────────────────────────────── + if mode == FirmwareUpgradeMode::Disabled { + let v = read_firmware_version(client).await?; + info!("{tag} firmware_upgrade mode=Disabled; skipping"); + return Ok(UpgradeOutcome { + upgraded: false, + rebooted: false, + initial_version: v.clone(), + final_version: v, + iterations: 0, + message: "Firmware upgrade skipped (mode=Disabled)".into(), + }); + } + // ── Step 1: capture the initial version ────────────────────────── + let initial_version = read_firmware_version(client).await?; + info!("{tag} Initial firmware version: {initial_version}"); + + let mut current_version = initial_version.clone(); + let mut total_rebooted = false; + let mut iterations: u32 = 0; + let mut applied_any = false; + + loop { + iterations += 1; + if iterations > MAX_UPGRADE_ITERATIONS { + return Err(FirmwareUpgradeError::TooManyIterations( + MAX_UPGRADE_ITERATIONS, + )); + } + info!("{tag} ── Iteration {iterations} ──"); + + // ── Step 2: kick a check and wait for it to finish ─────────── + info!("{tag} Triggering firmware/check (async) ..."); + let _: serde_json::Value = client + .post_typed::("core", 
"firmware", "check", None) + .await + .map_err(|e| FirmwareUpgradeError::Api { + phase: "firmware/check", + msg: e.to_string(), + })?; + wait_for_task_done(client, "check", CHECK_TASK_TIMEOUT, tag).await?; + + // ── Step 3: read status to see what's actionable ───────────── + let status: serde_json::Value = client + .post_typed::("core", "firmware", "status", None) + .await + .map_err(|e| FirmwareUpgradeError::Api { + phase: "firmware/status", + msg: e.to_string(), + })?; + let status_kind = status["status"].as_str().unwrap_or("").to_string(); + let status_msg = status["status_msg"].as_str().unwrap_or("").to_string(); + let needs_reboot = status["status_reboot"].as_str() == Some("1"); + info!( + "{tag} firmware/status: status={status_kind:?}, status_msg={status_msg:?}, \ + status_reboot={needs_reboot}" + ); + + // ── Step 4: decide what to do ──────────────────────────────── + let action_endpoint: &'static str = match status_kind.as_str() { + "none" | "" => { + if !applied_any { + info!("{tag} No firmware updates available — already current"); + return Ok(UpgradeOutcome { + upgraded: false, + rebooted: false, + initial_version: initial_version.clone(), + final_version: current_version, + iterations, + message: format!( + "Already at latest firmware ({initial_version}); no upgrade needed" + ), + }); + } + info!("{tag} No more updates available; firmware is current"); + break; + } + "update" => "update", + "upgrade" => "upgrade", + "error" => return Err(FirmwareUpgradeError::FirmwareErrorState(status_msg)), + other => { + return Err(FirmwareUpgradeError::UnexpectedStatus(format!( + "{other:?} (status_msg: {status_msg:?})" + ))); + } + }; + + // ── Step 4b: mode-gating ───────────────────────────────────── + // Build a human-readable summary now so we can log it (and feed + // it to a prompt if needed). 
+ let opnsense_change = extract_opnsense_version_change(&status); + let summary = render_upgrade_summary( + &status_msg, + action_endpoint, + &current_version, + opnsense_change.as_ref(), + needs_reboot, + ); + info!("{tag} Pending firmware {action_endpoint}:\n{summary}"); + + match mode { + FirmwareUpgradeMode::Disabled => { + // Unreachable — handled at the top of the function — but + // exhaustiveness is nice. + unreachable!("FirmwareUpgradeMode::Disabled short-circuits earlier"); + } + FirmwareUpgradeMode::Auto => { + // Proceed for both "update" and "upgrade". + } + FirmwareUpgradeMode::AutoMinor => { + if action_endpoint == "upgrade" { + info!( + "{tag} mode=AutoMinor; skipping major-series upgrade. \ + Rerun with FirmwareUpgradeMode::Auto or Prompt to apply it." + ); + let final_message = if applied_any { + format!( + "Firmware: {initial_version} → {current_version} \ + in {iterations} iteration(s); stopped before major-series \ + upgrade (mode=AutoMinor)" + ) + } else { + format!( + "Major-series upgrade available but skipped (mode=AutoMinor); \ + firmware unchanged at {current_version}" + ) + }; + return Ok(UpgradeOutcome { + upgraded: applied_any, + rebooted: total_rebooted, + initial_version: initial_version.clone(), + final_version: current_version, + iterations, + message: final_message, + }); + } + } + FirmwareUpgradeMode::Prompt => { + // Summary was already info!-logged just above; the prompt + // itself just asks the yes/no question. 
+ let prompt_text = + format!("Apply this firmware {action_endpoint} on {firewall_ip}?"); + let answer = inquire::Confirm::new(&prompt_text) + .with_default(true) + .prompt(); + match answer { + Ok(true) => { + info!("{tag} Operator accepted the {action_endpoint}"); + } + Ok(false) => { + info!("{tag} Operator declined the {action_endpoint}"); + let final_message = if applied_any { + format!( + "Firmware: {initial_version} → {current_version} \ + in {iterations} iteration(s); stopped after operator declined \ + the next {action_endpoint}" + ) + } else { + format!( + "Firmware {action_endpoint} available but declined by operator; \ + firmware unchanged at {current_version}" + ) + }; + return Ok(UpgradeOutcome { + upgraded: applied_any, + rebooted: total_rebooted, + initial_version: initial_version.clone(), + final_version: current_version, + iterations, + message: final_message, + }); + } + Err(inquire::InquireError::NotTTY) => { + return Err(FirmwareUpgradeError::PromptRequiresTty); + } + Err(e) => { + return Err(FirmwareUpgradeError::Api { + phase: "interactive prompt", + msg: e.to_string(), + }); + } + } + } + } + + // ── Step 5: trigger the action ─────────────────────────────── + info!("{tag} Triggering firmware/{action_endpoint} (async) ..."); + let _: serde_json::Value = client + .post_typed::("core", "firmware", action_endpoint, None) + .await + .map_err(|e| FirmwareUpgradeError::Api { + phase: action_endpoint, + msg: e.to_string(), + })?; + + // ── Step 6: wait for the action to complete, possibly through + // a mid-task reboot ── + // Snapshot the version BEFORE the action so the multi-signal + // waiter can detect "version moved" as completion. `current_version` + // is also valid here, but explicit naming makes the intent obvious. 
+ let version_before_action = current_version.clone(); + let task_outcome = wait_for_task_or_reboot( + client, + action_endpoint, + firewall_ip, + api_port, + &version_before_action, + tag, + ) + .await?; + let mut rebooted_this_iter = task_outcome.rebooted; + + // ── Step 7: if a reboot is needed but didn't happen, trigger it ── + if needs_reboot && !rebooted_this_iter { + info!("{tag} status_reboot=1; triggering explicit firmware/reboot ..."); + // Fire-and-forget — the server tears down its connection while + // replying. + let _ = client + .post_typed::("core", "firmware", "reboot", None) + .await; + wait_for_reboot_cycle(firewall_ip, api_port, tag).await?; + rebooted_this_iter = true; + } + if rebooted_this_iter { + total_rebooted = true; + } + + // ── Step 8: verify version actually moved ──────────────────── + let new_version = read_firmware_version(client).await?; + if new_version == current_version { + info!( + "{tag} Iteration {iterations} completed but version did not change: \ + {current_version}. Stopping to avoid an infinite loop." + ); + // Don't error — some "updates" change only package set without + // bumping product_version. Break out gracefully. + applied_any = true; + break; + } + info!("{tag} Iteration {iterations}: {current_version} → {new_version}"); + current_version = new_version; + applied_any = true; + + // ── Step 9: loop. Re-check; a major upgrade may have unlocked + // further package updates. 
+ } + + let upgraded = current_version != initial_version || applied_any; + let message = if initial_version == current_version { + format!( + "Firmware upgrade completed: still on {current_version} \ + (packages refreshed; version unchanged) — {iterations} iteration(s)" + ) + } else { + format!( + "Firmware upgraded: {initial_version} → {current_version} in {iterations} iteration(s) \ + (rebooted: {total_rebooted})" + ) + }; + Ok(UpgradeOutcome { + upgraded, + rebooted: total_rebooted, + initial_version, + final_version: current_version, + iterations, + message, + }) +} + +/// Fetch the running firmware version from `/api/core/firmware/info`. +/// The version transition for the `opnsense` package itself, if it appears +/// in this update's package list. +struct OpnsensePackageChange { + old: String, + new: String, +} + +/// Look for an entry named `"opnsense"` in `status.all_packages` (status = +/// "update") or `status.all_sets` (status = "upgrade") and capture its +/// `old` → `new` version transition. +fn extract_opnsense_version_change(status: &serde_json::Value) -> Option { + // `all_packages` and `all_sets` are objects keyed by package name; the + // `opnsense` package being touched means a product-level version bump. + for field in ["all_packages", "all_sets"] { + if let Some(map) = status[field].as_object() + && let Some(entry) = map.get("opnsense").or_else(|| map.get("opnsense-update")) + { + let old = entry["old"].as_str().unwrap_or("").trim().to_string(); + let new = entry["new"].as_str().unwrap_or("").trim().to_string(); + if !new.is_empty() { + return Some(OpnsensePackageChange { old, new }); + } + } + } + None +} + +/// Build a short human-readable summary of a pending firmware update. 
+fn render_upgrade_summary( + status_msg: &str, + action_endpoint: &str, + current_version: &str, + opnsense_change: Option<&OpnsensePackageChange>, + needs_reboot: bool, +) -> String { + let main_version_line = match opnsense_change { + Some(c) => format!( + " Main OPNsense: {} → {} (the `opnsense` package itself is being updated)", + if c.old.is_empty() { "?" } else { &c.old }, + c.new + ), + None => format!( + " Main OPNsense: staying at {current_version} \ + (this update only touches packages, not the main OPNsense version)" + ), + }; + format!( + " Kind: {action_endpoint}\n\ + {main_version_line}\n\ + {summary_line}\n\ + {reboot_line}", + summary_line = format!(" Summary: {status_msg}"), + reboot_line = format!( + " Reboot needed: {}", + if needs_reboot { "yes" } else { "no" } + ), + ) +} + +async fn read_firmware_version(client: &OpnsenseClient) -> Result { + let info: serde_json::Value = + client + .get_typed("core", "firmware", "info") + .await + .map_err(|e| FirmwareUpgradeError::Api { + phase: "firmware/info", + msg: e.to_string(), + })?; + Ok(info["product_version"] + .as_str() + .unwrap_or("") + .to_string()) +} + +/// Poll `/api/core/firmware/upgradestatus` until it reports `status == "done"`. +/// +/// Tolerates transient errors (the endpoint is documented as +/// "known to be unstable" in OPNsense 26.1.6 release notes — the WebUI +/// itself traps its errors). A 404 between tasks is treated as "still in +/// progress, keep polling." 
+async fn wait_for_task_done( + client: &OpnsenseClient, + task_label: &str, + timeout: Duration, + tag: &str, +) -> Result<(), FirmwareUpgradeError> { + let deadline = Instant::now() + timeout; + let mut last_logged: Option = None; + while Instant::now() < deadline { + tokio::time::sleep(POST_TRIGGER_SETTLE).await; + match client + .get_typed::("core", "firmware", "upgradestatus") + .await + { + Ok(s) => { + let st = s["status"].as_str().unwrap_or("").to_string(); + if st == "done" { + info!("{tag} firmware/{task_label} task reported done"); + return Ok(()); + } + if last_logged.as_deref() != Some(st.as_str()) { + debug!("{tag} firmware/{task_label} task status: {st:?}"); + last_logged = Some(st); + } + } + Err(e) => { + debug!("{tag} upgradestatus poll error during {task_label}: {e}; retrying"); + } + } + } + Err(FirmwareUpgradeError::Timeout(format!( + "firmware/{task_label} did not reach 'done' within {timeout:?}" + ))) +} + +/// Internal helper return. +struct TaskOutcome { + rebooted: bool, +} + +/// Wait for a firmware-altering task (update/upgrade) to finish. +/// +/// Two completion regimes, one per branch: +/// +/// 1. **Reboot regime** — if the API goes unreachable mid-task, OPNsense +/// is rebooting. We wait for the reboot cycle to finish and return +/// immediately. The reboot completing IS the definitive completion +/// event; further polling is unreliable because OPNsense's configd +/// keeps stale task state until something kicks it (e.g. a fresh +/// `firmware/check`). The outer `perform_firmware_upgrade` loop will +/// itself call `firmware/check` at the top of the next iteration +/// and `firmware/info` for version verification — those are the +/// real post-reboot completion signals. +/// +/// 2. **No-reboot regime** — for `status_reboot=0` updates (e.g. pure +/// package metadata refresh), we poll three signals every iteration +/// and exit on any of them: +/// +/// - **A. 
version moved**: `GET firmware/info` `product_version` != +/// `version_before_action`. +/// - **B. configd idle**: `GET firmware/running` `status` field +/// empty for two consecutive polls. +/// - **C. upgradestatus done**: `GET firmware/upgradestatus` returns +/// `status == "done"`. 404s are ignored (documented unstable on +/// OPNsense 26.1). +async fn wait_for_task_or_reboot( + client: &OpnsenseClient, + task_label: &str, + firewall_ip: &str, + api_port: u16, + version_before_action: &str, + tag: &str, +) -> Result { + const IDLE_THRESHOLD: u32 = 2; + let poll_interval = POLL_INTERVAL; + let deadline = Instant::now() + TASK_DONE_TIMEOUT; + // No `mut rebooted` here: the reboot branch returns immediately with + // rebooted=true, and the polling branches below only fire when no + // reboot was observed. + let mut consecutive_idle: u32 = 0; + let mut last_running: Option = None; + + while Instant::now() < deadline { + tokio::time::sleep(poll_interval).await; + + // ── Reboot detection ──────────────────────────────────────── + // A reboot during a firmware-altering task IS the completion + // event — OPNsense schedules the reboot as the final install + // step. Don't poll signals A/B/C afterward: OPNsense's configd + // keeps the task marked as "running" until the next + // firmware/check kicks it, so signals B and C stay misleading, + // and signal A is unreliable for package-only updates that + // don't bump product_version. The outer loop's next iteration + // will trigger its own firmware/check and verify versions + // explicitly — that's the real post-reboot completion signal. 
+ if !probe_https(firewall_ip, api_port, PROBE_TIMEOUT).await { + info!("{tag} firmware/{task_label}: API unreachable — OPNsense is rebooting"); + wait_for_reboot_cycle(firewall_ip, api_port, tag).await?; + info!("{tag} firmware/{task_label}: reboot cycle complete; treating as task complete"); + return Ok(TaskOutcome { rebooted: true }); + } + + // ── Signal A: version moved ───────────────────────────────── + // Definitive completion signal. Catches the case where + // upgradestatus 404s forever after a real upgrade. + match client + .get_typed::("core", "firmware", "info") + .await + { + Ok(info) => { + let v = info["product_version"].as_str().unwrap_or("").trim(); + if !v.is_empty() && v != version_before_action { + info!( + "{tag} firmware/{task_label}: version moved {version_before_action} → {v}; \ + task complete" + ); + return Ok(TaskOutcome { rebooted: false }); + } + } + Err(e) => { + debug!("{tag} firmware/info poll error: {e}; retrying"); + } + } + + // ── Signal B: configd reports no running task ─────────────── + match client + .get_typed::("core", "firmware", "running") + .await + { + Ok(running) => { + // OPNsense's `configctl firmware running` script (see + // core/scripts/firmware/running.sh) prints "ready" when + // no firmware operation holds the lock and "busy" when + // one does. Recognize "ready" (and defensive variants) + // as idle. 
+ let st = running["status"] + .as_str() + .unwrap_or("") + .trim() + .to_ascii_lowercase(); + let is_idle = st.is_empty() || st == "ready" || st == "none"; + if is_idle { + consecutive_idle += 1; + if consecutive_idle >= IDLE_THRESHOLD { + info!( + "{tag} firmware/{task_label}: configd idle for {consecutive_idle} \ + polls; task complete" + ); + return Ok(TaskOutcome { rebooted: false }); + } + } else { + if last_running.as_deref() != Some(st.as_str()) { + debug!("{tag} firmware/running: {st:?}"); + last_running = Some(st); + } + consecutive_idle = 0; + } + } + Err(e) => { + debug!("{tag} firmware/running poll error: {e}; retrying"); + } + } + + // ── Signal C: upgradestatus reports "done" ────────────────── + // Shared helper centralizes the polling + 404-tolerance logic; + // `install_package` in opnsense-config uses the same primitive. + if opnsense_config::check_firmware_task_done(client) + .await + .is_some() + { + info!("{tag} firmware/{task_label}: upgradestatus reports done"); + return Ok(TaskOutcome { rebooted: false }); + } + } + + Err(FirmwareUpgradeError::Timeout(format!( + "firmware/{task_label} did not complete within {:?}", + TASK_DONE_TIMEOUT + ))) +} + +/// Wait for the firewall to go unreachable, come back, and settle. +/// +/// `firewall_ip` / `api_port` describe where the API should re-appear. 
+pub async fn wait_for_reboot_cycle( + firewall_ip: &str, + api_port: u16, + tag: &str, +) -> Result<(), FirmwareUpgradeError> { + info!("{tag} Waiting for the API to go unreachable (reboot in flight) ..."); + let unreach_deadline = Instant::now() + REBOOT_UNREACHABLE_TIMEOUT; + while Instant::now() < unreach_deadline { + tokio::time::sleep(PROBE_TIMEOUT).await; + if !probe_https(firewall_ip, api_port, PROBE_TIMEOUT).await { + info!("{tag} API unreachable — reboot in progress"); + break; + } + } + + info!("{tag} Waiting for OPNsense to come back at https://{firewall_ip}:{api_port} ..."); + let back_deadline = Instant::now() + REBOOT_RECOVERY_TIMEOUT; + let mut came_back = false; + while Instant::now() < back_deadline { + tokio::time::sleep(POLL_INTERVAL).await; + if probe_https(firewall_ip, api_port, POLL_INTERVAL).await { + came_back = true; + break; + } + } + if !came_back { + return Err(FirmwareUpgradeError::Timeout(format!( + "OPNsense did not come back at https://{firewall_ip}:{api_port} within {:?}", + REBOOT_RECOVERY_TIMEOUT + ))); + } + + info!( + "{tag} Web UI reachable; giving backend services {}s to settle ...", + POST_REBOOT_SETTLE.as_secs() + ); + tokio::time::sleep(POST_REBOOT_SETTLE).await; + Ok(()) +} + +/// Bring an already-bootstrapped OPNsense firewall to the latest firmware. +/// +/// Compose this Score right after `OPNsenseBootstrapScore` if you want +/// fine-grained control of the upgrade beat. If you're happy with the +/// default behavior, leave `OPNsenseBootstrapScore::upgrade_firmware` at +/// `true` — it calls the same helper internally. +#[derive(Debug, Clone, Serialize)] +pub struct OPNsenseFirmwareUpgradeScore { + /// HTTPS port the firewall's web GUI / API listens on. The default + /// (9443) matches the value `OPNsenseBootstrapScore` moves the GUI to. + pub api_port: u16, + /// How aggressive to be about applying pending updates. 
+ pub mode: FirmwareUpgradeMode, +} + +impl Default for OPNsenseFirmwareUpgradeScore { + fn default() -> Self { + Self { + api_port: 9443, + mode: FirmwareUpgradeMode::Auto, + } + } +} + +impl Score for OPNsenseFirmwareUpgradeScore { + fn name(&self) -> String { + "OPNsenseFirmwareUpgradeScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OPNsenseFirmwareUpgradeInterpret { + score: self.clone(), + }) + } +} + +#[derive(Debug)] +struct OPNsenseFirmwareUpgradeInterpret { + score: OPNsenseFirmwareUpgradeScore, +} + +#[async_trait] +impl Interpret for OPNsenseFirmwareUpgradeInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &OPNSenseFirewall, + ) -> Result { + let firewall_ip = topology.get_ip().to_string(); + let tag = format!("[OPNsenseFirmwareUpgrade/{firewall_ip}]"); + let config = topology.get_opnsense_config(); + + let outcome = perform_firmware_upgrade( + config.client(), + &firewall_ip, + self.score.api_port, + self.score.mode, + &tag, + ) + .await?; + + if outcome.upgraded { + Ok(Outcome::success_with_details( + outcome.message.clone(), + vec![ + format!("Initial version: {}", outcome.initial_version), + format!("Final version: {}", outcome.final_version), + format!("Iterations: {}", outcome.iterations), + format!("Rebooted: {}", outcome.rebooted), + ], + )) + } else { + Ok(Outcome::noop(outcome.message)) + } + } + + fn get_name(&self) -> InterpretName { + InterpretName::OPNsenseFirmwareUpgrade + } + + fn get_version(&self) -> Version { + Version::from("1.0.0").unwrap() + } + + fn get_status(&self) -> InterpretStatus { + InterpretStatus::QUEUED + } + + fn get_children(&self) -> Vec { + vec![] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_score_name() { + let s = OPNsenseFirmwareUpgradeScore::default(); + assert_eq!( + >::name(&s), + "OPNsenseFirmwareUpgradeScore" + ); + } + + #[test] + fn test_score_default_api_port_is_9443() { + 
assert_eq!(OPNsenseFirmwareUpgradeScore::default().api_port, 9443); + } + + #[test] + fn test_score_serializes() { + let s = OPNsenseFirmwareUpgradeScore::default(); + let _: serde_value::Value = + serde_value::to_value(&s).expect("OPNsenseFirmwareUpgradeScore should serialize"); + } +} diff --git a/harmony/src/modules/opnsense/lan_bridge.rs b/harmony/src/modules/opnsense/lan_bridge.rs new file mode 100644 index 00000000..a4e71c76 --- /dev/null +++ b/harmony/src/modules/opnsense/lan_bridge.rs @@ -0,0 +1,609 @@ +//! `OPNsenseLanBridgeScore` — single `if_bridge(4)` spanning logical interfaces. +//! +//! # Why this exists +//! +//! Built for the **pico-DC** topology (1× OPNsense + N hyperconverged +//! nodes, no physical switch). To get L2 connectivity between every +//! node and the firewall's own LAN services (DHCP, firewall, management +//! IP), OPNsense itself becomes the L2 fabric — an `if_bridge` spanning +//! the selected ports. On low-CPU hardware like the Wize 5070 the Score +//! also tunes a handful of `net.link.bridge.*` sysctls and disables +//! TSO/LRO globally (those break `if_bridge` on FreeBSD). +//! +//! Optionally re-points `` at the new bridge so +//! the LAN logical interface (and everything that hangs off it) spans +//! every member NIC. +//! +//! # Members are physical NIC names; the Score auto-assigns OPT slots +//! +//! Callers pass **physical NIC names** (`vtnet0`, `igc1`, …) — what an +//! operator sees on the hardware. The Score then: +//! +//! 1. Looks each NIC up in ``. If it's already assigned to +//! a logical name (`lan`, `opt1`, …), that logical name is reused. +//! 2. If the NIC has no logical assignment yet, the Score adds a new +//! `` entry over SSH (next free `optN`, with +//! `=`, `1`, plus a sensible ``) and brings +//! it up via `configctl interface configure `. The actual +//! bridge model still receives the logical name (OPNsense's +//! `BridgeMemberField` rejects raw NIC names — that's why this +//! translation exists). +//! 
3. The WAN port (``) is rejected up-front as a +//! member; a clear error is returned if the caller includes it. +//! +//! The pico-DC happy path: the operator's hardware has `lan` + `wan` +//! assigned (from the first-time wizard) and three unassigned PCIe +//! ports. They pass `members: Some(vec!["igc0","igc2","igc3","igc4"])` +//! (with `igc1` as WAN). After the Score runs they see `lan` + new +//! `opt1`/`opt2`/`opt3` entries in WebUI ▸ Interfaces ▸ Assignments, +//! plus `bridge0` spanning all four logical interfaces. +//! +//! # Two ways to use this +//! +//! * **Automatic.** [`OPNsenseBootstrapScore`](super::bootstrap_score::OPNsenseBootstrapScore) +//! composes [`ensure_lan_bridge_step`] internally when its +//! `lan_bridge: Option` field is `Some(_)`. Lives +//! between the firmware-upgrade and LAN-IP-rebind steps so the bridge +//! exists before any optional LAN-IP flip lands on it. +//! * **Standalone.** [`OPNsenseLanBridgeScore`] is a Score in its own +//! right (`Score`) — drop it into a normal +//! post-bootstrap Vec when configuring a firewall after the bootstrap +//! has already happened. + +use async_trait::async_trait; +use harmony_secret::SecretManager; +use harmony_types::id::Id; +use log::info; +use serde::Serialize; + +use crate::{ + config::secret::OPNSenseFirewallCredentials, + data::Version, + infra::opnsense::OPNSenseFirewall, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::opnsense::bootstrap::{ + DEFAULT_PHYSICAL_DRIVER_PREFIXES, ensure_lan_bridge_atomic_via_ssh, + list_physical_nics_via_ssh, opnsense_ssh_shell, + }, + score::Score, +}; + +/// Score parameters shared between the standalone Score and the +/// built-in step inside `OPNsenseBootstrapScore`. +#[derive(Debug, Clone, Serialize)] +pub struct LanBridgeParams { + /// **Physical NIC names** to add to the bridge (e.g. + /// `["igc0","igc2","igc3","igc4"]` or `["vtnet0"]` in a VM). 
The + /// Score translates each one to a logical interface name before + /// sending it to OPNsense's bridge model — unassigned NICs are + /// auto-promoted to the next free `optN` slot. + /// + /// Including the WAN port (whatever NIC backs ``) + /// is rejected with a hard error. + /// + /// `None` triggers an interactive `inquire::MultiSelect` over the + /// firewall's physical NICs (WAN excluded), each annotated with + /// its current logical assignment ("igc0 [lan]", "igc2 + /// [unassigned]", …). + pub members: Option>, + /// Bridge description (canonical identity for idempotency match). + pub description: String, + /// Optional MTU. Written to `` since the + /// OPNsense bridge model has no MTU field of its own. + pub mtu: Option, + /// Spanning Tree Protocol. Default `false` for point-to-point pico + /// DC (no redundant paths → no loops → STP just adds CPU overhead). + pub enable_stp: bool, + /// When `true`, re-point `` at the new bridge + /// after creation. Default `true`. + pub reassign_lan: bool, + /// When `true`, write opinionated `net.link.bridge.*` sysctls and + /// disable TSO/LRO globally. Default `true`. Required for any + /// reasonable bridge performance on low-CPU hardware. + pub perf_tunables: bool, +} + +impl Default for LanBridgeParams { + fn default() -> Self { + Self { + members: None, + description: "LAN bridge".to_string(), + mtu: None, + enable_stp: false, + reassign_lan: true, + perf_tunables: true, + } + } +} + +/// Result of running the bridge step. +#[derive(Debug, Clone)] +pub enum BridgeOutcome { + /// Bridge did not exist before; we created it. + Created { + bridgeif: String, + members: Vec, + }, + /// A matching bridge already existed; we wrote-through to ensure + /// drift convergence (the REST API treats this as a noop when the + /// payload matches what's already stored). + Updated { + bridgeif: String, + members: Vec, + }, +} + +/// Shared implementation of the LAN-bridge step. 
+/// +/// Used by both [`OPNsenseLanBridgeScore`] and +/// [`OPNsenseBootstrapScore`](super::bootstrap_score::OPNsenseBootstrapScore). +/// Both callers pass the same `LanBridgeParams` so the behaviour stays +/// in lockstep — there is no second implementation to drift. +pub async fn ensure_lan_bridge_step( + config: &opnsense_config::Config, + ssh_ip: &std::net::IpAddr, + ssh_user: &str, + ssh_pass: &str, + params: &LanBridgeParams, + tag: &str, +) -> Result { + // ── 1. Resolve physical NICs ─────────────────────────────────── + let physical_members = match ¶ms.members { + Some(ms) if !ms.is_empty() => ms.clone(), + Some(_) => { + return Err(InterpretError::new( + "OPNsenseLanBridgeScore: explicit `members` list is empty".into(), + )); + } + None => prompt_bridge_members(ssh_ip, ssh_user, ssh_pass, tag).await?, + }; + if physical_members.is_empty() { + return Err(InterpretError::new( + "OPNsenseLanBridgeScore: no bridge members selected".into(), + )); + } + + // ── 1a. Reject WAN ────────────────────────────────────────────── + let wan_phys = read_iface_if_via_ssh(ssh_ip, ssh_user, ssh_pass, "wan") + .await + .unwrap_or_default(); + if !wan_phys.is_empty() { + for phys in &physical_members { + if phys == &wan_phys { + return Err(InterpretError::new(format!( + "{phys} is the WAN port (interfaces.wan.if); refusing to add it \ + to a LAN bridge. Drop it from `members` and re-run." + ))); + } + } + } + + // ── 1b. Performance tunables (BEFORE bridge create) ───────────── + // `net.link.bridge.inherit_mac=1` only applies to bridges that + // attach a member AFTER the sysctl is set. If we ensure the bridge + // first, bridge0 has its own auto-generated MAC and the host's + // ARP/L2 path is silently broken once LAN's IP moves over. Set + // the sysctls first; the bridge then inherits the first member's + // MAC on creation. The other three sysctls (pfil_*) are pf-related + // and ordering-insensitive, but moving them too keeps the block + // atomic. 
+ if params.perf_tunables { + ensure_bridge_sysctls(config, tag).await?; + ensure_offloads_disabled(config, tag).await?; + } else { + info!("{tag} perf_tunables=false; skipping bridge sysctls and offload toggles"); + } + + info!( + "{tag} Atomic bridge-save: descr=\"{}\", physical_members={:?}, reassign_lan={}", + params.description, physical_members, params.reassign_lan + ); + + // ── 2. Atomic resolve + bridge + (optional) LAN reassignment ── + // The helper takes physical NIC names and does the + // physical→logical resolution INSIDE one PHP `Config::save()` so + // every change (new OPT entries, bridge entry, lan.if=bridgeN) + // lands atomically. Splitting into separate steps creates a window + // either with bridge having no kernel members (circular `lan` + // reference when reassign_lan=true) or with vtnet0 already a + // bridge member while lan still claims it. See + // `crate::modules::opnsense::bootstrap::ensure_lan_bridge_atomic_via_ssh` + // for the full rationale and the rules for which members get a + // dedicated OPT slot vs. reusing their existing logical name. 
+ let outcome = ensure_lan_bridge_atomic_via_ssh( + ssh_ip, + ssh_user, + ssh_pass, + &physical_members, + ¶ms.description, + params.enable_stp, + params.reassign_lan, + params.mtu, + ) + .await + .map_err(|e| InterpretError::new(format!("atomic LAN-bridge save failed: {e}")))?; + let bridgeif = outcome.bridgeif().to_string(); + info!( + "{tag} Bridge `{bridgeif}` {} (reassign_lan={})", + if outcome.was_created() { + "created" + } else { + "updated" + }, + params.reassign_lan + ); + + Ok(if outcome.was_created() { + BridgeOutcome::Created { + bridgeif, + members: physical_members, + } + } else { + BridgeOutcome::Updated { + bridgeif, + members: physical_members, + } + }) +} + +// ─── Private helpers ─────────────────────────────────────────────────── + +async fn prompt_bridge_members( + ip: &std::net::IpAddr, + user: &str, + pass: &str, + tag: &str, +) -> Result, InterpretError> { + info!("{tag} Enumerating physical NICs to offer for bridge membership"); + let nics = list_physical_nics_via_ssh(ip, user, pass, DEFAULT_PHYSICAL_DRIVER_PREFIXES) + .await + .map_err(|e| InterpretError::new(format!("physical-NIC enumeration failed: {e}")))?; + if nics.is_empty() { + return Err(InterpretError::new( + "no physical NICs detected via `ifconfig -l ether` — extend \ + DEFAULT_PHYSICAL_DRIVER_PREFIXES if your hardware uses an exotic driver" + .into(), + )); + } + + // Read current logical assignments so we can annotate each NIC. + let assignments = list_logical_interfaces_via_ssh(ip, user, pass) + .await + .unwrap_or_default(); + // Reverse map: physical NIC → logical name (e.g. "vtnet0" → "lan"). + let phys_to_logical: std::collections::HashMap = assignments + .iter() + .filter(|(_, phys)| !phys.is_empty()) + .map(|(name, phys)| (phys.clone(), name.clone())) + .collect(); + let wan_phys = assignments + .iter() + .find(|(name, _)| name == "wan") + .map(|(_, phys)| phys.clone()) + .unwrap_or_default(); + + // Drop the WAN port from the candidate list entirely. 
Anything else + // is a candidate, whether already assigned (will reuse the existing + // logical name) or unassigned (will get a new opt slot during the + // resolution step in `ensure_lan_bridge_step`). + let candidates: Vec<(String, String)> = nics + .into_iter() + .filter(|(name, _mac)| wan_phys.is_empty() || name != &wan_phys) + .map(|(name, _mac)| { + let annotation = phys_to_logical + .get(&name) + .map(String::as_str) + .unwrap_or("unassigned"); + let display = format!("{name} [{annotation}]"); + (display, name) + }) + .collect(); + if candidates.is_empty() { + return Err(InterpretError::new( + "no eligible bridge members left after filtering out the WAN port".into(), + )); + } + + let display_refs: Vec<&str> = candidates.iter().map(|(d, _)| d.as_str()).collect(); + let selected = inquire::MultiSelect::new( + "Select physical NICs to bridge for LAN (WAN excluded; unassigned will get a new OPT slot):", + display_refs, + ) + .prompt() + .map_err(|e| InterpretError::new(format!("interactive bridge-member selection failed: {e}")))?; + + Ok(candidates + .iter() + .filter(|(display, _)| selected.contains(&display.as_str())) + .map(|(_, name)| name.clone()) + .collect()) +} + +/// Enumerate all logical interfaces from `` plus their +/// backing physical NIC (``). Returns `[(logical_name, +/// physical_if), ...]` — e.g. `[("wan","vtnet1"), ("lan","vtnet0"), +/// ("opt1","igc2"), ...]`. Used by the interactive `MultiSelect` +/// prompt; the display shows both for clarity. +async fn list_logical_interfaces_via_ssh( + ip: &std::net::IpAddr, + user: &str, + pass: &str, +) -> Result, String> { + use opnsense_config::config::OPNsenseShell; + let shell = opnsense_ssh_shell(*ip, user, pass); + // Plain `name=if` pairs, one per line. tcsh-friendly: no inline `if/then/else`. + // NOTE on backslashes: shell single-quotes preserve `\` literally, so a + // single backslash in the Rust source IS what PHP parses. 
Doubling + // them produced `OPNsense\\Core\\Config` in PHP source which is a + // parse error (two consecutive separators), making `php -r` exit + // silently with empty stdout — caller can't tell apart "field missing" + // from "script never ran". + let php = "php -r 'require \"/usr/local/etc/inc/config.inc\"; \ + foreach (OPNsense\\Core\\Config::getInstance()->object()->interfaces->children() as $k => $v) { \ + echo $k . \"=\" . ((string)$v->if) . \"\\n\"; \ + }'"; + let out = shell + .exec(php) + .await + .map_err(|e| format!("ssh exec: {e}"))?; + let pairs = out + .lines() + .filter_map(|line| { + let line = line.trim(); + if line.is_empty() { + return None; + } + let (k, v) = line.split_once('=')?; + Some((k.trim().to_string(), v.trim().to_string())) + }) + .collect(); + Ok(pairs) +} + +/// Read `<{name}>` over SSH via PHP+SimpleXML through +/// the `Config` singleton (no manual config.xml edits). Returns the +/// physical NIC name bound to the named logical interface — e.g. +/// `"vtnet1"` for `wan`, `"bridge0"` for `lan` after reassignment. +async fn read_iface_if_via_ssh( + ip: &std::net::IpAddr, + user: &str, + pass: &str, + iface_name: &str, +) -> Result { + use opnsense_config::config::OPNsenseShell; + let shell = opnsense_ssh_shell(*ip, user, pass); + // Single `\` between namespace segments — shell single-quotes preserve + // backslashes literally, so this reaches PHP as `OPNsense\Core\Config`. + let php = format!( + "php -r 'require \"/usr/local/etc/inc/config.inc\"; \ + echo (string)OPNsense\\Core\\Config::getInstance()->object()->interfaces->{iface_name}->if;'" + ); + let out = shell + .exec(&php) + .await + .map_err(|e| format!("ssh exec: {e}"))?; + Ok(out.trim().to_string()) +} + +/// Write the four `net.link.bridge.*` sysctls through OPNsense's +/// `/api/core/tunables/*` endpoints — idempotent (no rewrite when the +/// value already matches). 
+async fn ensure_bridge_sysctls( + config: &opnsense_config::Config, + tag: &str, +) -> Result<(), InterpretError> { + const SYSCTLS: &[(&str, &str, &str)] = &[ + ( + "net.link.bridge.pfil_member", + "0", + "harmony: bridge perf — do not pf on member NICs", + ), + ( + "net.link.bridge.pfil_bridge", + "1", + "harmony: bridge perf — pf on bridge interface only", + ), + ( + "net.link.bridge.pfil_local_phys", + "0", + "harmony: bridge perf — do not pf local traffic on members", + ), + ( + "net.link.bridge.inherit_mac", + "1", + "harmony: bridge inherits MAC of first member", + ), + ]; + + let client = config.client(); + let mut changed = 0usize; + for (tunable, value, descr) in SYSCTLS { + // Search for an existing row with this tunable name. + let search: serde_json::Value = client + .post_typed( + "core", + "tunables", + "searchItem", + Some(&serde_json::json!({ "searchPhrase": tunable })), + ) + .await + .map_err(|e| InterpretError::new(format!("tunable searchItem({tunable}): {e}")))?; + + let existing = search["rows"].as_array().and_then(|rows| { + rows.iter() + .find(|r| r["tunable"].as_str() == Some(*tunable)) + }); + + let body = serde_json::json!({ + "sysctl": { "tunable": tunable, "value": value, "descr": descr }, + }); + + match existing { + Some(row) => { + let uuid = row["uuid"].as_str().unwrap_or("").to_string(); + let cur_value = row["value"].as_str().unwrap_or("").to_string(); + if cur_value == *value { + continue; + } + let _: serde_json::Value = client + .post_typed("core", "tunables", &format!("setItem/{uuid}"), Some(&body)) + .await + .map_err(|e| InterpretError::new(format!("tunable setItem({tunable}): {e}")))?; + changed += 1; + } + None => { + let _: serde_json::Value = client + .post_typed("core", "tunables", "addItem", Some(&body)) + .await + .map_err(|e| InterpretError::new(format!("tunable addItem({tunable}): {e}")))?; + changed += 1; + } + } + } + + if changed > 0 { + let _: serde_json::Value = client + .post_typed("core", "tunables", 
"reconfigure", None::<&()>) + .await + .map_err(|e| InterpretError::new(format!("tunables reconfigure: {e}")))?; + info!("{tag} Wrote {changed} bridge sysctl(s) and reconfigured tunables"); + } else { + info!("{tag} NOOP — all 4 bridge sysctls already match desired values"); + } + Ok(()) +} + +async fn ensure_offloads_disabled( + config: &opnsense_config::Config, + tag: &str, +) -> Result<(), InterpretError> { + let changed = config + .interface_settings() + .ensure_offloads_disabled() + .await + .map_err(|e| InterpretError::new(format!("offload toggles: {e}")))?; + if changed { + info!("{tag} Disabled hardware TSO + LRO offloads globally"); + } else { + info!("{tag} NOOP — TSO + LRO already disabled globally"); + } + Ok(()) +} + +// ─── Standalone Score ────────────────────────────────────────────────── + +/// Standalone Score over [`OPNSenseFirewall`] — composes the same +/// [`ensure_lan_bridge_step`] used internally by +/// [`OPNsenseBootstrapScore`](super::bootstrap_score::OPNsenseBootstrapScore). 
+#[derive(Debug, Clone, Default, Serialize)] +pub struct OPNsenseLanBridgeScore { + pub params: LanBridgeParams, +} + +impl Score for OPNsenseLanBridgeScore { + fn name(&self) -> String { + "OPNsenseLanBridgeScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OPNsenseLanBridgeInterpret { + score: self.clone(), + }) + } +} + +#[derive(Debug)] +struct OPNsenseLanBridgeInterpret { + score: OPNsenseLanBridgeScore, +} + +#[async_trait] +impl Interpret for OPNsenseLanBridgeInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &OPNSenseFirewall, + ) -> Result { + let ip: std::net::IpAddr = topology.get_ip(); + let tag = format!("[OPNsenseLanBridge/{ip}]"); + + let config = topology.get_opnsense_config(); + let ssh_creds = SecretManager::get::() + .await + .map_err(|e| { + InterpretError::new(format!( + "OPNsenseLanBridgeScore needs OPNSenseFirewallCredentials in SecretManager \ + (run OPNsenseBootstrapScore first): {e}" + )) + })?; + + let outcome = ensure_lan_bridge_step( + &config, + &ip, + &ssh_creds.username, + &ssh_creds.password, + &self.score.params, + &tag, + ) + .await?; + + let message = match &outcome { + BridgeOutcome::Created { bridgeif, members } => { + format!("Created bridge {bridgeif} with {} member(s)", members.len()) + } + BridgeOutcome::Updated { bridgeif, members } => { + format!("Updated bridge {bridgeif} ({} member(s))", members.len()) + } + }; + Ok(Outcome::success(message)) + } + + fn get_name(&self) -> InterpretName { + InterpretName::OPNsenseLanBridge + } + + fn get_version(&self) -> Version { + Version::from("1.0.0").unwrap() + } + + fn get_status(&self) -> InterpretStatus { + InterpretStatus::QUEUED + } + + fn get_children(&self) -> Vec { + vec![] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_score_name() { + let s = OPNsenseLanBridgeScore::default(); + assert_eq!( + >::name(&s), + "OPNsenseLanBridgeScore" + ); + } + + #[test] + fn test_score_serializes() { + let s = 
OPNsenseLanBridgeScore::default(); + let _: serde_value::Value = + serde_value::to_value(&s).expect("OPNsenseLanBridgeScore should serialize"); + } + + #[test] + fn test_default_params() { + let p = LanBridgeParams::default(); + assert_eq!(p.description, "LAN bridge"); + assert!(!p.enable_stp); + assert!(p.reassign_lan); + assert!(p.perf_tunables); + assert!(p.members.is_none()); + assert!(p.mtu.is_none()); + } +} diff --git a/harmony/src/modules/opnsense/mod.rs b/harmony/src/modules/opnsense/mod.rs index 1bac1f74..b53a529d 100644 --- a/harmony/src/modules/opnsense/mod.rs +++ b/harmony/src/modules/opnsense/mod.rs @@ -1,9 +1,14 @@ pub mod bootstrap; +pub mod bootstrap_score; pub mod dnat; pub mod firewall; +pub mod firmware_upgrade; pub mod image; pub mod lagg; +pub mod lan_bridge; pub mod node_exporter; +pub mod package_install; +pub mod pin_nic_names; mod shell; mod upgrade; pub mod vip; diff --git a/harmony/src/modules/opnsense/package_install.rs b/harmony/src/modules/opnsense/package_install.rs new file mode 100644 index 00000000..12db3624 --- /dev/null +++ b/harmony/src/modules/opnsense/package_install.rs @@ -0,0 +1,184 @@ +//! `OPNsensePackageInstallScore` — install one or more OPNsense plugin / +//! package via the REST API, idempotently. +//! +//! The Score is a thin wrapper around `opnsense_config::Config::install_package` +//! (the low-level method). It does two things on top of the bare call: +//! +//! 1. **Idempotency** — per package, skips the install when +//! `is_package_installed` already reports it present. +//! 2. **Score composition** — fits in a `Vec>>` +//! so operators can build linear pipelines instead of writing try/Err glue. +//! +//! Intentionally has **no** firmware-upgrade fallback. If the package fails to +//! install because the firmware is stale, the underlying `install_package` +//! returns a clear error that points the operator at +//! 
[`OPNsenseFirmwareUpgradeScore`](crate::modules::opnsense::firmware_upgrade::OPNsenseFirmwareUpgradeScore). +//! Compose that Score earlier in your pipeline if you want firmware-current +//! before plugin installs: +//! +//! ```ignore +//! vec![ +//! Box::new(OPNsenseFirmwareUpgradeScore { mode: Auto, api_port: 9443 }), +//! Box::new(OPNsensePackageInstallScore { +//! packages: vec!["os-haproxy".into()], +//! }), +//! // ... other Score ... +//! ] +//! ``` + +use async_trait::async_trait; +use harmony_types::id::Id; +use log::info; +use serde::Serialize; + +use crate::{ + data::Version, + infra::opnsense::OPNSenseFirewall, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + score::Score, +}; + +/// Install one or more OPNsense packages / plugins (e.g. `os-haproxy`). +/// +/// See module-level docs. +#[derive(Debug, Clone, Serialize)] +pub struct OPNsensePackageInstallScore { + /// Package names to install, in order. + pub packages: Vec, +} + +impl Score for OPNsensePackageInstallScore { + fn name(&self) -> String { + "OPNsensePackageInstallScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OPNsensePackageInstallInterpret { + score: self.clone(), + }) + } +} + +#[derive(Debug)] +struct OPNsensePackageInstallInterpret { + score: OPNsensePackageInstallScore, +} + +#[async_trait] +impl Interpret for OPNsensePackageInstallInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &OPNSenseFirewall, + ) -> Result { + let firewall_ip = topology.get_ip().to_string(); + let tag = format!("[OPNsensePackageInstall/{firewall_ip}]"); + let config = topology.get_opnsense_config(); + + if self.score.packages.is_empty() { + info!("{tag} No packages requested; nothing to do"); + return Ok(Outcome::noop("No packages requested".to_string())); + } + + let mut already_installed: Vec = Vec::new(); + let mut newly_installed: Vec = Vec::new(); + + for pkg in &self.score.packages { + if 
config.is_package_installed(pkg).await { + info!("{tag} {pkg}: already installed; skipping"); + already_installed.push(pkg.clone()); + continue; + } + info!("{tag} Installing {pkg} ..."); + config.install_package(pkg).await.map_err(|e| { + InterpretError::new(format!( + "Failed to install OPNsense package '{pkg}' on {firewall_ip}: {e}" + )) + })?; + info!("{tag} {pkg}: installed successfully"); + newly_installed.push(pkg.clone()); + } + + let total = self.score.packages.len(); + let details = vec![ + format!( + "Newly installed ({}): {:?}", + newly_installed.len(), + newly_installed + ), + format!( + "Already installed, skipped ({}): {:?}", + already_installed.len(), + already_installed + ), + ]; + + if newly_installed.is_empty() { + Ok(Outcome::noop(format!( + "All {total} package(s) already installed on {firewall_ip}" + ))) + } else { + Ok(Outcome::success_with_details( + format!( + "Installed {} of {total} packages on {firewall_ip} ({} already present)", + newly_installed.len(), + already_installed.len(), + ), + details, + )) + } + } + + fn get_name(&self) -> InterpretName { + InterpretName::OPNsensePackageInstall + } + + fn get_version(&self) -> Version { + Version::from("1.0.0").unwrap() + } + + fn get_status(&self) -> InterpretStatus { + InterpretStatus::QUEUED + } + + fn get_children(&self) -> Vec { + vec![] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_score_name() { + let s = OPNsensePackageInstallScore { + packages: vec!["os-haproxy".into()], + }; + assert_eq!( + >::name(&s), + "OPNsensePackageInstallScore" + ); + } + + #[test] + fn test_score_serializes() { + let s = OPNsensePackageInstallScore { + packages: vec!["os-haproxy".into(), "os-zerotier".into()], + }; + let _: serde_value::Value = + serde_value::to_value(&s).expect("OPNsensePackageInstallScore should serialize"); + } + + #[test] + fn test_empty_package_list_is_valid() { + let s = OPNsensePackageInstallScore { packages: vec![] }; + // Just confirm name + serialize 
still work with no packages. + assert_eq!( + >::name(&s), + "OPNsensePackageInstallScore" + ); + let _: serde_value::Value = serde_value::to_value(&s).unwrap(); + } +} diff --git a/harmony/src/modules/opnsense/pin_nic_names.rs b/harmony/src/modules/opnsense/pin_nic_names.rs new file mode 100644 index 00000000..d127949b --- /dev/null +++ b/harmony/src/modules/opnsense/pin_nic_names.rs @@ -0,0 +1,353 @@ +//! `OPNsensePinNicNamesScore` — pin physical NIC names to MAC addresses. +//! +//! # Why this exists +//! +//! On multi-NIC FreeBSD/OPNsense boxes (e.g. Wize 5070), PCIe/driver +//! enumeration order at boot is non-deterministic. `igc0/igc1/igc2/...` +//! shuffle between reboots, and OPNsense's logical `wan`/`lan` +//! assignments — bound to interface *names* — silently re-point at +//! whatever physical port that name happens to be on a given boot. +//! Firewall rules then apply to the wrong cables. +//! +//! ## Background reading +//! +//! * OPNsense forum, [Persistent NIC ordering/naming based on MAC +//! address(es)](https://forum.opnsense.org/index.php?topic=27023.0) +//! — the canonical thread describing the problem and franco +//! (OPNsense lead dev)'s endorsement of the `ethname` workaround. +//! * FreeBSD forums, [How to associate an interface name with its +//! MAC?](https://forums.freebsd.org/threads/how-to-associate-an-interface-name-with-its-mac.89337/) +//! — broader FreeBSD context for the same enumeration issue. +//! * GitHub [eborisch/ethname](https://github.com/eborisch/ethname) +//! — upstream repository (single 280-line POSIX shell script, MIT, +//! © Eric Borisch 2016–2019, frozen at v2.0.1 in March 2020). +//! * FreeBSD ports: [sysutils/ethname on +//! FreshPorts](https://www.freshports.org/sysutils/ethname/). +//! +//! # What it does +//! +//! Drops the vendored `ethname` rc.d script + an early-boot syshook +//! + a `/etc/rc.conf.d/ethname` mapping file onto the firewall, all +//! over SSH. On the next boot, `ethname` performs a two-stage +//! 
interface rename before `netif` so each MAC address always gets +//! the same interface name regardless of PCIe enumeration order. +//! +//! The script is vendored inline (see +//! [`crate::modules::opnsense::bootstrap::ETHNAME_SCRIPT`]) rather +//! than installed via `pkg install ethname` — `pkg install` on a +//! fresh ISO often fails because the firmware lags the live pkg +//! repo, and the firmware-upgrade reboot is precisely the boot we +//! need to defend against. Vendoring sidesteps the chicken-and-egg. +//! +//! # Two ways to use this +//! +//! * **Automatic.** [`OPNsenseBootstrapScore`](super::bootstrap_score::OPNsenseBootstrapScore) +//! composes [`pin_nic_names_step`] internally as a mandatory built-in +//! step. Every firewall bootstrapped through harmony gets pinned NIC +//! names without the caller asking for it. +//! * **Standalone.** [`OPNsensePinNicNamesScore`] is a Score in its own +//! right — drop it into a `Vec>>` +//! when re-pinning a firewall whose NICs you've shuffled, or when +//! running the step in isolation. + +use async_trait::async_trait; +use harmony_types::id::Id; +use log::{info, warn}; +use serde::Serialize; + +use crate::{ + data::Version, + interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}, + inventory::Inventory, + modules::opnsense::bootstrap::{ + DEFAULT_PHYSICAL_DRIVER_PREFIXES, ETHNAME_SCRIPT, install_ethname_via_ssh, + list_physical_nics_via_ssh, read_ethname_mac_set_via_ssh, + }, + score::Score, + topology::OPNsenseBootstrapTopology, +}; + +/// Result of running the pin step. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PinOutcome { + /// Wrote `/etc/rc.conf.d/ethname` and friends. The listed pairs + /// take effect at the next reboot. + Pinned { pairs: Vec<(String, String)> }, + /// `/etc/rc.conf.d/ethname` already pinned the same MAC set we + /// just observed; nothing to do. 
+ AlreadyCurrent { mac_count: usize }, + /// `ifconfig -l ether` returned no candidates matching the driver + /// prefix allowlist. Pinning is silently skipped (the caller + /// decides whether that's an error in context). + NoPhysicalNics, +} + +/// Shared implementation of the NIC-name pin step. +/// +/// Used both by [`OPNsensePinNicNamesScore`] (when run as a standalone +/// Score) and by [`OPNsenseBootstrapScore`](super::bootstrap_score::OPNsenseBootstrapScore) +/// as a built-in mandatory step. The two callers share this function +/// verbatim so the behaviour stays in lockstep — there is no second +/// implementation to drift. +/// +/// Logs progress with the provided `tag` so callers can scope log +/// lines (e.g. `[OPNsenseBootstrap/192.168.1.1]` vs +/// `[OPNsensePinNicNames/192.168.1.1]`). Idempotent — re-running on a +/// firewall whose MAC set already matches the config file returns +/// [`PinOutcome::AlreadyCurrent`] without touching anything. +pub async fn pin_nic_names_step( + ip: &std::net::IpAddr, + username: &str, + password: &str, + driver_prefixes: &[&str], + tag: &str, +) -> Result { + info!("{tag} Pinning physical NIC names to MAC addresses (vendored ethname)"); + + // 1. Discover current (name, MAC) pairings. + info!("{tag} Enumerating physical NICs via `ifconfig -l ether`"); + let pairs = list_physical_nics_via_ssh(ip, username, password, driver_prefixes) + .await + .map_err(|e| { + InterpretError::new(format!("Failed to enumerate physical NICs over SSH: {e}")) + })?; + + if pairs.is_empty() { + warn!( + "{tag} No physical NICs matched the driver-prefix allowlist. \ + If this is unexpected, the firewall's NIC driver may be missing \ + from DEFAULT_PHYSICAL_DRIVER_PREFIXES." + ); + return Ok(PinOutcome::NoPhysicalNics); + } + + info!( + "{tag} Discovered {} physical NIC(s): {}", + pairs.len(), + pairs + .iter() + .map(|(n, m)| format!("{n}={m}")) + .collect::>() + .join(", ") + ); + + // 2. Idempotency probe. 
+ info!("{tag} Checking for existing /etc/rc.conf.d/ethname"); + let live_mac_set: std::collections::BTreeSet = + pairs.iter().map(|(_, m)| m.clone()).collect(); + let existing = read_ethname_mac_set_via_ssh(ip, username, password) + .await + .map_err(|e| InterpretError::new(format!("Failed to read existing ethname config: {e}")))?; + + if let Some(ref existing_set) = existing + && *existing_set == live_mac_set + { + info!( + "{tag} NOOP — /etc/rc.conf.d/ethname already pins the current MAC set ({} MAC(s))", + existing_set.len() + ); + return Ok(PinOutcome::AlreadyCurrent { + mac_count: existing_set.len(), + }); + } + match existing.as_ref() { + Some(existing_set) => warn!( + "{tag} /etc/rc.conf.d/ethname exists with a different MAC set \ + (was {existing_set:?}, now {live_mac_set:?}); rewriting" + ), + None => info!("{tag} No prior /etc/rc.conf.d/ethname; performing first-time pin"), + } + + // 3. Install (script + config + syshook). + info!( + "{tag} Installing ethname (rc.d script + /etc/rc.conf.d/ethname \ + + early-boot syshook)" + ); + install_ethname_via_ssh(ip, username, password, ETHNAME_SCRIPT, &pairs) + .await + .map_err(|e| { + InterpretError::new(format!( + "Failed to install ethname over SSH: {e}. \ + The firewall may be partially configured — check \ + /usr/local/etc/rc.d/ethname, /etc/rc.conf.d/ethname, \ + and /usr/local/etc/rc.syshook.d/early/02-ethname." + )) + })?; + + info!( + "{tag} Pinned {} NIC(s) via vendored ethname; takes effect at next reboot", + pairs.len() + ); + Ok(PinOutcome::Pinned { pairs }) +} + +/// Pin physical NIC names to MAC addresses on a factory-fresh OPNsense. +/// +/// Targets [`OPNsenseBootstrapTopology`] so it can run against a +/// vanilla firewall using install-time defaults. +/// [`OPNsenseBootstrapScore`](super::bootstrap_score::OPNsenseBootstrapScore) +/// already runs the same logic internally — this standalone Score +/// exists for cases where you want to pin without doing the full +/// bootstrap dance (e.g. 
re-pinning after a hardware swap or on a +/// firewall that's already been bootstrapped by a previous run). +#[derive(Debug, Clone, Serialize)] +pub struct OPNsensePinNicNamesScore { + /// Driver-name allowlist used to filter `ifconfig -l ether` down + /// to physical NICs. The default + /// ([`DEFAULT_PHYSICAL_DRIVER_PREFIXES`]) covers common server / + /// appliance hardware. Override only on exotic drivers not in the + /// default set. + pub physical_driver_prefixes: Vec, +} + +impl Default for OPNsensePinNicNamesScore { + fn default() -> Self { + Self { + physical_driver_prefixes: DEFAULT_PHYSICAL_DRIVER_PREFIXES + .iter() + .map(|s| (*s).to_string()) + .collect(), + } + } +} + +impl Score for OPNsensePinNicNamesScore { + fn name(&self) -> String { + "OPNsensePinNicNamesScore".to_string() + } + + fn create_interpret(&self) -> Box> { + Box::new(OPNsensePinNicNamesInterpret { + score: self.clone(), + }) + } +} + +#[derive(Debug)] +struct OPNsensePinNicNamesInterpret { + score: OPNsensePinNicNamesScore, +} + +#[async_trait] +impl Interpret for OPNsensePinNicNamesInterpret { + async fn execute( + &self, + _inventory: &Inventory, + topology: &OPNsenseBootstrapTopology, + ) -> Result { + let ip = topology.vanilla_ip; + let tag = format!("[OPNsensePinNicNames/{ip}]"); + + let prefixes: Vec<&str> = self + .score + .physical_driver_prefixes + .iter() + .map(|s| s.as_str()) + .collect(); + + match pin_nic_names_step( + &ip, + &topology.default_username, + &topology.default_password, + &prefixes, + &tag, + ) + .await? 
+ { + PinOutcome::Pinned { pairs } => { + let mut details = vec![ + "OPNsense NIC names pinned to MAC addresses.".to_string(), + String::new(), + " Pinned mapping:".to_string(), + ]; + for (name, mac) in &pairs { + details.push(format!(" {name:<8} → {mac}")); + } + details.push(String::new()); + details.push(" ethname becomes active on the next reboot.".to_string()); + + Ok(Outcome::success_with_details( + format!( + "Pinned {} NIC name(s) to MAC addresses via vendored ethname script", + pairs.len() + ), + details, + )) + } + PinOutcome::AlreadyCurrent { mac_count } => Ok(Outcome::noop(format!( + "OPNsense NIC names already pinned ({mac_count} MAC(s)); nothing to do" + ))), + PinOutcome::NoPhysicalNics => Err(InterpretError::new(format!( + "No physical NICs matched the driver-prefix allowlist {:?}. \ + Either the firewall has no NICs visible to `ifconfig -l ether`, \ + or your hardware uses a driver not in the allowlist — extend \ + `OPNsensePinNicNamesScore::physical_driver_prefixes`.", + self.score.physical_driver_prefixes + ))), + } + } + + fn get_name(&self) -> InterpretName { + InterpretName::OPNsensePinNicNames + } + + fn get_version(&self) -> Version { + Version::from("1.0.0").unwrap() + } + + fn get_status(&self) -> InterpretStatus { + InterpretStatus::QUEUED + } + + fn get_children(&self) -> Vec { + vec![] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_score_name() { + let s = OPNsensePinNicNamesScore::default(); + assert_eq!( + >::name(&s), + "OPNsensePinNicNamesScore" + ); + } + + #[test] + fn test_score_serializes() { + let s = OPNsensePinNicNamesScore::default(); + let _: serde_value::Value = + serde_value::to_value(&s).expect("OPNsensePinNicNamesScore should serialize"); + } + + #[test] + fn test_default_driver_prefixes_include_common_hardware() { + let defaults = DEFAULT_PHYSICAL_DRIVER_PREFIXES; + for required in &["igc", "igb", "em", "vtnet"] { + assert!( + defaults.iter().any(|d| d == required), + 
"DEFAULT_PHYSICAL_DRIVER_PREFIXES missing required entry {required:?}" + ); + } + } + + #[test] + fn test_ethname_script_embedded() { + assert!( + ETHNAME_SCRIPT.starts_with("#!/bin/sh"), + "vendored ethname.sh does not start with #!/bin/sh" + ); + assert!( + ETHNAME_SCRIPT.contains("Eric Borisch"), + "vendored ethname.sh missing upstream copyright" + ); + assert!( + ETHNAME_SCRIPT.lines().count() > 200, + "vendored ethname.sh seems truncated" + ); + } +} diff --git a/opnsense-api/src/client.rs b/opnsense-api/src/client.rs index 43d3dbd4..0211e3d9 100644 --- a/opnsense-api/src/client.rs +++ b/opnsense-api/src/client.rs @@ -405,7 +405,13 @@ impl OpnsenseClient { Ok(json) } else { let body = response.text().await.unwrap_or_default(); - warn!(target: "opnsense-api", "{} {} → HTTP {status}: {}", method, url, body); + warn!( + target: "opnsense-api", + "{} {} → HTTP {status}: {}", + method, + url, + truncate_for_log(&body) + ); Err(Error::Api { status, method: method.to_string(), @@ -415,3 +421,58 @@ impl OpnsenseClient { } } } + +/// Squeeze an HTTP response body down to one short line suitable for a +/// log message. +/// +/// OPNsense's 404 (and many other error) pages are full HTML documents; +/// dumping them verbatim into the log makes WARN lines hundreds of +/// characters across multiple lines. This keeps the first non-empty line +/// (most of the time the document's first tag, e.g. ``), +/// trims it to ≤ 200 chars, and appends "…" if anything was elided. The +/// `Error::Api { body, .. }` value passed to callers is unchanged, so +/// code that needs the full body still has it. 
/// Squeeze an HTTP response body down to one short line suitable for a log
/// message.
///
/// The result is the first non-blank line of `body`, trimmed of surrounding
/// whitespace and capped at 200 **characters** (not bytes). A trailing `…` is
/// appended only when something was actually left out — either further
/// non-whitespace content after that first line, or characters beyond the cap.
/// When nothing is elided the returned value borrows from `body`; otherwise an
/// owned string is allocated.
///
/// An empty or whitespace-only `body` yields an empty string.
fn truncate_for_log(body: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    // Maximum number of characters kept from the first line.
    const MAX: usize = 200;

    let whole = body.trim();
    let first_line = body
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty())
        .unwrap_or("");

    // Byte offset of the (MAX + 1)-th character, if the line has one. Using
    // this for both the decision and the slice keeps them in the same unit
    // (characters) and guarantees the slice ends on a char boundary; comparing
    // byte length against MAX would flag short non-ASCII lines as truncated.
    let cut = first_line.char_indices().nth(MAX).map(|(idx, _)| idx);

    // Both sides are trimmed, so a length difference means later lines carried
    // non-whitespace content that is being dropped.
    let dropped_later_lines = first_line.len() < whole.len();

    match cut {
        Some(idx) => Cow::Owned(format!("{}…", &first_line[..idx])),
        None if dropped_later_lines => Cow::Owned(format!("{first_line}…")),
        None => Cow::Borrowed(first_line),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn truncate_short_single_line_is_unchanged() {
        let body = r#"{"error":"not found"}"#;
        assert_eq!(truncate_for_log(body), body);
    }

    #[test]
    fn truncate_html_keeps_first_line_only() {
        // NOTE(review): the markup in this fixture was missing from the text
        // under review and has been reconstructed; confirm the exact tags
        // against the repository copy.
        let body =
            "<!DOCTYPE html>\n<html>\n <head>\n <title>404 Not Found</title>\n </head>\n</html>\n";
        let out = truncate_for_log(body);
        assert_eq!(out, "<!DOCTYPE html>…");
    }

    #[test]
    fn truncate_caps_at_200_chars_with_ellipsis() {
        let body = "x".repeat(500);
        let out = truncate_for_log(&body);
        assert!(out.ends_with('…'), "expected ellipsis suffix, got {out:?}");
        // chars() not bytes() — ellipsis is multi-byte.
        assert_eq!(out.chars().count(), 201);
    }

    #[test]
    fn truncate_counts_chars_not_bytes() {
        // 150 two-byte characters: more than 200 bytes but fewer than 200
        // characters, so nothing is elided and no ellipsis may be added.
        let body = "é".repeat(150);
        assert_eq!(truncate_for_log(&body), body);
    }
}
**DO NOT EDIT** — produced by opnsense-codegen + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +pub mod serde_helpers { + pub mod opn_bool_req { + use serde::{Deserialize, Deserializer, Serializer}; + pub fn serialize(value: &bool, serializer: S) -> Result { + serializer.serialize_str(if *value { "1" } else { "0" }) + } + pub fn deserialize<'de, D: Deserializer<'de>>(deserializer: D) -> Result { + let v = serde_json::Value::deserialize(deserializer)?; + match &v { + serde_json::Value::String(s) => match s.as_str() { + "1" | "true" => Ok(true), + "0" | "false" => Ok(false), + other => Err(serde::de::Error::custom(format!( + "invalid required bool: {other}" + ))), + }, + serde_json::Value::Bool(b) => Ok(*b), + serde_json::Value::Number(n) => match n.as_u64() { + Some(1) => Ok(true), + Some(0) => Ok(false), + _ => Err(serde::de::Error::custom(format!( + "invalid required bool number: {n}" + ))), + }, + _ => Err(serde::de::Error::custom( + "expected string, bool, or number for required bool", + )), + } + } + } + + pub mod opn_u16 { + use serde::{Deserialize, Deserializer, Serializer}; + pub fn serialize( + value: &Option, + serializer: S, + ) -> Result { + match value { + Some(v) => serializer.serialize_str(&v.to_string()), + None => serializer.serialize_str(""), + } + } + pub fn deserialize<'de, D: Deserializer<'de>>( + deserializer: D, + ) -> Result, D::Error> { + let v = serde_json::Value::deserialize(deserializer)?; + match &v { + serde_json::Value::String(s) if s.is_empty() => Ok(None), + serde_json::Value::String(s) => { + s.parse::().map(Some).map_err(serde::de::Error::custom) + } + serde_json::Value::Number(n) => n + .as_u64() + .and_then(|n| u16::try_from(n).ok()) + .map(Some) + .ok_or_else(|| serde::de::Error::custom("number out of u16 range")), + serde_json::Value::Null => Ok(None), + _ => Err(serde::de::Error::custom( + "expected string or number for u16", + )), + } + } + } + + pub mod opn_string { + use serde::{Deserialize, 
Deserializer, Serializer}; + pub fn serialize( + value: &Option, + serializer: S, + ) -> Result { + match value { + Some(v) => serializer.serialize_str(v), + None => serializer.serialize_str(""), + } + } + pub fn deserialize<'de, D: Deserializer<'de>>( + deserializer: D, + ) -> Result, D::Error> { + let v = serde_json::Value::deserialize(deserializer)?; + match v { + serde_json::Value::String(s) if s.is_empty() => Ok(None), + serde_json::Value::String(s) => Ok(Some(s)), + serde_json::Value::Object(map) => { + let selected = map + .iter() + .find(|(_, v)| v.get("selected").and_then(|s| s.as_i64()).unwrap_or(0) == 1) + .map(|(k, _)| k.clone()) + .filter(|k| !k.is_empty()); + Ok(selected) + } + serde_json::Value::Null => Ok(None), + serde_json::Value::Array(_) => Ok(None), + _ => Err(serde::de::Error::custom("expected string, object, or null")), + } + } + } + + pub mod opn_csv { + use serde::{Deserialize, Deserializer, Serializer}; + pub fn serialize( + value: &Option>, + serializer: S, + ) -> Result { + match value { + Some(v) if !v.is_empty() => serializer.serialize_str(&v.join(",")), + _ => serializer.serialize_str(""), + } + } + pub fn deserialize<'de, D: Deserializer<'de>>( + deserializer: D, + ) -> Result>, D::Error> { + let v = serde_json::Value::deserialize(deserializer)?; + match v { + serde_json::Value::String(s) if s.is_empty() => Ok(None), + serde_json::Value::String(s) => Ok(Some( + s.split(',').map(|item| item.trim().to_string()).collect(), + )), + serde_json::Value::Array(arr) => { + let items: Result, _> = arr + .into_iter() + .map(|v| match v { + serde_json::Value::String(s) => Ok(s), + other => Err(serde::de::Error::custom(format!( + "expected string in array, got: {other}" + ))), + }) + .collect(); + let items = items?; + if items.is_empty() { + Ok(None) + } else { + Ok(Some(items)) + } + } + serde_json::Value::Object(map) => { + let selected: Vec = map + .into_iter() + .filter(|(_, v)| { + v.get("selected").and_then(|s| s.as_i64()).unwrap_or(0) == 
1 + }) + .map(|(k, _)| k) + .filter(|k| !k.is_empty()) + .collect(); + if selected.is_empty() { + Ok(None) + } else { + Ok(Some(selected)) + } + } + serde_json::Value::Null => Ok(None), + _ => Err(serde::de::Error::custom( + "expected string, array, or object for csv field", + )), + } + } + } + + pub mod opn_map { + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + use std::collections::HashMap; + use std::fmt; + use std::marker::PhantomData; + + pub fn deserialize<'de, D, V>(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + V: Deserialize<'de>, + { + struct MapOrArray(PhantomData); + + impl<'de, V: Deserialize<'de>> serde::de::Visitor<'de> for MapOrArray { + type Value = HashMap; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("a map or an empty array") + } + + fn visit_map>( + self, + mut map: A, + ) -> Result { + let mut result = HashMap::new(); + while let Some((k, v)) = map.next_entry()? { + result.insert(k, v); + } + Ok(result) + } + + fn visit_seq>( + self, + mut seq: A, + ) -> Result { + while seq.next_element::()?.is_some() {} + Ok(HashMap::new()) + } + } + + deserializer.deserialize_any(MapOrArray(PhantomData)) + } + + pub fn serialize(map: &HashMap, serializer: S) -> Result + where + S: Serializer, + V: Serialize, + { + map.serialize(serializer) + } + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Enums +// ═══════════════════════════════════════════════════════════════════════════ + +/// BridgeProto — Required, default `rstp`. Options: `rstp` / `stp`. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum BridgeProto { + Rstp, + Stp, + /// Preserves unrecognized wire values for safe round-tripping. 
+ Other(String), +} + +pub(crate) mod serde_bridge_proto { + use super::BridgeProto; + use serde::{Deserialize, Deserializer, Serializer}; + + pub fn serialize( + value: &Option, + serializer: S, + ) -> Result { + serializer.serialize_str(match value { + Some(BridgeProto::Rstp) => "rstp", + Some(BridgeProto::Stp) => "stp", + Some(BridgeProto::Other(s)) => s.as_str(), + None => "", + }) + } + + pub fn deserialize<'de, D: Deserializer<'de>>( + deserializer: D, + ) -> Result, D::Error> { + let v = serde_json::Value::deserialize(deserializer)?; + match v { + serde_json::Value::String(s) => match s.as_str() { + "rstp" => Ok(Some(BridgeProto::Rstp)), + "stp" => Ok(Some(BridgeProto::Stp)), + "" => Ok(None), + other => Ok(Some(BridgeProto::Other(other.to_string()))), + }, + serde_json::Value::Object(map) => { + let selected_key = map + .iter() + .find(|(_, v)| v.get("selected").and_then(|s| s.as_i64()).unwrap_or(0) == 1) + .map(|(k, _)| k.as_str()); + match selected_key { + Some("rstp") => Ok(Some(BridgeProto::Rstp)), + Some("stp") => Ok(Some(BridgeProto::Stp)), + Some("") | None => Ok(None), + Some(other) => Ok(Some(BridgeProto::Other(other.to_string()))), + } + } + serde_json::Value::Null => Ok(None), + serde_json::Value::Array(arr) => { + let selected = arr + .iter() + .find(|v| v.get("selected").and_then(|s| s.as_i64()).unwrap_or(0) == 1) + .and_then(|v| v.get("value").and_then(|s| s.as_str())); + match selected { + Some("rstp") => Ok(Some(BridgeProto::Rstp)), + Some("stp") => Ok(Some(BridgeProto::Stp)), + Some("") | None => Ok(None), + Some(other) => Ok(Some(BridgeProto::Other(other.to_string()))), + } + } + other => Err(serde::de::Error::custom(format!( + "unexpected type for BridgeProto: {:?}", + other + ))), + } + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Structs +// ═══════════════════════════════════════════════════════════════════════════ + +/// Root model for `/bridges` +#[derive(Default, Debug, Clone, Serialize, 
Deserialize)] +pub struct Bridges { + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_map")] + pub bridged: HashMap, +} + +/// Array item for `bridged` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct BridgesBridged { + /// TextField | required | regex `^bridge[\d]+$` + #[serde(default)] + pub bridgeif: String, + + /// BridgeMemberField | required | Multiple + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_csv")] + pub members: Option>, + + /// BooleanField | optional + #[serde( + default, + with = "crate::generated::bridge::serde_helpers::opn_bool_req" + )] + pub linklocal: bool, + + /// BooleanField | optional + #[serde( + default, + with = "crate::generated::bridge::serde_helpers::opn_bool_req" + )] + pub enablestp: bool, + + /// OptionField | required | default=rstp | enum=BridgeProto + #[serde(default, with = "crate::generated::bridge::serde_bridge_proto")] + pub proto: Option, + + /// BridgeMemberField | optional | Multiple + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_csv")] + pub stp: Option>, + + /// IntegerField | optional | [6-40] + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_u16")] + pub maxage: Option, + + /// IntegerField | optional | [4-30] + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_u16")] + pub fwdelay: Option, + + /// IntegerField | optional | [1-10] + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_u16")] + pub holdcnt: Option, + + /// IntegerField | optional | min=1 + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_u16")] + pub maxaddr: Option, + + /// IntegerField | optional | min=0 + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_u16")] + pub timeout: Option, + + /// BridgeMemberField | optional (single-valued) + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_string")] + pub span: Option, + + /// 
BridgeMemberField | optional | Multiple + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_csv")] + pub edge: Option>, + + /// BridgeMemberField | optional | Multiple + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_csv")] + pub autoedge: Option>, + + /// BridgeMemberField | optional | Multiple + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_csv")] + pub ptp: Option>, + + /// BridgeMemberField | optional | Multiple + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_csv")] + pub autoptp: Option>, + + /// BridgeMemberField | optional | Multiple + /// (`static` is a Rust keyword — exposed via the raw identifier.) + #[serde( + default, + rename = "static", + with = "crate::generated::bridge::serde_helpers::opn_csv" + )] + pub r#static: Option>, + + /// BridgeMemberField | optional | Multiple + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_csv")] + pub private: Option>, + + /// DescriptionField | optional + #[serde(default, with = "crate::generated::bridge::serde_helpers::opn_string")] + pub descr: Option, +} + +// ═══════════════════════════════════════════════════════════════════════════ +// API Wrapper +// ═══════════════════════════════════════════════════════════════════════════ + +/// Wrapper matching the OPNsense GET response envelope. +/// `GET /api/interfaces/bridge_settings/get` returns { "bridge": { ... } } +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct BridgesResponse { + pub bridge: Bridges, +} diff --git a/opnsense-api/src/generated/bridge_settings_api.rs b/opnsense-api/src/generated/bridge_settings_api.rs new file mode 100644 index 00000000..e53be070 --- /dev/null +++ b/opnsense-api/src/generated/bridge_settings_api.rs @@ -0,0 +1,95 @@ +//! Auto-generated typed API client for OPNsense `interfaces/bridge_settings`. +//! +//! 
**DO NOT EDIT** — produced by opnsense-codegen + +use crate::client::OpnsenseClient; +use crate::error::Error; +use crate::response::{SearchResponse, SearchRow, StatusResponse, UuidResponse}; + +#[derive(serde::Serialize)] +struct ItemEnvelope<'a, T: serde::Serialize> { + #[serde(rename = "bridge")] + inner: &'a T, +} + +/// Typed API client for `interfaces/bridge_settings` endpoints. +pub struct BridgeSettingsApi<'a> { + client: &'a OpnsenseClient, +} + +impl<'a> BridgeSettingsApi<'a> { + pub fn new(client: &'a OpnsenseClient) -> Self { + Self { client } + } + + /// Search items. + /// + /// Returns a typed [`SearchResponse`] with [`SearchRow`] entries. + /// Use `row.label()` for the description and `row.uuid` for the ID. + pub async fn search_items(&self) -> Result, Error> { + self.client + .search_items("interfaces", "bridge_settings", "Item") + .await + } + + /// Update a item by UUID. + /// + /// Pass the model struct directly — the JSON envelope is handled automatically. + pub async fn set_item( + &self, + uuid: &str, + item: &(impl serde::Serialize + Sync), + ) -> Result { + self.client + .set_item( + "interfaces", + "bridge_settings", + "Item", + uuid, + &ItemEnvelope { inner: item }, + ) + .await + } + + /// Add a new item. + /// + /// Pass the model struct directly — the JSON envelope + /// (`{"bridge": {...}}`) is handled automatically. + pub async fn add_item( + &self, + item: &(impl serde::Serialize + Sync), + ) -> Result { + self.client + .add_item( + "interfaces", + "bridge_settings", + "Item", + &ItemEnvelope { inner: item }, + ) + .await + } + + /// Get a single item by UUID. + pub async fn get_item( + &self, + uuid: &str, + ) -> Result { + self.client + .get_item("interfaces", "bridge_settings", "Item", uuid) + .await + } + + /// Delete a item by UUID. + pub async fn del_item(&self, uuid: &str) -> Result { + self.client + .del_item("interfaces", "bridge_settings", "Item", uuid) + .await + } + + /// Execute the `reconfigure` action. 
+ pub async fn reconfigure(&self) -> Result { + self.client + .post_typed("interfaces", "bridge_settings", "reconfigure", None::<&()>) + .await + } +} diff --git a/opnsense-api/src/generated/interfaces.rs b/opnsense-api/src/generated/interfaces.rs index e04e2215..75d3f450 100644 --- a/opnsense-api/src/generated/interfaces.rs +++ b/opnsense-api/src/generated/interfaces.rs @@ -26,6 +26,18 @@ pub enum Disablevlanhwfilter { } /// Per-variant serde for [`Disablevlanhwfilter`]. +/// +/// Wire format note: `Settings.xml` declares the options as +/// `` — the `value` attribute is the actual +/// wire code (`"0"`/`"1"`/`"2"`), not the XML element name. Confirmed +/// via `BaseModel::parseOptionData` (vendor source). +/// +/// On GET, OPNsense returns the `BaseListField::getNodeOptions()` +/// select-widget structure. Because the option keys are numerical +/// strings (`"0"`/`"1"`/`"2"`), PHP's `json_encode` collapses them to +/// a JSON **array** rather than an object — so the array index IS the +/// wire code. The deserializer handles both shapes plus the plain-string +/// fast path used by `setItem` round-trips. 
pub(crate) mod serde_disablevlanhwfilter { use super::Disablevlanhwfilter; use log::debug; @@ -36,9 +48,9 @@ pub(crate) mod serde_disablevlanhwfilter { serializer: S, ) -> Result { serializer.serialize_str(match value { - Some(Disablevlanhwfilter::EnableVlanHardwareFiltering) => "opt0", - Some(Disablevlanhwfilter::DisableVlanHardwareFiltering) => "opt1", - Some(Disablevlanhwfilter::LeaveDefault) => "opt2", + Some(Disablevlanhwfilter::EnableVlanHardwareFiltering) => "0", + Some(Disablevlanhwfilter::DisableVlanHardwareFiltering) => "1", + Some(Disablevlanhwfilter::LeaveDefault) => "2", None => "", }) } @@ -48,19 +60,44 @@ pub(crate) mod serde_disablevlanhwfilter { ) -> Result, D::Error> { let v = serde_json::Value::deserialize(deserializer)?; debug!("Disablevlanhwfilter deserializing {v}"); - match v { - serde_json::Value::String(s) => match s.as_str() { - "opt0" => Ok(Some(Disablevlanhwfilter::EnableVlanHardwareFiltering)), - "opt1" => Ok(Some(Disablevlanhwfilter::DisableVlanHardwareFiltering)), - "opt2" => Ok(Some(Disablevlanhwfilter::LeaveDefault)), + fn from_key(key: &str) -> Result, E> { + match key { + "0" => Ok(Some(Disablevlanhwfilter::EnableVlanHardwareFiltering)), + "1" => Ok(Some(Disablevlanhwfilter::DisableVlanHardwareFiltering)), + "2" => Ok(Some(Disablevlanhwfilter::LeaveDefault)), "" => Ok(None), - other => Err(serde::de::Error::custom(format!( + other => Err(E::custom(format!( "unknown Disablevlanhwfilter variant: {other}" ))), - }, + } + } + match v { + serde_json::Value::String(s) => from_key(s.as_str()), serde_json::Value::Null => Ok(None), + // Object form: `{"0": {value:..., selected:0/1}, "1": {...}, ...}`. + // The map key IS the wire code. 
+ serde_json::Value::Object(map) => { + let selected_key = map + .iter() + .find(|(_, v)| v.get("selected").and_then(|s| s.as_i64()).unwrap_or(0) == 1) + .map(|(k, _)| k.as_str()) + .unwrap_or(""); + from_key(selected_key) + } + // Array form (what OPNsense actually returns for this field — + // PHP's `json_encode` collapses string-numeric keys into a + // sequential JSON array). The array index IS the wire code. + serde_json::Value::Array(arr) => { + let idx = arr + .iter() + .position(|v| v.get("selected").and_then(|s| s.as_i64()).unwrap_or(0) == 1); + match idx { + Some(i) => from_key(&i.to_string()), + None => Ok(None), + } + } _ => Err(serde::de::Error::custom( - "expected string for Disablevlanhwfilter", + "expected string, object, array, or null for Disablevlanhwfilter", )), } } diff --git a/opnsense-api/src/generated/mod.rs b/opnsense-api/src/generated/mod.rs index 56e7d9e0..313cb7f4 100644 --- a/opnsense-api/src/generated/mod.rs +++ b/opnsense-api/src/generated/mod.rs @@ -2,6 +2,8 @@ //! //! Produced by `opnsense-codegen`. 
+pub mod bridge; +pub mod bridge_settings_api; pub mod caddy; pub mod d_nat_api; pub mod dnsmasq; diff --git a/opnsense-config/src/config/config.rs b/opnsense-config/src/config/config.rs index 8d27835d..792abe34 100644 --- a/opnsense-config/src/config/config.rs +++ b/opnsense-config/src/config/config.rs @@ -7,10 +7,11 @@ use serde::Deserialize; use crate::{ error::Error, modules::{ - caddy::CaddyConfig, dnat::DnatConfig, dnsmasq::DhcpConfigDnsMasq, - firewall::FirewallFilterConfig, lagg::LaggConfig as LaggConfigModule, - load_balancer::LoadBalancerConfig, node_exporter::NodeExporterConfig, tftp::TftpConfig, - vip::VipConfig, vlan::VlanConfig as VlanConfigModule, + bridge::BridgeConfig, caddy::CaddyConfig, dnat::DnatConfig, dnsmasq::DhcpConfigDnsMasq, + firewall::FirewallFilterConfig, interface_settings::InterfaceSettingsConfig, + lagg::LaggConfig as LaggConfigModule, load_balancer::LoadBalancerConfig, + node_exporter::NodeExporterConfig, tftp::TftpConfig, vip::VipConfig, + vlan::VlanConfig as VlanConfigModule, }, }; @@ -33,10 +34,36 @@ struct InstallResponse { msg_uuid: String, } -#[derive(Debug, Deserialize)] -struct UpgradeStatus { - #[serde(default)] - status: String, +/// Poll interval for `firmware/upgradestatus`-style task polling. +const FIRMWARE_TASK_POLL_INTERVAL: std::time::Duration = std::time::Duration::from_secs(3); + +/// Maximum attempts when polling `firmware/upgradestatus` for `"done"`. +/// 120 × 3 s = 6 min, an upper bound that's never hit in practice — the +/// install task either succeeds in seconds or fails in seconds (we surface +/// the failure via the `log` field). The ceiling guards against pathological +/// stuck-task cases. +const FIRMWARE_TASK_MAX_ATTEMPTS: u32 = 120; + +/// Single-shot probe of `/api/core/firmware/upgradestatus`. +/// +/// Returns `Some(status_json)` only when the endpoint reports +/// `status == "done"` (the task has finished). 
Returns `None` for every +/// other case — task still running, transient 404 (the endpoint is +/// documented as "known to be unstable" on OPNsense 26.1.6 and reliably +/// 404s when no task is registered), or any other error. +/// +/// Callers loop around this with their own timeout / interval, and +/// inspect the returned JSON (notably the `log` field) when `Some` is +/// returned. See `Config::install_package` and +/// `harmony::modules::opnsense::firmware_upgrade::wait_for_task_or_reboot`. +pub async fn check_firmware_task_done(client: &OpnsenseClient) -> Option<serde_json::Value> { + match client + .get_typed::<serde_json::Value>("core", "firmware", "upgradestatus") + .await + { + Ok(s) if s["status"].as_str() == Some("done") => Some(s), + _ => None, + } } impl Config { @@ -147,6 +174,14 @@ impl Config { LaggConfigModule::new(self.client.clone()) } + pub fn bridge(&self) -> BridgeConfig { + BridgeConfig::new(self.client.clone()) + } + + pub fn interface_settings(&self) -> InterfaceSettingsConfig { + InterfaceSettingsConfig::new(self.client.clone()) + } + pub fn firewall(&self) -> FirewallFilterConfig { FirewallFilterConfig::new(self.client.clone()) } @@ -177,8 +212,20 @@ impl Config { /// Install an OPNsense plugin package via the firmware API. /// - /// Triggers the install, polls for completion, and verifies the package - /// is listed as installed. + /// Triggers the install asynchronously, then polls + /// `/api/core/firmware/upgradestatus` for `status == "done"` (the same + /// pattern OPNsense's own WebUI uses for its install progress popup). + /// When the task ends, verifies via `/api/core/firmware/info` whether + /// the package actually got installed: + /// + /// - Installed → `Ok(())`. + /// - Not installed → `Err(Error::PackageInstall { … })`, with the + /// tail of `upgradestatus.log` (pkg's actual error output) embedded + /// in the message + a hint to run `OPNsenseFirmwareUpgradeScore`. 
+ /// + /// `upgradestatus` errors are tolerated as transient (OPNsense 26.1.6 + /// release notes mark the endpoint as unstable; the WebUI traps its + /// error popup). The 120 × 3 s ceiling is the safety net. pub async fn install_package(&self, package_name: &str) -> Result<(), Error> { info!("Installing OPNsense package {package_name}"); @@ -205,44 +252,62 @@ impl Config { resp.msg_uuid ); - // Poll for completion - for _ in 0..120 { - tokio::time::sleep(std::time::Duration::from_secs(3)).await; - let status: UpgradeStatus = self + for _attempt in 0..FIRMWARE_TASK_MAX_ATTEMPTS { + tokio::time::sleep(FIRMWARE_TASK_POLL_INTERVAL).await; + let Some(status_json) = check_firmware_task_done(&self.client).await else { + continue; + }; + + // Task ended. Did it install the package? + let info: serde_json::Value = self .client - .get_typed("core", "firmware", "upgradestatus") + .get_typed("core", "firmware", "info") .await .map_err(Error::Api)?; - - if status.status == "done" { - break; + let installed = info["package"] + .as_array() + .and_then(|pkgs| { + pkgs.iter() + .find(|p| p["name"].as_str() == Some(package_name)) + }) + .and_then(|p| p["installed"].as_str()) + == Some("1"); + if installed { + info!("Package {package_name} installed successfully"); + return Ok(()); } + + // Install task ended without installing the package. Surface + // pkg's actual error output from the `log` field. 
+ let log = status_json["log"].as_str().unwrap_or(""); + let tail: Vec<&str> = log + .lines() + .filter(|l| !l.trim().is_empty()) + .rev() + .take(8) + .collect::<Vec<_>>() + .into_iter() + .rev() + .collect(); + let reason = if tail.is_empty() { + "(OPNsense returned no log output)".to_string() + } else { + format!("Last OPNsense log output:\n{}", tail.join("\n")) + }; + return Err(Error::PackageInstall(format!( + "OPNsense install task for {package_name} ended without installing \ + the package.\n\n{reason}\n\nThis typically means the firmware needs \ + to be brought current — run OPNsenseFirmwareUpgradeScore first, \ + then retry." + ))); } - // Verify installation - let info: serde_json::Value = self - .client - .get_typed("core", "firmware", "info") - .await - .map_err(Error::Api)?; - - let installed = info["package"] - .as_array() - .and_then(|pkgs| { - pkgs.iter() - .find(|p| p["name"].as_str() == Some(package_name)) - }) - .and_then(|p| p["installed"].as_str()) - == Some("1"); - - if installed { - info!("Package {package_name} installed successfully"); - Ok(()) - } else { - let msg = format!("Package {package_name} installation did not complete successfully"); - warn!("{msg}"); - Err(Error::PackageInstall(msg)) - } + let msg = format!( + "Package {package_name} did not appear as installed within {} seconds", + FIRMWARE_TASK_MAX_ATTEMPTS as u64 * FIRMWARE_TASK_POLL_INTERVAL.as_secs() + ); + warn!("{msg}"); + Err(Error::PackageInstall(msg)) } /// Check if a package is installed via the firmware API. 
diff --git a/opnsense-config/src/config/shell/ssh.rs b/opnsense-config/src/config/shell/ssh.rs index 1f82c636..3414256f 100644 --- a/opnsense-config/src/config/shell/ssh.rs +++ b/opnsense-config/src/config/shell/ssh.rs @@ -4,7 +4,6 @@ use std::{ sync::Arc, time::{SystemTime, UNIX_EPOCH}, }; -use tokio_stream::StreamExt; use async_trait::async_trait; use log::{debug, info, trace}; @@ -14,14 +13,19 @@ use russh::{ }; use russh_keys::key; use russh_sftp::client::SftpSession; -use tokio::io::AsyncWriteExt; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; use crate::{config::SshCredentials, Error}; use super::OPNsenseShell; use tokio::fs::read_dir; use tokio::fs::File; -use tokio_util::codec::{BytesCodec, FramedRead}; + +/// Local read buffer for SFTP uploads. The old `FramedRead<_, BytesCodec>` +/// path defaulted to ~8 KB chunks; each chunk became its own SFTP WRITE +/// round-trip. 256 KB collapses that to a fraction of the awaits and lets +/// `write_all` amortize over multiple in-flight protocol packets. 
+const UPLOAD_CHUNK_SIZE: usize = 256 * 1024; #[derive(Debug)] pub struct SshOPNSenseShell { @@ -110,18 +114,14 @@ impl OPNsenseShell for SshOPNSenseShell { let mut remote_file = sftp.create(remote_path.as_str()).await?; debug!("Writing file {remote_path:?}"); - let local_file = File::open(&local_path).await?; - let mut reader = FramedRead::new(local_file, BytesCodec::new()); - - while let Some(result) = reader.next().await { - match result { - Ok(bytes) => { - if !bytes.is_empty() { - AsyncWriteExt::write_all(&mut remote_file, &bytes).await?; - } - } - Err(e) => todo!("Error unhandled {e}"), - }; + let mut local_file = File::open(&local_path).await?; + let mut buf = vec![0u8; UPLOAD_CHUNK_SIZE]; + loop { + let n = local_file.read(&mut buf).await?; + if n == 0 { + break; + } + AsyncWriteExt::write_all(&mut remote_file, &buf[..n]).await?; } } else if entry.file_type().await?.is_dir() { let sub_source = entry.path(); @@ -129,6 +129,28 @@ impl OPNsenseShell for SshOPNSenseShell { format!("{}/{}", destination, entry.file_name().to_string_lossy()); self.upload_folder(sub_source.to_str().unwrap(), &sub_destination) .await?; + } else if entry.file_type().await?.is_symlink() { + // SFTP `create()` would dereference + copy the target, losing + // the link semantics; we instead recreate the symlink on the + // remote. Use `ln -sfn` over SSH rather than the SFTP + // SSH_FXP_SYMLINK opcode — its (path, target) argument order + // is inverted between OpenSSH server and the protocol spec, + // and `ln` has unambiguous semantics across shells. 
+ let local_path = entry.path(); + let target = tokio::fs::read_link(&local_path).await?; + let target_str = target.to_string_lossy().to_string(); + let file_name = local_path + .file_name() + .expect("symlink entry must have a name") + .to_string_lossy(); + let remote_path = format!("{}/{}", destination, file_name); + info!("Creating remote symlink {remote_path} -> {target_str}"); + let cmd = format!( + "ln -sfn '{}' '{}'", + target_str.replace('\'', r"'\''"), + remote_path.replace('\'', r"'\''"), + ); + self.run_command(&cmd).await?; } } diff --git a/opnsense-config/src/lib.rs b/opnsense-config/src/lib.rs index 47ddb768..966a0fe7 100644 --- a/opnsense-config/src/lib.rs +++ b/opnsense-config/src/lib.rs @@ -2,5 +2,6 @@ pub mod config; pub mod error; pub mod modules; +pub use config::check_firmware_task_done; pub use config::Config; pub use error::Error; diff --git a/opnsense-config/src/modules/bridge.rs b/opnsense-config/src/modules/bridge.rs new file mode 100644 index 00000000..29b21eb3 --- /dev/null +++ b/opnsense-config/src/modules/bridge.rs @@ -0,0 +1,165 @@ +//! `BridgeConfig` — REST-API wrapper for OPNsense bridge interfaces. +//! +//! Mirrors [`crate::modules::lagg::LaggConfig`] line-for-line. The bridge +//! Score in `harmony` consumes this helper through +//! `Config::bridge().ensure_bridge(...)`. + +use log::{info, warn}; +use opnsense_api::generated::bridge::{BridgeProto, BridgesBridged}; +use opnsense_api::generated::bridge_settings_api::BridgeSettingsApi; +use opnsense_api::OpnsenseClient; + +use crate::Error; + +pub struct BridgeConfig { + client: OpnsenseClient, +} + +impl BridgeConfig { + pub(crate) fn new(client: OpnsenseClient) -> Self { + Self { client } + } + + fn api(&self) -> BridgeSettingsApi<'_> { + BridgeSettingsApi::new(&self.client) + } + + /// List all bridges currently configured. 
+ pub async fn list_bridges(&self) -> Result<Vec<BridgeEntry>, Error> { + let resp: opnsense_api::generated::bridge::BridgesResponse = self + .client + .get_typed("interfaces", "bridge_settings", "get") + .await + .map_err(Error::Api)?; + + let entries = resp + .bridge + .bridged + .into_iter() + .map(|(uuid, v)| { + let members = v + .members + .unwrap_or_default() + .into_iter() + .filter(|s| !s.is_empty()) + .collect(); + BridgeEntry { + uuid, + bridgeif: v.bridgeif, + members, + enablestp: v.enablestp, + description: v.descr.unwrap_or_default(), + } + }) + .collect(); + Ok(entries) + } + + /// Ensure a bridge exists with the given members. + /// + /// Idempotency: first match by `description` (canonical identity), then + /// fall back to a sorted-member-set match. If found, the entry is + /// updated in place via `set_item`; otherwise a fresh one is created. + /// `reconfigure` runs after the write. + /// + /// Returns `(uuid, bridgeif)` — the bridge name (`bridge0`, `bridge1`, + /// …) is assigned by OPNsense on create, so we re-read after `add_item`. + pub async fn ensure_bridge( + &self, + members: &[String], + description: &str, + enable_stp: bool, + ) -> Result<(String, String), Error> { + let existing = self.list_bridges().await?; + + let mut sorted_members: Vec<String> = members.to_vec(); + sorted_members.sort(); + + // `proto` is Required="Y" in Bridge.xml — always send rstp; OPNsense + // honours `enablestp=0` as the off switch regardless of `proto`. 
+ let bridge = BridgesBridged { + members: Some(members.to_vec()), + descr: Some(description.to_string()), + enablestp: enable_stp, + proto: Some(BridgeProto::Rstp), + ..Default::default() + }; + + if let Some(entry) = existing.iter().find(|b| { + if b.description == description { + return true; + } + let mut em = b.members.clone(); + em.sort(); + em == sorted_members + }) { + if entry.description != description || entry.enablestp != enable_stp || { + let mut em = entry.members.clone(); + em.sort(); + em != sorted_members + } { + warn!( + "Bridge {} (uuid={}) config differs — updating", + entry.bridgeif, entry.uuid + ); + } else { + info!( + "Bridge {} (uuid={}) already matches, updating to ensure consistency", + entry.bridgeif, entry.uuid + ); + } + self.api() + .set_item(&entry.uuid, &bridge) + .await + .map_err(Error::Api)?; + self.api().reconfigure().await.map_err(Error::Api)?; + return Ok((entry.uuid.clone(), entry.bridgeif.clone())); + } + + info!( + "Creating bridge with members {:?}, description \"{description}\"", + members + ); + let resp = self.api().add_item(&bridge).await.map_err(Error::Api)?; + self.api().reconfigure().await.map_err(Error::Api)?; + + // OPNsense assigns the `bridgeif` (e.g. `bridge0`) at create time; + // re-list to learn it. + let after = self.list_bridges().await?; + let bridgeif = after + .iter() + .find(|e| e.uuid == resp.uuid) + .map(|e| e.bridgeif.clone()) + .ok_or_else(|| { + Error::Unexpected(format!( + "Bridge {} added but not found in subsequent list", + resp.uuid + )) + })?; + Ok((resp.uuid, bridgeif)) + } + + /// Remove a bridge by UUID. + pub async fn remove_bridge(&self, uuid: &str) -> Result<(), Error> { + info!("Deleting bridge {uuid}"); + self.api().del_item(uuid).await.map_err(Error::Api)?; + self.api().reconfigure().await.map_err(Error::Api)?; + Ok(()) + } + + /// Trigger `reconfigure` without changing config — useful after manual + /// edits. 
+ pub async fn reconfigure(&self) -> Result<(), Error> { + self.api().reconfigure().await.map_err(Error::Api)?; + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub struct BridgeEntry { + pub uuid: String, + pub bridgeif: String, + pub members: Vec<String>, + pub enablestp: bool, + pub description: String, +} diff --git a/opnsense-config/src/modules/interface_settings.rs b/opnsense-config/src/modules/interface_settings.rs new file mode 100644 index 00000000..f81340b8 --- /dev/null +++ b/opnsense-config/src/modules/interface_settings.rs @@ -0,0 +1,71 @@ +//! `InterfaceSettingsConfig` — singleton wrapper for OPNsense's global +//! `interfaces/settings` model. +//! +//! Today exposes one operation: `ensure_offloads_disabled` — idempotently +//! sets `disablesegmentationoffloading` + `disablelargereceiveoffloading` +//! to `true`. TSO and LRO commonly break `if_bridge` on FreeBSD, so any +//! caller that brings up a bridge should call this first. + +use log::info; +use opnsense_api::generated::interfaces::{InterfacesSettings, InterfacesSettingsResponse}; +use opnsense_api::OpnsenseClient; +use serde::Serialize; + +use crate::Error; + +pub struct InterfaceSettingsConfig { + client: OpnsenseClient, +} + +#[derive(Serialize)] +struct SettingsEnvelope<'a> { + settings: &'a InterfacesSettings, +} + +impl InterfaceSettingsConfig { + pub(crate) fn new(client: OpnsenseClient) -> Self { + Self { client } + } + + /// Fetch the current global interface settings. + pub async fn get(&self) -> Result<InterfacesSettings, Error> { + let resp: InterfacesSettingsResponse = self + .client + .get_typed("interfaces", "settings", "get") + .await + .map_err(Error::Api)?; + Ok(resp.settings) + } + + /// Idempotently disable hardware segmentation (TSO) and large-receive + /// (LRO) offload globally. Returns `true` when a write actually + /// happened, `false` when both flags were already set (NOOP). + /// + /// On a fresh OPNsense install both default to `false`; for bridge + /// performance on FreeBSD we want them both `true`. 
+ pub async fn ensure_offloads_disabled(&self) -> Result<bool, Error> { + let mut current = self.get().await?; + if current.disablesegmentationoffloading && current.disablelargereceiveoffloading { + return Ok(false); + } + current.disablesegmentationoffloading = true; + current.disablelargereceiveoffloading = true; + info!("Disabling segmentation + LRO offloads via interfaces/settings/set"); + let _: serde_json::Value = self + .client + .post_typed( + "interfaces", + "settings", + "set", + Some(&SettingsEnvelope { settings: &current }), + ) + .await + .map_err(Error::Api)?; + let _: serde_json::Value = self + .client + .post_typed("interfaces", "settings", "reconfigure", None::<&()>) + .await + .map_err(Error::Api)?; + Ok(true) + } +} diff --git a/opnsense-config/src/modules/mod.rs b/opnsense-config/src/modules/mod.rs index 4ca778c6..14494abb 100644 --- a/opnsense-config/src/modules/mod.rs +++ b/opnsense-config/src/modules/mod.rs @@ -1,3 +1,4 @@ +pub mod bridge; pub mod caddy; pub mod dhcp; pub mod dhcp_legacy; @@ -5,6 +6,7 @@ pub mod dnat; pub mod dns; pub mod dnsmasq; pub mod firewall; +pub mod interface_settings; pub mod lagg; pub mod load_balancer; pub mod node_exporter;