From 97ec4848fd6f844f19b749cafd1da8a3a511c931 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 11:39:11 -0400 Subject: [PATCH 01/14] fix(linux): make ensure_user_unit_active actually report NOOP systemctl --user enable --now is systemd-level idempotent, but the prior implementation always returned ChangeReport::CHANGED. This made every re-run of any score that touches a user-scoped unit (notably FleetDeviceSetupScore's podman.socket step) lie about its change count, defeating the noop detection the rest of the score honors. Probe is-enabled --quiet && is-active --quiet first; only call enable --now (and report CHANGED) when the unit isn't already in the desired state. Mirrors the existing ensure_linger pattern in the same file. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/linux/ansible_configurator.rs | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/harmony/src/modules/linux/ansible_configurator.rs b/harmony/src/modules/linux/ansible_configurator.rs index af78bc03..962ee347 100644 --- a/harmony/src/modules/linux/ansible_configurator.rs +++ b/harmony/src/modules/linux/ansible_configurator.rs @@ -279,22 +279,31 @@ impl AnsibleHostConfigurator { // `XDG_RUNTIME_DIR` in the systemctl process env — a task- // level `environment:` keyword only available in playbooks. // Rather than pipe a one-task playbook, use russh directly: - // two small SSH calls, no Python wrapper, no inline YAML. - // - // Report `changed=true` unconditionally. systemctl - // enable --now is idempotent at the systemd level so re- - // running does no harm; reconcile-restart decisions - // upstream see only the outer-Score changes they care - // about (TOML and unit file changes), not this start- - // verification step. + // a probe + (optionally) an enable. let id_out = ssh_exec(host, creds, &format!("id -u {user}")) .await? .into_successful()?; let uid = id_out.stdout.trim(); - let cmd = format!( - "sudo -u {user} env XDG_RUNTIME_DIR=/run/user/{uid} \ - systemctl --user enable --now {unit}" - ); + let env_prefix = format!("sudo -u {user} env XDG_RUNTIME_DIR=/run/user/{uid}"); + + // Already enabled and active → NOOP. `is-enabled --quiet` + // and `is-active --quiet` exit 0 only when the unit is + // both wanted and running, which is exactly the + // post-condition `enable --now` establishes. + let probe = ssh_exec( + host, + creds, + &format!( + "{env_prefix} systemctl --user is-enabled --quiet {unit} && \ + {env_prefix} systemctl --user is-active --quiet {unit}" + ), + ) + .await?; + if probe.rc == 0 { + return Ok(ChangeReport::NOOP); + } + + let cmd = format!("{env_prefix} systemctl --user enable --now {unit}"); ssh_exec(host, creds, &cmd).await?.into_successful()?; Ok(ChangeReport::CHANGED) } -- 2.39.5 From 2cb884976abe464f97735892e2259126abd14d5f Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 11:41:05 -0400 Subject: [PATCH 02/14] feat(examples): add fleet_rpi_setup for onboarding a physical Pi Sibling of fleet_vm_setup with the libvirt provisioning step removed: the operator has already booted Pi OS Lite themselves (rpi-imager, preloaded SSH key, passwordless sudo on the admin user), so the example goes straight to applying FleetDeviceSetupScore over SSH. Defaults match the typical rpi-imager flow (--pi-user pi, --ssh-key ~/.ssh/id_ed25519); --ssh-key supports tilde expansion. The harmony dep is pulled in without the kvm feature since no VM is created here. RUST_LOG defaults to info so the score's per-step traces show up without the operator having to set the env var. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/fleet_rpi_setup/Cargo.toml | 18 +++ examples/fleet_rpi_setup/src/main.rs | 172 +++++++++++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 examples/fleet_rpi_setup/Cargo.toml create mode 100644 examples/fleet_rpi_setup/src/main.rs diff --git a/examples/fleet_rpi_setup/Cargo.toml b/examples/fleet_rpi_setup/Cargo.toml new file mode 100644 index 00000000..7874c8d0 --- /dev/null +++ b/examples/fleet_rpi_setup/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "example_fleet_rpi_setup" +version.workspace = true +edition = "2024" +license.workspace = true + +[[bin]] +name = "fleet_rpi_setup" +path = "src/main.rs" + +[dependencies] +harmony = { path = "../../harmony" } +harmony_types = { path = "../../harmony_types" } +tokio.workspace = true +log.workspace = true +env_logger.workspace = true +anyhow.workspace = true +clap.workspace = true diff --git a/examples/fleet_rpi_setup/src/main.rs b/examples/fleet_rpi_setup/src/main.rs new file mode 100644 index 00000000..a5b967c0 --- /dev/null +++ b/examples/fleet_rpi_setup/src/main.rs @@ -0,0 +1,172 @@ +//! Onboard a real, already-booted Raspberry Pi into the IoT fleet. +//! +//! This is the physical-device sibling of `fleet_vm_setup`: the VM +//! provisioning step is gone (you booted Pi OS yourself with rpi-imager +//! and preloaded an SSH key), and we go straight to Step 2 — SSH into +//! the Pi and apply `FleetDeviceSetupScore`. That score installs podman +//! + systemd-container, creates the `fleet-agent` user, drops the agent +//! binary + config + systemd unit, and starts the service. +//! +//! Prereqs on the Pi (one-time, via rpi-imager or manual): +//! - SSH server enabled +//! - An admin user with passwordless sudo (e.g. the default `pi` user +//! or whatever you set in rpi-imager) +//! - Your driver-machine SSH public key in that user's +//! `~/.ssh/authorized_keys` +//! +//! Prereqs on the driver machine (where this binary runs): +//! - Python 3 + `python3-venv` (Ansible is auto-bootstrapped into a venv) +//! - A cross-compiled `fleet-agent` binary for aarch64 + +use anyhow::{Context, Result}; +use clap::Parser; +use harmony::inventory::Inventory; +use harmony::modules::fleet::{FleetDeviceSetupConfig, FleetDeviceSetupScore}; +use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv}; +use harmony_types::id::Id; +use std::path::PathBuf; + +#[derive(Parser, Debug)] +#[command( + name = "fleet_rpi_setup", + about = "Onboard a physical Raspberry Pi into the IoT fleet" +)] +struct Cli { + /// IP address of the Pi (e.g. 192.168.1.42). + #[arg(long)] + pi_host: String, + /// SSH user on the Pi with passwordless sudo. + #[arg(long, default_value = "pi")] + pi_user: String, + /// Path to the SSH private key whose public half is in the Pi + /// user's `~/.ssh/authorized_keys`. + #[arg(long, default_value = "~/.ssh/id_ed25519")] + ssh_key: PathBuf, + /// Device id the agent will announce to NATS. Defaults to a fresh + /// `Id` (sortable hex timestamp + random suffix). + #[arg(long)] + device_id: Option, + /// Routing labels for `Deployment.spec.targetSelector` matching. + /// Comma-separated `key=value` pairs. At least one is required. + #[arg(long, default_value = "group=group-a,arch=aarch64")] + labels: String, + /// Path to the cross-compiled aarch64 fleet-agent binary on the + /// driver machine. Uploaded to `/usr/local/bin/fleet-agent`. + #[arg(long)] + agent_binary: PathBuf, + /// NATS URL the agent should connect to. + #[arg(long)] + nats_url: String, + #[arg(long, default_value = "smoke")] + nats_user: String, + #[arg(long, default_value = "smoke")] + nats_pass: String, +} + +#[tokio::main] +async fn main() -> Result<()> { + // Default to info so the score's per-step traces show up without + // the user having to set RUST_LOG. Honors RUST_LOG if exported. + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); + let cli = Cli::parse(); + + ensure_ansible_venv() + .await + .map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?; + + let device_id = cli + .device_id + .clone() + .map(Id::from) + .unwrap_or_else(Id::default); + + let ssh_key = expand_tilde(&cli.ssh_key); + let pi_ip = cli + .pi_host + .parse() + .with_context(|| format!("--pi-host '{}' is not a valid IP address", cli.pi_host))?; + + let topology = LinuxHostTopology::new( + format!("rpi-{}", cli.pi_host), + pi_ip, + SshCredentials { + user: cli.pi_user.clone(), + private_key_path: ssh_key, + // Pi OS Lite ships /usr/bin/python3 — skip auto-discovery. + remote_python: Some("/usr/bin/python3".to_string()), + }, + ); + + let labels = parse_labels(&cli.labels)?; + let labels_display = labels + .iter() + .map(|(k, v)| format!("{k}={v}")) + .collect::>() + .join(","); + + let score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig { + device_id: device_id.clone(), + labels, + nats_urls: vec![cli.nats_url.clone()], + nats_user: cli.nats_user.clone(), + nats_pass: cli.nats_pass.clone(), + agent_binary_path: cli.agent_binary.clone(), + }); + + run_setup_score(&score, &topology).await?; + println!( + "🚀 device '{device_id}' ({labels_display}) onboarded via {}@{}", + cli.pi_user, cli.pi_host + ); + Ok(()) +} + +fn parse_labels(raw: &str) -> Result> { + let mut out = std::collections::BTreeMap::new(); + for piece in raw.split(',').map(str::trim).filter(|p| !p.is_empty()) { + let (k, v) = piece + .split_once('=') + .ok_or_else(|| anyhow::anyhow!("label chunk '{piece}' missing '='"))?; + let k = k.trim(); + let v = v.trim(); + if k.is_empty() || v.is_empty() { + anyhow::bail!("label chunk '{piece}' has empty key or value"); + } + out.insert(k.to_string(), v.to_string()); + } + if out.is_empty() { + anyhow::bail!("--labels must include at least one key=value pair"); + } + Ok(out) +} + +fn expand_tilde(p: &std::path::Path) -> PathBuf { + let s = p.to_string_lossy(); + if let Some(rest) = s.strip_prefix("~/") { + if let Ok(home) = std::env::var("HOME") { + return PathBuf::from(home).join(rest); + } + } + p.to_path_buf() +} + +async fn run_setup_score( + score: &FleetDeviceSetupScore, + topology: &LinuxHostTopology, +) -> Result<()> { + use harmony::score::Score; + let inventory = Inventory::empty(); + let interpret = Score::::create_interpret(score); + let outcome = interpret + .execute(&inventory, topology) + .await + .map_err(|e| anyhow::anyhow!("FleetDeviceSetupScore execute: {e}"))?; + + println!(); + println!("==== FleetDeviceSetupScore recap ===="); + for line in &outcome.details { + println!("{line}"); + } + println!("====================================="); + Ok(()) +} -- 2.39.5 From bf1a438b9090ecceb9927fbdb9ea3bb42a04fc2f Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 12:38:27 -0400 Subject: [PATCH 03/14] fix(linux): drop redundant envelope from parseable ansible errors When stdout already parses into UNREACHABLE!/FAILED! + msg, the trailing (ansible-exit=..., stderr=..., stdout=...) envelope just duplicated the same text. Strip it when stderr is empty and the verb is recognized; keep it when it adds debug signal. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/linux/ansible_configurator.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/harmony/src/modules/linux/ansible_configurator.rs b/harmony/src/modules/linux/ansible_configurator.rs index 962ee347..9369882c 100644 --- a/harmony/src/modules/linux/ansible_configurator.rs +++ b/harmony/src/modules/linux/ansible_configurator.rs @@ -415,11 +415,25 @@ impl AnsibleHostConfigurator { let stderr = String::from_utf8_lossy(&output.stderr); parse_oneline(&stdout).map_err(|parse_err| { + // For Ansible-parseable failures (UNREACHABLE!/FAILED!), + // `parse_err` already carries the verb + the human msg, so + // attaching the full rc/stderr/stdout envelope just + // duplicates the text. Only include the envelope when it + // adds debug signal: an unparseable stdout (real protocol + // mismatch) or a non-empty stderr. + let stderr = stderr.trim(); + let already_parsed = parse_err.starts_with("UNREACHABLE!") + || parse_err.starts_with("FAILED!"); + if already_parsed && stderr.is_empty() { + return exec(format!( + "ansible module {module} failed against {host}: {parse_err}" + )); + } exec(format!( "ansible module {module} failed against {host}: {parse_err} \ (ansible-exit={}, stderr={}, stdout={})", output.status, - stderr.trim(), + stderr, stdout.trim() )) }) -- 2.39.5 From c3b25c829851a3e80173fa9d87308edca0e6465f Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 12:38:33 -0400 Subject: [PATCH 04/14] feat(fleet): structured progress + recap for FleetDeviceSetupScore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the opaque change-log with tagged per-step info traces and a human-readable Outcome.details recap (Device ID / NATS / Labels / User / Agent binary -> remote / Service). User and Service lines carry their own ✅/🔄 state markers; final line is ✅ for noop and 🎉 for runs that applied changes. Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony/src/modules/fleet/setup_score.rs | 133 +++++++++++++++++------ 1 file changed, 97 insertions(+), 36 deletions(-) diff --git a/harmony/src/modules/fleet/setup_score.rs b/harmony/src/modules/fleet/setup_score.rs index 35ee960a..1f410dfa 100644 --- a/harmony/src/modules/fleet/setup_score.rs +++ b/harmony/src/modules/fleet/setup_score.rs @@ -17,9 +17,8 @@ use crate::domain::interpret::{ }; use crate::domain::inventory::Inventory; use crate::domain::topology::{ - ChangeReport, FileDelivery, FileSource, FileSpec, HostReachable, LinuxHostConfiguration, - PackageInstaller, SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager, - UserSpec, + FileDelivery, FileSource, FileSpec, HostReachable, LinuxHostConfiguration, PackageInstaller, + SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager, UserSpec, }; use crate::score::Score; @@ -182,16 +181,23 @@ impl Interpret for FleetDeviceSetupInte topology: &T, ) -> Result { let cfg = &self.config; - HostReachable::ping(topology).await.map_err(wrap)?; + let tag = format!("FleetSetup/{}", cfg.device_id); - let mut change_log: Vec = Vec::new(); + info!("[{tag}] Pinging device {}", topology.name()); + HostReachable::ping(topology).await.map_err(wrap)?; + info!("[{tag}] Device {} reachable", topology.name()); + + let mut change_count = 0usize; // 1. Dependencies. + info!("[{tag}] Step 1/6 — ensuring system packages: podman, systemd-container"); for pkg in ["podman", "systemd-container"] { let r = PackageInstaller::ensure_package(topology, pkg) .await .map_err(wrap)?; - log_change(&mut change_log, format!("package:{pkg}"), r); + if r.changed { + change_count += 1; + } } // 2. fleet-agent user. Not `--system`: Ubuntu's useradd skips @@ -205,6 +211,7 @@ impl Interpret for FleetDeviceSetupInte // // Lingered so the user-systemd instance survives logout — // required for the user podman.socket we enable below. + info!("[{tag}] Step 2/6 — ensuring fleet-agent user with linger"); let user_spec = UserSpec { name: "fleet-agent".to_string(), group: None, @@ -213,28 +220,39 @@ impl Interpret for FleetDeviceSetupInte system: false, create_home: true, }; - let r = UnixUserManager::ensure_user(topology, &user_spec) + let user_r = UnixUserManager::ensure_user(topology, &user_spec) .await .map_err(wrap)?; - log_change(&mut change_log, "user:fleet-agent", r); + if user_r.changed { + change_count += 1; + } - let r = UnixUserManager::ensure_linger(topology, "fleet-agent") + let linger_r = UnixUserManager::ensure_linger(topology, "fleet-agent") .await .map_err(wrap)?; - log_change(&mut change_log, "linger:fleet-agent", r); + if linger_r.changed { + change_count += 1; + } // 3. User-scoped podman socket. Required by `PodmanTopology` on // the agent so it reaches /run/user//podman/podman.sock. - let r = SystemdManager::ensure_user_unit_active(topology, "fleet-agent", "podman.socket") + info!("[{tag}] Step 3/6 — activating user-scoped podman.socket"); + let socket_r = SystemdManager::ensure_user_unit_active(topology, "fleet-agent", "podman.socket") .await .map_err(wrap)?; - log_change(&mut change_log, "user-unit:podman.socket", r); + if socket_r.changed { + change_count += 1; + } // 4. Binary. Ship via ansible's native copy-from-local-file // path (`FileSource::LocalPath`). Ansible handles binary // content over SFTP and reports `changed: true` only when the // remote file actually differs from the local one — so // re-running this Score without a new binary is a true NOOP. + info!( + "[{tag}] Step 4/6 — uploading agent binary {} -> /usr/local/bin/fleet-agent", + cfg.agent_binary_path.display() + ); let binary_r = FileDelivery::ensure_file( topology, &FileSpec { @@ -247,9 +265,18 @@ impl Interpret for FleetDeviceSetupInte ) .await .map_err(wrap)?; - log_change(&mut change_log, "file:/usr/local/bin/fleet-agent", binary_r); + if binary_r.changed { + change_count += 1; + } // 5. /etc/fleet-agent/ + config.toml + info!( + "[{tag}] Step 5/6 — rendering /etc/fleet-agent/config.toml ({} NATS URL{}, {} label{})", + cfg.nats_urls.len(), + if cfg.nats_urls.len() == 1 { "" } else { "s" }, + cfg.labels.len(), + if cfg.labels.len() == 1 { "" } else { "s" }, + ); let config_toml = cfg.render_toml(); let toml_spec = FileSpec { path: "/etc/fleet-agent/config.toml".to_string(), @@ -261,9 +288,12 @@ impl Interpret for FleetDeviceSetupInte let toml_r = FileDelivery::ensure_file(topology, &toml_spec) .await .map_err(wrap)?; - log_change(&mut change_log, "file:/etc/fleet-agent/config.toml", toml_r); + if toml_r.changed { + change_count += 1; + } // 6. systemd unit for the agent itself. + info!("[{tag}] Step 6/6 — installing fleet-agent.service"); let unit = SystemdUnitSpec { name: "fleet-agent".to_string(), unit_content: cfg.render_systemd_unit().to_string(), @@ -273,33 +303,70 @@ impl Interpret for FleetDeviceSetupInte let unit_r = SystemdManager::ensure_systemd_unit(topology, &unit) .await .map_err(wrap)?; - log_change(&mut change_log, "unit:fleet-agent", unit_r); + if unit_r.changed { + change_count += 1; + } // 7. Restart the agent iff anything that affects it changed. let needs_restart = toml_r.changed || unit_r.changed || binary_r.changed; - if needs_restart { + let service_state = if needs_restart { + info!("[{tag}] 🔄 Restarting fleet-agent (config/binary/unit changed)"); SystemdManager::restart_service(topology, "fleet-agent", SystemdScope::System) .await .map_err(wrap)?; - change_log.push("restart:fleet-agent".to_string()); - info!("fleet-agent restarted to pick up config/unit change"); + change_count += 1; + "🔄 restarted to pick up changes" } else { - debug!("fleet-agent config + unit unchanged; no restart"); - } + debug!("[{tag}] No restart needed (config + unit + binary unchanged)"); + "✅ running unchanged" + }; - let outcome = if change_log.is_empty() { - Outcome::noop(format!("{} already configured", cfg.device_id)) + // Build human-readable recap. Style matches OKDAddNodeScore: + // one fact per line, "Key: value" prefix, all the operationally + // useful state in a single Outcome details block so a CLI run + // shows the operator exactly what was done without chasing + // log levels. + let labels_display = if cfg.labels.is_empty() { + "(none)".to_string() } else { - Outcome::success_with_details( - format!( - "{} configured ({} changes)", - cfg.device_id, - change_log.len() - ), - change_log, + cfg.labels + .iter() + .map(|(k, v)| format!("{k}={v}")) + .collect::>() + .join(", ") + }; + let user_state = match (user_r.changed, linger_r.changed) { + (true, _) => "🔄 newly created, linger enabled", + (false, true) => "🔄 already present, linger newly enabled", + (false, false) => "✅ already present, lingered", + }; + let details = vec![ + format!("Device ID: {}", cfg.device_id), + format!("NATS URLs: {}", cfg.nats_urls.join(", ")), + format!("Labels: {labels_display}"), + format!("User: fleet-agent ({user_state})"), + format!("Agent binary: {}", cfg.agent_binary_path.display()), + " -> /usr/local/bin/fleet-agent".to_string(), + format!("Service: fleet-agent.service ({service_state})"), + ]; + + let (status, msg) = if change_count == 0 { + info!("[{tag}] ✅ Done — no changes (device already configured)"); + ( + InterpretStatus::NOOP, + format!("{} already configured", cfg.device_id), + ) + } else { + info!( + "[{tag}] 🎉 Done — {change_count} change{} applied", + if change_count == 1 { "" } else { "s" } + ); + ( + InterpretStatus::SUCCESS, + format!("{} configured ({change_count} changes)", cfg.device_id), ) }; - Ok(outcome) + Ok(Outcome::new(status, msg, details)) } } @@ -307,12 +374,6 @@ fn wrap(e: crate::executors::ExecutorError) -> InterpretError { InterpretError::new(e.to_string()) } -fn log_change(change_log: &mut Vec, what: impl Into, r: ChangeReport) { - if r.changed { - change_log.push(what.into()); - } -} - #[cfg(test)] mod tests { use super::*; -- 2.39.5 From 96f17f3ca095f6e01a94aed3d713e4c1a7354fdd Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 12:38:39 -0400 Subject: [PATCH 05/14] refactor(examples/fleet_rpi_setup): adopt harmony_cli::run convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the bespoke framed renderer, failure hint catalog, and custom env_logger setup. Score output now flows through harmony_cli's standard reporter (bullet list under "🚀 All done!"), matching the other examples. cli_logger::init() at the top of main so early logs (ensure_ansible_venv) get the same formatting. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 13 +++++ examples/fleet_rpi_setup/Cargo.toml | 2 +- examples/fleet_rpi_setup/src/main.rs | 71 ++++++++++++---------------- 3 files changed, 45 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da364e76..cab1fed6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3194,6 +3194,19 @@ dependencies = [ "tokio", ] +[[package]] +name = "example_fleet_rpi_setup" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "harmony", + "harmony_cli", + "harmony_types", + "log", + "tokio", +] + [[package]] name = "example_fleet_vm_setup" version = "0.1.0" diff --git a/examples/fleet_rpi_setup/Cargo.toml b/examples/fleet_rpi_setup/Cargo.toml index 7874c8d0..980e37c2 100644 --- a/examples/fleet_rpi_setup/Cargo.toml +++ b/examples/fleet_rpi_setup/Cargo.toml @@ -10,9 +10,9 @@ path = "src/main.rs" [dependencies] harmony = { path = "../../harmony" } +harmony_cli = { path = "../../harmony_cli" } harmony_types = { path = "../../harmony_types" } tokio.workspace = true log.workspace = true -env_logger.workspace = true anyhow.workspace = true clap.workspace = true diff --git a/examples/fleet_rpi_setup/src/main.rs b/examples/fleet_rpi_setup/src/main.rs index a5b967c0..62a49769 100644 --- a/examples/fleet_rpi_setup/src/main.rs +++ b/examples/fleet_rpi_setup/src/main.rs @@ -2,11 +2,15 @@ //! //! This is the physical-device sibling of `fleet_vm_setup`: the VM //! provisioning step is gone (you booted Pi OS yourself with rpi-imager -//! and preloaded an SSH key), and we go straight to Step 2 — SSH into -//! the Pi and apply `FleetDeviceSetupScore`. That score installs podman -//! + systemd-container, creates the `fleet-agent` user, drops the agent +//! and preloaded an SSH key), and we go straight to applying +//! `FleetDeviceSetupScore` over SSH. That score installs podman + +//! systemd-container, creates the `fleet-agent` user, drops the agent //! binary + config + systemd unit, and starts the service. //! +//! Output rendering (per-step traces and the final recap) is handled +//! by `harmony_cli::run` — same as every other harmony example. The +//! score's `Outcome.details` is structured for that path. +//! //! Prereqs on the Pi (one-time, via rpi-imager or manual): //! - SSH server enabled //! - An admin user with passwordless sudo (e.g. the default `pi` user @@ -65,9 +69,7 @@ struct Cli { #[tokio::main] async fn main() -> Result<()> { - // Default to info so the score's per-step traces show up without - // the user having to set RUST_LOG. Honors RUST_LOG if exported. - env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); + harmony_cli::cli_logger::init(); let cli = Cli::parse(); ensure_ansible_venv() @@ -79,7 +81,6 @@ async fn main() -> Result<()> { .clone() .map(Id::from) .unwrap_or_else(Id::default); - let ssh_key = expand_tilde(&cli.ssh_key); let pi_ip = cli .pi_host @@ -98,14 +99,8 @@ async fn main() -> Result<()> { ); let labels = parse_labels(&cli.labels)?; - let labels_display = labels - .iter() - .map(|(k, v)| format!("{k}={v}")) - .collect::>() - .join(","); - let score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig { - device_id: device_id.clone(), + device_id, labels, nats_urls: vec![cli.nats_url.clone()], nats_user: cli.nats_user.clone(), @@ -113,11 +108,28 @@ async fn main() -> Result<()> { agent_binary_path: cli.agent_binary.clone(), }); - run_setup_score(&score, &topology).await?; - println!( - "🚀 device '{device_id}' ({labels_display}) onboarded via {}@{}", - cli.pi_user, cli.pi_host - ); + // We have our own clap CLI, so harmony_cli must NOT call + // `Args::parse()` (it would choke on --pi-host etc.). Pass an + // explicit Args with `yes: true` — the operator already committed + // to the run by typing the command, so the extra confirmation + // prompt would just add friction. + let harmony_args = harmony_cli::Args { + yes: true, + filter: None, + interactive: false, + all: true, + number: 0, + list: false, + }; + + harmony_cli::run( + Inventory::empty(), + topology, + vec![Box::new(score)], + Some(harmony_args), + ) + .await + .map_err(|e| anyhow::anyhow!("{e}"))?; Ok(()) } @@ -149,24 +161,3 @@ fn expand_tilde(p: &std::path::Path) -> PathBuf { } p.to_path_buf() } - -async fn run_setup_score( - score: &FleetDeviceSetupScore, - topology: &LinuxHostTopology, -) -> Result<()> { - use harmony::score::Score; - let inventory = Inventory::empty(); - let interpret = Score::::create_interpret(score); - let outcome = interpret - .execute(&inventory, topology) - .await - .map_err(|e| anyhow::anyhow!("FleetDeviceSetupScore execute: {e}"))?; - - println!(); - println!("==== FleetDeviceSetupScore recap ===="); - for line in &outcome.details { - println!("{line}"); - } - println!("====================================="); - Ok(()) -} -- 2.39.5 From 3cbe62c807bfbea19cdaea238668a9cd04cf8eb5 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 12:48:32 -0400 Subject: [PATCH 06/14] fix(cli): surface Outcome.details on NOOP outcomes too cli_reporter only accumulated details for SUCCESS, dropping the recap on idempotent re-runs that legitimately return NOOP with populated details. FleetDeviceSetupScore is the first score to exercise this path; the filter was over-restrictive. Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony_cli/src/cli_reporter.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/harmony_cli/src/cli_reporter.rs b/harmony_cli/src/cli_reporter.rs index 6c9823bf..480a2210 100644 --- a/harmony_cli/src/cli_reporter.rs +++ b/harmony_cli/src/cli_reporter.rs @@ -22,7 +22,15 @@ pub fn init() { score: _, outcome: Ok(outcome), } => { - if outcome.status == harmony::interpret::InterpretStatus::SUCCESS { + // Surface details for both SUCCESS and NOOP — an + // idempotent re-run that converges to the desired + // state still has operationally useful info to + // share (current config, what's running, etc.). + if matches!( + outcome.status, + harmony::interpret::InterpretStatus::SUCCESS + | harmony::interpret::InterpretStatus::NOOP, + ) { details.extend(outcome.details.clone()); } } -- 2.39.5 From 12249a7f88a693580d4ee38bb53f898c3d807a8a Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 12:48:37 -0400 Subject: [PATCH 07/14] refactor(fleet): inline agent binary local->remote on one recap line Folds the "-> /usr/local/bin/fleet-agent" continuation into the "Agent binary:" line. Removes the hardcoded-indent fragility (bullet prefix shifts in cli_reporter would have broken alignment). Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony/src/modules/fleet/setup_score.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/harmony/src/modules/fleet/setup_score.rs b/harmony/src/modules/fleet/setup_score.rs index 1f410dfa..19e5dc75 100644 --- a/harmony/src/modules/fleet/setup_score.rs +++ b/harmony/src/modules/fleet/setup_score.rs @@ -345,8 +345,10 @@ impl Interpret for FleetDeviceSetupInte format!("NATS URLs: {}", cfg.nats_urls.join(", ")), format!("Labels: {labels_display}"), format!("User: fleet-agent ({user_state})"), - format!("Agent binary: {}", cfg.agent_binary_path.display()), - " -> /usr/local/bin/fleet-agent".to_string(), + format!( + "Agent binary: {} -> /usr/local/bin/fleet-agent", + cfg.agent_binary_path.display() + ), format!("Service: fleet-agent.service ({service_state})"), ]; -- 2.39.5 From 54d1f0733bc46c84157d5b43023ef7adca2139b0 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 13:11:00 -0400 Subject: [PATCH 08/14] feat(linux): add FileFetcher capability for reading remote files Mirrors FileDelivery in the opposite direction: returns Some(content) or None if the file doesn't exist. AnsibleHostConfigurator implements it via two SSH calls (sudo test -e + sudo cat), routed through sudo to handle root- or service-owned config files. Added to the LinuxHostConfiguration umbrella so any score with that bound gets it. Enables scores to pre-flight-compare desired state against current state before committing to a destructive change. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/domain/topology/host_configuration.rs | 18 +++++++++++++++-- .../src/modules/linux/ansible_configurator.rs | 20 +++++++++++++++++++ harmony/src/modules/linux/topology.rs | 15 +++++++++++--- 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/harmony/src/domain/topology/host_configuration.rs b/harmony/src/domain/topology/host_configuration.rs index efbeb447..d63c0300 100644 --- a/harmony/src/domain/topology/host_configuration.rs +++ b/harmony/src/domain/topology/host_configuration.rs @@ -51,6 +51,15 @@ pub trait FileDelivery: Send + Sync { async fn ensure_file(&self, spec: &FileSpec) -> Result; } +/// Read the content of a remote file. Returns `None` if the file +/// doesn't exist; `Some(content)` otherwise. Useful for scores that +/// pre-flight-compare desired state against current state before +/// committing to a change. +#[async_trait] +pub trait FileFetcher: Send + Sync { + async fn fetch_file(&self, path: &str) -> Result, ExecutorError>; +} + /// Create and manage unix user accounts (POSIX systems). /// /// Split from [`SystemdManager`] because some hosts run user accounts @@ -110,12 +119,17 @@ pub trait SystemdManager: Send + Sync { /// as a generic bound. Impls should implement each capability /// individually. pub trait LinuxHostConfiguration: - HostReachable + PackageInstaller + FileDelivery + UnixUserManager + SystemdManager + HostReachable + PackageInstaller + FileDelivery + FileFetcher + UnixUserManager + SystemdManager { } impl LinuxHostConfiguration for T where - T: HostReachable + PackageInstaller + FileDelivery + UnixUserManager + SystemdManager + T: HostReachable + + PackageInstaller + + FileDelivery + + FileFetcher + + UnixUserManager + + SystemdManager { } diff --git a/harmony/src/modules/linux/ansible_configurator.rs b/harmony/src/modules/linux/ansible_configurator.rs index 9369882c..84f46e14 100644 --- a/harmony/src/modules/linux/ansible_configurator.rs +++ b/harmony/src/modules/linux/ansible_configurator.rs @@ -148,6 +148,26 @@ impl AnsibleHostConfigurator { .await } + pub async fn fetch_file( + &self, + host: IpAddress, + creds: &SshCredentials, + path: &str, + ) -> Result, ExecutorError> { + // Two SSH calls: existence probe, then read. Goes through + // sudo because the files we care about (e.g. + // /etc/fleet-agent/config.toml) are root- or service-owned + // and mode 0600 — readable only via privilege escalation. + let probe = ssh_exec(host, creds, &format!("sudo test -e {path}")).await?; + if probe.rc != 0 { + return Ok(None); + } + let read = ssh_exec(host, creds, &format!("sudo cat {path}")) + .await? + .into_successful()?; + Ok(Some(read.stdout)) + } + pub async fn ensure_systemd_unit( &self, host: IpAddress, diff --git a/harmony/src/modules/linux/topology.rs b/harmony/src/modules/linux/topology.rs index 94004da5..89a7566d 100644 --- a/harmony/src/modules/linux/topology.rs +++ b/harmony/src/modules/linux/topology.rs @@ -5,9 +5,9 @@ use harmony_types::net::IpAddress; use serde::{Deserialize, Serialize}; use crate::domain::topology::{ - ChangeReport, FileDelivery, FileSpec, HostReachable, PackageInstaller, PreparationError, - PreparationOutcome, SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager, - UserSpec, + ChangeReport, FileDelivery, FileFetcher, FileSpec, HostReachable, PackageInstaller, + PreparationError, PreparationOutcome, SystemdManager, SystemdScope, SystemdUnitSpec, Topology, + UnixUserManager, UserSpec, }; use crate::executors::ExecutorError; @@ -106,6 +106,15 @@ impl FileDelivery for LinuxHostTopology { } } +#[async_trait] +impl FileFetcher for LinuxHostTopology { + async fn fetch_file(&self, path: &str) -> Result, ExecutorError> { + self.configurator + .fetch_file(self.host, &self.credentials, path) + .await + } +} + #[async_trait] impl UnixUserManager for LinuxHostTopology { async fn ensure_user(&self, spec: &UserSpec) -> Result { -- 2.39.5 From 413077bcd0957b6ebcfb6e56f8002aadbd23dc71 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 13:11:10 -0400 Subject: [PATCH 09/14] feat(fleet): pre-flight config diff + confirm in FleetDeviceSetupScore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New first step (1/7): read /etc/fleet-agent/config.toml off the device and compare against the rendered desired config. Three branches: - missing → info, first install - matches → warn, converge anyway - differs → warn + unified diff (similar::TextDiff with 2-line context radius, '-/+' marker style) + inquire::Confirm prompt defaulting to N. Aborts with InterpretError if declined. Existing 6 steps renumbered to 2/7-7/7. The diff replaces the previous "dump both full configs" approach which was unreadable even for one-line differences. Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony/src/modules/fleet/setup_score.rs | 82 ++++++++++++++++++++---- 1 file changed, 71 insertions(+), 11 deletions(-) diff --git a/harmony/src/modules/fleet/setup_score.rs b/harmony/src/modules/fleet/setup_score.rs index 19e5dc75..3787b64f 100644 --- a/harmony/src/modules/fleet/setup_score.rs +++ b/harmony/src/modules/fleet/setup_score.rs @@ -8,7 +8,7 @@ use std::path::PathBuf; use async_trait::async_trait; use harmony_types::id::Id; -use log::{debug, info}; +use log::{debug, info, warn}; use serde::{Deserialize, Serialize}; use crate::data::Version; @@ -17,8 +17,9 @@ use crate::domain::interpret::{ }; use crate::domain::inventory::Inventory; use crate::domain::topology::{ - FileDelivery, FileSource, FileSpec, HostReachable, LinuxHostConfiguration, PackageInstaller, - SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager, UserSpec, + FileDelivery, FileFetcher, FileSource, FileSpec, HostReachable, LinuxHostConfiguration, + PackageInstaller, SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager, + UserSpec, }; use crate::score::Score; @@ -189,8 +190,68 @@ impl Interpret for FleetDeviceSetupInte let mut change_count = 0usize; + // Pre-flight: detect a pre-existing config and decide whether + // to proceed silently, log a warning, or prompt the operator + // before letting downstream FileDelivery overwrite it. + let desired_config = cfg.render_toml(); + info!("[{tag}] Step 1/7 — checking existing config on device"); + match FileFetcher::fetch_file(topology, "/etc/fleet-agent/config.toml") + .await + .map_err(wrap)? + { + None => { + info!("[{tag}] no existing config — first install on this device"); + } + Some(existing) if existing == desired_config => { + warn!( + "[{tag}] device already has /etc/fleet-agent/config.toml \ + with identical content; converging anyway" + ); + } + Some(existing) => { + warn!( + "[{tag}] device already configured with a DIFFERENT config — \ + proceeding will OVERWRITE it" + ); + warn!("[{tag}] diff (- existing, + desired):"); + let diff = similar::TextDiff::from_lines(existing.as_str(), desired_config.as_str()); + let groups = diff.grouped_ops(2); + for (idx, group) in groups.iter().enumerate() { + if idx > 0 { + warn!("[{tag}] ..."); + } + for op in group { + for change in diff.iter_changes(op) { + let prefix = match change.tag() { + similar::ChangeTag::Equal => " ", + similar::ChangeTag::Delete => "-", + similar::ChangeTag::Insert => "+", + }; + warn!( + "[{tag}] {} {}", + prefix, + change.value().trim_end_matches('\n') + ); + } + } + } + let confirmed = inquire::Confirm::new( + "Device already has /etc/fleet-agent/config.toml with different content.\n \ + Overwrite it and apply the new config?", + ) + .with_default(false) + .prompt() + .map_err(|e| InterpretError::new(format!("User prompt failed: {e}")))?; + if !confirmed { + return Err(InterpretError::new( + "User aborted: refused to overwrite existing config".to_string(), + )); + } + } + } + // 1. Dependencies. - info!("[{tag}] Step 1/6 — ensuring system packages: podman, systemd-container"); + info!("[{tag}] Step 2/7 — ensuring system packages: podman, systemd-container"); for pkg in ["podman", "systemd-container"] { let r = PackageInstaller::ensure_package(topology, pkg) .await @@ -211,7 +272,7 @@ impl Interpret for FleetDeviceSetupInte // // Lingered so the user-systemd instance survives logout — // required for the user podman.socket we enable below. - info!("[{tag}] Step 2/6 — ensuring fleet-agent user with linger"); + info!("[{tag}] Step 3/7 — ensuring fleet-agent user with linger"); let user_spec = UserSpec { name: "fleet-agent".to_string(), group: None, @@ -236,7 +297,7 @@ impl Interpret for FleetDeviceSetupInte // 3. User-scoped podman socket. Required by `PodmanTopology` on // the agent so it reaches /run/user//podman/podman.sock. - info!("[{tag}] Step 3/6 — activating user-scoped podman.socket"); + info!("[{tag}] Step 4/7 — activating user-scoped podman.socket"); let socket_r = SystemdManager::ensure_user_unit_active(topology, "fleet-agent", "podman.socket") .await .map_err(wrap)?; @@ -250,7 +311,7 @@ impl Interpret for FleetDeviceSetupInte // remote file actually differs from the local one — so // re-running this Score without a new binary is a true NOOP. info!( - "[{tag}] Step 4/6 — uploading agent binary {} -> /usr/local/bin/fleet-agent", + "[{tag}] Step 5/7 — uploading agent binary {} -> /usr/local/bin/fleet-agent", cfg.agent_binary_path.display() ); let binary_r = FileDelivery::ensure_file( @@ -271,16 +332,15 @@ impl Interpret for FleetDeviceSetupInte // 5. /etc/fleet-agent/ + config.toml info!( - "[{tag}] Step 5/6 — rendering /etc/fleet-agent/config.toml ({} NATS URL{}, {} label{})", + "[{tag}] Step 6/7 — rendering /etc/fleet-agent/config.toml ({} NATS URL{}, {} label{})", cfg.nats_urls.len(), if cfg.nats_urls.len() == 1 { "" } else { "s" }, cfg.labels.len(), if cfg.labels.len() == 1 { "" } else { "s" }, ); - let config_toml = cfg.render_toml(); let toml_spec = FileSpec { path: "/etc/fleet-agent/config.toml".to_string(), - source: FileSource::Content(config_toml), + source: FileSource::Content(desired_config), owner: Some("fleet-agent".to_string()), group: Some("fleet-agent".to_string()), mode: Some(0o600), @@ -293,7 +353,7 @@ impl Interpret for FleetDeviceSetupInte } // 6. systemd unit for the agent itself. - info!("[{tag}] Step 6/6 — installing fleet-agent.service"); + info!("[{tag}] Step 7/7 — installing fleet-agent.service"); let unit = SystemdUnitSpec { name: "fleet-agent".to_string(), unit_content: cfg.render_systemd_unit().to_string(), -- 2.39.5 From 414fe1cf7b4ab0199a0f9450c1ee9c3a2642a75a Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 13:56:35 -0400 Subject: [PATCH 10/14] feat(secret): add SudoPassword Secret type Sudo password for a Linux bootstrap admin user. Stored under key "SudoPassword" via SecretManager when a host doesn't have passwordless sudo configured. Same shape as the other single-field Secret types in this file. Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony/src/domain/config/secret.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/harmony/src/domain/config/secret.rs b/harmony/src/domain/config/secret.rs index 53d97be9..879c6ca7 100644 --- a/harmony/src/domain/config/secret.rs +++ b/harmony/src/domain/config/secret.rs @@ -25,3 +25,8 @@ pub struct SshKeyPair { pub struct RedhatSecret { pub pull_secret: String, } + +#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug, PartialEq)] +pub struct SudoPassword { + pub password: String, +} -- 2.39.5 From c2b8403f141ce70312a0fdef66df9f38bd60caca Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 13:56:50 -0400 Subject: [PATCH 11/14] feat(linux): support optional sudo_password on SshCredentials Lets callers populate creds.sudo_password when the bootstrap admin doesn't have passwordless sudo. None = current behavior unchanged. Wire-level injection: - ansible runs: when Some, write to a tempfile::NamedTempFile and pass ANSIBLE_BECOME_PASSWORD_FILE= via Command::env. Path in env, never value in argv. File deletes on drop. - direct ssh_exec sudo paths (ensure_linger, ensure_user_unit_active, fetch_file): new sudo_exec helper that uses `sudo -S` with the password piped via the new ssh_exec stdin parameter, otherwise plain sudo. ensure_user_unit_active's && chain folded into one sudo+sh -c call since `sudo -S` only reads stdin once. ssh_executor.rs: ssh_exec gains an optional stdin: Option<&str>; on Some, writes via channel.data() then channel.eof() so the remote reader doesn't hang. Existing 4 call sites pass None. fleet_vm_setup updated to set sudo_password: None (behavior identical). Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/fleet_vm_setup/src/main.rs | 1 + .../src/modules/linux/ansible_configurator.rs | 84 ++++++++++++++++--- harmony/src/modules/linux/ssh_executor.rs | 14 ++++ harmony/src/modules/linux/topology.rs | 5 ++ 4 files changed, 91 insertions(+), 13 deletions(-) diff --git a/examples/fleet_vm_setup/src/main.rs b/examples/fleet_vm_setup/src/main.rs index 2610047f..415f822f 100644 --- a/examples/fleet_vm_setup/src/main.rs +++ b/examples/fleet_vm_setup/src/main.rs @@ -196,6 +196,7 @@ async fn main() -> Result<()> { user: cli.admin_user.clone(), private_key_path: ssh.private_key.clone(), remote_python: Some("/usr/bin/python3".to_string()), + sudo_password: None, }, ); diff --git a/harmony/src/modules/linux/ansible_configurator.rs b/harmony/src/modules/linux/ansible_configurator.rs index 84f46e14..d86ac598 100644 --- a/harmony/src/modules/linux/ansible_configurator.rs +++ b/harmony/src/modules/linux/ansible_configurator.rs @@ -17,12 +17,14 @@ //! [`super::ansible_venv::ensure_ansible_venv`]. The operator does //! *not* need to install `ansible` system-wide. +use std::io::Write; use std::path::{Path, PathBuf}; use std::process::Stdio; use harmony_types::net::IpAddress; use serde::Serialize; use serde_json::{Value, json}; +use tempfile::NamedTempFile; use tokio::process::Command; use crate::domain::topology::{ @@ -31,9 +33,36 @@ use crate::domain::topology::{ use crate::executors::ExecutorError; use super::ansible_venv::ensure_ansible_venv; -use super::ssh_executor::ssh_exec; +use super::ssh_executor::{SshCommandOutput, ssh_exec}; use super::topology::SshCredentials; +/// Run a command via `sudo` on the remote host, automatically choosing +/// between passwordless `sudo` and `sudo -S` (with the password piped +/// via stdin) based on `creds.sudo_password`. `cmd_after_sudo` is the +/// portion of the command line that comes after `sudo` — e.g. for +/// `sudo cat /etc/foo`, pass `"cat /etc/foo"`; for +/// `sudo -u fleet-agent env ... systemctl`, pass +/// `"-u fleet-agent env ... systemctl"`. +/// +/// When chaining multiple privileged operations, fold them into one +/// `sudo` call (e.g. `sudo sh -c 'a && b'`) — `sudo -S` only reads +/// the password once, so two chained sudo calls would block on the +/// second prompt. +async fn sudo_exec( + host: IpAddress, + creds: &SshCredentials, + cmd_after_sudo: &str, +) -> Result { + if let Some(pw) = &creds.sudo_password { + let line = format!("sudo -S {cmd_after_sudo}"); + let stdin = format!("{pw}\n"); + ssh_exec(host, creds, &line, Some(&stdin)).await + } else { + let line = format!("sudo {cmd_after_sudo}"); + ssh_exec(host, creds, &line, None).await + } +} + pub struct AnsibleHostConfigurator; impl AnsibleHostConfigurator { @@ -158,11 +187,11 @@ impl AnsibleHostConfigurator { // sudo because the files we care about (e.g. // /etc/fleet-agent/config.toml) are root- or service-owned // and mode 0600 — readable only via privilege escalation. - let probe = ssh_exec(host, creds, &format!("sudo test -e {path}")).await?; + let probe = sudo_exec(host, creds, &format!("test -e {path}")).await?; if probe.rc != 0 { return Ok(None); } - let read = ssh_exec(host, creds, &format!("sudo cat {path}")) + let read = sudo_exec(host, creds, &format!("cat {path}")) .await? .into_successful()?; Ok(Some(read.stdout)) @@ -277,12 +306,13 @@ impl AnsibleHostConfigurator { host, creds, &format!("test -e /var/lib/systemd/linger/{user}"), + None, ) .await?; if check.rc == 0 { return Ok(ChangeReport::NOOP); } - ssh_exec(host, creds, &format!("sudo loginctl enable-linger {user}")) + sudo_exec(host, creds, &format!("loginctl enable-linger {user}")) .await? .into_successful()?; Ok(ChangeReport::CHANGED) @@ -300,22 +330,27 @@ impl AnsibleHostConfigurator { // level `environment:` keyword only available in playbooks. // Rather than pipe a one-task playbook, use russh directly: // a probe + (optionally) an enable. - let id_out = ssh_exec(host, creds, &format!("id -u {user}")) + let id_out = ssh_exec(host, creds, &format!("id -u {user}"), None) .await? .into_successful()?; let uid = id_out.stdout.trim(); - let env_prefix = format!("sudo -u {user} env XDG_RUNTIME_DIR=/run/user/{uid}"); + // Everything under sudo -u with the user's + // XDG_RUNTIME_DIR. We fold both the probe (is-enabled + + // is-active chained) and the enable-now into a single sudo + // call each, because `sudo -S` only reads the password once + // — chaining two sudos would block on the second prompt. + let env_prefix = format!("-u {user} env XDG_RUNTIME_DIR=/run/user/{uid}"); // Already enabled and active → NOOP. `is-enabled --quiet` // and `is-active --quiet` exit 0 only when the unit is // both wanted and running, which is exactly the // post-condition `enable --now` establishes. - let probe = ssh_exec( + let probe = sudo_exec( host, creds, &format!( - "{env_prefix} systemctl --user is-enabled --quiet {unit} && \ - {env_prefix} systemctl --user is-active --quiet {unit}" + "{env_prefix} sh -c 'systemctl --user is-enabled --quiet {unit} \ + && systemctl --user is-active --quiet {unit}'" ), ) .await?; @@ -324,7 +359,7 @@ impl AnsibleHostConfigurator { } let cmd = format!("{env_prefix} systemctl --user enable --now {unit}"); - ssh_exec(host, creds, &cmd).await?.into_successful()?; + sudo_exec(host, creds, &cmd).await?.into_successful()?; Ok(ChangeReport::CHANGED) } @@ -412,8 +447,25 @@ impl AnsibleHostConfigurator { argv.push(format!("ansible_python_interpreter={py}")); } - let output = Command::new(&bins.ansible) - .args(&argv) + // When a sudo password is configured, write it to a 0600 + // tempfile and point ansible at it via ANSIBLE_BECOME_PASSWORD_FILE. + // The path goes in the env, not the value — `ps` on the driver + // sees a path, never the secret. The file is bound to a + // local that drops (and unlinks) at end of scope. + let pw_file: Option = if let Some(pw) = &creds.sudo_password { + let mut f = NamedTempFile::new() + .map_err(|e| exec(format!("create become-password tempfile: {e}")))?; + f.write_all(pw.as_bytes()) + .map_err(|e| exec(format!("write become-password tempfile: {e}")))?; + f.flush() + .map_err(|e| exec(format!("flush become-password tempfile: {e}")))?; + Some(f) + } else { + None + }; + + let mut cmd = Command::new(&bins.ansible); + cmd.args(&argv) // Ad-hoc mode ignores ANSIBLE_STDOUT_CALLBACK unless // ANSIBLE_LOAD_CALLBACK_PLUGINS is also set. .env("ANSIBLE_LOAD_CALLBACK_PLUGINS", "True") @@ -426,10 +478,16 @@ impl AnsibleHostConfigurator { .env("ANSIBLE_PIPELINING", "True") .env("ANSIBLE_SSH_CONTROL_PATH_DIR", control_path_dir()) .stdout(Stdio::piped()) - .stderr(Stdio::piped()) + .stderr(Stdio::piped()); + if let Some(f) = &pw_file { + cmd.env("ANSIBLE_BECOME_PASSWORD_FILE", f.path()); + } + let output = cmd .output() .await .map_err(|e| exec(format!("spawn ansible: {e}")))?; + // pw_file dropped here → tempfile deleted. + drop(pw_file); let stdout = String::from_utf8_lossy(&output.stdout); let stderr = String::from_utf8_lossy(&output.stderr); diff --git a/harmony/src/modules/linux/ssh_executor.rs b/harmony/src/modules/linux/ssh_executor.rs index c20a6243..53775291 100644 --- a/harmony/src/modules/linux/ssh_executor.rs +++ b/harmony/src/modules/linux/ssh_executor.rs @@ -54,6 +54,7 @@ pub async fn ssh_exec( host: IpAddress, creds: &SshCredentials, command_line: &str, + stdin: Option<&str>, ) -> Result { let key_pair = load_secret_key(&creds.private_key_path, None).map_err(|e| { ExecutorError::AuthenticationError(format!( @@ -90,6 +91,19 @@ pub async fn ssh_exec( .await .map_err(|e| ExecutorError::NetworkError(format!("ssh exec: {e}")))?; + if let Some(input) = stdin { + channel + .data(input.as_bytes()) + .await + .map_err(|e| ExecutorError::NetworkError(format!("ssh stdin write: {e}")))?; + // Signal EOF so the remote process's read on stdin returns, + // letting it proceed instead of hanging waiting for more. + channel + .eof() + .await + .map_err(|e| ExecutorError::NetworkError(format!("ssh stdin eof: {e}")))?; + } + let mut stdout = Vec::new(); let mut stderr = Vec::new(); let mut rc: Option = None; diff --git a/harmony/src/modules/linux/topology.rs b/harmony/src/modules/linux/topology.rs index 89a7566d..6ee37c79 100644 --- a/harmony/src/modules/linux/topology.rs +++ b/harmony/src/modules/linux/topology.rs @@ -45,6 +45,11 @@ pub struct SshCredentials { /// wrong-interpreter traps). For Ubuntu-based cloud images `Some("/ /// usr/bin/python3")` is a safe, fast default. pub remote_python: Option, + /// Sudo password for `user`. None = expect passwordless sudo on the + /// host (the historical assumption). Skipped from serde so it never + /// lands in cached/checkpointed inventories. + #[serde(skip)] + pub sudo_password: Option, } impl LinuxHostTopology { -- 2.39.5 From be9073e461f830e520da523a90e839bd56dfb485 Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 13:57:00 -0400 Subject: [PATCH 12/14] feat(examples/fleet_rpi_setup): auto-fetch sudo password via SecretManager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probe `sudo -n true` over SSH before constructing the topology. If the probe succeeds (passwordless sudo, the typical rpi-imager default), proceed silently. If it fails, fetch the password through SecretManager::get_or_prompt::() — first run prompts the operator, subsequent runs reuse the cached value (same flow SshKeyPair etc. use). Adds harmony_secret dep, env.sh with the standard HARMONY_SECRET_NAMESPACE / HARMONY_SECRET_STORE / HARMONY_DATABASE_URL / RUST_LOG variables, and a doc snippet at the top of main.rs pointing at it. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + examples/fleet_rpi_setup/Cargo.toml | 1 + examples/fleet_rpi_setup/env.sh | 4 +++ examples/fleet_rpi_setup/src/main.rs | 54 +++++++++++++++++++++------- 4 files changed, 47 insertions(+), 13 deletions(-) create mode 100644 examples/fleet_rpi_setup/env.sh diff --git a/Cargo.lock b/Cargo.lock index cab1fed6..d2641945 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3202,6 +3202,7 @@ dependencies = [ "clap", "harmony", "harmony_cli", + "harmony_secret", "harmony_types", "log", "tokio", diff --git a/examples/fleet_rpi_setup/Cargo.toml b/examples/fleet_rpi_setup/Cargo.toml index 980e37c2..2a01a8a5 100644 --- a/examples/fleet_rpi_setup/Cargo.toml +++ b/examples/fleet_rpi_setup/Cargo.toml @@ -11,6 +11,7 @@ path = "src/main.rs" [dependencies] harmony = { path = "../../harmony" } harmony_cli = { path = "../../harmony_cli" } +harmony_secret = { path = "../../harmony_secret" } harmony_types = { path = "../../harmony_types" } tokio.workspace = true log.workspace = true diff --git a/examples/fleet_rpi_setup/env.sh b/examples/fleet_rpi_setup/env.sh new file mode 100644 index 00000000..e57741f9 --- /dev/null +++ b/examples/fleet_rpi_setup/env.sh @@ -0,0 +1,4 @@ +export HARMONY_SECRET_NAMESPACE=fleet-rpi-setup +export HARMONY_SECRET_STORE=file +export HARMONY_DATABASE_URL=sqlite://harmony_fleet_rpi_setup.sqlite +export RUST_LOG=info diff --git a/examples/fleet_rpi_setup/src/main.rs b/examples/fleet_rpi_setup/src/main.rs index 62a49769..0417eae8 100644 --- a/examples/fleet_rpi_setup/src/main.rs +++ b/examples/fleet_rpi_setup/src/main.rs @@ -7,14 +7,23 @@ //! systemd-container, creates the `fleet-agent` user, drops the agent //! binary + config + systemd unit, and starts the service. //! +//! Source `env.sh` first (sets `HARMONY_SECRET_NAMESPACE`, +//! `HARMONY_SECRET_STORE`, `HARMONY_DATABASE_URL`, `RUST_LOG`), then: +//! +//! ```bash +//! source examples/fleet_rpi_setup/env.sh +//! cargo run -p example_fleet_rpi_setup -- --pi-host ... +//! ``` +//! //! Output rendering (per-step traces and the final recap) is handled //! by `harmony_cli::run` — same as every other harmony example. The //! score's `Outcome.details` is structured for that path. //! //! Prereqs on the Pi (one-time, via rpi-imager or manual): //! - SSH server enabled -//! - An admin user with passwordless sudo (e.g. the default `pi` user -//! or whatever you set in rpi-imager) +//! - An admin user with sudo. Passwordless sudo is detected and +//! used silently; otherwise the example prompts for a sudo +//! password via `SecretManager` and caches it for next runs. //! - Your driver-machine SSH public key in that user's //! `~/.ssh/authorized_keys` //! @@ -24,10 +33,13 @@ use anyhow::{Context, Result}; use clap::Parser; +use harmony::config::secret::SudoPassword; use harmony::inventory::Inventory; use harmony::modules::fleet::{FleetDeviceSetupConfig, FleetDeviceSetupScore}; -use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv}; +use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv, ssh_exec}; +use harmony_secret::SecretManager; use harmony_types::id::Id; +use log::info; use std::path::PathBuf; #[derive(Parser, Debug)] @@ -87,16 +99,32 @@ async fn main() -> Result<()> { .parse() .with_context(|| format!("--pi-host '{}' is not a valid IP address", cli.pi_host))?; - let topology = LinuxHostTopology::new( - format!("rpi-{}", cli.pi_host), - pi_ip, - SshCredentials { - user: cli.pi_user.clone(), - private_key_path: ssh_key, - // Pi OS Lite ships /usr/bin/python3 — skip auto-discovery. - remote_python: Some("/usr/bin/python3".to_string()), - }, - ); + let mut creds = SshCredentials { + user: cli.pi_user.clone(), + private_key_path: ssh_key, + // Pi OS Lite ships /usr/bin/python3 — skip auto-discovery. + remote_python: Some("/usr/bin/python3".to_string()), + sudo_password: None, + }; + + // If the Pi doesn't have passwordless sudo, fetch the password + // through SecretManager (same flow other scores use for SSH keys + // etc. — see harmony_secret/src/lib.rs:145). First run prompts; + // subsequent runs reuse the cached value. Probe with `sudo -n` + // first so we don't prompt the operator for a password they + // don't need. + let probe = ssh_exec(pi_ip, &creds, "sudo -n true", None) + .await + .map_err(|e| anyhow::anyhow!("sudo probe: {e}"))?; + if probe.rc != 0 { + info!("device requires a sudo password — fetching from secret store"); + let secret = SecretManager::get_or_prompt::() + .await + .map_err(|e| anyhow::anyhow!("get sudo password: {e}"))?; + creds.sudo_password = Some(secret.password); + } + + let topology = LinuxHostTopology::new(format!("rpi-{}", cli.pi_host), pi_ip, creds); let labels = parse_labels(&cli.labels)?; let score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig { -- 2.39.5 From 34e2f832ec628f9db249d776834dd55d9bc153af Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 14:09:38 -0400 Subject: [PATCH 13/14] docs(linux): clarify sudo_password scope, TODO for SSH password auth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new sudo_password field is strictly for privilege escalation on the remote host (sudo -S, ansible become) — not for SSH login. SSH auth is still key-only. Adds a TODO on SshCredentials pointing at where SSH password support would land if/when we want it, and a matching note on the SudoPassword Secret type. Co-Authored-By: Claude Opus 4.7 (1M context) --- harmony/src/domain/config/secret.rs | 7 +++++++ harmony/src/modules/linux/topology.rs | 24 +++++++++++++++++++----- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/harmony/src/domain/config/secret.rs b/harmony/src/domain/config/secret.rs index 879c6ca7..44037d2e 100644 --- a/harmony/src/domain/config/secret.rs +++ b/harmony/src/domain/config/secret.rs @@ -26,6 +26,13 @@ pub struct RedhatSecret { pub pull_secret: String, } +/// Password used for **sudo escalation on a remote Linux host** — fed +/// to `sudo -S` (direct SSH calls) or to ansible's become-pass flow +/// when the bootstrap admin doesn't have passwordless sudo configured. +/// +/// Not used for SSH login itself — `LinuxHostTopology` still requires +/// a `private_key_path` for that. SSH password auth is a possible +/// future extension; see the TODO on `SshCredentials`. #[derive(Secret, Serialize, Deserialize, JsonSchema, Debug, PartialEq)] pub struct SudoPassword { pub password: String, diff --git a/harmony/src/modules/linux/topology.rs b/harmony/src/modules/linux/topology.rs index 6ee37c79..944dabaa 100644 --- a/harmony/src/modules/linux/topology.rs +++ b/harmony/src/modules/linux/topology.rs @@ -34,8 +34,17 @@ pub struct LinuxHostTopology { } /// SSH credentials for reaching the host. Key-based only — password auth -/// is not supported here because the bootstrap story assumes a cloud-init -/// / rpi-imager / PXE step that preloads a public key. +/// for the SSH login itself is not supported here because the bootstrap +/// story assumes a cloud-init / rpi-imager / PXE step that preloads a +/// public key. +/// +/// TODO: support SSH password auth as well — would mean making +/// `private_key_path` optional (or turning this into an enum +/// `Key { ... } | Password { ... }`), threading a password into both +/// `russh::authenticate_password` (`ssh_executor.rs`) and ansible's +/// `ANSIBLE_PASSWORD_FILE` / `-e ansible_ssh_pass=...` plumbing +/// (`ansible_configurator.rs`). Today's `sudo_password` field is +/// strictly for *sudo escalation on the remote*, not SSH login. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SshCredentials { pub user: String, @@ -45,9 +54,14 @@ pub struct SshCredentials { /// wrong-interpreter traps). For Ubuntu-based cloud images `Some("/ /// usr/bin/python3")` is a safe, fast default. pub remote_python: Option, - /// Sudo password for `user`. None = expect passwordless sudo on the - /// host (the historical assumption). Skipped from serde so it never - /// lands in cached/checkpointed inventories. + /// Sudo password for `user`, used for **privilege escalation on the + /// remote host only** — fed to `sudo -S` for direct ssh_exec calls + /// and to ansible's become-pass machinery. **Not** used for SSH + /// login; that still requires `private_key_path`. + /// + /// `None` = expect passwordless sudo on the host (the historical + /// assumption). Skipped from serde so it never lands in + /// cached/checkpointed inventories. #[serde(skip)] pub sudo_password: Option, } -- 2.39.5 From b86f8f11f953d0f203772355953012e903c318da Mon Sep 17 00:00:00 2001 From: Sylvain Tremblay Date: Fri, 1 May 2026 14:51:03 -0400 Subject: [PATCH 14/14] feat: add little script to call the fleet_rpi_setup example --- fleet/scripts/setup-rpi.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100755 fleet/scripts/setup-rpi.sh diff --git a/fleet/scripts/setup-rpi.sh b/fleet/scripts/setup-rpi.sh new file mode 100755 index 00000000..b62d6390 --- /dev/null +++ b/fleet/scripts/setup-rpi.sh @@ -0,0 +1,11 @@ +cargo run -p example_fleet_rpi_setup -- \ + --pi-host 192.168.2.19 \ + --pi-user admin \ + --ssh-key ~/.ssh/id_rsa \ + --device-id paul \ + --labels group=site-a,arch=aarch64 \ + --agent-binary target/aarch64-unknown-linux-gnu/release/harmony-fleet-agent \ + --nats-url nats://192.168.2.87:4222 \ + --nats-user "" \ + --nats-pass "" + -- 2.39.5