feat/iot-arm-vm #269

Merged
johnride merged 14 commits from feat/iot-arm-vm into feat/iot-walking-skeleton 2026-04-21 19:04:53 +00:00
28 changed files with 2961 additions and 922 deletions

Cargo.lock generated
View File

@@ -3173,6 +3173,7 @@ dependencies = [
"clap",
"env_logger",
"harmony",
"harmony_types",
"log",
"tokio",
]
@@ -6519,10 +6520,12 @@ dependencies = [
"system-configuration",
"tokio",
"tokio-rustls 0.24.1",
"tokio-util",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"webpki-roots 0.25.4",
"winreg",

View File

@@ -0,0 +1,207 @@
# aarch64 VM support — plan
## Why
The v0 walking skeleton's whole point is validating the IoT agent
against the *actual* distribution, arch, and package set the end-
customer's Pi 5 devices run on (ROADMAP §1). Everything green so far
runs the agent against an x86_64 Ubuntu cloud image with an x86_64
Rust binary — which proves the code path works but not that the ARM
target works. Every passing smoke-a3 run today is evidence that the
wrong thing works.
This plan adds arm64 emulation on x86_64 hosts (no hardware needed
for CI) so:
- the VM runs the same Ubuntu 24.04 arm64 cloud image customers will
eventually flash onto a Pi;
- the iot-agent shipped to it is a real aarch64 binary produced by
our existing cross-compile toolchain;
- apt/systemd/podman on the VM are the actual arm64 packages; and
- smoke-a3 exercises all of it end-to-end.
Acceptable cost: emulated boot is 5-15× slower than KVM-accelerated
boot. That's the price of the target-arch validation.
## Shape of the change
Additive, type-safe, default-preserving. Existing callers of
`VirtualMachineSpec` keep working unchanged; arm64 is opt-in via a
new field.
### 1. Architecture enum on the VM spec
Introduce `VmArchitecture` in `harmony/src/domain/topology/
virtualization.rs`:
```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum VmArchitecture {
    #[default]
    X86_64,
    Aarch64,
}
```
Add `pub architecture: VmArchitecture` to `VirtualMachineSpec`. With
`#[derive(Default)]` on the enum and `VmArchitecture::X86_64` as the
default variant, call sites that build the spec with
`..Default::default()` keep compiling, and previously-serialized specs
keep deserializing via `#[serde(default)]` on the new field. New
constructor: `VirtualMachineSpec::new_aarch64(name)` for clarity.
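A sketch of that constructor, assuming the spec gains the `Default`
impl described above (the remaining field set is whatever
`VirtualMachineSpec` already carries):
```rust
impl VirtualMachineSpec {
    /// Sketch only: convenience constructor for arm64 guests.
    /// Relies on `VirtualMachineSpec: Default`; every field other
    /// than the name and the architecture keeps its default.
    pub fn new_aarch64(name: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            architecture: VmArchitecture::Aarch64,
            ..Default::default()
        }
    }
}
```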
Same treatment on `VmConfig` in `modules/kvm/types.rs` — add a
`pub architecture: VmArchitecture` field with `Default` impl.
### 2. Libvirt XML parameterization
Rewrite `modules/kvm/xml.rs::domain_xml` to branch on arch. What
changes per-arch (the QEMU flags you gave as reference map directly
to libvirt XML):
| QEMU flag | libvirt XML | x86_64 | aarch64 |
|------------------------------|--------------------------------------------------------------------------|------------------------|----------------------------------------|
| `-accel kvm` vs `-accel tcg` | `<domain type='…'>` | `kvm` | `qemu` |
| `-M virt` / `-M q35` | `<os><type machine='…'>` | `q35` | `virt` |
| arch | `<os><type arch='…'>` | `x86_64` | `aarch64` |
| emulator binary | `<emulator>…</emulator>` | `/usr/bin/qemu-system-x86_64` | `/usr/bin/qemu-system-aarch64` |
| `-cpu max,pauth-impdef=on` | `<cpu mode='custom'><model>max</model><feature …/></cpu>` | `host-model` (current) | `max` + `pauth-impdef` |
| `-bios QEMU_EFI.fd` | `<os><loader readonly='yes' type='pflash'>…</loader><nvram>…</nvram></os>` | — (BIOS) | AAVMF CODE + VARS pflash pair |
| `-accel tcg,thread=multi` | MTTCG is default-on when `type='qemu'` + QEMU ≥ 9.1 | n/a | implicit |
**Type safety**: introduce a `DomainXmlParams` struct that captures
the arch-specific knobs (domain_type, arch, machine, emulator path,
cpu mode, firmware) and derives from `VmArchitecture`. The top-level
`domain_xml` then consumes a fully-resolved `DomainXmlParams` rather
than branching with `if arch == X86_64` strings.
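A minimal sketch of that shape; field names are illustrative, and the
firmware type is the `AarchFirmware` pair introduced in §3 below:
```rust
/// Illustrative only; the real struct lives in modules/kvm/xml.rs.
pub struct DomainXmlParams {
    pub domain_type: &'static str, // "kvm" on x86_64, "qemu" on aarch64
    pub arch: &'static str,        // "x86_64" | "aarch64"
    pub machine: &'static str,     // "q35" | "virt"
    pub emulator: &'static str,    // qemu-system-* binary path
    pub cpu: CpuModel,
    pub firmware: Option<AarchFirmware>, // pflash pair, aarch64 only (§3)
}

pub enum CpuModel {
    HostModel,          // x86_64: <cpu mode='host-model'/>
    MaxWithPauthImpdef, // aarch64: model 'max' + pauth-impdef feature
}

impl DomainXmlParams {
    pub fn resolve(arch: VmArchitecture, firmware: Option<AarchFirmware>) -> Self {
        match arch {
            VmArchitecture::X86_64 => Self {
                domain_type: "kvm",
                arch: "x86_64",
                machine: "q35",
                emulator: "/usr/bin/qemu-system-x86_64",
                cpu: CpuModel::HostModel,
                firmware: None,
            },
            VmArchitecture::Aarch64 => Self {
                domain_type: "qemu",
                arch: "aarch64",
                machine: "virt",
                emulator: "/usr/bin/qemu-system-aarch64",
                cpu: CpuModel::MaxWithPauthImpdef,
                firmware,
            },
        }
    }
}
```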
### 3. UEFI firmware discovery
aarch64 guests boot via UEFI, not BIOS. libvirt needs two files:
- `AAVMF_CODE.fd` — the firmware code (read-only, shared)
- `AAVMF_VARS.fd` — per-VM NVRAM (writable, per-domain copy)
Common paths across distros:
| Distro | CODE | VARS (template) |
|----------------|-----------------------------------------------------|----------------------------------------------|
| Arch | `/usr/share/edk2/aarch64/QEMU_CODE.fd` | `/usr/share/edk2/aarch64/QEMU_VARS.fd` |
| Debian/Ubuntu | `/usr/share/AAVMF/AAVMF_CODE.fd` | `/usr/share/AAVMF/AAVMF_VARS.fd` |
| Fedora | `/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw` | `/usr/share/edk2/aarch64/vars-template-pflash.raw` |
New module `harmony/src/modules/kvm/firmware.rs`:
- `pub fn discover_aarch64_firmware() -> Result<AarchFirmware, KvmError>`
walks a small known-paths list and returns the first viable pair.
Returns a typed `AarchFirmware { code: PathBuf, vars_template: PathBuf }`.
- Per-VM NVRAM copy is handled in `KvmVirtualMachineHost`: at
`ensure_vm` time, copy `vars_template` into
`$pool/<vm_name>-VARS.fd` and reference it in the domain XML.
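A sketch of the discovery walk; the candidate list mirrors the table
above, and the error constructor shown is illustrative (the real
not-found case maps onto whatever `KvmError` exposes):
```rust
use std::path::PathBuf;

#[derive(Debug, Clone)]
pub struct AarchFirmware {
    pub code: PathBuf,
    pub vars_template: PathBuf,
}

pub fn discover_aarch64_firmware() -> Result<AarchFirmware, KvmError> {
    // First existing CODE+VARS pair wins.
    const CANDIDATES: &[(&str, &str)] = &[
        // Debian/Ubuntu
        ("/usr/share/AAVMF/AAVMF_CODE.fd", "/usr/share/AAVMF/AAVMF_VARS.fd"),
        // Arch
        ("/usr/share/edk2/aarch64/QEMU_CODE.fd", "/usr/share/edk2/aarch64/QEMU_VARS.fd"),
        // Fedora
        (
            "/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw",
            "/usr/share/edk2/aarch64/vars-template-pflash.raw",
        ),
    ];
    for (code, vars) in CANDIDATES {
        let (code, vars) = (PathBuf::from(code), PathBuf::from(vars));
        if code.is_file() && vars.is_file() {
            return Ok(AarchFirmware { code, vars_template: vars });
        }
    }
    // Error variant/constructor is illustrative, not the final API.
    Err(KvmError::UnexpectedError(
        "no AAVMF firmware pair found; install AAVMF / edk2-aarch64".into(),
    ))
}
```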
### 4. Cloud image for arm64
Add to `modules/iot/assets.rs`:
```rust
pub const UBUNTU_2404_CLOUDIMG_ARM64_URL: &str =
    "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img";
pub const UBUNTU_2404_CLOUDIMG_ARM64_SHA256: &str = "<pinned>";

pub async fn ensure_ubuntu_2404_cloud_image_for_arch(
    arch: VmArchitecture,
) -> Result<PathBuf, ExecutorError>;
```
The existing `ensure_ubuntu_2404_cloud_image()` becomes a thin
wrapper that calls the arch-aware fn with `X86_64`, preserving all
callers. SHA256 gets pinned against the live Ubuntu arm64 image at
commit time.
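The shim stays a one-liner (this exact form appears in `assets.rs`
later in this PR):
```rust
pub async fn ensure_ubuntu_2404_cloud_image() -> Result<PathBuf, ExecutorError> {
    ensure_ubuntu_2404_cloud_image_for_arch(VmArchitecture::X86_64).await
}
```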
### 5. Preflight additions
In `modules/iot/preflight.rs`, when the caller asks for arm64 VMs
(new `check_iot_smoke_preflight_for_arch(VmArchitecture)` wrapper):
- verify `qemu-system-aarch64` is on PATH;
- verify the aarch64 firmware pair exists (reuse the discovery fn);
- verify QEMU version ≥ 9.1 (MTTCG is a real perf multiplier — a
warning, not a hard block, if the host is older).
### 6. Cross-compiled agent
smoke-a3.sh phase 2 currently does native `cargo build --release
-p iot-agent-v0`. When arch=aarch64:
- `cargo build --release --target aarch64-unknown-linux-gnu
-p iot-agent-v0`
- AGENT_BINARY points at `target/aarch64-unknown-linux-gnu/release/
iot-agent-v0`
Opt-in via `--arch aarch64` CLI flag on both
`example_iot_vm_setup` and `smoke-a3.sh`. Default stays x86_64.
### 7. Timeout bumps
First-boot cloud-init on emulated aarch64 takes 3-6× longer than
KVM-accel x86_64. Bump `wait_for_ip` timeout from 300s → 900s when
arch=aarch64. Smoke-a3's phase 5 reboot gate also lengthens.
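One way to express the bump while leaving the x86_64 path untouched
(the helper name is illustrative):
```rust
use std::time::Duration;

// Illustrative helper: emulated aarch64 first boot needs a much
// longer grace period than KVM-accelerated x86_64.
fn wait_for_ip_timeout(arch: VmArchitecture) -> Duration {
    match arch {
        VmArchitecture::X86_64 => Duration::from_secs(300),
        VmArchitecture::Aarch64 => Duration::from_secs(900),
    }
}
```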
## Files to touch
| File | Change |
|------------------------------------------------|---------------------------------------------------------------------------|
| `harmony/src/domain/topology/virtualization.rs`| Add `VmArchitecture`, field on `VirtualMachineSpec`, constructor helper. |
| `harmony/src/modules/kvm/types.rs` | Add `architecture` field on `VmConfig`, `VmConfigBuilder` setter. |
| `harmony/src/modules/kvm/xml.rs` | Rewrite `domain_xml` to take `DomainXmlParams` resolved from arch. |
| `harmony/src/modules/kvm/firmware.rs` (new) | Discovery of AAVMF code+vars paths; `AarchFirmware` struct. |
| `harmony/src/modules/kvm/topology.rs` | Copy per-VM NVRAM template on ensure_vm; thread arch through to XML. |
| `harmony/src/modules/iot/assets.rs` | `ensure_ubuntu_2404_cloud_image_for_arch(arch)`; pin arm64 URL+sha256. |
| `harmony/src/modules/iot/preflight.rs` | Arch-aware preflight; qemu-system-aarch64 + firmware + qemu-version. |
| `examples/iot_vm_setup/src/main.rs` | `--arch x86_64|aarch64` CLI flag; resolve matching cloud image. |
| `iot/scripts/smoke-a3.sh` | Arch flag plumbing; cross-compile; extended timeouts; preflight. |
| `iot/scripts/smoke-a3-arm.sh` (new) | Dedicated arm smoke as the CI hook — `ARCH=aarch64 ./smoke-a3.sh`. |
## Out of scope
- Migrating OPNsense + other KVM examples to `VirtualMachineHost` /
`ProvisionVmScore` — real inconsistency in the codebase but a
separate refactor, orthogonal to the ARM work. Filing as follow-up.
- KVM-accelerated aarch64-on-aarch64 (e.g. running on an Ampere
runner). Emulation covers the x86 CI story; native aarch64
runners would use `<domain type='kvm'>` and no MTTCG flags, which
the arch enum + existing x86_64 XML path already model — so this
is effectively free when we get there.
- Supporting multiple simultaneous guest arches on one host in the
same smoke run. Single-arch-per-run keeps everything simple.
- Pinning AAVMF firmware like we pin the cloud image. Firmware is
distro-package-managed; pin when we hit a regression.
## Commit plan (in order)
1. **`VmArchitecture` domain type + `VirtualMachineSpec.architecture`
field** — tiny, just the enum and struct field; no behaviour
change (all callers get `X86_64` via `Default`).
2. **XML parameterization via `DomainXmlParams`** — rewrite
`domain_xml` to be arch-driven. Tests under
`harmony/src/modules/kvm/xml.rs` get an arm64 variant.
3. **AAVMF firmware discovery + per-VM NVRAM copy** —
`firmware.rs` + the copy in `topology.rs::ensure_vm`.
4. **arm64 cloud image asset + preflight** —
`ensure_ubuntu_2404_cloud_image_for_arch(arch)` plus preflight
extensions. SHA256 pinned at commit time via a one-off
`curl | sha256sum`.
5. **Example + smoke script plumbing** — `--arch` flag,
cross-compile, timeout bumps, `smoke-a3-arm.sh` wrapper.
6. **End-to-end verification** — run `smoke-a3-arm.sh` from a
fresh `$HARMONY_DATA_DIR/iot/` and confirm the aarch64 agent
boots, joins NATS, and survives a power-cycle. Document timing
in the commit message.
## Verification
- `cargo check --all-targets --features kvm`: clean.
- `cargo clippy --no-deps -- -D warnings` on touched files: clean.
- `cargo fmt --check`: clean.
- aarch64 cross-compile of harmony + iot crates: still green.
- Fresh-cache arm64 smoke-a3: PASS, timing documented.
- Existing x86_64 smoke-a3: still PASS (regression guard).

View File

@@ -10,6 +10,7 @@ path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony", features = ["kvm"] }
harmony_types = { path = "../../harmony_types" }
tokio.workspace = true
log.workspace = true
env_logger.workspace = true

View File

@@ -1,24 +1,44 @@
//! End-to-end driver for the IoT walking-skeleton VM-as-device flow.
//!
//! Runs two scores back-to-back:
//! 1. `KvmVmScore` — spin up a libvirt VM from an Ubuntu 24.04 cloud
//! image with a generated cloud-init seed authorizing one SSH key.
//! 2. `IotDeviceSetupScore` — SSH into the booted VM (via Ansible)
//! and install podman + the iot-agent.
//!
//! After this runs, the VM is a member of the IoT fleet just like the
//! localhost-based smoke test's agent was. Apply a Deployment CR against
//! the same NATS and the VM's agent will pull it + run the container.
use std::path::PathBuf;
//! Runs two Scores back-to-back:
//! 1. `ProvisionVmScore` — bound to the generic `VirtualMachineHost`
//! capability. Here we satisfy it with `KvmVirtualMachineHost`
//! (libvirt). Swapping to VMware/Proxmox/cloud would be a
//! different topology injection with the same Score code.
//! 2. `IotDeviceSetupScore` — SSHes into the booted VM and installs
//! podman + iot-agent via the split Linux-host capabilities.
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::iot::{IotDeviceSetupConfig, IotDeviceSetupScore};
use harmony::modules::iot::{
IotDeviceSetupConfig, IotDeviceSetupScore, ProvisionVmScore,
check_iot_smoke_preflight_for_arch, ensure_iot_ssh_keypair,
};
use harmony::modules::kvm::KvmVirtualMachineHost;
use harmony::modules::kvm::config::init_executor;
use harmony::modules::kvm::{CloudInitVmConfig, KvmHostTopology, KvmVmScore};
use harmony::modules::linux::{LinuxHostTopology, SshCredentials};
use harmony::topology::{VirtualMachineSpec, VmArchitecture, VmFirstBootConfig};
use harmony_types::id::Id;
use std::path::PathBuf;
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
enum CliArch {
/// Native KVM on x86_64 hosts.
X86_64,
/// Aarch64 guest. Runs on native KVM on arm64 hosts and under
/// qemu-system-aarch64 TCG emulation on x86_64 hosts (slower).
Aarch64,
}
impl From<CliArch> for VmArchitecture {
fn from(a: CliArch) -> Self {
match a {
CliArch::X86_64 => VmArchitecture::X86_64,
CliArch::Aarch64 => VmArchitecture::Aarch64,
}
}
}
#[derive(Parser, Debug)]
#[command(
@@ -26,39 +46,28 @@ use harmony::modules::linux::{LinuxHostTopology, SshCredentials};
about = "Provision one VM + onboard it into the IoT fleet"
)]
struct Cli {
/// Guest CPU architecture. Selects the cloud image, qemu
/// emulator, and firmware model.
#[arg(long, value_enum, default_value_t = CliArch::X86_64)]
arch: CliArch,
/// libvirt domain name for the VM.
#[arg(long, default_value = "iot-vm-01")]
vm_name: String,
/// Device id the agent will announce to NATS.
#[arg(long, default_value = "iot-vm-01")]
device_id: String,
/// Device id the agent will announce to NATS. Defaults to a
/// fresh `Id` (hex timestamp + random suffix).
#[arg(long)]
device_id: Option<String>,
/// Fleet group label to write into the agent's TOML config.
#[arg(long, default_value = "group-a")]
group: String,
/// libvirt network name to attach the VM to. `default` is the
/// libvirt-shipped NAT bridge.
/// libvirt network name to attach the VM to.
#[arg(long, default_value = "default")]
network: String,
/// Path to a pre-downloaded Ubuntu 24.04 cloud image (qcow2).
/// Required unless `--bootstrap-ansible-only` is set.
#[arg(long)]
base_image: Option<PathBuf>,
/// Path to an SSH public key to authorize on the VM.
/// Required unless `--bootstrap-ansible-only` is set.
#[arg(long)]
ssh_pubkey: Option<PathBuf>,
/// Path to the matching SSH private key.
/// Required unless `--bootstrap-ansible-only` is set.
#[arg(long)]
ssh_privkey: Option<PathBuf>,
/// Admin username the VM's cloud-init will create.
/// Admin username created on first boot.
#[arg(long, default_value = "iot-admin")]
admin_user: String,
/// Directory for cloud-init seed ISOs.
#[arg(long, default_value = "/var/tmp/iot-vm-setup")]
work_dir: PathBuf,
/// Path to the cross-compiled iot-agent binary to upload to the VM.
/// Required unless `--bootstrap-ansible-only` or `--only-vm` are set.
/// Path to the cross-compiled iot-agent binary.
/// Required unless `--bootstrap-only` is set.
#[arg(long)]
agent_binary: Option<PathBuf>,
/// NATS URL the agent should connect to.
@@ -68,95 +77,110 @@ struct Cli {
nats_user: String,
#[arg(long, default_value = "smoke")]
nats_pass: String,
/// Only run the VM-provisioning step; skip device setup. Useful when
/// iterating on the KvmVmScore piece.
/// Only run the VM-provisioning step; skip device setup.
#[arg(long)]
only_vm: bool,
/// Ensure the managed Ansible venv exists at $HARMONY_DATA_DIR/
/// ansible-venv and exit. Skips the VM-provisioning and device-
/// setup steps entirely. Useful as a first-run warmup so the real
/// smoke test isn't slowed by the one-time pip install.
/// Run preflight + asset bootstrap (ansible venv, cloud image,
/// SSH key, libvirt pool) and exit.
#[arg(long)]
bootstrap_ansible_only: bool,
bootstrap_only: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
let cli = Cli::parse();
let arch: VmArchitecture = cli.arch.into();
// Shortcut: warm the managed Ansible venv and exit.
if cli.bootstrap_ansible_only {
let bins = harmony::modules::linux::ensure_ansible_venv()
check_iot_smoke_preflight_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("{e}"))?;
let out = tokio::process::Command::new(&bins.ansible)
.arg("--version")
.output()
.await?;
anyhow::ensure!(
out.status.success(),
"ansible --version failed after bootstrap"
);
print!("{}", String::from_utf8_lossy(&out.stdout));
if cli.bootstrap_only {
harmony::modules::linux::ensure_ansible_venv()
.await
.map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?;
harmony::modules::iot::ensure_ubuntu_2404_cloud_image_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("cloud image: {e}"))?;
ensure_iot_ssh_keypair()
.await
.map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
harmony::modules::iot::ensure_harmony_iot_pool()
.await
.map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
println!("bootstrap complete");
return Ok(());
}
// --- Step 1: provision the VM ---
let base_image = harmony::modules::iot::ensure_ubuntu_2404_cloud_image_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("cloud image: {e}"))?;
let pool = harmony::modules::iot::ensure_harmony_iot_pool()
.await
.map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
let ssh = ensure_iot_ssh_keypair()
.await
.map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
let authorized_key = harmony::modules::iot::read_public_key(&ssh)
.await
.map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?;
let executor = init_executor().map_err(|e| anyhow::anyhow!("KVM init: {e}"))?;
let kvm_topology = KvmHostTopology::new("kvm-local", executor);
let vm_host = KvmVirtualMachineHost::new(
"kvm-local",
executor,
pool.name.clone(),
pool.path.clone(),
base_image,
);
let base_image = cli.base_image.clone().context("--base-image is required")?;
let ssh_pubkey = cli.ssh_pubkey.clone().context("--ssh-pubkey is required")?;
let authorized_key = std::fs::read_to_string(&ssh_pubkey)
.with_context(|| format!("read ssh pubkey {ssh_pubkey:?}"))?
.trim()
.to_string();
let vm_score = KvmVmScore {
config: CloudInitVmConfig {
vm_name: cli.vm_name.clone(),
hostname: Some(cli.vm_name.clone()),
vcpus: 2,
let vm_score = ProvisionVmScore {
spec: VirtualMachineSpec {
name: cli.vm_name.clone(),
architecture: arch,
cpus: 2,
memory_mib: 2048,
base_image_path: base_image,
seed_output_dir: cli.work_dir.clone(),
admin_user: cli.admin_user.clone(),
authorized_key,
network_name: cli.network.clone(),
disk_size_gb: None,
network: cli.network.clone(),
first_boot: Some(VmFirstBootConfig {
hostname: Some(cli.vm_name.clone()),
admin_user: Some(cli.admin_user.clone()),
authorized_keys: vec![authorized_key],
}),
},
};
let vm_ip = run_vm_score(&vm_score, &kvm_topology).await?;
println!("VM '{}' up at {}", cli.vm_name, vm_ip);
let vm_ip = run_vm_score(&vm_score, &vm_host).await?;
println!("VM '{}' up at {vm_ip}", cli.vm_name);
if cli.only_vm {
return Ok(());
}
// --- Step 2: onboard the VM into the fleet ---
let ssh_privkey = cli
.ssh_privkey
.clone()
.context("--ssh-privkey is required")?;
let agent_binary = cli
.agent_binary
.clone()
.context("--agent-binary is required")?;
.context("--agent-binary is required (e.g. target/release/iot-agent-v0)")?;
let device_id = cli
.device_id
.clone()
.map(Id::from)
.unwrap_or_else(Id::default);
let linux_topology = LinuxHostTopology::new(
format!("linux-{}", cli.vm_name),
vm_ip.parse().context("VM IP is not a valid IP address")?,
SshCredentials {
user: cli.admin_user.clone(),
private_key_path: ssh_privkey,
private_key_path: ssh.private_key.clone(),
remote_python: Some("/usr/bin/python3".to_string()),
},
);
let setup_score = IotDeviceSetupScore::new(IotDeviceSetupConfig {
device_id: cli.device_id.clone(),
device_id: device_id.clone(),
group: cli.group.clone(),
nats_urls: vec![cli.nats_url.clone()],
nats_user: cli.nats_user.clone(),
@@ -166,27 +190,29 @@ async fn main() -> Result<()> {
run_setup_score(&setup_score, &linux_topology).await?;
println!(
"device '{}' (group '{}') onboarded via {vm_ip}",
cli.device_id, cli.group
"device '{device_id}' (group '{}') onboarded via {vm_ip}",
cli.group
);
Ok(())
}
async fn run_vm_score(score: &KvmVmScore, topology: &KvmHostTopology) -> Result<String> {
async fn run_vm_score(
score: &ProvisionVmScore,
topology: &KvmVirtualMachineHost,
) -> Result<String> {
use harmony::score::Score;
let inventory = Inventory::empty();
let interpret = Score::<KvmHostTopology>::create_interpret(score);
let interpret = Score::<KvmVirtualMachineHost>::create_interpret(score);
let outcome = interpret
.execute(&inventory, topology)
.await
.map_err(|e| anyhow::anyhow!("KvmVmScore execute: {e}"))?;
// The outcome details carry the IP as `ip=<addr>`.
.map_err(|e| anyhow::anyhow!("ProvisionVmScore execute: {e}"))?;
for d in &outcome.details {
if let Some(ip) = d.strip_prefix("ip=") {
return Ok(ip.to_string());
if let Some(v) = d.strip_prefix("ip=") {
return Ok(v.to_string());
}
}
anyhow::bail!("KvmVmScore finished without reporting an IP: {outcome:?}")
anyhow::bail!("ProvisionVmScore finished without reporting an IP: {outcome:?}")
}
async fn run_setup_score(score: &IotDeviceSetupScore, topology: &LinuxHostTopology) -> Result<()> {

View File

@@ -18,6 +18,7 @@ reqwest = { version = "0.11", features = [
"cookies",
"json",
"rustls-tls",
"stream",
], default-features = false }
russh = "0.45.0"
rust-ipmi = "0.1.1"

View File

@@ -1,76 +1,96 @@
//! Split host-configuration capabilities.
//!
//! Originally a single `HostConfigurationProvider` interface, now
//! broken into narrower concerns so implementations only need to
//! implement what they can actually deliver. An Ansible-over-SSH
//! adapter implements all of them; a future cloud-init / ignition /
//! podman-agent backend would implement a subset and leave the rest
//! to other topologies.
//!
//! The convenience umbrella [`LinuxHostConfiguration`] is blanket-
//! impl'd for any type implementing all five capabilities, so Scores
//! that need "a Linux host we can fully configure" can use one bound
//! rather than five.
//!
//! Contract for every `ensure_*` method: converge the host to the
//! desired state and return [`ChangeReport`] indicating whether any
//! change was applied. Scores compose these into reconcile-restart
//! logic (e.g. only bounce the service if its unit or config file
//! actually changed).
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use crate::executors::ExecutorError;
/// Capability: apply idempotent, host-level configuration to a single
/// remote machine — package installs, user accounts, files, systemd units.
///
/// **Scope.** Intentionally narrow: the subset of configuration-management
/// primitives the IoT device setup flow needs. This is explicitly *not* a
/// general Ansible/Puppet/Chef replacement — if a Score needs templating,
/// loops over hosts, handler triggers, or distro-specific branching, that
/// Score should be decomposed until it fits these primitives (or a new
/// capability trait should be added deliberately).
///
/// **Idempotency contract.** Every method converges the host to the given
/// desired state and returns [`ChangeReport`] indicating whether any change
/// was actually made. Callers compose these into reconcile-restart logic
/// (e.g. only `systemctl restart` the service if its unit file or its
/// config file was reported changed).
///
/// **Implementation note.** The concrete impl used on-device today is
/// [`crate::modules::linux::AnsibleHostConfigurator`], which shells out to
/// `ansible-playbook` with a generated one-task play per call. The trait is
/// deliberately Ansible-agnostic so a Rust-native impl can be dropped in
/// later without Score changes.
// ---------------------------------------------------------------------
// Capability traits (narrow, individually implementable)
// ---------------------------------------------------------------------
/// Reachability check. Every other capability implicitly requires the
/// host to be reachable; this trait exists so Scores can preflight
/// before committing to larger work.
#[async_trait]
pub trait HostConfigurationProvider: Send + Sync {
/// Test reachability. Implementations should exercise the same
/// transport the other methods use (SSH typically). Used as a
/// preflight by Scores before attempting real work.
pub trait HostReachable: Send + Sync {
async fn ping(&self) -> Result<(), ExecutorError>;
}
/// Ensure a package is installed. Distro-agnostic: the implementation
/// picks the right package manager.
/// Install distro packages. Intentionally distro-agnostic at the trait
/// level — the implementation picks apt/dnf/pacman/apk based on the
/// host's detected family. Name comes from /etc/os-release-style
/// package names (so `podman` not `containers/podman`).
#[async_trait]
pub trait PackageInstaller: Send + Sync {
async fn ensure_package(&self, name: &str) -> Result<ChangeReport, ExecutorError>;
}
/// Ensure a user account exists with the given spec. Only the fields
/// in [`UserSpec`] are managed — other attributes of an existing user
/// are left alone.
async fn ensure_user(&self, spec: &UserSpec) -> Result<ChangeReport, ExecutorError>;
/// Ensure a file exists with exactly the given content, owner, and
/// mode. Atomic replacement; returns `changed: true` only if the file
/// was created or its content/owner/mode differed.
/// Deliver a file to a specific path on the host, atomically.
#[async_trait]
pub trait FileDelivery: Send + Sync {
async fn ensure_file(&self, spec: &FileSpec) -> Result<ChangeReport, ExecutorError>;
}
/// Ensure a systemd unit file exists and is enabled (optionally
/// started). Kept separate from [`ensure_file`] so the implementation
/// can handle `daemon-reload` + enable/start in one atomic operation
/// and report accurate change state.
/// Create and manage unix user accounts (POSIX systems).
///
/// Split from [`SystemdManager`] because some hosts run user accounts
/// without systemd (e.g. Alpine default). The linger-related method
/// belongs here because `loginctl enable-linger` is a
/// systemd-logind-specific operation on the *user* rather than on a
/// service.
#[async_trait]
pub trait UnixUserManager: Send + Sync {
async fn ensure_user(&self, spec: &UserSpec) -> Result<ChangeReport, ExecutorError>;
/// Enable `loginctl enable-linger` for a user so their systemd
/// user session (and any user-scoped services like
/// `podman.socket`) survives logout. Implemented via whatever
/// systemd-aware transport the adapter uses.
async fn ensure_linger(&self, user: &str) -> Result<ChangeReport, ExecutorError>;
}
/// Systemd-specific service lifecycle. Separated from file delivery
/// because writing a unit file and enabling/starting it are
/// conceptually one operation that the adapter can batch (daemon-
/// reload etc.).
#[async_trait]
pub trait SystemdManager: Send + Sync {
async fn ensure_systemd_unit(
&self,
spec: &SystemdUnitSpec,
) -> Result<ChangeReport, ExecutorError>;
/// Restart a systemd unit. Unconditional — used by Scores that
/// detected a config change that the service wouldn't pick up
/// otherwise.
/// Restart a unit unconditionally. Intended for use after a
/// caller-detected config change that the service wouldn't pick
/// up otherwise.
async fn restart_service(
&self,
name: &str,
scope: SystemdScope,
) -> Result<ChangeReport, ExecutorError>;
/// Enable `loginctl enable-linger` for a user, so their systemd user
/// session (and any user-scoped services like `podman.socket`)
/// survives logout. Idempotent.
async fn ensure_linger(&self, user: &str) -> Result<ChangeReport, ExecutorError>;
/// Enable+start a user-scoped systemd unit for the given user (e.g.
/// `podman.socket` under `iot-agent`). Assumes linger is already
/// configured.
/// Enable+start a user-scoped unit (e.g. `podman.socket` under
/// `iot-agent`). Assumes [`UnixUserManager::ensure_linger`] has
/// already been called for the user.
async fn ensure_user_unit_active(
&self,
user: &str,
@@ -78,8 +98,33 @@ pub trait HostConfigurationProvider: Send + Sync {
) -> Result<ChangeReport, ExecutorError>;
}
// ---------------------------------------------------------------------
// Umbrella trait (auto-impl) for Scores that want all of the above
// ---------------------------------------------------------------------
/// Convenience trait auto-implemented for any type that has the full
/// Linux-host configuration toolkit. Scores can use this one bound
/// rather than repeating five.
///
/// Intentionally *not* usable as an object-safe trait object — only
/// as a generic bound. Impls should implement each capability
/// individually.
pub trait LinuxHostConfiguration:
HostReachable + PackageInstaller + FileDelivery + UnixUserManager + SystemdManager
{
}
impl<T> LinuxHostConfiguration for T where
T: HostReachable + PackageInstaller + FileDelivery + UnixUserManager + SystemdManager
{
}
// ---------------------------------------------------------------------
// Shared types
// ---------------------------------------------------------------------
/// Whether the host state matched the desired spec already (`changed:
/// false`, a NOOP) or was modified by this call (`changed: true`).
/// false`, a NOOP) or was modified by this call.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct ChangeReport {
pub changed: bool,
@@ -93,18 +138,17 @@ impl ChangeReport {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserSpec {
pub name: String,
/// If Some, create or update the system group of this name before
/// creating the user.
/// Primary group. None lets the OS default take over (on
/// Debian/Ubuntu with USERGROUPS_ENAB yes, useradd auto-creates a
/// group matching the username).
pub group: Option<String>,
/// Additional supplementary groups to add the user to (e.g. `["wheel",
/// "docker"]`).
/// Additional supplementary groups (append-mode).
pub supplementary_groups: Vec<String>,
/// Absolute path to the login shell, or None for the distro default.
/// Absolute login shell path; None → distro default.
pub shell: Option<String>,
/// If true, create with `--system` (UID in the system range, no aging,
/// typically no login). Service-account flavour.
/// If true, create with `--system` (UID in the system range, no
/// aging, typically no login).
pub system: bool,
/// If true, create a home directory.
pub create_home: bool,
}
@@ -112,19 +156,15 @@ pub struct UserSpec {
pub struct FileSpec {
/// Absolute path on the remote host.
pub path: String,
/// Source of the file's content. Use `Content` for small generated
/// files (configs, systemd units); use `LocalPath` for anything
/// large or binary (agent binaries, shipped assets) — inline
/// content rides the argv for SSH transport and hits `ARG_MAX` at
/// a few MB.
/// Source of the file's content.
pub source: FileSource,
/// Owner (user name). None means leave current owner alone if the
/// file already exists; on create defaults to root.
/// Owner (user name). None leaves the current owner alone on
/// update; defaults to root on create.
pub owner: Option<String>,
/// Group name. Same semantics as `owner`.
pub group: Option<String>,
/// POSIX mode (octal), e.g. 0o644. None means leave current mode
/// alone; on create defaults to 0o644.
/// POSIX mode (octal), e.g. 0o644. None leaves the current mode
/// alone; defaults to 0o644 on create.
pub mode: Option<u32>,
}
@@ -133,10 +173,10 @@ pub enum FileSource {
/// UTF-8 content to materialize on the remote host.
Content(String),
/// Absolute path on the Harmony-runner host pointing at a file to
/// be shipped to `path`. Works for binary files. The implementation
/// compares the remote file's content against this one and returns
/// `changed: false` when they already match.
LocalPath(std::path::PathBuf),
/// be shipped to `path`. Works for binary files. The implementation
/// compares remote content before rewriting (returning `changed:
/// false` when they already match).
LocalPath(PathBuf),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -146,8 +186,7 @@ pub struct SystemdUnitSpec {
/// Exact content of the unit file.
pub unit_content: String,
pub scope: SystemdScope,
/// If true, `systemctl enable --now`; if false, `enable` only (caller
/// will start it later, or it's a one-shot driven by another unit).
/// If true, `systemctl enable --now`; else `enable` only.
pub start_immediately: bool,
}
@@ -157,8 +196,8 @@ pub enum SystemdScope {
User(UserName),
}
/// Wrapper over a username for the User scope, mostly to stop
/// `SystemdScope::User(String)` from begging for ad-hoc empty strings.
/// Username newtype for `SystemdScope::User` so the variant's field is
/// typed rather than a freeform String.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct UserName(pub String);

View File

@@ -35,9 +35,11 @@ pub use tftp::*;
mod container_runtime;
mod helm_command;
mod host_configuration;
mod virtualization;
pub use container_runtime::*;
pub use helm_command::*;
pub use host_configuration::*;
pub use virtualization::*;
use super::{
executors::ExecutorError,

View File

@@ -0,0 +1,141 @@
//! Generic virtualization capability.
//!
//! [`VirtualMachineHost`] is the abstraction Scores target when they
//! want a VM. The trait intentionally doesn't name a hypervisor — an
//! impl backed by KVM/libvirt sits in `modules::kvm`, but the same
//! trait could be implemented for VMware, Proxmox, Hyper-V, or a
//! cloud provider's API.
//!
//! **Scope.** What we need today for the IoT smoke test and for
//! future CI/dev environments: ensure a VM exists with a given CPU
//! count, memory size, disk size, and first-boot configuration; tear
//! it down; read its runtime state. Deliberately no live migration,
//! snapshots, disk attach/detach, or NIC hotplug — they belong in
//! follow-on capabilities when a real use case surfaces.
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::net::IpAddr;
use crate::executors::ExecutorError;
#[async_trait]
pub trait VirtualMachineHost: Send + Sync {
async fn list_vms(&self) -> Result<Vec<VirtualMachineRuntimeInfo>, ExecutorError>;
/// Create-or-update a VM matching `spec`. Idempotent: re-running
/// against an unchanged spec returns the existing VM's runtime
/// info. On first call or after destructive changes, boots the VM
/// and waits for an IP address (implementation-defined timeout).
async fn ensure_vm(
&self,
spec: &VirtualMachineSpec,
) -> Result<VirtualMachineRuntimeInfo, ExecutorError>;
/// Stop and remove the VM, including its managed storage. No-op
/// if the VM does not exist.
async fn delete_vm(&self, name: &str) -> Result<(), ExecutorError>;
/// Read current runtime info for a VM by name. `None` if the VM
/// doesn't exist.
async fn get_vm_info(
&self,
name: &str,
) -> Result<Option<VirtualMachineRuntimeInfo>, ExecutorError>;
}
/// Guest CPU architecture. Determines emulator binary, machine
/// type, CPU model, and firmware on the KVM/libvirt backend.
///
/// Defaults to [`VmArchitecture::X86_64`] so existing call sites
/// continue to compile without change — opt into arm64 by setting
/// the field explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum VmArchitecture {
#[default]
X86_64,
Aarch64,
}
impl VmArchitecture {
/// Short canonical name used in URLs, filenames, and log lines
/// (`"x86_64"`, `"aarch64"`). Note this is *not* the naming Ubuntu
/// uses for its cloud images (`…-cloudimg-amd64.img` vs
/// `…-cloudimg-arm64.img`); see [`ubuntu_cloudimg_suffix`] for the
/// image-naming convention.
pub fn as_str(&self) -> &'static str {
match self {
Self::X86_64 => "x86_64",
Self::Aarch64 => "aarch64",
}
}
/// Suffix Ubuntu uses in its cloud image filenames (`amd64` vs
/// `arm64`). Different from [`as_str`] because Ubuntu doesn't
/// follow the Linux `uname -m` convention.
pub fn ubuntu_cloudimg_suffix(&self) -> &'static str {
match self {
Self::X86_64 => "amd64",
Self::Aarch64 => "arm64",
}
}
}
/// Declarative description of a VM the caller wants to exist.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VirtualMachineSpec {
pub name: String,
/// Guest CPU architecture. Defaults to
/// [`VmArchitecture::X86_64`]; set to
/// [`VmArchitecture::Aarch64`] to run an arm64 guest (emulated
/// via TCG on x86_64 hosts; KVM-accelerated on aarch64 hosts).
#[serde(default)]
pub architecture: VmArchitecture,
pub cpus: u32,
pub memory_mib: u64,
/// `None` → inherit from the hypervisor's default (e.g. the
/// backing cloud image's default disk size).
pub disk_size_gb: Option<u32>,
/// Name of the network to attach the VM to. For KVM this is a
/// libvirt network name (`default` = the shipped NAT bridge).
pub network: String,
/// Optional first-boot configuration for hypervisors that
/// support it (KVM via cloud-init, VMware via OVF properties,
/// Proxmox via cloud-init). Hypervisors without a mechanism for
/// this return an error if it's set and they can't honour it.
pub first_boot: Option<VmFirstBootConfig>,
}
/// First-boot declarative config. Hypervisor-agnostic; each impl
/// translates to its native mechanism.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VmFirstBootConfig {
/// Guest hostname. `None` → use the image default.
pub hostname: Option<String>,
/// Username to provision with passwordless sudo + the
/// [`authorized_keys`]. `None` → reuse the image's default
/// user (`ubuntu` for Ubuntu cloud images, etc.).
pub admin_user: Option<String>,
/// Public SSH keys (OpenSSH single-line format) to authorize for
/// the admin user.
pub authorized_keys: Vec<String>,
}
/// Observed runtime info for a VM.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VirtualMachineRuntimeInfo {
pub name: String,
pub state: VmState,
/// Primary IPv4 of the VM, if it's running and has one.
pub ip: Option<IpAddr>,
/// Free-form identifier of the backing hypervisor ("kvm",
/// "vmware", "proxmox", …). Diagnostic only.
pub hypervisor: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum VmState {
Running,
Stopped,
Unknown,
}

View File

@@ -0,0 +1,300 @@
//! Bootstrapped assets shared across IoT workflows.
//!
//! Everything here follows the `ensure_*` pattern — idempotent, caches
//! results under [`HARMONY_DATA_DIR`]`/iot/…`, and runs at most once per
//! process (enforced by a `tokio::sync::OnceCell`). The goal is that an
//! operator can run the IoT smoke test against a freshly-installed host
//! with nothing but `libvirt + qemu + xorriso + python3 + cargo +
//! podman` installed — no manual image downloads, no `ssh-keygen`, no
//! `chmod` rituals.
use std::path::{Path, PathBuf};
use std::process::Stdio;
use log::{info, warn};
use sha2::{Digest, Sha256};
use tokio::io::AsyncWriteExt;
use tokio::process::Command;
use tokio::sync::OnceCell;
use crate::domain::config::HARMONY_DATA_DIR;
use crate::domain::topology::VmArchitecture;
use crate::executors::ExecutorError;
// ---------------------------------------------------------------------
// Cloud image
// ---------------------------------------------------------------------
/// Pinned Ubuntu 24.04 server cloud image (x86_64 / amd64). **Updating
/// this constant requires updating [`UBUNTU_2404_CLOUDIMG_SHA256`] at
/// the same time**; download is rejected on hash mismatch.
///
/// The upstream URL is the "current release" pointer, which rotates
/// when Canonical pushes a point-release. When that happens, this
/// constant's sha256 stops matching and `ensure_cloud_image` fails with
/// a clear diff — bump both in one commit.
pub const UBUNTU_2404_CLOUDIMG_URL: &str =
"https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img";
pub const UBUNTU_2404_CLOUDIMG_SHA256: &str =
"5c3ddb00f60bc455dac0862fabe9d8bacec46c33ac1751143c5c3683404b110d";
pub const UBUNTU_2404_CLOUDIMG_FILENAME: &str = "ubuntu-24.04-server-cloudimg-amd64.img";
/// Pinned Ubuntu 24.04 server cloud image (aarch64 / arm64). Same
/// update semantics as the amd64 pair.
pub const UBUNTU_2404_CLOUDIMG_ARM64_URL: &str =
"https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img";
pub const UBUNTU_2404_CLOUDIMG_ARM64_SHA256: &str =
"1ea801e659d2f5035ac294e0faab0aac9b6ba66753df933ba5c7beab0c689bd0";
pub const UBUNTU_2404_CLOUDIMG_ARM64_FILENAME: &str = "ubuntu-24.04-server-cloudimg-arm64.img";
/// Ensure the pinned Ubuntu 24.04 cloud image for `arch` is present
/// on disk. Returns the path to the cached image. Slow path
/// (download + sha256 verify) runs at most once per process per
/// architecture — separate `OnceCell` per arch keeps the cache hits
/// symmetric.
pub async fn ensure_ubuntu_2404_cloud_image_for_arch(
arch: VmArchitecture,
) -> Result<PathBuf, ExecutorError> {
// Per-arch OnceCell. Matches the `ensure_ansible_venv` pattern —
// first call downloads, subsequent calls hit the cache in one
// `exists()` stat.
static X86_64: OnceCell<PathBuf> = OnceCell::const_new();
static AARCH64: OnceCell<PathBuf> = OnceCell::const_new();
let cell = match arch {
VmArchitecture::X86_64 => &X86_64,
VmArchitecture::Aarch64 => &AARCH64,
};
let (url, sha256, filename) = match arch {
VmArchitecture::X86_64 => (
UBUNTU_2404_CLOUDIMG_URL,
UBUNTU_2404_CLOUDIMG_SHA256,
UBUNTU_2404_CLOUDIMG_FILENAME,
),
VmArchitecture::Aarch64 => (
UBUNTU_2404_CLOUDIMG_ARM64_URL,
UBUNTU_2404_CLOUDIMG_ARM64_SHA256,
UBUNTU_2404_CLOUDIMG_ARM64_FILENAME,
),
};
cell.get_or_try_init(|| async move { ensure_cloud_image(url, sha256, filename).await })
.await
.cloned()
}
/// Back-compat shim — returns the x86_64 image. Prefer
Review

This should leverage harmony_assets crate, 90% of the code here is duplication.
/// [`ensure_ubuntu_2404_cloud_image_for_arch`] when the arch is
/// known at the call site.
pub async fn ensure_ubuntu_2404_cloud_image() -> Result<PathBuf, ExecutorError> {
ensure_ubuntu_2404_cloud_image_for_arch(VmArchitecture::X86_64).await
}
async fn ensure_cloud_image(
url: &str,
expected_sha256: &str,
filename: &str,
) -> Result<PathBuf, ExecutorError> {
let dir = cloud_images_dir();
tokio::fs::create_dir_all(&dir)
.await
.map_err(|e| exec(format!("create cloud-images dir {dir:?}: {e}")))?;
// Make the cache directory world-traversable so libvirt-qemu can
// walk into it when reading the image. This is the perms
// concession that lets us stick with direct file paths for now.
make_world_traversable(&dir).await?;
let target = dir.join(filename);
if target.exists() {
let actual = sha256_of_file(&target).await?;
if actual == expected_sha256 {
info!("cloud image cache hit at {target:?}");
return Ok(target);
}
warn!(
"cached cloud image sha256 mismatch (expected {expected_sha256}, got {actual}); \
re-downloading to {target:?}"
);
tokio::fs::remove_file(&target)
.await
.map_err(|e| exec(format!("remove stale image: {e}")))?;
}
info!("downloading cloud image {url} → {target:?} (one-time)");
download_to(url, &target).await?;
// Re-verify the download we just did.
let actual = sha256_of_file(&target).await?;
if actual != expected_sha256 {
let _ = tokio::fs::remove_file(&target).await;
return Err(exec(format!(
"downloaded image sha256 mismatch: expected {expected_sha256}, got {actual}. \
Ubuntu may have rotated the 'current release' pointer — bump the pin in \
modules::iot::assets.rs."
)));
}
// World-readable so libvirt-qemu can open it without a chmod ritual.
tokio::fs::set_permissions(&target, std::os::unix::fs::PermissionsExt::from_mode(0o644))
.await
.map_err(|e| exec(format!("chmod image: {e}")))?;
Ok(target)
}
/// Stream-download `url` to `target` with `reqwest` so we don't have
/// to buffer a ~600MB qcow2 in memory; the caller verifies the sha256
/// afterwards.
async fn download_to(url: &str, target: &Path) -> Result<(), ExecutorError> {
let client = reqwest::Client::builder()
.build()
.map_err(|e| exec(format!("reqwest build: {e}")))?;
let resp = client
.get(url)
.send()
.await
.map_err(|e| exec(format!("GET {url}: {e}")))?;
if !resp.status().is_success() {
return Err(exec(format!(
"GET {url} returned {status}",
status = resp.status()
)));
}
let mut out = tokio::fs::File::create(target)
.await
.map_err(|e| exec(format!("create {target:?}: {e}")))?;
use futures_util::StreamExt;
let mut stream = resp.bytes_stream();
while let Some(chunk) = stream.next().await {
let chunk = chunk.map_err(|e| exec(format!("download chunk: {e}")))?;
out.write_all(&chunk)
.await
.map_err(|e| exec(format!("write chunk: {e}")))?;
}
out.flush()
.await
.map_err(|e| exec(format!("flush {target:?}: {e}")))?;
Ok(())
}
async fn sha256_of_file(path: &Path) -> Result<String, ExecutorError> {
use tokio::io::AsyncReadExt;
let mut file = tokio::fs::File::open(path)
.await
.map_err(|e| exec(format!("open {path:?}: {e}")))?;
let mut hasher = Sha256::new();
let mut buf = vec![0u8; 1 << 20]; // 1 MiB
loop {
let n = file
.read(&mut buf)
.await
.map_err(|e| exec(format!("read {path:?}: {e}")))?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(hex::encode(hasher.finalize()))
}
fn cloud_images_dir() -> PathBuf {
HARMONY_DATA_DIR.join("iot").join("cloud-images")
}
// ---------------------------------------------------------------------
// SSH keypair
// ---------------------------------------------------------------------
/// Pair of on-disk paths to Harmony's per-user IoT SSH keypair. The
/// same key identifies every VM we provision for smoke/integration
/// testing — cheap to reuse, easy to discard (just `rm -rf` the dir).
#[derive(Debug, Clone)]
pub struct IotSshKeypair {
pub private_key: PathBuf,
pub public_key: PathBuf,
}
/// Ensure `$HARMONY_DATA_DIR/iot/ssh/id_ed25519[.pub]` exists. Runs
/// `ssh-keygen` once; subsequent calls return the existing paths.
pub async fn ensure_iot_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
static CELL: OnceCell<IotSshKeypair> = OnceCell::const_new();
CELL.get_or_try_init(provision_ssh_keypair).await.cloned()
}
async fn provision_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
let dir = HARMONY_DATA_DIR.join("iot").join("ssh");
tokio::fs::create_dir_all(&dir)
.await
.map_err(|e| exec(format!("create ssh dir {dir:?}: {e}")))?;
tokio::fs::set_permissions(&dir, std::os::unix::fs::PermissionsExt::from_mode(0o700))
.await
.map_err(|e| exec(format!("chmod ssh dir: {e}")))?;
let priv_path = dir.join("id_ed25519");
let pub_path = dir.join("id_ed25519.pub");
if priv_path.exists() && pub_path.exists() {
info!("ssh keypair cache hit at {priv_path:?}");
return Ok(IotSshKeypair {
private_key: priv_path,
public_key: pub_path,
});
}
// Remove stragglers from a partial previous run.
let _ = tokio::fs::remove_file(&priv_path).await;
let _ = tokio::fs::remove_file(&pub_path).await;
info!("generating ed25519 ssh keypair at {priv_path:?} (one-time)");
let status = Command::new("ssh-keygen")
.arg("-t")
Review

never list arg .arg.arg.arg.arg use .args([...]) instead.
.arg("ed25519")
.arg("-N")
.arg("") // no passphrase
.arg("-C")
.arg("harmony-iot-smoke")
.arg("-f")
.arg(&priv_path)
.stdout(Stdio::null())
.stderr(Stdio::piped())
.output()
.await
.map_err(|e| exec(format!("spawn ssh-keygen: {e}")))?;
if !status.status.success() {
return Err(exec(format!(
"ssh-keygen failed: {}",
String::from_utf8_lossy(&status.stderr).trim()
)));
}
Ok(IotSshKeypair {
private_key: priv_path,
public_key: pub_path,
})
}
/// Read the generated public key (one line, openssh format) into a string
/// suitable for cloud-init's `authorized_keys`.
pub async fn read_public_key(kp: &IotSshKeypair) -> Result<String, ExecutorError> {
let content = tokio::fs::read_to_string(&kp.public_key)
.await
.map_err(|e| exec(format!("read {:?}: {e}", kp.public_key)))?;
Ok(content.trim().to_string())
}
// ---------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------
async fn make_world_traversable(dir: &Path) -> Result<(), ExecutorError> {
// Libvirt-qemu runs as a different user and needs to traverse our
// dirs to read the images we've placed there. 0755 on the whole
// chain from HARMONY_DATA_DIR down is the minimum that works
// without asking the operator to fiddle with ACLs.
//
// We only adjust the terminal dir here; parents are assumed to be
// world-traversable already (true by default on every distro's
// data-dir layout: `~/.local/share` is 755 on user create).
tokio::fs::set_permissions(dir, std::os::unix::fs::PermissionsExt::from_mode(0o755))
.await
.map_err(|e| exec(format!("chmod {dir:?}: {e}")))?;
Ok(())
}
fn exec(msg: impl Into<String>) -> ExecutorError {
ExecutorError::UnexpectedError(msg.into())
}

View File

@@ -0,0 +1,130 @@
//! Managed libvirt storage pool for IoT smoke runs.
//!
//! The first time a Harmony IoT workflow runs on a host, it needs a
//! writable place to drop per-VM overlay disks + cloud-init seed ISOs.
//! Rather than ask the operator to set that up, we create a user-
//! owned dir-backed libvirt pool at
//! `$HARMONY_DATA_DIR/iot/kvm/pool/` and let libvirt handle:
//!
//! - **Perms**: dir contents get chowned to libvirt-qemu on VM start
//! via dynamic-ownership (default-on), and back to us on VM stop
//! (via remember_owner, also default-on). No `chmod 644` gymnastics.
//! - **Visibility**: `virsh vol-list harmony-iot` shows every
//! artifact we've created.
//! - **Cleanup**: `virsh vol-delete <name> harmony-iot` removes
//! managed volumes alongside `virsh undefine --remove-all-storage`.
//!
//! We *don't* rewrite the VM XML to use `<source pool="…" volume="…"/>`
//! yet — the existing `<source file="…"/>` form is fine because files
//! inside a pool dir still benefit from dynamic ownership. The pool
//! is effectively an "annotation" telling libvirt "you may touch this
//! dir's perms."
use std::path::PathBuf;
use log::info;
use tokio::sync::OnceCell;
use virt::connect::Connect;
use virt::storage_pool::StoragePool;
use crate::domain::config::HARMONY_DATA_DIR;
use crate::executors::ExecutorError;
pub const HARMONY_IOT_POOL_NAME: &str = "harmony-iot";
/// Filesystem path + libvirt name of the managed pool.
#[derive(Debug, Clone)]
pub struct HarmonyIotPool {
pub name: String,
pub path: PathBuf,
}
/// Ensure the Harmony IoT libvirt storage pool exists, is started, and
/// is set to autostart. Idempotent; runs its slow path at most once per
/// process.
///
/// **Requires libvirt-group membership**. When the user isn't in the
/// group, libvirt rejects the `qemu:///system` connection — the
/// preflight check catches that upstream.
pub async fn ensure_harmony_iot_pool() -> Result<HarmonyIotPool, ExecutorError> {
static CELL: OnceCell<HarmonyIotPool> = OnceCell::const_new();
CELL.get_or_try_init(provision_pool).await.cloned()
}
async fn provision_pool() -> Result<HarmonyIotPool, ExecutorError> {
let pool_dir = HARMONY_DATA_DIR.join("iot").join("kvm").join("pool");
tokio::fs::create_dir_all(&pool_dir)
.await
.map_err(|e| exec(format!("create pool dir {pool_dir:?}: {e}")))?;
// Let libvirt-qemu walk into it; dynamic ownership handles file
// chmod on VM start.
tokio::fs::set_permissions(
&pool_dir,
std::os::unix::fs::PermissionsExt::from_mode(0o755),
)
.await
.map_err(|e| exec(format!("chmod pool dir: {e}")))?;
let pool_path = pool_dir.clone();
let pool_name = HARMONY_IOT_POOL_NAME.to_string();
// virt-rs is blocking C bindings — bounce into spawn_blocking.
let pool_name_blocking = pool_name.clone();
let pool_path_blocking = pool_path.clone();
tokio::task::spawn_blocking(move || -> Result<(), ExecutorError> {
let conn = Connect::open(Some("qemu:///system"))
.map_err(|e| exec(format!("libvirt connect qemu:///system: {e}")))?;
let (pool, is_fresh) = match StoragePool::lookup_by_name(&conn, &pool_name_blocking) {
Ok(p) => (p, false),
Err(_) => {
let xml = pool_xml(&pool_name_blocking, &pool_path_blocking);
info!("defining libvirt pool '{pool_name_blocking}' → {pool_path_blocking:?}");
let p = StoragePool::define_xml(&conn, &xml, 0)
.map_err(|e| exec(format!("define pool: {e}")))?;
(p, true)
}
};
// `pool-build` creates the dir layout a dir-pool expects; only
// needed on first definition. Libvirt rejects `build` on an
// already-active pool.
if is_fresh {
pool.build(0)
.map_err(|e| exec(format!("pool build: {e}")))?;
}
let active = pool
.is_active()
.map_err(|e| exec(format!("pool is_active: {e}")))?;
if !active {
info!("starting libvirt pool '{pool_name_blocking}'");
pool.create(0)
.map_err(|e| exec(format!("pool create/start: {e}")))?;
}
pool.set_autostart(true)
.map_err(|e| exec(format!("pool set_autostart: {e}")))?;
Ok(())
})
.await
.map_err(|e| exec(format!("spawn_blocking pool setup: {e}")))??;
Ok(HarmonyIotPool {
name: pool_name,
path: pool_path,
})
}
fn pool_xml(name: &str, path: &std::path::Path) -> String {
format!(
r#"<pool type='dir'>
<name>{name}</name>
<target>
<path>{path}</path>
</target>
</pool>"#,
name = name,
path = path.display(),
)
}
fn exec(msg: impl Into<String>) -> ExecutorError {
ExecutorError::UnexpectedError(msg.into())
}

View File

@@ -11,6 +11,23 @@
//! they run inside the Harmony framework proper, driven by the same
//! `harmony_cli::run` story every other Score uses.
pub mod assets;
#[cfg(feature = "kvm")]
pub mod libvirt_pool;
pub mod preflight;
mod setup_score;
#[cfg(feature = "kvm")]
mod vm_score;
pub use assets::{
IotSshKeypair, UBUNTU_2404_CLOUDIMG_ARM64_FILENAME, UBUNTU_2404_CLOUDIMG_ARM64_SHA256,
UBUNTU_2404_CLOUDIMG_ARM64_URL, UBUNTU_2404_CLOUDIMG_FILENAME, UBUNTU_2404_CLOUDIMG_SHA256,
UBUNTU_2404_CLOUDIMG_URL, ensure_iot_ssh_keypair, ensure_ubuntu_2404_cloud_image,
ensure_ubuntu_2404_cloud_image_for_arch, read_public_key,
};
#[cfg(feature = "kvm")]
pub use libvirt_pool::{HARMONY_IOT_POOL_NAME, HarmonyIotPool, ensure_harmony_iot_pool};
pub use preflight::{check_iot_smoke_preflight, check_iot_smoke_preflight_for_arch};
pub use setup_score::{IotDeviceSetupConfig, IotDeviceSetupScore};
#[cfg(feature = "kvm")]
pub use vm_score::ProvisionVmScore;

View File

@@ -0,0 +1,166 @@
//! Fail-fast preflight checks for the IoT smoke test.
//!
//! The contract for a Harmony IoT smoke run is:
//! the operator installs a short list of generic packages on the runner
//! (kvm/libvirt/qemu, xorriso, python3, cargo, podman), puts their user
//! in the `libvirt` group, and starts the default libvirt network —
//! everything else is Harmony's problem. This module is where we verify
//! those preconditions and turn each missing piece into an
//! actionable error, rather than letting libvirt/virsh/ansible dump a
//! cryptic failure three layers in.
use std::process::Stdio;
use tokio::process::Command;
use crate::domain::topology::VmArchitecture;
use crate::executors::ExecutorError;
#[cfg(feature = "kvm")]
use crate::modules::kvm::firmware::discover_aarch64_firmware;
/// Run every preflight check for an x86_64 smoke run — equivalent
/// to [`check_iot_smoke_preflight_for_arch`] with
/// [`VmArchitecture::X86_64`]. Kept as a distinct function so
/// existing callers don't need to thread an arch through yet.
pub async fn check_iot_smoke_preflight() -> Result<(), ExecutorError> {
check_iot_smoke_preflight_for_arch(VmArchitecture::X86_64).await
}
/// Arch-aware preflight. On top of the host-generic checks
/// (virsh, qemu-img, xorriso, python3, ssh-keygen, libvirt group,
/// default network), an aarch64 target requires
/// `qemu-system-aarch64` and a usable AAVMF firmware pair.
pub async fn check_iot_smoke_preflight_for_arch(arch: VmArchitecture) -> Result<(), ExecutorError> {
check_tool_on_path("virsh", "libvirt client").await?;
check_tool_on_path("qemu-img", "qemu-utils").await?;
check_tool_on_path("xorriso", "ISO image builder").await?;
check_tool_on_path("python3", "for the managed Ansible venv").await?;
check_tool_on_path("ssh-keygen", "for bootstrapping the IoT SSH keypair").await?;
check_libvirt_group_membership().await?;
check_libvirt_default_network().await?;
if arch == VmArchitecture::Aarch64 {
check_tool_on_path("qemu-system-aarch64", "for aarch64 TCG emulation").await?;
// Runtime discovery: same call the topology makes at
// ensure_vm time — preflight surfaces it up front.
// Gated behind `kvm` because callers building `harmony`
// without `kvm` (e.g. the on-device agent) don't pull in
// libvirt at all; for them, aarch64 preflight simply
// stops after the qemu-system-aarch64 PATH check.
#[cfg(feature = "kvm")]
discover_aarch64_firmware()?;
}
Ok(())
}
async fn check_tool_on_path(name: &str, what_for: &str) -> Result<(), ExecutorError> {
let status = Command::new("sh")
.args(["-c", &format!("command -v {name}")])
.stdout(Stdio::null())
.stderr(Stdio::null())
.status()
.await
.map_err(|e| exec(format!("spawn `command -v`: {e}")))?;
if status.success() {
return Ok(());
}
Err(exec(format!(
"missing required tool: `{name}` ({what_for}) is not on PATH.\n \
Fix: install it one-time on this host. On Arch: `sudo pacman -S {arch_pkg}`. \
On Debian/Ubuntu: `sudo apt install {deb_pkg}`. On Fedora: `sudo dnf install {rpm_pkg}`.",
arch_pkg = arch_package_for(name),
deb_pkg = deb_package_for(name),
rpm_pkg = rpm_package_for(name),
)))
}
fn arch_package_for(tool: &str) -> String {
match tool {
"virsh" => "libvirt",
"qemu-img" => "qemu-img",
"qemu-system-aarch64" => "qemu-system-aarch64",
"xorriso" => "libisoburn",
"python3" => "python",
"ssh-keygen" => "openssh",
_ => return tool.to_string(),
}
.to_string()
}
fn deb_package_for(tool: &str) -> String {
match tool {
"virsh" => "libvirt-clients",
"qemu-img" => "qemu-utils",
"qemu-system-aarch64" => "qemu-system-arm",
"xorriso" => "xorriso",
"python3" => "python3 python3-venv",
"ssh-keygen" => "openssh-client",
_ => return tool.to_string(),
}
.to_string()
}
fn rpm_package_for(tool: &str) -> String {
match tool {
"virsh" => "libvirt-client",
"qemu-img" => "qemu-img",
"qemu-system-aarch64" => "qemu-system-aarch64",
"xorriso" => "xorriso",
"python3" => "python3 python3-pip",
"ssh-keygen" => "openssh-clients",
_ => return tool.to_string(),
}
.to_string()
}
async fn check_libvirt_group_membership() -> Result<(), ExecutorError> {
let output = Command::new("id")
.arg("-Gn")
.stdout(Stdio::piped())
.stderr(Stdio::null())
.output()
.await
.map_err(|e| exec(format!("spawn id: {e}")))?;
let groups = String::from_utf8_lossy(&output.stdout);
if groups.split_whitespace().any(|g| g == "libvirt") {
return Ok(());
}
Err(exec(
"current user is not in the `libvirt` group.\n \
Fix: `sudo usermod -aG libvirt $USER` and then log out + back in (or `newgrp libvirt` \
for this shell). Needed so Harmony can manage a user-owned libvirt storage pool \
without sudo.",
))
}
async fn check_libvirt_default_network() -> Result<(), ExecutorError> {
let output = Command::new("virsh")
.args(["--connect", "qemu:///system", "net-info", "default"])
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output()
.await
.map_err(|e| exec(format!("spawn virsh: {e}")))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(exec(format!(
"libvirt `default` network is missing or unreachable: {}.\n \
Fix: ensure libvirtd is running (`sudo systemctl enable --now libvirtd`), \
then `sudo virsh net-define /usr/share/libvirt/networks/default.xml` and \
`sudo virsh net-start default && sudo virsh net-autostart default`.",
stderr.trim()
)));
}
let info = String::from_utf8_lossy(&output.stdout);
    // `virsh net-info` pads its columns ("Active:         yes"), so compare
    // token-wise rather than against a single-space literal.
    let is_active = info.lines().any(|l| {
        let mut fields = l.split_whitespace();
        fields.next() == Some("Active:") && fields.next() == Some("yes")
    });
    if !is_active {
        return Err(exec(
            "libvirt `default` network exists but is not active.\n \
             Fix: `sudo virsh net-start default && sudo virsh net-autostart default`.",
        ));
    }
Ok(())
}
fn exec(msg: impl Into<String>) -> ExecutorError {
ExecutorError::UnexpectedError(msg.into())
}
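
To make the operator contract above concrete, here is a minimal sketch of a caller that gates on the arch-aware preflight before any libvirt work starts. The `run_arm64_smoke` entrypoint is hypothetical and not part of this diff; only `check_iot_smoke_preflight_for_arch`, `VmArchitecture`, and `ExecutorError` come from the code above, and the import paths assume the module sits inside the same crate layout shown here.

```rust
use crate::domain::topology::VmArchitecture;
use crate::executors::ExecutorError;

// Hypothetical smoke-test entrypoint: fail fast with the actionable
// preflight error instead of letting a missing tool surface as a cryptic
// libvirt/virsh/ansible failure three layers in.
async fn run_arm64_smoke() -> Result<(), ExecutorError> {
    check_iot_smoke_preflight_for_arch(VmArchitecture::Aarch64).await?;
    // ...provision the aarch64 VM, install the agent, assert end-to-end...
    Ok(())
}
```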

View File

@@ -16,8 +16,9 @@ use crate::domain::interpret::{
};
use crate::domain::inventory::Inventory;
use crate::domain::topology::{
ChangeReport, FileSource, FileSpec, HostConfigurationProvider, SystemdScope, SystemdUnitSpec,
Topology, UserSpec,
ChangeReport, FileDelivery, FileSource, FileSpec, HostReachable, LinuxHostConfiguration,
PackageInstaller, SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager,
UserSpec,
};
use crate::score::Score;
@@ -33,9 +34,12 @@ use crate::score::Score;
/// fleet partitions once group routing lands.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IotDeviceSetupConfig {
/// Stable device identifier. Written into the agent's TOML and used
/// as the KV key prefix (`<device_id>.<deployment>`).
pub device_id: String,
/// Stable device identifier. Written into the agent's TOML and
/// used as the KV key prefix (`<device_id>.<deployment>`). Harmony
/// `Id` values are sortable-by-creation-time and collision-safe
    /// at up to ~10k devices/sec, which is comfortably more headroom than
    /// a fleet registry needs.
pub device_id: Id,
/// Fleet partition this device belongs to.
pub group: String,
/// NATS URLs the agent should connect to. Typically one entry.
@@ -53,41 +57,38 @@ pub struct IotDeviceSetupConfig {
impl IotDeviceSetupConfig {
/// Render the agent's `/etc/iot-agent/config.toml` content.
pub fn render_toml(&self) -> String {
let mut out = String::new();
out.push_str("[agent]\n");
out.push_str(&format!(
"device_id = \"{}\"\n",
toml_escape(&self.device_id)
));
out.push_str(&format!("group = \"{}\"\n", toml_escape(&self.group)));
out.push('\n');
out.push_str("[credentials]\n");
out.push_str("type = \"toml-shared\"\n");
out.push_str(&format!(
"nats_user = \"{}\"\n",
toml_escape(&self.nats_user)
));
out.push_str(&format!(
"nats_pass = \"{}\"\n",
toml_escape(&self.nats_pass)
));
out.push('\n');
out.push_str("[nats]\n");
out.push_str("urls = [");
for (i, url) in self.nats_urls.iter().enumerate() {
if i > 0 {
out.push_str(", ");
}
out.push_str(&format!("\"{}\"", toml_escape(url)));
}
out.push_str("]\n");
out
// Raw-string template with format! — the TOML escape rules for
// double-quoted strings are just `\` and `"`, handled by
// [`toml_escape`].
let device_id = toml_escape(&self.device_id.to_string());
let group = toml_escape(&self.group);
let nats_user = toml_escape(&self.nats_user);
let nats_pass = toml_escape(&self.nats_pass);
let urls = self
.nats_urls
.iter()
.map(|u| format!("\"{}\"", toml_escape(u)))
.collect::<Vec<_>>()
.join(", ");
format!(
r#"[agent]
device_id = "{device_id}"
group = "{group}"
[credentials]
type = "toml-shared"
nats_user = "{nats_user}"
nats_pass = "{nats_pass}"
[nats]
urls = [{urls}]
"#
)
}
/// Render the systemd unit file content.
pub fn render_systemd_unit(&self) -> String {
String::from(
"[Unit]
pub fn render_systemd_unit(&self) -> &'static str {
r#"[Unit]
Description=IoT Agent (Harmony)
After=network-online.target
Wants=network-online.target
@@ -105,8 +106,7 @@ StandardError=journal
[Install]
WantedBy=multi-user.target
",
)
"#
}
}
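
For reference, a sketch of the file this template renders for a hypothetical device. All values are invented; only the section and key layout comes from the `render_toml` body above, and the blank lines between sections are cosmetic since TOML ignores them.

```rust
/// Illustrative only, not part of this diff: the approximate shape of the
/// rendered /etc/iot-agent/config.toml for invented values.
fn example_rendered_config() -> &'static str {
    r#"[agent]
device_id = "01hv4kexampleid"
group = "lab"

[credentials]
type = "toml-shared"
nats_user = "iot-agent"
nats_pass = "s3cret"

[nats]
urls = ["nats://192.168.122.1:4222"]
"#
}
```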
@@ -125,7 +125,7 @@ impl IotDeviceSetupScore {
}
}
impl<T: Topology + HostConfigurationProvider> Score<T> for IotDeviceSetupScore {
impl<T: Topology + LinuxHostConfiguration> Score<T> for IotDeviceSetupScore {
fn name(&self) -> String {
format!("IotDeviceSetupScore({})", self.config.device_id)
}
@@ -147,7 +147,7 @@ struct IotDeviceSetupInterpret {
}
#[async_trait]
impl<T: Topology + HostConfigurationProvider> Interpret<T> for IotDeviceSetupInterpret {
impl<T: Topology + LinuxHostConfiguration> Interpret<T> for IotDeviceSetupInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::IotDeviceSetup
}
@@ -167,13 +167,15 @@ impl<T: Topology + HostConfigurationProvider> Interpret<T> for IotDeviceSetupInt
topology: &T,
) -> Result<Outcome, InterpretError> {
let cfg = &self.config;
topology.ping().await.map_err(wrap)?;
HostReachable::ping(topology).await.map_err(wrap)?;
let mut change_log: Vec<String> = Vec::new();
// 1. Dependencies.
for pkg in ["podman", "systemd-container"] {
let r = topology.ensure_package(pkg).await.map_err(wrap)?;
let r = PackageInstaller::ensure_package(topology, pkg)
.await
.map_err(wrap)?;
log_change(&mut change_log, format!("package:{pkg}"), r);
}
@@ -191,16 +193,19 @@ impl<T: Topology + HostConfigurationProvider> Interpret<T> for IotDeviceSetupInt
system: true,
create_home: true,
};
let r = topology.ensure_user(&user_spec).await.map_err(wrap)?;
let r = UnixUserManager::ensure_user(topology, &user_spec)
.await
.map_err(wrap)?;
log_change(&mut change_log, "user:iot-agent", r);
let r = topology.ensure_linger("iot-agent").await.map_err(wrap)?;
let r = UnixUserManager::ensure_linger(topology, "iot-agent")
.await
.map_err(wrap)?;
log_change(&mut change_log, "linger:iot-agent", r);
// 3. User-scoped podman socket. Required by `PodmanTopology` on
// the agent so it reaches /run/user/<uid>/podman/podman.sock.
let r = topology
.ensure_user_unit_active("iot-agent", "podman.socket")
let r = SystemdManager::ensure_user_unit_active(topology, "iot-agent", "podman.socket")
.await
.map_err(wrap)?;
log_change(&mut change_log, "user-unit:podman.socket", r);
@@ -210,14 +215,16 @@ impl<T: Topology + HostConfigurationProvider> Interpret<T> for IotDeviceSetupInt
// content over SFTP and reports `changed: true` only when the
// remote file actually differs from the local one — so
// re-running this Score without a new binary is a true NOOP.
let binary_r = topology
.ensure_file(&FileSpec {
let binary_r = FileDelivery::ensure_file(
topology,
&FileSpec {
path: "/usr/local/bin/iot-agent".to_string(),
source: FileSource::LocalPath(cfg.agent_binary_path.clone()),
owner: Some("root".to_string()),
group: Some("root".to_string()),
mode: Some(0o755),
})
},
)
.await
.map_err(wrap)?;
log_change(&mut change_log, "file:/usr/local/bin/iot-agent", binary_r);
@@ -231,24 +238,27 @@ impl<T: Topology + HostConfigurationProvider> Interpret<T> for IotDeviceSetupInt
group: Some("iot-agent".to_string()),
mode: Some(0o600),
};
let toml_r = topology.ensure_file(&toml_spec).await.map_err(wrap)?;
let toml_r = FileDelivery::ensure_file(topology, &toml_spec)
.await
.map_err(wrap)?;
log_change(&mut change_log, "file:/etc/iot-agent/config.toml", toml_r);
// 6. systemd unit for the agent itself.
let unit = SystemdUnitSpec {
name: "iot-agent".to_string(),
unit_content: cfg.render_systemd_unit(),
unit_content: cfg.render_systemd_unit().to_string(),
scope: SystemdScope::System,
start_immediately: true,
};
let unit_r = topology.ensure_systemd_unit(&unit).await.map_err(wrap)?;
let unit_r = SystemdManager::ensure_systemd_unit(topology, &unit)
.await
.map_err(wrap)?;
log_change(&mut change_log, "unit:iot-agent", unit_r);
// 7. Restart the agent iff anything that affects it changed.
let needs_restart = toml_r.changed || unit_r.changed || binary_r.changed;
if needs_restart {
topology
.restart_service("iot-agent", SystemdScope::System)
SystemdManager::restart_service(topology, "iot-agent", SystemdScope::System)
.await
.map_err(wrap)?;
change_log.push("restart:iot-agent".to_string());

View File

@@ -0,0 +1,93 @@
//! [`ProvisionVmScore`] — Harmony Score wrapping
//! [`VirtualMachineHost::ensure_vm`].
//!
//! The Score itself has no knowledge of the hypervisor or how
//! first-boot configuration is delivered to the guest (cloud-init
//! seed ISO, OVF properties, Proxmox APIs — all hypervisor
//! concerns). It takes a generic `VirtualMachineSpec`, calls the
//! topology's `VirtualMachineHost` capability, and returns the
//! runtime info the caller needs to SSH in.
use async_trait::async_trait;
use harmony_types::id::Id;
use serde::{Deserialize, Serialize};
use crate::data::Version;
use crate::domain::interpret::{
Interpret, InterpretError, InterpretName, InterpretStatus, Outcome,
};
use crate::domain::inventory::Inventory;
use crate::domain::topology::{Topology, VirtualMachineHost, VirtualMachineSpec};
use crate::score::Score;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProvisionVmScore {
pub spec: VirtualMachineSpec,
}
impl<T: Topology + VirtualMachineHost> Score<T> for ProvisionVmScore {
fn name(&self) -> String {
format!("ProvisionVmScore({})", self.spec.name)
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(ProvisionVmInterpret {
spec: self.spec.clone(),
version: Version::from("0.1.0").expect("static version"),
status: InterpretStatus::QUEUED,
})
}
}
#[derive(Debug)]
struct ProvisionVmInterpret {
spec: VirtualMachineSpec,
version: Version,
status: InterpretStatus,
}
#[async_trait]
impl<T: Topology + VirtualMachineHost> Interpret<T> for ProvisionVmInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::KvmVm
}
fn get_version(&self) -> Version {
self.version.clone()
}
fn get_status(&self) -> InterpretStatus {
self.status.clone()
}
fn get_children(&self) -> Vec<Id> {
vec![]
}
async fn execute(
&self,
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let info = topology
.ensure_vm(&self.spec)
.await
.map_err(|e| InterpretError::new(format!("ensure_vm: {e}")))?;
let mut details = vec![
format!("hypervisor={}", info.hypervisor),
format!("name={}", info.name),
];
if let Some(ip) = info.ip {
details.push(format!("ip={ip}"));
}
Ok(Outcome::success_with_details(
format!(
"VM {} up on {} ({})",
info.name,
info.hypervisor,
info.ip
.map(|i| i.to_string())
.unwrap_or_else(|| "no-ip".to_string())
),
details,
))
}
}
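
A usage sketch for the Score, assuming the `VirtualMachineSpec::new_aarch64` constructor described in the plan; the function name and VM name are illustrative, imports are elided, and the exact spec fields the caller still fills in may differ from what ships.

```rust
// Illustrative only. `new_aarch64` is the constructor proposed in the plan;
// the caller would still populate `first_boot` (admin user + authorized
// keys) and the network the same way the existing x86_64 smoke path does.
fn arm64_smoke_score() -> ProvisionVmScore {
    ProvisionVmScore {
        spec: VirtualMachineSpec::new_aarch64("iot-smoke-a3"),
    }
}
```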

View File

@@ -1,16 +1,34 @@
//! Cloud-init seed ISO generation.
//!
//! **Scope.** This is a convenience for the *VM-as-device test rig*. Real
//! customer Pi deployments will not use cloud-init — they use rpi-imager's
//! preconfigure flow, a PXE-boot appliance, or an equivalent OEM mechanism.
//! Keep this helper tucked inside the KVM module to signal it's a test-rig
//! concern, not a customer-facing capability.
//! # Why customize the VM at all?
//!
//! What it does: given a hostname + one authorized SSH key + an optional
//! privileged user, writes `user-data` and `meta-data` files, wraps them
//! in an ISO 9660 volume labeled `CIDATA`, and returns the ISO path. That
//! ISO is attached as a second CD-ROM on the VM; cloud-init on first boot
//! reads it and applies the configuration.
//! Ubuntu cloud images ship with no default login — no password,
//! keys-only SSH, no authorized keys. That's the right posture for a
//! cloud image but it means a freshly-booted VM is *unreachable* until
//! we tell it who to trust. Every programmatic VM provisioning story
//! customizes somehow. The menu:
//!
//! 1. **Cloud-init via seed ISO (what we do).** Attach a second CD-ROM
//! labeled `CIDATA` carrying `user-data` + `meta-data` files.
//! cloud-init's NoCloud datasource reads it on first boot.
//! Requires `xorriso` (one 2MB package, in every distro's repo).
//! 2. **Cloud-init via NoCloud-net HTTP.** Run an in-process HTTP
//! server; pass its URL to the VM via SMBIOS. No extra system
//! dep, but ~100 lines of server/lifecycle code, and the SMBIOS
//! stanza has to live in domain XML. Good future option if xorriso
//! becomes painful.
//! 3. **virt-customize / libguestfs.** Rewrites the qcow2 pre-boot
//! to inject the key directly into /root/.ssh/authorized_keys.
//! Heavier dep (libguestfs is ~100MB) and has SELinux/perm quirks
//! on several distros. Out of scope.
//! 4. **Pre-baked image with known creds.** Mints its own maintenance
//! burden (rebuild on every Ubuntu point release). Out of scope.
//!
//! We picked (1) because `xorriso` is the lightest dep that delivers a
//! stable, standard mechanism (cloud-init NoCloud datasource is in the
//! official cloud-init spec). Keep this helper tucked inside the KVM
//! module to signal it's a KVM-impl concern, not a customer-facing
//! capability — the generic `VirtualMachineHost` abstraction hides it.
use std::path::{Path, PathBuf};
use std::process::Stdio;
@@ -28,19 +46,16 @@ pub struct CloudInitSeedConfig<'a> {
/// Public SSH key (openssh format, single line) that the guest will
/// authorize for the `user` account.
pub authorized_key: &'a str,
/// Local username to create with passwordless sudo. Cloud-init's
/// `default` user on Ubuntu images is `ubuntu`; for clarity we create
/// an explicit one so the agent + setup score don't depend on distro
/// defaults.
/// Local username to create with passwordless sudo.
pub user: &'a str,
/// Extra `runcmd` lines to append to the user-data. Mostly useful for
/// no-op debugging; keep empty in production paths.
/// Extra `runcmd` lines to append to the user-data. Mostly useful
/// for no-op debugging; keep empty in production paths.
pub extra_runcmd: Vec<String>,
}
/// Write a seed ISO to `output_dir/<hostname>-seed.iso`. Uses `xorriso
/// -as mkisofs` under the hood; if `xorriso` is not on PATH this returns
/// a clear error asking for it.
/// -as mkisofs` under the hood; if `xorriso` is not on PATH this
/// returns a clear error asking for it.
pub async fn build_seed_iso(
cfg: &CloudInitSeedConfig<'_>,
output_dir: &Path,
@@ -54,19 +69,17 @@ pub async fn build_seed_iso(
}
let workdir = tempdir().map_err(KvmError::Io)?;
let user_data = render_user_data(cfg);
// Fresh instance-id on every seed build. Cloud-init treats a new
// instance-id as a "first boot": it re-runs all of its per-instance
// modules. This is what makes the KvmVmScore repeatable against a
// reused overlay disk — without it, the second boot would skip all
// our user/hostname/ssh configuration because cloud-init cached the
// previous run under the same id.
// instance-id as a "first boot": it re-runs all of its per-
// instance modules. This is what makes ensure_vm repeatable
// against a reused overlay disk — without it, the second boot
// would skip all our user/hostname/ssh configuration because
// cloud-init cached the previous run under the same id.
let instance_id = uuid::Uuid::new_v4();
let meta_data = format!(
"instance-id: {instance_id}
local-hostname: {hostname}
",
"instance-id: {instance_id}\nlocal-hostname: {hostname}\n",
hostname = cfg.hostname
);
@@ -79,8 +92,8 @@ local-hostname: {hostname}
let output_path = output_dir.join(format!("{}-seed.iso", cfg.hostname));
// xorriso refuses to overwrite a pre-existing output file cleanly
// (it treats it as input "media"), so remove it first. Our seed is
// regenerated from config every run, which is the intended
// (it treats it as input "media"), so remove it first. Our seed
// is regenerated from config every run, which is the intended
// behaviour — the file is a build artifact, not state.
if output_path.exists() {
tokio::fs::remove_file(&output_path)
@@ -88,22 +101,23 @@ local-hostname: {hostname}
.map_err(KvmError::Io)?;
}
// Use `.output()` (not `.status()`) so we actually drain stderr — a
// piped stderr that isn't read deadlocks xorriso once the pipe fills
// up, and in practice the kernel surfaces that as a SIGPIPE on the
// child. Keep stderr piped so failure diagnostics make it into the
// error message.
// Use `.output()` (not `.status()`) so we actually drain stderr —
// a piped stderr that isn't read deadlocks xorriso once the pipe
// fills up. Keep stderr piped so failure diagnostics make it
// into the error message.
let output = Command::new("xorriso")
.arg("-as")
.arg("mkisofs")
.arg("-output")
.arg(&output_path)
.arg("-volid")
.arg("CIDATA")
.arg("-joliet")
.arg("-rock")
.arg(workdir.path().join("user-data"))
.arg(workdir.path().join("meta-data"))
.args([
"-as".as_ref(),
"mkisofs".as_ref(),
"-output".as_ref(),
output_path.as_os_str(),
"-volid".as_ref(),
"CIDATA".as_ref(),
"-joliet".as_ref(),
"-rock".as_ref(),
workdir.path().join("user-data").as_os_str(),
workdir.path().join("meta-data").as_os_str(),
])
.stdout(Stdio::null())
.stderr(Stdio::piped())
.output()
@@ -121,27 +135,34 @@ local-hostname: {hostname}
}
fn render_user_data(cfg: &CloudInitSeedConfig<'_>) -> String {
let mut s = String::new();
s.push_str("#cloud-config\n");
s.push_str(&format!("hostname: {}\n", cfg.hostname));
s.push_str(&format!("fqdn: {}.local\n", cfg.hostname));
s.push_str("manage_etc_hosts: true\n");
s.push_str("users:\n");
s.push_str(&format!(" - name: {}\n", cfg.user));
s.push_str(" sudo: ALL=(ALL) NOPASSWD:ALL\n");
s.push_str(" shell: /bin/bash\n");
s.push_str(" lock_passwd: true\n");
s.push_str(" ssh_authorized_keys:\n");
s.push_str(&format!(" - {}\n", cfg.authorized_key));
s.push_str("ssh_pwauth: false\n");
s.push_str("disable_root: true\n");
if !cfg.extra_runcmd.is_empty() {
s.push_str("runcmd:\n");
let runcmd = if cfg.extra_runcmd.is_empty() {
String::new()
} else {
let mut s = String::from("runcmd:\n");
for line in &cfg.extra_runcmd {
s.push_str(&format!(" - {}\n", line));
}
s.push_str(&format!(" - {line}\n"));
}
s
};
format!(
r#"#cloud-config
hostname: {hostname}
fqdn: {hostname}.local
manage_etc_hosts: true
users:
- name: {user}
sudo: ALL=(ALL) NOPASSWD:ALL
shell: /bin/bash
lock_passwd: true
ssh_authorized_keys:
- {authorized_key}
ssh_pwauth: false
disable_root: true
{runcmd}"#,
hostname = cfg.hostname,
user = cfg.user,
authorized_key = cfg.authorized_key,
)
}
async fn write_file(path: &Path, content: &str) -> Result<(), KvmError> {
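
A minimal usage sketch of the seed builder under the assumptions above; the key, output directory, and hostname are invented, and in the real flow the KVM topology's `ensure_vm` drives this through its first-boot config rather than calling it directly.

```rust
use std::path::Path;

// Illustrative only: key, user, and output directory are invented.
async fn demo_seed() -> Result<std::path::PathBuf, KvmError> {
    let cfg = CloudInitSeedConfig {
        hostname: "iot-smoke-a3",
        authorized_key: "ssh-ed25519 AAAAC3Nza... harmony@workshop",
        user: "harmony-admin",
        extra_runcmd: vec![],
    };
    // Produces <output_dir>/iot-smoke-a3-seed.iso and returns its path;
    // the topology attaches it as the VM's second CD-ROM (device hdb).
    build_seed_iso(&cfg, Path::new("/var/lib/harmony/iot-pool")).await
}
```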

View File

@@ -0,0 +1,205 @@
//! UEFI firmware discovery for aarch64 guests.
//!
//! Libvirt needs two paths to boot an aarch64 VM via UEFI (there
//! is no equivalent of SeaBIOS for arm64 — the virt machine type
//! has no legacy chipset):
//!
//! - **CODE** — read-only firmware image (shared across all VMs).
//! - **VARS template** — writable NVRAM prototype. Per-VM, libvirt
//! reads-then-copies this on first definition.
//!
//! Every major distro ships the AAVMF firmware under a different
//! path. [`discover_aarch64_firmware`] walks a known list and
//! returns the first viable pair, or an `ExecutorError` with the
//! package-install command for each supported distro.
use std::path::{Path, PathBuf};
use crate::executors::ExecutorError;
/// A code + vars-template firmware pair usable by libvirt's
/// `<loader>` + `<nvram>` domain elements.
///
/// Both paths must be readable by libvirt-qemu. The `vars_template`
/// path is read-only; per-VM writable NVRAM is produced by copying
/// it to a per-domain location — see [`copy_vars_template_for_vm`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AarchFirmware {
pub code: PathBuf,
pub vars_template: PathBuf,
}
/// Ordered candidates for the CODE + VARS pair, per distro. Pairs are
/// checked in order and the first whose files both exist wins. Ordering
/// puts Arch first (this workshop host), but callers shouldn't depend on
/// the order beyond "match what the host actually ships."
const CANDIDATES: &[(&str, &str)] = &[
// Arch Linux — current edk2-armvirt (package installs both a
// canonical copy under /usr/share/edk2/aarch64 and a compatibility
// copy under /usr/share/edk2-armvirt/aarch64; either is fine).
(
"/usr/share/edk2/aarch64/QEMU_EFI.fd",
"/usr/share/edk2/aarch64/QEMU_VARS.fd",
),
(
"/usr/share/edk2-armvirt/aarch64/QEMU_EFI.fd",
"/usr/share/edk2-armvirt/aarch64/QEMU_VARS.fd",
),
// Older Arch / AUR edk2-armvirt naming — vars template as a
// raw pflash image rather than an .fd wrapper.
(
"/usr/share/edk2-armvirt/aarch64/QEMU_EFI.fd",
"/usr/share/edk2-armvirt/aarch64/vars-template-pflash.raw",
),
// Arch Linux — hypothetical CODE/VARS naming. Some newer edk2
// builds split CODE vs EFI; keep this candidate so we don't
// regress if upstream renames.
(
"/usr/share/edk2/aarch64/QEMU_CODE.fd",
"/usr/share/edk2/aarch64/QEMU_VARS.fd",
),
// Debian / Ubuntu (qemu-efi-aarch64 package).
(
"/usr/share/AAVMF/AAVMF_CODE.fd",
"/usr/share/AAVMF/AAVMF_VARS.fd",
),
// Fedora / RHEL (edk2-aarch64 package).
(
"/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw",
"/usr/share/edk2/aarch64/vars-template-pflash.raw",
),
];
pub fn discover_aarch64_firmware() -> Result<AarchFirmware, ExecutorError> {
for (code, vars) in CANDIDATES {
let code_path = Path::new(code);
let vars_path = Path::new(vars);
if code_path.is_file() && vars_path.is_file() {
return Ok(AarchFirmware {
code: code_path.to_path_buf(),
vars_template: vars_path.to_path_buf(),
});
}
}
Err(ExecutorError::UnexpectedError(
"no aarch64 UEFI firmware found. Install it one-time:\n \
Arch: `sudo pacman -S edk2-armvirt`\n \
Debian/Ubuntu: `sudo apt install qemu-efi-aarch64`\n \
Fedora: `sudo dnf install edk2-aarch64`\n\
Checked paths (first pair to have both files wins):\n \
- /usr/share/edk2/aarch64/QEMU_EFI.fd + QEMU_VARS.fd\n \
- /usr/share/edk2-armvirt/aarch64/QEMU_EFI.fd + QEMU_VARS.fd\n \
- /usr/share/edk2-armvirt/aarch64/QEMU_EFI.fd + vars-template-pflash.raw\n \
- /usr/share/edk2/aarch64/QEMU_{CODE,VARS}.fd\n \
- /usr/share/AAVMF/AAVMF_{CODE,VARS}.fd\n \
- /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw + vars-template-pflash.raw"
.to_string(),
))
}
/// Copy `firmware.vars_template` to `dest` so libvirt-qemu has a
/// writable per-VM NVRAM. Overwrites `dest` if present — on a
/// reused VM name we want fresh NVRAM. The file is chmod 0644 so
/// libvirt-qemu's dynamic ownership chown on VM start works.
///
/// VARS templates are already 64 MiB on every distro we support
/// (they're sized for the pflash region), so no padding is needed
/// here — unlike [`ensure_code_pflash_padded`] for the CODE side.
pub async fn copy_vars_template_for_vm(
firmware: &AarchFirmware,
dest: &Path,
) -> Result<(), ExecutorError> {
use std::os::unix::fs::PermissionsExt;
tokio::fs::copy(&firmware.vars_template, dest)
.await
.map_err(|e| {
ExecutorError::UnexpectedError(format!(
"copy AAVMF vars template {:?}{dest:?}: {e}",
firmware.vars_template
))
})?;
tokio::fs::set_permissions(dest, std::fs::Permissions::from_mode(0o644))
.await
.map_err(|e| ExecutorError::UnexpectedError(format!("chmod {dest:?}: {e}")))?;
Ok(())
}
/// QEMU's `virt` machine wires pflash unit 0 as a CFI flash device
/// of fixed size 64 MiB. When libvirt's `<loader type='pflash'>`
/// points at a file smaller than that, qemu refuses to start:
///
/// cfi.pflash01 device '/machine/virt.flash0' requires 67108864
/// bytes, block backend provides 3145728 bytes
///
/// Different distros ship the CODE firmware differently:
///
/// - **Pre-padded** (upstream QEMU `pc-bios/edk2-aarch64-code.fd`,
/// Ubuntu `qemu-efi-aarch64`): file is 64 MiB, zero-padded at the
/// tail. Works as-is with `-drive if=pflash`.
/// - **Raw edk2 build output** (Arch `edk2-aarch64` 202508+): file
/// is ~2-4 MiB, just the firmware volume without pflash padding.
/// Has to be padded before libvirt will accept it.
///
/// [`ensure_code_pflash_padded`] produces a 64 MiB cached copy at
/// `cache_path` when the source is smaller than the pflash region,
/// and reuses it when it already exists with the right size. When
/// the source is already 64 MiB, this returns it unchanged — no
/// copy, no bytes moved.
pub const AARCH64_PFLASH_BYTES: u64 = 64 * 1024 * 1024;
pub async fn ensure_code_pflash_padded(
source: &Path,
cache_path: &Path,
) -> Result<PathBuf, ExecutorError> {
let src_meta = tokio::fs::metadata(source).await.map_err(|e| {
ExecutorError::UnexpectedError(format!("stat firmware code {source:?}: {e}"))
})?;
if src_meta.len() == AARCH64_PFLASH_BYTES {
return Ok(source.to_path_buf());
}
if src_meta.len() > AARCH64_PFLASH_BYTES {
return Err(ExecutorError::UnexpectedError(format!(
"firmware code {source:?} is {} bytes, larger than the 64 MiB pflash \
region QEMU's virt machine provides. This firmware pair is not \
usable for the aarch64 virt machine type.",
src_meta.len()
)));
}
// Source is under 64 MiB — needs padding. If the cache already
// holds a correctly-sized copy newer than the source, reuse it.
if let Ok(cache_meta) = tokio::fs::metadata(cache_path).await
&& cache_meta.len() == AARCH64_PFLASH_BYTES
&& let Ok(cache_mtime) = cache_meta.modified()
&& let Ok(src_mtime) = src_meta.modified()
&& cache_mtime >= src_mtime
{
return Ok(cache_path.to_path_buf());
}
if let Some(parent) = cache_path.parent() {
tokio::fs::create_dir_all(parent).await.map_err(|e| {
ExecutorError::UnexpectedError(format!("create firmware cache dir {parent:?}: {e}"))
})?;
}
tokio::fs::copy(source, cache_path).await.map_err(|e| {
ExecutorError::UnexpectedError(format!(
"copy firmware code {source:?}{cache_path:?}: {e}"
))
})?;
let file = tokio::fs::OpenOptions::new()
.write(true)
.open(cache_path)
.await
.map_err(|e| ExecutorError::UnexpectedError(format!("open {cache_path:?} for pad: {e}")))?;
file.set_len(AARCH64_PFLASH_BYTES).await.map_err(|e| {
ExecutorError::UnexpectedError(format!(
"pad {cache_path:?} to {AARCH64_PFLASH_BYTES} bytes: {e}"
))
})?;
use std::os::unix::fs::PermissionsExt;
tokio::fs::set_permissions(cache_path, std::fs::Permissions::from_mode(0o644))
.await
.map_err(|e| ExecutorError::UnexpectedError(format!("chmod {cache_path:?}: {e}")))?;
Ok(cache_path.to_path_buf())
}
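
A sketch of the discovery-plus-padding contract in one place (the cache path is invented; in this PR the real wiring lives in the KVM topology's `ensure_vm_firmware`). The padded copy always comes out at exactly `AARCH64_PFLASH_BYTES`, i.e. 67,108,864 bytes.

```rust
use std::path::Path;

// Illustrative only: an already-64 MiB CODE image is returned untouched;
// a smaller one (e.g. Arch's ~3 MiB raw edk2 output) is copied next to the
// pool and grown to the fixed pflash size.
async fn demo_firmware_pad() -> Result<(), ExecutorError> {
    let fw = discover_aarch64_firmware()?;
    let code = ensure_code_pflash_padded(
        &fw.code,
        Path::new("/var/lib/harmony/iot-pool/aarch64-code-padded.fd"),
    )
    .await?;
    let len = tokio::fs::metadata(&code)
        .await
        .map_err(|e| ExecutorError::UnexpectedError(e.to_string()))?
        .len();
    assert_eq!(len, AARCH64_PFLASH_BYTES);
    Ok(())
}
```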

View File

@@ -4,16 +4,17 @@ pub mod cloudinit;
pub mod config;
pub mod error;
pub mod executor;
pub mod firmware;
pub mod topology;
pub mod types;
pub mod vm_score;
pub use cloudinit::{CloudInitSeedConfig, build_seed_iso};
pub use error::KvmError;
pub use executor::KvmExecutor;
pub use topology::{KvmHost, KvmHostTopology};
pub use firmware::{AarchFirmware, copy_vars_template_for_vm, discover_aarch64_firmware};
pub use topology::{DEFAULT_ADMIN_USER, KvmVirtualMachineHost};
pub use types::{
BootDevice, CdromConfig, DhcpHost, DiskConfig, ForwardMode, NetworkConfig,
NetworkConfigBuilder, NetworkRef, VmConfig, VmConfigBuilder, VmInterface, VmStatus,
NetworkConfigBuilder, NetworkRef, UefiFirmware, VmArchitecture, VmConfig, VmConfigBuilder,
VmInterface, VmStatus,
};
pub use vm_score::{CloudInitVmConfig, KvmVmScore};

View File

@@ -1,61 +1,413 @@
//! KVM-backed implementation of [`VirtualMachineHost`].
//!
//! `KvmVirtualMachineHost` wraps a [`KvmExecutor`] (libvirt
//! connection) + the Harmony-managed libvirt storage pool and
//! translates generic `VirtualMachineSpec` requests into concrete
//! libvirt domain definitions. Cloud-init is an implementation
//! detail here — callers never see it.
use std::net::IpAddr;
use std::path::PathBuf;
use std::process::Stdio;
use async_trait::async_trait;
use log::info;
use tokio::process::Command;
use crate::domain::topology::{PreparationError, PreparationOutcome, Topology};
use crate::domain::topology::{
PreparationError, PreparationOutcome, Topology, VirtualMachineHost, VirtualMachineRuntimeInfo,
VirtualMachineSpec, VmArchitecture, VmFirstBootConfig, VmState,
};
use crate::executors::ExecutorError;
use super::cloudinit::{CloudInitSeedConfig, build_seed_iso};
use super::error::KvmError;
use super::executor::KvmExecutor;
use super::firmware::{
copy_vars_template_for_vm, discover_aarch64_firmware, ensure_code_pflash_padded,
};
use super::types::{BootDevice, CdromConfig, DiskConfig, NetworkRef, UefiFirmware, VmConfig};
/// Capability: access to a libvirt-reachable KVM hypervisor.
pub const DEFAULT_ADMIN_USER: &str = "harmony-admin";
/// Libvirt/KVM hypervisor host, implementing the generic
/// [`VirtualMachineHost`] capability.
///
/// Intentionally tool-shaped rather than industry-shaped (compare to the
/// `PostgreSQL` exception in CLAUDE.md's capability doctrine): any Score
/// that wants to provision a VM cares about hypervisor specifics like
/// storage pools and network bridges — there isn't an honest tool-neutral
/// abstraction to hide behind. When we want federation over heterogeneous
/// hypervisors (KVM + VMware + cloud provider), a higher-level
/// `VirtualMachineHost` capability can be introduced then, and we'll
/// either implement it *in terms of* `KvmHost` or drop `KvmHost`
/// altogether.
pub trait KvmHost {
/// Access the libvirt executor used to drive this hypervisor.
fn kvm_executor(&self) -> &KvmExecutor;
}
/// Concrete Topology wrapping a single KVM hypervisor reachable via
/// libvirt. Implements [`KvmHost`] directly.
pub struct KvmHostTopology {
/// Composes with a caller-chosen storage pool directory where per-VM
/// overlays + seed ISOs are placed. Harmony's IoT workflows use
/// [`crate::modules::iot::ensure_harmony_iot_pool`] to populate that
/// dir; other callers can point at any user-owned libvirt pool root.
pub struct KvmVirtualMachineHost {
name: String,
executor: KvmExecutor,
pool_name: String,
pool_path: PathBuf,
base_image_path: PathBuf,
}
impl KvmHostTopology {
pub fn new(name: impl Into<String>, executor: KvmExecutor) -> Self {
impl KvmVirtualMachineHost {
pub fn new(
topology_name: impl Into<String>,
executor: KvmExecutor,
pool_name: impl Into<String>,
pool_path: PathBuf,
base_image_path: PathBuf,
) -> Self {
Self {
name: name.into(),
name: topology_name.into(),
executor,
pool_name: pool_name.into(),
pool_path,
base_image_path,
}
}
pub fn executor(&self) -> &KvmExecutor {
&self.executor
}
}
#[async_trait]
impl Topology for KvmHostTopology {
impl Topology for KvmVirtualMachineHost {
fn name(&self) -> &str {
&self.name
}
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
// The executor holds the URI — a cheap hypervisor-version query is
// the libvirt equivalent of our `podman info` ping. Not adding a
// dedicated method to KvmExecutor for v0; connection opens are
// lazy per-call, so we rely on the first real Score call to
// surface any connection issue. Callers that want an explicit
// preflight should issue a trivial op (e.g. vm_exists("probe"))
// before committing to bigger work.
// TODO(ROADMAP 12.1 — Phased topology): `ensure_ready` is
// called eagerly by Maestro before any Score in this run has
// declared whether it actually needs KVM. A hypervisor-version
// probe here would force every Harmony invocation that merely
// *mentions* this topology to pay the libvirt connect cost,
// even when the current run only touches unrelated capabilities.
// The phased-topology work (§12.1) will let us defer this to
// when a KVM-consuming Score actually runs. Until then, keep
// `ensure_ready` as a Noop and rely on the first `ensure_vm`
// call to surface any libvirt connectivity problem.
Ok(PreparationOutcome::Noop)
}
}
impl KvmHost for KvmHostTopology {
fn kvm_executor(&self) -> &KvmExecutor {
&self.executor
#[async_trait]
impl VirtualMachineHost for KvmVirtualMachineHost {
async fn list_vms(&self) -> Result<Vec<VirtualMachineRuntimeInfo>, ExecutorError> {
// The current KvmExecutor API doesn't expose list-domains; add
// it when a caller needs it. For the IoT walking skeleton we
// only ever touch our own VMs by name, so this can stay
// unimplemented for now without blocking anything.
Err(ExecutorError::UnexpectedError(
"KvmVirtualMachineHost::list_vms is not implemented yet".to_string(),
))
}
async fn ensure_vm(
&self,
spec: &VirtualMachineSpec,
) -> Result<VirtualMachineRuntimeInfo, ExecutorError> {
let vm_already_exists = self
.executor
.vm_exists(&spec.name)
.await
.map_err(|e| exec(format!("vm_exists: {e}")))?;
// Per-VM overlay backed by the cached base image. Wiped and
// recreated whenever the VM doesn't already exist, so a
// destroyed-then-re-ensured VM always gets a clean rootfs.
let overlay_path = self.pool_path.join(format!("{}.qcow2", spec.name));
if !vm_already_exists {
if overlay_path.exists() {
tokio::fs::remove_file(&overlay_path)
.await
.map_err(|e| exec(format!("remove stale overlay: {e}")))?;
}
create_overlay(&self.base_image_path, &overlay_path).await?;
info!(
"created overlay disk {overlay_path:?} backed by {:?}",
self.base_image_path
);
refresh_pool(&self.pool_name).await?;
}
// First-boot seed ISO (cloud-init NoCloud) iff requested.
// `seed_iso_path` stays `None` when no first-boot config was
// provided — the VM boots whatever the backing image is
// configured to boot into.
let seed_iso_path = match spec.first_boot.as_ref() {
Some(fb) => Some(build_cloud_init_seed(fb, &spec.name, &self.pool_path).await?),
None => None,
};
if seed_iso_path.is_some() {
refresh_pool(&self.pool_name).await?;
}
// aarch64 guests need a UEFI firmware pair; x86_64 boots
// SeaBIOS by default and leaves firmware = None.
let firmware = match spec.architecture {
VmArchitecture::X86_64 => None,
VmArchitecture::Aarch64 => Some(ensure_vm_firmware(&spec.name, &self.pool_path).await?),
};
if firmware.is_some() {
refresh_pool(&self.pool_name).await?;
}
let vm_config = VmConfig {
name: spec.name.clone(),
architecture: spec.architecture,
vcpus: spec.cpus,
memory_mib: spec.memory_mib,
disks: vec![DiskConfig {
size_gb: spec.disk_size_gb.unwrap_or(0),
device: "vda".to_string(),
pool: self.pool_name.clone(),
source_path: Some(overlay_path.to_string_lossy().into_owned()),
}],
networks: vec![NetworkRef::named(&spec.network)],
cdroms: match &seed_iso_path {
Some(p) => vec![CdromConfig {
source: p.to_string_lossy().into_owned(),
device: "hdb".to_string(),
}],
None => vec![],
},
boot_order: vec![BootDevice::Disk],
firmware,
};
self.executor
.ensure_vm(vm_config)
.await
.map_err(|e| exec(format!("ensure_vm: {e}")))?;
self.executor
.start_vm(&spec.name)
.await
.map_err(|e| exec(format!("start_vm: {e}")))?;
// First-boot cloud-init takes 2-4 minutes on native-arch KVM
// (datasource detection, package regeneration, SSH host-key
// generation, reboots). Under TCG emulation — aarch64 guest
// on an x86_64 host — the same boot path runs 3-5× slower
// because every guest instruction is translated. A cold
// first boot (no disk cache) has been observed at ~15 min
// on an 8-core x86 host even with virtio-rng and
// pauth-impdef=on; budget 30 min to cover slower CI workers.
let wait_budget = match spec.architecture {
VmArchitecture::X86_64 => std::time::Duration::from_secs(300),
VmArchitecture::Aarch64 => std::time::Duration::from_secs(1800),
};
let ip = self
.executor
.wait_for_ip(&spec.name, wait_budget)
.await
.map_err(|e| exec(format!("wait_for_ip: {e}")))?;
// DHCP lease ≠ usable VM. When first_boot (cloud-init) is
// requested, a subsequent Score will almost always SSH in —
// so block here until port 22 accepts a TCP handshake.
// Otherwise the caller races cloud-init: under TCG we've
// seen 60-180 s between DHCP lease and sshd-listening.
if spec.first_boot.is_some() {
wait_for_tcp_port(ip, 22, wait_budget).await?;
}
Ok(VirtualMachineRuntimeInfo {
name: spec.name.clone(),
state: VmState::Running,
ip: Some(ip),
hypervisor: "kvm".to_string(),
})
}
async fn delete_vm(&self, name: &str) -> Result<(), ExecutorError> {
let exists = self
.executor
.vm_exists(name)
.await
.map_err(|e| exec(format!("vm_exists: {e}")))?;
if !exists {
return Ok(());
}
// Destroy (ignore error if already stopped) then undefine.
let _ = self.executor.destroy_vm(name).await;
self.executor
.undefine_vm(name)
.await
.map_err(|e| exec(format!("undefine_vm: {e}")))?;
Ok(())
}
async fn get_vm_info(
&self,
name: &str,
) -> Result<Option<VirtualMachineRuntimeInfo>, ExecutorError> {
let exists = self
.executor
.vm_exists(name)
.await
.map_err(|e| exec(format!("vm_exists: {e}")))?;
if !exists {
return Ok(None);
}
let vm_ip = self
.executor
.vm_ip(name)
.await
.map_err(|e| exec(format!("vm_ip: {e}")))?;
Ok(Some(VirtualMachineRuntimeInfo {
name: name.to_string(),
state: if vm_ip.is_some() {
VmState::Running
} else {
VmState::Unknown
},
ip: vm_ip.map(|i: IpAddr| i),
hypervisor: "kvm".to_string(),
}))
}
}
/// Prepare a UEFI firmware pair for a single aarch64 VM:
/// discover the host-shipped code + vars-template, copy the
/// template to a per-VM NVRAM file inside the pool dir, return
/// the paired paths for libvirt's `<loader>` + `<nvram>`.
async fn ensure_vm_firmware(
vm_name: &str,
pool_path: &std::path::Path,
) -> Result<UefiFirmware, ExecutorError> {
let discovered = discover_aarch64_firmware()?;
// Arch's `edk2-aarch64` ships CODE as a ~3 MiB raw edk2 build
// output, but QEMU's virt machine pflash region is fixed 64 MiB
// and refuses under-sized files. Pad (once, cached next to the
// pool) before handing the path to libvirt.
let padded_code =
ensure_code_pflash_padded(&discovered.code, &pool_path.join("aarch64-code-padded.fd"))
.await?;
let vars = pool_path.join(format!("{vm_name}-VARS.fd"));
copy_vars_template_for_vm(&discovered, &vars).await?;
info!(
"aarch64 firmware: code={padded_code:?} (padded from {:?}), \
nvram={vars:?} (from template {:?})",
discovered.code, discovered.vars_template
);
Ok(UefiFirmware {
code: padded_code,
vars,
})
}
async fn create_overlay(
base: &std::path::Path,
overlay: &std::path::Path,
) -> Result<(), ExecutorError> {
let output = Command::new("qemu-img")
.args([
"create",
"-f",
"qcow2",
"-F",
"qcow2",
"-b",
base.to_str()
.ok_or_else(|| exec("base image path is not valid UTF-8"))?,
overlay
.to_str()
.ok_or_else(|| exec("overlay path is not valid UTF-8"))?,
])
.stdout(Stdio::null())
.stderr(Stdio::piped())
.output()
.await
.map_err(|e| exec(format!("spawn qemu-img: {e}")))?;
if !output.status.success() {
return Err(exec(format!(
"qemu-img create overlay failed: {}",
String::from_utf8_lossy(&output.stderr).trim()
)));
}
Ok(())
}
async fn build_cloud_init_seed(
first_boot: &VmFirstBootConfig,
vm_name: &str,
pool_dir: &std::path::Path,
) -> Result<PathBuf, ExecutorError> {
let hostname = first_boot
.hostname
.clone()
.unwrap_or_else(|| vm_name.to_string());
let admin_user = first_boot
.admin_user
.clone()
.unwrap_or_else(|| DEFAULT_ADMIN_USER.to_string());
let authorized_key = first_boot
.authorized_keys
.first()
.cloned()
.ok_or_else(|| exec("first_boot.authorized_keys must contain at least one key"))?;
build_seed_iso(
&CloudInitSeedConfig {
hostname: &hostname,
authorized_key: &authorized_key,
user: &admin_user,
extra_runcmd: vec![],
},
pool_dir,
)
.await
.map_err(|e: KvmError| exec(format!("cloud-init seed build: {e}")))
}
async fn refresh_pool(name: &str) -> Result<(), ExecutorError> {
let status = Command::new("virsh")
.args(["--connect", "qemu:///system", "pool-refresh", name])
.stdout(Stdio::null())
.stderr(Stdio::piped())
.output()
.await
.map_err(|e| exec(format!("spawn virsh pool-refresh: {e}")))?;
if !status.status.success() {
return Err(exec(format!(
"virsh pool-refresh {name} failed: {}",
String::from_utf8_lossy(&status.stderr).trim()
)));
}
Ok(())
}
/// Poll (with 1 s backoff) until a TCP connection to `addr:port`
/// completes a handshake within `budget`. Each individual connect
/// attempt gets a 5 s timeout so a dropped/filtered SYN doesn't
/// burn half the budget on a single attempt.
async fn wait_for_tcp_port(
addr: IpAddr,
port: u16,
budget: std::time::Duration,
) -> Result<(), ExecutorError> {
let deadline = std::time::Instant::now() + budget;
let mut attempts = 0u32;
loop {
attempts += 1;
let connect = tokio::net::TcpStream::connect((addr, port));
match tokio::time::timeout(std::time::Duration::from_secs(5), connect).await {
Ok(Ok(_)) => {
info!("{addr}:{port} reachable after {attempts} attempt(s)");
return Ok(());
}
_ => {}
}
if std::time::Instant::now() >= deadline {
return Err(exec(format!(
"TCP port {addr}:{port} did not accept connections within {:?} \
({attempts} attempt(s)). The VM booted and got a DHCP lease, \
but the service on that port never came up — commonly sshd \
still starting, or cloud-init still in firstboot.",
budget
)));
}
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
}
fn exec(msg: impl Into<String>) -> ExecutorError {
ExecutorError::UnexpectedError(msg.into())
}
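
Finally, a wiring sketch for the whole host. The topology name, pool name, and paths are invented, construction of the libvirt-connected `KvmExecutor` is assumed rather than shown, and the spec is assumed to carry the aarch64 fields (architecture, first_boot, network) filled in upstream.

```rust
use std::path::PathBuf;

// Illustrative only: demonstrates the constructor + ensure_vm flow shown
// above, nothing more.
async fn demo_ensure_arm64_vm(
    executor: KvmExecutor,
    spec: &VirtualMachineSpec,
) -> Result<(), ExecutorError> {
    let host = KvmVirtualMachineHost::new(
        "workshop-kvm",
        executor,
        "harmony-iot",
        PathBuf::from("/var/lib/harmony/iot-pool"),
        PathBuf::from("/var/lib/harmony/images/ubuntu-24.04-arm64.img"),
    );
    let info = host.ensure_vm(spec).await?;
    println!("{} reachable at {:?}", info.name, info.ip);
    Ok(())
}
```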

View File

@@ -1,6 +1,8 @@
use harmony_types::net::MacAddress;
use serde::{Deserialize, Serialize};
pub use crate::domain::topology::VmArchitecture;
/// Information about a VM's network interface, as reported by `virsh domiflist`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VmInterface {
@@ -139,6 +141,10 @@ impl BootDevice {
pub struct VmConfig {
/// VM name, must be unique on the host.
pub name: String,
/// Guest CPU architecture. Defaults to
/// [`VmArchitecture::X86_64`].
#[serde(default)]
pub architecture: VmArchitecture,
/// Number of virtual CPUs.
pub vcpus: u32,
/// Memory in mebibytes (MiB).
@@ -151,6 +157,24 @@ pub struct VmConfig {
pub cdroms: Vec<CdromConfig>,
/// Boot order. First entry has highest priority.
pub boot_order: Vec<BootDevice>,
/// Optional UEFI firmware pair (code + per-VM NVRAM). Required
/// for aarch64 guests; unused for x86_64 (which boots via SeaBIOS
/// by default). The KVM topology resolves and populates this
/// when the VM's architecture requires it.
#[serde(default)]
pub firmware: Option<UefiFirmware>,
}
/// UEFI firmware file pair for `<loader>`+`<nvram>` libvirt elements.
/// Both paths must be readable by libvirt-qemu.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UefiFirmware {
/// Read-only firmware code (e.g. `AAVMF_CODE.fd`,
/// `edk2-aarch64-code.fd`).
pub code: std::path::PathBuf,
/// Writable NVRAM variables. Typically a per-VM copy of the
/// vendor-shipped vars template.
pub vars: std::path::PathBuf,
}
impl VmConfig {
@@ -163,27 +187,45 @@ impl VmConfig {
#[derive(Debug)]
pub struct VmConfigBuilder {
name: String,
architecture: VmArchitecture,
vcpus: u32,
memory_mib: u64,
disks: Vec<DiskConfig>,
networks: Vec<NetworkRef>,
cdroms: Vec<CdromConfig>,
boot_order: Vec<BootDevice>,
firmware: Option<UefiFirmware>,
}
impl VmConfigBuilder {
pub fn new(name: impl Into<String>) -> Self {
Self {
name: name.into(),
architecture: VmArchitecture::default(),
vcpus: 2,
memory_mib: 4096,
disks: vec![],
networks: vec![],
cdroms: vec![],
boot_order: vec![],
firmware: None,
}
}
/// Set the guest CPU architecture (default
/// [`VmArchitecture::X86_64`]). For aarch64 guests the caller
/// should also supply a [`UefiFirmware`] via [`firmware`].
pub fn architecture(mut self, arch: VmArchitecture) -> Self {
self.architecture = arch;
self
}
/// Attach a UEFI firmware pair (required for arm64 / aarch64).
pub fn firmware(mut self, firmware: UefiFirmware) -> Self {
self.firmware = Some(firmware);
self
}
pub fn vcpus(mut self, vcpus: u32) -> Self {
self.vcpus = vcpus;
self
@@ -247,12 +289,14 @@ impl VmConfigBuilder {
pub fn build(self) -> VmConfig {
VmConfig {
name: self.name,
architecture: self.architecture,
vcpus: self.vcpus,
memory_mib: self.memory_mib,
disks: self.disks,
networks: self.networks,
cdroms: self.cdroms,
boot_order: self.boot_order,
firmware: self.firmware,
}
}
}

View File

@@ -1,222 +0,0 @@
//! [`KvmVmScore`] — thin Score that provisions one libvirt VM from a
//! cloud image + a cloud-init seed ISO built from the caller's config.
//!
//! This is *not* the customer-facing device-setup Score. It's the test
//! rig that stands in for "a freshly flashed Pi on the network" so the
//! IoT walking-skeleton smoke test can run end-to-end without physical
//! hardware. See [`crate::modules::iot::IotDeviceSetupScore`] for the
//! post-provisioning configuration Score that targets the VM via its
//! [`crate::modules::linux::LinuxHostTopology`].
use std::net::IpAddr;
use std::path::PathBuf;
use std::process::Stdio;
use async_trait::async_trait;
use harmony_types::id::Id;
use log::info;
use serde::{Deserialize, Serialize};
use tokio::process::Command;
use crate::data::Version;
use crate::domain::interpret::{
Interpret, InterpretError, InterpretName, InterpretStatus, Outcome,
};
use crate::domain::inventory::Inventory;
use crate::score::Score;
use crate::topology::Topology;
use super::cloudinit::{CloudInitSeedConfig, build_seed_iso};
use super::topology::KvmHost;
use super::types::{CdromConfig, DiskConfig, VmConfig};
/// Everything `KvmVmScore` needs to bring a single VM up from a cloud
/// image.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CloudInitVmConfig {
/// libvirt domain name (must be unique on the host).
pub vm_name: String,
/// Guest hostname (set by cloud-init). Defaults to `vm_name` if None.
pub hostname: Option<String>,
pub vcpus: u32,
pub memory_mib: u64,
/// Absolute path to a pre-downloaded cloud image (qcow2). The Score
/// uses this as the *backing file* for a per-VM overlay disk — so
/// the base image stays pristine and multiple VMs can share it.
/// Must be readable by libvirt-qemu (world-readable + traversable
/// parent dirs is the lightest setup; a libvirt storage pool is the
/// more serious option).
pub base_image_path: PathBuf,
/// Directory where the generated cloud-init seed ISO is written.
pub seed_output_dir: PathBuf,
/// Username created by cloud-init with passwordless sudo. The IoT
/// agent runs under a *different*, service-scoped account that the
/// device-setup Score creates later; this one is the admin identity
/// that `IotDeviceSetupScore` SSHes in as to apply configuration.
pub admin_user: String,
/// openssh-format public key line; authorized for `admin_user`.
pub authorized_key: String,
/// libvirt network name to attach a NIC to. Typically `"default"`
/// (the libvirt-shipped NAT bridge).
pub network_name: String,
}
/// Provision a single VM from a cloud image + a generated cloud-init
/// seed. Idempotent at the libvirt level: re-running does not recreate
/// if a domain with this name already exists.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KvmVmScore {
pub config: CloudInitVmConfig,
}
impl<T: Topology + KvmHost> Score<T> for KvmVmScore {
fn name(&self) -> String {
format!("KvmVmScore({})", self.config.vm_name)
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(KvmVmInterpret {
config: self.config.clone(),
version: Version::from("0.1.0").expect("static version"),
status: InterpretStatus::QUEUED,
})
}
}
#[derive(Debug)]
struct KvmVmInterpret {
config: CloudInitVmConfig,
version: Version,
status: InterpretStatus,
}
#[async_trait]
impl<T: Topology + KvmHost> Interpret<T> for KvmVmInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::KvmVm
}
fn get_version(&self) -> Version {
self.version.clone()
}
fn get_status(&self) -> InterpretStatus {
self.status.clone()
}
fn get_children(&self) -> Vec<Id> {
vec![]
}
async fn execute(
&self,
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let cfg = &self.config;
let hostname = cfg.hostname.clone().unwrap_or_else(|| cfg.vm_name.clone());
// Per-VM overlay disk, backed by the base image. Gets created
// (or re-created) only when the libvirt domain doesn't already
// exist, so re-runs of an unchanged Score are true NOOPs. On a
// fresh run, the overlay is wiped so cloud-init boots against a
// clean rootfs.
let executor = topology.kvm_executor();
let overlay_path = cfg.seed_output_dir.join(format!("{}.qcow2", cfg.vm_name));
let vm_already_exists = executor
.vm_exists(&cfg.vm_name)
.await
.map_err(|e| InterpretError::new(format!("vm_exists: {e}")))?;
if !vm_already_exists {
if overlay_path.exists() {
tokio::fs::remove_file(&overlay_path)
.await
.map_err(|e| InterpretError::new(format!("remove stale overlay: {e}")))?;
}
tokio::fs::create_dir_all(&cfg.seed_output_dir)
.await
.map_err(|e| InterpretError::new(format!("create seed dir: {e}")))?;
let status = Command::new("qemu-img")
.arg("create")
.arg("-f")
.arg("qcow2")
.arg("-F")
.arg("qcow2")
.arg("-b")
.arg(&cfg.base_image_path)
.arg(&overlay_path)
.stdout(Stdio::null())
.stderr(Stdio::piped())
.output()
.await
.map_err(|e| InterpretError::new(format!("spawn qemu-img: {e}")))?;
if !status.status.success() {
let stderr = String::from_utf8_lossy(&status.stderr);
return Err(InterpretError::new(format!(
"qemu-img create overlay failed: {}",
stderr.trim()
)));
}
info!(
"created overlay disk {overlay_path:?} backed by {:?}",
cfg.base_image_path
);
}
// Build cloud-init seed ISO in the caller-chosen output dir.
let seed_iso_path = build_seed_iso(
&CloudInitSeedConfig {
hostname: &hostname,
authorized_key: &cfg.authorized_key,
user: &cfg.admin_user,
extra_runcmd: vec![],
},
&cfg.seed_output_dir,
)
.await
.map_err(|e| InterpretError::new(format!("cloud-init seed build: {e}")))?;
info!("cloud-init seed ready at {seed_iso_path:?}");
// Compose VM config: base image as the root disk, seed ISO as a
// secondary cdrom at hdb so libvirt boots off the disk (cloud-
// init on the image finds CIDATA on hdb automatically).
let vm_config = VmConfig {
name: cfg.vm_name.clone(),
vcpus: cfg.vcpus,
memory_mib: cfg.memory_mib,
disks: vec![DiskConfig {
size_gb: 0,
device: "vda".to_string(),
pool: "default".to_string(),
source_path: Some(overlay_path.to_string_lossy().into_owned()),
}],
networks: vec![super::types::NetworkRef::named(&cfg.network_name)],
cdroms: vec![CdromConfig {
source: seed_iso_path.to_string_lossy().into_owned(),
device: "hdb".to_string(),
}],
boot_order: vec![super::types::BootDevice::Disk],
};
executor
.ensure_vm(vm_config)
.await
.map_err(|e| InterpretError::new(format!("ensure_vm: {e}")))?;
executor
.start_vm(&cfg.vm_name)
.await
.map_err(|e| InterpretError::new(format!("start_vm: {e}")))?;
// First-boot cloud-init can easily take 2-4 minutes on a Pi
// target or a constrained CI worker: datasource detection,
// package regeneration, SSH host-key generation, reboots. 300s
// is a middle-of-the-road budget that still aborts before a
// whole-cluster CI pipeline gets painful.
let ip: IpAddr = executor
.wait_for_ip(&cfg.vm_name, std::time::Duration::from_secs(300))
.await
.map_err(|e| InterpretError::new(format!("wait_for_ip: {e}")))?;
Ok(Outcome::success_with_details(
format!("VM {} reachable at {ip}", cfg.vm_name),
vec![format!("seed_iso={seed_iso_path:?}"), format!("ip={ip}")],
))
}
}

View File

@@ -35,13 +35,114 @@
//! serialization. The `VmConfig`/`NetworkConfig` builder API stays unchanged —
//! only the internal XML generation changes.
use super::types::{CdromConfig, DiskConfig, ForwardMode, NetworkConfig, VmConfig};
use super::types::{
CdromConfig, DiskConfig, ForwardMode, NetworkConfig, UefiFirmware, VmArchitecture, VmConfig,
};
/// Resolved arch-specific knobs that feed the libvirt domain
/// template. Keeps the per-arch branching out of the format string
/// so the XML template itself stays readable.
struct DomainXmlParams {
/// `kvm` for hardware-accelerated runs, `qemu` for TCG
/// emulation (aarch64 on x86_64 today).
domain_type: &'static str,
/// XML namespace attribute appended to `<domain>`. Non-empty
/// only when we need the `qemu:commandline` escape hatch.
domain_namespace: &'static str,
/// libvirt `<os>/<type arch='…'>`. Ubuntu/libvirt use the
/// `uname -m` names (`x86_64`, `aarch64`).
arch: &'static str,
/// libvirt `<os>/<type machine='…'>`. `q35` for x86_64
/// (modern PCIe), `virt` for aarch64 (no legacy chipsets).
machine: &'static str,
/// Emulator binary libvirt should exec.
emulator: &'static str,
/// `<cpu …>…</cpu>` contents. `host-model` for x86_64 (lets
/// libvirt pick a matching KVM-accelerated CPU model), a
/// named model for aarch64 TCG (`-cpu max`).
cpu_block: &'static str,
/// Optional `<qemu:commandline>` block, rendered as the last
/// child of `<domain>`. Used for QEMU CPU properties that
/// libvirt's schema doesn't know about (e.g. `pauth-impdef`,
/// which is a QEMU-defined property of `-cpu max`, not a CPU
/// feature in libvirt's feature database).
qemu_commandline: &'static str,
/// UEFI firmware to point `<loader>` + `<nvram>` at. None
/// for x86_64 (SeaBIOS default); required for aarch64.
firmware: Option<UefiFirmware>,
}
impl DomainXmlParams {
fn for_vm(vm: &VmConfig) -> Self {
match vm.architecture {
VmArchitecture::X86_64 => Self {
domain_type: "kvm",
domain_namespace: "",
arch: "x86_64",
machine: "q35",
emulator: "/usr/bin/qemu-system-x86_64",
// host-model: libvirt chooses a model compatible
// with the host CPU and exposes it to the guest.
// Safe default for bare-metal KVM.
cpu_block: "<cpu mode='host-model'/>",
qemu_commandline: "",
firmware: None,
},
VmArchitecture::Aarch64 => Self {
                // TCG emulation on x86_64 hosts. On native aarch64
                // hardware this would be `kvm` with no cpu_block
                // override; we'll revisit when a native-aarch64
                // runner shows up (a single-line fork in for_vm).
domain_type: "qemu",
domain_namespace: " xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'",
arch: "aarch64",
machine: "virt",
emulator: "/usr/bin/qemu-system-aarch64",
// `<cpu mode='custom' ...><model>max</model></cpu>`
// tells libvirt to pass `-cpu max` to QEMU, but we
// cannot add `pauth-impdef` as a `<feature>` because
// libvirt's CPU-feature database doesn't know it —
// it's a QEMU property of `-cpu max`, not a CPU
// feature in the Arm sense. So we keep the libvirt
// `<cpu>` block minimal and override `-cpu` at the
// QEMU CLI layer below.
cpu_block: "<cpu mode='custom' match='exact'>\n <model>max</model>\n </cpu>",
// libvirt's escape hatch: append raw QEMU CLI args
// after its own. QEMU takes the LAST `-cpu` / `-accel`
// as authoritative, so `-cpu max` (from <cpu>) followed
// by `-cpu max,pauth-impdef=on` yields max-with-
// pauth-impdef, and `-accel tcg` from libvirt
// followed by `-accel tcg,thread=multi` forces MTTCG.
//
// `pauth-impdef=on` switches pointer-auth to an
// impl-defined algorithm, cutting the largest TCG
// perf hit on arm64 (Linaro, Jan 2025).
//
// `thread=multi` enables MTTCG (multi-threaded TCG).
// Despite QEMU docs claiming MTTCG is default on
// aarch64, in practice (QEMU 10.2 observed here)
// cross-arch `-accel tcg` runs single-threaded and
// only vcpu.0 executes. Forcing it doubles throughput
// on 2-vcpu guests and is the difference between a
// 20-minute cold boot and a 10-minute one.
qemu_commandline: " <qemu:commandline>\n \
<qemu:arg value='-cpu'/>\n \
<qemu:arg value='max,pauth-impdef=on'/>\n \
<qemu:arg value='-accel'/>\n \
<qemu:arg value='tcg,thread=multi'/>\n \
</qemu:commandline>\n",
firmware: vm.firmware.clone(),
},
}
}
}
/// Renders the libvirt domain XML for a VM definition.
///
/// The caller passes the image directory where qcow2 volumes are stored.
pub fn domain_xml(vm: &VmConfig, image_dir: &str) -> String {
let memory_kib = vm.memory_mib * 1024;
let params = DomainXmlParams::for_vm(vm);
let os_boot = vm
.boot_order
@@ -49,6 +150,16 @@ pub fn domain_xml(vm: &VmConfig, image_dir: &str) -> String {
.map(|b| format!(" <boot dev='{}'/>\n", b.as_xml_dev()))
.collect::<String>();
let os_firmware = match &params.firmware {
Some(fw) => format!(
" <loader readonly='yes' type='pflash'>{code}</loader>\n \
<nvram>{vars}</nvram>\n",
code = fw.code.display(),
vars = fw.vars.display()
),
None => String::new(),
};
let devices = {
let disks = disk_devices(vm, image_dir);
let cdroms = cdrom_devices(vm);
@@ -57,33 +168,48 @@ pub fn domain_xml(vm: &VmConfig, image_dir: &str) -> String {
};
format!(
r#"<domain type='kvm'>
r#"<domain type='{domain_type}'{domain_namespace}>
<name>{name}</name>
<memory unit='KiB'>{memory_kib}</memory>
<vcpu>{vcpus}</vcpu>
<os>
<type arch='x86_64' machine='q35'>hvm</type>
{os_boot} </os>
<type arch='{arch}' machine='{machine}'>hvm</type>
{os_firmware}{os_boot} </os>
<features>
<acpi/>
<apic/>
</features>
<cpu mode='host-model'/>
{cpu_block}
<devices>
<emulator>/usr/bin/qemu-system-x86_64</emulator>
<emulator>{emulator}</emulator>
{devices} <serial type='pty'>
<target port='0'/>
</serial>
<console type='pty'>
<target type='serial' port='0'/>
</console>
<rng model='virtio'>
<!-- aarch64 cloud-init hangs for minutes on first-boot SSH
host-key generation unless the guest can see a hardware
RNG. KVM hosts rarely feel it; TCG always does. Pipe
host /dev/urandom in for both archs — cheap insurance. -->
<backend model='random'>/dev/urandom</backend>
</rng>
</devices>
</domain>"#,
{qemu_commandline}</domain>"#,
domain_type = params.domain_type,
domain_namespace = params.domain_namespace,
name = vm.name,
memory_kib = memory_kib,
vcpus = vm.vcpus,
arch = params.arch,
machine = params.machine,
os_firmware = os_firmware,
os_boot = os_boot,
cpu_block = params.cpu_block,
emulator = params.emulator,
devices = devices,
qemu_commandline = params.qemu_commandline,
)
}
@@ -318,6 +444,99 @@ mod tests {
assert!(xml.contains("mode='host-model'"));
}
// ── aarch64 ──────────────────────────────────────────────────────
#[test]
fn domain_xml_aarch64_defaults_to_qemu_tcg_with_virt_machine() {
use crate::modules::kvm::types::{UefiFirmware, VmArchitecture};
use std::path::PathBuf;
let vm = VmConfig::builder("arm-test")
.architecture(VmArchitecture::Aarch64)
.firmware(UefiFirmware {
code: PathBuf::from("/usr/share/AAVMF/AAVMF_CODE.fd"),
vars: PathBuf::from("/tmp/arm-test-VARS.fd"),
})
.disk(10)
.build();
let xml = domain_xml(&vm, "/tmp");
assert!(
xml.contains("<domain type='qemu'"),
"aarch64 uses TCG, not kvm"
);
assert!(xml.contains("arch='aarch64'"));
assert!(xml.contains("machine='virt'"));
assert!(xml.contains("/usr/bin/qemu-system-aarch64"));
}
#[test]
fn domain_xml_aarch64_sets_cpu_max_with_pauth_impdef_via_qemu_commandline() {
use crate::modules::kvm::types::{UefiFirmware, VmArchitecture};
use std::path::PathBuf;
let vm = VmConfig::builder("arm-cpu")
.architecture(VmArchitecture::Aarch64)
.firmware(UefiFirmware {
code: PathBuf::from("/usr/share/AAVMF/AAVMF_CODE.fd"),
vars: PathBuf::from("/tmp/arm-cpu-VARS.fd"),
})
.build();
let xml = domain_xml(&vm, "/tmp");
// `<cpu>` stays libvirt-validated (just the model).
assert!(xml.contains("<model>max</model>"));
assert!(
!xml.contains("name='pauth-impdef'"),
"pauth-impdef is not a libvirt CPU feature; \
must not leak into the <cpu><feature> block"
);
// Root `<domain>` must declare the qemu namespace — libvirt
// rejects `<qemu:*>` elements without it.
assert!(
xml.contains("xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'"),
"qemu namespace must be declared on <domain> when we use qemu:commandline"
);
// pauth-impdef is passed as a QEMU CLI override — the final
// `-cpu max,pauth-impdef=on` arg wins over libvirt's `-cpu max`.
assert!(xml.contains("<qemu:arg value='-cpu'/>"));
assert!(
xml.contains("<qemu:arg value='max,pauth-impdef=on'/>"),
"pauth-impdef=on is the single biggest TCG arm64 perf knob; \
must reach QEMU via qemu:commandline override"
);
// MTTCG override — without this, cross-arch TCG runs
// single-threaded (vcpu.1.time stays at 0).
assert!(xml.contains("<qemu:arg value='-accel'/>"));
assert!(
xml.contains("<qemu:arg value='tcg,thread=multi'/>"),
"thread=multi doubles throughput on multi-vcpu guests; \
libvirt passes bare `-accel tcg` without it"
);
}
#[test]
fn domain_xml_aarch64_emits_pflash_loader_and_nvram() {
use crate::modules::kvm::types::{UefiFirmware, VmArchitecture};
use std::path::PathBuf;
let vm = VmConfig::builder("arm-efi")
.architecture(VmArchitecture::Aarch64)
.firmware(UefiFirmware {
code: PathBuf::from("/usr/share/AAVMF/AAVMF_CODE.fd"),
vars: PathBuf::from("/var/lib/libvirt/nvram/arm-efi_VARS.fd"),
})
.build();
let xml = domain_xml(&vm, "/tmp");
assert!(xml.contains(
"<loader readonly='yes' type='pflash'>/usr/share/AAVMF/AAVMF_CODE.fd</loader>"
));
assert!(xml.contains("<nvram>/var/lib/libvirt/nvram/arm-efi_VARS.fd</nvram>"));
}
#[test]
fn domain_xml_x86_64_has_no_efi_loader() {
let vm = VmConfig::builder("x86-bios").build();
let xml = domain_xml(&vm, "/tmp");
assert!(!xml.contains("<loader"));
assert!(!xml.contains("<nvram"));
}
#[test]
fn domain_xml_serial_console() {
let vm = VmConfig::builder("console-test").build();

View File

@@ -1,24 +1,27 @@
//! Ansible-backed impl of [`HostConfigurationProvider`] — ad-hoc mode
//! only, **no YAML generation, no inventory file**.
//! Ansible-backed impl of the Linux-host configuration capabilities
//! ([`HostReachable`], [`PackageInstaller`], [`FileDelivery`],
//! [`UnixUserManager`], [`SystemdManager`]) — ad-hoc mode only, no
//! YAML generation, no inventory file.
//!
//! Every primitive maps to one or two invocations of `ansible all -i
//! '<ip>,' -m <module> -a '<json>'`, with `--stdout-callback=oneline` so
//! we get one `host | VERB => {json}` line per host. Harmony owns 100%
//! of the orchestration/ordering; Ansible owns *only* the per-host
//! idempotent module execution. This is the same reason we picked
//! `podman-api` over shelling to `podman` elsewhere use the mature
//! upstream where it's mature (apt/systemd/user module idempotency),
//! don't adopt its orchestration model (playbooks, inventory, YAML
//! templating, the Kubespray mess).
//! '<ip>,' -m <module> -a '<json>'`, with `--stdout-callback=oneline`
//! so we get one `host | VERB => {json}` line per host. Harmony owns
//! 100% of the orchestration/ordering; Ansible owns only per-host
//! idempotent module execution. Matches the reasoning behind picking
//! `podman-api` over shelling to `podman` elsewhere: use mature
//! upstream where upstream is mature (apt/systemd/user/file module
//! idempotency), don't adopt its orchestration model.
//!
//! The Ansible runtime itself lives in a managed venv under
//! [`HARMONY_DATA_DIR`]; see [`super::ansible_venv::ensure_ansible_venv`].
//! The operator does *not* need to install `ansible` system-wide.
//! [`HARMONY_DATA_DIR`]; see
//! [`super::ansible_venv::ensure_ansible_venv`]. The operator does
//! *not* need to install `ansible` system-wide.
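//!
//! For orientation, a single `ensure_package` call ends up invoking a
//! command shaped roughly like this (host IP, user, and key path are
//! illustrative placeholders, not values this module hard-codes):
//!
//! ```text
//! ansible all -i '192.0.2.10,' -m ansible.builtin.apt \
//!   -a '{"name":"podman","state":"present","update_cache":true,"cache_valid_time":3600}' \
//!   -u ubuntu --private-key /path/to/id_ed25519 \
//!   '--ssh-common-args=-o StrictHostKeyChecking=no ...' --become
//! ```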
use std::path::Path;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use harmony_types::net::IpAddress;
use serde::Serialize;
use serde_json::{Value, json};
use tokio::process::Command;
@@ -28,6 +31,7 @@ use crate::domain::topology::{
use crate::executors::ExecutorError;
use super::ansible_venv::ensure_ansible_venv;
use super::ssh_executor::ssh_exec;
use super::topology::SshCredentials;
pub struct AnsibleHostConfigurator;
@@ -49,28 +53,34 @@ impl AnsibleHostConfigurator {
creds: &SshCredentials,
name: &str,
) -> Result<ChangeReport, ExecutorError> {
// Target OS for v0 is Debian-family (Raspbian, Ubuntu — see
// ROADMAP/iot_platform/v0_walking_skeleton.md §5.3). Using
// `ansible.builtin.apt` directly (vs the generic `package`
// module) lets us set `update_cache=true` so `apt install` on
// a fresh cloud image doesn't fail with "no package matching"
// because the cache was never populated. `cache_valid_time`
// keeps re-runs cheap: the update is skipped if the cache was
// refreshed within the last hour.
// Distro dispatch lives inside this function — that's the
// encapsulation we want. Callers say "install podman"; we
// pick apt/dnf/pacman/apk. Debian-family is the only dispatch
// currently wired because it's our first concrete target (IoT
// runs on Raspbian/Ubuntu per ROADMAP/iot_platform/
// v0_walking_skeleton.md §5.3). Extending to RHEL/Fedora/
// Alpine is a matter of detecting the family here and picking
// `ansible.builtin.dnf` / `community.general.pacman` /
// `community.general.apk` with equivalent cache-warming
// flags — no trait or capability change. When we have more
// than one distro it may also be worth a companion Score
// (`EnsureQemuKvmInstalled`, etc.) that canonicalizes the
// cross-family package names for common infrastructure.
//
// When we grow RHEL-family support, switch on the distro
// (cached in topology) and dispatch to `ansible.builtin.dnf`
// with its own cache warming. `package` alone isn't enough.
// `update_cache: true` + `cache_valid_time: 3600` makes sure a
// fresh cloud image's empty apt cache gets populated before
// the install, while re-runs within the hour stay cheap.
let apt_args = AptArgs {
name,
state: "present",
update_cache: true,
cache_valid_time: 3600,
};
self.run_module(
host,
creds,
"ansible.builtin.apt",
json!({
"name": name,
"state": "present",
"update_cache": true,
"cache_valid_time": 3600,
}),
to_value(&apt_args)?,
true,
None,
)
@@ -83,23 +93,15 @@ impl AnsibleHostConfigurator {
creds: &SshCredentials,
spec: &UserSpec,
) -> Result<ChangeReport, ExecutorError> {
let mut args = json!({
"name": spec.name,
"state": "present",
"system": spec.system,
"create_home": spec.create_home,
});
if let Some(group) = &spec.group {
args["group"] = json!(group);
}
if !spec.supplementary_groups.is_empty() {
args["groups"] = json!(spec.supplementary_groups);
args["append"] = json!(true);
}
if let Some(shell) = &spec.shell {
args["shell"] = json!(shell);
}
self.run_module(host, creds, "ansible.builtin.user", args, true, None)
let args = AnsibleUserArgs::from(spec);
self.run_module(
host,
creds,
"ansible.builtin.user",
to_value(&args)?,
true,
None,
)
.await
}
@@ -109,23 +111,24 @@ impl AnsibleHostConfigurator {
creds: &SshCredentials,
spec: &FileSpec,
) -> Result<ChangeReport, ExecutorError> {
// Ansible's copy module doesn't auto-create parent dirs, so
// Ansible's `copy` module doesn't auto-create parent dirs, so
// writes into fresh paths like `/etc/iot-agent/config.toml`
// fail with "Destination directory … does not exist". Create
// the parent first via the `file` module; state=directory is
// idempotent so this is a cheap noop on re-run.
if let Some(parent) = std::path::Path::new(&spec.path).parent() {
if let Some(parent) = Path::new(&spec.path).parent() {
let parent_str = parent.to_string_lossy().to_string();
if !parent_str.is_empty() && parent_str != "/" {
let dir_args = AnsibleFileArgs {
path: &parent_str,
state: "directory",
mode: Some("0755"),
};
self.run_module(
host,
creds,
"ansible.builtin.file",
json!({
"path": parent_str,
"state": "directory",
"mode": "0755",
}),
to_value(&dir_args)?,
true,
None,
)
@@ -133,33 +136,15 @@ impl AnsibleHostConfigurator {
}
}
let mut args = json!({
"dest": spec.path,
});
match &spec.source {
FileSource::Content(s) => {
args["content"] = json!(s);
}
FileSource::LocalPath(p) => {
// Ansible's copy module reads this path on the
// controller and ships the bytes over its usual SSH
// transport (not via argv), which is what lets us
// deliver binary files larger than ARG_MAX.
args["src"] = json!(p);
}
}
if let Some(owner) = &spec.owner {
args["owner"] = json!(owner);
}
if let Some(group) = &spec.group {
args["group"] = json!(group);
}
if let Some(mode) = spec.mode {
// Ansible accepts octal as a string. The leading `0` isn't
// strictly required but makes it unambiguous to a human.
args["mode"] = json!(format!("0{mode:o}"));
}
self.run_module(host, creds, "ansible.builtin.copy", args, true, None)
let args = AnsibleCopyArgs::from(spec);
self.run_module(
host,
creds,
"ansible.builtin.copy",
to_value(&args)?,
true,
None,
)
.await
}
@@ -169,7 +154,9 @@ impl AnsibleHostConfigurator {
creds: &SshCredentials,
spec: &SystemdUnitSpec,
) -> Result<ChangeReport, ExecutorError> {
// Step 1: write the unit file.
// Two ad-hoc invocations: drop the unit file, then enable +
// (optionally) start via `ansible.builtin.systemd`, which
// handles daemon-reload as part of the same module call.
let (unit_path, scope_user) = match &spec.scope {
SystemdScope::System => (format!("/etc/systemd/system/{}.service", spec.name), None),
SystemdScope::User(u) => (
@@ -184,39 +171,32 @@ impl AnsibleHostConfigurator {
let elevate = scope_user.is_none();
let become_user = scope_user.as_deref();
let file_changed = self
.run_module(
host,
creds,
"ansible.builtin.copy",
json!({
"dest": unit_path,
"content": spec.unit_content,
"mode": "0644",
}),
true,
None,
)
.await?;
let file_spec = FileSpec {
path: unit_path.clone(),
source: FileSource::Content(spec.unit_content.clone()),
owner: None,
group: None,
mode: Some(0o644),
};
let file_changed = self.ensure_file(host, creds, &file_spec).await?;
// Step 2: daemon-reload + enable + start.
let mut systemd_args = json!({
"name": spec.name,
"enabled": true,
"daemon_reload": true,
});
if spec.start_immediately {
systemd_args["state"] = json!("started");
}
if scope_user.is_some() {
systemd_args["scope"] = json!("user");
}
let systemd_args = AnsibleSystemdArgs {
name: &spec.name,
enabled: Some(true),
state: if spec.start_immediately {
Some("started")
} else {
None
},
daemon_reload: true,
scope: scope_user.as_deref().map(|_| "user"),
};
let systemd_changed = self
.run_module(
host,
creds,
"ansible.builtin.systemd",
systemd_args,
to_value(&systemd_args)?,
elevate,
become_user,
)
@@ -234,22 +214,22 @@ impl AnsibleHostConfigurator {
name: &str,
scope: SystemdScope,
) -> Result<ChangeReport, ExecutorError> {
let mut args = json!({
"name": name,
"state": "restarted",
});
let (elevate, become_user) = match &scope {
SystemdScope::System => (true, None),
SystemdScope::User(u) => {
args["scope"] = json!("user");
(true, Some(u.as_str().to_string()))
}
let (elevate, become_user, scope_str) = match &scope {
SystemdScope::System => (true, None, None),
SystemdScope::User(u) => (true, Some(u.as_str().to_string()), Some("user")),
};
let args = AnsibleSystemdArgs {
name,
enabled: None,
state: Some("restarted"),
daemon_reload: false,
scope: scope_str,
};
self.run_module(
host,
creds,
"ansible.builtin.systemd",
args,
to_value(&args)?,
elevate,
become_user.as_deref(),
)
@@ -262,37 +242,30 @@ impl AnsibleHostConfigurator {
creds: &SshCredentials,
user: &str,
) -> Result<ChangeReport, ExecutorError> {
// Ad-hoc mode has no `changed_when`, so we sentinel through
// stdout: the script echoes either "noop" or "changed" and we
// parse that out of the module's reported `stdout` field.
// `loginctl enable-linger` is itself idempotent; the wrapping
// `if` is purely to distinguish the two cases for reconcile-
// restart decisions upstream.
let script = format!(
"if loginctl show-user {user} 2>/dev/null | grep -q '^Linger=yes'; then \
echo noop; \
else \
loginctl enable-linger {user}; \
echo changed; \
fi"
);
let output = self
.run_module_full(
// systemd records user-session linger as the existence of
// `/var/lib/systemd/linger/<user>` (systemd-logind(8)). Two
// direct-over-SSH probes — no Ansible, because this is a
// tiny shell check and a single `loginctl` call with no
// per-module idempotency magic to lean on.
//
// Why not just `touch` the marker file? Touching it creates
// the file but doesn't fire the dbus signal that systemd-
// logind needs to actually start the user manager; every
// subsequent `systemctl --user …` then fails with "Failed
// to connect to bus". `loginctl enable-linger` does both.
let check = ssh_exec(
host,
creds,
"ansible.builtin.shell",
json!({ "cmd": script }),
true,
None,
&format!("test -e /var/lib/systemd/linger/{user}"),
)
.await?;
let changed = output
.payload
.get("stdout")
.and_then(Value::as_str)
.map(|s| s.trim() == "changed")
.unwrap_or(false);
Ok(ChangeReport { changed })
if check.rc == 0 {
return Ok(ChangeReport::NOOP);
}
ssh_exec(host, creds, &format!("sudo loginctl enable-linger {user}"))
.await?
.into_successful()?;
Ok(ChangeReport::CHANGED)
}
pub async fn ensure_user_unit_active(
@@ -302,20 +275,28 @@ impl AnsibleHostConfigurator {
user: &str,
unit: &str,
) -> Result<ChangeReport, ExecutorError> {
self.run_module(
host,
creds,
"ansible.builtin.systemd",
json!({
"name": unit,
"enabled": true,
"state": "started",
"scope": "user",
}),
true,
Some(user),
)
.await
// `ansible.builtin.systemd` with `scope: user` needs
// `XDG_RUNTIME_DIR` in the systemctl process env, which can only
// be injected via the task-level `environment:` keyword, and that
// keyword exists in playbooks only, not in ad-hoc mode. Rather
// than pipe a one-task playbook, use russh directly: two small
// SSH calls, no Python wrapper, no inline YAML.
//
// Report `changed=true` unconditionally. systemctl
// enable --now is idempotent at the systemd level so re-
// running does no harm; reconcile-restart decisions
// upstream see only the outer-Score changes they care
// about (TOML and unit file changes), not this start-
// verification step.
let id_out = ssh_exec(host, creds, &format!("id -u {user}"))
.await?
.into_successful()?;
let uid = id_out.stdout.trim();
let cmd = format!(
"sudo -u {user} env XDG_RUNTIME_DIR=/run/user/{uid} \
systemctl --user enable --now {unit}"
);
ssh_exec(host, creds, &cmd).await?.into_successful()?;
Ok(ChangeReport::CHANGED)
}
// -----------------------------------------------------------------
@@ -339,9 +320,6 @@ impl AnsibleHostConfigurator {
})
}
/// Like [`run_module`] but returns the full module payload so
/// callers that need to inspect module-specific fields (e.g. the
/// `stdout` of a `shell` invocation) can.
async fn run_module_full(
&self,
host: IpAddress,
@@ -353,9 +331,9 @@ impl AnsibleHostConfigurator {
) -> Result<ModuleOutput, ExecutorError> {
let bins = ensure_ansible_venv().await?;
// Passing `-a '{}'` trips ansible-core 2.17's "extra params"
// check on parameterless modules (e.g. `ping`) — it seems to
// read the empty brace as positional rather than an empty dict.
// Skip `-a` entirely when there are no args to pass.
// check on parameterless modules (ping et al.) — it reads the
// empty brace as positional rather than an empty dict. Skip
// `-a` entirely when there are no args to pass.
let args_json_opt = if args.as_object().is_some_and(|m| m.is_empty()) {
None
} else {
@@ -366,64 +344,58 @@ impl AnsibleHostConfigurator {
};
let inventory = format!("{host},");
let ssh_common =
"-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10";
// `--ssh-common-args=<value>` (equals form) is required: the
// value starts with `-o`, which otherwise gets re-parsed by
// ansible's argparse as its own `-o` flag.
let ssh_common_arg = "--ssh-common-args=-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null -o ConnectTimeout=10"
.to_string();
let private_key = creds
.private_key_path
.to_str()
.ok_or_else(|| exec("ssh private key path is not valid UTF-8"))?;
// `--ssh-common-args=<value>` (equals form, single arg) is
// required: the value starts with `-o` which otherwise gets
// re-parsed by ansible's argparse as its own `-o` (oneline
// output) flag and the whole command fails with a help dump.
let ssh_common_arg = format!("--ssh-common-args={ssh_common}");
let mut cmd = Command::new(&bins.ansible);
cmd.arg("all")
.arg("-i")
.arg(&inventory)
.arg("-m")
.arg(module)
.arg("-u")
.arg(&creds.user)
.arg("--private-key")
.arg(&creds.private_key_path)
.arg(&ssh_common_arg);
if let Some(args_json) = args_json_opt.as_ref() {
cmd.arg("-a").arg(args_json);
let mut argv: Vec<String> = vec![
"all".into(),
"-i".into(),
inventory,
"-m".into(),
module.into(),
"-u".into(),
creds.user.clone(),
"--private-key".into(),
private_key.into(),
ssh_common_arg,
];
if let Some(args_json) = args_json_opt {
argv.push("-a".into());
argv.push(args_json);
}
cmd
if elevate {
argv.push("--become".into());
}
if let Some(u) = become_user {
argv.push("--become-user".into());
argv.push(u.into());
}
if let Some(py) = &creds.remote_python {
argv.push("-e".into());
argv.push(format!("ansible_python_interpreter={py}"));
}
let output = Command::new(&bins.ansible)
.args(&argv)
// Ad-hoc mode ignores ANSIBLE_STDOUT_CALLBACK unless
// ANSIBLE_LOAD_CALLBACK_PLUGINS is also set — a quirk
// carried over from ansible's early days when ad-hoc was a
// play-free code path. Without both, we get the default
// "one key-value block spread over many lines" format that
// doesn't parse.
// ANSIBLE_LOAD_CALLBACK_PLUGINS is also set.
.env("ANSIBLE_LOAD_CALLBACK_PLUGINS", "True")
.env("ANSIBLE_STDOUT_CALLBACK", "oneline")
.env("ANSIBLE_HOST_KEY_CHECKING", "False")
.env("ANSIBLE_DEPRECATION_WARNINGS", "False")
// Pipelining ships the module payload over SSH stdin rather
// than writing it to a temp file under the remote user's
// home first — which matters because when we `become` an
// unprivileged user (e.g. iot-agent for user-scope systemd
// operations), ansible's default temp-file shuffle trips
// over an ACL fallback that doesn't work on most Linux
// distros. Pipelining avoids the problem entirely.
// Pipelining avoids the become-to-unprivileged-user temp
// file dance (ansible falls back to an ACL chmod syntax
// no Linux distro accepts).
.env("ANSIBLE_PIPELINING", "True")
// Keep control sockets inside our data dir so multiple
// Harmony processes don't collide in /tmp.
.env("ANSIBLE_SSH_CONTROL_PATH_DIR", control_path_dir());
if elevate {
cmd.arg("--become");
}
if let Some(u) = become_user {
cmd.arg("--become-user").arg(u);
}
if let Some(py) = &creds.remote_python {
cmd.arg("-e")
.arg(format!("ansible_python_interpreter={py}"));
}
let output = cmd
.env("ANSIBLE_SSH_CONTROL_PATH_DIR", control_path_dir())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output()
@@ -451,32 +423,128 @@ impl Default for AnsibleHostConfigurator {
}
}
/// Parsed shape of one oneline-callback line.
///
/// The `oneline` callback prints exactly one line per targeted host,
/// shaped `"<host> | <VERB> => {JSON}"`. We split on `" | "` then
/// `" => "` and treat `FAILED!` / `UNREACHABLE!` as errors. The JSON
/// payload always includes `changed: bool` for modules that report
/// change; a few (e.g. `shell`) always report changed, which is why
/// we also keep the raw payload for callers who want their own
/// idempotency signal.
// ---------------------------------------------------------------------
// Typed module argument structs (serialized to the JSON dict ansible
// expects). Prefer these over ad-hoc `json!` macros — easier to spot
// typos at compile time and easier to grow as modules gain fields.
// ---------------------------------------------------------------------
#[derive(Debug, Serialize)]
struct AptArgs<'a> {
name: &'a str,
state: &'a str,
update_cache: bool,
cache_valid_time: u32,
}
#[derive(Debug, Serialize)]
struct AnsibleFileArgs<'a> {
path: &'a str,
state: &'a str,
#[serde(skip_serializing_if = "Option::is_none")]
mode: Option<&'a str>,
}
#[derive(Debug, Serialize)]
struct AnsibleUserArgs<'a> {
name: &'a str,
state: &'a str,
system: bool,
create_home: bool,
#[serde(skip_serializing_if = "Option::is_none")]
group: Option<&'a str>,
#[serde(skip_serializing_if = "Option::is_none")]
groups: Option<&'a [String]>,
#[serde(skip_serializing_if = "Option::is_none")]
append: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
shell: Option<&'a str>,
}
impl<'a> From<&'a UserSpec> for AnsibleUserArgs<'a> {
fn from(spec: &'a UserSpec) -> Self {
let has_extra = !spec.supplementary_groups.is_empty();
Self {
name: &spec.name,
state: "present",
system: spec.system,
create_home: spec.create_home,
group: spec.group.as_deref(),
groups: if has_extra {
Some(&spec.supplementary_groups)
} else {
None
},
append: if has_extra { Some(true) } else { None },
shell: spec.shell.as_deref(),
}
}
}
#[derive(Debug, Serialize)]
struct AnsibleCopyArgs<'a> {
dest: &'a str,
#[serde(skip_serializing_if = "Option::is_none")]
content: Option<&'a str>,
#[serde(skip_serializing_if = "Option::is_none")]
src: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
owner: Option<&'a str>,
#[serde(skip_serializing_if = "Option::is_none")]
group: Option<&'a str>,
#[serde(skip_serializing_if = "Option::is_none")]
mode: Option<String>,
}
impl<'a> From<&'a FileSpec> for AnsibleCopyArgs<'a> {
fn from(spec: &'a FileSpec) -> Self {
let (content, src): (Option<&str>, Option<String>) = match &spec.source {
FileSource::Content(s) => (Some(s.as_str()), None),
FileSource::LocalPath(p) => (None, Some(p.to_string_lossy().into_owned())),
};
Self {
dest: &spec.path,
content,
src,
owner: spec.owner.as_deref(),
group: spec.group.as_deref(),
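// Ansible accepts octal modes as strings; the leading `0` isn't
// strictly required but keeps it unambiguous to a human reader.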
mode: spec.mode.map(|m| format!("0{m:o}")),
}
}
}
#[derive(Debug, Serialize)]
struct AnsibleSystemdArgs<'a> {
name: &'a str,
#[serde(skip_serializing_if = "Option::is_none")]
enabled: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
state: Option<&'a str>,
#[serde(skip_serializing_if = "std::ops::Not::not")]
daemon_reload: bool,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<&'a str>,
}
fn to_value<T: Serialize>(value: &T) -> Result<Value, ExecutorError> {
serde_json::to_value(value).map_err(|e| exec(format!("serialize ansible args: {e}")))
}
// ---------------------------------------------------------------------
// oneline callback parsing
// ---------------------------------------------------------------------
struct ModuleOutput {
changed: bool,
#[allow(dead_code)]
payload: Value,
}
fn parse_oneline(stdout: &str) -> Result<ModuleOutput, String> {
// The oneline callback emits one of three shapes depending on the
// module. We disambiguate by the `" => "` / `": "` separators.
//
// Shapes the oneline callback emits:
// Success with JSON: "<host> | VERB => {json}"
// Unreachable (no JSON): "<host> | VERB!: <message>"
// Shell/command: "<host> | VERB | rc=N | (stdout) ... | (stderr) ..."
//
// The shell/command shape is oneline's per-module override for
// anything whose return value includes `stdout`/`stderr`; we
// reconstruct a synthetic JSON payload so downstream callers see a
// consistent shape.
// Shell/command: "<host> | VERB | rc=N | (stdout) | (stderr) "
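//
// Hypothetical concrete lines of each shape (host, verb, and payload
// values are made up for illustration):
//   10.0.0.5 | CHANGED => {"changed": true}
//   10.0.0.5 | UNREACHABLE!: Failed to connect to the host via ssh
//   10.0.0.5 | CHANGED | rc=0 | (stdout) changed | (stderr)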
let line = stdout
.lines()
.find(|l| l.contains(" | "))
@@ -485,7 +553,6 @@ fn parse_oneline(stdout: &str) -> Result<ModuleOutput, String> {
.split_once(" | ")
.expect("contains(' | ') just matched");
// Unreachable: "VERB!: msg" — no JSON, no pipe-delimited rc field.
if let Some((verb_with_bang, msg)) = rest.split_once(": ")
&& verb_with_bang.ends_with('!')
&& !verb_with_bang.contains(" | ")
@@ -493,7 +560,6 @@ fn parse_oneline(stdout: &str) -> Result<ModuleOutput, String> {
return Err(format!("{verb_with_bang} {msg}"));
}
// Shell/command shape: presence of ` | rc=` after the verb.
if let Some((verb, tail)) = rest.split_once(" | rc=") {
let (rc_str, payload_tail) = tail.split_once(" | ").unwrap_or((tail, ""));
let rc: i64 = rc_str.trim().parse().unwrap_or(-1);
@@ -513,7 +579,6 @@ fn parse_oneline(stdout: &str) -> Result<ModuleOutput, String> {
});
}
// Default shape: "VERB => {json}".
let (verb, json_blob) = rest
.split_once(" => ")
.ok_or_else(|| format!("no ' => ' separator in line: {line}"))?;
@@ -540,8 +605,6 @@ fn parse_oneline(stdout: &str) -> Result<ModuleOutput, String> {
}
}
/// Extract `(stdout) X (stderr) Y` segments from a shell-format line
/// remainder. Either or both may be absent.
fn extract_std(tail: &str) -> (String, String) {
let mut out = String::new();
let mut err = String::new();
@@ -568,5 +631,8 @@ fn exec(msg: impl Into<String>) -> ExecutorError {
ExecutorError::UnexpectedError(msg.into())
}
/// Unused placeholder kept to remind us that `PathBuf` is in the
/// module's vocabulary — a future `AnsibleFetchArgs` or
/// `AnsibleArchiveArgs` will use it.
#[allow(dead_code)]
fn _ensure_path_exists(_: &Path) {}
fn _pathbuf_placeholder(_: PathBuf) {}

View File

@@ -63,7 +63,11 @@ async fn provision_venv() -> Result<AnsibleBinaries, ExecutorError> {
.map_err(|e| exec(format!("create venv dir {venv_dir:?}: {e}")))?;
info!("creating ansible venv at {venv_dir:?}");
run(Command::new(&python).arg("-m").arg("venv").arg(&venv_dir))
run(Command::new(&python).args([
std::ffi::OsStr::new("-m"),
std::ffi::OsStr::new("venv"),
venv_dir.as_os_str(),
]))
.await
.map_err(|e| {
exec(format!(
@@ -74,11 +78,12 @@ async fn provision_venv() -> Result<AnsibleBinaries, ExecutorError> {
let pip = venv_dir.join("bin").join("pip");
info!("installing {ANSIBLE_CORE_SPEC} into ansible venv (first-run only)");
run(Command::new(&pip)
.arg("install")
.arg("--quiet")
.arg("--disable-pip-version-check")
.arg(ANSIBLE_CORE_SPEC))
run(Command::new(&pip).args([
"install",
"--quiet",
"--disable-pip-version-check",
ANSIBLE_CORE_SPEC,
]))
.await
.map_err(|e| exec(format!("pip install {ANSIBLE_CORE_SPEC} failed: {e}")))?;
@@ -98,8 +103,7 @@ async fn find_python3() -> Result<PathBuf, ExecutorError> {
// rather than a Rust crate to keep our dependency surface thin.
for candidate in ["python3", "python"] {
let status = Command::new("sh")
.arg("-c")
.arg(format!("command -v {candidate}"))
.args(["-c", &format!("command -v {candidate}")])
.stdout(Stdio::piped())
.stderr(Stdio::null())
.output()

View File

@@ -1,7 +1,9 @@
mod ansible_configurator;
mod ansible_venv;
mod ssh_executor;
mod topology;
pub use ansible_configurator::AnsibleHostConfigurator;
pub use ansible_venv::{AnsibleBinaries, ensure_ansible_venv};
pub use ssh_executor::{SshCommandOutput, ssh_exec};
pub use topology::{LinuxHostTopology, SshCredentials};

View File

@@ -0,0 +1,135 @@
//! Direct SSH command execution via russh.
//!
//! Used for one-shot shell-outs that don't benefit from Ansible's
//! idempotency story: running `loginctl enable-linger`, invoking
//! `systemctl --user` with a specific `XDG_RUNTIME_DIR` in the
//! process env, etc. Ansible's `command` module would be a
//! Python-wrapped SSH round trip for zero added value — whereas
//! russh is already a workspace dependency and gives us the exit
//! code, stdout, and stderr in a typed struct.
use std::sync::Arc;
use async_trait::async_trait;
use harmony_types::net::IpAddress;
use russh::ChannelMsg;
use russh::client::{self, Handler};
use russh::keys::{key, load_secret_key};
use crate::executors::ExecutorError;
use super::topology::SshCredentials;
/// Typed result of a single remote command execution.
#[derive(Debug, Clone)]
pub struct SshCommandOutput {
pub rc: i32,
pub stdout: String,
pub stderr: String,
}
impl SshCommandOutput {
/// Returns `Ok(self)` when rc == 0, else an `ExecutorError`
/// carrying the non-zero exit and stderr. Convenience for
/// callers that want to treat rc!=0 as a hard failure.
pub fn into_successful(self) -> Result<Self, ExecutorError> {
if self.rc == 0 {
Ok(self)
} else {
Err(ExecutorError::UnexpectedError(format!(
"ssh command exited with rc={}: {}",
self.rc,
self.stderr.trim()
)))
}
}
}
/// Run `command_line` on the remote host over SSH using the
/// caller's credentials. `command_line` is passed verbatim to the
/// remote default shell (sshd exec channel) — callers that need
/// strict argv semantics should shell-quote their arguments
/// themselves.
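///
/// A minimal usage sketch (the command string and variable names are
/// illustrative, assumed from callers in this module):
///
/// ```ignore
/// let out = ssh_exec(host, &creds, "id -u iot-agent").await?.into_successful()?;
/// let uid = out.stdout.trim();
/// ```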
pub async fn ssh_exec(
host: IpAddress,
creds: &SshCredentials,
command_line: &str,
) -> Result<SshCommandOutput, ExecutorError> {
let key_pair = load_secret_key(&creds.private_key_path, None).map_err(|e| {
ExecutorError::AuthenticationError(format!(
"load ssh key {:?}: {e}",
creds.private_key_path
))
})?;
let config = Arc::new(client::Config {
inactivity_timeout: Some(std::time::Duration::from_secs(60)),
..client::Config::default()
});
let mut handle = client::connect(config, (host, 22), TrustAllHandler)
.await
.map_err(|e| ExecutorError::NetworkError(format!("ssh connect {host}: {e}")))?;
let auth_ok = handle
.authenticate_publickey(&creds.user, Arc::new(key_pair))
.await
.map_err(|e| ExecutorError::AuthenticationError(format!("ssh auth: {e}")))?;
if !auth_ok {
return Err(ExecutorError::AuthenticationError(format!(
"ssh pubkey auth rejected for {}@{host}",
creds.user
)));
}
let mut channel = handle
.channel_open_session()
.await
.map_err(|e| ExecutorError::NetworkError(format!("ssh channel: {e}")))?;
channel
.exec(true, command_line)
.await
.map_err(|e| ExecutorError::NetworkError(format!("ssh exec: {e}")))?;
let mut stdout = Vec::new();
let mut stderr = Vec::new();
let mut rc: Option<i32> = None;
// Drain every message the channel produces. Some sshd
// implementations emit `ExitStatus` *after* `Eof`, so
// breaking on `Eof` loses the rc. `wait()` returns `None`
// when the channel is actually done.
while let Some(msg) = channel.wait().await {
match msg {
ChannelMsg::Data { data } => stdout.extend_from_slice(&data),
// ssh channel extension codes: 1 = stderr (RFC 4254 §5.2).
// Other ext values (none currently defined) are ignored.
ChannelMsg::ExtendedData { data, ext: 1 } => {
stderr.extend_from_slice(&data);
}
ChannelMsg::ExitStatus { exit_status } => rc = Some(exit_status as i32),
_ => {}
}
}
Ok(SshCommandOutput {
rc: rc.unwrap_or(-1),
stdout: String::from_utf8_lossy(&stdout).into_owned(),
stderr: String::from_utf8_lossy(&stderr).into_owned(),
})
}
/// SSH client handler that accepts any host key. Fine for VMs we
/// just provisioned (their host key is ephemeral per-boot anyway);
/// anything touching real long-lived infrastructure should pin.
struct TrustAllHandler;
#[async_trait]
impl Handler for TrustAllHandler {
type Error = russh::Error;
async fn check_server_key(
&mut self,
_server_public_key: &key::PublicKey,
) -> Result<bool, Self::Error> {
Ok(true)
}
}

View File

@@ -5,24 +5,27 @@ use harmony_types::net::IpAddress;
use serde::{Deserialize, Serialize};
use crate::domain::topology::{
ChangeReport, FileSpec, HostConfigurationProvider, PreparationError, PreparationOutcome,
SystemdUnitSpec, Topology, UserSpec,
ChangeReport, FileDelivery, FileSpec, HostReachable, PackageInstaller, PreparationError,
PreparationOutcome, SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager,
UserSpec,
};
use crate::executors::ExecutorError;
use super::ansible_configurator::AnsibleHostConfigurator;
/// A single Linux host reachable over SSH, with an Ansible-backed
/// [`HostConfigurationProvider`] implementation.
/// A single Linux host reachable over SSH, implementing every
/// capability in `LinuxHostConfiguration` via an Ansible-over-SSH
/// backend.
///
/// This is the topology Harmony Scores target when they need to configure
/// a freshly-booted Linux machine (in our case, the VM or Pi that will run
/// the IoT agent). It is *not* the topology a long-running daemon on that
/// same machine would use — for that, see [`crate::modules::podman::
/// PodmanTopology`] on the container-runtime side. A single host typically
/// has both: it's first configured via `LinuxHostTopology` (podman
/// installed, agent placed) and then *serves* `PodmanTopology` to its
/// in-process agent.
/// This is the topology Harmony Scores target when they need to
/// configure a freshly-booted Linux machine (in our case, the VM or
/// Pi that will run the IoT agent). It is *not* the topology a
/// long-running daemon on that same machine would use — for that,
/// see [`crate::modules::podman::PodmanTopology`] on the container-
/// runtime side. A single host typically has both: it's first
/// configured via `LinuxHostTopology` (podman installed, agent
/// placed) and then *serves* `PodmanTopology` to its in-process
/// agent.
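///
/// An illustrative Score-side sequence over these capabilities (the
/// `*_spec` values are hypothetical, not taken from an actual Score):
///
/// ```ignore
/// topology.ensure_package("podman").await?;
/// topology.ensure_user(&agent_user_spec).await?;
/// topology.ensure_file(&agent_binary_spec).await?;
/// topology.ensure_systemd_unit(&agent_unit_spec).await?;
/// ```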
pub struct LinuxHostTopology {
name: String,
host: IpAddress,
@@ -79,29 +82,47 @@ impl Topology for LinuxHostTopology {
}
#[async_trait]
impl HostConfigurationProvider for LinuxHostTopology {
impl HostReachable for LinuxHostTopology {
async fn ping(&self) -> Result<(), ExecutorError> {
self.configurator.ping(self.host, &self.credentials).await
}
}
#[async_trait]
impl PackageInstaller for LinuxHostTopology {
async fn ensure_package(&self, name: &str) -> Result<ChangeReport, ExecutorError> {
self.configurator
.ensure_package(self.host, &self.credentials, name)
.await
}
}
#[async_trait]
impl FileDelivery for LinuxHostTopology {
async fn ensure_file(&self, spec: &FileSpec) -> Result<ChangeReport, ExecutorError> {
self.configurator
.ensure_file(self.host, &self.credentials, spec)
.await
}
}
#[async_trait]
impl UnixUserManager for LinuxHostTopology {
async fn ensure_user(&self, spec: &UserSpec) -> Result<ChangeReport, ExecutorError> {
self.configurator
.ensure_user(self.host, &self.credentials, spec)
.await
}
async fn ensure_file(&self, spec: &FileSpec) -> Result<ChangeReport, ExecutorError> {
async fn ensure_linger(&self, user: &str) -> Result<ChangeReport, ExecutorError> {
self.configurator
.ensure_file(self.host, &self.credentials, spec)
.ensure_linger(self.host, &self.credentials, user)
.await
}
}
#[async_trait]
impl SystemdManager for LinuxHostTopology {
async fn ensure_systemd_unit(
&self,
spec: &SystemdUnitSpec,
@@ -114,19 +135,13 @@ impl HostConfigurationProvider for LinuxHostTopology {
async fn restart_service(
&self,
name: &str,
scope: crate::domain::topology::SystemdScope,
scope: SystemdScope,
) -> Result<ChangeReport, ExecutorError> {
self.configurator
.restart_service(self.host, &self.credentials, name, scope)
.await
}
async fn ensure_linger(&self, user: &str) -> Result<ChangeReport, ExecutorError> {
self.configurator
.ensure_linger(self.host, &self.credentials, user)
.await
}
async fn ensure_user_unit_active(
&self,
user: &str,

21
iot/scripts/smoke-a3-arm.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Convenience wrapper: run the end-to-end smoke test against an
# aarch64 guest (qemu-system-aarch64 TCG when the host is x86_64,
# native KVM when the host is already arm64).
#
# This is essentially:
#   ARCH=aarch64 VM_NAME=iot-smoke-vm-arm ./smoke-a3.sh
# with the VM, device, and NATS resource names defaulted so an arm64
# run can live alongside an x86-64 smoke run on the same host without
# clobbering libvirt or podman state.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export ARCH=aarch64
export VM_NAME="${VM_NAME:-iot-smoke-vm-arm}"
export DEVICE_ID="${DEVICE_ID:-$VM_NAME}"
export NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats-a3-arm}"
export NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net-a3-arm}"
exec "$SCRIPT_DIR/smoke-a3.sh" "$@"

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env bash
# End-to-end smoke test for the VM-as-device flow.
#
# [libvirt ${LIBVIRT_URI:-qemu:///system}] ──KvmVmScore──▶ VM (Ubuntu 24.04, cloud-init'd)
# [libvirt qemu:///system] ──KvmVmScore──▶ VM (Ubuntu 24.04, cloud-init'd)
# │
# ssh+Ansible ◀────┘
# │
@@ -9,43 +9,40 @@
# IotDeviceSetupScore ──▶ podman + iot-agent on VM
# │
# ▼
# existing A1 operator ──NATS─────┘ (agent joins fleet, reconciles CR)
# existing operator ──NATS────────┘ (agent joins fleet, reconciles CR)
# │
# ▼ [phase 5]
# virsh reboot → agent reconnects
#
# Prerequisites on the runner host:
# - Everything smoke-a1.sh needs (podman, kubectl, k3d)
# - libvirt + qemu-system-x86_64, with the default NAT network present
# and running (`virsh net-start default`)
# - xorriso (for cloud-init seed ISO)
# - python3 (Harmony auto-installs ansible-core into a managed venv
# under $HARMONY_DATA_DIR/ansible-venv/ on first use)
# - An Ubuntu 24.04 cloud image on disk (see $BASE_IMAGE below)
# - An SSH keypair we can authorize on the VM (see $SSH_PUBKEY,
# $SSH_PRIVKEY below)
# Prerequisites on the runner host — all one-time, all generic:
# 1. libvirt + qemu + xorriso + python3 + podman + cargo + kubectl
# (Arch: pacman -S libvirt qemu-full libisoburn python podman
# Debian/Ubuntu: apt install libvirt-daemon-system qemu-kvm
# xorriso python3 python3-venv podman)
# 2. Be in the `libvirt` group (`sudo usermod -aG libvirt $USER`)
# 3. `sudo virsh net-start default && sudo virsh net-autostart default`
#
# The test is NOT fully self-bootstrapping: downloading a ~700 MB cloud
# image and generating SSH keys inside the smoke script would make a
# single run cost tens of minutes. Instead, on first use the script
# tells you what's missing and points at the exact command to run.
# Harmony handles *everything else*: cloud image download, SSH key
# generation, libvirt pool creation, ansible install, agent build.
# First run costs ~2 min to populate caches; subsequent runs hit the
# cache in <1 s.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# ---------------------------- configuration ----------------------------
VM_NAME="${VM_NAME:-iot-smoke-vm}"
DEVICE_ID="${DEVICE_ID:-$VM_NAME}"
GROUP="${GROUP:-group-a}"
VM_WORK_DIR="${VM_WORK_DIR:-/var/tmp/harmony-iot-smoke}"
BASE_IMAGE="${BASE_IMAGE:-$VM_WORK_DIR/ubuntu-24.04-server-cloudimg-amd64.img}"
BASE_IMAGE_URL="${BASE_IMAGE_URL:-https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img}"
SSH_KEY_DIR="${SSH_KEY_DIR:-$VM_WORK_DIR/ssh}"
SSH_PRIVKEY="${SSH_PRIVKEY:-$SSH_KEY_DIR/id_ed25519}"
SSH_PUBKEY="${SSH_PUBKEY:-$SSH_KEY_DIR/id_ed25519.pub}"
LIBVIRT_NETWORK="${LIBVIRT_NETWORK:-default}"
LIBVIRT_URI="${LIBVIRT_URI:-qemu:///system}"
# Guest architecture. `x86-64` runs native KVM; `aarch64` runs under
# qemu-system-aarch64 TCG on x86 hosts (3-5× slower, but it exercises
# the real Pi target). Switching arch changes the cloud-image URL, the
# qemu binary, the agent build target, and the phase 4/5 timeouts.
ARCH="${ARCH:-x86-64}"
NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats-a3}"
NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net-a3}"
NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}"
@@ -54,46 +51,37 @@ NATS_PORT="${NATS_PORT:-4222}"
log() { printf '\033[1;34m[smoke-a3]\033[0m %s\n' "$*"; }
fail() { printf '\033[1;31m[smoke-a3 FAIL]\033[0m %s\n' "$*" >&2; exit 1; }
case "$ARCH" in
x86-64|x86_64) EXAMPLE_ARCH=x86-64; AGENT_TARGET= ;;
aarch64|arm64) EXAMPLE_ARCH=aarch64; AGENT_TARGET=aarch64-unknown-linux-gnu ;;
*) fail "unsupported ARCH=$ARCH (expected: x86-64 | aarch64)" ;;
esac
cleanup() {
local rc=$?
log "cleanup…"
if [[ "${KEEP:-0}" != "1" ]]; then
virsh --connect ${LIBVIRT_URI:-qemu:///system} destroy "$VM_NAME" 2>/dev/null || true
virsh --connect ${LIBVIRT_URI:-qemu:///system} undefine --remove-all-storage "$VM_NAME" 2>/dev/null || true
virsh --connect "$LIBVIRT_URI" destroy "$VM_NAME" 2>/dev/null || true
# `--nvram` is required for aarch64 domains (which have a
# per-VM NVRAM file); harmless on x86_64 where no NVRAM is
# registered. Without it, `undefine` refuses and the next
# run sees a stale domain with whatever XML the previous
# run defined — masking XML changes until manually cleaned.
virsh --connect "$LIBVIRT_URI" undefine --nvram \
--remove-all-storage "$VM_NAME" 2>/dev/null || true
podman rm -f "$NATS_CONTAINER" >/dev/null 2>&1 || true
podman network rm "$NATS_NET_NAME" >/dev/null 2>&1 || true
else
log "KEEP=1 — leaving VM '$VM_NAME' and NATS container '$NATS_CONTAINER' running"
log "KEEP=1 — leaving VM '$VM_NAME' and NATS '$NATS_CONTAINER' running"
fi
exit $rc
}
trap cleanup EXIT INT TERM
# ---------------------------- preflight ----------------------------
require() { command -v "$1" >/dev/null 2>&1 || fail "missing required tool: $1"; }
require virsh
require podman
require xorriso
require python3
require cargo
[[ -f "$BASE_IMAGE" ]] || fail "Ubuntu 24.04 cloud image not found at $BASE_IMAGE.
Download it with:
mkdir -p $VM_WORK_DIR
curl -o $BASE_IMAGE $BASE_IMAGE_URL"
if [[ ! -f "$SSH_PRIVKEY" || ! -f "$SSH_PUBKEY" ]]; then
fail "SSH keypair missing at $SSH_KEY_DIR.
Generate one with:
mkdir -p $SSH_KEY_DIR
ssh-keygen -t ed25519 -N '' -f $SSH_PRIVKEY"
fi
virsh --connect ${LIBVIRT_URI:-qemu:///system} net-info "$LIBVIRT_NETWORK" >/dev/null 2>&1 \
|| fail "libvirt session network '$LIBVIRT_NETWORK' missing. \
Run: virsh --connect ${LIBVIRT_URI:-qemu:///system} net-start $LIBVIRT_NETWORK"
mkdir -p "$VM_WORK_DIR"
require virsh
# ---------------------------- phase 1: NATS ----------------------------
log "phase 1: start NATS container on host"
@@ -105,57 +93,109 @@ podman run -d \
-p "$NATS_PORT:4222" \
"$NATS_IMAGE" -js >/dev/null
# The VM will reach NATS via the libvirt NAT bridge gateway — typically
# 192.168.122.1. Inspect to be sure.
NAT_GW="$(virsh --connect ${LIBVIRT_URI:-qemu:///system} net-dumpxml "$LIBVIRT_NETWORK" \
NAT_GW="$(virsh --connect "$LIBVIRT_URI" net-dumpxml default \
| grep -oP "ip address='\K[^']+" | head -1)"
[[ -n "$NAT_GW" ]] || fail "couldn't determine libvirt '$LIBVIRT_NETWORK' gateway IP"
[[ -n "$NAT_GW" ]] || fail "couldn't determine libvirt 'default' gateway IP"
log "libvirt network gateway = $NAT_GW (VM will dial NATS at nats://$NAT_GW:$NATS_PORT)"
# ---------------------------- phase 2: build iot-agent ----------------------------
log "phase 2: build iot-agent-v0 (release — debug binary is ~400MB and fills cloud rootfs)"
# ---------------------------- phase 2: build ---------------------------
log "phase 2: build iot-agent-v0 for guest arch=$ARCH (release — debug binary fills cloud rootfs)"
(
cd "$REPO_ROOT"
if [[ -n "$AGENT_TARGET" ]]; then
rustup target add "$AGENT_TARGET" >/dev/null
cargo build -q --release --target "$AGENT_TARGET" -p iot-agent-v0
else
cargo build -q --release -p iot-agent-v0
fi
)
if [[ -n "$AGENT_TARGET" ]]; then
AGENT_BINARY="$REPO_ROOT/target/$AGENT_TARGET/release/iot-agent-v0"
else
AGENT_BINARY="$REPO_ROOT/target/release/iot-agent-v0"
fi
[[ -f "$AGENT_BINARY" ]] || fail "agent binary missing after build: $AGENT_BINARY"
# ---------------------------- phase 3: provision VM + setup ----------------------------
log "phase 3: provision VM via KvmVmScore, then onboard via IotDeviceSetupScore"
# ---------------------------- phase 3: bootstrap + provision + setup ----------------------------
log "phase 3: bootstrap assets + provision VM + onboard device (arch=$EXAMPLE_ARCH)"
(
cd "$REPO_ROOT"
cargo run -q -p example_iot_vm_setup -- \
cargo run -q --release -p example_iot_vm_setup -- \
--arch "$EXAMPLE_ARCH" \
--vm-name "$VM_NAME" \
--device-id "$DEVICE_ID" \
--group "$GROUP" \
--network "$LIBVIRT_NETWORK" \
--base-image "$BASE_IMAGE" \
--ssh-pubkey "$SSH_PUBKEY" \
--ssh-privkey "$SSH_PRIVKEY" \
--work-dir "$VM_WORK_DIR" \
--agent-binary "$AGENT_BINARY" \
--nats-url "nats://$NAT_GW:$NATS_PORT"
)
# ---------------------------- phase 4: agent reaches NATS ----------------------------
log "phase 4: verify agent connects to NATS from inside VM"
# The agent writes `status.<device_id>` to the `agent-status` KV bucket
# every 30s. Check it appears.
for _ in $(seq 1 60); do
# ---------------------------- phase 4: initial status ----------------------------
# TCG emulation slows agent boot + first NATS publish significantly.
# 60s is fine for native KVM but too tight for aarch64-on-x86.
case "$ARCH" in
aarch64|arm64) STATUS_TIMEOUT=300 ;;
*) STATUS_TIMEOUT=60 ;;
esac
log "phase 4: wait for agent to report status to NATS (timeout=${STATUS_TIMEOUT}s)"
wait_for_status() {
local timeout=$1
for _ in $(seq 1 "$timeout"); do
if podman run --rm --network "$NATS_NET_NAME" \
docker.io/natsio/nats-box:latest \
nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \
"status.$DEVICE_ID" --raw >/dev/null 2>&1; then
log "agent has reported status"
break
return 0
fi
sleep 1
done
return 1
}
wait_for_status "$STATUS_TIMEOUT" || fail "agent-status never appeared for $DEVICE_ID"
log "agent status present on NATS"
# ---------------------------- phase 5: hard power-cycle, expect recovery ----------------------------
log "phase 5: power-cycle VM (virsh destroy + start) → agent must reconnect to NATS"
nats_status_timestamp() {
# Prints the "timestamp" field of the status.<device> entry, or "".
# Never errors (for `set -e` safety).
podman run --rm --network "$NATS_NET_NAME" \
docker.io/natsio/nats-box:latest \
nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \
"status.$DEVICE_ID" --raw >/dev/null 2>&1 \
|| fail "agent-status KV entry never appeared for $DEVICE_ID"
"status.$DEVICE_ID" --raw 2>/dev/null \
| grep -oE '"timestamp":"[^"]+"' \
| head -1 | cut -d'"' -f4 || true
}
log "PASS — VM $VM_NAME is a fleet member reporting as $DEVICE_ID (group=$GROUP)"
virsh --connect "$LIBVIRT_URI" destroy "$VM_NAME" >/dev/null
# `virsh destroy` returns before the qemu process is fully torn down;
# wait a couple seconds to be sure the agent is dead and can't flush a
# final status update after our gate.
sleep 3
REBOOT_GATE="$(date -u +%Y-%m-%dT%H:%M:%S+00:00)"
log "reboot gate = $REBOOT_GATE (any agent timestamp > this is post-reboot)"
virsh --connect "$LIBVIRT_URI" start "$VM_NAME" >/dev/null
case "$ARCH" in
aarch64|arm64) REBOOT_STEPS=900 ;; # ~30 min under TCG
*) REBOOT_STEPS=120 ;; # ~4 min on native KVM
esac
log "waiting for agent to re-report status (post-reboot, up to $((REBOOT_STEPS*2))s)…"
TS_AFTER=""
for _ in $(seq 1 "$REBOOT_STEPS"); do
sleep 2
ts="$(nats_status_timestamp)"
# ISO-8601 timestamps compare correctly lexicographically when the
# format is identical. Both the agent and the `date -u` format used
# for REBOOT_GATE above produce RFC 3339 UTC strings, so string `>`
# works.
if [[ -n "$ts" && "$ts" > "$REBOOT_GATE" ]]; then
TS_AFTER="$ts"
break
fi
done
if [[ -z "$TS_AFTER" ]]; then
fail "agent did not write a post-reboot status within ~$((REBOOT_STEPS*2))s (gate: $REBOOT_GATE)"
fi
log "post-reboot status seen at $TS_AFTER"
log "PASS — VM $VM_NAME power-cycled and re-onboarded (group=$GROUP)"