diff --git a/Cargo.lock b/Cargo.lock index 86098083..16f386f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3173,6 +3173,7 @@ dependencies = [ "clap", "env_logger", "harmony", + "harmony_types", "log", "tokio", ] @@ -6519,10 +6520,12 @@ dependencies = [ "system-configuration", "tokio", "tokio-rustls 0.24.1", + "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots 0.25.4", "winreg", diff --git a/ROADMAP/iot_platform/arm_vm_plan.md b/ROADMAP/iot_platform/arm_vm_plan.md new file mode 100644 index 00000000..b4118cc8 --- /dev/null +++ b/ROADMAP/iot_platform/arm_vm_plan.md @@ -0,0 +1,207 @@ +# aarch64 VM support — plan + +## Why + +The v0 walking skeleton's whole point is validating the IoT agent +against the *actual* distribution, arch, and package set the end- +customer's Pi 5 devices run on (ROADMAP §1). Everything green so far +runs the agent against an x86_64 Ubuntu cloud image with an x86_64 +Rust binary — which proves the code path works but not that the ARM +target works. Every passing smoke-a3 run today is evidence that the +wrong thing works. + +This plan adds arm64 emulation on x86_64 hosts (no hardware needed +for CI) so: + +- the VM runs the same Ubuntu 24.04 arm64 cloud image customers will + eventually flash onto a Pi; +- the iot-agent shipped to it is a real aarch64 binary produced by + our existing cross-compile toolchain; +- apt/systemd/podman on the VM are the actual arm64 packages; and +- smoke-a3 exercises all of it end-to-end. + +Acceptable cost: emulated boot is 5-15× slower than KVM-accelerated +boot. That's the price of the target-arch validation. + +## Shape of the change + +Additive, type-safe, default-preserving. Existing callers of +`VirtualMachineSpec` keep working unchanged; arm64 is opt-in via a +new field. + +### 1. Architecture enum on the VM spec + +Introduce `VmArchitecture` in `harmony/src/domain/topology/ +virtualization.rs`: + +```rust +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +pub enum VmArchitecture { + #[default] + X86_64, + Aarch64, +} +``` + +Add `pub architecture: VmArchitecture` to `VirtualMachineSpec`. With +`#[derive(Default)]` + `VmArchitecture::X86_64` as default, every +existing call site that uses struct init continues to compile. New +constructor: `VirtualMachineSpec::new_aarch64(name)` for clarity. + +Same treatment on `VmConfig` in `modules/kvm/types.rs` — add a +`pub architecture: VmArchitecture` field with `Default` impl. + +### 2. Libvirt XML parameterization + +Rewrite `modules/kvm/xml.rs::domain_xml` to branch on arch. 
What changes per-arch (the reference QEMU flags map directly to
+libvirt XML):
+
+| QEMU flag                    | libvirt XML                                         | x86_64                        | aarch64                        |
+|------------------------------|-----------------------------------------------------|-------------------------------|--------------------------------|
+| `-accel kvm` vs `-accel tcg` | `<domain type='…'>`                                 | `kvm`                         | `qemu`                         |
+| `-M virt` / `-M q35`         | `<type machine='…'>`                                | `q35`                         | `virt`                         |
+| arch                         | `<type arch='…'>`                                   | `x86_64`                      | `aarch64`                      |
+| emulator binary              | `<emulator>`                                        | `/usr/bin/qemu-system-x86_64` | `/usr/bin/qemu-system-aarch64` |
+| `-cpu max,pauth-impdef=on`   | `<cpu mode='…'>` / `<model>max</model>`             | `host-model` (current)        | `max` + `pauth-impdef`         |
+| `-bios QEMU_EFI.fd`          | `<loader type='pflash'>` + `<nvram>`                | — (BIOS)                      | AAVMF CODE + VARS pflash pair  |
+| `-accel tcg,thread=multi`    | MTTCG is default-on when `type='qemu'` + QEMU ≥ 9.1 | n/a                           | implicit                       |
+
+**Type safety**: introduce a `DomainXmlParams` struct that captures
+the arch-specific knobs (domain_type, arch, machine, emulator path,
+cpu mode, firmware) and derives from `VmArchitecture`. The top-level
+`domain_xml` then consumes a fully-resolved `DomainXmlParams` rather
+than branching with `if arch == X86_64` strings.
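+A minimal sketch of that resolution step (field names are
+illustrative, not final; the real struct lands with commit 2):
+
+```rust
+/// Fully-resolved, arch-specific knobs consumed by `domain_xml`.
+/// Hypothetical shape for illustration only.
+pub struct DomainXmlParams {
+    pub domain_type: &'static str, // "kvm" (accelerated) or "qemu" (TCG)
+    pub arch: &'static str,        // <type arch='…'>
+    pub machine: &'static str,     // <type machine='…'>
+    pub emulator: &'static str,    // qemu-system-* binary path
+    pub needs_uefi: bool,          // aarch64 boots via the AAVMF pflash pair
+}
+
+impl From<VmArchitecture> for DomainXmlParams {
+    fn from(arch: VmArchitecture) -> Self {
+        match arch {
+            VmArchitecture::X86_64 => Self {
+                domain_type: "kvm",
+                arch: "x86_64",
+                machine: "q35",
+                emulator: "/usr/bin/qemu-system-x86_64",
+                needs_uefi: false,
+            },
+            // TCG emulation on x86_64 CI hosts; a native arm64 host
+            // would flip domain_type back to "kvm".
+            VmArchitecture::Aarch64 => Self {
+                domain_type: "qemu",
+                arch: "aarch64",
+                machine: "virt",
+                emulator: "/usr/bin/qemu-system-aarch64",
+                needs_uefi: true,
+            },
+        }
+    }
+}
+```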
+### 3. UEFI firmware discovery
+
+aarch64 guests boot via UEFI, not BIOS. libvirt needs two files:
+- `AAVMF_CODE.fd` — the firmware code (read-only, shared)
+- `AAVMF_VARS.fd` — per-VM NVRAM (writable, per-domain copy)
+
+Common paths across distros:
+
+| Distro         | CODE                                          | VARS (template)                                    |
+|----------------|-----------------------------------------------|----------------------------------------------------|
+| Arch           | `/usr/share/edk2/aarch64/QEMU_CODE.fd`        | `/usr/share/edk2/aarch64/QEMU_VARS.fd`             |
+| Debian/Ubuntu  | `/usr/share/AAVMF/AAVMF_CODE.fd`              | `/usr/share/AAVMF/AAVMF_VARS.fd`                   |
+| Fedora         | `/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw` | `/usr/share/edk2/aarch64/vars-template-pflash.raw` |
+
+New module `harmony/src/modules/kvm/firmware.rs`:
+- `pub fn discover_aarch64_firmware() -> Result<AarchFirmware, ExecutorError>`
+  walks a small known-paths list and returns the first viable pair.
+  Returns a typed `AarchFirmware { code: PathBuf, vars_template: PathBuf }`.
+- Per-VM NVRAM copy is handled in `KvmVirtualMachineHost`: at
+  `ensure_vm` time, copy `vars_template` into
+  `$pool/<vm-name>-VARS.fd` and reference it in the domain XML.
+
+### 4. Cloud image for arm64
+
+Add to `modules/iot/assets.rs`:
+
+```rust
+pub const UBUNTU_2404_CLOUDIMG_ARM64_URL: &str =
+    "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img";
+pub const UBUNTU_2404_CLOUDIMG_ARM64_SHA256: &str = "";
+
+pub async fn ensure_ubuntu_2404_cloud_image_for_arch(
+    arch: VmArchitecture,
+) -> Result<PathBuf, ExecutorError>;
+```
+
+The existing `ensure_ubuntu_2404_cloud_image()` becomes a thin
+wrapper that calls the arch-aware fn with `X86_64`, preserving all
+callers. SHA256 gets pinned against the live Ubuntu arm64 image at
+commit time.
+
+### 5. Preflight additions
+
+In `modules/iot/preflight.rs`, when the caller asks for arm64 VMs
+(new `check_iot_smoke_preflight_for_arch(VmArchitecture)` wrapper):
+- verify `qemu-system-aarch64` is on PATH;
+- verify the aarch64 firmware pair exists (reuse the discovery fn);
+- verify QEMU version ≥ 9.1 (MTTCG is a real perf multiplier — a
+  warning, not a hard block, if the host is older).
+
+### 6. Cross-compiled agent
+
+smoke-a3.sh phase 2 currently does native `cargo build --release
+-p iot-agent-v0`. When arch=aarch64:
+- `cargo build --release --target aarch64-unknown-linux-gnu
+  -p iot-agent-v0`
+- AGENT_BINARY points at `target/aarch64-unknown-linux-gnu/release/
+  iot-agent-v0`
+
+Opt-in via `--arch aarch64` CLI flag on both
+`example_iot_vm_setup` and `smoke-a3.sh`. Default stays x86_64.
+
+### 7. Timeout bumps
+
+First-boot cloud-init on emulated aarch64 takes 3-6× longer than
+KVM-accel x86_64. Bump `wait_for_ip` timeout from 300s → 900s when
+arch=aarch64. Smoke-a3's phase 5 reboot gate also lengthens.
+
+## Files to touch
+
+| File                                             | Change                                                                   |
+|--------------------------------------------------|--------------------------------------------------------------------------|
+| `harmony/src/domain/topology/virtualization.rs`  | Add `VmArchitecture`, field on `VirtualMachineSpec`, constructor helper. |
+| `harmony/src/modules/kvm/types.rs`               | Add `architecture` field on `VmConfig`, `VmConfigBuilder` setter.        |
+| `harmony/src/modules/kvm/xml.rs`                 | Rewrite `domain_xml` to take `DomainXmlParams` resolved from arch.       |
+| `harmony/src/modules/kvm/firmware.rs` (new)      | Discovery of AAVMF code+vars paths; `AarchFirmware` struct.              |
+| `harmony/src/modules/kvm/topology.rs`            | Copy per-VM NVRAM template on ensure_vm; thread arch through to XML.     |
+| `harmony/src/modules/iot/assets.rs`              | `ensure_ubuntu_2404_cloud_image_for_arch(arch)`; pin arm64 URL+sha256.   |
+| `harmony/src/modules/iot/preflight.rs`           | Arch-aware preflight; qemu-system-aarch64 + firmware + qemu-version.     |
+| `examples/iot_vm_setup/src/main.rs`              | `--arch x86_64\|aarch64` CLI flag; resolve matching cloud image.         |
+| `iot/scripts/smoke-a3.sh`                        | Arch flag plumbing; cross-compile; extended timeouts; preflight.         |
+| `iot/scripts/smoke-a3-arm.sh` (new)              | Dedicated arm smoke as the CI hook — `ARCH=aarch64 ./smoke-a3.sh`.       |
+
+## Out of scope
+
+- Migrating OPNsense + other KVM examples to `VirtualMachineHost` /
+  `ProvisionVmScore` — real inconsistency in the codebase but a
+  separate refactor, orthogonal to the ARM work. Filing as follow-up.
+- KVM-accelerated aarch64-on-aarch64 (e.g. running on an Ampere
+  runner). Emulation covers the x86 CI story; native aarch64
+  runners would use `<domain type='kvm'>` and no MTTCG flags, which
+  the arch enum + existing x86_64 XML path already model — so this
+  is effectively free when we get there.
+- Supporting multiple simultaneous guest arches on one host in the
+  same smoke run. Single-arch-per-run keeps everything simple.
+- Pinning AAVMF firmware like we pin the cloud image. Firmware is
+  distro-package-managed; pin when we hit a regression.
+
+## Commit plan (in order)
+
+1. **`VmArchitecture` domain type + `VirtualMachineSpec.architecture`
+   field** — tiny, just the enum and struct field; no behaviour
+   change (all callers get `X86_64` via `Default`).
+
+2. **XML parameterization via `DomainXmlParams`** — rewrite
+   `domain_xml` to be arch-driven. Tests under
+   `harmony/src/modules/kvm/xml.rs` get an arm64 variant.
+
+3. **AAVMF firmware discovery + per-VM NVRAM copy** —
+   `firmware.rs` + the copy in `topology.rs::ensure_vm`.
+
+4. **arm64 cloud image asset + preflight** —
+   `ensure_ubuntu_2404_cloud_image_for_arch(arch)` plus preflight
+   extensions. SHA256 pinned at commit time via a one-off
+   `curl | sha256sum`.
+
+5. **Example + smoke script plumbing** — `--arch` flag,
+   cross-compile, timeout bumps, `smoke-a3-arm.sh` wrapper.
+
+6. **End-to-end verification** — run `smoke-a3-arm.sh` from a
+   fresh `$HARMONY_DATA_DIR/iot/` and confirm the aarch64 agent
+   boots, joins NATS, and survives a power-cycle. Document timing
+   in the commit message.
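+End to end, the arch-aware flow the example binary drives looks
+roughly like this (a sketch against the APIs this PR adds; `vm_host`
+is the `KvmVirtualMachineHost` the example constructs, and error
+handling is elided):
+
+```rust
+let arch = VmArchitecture::Aarch64;
+check_iot_smoke_preflight_for_arch(arch).await?; // fail fast on missing qemu/AAVMF
+// Handed to the KvmVirtualMachineHost constructor as the backing image.
+let base_image = ensure_ubuntu_2404_cloud_image_for_arch(arch).await?;
+
+let spec = VirtualMachineSpec {
+    name: "iot-vm-01".into(),
+    architecture: arch, // the only arm64-specific line
+    cpus: 2,
+    memory_mib: 2048,
+    disk_size_gb: None,
+    network: "default".into(),
+    first_boot: None, // the real example wires cloud-init config here
+};
+let info = vm_host.ensure_vm(&spec).await?; // boots + waits for an IP
+```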
+
+## Verification
+
+- `cargo check --all-targets --features kvm`: clean.
+- `cargo clippy --no-deps -- -D warnings` on touched files: clean.
+- `cargo fmt --check`: clean.
+- aarch64 cross-compile of harmony + iot crates: still green.
+- Fresh-cache arm64 smoke-a3: PASS, timing documented.
+- Existing x86_64 smoke-a3: still PASS (regression guard).
diff --git a/examples/iot_vm_setup/Cargo.toml b/examples/iot_vm_setup/Cargo.toml
index f74bba30..7bc93e10 100644
--- a/examples/iot_vm_setup/Cargo.toml
+++ b/examples/iot_vm_setup/Cargo.toml
@@ -10,6 +10,7 @@ path = "src/main.rs"
 [dependencies]
 harmony = { path = "../../harmony", features = ["kvm"] }
+harmony_types = { path = "../../harmony_types" }
 tokio.workspace = true
 log.workspace = true
 env_logger.workspace = true
diff --git a/examples/iot_vm_setup/src/main.rs b/examples/iot_vm_setup/src/main.rs
index 38ed9684..3bc25fc9 100644
--- a/examples/iot_vm_setup/src/main.rs
+++ b/examples/iot_vm_setup/src/main.rs
@@ -1,24 +1,44 @@
 //! End-to-end driver for the IoT walking-skeleton VM-as-device flow.
 //!
-//! Runs two scores back-to-back:
-//! 1. `KvmVmScore` — spin up a libvirt VM from an Ubuntu 24.04 cloud
-//!    image with a generated cloud-init seed authorizing one SSH key.
-//! 2. `IotDeviceSetupScore` — SSH into the booted VM (via Ansible)
-//!    and install podman + the iot-agent.
-//!
-//! After this runs, the VM is a member of the IoT fleet just like the
-//! localhost-based smoke test's agent was. Apply a Deployment CR against
-//! the same NATS and the VM's agent will pull it + run the container.
-
-use std::path::PathBuf;
+//! Runs two Scores back-to-back:
+//! 1. `ProvisionVmScore` — bound to the generic `VirtualMachineHost`
+//!    capability. Here we satisfy it with `KvmVirtualMachineHost`
+//!    (libvirt). Swapping to VMware/Proxmox/cloud would be a
+//!    different topology injection with the same Score code.
+//! 2. `IotDeviceSetupScore` — SSHes into the booted VM and installs
+//!    podman + iot-agent via the split Linux-host capabilities.

 use anyhow::{Context, Result};
 use clap::Parser;
 use harmony::inventory::Inventory;
-use harmony::modules::iot::{IotDeviceSetupConfig, IotDeviceSetupScore};
+use harmony::modules::iot::{
+    IotDeviceSetupConfig, IotDeviceSetupScore, ProvisionVmScore,
+    check_iot_smoke_preflight_for_arch, ensure_iot_ssh_keypair,
+};
+use harmony::modules::kvm::KvmVirtualMachineHost;
 use harmony::modules::kvm::config::init_executor;
-use harmony::modules::kvm::{CloudInitVmConfig, KvmHostTopology, KvmVmScore};
 use harmony::modules::linux::{LinuxHostTopology, SshCredentials};
+use harmony::topology::{VirtualMachineSpec, VmArchitecture, VmFirstBootConfig};
+use harmony_types::id::Id;
+use std::path::PathBuf;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
+enum CliArch {
+    /// Native KVM on x86_64 hosts.
+    X86_64,
+    /// Aarch64 guest. Runs on native KVM on arm64 hosts and under
+    /// qemu-system-aarch64 TCG emulation on x86_64 hosts (slower).
+    Aarch64,
+}
+
+impl From<CliArch> for VmArchitecture {
+    fn from(a: CliArch) -> Self {
+        match a {
+            CliArch::X86_64 => VmArchitecture::X86_64,
+            CliArch::Aarch64 => VmArchitecture::Aarch64,
+        }
+    }
+}

 #[derive(Parser, Debug)]
 #[command(
@@ -26,39 +46,28 @@ use harmony::modules::linux::{LinuxHostTopology, SshCredentials};
     about = "Provision one VM + onboard it into the IoT fleet"
 )]
 struct Cli {
+    /// Guest CPU architecture. Selects the cloud image, qemu
+    /// emulator, and firmware model.
+    #[arg(long, value_enum, default_value_t = CliArch::X86_64)]
+    arch: CliArch,
     /// libvirt domain name for the VM.
     #[arg(long, default_value = "iot-vm-01")]
     vm_name: String,
-    /// Device id the agent will announce to NATS.
-    #[arg(long, default_value = "iot-vm-01")]
-    device_id: String,
+    /// Device id the agent will announce to NATS. Defaults to a
+    /// fresh `Id` (hex timestamp + random suffix).
+    #[arg(long)]
+    device_id: Option<String>,
     /// Fleet group label to write into the agent's TOML config.
     #[arg(long, default_value = "group-a")]
     group: String,
-    /// libvirt network name to attach the VM to. `default` is the
-    /// libvirt-shipped NAT bridge.
+    /// libvirt network name to attach the VM to.
     #[arg(long, default_value = "default")]
     network: String,
-    /// Path to a pre-downloaded Ubuntu 24.04 cloud image (qcow2).
-    /// Required unless `--bootstrap-ansible-only` is set.
-    #[arg(long)]
-    base_image: Option<PathBuf>,
-    /// Path to an SSH public key to authorize on the VM.
-    /// Required unless `--bootstrap-ansible-only` is set.
-    #[arg(long)]
-    ssh_pubkey: Option<PathBuf>,
-    /// Path to the matching SSH private key.
-    /// Required unless `--bootstrap-ansible-only` is set.
-    #[arg(long)]
-    ssh_privkey: Option<PathBuf>,
-    /// Admin username the VM's cloud-init will create.
+    /// Admin username created on first boot.
     #[arg(long, default_value = "iot-admin")]
    admin_user: String,
-    /// Directory for cloud-init seed ISOs.
-    #[arg(long, default_value = "/var/tmp/iot-vm-setup")]
-    work_dir: PathBuf,
-    /// Path to the cross-compiled iot-agent binary to upload to the VM.
-    /// Required unless `--bootstrap-ansible-only` or `--only-vm` are set.
+    /// Path to the cross-compiled iot-agent binary.
+    /// Required unless `--bootstrap-only` is set.
    #[arg(long)]
    agent_binary: Option<PathBuf>,
    /// NATS URL the agent should connect to.
@@ -68,95 +77,110 @@ struct Cli {
    nats_user: String,
    #[arg(long, default_value = "smoke")]
    nats_pass: String,
-    /// Only run the VM-provisioning step; skip device setup. Useful when
-    /// iterating on the KvmVmScore piece.
+    /// Only run the VM-provisioning step; skip device setup.
    #[arg(long)]
    only_vm: bool,
-    /// Ensure the managed Ansible venv exists at $HARMONY_DATA_DIR/
-    /// ansible-venv and exit. Skips the VM-provisioning and device-
-    /// setup steps entirely. Useful as a first-run warmup so the real
-    /// smoke test isn't slowed by the one-time pip install.
+    /// Run preflight + asset bootstrap (ansible venv, cloud image,
+    /// SSH key, libvirt pool) and exit.
    #[arg(long)]
-    bootstrap_ansible_only: bool,
+    bootstrap_only: bool,
 }

 #[tokio::main]
 async fn main() -> Result<()> {
     env_logger::init();
     let cli = Cli::parse();
+    let arch: VmArchitecture = cli.arch.into();

-    // Shortcut: warm the managed Ansible venv and exit.
- if cli.bootstrap_ansible_only { - let bins = harmony::modules::linux::ensure_ansible_venv() + check_iot_smoke_preflight_for_arch(arch) + .await + .map_err(|e| anyhow::anyhow!("{e}"))?; + + if cli.bootstrap_only { + harmony::modules::linux::ensure_ansible_venv() .await - .map_err(|e| anyhow::anyhow!("{e}"))?; - let out = tokio::process::Command::new(&bins.ansible) - .arg("--version") - .output() - .await?; - anyhow::ensure!( - out.status.success(), - "ansible --version failed after bootstrap" - ); - print!("{}", String::from_utf8_lossy(&out.stdout)); + .map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?; + harmony::modules::iot::ensure_ubuntu_2404_cloud_image_for_arch(arch) + .await + .map_err(|e| anyhow::anyhow!("cloud image: {e}"))?; + ensure_iot_ssh_keypair() + .await + .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?; + harmony::modules::iot::ensure_harmony_iot_pool() + .await + .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?; + println!("bootstrap complete"); return Ok(()); } // --- Step 1: provision the VM --- + let base_image = harmony::modules::iot::ensure_ubuntu_2404_cloud_image_for_arch(arch) + .await + .map_err(|e| anyhow::anyhow!("cloud image: {e}"))?; + let pool = harmony::modules::iot::ensure_harmony_iot_pool() + .await + .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?; + let ssh = ensure_iot_ssh_keypair() + .await + .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?; + let authorized_key = harmony::modules::iot::read_public_key(&ssh) + .await + .map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?; + let executor = init_executor().map_err(|e| anyhow::anyhow!("KVM init: {e}"))?; - let kvm_topology = KvmHostTopology::new("kvm-local", executor); + let vm_host = KvmVirtualMachineHost::new( + "kvm-local", + executor, + pool.name.clone(), + pool.path.clone(), + base_image, + ); - let base_image = cli.base_image.clone().context("--base-image is required")?; - let ssh_pubkey = cli.ssh_pubkey.clone().context("--ssh-pubkey is required")?; - - let authorized_key = std::fs::read_to_string(&ssh_pubkey) - .with_context(|| format!("read ssh pubkey {ssh_pubkey:?}"))? - .trim() - .to_string(); - - let vm_score = KvmVmScore { - config: CloudInitVmConfig { - vm_name: cli.vm_name.clone(), - hostname: Some(cli.vm_name.clone()), - vcpus: 2, + let vm_score = ProvisionVmScore { + spec: VirtualMachineSpec { + name: cli.vm_name.clone(), + architecture: arch, + cpus: 2, memory_mib: 2048, - base_image_path: base_image, - seed_output_dir: cli.work_dir.clone(), - admin_user: cli.admin_user.clone(), - authorized_key, - network_name: cli.network.clone(), + disk_size_gb: None, + network: cli.network.clone(), + first_boot: Some(VmFirstBootConfig { + hostname: Some(cli.vm_name.clone()), + admin_user: Some(cli.admin_user.clone()), + authorized_keys: vec![authorized_key], + }), }, }; - - let vm_ip = run_vm_score(&vm_score, &kvm_topology).await?; - println!("VM '{}' up at {}", cli.vm_name, vm_ip); + let vm_ip = run_vm_score(&vm_score, &vm_host).await?; + println!("VM '{}' up at {vm_ip}", cli.vm_name); if cli.only_vm { return Ok(()); } // --- Step 2: onboard the VM into the fleet --- - let ssh_privkey = cli - .ssh_privkey - .clone() - .context("--ssh-privkey is required")?; let agent_binary = cli .agent_binary .clone() - .context("--agent-binary is required")?; + .context("--agent-binary is required (e.g. 
target/release/iot-agent-v0)")?;
+    let device_id = cli
+        .device_id
+        .clone()
+        .map(Id::from)
+        .unwrap_or_else(Id::default);

     let linux_topology = LinuxHostTopology::new(
         format!("linux-{}", cli.vm_name),
         vm_ip.parse().context("VM IP is not a valid IP address")?,
         SshCredentials {
             user: cli.admin_user.clone(),
-            private_key_path: ssh_privkey,
+            private_key_path: ssh.private_key.clone(),
             remote_python: Some("/usr/bin/python3".to_string()),
         },
     );

     let setup_score = IotDeviceSetupScore::new(IotDeviceSetupConfig {
-        device_id: cli.device_id.clone(),
+        device_id: device_id.clone(),
         group: cli.group.clone(),
         nats_urls: vec![cli.nats_url.clone()],
         nats_user: cli.nats_user.clone(),
@@ -166,27 +190,29 @@ async fn main() -> Result<()> {
     run_setup_score(&setup_score, &linux_topology).await?;

     println!(
-        "device '{}' (group '{}') onboarded via {vm_ip}",
-        cli.device_id, cli.group
+        "device '{device_id}' (group '{}') onboarded via {vm_ip}",
+        cli.group
     );
     Ok(())
 }

-async fn run_vm_score(score: &KvmVmScore, topology: &KvmHostTopology) -> Result<String> {
+async fn run_vm_score(
+    score: &ProvisionVmScore,
+    topology: &KvmVirtualMachineHost,
+) -> Result<String> {
     use harmony::score::Score;
     let inventory = Inventory::empty();
-    let interpret = Score::<KvmHostTopology>::create_interpret(score);
+    let interpret = Score::<KvmVirtualMachineHost>::create_interpret(score);
     let outcome = interpret
         .execute(&inventory, topology)
         .await
-        .map_err(|e| anyhow::anyhow!("KvmVmScore execute: {e}"))?;
-    // The outcome details carry the IP as `ip=<addr>`.
+        .map_err(|e| anyhow::anyhow!("ProvisionVmScore execute: {e}"))?;
     for d in &outcome.details {
-        if let Some(ip) = d.strip_prefix("ip=") {
-            return Ok(ip.to_string());
+        if let Some(v) = d.strip_prefix("ip=") {
+            return Ok(v.to_string());
         }
     }
-    anyhow::bail!("KvmVmScore finished without reporting an IP: {outcome:?}")
+    anyhow::bail!("ProvisionVmScore finished without reporting an IP: {outcome:?}")
 }

 async fn run_setup_score(score: &IotDeviceSetupScore, topology: &LinuxHostTopology) -> Result<()> {
diff --git a/harmony/Cargo.toml b/harmony/Cargo.toml
index fb9bfd5b..4143c5bf 100644
--- a/harmony/Cargo.toml
+++ b/harmony/Cargo.toml
@@ -18,6 +18,7 @@ reqwest = { version = "0.11", features = [
     "cookies",
     "json",
     "rustls-tls",
+    "stream",
 ], default-features = false }
 russh = "0.45.0"
 rust-ipmi = "0.1.1"
diff --git a/harmony/src/domain/topology/host_configuration.rs b/harmony/src/domain/topology/host_configuration.rs
index e6ff1aad..0a8c6710 100644
--- a/harmony/src/domain/topology/host_configuration.rs
+++ b/harmony/src/domain/topology/host_configuration.rs
@@ -1,76 +1,96 @@
+//! Split host-configuration capabilities.
+//!
+//! Originally a single `HostConfigurationProvider` interface, now
+//! broken into narrower concerns so implementations only need to
+//! implement what they can actually deliver. An Ansible-over-SSH
+//! adapter implements all of them; a future cloud-init / ignition /
+//! podman-agent backend would implement a subset and leave the rest
+//! to other topologies.
+//!
+//! The convenience umbrella [`LinuxHostConfiguration`] is blanket-
+//! impl'd for any type implementing all five capabilities, so Scores
+//! that need "a Linux host we can fully configure" can use one bound
+//! rather than five.
+//!
+//! Contract for every `ensure_*` method: converge the host to the
+//! desired state and return [`ChangeReport`] indicating whether any
+//! change was applied. Scores compose these into reconcile-restart
+//! logic (e.g. only bounce the service if its unit or config file
+//! actually changed).
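+//!
+//! A sketch of that composition under these traits (hypothetical
+//! helper, not part of this module; `unit.scope` is cloneable since
+//! `SystemdUnitSpec` derives `Clone`):
+//!
+//! ```ignore
+//! async fn deploy_unit_with_config<H: LinuxHostConfiguration>(
+//!     host: &H,
+//!     unit_name: &str,
+//!     unit: &SystemdUnitSpec,
+//!     config: &FileSpec,
+//! ) -> Result<(), ExecutorError> {
+//!     let unit_report = host.ensure_systemd_unit(unit).await?;
+//!     let cfg_report = host.ensure_file(config).await?;
+//!     // Only bounce the service when something actually converged.
+//!     if unit_report.changed || cfg_report.changed {
+//!         host.restart_service(unit_name, unit.scope.clone()).await?;
+//!     }
+//!     Ok(())
+//! }
+//! ```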
 use async_trait::async_trait;
 use serde::{Deserialize, Serialize};
+use std::path::PathBuf;

 use crate::executors::ExecutorError;

-/// Capability: apply idempotent, host-level configuration to a single
-/// remote machine — package installs, user accounts, files, systemd units.
-///
-/// **Scope.** Intentionally narrow: the subset of configuration-management
-/// primitives the IoT device setup flow needs. This is explicitly *not* a
-/// general Ansible/Puppet/Chef replacement — if a Score needs templating,
-/// loops over hosts, handler triggers, or distro-specific branching, that
-/// Score should be decomposed until it fits these primitives (or a new
-/// capability trait should be added deliberately).
-///
-/// **Idempotency contract.** Every method converges the host to the given
-/// desired state and returns [`ChangeReport`] indicating whether any change
-/// was actually made. Callers compose these into reconcile-restart logic
-/// (e.g. only `systemctl restart` the service if its unit file or its
-/// config file was reported changed).
-///
-/// **Implementation note.** The concrete impl used on-device today is
-/// [`crate::modules::linux::AnsibleHostConfigurator`], which shells out to
-/// `ansible-playbook` with a generated one-task play per call. The trait is
-/// deliberately Ansible-agnostic so a Rust-native impl can be dropped in
-/// later without Score changes.
+// ---------------------------------------------------------------------
+// Capability traits (narrow, individually implementable)
+// ---------------------------------------------------------------------
+
+/// Reachability check. Every other capability implicitly requires the
+/// host to be reachable; this trait exists so Scores can preflight
+/// before committing to larger work.
 #[async_trait]
-pub trait HostConfigurationProvider: Send + Sync {
-    /// Test reachability. Implementations should exercise the same
-    /// transport the other methods use (SSH typically). Used as a
-    /// preflight by Scores before attempting real work.
+pub trait HostReachable: Send + Sync {
     async fn ping(&self) -> Result<(), ExecutorError>;
+}

-    /// Ensure a package is installed. Distro-agnostic: the implementation
-    /// picks the right package manager.
+/// Install distro packages. Intentionally distro-agnostic at the trait
+/// level — the implementation picks apt/dnf/pacman/apk based on the
+/// host's detected family. `name` is the plain distro package name
+/// (so `podman`, not `containers/podman`).
+#[async_trait]
+pub trait PackageInstaller: Send + Sync {
     async fn ensure_package(&self, name: &str) -> Result<ChangeReport, ExecutorError>;
+}

-    /// Ensure a user account exists with the given spec. Only the fields
-    /// in [`UserSpec`] are managed — other attributes of an existing user
-    /// are left alone.
-    async fn ensure_user(&self, spec: &UserSpec) -> Result<ChangeReport, ExecutorError>;
-
-    /// Ensure a file exists with exactly the given content, owner, and
-    /// mode. Atomic replacement; returns `changed: true` only if the file
-    /// was created or its content/owner/mode differed.
+/// Deliver a file to a specific path on the host, atomically.
+#[async_trait]
+pub trait FileDelivery: Send + Sync {
     async fn ensure_file(&self, spec: &FileSpec) -> Result<ChangeReport, ExecutorError>;
+}

-    /// Ensure a systemd unit file exists and is enabled (optionally
-    /// started). Kept separate from [`ensure_file`] so the implementation
-    /// can handle `daemon-reload` + enable/start in one atomic operation
-    /// and report accurate change state.
+/// Create and manage Unix user accounts (POSIX systems).
+///
+/// Split from [`SystemdManager`] because some hosts run user accounts
+/// without systemd (e.g. Alpine default). The linger-related method
+/// belongs here because `loginctl enable-linger` is a
+/// systemd-logind-specific operation on the *user* rather than on a
+/// service.
+#[async_trait]
+pub trait UnixUserManager: Send + Sync {
+    async fn ensure_user(&self, spec: &UserSpec) -> Result<ChangeReport, ExecutorError>;
+    /// Enable `loginctl enable-linger` for a user so their systemd
+    /// user session (and any user-scoped services like
+    /// `podman.socket`) survives logout. Implemented via whatever
+    /// systemd-aware transport the adapter uses.
+    async fn ensure_linger(&self, user: &str) -> Result<ChangeReport, ExecutorError>;
+}
+
+/// Systemd-specific service lifecycle. Separated from file delivery
+/// because writing a unit file and enabling/starting it are
+/// conceptually one operation that the adapter can batch (daemon-
+/// reload etc.).
+#[async_trait]
+pub trait SystemdManager: Send + Sync {
     async fn ensure_systemd_unit(
         &self,
         spec: &SystemdUnitSpec,
     ) -> Result<ChangeReport, ExecutorError>;

-    /// Restart a systemd unit. Unconditional — used by Scores that
-    /// detected a config change that the service wouldn't pick up
-    /// otherwise.
+    /// Restart a unit unconditionally. Intended for use after a
+    /// caller-detected config change that the service wouldn't pick
+    /// up otherwise.
     async fn restart_service(
         &self,
         name: &str,
         scope: SystemdScope,
     ) -> Result<ChangeReport, ExecutorError>;

-    /// Enable `loginctl enable-linger` for a user, so their systemd user
-    /// session (and any user-scoped services like `podman.socket`)
-    /// survives logout. Idempotent.
-    async fn ensure_linger(&self, user: &str) -> Result<ChangeReport, ExecutorError>;
-
-    /// Enable+start a user-scoped systemd unit for the given user (e.g.
-    /// `podman.socket` under `iot-agent`). Assumes linger is already
-    /// configured.
+    /// Enable+start a user-scoped unit (e.g. `podman.socket` under
+    /// `iot-agent`). Assumes [`UnixUserManager::ensure_linger`] has
+    /// already been called for the user.
     async fn ensure_user_unit_active(
         &self,
         user: &str,
@@ -78,8 +98,33 @@ pub trait HostConfigurationProvider: Send + Sync {
     ) -> Result<ChangeReport, ExecutorError>;
 }

+// ---------------------------------------------------------------------
+// Umbrella trait (auto-impl) for Scores that want all of the above
+// ---------------------------------------------------------------------
+
+/// Convenience trait auto-implemented for any type that has the full
+/// Linux-host configuration toolkit. Scores can use this one bound
+/// rather than repeating five.
+///
+/// Intentionally *not* usable as an object-safe trait object — only
+/// as a generic bound. Impls should implement each capability
+/// individually.
+pub trait LinuxHostConfiguration:
+    HostReachable + PackageInstaller + FileDelivery + UnixUserManager + SystemdManager
+{
+}
+
+impl<T> LinuxHostConfiguration for T where
+    T: HostReachable + PackageInstaller + FileDelivery + UnixUserManager + SystemdManager
+{
+}
+
+// ---------------------------------------------------------------------
+// Shared types
+// ---------------------------------------------------------------------
+
 /// Whether the host state matched the desired spec already (`changed:
-/// false`, a NOOP) or was modified by this call (`changed: true`).
+/// false`, a NOOP) or was modified by this call.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub struct ChangeReport {
     pub changed: bool,
@@ -93,18 +138,17 @@ impl ChangeReport {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct UserSpec {
     pub name: String,
-    /// If Some, create or update the system group of this name before
-    /// creating the user.
+    /// Primary group. None lets the OS default take over (on
+    /// Debian/Ubuntu with USERGROUPS_ENAB yes, useradd auto-creates a
+    /// group matching the username).
     pub group: Option<String>,
-    /// Additional supplementary groups to add the user to (e.g. `["wheel",
-    /// "docker"]`).
+    /// Additional supplementary groups (append-mode).
     pub supplementary_groups: Vec<String>,
-    /// Absolute path to the login shell, or None for the distro default.
+    /// Absolute login shell path; None → distro default.
     pub shell: Option<String>,
-    /// If true, create with `--system` (UID in the system range, no aging,
-    /// typically no login). Service-account flavour.
+    /// If true, create with `--system` (UID in the system range, no
+    /// aging, typically no login).
     pub system: bool,
-    /// If true, create a home directory.
     pub create_home: bool,
 }

@@ -112,19 +156,15 @@
 pub struct FileSpec {
     /// Absolute path on the remote host.
     pub path: String,
-    /// Source of the file's content. Use `Content` for small generated
-    /// files (configs, systemd units); use `LocalPath` for anything
-    /// large or binary (agent binaries, shipped assets) — inline
-    /// content rides the argv for SSH transport and hits `ARG_MAX` at
-    /// a few MB.
+    /// Source of the file's content.
     pub source: FileSource,
-    /// Owner (user name). None means leave current owner alone if the
-    /// file already exists; on create defaults to root.
+    /// Owner (user name). None → leave current owner alone on update;
+    /// defaults to root on create.
     pub owner: Option<String>,
     /// Group name. Same semantics as `owner`.
     pub group: Option<String>,
-    /// POSIX mode (octal), e.g. 0o644. None means leave current mode
-    /// alone; on create defaults to 0o644.
+    /// POSIX mode (octal), e.g. 0o644. None → leave current mode
+    /// alone; defaults to 0o644 on create.
     pub mode: Option<u32>,
 }

@@ -133,10 +173,10 @@
 pub enum FileSource {
     /// UTF-8 content to materialize on the remote host.
     Content(String),
     /// Absolute path on the Harmony-runner host pointing at a file to
-    /// be shipped to `path`. Works for binary files. The implementation
-    /// compares the remote file's content against this one and returns
-    /// `changed: false` when they already match.
-    LocalPath(std::path::PathBuf),
+    /// be shipped to `path`. Works for binary files. The implementation
+    /// compares remote content before rewriting (returning `changed:
+    /// false` when they already match).
+    LocalPath(PathBuf),
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -146,8 +186,7 @@
 pub struct SystemdUnitSpec {
     /// Exact content of the unit file.
     pub unit_content: String,
     pub scope: SystemdScope,
-    /// If true, `systemctl enable --now`; if false, `enable` only (caller
-    /// will start it later, or it's a one-shot driven by another unit).
+    /// If true, `systemctl enable --now`; else `enable` only.
     pub start_immediately: bool,
 }

@@ -157,8 +196,8 @@
 pub enum SystemdScope {
     User(UserName),
 }

-/// Wrapper over a username for the User scope, mostly to stop
-/// `SystemdScope::User(String)` from begging for ad-hoc empty strings.
+/// Username newtype for `SystemdScope::User` so the variant's field is
+/// typed rather than a freeform String.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct UserName(pub String);
diff --git a/harmony/src/domain/topology/mod.rs b/harmony/src/domain/topology/mod.rs
index 4732786c..ae27e839 100644
--- a/harmony/src/domain/topology/mod.rs
+++ b/harmony/src/domain/topology/mod.rs
@@ -35,9 +35,11 @@ pub use tftp::*;
 mod container_runtime;
 mod helm_command;
 mod host_configuration;
+mod virtualization;
 pub use container_runtime::*;
 pub use helm_command::*;
 pub use host_configuration::*;
+pub use virtualization::*;
 use super::{
     executors::ExecutorError,
diff --git a/harmony/src/domain/topology/virtualization.rs b/harmony/src/domain/topology/virtualization.rs
new file mode 100644
index 00000000..c4b30ec2
--- /dev/null
+++ b/harmony/src/domain/topology/virtualization.rs
@@ -0,0 +1,141 @@
+//! Generic virtualization capability.
+//!
+//! [`VirtualMachineHost`] is the abstraction Scores target when they
+//! want a VM. The trait intentionally doesn't name a hypervisor — an
+//! impl backed by KVM/libvirt sits in `modules::kvm`, but the same
+//! trait could be implemented for VMware, Proxmox, Hyper-V, or a
+//! cloud provider's API.
+//!
+//! **Scope.** What we need today for the IoT smoke test and for
+//! future CI/dev environments: ensure a VM exists with a given CPU
+//! count, memory size, disk size, and first-boot configuration; tear
+//! it down; read its runtime state. Deliberately no live migration,
+//! snapshots, disk attach/detach, or NIC hotplug — they belong in
+//! follow-on capabilities when a real use case surfaces.
+
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+use std::net::IpAddr;
+
+use crate::executors::ExecutorError;
+
+#[async_trait]
+pub trait VirtualMachineHost: Send + Sync {
+    async fn list_vms(&self) -> Result<Vec<VirtualMachineRuntimeInfo>, ExecutorError>;
+
+    /// Create-or-update a VM matching `spec`. Idempotent: re-running
+    /// against an unchanged spec returns the existing VM's runtime
+    /// info. On first call or after destructive changes, boots the VM
+    /// and waits for an IP address (implementation-defined timeout).
+    async fn ensure_vm(
+        &self,
+        spec: &VirtualMachineSpec,
+    ) -> Result<VirtualMachineRuntimeInfo, ExecutorError>;
+
+    /// Stop and remove the VM, including its managed storage. No-op
+    /// if the VM does not exist.
+    async fn delete_vm(&self, name: &str) -> Result<(), ExecutorError>;
+
+    /// Read current runtime info for a VM by name. `None` if the VM
+    /// doesn't exist.
+    async fn get_vm_info(
+        &self,
+        name: &str,
+    ) -> Result<Option<VirtualMachineRuntimeInfo>, ExecutorError>;
+}
+
+/// Guest CPU architecture. Determines emulator binary, machine
+/// type, CPU model, and firmware on the KVM/libvirt backend.
+///
+/// Defaults to [`VmArchitecture::X86_64`] so existing call sites
+/// continue to compile without change — opt into arm64 by setting
+/// the field explicitly.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub enum VmArchitecture {
+    #[default]
+    X86_64,
+    Aarch64,
+}
+
+impl VmArchitecture {
+    /// Short canonical name used in URLs, filenames, and log lines
+    /// (`"x86_64"`, `"aarch64"`). Matches Ubuntu cloud image naming
+    /// (`…-cloudimg-amd64.img` vs `…-cloudimg-arm64.img`) only in
+    /// spirit — see [`ubuntu_cloudimg_suffix`] for the actual
+    /// image-naming convention.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::X86_64 => "x86_64",
+            Self::Aarch64 => "aarch64",
+        }
+    }
+
+    /// Suffix Ubuntu uses in its cloud image filenames (`amd64` vs
+    /// `arm64`).
+    /// Different from [`as_str`] because Ubuntu doesn't
+    /// follow the Linux `uname -m` convention.
+    pub fn ubuntu_cloudimg_suffix(&self) -> &'static str {
+        match self {
+            Self::X86_64 => "amd64",
+            Self::Aarch64 => "arm64",
+        }
+    }
+}
+
+/// Declarative description of a VM the caller wants to exist.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VirtualMachineSpec {
+    pub name: String,
+    /// Guest CPU architecture. Defaults to
+    /// [`VmArchitecture::X86_64`]; set to
+    /// [`VmArchitecture::Aarch64`] to run an arm64 guest (emulated
+    /// via TCG on x86_64 hosts; KVM-accelerated on aarch64 hosts).
+    #[serde(default)]
+    pub architecture: VmArchitecture,
+    pub cpus: u32,
+    pub memory_mib: u64,
+    /// `None` → inherit from the hypervisor's default (e.g. the
+    /// backing cloud image's default disk size).
+    pub disk_size_gb: Option<u64>,
+    /// Name of the network to attach the VM to. For KVM this is a
+    /// libvirt network name (`default` = the shipped NAT bridge).
+    pub network: String,
+    /// Optional first-boot configuration for hypervisors that
+    /// support it (KVM via cloud-init, VMware via OVF properties,
+    /// Proxmox via cloud-init). Hypervisors without a mechanism for
+    /// this return an error if it's set and they can't honour it.
+    pub first_boot: Option<VmFirstBootConfig>,
+}
+
+/// First-boot declarative config. Hypervisor-agnostic; each impl
+/// translates to its native mechanism.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VmFirstBootConfig {
+    /// Guest hostname. `None` → use the image default.
+    pub hostname: Option<String>,
+    /// Username to provision with passwordless sudo + the
+    /// [`authorized_keys`]. `None` → reuse the image's default
+    /// user (`ubuntu` for Ubuntu cloud images, etc.).
+    pub admin_user: Option<String>,
+    /// Public SSH keys (OpenSSH single-line format) to authorize for
+    /// the admin user.
+    pub authorized_keys: Vec<String>,
+}
+
+/// Observed runtime info for a VM.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VirtualMachineRuntimeInfo {
+    pub name: String,
+    pub state: VmState,
+    /// Primary IPv4 of the VM, if it's running and has one.
+    pub ip: Option<IpAddr>,
+    /// Free-form identifier of the backing hypervisor ("kvm",
+    /// "vmware", "proxmox", …). Diagnostic only.
+    pub hypervisor: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum VmState {
+    Running,
+    Stopped,
+    Unknown,
+}
diff --git a/harmony/src/modules/iot/assets.rs b/harmony/src/modules/iot/assets.rs
new file mode 100644
index 00000000..2e1f405e
--- /dev/null
+++ b/harmony/src/modules/iot/assets.rs
@@ -0,0 +1,300 @@
+//! Bootstrapped assets shared across IoT workflows.
+//!
+//! Everything here follows the `ensure_*` pattern — idempotent, caches
+//! results under [`HARMONY_DATA_DIR`]`/iot/…`, and runs at most once per
+//! process (enforced by a `tokio::sync::OnceCell`). The goal is that an
+//! operator can run the IoT smoke test against a freshly-installed host
+//! with nothing but `libvirt + qemu + xorriso + python3 + cargo +
+//! podman` installed — no manual image downloads, no `ssh-keygen`, no
+//! `chmod` rituals.
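+//!
+//! A sketch of the usual call sequence (the same one the VM example
+//! drives; error handling elided):
+//!
+//! ```ignore
+//! let image = ensure_ubuntu_2404_cloud_image_for_arch(VmArchitecture::Aarch64).await?;
+//! let keys = ensure_iot_ssh_keypair().await?;
+//! let pubkey = read_public_key(&keys).await?; // feeds cloud-init's authorized_keys
+//! ```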
+use std::path::{Path, PathBuf};
+use std::process::Stdio;
+
+use log::{info, warn};
+use sha2::{Digest, Sha256};
+use tokio::io::AsyncWriteExt;
+use tokio::process::Command;
+use tokio::sync::OnceCell;
+
+use crate::domain::config::HARMONY_DATA_DIR;
+use crate::domain::topology::VmArchitecture;
+use crate::executors::ExecutorError;
+
+// ---------------------------------------------------------------------
+// Cloud image
+// ---------------------------------------------------------------------
+
+/// Pinned Ubuntu 24.04 server cloud image (x86_64 / amd64). **Updating
+/// this constant requires updating [`UBUNTU_2404_CLOUDIMG_SHA256`] at
+/// the same time**; download is rejected on hash mismatch.
+///
+/// The upstream URL is the "current release" pointer, which rotates
+/// when Canonical pushes a point-release. When that happens, this
+/// constant's sha256 stops matching and `ensure_cloud_image` fails with
+/// a clear diff — bump both in one commit.
+pub const UBUNTU_2404_CLOUDIMG_URL: &str =
+    "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img";
+pub const UBUNTU_2404_CLOUDIMG_SHA256: &str =
+    "5c3ddb00f60bc455dac0862fabe9d8bacec46c33ac1751143c5c3683404b110d";
+pub const UBUNTU_2404_CLOUDIMG_FILENAME: &str = "ubuntu-24.04-server-cloudimg-amd64.img";
+
+/// Pinned Ubuntu 24.04 server cloud image (aarch64 / arm64). Same
+/// update semantics as the amd64 pair.
+pub const UBUNTU_2404_CLOUDIMG_ARM64_URL: &str =
+    "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img";
+pub const UBUNTU_2404_CLOUDIMG_ARM64_SHA256: &str =
+    "1ea801e659d2f5035ac294e0faab0aac9b6ba66753df933ba5c7beab0c689bd0";
+pub const UBUNTU_2404_CLOUDIMG_ARM64_FILENAME: &str = "ubuntu-24.04-server-cloudimg-arm64.img";
+
+/// Ensure the pinned Ubuntu 24.04 cloud image for `arch` is present
+/// on disk. Returns the path to the cached image. Slow path
+/// (download + sha256 verify) runs at most once per process per
+/// architecture — separate `OnceCell` per arch keeps the cache hits
+/// symmetric.
+pub async fn ensure_ubuntu_2404_cloud_image_for_arch(
+    arch: VmArchitecture,
+) -> Result<PathBuf, ExecutorError> {
+    // Per-arch OnceCell. Matches the `ensure_ansible_venv` pattern —
+    // first call downloads, subsequent calls hit the cache in one
+    // `exists()` stat.
+    static X86_64: OnceCell<PathBuf> = OnceCell::const_new();
+    static AARCH64: OnceCell<PathBuf> = OnceCell::const_new();
+    let cell = match arch {
+        VmArchitecture::X86_64 => &X86_64,
+        VmArchitecture::Aarch64 => &AARCH64,
+    };
+    let (url, sha256, filename) = match arch {
+        VmArchitecture::X86_64 => (
+            UBUNTU_2404_CLOUDIMG_URL,
+            UBUNTU_2404_CLOUDIMG_SHA256,
+            UBUNTU_2404_CLOUDIMG_FILENAME,
+        ),
+        VmArchitecture::Aarch64 => (
+            UBUNTU_2404_CLOUDIMG_ARM64_URL,
+            UBUNTU_2404_CLOUDIMG_ARM64_SHA256,
+            UBUNTU_2404_CLOUDIMG_ARM64_FILENAME,
+        ),
+    };
+    cell.get_or_try_init(|| async move { ensure_cloud_image(url, sha256, filename).await })
+        .await
+        .cloned()
+}
+
+/// Back-compat shim — returns the x86_64 image. Prefer
+/// [`ensure_ubuntu_2404_cloud_image_for_arch`] when the arch is
+/// known at the call site.
+pub async fn ensure_ubuntu_2404_cloud_image() -> Result<PathBuf, ExecutorError> {
+    ensure_ubuntu_2404_cloud_image_for_arch(VmArchitecture::X86_64).await
+}
+
+async fn ensure_cloud_image(
+    url: &str,
+    expected_sha256: &str,
+    filename: &str,
+) -> Result<PathBuf, ExecutorError> {
+    let dir = cloud_images_dir();
+    tokio::fs::create_dir_all(&dir)
+        .await
+        .map_err(|e| exec(format!("create cloud-images dir {dir:?}: {e}")))?;
+    // Make the cache directory world-traversable so libvirt-qemu can
+    // walk into it when reading the image. This is the perms
+    // concession that lets us stick with direct file paths for now.
+    make_world_traversable(&dir).await?;
+
+    let target = dir.join(filename);
+    if target.exists() {
+        let actual = sha256_of_file(&target).await?;
+        if actual == expected_sha256 {
+            info!("cloud image cache hit at {target:?}");
+            return Ok(target);
+        }
+        warn!(
+            "cached cloud image sha256 mismatch (expected {expected_sha256}, got {actual}); \
+             re-downloading to {target:?}"
+        );
+        tokio::fs::remove_file(&target)
+            .await
+            .map_err(|e| exec(format!("remove stale image: {e}")))?;
+    }
+
+    info!("downloading cloud image {url} → {target:?} (one-time)");
+    download_to(url, &target).await?;
+    // Re-verify the download we just did.
+    let actual = sha256_of_file(&target).await?;
+    if actual != expected_sha256 {
+        let _ = tokio::fs::remove_file(&target).await;
+        return Err(exec(format!(
+            "downloaded image sha256 mismatch: expected {expected_sha256}, got {actual}. \
+             Ubuntu may have rotated the 'current release' pointer — bump the pin in \
+             modules::iot::assets.rs."
+        )));
+    }
+    // World-readable so libvirt-qemu can open it without a chmod ritual.
+    tokio::fs::set_permissions(&target, std::os::unix::fs::PermissionsExt::from_mode(0o644))
+        .await
+        .map_err(|e| exec(format!("chmod image: {e}")))?;
+
+    Ok(target)
+}
+
+/// Stream the download straight to disk via `reqwest` so we never
+/// buffer the ~600 MB qcow2 in memory; `ensure_cloud_image` verifies
+/// the sha256 once the write completes.
+async fn download_to(url: &str, target: &Path) -> Result<(), ExecutorError> {
+    let client = reqwest::Client::builder()
+        .build()
+        .map_err(|e| exec(format!("reqwest build: {e}")))?;
+    let resp = client
+        .get(url)
+        .send()
+        .await
+        .map_err(|e| exec(format!("GET {url}: {e}")))?;
+    if !resp.status().is_success() {
+        return Err(exec(format!(
+            "GET {url} returned {status}",
+            status = resp.status()
+        )));
+    }
+
+    let mut out = tokio::fs::File::create(target)
+        .await
+        .map_err(|e| exec(format!("create {target:?}: {e}")))?;
+
+    use futures_util::StreamExt;
+    let mut stream = resp.bytes_stream();
+    while let Some(chunk) = stream.next().await {
+        let chunk = chunk.map_err(|e| exec(format!("download chunk: {e}")))?;
+        out.write_all(&chunk)
+            .await
+            .map_err(|e| exec(format!("write chunk: {e}")))?;
+    }
+    out.flush()
+        .await
+        .map_err(|e| exec(format!("flush {target:?}: {e}")))?;
+    Ok(())
+}
+
+async fn sha256_of_file(path: &Path) -> Result<String, ExecutorError> {
+    use tokio::io::AsyncReadExt;
+    let mut file = tokio::fs::File::open(path)
+        .await
+        .map_err(|e| exec(format!("open {path:?}: {e}")))?;
+    let mut hasher = Sha256::new();
+    let mut buf = vec![0u8; 1 << 20]; // 1 MiB
+    loop {
+        let n = file
+            .read(&mut buf)
+            .await
+            .map_err(|e| exec(format!("read {path:?}: {e}")))?;
+        if n == 0 {
+            break;
+        }
+        hasher.update(&buf[..n]);
+    }
+    Ok(hex::encode(hasher.finalize()))
+}
+
+fn cloud_images_dir() -> PathBuf {
+    HARMONY_DATA_DIR.join("iot").join("cloud-images")
+}
+
+// ---------------------------------------------------------------------
+// SSH keypair
+// ---------------------------------------------------------------------
+
+/// Pair of on-disk paths to Harmony's per-user IoT SSH keypair. The
+/// same key identifies every VM we provision for smoke/integration
+/// testing — cheap to reuse, easy to discard (just `rm -rf` the dir).
+#[derive(Debug, Clone)]
+pub struct IotSshKeypair {
+    pub private_key: PathBuf,
+    pub public_key: PathBuf,
+}
+
+/// Ensure `$HARMONY_DATA_DIR/iot/ssh/id_ed25519[.pub]` exists. Runs
+/// `ssh-keygen` once; subsequent calls return the existing paths.
+pub async fn ensure_iot_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
+    static CELL: OnceCell<IotSshKeypair> = OnceCell::const_new();
+    CELL.get_or_try_init(provision_ssh_keypair).await.cloned()
+}
+
+async fn provision_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
+    let dir = HARMONY_DATA_DIR.join("iot").join("ssh");
+    tokio::fs::create_dir_all(&dir)
+        .await
+        .map_err(|e| exec(format!("create ssh dir {dir:?}: {e}")))?;
+    tokio::fs::set_permissions(&dir, std::os::unix::fs::PermissionsExt::from_mode(0o700))
+        .await
+        .map_err(|e| exec(format!("chmod ssh dir: {e}")))?;
+
+    let priv_path = dir.join("id_ed25519");
+    let pub_path = dir.join("id_ed25519.pub");
+    if priv_path.exists() && pub_path.exists() {
+        info!("ssh keypair cache hit at {priv_path:?}");
+        return Ok(IotSshKeypair {
+            private_key: priv_path,
+            public_key: pub_path,
+        });
+    }
+    // Remove stragglers from a partial previous run.
+    let _ = tokio::fs::remove_file(&priv_path).await;
+    let _ = tokio::fs::remove_file(&pub_path).await;
+
+    info!("generating ed25519 ssh keypair at {priv_path:?} (one-time)");
+    let status = Command::new("ssh-keygen")
+        .arg("-t")
+        .arg("ed25519")
+        .arg("-N")
+        .arg("") // no passphrase
+        .arg("-C")
+        .arg("harmony-iot-smoke")
+        .arg("-f")
+        .arg(&priv_path)
+        .stdout(Stdio::null())
+        .stderr(Stdio::piped())
+        .output()
+        .await
+        .map_err(|e| exec(format!("spawn ssh-keygen: {e}")))?;
+    if !status.status.success() {
+        return Err(exec(format!(
+            "ssh-keygen failed: {}",
+            String::from_utf8_lossy(&status.stderr).trim()
+        )));
+    }
+    Ok(IotSshKeypair {
+        private_key: priv_path,
+        public_key: pub_path,
+    })
+}
+
+/// Read the generated public key (one line, openssh format) into a string
+/// suitable for cloud-init's `authorized_keys`.
+pub async fn read_public_key(kp: &IotSshKeypair) -> Result<String, ExecutorError> {
+    let content = tokio::fs::read_to_string(&kp.public_key)
+        .await
+        .map_err(|e| exec(format!("read {:?}: {e}", kp.public_key)))?;
+    Ok(content.trim().to_string())
+}
+
+// ---------------------------------------------------------------------
+// helpers
+// ---------------------------------------------------------------------
+
+async fn make_world_traversable(dir: &Path) -> Result<(), ExecutorError> {
+    // Libvirt-qemu runs as a different user and needs to traverse our
+    // dirs to read the images we've placed there. 0755 on the whole
+    // chain from HARMONY_DATA_DIR down is the minimum that works
+    // without asking the operator to fiddle with ACLs.
+    //
+    // We only adjust the terminal dir here; parents are assumed to be
+    // world-traversable already (true by default on every distro's
+    // data-dir layout: `~/.local/share` is 755 on user create).
+    tokio::fs::set_permissions(dir, std::os::unix::fs::PermissionsExt::from_mode(0o755))
+        .await
+        .map_err(|e| exec(format!("chmod {dir:?}: {e}")))?;
+    Ok(())
+}
+
+fn exec(msg: impl Into<String>) -> ExecutorError {
+    ExecutorError::UnexpectedError(msg.into())
+}
diff --git a/harmony/src/modules/iot/libvirt_pool.rs b/harmony/src/modules/iot/libvirt_pool.rs
new file mode 100644
index 00000000..e893d6b0
--- /dev/null
+++ b/harmony/src/modules/iot/libvirt_pool.rs
@@ -0,0 +1,130 @@
+//! Managed libvirt storage pool for IoT smoke runs.
+//!
+//! The first time a Harmony IoT workflow runs on a host, it needs a
+//! writable place to drop per-VM overlay disks + cloud-init seed ISOs.
+//! Rather than ask the operator to set that up, we create a user-
+//! owned dir-backed libvirt pool at
+//! `$HARMONY_DATA_DIR/iot/kvm/pool/` and let libvirt handle:
+//!
+//! - **Perms**: dir contents get chowned to libvirt-qemu on VM start
+//!   via dynamic-ownership (default-on), and back to us on VM stop
+//!   (via remember_owner, also default-on). No `chmod 644` gymnastics.
+//! - **Visibility**: `virsh vol-list harmony-iot` shows every
+//!   artifact we've created.
+//! - **Cleanup**: `virsh vol-delete <vol> harmony-iot` removes
+//!   managed volumes alongside `virsh undefine <domain> --remove-all-storage`.
+//!
+//! We *don't* rewrite the VM XML to use `<disk type='volume'>`
+//! yet — the existing `<disk type='file'>` form is fine because files
+//! inside a pool dir still benefit from dynamic ownership. The pool
+//! is effectively an "annotation" telling libvirt "you may touch this
+//! dir's perms."
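+//!
+//! Usage is one idempotent call (sketch; names from this module):
+//!
+//! ```ignore
+//! let pool = ensure_harmony_iot_pool().await?; // define + build + start + autostart
+//! let overlay = pool.path.join("iot-vm-01.qcow2"); // per-VM disks live inside the pool dir
+//! ```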
+use std::path::PathBuf;
+
+use log::info;
+use tokio::sync::OnceCell;
+use virt::connect::Connect;
+use virt::storage_pool::StoragePool;
+
+use crate::domain::config::HARMONY_DATA_DIR;
+use crate::executors::ExecutorError;
+
+pub const HARMONY_IOT_POOL_NAME: &str = "harmony-iot";
+
+/// Filesystem path + libvirt name of the managed pool.
+#[derive(Debug, Clone)]
+pub struct HarmonyIotPool {
+    pub name: String,
+    pub path: PathBuf,
+}
+
+/// Ensure the Harmony IoT libvirt storage pool exists, is started, and
+/// is set to autostart. Idempotent; runs its slow path at most once per
+/// process.
+///
+/// **Requires libvirt-group membership**. When the user isn't in the
+/// group, libvirt rejects the `qemu:///system` connection — the
+/// preflight check catches that upstream.
+pub async fn ensure_harmony_iot_pool() -> Result<HarmonyIotPool, ExecutorError> {
+    static CELL: OnceCell<HarmonyIotPool> = OnceCell::const_new();
+    CELL.get_or_try_init(provision_pool).await.cloned()
+}
+
+async fn provision_pool() -> Result<HarmonyIotPool, ExecutorError> {
+    let pool_dir = HARMONY_DATA_DIR.join("iot").join("kvm").join("pool");
+    tokio::fs::create_dir_all(&pool_dir)
+        .await
+        .map_err(|e| exec(format!("create pool dir {pool_dir:?}: {e}")))?;
+    // Let libvirt-qemu walk into it; dynamic ownership handles file
+    // chmod on VM start.
+    tokio::fs::set_permissions(
+        &pool_dir,
+        std::os::unix::fs::PermissionsExt::from_mode(0o755),
+    )
+    .await
+    .map_err(|e| exec(format!("chmod pool dir: {e}")))?;
+
+    let pool_path = pool_dir.clone();
+    let pool_name = HARMONY_IOT_POOL_NAME.to_string();
+
+    // virt-rs is blocking C bindings — bounce into spawn_blocking.
+    let pool_name_blocking = pool_name.clone();
+    let pool_path_blocking = pool_path.clone();
+    tokio::task::spawn_blocking(move || -> Result<(), ExecutorError> {
+        let conn = Connect::open(Some("qemu:///system"))
+            .map_err(|e| exec(format!("libvirt connect qemu:///system: {e}")))?;
+        let (pool, is_fresh) = match StoragePool::lookup_by_name(&conn, &pool_name_blocking) {
+            Ok(p) => (p, false),
+            Err(_) => {
+                let xml = pool_xml(&pool_name_blocking, &pool_path_blocking);
+                info!("defining libvirt pool '{pool_name_blocking}' → {pool_path_blocking:?}");
+                let p = StoragePool::define_xml(&conn, &xml, 0)
+                    .map_err(|e| exec(format!("define pool: {e}")))?;
+                (p, true)
+            }
+        };
+        // `pool-build` creates the dir layout a dir-pool expects; only
+        // needed on first definition. Libvirt rejects `build` on an
+        // already-active pool.
+        if is_fresh {
+            pool.build(0)
+                .map_err(|e| exec(format!("pool build: {e}")))?;
+        }
+        let active = pool
+            .is_active()
+            .map_err(|e| exec(format!("pool is_active: {e}")))?;
+        if !active {
+            info!("starting libvirt pool '{pool_name_blocking}'");
+            pool.create(0)
+                .map_err(|e| exec(format!("pool create/start: {e}")))?;
+        }
+        pool.set_autostart(true)
+            .map_err(|e| exec(format!("pool set_autostart: {e}")))?;
+        Ok(())
+    })
+    .await
+    .map_err(|e| exec(format!("spawn_blocking pool setup: {e}")))??;
+
+    Ok(HarmonyIotPool {
+        name: pool_name,
+        path: pool_path,
+    })
+}
+
+fn pool_xml(name: &str, path: &std::path::Path) -> String {
+    format!(
+        r#"<pool type='dir'>
+  <name>{name}</name>
+  <target>
+    <path>{path}</path>
+  </target>
+</pool>
+"#,
+        name = name,
+        path = path.display(),
+    )
+}
+
+fn exec(msg: impl Into<String>) -> ExecutorError {
+    ExecutorError::UnexpectedError(msg.into())
+}
diff --git a/harmony/src/modules/iot/mod.rs b/harmony/src/modules/iot/mod.rs
index 829be50f..23ec2987 100644
--- a/harmony/src/modules/iot/mod.rs
+++ b/harmony/src/modules/iot/mod.rs
@@ -11,6 +11,23 @@
 //! they run inside the Harmony framework proper, driven by the same
`harmony_cli::run` story every other Score uses. +pub mod assets; +#[cfg(feature = "kvm")] +pub mod libvirt_pool; +pub mod preflight; mod setup_score; +#[cfg(feature = "kvm")] +mod vm_score; +pub use assets::{ + IotSshKeypair, UBUNTU_2404_CLOUDIMG_ARM64_FILENAME, UBUNTU_2404_CLOUDIMG_ARM64_SHA256, + UBUNTU_2404_CLOUDIMG_ARM64_URL, UBUNTU_2404_CLOUDIMG_FILENAME, UBUNTU_2404_CLOUDIMG_SHA256, + UBUNTU_2404_CLOUDIMG_URL, ensure_iot_ssh_keypair, ensure_ubuntu_2404_cloud_image, + ensure_ubuntu_2404_cloud_image_for_arch, read_public_key, +}; +#[cfg(feature = "kvm")] +pub use libvirt_pool::{HARMONY_IOT_POOL_NAME, HarmonyIotPool, ensure_harmony_iot_pool}; +pub use preflight::{check_iot_smoke_preflight, check_iot_smoke_preflight_for_arch}; pub use setup_score::{IotDeviceSetupConfig, IotDeviceSetupScore}; +#[cfg(feature = "kvm")] +pub use vm_score::ProvisionVmScore; diff --git a/harmony/src/modules/iot/preflight.rs b/harmony/src/modules/iot/preflight.rs new file mode 100644 index 00000000..f15b4750 --- /dev/null +++ b/harmony/src/modules/iot/preflight.rs @@ -0,0 +1,166 @@ +//! Fail-fast preflight checks for the IoT smoke test. +//! +//! The contract for a Harmony IoT smoke run is: +//! the operator installs a short list of generic packages on the runner +//! (kvm/libvirt/qemu, xorriso, python3, cargo, podman), puts their user +//! in the `libvirt` group, and starts the default libvirt network — +//! everything else is Harmony's problem. This module is where we verify +//! those preconditions and turn each missing piece into an +//! actionable error, rather than letting libvirt/virsh/ansible dump a +//! cryptic failure three layers in. + +use std::process::Stdio; + +use tokio::process::Command; + +use crate::domain::topology::VmArchitecture; +use crate::executors::ExecutorError; +#[cfg(feature = "kvm")] +use crate::modules::kvm::firmware::discover_aarch64_firmware; + +/// Run every preflight check for an x86_64 smoke run — equivalent +/// to [`check_iot_smoke_preflight_for_arch`] with +/// [`VmArchitecture::X86_64`]. Kept as a distinct function so +/// existing callers don't need to thread an arch through yet. +pub async fn check_iot_smoke_preflight() -> Result<(), ExecutorError> { + check_iot_smoke_preflight_for_arch(VmArchitecture::X86_64).await +} + +/// Arch-aware preflight. On top of the host-generic checks +/// (virsh, qemu-img, xorriso, python3, ssh-keygen, libvirt group, +/// default network), an aarch64 target requires +/// `qemu-system-aarch64` and a usable AAVMF firmware pair. +pub async fn check_iot_smoke_preflight_for_arch(arch: VmArchitecture) -> Result<(), ExecutorError> { + check_tool_on_path("virsh", "libvirt client").await?; + check_tool_on_path("qemu-img", "qemu-utils").await?; + check_tool_on_path("xorriso", "ISO image builder").await?; + check_tool_on_path("python3", "for the managed Ansible venv").await?; + check_tool_on_path("ssh-keygen", "for bootstrapping the IoT SSH keypair").await?; + check_libvirt_group_membership().await?; + check_libvirt_default_network().await?; + + if arch == VmArchitecture::Aarch64 { + check_tool_on_path("qemu-system-aarch64", "for aarch64 TCG emulation").await?; + // Runtime discovery: same call the topology makes at + // ensure_vm time — preflight surfaces it up front. + // Gated behind `kvm` because callers building `harmony` + // without `kvm` (e.g. the on-device agent) don't pull in + // libvirt at all; for them, aarch64 preflight simply + // stops after the qemu-system-aarch64 PATH check. 
+ #[cfg(feature = "kvm")] + discover_aarch64_firmware()?; + } + Ok(()) +} + +async fn check_tool_on_path(name: &str, what_for: &str) -> Result<(), ExecutorError> { + let status = Command::new("sh") + .args(["-c", &format!("command -v {name}")]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await + .map_err(|e| exec(format!("spawn `command -v`: {e}")))?; + if status.success() { + return Ok(()); + } + Err(exec(format!( + "missing required tool: `{name}` ({what_for}) is not on PATH.\n \ + Fix: install it one-time on this host. On Arch: `sudo pacman -S {arch_pkg}`. \ + On Debian/Ubuntu: `sudo apt install {deb_pkg}`. On Fedora: `sudo dnf install {rpm_pkg}`.", + arch_pkg = arch_package_for(name), + deb_pkg = deb_package_for(name), + rpm_pkg = rpm_package_for(name), + ))) +} + +fn arch_package_for(tool: &str) -> String { + match tool { + "virsh" => "libvirt", + "qemu-img" => "qemu-img", + "qemu-system-aarch64" => "qemu-system-aarch64", + "xorriso" => "libisoburn", + "python3" => "python", + "ssh-keygen" => "openssh", + _ => return tool.to_string(), + } + .to_string() +} + +fn deb_package_for(tool: &str) -> String { + match tool { + "virsh" => "libvirt-clients", + "qemu-img" => "qemu-utils", + "qemu-system-aarch64" => "qemu-system-arm", + "xorriso" => "xorriso", + "python3" => "python3 python3-venv", + "ssh-keygen" => "openssh-client", + _ => return tool.to_string(), + } + .to_string() +} + +fn rpm_package_for(tool: &str) -> String { + match tool { + "virsh" => "libvirt-client", + "qemu-img" => "qemu-img", + "qemu-system-aarch64" => "qemu-system-aarch64", + "xorriso" => "xorriso", + "python3" => "python3 python3-pip", + "ssh-keygen" => "openssh-clients", + _ => return tool.to_string(), + } + .to_string() +} + +async fn check_libvirt_group_membership() -> Result<(), ExecutorError> { + let output = Command::new("id") + .arg("-Gn") + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .output() + .await + .map_err(|e| exec(format!("spawn id: {e}")))?; + let groups = String::from_utf8_lossy(&output.stdout); + if groups.split_whitespace().any(|g| g == "libvirt") { + return Ok(()); + } + Err(exec( + "current user is not in the `libvirt` group.\n \ + Fix: `sudo usermod -aG libvirt $USER` and then log out + back in (or `newgrp libvirt` \ + for this shell). 
Needed so Harmony can manage a user-owned libvirt storage pool \
+ without sudo.",
+ ))
+}
+
+async fn check_libvirt_default_network() -> Result<(), ExecutorError> {
+ let output = Command::new("virsh")
+ .args(["--connect", "qemu:///system", "net-info", "default"])
+ .stdout(Stdio::piped())
+ .stderr(Stdio::piped())
+ .output()
+ .await
+ .map_err(|e| exec(format!("spawn virsh: {e}")))?;
+ if !output.status.success() {
+ let stderr = String::from_utf8_lossy(&output.stderr);
+ return Err(exec(format!(
+ "libvirt `default` network is missing or unreachable: {}.\n \
+ Fix: ensure libvirtd is running (`sudo systemctl enable --now libvirtd`), \
+ then `sudo virsh net-define /usr/share/libvirt/networks/default.xml` and \
+ `sudo virsh net-start default && sudo virsh net-autostart default`.",
+ stderr.trim()
+ )));
+ }
+ let info = String::from_utf8_lossy(&output.stdout);
+ if !info.lines().any(|l| l.trim() == "Active: yes") {
+ return Err(exec(
+ "libvirt `default` network exists but is not active.\n \
+ Fix: `sudo virsh net-start default && sudo virsh net-autostart default`.",
+ ));
+ }
+ Ok(())
+}
+
+fn exec(msg: impl Into<String>) -> ExecutorError {
+ ExecutorError::UnexpectedError(msg.into())
+}
diff --git a/harmony/src/modules/iot/setup_score.rs b/harmony/src/modules/iot/setup_score.rs
index 8692cf32..9f59cf70 100644
--- a/harmony/src/modules/iot/setup_score.rs
+++ b/harmony/src/modules/iot/setup_score.rs
@@ -16,8 +16,9 @@ use crate::domain::interpret::{
 };
 use crate::domain::inventory::Inventory;
 use crate::domain::topology::{
- ChangeReport, FileSource, FileSpec, HostConfigurationProvider, SystemdScope, SystemdUnitSpec,
- Topology, UserSpec,
+ ChangeReport, FileDelivery, FileSource, FileSpec, HostReachable, LinuxHostConfiguration,
+ PackageInstaller, SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager,
+ UserSpec,
 };
 use crate::score::Score;
 
@@ -33,9 +34,12 @@ use crate::score::Score;
 /// fleet partitions once group routing lands.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct IotDeviceSetupConfig {
- /// Stable device identifier. Written into the agent's TOML and used
- /// as the KV key prefix (`<group>.<device_id>`).
- pub device_id: String,
+ /// Stable device identifier. Written into the agent's TOML and
+ /// used as the KV key prefix (`<group>.<device_id>`). Harmony
+ /// `Id` values are sortable by creation time and collision-safe
+ /// at up to ~10k devices/sec — ample headroom for a fleet
+ /// registry.
+ pub device_id: Id,
 /// Fleet partition this device belongs to.
 pub group: String,
 /// NATS URLs the agent should connect to. Typically one entry.
@@ -53,41 +57,38 @@ pub struct IotDeviceSetupConfig {
 
 impl IotDeviceSetupConfig {
 /// Render the agent's `/etc/iot-agent/config.toml` content.
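+ ///
+ /// For illustration (all values hypothetical), a device with a
+ /// single NATS URL renders as:
+ ///
+ /// ```toml
+ /// [agent]
+ /// device_id = "01HQ3X..."
+ /// group = "lab"
+ ///
+ /// [credentials]
+ /// type = "toml-shared"
+ /// nats_user = "agent"
+ /// nats_pass = "secret"
+ ///
+ /// [nats]
+ /// urls = ["nats://192.0.2.10:4222"]
+ /// ```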
 pub fn render_toml(&self) -> String {
- let mut out = String::new();
- out.push_str("[agent]\n");
- out.push_str(&format!(
- "device_id = \"{}\"\n",
- toml_escape(&self.device_id)
- ));
- out.push_str(&format!("group = \"{}\"\n", toml_escape(&self.group)));
- out.push('\n');
- out.push_str("[credentials]\n");
- out.push_str("type = \"toml-shared\"\n");
- out.push_str(&format!(
- "nats_user = \"{}\"\n",
- toml_escape(&self.nats_user)
- ));
- out.push_str(&format!(
- "nats_pass = \"{}\"\n",
- toml_escape(&self.nats_pass)
- ));
- out.push('\n');
- out.push_str("[nats]\n");
- out.push_str("urls = [");
- for (i, url) in self.nats_urls.iter().enumerate() {
- if i > 0 {
- out.push_str(", ");
- }
- out.push_str(&format!("\"{}\"", toml_escape(url)));
- }
- out.push_str("]\n");
- out
+ // Raw-string template with format! — the TOML escape rules for
+ // double-quoted strings are just `\` and `"`, handled by
+ // [`toml_escape`].
+ let device_id = toml_escape(&self.device_id.to_string());
+ let group = toml_escape(&self.group);
+ let nats_user = toml_escape(&self.nats_user);
+ let nats_pass = toml_escape(&self.nats_pass);
+ let urls = self
+ .nats_urls
+ .iter()
+ .map(|u| format!("\"{}\"", toml_escape(u)))
+ .collect::<Vec<_>>()
+ .join(", ");
+ format!(
+ r#"[agent]
+device_id = "{device_id}"
+group = "{group}"
+
+[credentials]
+type = "toml-shared"
+nats_user = "{nats_user}"
+nats_pass = "{nats_pass}"
+
+[nats]
+urls = [{urls}]
+"#
+ )
 }
 
 /// Render the systemd unit file content.
- pub fn render_systemd_unit(&self) -> String {
- String::from(
- "[Unit]
+ pub fn render_systemd_unit(&self) -> &'static str {
+ r#"[Unit]
 Description=IoT Agent (Harmony)
 After=network-online.target
 Wants=network-online.target
@@ -105,8 +106,7 @@ StandardError=journal
 
 [Install]
 WantedBy=multi-user.target
-",
- )
+"#
 }
 }
 
@@ -125,7 +125,7 @@ impl IotDeviceSetupScore {
 }
 }
 
-impl<T: Topology + HostConfigurationProvider> Score<T> for IotDeviceSetupScore {
+impl<T: Topology + LinuxHostConfiguration> Score<T> for IotDeviceSetupScore {
 fn name(&self) -> String {
 format!("IotDeviceSetupScore({})", self.config.device_id)
 }
@@ -147,7 +147,7 @@ struct IotDeviceSetupInterpret {
 }
 
 #[async_trait]
-impl<T: Topology + HostConfigurationProvider> Interpret<T> for IotDeviceSetupInterpret {
+impl<T: Topology + LinuxHostConfiguration> Interpret<T> for IotDeviceSetupInterpret {
 fn get_name(&self) -> InterpretName {
 InterpretName::IotDeviceSetup
 }
@@ -167,13 +167,15 @@ impl<T: Topology + HostConfigurationProvider> Interpret<T> for IotDeviceSetupInt
 topology: &T,
 ) -> Result<Outcome, InterpretError> {
 let cfg = &self.config;
- topology.ping().await.map_err(wrap)?;
+ HostReachable::ping(topology).await.map_err(wrap)?;
 
 let mut change_log: Vec<String> = Vec::new();
 
 // 1. Dependencies.
 for pkg in ["podman", "systemd-container"] {
- let r = topology.ensure_package(pkg).await.map_err(wrap)?;
+ let r = PackageInstaller::ensure_package(topology, pkg)
+ .await
+ .map_err(wrap)?;
 log_change(&mut change_log, format!("package:{pkg}"), r);
 }
 
@@ -191,16 +193,19 @@ impl<T: Topology + HostConfigurationProvider> Interpret<T> for IotDeviceSetupInt
 system: true,
 create_home: true,
 };
- let r = topology.ensure_user(&user_spec).await.map_err(wrap)?;
+ let r = UnixUserManager::ensure_user(topology, &user_spec)
+ .await
+ .map_err(wrap)?;
 log_change(&mut change_log, "user:iot-agent", r);
 
- let r = topology.ensure_linger("iot-agent").await.map_err(wrap)?;
+ let r = UnixUserManager::ensure_linger(topology, "iot-agent")
+ .await
+ .map_err(wrap)?;
 log_change(&mut change_log, "linger:iot-agent", r);
 
 // 3. User-scoped podman socket. Required by `PodmanTopology` on
 // the agent so it reaches /run/user/<uid>/podman/podman.sock.
- let r = topology - .ensure_user_unit_active("iot-agent", "podman.socket") + let r = SystemdManager::ensure_user_unit_active(topology, "iot-agent", "podman.socket") .await .map_err(wrap)?; log_change(&mut change_log, "user-unit:podman.socket", r); @@ -210,16 +215,18 @@ impl Interpret for IotDeviceSetupInt // content over SFTP and reports `changed: true` only when the // remote file actually differs from the local one — so // re-running this Score without a new binary is a true NOOP. - let binary_r = topology - .ensure_file(&FileSpec { + let binary_r = FileDelivery::ensure_file( + topology, + &FileSpec { path: "/usr/local/bin/iot-agent".to_string(), source: FileSource::LocalPath(cfg.agent_binary_path.clone()), owner: Some("root".to_string()), group: Some("root".to_string()), mode: Some(0o755), - }) - .await - .map_err(wrap)?; + }, + ) + .await + .map_err(wrap)?; log_change(&mut change_log, "file:/usr/local/bin/iot-agent", binary_r); // 5. /etc/iot-agent/ + config.toml @@ -231,24 +238,27 @@ impl Interpret for IotDeviceSetupInt group: Some("iot-agent".to_string()), mode: Some(0o600), }; - let toml_r = topology.ensure_file(&toml_spec).await.map_err(wrap)?; + let toml_r = FileDelivery::ensure_file(topology, &toml_spec) + .await + .map_err(wrap)?; log_change(&mut change_log, "file:/etc/iot-agent/config.toml", toml_r); // 6. systemd unit for the agent itself. let unit = SystemdUnitSpec { name: "iot-agent".to_string(), - unit_content: cfg.render_systemd_unit(), + unit_content: cfg.render_systemd_unit().to_string(), scope: SystemdScope::System, start_immediately: true, }; - let unit_r = topology.ensure_systemd_unit(&unit).await.map_err(wrap)?; + let unit_r = SystemdManager::ensure_systemd_unit(topology, &unit) + .await + .map_err(wrap)?; log_change(&mut change_log, "unit:iot-agent", unit_r); // 7. Restart the agent iff anything that affects it changed. let needs_restart = toml_r.changed || unit_r.changed || binary_r.changed; if needs_restart { - topology - .restart_service("iot-agent", SystemdScope::System) + SystemdManager::restart_service(topology, "iot-agent", SystemdScope::System) .await .map_err(wrap)?; change_log.push("restart:iot-agent".to_string()); diff --git a/harmony/src/modules/iot/vm_score.rs b/harmony/src/modules/iot/vm_score.rs new file mode 100644 index 00000000..65b56e16 --- /dev/null +++ b/harmony/src/modules/iot/vm_score.rs @@ -0,0 +1,93 @@ +//! [`ProvisionVmScore`] — Harmony Score wrapping +//! [`VirtualMachineHost::ensure_vm`]. +//! +//! The Score itself has no knowledge of the hypervisor or how +//! first-boot configuration is delivered to the guest (cloud-init +//! seed ISO, OVF properties, Proxmox APIs — all hypervisor +//! concerns). It takes a generic `VirtualMachineSpec`, calls the +//! topology's `VirtualMachineHost` capability, and returns the +//! runtime info the caller needs to SSH in. 
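+//!
+//! A minimal usage sketch (names illustrative; assumes a topology
+//! that implements `VirtualMachineHost`, e.g. `KvmVirtualMachineHost`):
+//!
+//! ```ignore
+//! let score = ProvisionVmScore {
+//!     spec: VirtualMachineSpec::new_aarch64("iot-dev-vm"),
+//! };
+//! // Dispatched like any other Score via the usual harmony_cli::run flow.
+//! ```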
+
+use async_trait::async_trait;
+use harmony_types::id::Id;
+use serde::{Deserialize, Serialize};
+
+use crate::data::Version;
+use crate::domain::interpret::{
+ Interpret, InterpretError, InterpretName, InterpretStatus, Outcome,
+};
+use crate::domain::inventory::Inventory;
+use crate::domain::topology::{Topology, VirtualMachineHost, VirtualMachineSpec};
+use crate::score::Score;
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ProvisionVmScore {
+ pub spec: VirtualMachineSpec,
+}
+
+impl<T: Topology + VirtualMachineHost> Score<T> for ProvisionVmScore {
+ fn name(&self) -> String {
+ format!("ProvisionVmScore({})", self.spec.name)
+ }
+
+ fn create_interpret(&self) -> Box<dyn Interpret<T>> {
+ Box::new(ProvisionVmInterpret {
+ spec: self.spec.clone(),
+ version: Version::from("0.1.0").expect("static version"),
+ status: InterpretStatus::QUEUED,
+ })
+ }
+}
+
+#[derive(Debug)]
+struct ProvisionVmInterpret {
+ spec: VirtualMachineSpec,
+ version: Version,
+ status: InterpretStatus,
+}
+
+#[async_trait]
+impl<T: Topology + VirtualMachineHost> Interpret<T> for ProvisionVmInterpret {
+ fn get_name(&self) -> InterpretName {
+ InterpretName::KvmVm
+ }
+ fn get_version(&self) -> Version {
+ self.version.clone()
+ }
+ fn get_status(&self) -> InterpretStatus {
+ self.status.clone()
+ }
+ fn get_children(&self) -> Vec<Id> {
+ vec![]
+ }
+
+ async fn execute(
+ &self,
+ _inventory: &Inventory,
+ topology: &T,
+ ) -> Result<Outcome, InterpretError> {
+ let info = topology
+ .ensure_vm(&self.spec)
+ .await
+ .map_err(|e| InterpretError::new(format!("ensure_vm: {e}")))?;
+
+ let mut details = vec![
+ format!("hypervisor={}", info.hypervisor),
+ format!("name={}", info.name),
+ ];
+ if let Some(ip) = info.ip {
+ details.push(format!("ip={ip}"));
+ }
+ Ok(Outcome::success_with_details(
+ format!(
+ "VM {} up on {} ({})",
+ info.name,
+ info.hypervisor,
+ info.ip
+ .map(|i| i.to_string())
+ .unwrap_or_else(|| "no-ip".to_string())
+ ),
+ details,
+ ))
+ }
+}
diff --git a/harmony/src/modules/kvm/cloudinit.rs b/harmony/src/modules/kvm/cloudinit.rs
index 8e764369..4b1031fe 100644
--- a/harmony/src/modules/kvm/cloudinit.rs
+++ b/harmony/src/modules/kvm/cloudinit.rs
@@ -1,16 +1,34 @@
 //! Cloud-init seed ISO generation.
 //!
-//! **Scope.** This is a convenience for the *VM-as-device test rig*. Real
-//! customer Pi deployments will not use cloud-init — they use rpi-imager's
-//! preconfigure flow, a PXE-boot appliance, or an equivalent OEM mechanism.
-//! Keep this helper tucked inside the KVM module to signal it's a test-rig
-//! concern, not a customer-facing capability.
+//! # Why customize the VM at all?
 //!
-//! What it does: given a hostname + one authorized SSH key + an optional
-//! privileged user, writes `user-data` and `meta-data` files, wraps them
-//! in an ISO 9660 volume labeled `CIDATA`, and returns the ISO path. That
-//! ISO is attached as a second CD-ROM on the VM; cloud-init on first boot
-//! reads it and applies the configuration.
+//! Ubuntu cloud images ship with no default login — no password,
+//! keys-only SSH, no authorized keys. That's the right posture for a
+//! cloud image but it means a freshly-booted VM is *unreachable* until
+//! we tell it who to trust. Every programmatic VM provisioning story
+//! customizes somehow. The menu:
+//!
+//! 1. **Cloud-init via seed ISO (what we do).** Attach a second CD-ROM
+//! labeled `CIDATA` carrying `user-data` + `meta-data` files.
+//! cloud-init's NoCloud datasource reads it on first boot.
+//! Requires `xorriso` (one 2MB package, in every distro's repo).
+//! 2. **Cloud-init via NoCloud-net HTTP.** Run an in-process HTTP
+//!
server; pass its URL to the VM via SMBIOS. No extra system
+//! dep, but ~100 lines of server/lifecycle code, and the SMBIOS
+//! stanza has to live in domain XML. Good future option if xorriso
+//! becomes painful.
+//! 3. **virt-customize / libguestfs.** Rewrites the qcow2 pre-boot
+//! to inject the key directly into /root/.ssh/authorized_keys.
+//! Heavier dep (libguestfs is ~100MB) and has SELinux/perm quirks
+//! on several distros. Out of scope.
+//! 4. **Pre-baked image with known creds.** Mints its own maintenance
+//! burden (rebuild on every Ubuntu point release). Out of scope.
+//!
+//! We picked (1) because `xorriso` is the lightest dep that delivers a
+//! stable, standard mechanism (cloud-init NoCloud datasource is in the
+//! official cloud-init spec). Keep this helper tucked inside the KVM
+//! module to signal it's a KVM-impl concern, not a customer-facing
+//! capability — the generic `VirtualMachineHost` abstraction hides it.
 
 use std::path::{Path, PathBuf};
 use std::process::Stdio;
@@ -28,19 +46,16 @@ pub struct CloudInitSeedConfig<'a> {
 /// Public SSH key (openssh format, single line) that the guest will
 /// authorize for the `user` account.
 pub authorized_key: &'a str,
- /// Local username to create with passwordless sudo. Cloud-init's
- /// `default` user on Ubuntu images is `ubuntu`; for clarity we create
- /// an explicit one so the agent + setup score don't depend on distro
- /// defaults.
+ /// Local username to create with passwordless sudo.
 pub user: &'a str,
- /// Extra `runcmd` lines to append to the user-data. Mostly useful for
- /// no-op debugging; keep empty in production paths.
+ /// Extra `runcmd` lines to append to the user-data. Mostly useful
+ /// for no-op debugging; keep empty in production paths.
 pub extra_runcmd: Vec<String>,
 }
 
 /// Write a seed ISO to `output_dir/<hostname>-seed.iso`. Uses `xorriso
-/// -as mkisofs` under the hood; if `xorriso` is not on PATH this returns
-/// a clear error asking for it.
+/// -as mkisofs` under the hood; if `xorriso` is not on PATH this
+/// returns a clear error asking for it.
 pub async fn build_seed_iso(
 cfg: &CloudInitSeedConfig<'_>,
 output_dir: &Path,
@@ -54,19 +69,17 @@
 }
 
 let workdir = tempdir().map_err(KvmError::Io)?;
- let user_data = render_user_data(cfg);
+
 // Fresh instance-id on every seed build. Cloud-init treats a new
- // instance-id as a "first boot": it re-runs all of its per-instance
- // modules. This is what makes the KvmVmScore repeatable against a
- // reused overlay disk — without it, the second boot would skip all
- // our user/hostname/ssh configuration because cloud-init cached the
- // previous run under the same id.
+ // instance-id as a "first boot": it re-runs all of its per-
+ // instance modules. This is what makes ensure_vm repeatable
+ // against a reused overlay disk — without it, the second boot
+ // would skip all our user/hostname/ssh configuration because
+ // cloud-init cached the previous run under the same id.
 let instance_id = uuid::Uuid::new_v4();
 let meta_data = format!(
- "instance-id: {instance_id}
-local-hostname: {hostname}
-",
+ "instance-id: {instance_id}\nlocal-hostname: {hostname}\n",
 hostname = cfg.hostname
 );
 
@@ -79,8 +92,8 @@ local-hostname: {hostname}
 let output_path = output_dir.join(format!("{}-seed.iso", cfg.hostname));
 
 // xorriso refuses to overwrite a pre-existing output file cleanly
- // (it treats it as input "media"), so remove it first.
Our seed is - // regenerated from config every run, which is the intended + // (it treats it as input "media"), so remove it first. Our seed + // is regenerated from config every run, which is the intended // behaviour — the file is a build artifact, not state. if output_path.exists() { tokio::fs::remove_file(&output_path) @@ -88,22 +101,23 @@ local-hostname: {hostname} .map_err(KvmError::Io)?; } - // Use `.output()` (not `.status()`) so we actually drain stderr — a - // piped stderr that isn't read deadlocks xorriso once the pipe fills - // up, and in practice the kernel surfaces that as a SIGPIPE on the - // child. Keep stderr piped so failure diagnostics make it into the - // error message. + // Use `.output()` (not `.status()`) so we actually drain stderr — + // a piped stderr that isn't read deadlocks xorriso once the pipe + // fills up. Keep stderr piped so failure diagnostics make it + // into the error message. let output = Command::new("xorriso") - .arg("-as") - .arg("mkisofs") - .arg("-output") - .arg(&output_path) - .arg("-volid") - .arg("CIDATA") - .arg("-joliet") - .arg("-rock") - .arg(workdir.path().join("user-data")) - .arg(workdir.path().join("meta-data")) + .args([ + "-as".as_ref(), + "mkisofs".as_ref(), + "-output".as_ref(), + output_path.as_os_str(), + "-volid".as_ref(), + "CIDATA".as_ref(), + "-joliet".as_ref(), + "-rock".as_ref(), + workdir.path().join("user-data").as_os_str(), + workdir.path().join("meta-data").as_os_str(), + ]) .stdout(Stdio::null()) .stderr(Stdio::piped()) .output() @@ -121,27 +135,34 @@ local-hostname: {hostname} } fn render_user_data(cfg: &CloudInitSeedConfig<'_>) -> String { - let mut s = String::new(); - s.push_str("#cloud-config\n"); - s.push_str(&format!("hostname: {}\n", cfg.hostname)); - s.push_str(&format!("fqdn: {}.local\n", cfg.hostname)); - s.push_str("manage_etc_hosts: true\n"); - s.push_str("users:\n"); - s.push_str(&format!(" - name: {}\n", cfg.user)); - s.push_str(" sudo: ALL=(ALL) NOPASSWD:ALL\n"); - s.push_str(" shell: /bin/bash\n"); - s.push_str(" lock_passwd: true\n"); - s.push_str(" ssh_authorized_keys:\n"); - s.push_str(&format!(" - {}\n", cfg.authorized_key)); - s.push_str("ssh_pwauth: false\n"); - s.push_str("disable_root: true\n"); - if !cfg.extra_runcmd.is_empty() { - s.push_str("runcmd:\n"); + let runcmd = if cfg.extra_runcmd.is_empty() { + String::new() + } else { + let mut s = String::from("runcmd:\n"); for line in &cfg.extra_runcmd { - s.push_str(&format!(" - {}\n", line)); + s.push_str(&format!(" - {line}\n")); } - } - s + s + }; + format!( + r#"#cloud-config +hostname: {hostname} +fqdn: {hostname}.local +manage_etc_hosts: true +users: + - name: {user} + sudo: ALL=(ALL) NOPASSWD:ALL + shell: /bin/bash + lock_passwd: true + ssh_authorized_keys: + - {authorized_key} +ssh_pwauth: false +disable_root: true +{runcmd}"#, + hostname = cfg.hostname, + user = cfg.user, + authorized_key = cfg.authorized_key, + ) } async fn write_file(path: &Path, content: &str) -> Result<(), KvmError> { diff --git a/harmony/src/modules/kvm/firmware.rs b/harmony/src/modules/kvm/firmware.rs new file mode 100644 index 00000000..9491f2eb --- /dev/null +++ b/harmony/src/modules/kvm/firmware.rs @@ -0,0 +1,205 @@ +//! UEFI firmware discovery for aarch64 guests. +//! +//! Libvirt needs two paths to boot an aarch64 VM via UEFI (there +//! is no equivalent of SeaBIOS for arm64 — the virt machine type +//! has no legacy chipset): +//! +//! - **CODE** — read-only firmware image (shared across all VMs). +//! - **VARS template** — writable NVRAM prototype. 
Per-VM, libvirt
+//! reads-then-copies this on first definition.
+//!
+//! Every major distro ships the AAVMF firmware under a different
+//! path. [`discover_aarch64_firmware`] walks a known list and
+//! returns the first viable pair, or an `ExecutorError` with the
+//! package-install command for each supported distro.
+
+use std::path::{Path, PathBuf};
+
+use crate::executors::ExecutorError;
+
+/// A code + vars-template firmware pair usable by libvirt's
+/// `<loader>` + `<nvram>` domain elements.
+///
+/// Both paths must be readable by libvirt-qemu. The `vars_template`
+/// path is read-only; per-VM writable NVRAM is produced by copying
+/// it to a per-domain location — see [`copy_vars_template_for_vm`].
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct AarchFirmware {
+ pub code: PathBuf,
+ pub vars_template: PathBuf,
+}
+
+/// Ordered candidates for the CODE + VARS pair, per distro. Each
+/// tuple is checked left-to-right — the first pair whose files
+/// both exist wins. Ordering puts Arch first (this workshop host)
+/// but callers shouldn't depend on the order beyond "match what
+/// the host actually ships."
+const CANDIDATES: &[(&str, &str)] = &[
+ // Arch Linux — current edk2-armvirt (package installs both a
+ // canonical copy under /usr/share/edk2/aarch64 and a compatibility
+ // copy under /usr/share/edk2-armvirt/aarch64; either is fine).
+ (
+ "/usr/share/edk2/aarch64/QEMU_EFI.fd",
+ "/usr/share/edk2/aarch64/QEMU_VARS.fd",
+ ),
+ (
+ "/usr/share/edk2-armvirt/aarch64/QEMU_EFI.fd",
+ "/usr/share/edk2-armvirt/aarch64/QEMU_VARS.fd",
+ ),
+ // Older Arch / AUR edk2-armvirt naming — vars template as a
+ // raw pflash image rather than an .fd wrapper.
+ (
+ "/usr/share/edk2-armvirt/aarch64/QEMU_EFI.fd",
+ "/usr/share/edk2-armvirt/aarch64/vars-template-pflash.raw",
+ ),
+ // Arch Linux — hypothetical CODE/VARS naming. Some newer edk2
+ // builds split CODE vs EFI; keep this candidate so we don't
+ // regress if upstream renames.
+ (
+ "/usr/share/edk2/aarch64/QEMU_CODE.fd",
+ "/usr/share/edk2/aarch64/QEMU_VARS.fd",
+ ),
+ // Debian / Ubuntu (qemu-efi-aarch64 package).
+ (
+ "/usr/share/AAVMF/AAVMF_CODE.fd",
+ "/usr/share/AAVMF/AAVMF_VARS.fd",
+ ),
+ // Fedora / RHEL (edk2-aarch64 package).
+ (
+ "/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw",
+ "/usr/share/edk2/aarch64/vars-template-pflash.raw",
+ ),
+];
+
+pub fn discover_aarch64_firmware() -> Result<AarchFirmware, ExecutorError> {
+ for (code, vars) in CANDIDATES {
+ let code_path = Path::new(code);
+ let vars_path = Path::new(vars);
+ if code_path.is_file() && vars_path.is_file() {
+ return Ok(AarchFirmware {
+ code: code_path.to_path_buf(),
+ vars_template: vars_path.to_path_buf(),
+ });
+ }
+ }
+ Err(ExecutorError::UnexpectedError(
+ "no aarch64 UEFI firmware found. Install it one-time:\n \
+ Arch: `sudo pacman -S edk2-armvirt`\n \
+ Debian/Ubuntu: `sudo apt install qemu-efi-aarch64`\n \
+ Fedora: `sudo dnf install edk2-aarch64`\n\
+ Checked paths (first pair to have both files wins):\n \
+ - /usr/share/edk2/aarch64/QEMU_EFI.fd + QEMU_VARS.fd\n \
+ - /usr/share/edk2-armvirt/aarch64/QEMU_EFI.fd + QEMU_VARS.fd\n \
+ - /usr/share/edk2-armvirt/aarch64/QEMU_EFI.fd + vars-template-pflash.raw\n \
+ - /usr/share/edk2/aarch64/QEMU_{CODE,VARS}.fd\n \
+ - /usr/share/AAVMF/AAVMF_{CODE,VARS}.fd\n \
+ - /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw + vars-template-pflash.raw"
+ .to_string(),
+ ))
+}
+
+/// Copy `firmware.vars_template` to `dest` so libvirt-qemu has a
+/// writable per-VM NVRAM. Overwrites `dest` if present — on a
+/// reused VM name we want fresh NVRAM.
The file is chmod 0644 so
+/// libvirt-qemu's dynamic ownership chown on VM start works.
+///
+/// VARS templates are already 64 MiB on every distro we support
+/// (they're sized for the pflash region), so no padding is needed
+/// here — unlike [`ensure_code_pflash_padded`] for the CODE side.
+pub async fn copy_vars_template_for_vm(
+ firmware: &AarchFirmware,
+ dest: &Path,
+) -> Result<(), ExecutorError> {
+ use std::os::unix::fs::PermissionsExt;
+ tokio::fs::copy(&firmware.vars_template, dest)
+ .await
+ .map_err(|e| {
+ ExecutorError::UnexpectedError(format!(
+ "copy AAVMF vars template {:?} → {dest:?}: {e}",
+ firmware.vars_template
+ ))
+ })?;
+ tokio::fs::set_permissions(dest, std::fs::Permissions::from_mode(0o644))
+ .await
+ .map_err(|e| ExecutorError::UnexpectedError(format!("chmod {dest:?}: {e}")))?;
+ Ok(())
+}
+
+/// QEMU's `virt` machine wires pflash unit 0 as a CFI flash device
+/// of fixed size 64 MiB. When libvirt's `<loader type='pflash'>`
+/// points at a file smaller than that, qemu refuses to start:
+///
+/// cfi.pflash01 device '/machine/virt.flash0' requires 67108864
+/// bytes, block backend provides 3145728 bytes
+///
+/// Different distros ship the CODE firmware differently:
+///
+/// - **Pre-padded** (upstream QEMU `pc-bios/edk2-aarch64-code.fd`,
+/// Ubuntu `qemu-efi-aarch64`): file is 64 MiB, zero-padded at the
+/// tail. Works as-is with `-drive if=pflash`.
+/// - **Raw edk2 build output** (Arch `edk2-aarch64` 202508+): file
+/// is ~2-4 MiB, just the firmware volume without pflash padding.
+/// Has to be padded before libvirt will accept it.
+///
+/// [`ensure_code_pflash_padded`] produces a 64 MiB cached copy at
+/// `cache_path` when the source is smaller than the pflash region,
+/// and reuses it when it already exists with the right size. When
+/// the source is already 64 MiB, this returns it unchanged — no
+/// copy, no bytes moved.
+pub const AARCH64_PFLASH_BYTES: u64 = 64 * 1024 * 1024;
+
+pub async fn ensure_code_pflash_padded(
+ source: &Path,
+ cache_path: &Path,
+) -> Result<PathBuf, ExecutorError> {
+ let src_meta = tokio::fs::metadata(source).await.map_err(|e| {
+ ExecutorError::UnexpectedError(format!("stat firmware code {source:?}: {e}"))
+ })?;
+ if src_meta.len() == AARCH64_PFLASH_BYTES {
+ return Ok(source.to_path_buf());
+ }
+ if src_meta.len() > AARCH64_PFLASH_BYTES {
+ return Err(ExecutorError::UnexpectedError(format!(
+ "firmware code {source:?} is {} bytes, larger than the 64 MiB pflash \
+ region QEMU's virt machine provides. This firmware pair is not \
+ usable for the aarch64 virt machine type.",
+ src_meta.len()
+ )));
+ }
+ // Source is under 64 MiB — needs padding. If the cache already
+ // holds a correctly-sized copy newer than the source, reuse it.
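+ // (Rough shell equivalent of the copy-and-pad below:
+ // `cp CODE.fd cache.fd && truncate -s 64M cache.fd` —
+ // `truncate` grows the file with zeros, like `set_len`.)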
+ if let Ok(cache_meta) = tokio::fs::metadata(cache_path).await + && cache_meta.len() == AARCH64_PFLASH_BYTES + && let Ok(cache_mtime) = cache_meta.modified() + && let Ok(src_mtime) = src_meta.modified() + && cache_mtime >= src_mtime + { + return Ok(cache_path.to_path_buf()); + } + + if let Some(parent) = cache_path.parent() { + tokio::fs::create_dir_all(parent).await.map_err(|e| { + ExecutorError::UnexpectedError(format!("create firmware cache dir {parent:?}: {e}")) + })?; + } + tokio::fs::copy(source, cache_path).await.map_err(|e| { + ExecutorError::UnexpectedError(format!( + "copy firmware code {source:?} → {cache_path:?}: {e}" + )) + })?; + let file = tokio::fs::OpenOptions::new() + .write(true) + .open(cache_path) + .await + .map_err(|e| ExecutorError::UnexpectedError(format!("open {cache_path:?} for pad: {e}")))?; + file.set_len(AARCH64_PFLASH_BYTES).await.map_err(|e| { + ExecutorError::UnexpectedError(format!( + "pad {cache_path:?} to {AARCH64_PFLASH_BYTES} bytes: {e}" + )) + })?; + use std::os::unix::fs::PermissionsExt; + tokio::fs::set_permissions(cache_path, std::fs::Permissions::from_mode(0o644)) + .await + .map_err(|e| ExecutorError::UnexpectedError(format!("chmod {cache_path:?}: {e}")))?; + Ok(cache_path.to_path_buf()) +} diff --git a/harmony/src/modules/kvm/mod.rs b/harmony/src/modules/kvm/mod.rs index e402b215..c686f6ef 100644 --- a/harmony/src/modules/kvm/mod.rs +++ b/harmony/src/modules/kvm/mod.rs @@ -4,16 +4,17 @@ pub mod cloudinit; pub mod config; pub mod error; pub mod executor; +pub mod firmware; pub mod topology; pub mod types; -pub mod vm_score; pub use cloudinit::{CloudInitSeedConfig, build_seed_iso}; pub use error::KvmError; pub use executor::KvmExecutor; -pub use topology::{KvmHost, KvmHostTopology}; +pub use firmware::{AarchFirmware, copy_vars_template_for_vm, discover_aarch64_firmware}; +pub use topology::{DEFAULT_ADMIN_USER, KvmVirtualMachineHost}; pub use types::{ BootDevice, CdromConfig, DhcpHost, DiskConfig, ForwardMode, NetworkConfig, - NetworkConfigBuilder, NetworkRef, VmConfig, VmConfigBuilder, VmInterface, VmStatus, + NetworkConfigBuilder, NetworkRef, UefiFirmware, VmArchitecture, VmConfig, VmConfigBuilder, + VmInterface, VmStatus, }; -pub use vm_score::{CloudInitVmConfig, KvmVmScore}; diff --git a/harmony/src/modules/kvm/topology.rs b/harmony/src/modules/kvm/topology.rs index 74eeb11a..c0f30c67 100644 --- a/harmony/src/modules/kvm/topology.rs +++ b/harmony/src/modules/kvm/topology.rs @@ -1,61 +1,413 @@ +//! KVM-backed implementation of [`VirtualMachineHost`]. +//! +//! `KvmVirtualMachineHost` wraps a [`KvmExecutor`] (libvirt +//! connection) + the Harmony-managed libvirt storage pool and +//! translates generic `VirtualMachineSpec` requests into concrete +//! libvirt domain definitions. Cloud-init is an implementation +//! detail here — callers never see it. 
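+//!
+//! Construction sketch (paths illustrative; the IoT flow gets them
+//! from `ensure_harmony_iot_pool` and the cached cloud image):
+//!
+//! ```ignore
+//! let pool = ensure_harmony_iot_pool().await?;
+//! let host = KvmVirtualMachineHost::new(
+//!     "workshop-kvm",
+//!     executor,
+//!     pool.name.clone(),
+//!     pool.path.clone(),
+//!     base_image_path,
+//! );
+//! ```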
+
+use std::net::IpAddr;
+use std::path::PathBuf;
+use std::process::Stdio;
+
 use async_trait::async_trait;
+use log::info;
+use tokio::process::Command;
 
-use crate::domain::topology::{PreparationError, PreparationOutcome, Topology};
+use crate::domain::topology::{
+ PreparationError, PreparationOutcome, Topology, VirtualMachineHost, VirtualMachineRuntimeInfo,
+ VirtualMachineSpec, VmArchitecture, VmFirstBootConfig, VmState,
+};
+use crate::executors::ExecutorError;
 
+use super::cloudinit::{CloudInitSeedConfig, build_seed_iso};
+use super::error::KvmError;
 use super::executor::KvmExecutor;
+use super::firmware::{
+ copy_vars_template_for_vm, discover_aarch64_firmware, ensure_code_pflash_padded,
+};
+use super::types::{BootDevice, CdromConfig, DiskConfig, NetworkRef, UefiFirmware, VmConfig};
 
-/// Capability: access to a libvirt-reachable KVM hypervisor.
+pub const DEFAULT_ADMIN_USER: &str = "harmony-admin";
+
+/// Libvirt/KVM hypervisor host, implementing the generic
+/// [`VirtualMachineHost`] capability.
 ///
-/// Intentionally tool-shaped rather than industry-shaped (compare to the
-/// `PostgreSQL` exception in CLAUDE.md's capability doctrine): any Score
-/// that wants to provision a VM cares about hypervisor specifics like
-/// storage pools and network bridges — there isn't an honest tool-neutral
-/// abstraction to hide behind. When we want federation over heterogeneous
-/// hypervisors (KVM + VMware + cloud provider), a higher-level
-/// `VirtualMachineHost` capability can be introduced then, and we'll
-/// either implement it *in terms of* `KvmHost` or drop `KvmHost`
-/// altogether.
-pub trait KvmHost {
- /// Access the libvirt executor used to drive this hypervisor.
- fn kvm_executor(&self) -> &KvmExecutor;
-}
-
-/// Concrete Topology wrapping a single KVM hypervisor reachable via
-/// libvirt. Implements [`KvmHost`] directly.
-pub struct KvmHostTopology {
+/// Composes with a caller-chosen storage pool directory where per-VM
+/// overlays + seed ISOs are placed. Harmony's IoT workflows use
+/// [`crate::modules::iot::ensure_harmony_iot_pool`] to populate that
+/// dir; other callers can point at any user-owned libvirt pool root.
+pub struct KvmVirtualMachineHost {
 name: String,
 executor: KvmExecutor,
+ pool_name: String,
+ pool_path: PathBuf,
+ base_image_path: PathBuf,
 }
 
-impl KvmHostTopology {
- pub fn new(name: impl Into<String>, executor: KvmExecutor) -> Self {
+impl KvmVirtualMachineHost {
+ pub fn new(
+ topology_name: impl Into<String>,
+ executor: KvmExecutor,
+ pool_name: impl Into<String>,
+ pool_path: PathBuf,
+ base_image_path: PathBuf,
+ ) -> Self {
 Self {
- name: name.into(),
+ name: topology_name.into(),
 executor,
+ pool_name: pool_name.into(),
+ pool_path,
+ base_image_path,
 }
 }
+
+ pub fn executor(&self) -> &KvmExecutor {
+ &self.executor
+ }
 }
 
 #[async_trait]
-impl Topology for KvmHostTopology {
+impl Topology for KvmVirtualMachineHost {
 fn name(&self) -> &str {
 &self.name
 }
 
 async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
- // The executor holds the URI — a cheap hypervisor-version query is
- // the libvirt equivalent of our `podman info` ping. Not adding a
- // dedicated method to KvmExecutor for v0; connection opens are
- // lazy per-call, so we rely on the first real Score call to
- // surface any connection issue. Callers that want an explicit
- // preflight should issue a trivial op (e.g. vm_exists("probe"))
- // before committing to bigger work.
+ // TODO(ROADMAP 12.1 — Phased topology): `ensure_ready` is
+ // called eagerly by Maestro before any Score in this run has
+ // declared whether it actually needs KVM. A hypervisor-version
+ // probe here would force every Harmony invocation that merely
+ // *mentions* this topology to pay the libvirt connect cost,
+ // even when the current run only touches unrelated capabilities.
+ // The phased-topology work (§12.1) will let us defer this to
+ // when a KVM-consuming Score actually runs. Until then, keep
+ // `ensure_ready` as a Noop and rely on the first `ensure_vm`
+ // call to surface any libvirt connectivity problem.
 Ok(PreparationOutcome::Noop)
 }
}
 
-impl KvmHost for KvmHostTopology {
- fn kvm_executor(&self) -> &KvmExecutor {
- &self.executor
+#[async_trait]
+impl VirtualMachineHost for KvmVirtualMachineHost {
+ async fn list_vms(&self) -> Result<Vec<VirtualMachineRuntimeInfo>, ExecutorError> {
+ // The current KvmExecutor API doesn't expose list-domains; add
+ // it when a caller needs it. For the IoT walking skeleton we
+ // only ever touch our own VMs by name, so this can stay
+ // unimplemented for now without blocking anything.
+ Err(ExecutorError::UnexpectedError(
+ "KvmVirtualMachineHost::list_vms is not implemented yet".to_string(),
+ ))
+ }
+
+ async fn ensure_vm(
+ &self,
+ spec: &VirtualMachineSpec,
+ ) -> Result<VirtualMachineRuntimeInfo, ExecutorError> {
+ let vm_already_exists = self
+ .executor
+ .vm_exists(&spec.name)
+ .await
+ .map_err(|e| exec(format!("vm_exists: {e}")))?;
+
+ // Per-VM overlay backed by the cached base image. Wiped and
+ // recreated whenever the VM doesn't already exist, so a
+ // destroyed-then-re-ensured VM always gets a clean rootfs.
+ let overlay_path = self.pool_path.join(format!("{}.qcow2", spec.name));
+ if !vm_already_exists {
+ if overlay_path.exists() {
+ tokio::fs::remove_file(&overlay_path)
+ .await
+ .map_err(|e| exec(format!("remove stale overlay: {e}")))?;
+ }
+ create_overlay(&self.base_image_path, &overlay_path).await?;
+ info!(
+ "created overlay disk {overlay_path:?} backed by {:?}",
+ self.base_image_path
+ );
+ refresh_pool(&self.pool_name).await?;
+ }
+
+ // First-boot seed ISO (cloud-init NoCloud) iff requested.
+ // `seed_iso_path` stays `None` when no first-boot config was
+ // provided — the VM boots whatever the backing image is
+ // configured to boot into.
+ let seed_iso_path = match spec.first_boot.as_ref() {
+ Some(fb) => Some(build_cloud_init_seed(fb, &spec.name, &self.pool_path).await?),
+ None => None,
+ };
+ if seed_iso_path.is_some() {
+ refresh_pool(&self.pool_name).await?;
+ }
+
+ // aarch64 guests need a UEFI firmware pair; x86_64 boots
+ // SeaBIOS by default and leaves firmware = None.
+ let firmware = match spec.architecture { + VmArchitecture::X86_64 => None, + VmArchitecture::Aarch64 => Some(ensure_vm_firmware(&spec.name, &self.pool_path).await?), + }; + if firmware.is_some() { + refresh_pool(&self.pool_name).await?; + } + + let vm_config = VmConfig { + name: spec.name.clone(), + architecture: spec.architecture, + vcpus: spec.cpus, + memory_mib: spec.memory_mib, + disks: vec![DiskConfig { + size_gb: spec.disk_size_gb.unwrap_or(0), + device: "vda".to_string(), + pool: self.pool_name.clone(), + source_path: Some(overlay_path.to_string_lossy().into_owned()), + }], + networks: vec![NetworkRef::named(&spec.network)], + cdroms: match &seed_iso_path { + Some(p) => vec![CdromConfig { + source: p.to_string_lossy().into_owned(), + device: "hdb".to_string(), + }], + None => vec![], + }, + boot_order: vec![BootDevice::Disk], + firmware, + }; + + self.executor + .ensure_vm(vm_config) + .await + .map_err(|e| exec(format!("ensure_vm: {e}")))?; + self.executor + .start_vm(&spec.name) + .await + .map_err(|e| exec(format!("start_vm: {e}")))?; + + // First-boot cloud-init takes 2-4 minutes on native-arch KVM + // (datasource detection, package regeneration, SSH host-key + // generation, reboots). Under TCG emulation — aarch64 guest + // on an x86_64 host — the same boot path runs 3-5× slower + // because every guest instruction is translated. A cold + // first boot (no disk cache) has been observed at ~15 min + // on an 8-core x86 host even with virtio-rng and + // pauth-impdef=on; budget 30 min to cover slower CI workers. + let wait_budget = match spec.architecture { + VmArchitecture::X86_64 => std::time::Duration::from_secs(300), + VmArchitecture::Aarch64 => std::time::Duration::from_secs(1800), + }; + let ip = self + .executor + .wait_for_ip(&spec.name, wait_budget) + .await + .map_err(|e| exec(format!("wait_for_ip: {e}")))?; + + // DHCP lease ≠ usable VM. When first_boot (cloud-init) is + // requested, a subsequent Score will almost always SSH in — + // so block here until port 22 accepts a TCP handshake. + // Otherwise the caller races cloud-init: under TCG we've + // seen 60-180 s between DHCP lease and sshd-listening. + if spec.first_boot.is_some() { + wait_for_tcp_port(ip, 22, wait_budget).await?; + } + + Ok(VirtualMachineRuntimeInfo { + name: spec.name.clone(), + state: VmState::Running, + ip: Some(ip), + hypervisor: "kvm".to_string(), + }) + } + + async fn delete_vm(&self, name: &str) -> Result<(), ExecutorError> { + let exists = self + .executor + .vm_exists(name) + .await + .map_err(|e| exec(format!("vm_exists: {e}")))?; + if !exists { + return Ok(()); + } + // Destroy (ignore error if already stopped) then undefine. 
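+ // (virsh equivalent: `virsh destroy <name>` — the hard
+ // power-off — followed by `virsh undefine <name>`.)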
+ let _ = self.executor.destroy_vm(name).await;
+ self.executor
+ .undefine_vm(name)
+ .await
+ .map_err(|e| exec(format!("undefine_vm: {e}")))?;
+ Ok(())
+ }
+
+ async fn get_vm_info(
+ &self,
+ name: &str,
+ ) -> Result<Option<VirtualMachineRuntimeInfo>, ExecutorError> {
+ let exists = self
+ .executor
+ .vm_exists(name)
+ .await
+ .map_err(|e| exec(format!("vm_exists: {e}")))?;
+ if !exists {
+ return Ok(None);
+ }
+ let vm_ip = self
+ .executor
+ .vm_ip(name)
+ .await
+ .map_err(|e| exec(format!("vm_ip: {e}")))?;
+ Ok(Some(VirtualMachineRuntimeInfo {
+ name: name.to_string(),
+ state: if vm_ip.is_some() {
+ VmState::Running
+ } else {
+ VmState::Unknown
+ },
+ ip: vm_ip,
+ hypervisor: "kvm".to_string(),
+ }))
+ }
+}
+
+/// Prepare a UEFI firmware pair for a single aarch64 VM:
+/// discover the host-shipped code + vars-template, copy the
+/// template to a per-VM NVRAM file inside the pool dir, return
+/// the paired paths for libvirt's `<loader>` + `<nvram>`.
+async fn ensure_vm_firmware(
+ vm_name: &str,
+ pool_path: &std::path::Path,
+) -> Result<UefiFirmware, ExecutorError> {
+ let discovered = discover_aarch64_firmware()?;
+ // Arch's `edk2-aarch64` ships CODE as a ~3 MiB raw edk2 build
+ // output, but QEMU's virt machine pflash region is fixed 64 MiB
+ // and refuses under-sized files. Pad (once, cached next to the
+ // pool) before handing the path to libvirt.
+ let padded_code =
+ ensure_code_pflash_padded(&discovered.code, &pool_path.join("aarch64-code-padded.fd"))
+ .await?;
+ let vars = pool_path.join(format!("{vm_name}-VARS.fd"));
+ copy_vars_template_for_vm(&discovered, &vars).await?;
+ info!(
+ "aarch64 firmware: code={padded_code:?} (padded from {:?}), \
+ nvram={vars:?} (from template {:?})",
+ discovered.code, discovered.vars_template
+ );
+ Ok(UefiFirmware {
+ code: padded_code,
+ vars,
+ })
+}
+
+async fn create_overlay(
+ base: &std::path::Path,
+ overlay: &std::path::Path,
+) -> Result<(), ExecutorError> {
+ let output = Command::new("qemu-img")
+ .args([
+ "create",
+ "-f",
+ "qcow2",
+ "-F",
+ "qcow2",
+ "-b",
+ base.to_str()
+ .ok_or_else(|| exec("base image path is not valid UTF-8"))?,
+ overlay
+ .to_str()
+ .ok_or_else(|| exec("overlay path is not valid UTF-8"))?,
+ ])
+ .stdout(Stdio::null())
+ .stderr(Stdio::piped())
+ .output()
+ .await
+ .map_err(|e| exec(format!("spawn qemu-img: {e}")))?;
+ if !output.status.success() {
+ return Err(exec(format!(
+ "qemu-img create overlay failed: {}",
+ String::from_utf8_lossy(&output.stderr).trim()
+ )));
+ }
+ Ok(())
+}
+
+async fn build_cloud_init_seed(
+ first_boot: &VmFirstBootConfig,
+ vm_name: &str,
+ pool_dir: &std::path::Path,
+) -> Result<PathBuf, ExecutorError> {
+ let hostname = first_boot
+ .hostname
+ .clone()
+ .unwrap_or_else(|| vm_name.to_string());
+ let admin_user = first_boot
+ .admin_user
+ .clone()
+ .unwrap_or_else(|| DEFAULT_ADMIN_USER.to_string());
+ let authorized_key = first_boot
+ .authorized_keys
+ .first()
+ .cloned()
+ .ok_or_else(|| exec("first_boot.authorized_keys must contain at least one key"))?;
+ build_seed_iso(
+ &CloudInitSeedConfig {
+ hostname: &hostname,
+ authorized_key: &authorized_key,
+ user: &admin_user,
+ extra_runcmd: vec![],
+ },
+ pool_dir,
+ )
+ .await
+ .map_err(|e: KvmError| exec(format!("cloud-init seed build: {e}")))
+}
+
+async fn refresh_pool(name: &str) -> Result<(), ExecutorError> {
+ let output = Command::new("virsh")
+ .args(["--connect", "qemu:///system", "pool-refresh", name])
+ .stdout(Stdio::null())
+ .stderr(Stdio::piped())
+ .output()
+ .await
+ .map_err(|e| exec(format!("spawn virsh pool-refresh: {e}")))?;
+ if !output.status.success()
{
+ return Err(exec(format!(
+ "virsh pool-refresh {name} failed: {}",
+ String::from_utf8_lossy(&output.stderr).trim()
+ )));
+ }
+ Ok(())
+}
+
+/// Poll (with 1 s backoff) until a TCP connection to `addr:port`
+/// completes a handshake within `budget`. Each individual connect
+/// attempt gets a 5 s timeout so a dropped/filtered SYN doesn't
+/// burn half the budget on a single attempt.
+async fn wait_for_tcp_port(
+ addr: IpAddr,
+ port: u16,
+ budget: std::time::Duration,
+) -> Result<(), ExecutorError> {
+ let deadline = std::time::Instant::now() + budget;
+ let mut attempts = 0u32;
+ loop {
+ attempts += 1;
+ let connect = tokio::net::TcpStream::connect((addr, port));
+ match tokio::time::timeout(std::time::Duration::from_secs(5), connect).await {
+ Ok(Ok(_)) => {
+ info!("{addr}:{port} reachable after {attempts} attempt(s)");
+ return Ok(());
+ }
+ _ => {}
+ }
+ if std::time::Instant::now() >= deadline {
+ return Err(exec(format!(
+ "TCP port {addr}:{port} did not accept connections within {:?} \
+ ({attempts} attempt(s)). The VM booted and got a DHCP lease, \
+ but the service on that port never came up — commonly sshd \
+ still starting, or cloud-init still in firstboot.",
+ budget
+ )));
+ }
+ tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+ }
+}
+
+fn exec(msg: impl Into<String>) -> ExecutorError {
+ ExecutorError::UnexpectedError(msg.into())
+}
diff --git a/harmony/src/modules/kvm/types.rs b/harmony/src/modules/kvm/types.rs
index 590bfe61..58b97fd5 100644
--- a/harmony/src/modules/kvm/types.rs
+++ b/harmony/src/modules/kvm/types.rs
@@ -1,6 +1,8 @@
 use harmony_types::net::MacAddress;
 use serde::{Deserialize, Serialize};
 
+pub use crate::domain::topology::VmArchitecture;
+
 /// Information about a VM's network interface, as reported by `virsh domiflist`.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct VmInterface {
@@ -139,6 +141,10 @@ impl BootDevice {
 pub struct VmConfig {
 /// VM name, must be unique on the host.
 pub name: String,
+ /// Guest CPU architecture. Defaults to
+ /// [`VmArchitecture::X86_64`].
+ #[serde(default)]
+ pub architecture: VmArchitecture,
 /// Number of virtual CPUs.
 pub vcpus: u32,
 /// Memory in mebibytes (MiB).
@@ -151,6 +157,24 @@ pub struct VmConfig {
 pub cdroms: Vec<CdromConfig>,
 /// Boot order. First entry has highest priority.
 pub boot_order: Vec<BootDevice>,
+ /// Optional UEFI firmware pair (code + per-VM NVRAM). Required
+ /// for aarch64 guests; unused for x86_64 (which boots via SeaBIOS
+ /// by default). The KVM topology resolves and populates this
+ /// when the VM's architecture requires it.
+ #[serde(default)]
+ pub firmware: Option<UefiFirmware>,
+}
+
+/// UEFI firmware file pair for the `<loader>`+`<nvram>` libvirt elements.
+/// Both paths must be readable by libvirt-qemu.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct UefiFirmware {
+ /// Read-only firmware code (e.g. `AAVMF_CODE.fd`,
+ /// `edk2-aarch64-code.fd`).
+ pub code: std::path::PathBuf,
+ /// Writable NVRAM variables. Typically a per-VM copy of the
+ /// vendor-shipped vars template.
+ pub vars: std::path::PathBuf,
 }
 
 impl VmConfig {
@@ -163,27 +187,45 @@
 #[derive(Debug)]
 pub struct VmConfigBuilder {
 name: String,
+ architecture: VmArchitecture,
 vcpus: u32,
 memory_mib: u64,
 disks: Vec<DiskConfig>,
 networks: Vec<NetworkRef>,
 cdroms: Vec<CdromConfig>,
 boot_order: Vec<BootDevice>,
+ firmware: Option<UefiFirmware>,
 }
 
 impl VmConfigBuilder {
 pub fn new(name: impl Into<String>) -> Self {
 Self {
 name: name.into(),
+ architecture: VmArchitecture::default(),
 vcpus: 2,
 memory_mib: 4096,
 disks: vec![],
 networks: vec![],
 cdroms: vec![],
 boot_order: vec![],
+ firmware: None,
 }
 }
 
+ /// Set the guest CPU architecture (default
+ /// [`VmArchitecture::X86_64`]). For aarch64 guests the caller
+ /// should also supply a [`UefiFirmware`] via [`firmware`].
+ pub fn architecture(mut self, arch: VmArchitecture) -> Self {
+ self.architecture = arch;
+ self
+ }
+
+ /// Attach a UEFI firmware pair (required for arm64 / aarch64).
+ pub fn firmware(mut self, firmware: UefiFirmware) -> Self {
+ self.firmware = Some(firmware);
+ self
+ }
+
 pub fn vcpus(mut self, vcpus: u32) -> Self {
 self.vcpus = vcpus;
 self
@@ -247,12 +289,14 @@ impl VmConfigBuilder {
 pub fn build(self) -> VmConfig {
 VmConfig {
 name: self.name,
+ architecture: self.architecture,
 vcpus: self.vcpus,
 memory_mib: self.memory_mib,
 disks: self.disks,
 networks: self.networks,
 cdroms: self.cdroms,
 boot_order: self.boot_order,
+ firmware: self.firmware,
 }
 }
 }
diff --git a/harmony/src/modules/kvm/vm_score.rs b/harmony/src/modules/kvm/vm_score.rs
deleted file mode 100644
index a1c59ac8..00000000
--- a/harmony/src/modules/kvm/vm_score.rs
+++ /dev/null
@@ -1,222 +0,0 @@
-//! [`KvmVmScore`] — thin Score that provisions one libvirt VM from a
-//! cloud image + a cloud-init seed ISO built from the caller's config.
-//!
-//! This is *not* the customer-facing device-setup Score. It's the test
-//! rig that stands in for "a freshly flashed Pi on the network" so the
-//! IoT walking-skeleton smoke test can run end-to-end without physical
-//! hardware. See [`crate::modules::iot::IotDeviceSetupScore`] for the
-//! post-provisioning configuration Score that targets the VM via its
-//! [`crate::modules::linux::LinuxHostTopology`].
-
-use std::net::IpAddr;
-use std::path::PathBuf;
-use std::process::Stdio;
-
-use async_trait::async_trait;
-use harmony_types::id::Id;
-use log::info;
-use serde::{Deserialize, Serialize};
-use tokio::process::Command;
-
-use crate::data::Version;
-use crate::domain::interpret::{
- Interpret, InterpretError, InterpretName, InterpretStatus, Outcome,
-};
-use crate::domain::inventory::Inventory;
-use crate::score::Score;
-use crate::topology::Topology;
-
-use super::cloudinit::{CloudInitSeedConfig, build_seed_iso};
-use super::topology::KvmHost;
-use super::types::{CdromConfig, DiskConfig, VmConfig};
-
-/// Everything `KvmVmScore` needs to bring a single VM up from a cloud
-/// image.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CloudInitVmConfig {
- /// libvirt domain name (must be unique on the host).
- pub vm_name: String,
- /// Guest hostname (set by cloud-init). Defaults to `vm_name` if None.
- pub hostname: Option<String>,
- pub vcpus: u32,
- pub memory_mib: u64,
- /// Absolute path to a pre-downloaded cloud image (qcow2). The Score
- /// uses this as the *backing file* for a per-VM overlay disk — so
- /// the base image stays pristine and multiple VMs can share it.
- /// Must be readable by libvirt-qemu (world-readable + traversable
- /// parent dirs is the lightest setup; a libvirt storage pool is the
- /// more serious option).
- pub base_image_path: PathBuf,
- /// Directory where the generated cloud-init seed ISO is written.
- pub seed_output_dir: PathBuf,
- /// Username created by cloud-init with passwordless sudo. The IoT
- /// agent runs under a *different*, service-scoped account that the
- /// device-setup Score creates later; this one is the admin identity
- /// that `IotDeviceSetupScore` SSHes in as to apply configuration.
- pub admin_user: String,
- /// openssh-format public key line; authorized for `admin_user`.
- pub authorized_key: String,
- /// libvirt network name to attach a NIC to. Typically `"default"`
- /// (the libvirt-shipped NAT bridge).
- pub network_name: String,
-}
-
-/// Provision a single VM from a cloud image + a generated cloud-init
-/// seed. Idempotent at the libvirt level: re-running does not recreate
-/// if a domain with this name already exists.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct KvmVmScore {
- pub config: CloudInitVmConfig,
-}
-
-impl<T: Topology + KvmHost> Score<T> for KvmVmScore {
- fn name(&self) -> String {
- format!("KvmVmScore({})", self.config.vm_name)
- }
-
- fn create_interpret(&self) -> Box<dyn Interpret<T>> {
- Box::new(KvmVmInterpret {
- config: self.config.clone(),
- version: Version::from("0.1.0").expect("static version"),
- status: InterpretStatus::QUEUED,
- })
- }
-}
-
-#[derive(Debug)]
-struct KvmVmInterpret {
- config: CloudInitVmConfig,
- version: Version,
- status: InterpretStatus,
-}
-
-#[async_trait]
-impl<T: Topology + KvmHost> Interpret<T> for KvmVmInterpret {
- fn get_name(&self) -> InterpretName {
- InterpretName::KvmVm
- }
- fn get_version(&self) -> Version {
- self.version.clone()
- }
- fn get_status(&self) -> InterpretStatus {
- self.status.clone()
- }
- fn get_children(&self) -> Vec<Id> {
- vec![]
- }
-
- async fn execute(
- &self,
- _inventory: &Inventory,
- topology: &T,
- ) -> Result<Outcome, InterpretError> {
- let cfg = &self.config;
- let hostname = cfg.hostname.clone().unwrap_or_else(|| cfg.vm_name.clone());
-
- // Per-VM overlay disk, backed by the base image. Gets created
- // (or re-created) only when the libvirt domain doesn't already
- // exist, so re-runs of an unchanged Score are true NOOPs. On a
- // fresh run, the overlay is wiped so cloud-init boots against a
- // clean rootfs.
- let executor = topology.kvm_executor();
- let overlay_path = cfg.seed_output_dir.join(format!("{}.qcow2", cfg.vm_name));
- let vm_already_exists = executor
- .vm_exists(&cfg.vm_name)
- .await
- .map_err(|e| InterpretError::new(format!("vm_exists: {e}")))?;
- if !vm_already_exists {
- if overlay_path.exists() {
- tokio::fs::remove_file(&overlay_path)
- .await
- .map_err(|e| InterpretError::new(format!("remove stale overlay: {e}")))?;
- }
- tokio::fs::create_dir_all(&cfg.seed_output_dir)
- .await
- .map_err(|e| InterpretError::new(format!("create seed dir: {e}")))?;
- let status = Command::new("qemu-img")
- .arg("create")
- .arg("-f")
- .arg("qcow2")
- .arg("-F")
- .arg("qcow2")
- .arg("-b")
- .arg(&cfg.base_image_path)
- .arg(&overlay_path)
- .stdout(Stdio::null())
- .stderr(Stdio::piped())
- .output()
- .await
- .map_err(|e| InterpretError::new(format!("spawn qemu-img: {e}")))?;
- if !status.status.success() {
- let stderr = String::from_utf8_lossy(&status.stderr);
- return Err(InterpretError::new(format!(
- "qemu-img create overlay failed: {}",
- stderr.trim()
- )));
- }
- info!(
- "created overlay disk {overlay_path:?} backed by {:?}",
- cfg.base_image_path
- );
- }
-
- // Build cloud-init seed ISO in the caller-chosen output dir.
- let seed_iso_path = build_seed_iso(
- &CloudInitSeedConfig {
- hostname: &hostname,
- authorized_key: &cfg.authorized_key,
- user: &cfg.admin_user,
- extra_runcmd: vec![],
- },
- &cfg.seed_output_dir,
- )
- .await
- .map_err(|e| InterpretError::new(format!("cloud-init seed build: {e}")))?;
- info!("cloud-init seed ready at {seed_iso_path:?}");
-
- // Compose VM config: base image as the root disk, seed ISO as a
- // secondary cdrom at hdb so libvirt boots off the disk (cloud-
- // init on the image finds CIDATA on hdb automatically).
- let vm_config = VmConfig {
- name: cfg.vm_name.clone(),
- vcpus: cfg.vcpus,
- memory_mib: cfg.memory_mib,
- disks: vec![DiskConfig {
- size_gb: 0,
- device: "vda".to_string(),
- pool: "default".to_string(),
- source_path: Some(overlay_path.to_string_lossy().into_owned()),
- }],
- networks: vec![super::types::NetworkRef::named(&cfg.network_name)],
- cdroms: vec![CdromConfig {
- source: seed_iso_path.to_string_lossy().into_owned(),
- device: "hdb".to_string(),
- }],
- boot_order: vec![super::types::BootDevice::Disk],
- };
-
- executor
- .ensure_vm(vm_config)
- .await
- .map_err(|e| InterpretError::new(format!("ensure_vm: {e}")))?;
- executor
- .start_vm(&cfg.vm_name)
- .await
- .map_err(|e| InterpretError::new(format!("start_vm: {e}")))?;
-
- // First-boot cloud-init can easily take 2-4 minutes on a Pi
- // target or a constrained CI worker: datasource detection,
- // package regeneration, SSH host-key generation, reboots. 300s
- // is a middle-of-the-road budget that still aborts before a
- // whole-cluster CI pipeline gets painful.
- let ip: IpAddr = executor
- .wait_for_ip(&cfg.vm_name, std::time::Duration::from_secs(300))
- .await
- .map_err(|e| InterpretError::new(format!("wait_for_ip: {e}")))?;
-
- Ok(Outcome::success_with_details(
- format!("VM {} reachable at {ip}", cfg.vm_name),
- vec![format!("seed_iso={seed_iso_path:?}"), format!("ip={ip}")],
- ))
- }
-}
diff --git a/harmony/src/modules/kvm/xml.rs b/harmony/src/modules/kvm/xml.rs
index cc4db9b2..06bbdfd7 100644
--- a/harmony/src/modules/kvm/xml.rs
+++ b/harmony/src/modules/kvm/xml.rs
@@ -35,13 +35,114 @@
 //! serialization. The `VmConfig`/`NetworkConfig` builder API stays unchanged —
 //! only the internal XML generation changes.
 
-use super::types::{CdromConfig, DiskConfig, ForwardMode, NetworkConfig, VmConfig};
+use super::types::{
+ CdromConfig, DiskConfig, ForwardMode, NetworkConfig, UefiFirmware, VmArchitecture, VmConfig,
+};
+
+/// Resolved arch-specific knobs that feed the libvirt domain
+/// template. Keeps the per-arch branching out of the format string
+/// so the XML template itself stays readable.
+struct DomainXmlParams {
+ /// `kvm` for hardware-accelerated runs, `qemu` for TCG
+ /// emulation (aarch64 on x86_64 today).
+ domain_type: &'static str,
+ /// XML namespace attribute appended to `<domain>`. Non-empty
+ /// only when we need the `qemu:commandline` escape hatch.
+ domain_namespace: &'static str,
+ /// libvirt `<os>/<type arch>`. Ubuntu/libvirt use the
+ /// `uname -m` names (`x86_64`, `aarch64`).
+ arch: &'static str,
+ /// libvirt `<os>/<type machine>`. `q35` for x86_64
+ /// (modern PCIe), `virt` for aarch64 (no legacy chipsets).
+ machine: &'static str,
+ /// Emulator binary libvirt should exec.
+ emulator: &'static str,
+ /// Full `<cpu>` element. `host-model` for x86_64 (lets
+ /// libvirt pick a matching KVM-accelerated CPU model), a
+ /// named model for aarch64 TCG (`-cpu max`).
+ cpu_block: &'static str,
+ /// Optional `<qemu:commandline>` block, rendered as the last
+ /// child of `<domain>`.
Used for QEMU CPU properties that
+ /// libvirt's schema doesn't know about (e.g. `pauth-impdef`,
+ /// which is a QEMU-defined property of `-cpu max`, not a CPU
+ /// feature in libvirt's feature database).
+ qemu_commandline: &'static str,
+ /// UEFI firmware to point `<loader>` + `<nvram>` at. None
+ /// for x86_64 (SeaBIOS default); required for aarch64.
+ firmware: Option<UefiFirmware>,
+}
+
+impl DomainXmlParams {
+ fn for_vm(vm: &VmConfig) -> Self {
+ match vm.architecture {
+ VmArchitecture::X86_64 => Self {
+ domain_type: "kvm",
+ domain_namespace: "",
+ arch: "x86_64",
+ machine: "q35",
+ emulator: "/usr/bin/qemu-system-x86_64",
+ // host-model: libvirt chooses a model compatible
+ // with the host CPU and exposes it to the guest.
+ // Safe default for bare-metal KVM.
+ cpu_block: " <cpu mode='host-model'/>",
+ qemu_commandline: "",
+ firmware: None,
+ },
+ VmArchitecture::Aarch64 => Self {
+ // TCG emulation on x86_64 hosts. On native aarch64
+ // hardware this would be `kvm` with no cpu_block
+ // override; we revisit when a native-aarch64
+ // runner shows up (single-line fork in for_vm).
+ domain_type: "qemu",
+ domain_namespace: " xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'",
+ arch: "aarch64",
+ machine: "virt",
+ emulator: "/usr/bin/qemu-system-aarch64",
+ // `<model>max</model>`
+ // tells libvirt to pass `-cpu max` to QEMU, but we
+ // cannot add `pauth-impdef` as a `<feature>` because
+ // libvirt's CPU-feature database doesn't know it —
+ // it's a QEMU property of `-cpu max`, not a CPU
+ // feature in the Arm sense. So we keep the libvirt
+ // `<cpu>` block minimal and override `-cpu` at the
+ // QEMU CLI layer below.
+ cpu_block: " <cpu mode='custom'>\n <model>max</model>\n </cpu>",
+ // libvirt's escape hatch: append raw QEMU CLI args
+ // after its own. QEMU takes the LAST `-cpu` / `-accel`
+ // as authoritative, so `-cpu max` (from <cpu>) followed
+ // by `-cpu max,pauth-impdef=on` yields max-with-
+ // pauth-impdef, and `-accel tcg` from libvirt
+ // followed by `-accel tcg,thread=multi` forces MTTCG.
+ //
+ // `pauth-impdef=on` switches pointer-auth to an
+ // impl-defined algorithm, cutting the largest TCG
+ // perf hit on arm64 (Linaro, Jan 2025).
+ //
+ // `thread=multi` enables MTTCG (multi-threaded TCG).
+ // Despite QEMU docs claiming MTTCG is default on
+ // aarch64, in practice (QEMU 10.2 observed here)
+ // cross-arch `-accel tcg` runs single-threaded and
+ // only vcpu.0 executes. Forcing it doubles throughput
+ // on 2-vcpu guests and is the difference between a
+ // 20-minute cold boot and a 10-minute one.
+ qemu_commandline: " <qemu:commandline>\n \
+ <qemu:arg value='-cpu'/>\n \
+ <qemu:arg value='max,pauth-impdef=on'/>\n \
+ <qemu:arg value='-accel'/>\n \
+ <qemu:arg value='tcg,thread=multi'/>\n \
+ </qemu:commandline>\n",
+ firmware: vm.firmware.clone(),
+ },
+ }
+ }
+}
 
 /// Renders the libvirt domain XML for a VM definition.
 ///
 /// The caller passes the image directory where qcow2 volumes are stored.
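+///
+/// Illustrative call for the x86_64 default:
+///
+/// ```ignore
+/// let vm = VmConfig::builder("demo").disk(10).build();
+/// let xml = domain_xml(&vm, "/var/lib/libvirt/images");
+/// assert!(xml.contains("<domain type='kvm'>"));
+/// assert!(xml.contains("mode='host-model'"));
+/// ```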
 pub fn domain_xml(vm: &VmConfig, image_dir: &str) -> String {
     let memory_kib = vm.memory_mib * 1024;
+    let params = DomainXmlParams::for_vm(vm);
 
     let os_boot = vm
         .boot_order
@@ -49,6 +150,16 @@ pub fn domain_xml(vm: &VmConfig, image_dir: &str) -> String {
         .map(|b| format!("    <boot dev='{}'/>\n", b.as_xml_dev()))
         .collect::<String>();
 
+    let os_firmware = match &params.firmware {
+        Some(fw) => format!(
+            "    <loader readonly='yes' type='pflash'>{code}</loader>\n    \
+             <nvram>{vars}</nvram>\n",
+            code = fw.code.display(),
+            vars = fw.vars.display()
+        ),
+        None => String::new(),
+    };
+
     let devices = {
         let disks = disk_devices(vm, image_dir);
         let cdroms = cdrom_devices(vm);
@@ -57,33 +168,48 @@ pub fn domain_xml(vm: &VmConfig, image_dir: &str) -> String {
     };
 
     format!(
-        r#"<domain type='kvm'>
+        r#"<domain type='{domain_type}'{domain_namespace}>
   <name>{name}</name>
   <memory unit='KiB'>{memory_kib}</memory>
   <vcpu>{vcpus}</vcpu>
   <os>
-    <type arch='x86_64' machine='q35'>hvm</type>
-{os_boot}  </os>
+    <type arch='{arch}' machine='{machine}'>hvm</type>
+{os_firmware}{os_boot}  </os>
-  <cpu mode='host-model'/>
+  {cpu_block}
   <devices>
-    <emulator>/usr/bin/qemu-system-x86_64</emulator>
+    <emulator>{emulator}</emulator>
{devices}
+    <rng model='virtio'>
+      <backend model='random'>/dev/urandom</backend>
+    </rng>
   </devices>
-</domain>"#,
+{qemu_commandline}</domain>"#,
+        domain_type = params.domain_type,
+        domain_namespace = params.domain_namespace,
         name = vm.name,
         memory_kib = memory_kib,
         vcpus = vm.vcpus,
+        arch = params.arch,
+        machine = params.machine,
+        os_firmware = os_firmware,
         os_boot = os_boot,
+        cpu_block = params.cpu_block,
+        emulator = params.emulator,
         devices = devices,
+        qemu_commandline = params.qemu_commandline,
     )
 }
@@ -318,6 +444,99 @@ mod tests {
         assert!(xml.contains("mode='host-model'"));
     }
 
+    // ── aarch64 ──────────────────────────────────────────────────────
+
+    #[test]
+    fn domain_xml_aarch64_defaults_to_qemu_tcg_with_virt_machine() {
+        use crate::modules::kvm::types::{UefiFirmware, VmArchitecture};
+        use std::path::PathBuf;
+        let vm = VmConfig::builder("arm-test")
+            .architecture(VmArchitecture::Aarch64)
+            .firmware(UefiFirmware {
+                code: PathBuf::from("/usr/share/AAVMF/AAVMF_CODE.fd"),
+                vars: PathBuf::from("/tmp/arm-test-VARS.fd"),
+            })
+            .disk(10)
+            .build();
+        let xml = domain_xml(&vm, "/tmp");
+        assert!(
+            xml.contains("<domain type='qemu'"),
+            "aarch64 on an x86_64 host must run under TCG, not KVM"
+        );
+        assert!(xml.contains("<type arch='aarch64' machine='virt'>hvm</type>"));
+        // The `<cpu>` stays libvirt-validated (just the model).
+        assert!(xml.contains("<model>max</model>"));
+        assert!(
+            !xml.contains("name='pauth-impdef'"),
+            "pauth-impdef is not a libvirt CPU feature; \
+             must not leak into the <cpu> block"
+        );
+        // Root `<domain>` must declare the qemu namespace — libvirt
+        // rejects `<qemu:commandline>` elements without it.
+        assert!(
+            xml.contains("xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'"),
+            "qemu namespace must be declared on <domain> when we use qemu:commandline"
+        );
+        // pauth-impdef is passed as a QEMU CLI override — the final
+        // `-cpu max,pauth-impdef=on` arg wins over libvirt's `-cpu max`.
+        assert!(xml.contains("<qemu:arg value='-cpu'/>"));
+        assert!(
+            xml.contains("<qemu:arg value='max,pauth-impdef=on'/>"),
+            "pauth-impdef=on is the single biggest TCG arm64 perf knob; \
+             must reach QEMU via qemu:commandline override"
+        );
+        // MTTCG override — without this, cross-arch TCG runs
+        // single-threaded (vcpu.1.time stays at 0).
+        assert!(xml.contains("<qemu:arg value='-accel'/>"));
+        assert!(
+            xml.contains("<qemu:arg value='tcg,thread=multi'/>"),
+            "thread=multi doubles throughput on multi-vcpu guests; \
+             libvirt passes bare `-accel tcg` without it"
+        );
+    }
+
+    #[test]
+    fn domain_xml_aarch64_emits_pflash_loader_and_nvram() {
+        use crate::modules::kvm::types::{UefiFirmware, VmArchitecture};
+        use std::path::PathBuf;
+        let vm = VmConfig::builder("arm-efi")
+            .architecture(VmArchitecture::Aarch64)
+            .firmware(UefiFirmware {
+                code: PathBuf::from("/usr/share/AAVMF/AAVMF_CODE.fd"),
+                vars: PathBuf::from("/var/lib/libvirt/nvram/arm-efi_VARS.fd"),
+            })
+            .build();
+        let xml = domain_xml(&vm, "/tmp");
+        assert!(xml.contains(
+            "<loader readonly='yes' type='pflash'>/usr/share/AAVMF/AAVMF_CODE.fd</loader>"
+        ));
+        assert!(xml.contains("<nvram>/var/lib/libvirt/nvram/arm-efi_VARS.fd</nvram>"));
+    }
+
+    #[test]
+    fn domain_xml_x86_64_has_no_efi_loader() {
+        let vm = VmConfig::builder("x86-bios").build();
+        let xml = domain_xml(&vm, "/tmp");
+        assert!(!xml.contains("<loader"));
+        assert!(!xml.contains("<nvram>"));
+    }
+}
diff --git a/harmony/src/modules/linux/ansible_configurator.rs b/harmony/src/modules/linux/ansible_configurator.rs
--- a/harmony/src/modules/linux/ansible_configurator.rs
+++ b/harmony/src/modules/linux/ansible_configurator.rs
-//! `ansible all -i '<host>,' -m <module> -a '<args json>'`, with `--stdout-callback=oneline` so
-//! we get one `host | VERB => {json}` line per host. Harmony owns
-//! 100% of the orchestration/ordering; Ansible owns *only* the per-host
-//! idempotent module execution. This is the same reason we picked
-//! `podman-api` over shelling to `podman` elsewhere — use the mature
-//! upstream where it's mature (apt/systemd/user module idempotency),
-//! don't adopt its orchestration model (playbooks, inventory, YAML
-//! templating, the Kubespray mess).
+//! `ansible all -i '<host>,' -m <module> -a '<args json>'`, with `--stdout-callback=oneline`
+//! so we get one `host | VERB => {json}` line per host. Harmony owns
+//! 100% of the orchestration/ordering; Ansible owns only per-host
+//! idempotent module execution. Matches the reasoning behind picking
+//! `podman-api` over shelling to `podman` elsewhere: use mature
+//! upstream where upstream is mature (apt/systemd/user/file module
+//! idempotency), don't adopt its orchestration model.
 //!
 //! The Ansible runtime itself lives in a managed venv under
-//! [`HARMONY_DATA_DIR`]; see [`super::ansible_venv::ensure_ansible_venv`].
-//! The operator does *not* need to install `ansible` system-wide.
+//! [`HARMONY_DATA_DIR`]; see
+//! [`super::ansible_venv::ensure_ansible_venv`]. The operator does
+//! *not* need to install `ansible` system-wide.
 
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::process::Stdio;
 
 use harmony_types::net::IpAddress;
+use serde::Serialize;
 use serde_json::{Value, json};
 use tokio::process::Command;
 
@@ -28,6 +31,7 @@ use crate::domain::topology::{
 use crate::executors::ExecutorError;
 
 use super::ansible_venv::ensure_ansible_venv;
+use super::ssh_executor::ssh_exec;
 use super::topology::SshCredentials;
 
 pub struct AnsibleHostConfigurator;
 
@@ -49,28 +53,34 @@ impl AnsibleHostConfigurator {
         creds: &SshCredentials,
         name: &str,
     ) -> Result<ChangeReport, ExecutorError> {
-        // Target OS for v0 is Debian-family (Raspbian, Ubuntu — see
-        // ROADMAP/iot_platform/v0_walking_skeleton.md §5.3). Using
-        // `ansible.builtin.apt` directly (vs the generic `package`
-        // module) lets us set `update_cache=true` so `apt install` on
-        // a fresh cloud image doesn't fail with "no package matching"
-        // because the cache was never populated. `cache_valid_time`
-        // keeps re-runs cheap: the update is skipped if the cache was
-        // refreshed within the last hour.
+        // Distro dispatch lives inside this function — that's the
+        // encapsulation we want. Callers say "install podman"; we
+        // pick apt/dnf/pacman/apk.
Debian-family is the only dispatch + // currently wired because it's our first concrete target (IoT + // runs on Raspbian/Ubuntu per ROADMAP/iot_platform/ + // v0_walking_skeleton.md §5.3). Extending to RHEL/Fedora/ + // Alpine is a matter of detecting the family here and picking + // `ansible.builtin.dnf` / `community.general.pacman` / + // `community.general.apk` with equivalent cache-warming + // flags — no trait or capability change. When we have more + // than one distro it may also be worth a companion Score + // (`EnsureQemuKvmInstalled`, etc.) that canonicalizes the + // cross-family package names for common infrastructure. // - // When we grow RHEL-family support, switch on the distro - // (cached in topology) and dispatch to `ansible.builtin.dnf` - // with its own cache warming. `package` alone isn't enough. + // `update_cache: true` + `cache_valid_time: 3600` makes sure a + // fresh cloud image's empty apt cache gets populated before + // the install, while re-runs within the hour stay cheap. + let apt_args = AptArgs { + name, + state: "present", + update_cache: true, + cache_valid_time: 3600, + }; self.run_module( host, creds, "ansible.builtin.apt", - json!({ - "name": name, - "state": "present", - "update_cache": true, - "cache_valid_time": 3600, - }), + to_value(&apt_args)?, true, None, ) @@ -83,24 +93,16 @@ impl AnsibleHostConfigurator { creds: &SshCredentials, spec: &UserSpec, ) -> Result { - let mut args = json!({ - "name": spec.name, - "state": "present", - "system": spec.system, - "create_home": spec.create_home, - }); - if let Some(group) = &spec.group { - args["group"] = json!(group); - } - if !spec.supplementary_groups.is_empty() { - args["groups"] = json!(spec.supplementary_groups); - args["append"] = json!(true); - } - if let Some(shell) = &spec.shell { - args["shell"] = json!(shell); - } - self.run_module(host, creds, "ansible.builtin.user", args, true, None) - .await + let args = AnsibleUserArgs::from(spec); + self.run_module( + host, + creds, + "ansible.builtin.user", + to_value(&args)?, + true, + None, + ) + .await } pub async fn ensure_file( @@ -109,23 +111,24 @@ impl AnsibleHostConfigurator { creds: &SshCredentials, spec: &FileSpec, ) -> Result { - // Ansible's copy module doesn't auto-create parent dirs, so + // Ansible's `copy` module doesn't auto-create parent dirs, so // writes into fresh paths like `/etc/iot-agent/config.toml` // fail with "Destination directory … does not exist". Create // the parent first via the `file` module; state=directory is // idempotent so this is a cheap noop on re-run. - if let Some(parent) = std::path::Path::new(&spec.path).parent() { + if let Some(parent) = Path::new(&spec.path).parent() { let parent_str = parent.to_string_lossy().to_string(); if !parent_str.is_empty() && parent_str != "/" { + let dir_args = AnsibleFileArgs { + path: &parent_str, + state: "directory", + mode: Some("0755"), + }; self.run_module( host, creds, "ansible.builtin.file", - json!({ - "path": parent_str, - "state": "directory", - "mode": "0755", - }), + to_value(&dir_args)?, true, None, ) @@ -133,34 +136,16 @@ impl AnsibleHostConfigurator { } } - let mut args = json!({ - "dest": spec.path, - }); - match &spec.source { - FileSource::Content(s) => { - args["content"] = json!(s); - } - FileSource::LocalPath(p) => { - // Ansible's copy module reads this path on the - // controller and ships the bytes over its usual SSH - // transport (not via argv), which is what lets us - // deliver binary files larger than ARG_MAX. 
- args["src"] = json!(p); - } - } - if let Some(owner) = &spec.owner { - args["owner"] = json!(owner); - } - if let Some(group) = &spec.group { - args["group"] = json!(group); - } - if let Some(mode) = spec.mode { - // Ansible accepts octal as a string. The leading `0` isn't - // strictly required but makes it unambiguous to a human. - args["mode"] = json!(format!("0{mode:o}")); - } - self.run_module(host, creds, "ansible.builtin.copy", args, true, None) - .await + let args = AnsibleCopyArgs::from(spec); + self.run_module( + host, + creds, + "ansible.builtin.copy", + to_value(&args)?, + true, + None, + ) + .await } pub async fn ensure_systemd_unit( @@ -169,7 +154,9 @@ impl AnsibleHostConfigurator { creds: &SshCredentials, spec: &SystemdUnitSpec, ) -> Result { - // Step 1: write the unit file. + // Two ad-hoc invocations: drop the unit file, then enable + + // (optionally) start via `ansible.builtin.systemd`, which + // handles daemon-reload as part of the same module call. let (unit_path, scope_user) = match &spec.scope { SystemdScope::System => (format!("/etc/systemd/system/{}.service", spec.name), None), SystemdScope::User(u) => ( @@ -184,39 +171,32 @@ impl AnsibleHostConfigurator { let elevate = scope_user.is_none(); let become_user = scope_user.as_deref(); - let file_changed = self - .run_module( - host, - creds, - "ansible.builtin.copy", - json!({ - "dest": unit_path, - "content": spec.unit_content, - "mode": "0644", - }), - true, - None, - ) - .await?; + let file_spec = FileSpec { + path: unit_path.clone(), + source: FileSource::Content(spec.unit_content.clone()), + owner: None, + group: None, + mode: Some(0o644), + }; + let file_changed = self.ensure_file(host, creds, &file_spec).await?; - // Step 2: daemon-reload + enable + start. - let mut systemd_args = json!({ - "name": spec.name, - "enabled": true, - "daemon_reload": true, - }); - if spec.start_immediately { - systemd_args["state"] = json!("started"); - } - if scope_user.is_some() { - systemd_args["scope"] = json!("user"); - } + let systemd_args = AnsibleSystemdArgs { + name: &spec.name, + enabled: Some(true), + state: if spec.start_immediately { + Some("started") + } else { + None + }, + daemon_reload: true, + scope: scope_user.as_deref().map(|_| "user"), + }; let systemd_changed = self .run_module( host, creds, "ansible.builtin.systemd", - systemd_args, + to_value(&systemd_args)?, elevate, become_user, ) @@ -234,22 +214,22 @@ impl AnsibleHostConfigurator { name: &str, scope: SystemdScope, ) -> Result { - let mut args = json!({ - "name": name, - "state": "restarted", - }); - let (elevate, become_user) = match &scope { - SystemdScope::System => (true, None), - SystemdScope::User(u) => { - args["scope"] = json!("user"); - (true, Some(u.as_str().to_string())) - } + let (elevate, become_user, scope_str) = match &scope { + SystemdScope::System => (true, None, None), + SystemdScope::User(u) => (true, Some(u.as_str().to_string()), Some("user")), + }; + let args = AnsibleSystemdArgs { + name, + enabled: None, + state: Some("restarted"), + daemon_reload: false, + scope: scope_str, }; self.run_module( host, creds, "ansible.builtin.systemd", - args, + to_value(&args)?, elevate, become_user.as_deref(), ) @@ -262,37 +242,30 @@ impl AnsibleHostConfigurator { creds: &SshCredentials, user: &str, ) -> Result { - // Ad-hoc mode has no `changed_when`, so we sentinel through - // stdout: the script echoes either "noop" or "changed" and we - // parse that out of the module's reported `stdout` field. 
- // `loginctl enable-linger` is itself idempotent; the wrapping - // `if` is purely to distinguish the two cases for reconcile- - // restart decisions upstream. - let script = format!( - "if loginctl show-user {user} 2>/dev/null | grep -q '^Linger=yes'; then \ - echo noop; \ - else \ - loginctl enable-linger {user}; \ - echo changed; \ - fi" - ); - let output = self - .run_module_full( - host, - creds, - "ansible.builtin.shell", - json!({ "cmd": script }), - true, - None, - ) - .await?; - let changed = output - .payload - .get("stdout") - .and_then(Value::as_str) - .map(|s| s.trim() == "changed") - .unwrap_or(false); - Ok(ChangeReport { changed }) + // systemd's user-session linger is the existence of + // `/var/lib/systemd/linger/` (systemd-logind(8)). Two + // direct-over-SSH probes — no Ansible, because this is a + // tiny shell check and a single `loginctl` call with no + // per-module idempotency magic to lean on. + // + // Why not just `touch` the marker file? Touching it creates + // the file but doesn't fire the dbus signal that systemd- + // logind needs to actually start the user manager; every + // subsequent `systemctl --user …` then fails with "Failed + // to connect to bus". `loginctl enable-linger` does both. + let check = ssh_exec( + host, + creds, + &format!("test -e /var/lib/systemd/linger/{user}"), + ) + .await?; + if check.rc == 0 { + return Ok(ChangeReport::NOOP); + } + ssh_exec(host, creds, &format!("sudo loginctl enable-linger {user}")) + .await? + .into_successful()?; + Ok(ChangeReport::CHANGED) } pub async fn ensure_user_unit_active( @@ -302,20 +275,28 @@ impl AnsibleHostConfigurator { user: &str, unit: &str, ) -> Result { - self.run_module( - host, - creds, - "ansible.builtin.systemd", - json!({ - "name": unit, - "enabled": true, - "state": "started", - "scope": "user", - }), - true, - Some(user), - ) - .await + // `ansible.builtin.systemd scope: user` needs + // `XDG_RUNTIME_DIR` in the systemctl process env — a task- + // level `environment:` keyword only available in playbooks. + // Rather than pipe a one-task playbook, use russh directly: + // two small SSH calls, no Python wrapper, no inline YAML. + // + // Report `changed=true` unconditionally. systemctl + // enable --now is idempotent at the systemd level so re- + // running does no harm; reconcile-restart decisions + // upstream see only the outer-Score changes they care + // about (TOML and unit file changes), not this start- + // verification step. + let id_out = ssh_exec(host, creds, &format!("id -u {user}")) + .await? + .into_successful()?; + let uid = id_out.stdout.trim(); + let cmd = format!( + "sudo -u {user} env XDG_RUNTIME_DIR=/run/user/{uid} \ + systemctl --user enable --now {unit}" + ); + ssh_exec(host, creds, &cmd).await?.into_successful()?; + Ok(ChangeReport::CHANGED) } // ----------------------------------------------------------------- @@ -339,9 +320,6 @@ impl AnsibleHostConfigurator { }) } - /// Like [`run_module`] but returns the full module payload so - /// callers that need to inspect module-specific fields (e.g. the - /// `stdout` of a `shell` invocation) can. async fn run_module_full( &self, host: IpAddress, @@ -353,9 +331,9 @@ impl AnsibleHostConfigurator { ) -> Result { let bins = ensure_ansible_venv().await?; // Passing `-a '{}'` trips ansible-core 2.17's "extra params" - // check on parameterless modules (e.g. `ping`) — it seems to - // read the empty brace as positional rather than an empty dict. - // Skip `-a` entirely when there are no args to pass. 
+ // check on parameterless modules (ping et al.) — it reads the + // empty brace as positional rather than an empty dict. Skip + // `-a` entirely when there are no args to pass. let args_json_opt = if args.as_object().is_some_and(|m| m.is_empty()) { None } else { @@ -366,64 +344,58 @@ impl AnsibleHostConfigurator { }; let inventory = format!("{host},"); - let ssh_common = - "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10"; + // `--ssh-common-args=` (equals form) is required: the + // value starts with `-o`, which otherwise gets re-parsed by + // ansible's argparse as its own `-o` flag. + let ssh_common_arg = "--ssh-common-args=-o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10" + .to_string(); + let private_key = creds + .private_key_path + .to_str() + .ok_or_else(|| exec("ssh private key path is not valid UTF-8"))?; - // `--ssh-common-args=` (equals form, single arg) is - // required: the value starts with `-o` which otherwise gets - // re-parsed by ansible's argparse as its own `-o` (oneline - // output) flag and the whole command fails with a help dump. - let ssh_common_arg = format!("--ssh-common-args={ssh_common}"); - - let mut cmd = Command::new(&bins.ansible); - cmd.arg("all") - .arg("-i") - .arg(&inventory) - .arg("-m") - .arg(module) - .arg("-u") - .arg(&creds.user) - .arg("--private-key") - .arg(&creds.private_key_path) - .arg(&ssh_common_arg); - if let Some(args_json) = args_json_opt.as_ref() { - cmd.arg("-a").arg(args_json); + let mut argv: Vec = vec![ + "all".into(), + "-i".into(), + inventory, + "-m".into(), + module.into(), + "-u".into(), + creds.user.clone(), + "--private-key".into(), + private_key.into(), + ssh_common_arg, + ]; + if let Some(args_json) = args_json_opt { + argv.push("-a".into()); + argv.push(args_json); } - cmd + if elevate { + argv.push("--become".into()); + } + if let Some(u) = become_user { + argv.push("--become-user".into()); + argv.push(u.into()); + } + if let Some(py) = &creds.remote_python { + argv.push("-e".into()); + argv.push(format!("ansible_python_interpreter={py}")); + } + + let output = Command::new(&bins.ansible) + .args(&argv) // Ad-hoc mode ignores ANSIBLE_STDOUT_CALLBACK unless - // ANSIBLE_LOAD_CALLBACK_PLUGINS is also set — a quirk - // carried over from ansible's early days when ad-hoc was a - // play-free code path. Without both, we get the default - // "one key-value block spread over many lines" format that - // doesn't parse. + // ANSIBLE_LOAD_CALLBACK_PLUGINS is also set. .env("ANSIBLE_LOAD_CALLBACK_PLUGINS", "True") .env("ANSIBLE_STDOUT_CALLBACK", "oneline") .env("ANSIBLE_HOST_KEY_CHECKING", "False") .env("ANSIBLE_DEPRECATION_WARNINGS", "False") - // Pipelining ships the module payload over SSH stdin rather - // than writing it to a temp file under the remote user's - // home first — which matters because when we `become` an - // unprivileged user (e.g. iot-agent for user-scope systemd - // operations), ansible's default temp-file shuffle trips - // over an ACL fallback that doesn't work on most Linux - // distros. Pipelining avoids the problem entirely. + // Pipelining avoids the become-to-unprivileged-user temp + // file dance (ansible falls back to an ACL chmod syntax + // no Linux distro accepts). .env("ANSIBLE_PIPELINING", "True") - // Keep control sockets inside our data dir so multiple - // Harmony processes don't collide in /tmp. 
- .env("ANSIBLE_SSH_CONTROL_PATH_DIR", control_path_dir()); - - if elevate { - cmd.arg("--become"); - } - if let Some(u) = become_user { - cmd.arg("--become-user").arg(u); - } - if let Some(py) = &creds.remote_python { - cmd.arg("-e") - .arg(format!("ansible_python_interpreter={py}")); - } - - let output = cmd + .env("ANSIBLE_SSH_CONTROL_PATH_DIR", control_path_dir()) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .output() @@ -451,32 +423,128 @@ impl Default for AnsibleHostConfigurator { } } -/// Parsed shape of one oneline-callback line. -/// -/// The `oneline` callback prints exactly one line per targeted host, -/// shaped `" | => {JSON}"`. We split on `" | "` then -/// `" => "` and treat `FAILED!` / `UNREACHABLE!` as errors. The JSON -/// payload always includes `changed: bool` for modules that report -/// change; a few (e.g. `shell`) always report changed, which is why -/// we also keep the raw payload for callers who want their own -/// idempotency signal. +// --------------------------------------------------------------------- +// Typed module argument structs (serialized to the JSON dict ansible +// expects). Prefer these over ad-hoc `json!` macros — easier to spot +// typos at compile time and easier to grow as modules gain fields. +// --------------------------------------------------------------------- + +#[derive(Debug, Serialize)] +struct AptArgs<'a> { + name: &'a str, + state: &'a str, + update_cache: bool, + cache_valid_time: u32, +} + +#[derive(Debug, Serialize)] +struct AnsibleFileArgs<'a> { + path: &'a str, + state: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + mode: Option<&'a str>, +} + +#[derive(Debug, Serialize)] +struct AnsibleUserArgs<'a> { + name: &'a str, + state: &'a str, + system: bool, + create_home: bool, + #[serde(skip_serializing_if = "Option::is_none")] + group: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + groups: Option<&'a [String]>, + #[serde(skip_serializing_if = "Option::is_none")] + append: Option, + #[serde(skip_serializing_if = "Option::is_none")] + shell: Option<&'a str>, +} + +impl<'a> From<&'a UserSpec> for AnsibleUserArgs<'a> { + fn from(spec: &'a UserSpec) -> Self { + let has_extra = !spec.supplementary_groups.is_empty(); + Self { + name: &spec.name, + state: "present", + system: spec.system, + create_home: spec.create_home, + group: spec.group.as_deref(), + groups: if has_extra { + Some(&spec.supplementary_groups) + } else { + None + }, + append: if has_extra { Some(true) } else { None }, + shell: spec.shell.as_deref(), + } + } +} + +#[derive(Debug, Serialize)] +struct AnsibleCopyArgs<'a> { + dest: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + content: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + src: Option, + #[serde(skip_serializing_if = "Option::is_none")] + owner: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + group: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + mode: Option, +} + +impl<'a> From<&'a FileSpec> for AnsibleCopyArgs<'a> { + fn from(spec: &'a FileSpec) -> Self { + let (content, src): (Option<&str>, Option) = match &spec.source { + FileSource::Content(s) => (Some(s.as_str()), None), + FileSource::LocalPath(p) => (None, Some(p.to_string_lossy().into_owned())), + }; + Self { + dest: &spec.path, + content, + src, + owner: spec.owner.as_deref(), + group: spec.group.as_deref(), + mode: spec.mode.map(|m| format!("0{m:o}")), + } + } +} + +#[derive(Debug, Serialize)] +struct AnsibleSystemdArgs<'a> { + 
name: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + enabled: Option, + #[serde(skip_serializing_if = "Option::is_none")] + state: Option<&'a str>, + #[serde(skip_serializing_if = "std::ops::Not::not")] + daemon_reload: bool, + #[serde(skip_serializing_if = "Option::is_none")] + scope: Option<&'a str>, +} + +fn to_value(value: &T) -> Result { + serde_json::to_value(value).map_err(|e| exec(format!("serialize ansible args: {e}"))) +} + +// --------------------------------------------------------------------- +// oneline callback parsing +// --------------------------------------------------------------------- + struct ModuleOutput { changed: bool, + #[allow(dead_code)] payload: Value, } fn parse_oneline(stdout: &str) -> Result { - // The oneline callback emits one of three shapes depending on the - // module. We disambiguate by the `" => "` / `": "` separators. - // + // Shapes the oneline callback emits: // Success with JSON: " | VERB => {json}" // Unreachable (no JSON): " | VERB!: " - // Shell/command: " | VERB | rc=N | (stdout) ... | (stderr) ..." - // - // The shell/command shape is oneline's per-module override for - // anything whose return value includes `stdout`/`stderr`; we - // reconstruct a synthetic JSON payload so downstream callers see a - // consistent shape. + // Shell/command: " | VERB | rc=N | (stdout) … | (stderr) …" let line = stdout .lines() .find(|l| l.contains(" | ")) @@ -485,7 +553,6 @@ fn parse_oneline(stdout: &str) -> Result { .split_once(" | ") .expect("contains(' | ') just matched"); - // Unreachable: "VERB!: msg" — no JSON, no pipe-delimited rc field. if let Some((verb_with_bang, msg)) = rest.split_once(": ") && verb_with_bang.ends_with('!') && !verb_with_bang.contains(" | ") @@ -493,7 +560,6 @@ fn parse_oneline(stdout: &str) -> Result { return Err(format!("{verb_with_bang} {msg}")); } - // Shell/command shape: presence of ` | rc=` after the verb. if let Some((verb, tail)) = rest.split_once(" | rc=") { let (rc_str, payload_tail) = tail.split_once(" | ").unwrap_or((tail, "")); let rc: i64 = rc_str.trim().parse().unwrap_or(-1); @@ -513,7 +579,6 @@ fn parse_oneline(stdout: &str) -> Result { }); } - // Default shape: "VERB => {json}". let (verb, json_blob) = rest .split_once(" => ") .ok_or_else(|| format!("no ' => ' separator in line: {line}"))?; @@ -540,8 +605,6 @@ fn parse_oneline(stdout: &str) -> Result { } } -/// Extract `(stdout) X (stderr) Y` segments from a shell-format line -/// remainder. Either or both may be absent. fn extract_std(tail: &str) -> (String, String) { let mut out = String::new(); let mut err = String::new(); @@ -568,5 +631,8 @@ fn exec(msg: impl Into) -> ExecutorError { ExecutorError::UnexpectedError(msg.into()) } +/// Unused placeholder kept to remind us that `PathBuf` is in the +/// module's vocabulary — a future `AnsibleFetchArgs` or +/// `AnsibleArchiveArgs` will use it. #[allow(dead_code)] -fn _ensure_path_exists(_: &Path) {} +fn _pathbuf_placeholder(_: PathBuf) {} diff --git a/harmony/src/modules/linux/ansible_venv.rs b/harmony/src/modules/linux/ansible_venv.rs index 60900cd1..e12723d9 100644 --- a/harmony/src/modules/linux/ansible_venv.rs +++ b/harmony/src/modules/linux/ansible_venv.rs @@ -63,22 +63,27 @@ async fn provision_venv() -> Result { .map_err(|e| exec(format!("create venv dir {venv_dir:?}: {e}")))?; info!("creating ansible venv at {venv_dir:?}"); - run(Command::new(&python).arg("-m").arg("venv").arg(&venv_dir)) - .await - .map_err(|e| { - exec(format!( - "python3 -m venv failed: {e}. 
On Debian/Ubuntu install the venv package: \ + run(Command::new(&python).args([ + std::ffi::OsStr::new("-m"), + std::ffi::OsStr::new("venv"), + venv_dir.as_os_str(), + ])) + .await + .map_err(|e| { + exec(format!( + "python3 -m venv failed: {e}. On Debian/Ubuntu install the venv package: \ `apt install python3-venv`. On Arch venv is bundled with `python`." - )) - })?; + )) + })?; let pip = venv_dir.join("bin").join("pip"); info!("installing {ANSIBLE_CORE_SPEC} into ansible venv (first-run only)"); - run(Command::new(&pip) - .arg("install") - .arg("--quiet") - .arg("--disable-pip-version-check") - .arg(ANSIBLE_CORE_SPEC)) + run(Command::new(&pip).args([ + "install", + "--quiet", + "--disable-pip-version-check", + ANSIBLE_CORE_SPEC, + ])) .await .map_err(|e| exec(format!("pip install {ANSIBLE_CORE_SPEC} failed: {e}")))?; @@ -98,8 +103,7 @@ async fn find_python3() -> Result { // rather than a Rust crate to keep our dependency surface thin. for candidate in ["python3", "python"] { let status = Command::new("sh") - .arg("-c") - .arg(format!("command -v {candidate}")) + .args(["-c", &format!("command -v {candidate}")]) .stdout(Stdio::piped()) .stderr(Stdio::null()) .output() diff --git a/harmony/src/modules/linux/mod.rs b/harmony/src/modules/linux/mod.rs index b96b4a13..d0b69a7c 100644 --- a/harmony/src/modules/linux/mod.rs +++ b/harmony/src/modules/linux/mod.rs @@ -1,7 +1,9 @@ mod ansible_configurator; mod ansible_venv; +mod ssh_executor; mod topology; pub use ansible_configurator::AnsibleHostConfigurator; pub use ansible_venv::{AnsibleBinaries, ensure_ansible_venv}; +pub use ssh_executor::{SshCommandOutput, ssh_exec}; pub use topology::{LinuxHostTopology, SshCredentials}; diff --git a/harmony/src/modules/linux/ssh_executor.rs b/harmony/src/modules/linux/ssh_executor.rs new file mode 100644 index 00000000..c20a6243 --- /dev/null +++ b/harmony/src/modules/linux/ssh_executor.rs @@ -0,0 +1,135 @@ +//! Direct SSH command execution via russh. +//! +//! Used for one-shot shell-outs that don't benefit from Ansible's +//! idempotency story: running `loginctl enable-linger`, invoking +//! `systemctl --user` with a specific `XDG_RUNTIME_DIR` in the +//! process env, etc. Ansible's `command` module would be a +//! Python-wrapped SSH round trip for zero added value — whereas +//! russh is already a workspace dependency and gives us the exit +//! code, stdout, and stderr in a typed struct. + +use std::sync::Arc; + +use async_trait::async_trait; +use harmony_types::net::IpAddress; +use russh::ChannelMsg; +use russh::client::{self, Handler}; +use russh::keys::{key, load_secret_key}; + +use crate::executors::ExecutorError; + +use super::topology::SshCredentials; + +/// Typed result of a single remote command execution. +#[derive(Debug, Clone)] +pub struct SshCommandOutput { + pub rc: i32, + pub stdout: String, + pub stderr: String, +} + +impl SshCommandOutput { + /// Returns `Ok(self)` when rc == 0, else an `ExecutorError` + /// carrying the non-zero exit and stderr. Convenience for + /// callers that want to treat rc!=0 as a hard failure. + pub fn into_successful(self) -> Result { + if self.rc == 0 { + Ok(self) + } else { + Err(ExecutorError::UnexpectedError(format!( + "ssh command exited with rc={}: {}", + self.rc, + self.stderr.trim() + ))) + } + } +} + +/// Run `command_line` on the remote host over SSH using the +/// caller's credentials. 
`command_line` is passed verbatim to the +/// remote default shell (sshd exec channel) — callers that need +/// strict argv semantics should shell-quote their arguments +/// themselves. +pub async fn ssh_exec( + host: IpAddress, + creds: &SshCredentials, + command_line: &str, +) -> Result { + let key_pair = load_secret_key(&creds.private_key_path, None).map_err(|e| { + ExecutorError::AuthenticationError(format!( + "load ssh key {:?}: {e}", + creds.private_key_path + )) + })?; + + let config = Arc::new(client::Config { + inactivity_timeout: Some(std::time::Duration::from_secs(60)), + ..client::Config::default() + }); + let mut handle = client::connect(config, (host, 22), TrustAllHandler) + .await + .map_err(|e| ExecutorError::NetworkError(format!("ssh connect {host}: {e}")))?; + + let auth_ok = handle + .authenticate_publickey(&creds.user, Arc::new(key_pair)) + .await + .map_err(|e| ExecutorError::AuthenticationError(format!("ssh auth: {e}")))?; + if !auth_ok { + return Err(ExecutorError::AuthenticationError(format!( + "ssh pubkey auth rejected for {}@{host}", + creds.user + ))); + } + + let mut channel = handle + .channel_open_session() + .await + .map_err(|e| ExecutorError::NetworkError(format!("ssh channel: {e}")))?; + channel + .exec(true, command_line) + .await + .map_err(|e| ExecutorError::NetworkError(format!("ssh exec: {e}")))?; + + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + let mut rc: Option = None; + // Drain every message the channel produces. Some sshd + // implementations emit `ExitStatus` *after* `Eof`, so + // breaking on `Eof` loses the rc. `wait()` returns `None` + // when the channel is actually done. + while let Some(msg) = channel.wait().await { + match msg { + ChannelMsg::Data { data } => stdout.extend_from_slice(&data), + // ssh channel extension codes: 1 = stderr (RFC 4254 §5.2). + // Other ext values (none currently defined) are ignored. + ChannelMsg::ExtendedData { data, ext: 1 } => { + stderr.extend_from_slice(&data); + } + ChannelMsg::ExitStatus { exit_status } => rc = Some(exit_status as i32), + _ => {} + } + } + + Ok(SshCommandOutput { + rc: rc.unwrap_or(-1), + stdout: String::from_utf8_lossy(&stdout).into_owned(), + stderr: String::from_utf8_lossy(&stderr).into_owned(), + }) +} + +/// SSH client handler that accepts any host key. Fine for VMs we +/// just provisioned (their host key is ephemeral per-boot anyway); +/// anything touching real long-lived infrastructure should pin. 
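+///
+/// If pinning is needed later, the shape is small — a hedged sketch
+/// (the `expected` field and equality check are assumptions, not
+/// current code):
+///
+/// ```ignore
+/// struct PinnedHandler { expected: key::PublicKey }
+/// // check_server_key then becomes:
+/// //   Ok(server_public_key == &self.expected)
+/// ```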
+struct TrustAllHandler; + +#[async_trait] +impl Handler for TrustAllHandler { + type Error = russh::Error; + + async fn check_server_key( + &mut self, + _server_public_key: &key::PublicKey, + ) -> Result { + Ok(true) + } +} diff --git a/harmony/src/modules/linux/topology.rs b/harmony/src/modules/linux/topology.rs index 11201cd0..94004da5 100644 --- a/harmony/src/modules/linux/topology.rs +++ b/harmony/src/modules/linux/topology.rs @@ -5,24 +5,27 @@ use harmony_types::net::IpAddress; use serde::{Deserialize, Serialize}; use crate::domain::topology::{ - ChangeReport, FileSpec, HostConfigurationProvider, PreparationError, PreparationOutcome, - SystemdUnitSpec, Topology, UserSpec, + ChangeReport, FileDelivery, FileSpec, HostReachable, PackageInstaller, PreparationError, + PreparationOutcome, SystemdManager, SystemdScope, SystemdUnitSpec, Topology, UnixUserManager, + UserSpec, }; use crate::executors::ExecutorError; use super::ansible_configurator::AnsibleHostConfigurator; -/// A single Linux host reachable over SSH, with an Ansible-backed -/// [`HostConfigurationProvider`] implementation. +/// A single Linux host reachable over SSH, implementing every +/// capability in `LinuxHostConfiguration` via an Ansible-over-SSH +/// backend. /// -/// This is the topology Harmony Scores target when they need to configure -/// a freshly-booted Linux machine (in our case, the VM or Pi that will run -/// the IoT agent). It is *not* the topology a long-running daemon on that -/// same machine would use — for that, see [`crate::modules::podman:: -/// PodmanTopology`] on the container-runtime side. A single host typically -/// has both: it's first configured via `LinuxHostTopology` (podman -/// installed, agent placed) and then *serves* `PodmanTopology` to its -/// in-process agent. +/// This is the topology Harmony Scores target when they need to +/// configure a freshly-booted Linux machine (in our case, the VM or +/// Pi that will run the IoT agent). It is *not* the topology a +/// long-running daemon on that same machine would use — for that, +/// see [`crate::modules::podman::PodmanTopology`] on the container- +/// runtime side. A single host typically has both: it's first +/// configured via `LinuxHostTopology` (podman installed, agent +/// placed) and then *serves* `PodmanTopology` to its in-process +/// agent. 
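+///
+/// Capability-trait usage sketch (method names are from this diff; a
+/// Score normally drives these rather than calling them directly):
+///
+/// ```ignore
+/// topo.ensure_package("podman").await?;            // PackageInstaller
+/// topo.ensure_linger("iot-agent").await?;          // UnixUserManager
+/// topo.ensure_user_unit_active("iot-agent", "iot-agent").await?; // SystemdManager
+/// ```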
pub struct LinuxHostTopology { name: String, host: IpAddress, @@ -79,29 +82,47 @@ impl Topology for LinuxHostTopology { } #[async_trait] -impl HostConfigurationProvider for LinuxHostTopology { +impl HostReachable for LinuxHostTopology { async fn ping(&self) -> Result<(), ExecutorError> { self.configurator.ping(self.host, &self.credentials).await } +} +#[async_trait] +impl PackageInstaller for LinuxHostTopology { async fn ensure_package(&self, name: &str) -> Result { self.configurator .ensure_package(self.host, &self.credentials, name) .await } +} +#[async_trait] +impl FileDelivery for LinuxHostTopology { + async fn ensure_file(&self, spec: &FileSpec) -> Result { + self.configurator + .ensure_file(self.host, &self.credentials, spec) + .await + } +} + +#[async_trait] +impl UnixUserManager for LinuxHostTopology { async fn ensure_user(&self, spec: &UserSpec) -> Result { self.configurator .ensure_user(self.host, &self.credentials, spec) .await } - async fn ensure_file(&self, spec: &FileSpec) -> Result { + async fn ensure_linger(&self, user: &str) -> Result { self.configurator - .ensure_file(self.host, &self.credentials, spec) + .ensure_linger(self.host, &self.credentials, user) .await } +} +#[async_trait] +impl SystemdManager for LinuxHostTopology { async fn ensure_systemd_unit( &self, spec: &SystemdUnitSpec, @@ -114,19 +135,13 @@ impl HostConfigurationProvider for LinuxHostTopology { async fn restart_service( &self, name: &str, - scope: crate::domain::topology::SystemdScope, + scope: SystemdScope, ) -> Result { self.configurator .restart_service(self.host, &self.credentials, name, scope) .await } - async fn ensure_linger(&self, user: &str) -> Result { - self.configurator - .ensure_linger(self.host, &self.credentials, user) - .await - } - async fn ensure_user_unit_active( &self, user: &str, diff --git a/iot/scripts/smoke-a3-arm.sh b/iot/scripts/smoke-a3-arm.sh new file mode 100755 index 00000000..49812d5a --- /dev/null +++ b/iot/scripts/smoke-a3-arm.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Convenience wrapper: run the end-to-end smoke test against an +# aarch64 guest (qemu-system-aarch64 TCG when the host is x86_64, +# native KVM when the host is already arm64). +# +# This is exactly equivalent to: +# ARCH=aarch64 VM_NAME=iot-smoke-vm-arm ./smoke-a3.sh +# with the VM name defaulted so it can live alongside an x86-64 +# smoke run on the same host without clobbering libvirt state. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +export ARCH=aarch64 +export VM_NAME="${VM_NAME:-iot-smoke-vm-arm}" +export DEVICE_ID="${DEVICE_ID:-$VM_NAME}" +export NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats-a3-arm}" +export NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net-a3-arm}" + +exec "$SCRIPT_DIR/smoke-a3.sh" "$@" diff --git a/iot/scripts/smoke-a3.sh b/iot/scripts/smoke-a3.sh index 18184770..8bb8d5a5 100755 --- a/iot/scripts/smoke-a3.sh +++ b/iot/scripts/smoke-a3.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # End-to-end smoke test for the VM-as-device flow. 
# -# [libvirt ${LIBVIRT_URI:-qemu:///system}] ──KvmVmScore──▶ VM (Ubuntu 24.04, cloud-init'd) +# [libvirt qemu:///system] ──KvmVmScore──▶ VM (Ubuntu 24.04, cloud-init'd) # │ # ssh+Ansible ◀────┘ # │ @@ -9,43 +9,40 @@ # IotDeviceSetupScore ──▶ podman + iot-agent on VM # │ # ▼ -# existing A1 operator ──NATS─────┘ (agent joins fleet, reconciles CR) +# existing operator ──NATS────────┘ (agent joins fleet, reconciles CR) +# │ +# ▼ [phase 5] +# virsh reboot → agent reconnects # -# Prerequisites on the runner host: -# - Everything smoke-a1.sh needs (podman, kubectl, k3d) -# - libvirt + qemu-system-x86_64, with the default NAT network present -# and running (`virsh net-start default`) -# - xorriso (for cloud-init seed ISO) -# - python3 (Harmony auto-installs ansible-core into a managed venv -# under $HARMONY_DATA_DIR/ansible-venv/ on first use) -# - An Ubuntu 24.04 cloud image on disk (see $BASE_IMAGE below) -# - An SSH keypair we can authorize on the VM (see $SSH_PUBKEY, -# $SSH_PRIVKEY below) +# Prerequisites on the runner host — all one-time, all generic: +# 1. libvirt + qemu + xorriso + python3 + podman + cargo + kubectl +# (Arch: pacman -S libvirt qemu-full libisoburn python podman +# Debian/Ubuntu: apt install libvirt-daemon-system qemu-kvm +# xorriso python3 python3-venv podman) +# 2. Be in the `libvirt` group (`sudo usermod -aG libvirt $USER`) +# 3. `sudo virsh net-start default && sudo virsh net-autostart default` # -# The test is NOT fully self-bootstrapping: downloading a ~700 MB cloud -# image and generating SSH keys inside the smoke script would make a -# single run cost tens of minutes. Instead, on first use the script -# tells you what's missing and points at the exact command to run. +# Harmony handles *everything else*: cloud image download, SSH key +# generation, libvirt pool creation, ansible install, agent build. +# First run costs ~2 min to populate caches; subsequent runs hit the +# cache in <1 s. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" -# ---------------------------- configuration ---------------------------- VM_NAME="${VM_NAME:-iot-smoke-vm}" DEVICE_ID="${DEVICE_ID:-$VM_NAME}" GROUP="${GROUP:-group-a}" -VM_WORK_DIR="${VM_WORK_DIR:-/var/tmp/harmony-iot-smoke}" -BASE_IMAGE="${BASE_IMAGE:-$VM_WORK_DIR/ubuntu-24.04-server-cloudimg-amd64.img}" -BASE_IMAGE_URL="${BASE_IMAGE_URL:-https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img}" -SSH_KEY_DIR="${SSH_KEY_DIR:-$VM_WORK_DIR/ssh}" -SSH_PRIVKEY="${SSH_PRIVKEY:-$SSH_KEY_DIR/id_ed25519}" -SSH_PUBKEY="${SSH_PUBKEY:-$SSH_KEY_DIR/id_ed25519.pub}" -LIBVIRT_NETWORK="${LIBVIRT_NETWORK:-default}" - LIBVIRT_URI="${LIBVIRT_URI:-qemu:///system}" +# Guest architecture. `x86-64` runs native KVM; `aarch64` runs under +# qemu-system-aarch64 TCG on x86 hosts (3-5× slower but exercises the +# real Pi target). Changes: cloud-image URL, qemu binary, agent build +# target, phase 4 timeout. 
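+#
+# Example invocations (the second form is exactly what the
+# smoke-a3-arm.sh wrapper bakes in):
+#   ./smoke-a3.sh                                        # native x86-64 KVM
+#   ARCH=aarch64 VM_NAME=iot-smoke-vm-arm ./smoke-a3.sh  # emulated Pi target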
+ARCH="${ARCH:-x86-64}" + NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats-a3}" NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net-a3}" NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}" @@ -54,46 +51,37 @@ NATS_PORT="${NATS_PORT:-4222}" log() { printf '\033[1;34m[smoke-a3]\033[0m %s\n' "$*"; } fail() { printf '\033[1;31m[smoke-a3 FAIL]\033[0m %s\n' "$*" >&2; exit 1; } +case "$ARCH" in + x86-64|x86_64) EXAMPLE_ARCH=x86-64; AGENT_TARGET= ;; + aarch64|arm64) EXAMPLE_ARCH=aarch64; AGENT_TARGET=aarch64-unknown-linux-gnu ;; + *) fail "unsupported ARCH=$ARCH (expected: x86-64 | aarch64)" ;; +esac + cleanup() { local rc=$? log "cleanup…" if [[ "${KEEP:-0}" != "1" ]]; then - virsh --connect ${LIBVIRT_URI:-qemu:///system} destroy "$VM_NAME" 2>/dev/null || true - virsh --connect ${LIBVIRT_URI:-qemu:///system} undefine --remove-all-storage "$VM_NAME" 2>/dev/null || true + virsh --connect "$LIBVIRT_URI" destroy "$VM_NAME" 2>/dev/null || true + # `--nvram` is required for aarch64 domains (which have a + # per-VM NVRAM file); harmless on x86_64 where no NVRAM is + # registered. Without it, `undefine` refuses and the next + # run sees a stale domain with whatever XML the previous + # run defined — masking XML changes until manually cleaned. + virsh --connect "$LIBVIRT_URI" undefine --nvram \ + --remove-all-storage "$VM_NAME" 2>/dev/null || true podman rm -f "$NATS_CONTAINER" >/dev/null 2>&1 || true podman network rm "$NATS_NET_NAME" >/dev/null 2>&1 || true else - log "KEEP=1 — leaving VM '$VM_NAME' and NATS container '$NATS_CONTAINER' running" + log "KEEP=1 — leaving VM '$VM_NAME' and NATS '$NATS_CONTAINER' running" fi exit $rc } trap cleanup EXIT INT TERM -# ---------------------------- preflight ---------------------------- require() { command -v "$1" >/dev/null 2>&1 || fail "missing required tool: $1"; } -require virsh require podman -require xorriso -require python3 require cargo - -[[ -f "$BASE_IMAGE" ]] || fail "Ubuntu 24.04 cloud image not found at $BASE_IMAGE. -Download it with: - mkdir -p $VM_WORK_DIR - curl -o $BASE_IMAGE $BASE_IMAGE_URL" - -if [[ ! -f "$SSH_PRIVKEY" || ! -f "$SSH_PUBKEY" ]]; then - fail "SSH keypair missing at $SSH_KEY_DIR. -Generate one with: - mkdir -p $SSH_KEY_DIR - ssh-keygen -t ed25519 -N '' -f $SSH_PRIVKEY" -fi - -virsh --connect ${LIBVIRT_URI:-qemu:///system} net-info "$LIBVIRT_NETWORK" >/dev/null 2>&1 \ - || fail "libvirt session network '$LIBVIRT_NETWORK' missing. \ -Run: virsh --connect ${LIBVIRT_URI:-qemu:///system} net-start $LIBVIRT_NETWORK" - -mkdir -p "$VM_WORK_DIR" +require virsh # ---------------------------- phase 1: NATS ---------------------------- log "phase 1: start NATS container on host" @@ -105,57 +93,109 @@ podman run -d \ -p "$NATS_PORT:4222" \ "$NATS_IMAGE" -js >/dev/null -# The VM will reach NATS via the libvirt NAT bridge gateway — typically -# 192.168.122.1. Inspect to be sure. 
-NAT_GW="$(virsh --connect ${LIBVIRT_URI:-qemu:///system} net-dumpxml "$LIBVIRT_NETWORK" \ +NAT_GW="$(virsh --connect "$LIBVIRT_URI" net-dumpxml default \ | grep -oP "ip address='\K[^']+" | head -1)" -[[ -n "$NAT_GW" ]] || fail "couldn't determine libvirt '$LIBVIRT_NETWORK' gateway IP" +[[ -n "$NAT_GW" ]] || fail "couldn't determine libvirt 'default' gateway IP" log "libvirt network gateway = $NAT_GW (VM will dial NATS at nats://$NAT_GW:$NATS_PORT)" -# ---------------------------- phase 2: build iot-agent ---------------------------- -log "phase 2: build iot-agent-v0 (release — debug binary is ~400MB and fills cloud rootfs)" +# ---------------------------- phase 2: build --------------------------- +log "phase 2: build iot-agent-v0 for guest arch=$ARCH (release — debug binary fills cloud rootfs)" ( cd "$REPO_ROOT" - cargo build -q --release -p iot-agent-v0 + if [[ -n "$AGENT_TARGET" ]]; then + rustup target add "$AGENT_TARGET" >/dev/null + cargo build -q --release --target "$AGENT_TARGET" -p iot-agent-v0 + else + cargo build -q --release -p iot-agent-v0 + fi ) -AGENT_BINARY="$REPO_ROOT/target/release/iot-agent-v0" +if [[ -n "$AGENT_TARGET" ]]; then + AGENT_BINARY="$REPO_ROOT/target/$AGENT_TARGET/release/iot-agent-v0" +else + AGENT_BINARY="$REPO_ROOT/target/release/iot-agent-v0" +fi [[ -f "$AGENT_BINARY" ]] || fail "agent binary missing after build: $AGENT_BINARY" -# ---------------------------- phase 3: provision VM + setup ---------------------------- -log "phase 3: provision VM via KvmVmScore, then onboard via IotDeviceSetupScore" +# ---------------------------- phase 3: bootstrap + provision + setup ---------------------------- +log "phase 3: bootstrap assets + provision VM + onboard device (arch=$EXAMPLE_ARCH)" ( cd "$REPO_ROOT" - cargo run -q -p example_iot_vm_setup -- \ + cargo run -q --release -p example_iot_vm_setup -- \ + --arch "$EXAMPLE_ARCH" \ --vm-name "$VM_NAME" \ --device-id "$DEVICE_ID" \ --group "$GROUP" \ - --network "$LIBVIRT_NETWORK" \ - --base-image "$BASE_IMAGE" \ - --ssh-pubkey "$SSH_PUBKEY" \ - --ssh-privkey "$SSH_PRIVKEY" \ - --work-dir "$VM_WORK_DIR" \ --agent-binary "$AGENT_BINARY" \ --nats-url "nats://$NAT_GW:$NATS_PORT" ) -# ---------------------------- phase 4: agent reaches NATS ---------------------------- -log "phase 4: verify agent connects to NATS from inside VM" -# The agent writes `status.` to the `agent-status` KV bucket -# every 30s. Check it appears. -for _ in $(seq 1 60); do - if podman run --rm --network "$NATS_NET_NAME" \ - docker.io/natsio/nats-box:latest \ - nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \ - "status.$DEVICE_ID" --raw >/dev/null 2>&1; then - log "agent has reported status" +# ---------------------------- phase 4: initial status ---------------------------- +# TCG emulation slows agent boot + first NATS publish significantly. +# 60s is fine for native KVM but too tight for aarch64-on-x86. 
+case "$ARCH" in + aarch64|arm64) STATUS_TIMEOUT=300 ;; + *) STATUS_TIMEOUT=60 ;; +esac +log "phase 4: wait for agent to report status to NATS (timeout=${STATUS_TIMEOUT}s)" +wait_for_status() { + local timeout=$1 + for _ in $(seq 1 "$timeout"); do + if podman run --rm --network "$NATS_NET_NAME" \ + docker.io/natsio/nats-box:latest \ + nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \ + "status.$DEVICE_ID" --raw >/dev/null 2>&1; then + return 0 + fi + sleep 1 + done + return 1 +} +wait_for_status "$STATUS_TIMEOUT" || fail "agent-status never appeared for $DEVICE_ID" +log "agent status present on NATS" + +# ---------------------------- phase 5: hard power-cycle, expect recovery ---------------------------- +log "phase 5: power-cycle VM (virsh destroy + start) → agent must reconnect to NATS" + +nats_status_timestamp() { + # Prints the "timestamp" field of the status. entry, or "". + # Never errors (for `set -e` safety). + podman run --rm --network "$NATS_NET_NAME" \ + docker.io/natsio/nats-box:latest \ + nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \ + "status.$DEVICE_ID" --raw 2>/dev/null \ + | grep -oE '"timestamp":"[^"]+"' \ + | head -1 | cut -d'"' -f4 || true +} + +virsh --connect "$LIBVIRT_URI" destroy "$VM_NAME" >/dev/null +# `virsh destroy` returns before the qemu process is fully torn down; +# wait a couple seconds to be sure the agent is dead and can't flush a +# final status update after our gate. +sleep 3 +REBOOT_GATE="$(date -u +%Y-%m-%dT%H:%M:%S+00:00)" +log "reboot gate = $REBOOT_GATE (any agent timestamp > this is post-reboot)" +virsh --connect "$LIBVIRT_URI" start "$VM_NAME" >/dev/null + +case "$ARCH" in + aarch64|arm64) REBOOT_STEPS=900 ;; # ~30 min under TCG + *) REBOOT_STEPS=120 ;; # ~4 min on native KVM +esac +log "waiting for agent to re-report status (post-reboot, up to $((REBOOT_STEPS*2))s)…" +TS_AFTER="" +for _ in $(seq 1 "$REBOOT_STEPS"); do + sleep 2 + ts="$(nats_status_timestamp)" + # ISO-8601 timestamps compare correctly lexicographically when the + # format is identical. Both the agent and `date -u -Iseconds` + # produce RFC 3339 UTC strings so string > works. + if [[ -n "$ts" && "$ts" > "$REBOOT_GATE" ]]; then + TS_AFTER="$ts" break fi - sleep 1 done -podman run --rm --network "$NATS_NET_NAME" \ - docker.io/natsio/nats-box:latest \ - nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \ - "status.$DEVICE_ID" --raw >/dev/null 2>&1 \ - || fail "agent-status KV entry never appeared for $DEVICE_ID" +if [[ -z "$TS_AFTER" ]]; then + fail "agent did not write a post-reboot status within ~$((REBOOT_STEPS*2))s (gate: $REBOOT_GATE)" +fi +log "post-reboot status seen at $TS_AFTER" -log "PASS — VM $VM_NAME is a fleet member reporting as $DEVICE_ID (group=$GROUP)" +log "PASS — VM $VM_NAME power-cycled and re-onboarded (group=$GROUP)"