Files
harmony/examples/fleet_device_enroll/src/main.rs
Jean-Gabriel Gill-Couture 50f62b6437 chore: warning sweep — auto-fix pass + scoped allows for generated code
Workspace warning count: 408 → 105.

Three buckets cleared:

* Auto-fixable (`cargo fix` + `cargo clippy --fix`): unused imports
  removed, unused variables prefixed with `_`, deprecated method
  calls updated. Applied across harmony, harmony-k8s, harmony-agent,
  harmony_inventory_agent, the fleet/ workspace, and ~15 examples.
* Generated code (opnsense-api/src/generated/): 269 snake_case
  warnings + ~10 unreachable-pattern warnings come from
  CamelCase-preserving bindings to OPNsense's HAProxy/Caddy XML
  schemas. Scoped a single `#[allow(non_snake_case,
  unreachable_patterns)]` at `pub mod generated;` rather than
  fighting the codegen — renaming would break serde round-trips
  and the codegen would regenerate them anyway.
* opnsense-codegen parser's defensive `let...else` guards on
  `XmlNode` (currently single-variant): file-level
  `#![allow(irrefutable_let_patterns)]` with a comment explaining
  why we keep the `else` arms (they re-arm if the IR grows a
  second variant).

`harmony_inventory_agent::local_presence::{DiscoveryEvent,
discover_agents}` re-exports were stripped twice by the auto-fix
passes (consumers live in another crate, so the local crate looks
"unused" to lint). Anchored with explicit `pub use` + an
`#[allow(unused_imports)]` annotation noting why.

All 151 harmony lib tests still pass. Remaining ~105 warnings are
mostly real dead code in non-fleet modules + a handful of
unused-imports/variables clippy couldn't auto-resolve; cleared in
the next pass.
2026-05-06 22:51:44 -04:00

640 lines
24 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Per-device enrollment driver — runs `FleetDeviceSetupScore` with
//! the new `FleetDeviceAuth::ZitadelEnroll` variant. Two workflows
//! land on the same code path:
//!
//! - **Dev-on-device**: developer runs this on a Pi they have a
//! keyboard / display attached to. They target their own Pi via
//! `--target ssh://<user>@127.0.0.1` (sshd is enabled in the
//! factory image so this works out of the box). The score opens
//! the local browser to Zitadel SSO, the dev signs in with their
//! personal account (must hold the admin role), the score mints
//! a per-device user + key, drops the keyfile + config in place,
//! and brings the agent up.
//!
//! - **Production-via-SSH**: operator runs this from a workstation,
//! targets each device over SSH (`--target ssh://pi@10.0.0.42`).
//! Browser opens once on the workstation; for v0 the resulting
//! token is held in memory only — re-running for the next device
//! re-prompts. Token caching is on the roadmap.
//!
//! `--vm-rehearsal` boots an aarch64 KVM VM and enrolls it through
//! the same path, so we can dry-run the whole flow without a Pi.
use std::path::PathBuf;
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::fleet::{
AdminAuth, FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore,
ensure_fleet_ssh_keypair,
};
use harmony::modules::linux::{LinuxHostTopology, LinuxLocalhostTopology, SshCredentials};
use harmony_types::id::Id;
// VM-rehearsal-only imports. Hidden behind a feature so `cargo build
// --no-default-features` (the device-side / aarch64 cross-compile)
// doesn't pull in libvirt — `libvirt-dev` doesn't link against arm64
// targets on most distros.
#[cfg(feature = "vm-rehearsal")]
use harmony::modules::fleet::{ProvisionVmScore, check_fleet_smoke_preflight_for_arch};
#[cfg(feature = "vm-rehearsal")]
use harmony::modules::kvm::KvmVirtualMachineHost;
#[cfg(feature = "vm-rehearsal")]
use harmony::modules::kvm::config::init_executor;
#[cfg(feature = "vm-rehearsal")]
use harmony::topology::{VirtualMachineSpec, VmArchitecture, VmFirstBootConfig};
// Command-line interface of the enrollment driver.
//
// NOTE: clap's derive turns every `///` comment on a field into that
// flag's `--help` text, so wording changes below are user-visible.
// This header is deliberately a plain `//` comment so it cannot leak
// into the command's own help output.
#[derive(Parser, Debug)]
#[command(
    name = "fleet_device_enroll",
    about = "Enroll a device into the fleet by minting its Zitadel \
             credentials inline (browser SSO or pre-acquired token)"
)]
struct Cli {
    // ---- target ----------------------------------------------------------
    /// Where to apply the score.
    ///
    /// - **Omitted** → run on the same machine the binary is invoked
    /// on (no SSH, no keypair). Ansible's `-c local` connection
    /// does the work; sudo still goes through your normal
    /// credentials.
    /// - **`ssh://user@host`** → drive the score against a remote
    /// device over SSH using the harmony fleet SSH key.
    ///
    /// Ignored when `--vm-rehearsal` is set (the rehearsal targets
    /// the freshly-booted VM).
    #[arg(long)]
    target: Option<String>,
    /// Spin up a fresh aarch64 libvirt VM and enroll it. Pulls the
    /// stock Ubuntu cloud image, attaches to the libvirt `default`
    /// network, waits for SSH, then runs the setup score against it.
    /// Requires the `vm-rehearsal` feature (enabled by default on
    /// host builds, disabled on device-side aarch64 builds).
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long)]
    vm_rehearsal: bool,
    /// Boot a Pi-equivalent aarch64 VM (Debian trixie generic-cloud
    /// image — the same distribution base as Raspberry Pi OS, since
    /// Pi OS itself is locked to Pi hardware and won't boot in
    /// generic KVM) and **exit**. Prints the SSH connection details
    /// so you can connect manually and run `fleet_device_enroll`
    /// against the booted VM as a separate command. Useful for
    /// dev-on-device rehearsal: launch once, then iterate with the
    /// enrollment binary against the running VM. Requires the
    /// `vm-rehearsal` feature.
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long)]
    launch_pi_vm: bool,
    // ---- Zitadel + NATS endpoints ----------------------------------------
    /// Zitadel issuer URL — what the agent will use as its OIDC
    /// issuer and what the score talks to during enrollment.
    /// Required for enrollment; ignored with `--launch-pi-vm`.
    #[arg(long)]
    issuer_url: Option<String>,
    /// Zitadel project ID (the project's numeric id). Becomes the
    /// agent's `audience` for JWT-bearer mint requests, and tags the
    /// machine user so the auth callout's `aud` check passes.
    #[arg(long)]
    audience: Option<String>,
    /// Project name (human-readable) the device's machine user
    /// belongs to. Must already exist — created by the staging
    /// install's `ZitadelSetupScore`.
    #[arg(long, default_value = "fleet")]
    project_name: String,
    /// NATS URL the agent should connect to.
    #[arg(long)]
    nats_url: Option<String>,
    // ---- device identity -------------------------------------------------
    /// Device id baked into the agent's TOML, the Zitadel machine
    /// username (`device-<device_id>`), and the Kubernetes Device CR
    /// name on the operator side. **Required.**
    ///
    /// Must be a valid RFC1123 DNS label / subdomain since the
    /// operator builds Kubernetes resource names from it. The
    /// validator in this binary rejects anything else upfront so
    /// enrollment can't produce a Zitadel machine user that the
    /// operator will later choke on with `metadata.name: Invalid value`.
    ///
    /// Allowed: lowercase alphanumerics + `-`, must start and end with
    /// an alphanumeric, max 63 chars per segment. Segments separated
    /// by `.` are accepted (full RFC1123 subdomain) but `-` is the
    /// usual choice.
    ///
    /// Examples that pass: `pi-001`, `lab-rehearsal-3`, `dev-jg-vm`.
    /// Examples that fail: `pi_001` (underscore), `Pi001` (uppercase),
    /// `-pi001` (leading dash), `pi001-` (trailing dash).
    #[arg(long)]
    device_id: String,
    /// Zitadel machine username for this device. Defaults to
    /// `device-<device_id>` so re-running with the same device_id
    /// reuses the same Zitadel user.
    #[arg(long)]
    device_username: Option<String>,
    /// Project-scoped Zitadel role to grant the device's user.
    /// Defaults to `device` — the role the auth callout maps to
    /// per-device-scoped pub/sub permissions.
    #[arg(long, default_value = "device")]
    device_role: String,
    /// Routing labels (`key=value,key=value`) the agent publishes in
    /// every DeviceInfo heartbeat.
    #[arg(long, default_value = "group=group-a")]
    labels: String,
    // ---- admin auth ------------------------------------------------------
    /// Pre-acquired Bearer token (PAT or out-of-band access token).
    /// When set, skips the browser device-code flow.
    #[arg(long, env = "HARMONY_ZITADEL_ADMIN_TOKEN")]
    admin_token: Option<String>,
    /// Zitadel OIDC `client_id` for the device-code app — the
    /// **numeric id** Zitadel assigns when the app is created (e.g.
    /// `371639797157987125@fleet`), NOT the human-readable app name
    /// (`harmony-cli`). The staging install prints this value in its
    /// final summary; copy it from there. Required when using SSO
    /// (omit only when `--admin-token` is set).
    #[arg(long)]
    admin_oidc_client_id: Option<String>,
    /// Forward to the agent's HTTP client AND to our admin-side calls
    /// to Zitadel. Set when talking to a dev cluster with a
    /// self-signed cert.
    #[arg(long)]
    danger_accept_invalid_certs: bool,
    /// Override the Zitadel **org context** (`x-zitadel-orgid` header)
    /// for management API calls. Set when the SSO operator's primary
    /// org differs from where the project + device users live —
    /// typical for human SSO accounts on a Zitadel where the project
    /// was provisioned by the system iam-admin (their org defaults
    /// don't match). Symptom: `Project '<name>' not found in
    /// Zitadel` even though the project clearly exists. Find the
    /// right value in Zitadel's admin UI → Organization → Resource
    /// ID, or via `/admin/v1/orgs/_search`.
    #[arg(long)]
    admin_org_id: Option<String>,
    // ---- agent binary ----------------------------------------------------
    /// Path to the cross-compiled fleet-agent binary that gets
    /// uploaded to the device and installed at /usr/local/bin/fleet-agent.
    /// Optional when `--launch-pi-vm` is set (no enrollment runs).
    #[arg(long)]
    agent_binary: Option<PathBuf>,
    // ---- VM knobs (only relevant with --vm-rehearsal / --launch-pi-vm) ----
    /// libvirt domain name for the rehearsal VM.
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long, default_value = "fleet-enroll-rehearsal")]
    vm_name: String,
    /// libvirt network the rehearsal VM is attached to.
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long, default_value = "default")]
    vm_network: String,
    /// Admin user passed to the rehearsal VM's first-boot
    /// configuration; enrollment then connects over SSH as this user.
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long, default_value = "fleet-admin")]
    vm_admin_user: String,
    /// Disk size requested for the rehearsal VM, in GB.
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long, default_value_t = 16)]
    vm_disk_size_gb: u32,
}
/// Entry point: parse the CLI, optionally boot a rehearsal VM, then run
/// `FleetDeviceSetupScore` against the selected target.
///
/// Flow:
/// 1. `--launch-pi-vm` (feature `vm-rehearsal`): boot the Debian VM,
///    print its connection details plus a ready-to-paste enroll
///    command, and exit without enrolling anything.
/// 2. Otherwise validate `--device-id`, require the enrollment flags
///    (`--issuer-url`, `--audience`, `--nats-url`, `--agent-binary`)
///    and build the setup score.
/// 3. `--vm-rehearsal` (feature `vm-rehearsal`): boot the Ubuntu VM and
///    enroll it over SSH.
/// 4. Else: no `--target` → enroll the local machine;
///    `--target ssh://user@host` → enroll the remote host over SSH.
///
/// # Errors
/// Returns an error when the device id is invalid, a required flag is
/// missing, the target cannot be parsed, or any score / VM step fails.
#[tokio::main]
async fn main() -> Result<()> {
    // `try_init().ok()`: a logger that is already installed is not an error.
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
        .try_init()
        .ok();
    let cli = Cli::parse();

    #[cfg(feature = "vm-rehearsal")]
    if cli.launch_pi_vm {
        let vm_ip = boot_pi_rehearsal_vm(&cli).await?;
        let admin_user = &cli.vm_admin_user;
        println!();
        println!("=== Pi-equivalent VM ready ===");
        println!("VM: {} (debian-trixie arm64)", cli.vm_name);
        println!("IP: {vm_ip}");
        let ssh = ensure_fleet_ssh_keypair()
            .await
            .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
        println!(
            "SSH: ssh -i {} {admin_user}@{vm_ip}",
            ssh.private_key.display()
        );
        println!();
        println!("To enroll this VM, run from your workstation:");
        // The hint lives on its own line. An inline `# …` comment placed
        // before a trailing `\` swallows the continuation when the
        // command is pasted into a shell, so the remaining flags would
        // be executed as separate commands.
        println!(" (--device-id is required and must be RFC1123, e.g. pi-001)");
        println!(
            " fleet_device_enroll \\\n \
             --target ssh://{admin_user}@{vm_ip} \\\n \
             --device-id <ID> \\\n \
             --issuer-url <ISSUER> \\\n \
             --audience <PROJECT_ID> \\\n \
             --nats-url <NATS_URL> \\\n \
             --admin-oidc-client-id <CLIENT_ID> \\\n \
             --agent-binary <AGENT_BIN>"
        );
        return Ok(());
    }

    // Reject ids the operator could not turn into Kubernetes names
    // before anything is created.
    validate_device_id(&cli.device_id)?;
    let device_id = Id::from(cli.device_id.clone());
    let device_username = cli
        .device_username
        .clone()
        .unwrap_or_else(|| format!("device-{device_id}"));
    let labels = parse_labels(&cli.labels)?;

    // These flags are `Option`s at the clap level so `--launch-pi-vm`
    // works without them; enrollment itself needs all of them.
    let issuer_url = cli
        .issuer_url
        .clone()
        .context("--issuer-url is required for enrollment (omit only with --launch-pi-vm)")?;
    let audience = cli
        .audience
        .clone()
        .context("--audience is required for enrollment")?;
    let nats_url = cli
        .nats_url
        .clone()
        .context("--nats-url is required for enrollment")?;
    let agent_binary = cli
        .agent_binary
        .clone()
        .context("--agent-binary is required for enrollment")?;

    // A pre-acquired token wins; otherwise fall back to SSO, which
    // needs the OIDC client id.
    let admin = match &cli.admin_token {
        Some(token) => AdminAuth::Token(token.clone()),
        None => AdminAuth::Sso {
            client_id: cli.admin_oidc_client_id.clone().context(
                "--admin-oidc-client-id is required for SSO login. \
                 This is the **numeric** Zitadel client_id (e.g. \
                 `371639797157987125@fleet`), not the app name. \
                 The staging install prints it in its final summary. \
                 Alternatively, pass --admin-token <PAT> to skip SSO.",
            )?,
        },
    };
    let auth = FleetDeviceAuth::ZitadelEnroll {
        oidc_issuer_url: issuer_url,
        audience,
        project_name: cli.project_name.clone(),
        device_username: device_username.clone(),
        device_display_name: format!("Fleet Device {device_id}"),
        device_role_keys: vec![cli.device_role.clone()],
        admin,
        admin_org_id: cli.admin_org_id.clone(),
        danger_accept_invalid_certs: cli.danger_accept_invalid_certs,
    };
    let setup_config = FleetDeviceSetupConfig {
        device_id: device_id.clone(),
        labels,
        nats_urls: vec![nats_url],
        auth,
        agent_binary_path: agent_binary,
        hosts_entries: vec![],
    };
    let setup_score = FleetDeviceSetupScore::new(setup_config);

    #[cfg(feature = "vm-rehearsal")]
    if cli.vm_rehearsal {
        let vm_ip = boot_rehearsal_vm(&cli).await?;
        let ssh = ensure_fleet_ssh_keypair()
            .await
            .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
        let topology = LinuxHostTopology::new(
            format!("rehearsal-{}", cli.vm_name),
            vm_ip
                .parse()
                .context("rehearsal VM did not yield a valid IP")?,
            SshCredentials {
                user: cli.vm_admin_user.clone(),
                private_key_path: ssh.private_key.clone(),
                remote_python: Some("/usr/bin/python3".to_string()),
                sudo_password: None,
            },
        );
        run_setup(&setup_score, &topology).await?;
        println!(
            "✅ rehearsal device '{device_id}' enrolled via VM {} ({vm_ip})",
            cli.vm_name
        );
        return Ok(());
    }

    match cli.target.as_deref() {
        // No `--target` → run on the same machine. ansible's `-c
        // local` connection skips SSH entirely; sudo still works the
        // usual way (operator types the password if not configured
        // passwordless).
        None => {
            let topology = LinuxLocalhostTopology::new("localhost");
            run_setup(&setup_score, &topology).await?;
        }
        Some(target) => {
            let (user, host) = parse_ssh_target(target)?;
            let ssh = ensure_fleet_ssh_keypair()
                .await
                .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
            let topology = LinuxHostTopology::new(
                format!("ssh-{host}"),
                host.parse().context("--target host is not a valid IP")?,
                SshCredentials {
                    user,
                    private_key_path: ssh.private_key.clone(),
                    remote_python: Some("/usr/bin/python3".to_string()),
                    sudo_password: None,
                },
            );
            run_setup(&setup_score, &topology).await?;
        }
    }
    println!("✅ device '{device_id}' enrolled");
    Ok(())
}
/// Boots the rehearsal VM from the Ubuntu image (the `--vm-rehearsal`
/// path) and hands back whatever `boot_vm` yields: the VM's IP as a
/// string.
#[cfg(feature = "vm-rehearsal")]
async fn boot_rehearsal_vm(cli: &Cli) -> Result<String> {
    let image = RehearsalImage::Ubuntu;
    boot_vm(cli, image).await
}
/// Boots the rehearsal VM from the Debian trixie image (the
/// `--launch-pi-vm` path) and hands back whatever `boot_vm` yields:
/// the VM's IP as a string.
#[cfg(feature = "vm-rehearsal")]
async fn boot_pi_rehearsal_vm(cli: &Cli) -> Result<String> {
    let image = RehearsalImage::DebianTrixie;
    boot_vm(cli, image).await
}
/// Which base cloud image `boot_vm` provisions the rehearsal VM from.
#[cfg(feature = "vm-rehearsal")]
#[derive(Debug, Clone, Copy)]
enum RehearsalImage {
    /// Resolved through `ensure_ubuntu_2404_cloud_image_for_arch`.
    Ubuntu,
    /// Resolved through `ensure_debian_trixie_arm64_cloud_image`.
    DebianTrixie,
}
/// Provisions an aarch64 rehearsal VM from `image` and returns its IP.
///
/// Steps, in order: host preflight check, base cloud image for the
/// requested `image`, the harmony fleet libvirt pool, the fleet SSH
/// keypair and its public key, KVM executor initialisation, and
/// finally `ProvisionVmScore` executed against a local
/// `KvmVirtualMachineHost`. Name, network, admin user and disk size
/// come from the `--vm-*` flags on `cli`; CPU count and memory are
/// fixed at 2 vCPUs / 2048 MiB.
///
/// The IP is the remainder of the first `ip=…` entry in the score
/// outcome's details.
///
/// # Errors
/// Each failing step is reported with a short prefix naming it
/// (`preflight: …`, `cloud image: …`, `libvirt pool: …`, …). Fails with
/// "ProvisionVmScore finished without an IP" when no `ip=` detail is
/// present.
#[cfg(feature = "vm-rehearsal")]
async fn boot_vm(cli: &Cli, image: RehearsalImage) -> Result<String> {
    use harmony::modules::fleet;
    use harmony::score::Score;

    let arch = VmArchitecture::Aarch64;
    check_fleet_smoke_preflight_for_arch(arch)
        .await
        .map_err(|e| anyhow::anyhow!("preflight: {e}"))?;

    let base = match image {
        RehearsalImage::Ubuntu => fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
            .await
            .map_err(|e| anyhow::anyhow!("cloud image: {e}"))?,
        RehearsalImage::DebianTrixie => fleet::ensure_debian_trixie_arm64_cloud_image()
            .await
            .map_err(|e| anyhow::anyhow!("debian cloud image: {e}"))?,
    };

    let storage_pool = fleet::ensure_harmony_fleet_pool()
        .await
        .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
    let keypair = ensure_fleet_ssh_keypair()
        .await
        .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
    let public_key = fleet::read_public_key(&keypair)
        .await
        .map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?;
    let kvm_executor = init_executor().map_err(|e| anyhow::anyhow!("KVM init: {e}"))?;

    let host = KvmVirtualMachineHost::new(
        "kvm-local",
        kvm_executor,
        storage_pool.name.clone(),
        storage_pool.path.clone(),
        base,
    );

    // The VM's hostname mirrors its libvirt domain name.
    let first_boot = VmFirstBootConfig {
        hostname: Some(cli.vm_name.clone()),
        admin_user: Some(cli.vm_admin_user.clone()),
        authorized_keys: vec![public_key],
        admin_password: None,
    };
    let spec = VirtualMachineSpec {
        name: cli.vm_name.clone(),
        architecture: arch,
        cpus: 2,
        memory_mib: 2048,
        disk_size_gb: Some(cli.vm_disk_size_gb),
        network: cli.vm_network.clone(),
        first_boot: Some(first_boot),
    };
    let provision = ProvisionVmScore { spec };

    let outcome = Score::<KvmVirtualMachineHost>::create_interpret(&provision)
        .execute(&Inventory::empty(), &host)
        .await
        .map_err(|e| anyhow::anyhow!("ProvisionVmScore: {e}"))?;

    // First `ip=` detail wins, matching a front-to-back scan.
    outcome
        .details
        .iter()
        .find_map(|detail| detail.strip_prefix("ip="))
        .map(str::to_string)
        .ok_or_else(|| anyhow::anyhow!("ProvisionVmScore finished without an IP"))
}
/// Executes `score` against `topology` with an empty inventory and
/// prints the resulting outcome (message + details) to stdout.
///
/// # Errors
/// A failed execution is returned as `FleetDeviceSetupScore: <cause>`.
async fn run_setup<T>(score: &FleetDeviceSetupScore, topology: &T) -> Result<()>
where
    T: harmony::topology::Topology + harmony::topology::LinuxHostConfiguration,
{
    use harmony::score::Score;

    let interpret = Score::<T>::create_interpret(score);
    let executed = interpret.execute(&Inventory::empty(), topology).await;
    match executed {
        Ok(outcome) => {
            println!("setup outcome: {} ({:?})", outcome.message, outcome.details);
            Ok(())
        }
        Err(e) => Err(anyhow::anyhow!("FleetDeviceSetupScore: {e}")),
    }
}
/// Checks that `id` is a valid RFC1123 subdomain, so the Device CR the
/// operator later creates from it cannot be refused with
/// `metadata.name: Invalid value`. Reference:
/// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
///
/// Rules enforced:
/// - the id is non-empty and at most 253 bytes long
/// - it consists of one or more labels separated by `.`
/// - every label satisfies `validate_dns_label` (1-63 chars, lowercase
///   alphanumerics and `-`, alphanumeric at both ends)
///
/// This is deliberately stricter than what NATS alone would need: the
/// same id also lands in NATS subjects through the auth callout's
/// permission templates, where `_` or uppercase would pass silently
/// and only break later on the kube side. Failing here is cheaper
/// than debugging that from three layers down.
///
/// # Errors
/// Returns the first violation found; label-level errors are wrapped
/// with a `device id '<id>'` context.
fn validate_device_id(id: &str) -> Result<()> {
    let total = id.len();
    if total == 0 {
        anyhow::bail!("device id is empty");
    }
    if total > 253 {
        anyhow::bail!(
            "device id '{id}' is {len} chars, max 253 (RFC1123 subdomain limit)",
            len = total
        );
    }
    // Stops at the first offending label, left to right.
    id.split('.').try_for_each(|label| {
        validate_dns_label(label).with_context(|| format!("device id '{id}'"))
    })
}
fn validate_dns_label(label: &str) -> Result<()> {
if label.is_empty() {
anyhow::bail!("empty label (consecutive dots or leading/trailing dot)");
}
if label.len() > 63 {
anyhow::bail!(
"label '{label}' is {len} chars, max 63 per RFC1123 label",
len = label.len()
);
}
let bytes = label.as_bytes();
if !bytes[0].is_ascii_alphanumeric() {
anyhow::bail!(
"label '{label}' must start with an alphanumeric (got `{}`)",
label.chars().next().unwrap()
);
}
if !bytes[bytes.len() - 1].is_ascii_alphanumeric() {
anyhow::bail!(
"label '{label}' must end with an alphanumeric (got `{}`)",
label.chars().last().unwrap()
);
}
for (i, c) in label.chars().enumerate() {
let ok = c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-';
if !ok {
anyhow::bail!(
"label '{label}' has invalid char `{c}` at position {i}; \
only lowercase a-z, 0-9, and `-` are allowed (no `_`, no uppercase)"
);
}
}
Ok(())
}
/// Splits a `--target` value of the form `ssh://user@host` into
/// `(user, host)`.
///
/// The split happens at the first `@`; the host part is returned
/// verbatim and is validated by the caller.
///
/// # Errors
/// Fails when the `ssh://` prefix is missing, when there is no `@`, or
/// when either side of the `@` is empty. Local runs are selected by
/// omitting `--target` altogether, so a bare `localhost` is rejected
/// here — the message says so instead of advertising it as valid.
fn parse_ssh_target(target: &str) -> Result<(String, String)> {
    let rest = target.strip_prefix("ssh://").context(
        "--target must be `ssh://user@host` (omit --target to run on this machine)",
    )?;
    let (user, host) = rest
        .split_once('@')
        .context("--target must be `ssh://user@host`")?;
    if user.is_empty() || host.is_empty() {
        anyhow::bail!("--target ssh:// has empty user or host");
    }
    Ok((user.to_string(), host.to_string()))
}
fn parse_labels(raw: &str) -> Result<std::collections::BTreeMap<String, String>> {
let mut out = std::collections::BTreeMap::new();
for piece in raw.split(',').map(str::trim).filter(|p| !p.is_empty()) {
let (k, v) = piece
.split_once('=')
.ok_or_else(|| anyhow::anyhow!("label '{piece}' missing '='"))?;
let k = k.trim();
let v = v.trim();
if k.is_empty() || v.is_empty() {
anyhow::bail!("label '{piece}' has empty key or value");
}
out.insert(k.to_string(), v.to_string());
}
if out.is_empty() {
anyhow::bail!("--labels must include at least one key=value pair");
}
Ok(out)
}
#[cfg(test)]
mod tests {
    use super::validate_device_id;

    /// Ids that must pass validation; the last one is a multi-label
    /// subdomain.
    const ACCEPTED: [&str; 8] = [
        "pi",
        "pi-001",
        "lab-rehearsal-3",
        "dev-jg-vm",
        "a",
        "0",
        "fb5310-qm2kpoq",
        "pi-001.lab-east.fleet",
    ];

    /// Renders the whole error chain of `e`.
    ///
    /// A plain `.to_string()` on an anyhow error shows only the
    /// outermost context, while the validator's actual reason
    /// (`invalid char …`, `max 63`, …) sits deeper in the chain. The
    /// `{:#}` form joins every level with `: `, which is what the
    /// assertions below match against.
    fn err_chain(e: anyhow::Error) -> String {
        format!("{e:#}")
    }

    /// Validates `id`, expects a rejection, and returns the rendered
    /// error chain.
    fn rejection(id: &str) -> String {
        err_chain(validate_device_id(id).unwrap_err())
    }

    #[test]
    fn accepts_simple_labels() {
        for id in ACCEPTED {
            let verdict = validate_device_id(id);
            assert!(
                verdict.is_ok(),
                "expected '{id}' to be accepted: {verdict:?}"
            );
        }
    }

    #[test]
    fn rejects_underscore() {
        // Shape of the original `Id::default()` value that motivated
        // the validator.
        let message = rejection("fb5310_Qm2kPoQ");
        assert!(message.contains("invalid char `_`"), "got: {message}");
    }

    #[test]
    fn rejects_uppercase() {
        let message = rejection("Pi001");
        assert!(message.contains("invalid char"), "got: {message}");
    }

    #[test]
    fn rejects_leading_or_trailing_dash() {
        for id in ["-pi001", "pi001-"] {
            assert!(validate_device_id(id).is_err());
        }
    }

    #[test]
    fn rejects_empty() {
        assert!(validate_device_id("").is_err());
    }

    #[test]
    fn rejects_consecutive_dots() {
        assert!(validate_device_id("a..b").is_err());
    }

    #[test]
    fn rejects_too_long_label() {
        let label = "a".repeat(64);
        let message = rejection(&label);
        assert!(message.contains("max 63"), "got: {message}");
    }

    #[test]
    fn rejects_too_long_total() {
        // Four 63-char labels plus three dots = 255 chars, over the
        // 253 limit while every individual label is still valid.
        let id = vec!["a".repeat(63); 4].join(".");
        assert!(id.len() > 253);
        let message = rejection(&id);
        assert!(message.contains("max 253"), "got: {message}");
    }
}