2026-05-22 22:16:18 +00:00
402 changed files with 46428 additions and 1915 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -6,3 +6,6 @@ rustflags = ["-C", "link-arg=-Wl,--stack,8000000"]

 [target.aarch64-unknown-linux-gnu]
 linker = "aarch64-linux-gnu-gcc"
+
+[profile.test]
+debug = 0
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,6 +1,64 @@
+# Build context filter for `podman build`. The bare invocations in
+# fleet/scripts/build_and_push_images.sh use the workspace root as
+# context (cargo workspace path-deps require it). Without this list,
+# the context tar would carry tens of GB of build artifacts, agent
+# worktrees, and demo blobs to the build daemon for every image.
+#
+# Pattern semantics (Docker/Podman): patterns are matched relative to
+# the context root, so a bare `foo` only matches at the top level.
+# `**/foo` is the recursive form that matches at any depth.
+
+# ---- Cargo build outputs (the bulk: ~100 GB combined) ---------------
 target/
+**/target/
+
+# ---- VCS + tooling caches (4-40 GB) --------------------------------
+.git/
+.gitignore
+.gitattributes
+.claude/
+.idea/
+.vscode/
+.cargo/
+
+# ---- Local-only debug / demo artifacts -----------------------------
+data/
+demos/
+manual_mint/
+
+# ---- Cluster + cloud-image blobs (.qcow2 etc. easily exceed 1 GB) ---
+*.qcow2
+*.iso
+*.img
+*.tar
+*.tar.gz
+*.tgz
+**/cloud-images/
+**/kvm/pool/
+
+# ---- Test outputs / databases --------------------------------------
+*.sqlite
+*.sqlite-journal
+*.log
+**/previous_runs/
+**/reports/
+
+# ---- Python venvs that may sneak in via the manual-mint helper -----
+venv/
+.venv/
+__pycache__/
+*.pyc
+
+# ---- JS that could land via a docs/site preview --------------------
+node_modules/
+
+# ---- Build context noise -------------------------------------------
 Dockerfile
-.git
-data
-target
-demos
+**/Dockerfile.dev
+docker-compose*.yml
+.dockerignore
+
+# ---- OS / editor ----------------------------------------------------
+.DS_Store
+*.swp
+*~
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,7 @@
+FLEET_AUTH_ISSUER_URL=
+FLEET_AUTH_AUTHORIZE_URL=
+FLEET_AUTH_TOKEN_URL=
+FLEET_AUTH_CLIENT_ID=
+FLEET_AUTH_REDIRECT_URI=
+FLEET_AUTH_SCOPE=
+FLEET_AUTH_TRUSTED_AUDIENCES=
--- a/.gitea/workflows/harmony-fleet-operator.yaml
+++ b/.gitea/workflows/harmony-fleet-operator.yaml
@@ -0,0 +1,44 @@
+name: Build and push harmony-fleet-operator image
+on:
+  push:
+    branches:
+      - master
+  workflow_dispatch:
+
+jobs:
+  build_and_push:
+    container:
+      image: hub.nationtech.io/harmony/harmony_composer:latest
+    runs-on: dind
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Log in to hub.nationtech.io
+        uses: docker/login-action@v3
+        with:
+          registry: hub.nationtech.io
+          username: ${{ secrets.HUB_BOT_USER }}
+          password: ${{ secrets.HUB_BOT_PASSWORD }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Build context is the workspace root because the operator's
+      # Cargo.toml has `path = "../../harmony"` deps. The multi-stage
+      # Dockerfile runs `cargo build` itself inside a pinned rust
+      # image, so no host-side cargo step is needed.
+      #
+      # TODO: add buildx layer caching. Each run currently recompiles
+      # the whole `harmony` workspace from scratch in the builder
+      # stage. Add `cache-from: type=gha` + `cache-to: type=gha,mode=max`
+      # below once build time becomes the bottleneck. If layer cache
+      # alone isn't enough, consider splitting the Dockerfile with
+      # cargo-chef (no other crate in this repo does that yet).
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: fleet/harmony-fleet-operator/Dockerfile
+          push: true
+          tags: hub.nationtech.io/harmony/harmony-fleet-operator:latest
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,15 @@
 ### General ###
 private_repos/
+.env

 ### Harmony ###
 harmony.log
 data/okd/installation_files*

+# Compiled tailwind output for the operator's maud+htmx frontend.
+# Source is `fleet/harmony-fleet-operator/style/input.css`.
+fleet/harmony-fleet-operator/style/dist/
+
 ### Helm ###
 # Chart dependencies
 **/charts/*.tgz
@@ -32,3 +37,9 @@ ignore

 # Generated book
 book
+
+# Scratch and agent worktrees — never commit
+.claude/
+ui-idea.md
+ROADMAP/00-priority-matrix.md
+fleet/harmony-fleet-agent/agent-config.toml
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -0,0 +1,171 @@
+# AGENTS.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Build & Test Commands
+
+```bash
+# Full CI check (check + fmt + clippy + test)
+./build/check.sh
+
+# Individual commands
+cargo check --all-targets --all-features --keep-going
+cargo fmt --check          # Check formatting
+cargo clippy               # Lint
+cargo test                 # Run all tests
+
+# Run a single test
+cargo test -p <crate_name> <test_name>
+
+# Run a specific example
+cargo run -p <example_crate_name>
+
+# Build the mdbook documentation
+mdbook build
+```
+
+## What Harmony Is
+
+Harmony is the orchestration framework powering NationTech's vision of **decentralized micro datacenters** — small computing clusters deployed in homes, offices, and community spaces instead of hyperscaler facilities. The goal: make computing cleaner, more resilient, locally beneficial, and resistant to centralized points of failure (including geopolitical threats).
+
+Harmony exists because existing IaC tools (Terraform, Ansible, Helm) are trapped in a **YAML mud pit**: static configuration files validated only at runtime, fragmented across tools, with errors surfacing at 3 AM instead of at compile time. Harmony replaces this entire class of tools with a single Rust codebase where **the compiler catches infrastructure misconfigurations before anything is deployed**.
+
+This is not a wrapper around existing tools. It is a paradigm shift: infrastructure-as-real-code with compile-time safety guarantees that no YAML/HCL/DSL-based tool can provide.
+
+## The Score-Topology-Interpret Pattern
+
+This is the core design pattern. Understand it before touching the codebase.
+
+**Score** — declarative desired state. A Rust struct generic over `T: Topology` that describes *what* you want (e.g., "a PostgreSQL cluster", "DNS records for these hosts"). Scores are serializable, cloneable, idempotent.
+
+**Topology** — infrastructure capabilities. Represents *where* things run and *what the environment can do*. Exposes capabilities as traits (`DnsServer`, `K8sclient`, `HelmCommand`, `LoadBalancer`, `Firewall`, etc.). Examples: `K8sAnywhereTopology` (local K3D or any K8s cluster), `HAClusterTopology` (bare-metal HA with redundant firewalls/switches).
+
+**Interpret** — execution glue. Translates a Score into concrete operations against a Topology's capabilities. Returns an `Outcome` (SUCCESS, NOOP, FAILURE, RUNNING, QUEUED, BLOCKED).
+
+**The key insight — compile-time safety through trait bounds:**
+```rust
+impl<T: Topology + DnsServer + DhcpServer> Score<T> for DnsScore { ... }
+```
+The compiler rejects any attempt to use `DnsScore` with a Topology that doesn't implement `DnsServer` and `DhcpServer`. Invalid infrastructure configurations become compilation errors, not runtime surprises.
+
+**Higher-order topologies** compose transparently:
+- `FailoverTopology<T>` — primary/replica orchestration
+- `DecentralizedTopology<T>` — multi-site coordination
+
+If `T: PostgreSQL`, then `FailoverTopology<T>: PostgreSQL` automatically via blanket impls. Zero boilerplate.
+
+## Architecture (Hexagonal)
+
+```
+harmony/src/
+├── domain/           # Core domain — the heart of the framework
+│   ├── score.rs      # Score trait (desired state)
+│   ├── topology/     # Topology trait + implementations
+│   ├── interpret/    # Interpret trait + InterpretName enum (25+ variants)
+│   ├── inventory/    # Physical infrastructure metadata (hosts, switches, mgmt interfaces)
+│   ├── executors/    # Executor trait definitions
+│   └── maestro/      # Orchestration engine (registers scores, manages topology state, executes)
+├── infra/            # Infrastructure adapters (driven ports)
+│   ├── opnsense/     # OPNsense firewall adapter
+│   ├── brocade.rs    # Brocade switch adapter
+│   ├── kube.rs       # Kubernetes executor
+│   └── sqlx.rs       # Database executor
+└── modules/          # Concrete deployment modules (23+)
+    ├── k8s/          # Kubernetes (namespaces, deployments, ingress)
+    ├── postgresql/   # CloudNativePG clusters + multi-site failover
+    ├── okd/          # OpenShift bare-metal from scratch
+    ├── helm/         # Helm chart inflation → vanilla K8s YAML
+    ├── opnsense/     # OPNsense (DHCP, DNS, etc.)
+    ├── monitoring/   # Prometheus, Alertmanager, Grafana
+    ├── kvm/          # KVM virtual machine management
+    ├── network/      # Network services (iPXE, TFTP, bonds)
+    └── ...
+```
+
+Domain types to know: `Inventory` (read-only physical infra context), `Maestro<T>` (orchestrator — calls `topology.ensure_ready()` then executes scores), `Outcome` / `InterpretError` (execution results).
+
+## Key Crates
+
+| Crate | Purpose |
+|---|---|
+| `harmony` | Core framework: domain, infra adapters, deployment modules |
+| `harmony_cli` | CLI + optional TUI (`--features tui`) |
+| `harmony_config` | Unified config+secret management (env → SQLite → OpenBao → interactive prompt) |
+| `harmony_secret` / `harmony_secret_derive` | Secret backends (LocalFile, OpenBao, Infisical) |
+| `harmony_execution` | Execution engine |
+| `harmony_agent` / `harmony_inventory_agent` | Persistent agent framework (NATS JetStream mesh), hardware discovery |
+| `harmony_assets` | Asset management (URLs, local cache, S3) |
+| `harmony_composer` | Infrastructure composition tool |
+| `harmony-k8s` | Kubernetes utilities |
+| `k3d` | Local K3D cluster management |
+| `brocade` | Brocade network switch integration |
+
+## OPNsense Crates
+
+The `opnsense-codegen` and `opnsense-api` crates exist because OPNsense's automation ecosystem is poor — no typed API client exists. These are support crates, not the core of Harmony.
+
+- `opnsense-codegen`: XML model files → IR → Rust structs with serde helpers for OPNsense wire format quirks (`opn_bool` for "0"/"1" strings, `opn_u16`/`opn_u32` for string-encoded numbers). Vendor sources are git submodules under `opnsense-codegen/vendor/`.
+- `opnsense-api`: Hand-written `OpnsenseClient` + generated model types in `src/generated/`.
+
+## Key Design Decisions (ADRs in docs/adr/)
+
+- **ADR-001**: Rust chosen for type system, refactoring safety, and performance
+- **ADR-002**: Hexagonal architecture — domain isolated from adapters
+- **ADR-003**: Infrastructure abstractions at domain level, not provider level (no vendor lock-in)
+- **ADR-005**: Custom Rust DSL over YAML/Score-spec — real language, Cargo deps, composable
+- **ADR-007**: K3D as default runtime (K8s-certified, lightweight, cross-platform)
+- **ADR-009**: Helm charts inflated to vanilla K8s YAML, then deployed via existing code paths
+- **ADR-015**: Higher-order topologies via blanket trait impls (zero-cost composition)
+- **ADR-016**: Agent-based architecture with NATS JetStream for real-time failover and distributed consensus
+- **ADR-020**: Unified config+secret management — Rust struct is the schema, resolution chain: env → store → prompt
+- **ADR-023**: Deploy architecture — Scores everywhere (incl. tests), per-component `*-deploy` crates, deploy blocks on smoke-test, topologies are compile-time
+
+## Capability and Score Design Rules
+
+**Capabilities are industry concepts, not tools.** A capability trait represents a standard infrastructure need (e.g., `DnsServer`, `LoadBalancer`, `Router`, `CertificateManagement`) that can be fulfilled by different products. OPNsense provides `DnsServer` today; CoreDNS or Route53 could provide it tomorrow. Scores must not break when the backend changes.
+
+**Exception:** When the developer fundamentally needs to know the implementation. `PostgreSQL` is a capability (not `Database`) because the developer writes PostgreSQL-specific SQL and replication configs. Swapping to MariaDB would break the application, not just the infrastructure.
+
+**Test:** If you could swap the underlying tool without rewriting any Score that uses the capability, the boundary is correct.
+
+**Don't name capabilities after tools.** `SecretVault` not `OpenbaoStore`. `IdentityProvider` not `ZitadelAuth`. Think: what is the core developer need that leads to using this tool?
+
+**Scores encapsulate operational complexity.** Move procedural knowledge (init sequences, retry logic, distribution-specific config) into Scores. A high-level example should be ~15 lines, not ~400 lines of imperative orchestration.
+
+**Scores must be idempotent.** Running twice = same result as once. Use create-or-update, handle "already exists" gracefully.
+
+**Scores must not depend on execution order.** Declare capability requirements via trait bounds, don't assume another Score ran first. If Score B needs what Score A provides, Score B should declare that capability as a trait bound.
+
+See `docs/guides/writing-a-score.md` for the full guide.
+
+## Deploy Architecture (ADR-023)
+
+The Score-Topology-Interpret pattern above tells you how to **describe** a deployment. The rules below tell you how to **ship** one. These are non-negotiable.
+
+**Deploy with Scores, not handrolled manifests.** No `k8s_openapi::api::*` structs outside of `Score::interpret` bodies. CLIs, examples, and **test harnesses** all compose `*Score` types — they never reimplement deploys. If you find yourself building `Deployment` / `Service` / `ConfigMap` structs in a test harness, stop: that's the YAML-mud-pit anti-pattern in Rust clothing. Reach for the existing Score, or write a missing Score in the right deploy crate.
+
+**E2E uses the same Scores as production.** Only the `Topology` instance changes (local k3d, remote OKD, bare-metal HA). A test harness is a Score-composer running against a test Topology. If e2e needs something prod doesn't, add the knob to the Score — don't fork the manifest in the harness.
+
+**One Score per deployable component.** Composition is the user-facing primitive: `MyAppScore` pulls in `PostgresScore`, `HttpServerScore`, etc. Don't build monolithic "deploy everything" Scores; build small testable ones and compose upward.
+
+**Deploy returns only after smoke-test success.** Every Score owns a readiness + smoke-test contract that the framework runs and blocks on. `helm install && hope` is the anti-pattern harmony exists to fix. Convergence errors must be actionable in the style of `rustc`'s error messages, not "exit code 1 from helm". (The implementation shape of the smoke-test contract is open; the principle is locked in.)
+
+**Deploy logic lives in a `*-deploy` crate** that depends on both `harmony` and the runtime crate. Runtime binaries (the thing that ships to constrained devices and to in-cluster pods) stay free of the `harmony` dep. Pattern: `harmony_agent/deploy`, `fleet/harmony-fleet-deploy`. *Each app area gets one deploy crate that holds every component's Score plus a `main.rs` driven by `harmony_cli` that selects which component to deploy.*
+
+**Topologies are compile-time, selected at runtime.** A deploy binary statically lists its supported topologies; the user picks one at deploy time. Adding a new topology backend is a rebuild — that's an acceptable cost because dynamic-discovery topologies like `K8sAnywhere` already cover "any physical place that runs k8s". No `Box<dyn Topology>` plugin loaders.
+
+**Extend Scores with companions, not API changes.** New capabilities the framework wants to attach to Scores (planning, dry-run, observability, eventually smoke-test) default to a *companion* type or trait that wraps a Score rather than a new method on `Score`/`Interpret`. The base public API stays simple.
+
+**CLI: hybrid, staged.** Today (B): first-party tools ship as separate `harmony-*` binaries built on the existing `harmony_cli` crate. Tomorrow (C): a top-level `harmony` binary discovers `harmony-*` plugin binaries on `$PATH` (`kubectl`-style). The plugin protocol is **not** in scope for any current PR — dedicated future effort.
+
+**Use `thiserror` almost everywhere; `anyhow` only at binary glue.** Library code, public crate boundaries, anything callers might want to match on — typed errors via `thiserror`. `anyhow` is reserved for `main.rs`-level glue where the error is just printed.
+
+See `docs/adr/023-deploy-architecture.md` for the full rationale, including what's explicitly deferred (Score derive macro, Score registry, plugin CLI discovery, inventory redesign, smoke-test contract shape).
+
+## Conventions
+
+- **Rust edition 2024**, resolver v2
+- **Conventional commits**: `feat:`, `fix:`, `chore:`, `docs:`, `refactor:`
+- **Small PRs**: max ~200 lines (excluding generated code), single-purpose
+- **License**: GNU AGPL v3
+- **Quality bar**: This framework demands high-quality engineering. The type system is a feature, not a burden. Leverage it. Prefer compile-time guarantees over runtime checks. Abstractions should be domain-level, not provider-specific.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,146 +0,0 @@
-# CLAUDE.md
-
-This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
-
-## Build & Test Commands
-
-```bash
-# Full CI check (check + fmt + clippy + test)
-./build/check.sh
-
-# Individual commands
-cargo check --all-targets --all-features --keep-going
-cargo fmt --check          # Check formatting
-cargo clippy               # Lint
-cargo test                 # Run all tests
-
-# Run a single test
-cargo test -p <crate_name> <test_name>
-
-# Run a specific example
-cargo run -p <example_crate_name>
-
-# Build the mdbook documentation
-mdbook build
-```
-
-## What Harmony Is
-
-Harmony is the orchestration framework powering NationTech's vision of **decentralized micro datacenters** — small computing clusters deployed in homes, offices, and community spaces instead of hyperscaler facilities. The goal: make computing cleaner, more resilient, locally beneficial, and resistant to centralized points of failure (including geopolitical threats).
-
-Harmony exists because existing IaC tools (Terraform, Ansible, Helm) are trapped in a **YAML mud pit**: static configuration files validated only at runtime, fragmented across tools, with errors surfacing at 3 AM instead of at compile time. Harmony replaces this entire class of tools with a single Rust codebase where **the compiler catches infrastructure misconfigurations before anything is deployed**.
-
-This is not a wrapper around existing tools. It is a paradigm shift: infrastructure-as-real-code with compile-time safety guarantees that no YAML/HCL/DSL-based tool can provide.
-
-## The Score-Topology-Interpret Pattern
-
-This is the core design pattern. Understand it before touching the codebase.
-
-**Score** — declarative desired state. A Rust struct generic over `T: Topology` that describes *what* you want (e.g., "a PostgreSQL cluster", "DNS records for these hosts"). Scores are serializable, cloneable, idempotent.
-
-**Topology** — infrastructure capabilities. Represents *where* things run and *what the environment can do*. Exposes capabilities as traits (`DnsServer`, `K8sclient`, `HelmCommand`, `LoadBalancer`, `Firewall`, etc.). Examples: `K8sAnywhereTopology` (local K3D or any K8s cluster), `HAClusterTopology` (bare-metal HA with redundant firewalls/switches).
-
-**Interpret** — execution glue. Translates a Score into concrete operations against a Topology's capabilities. Returns an `Outcome` (SUCCESS, NOOP, FAILURE, RUNNING, QUEUED, BLOCKED).
-
-**The key insight — compile-time safety through trait bounds:**
-```rust
-impl<T: Topology + DnsServer + DhcpServer> Score<T> for DnsScore { ... }
-```
-The compiler rejects any attempt to use `DnsScore` with a Topology that doesn't implement `DnsServer` and `DhcpServer`. Invalid infrastructure configurations become compilation errors, not runtime surprises.
-
-**Higher-order topologies** compose transparently:
- `FailoverTopology<T>` — primary/replica orchestration
- `DecentralizedTopology<T>` — multi-site coordination
-
-If `T: PostgreSQL`, then `FailoverTopology<T>: PostgreSQL` automatically via blanket impls. Zero boilerplate.
-
-## Architecture (Hexagonal)
-
-```
-harmony/src/
-├── domain/           # Core domain — the heart of the framework
-│   ├── score.rs      # Score trait (desired state)
-│   ├── topology/     # Topology trait + implementations
-│   ├── interpret/    # Interpret trait + InterpretName enum (25+ variants)
-│   ├── inventory/    # Physical infrastructure metadata (hosts, switches, mgmt interfaces)
-│   ├── executors/    # Executor trait definitions
-│   └── maestro/      # Orchestration engine (registers scores, manages topology state, executes)
-├── infra/            # Infrastructure adapters (driven ports)
-│   ├── opnsense/     # OPNsense firewall adapter
-│   ├── brocade.rs    # Brocade switch adapter
-│   ├── kube.rs       # Kubernetes executor
-│   └── sqlx.rs       # Database executor
-└── modules/          # Concrete deployment modules (23+)
-    ├── k8s/          # Kubernetes (namespaces, deployments, ingress)
-    ├── postgresql/   # CloudNativePG clusters + multi-site failover
-    ├── okd/          # OpenShift bare-metal from scratch
-    ├── helm/         # Helm chart inflation → vanilla K8s YAML
-    ├── opnsense/     # OPNsense (DHCP, DNS, etc.)
-    ├── monitoring/   # Prometheus, Alertmanager, Grafana
-    ├── kvm/          # KVM virtual machine management
-    ├── network/      # Network services (iPXE, TFTP, bonds)
-    └── ...
-```
-
-Domain types to know: `Inventory` (read-only physical infra context), `Maestro<T>` (orchestrator — calls `topology.ensure_ready()` then executes scores), `Outcome` / `InterpretError` (execution results).
-
-## Key Crates
-
-| Crate | Purpose |
-|---|---|
-| `harmony` | Core framework: domain, infra adapters, deployment modules |
-| `harmony_cli` | CLI + optional TUI (`--features tui`) |
-| `harmony_config` | Unified config+secret management (env → SQLite → OpenBao → interactive prompt) |
-| `harmony_secret` / `harmony_secret_derive` | Secret backends (LocalFile, OpenBao, Infisical) |
-| `harmony_execution` | Execution engine |
-| `harmony_agent` / `harmony_inventory_agent` | Persistent agent framework (NATS JetStream mesh), hardware discovery |
-| `harmony_assets` | Asset management (URLs, local cache, S3) |
-| `harmony_composer` | Infrastructure composition tool |
-| `harmony-k8s` | Kubernetes utilities |
-| `k3d` | Local K3D cluster management |
-| `brocade` | Brocade network switch integration |
-
-## OPNsense Crates
-
-The `opnsense-codegen` and `opnsense-api` crates exist because OPNsense's automation ecosystem is poor — no typed API client exists. These are support crates, not the core of Harmony.
-
- `opnsense-codegen`: XML model files → IR → Rust structs with serde helpers for OPNsense wire format quirks (`opn_bool` for "0"/"1" strings, `opn_u16`/`opn_u32` for string-encoded numbers). Vendor sources are git submodules under `opnsense-codegen/vendor/`.
- `opnsense-api`: Hand-written `OpnsenseClient` + generated model types in `src/generated/`.
-
-## Key Design Decisions (ADRs in docs/adr/)
-
- **ADR-001**: Rust chosen for type system, refactoring safety, and performance
- **ADR-002**: Hexagonal architecture — domain isolated from adapters
- **ADR-003**: Infrastructure abstractions at domain level, not provider level (no vendor lock-in)
- **ADR-005**: Custom Rust DSL over YAML/Score-spec — real language, Cargo deps, composable
- **ADR-007**: K3D as default runtime (K8s-certified, lightweight, cross-platform)
- **ADR-009**: Helm charts inflated to vanilla K8s YAML, then deployed via existing code paths
- **ADR-015**: Higher-order topologies via blanket trait impls (zero-cost composition)
- **ADR-016**: Agent-based architecture with NATS JetStream for real-time failover and distributed consensus
- **ADR-020**: Unified config+secret management — Rust struct is the schema, resolution chain: env → store → prompt
-
-## Capability and Score Design Rules
-
-**Capabilities are industry concepts, not tools.** A capability trait represents a standard infrastructure need (e.g., `DnsServer`, `LoadBalancer`, `Router`, `CertificateManagement`) that can be fulfilled by different products. OPNsense provides `DnsServer` today; CoreDNS or Route53 could provide it tomorrow. Scores must not break when the backend changes.
-
-**Exception:** When the developer fundamentally needs to know the implementation. `PostgreSQL` is a capability (not `Database`) because the developer writes PostgreSQL-specific SQL and replication configs. Swapping to MariaDB would break the application, not just the infrastructure.
-
-**Test:** If you could swap the underlying tool without rewriting any Score that uses the capability, the boundary is correct.
-
-**Don't name capabilities after tools.** `SecretVault` not `OpenbaoStore`. `IdentityProvider` not `ZitadelAuth`. Think: what is the core developer need that leads to using this tool?
-
-**Scores encapsulate operational complexity.** Move procedural knowledge (init sequences, retry logic, distribution-specific config) into Scores. A high-level example should be ~15 lines, not ~400 lines of imperative orchestration.
-
-**Scores must be idempotent.** Running twice = same result as once. Use create-or-update, handle "already exists" gracefully.
-
-**Scores must not depend on execution order.** Declare capability requirements via trait bounds, don't assume another Score ran first. If Score B needs what Score A provides, Score B should declare that capability as a trait bound.
-
-See `docs/guides/writing-a-score.md` for the full guide.
-
-## Conventions
-
- **Rust edition 2024**, resolver v2
- **Conventional commits**: `feat:`, `fix:`, `chore:`, `docs:`, `refactor:`
- **Small PRs**: max ~200 lines (excluding generated code), single-purpose
- **License**: GNU AGPL v3
- **Quality bar**: This framework demands high-quality engineering. The type system is a feature, not a burden. Leverage it. Prefer compile-time guarantees over runtime checks. Abstractions should be domain-level, not provider-specific.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,6 +4,7 @@ members = [
  "examples/*",
  "private_repos/*",
  "harmony",
+  "harmony_zitadel_auth",
  "harmony_types",
  "harmony_macros",
  "harmony_tui",
@@ -28,6 +29,17 @@ members = [
  "harmony_node_readiness",
  "harmony-k8s",
  "harmony_assets", "opnsense-codegen", "opnsense-api",
+  "fleet/harmony-fleet-operator",
+  "fleet/harmony-fleet-agent",
+  "fleet/harmony-fleet-auth",
+  "fleet/harmony-fleet-deploy",
+  "fleet/harmony-fleet-e2e",
+  "harmony-reconciler-contracts",
+  "examples/fleet_server_install",
+  "examples/fleet_staging_install",
+  "nats/jwt",
+  "nats/callout",
+  "nats/integration-test-callout",
 ]

 [workspace.package]
@@ -63,7 +75,7 @@ kube = { version = "1.1.0", features = [
  "ws",
  "jsonpatch",
 ] }
-k8s-openapi = { version = "0.25", features = ["v1_30"] }
+k8s-openapi = { version = "0.25", features = ["v1_30", "schemars"] }
 # TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated https://github.com/sebastienrousseau/serde_yml
 serde_yaml = "0.9"
 serde-value = "0.7"
@@ -96,4 +108,14 @@ reqwest = { version = "0.12", features = [
 assertor = "0.0.4"
 tokio-test = "0.4"
 anyhow = "1.0"
-clap = { version = "4", features = ["derive"] }
+clap = { version = "4", features = ["derive", "env"] }
+# `websockets` enables `ws://` / `wss://` URL schemes. Without it the
+# connector parses the URL but treats it as a raw TCP connect (no TLS,
+# no HTTP Upgrade), so an agent connecting through the OKD edge-TLS
+# Route hangs for 30s on `expected INFO, got nothing` because the
+# router only speaks TLS+HTTPS on 443. The operator works without this
+# feature because it talks to NATS in-cluster on `nats://...:4222` (raw TCP).
+async-nats = { version = "0.45.0", features = ["websockets"] }
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+toml = "0.8"
--- a/ROADMAP/12-code-review-april-2026.md
+++ b/ROADMAP/12-code-review-april-2026.md
@@ -0,0 +1,116 @@
+# Phase 12: Code Review Items (April 2026)
+
+Items identified during the `feat/opnsense-codegen` PR review that require further design or cross-cutting work.
+
+## Completed in this PR
+
+- **1.1** Remove panic in `haproxy_service_to_harmony` — returns `None` with `warn!()` instead of panicking on invalid bind address
+- **1.2** Use `MacAddress` type from `harmony_types` in KVM module — replaced `String` MAC fields in `VmInterface`, `NetworkRef`, `DhcpHost`, and `set_interface_link`
+- **1.3** Compare both firewalls in `FirewallPairTopology::list_static_mappings` — warns on mismatch between primary and backup
+- **1.4** Remove no-op default for `LoadBalancer::ensure_wan_access` — now a required trait method
+- **2.1** Remove `wan_firewall_ports` from `LoadBalancerScore` — callers handle WAN access separately
+- **2.2** Add timeout to OKD bootstrap wait — 90min default, configurable via `HARMONY_OKD_BOOTSTRAP_TIMEOUT_MINUTES`
+
+## Tasks (deferred)
+
+### 12.1 Phased topology: LinuxHostTopology → KvmHostTopology
+
+**Priority**: HIGH
+**Status**: Not started
+**Related**: Phase 6 (KVM E2E tests)
+
+The `examples/opnsense_vm_integration/setup-libvirt.sh` shell script should be a Score using a phased topology approach. A `LinuxHostTopology` would be "promoted" to a `KvmHostTopology` after KVM packages are installed and libvirtd is running.
+
+Key design challenges:
+- Type-safe phase transitions (how does a topology gain new capabilities at runtime?)
+- Package installation as a Score (distro-agnostic or trait-based)
+- Service management (systemd enable/start) as a Score primitive
+
+This is a major architectural feature that enables the full bare-metal-to-VM pipeline without shell scripts.
+
+### 12.2 KvmHost validated type with compile-time macro
+
+**Priority**: MEDIUM
+**Status**: Not started
+**Related**: 12.1
+
+`KvmConnectionUri::RemoteSsh { host: String, username: String }` should become a validated `KvmHost` type with:
+- A `kvm_host!("root@hypervisor1")` macro for compile-time validation
+- Proper SSH URI parsing and validation
+- Integration with the phased topology (12.1)
+
+### 12.3 Unified directory module
+
+**Priority**: LOW
+**Status**: Not started
+**Related**: Phase 9 (SSO + Config Hardening)
+
+Currently four different directory patterns exist:
+- `HARMONY_DATA_DIR` in `harmony/src/domain/config/mod.rs` (lazy_static, `BaseDirs`)
+- `harmony_config` uses `ProjectDirs::from("io", "NationTech", "Harmony")`
+- `harmony_secret` uses `BaseDirs::data_dir().join("harmony")`
+- `openbao/setup.rs` has its own `keys_dir()` function
+
+Unify into a single `harmony_dirs` module providing: `data_dir()`, `cache_dir()`, `secrets_dir()`, `keys_dir(namespace)`.
+
+### 12.4 OpenBao unseal key storage — bootstrap secret management
+
+**Priority**: MEDIUM
+**Status**: Research needed
+**Related**: Phase 9 (SSO + Config Hardening), task 9.8 (auto-unseal)
+
+The chicken-and-egg problem: OpenBao needs to be initialized before it can be used as a secret store, but its unseal keys need to be stored somewhere. Current approach stores them as a local JSON file with 0600 permissions.
+
+Industry solutions to evaluate:
+- Upstream OpenBao/Vault storing downstream seal keys (transit auto-unseal)
+- HSM-backed auto-unseal (cloud KMS or on-prem HSM)
+- TPM-based local encryption
+- Shamir-split recovery with multiple administrators
+- TOTP-based vault (mentioned in review)
+
+No perfect solution exists. This requires threat modeling specific to the decentralized micro-datacenter use case.
+
+### 12.5 Use `vaultrs` crate for type-safe OpenBao provisioning
+
+**Priority**: MEDIUM
+**Status**: Not started
+**Related**: Phase 9
+
+Replace `kubectl exec bao ...` shell commands in `openbao/setup.rs` with typed `vaultrs` API calls. The `vaultrs` 0.7.4 crate (already a dependency in `harmony_secret`) provides full coverage:
+
+| Current shell command | vaultrs equivalent |
+|---|---|
+| `bao operator init` | `vaultrs::sys::start_initialization()` |
+| `bao operator unseal` | `vaultrs::sys::unseal()` |
+| `bao secrets enable kv-v2` | `vaultrs::sys::mount::enable()` |
+| `bao auth enable userpass` | `vaultrs::sys::auth::enable()` |
+| `bao policy write` | `vaultrs::sys::policy::set()` |
+| `bao write auth/userpass/users/...` | `vaultrs::auth::userpass::user::set()` |
+| `bao auth enable jwt` | `vaultrs::sys::auth::enable()` |
+| JWT config + role | `vaultrs::auth::oidc::config::set()` + `role::set()` |
+
+**Prerequisite**: Requires port-forward or ingress to OpenBao (currently uses `kubectl exec` into the pod). Consider adding a `K8sPortForward` utility to `harmony-k8s`.
+
+### 12.6 Topology proliferation — opinionated topologies leaking into narrow use cases
+
+**Priority**: MEDIUM
+**Status**: Not started
+**Related**: 12.1 (phased topology), `feat/install-reconcile-operator-score`
+
+`K8sAnywhereTopology` and `HAClusterTopology` have accumulated opinions — cert-manager install, tenant manager setup, helm probes, TLS passthrough, SSO wiring — that make them unfit for narrow, ad-hoc Score execution. Calling `ensure_ready()` on `K8sAnywhereTopology` to apply a single CRD installs a full product stack as a side effect; that's the opposite of what "make me ready" should mean.
+
+Concrete example: `fleet/harmony-fleet-operator/src/install.rs` needed a topology that satisfies `K8sclient` for a single `K8sResourceScore::<CustomResourceDefinition>` apply. `K8sAnywhereTopology` was wrong (too heavy); `HAClusterTopology` was wrong (bare-metal). Work-around: a 30-line inline `InstallTopology` that wraps a pre-built `K8sClient` and has a noop `ensure_ready`. That file flags the architectural smell in its doc comment and points back to this entry.
+
+If every narrow Score ends up vendoring its own ad-hoc topology, we get exactly the proliferation this entry is meant to prevent.
+
+**Design direction (to be refined, not prescribed):**
+
+- A **minimal ad-hoc topology** in harmony — `K8sBareTopology` or similar — that carries a `K8sClient` and implements `K8sclient` + noop `ensure_ready`. One screen of code. Consumed by any Score that just needs to apply a typed resource against an existing cluster.
+- Existing opinionated topologies (`K8sAnywhereTopology`) stay, but grow a clear doctrine: `ensure_ready` is for *their* product setup, callers who don't need that product use the bare topology.
+- Longer-term: unbundle the product-setup logic from `K8sAnywhereTopology::ensure_ready` into discrete Scores the product compositions explicitly run — so the distinction between "I'm installing a cluster" and "I'm using a cluster" is a composition choice, not a topology choice.
+
+**What "good" looks like:**
+
+- Adding a new ad-hoc Score against k8s doesn't require inventing a new topology.
+- `K8sAnywhereTopology` stops being the default reach and starts being a deliberate product choice.
+- Test: can we delete the inline `InstallTopology` in `fleet/harmony-fleet-operator/src/install.rs` by replacing it with a one-liner `K8sBareTopology::from_env()`? That's the smoke test for "we fixed the proliferation."
--- a/ROADMAP/fleet_platform/v0_1_plan.md
+++ b/ROADMAP/fleet_platform/v0_1_plan.md
@@ -0,0 +1,399 @@
+# IoT Platform v0.1 and beyond — forward plan
+
+Authoritative forward plan for the NationTech decentralized-infra /
+IoT platform, written after the v0 walking skeleton shipped
+(see `v0_walking_skeleton.md` for the historical diary). Organized as
+six chapters, roughly in execution order.
+
+## State of the world (as of 2026-04-23)
+
+**Green, end-to-end:**
+
+- CRD → operator → NATS JetStream KV write path (`smoke-a1.sh`).
+- Agent watches KV, reconciles podman containers (`smoke-a1.sh`).
+- VM-as-device provisioning: cloud-init + fleet-agent install + NATS
+  smoke (`smoke-a3.sh`), x86_64 (native KVM) and aarch64 (TCG).
+- Power-cycle / reboot resilience (`smoke-a3.sh` phase 5).
+- aarch64 cross-compile of the agent (no Harmony modules need to
+  feature-gate aarch64).
+- Operator installed via a harmony Score (typed Rust, no yaml).
+- `harmony-reconciler-contracts` crate — cross-boundary types
+  (bucket names, key helpers, `DeviceInfo`, `DeploymentState`,
+  `HeartbeatPayload`, `DeploymentName`, `Id` re-export).
+
+**Chapter 1 shipped** (2026-04-21): composed end-to-end demo
+(`smoke-a4.sh`) — operator in k3d + in-cluster NATS + ARM VM +
+typed-Rust CR applier + hand-off menu + `--auto` regression. Green
+on x86_64 (native KVM) and aarch64 (TCG).
+
+**Chapter 2 shipped** (2026-04-23): selector-based targeting +
+Device CRD + `.status.aggregate` reflect-back.
+`Deployment.spec.targetSelector: LabelSelector` resolves against cluster-scoped
+`Device` CRs materialized from NATS `device-info`. Operator writes
+`desired-state` KV per matched pair, patches
+`.status.aggregate` (matchedDeviceCount / succeeded / failed /
+pending / lastError) at 1 Hz. Load-tested to 10 000 devices ×
+1 000 Deployments at 10 000 KV writes/s sustained, zero errors.
+
+**Not yet wired (real v0.1 work still to go):**
+
+- Helm packaging of the operator (Chapter 3).
+- Zitadel + OpenBao auth (per-device credentials, SSO for
+  operator users). Placeholder `CredentialSource` trait on the
+  agent side (Chapter 4).
+- Any frontend (Chapter 5).
+- Small quality items (not blockers): agent config-driven labels,
+  `matchExpressions` in selectors, `Device.status.conditions`
+  populated from heartbeat staleness.
+
+**Verified during planning** (so future implementation doesn't
+have to re-litigate):
+
+- **Upgrade already works.** `reconciler.rs::apply` byte-compares
+  serialized score payloads; drift triggers re-reconcile.
+  `PodmanTopology::ensure_service_running` removes then re-creates
+  containers on spec drift. No "stale + new" window.
+- **The polymorphism stays.** `ReconcileScore` is an externally-tagged
+  enum; adding `OkdApplyV0` later is additive.
+
+**Surprises since v0 started** (for context, none architectural):
+
+- Arch `edk2-aarch64-202602-2` shipped empty firmware blobs;
+  `202508-1` ships unpadded edk2 that needs 64 MiB pflash padding.
+  Fixed via runtime discovery + padding in `modules/kvm/firmware.rs`.
+- MTTCG isn't default for cross-arch TCG on QEMU 10.2; force via
+  `qemu:commandline` override. `pauth-impdef=on` likewise a
+  qemu:commandline opt-in.
+- `ensure_vm` is idempotent on "domain exists" — re-apply of a
+  changed XML requires manual `undefine --nvram --remove-all-storage`.
+  Noted as a follow-up in the code comments.
+
+---
+
+## Chapter 1 — Hands-on end-to-end demo **[SHIPPED 2026-04-21]**
+
+**Goal:** the user runs one command, watches operator + NATS + ARM
+VM come up, then drives a CRD through the full loop by hand:
+`kubectl apply` it (manually or via a typed Rust applier), watch the
+operator log "acquired," check the NATS KV store with `natsbox`,
+SSH/console into the VM, `curl` the running nginx container from
+the workstation.
+
+### User-facing requirements (explicit)
+
+- **No yaml fixtures.** Sample `Deployment` CRs constructed in
+  typed Rust using `DeploymentSpec` + `PodmanV0Score`. Same
+  discipline as the `install` Score that replaced `gen-crd | kubectl
+  apply`.
+- **ArgoCD deferred.** User's production clusters have it; bringing
+  it into the smoke harness adds setup overhead without validating
+  anything `helm install` doesn't. Chapter 3 produces the chart;
+  ArgoCD integration is a later operational concern.
+- **Operator logs every CR it acquires** — `controller.rs` already
+  does `tracing::info!(%ns, %name, "reconcile")`; verify the output
+  reads well in the command-menu hand-off.
+- **natsbox debugging is first-class.** Script prints exact
+  natsbox one-liners at hand-off so the user can inspect KV state.
+- **In-cluster NATS.** Not a side-by-side podman container (as
+  smoke-a1 does today). Expose to the libvirt VM via k3d
+  loadbalancer port mapping.
+
+### Design decisions
+
+- **Rust CR applier.** New binary `examples/harmony_apply_deployment/`.
+  CLI flags `--name --namespace --target-device --image --port
+  --delete`. Constructs the `Deployment` CR via
+  `kube::Api<Deployment>` + typed `DeploymentSpec`; calls
+  `api.apply(...)`. Can also `--print` the CR JSON to stdout so
+  `kubectl apply -f -` still works from the terminal.
+- **smoke-a4.sh orchestration stays bash for now.** User agreed
+  this is test-harness scope, not framework path; converting it
+  to Rust is "not as important right now."
+- **Hand-off is the default mode**, not `--keep`. The whole point
+  of Chapter 1 is that the user drives the last stage interactively.
+  `smoke-a4.sh` brings everything up, applies *nothing*, prints
+  the command menu, waits on `INT/TERM` to tear down. `--auto`
+  runs the full apply/curl/upgrade/delete regression for CI.
+- **In-cluster NATS path.** Preferred: use `harmony::modules::nats`
+  if it has a lightweight single-node / no-supercluster mode.
+  Fallback: typed `K8sResourceScore` applying a minimal Deployment
+  + NodePort Service. 15-min research task before committing.
+
+### Composed smoke phases (`smoke-a4.sh`)
+
+1. k3d cluster up with `-p "4222:4222@loadbalancer"` so the host
+   port 4222 forwards into the cluster. Reachable from the
+   libvirt VM via the gateway IP (typically `192.168.122.1:4222`).
+2. NATS in-cluster via the chosen path (harmony module or direct
+   K8sResourceScore). Wait for readiness.
+3. Install CRD via the operator's `install` subcommand (typed Rust).
+4. Spawn operator as a host-side process (same pattern as
+   smoke-a1). Operator connects to `nats://localhost:4222`.
+5. Provision ARM VM via `example_iot_vm_setup` (same entry point
+   smoke-a3 uses). Agent configured to connect to
+   `nats://<libvirt_gateway>:4222` — discover the gateway IP via
+   `virsh net-dumpxml default`, as smoke-a3 already does.
+6. Sanity: `kubectl wait ... crd Established`, operator logged
+   "KV bucket ready", agent logged "watching KV keys",
+   `status.<device>` present in `agent-status` bucket.
+7. Hand off. Print the command menu below, then wait; on `INT/TERM`
+   the cleanup trap tears everything down and the script exits 0.
+
+### Command menu at hand-off
+
+- `kubectl get deployments.fleet.nationtech.io -A -w` — watch CR
+  reconcile reactively.
+- `cargo run -q -p example_harmony_apply_deployment -- --image
+   nginx:latest --target-device $TARGET_DEVICE` — apply an nginx
+  deployment via typed Rust.
+- `cargo run -q -p example_harmony_apply_deployment -- --print
+   --image nginx:latest --target-device $TARGET_DEVICE |
+   kubectl apply -f -` — same thing, through kubectl.
+- `ssh -i $SSH_KEY fleet-admin@$VM_IP` — connect to the VM.
+- `virsh console $VM_NAME --force` — serial console alternative.
+- `podman --url ssh://fleet-admin@$VM_IP/... ps` or ssh + `podman ps`
+  — list containers on the VM from the workstation.
+- `podman run --rm docker.io/natsio/nats-box nats --server
+   nats://localhost:4222 kv ls desired-state` — list desired
+  state keys (from the host).
+- `podman run --rm ... nats kv get desired-state
+   '<device>.<deployment>' --raw` — dump a specific desired state.
+- `podman run --rm ... nats kv get agent-status
+   'status.<device>' --raw` — dump the heartbeat.
+- `curl http://$VM_IP:8080/` — hit the deployed nginx.
+
+### `--auto` path (for regression)
+
+1. Apply `nginx:latest`, wait for container on VM, `curl` 200.
+2. Apply `nginx:1.26` (upgrade), wait for container *id* to change,
+   `curl` 200 against the new container.
+3. Apply `--delete`, wait for container gone from VM.
+
+### Files
+
+- **NEW** `examples/harmony_apply_deployment/Cargo.toml` +
+  `src/main.rs` — typed applier.
+- **NEW** `fleet/scripts/smoke-a4.sh`.
+- **NO yaml fixtures.** Rust CLI flags cover the shape.
+- Optional: factor shared smoke phases (NATS up, k3d up, operator
+  spawn, VM provision) into `fleet/scripts/lib/` if the duplication
+  across a1/a3/a4 becomes obvious. Don't force it.
+
+### NATS exposure — implementation-time notes
+
+- k3d `@loadbalancer` port mapping binds the host's `0.0.0.0:4222`
+  by default; libvirt VMs on `virbr0` can reach it via the gateway
+  IP. No special NAT config required.
+- Fallback if environmental snag: keep the side-by-side podman
+  container on an opt-in `NATS_MODE=podman` flag. Don't default
+  to that — user explicitly asked for in-cluster.
+
+### Verification
+
+- Fresh host: `ARCH=aarch64 ./fleet/scripts/smoke-a4.sh` completes
+  in 8-15 min, prints the command menu.
+- `ARCH=aarch64 ./fleet/scripts/smoke-a4.sh --auto` PASSes
+  end-to-end including upgrade id-change assertion.
+- x86_64 (`ARCH=x86-64`) completes in 2-5 min.
+
+### Explicitly out of scope
+
+- `AgentStatus` / `DeploymentStatus` enrichment — Chapter 2.
+- Helm chart, ArgoCD, auth, frontend — later chapters.
+- Lifting the applier into a reusable `ApplyDeploymentScore` —
+  only if a second consumer appears.
+
+---
+
+## Chapter 2 — Status reflect-back + selector-based targeting **[SHIPPED 2026-04-23]**
+
+**Goal:** CRD `.status` reflects fleet reality — per-deployment
+success/failure/pending counts, last-error surface, freshness. The
+Deployment CR targets devices by label selector, not by id list.
+
+> The shipped design replaces the original `AgentStatus` + list-of-ids
+> proposal wholesale. See `chapter_4_aggregation_scale.md` for the
+> superseded design-doc archaeology. Commits:
+> `refactor(iot): delete legacy AgentStatus path`,
+> `refactor(iot): operator watches device-state KV directly; drop event stream`,
+> `refactor(iot): Deployment.targetSelector + Device CRD (DaemonSet-like)`.
+
+### What shipped
+
+**Wire format** (in `harmony-reconciler-contracts`): four per-concern
+payloads on dedicated NATS KV buckets. No monolithic per-device blob,
+no separate event stream.
+
+| Type | Bucket | Cadence |
+|------|--------|---------|
+| `DeviceInfo` | `device-info` | on startup + label/inventory change |
+| `DeploymentState` | `device-state` | on reconcile phase transition |
+| `HeartbeatPayload` | `device-heartbeat` | every 30 s |
+
+**CRDs.** Two custom resources:
+
+- `Deployment` (namespaced) — `spec.targetSelector: LabelSelector`
+  (standard K8s `matchLabels` / `matchExpressions`). No device list
+  on spec. `.status.aggregate` carries `matchedDeviceCount`,
+  `succeeded`, `failed`, `pending`, `lastError`.
+- `Device` (cluster-scoped, like `Node`) — `metadata.labels` carries
+  the device's routing labels; `spec.inventory` holds the hardware/OS
+  snapshot; `status.conditions` is reserved for liveness (populated
+  lazily by a future heartbeat-freshness reconciler, not every ping).
+
+**Operator tasks** (three concurrent loops in one process):
+
+1. `controller` — validates Deployment CR names, holds the finalizer
+   that cleans `desired-state.<device>.<deployment>` KV entries on
+   delete. No writes on apply (aggregator handles that).
+2. `device_reconciler` — watches the `device-info` KV;
+   server-side-applies a `Device` CR per `DeviceInfo` payload, with
+   label sanitization. Agents remain kube-unaware.
+3. `fleet_aggregator` — three caches driven by watches (Deployment
+   CRs, Device CRs, `device-state` KV). On any change, resolves
+   each selector against the Device cache, writes/deletes
+   `desired-state` KV entries for diffed matches, and patches
+   `.status.aggregate` at 1 Hz for the CRs whose counters moved.
+
+**Agents** publish `device-id=<id>` as a default DeviceInfo label, so
+targeting a single device with `matchLabels: {device-id: pi-42}` is
+zero-config. User-defined labels layer on from agent config (scoped
+out of this chapter; follow-up item).
+
+### Scale proof
+
+`fleet/scripts/load-test.sh` + `examples/fleet_load_test` simulate N
+devices across M Deployments, driving `device-state` KV updates at a
+configurable cadence while the full operator stack runs against a
+local k3d apiserver. Verified:
+
+- 100 devices / 10 groups / 1 Hz / 60 s — 100 writes/s sustained,
+  all 10 CR aggregates converge.
+- 10 000 devices / 1 000 groups / 1 Hz / 120 s — ~10 000 writes/s
+  sustained, 0 errors, all 1 000 CR aggregates correct
+  (`matchedDeviceCount == expected`, `succeeded + failed + pending
+  == matched`). Same envelope before and after the selector rewrite.
+
+### Out of scope in this chapter (follow-ups)
+
+- Agent config-driven labels (`[labels]` in agent toml → DeviceInfo).
+  ~30 lines; deferred until a concrete need lands.
+- `matchExpressions` evaluator. Operator currently supports
+  `matchLabels` only and logs a warning for expression-bearing
+  selectors. ~50 lines; deferred.
+- `Device.status.conditions` populated from heartbeat staleness
+  (Reachable / Stale transitions). Liveness is computable today by
+  reading `device-heartbeat` directly; CR-side reflection is a
+  convenience. ~100 lines; deferred.
+- Full journald log streaming. The `.status.aggregate.lastError`
+  surface covers the user's reflect-back requirement for now.
+- Multi-device regression smoke — defer until real hardware or a
+  second VM is around.
+
+---
+
+## Chapter 3 — Helm chart (ArgoCD deferred)
+
+**Goal:** operator ships as a versioned helm chart with CRD
+version-locked inside.
+
+User clarified this session: ArgoCD exists in production; all it
+does is apply resources from the chart. Standing up ArgoCD in the
+smoke adds setup overhead with no incremental validation value.
+
+Chapter 3 produces the chart + validates `helm install / helm
+upgrade` lifecycles. ArgoCD consumption is a user operational
+concern downstream.
+
+### Sketch
+
+- Chart location: `fleet/harmony-fleet-operator/chart/` (or sibling repo —
+  defer decision to implementation time).
+- Templates: Namespace, SA, ClusterRole, ClusterRoleBinding,
+  Deployment (operator pod), CRD.
+- **CRD yaml in the chart is generated at chart-publish time** from
+  the Rust `Deployment::crd()`. One-off release artifact, not
+  framework path — consistent with "no yaml in framework code."
+- Values: operator image tag, NATS URL, log level.
+- Smoke: `helm install` into k3d → CR apply → same assertions as
+  Chapter 1.
+
+### Open questions
+
+- Chart repo: subdir vs. separate git repo.
+- CRD install mechanism: chart hook vs. templates directory.
+  Drives CRD upgrade story.
+
+---
+
+## Chapter 4 — Auth: Zitadel + OpenBao + per-device identity
+
+**Goal:** per-device granular NATS credentials; SSO for operator
+users; OpenBao policy per device; JWT bootstrap from Zitadel.
+
+Zitadel + OpenBao are already ~99% integrated in harmony; this
+chapter is wiring the IoT-specific flows.
+
+### Sketch
+
+- Agent's `CredentialSource` trait (already abstract in agent
+  `config.rs`) gets a Zitadel-JWT-backed implementation. Mints
+  short-lived NATS creds via OpenBao auth callout.
+- Remove the shared-credentials `toml-shared` variant (v0 demo
+  leftover).
+- Availability: auth-callout caches policies, tolerates OpenBao
+  outages.
+- SSO for operator users (separate flow): Zitadel groups →
+  Kubernetes RBAC subjects on the `Deployment` CRD.
+
+---
+
+## Chapter 5 — Frontend (last)
+
+**Goal:** operator-friendly UI for the decentralized platform.
+
+Form factor undecided: Leptos web dashboard, CLI extension to
+`harmony_cli`, or a TUI. Minimum viable product: read-only view of
+fleet state (devices + deployments + aggregated status) powered by
+the CRD `.status` from Chapter 2. Aspiration: write operations with
+auth from Chapter 4.
+
+---
+
+## Chapter 6 — Customer demo rehearsal **[in progress]**
+
+48-hour customer demo prep. PO assessment concluded that promising a
+real-OKD deployment without first proving the JWT-auth chain is
+reckless. **VM-based rehearsal first**, OKD second.
+
+The rehearsal extends `smoke-a4` (k3d + libvirt VM + agent + apply
+CR + reconcile podman) with **Zitadel + auth callout + agent JWT
+auth**. Two devices + one admin. Same code paths as production —
+only the cluster topology differs.
+
+Detailed plan: [`v0_demo_e2e.md`](v0_demo_e2e.md).
+
+Once the VM rehearsal is green (success criteria in that doc), the
+residual deltas to ship to real OKD are configuration, not new code.
+
+---
+
+## Principles — what we've learned and want to keep doing
+
+- **No yaml in framework code paths.** Every kube-rs type is
+  typed; every Score apply goes through typed Rust. Yaml generation
+  happens only at chart-publish time, never at runtime.
+- **Scores describe desired state; topologies expose capabilities.**
+  Prefer adding capability traits over thickening a single topology.
+- **Minimal topologies for ad-hoc Score execution.** `K8sAnywhereTopology`
+  has too many opinions (cert-manager install, tenant-manager bootstrap,
+  helm probes) for narrow apply-a-CRD use cases. See ROADMAP
+  §12.6 — a lean shared `K8sBareTopology` is the durable fix.
+- **Cross-boundary wire types in `harmony-reconciler-contracts`**,
+  everything else in its natural crate.
+- **Never ship untested code.** Every commit that changes runtime
+  behavior is verified against a smoke script before landing.
+  Cargo check + unit tests aren't enough.
+- **Prove claims about upstream before blaming upstream.** The
+  Arch edk2 investigation showed this matters; see
+  `memory/feedback_prove_before_blaming_upstream.md`.
--- a/ROADMAP/fleet_platform/v0_2_plan.md
+++ b/ROADMAP/fleet_platform/v0_2_plan.md
@@ -0,0 +1,231 @@
+# Fleet Platform v0.2 — 3-day production push
+
+Authoritative plan for the next three days. Picks up where
+`v0_1_plan.md` left the chapter structure and supersedes its forward
+chapters where they conflict. Written 2026-05-06, end of the
+`feat/iot-walking-skeleton` branch (31 996 LOC, 184 commits).
+
+## State coming in
+
+- Skeleton end-to-end works against an OKD staging cluster: Zitadel
+  + NATS + auth callout + operator + agent (one VM today, real Pi
+  tomorrow). Verified by hand 2026-05-06.
+- ~10 ancillary PRs still open across the team. Branch graph is
+  noisy.
+- `harmony/modules/fleet/` is the wrong long-term home for the fleet
+  code. Flagged in the April 2026 code review. Reasons we kept it
+  there during bring-up are subtle (cross-module dependencies on
+  `K8sAnywhereTopology`, `HelmChartScore`, `K8sResourceScore`,
+  `harmony_secret`, the `Topology` capability traits) — those need
+  to be written down before the file move, not after. **ADR
+  pending; not started yet.**
+- Agent upgrade path is undefined. Without it we cannot ship a
+  v0.1 agent into the field.
+- ~408 compilation warnings. Not blocking, but the count must reach
+  0 before we put `-Dwarnings` in CI.
+
+## Strategy
+
+This isn't 10 weeks of scaffolding. It's three days of locking the
+**API surface** so the inevitable refactor — moving fleet out of
+`harmony/modules/fleet/` into `fleet/harmony-fleet/`, splitting
+`K8sAnywhereTopology` into `K8sBareTopology`, etc. — is mechanical
+when we get to it.
+
+The frame from JG's *Pour l'amour des compilateurs* talk applies
+directly: **design the brick before moving the brick.** Physical
+relocation is cheap. Redesigning a public API after customers
+depend on it is expensive. We use these three days to make sure
+the type-level contract is what we want it to be at v1.0, even if
+the file paths still smell like v0.1.
+
+## Day 1 — Lock the brick design
+
+**Goal:** a fleet façade stable enough to ship to production and
+refactor freely afterwards.
+
+### 1.1 Decompose `FleetDeviceAuth` to *resolved states only*
+
+Today: `TomlShared | ZitadelJwt | ZitadelEnroll`. Cardinality 3.
+
+After: `ZitadelJwt`-shape only. Cardinality 1.
+
+- `TomlShared` — v0 dev cruft, no production caller. Delete.
+- `ZitadelEnroll` — *pre-resolution* state (carries unresolved
+  admin credentials). Doesn't belong in a type that represents
+  "the agent's NATS auth on disk". Move to its own type
+  (`DeviceEnrollmentIntent`) used only by the enrollment Score
+  + binary. Resolution produces a `ZitadelJwt` and that's what
+  the agent sees.
+
+The `render_toml` match on `&self.auth` collapses to one arm. The
+"is this resolved yet?" branch class disappears. Test
+`render_toml_zitadel_enroll_renders_same_as_zitadel_jwt` becomes
+unnecessary (the question is undefined; you can't render an
+unresolved auth).
+
+### 1.2 Define the `fleet` façade
+
+What does code outside the fleet module see? Today that's a deep
+walk into `harmony::modules::fleet::operator::chart::ChartOptions`.
+Leakage. Lock the seam:
+
+```text
+harmony::modules::fleet::
+    FleetServerScore         (existing — composed install)
+    FleetDeviceEnrollScore   (new — wraps fleet_device_enroll)
+    FleetDeviceSetupScore    (existing — keeps API)
+    FleetDeviceAuth          (resolved-only, per 1.1)
+    AdminAuth                (existing)
+
+    // sealed:
+    operator::                pub(crate)
+    setup_score's internals   pub(crate)
+    chart::                   pub(crate)
+```
+
+Once locked, the *file location* doesn't matter. `pub use`
+re-exports preserve callers' imports across the eventual physical
+move.
+
+### 1.3 Defer the placement ADR
+
+JG isn't satisfied with the design yet. ADR-021 stays in *proposed*
+limbo until the seam from 1.2 is committed and we've lived with it
+for a sprint.
+
+**Day 1 done when:** fleet façade committed, `TomlShared` and
+`ZitadelEnroll` removed from `FleetDeviceAuth`, every existing
+caller compiles unchanged, no file moves.
+
+---
+
+## Day 2 — Polish E2E + ship the upgrade ADR
+
+Two streams in parallel.
+
+### Stream A — E2E hardening (~½ day)
+
+- **A.1 Operator graceful degradation on bad device_id.** The CLI
+  now rejects bad ids upfront, but a stray bad KV entry shouldn't
+  take the operator down. Log + skip, don't restart-loop.
+- **A.2 Persist `nats_auth_pass` and the issuer NKey via
+  `harmony_secret`.** The regenerate-every-run footgun bit us
+  twice on 2026-05-06. Make these `Secret`s the same way `NatsAdmin`
+  and `ZitadelAdmin` already are.
+- **A.3 Single regression script.** `fleet/scripts/e2e-prod-shape.sh`.
+  Full bring-up + enroll + assert against a target cluster. Same
+  shape as the existing `smoke-a*.sh`. CI consumes this later.
+
+### Stream B — ADR-022: Agent upgrade procedure (~½ day)
+
+The ADR is the deliverable, not the implementation. Specifies the
+mechanism so anyone can implement it later without inventing the
+design. See `docs/adr/022-fleet-agent-upgrade.md`.
+
+Summary of the design (full detail in the ADR):
+
+- **K8s rolling-update shape, single-host.** Wait for in-flight
+  reconciles to complete + all managed services healthy + a
+  scheduling lock from the operator before swapping.
+- **Versioned binary layout on disk:**
+  ```
+  /usr/bin/fleet-agent-v0.1.1
+  /usr/bin/fleet-agent-v0.1.2
+  /usr/local/bin/fleet-agent  → symlink to current
+  ```
+  No version is ever erased — N-history is the rollback target.
+- **Old verifies new + reports up.** Old agent stages new,
+  smoke-tests it (`--self-test`), starts it, watches for the new
+  agent's heartbeat to land in NATS with the new version. Only then
+  does the operator know the upgrade succeeded.
+- **Operator drives the cutover.** Operator sends an explicit stop
+  signal to the old agent over NATS. Old agent exits cleanly. New
+  agent is already running and takes over.
+- **Reverse path is identical.** Roll back = operator publishes
+  desired_version = previous; new agent does the same dance to
+  hand off to old.
+
+**Day 2 done when:** A.1–A.3 committed, ADR-022 landed, regression
+script green against staging.
+
+---
+
+## Day 3 — Production deploy
+
+**Goal:** customer cluster on v0.1, runbook accurate, signed off.
+
+- **3.1** Tag `v0.1.0` from `master` after `feat/iot-walking-skeleton`
+  is merged.
+- **3.2** Run `e2e-prod-shape.sh` against the customer's prod OKD
+  cluster. Every diff between scripted and reality goes back into
+  the script — so the script *is* the runbook.
+- **3.3** Production-shape doc twin of
+  `docs/guides/fleet-staging-install.md`. Deltas only, ~50 lines.
+- **3.4** `docs/guides/fleet-device-enrollment.md` — operator-facing
+  enrollment runbook. Captures the SSO `--admin-oidc-client-id`
+  resolution and the `--device-id` RFC1123 validation we locked in
+  on 2026-05-06.
+- **3.5** Operational basics: revoke a device, rotate a key, read
+  the operator's logs, read NATS. Bullet lists are fine —
+  bullet-list-quality docs beat missing docs.
+
+**Day 3 done when:** customer's prod cluster runs real workloads,
+the runbook is what we actually used, and we'd be comfortable
+handing operations to someone else.
+
+---
+
+## In parallel — frontend (junior, ~1 week, target Day 5 merge)
+
+Junior owns end-to-end. Spec:
+
+- **F.1** Read-only Leptos SPA. Devices + Deployments + per-device
+  drilldown (DeviceInfo + last-heartbeat + agent version).
+- **F.2** NATS tail panel. SSE stream of `device-info` and
+  `device-state` updates, plain text.
+- **F.3** Served by the operator pod itself (one less Deployment).
+  SSO via the existing Zitadel device-code app (`harmony-cli`).
+- **F.4** **Not** in v0.1: write paths, metrics dashboards,
+  fleet-wide rollout views, NATS GUI. None of those.
+
+This validates the platform is observable from outside the
+operator's logs — the customer's specific ask.
+
+---
+
+## What slips to v0.2+ (post-prod backlog)
+
+No calendar pressure on these; sequence after we see real customer
+usage.
+
+| Item | Why deferred | Cost when we do it |
+|---|---|---|
+| Pluggable `harmony` CLI (kubectl-style PATH discovery) + `harmony-fleet` plugin | Customer doesn't run it themselves yet; we do. Examples are good enough. | ~1 week, mostly rename/restructure given Day 1's API freeze. |
+| Physical refactor of `harmony/modules/fleet/` → `fleet/harmony-fleet/` | The Day-1 façade settles the design; the move is mechanical and the ADR for it is still in draft. | ~2 days. |
+| Agent upgrade implementation (ADR ships Day 2; impl later) | First customer fleet is small enough to hand-upgrade if needed. | ~1 week. |
+| ArgoCD chart publishing | Customer uses ArgoCD downstream but their initial deploy goes through harmony directly. | ~3 days. |
+| Full CI e2e (k3d nightly + libvirt + OKD daily) | Manual rehearsal works for one customer. | ~1 week + runner capacity. |
+| OpenBao integration (replaces `ZitadelClientConfig` cache file) | Cache file works for single-operator use; OpenBao is the multi-operator answer. | ~1 week. |
+| `harmony run <ScoreName> --field=value` ad-hoc Score CLI | No v0.1 customer flow needs it. | ~2 weeks (Score-flag derive macro is the hard part). |
+| Fleet-wide rollout strategies (canary, %-based) on top of the agent-upgrade primitive | Single-device upgrade is sufficient until >100-device fleets. | ~1 week. |
+| Drop `K8sAnywhereTopology` for ad-hoc Score execution; introduce `K8sBareTopology` | Per the existing v0_1 §"Principles". Not blocking prod. | ~3 days. |
+
+---
+
+## Principles (carried over from v0_1 in abridged form, still load-bearing)
+
+- **No yaml in framework code paths.** Typed kube-rs everywhere.
+- **Scores describe desired state; topologies expose capabilities.**
+- **Cross-boundary wire types in `harmony-reconciler-contracts`.**
+- **Never ship untested code.**
+- **Prove claims about upstream before blaming upstream.**
+
+Adding one for v0.2:
+
+- **Design the brick before moving the brick.** Lock the public API
+  contract first; physical relocation later. Cardinality-matched
+  types, "make impossible states impossible" — the type system is
+  the deterministic feedback loop that scales with LLM-era code
+  generation throughput. (See JG's *Pour l'amour des compilateurs*,
+  Botpress Meetup, 2026-04-30.)
--- a/brocade/examples/main.rs
+++ b/brocade/examples/main.rs
@@ -58,6 +58,8 @@ async fn main() {
    }

    println!("--------------");
+    #[allow(unreachable_code)]
+    {
        todo!();
        let channel_name = "1";
        brocade.clear_port_channel(channel_name).await.unwrap();
@@ -72,4 +74,5 @@ async fn main() {
            .create_port_channel(channel_id, channel_name, &ports)
            .await
            .unwrap();
+    }
 }
--- a/brocade/src/fast_iron.rs
+++ b/brocade/src/fast_iron.rs
@@ -140,7 +140,7 @@ impl BrocadeClient for FastIronClient {

    async fn configure_interfaces(
        &self,
-        _interfaces: &Vec<(String, PortOperatingMode)>,
+        _interfaces: &[(String, PortOperatingMode)],
    ) -> Result<(), Error> {
        todo!()
    }
--- a/brocade/src/lib.rs
+++ b/brocade/src/lib.rs
@@ -208,7 +208,7 @@ pub trait BrocadeClient: std::fmt::Debug {
    /// Configures a set of interfaces to be operated with a specified mode (access ports, ISL, etc.).
    async fn configure_interfaces(
        &self,
-        interfaces: &Vec<(String, PortOperatingMode)>,
+        interfaces: &[(String, PortOperatingMode)],
    ) -> Result<(), Error>;

    /// Scans the existing configuration to find the next available (unused)
--- a/brocade/src/network_operating_system.rs
+++ b/brocade/src/network_operating_system.rs
@@ -115,8 +115,8 @@ impl NetworkOperatingSystemClient {
    fn map_configure_interfaces_error(&self, err: Error) -> Error {
        debug!("[Brocade] {err}");

-        if let Error::CommandError(message) = &err {
-            if message.contains("switchport")
+        if let Error::CommandError(message) = &err
+            && message.contains("switchport")
            && message.contains("Cannot configure aggregator member")
        {
            let re = Regex::new(r"\(conf-if-([a-zA-Z]+)-([\d/]+)\)#").unwrap();
@@ -131,7 +131,6 @@ impl NetworkOperatingSystemClient {
                ));
            }
        }
-        }

        err
    }
@@ -187,7 +186,7 @@ impl BrocadeClient for NetworkOperatingSystemClient {

    async fn configure_interfaces(
        &self,
-        interfaces: &Vec<(String, PortOperatingMode)>,
+        interfaces: &[(String, PortOperatingMode)],
    ) -> Result<(), Error> {
        info!("[Brocade] Configuring {} interface(s)...", interfaces.len());

--- a/brocade/src/ssh.rs
+++ b/brocade/src/ssh.rs
@@ -35,7 +35,6 @@ impl SshOptions {
                ..Default::default()
            },
            port,
-            ..Default::default()
        }
    }

@@ -47,7 +46,6 @@ impl SshOptions {
                ..Default::default()
            },
            port,
-            ..Default::default()
        }
    }
 }
@@ -72,8 +70,10 @@ pub async fn try_init_client(
    ip: &std::net::IpAddr,
    base_options: &BrocadeOptions,
 ) -> Result<BrocadeOptions, Error> {
-    let mut default = SshOptions::default();
-    default.port = base_options.ssh.port;
+    let default = SshOptions {
+        port: base_options.ssh.port,
+        ..Default::default()
+    };
    let ssh_options = vec![
        default,
        SshOptions::ecdhsa_sha2_nistp256(base_options.ssh.port),
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -0,0 +1,127 @@
+# Architecture
+
+Starting point for a human-readable architecture overview of Harmony.
+The `docs/` directory has multiple overlapping documents
+(`concepts.md`, `architecture-challenges.md`, `cyborg-metaphor.md`,
+the `concepts/` subdirectory, ADRs under `docs/adr/`, the in-repo
+`CLAUDE.md`). Cohesion work is scheduled for a follow-up PR — this
+file is the new front door and the placeholder that work will
+build from.
+
+## What Harmony is
+
+An orchestration framework for **decentralized micro datacenters**:
+small computing clusters deployed in homes, offices, and community
+spaces instead of hyperscaler facilities. The framework's goal is
+to make infrastructure-as-code **compile-time-safe** — invalid
+configurations become Rust compile errors, not 3AM YAML
+surprises.
+
+Not a wrapper around existing tools. A single Rust codebase that
+replaces Terraform/Ansible/Helm in its target domain by making the
+Rust type system the configuration language.
+
+## The framework primer
+
+The Score-Topology-Interpret pattern, the hexagonal architecture,
+the module layout, and the conventions are all documented in
+`CLAUDE.md` at the repo root (also available as `AGENTS.md`). That
+file is kept current as the canonical entry point. Read it first.
+
+Key ADRs that lock the foundational decisions:
+
+- **ADR-001** — Rust chosen for type system + refactoring safety.
+- **ADR-002** — Hexagonal architecture; domain isolated from
+  adapters.
+- **ADR-003** — Infrastructure abstractions at the domain level,
+  not the provider level (no vendor lock-in).
+- **ADR-005** — Real Rust DSL over YAML/HCL.
+- **ADR-009** — Helm charts inflate into vanilla K8s YAML and
+  flow through the Score pipeline.
+- **ADR-015** — Higher-order topologies via blanket trait impls.
+- **ADR-016** — Agent-based architecture with NATS JetStream for
+  the global mesh.
+- **ADR-020** — Unified config + secret management.
+- **ADR-023** — Deploy architecture: Scores everywhere (including
+  tests), per-app `*-deploy` crates, deploy blocks on smoke-test,
+  topologies are compile-time.
+
+The full ADR set lives under `docs/adr/`.
+
+## Why Harmony (the framework choice)
+
+Three load-bearing reasons that shape every other decision:
+
+1. **The compiler is the validator.** Existing IaC tools validate
+   at runtime, after a deploy has already been kicked off. Harmony
+   validates at `cargo check`. The cost of a bad configuration
+   drops from "1 AM page" to "red squiggle in your editor."
+
+2. **Decentralized by design.** The target deployment surface is
+   thousands of small clusters in homes, offices, partner sites,
+   field-deployed devices — not three hyperscaler regions. The
+   framework's primitives reflect that: topologies are
+   parameterized over physical placement, the agent mesh is
+   NATS-based with strict-ordered supercluster semantics, and the
+   capability traits never assume a centralized control plane.
+
+3. **The team is its own largest customer.** NationTech runs
+   multiple OKD clusters already and uses Harmony to manage them.
+   Every dogfooded primitive is a primitive that's been pressure-
+   tested against real operational pain before it ships to
+   external customers.
+
+## Why custom over k3s + ArgoCD (the fleet-platform choice)
+
+A specific instance of the framework-choice reasoning, decided
+during the fleet platform v0 work:
+
+- **End-customer engineers are mechanical / electrical /
+  chemical, not Kubernetes-literate.** A k3s device forces them
+  to learn `kubectl` / CRDs / CNI. A single Rust binary plus
+  `podman` is debuggable with `systemctl`, `journalctl`, `ps` —
+  tools they already use daily.
+- **The platform bet is strategic, not technical.** Building a
+  custom platform on the "no vendor lock-in, decentralized,
+  open-source" positioning differentiates NationTech as a platform
+  company; an ArgoCD-on-k3s integration positions it as an
+  integration shop on someone else's runtime.
+- **NATS is a coordination fabric, not a queue.** Federation
+  across regions, strict ordering across the supercluster, and
+  the "operator in multiple clusters, deployments coming from
+  everywhere" topology all depend on this choice. ArgoCD doesn't
+  federate naturally; that's a fundamental shape problem, not a
+  feature gap.
+- **Harmony's daemon-mode `Score::interpret()` is already
+  production**, running CNPG PostgreSQL failover today via
+  `harmony_agent`. The fleet agent is the same pattern at a
+  smaller scale.
+
+## Decision hierarchy when contributing
+
+When the framework is silent on a question, resolve in this
+order:
+
+1. **Does this preserve the compile-time-safety guarantee?** If
+   the answer involves "we'll validate it at runtime," reach for
+   a type instead.
+2. **Does this preserve a capability boundary?** Capability traits
+   (`DnsServer`, `LoadBalancer`, `IdentityProvider`, …) are the
+   seam between domain and adapters. If unsure, favor the
+   boundary.
+3. **Is this in the smallest possible PR?** Two ~200-line PRs
+   beat one 400-line PR. ADR-002 placement and convention rules
+   live in `CLAUDE.md`.
+4. **Would this introduce a string where a type would do?** Pull
+   the type. The `ScoreEnvelope` mistake (a string-wrapped
+   discriminator that re-implemented `serde` tagged enums by
+   hand) is the canonical anti-pattern.
+5. **Is this aligned with the existing module layout?** Use the
+   existing patterns (`*-deploy` crates per ADR-023,
+   `harmony/src/modules/<thing>/` for framework primitives).
+   Don't invent placement; ask if you can't fit the change into
+   the current shape.
+
+If after all of the above the answer is still unclear, surface
+the question in a small ADR draft under `docs/adr/drafts/`
+rather than guessing in code.
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -23,6 +23,7 @@
 - [Writing a Score](./guides/writing-a-score.md)
 - [Writing a Topology](./guides/writing-a-topology.md)
 - [Adding Capabilities](./guides/adding-capabilities.md)
+- [Web Authentication and CSRF Security](./guides/web-auth-security.md)

 ## Configuration

--- a/docs/adr/022-fleet-agent-upgrade.md
+++ b/docs/adr/022-fleet-agent-upgrade.md
@@ -0,0 +1,356 @@
+# Architecture Decision Record: Fleet Agent Upgrade Procedure
+
+Initial Author: Jean-Gabriel Gill-Couture
+
+Initial Date: 2026-05-06
+
+Last Updated Date: 2026-05-06
+
+## Status
+
+Accepted (design); implementation deferred — see roadmap
+`ROADMAP/fleet_platform/v0_2_plan.md`.
+
+## Context
+
+The v0.1 fleet agent ships as a single static aarch64-musl binary
+sitting at `/usr/local/bin/fleet-agent`, started by a systemd
+unit dropped at install time by `FleetDeviceSetupScore`. Every
+managed device runs one. Today the only "upgrade procedure" is
+`scp` + `systemctl restart` — fine for the bring-up phase, not
+fine once paying customers run real workloads on the fleet.
+
+Without a defined upgrade story we cannot ship a v0.1 agent into
+the field. The contract a customer needs is:
+
+1. New agent versions can be rolled out without operator-side
+   manual intervention per device.
+2. Workloads currently reconciled on the device do not flap
+   (start/stop/start) during the upgrade.
+3. A failed new version automatically reverts to the last
+   known-good version, on its own, without paging anyone.
+4. The operator (the central one in the cluster, not the human)
+   sees what version each device is running, can drive a target
+   version per device, and observes upgrade progress.
+
+The agent itself is the only process on-device with full context
+on what's reconciling and what's healthy. Anything centralized
+(Ansible-pushed, OS-package-managed) doesn't have that signal.
+The agent must be the one driving its own swap, with the
+operator coordinating but not executing.
+
+## Decision
+
+We adopt a **K8s rolling-update-shaped upgrade**, single-host,
+agent-driven, operator-coordinated. Old version stays alive until
+new is verified healthy from the operator's vantage point; only
+then does the operator signal old to exit. **No version is ever
+erased from disk.** Symlinks select the active binary.
+
+### On-disk layout
+
+```
+/usr/bin/fleet-agent-v0.1.1            ← versioned binary, immutable
+/usr/bin/fleet-agent-v0.1.2            ← versioned binary, immutable
+/usr/bin/fleet-agent-v0.1.3            ← versioned binary, immutable
+/usr/local/bin/fleet-agent             → symlink to current versioned binary
+```
+
+- Versioned binaries are the source of truth. They live forever
+  (history-preserving, no GC). Disk use is bounded by humans
+  cleaning up explicitly, not by the upgrade procedure.
+- The systemd unit installed by `FleetDeviceSetupScore` references
+  `/usr/local/bin/fleet-agent`. Symlink swap is the cutover
+  primitive — atomic on POSIX via `rename(2)` (what `mv -T` uses).
+- Naming convention: exact crate version string, `v<MAJOR>.<MINOR>.<PATCH>`,
+  no build metadata in the path. Build metadata lives in the agent's
+  reported version string but not in the file path (otherwise you
+  can't predict the path from a version pin).
+
+### State machine on the agent side
+
+```
+Running ──[operator publishes desired_version != current]──▶ Draining
+   ▲                                                            │
+   │                                                            │
+   │                                                            ▼
+   │                                                          Staging
+   │                                                            │
+   │                                                            ▼
+   │                                                         Verifying
+   │                                                            │
+   │                                                            ▼
+   │       ┌──────────────────────────────[smoke fails]────────┤
+   │       │                                                   │
+   │   [revert: symlink → previous,                            ▼
+   │    stay at current]                                  Cutover-Ready
+   │                                                            │
+   │   [Cutover-Ready persists ≥ T_OPERATOR_OBSERVE              │
+   │    until operator publishes stop_signal]                   │
+   │                                                            ▼
+   └────────────────────────────────────────────────────── Stopping
+                                                                │
+                                                                ▼
+                                                              (exit)
+```
+
+States in detail:
+
+- **Running** — normal reconcile loop.
+- **Draining** — refuses to start new podman services for new
+  desired-state writes. In-flight reconciles complete and report
+  their final state to the operator. Existing services stay
+  running. Heartbeat continues. State is published as part of the
+  agent's heartbeat (`agent_state: "draining"`).
+- **Staging** — fetch new versioned binary URL (signed,
+  hash-pinned), verify, place at `/usr/bin/fleet-agent-v<new>`.
+  Set file mode and ownership. No other state mutation.
+- **Verifying** — invoke the staged binary with `--self-test`. New
+  binary parses its config, opens NATS connection, validates JWT,
+  prints version + "ok", exits 0. **No state mutation.** Catches
+  obvious breakage (missing dependency, wrong arch, corrupt
+  download, broken config-schema migration) before swap.
+- **Cutover-Ready** — staged binary is healthy. Old agent updates
+  the symlink atomically:
+  ```
+  ln -sfn /usr/bin/fleet-agent-v0.1.2 /usr/local/bin/fleet-agent.new
+  mv -T /usr/local/bin/fleet-agent.new /usr/local/bin/fleet-agent
+  ```
+  Old agent then runs `systemctl start fleet-agent-v0.1.2.service` (a
+  parallel transient service, not `systemctl restart` of itself).
+  Both old and new are now running. New publishes its first
+  heartbeat with `version=v0.1.2`. Operator sees two heartbeats
+  per device for a brief window.
+- **Stopping** — operator publishes a stop signal to the old
+  agent's NATS subject. Old agent receives, gracefully exits.
+  systemd's `Restart=on-failure` does *not* trigger because the
+  exit is `success` (rc=0, code-path-driven). New agent is now
+  the only one running. systemd unit is reconfigured to point at
+  the *current* symlink target on its next restart, but that's
+  cosmetic — the symlink already does the job.
+
+### Operator-side coordination
+
+The operator is the only source of truth for "what version should
+this device run". Two new fields per device, two new subjects.
+
+**New on `Device` CR / KV `device-info`:**
+- `current_version` — what the agent is running right now.
+  Reported in heartbeat; reflected to the CR.
+- `desired_version` — what the operator wants the agent to run.
+  Set by operator-side logic (default: latest published; eventually
+  canary / %-based).
+
+**New NATS subjects (per-device, scoped by callout permissions):**
+- `device-cmd.<device_id>.upgrade-stop` — operator → old agent.
+  Payload: `{"reason": "...", "deadline_ms": ...}`. Sent only after
+  operator has observed a heartbeat from the new version with
+  `current_version == desired_version` AND `agent_state == "running"`.
+- `device-state.<device_id>.upgrade` — agent → operator. Status
+  events: `staging`, `verifying`, `cutover-ready`, `failed`, `done`.
+  Drives `Device.status.upgrade.{phase, last_error, ...}`.
+
+The operator only emits `upgrade-stop` after it has independently
+verified the new agent is up. **Old agent does not stop itself
+based on its own observations.** This is the load-bearing
+property: the same operator that disagreed with the upgrade
+("haven't seen new version's heartbeat") would never have sent
+the stop signal. Single-source-of-truth handoff.
+
+### Failure modes and rollback
+
+- **Staging fails (download / hash mismatch):** Agent returns to
+  `Running`. Reports `phase: "failed"`, `last_error`. Operator
+  sees the failure; can fix the artifact + retry by re-publishing
+  the same desired_version (any change to desired_version
+  re-triggers the state machine).
+- **Verifying fails (smoke test rc != 0):** Agent returns to
+  `Running`. Reports failure. Staged binary stays on disk for
+  inspection. Operator can collect, debug, ship a fixed version.
+- **Cutover-ready, but new agent never publishes a heartbeat
+  with the new version within T_HEARTBEAT_TIMEOUT (suggested
+  60s):** Old agent reverts the symlink, stops the parallel
+  systemd transient service, transitions back to Running with
+  the old version. Reports `failed`. Same recovery path.
+- **Operator never sends stop signal (e.g., operator-side
+  outage):** Old agent stays in Cutover-Ready indefinitely. Both
+  agents are running; only the new one is publishing as the
+  active one (the old one's writes are gated on its state). This
+  is expensive (2× resource use) but safe — the operator is the
+  authoritative coordinator and any other behavior would risk
+  losing both agents at once.
+- **Both agents alive but new agent crashes:** systemd's
+  `Restart=on-failure` on the new agent's transient unit retries.
+  If it can't come back, the operator never sends the stop signal,
+  the old agent stays Cutover-Ready, and a human investigates.
+  The fleet keeps working on the old version — the rollback is
+  implicit.
+- **Operator publishes an older `desired_version`:** Reverse
+  rollout. Same mechanism, just with old/new swapped. The "new"
+  binary is older, but the procedure is identical. The fact that
+  no version is ever GC'd is what makes this work.
+
+### What this isn't
+
+- **Not fleet-wide.** Per-device. Fleet-wide canary / %-based
+  rollouts are operator-side orchestration **on top of** this
+  primitive. The operator would publish `desired_version` to a
+  rolling subset of devices and watch heartbeats. Out of scope
+  for v0.2 — single-device upgrade is sufficient for a 100-Pi
+  fleet, which is larger than anything on the 12-month customer roadmap.
+- **Not blue/green of the entire OS.** We swap one userspace
+  binary. The OS, podman, the systemd unit text, the kernel — all
+  unchanged. Out of scope.
+- **Not a package manager.** Versioned binaries land at fixed
+  paths because we control them. apt / dpkg / OSTree are
+  orthogonal and not in the loop.
+
+## Rationale
+
+- **No version ever erased.** Trivializes rollback (the previous
+  binary is a `ln -sfn` away). Simplifies the failure tree:
+  every "what if" branch resolves to "old still on disk". Disk
+  cost on aarch64-musl is ~5–10 MB per version — at 12 versions
+  / year, that's ~60–120 MB per year, or roughly 0.6–1.2 GB after
+  a decade of upgrades. Still small compared to Pi storage.
+- **Symlink swap as cutover.** POSIX-atomic. No daemon state.
+  Cheap to revert. Compatible with systemd unit references that
+  point at a stable path.
+- **Old verifies new, then reports up.** This is the load-bearing
+  property: it places the verification at the agent (which has
+  the only complete view of its own runtime state) but the
+  *commitment* at the operator (which is the only thing safe to
+  trust as the cluster-wide source of truth). Either side alone
+  can fail safe; only consensus advances the upgrade.
+- **Operator-driven stop, not agent self-stop.** A self-stopping
+  agent could decide to exit before the operator agrees, leaving
+  the cluster blind. Forcing the stop through the operator means
+  any disagreement keeps the old agent alive — which is the
+  desired bias.
+- **Drains in-flight work first.** Mirrors K8s pod-shutdown
+  semantics. A workload reconciling at the moment of swap
+  finishes its current step, reports state, then queues. New
+  agent picks up the queue once it's the active version. No
+  observable flap on the workload.
+- **Heartbeat-driven version reporting.** The agent already
+  publishes heartbeats; adding the version field is one line.
+  No new transport.
+
+## Consequences
+
+**Pros:**
+
+- Bounded blast radius per upgrade (one device).
+- Rollback is the same code path as upgrade — no special-case
+  bug class.
+- Operator's view is monotonic: heartbeats with versions are
+  immutable history; there's no "did the upgrade really happen"
+  state.
+- Old agent never decides to exit on its own. The most dangerous
+  failure mode in self-upgrading software (premature exit) is
+  designed out.
+- Compatible with eventual fleet-wide rollouts (canary, %-based)
+  which become operator-side orchestration on top of this
+  primitive.
+
+**Cons:**
+
+- Briefly runs two agents per device (Cutover-Ready window).
+  Memory and connection-count both ~2× during that window.
+  Acceptable for the upgrade duration (typically <60s).
+- Requires reliable connectivity between agent and operator to
+  complete the handoff. A device whose NATS link fails mid-
+  upgrade stays in Cutover-Ready until link recovers.
+- Disk grows monotonically with version count. Bounded by human
+  cleanup. We do not GC.
+- New NATS subjects, new heartbeat fields, new `Device.status`
+  fields. Schema bump that operators-in-the-field need to handle
+  (the operator must understand "old agent reporting no version
+  field" as `version: unknown`, not crash).
+
+## Alternatives considered
+
+1. **OS-package upgrade (apt / dpkg / OSTree).** *Pros:* zero
+   custom code, standard toolchain, GPG-signed.
+   *Cons:* Loses the "agent verifies the new agent before swap"
+   property. apt's restart hook flips the symlink and `systemctl
+   restart`s; if the new binary is broken, the device is bricked
+   until human intervention. Doesn't drain in-flight work. Doesn't
+   know about NATS-managed pause states. Couples the upgrade
+   schedule to the distro's repo, not to the cluster operator's
+   intent. Rejected.
+
+2. **Pull-from-OCI-registry on each agent restart.** *Pros:* same
+   primitive as podman / kube node-image-rotation.
+   *Cons:* Coupling to a registry the device must reach — many
+   customer fleets are on private subnets without registry
+   access. Would mean shipping a registry mirror per fleet. Adds
+   a dependency for a problem we can solve with a signed binary
+   on a CDN.
+
+3. **Two systemd units, blue/green at the unit level.**
+   `fleet-agent-v0.1.1.service` and `fleet-agent-v0.1.2.service`,
+   ratchet via systemctl enable/disable. *Pros:* no symlink dance.
+   *Cons:* duplicates a lot of unit-file content; harder to
+   reason about what the "active" unit is (you have to ask
+   systemd, not `readlink`); doesn't compose well with the
+   `ExecStart=/usr/local/bin/fleet-agent` line we already ship.
+   Symlink swap is the lighter primitive.
+
+4. **Self-stopping agent (no operator stop signal).** New agent
+   tells old agent "I'm up, you can go" via NATS. *Pros:* one
+   fewer subject.
+   *Cons:* The new agent is also the agent we're least sure of
+   — putting it in charge of the old one's lifecycle inverts the
+   trust model. If the new agent has a bug that causes it to
+   announce ready prematurely, the cluster goes blind. The
+   operator path is the conservative choice.
+
+5. **Operator-pushed binary (instead of agent-pulled).** The
+   operator sshes / executes a one-off command per device.
+   *Pros:* operator controls timing precisely.
+   *Cons:* Reintroduces SSH as a control plane (we just spent a
+   month getting rid of it for the enrollment flow). Doesn't
+   scale to fleets where most devices are NATted away from the
+   operator.
+
+## Implementation milestones
+
+(For a future implementer; not committed to a date here. Lives
+in the v0.2+ backlog.)
+
+1. **M1** — Versioned binary layout: builds produce
+   `fleet-agent-v<version>` artifacts; install Score writes them
+   to `/usr/bin/fleet-agent-v<version>` + creates
+   `/usr/local/bin/fleet-agent` symlink. Existing tests cover the
+   rest.
+2. **M2** — Version field in heartbeat + `Device.status.current_version`
+   reflection on the operator side. No upgrade behavior yet.
+3. **M3** — `desired_version` field on the device-info KV +
+   operator setter. No agent-side action yet.
+4. **M4** — Agent state machine, end to end, gated by a feature
+   flag. Operator publishes desired_version → agent does the
+   dance → operator sends stop signal → done. Includes failure-
+   mode tests (download fail, smoke fail, heartbeat-timeout
+   revert).
+5. **M5** — Remove the feature flag. Default-on.
+6. **M6** — Operator-side rollout strategies (canary, %-based) —
+   only after M5 has been in production for 30 days against a
+   real fleet.
+
+## Additional Notes
+
+- Binary signing + signature verification is in scope for the
+  `Staging` step, but the choice of signing scheme (cosign / Rekor
+  / minisign) is deferred until the M1 implementation. Whatever
+  we pick must work on aarch64-musl Pi devices without
+  additional system dependencies.
+- The N-versions-on-disk policy is "all of them, forever" per
+  the constraint above. If disk pressure becomes real on some
+  customer fleet, a manual GC tool can prune `/usr/bin/fleet-agent-v*`
+  by date — never automatic, never as part of the upgrade
+  itself.
+- See JG's *Pour l'amour des compilateurs* talk (Botpress
+  Meetup, 2026-04-30) for the framing applied here:
+  cardinality-matched types and operator-as-coordinator are the
+  same idea, applied to one function and to one platform.
--- a/docs/adr/023-deploy-architecture.md
+++ b/docs/adr/023-deploy-architecture.md
@@ -0,0 +1,193 @@
+# Architecture Decision Record: Deploy Architecture — Scores, Deploy Crates, and the E2E Contract
+
+Initial Author: Jean-Gabriel Gill-Couture
+
+Initial Date: 2026-05-18
+
+Last Updated Date: 2026-05-20
+
+## Status
+
+Accepted. Extends the Score-Topology-Interpret pattern documented
+in `CLAUDE.md` (ADR-002, ADR-003) with the *deploy* side of the
+contract: what a deploy crate is, how e2e harnesses relate to
+production deploys, how the CLI surface is shaped, and the
+smoke-test-on-deploy semantics.
+
+## Context
+
+Three failure modes recur in tooling that ships infrastructure as
+code, and Harmony exists in part to defeat them. This ADR locks
+the deploy-time discipline that keeps them out.
+
+1. **Manifests outside the type system.** YAML/HCL configurations
+   are validated at runtime, not compile time — the original
+   "YAML mud pit" that ADR-005 names. A Rust framework that
+   re-introduces raw `Deployment` / `Service` / `ConfigMap`
+   structs in test harnesses, examples, or CLI helpers has only
+   dressed up the same anti-pattern in Rust syntax: the typed
+   Scores get a clean sample size of one (production), and
+   everything else can silently diverge.
+
+2. **Deploy logic with no canonical home.** A "how to apply
+   component X end-to-end" routine that lives in three places
+   (an example crate, a CLI subcommand, ad-hoc orchestration in
+   a test harness) will drift. The framework needs one address
+   per deployable component, and every consumer of that
+   component composes from there.
+
+3. **"Applied" is not "working."** `helm install` returns
+   success the moment the API server accepts the manifest, and
+   leaves the operator to debug downstream. Harmony's whole
+   reason for existing is to shorten that feedback loop — a
+   deploy primitive that doesn't itself verify the result keeps
+   the loop open.
+
+## Decision
+
+Nine principles, grouped.
+
+### Deployment as Scores
+
+1. **Deploy with Scores, not handrolled manifests.** Capability
+   traits + compile-time bounds are the contract. No
+   `k8s_openapi::api::*` structs outside of `Score::interpret`
+   bodies. Test harnesses, examples, and CLI helpers compose
+   `*Score` types — they never reimplement deploys.
+
+2. **E2E uses the same Scores as production.** Only the
+   `Topology` instance changes (local k3d, remote OKD,
+   bare-metal HA, …). A test harness is a `Score`-composer
+   running against a test Topology. If e2e needs something prod
+   doesn't, add the knob to the Score — don't fork the manifest
+   in the harness.
+
+3. **One Score per deployable component.** Composition is the
+   user-facing primitive: a `MyAppScore` pulls in
+   `PostgresScore`, `HttpServerScore`, etc. Don't build
+   monolithic "deploy everything" Scores. Each primitive Score
+   must be independently testable and substitutable.
+
+4. **Deploy returns only after smoke-test success.** Every Score
+   owns a readiness + smoke-test contract that the framework
+   runs and blocks on. Convergence errors must be actionable, in
+   the style of `rustc`'s error messages, not "exit code 1 from
+   helm". The implementation shape of the smoke-test contract is
+   deferred (see §Out of scope); the principle is locked in.
+
+### Where deploy logic lives
+
+5. **Deploy logic lives in a `*-deploy` crate** that depends on
+   both `harmony` and the runtime crate it deploys. Runtime
+   binaries (the artefacts that ship to constrained devices and
+   to in-cluster pods) stay free of the `harmony` dep. One
+   deploy crate per app area, holding every component-Score for
+   that app plus the `main.rs` that drives them via
+   `harmony_cli`. The same crate is the single import for any
+   consumer — CLI, e2e harness, future control planes.
+
+   `harmony` core stays focused on framework primitives and
+   reusable provider modules (DNS, K8s resources, Helm charts,
+   NATS, PostgreSQL, …). It is not a parking lot for
+   application-specific deploy Scores.
+
+### Topology selection
+
+6. **Topologies are compile-time, selected at runtime.** A
+   deploy binary statically lists its supported topologies; the
+   operator picks one at deploy time. Adding a brand-new
+   topology backend (AWS, GCP, …) is a rebuild — acceptable
+   cost, because dynamic-discovery topologies like
+   `K8sAnywhereTopology` already cover "any physical place that
+   runs k8s". No `Box<dyn Topology>` plugin loaders.
+
+### Framework evolution
+
+7. **Extend Scores with companions, not API changes.** New
+   capabilities the framework wants to attach to Scores
+   (planning, dry-run, observability, eventually smoke-test)
+   default to a *companion* type or trait that wraps a Score
+   rather than a new method on `Score` / `Interpret`. The base
+   public API stays simple. The exception is principles every
+   Score must honor (which may force a required method) — but
+   only after the principle has been validated in practice via
+   the companion-first iteration.
+
+### CLI
+
+8. **CLI: hybrid, staged.** Today (B): first-party tools ship as
+   separate `harmony-*` binaries built on the existing
+   `harmony_cli` crate. Tomorrow (C): a top-level `harmony`
+   binary discovers `harmony-*` plugin binaries on `$PATH`
+   (`kubectl`-style) so a third-party `MyAppScore` author gets
+   `harmony deploy my-app` for free. The plugin protocol is
+   deferred (see §Out of scope).
+
+### Error handling
+
+9. **`thiserror` almost everywhere; `anyhow` only at binary
+   glue.** Library code, public crate boundaries, anything a
+   caller might want to match on — typed errors via `thiserror`.
+   `anyhow` is reserved for `main.rs`-level glue where the error
+   is just printed.
+
+## Out of scope (deferred, not rejected)
+
+- **Score derive macro / deployment DSL.** Strategic intent from
+  day one; the framework's value-add concentrates here. Separate
+  design effort.
+- **Score registry** (Crichton-style:
+  <https://willcrichton.net/rust-api-type-patterns/registries.html>).
+  Real itch — examples and Scores are hard to discover today.
+  Research + ADR first.
+- **Inventory as capability-defined physical assets.** Inventory
+  is under-engineered today; the original idea is to represent
+  physical infrastructure (building → cable → switch port → MAC)
+  but most use cases ignore it. Decomposing inventory into a
+  capability set is a deep redesign.
+- **Plug-in CLI discovery layer (C in principle 8).** The fix
+  for the "too many disconnected CLIs" cohesion problem.
+  Roadmap item, dedicated future effort.
+- **`Application features` ↔ `capabilities` relationship.** An
+  in-progress concept the project lead is personally unsure
+  about. Not resolved in this ADR.
+- **Concrete smoke-test contract shape (principle 4).** Whether
+  smoke-test lives as a separate trait, a required method on
+  `Score`, a companion struct, or a typestate is open. Until
+  it's locked, deploy crates implement per-Score readiness
+  checks inside `interpret` bodies — the principle is what
+  travels with the Score, not yet the trait shape.
+
+## Consequences
+
+- New deployable components are authored as `*Score` types in a
+  `*-deploy` crate, not in `harmony` core. `harmony` core is
+  framework primitives plus reusable provider modules; it does
+  not accumulate application-specific deploy logic.
+- Test harnesses are Score-composers. A harness that finds
+  itself building `Deployment` / `Service` / `ConfigMap` structs
+  is the signal that a Score is missing, not that the harness
+  needs a special path.
+- Every Score owns its readiness story. Whatever shape the
+  smoke-test contract eventually takes, the Score is the home
+  for the logic — not a parallel test fixture.
+- Adding a new deploy backend (a new topology) is a deploy-
+  binary rebuild. Dynamic loading of topologies is rejected by
+  this ADR, and that posture is load-bearing for the
+  compile-time-safety guarantees in CLAUDE.md.
+- New framework-level capabilities (dry-run, observability,
+  smoke-test) ride in on companion types first. Only after a
+  companion proves out does it earn a place in the `Score` /
+  `Interpret` public API.
+
+## References
+
+- `CLAUDE.md` — Score-Topology-Interpret pattern, capability
+  design rules.
+- `docs/adr/002-hexagonal-architecture.md` — domain/adapter split
+  this builds on.
+- `docs/adr/005-interactive-project.md` — the original "no
+  YAML-mud-pit" call (Rust DSL over YAML/HCL).
+- `docs/adr/009-helm-and-kustomize-handling.md` — established
+  pattern: external charts inflate into the same Score pipeline.
+- `harmony_agent/deploy` — `*-deploy` crate exemplar.
--- a/docs/adr/024-fleet-platform-capability-decomposition.md
+++ b/docs/adr/024-fleet-platform-capability-decomposition.md
@@ -0,0 +1,182 @@
+# Architecture Decision Record: Fleet Platform — Capability Decomposition
+
+Initial Author: Jean-Gabriel Gill-Couture (with research by Claude)
+
+Initial Date: 2026-05-20
+
+Last Updated Date: 2026-05-20
+
+## Status
+
+**Draft — under review.** Captures the proposed shape for review;
+not yet locked. If accepted, supersedes the as-built layout of
+`harmony/src/modules/fleet/` documented in ADR-023's first
+revision.
+
+## Context
+
+The fleet platform shipped under `feat/iot-walking-skeleton`
+spans three concerns that today are spread across several locations:
+
+1. **Domain logic** — what a `FleetDevice` is, what a
+   `FleetDeployment` looks like, what the reconciler-contracts
+   wire types mean.
+2. **Adapters** — concrete NATS, Zitadel, Kubernetes, Helm
+   integrations that drive the domain.
+3. **Deploy procedures** — how to bring up the operator, agent,
+   NATS, Zitadel as Scores against a Topology.
+
+Today these live in `harmony/src/modules/fleet/` (mixed), the
+`harmony-reconciler-contracts` crate (wire types only), the
+`harmony-fleet-deploy` crate (Scores for deploy), and the
+`harmony-fleet-operator`/`harmony-fleet-agent` binaries
+(runtime). The boundary between domain and adapter is not
+type-level: `harmony/src/modules/fleet/setup_score.rs` for
+example reaches into Zitadel, NATS, Kube, and Helm directly.
+Anyone wanting to swap NATS for a different transport would
+touch every fleet file.
+
+ADR-023 already addressed the *deploy*-side of this (deploy
+Scores live in `*-deploy` crates, not in `harmony` core). This
+ADR proposes the *domain*-side decomposition: pull a thin
+fleet-domain crate above the existing reconciler-contracts, push
+provider-specific code into adapter crates, and re-direct the
+deploy crate to consume the domain rather than the framework
+primitives directly.
+
+## Decision (proposed)
+
+The crates are layered by dependency direction:
+
+```
+harmony-reconciler-contracts      (existing — wire types only)
+        ▲
+        │
+harmony-fleet-domain               (new — domain records + capability traits)
+        ▲
+        │
+harmony-fleet-adapters-*           (new — one crate per provider)
+        ▲                          (nats, zitadel, kube)
+        │
+harmony-fleet-deploy               (existing — bring-up Scores)
+harmony-fleet-operator             (existing — daemon)
+harmony-fleet-agent                (existing — daemon)
+```
+
+### `harmony-fleet-domain`
+
+The domain crate. Depends only on `harmony-reconciler-contracts`
+and `harmony_types`. Holds:
+
+- **Domain records**: `FleetDevice`, `FleetDeployment`,
+  `FleetState`, `EnrollmentIntent`, `DeviceCredential`.
+- **Capability traits**: `DeviceRegistry`,
+  `DesiredStatePublisher`, `ObservedStateConsumer`,
+  `IdentityProvider`, `AgentLifecycle`. These are the seam
+  between domain logic and provider-specific implementations.
+
+### `harmony-fleet-adapters-nats`, `-zitadel`, `-kube`
+
+One crate per provider. Each implements the capability traits
+above for its specific backend:
+
+- `nats` — `NatsDeviceRegistry`, `NatsDesiredStatePublisher`,
+  `NatsObservedStateConsumer`.
+- `zitadel` — `ZitadelIdentityProvider`, machine-user
+  provisioning, JWT-bearer minting.
+- `kube` — `KubeFleetReflector` writes `Device` and
+  `Deployment` CRDs as a *reflection* of domain state, not as
+  the source of truth. CRD types move here from
+  `harmony-fleet-operator`.
+
+### `harmony-fleet-deploy`
+
+Stays as the home for `FleetOperatorScore`, `FleetAgentScore`,
+`FleetNatsScore`, `FleetCalloutScore`. Updates: imports
+`harmony-fleet-domain` for types, uses
+`harmony-fleet-adapters-*` to compose Scores against capability
+traits rather than reaching directly into NATS/Zitadel client
+crates.
+
+### Direction of dependency
+
+The fleet *domain* doesn't depend on the framework. The
+framework's *deploy procedures* depend on the fleet's domain.
+This inverts today's direction, where `harmony::modules::fleet`
+imports from `harmony_secret`, `harmony_zitadel_auth`, NATS
+client crates, kube client crates, etc.
+
+After this ADR is implemented, `harmony::modules::fleet`
+disappears entirely. `harmony` core stays focused on framework
+primitives.
+
+## Open questions
+
+These are the decision points pending review — flagged so the
+review has concrete pivots:
+
+- **Q1.** Is `IdentityProvider` the right capability name, or
+  should we name the two distinct concerns separately
+  (`DeviceCredentialMinter`, `OperatorTokenProvider`)? CLAUDE.md
+  rule says "if reality has two distinct concerns, two
+  traits."
+- **Q2.** Should the `Device` CRD exist at all, or should the
+  agent publish to a kube `Node` (per the alternative-D
+  direction)? Today's middle ground (own CRD that mirrors `Node`)
+  is arguably the worst of both worlds.
+- **Q3.** Where does `ReconcileScore`'s adjacently-tagged enum
+  live? It's the canonical wire seam between operator and
+  agent. Should sit in `harmony-reconciler-contracts` (so both
+  binaries import only that crate); confirm before the move.
+- **Q4.** Does this redesign block the v0.1 production push, or
+  does it land in v0.2 alongside the agent-upgrade work
+  (ADR-022)? Public API churn after a customer is on it is more
+  expensive than a 3-day delay before they are. Recommendation:
+  ship the redesign first.
+- **Q5.** Where do runtime tools (the `harmony-fleet` CLI plugin,
+  the operator's frontend) sit in the dependency graph? If they
+  depend on `harmony-fleet-domain` only, they build without
+  pulling in helm/kube/ansible at compile time — which is also
+  the right shape for the device-side enrollment binary
+  (currently feature-gated).
+
+## Out of scope
+
+- **Alternative D (kube-native devices).** A future v2.0
+  destination, not v0.1 or v0.2 work. Captured as the long-term
+  direction; the capability traits in this ADR are the
+  intentional seam that makes the migration possible later.
+- **Topology decomposition.** Whether `K8sBareTopology` /
+  `K8sAnywhereTopology` should themselves be capability sets is a
+  separate concern. Tracked as a working draft at
+  `docs/adr/drafts/topology-proliferation.md`.
+
+## Consequences
+
+If accepted:
+
+- New deployable fleet components author their Scores against
+  capability traits in `harmony-fleet-domain`, not against
+  provider clients directly. Swapping NATS for a different
+  transport becomes a single-crate change.
+- CRD types move out of operator code and into
+  `harmony-fleet-adapters-kube`. Operator depends on adapter
+  crate; runtime binary stays slim.
+- `harmony` core has no fleet code. The framework's `modules/`
+  directory is reserved for general-purpose primitives (DNS,
+  K8s, Helm, NATS, PostgreSQL, …); domain-specific code lives
+  in its own crate tree.
+- Future fleet adapters (a different transport, a different
+  identity provider) are additive: one new crate, no changes to
+  domain or deploy.
+
+## References
+
+- `ROADMAP/fleet_platform/architecture_review.md` §§4–5 —
+  comparison matrix and Alternative-B rationale from which this
+  ADR is extracted.
+- `docs/adr/023-deploy-architecture.md` — companion ADR for the
+  deploy-side rules. This ADR is the domain-side companion.
+- `docs/adr/022-fleet-agent-upgrade.md` — the agent-upgrade
+  procedure, which sits cleanly on top of the
+  `AgentLifecycle` capability proposed here.
--- a/docs/adr/README.md
+++ b/docs/adr/README.md
@@ -52,6 +52,7 @@ Every ADR follows this structure:
 | [019](./019-Network-bond-setup.md) | Network Bond Setup | Proposed |
 | [020-1](./020-1-zitadel-openbao-secure-config-store.md) | Zitadel + OpenBao Secure Config Store | Accepted |
 | [020](./020-interactive-configuration-crate.md) | Interactive Configuration Crate | Proposed |
+| [022](./022-fleet-agent-upgrade.md) | Fleet Agent Upgrade Procedure | Accepted |

 ## Contributing

--- a/docs/adr/drafts/024-architecture-review.md
+++ b/docs/adr/drafts/024-architecture-review.md
@@ -0,0 +1,886 @@
+# Fleet platform — architecture review
+
+Working document for the architectural redesign of the fleet platform
+before v0.1 ships to production. Started 2026-05-07.
+
+This is a research + design document, not a plan to execute. The
+output of this work is an ADR (or set of ADRs) that lock the new
+shape; the v0.2 roadmap will reference whichever option we pick.
+
+## Why now
+
+- Three days from production. No customers depend on the API yet
+  → API/UX/DX is still cheap to change. After ship, every breaking
+  change costs us a week of customer-coordination overhead.
+- The `harmony/src/modules/fleet/` placement is wrong — already
+  flagged in code review. The reasons it ended up there are subtle
+  (cross-module imports of `K8sAnywhereTopology`, `HelmChartScore`,
+  `K8sResourceScore`, `harmony_secret`, `Topology` capability
+  traits). Those need to be written down before the file move,
+  not after.
+- The plumbing — NATS + Zitadel + auth callout + operator + agent
+  — is sound. Highly secure, scalable by design, low resource
+  footprint. The redesign is about **moving code** and **better
+  data structures**, not rebuilding mechanisms.
+- The frame from JG's *Pour l'amour des compilateurs* talk:
+  cardinality-matched types, "make impossible states impossible",
+  expressive types as the deterministic feedback loop that scales
+  with LLM-era code generation throughput. Apply that frame here.
+
+## Working plan
+
+1. **Inventory.** Map every public type, trait, score, module, and
+   crate that participates in the fleet domain. Markdown-bullet
+   shape; no diagrams.
+2. **Read the room.** Pull principles from JG's talk, its
+   references, and harmony's existing ADRs (002 hexagonal, 003
+   infrastructure abstractions, 015 higher-order topologies, 016
+   harmony agent + global mesh, 017 NATS interconnection, 018
+   template hydration). Note where the existing fleet design
+   already follows them and where it doesn't.
+3. **Identify the design problems.** Not bugs — *shape* problems.
+   Cardinality mismatches, leaky boundaries, "is this resolved
+   yet" branches, location/dependency loops.
+4. **Sketch alternatives.** Three to five. At least one
+   conventional cleanup, at least one out-of-the-box that
+   reframes the domain. Compare on the same axes (cardinality,
+   placement, ergonomics, extensibility).
+5. **Pick (or recommend) one.** Land as ADR.
+
+This document covers steps 1–4. The pick happens in conversation
+with JG before the ADR.
+
+---
+
+## §1 — Current state inventory
+
+### §1.1 — Where the code lives
+
+The fleet domain spans **three concerns** that today live in
+**three locations**:
+
+- **Framework-side scoring** (what runs on the operator's
+  workstation when they `cargo run` the install) → lives in
+  `harmony/src/modules/fleet/`. This is the wrong home; it's the
+  thing this review is about moving.
+  - `mod.rs` — re-exports
+  - `assets.rs` — Ubuntu/Debian cloud image fetchers, libvirt SSH
+    keypair management
+  - `libvirt_pool.rs` — libvirt storage pool bring-up
+  - `setup_score.rs` (1053 LOC, the monster) — `FleetDeviceSetupScore`,
+    `FleetDeviceSetupConfig`, `FleetDeviceAuth`
+    (TomlShared|ZitadelJwt|ZitadelEnroll), `AdminAuth`, `HostsEntry`,
+    `merge_hosts_file`
+  - `vm_score.rs` — `ProvisionVmScore` (libvirt VM bring-up)
+  - `preflight.rs` — `check_fleet_smoke_preflight*` (host system
+    checks)
+  - `server.rs` — `FleetServerScore`, `FleetServerInterpret`
+    (composed bring-up of Zitadel + NATS + callout + operator)
+  - `operator/`
+    - `mod.rs`, `score.rs` — `FleetOperatorScore`,
+      `FleetOperatorInterpret` (operator helm install)
+    - `chart.rs` (453 LOC) — chart rendering (`ChartOptions`,
+      `OperatorCredentials`, `build_chart`, `operator_secret`,
+      `build_operator_deployment`, `build_cluster_role`)
+    - `crd.rs` — `Deployment` CRD type (`DeploymentSpec`,
+      `Rollout`, `RolloutStrategy`, `DeploymentStatus`,
+      `DeploymentAggregate`, `AggregateLastError`); `Device` CRD type
+      (`DeviceSpec`)
+- **Cross-boundary wire types** (the "contract" agent and operator
+  both have to agree on) → lives in `harmony-reconciler-contracts/`.
+  - `fleet.rs` — `DeviceInfo`, `DeploymentState`, `HeartbeatPayload`,
+    `DeploymentName`, `InvalidDeploymentName`
+  - `kv.rs` — bucket name constants + key-builder functions
+  - `status.rs` — `Phase`, `InventorySnapshot`
+  - re-exports `harmony_types::id::Id`
+- **Runtime binaries** (what runs in the cluster + on devices) →
+  lives in `fleet/`.
+  - `harmony-fleet-operator/` — the operator pod. `controller.rs`,
+    `device_reconciler.rs`, `fleet_aggregator.rs` (833 LOC),
+    `install.rs`, `main.rs`. Pulls `Deployment`/`Device` CRDs from
+    `harmony::modules::fleet::operator::crd` (cross-crate import
+    that should give us pause).
+  - `harmony-fleet-agent/` — the on-device daemon. `config.rs`,
+    `reconciler.rs`, `fleet_publisher.rs`, `main.rs`.
+  - `harmony-fleet-auth/` — JWT-bearer / NATS-credentials helpers
+    used by both the operator AND the agent. `config.rs`,
+    `credentials.rs` (553 LOC). Sits between contracts and the
+    runtime crates.
+
+### §1.2 — Public types, sorted by domain meaning (not location)
+
+#### Identity & devices
+
+- `harmony_types::id::Id` — opaque, sortable, collision-safe
+  identifier. Used as device id, deployment id, …
+- `DeploymentName` (newtype with validation, `harmony-reconciler-contracts`)
+- `DeviceInfo` — heartbeat payload that materializes into a
+  `Device` CR
+- `DeviceSpec` — kube CRD, holds an optional `InventorySnapshot`
+- `InventorySnapshot` — hardware/OS facts published once at
+  registration
+
+#### Deployment desired-state
+
+- `DeploymentSpec` — kube CRD: `target_selector: LabelSelector`,
+  `score: ReconcileScore`, `rollout: Rollout`
+- `ReconcileScore` (in `harmony::modules::podman`, re-exported
+  from `harmony::modules::fleet::operator::crd`) — adjacently-tagged
+  enum, today only `PodmanV0(PodmanV0Score)`
+- `PodmanV0Score`, `PodmanService`, `EnvVar`, `VolumeMount`,
+  `RestartPolicy`
+- `Rollout`, `RolloutStrategy::Immediate`
+
+#### Deployment observed-state
+
+- `DeploymentState` — what the agent publishes per device per
+  deployment after reconcile
+- `DeploymentStatus` (kube CRD) — operator-side rollup of all
+  device states for one Deployment CR
+- `DeploymentAggregate` — counts (matched, succeeded, failed,
+  pending) + `last_error: Option<AggregateLastError>`
+- `Phase` — `Pending | Running | Failed`
+
+#### Authentication / identity provider
+
+- `FleetDeviceAuth` — sum type with `TomlShared | ZitadelJwt |
+  ZitadelEnroll`. **The `ZitadelEnroll` arm carries
+  unresolved-state — admin credentials that must be turned into a
+  device JSON key at execute time. Mixes resolved and unresolved
+  states in one type, which is the cardinality bug we keep hitting.**
+- `AdminAuth` — `Sso { client_id } | Token(String)` (used inside
+  `ZitadelEnroll`)
+- `CredentialsSection` — TOML-on-disk shape (in
+  `harmony-fleet-auth`, parallel to `FleetDeviceAuth`)
+- `CredentialSource` — runtime credential factory
+- `NatsCredential` — what async-nats actually consumes
+- `MachineKeyFile`, `CachedToken`
+
+#### Setup procedures (Scores)
+
+- `FleetDeviceSetupScore` (`FleetDeviceSetupConfig`) — the workhorse:
+  installs podman, drops the agent binary, drops the credentials
+  TOML, drops the keyfile, brings up the systemd unit.
+- `FleetServerScore` — orchestrates Zitadel install + identity
+  setup + NATS install + callout install + operator install. Wraps
+  five other scores.
+- `FleetOperatorScore` — operator helm chart render + install + the
+  credentials Secret apply.
+- `ProvisionVmScore` — libvirt VM bring-up. Used by VM rehearsals.
+- (External, not in fleet/) `ZitadelScore`, `ZitadelSetupScore`,
+  `NatsK8sScore`, `NatsAuthCalloutScore` — all consumed by the
+  composed install.
+
+#### Operator-internal types
+
+- `FleetState`, `SharedFleetState`, `DeploymentKey`, `DevicePair`,
+  `CachedDeployment`, `Context`, `Error` (the controller's local
+  error type), `selector_matches`, `apply_state`, `drop_state`,
+  `compute_aggregate`
+
+#### Agent-internal types
+
+- `AgentConfig`, `AgentSection`, `NatsSection`, `CredentialsSection`
+- `FleetPublisher`, `Reconciler`
+
+#### Fleet plumbing for development
+
+- `FleetSshKeypair`, the cloud-image consts, `HarmonyFleetPool`,
+  `merge_hosts_file`, `HostsEntry`, `check_fleet_smoke_preflight*`
+
+#### NATS subjects + KV buckets (the wire seam)
+
+- `BUCKET_DESIRED_STATE` = `"desired-state"`
+- `BUCKET_DEVICE_INFO` = `"device-info"`
+- `BUCKET_DEVICE_STATE` = `"device-state"`
+- `BUCKET_DEVICE_HEARTBEAT` = `"device-heartbeat"`
+- Key builders: `desired_state_key(device_id, deployment_name)`,
+  `device_info_key(device_id)`, `device_state_key(device_id,
+  deployment_name)`, `device_heartbeat_key(device_id)`
+
+### §1.3 — Concept clusters
+
+When you squint at the inventory, the domain falls into **five
+clusters**:
+
+1. **Identity** — who is this device, who is this deployment, who
+   is the operator, what auth do they have.
+2. **Desired state** — what should be running where.
+3. **Observed state** — what is actually running where.
+4. **Setup** — bringing all this into existence on a fresh
+   cluster + fresh device.
+5. **Plumbing** — the NATS/kube/Zitadel mechanisms that make 1–4
+   work.
+
+The current code does not cleanly separate these. Examples:
+
+- `setup_score.rs` mixes **Setup** (drop binary, run systemd) with
+  **Identity** (`FleetDeviceAuth`). 1053 LOC.
+- `FleetDeviceAuth` mixes resolved-Identity (`ZitadelJwt` —
+  here's a key) with Setup-time-Identity-resolution-intent
+  (`ZitadelEnroll` — here's how to mint a key).
+- The chart-render helpers (`build_operator_deployment`, etc.) are
+  `pub` from `harmony::modules::fleet::operator::chart` so the
+  composed-install scores can pluck the secret out before helm
+  install. Plumbing leaking through Setup.
+- `harmony::modules::fleet::operator::crd::DeploymentSpec` is the
+  CRD definition AND it's the type the operator daemon imports to
+  reconcile. Cross-crate import from a runtime crate
+  (`harmony-fleet-operator`) into a framework crate (`harmony`).
+  This is the placement bug.
+
+### §1.4 — The shape problem in one diagram (text)
+
+```
+                         framework/operator workstation
+                              │
+   harmony::modules::fleet  ──┤  Scores: FleetServerScore, FleetDeviceSetupScore,
+                              │          FleetOperatorScore, ProvisionVmScore
+                              │  CRD types: Deployment, Device, DeploymentSpec, ...
+                              │  Chart rendering helpers (operator/chart.rs)
+                              │
+   harmony-reconciler-contracts ── wire types: DeviceInfo, DeploymentState,
+                              │                HeartbeatPayload, KV constants
+                              │  ▲                                              ▲
+                              │  │                                              │
+                              │  │  imports                              imports│
+                              │  │                                              │
+                       fleet/harmony-fleet-agent          fleet/harmony-fleet-operator
+                              ▲                                          ▲
+                              │                                          │
+                              │  ALSO imports                ALSO imports│
+                              │  from harmony::modules::      from harmony::modules::
+                              │  podman (PodmanV0Score)       fleet::operator::crd
+```
+
+Two problematic edges:
+
+1. `harmony-fleet-operator` imports `harmony::modules::fleet::operator::crd::Deployment`. The runtime daemon depends on the framework crate just for CRD type definitions.
+2. `harmony-fleet-agent` imports `harmony::modules::podman::{PodmanV0Score, PodmanTopology, ReconcileScore}`. The agent depends on the framework crate's *podman module* for the score it deserializes off the wire.
+
+Both edges should run *through* `harmony-reconciler-contracts`, not around it. That's the placement bug surfaced.
+
+---
+
+## §2 — Theory review
+
+### §2.1 — From the talk
+
+Pulling the load-bearing principles, ranked by relevance to this
+redesign:
+
+1. **Cardinality matters.** Types should match the cardinality of
+   the real-world concept. `&str` for "primary color" admits
+   infinite invalid inputs; `enum { Red, Yellow, Blue }` admits
+   exactly three. Friction is proportional to mismatch.
+2. **Make impossible states impossible.** Don't comment the
+   constraint, code it. Push runtime errors to the design phase.
+3. **Representations matter.** Same data, different shapes ↔
+   different operations are cheap. Roman numerals ↔ addition; Arabic
+   ↔ multiplication. "An API is a computational representation of
+   real-world concepts."
+4. **The compiler is a deterministic feedback channel.** In an era
+   when LLMs generate code at 5–10K LOC/day, the only sensor that
+   keeps up runs in milliseconds and is deterministic. Lean on it.
+5. **Strong types reduce code volume + test boilerplate + token
+   waste + review burden + CI time + production incidents** — and
+   *increase* refactoring confidence and velocity-over-time. The
+   bet is asymmetric.
+
+### §2.2 — From the references
+
+Grouping by what they imply for *this* redesign:
+
+#### Will Crichton — *Type-Driven API Design* + *Rust API Type Patterns*
+
+- **Typestate.** Encode "phase of an operation" in the type
+  parameter. A `ProgressBar<Bounded>` exposes `.with_eta()`; a
+  `ProgressBar<Unbounded>` doesn't. The contradictory call doesn't
+  compile.
+- Direct application: **`FleetDeviceAuth` mixes phases.** The
+  `ZitadelEnroll` arm is unresolved, the `ZitadelJwt` arm is
+  resolved, the `TomlShared` arm doesn't even need resolution. A
+  typestate would model these as distinct types; only one of them
+  has `agent.write_to_disk()`.
+
+#### Richard Feldman — *Making Impossible States Impossible*
+
+- Slogan-as-tool. Look at every `Option<T>` and ask *"can two of
+  these be inconsistent at once?"* If yes, that's an impossible
+  state — refactor.
+- Direct application: `FleetDeviceSetupConfig` has `auth:
+  FleetDeviceAuth` AND `agent_binary_path: PathBuf`. Today nothing
+  prevents `auth = TomlShared` (no Zitadel) with
+  `agent_binary_path` pointing at the wrong-arch binary. We could
+  encode the agent binary's target arch as a typestate parameter
+  and refuse to deploy to a device with a known-different arch
+  inventory.
+
+#### Sandy Maguire — *Protos Are Wrong*
+
+- Protocol buffers throw away information real type systems
+  preserve. Sum types, exhaustiveness, parametric polymorphism,
+  Maybe/Result — protos can't express any of them precisely. The
+  "loose contract" sells you weak invariants.
+- Direct application: `harmony-reconciler-contracts` is JSON-shaped
+  at the wire (matched on `type` tag for `ReconcileScore`).
+  We're already paying the proto-class tax: any new variant
+  requires both ends to know about it; the wire format doesn't
+  enforce a schema; old agents see new variants as parse errors.
+  This is an honest constraint — wire formats need to be permissive
+  by design — but it argues for keeping the **wire types small and
+  obviously evolvable** while letting in-memory types be
+  cardinality-matched.
+
+#### Sean Goedecke — *Invalid States*
+
+- The skeptic's case: making impossible states impossible *can be
+  over-applied*. Sometimes a `String` is the right cardinality
+  even when an enum exists, because the enum binds you to a
+  closed world.
+- Direct application: **Don't make `device_id` a closed enum.**
+  The newtype + RFC1123 validation we just added is the right
+  cardinality match: it's a string-like, but only valid strings.
+  Over-modeling would have us build `enum DeviceId {
+  Pi(PiSerial), Vm(VmName), …}` — closed world, breaks first time
+  a customer plugs in an x86 box.
+- Useful guardrail: **type-driven** ≠ **type-everything**. The
+  question to ask each time is "what's the cardinality of this
+  concept in reality" — not "can I model this".
+
+#### Martin Fowler — *Harness Engineering* (April 2026)
+
+- Computational sensors (compilers, type checkers, linters) over
+  inferential ones (tests, code review). Compiler runs on every
+  change; tests don't.
+- Direct application: prefer compiler-checked invariants over
+  doc-comment invariants. If the docs say "this Score's `auth`
+  field must be resolved at the call site of `execute()`", the
+  compiler should enforce it.
+
+### §2.3 — From harmony's own ADRs
+
+Reading the existing ADRs *as design language already in use* —
+what vocabulary should the new fleet shape stay consistent with?
+
+#### ADR-002 (hexagonal architecture)
+
+- "Domain isolated from adapters." Domain types own the
+  vocabulary; adapters (k8s client, NATS, helm) translate at the
+  edge.
+- **Implication for fleet:** the *domain* is identity + desired
+  state + observed state. The *adapters* are NATS-KV, kube-CRD,
+  helm-chart, ansible-over-SSH. The current
+  `harmony::modules::fleet` mixes both. Pulling adapters out is the
+  refactor.
+
+#### ADR-003 (infrastructure abstractions)
+
+- "Abstractions at domain level, not provider level. `DnsServer`
+  not `OPNsenseDns`."
+- **Implication for fleet:** capability traits like
+  `DeviceRegistry`, `DesiredStatePublisher`, `ObservedStateConsumer`
+  — each a standard infrastructure need that NATS-KV happens to
+  fulfill today, that another transport (gRPC streaming, MQTT,
+  Redis streams) could fulfill tomorrow.
+
+#### ADR-015 (higher-order topologies)
+
+- Higher-order topologies (`FailoverTopology<T>`,
+  `DecentralizedTopology<T>`) compose via blanket trait impls.
+  `T: PostgreSQL` ⇒ `FailoverTopology<T>: PostgreSQL`. Zero
+  boilerplate.
+- **Implication for fleet:** `FleetTopology<T>` could compose with
+  a base `K8sTopology<T>` rather than being a parallel concept.
+  "A fleet is a thing that is *both* a kube cluster *and* a
+  device registry."
+
+#### ADR-016 (Harmony Agent + Global Mesh)
+
+- Agents are processes that observe + reconcile per a desired
+  state published into a NATS mesh. Mesh is the reliable hop;
+  agents are stateless processors at the edge.
+- **Implication for fleet:** the IoT fleet is a *specialization*
+  of the agent + mesh ADR — devices are agents, the operator is
+  a coordinator. The fleet domain types should fit ADR-016's
+  vocabulary, not invent a parallel one.
+
+#### ADR-017 (NATS clusters interconnection)
+
+- Trust topology: per-cluster account isolation, gateway-mediated
+  cross-cluster traffic. Per-device permissions are a
+  specialization of per-account.
+- **Implication for fleet:** the auth callout's per-device permission
+  templates should compose with the cluster-interconnection
+  account model — currently they're treated as orthogonal, which
+  is fine until we actually cross fleets.
+
+#### ADR-018 (template hydration)
+
+- Hydrating templates at the edge of the framework, not in the
+  middle. Same pattern as our generated chart YAML: render once,
+  apply via typed code.
+- **Implication for fleet:** chart-rendering helpers
+  (`build_operator_deployment` et al.) are template-hydration
+  edges. They *should* be hidden from domain code. Today they're
+  `pub` — visible to consumers like `fleet_staging_install` who
+  reach in and grab `operator_secret(opts)`. That's adapter
+  leakage.
+
+### §2.4 — Synthesis: principles for the redesign
+
+A short list, ordered. Each line is something the new shape
+should satisfy:
+
+1. **Domain types in `harmony-reconciler-contracts` (or a sibling
+   crate)**, with no dependency on `harmony` framework types.
+2. **Resolved types only at the API surface.** Pre-resolution
+   intent is a separate type, used only by the resolver.
+3. **Capabilities as traits**, not concrete types. `DeviceRegistry`,
+   `DesiredStatePublisher`, etc. The NATS-backed impl is one of
+   several allowed.
+4. **Closed cardinality where reality is closed; open where reality
+   is open.** Goedecke's check, not Feldman's.
+5. **Higher-order topology, not parallel topology.** A fleet is a
+   `FleetTopology<T>` over a base K8s topology, not a separate
+   capability hierarchy.
+6. **Adapters hidden behind capabilities.** Helm chart rendering,
+   k8s resource apply, NATS subjects — none of these surface from
+   the fleet's public API.
+7. **No yaml in framework code paths.** Existing principle from
+   v0_1; keep.
+8. **Keep wire types minimal + permissive.** Not because they're
+   the canonical model, but because they're the
+   evolvability seam (Maguire's protos critique applies in
+   reverse — *embrace* the loose contract on the wire, *reject* it
+   in-memory).
+
+---
+
+## §3 — Design problems with the current shape
+
+Concrete issues the redesign needs to fix. Not "bugs" — *shape*
+problems. Each numbered so we can refer back when comparing
+alternatives.
+
+- **P1. `harmony/src/modules/fleet/` is in the wrong crate.** It pulls
+  framework dependencies (`HelmChartScore`, `K8sResourceScore`,
+  `K8sAnywhereTopology`, `harmony_secret`, etc.) and the runtime
+  daemons import *from it*. This makes the operator/agent depend
+  transitively on every harmony module — including the OPNsense
+  XML codegen, OKD bootstrap stuff, etc. Compile times suffer; the
+  release surface is wrong (you can't `cargo install
+  harmony-fleet-operator` without all of harmony).
+- **P2. `FleetDeviceAuth` mixes resolved + unresolved states.**
+  `ZitadelEnroll` is pre-resolution intent; `ZitadelJwt` is
+  post-resolution credential. A single match arm has to handle
+  both. The "render TOML for both" hack we wrote works but is a
+  symptom — the TOML for an unresolved auth should be undefined,
+  not "same as resolved".
+- **P3. `setup_score.rs` is a 1053-LOC monolith.** Eight responsibilities
+  in one file: ssh-vs-local connection, ansible orchestration,
+  systemd unit text, hosts-file merging, podman package install,
+  fleet-agent user provisioning, keyfile writing, agent restart.
+  Readability is poor; testability is per-orchestration not
+  per-step.
+- **P4. CRD types live in framework crate.** `Deployment` and
+  `Device` CRDs are defined in
+  `harmony::modules::fleet::operator::crd`. The runtime operator
+  crate (`harmony-fleet-operator`) imports them from there. This
+  is the most visible symptom of P1.
+- **P5. `ReconcileScore` polymorphism is anemic.** Today there's
+  exactly one variant, `PodmanV0`. The wire format is set up for
+  evolution but no second variant exists, and the cross-crate
+  import from `harmony::modules::podman` makes adding one
+  expensive (re-export dance).
+- **P6. Adapter leakage from chart rendering.**
+  `build_operator_deployment`, `operator_secret`, `build_chart`
+  are `pub`. Consumers in `examples/` reach in to compose helm
+  releases by hand. Domain code should not see "what does the
+  operator's helm chart look like".
+- **P7. Composed scores wrap composed scores wrap composed scores.**
+  `FleetServerScore` wraps {ZitadelScore, ZitadelSetupScore,
+  NatsK8sScore, NatsAuthCalloutScore, FleetOperatorScore}. Each
+  of those does its own k8s resource apply + helm install.
+  Failure modes are deep: a problem in one score's interpret
+  surfaces wrapped through five layers of "context()". Hard to
+  debug; hard to reason about ordering.
+- **P8. Topology assumptions are everywhere.** Every `Score`
+  bound is a hand-rolled union of capability traits — `T:
+  Topology + HelmCommand + K8sclient + TlsRouter + 'static`. Add
+  a new capability and every callsite has to be updated.
+  Higher-order topology composition (ADR-015) would let us name
+  "a thing that is a fleet-capable cluster" once.
+- **P9. `Id` is overloaded.** Same type for device IDs, machine
+  user IDs, deployment IDs, topology names. Newtype-ing each
+  would catch arg-order swaps at compile time.
+- **P10. Configuration is a staircase.** Operator workstation has
+  `ZitadelClientConfig` cache file. Operator pod has env-var-from-
+  Secret. Agent has TOML on disk. Three different shapes for
+  fundamentally the same data (issuer URL, audience, key
+  material). Maguire's protos critique applies internally — we're
+  using *several* loose-contract serializations of the same
+  domain object.
+
+---
+
+## §4 — Design alternatives
+
+Five sketches. The first three are increasingly principled
+cleanups; the last two are deliberately weird, included to force
+us to recognize where the *core* of the domain actually is.
+
+For each: one paragraph of premise, the resulting top-level types,
+how it answers each of P1–P10 (✓ / ✗ / partial), and the
+honest pros + cons.
+
+### Alternative A — Move + thin façade (the conservative cleanup)
+
+**Premise:** the existing types are mostly right; the location is
+wrong and the façade leaks. Move `harmony/modules/fleet/` to
+`fleet/harmony-fleet/`. Re-export only what's intended public.
+Don't redesign types.
+
+**Top-level types:** unchanged. `FleetDeviceSetupScore`,
+`FleetServerScore`, `FleetOperatorScore`, `FleetDeviceAuth`,
+`AdminAuth`, `Deployment` CRD, `Device` CRD. Same shapes, new
+location.
+
+**P1 ✓** (location fix is the goal). **P2 ✗** (auth still mixes
+resolved/unresolved). **P3 ✗** (monolith preserved). **P4 ✓**
+(CRDs co-located with operator). **P5 ✗**. **P6 partial** (we
+can `pub(crate)` the chart helpers but the underlying coupling
+remains). **P7 ✗**. **P8 ✗**. **P9 ✗**. **P10 ✗**.
+
+**Pros:** small, safe, mechanical. Two days of work. No customer-
+visible breakage. Unblocks P4 cleanup naturally.
+
+**Cons:** doesn't actually fix the shape. We'd be back here in
+six weeks. JG's review already said this isn't enough. Not the
+right answer for v0.1 timing — *would* be the right answer if
+we'd already shipped to two customers and couldn't break their
+code.
+
+### Alternative B — Resolved-only at boundaries + capability traits (the principled cleanup)
+
+**Premise:** Crichton's typestate + ADR-003's domain capabilities
+applied to the existing shape. Split resolved vs. unresolved
+auth into separate types. Define capability traits for the
+adapters. Move into the right crate. **No wholesale rewrite.**
+
+**Top-level types:**
+
+- New crate `harmony-fleet/` (sibling to `harmony-fleet-operator`,
+  -agent, -auth). Domain types live here.
+- `FleetIdentity`, `FleetDevice`, `FleetDeployment` — domain
+  records. Plain data.
+- `DeviceCredential` — *resolved* only (a JSON keyfile + issuer
+  URL + audience). Replaces `FleetDeviceAuth::ZitadelJwt`.
+- `EnrollmentIntent` — pre-resolution. Carries `AdminAuth` and
+  what to mint. Method `resolve(&self) -> Result<DeviceCredential>`.
+- `Score`s become small + single-responsibility:
+  - `EnrollDeviceScore` — runs `EnrollmentIntent::resolve` then
+    publishes to NATS.
+  - `InstallAgentScore` — drops binary + config + systemd unit.
+    Takes a `DeviceCredential`. Doesn't know about Zitadel.
+  - `InstallOperatorScore` — helm chart + Secret. Doesn't know
+    about devices.
+  - `BringUpFleetScore` — composes the above. Single layer of
+    composition, not five.
+- Capability traits:
+  - `DeviceRegistry` — list/get/upsert/delete a `FleetDevice`.
+    Implementations: `NatsKvDeviceRegistry`,
+    (later) `RedisStreamsDeviceRegistry`.
+  - `DesiredStatePublisher`, `ObservedStateConsumer` — same
+    shape.
+  - `IdentityProvider` — mint a device credential, issue an
+    admin token. Today: Zitadel. Tomorrow: something else.
+
+**P1 ✓ P2 ✓ P3 ✓** (split into 4–5 small Scores). **P4 ✓ P5 ✓**
+(resolve in the runtime crate, contracts stay neutral).
+**P6 ✓** (chart helpers `pub(crate)`, surfaced via `IdentityProvider`
+and `DeploymentReleaseManager` traits). **P7 ✓** (one composer,
+not five). **P8 partial** (capability traits defined but bound
+unions still get long). **P9 ✓** with newtypes. **P10 partial**
+(still three on-disk shapes for credentials, but unified by
+trait).
+
+**Pros:** highest-leverage incremental redesign. Buys us most of
+the principles without rebuilding plumbing. Customer-visible
+breakage is contained to public API renames + import path
+moves — no behavior change. Three days is realistic.
+
+**Cons:** we still have a `Score`-shaped mental model where the
+*unit of execution* is "a Score". If the right primitive turns
+out to be smaller (an effect, an event, a capability call), this
+choice wastes some leverage.
+
+### Alternative C — The dataflow reframe (events in, state out)
+
+**Premise:** the fleet platform is, in essence, a **stream
+processor**. Events flow in (heartbeats, intent CR creates,
+agent reconcile reports). State materializes out (Device CRs,
+DeploymentAggregate counters, KV desired-state writes). Today
+we model it imperatively as a series of `Score`s; the dataflow
+shape is fighting that.
+
+**Top-level types:**
+
+- `FleetEvent` — sum type. `DeviceHeartbeat | DeviceFirstSeen |
+  DeploymentDesired | DeploymentObserved | DeploymentDeleted | …`
+- `FleetStateSnapshot` — what the operator currently knows. Pure
+  data, derivable.
+- `Reducer` — `(state, event) → state`. Pure function. Tests
+  trivially.
+- `Effect` — sum type of side-effects the reducer wants done:
+  `WriteKv(bucket, key, value) | UpsertCr(cr) | EmitMetric(...)`.
+  Reducer returns `(new_state, Vec<Effect>)`.
+- `EffectRunner` — adapter that performs effects. The only thing
+  that touches NATS / kube. One implementation per environment.
+- The operator pod's main loop: `for event in stream { (state,
+  effects) = reduce(state, event); runner.run_all(effects) }`.
+  ~50 lines.
+
+**P1 ✓ P2 ✓ P3 ✓ P4 ✓ P5 ✓ P6 ✓ P7 ✓ P8 ✓** (capabilities
+collapse into the `EffectRunner` trait). **P9 ✓ P10 partial**.
+
+**Pros:** dramatically simpler operator code. Reducer is pure →
+property-test-friendly. The dataflow is the platform. Aligns
+with how Kafka / Materialize / Flink-class systems are
+structured. Easy to add a new event type — the compiler shows
+you every reducer arm to update.
+
+**Cons:** large rewrite of the operator. Three days is
+unrealistic. The current `fleet_aggregator.rs` (833 LOC) already
+roughly does this but in a less disciplined shape — maybe the
+incremental version of this is "make `apply_state` a real
+reducer and split `compute_aggregate` into pure pieces". That's
+more like Alternative B with extra discipline. The full effect-
+typed version is a nice end-state but not a sprint goal.
+
+**Cite:** Materialize's dataflow paper; Kent Beck's *Augmented
+Coding* on factoring; Gergely Orosz on event-sourcing; the talk's
+"good Lego bricks" framing applies — *events* are the bricks.
+
+### Alternative D — The fleet as a **kube control plane**, period (deliberately weird)
+
+**Premise:** strip the design to one observation. **A fleet is a
+Kubernetes cluster whose Nodes happen to be devices, not
+servers.** Stop modelling Devices and Deployments separately
+from kube primitives. Use Kubernetes itself as the data model.
+The operator is one CRD reconciler. NATS is just the transport
+between the API server (in the cluster) and the device-side
+kubelet-equivalent.
+
+**Top-level types:**
+
+- `Device` is a Node CR. Already exists; we stop wrapping it.
+- `Deployment` is a `DaemonSet` (one pod per matching node) or a
+  `Deployment` (count: N targeted nodes). We stop inventing a
+  CRD; we use the standard one.
+- `DeviceInfo` is the Node's `.status` (capacity, allocatable,
+  conditions). We stop publishing parallel data; we update
+  Node status from the agent's NATS messages.
+- The agent on the device is a custom kubelet that speaks NATS to
+  the operator instead of HTTPS to the API server.
+- The auth callout still exists; it gates NATS access.
+- No `harmony-fleet-operator`-specific CRDs. No `Deployment` /
+  `Device` CRs of our own.
+
+**P1 ✓ P2 ✓ P3 ✓ P4 N/A** (no CRDs of our own to misplace).
+**P5 ✓ P6 ✓ P7 ✓ P8 ✓ P9 ✓ P10 ✓**.
+
+**Pros:** the simplest *conceptual* answer. We stop fighting kube
+and inventing parallel concepts. Customers already understand
+DaemonSets, Node selectors, and `kubectl get nodes`. The agent
+becomes a known kind of thing (a kubelet variant) with shoulders
+to stand on (k3s-iot, kine, virtual-kubelet projects already
+prove this works).
+
+**Cons:** *a lot* of plumbing changes. Devices need to register
+as Nodes (which means either a real kubelet on each Pi, or a
+virtual-kubelet façade). The agent's reconcile loop becomes
+"watch a CR via NATS, render manifests, run pods" — bigger than
+"watch a KV value, run podman". JetStream KV becomes redundant
+with the kube API server. **Probably the right end-state for
+v2.0, wrong for v0.1.** Worth noting, though, because comparing
+A/B/C to D pulls out which of our current invented concepts are
+load-bearing (very few — DeviceInfo is mostly just Node.status;
+DeploymentAggregate is mostly just kube's
+.status.observedGeneration / .status.conditions stuff).
+
+**Cite:** virtual-kubelet, k3s-iot, KubeEdge, OpenYurt. They've
+walked this path; the lessons are public.
+
+### Alternative E — Algebra of fleets (deliberately weird, mathematical)
+
+**Premise:** model the platform as a small algebra. A fleet is a
+**set of devices** + an **assignment function** (selector → set
+of deployments). Operations on fleets are set-theoretic +
+function composition. Treat the API as a query language over
+this algebra.
+
+**Top-level types:**
+
+- `Fleet` ::= `Set<Device>`. With operations: union, intersection,
+  filter-by-selector, partition.
+- `Selector` ::= a pure predicate `Device → bool`. Built from
+  primitives `label("k") = "v"`, `arch = aarch64`, …, combined
+  with `&`, `|`, `!`.
+- `Assignment` ::= `Selector → Set<Deployment>`. Pure function.
+- `World` ::= `(Fleet, Assignment)`. Pure data. The operator's job
+  is to make reality match the World.
+- `Diff(World, Reality) → Vec<Action>`. Pure function. Closed
+  form — given the algebra, you can prove what actions are
+  *necessary* and *sufficient*.
+
+**P1–P10 ✓** (in principle). **Code volume probably 30% of
+current.**
+
+**Pros:** clarity. Properties become provable: "no device gets
+an unassigned deployment", "removing a label removes the
+assignment", "two operators can edit independently and the merge
+is well-defined" (because functions compose). The "make
+impossible states impossible" principle, applied to the *fleet
+shape itself*, not to individual types.
+
+**Cons:** **almost certainly an over-fit.** The real platform has
+dirty edges (devices that fail, network partitions, half-applied
+state) that don't sit naturally in a pure algebra. Most teams
+that go down this road end up bolting "real-world" escape hatches
+back on, ending up with the original design plus extra category
+theory. **Useful as a north star** for the cardinality choices,
+**not as the platform's actual shape.**
+
+**Cite:** Hillel Wayne *Using Formal Methods at Work*; Conal
+Elliott on functional reactive programming; the classic "set
+theory for systems people" talks.
+
+### Comparison matrix
+
+| | A. Move | B. Capabilities | C. Dataflow | D. Kube-native | E. Algebra |
+|---|---|---|---|---|---|
+| Fixes P1 (location) | ✓ | ✓ | ✓ | ✓ | ✓ |
+| Fixes P2 (auth states) | ✗ | ✓ | ✓ | ✓ | ✓ |
+| Fixes P3 (monolith) | ✗ | ✓ | ✓ | ✓ | ✓ |
+| Fixes P4 (CRD placement) | ✓ | ✓ | ✓ | N/A | N/A |
+| Fixes P5 (anemic enum) | ✗ | ✓ | ✓ | N/A | partial |
+| Fixes P6 (adapter leak) | partial | ✓ | ✓ | ✓ | ✓ |
+| Fixes P7 (deep wrap) | ✗ | ✓ | ✓ | ✓ | ✓ |
+| Fixes P8 (trait union) | ✗ | partial | ✓ | ✓ | ✓ |
+| Fixes P9 (Id overload) | ✗ | ✓ | ✓ | ✓ | ✓ |
+| Fixes P10 (config staircase) | ✗ | partial | partial | ✓ | partial |
+| Fits 3-day window | ✓ | ✓ (tight) | ✗ | ✗ | ✗ |
+| Customer-visible breakage | low | medium | medium | very high | high |
+| Risk to demo schedule | very low | low | medium | very high | high |
+| Long-term ceiling | low | high | high | very high | very high |
+
+---
+
+## §5 — Recommendation (preliminary)
+
+Read the matrix as: **B is the right answer for now**, with
+**explicit awareness of D as the v2.0 destination**.
+
+- A is too little. We'd be back here.
+- C and E are right in shape but wrong in timing — we don't have a
+  week to rebuild the operator's reconcile loop, and the platform
+  isn't in production yet, so there's no urgent "we have to
+  refactor anyway" pressure.
+- D is conceptually the cleanest, but a v0.1 production push
+  is the wrong moment to start running custom kubelets.
+- B captures most of the leverage of C/D within the 3-day window,
+  with a clean migration path to either of them later (the
+  capability traits are the seam — swap the implementation, not the
+  callers).
+
+**One concrete shape** to pursue under Alternative B (worth
+sketching as the strawman ADR):
+
+- New crate `harmony-fleet/` (the domain crate). Depends on
+  `harmony-reconciler-contracts` only.
+  - Domain records: `FleetDevice`, `FleetDeployment`, `FleetState`.
+  - Capability traits: `DeviceRegistry`, `DesiredStatePublisher`,
+    `ObservedStateConsumer`, `IdentityProvider`,
+    `AgentLifecycle`.
+- `harmony-fleet-adapters-nats/` — `NatsDeviceRegistry`,
+  `NatsDesiredStatePublisher`, etc. NATS-specific.
+- `harmony-fleet-adapters-zitadel/` — `ZitadelIdentityProvider`.
+- `harmony-fleet-adapters-kube/` — `KubeFleetReflector` (writes
+  `Device` and `Deployment` CRs as a *reflection* of the domain
+  state, not as the source of truth).
+- `harmony-fleet-operator/` — daemon. Wires adapters together.
+- `harmony-fleet-agent/` — daemon. Wires adapters together.
+- `harmony-fleet-cli/` — tomorrow's `harmony-fleet` plugin.
+- `harmony/modules/fleet/` is **deleted**. The framework `harmony`
+  crate gets a thin `harmony::modules::fleet` *re-export only*
+  module that points at `harmony-fleet`. After v0.2 is shipped,
+  the re-export module goes away too.
+
+CRDs (`Deployment`, `Device`) move to
+`harmony-fleet-adapters-kube/` because they're a kube-specific
+projection of the domain, not the domain itself. The agent
+imports `harmony-fleet`'s domain types, not the CRDs.
+
+The setup-side scores stay in `harmony` (because they need the
+framework's `HelmCommand`, `K8sclient`, etc.) but they consume
+`harmony-fleet`'s domain types. The fleet's *domain* doesn't
+depend on the framework; the framework's *deploy procedures*
+depend on the fleet's domain. Direction of dependency is the
+inverse of today.
+
+## §6 — Open questions before we lock this
+
+These are real questions; pulling them out so JG's review has
+something concrete to react to:
+
+- **Q1.** Is `IdentityProvider` the right capability name, or is
+  it more honest to name it after what we actually need
+  (`DeviceCredentialMinter`, `OperatorTokenProvider`)? The talk
+  argues against generic names — if reality has two distinct
+  concerns, two traits.
+- **Q2.** Should `Device` CRD live in adapters-kube, or should it
+  not exist at all (replaced by reading kube-API node info, per
+  alternative D)? The middle ground (own CRD that mirrors kube
+  Node) is what we have today, and it's the worst of both.
+- **Q3.** The agent's wire-format for `ReconcileScore` —
+  externally tagged enum, today only `PodmanV0`. Move it to
+  `harmony-reconciler-contracts` (canonical wire seam) and let
+  *both* the agent and the operator import only that crate. This
+  removes the `harmony::modules::podman` cross-crate dependency.
+  Worth doing in any of A/B/C.
+- **Q4.** Does the v0.1 prod push wait for this redesign, or does
+  it ship on the current shape with the redesign happening in
+  v0.2? Tradeoff: shipping now means committing to *some* public
+  API; shipping after means slipping the customer date.
+  Recommendation: **ship the redesign first, slip 3 days**, on
+  the grounds that public API churn after a customer is on it
+  costs more than a 3-day delay before they're on it.
+- **Q5.** Where do the *runtime tools* (the `harmony-fleet` CLI
+  plugin, future frontend) sit in the dependency graph? If they
+  depend on `harmony-fleet`'s domain crate only, we can build
+  them without pulling in helm / kube / ansible at compile time.
+  This is what we want for the device-side enrollment binary too
+  (already feature-gated; the redesign should make the gate
+  unnecessary).
+
+---
+
+## §7 — Next steps
+
+1. Sit with this document. Walk away from it for an hour.
+2. Round-table on §3 — do P1–P10 capture *the* problems, or are
+   we missing one?
+3. Round-table on §4 — does the comparison matrix feel honest,
+   or is it tilted?
+4. Pick one alternative as the working hypothesis.
+5. Spike: take one slice through the chosen alternative
+   (suggested: `EnrollmentIntent::resolve` + `DeviceCredential` +
+   the `IdentityProvider` trait — the smallest end-to-end shape
+   that touches every layer). Commit it on a branch. Eyeball:
+   does the resulting code feel better?
+6. Either: commit to the alternative as ADR-023, or back out
+   and try another.
+
+This document gets updated as we go. It is NOT meant to be
+locked at first draft.
--- a/docs/adr/drafts/topology-proliferation.md
+++ b/docs/adr/drafts/topology-proliferation.md
@@ -0,0 +1,83 @@
+# Working draft: Topology proliferation
+
+**Not an ADR yet.** A concern noted in many places that doesn't
+yet have a clean answer. Parked here so it doesn't get lost.
+When the answer is clear, promote to a numbered ADR under
+`docs/adr/`.
+
+## The concern
+
+Harmony has accumulated topology structs without a clear rule
+for when a *new topology* is the right answer versus when a
+*capability trait on an existing topology* is the right answer:
+
+- `K8sAnywhereTopology` — dynamic discovery, the "any place
+  that runs k8s" default.
+- `HAClusterTopology` — bare-metal HA with redundant
+  firewalls/switches.
+- `K8sBareTopology` — minimal topology for ad-hoc Score
+  execution (introduced in this branch).
+- `FailoverTopology<T>` — higher-order, primary/replica.
+- `DecentralizedTopology<T>` — higher-order, multi-site.
+- `FirewallPairTopology`, `SwitchTopology`, …
+
+There is no written framework rule for which path to take. The
+result is a slow drift toward "every new deploy shape becomes a
+new topology struct," which is the opposite of ADR-015's
+zero-cost higher-order composition direction.
+
+## Where the concern is already noted
+
+- `ROADMAP/12-code-review-april-2026.md` task 12.6 — "topology
+  proliferation."
+- `ROADMAP/12-code-review-april-2026.md` task 12.1 — phased
+  topology (`LinuxHostTopology` → `KvmHostTopology` after
+  package install). Different angle on the same shape problem:
+  how does a topology *gain* capabilities at runtime?
+- ADR-015 ("Higher-order topologies via blanket trait impls")
+  argues for capability composition, but doesn't lock the rule
+  "prefer capabilities over new topology structs."
+- ADR-023 §6 references topology as compile-time-static —
+  closing the door on `Box<dyn Topology>` plugins but leaving
+  open which axis of variability is a new type and which is a
+  capability impl.
+
+## Open question
+
+Is the right rule:
+
+- (a) **Capability-first**: a new deploy shape adds a capability
+  trait to an existing topology when possible; new topology
+  structs are reserved for genuinely new physical-shape
+  categories (single-host vs. HA vs. cloud). `K8sBareTopology`
+  arguably should have been a constructor on
+  `K8sAnywhereTopology`.
+- (b) **Topology-first**: every coherent deploy environment is
+  its own topology. `K8sBareTopology` is correct; the concern
+  is just naming and documentation.
+- (c) **A test-driven middle ground**: capability sets are
+  primary, topologies are bundles of capability-set + physical
+  context. Re-frames the existing types but doesn't break
+  anything.
+
+The fleet platform work (ADR-024 draft, capability decomposition)
+points toward (a) or (c). The framework as it stands runs on
+(b)-by-default.
+
+## What's needed before this can be an ADR
+
+- A worked example of converting one of the existing topology
+  structs to its capability-first equivalent (probably
+  `K8sBareTopology` → constructor on `K8sAnywhereTopology` with
+  a feature-flag capability set).
+- A look at whether ADR-024's capability decomposition for the
+  fleet platform suggests a generalization of the same
+  decomposition for the framework's topology layer.
+- A look at how phased topology (task 12.1) fits — phase
+  transition is conceptually a *gain of capabilities*, which is
+  hard to express in (b) but natural in (a)/(c).
+
+## Owner
+
+Unassigned. Next time someone touches topology code and feels the
+itch, this draft is the place to add their notes.
--- a/docs/guides/fleet-manual-token-mint.md
+++ b/docs/guides/fleet-manual-token-mint.md
@@ -0,0 +1,189 @@
+# Manual Zitadel token mint + NATS write
+
+Operator-side recipe for talking to a callout-protected NATS by
+hand: sign a JWT-bearer assertion with a Zitadel machine user's
+private key, exchange it for an access token, drive `nats` CLI
+commands with the token. Useful for debugging the auth chain,
+poking the desired-state KV without the operator running, and
+validating that a deployed callout is actually accepting what
+you think it should.
+
+Read [fleet-zitadel-faq.md](./fleet-zitadel-faq.md) first for the
+underlying mechanism (RFC 7523 JWT-bearer flow, why we sign
+locally, what each claim means).
+
+## Inputs you need
+
+Five strings:
+
+| Input | Where to find it |
+| --- | --- |
+| `OIDC_ISSUER_URL` (the Zitadel base URL) | callout Deployment env: `kubectl exec -n fleet-system deploy/fleet-callout -- printenv OIDC_ISSUER_URL` |
+| `project_id` (becomes the access token's `aud`) | callout Deployment env: `OIDC_AUDIENCE` |
+| Machine user's `userId` | the JSON keyfile's `userId` field |
+| Machine user's `keyId` | the JSON keyfile's `keyId` field |
+| Private RSA key (PEM) | the JSON keyfile's `key` field |
+
+Get the `fleet-ops` (admin role) JSON keyfile from the cache:
+
+```bash
+jq -r '.machine_keys["fleet-ops"]' \
+  ~/.local/share/harmony/zitadel/client-config.json \
+  > /tmp/fleet-ops.json
+
+jq -r '.userId' /tmp/fleet-ops.json    # → user_id
+jq -r '.keyId'  /tmp/fleet-ops.json    # → key_id
+jq -r '.key'    /tmp/fleet-ops.json    > /tmp/fleet-ops.pem
+```
+
+The cache may drift from the deployed Zitadel state if Zitadel has
+been re-seeded; **always pull `OIDC_AUDIENCE` from the running
+callout**, not from the cache. The cache fix landed in commit
+`f4d6fb94` but older entries can still trip you up.
+
+## Mint script (PyJWT)
+
+```python
+# pip install PyJWT requests   ← MUST be PyJWT, not the `jwt` package.
+# The two share `import jwt`; `jwt` (the package) refuses raw PEM
+# strings and demands an AbstractJWKBase wrapper. PyJWT takes PEM
+# directly. If you ever see `TypeError: key must be an instance of
+# a class implements jwt.AbstractJWKBase`, you have the wrong one.
+
+import jwt, time, requests
+
+# These come from the running callout + Zitadel. Don't reuse stale
+# values from a checked-in note; verify against the live cluster.
+OIDC_ISSUER_URL = "http://sso.fleet.local:8080"
+PROJECT_ID      = "371158654839160853"   # = OIDC_AUDIENCE on callout
+USER_ID         = "..."                  # from machine keyfile
+KEY_ID          = "..."                  # from machine keyfile
+
+key = open("/tmp/fleet-ops.pem").read()
+now = int(time.time())
+
+assertion = jwt.encode(
+    {
+        "iss": USER_ID,
+        "sub": USER_ID,
+        "aud": OIDC_ISSUER_URL,   # for Zitadel itself, NOT the project_id
+        "exp": now + 60,          # Zitadel rejects exp - iat > 60s
+        "iat": now,
+    },
+    key,
+    algorithm="RS256",
+    headers={"kid": KEY_ID},      # PyJWT spelling — `headers=`, not `optional_headers=`
+)
+
+r = requests.post(
+    f"{OIDC_ISSUER_URL}/oauth/v2/token",
+    data={
+        "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
+        "assertion":  assertion,
+        # Three scopes:
+        #   openid                                     — base OIDC
+        #   urn:zitadel:iam:org:projects:roles         — PLURAL.
+        #     Without this, Zitadel omits the role claim and the
+        #     callout rejects with "no authorized role in token".
+        #   urn:zitadel:iam:org:project:id:<id>:aud    — singular.
+        #     Tells Zitadel to put <id> into the access token's
+        #     `aud` claim, which the callout's audience check
+        #     compares against OIDC_AUDIENCE.
+        "scope": (
+            "openid "
+            "urn:zitadel:iam:org:projects:roles "
+            f"urn:zitadel:iam:org:project:id:{PROJECT_ID}:aud"
+        ),
+    },
+)
+r.raise_for_status()
+token = r.json()["access_token"]
+
+# Sanity check — decode without verifying signature so you can see
+# what Zitadel actually emitted. If anything below is wrong, the
+# callout will reject your token.
+print(jwt.decode(token, options={"verify_signature": False}))
+print(token)
+```
+
+Expected decoded claims (the parts the callout will check):
+
+| Claim | What it should be | Why |
+| --- | --- | --- |
+| `iss` | `OIDC_ISSUER_URL` (byte-equal) | Callout: `validation.set_issuer(&[&self.issuer_url])` |
+| `aud` | `["<PROJECT_ID>"]` | Callout: `validation.set_audience(&[&self.audience])`; the array form is Zitadel's default |
+| `exp` | ~now + 12h | Zitadel default access token TTL |
+| `client_id` | the machine user's username (`fleet-ops`, `device-vm-device-00`, …) | Callout uses this as `device_id_claim` (with optional `DEVICE_ID_PREFIX_STRIP` applied) |
+| `urn:zitadel:iam:org:project:<PROJECT_ID>:roles` | object with role names as keys (e.g. `{"fleet-admin": {"<orgId>": "<orgName>"}}`) | Callout uses this as `roles_claim` and admits the role if `fleet-admin` or `device` is present |
+
+If any of these is wrong, fix the script before bothering with NATS.
+
+## Drive NATS with the token
+
+`nats --token=<bearer>` puts the value into the CONNECT frame's
+`auth_token`, which is what the callout expects.
+
+```bash
+NATS_SERVER=192.168.122.1:30422       # libvirt host's port mapping
+TOKEN=$(python3 mint.py | tail -1)    # last line is the raw token
+
+# Read everything (admin role allows >):
+nats --server "$NATS_SERVER" --token "$TOKEN" kv ls device-info
+nats --server "$NATS_SERVER" --token "$TOKEN" kv get device-info info.vm-device-00
+
+# Write a desired state — agent's KV watcher fires within 1s,
+# reconciler creates the podman container.
+nats --server "$NATS_SERVER" --token "$TOKEN" \
+  kv put desired-state vm-device-00.hello-web '{
+    "name": "hello-web",
+    "type": "PodmanV0",
+    "data": {
+      "services": [{
+        "name":  "testnginx",
+        "image": "docker.io/nginx:latest",
+        "ports": ["8080:80"]
+      }]
+    }
+  }'
+```
+
+The exact JSON shape comes from
+`harmony-reconciler-contracts/src/fleet.rs` — read that crate when
+in doubt about field names, NOT this doc; this doc is a worked
+example and may drift.
+
+## Common failures and what they mean
+
+| Symptom | Likely cause |
+| --- | --- |
+| `TypeError: key must be an instance of … AbstractJWKBase` | Wrong PyPI package. `pip uninstall jwt && pip install PyJWT`. |
+| HTTP 400 from `/oauth/v2/token`: `"invalid_grant_type"` | Request body was not sent as `application/x-www-form-urlencoded` (e.g. posted as JSON instead of via `requests.post(..., data=...)`), OR the `grant_type` value is mistyped. The full URN is `urn:ietf:params:oauth:grant-type:jwt-bearer`. |
+| HTTP 400: `"jwt: token is expired"` | Your assertion's `exp` is in the past. Wall-clock skew between your laptop and the cluster — sync NTP. |
+| Token mints but no `urn:zitadel:…:roles` claim | Missing the **plural** `urn:zitadel:iam:org:projects:roles` in scope. |
+| Token mints but `aud` is the issuer URL instead of the project id | Forgot the `urn:zitadel:iam:org:project:id:<id>:aud` scope. |
+| NATS CLI: `nats: Authorization Violation` | Token was minted successfully but the callout rejected it — check `kubectl logs -n fleet-system -l app=fleet-callout` for the actual reason. The most common ones are "InvalidAudience" (your `aud` ≠ deployed `OIDC_AUDIENCE`) and "no authorized role in token". |
+| Callout log: `JWT validation failed: InvalidIssuer` | Trailing slash drift. `OIDC_ISSUER_URL=http://sso.fleet.local:8080/` ≠ `http://sso.fleet.local:8080`. Match exactly. |
+
+When the callout rejects, **its log is the source of truth**, not
+your decoded claims. The validation error includes which check
+failed; work backwards from there.
+
+## Rotating the deployed `OIDC_AUDIENCE`
+
+If Zitadel was re-seeded and `OIDC_AUDIENCE` on the callout now
+points at a non-existent project:
+
+```bash
+# 1. Confirm the live project id
+oc -n zitadel exec -ti deploy/zitadel -- /bin/sh -c \
+  'curl -s -H "Authorization: Bearer $PAT" \
+        $ZITADEL_URL/management/v1/projects/_search \
+   | jq ".result[] | select(.name == \"fleet\") | .id"'
+
+# 2. Re-run the bring-up — the live-query fix in f4d6fb94 will
+#    refresh OIDC_AUDIENCE on the next NatsAuthCalloutScore apply.
+```
+
+The shape of `mint.py` doesn't change between regular operation
+and post-recovery — you just set `PROJECT_ID` to the callout's
+fresh `OIDC_AUDIENCE` value.
--- a/docs/guides/fleet-staging-install.md
+++ b/docs/guides/fleet-staging-install.md
@@ -0,0 +1,460 @@
+# Fleet staging install on OKD
+
+End-to-end runbook for deploying the fleet stack (Zitadel + NATS +
+auth callout + operator) on an OKD cluster, with a remote agent
+connecting through the public WSS endpoint. Targets the staging
+shape — single-instance NATS, public Zitadel + NATS WS Routes with
+edge-TLS via cert-manager, env-only Secret config (no volume
+mounts) so default `restricted-v2` SCC is enough.
+
+Time budget: ~30 min on a warm cluster, ~60 min cold.
+
+## 0. Prereqs
+
+- `oc` CLI logged in with cluster-admin (or at least the
+  cluster-scoped privileges the install needs — namespace
+  create, CRD apply, ClusterRole create).
+- `podman` on your laptop, authenticated to the destination registry
+  (default `hub.nationtech.io/harmony` — `podman login` if needed).
+- `helm` on PATH (used by Harmony's helm chart Scores).
+- The staging cluster has:
+  - cert-manager installed and a `ClusterIssuer` ready for the
+    cluster's base domain (default name: `letsencrypt-prod` —
+    override with `--cluster-issuer` if yours differs).
+  - CNPG (cloudnative-pg) operator installed (Zitadel relies on it
+    for its Postgres cluster).
+  - DNS: the chosen `--base-domain` resolves to the OKD ingress
+    router. For `cb1.nationtech.io`, that means `*.cb1.nationtech.io`
+    or at least `sso-staging.cb1.nationtech.io` and
+    `nats-fleet-staging.cb1.nationtech.io` must point at the OKD
+    router VIP. If you're using the cluster's apps domain
+    (`apps.cb1.nationtech.io`), set `--base-domain` accordingly.
+- Access to write a `[credentials]` TOML on whichever machine will
+  run the agent (your laptop is fine for the demo).
+
+## 1. Build and push images
+
+The staging install pulls operator + auth-callout images from your
+registry. The helper script builds both, tags them, and pushes:
+
+```bash
+cd /path/to/harmony
+./fleet/scripts/build_and_push_images.sh
+```
+
+Defaults: `REGISTRY=hub.nationtech.io/harmony`, `IMAGE_TAG=dev`,
+`PUSH=1`. Override with environment variables. Skip the push (e.g.
+to inspect the images locally first) with `PUSH=0`.
+
+Output ends with the exact `--operator-image` / `--callout-image`
+flags to paste into step 4.
+
+**Verify:**
+
+```bash
+podman images | grep harmony   # both refs present locally
+podman pull hub.nationtech.io/harmony/harmony-fleet-operator:dev   # registry confirmed
+```
+
+## 2. Create namespaces
+
+```bash
+oc new-project zitadel-staging
+oc new-project fleet-staging
+```
+
+If `hub.nationtech.io` requires authentication, add the imagePullSecret
+to both namespaces (each pod that pulls from the registry needs it):
+
+```bash
+# adjust to whatever you have for hub.nationtech.io
+oc -n fleet-staging   secrets link default <hub-pull-secret> --for=pull
+oc -n zitadel-staging secrets link default <hub-pull-secret> --for=pull
+```
+
+(For Zitadel + Postgres the chart pulls from public registries, so
+the secret is only strictly required in `fleet-staging` for the
+operator + callout images. Linking both is safest.)
+
+## 3. Set KUBECONFIG and verify cluster context
+
+```bash
+export KUBECONFIG=$ADMIN_KUBECONFIG
+oc whoami
+oc config current-context
+oc get clusterversion        # confirm OKD reachable + healthy
+```
+
+The install runs with this `KUBECONFIG`. **Double-check** before
+running step 4 — Harmony's `K8sAnywhereTopology::from_env()` honors
+this and there's no second confirmation prompt.
+
+## 4. Run `fleet_staging_install`
+
+```bash
+cargo run --release -p example_fleet_staging_install -- \
+  --base-domain cb1.nationtech.io \
+  --operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev \
+  --callout-image  hub.nationtech.io/harmony/harmony-nats-callout:dev
+```
+
+Optional flags (defaults shown):
+
+```
+  --cluster-issuer    letsencrypt-prod
+  --fleet-namespace   fleet-staging
+  --zitadel-namespace zitadel-staging
+  --nats-account      FLEET
+  --zitadel-version   v4.12.1
+  --project-name      fleet
+  --admin-role        fleet-admin
+  --device-role       device
+  --operator-username fleet-operator
+  --admin-username    fleet-ops
+```
+
+Step-by-step the binary does:
+
+1. **Zitadel helm install** — Postgres (CNPG) + Zitadel chart into
+   `--zitadel-namespace`. Edge-TLS Route at `sso-staging.<base>`
+   with cert-manager-driven certificate.
+2. **Zitadel setup** — project, two roles (`fleet-admin`, `device`),
+   API app `nats`, and two machine users (`fleet-ops` for manual
+   admin work, `fleet-operator` for the operator pod). Both get
+   JSON keys cached at `~/.local/share/harmony/zitadel/client-config.json`.
+3. **NATS install** — single-instance JetStream, `auth_callout`
+   block referencing the issuer NKey pubkey, WebSocket listener on
+   8080. Edge-TLS Route at `nats-fleet-staging.<base>`.
+4. **Auth callout deployment** — env-only Secret config (no mounts),
+   wired to the same issuer + Zitadel project audience.
+5. **Operator deployment** — single Secret holding the credentials
+   TOML (with the operator's JSON keyfile inlined). One env var,
+   `FLEET_OPERATOR_CREDENTIALS_TOML`, no volumes.
+
+The binary prints the URLs + project_id at the end. Save that block
+— you'll need the project_id for the agent config.
+
+**Expected output tail:**
+
+```
+=== fleet-staging install complete ===
+Zitadel:           https://sso-staging.cb1.nationtech.io/
+NATS WS public:    wss://nats-fleet-staging.cb1.nationtech.io/
+NATS in-cluster:   nats://fleet-nats.fleet-staging.svc.cluster.local:4222
+Operator:          oc -n fleet-staging get deploy/harmony-fleet-operator
+Auth callout:      oc -n fleet-staging get deploy/fleet-callout
+Project id:        371xxxxxxxxxxxxxxx
+Admin user:        fleet-ops (machine key in ~/.local/share/harmony/zitadel/client-config.json)
+Operator user:     fleet-operator (machine key embedded in operator's Secret)
+```
+
+## 5. Verify each layer
+
+### 5.1 Zitadel reachable, certificate provisioned
+
+```bash
+# pod up
+oc -n zitadel-staging get pods
+# expect: zitadel-* Running, zitadel-pg-1/2 Running
+
+# Route + certificate (cert-manager creates the secret)
+oc -n zitadel-staging get route
+oc -n zitadel-staging get certificate
+
+# OIDC discovery from the public URL
+curl -s https://sso-staging.cb1.nationtech.io/.well-known/openid-configuration | jq .issuer
+# expect: "https://sso-staging.cb1.nationtech.io"
+```
+
+If `curl` fails with TLS errors, the cert-manager certificate isn't
+ready yet. Watch its status:
+
+```bash
+oc -n zitadel-staging describe certificate
+oc -n cert-manager logs deploy/cert-manager --tail=50
+```
+
+A `Ready` condition `True` + `secretName: zitadel-tls` populated
+means the Route can serve HTTPS.
+
+### 5.2 NATS pod up, callout connected
+
+```bash
+oc -n fleet-staging get pods
+# expect:
+#   fleet-nats-0          2/2 Running   (NATS + reloader sidecar)
+#   fleet-callout-...     1/1 Running
+
+oc -n fleet-staging logs deploy/fleet-callout --tail=30 | grep -E "starting|JWKS|listening"
+# expect:
+#   starting harmony NATS auth callout
+#   JWKS refreshed count=2
+#   auth callout service listening subject="$SYS.REQ.USER.AUTH"
+```
+
+If the callout pod is in `CrashLoopBackOff`:
+
+```bash
+oc -n fleet-staging logs deploy/fleet-callout --previous --tail=30
+```
+
+Most common: OIDC issuer URL mismatch. The callout's
+`OIDC_ISSUER_URL` env must byte-equal what Zitadel reports as
+`issuer` in its discovery doc (the same value it emits as `iss` in
+tokens). Check both:
+
+```bash
+oc -n fleet-staging exec deploy/fleet-callout -- printenv OIDC_ISSUER_URL
+# vs.
+curl -s https://sso-staging.cb1.nationtech.io/.well-known/openid-configuration | jq .issuer
+```
+
+### 5.3 Operator authenticated and running
+
+```bash
+oc -n fleet-staging get pods -l app.kubernetes.io/name=harmony-fleet-operator
+oc -n fleet-staging logs deploy/harmony-fleet-operator --tail=30
+```
+
+Look for, in order:
+
+```
+minted fresh Zitadel access token audience=<project_id>
+connected successfully server=4222
+NATS connected
+KV bucket ready bucket=desired-state
+starting Deployment controller
+device-reconciler: watching device-info KV
+aggregator: startup complete
+```
+
+If you see `Permissions Violation` errors, the callout's
+`OIDC_AUDIENCE` (project_id at deploy time) doesn't match the
+project_id in Zitadel today. Re-run step 4 — the live-query fix
+in the Zitadel setup will refresh `OIDC_AUDIENCE`.
+
+### 5.4 NATS WSS reachable from outside the cluster
+
+```bash
+curl -sSI https://nats-fleet-staging.cb1.nationtech.io/ | head -5
+```
+
+Expect a 4xx (NATS doesn't speak HTTP, but the TLS handshake should
+succeed and you'll get back a WebSocket-upgrade-related response).
+A connection refused or TLS handshake error means the Route or
+cert-manager is unhappy.
+
+### 5.5 CRDs registered
+
+```bash
+oc get crd | grep fleet.nationtech.io
+# expect:
+#   deployments.fleet.nationtech.io
+#   devices.fleet.nationtech.io
+```
+
+## 6. Connect a remote agent
+
+The fleet agent runs on the device (laptop, Pi, anywhere with
+outbound HTTPS). It needs:
+
+- Its own Zitadel machine user with the `device` role grant.
+- The JSON keyfile from that user.
+- A `[credentials]` TOML pointing at the public Zitadel + the WSS
+  NATS URL.
+
+### 6.1 Mint a per-device machine user
+
+Use `oc port-forward` or a helper to call Zitadel's API. Easier
+path: drop a quick Score that adds one machine user. For now,
+do it from the Zitadel UI:
+
+1. Browse to `https://sso-staging.cb1.nationtech.io/ui/console/`,
+   log in as the human admin (password from Zitadel ConfigMap on
+   first install — see `docs/guides/fleet-zitadel-faq.md`).
+2. Pick the `Default` org → `fleet` project → Roles → confirm
+   `device` exists.
+3. Org → Users → Service Users → New: name `device-laptop-01`,
+   userName `device-laptop-01`. Save.
+4. The user's "Personal Information" tab → Authorizations or
+   "Authorization" → "+New" — grant the `fleet` project's `device`
+   role to this user.
+5. The user's "Keys" tab → "+New", type `JSON`, expiration set to
+   a future date. **Download the keyfile JSON** — Zitadel only shows the
+   private half once. Save as `~/.local/share/harmony/fleet/agents/device-laptop-01.json`.
+
+### 6.2 Build the agent locally
+
+```bash
+cargo build --release -p harmony-fleet-agent
+ls -la target/release/harmony-fleet-agent
+```
+
+### 6.3 Render the agent's config TOML
+
+```bash
+PROJECT_ID=$(oc -n fleet-staging exec deploy/fleet-callout -- printenv OIDC_AUDIENCE)
+cat > /tmp/fleet-agent-config.toml <<EOF
+[agent]
+device_id = "device-laptop-01"
+
+[nats]
+urls = ["wss://nats-fleet-staging.cb1.nationtech.io"]
+
+[credentials]
+type = "zitadel-jwt"
+key_path = "/etc/fleet-agent/zitadel-key.json"
+oidc_issuer_url = "https://sso-staging.cb1.nationtech.io"
+audience = "$PROJECT_ID"
+
+[labels]
+env = "staging"
+location = "laptop"
+arch = "$(uname -m)"
+EOF
+```
+
+The agent's username convention is `device-<device_id>`, matching
+the callout's `DEVICE_ID_PREFIX_STRIP=device-`. The Zitadel machine
+user must literally be `device-laptop-01` for the JWT-bearer flow
+to extract the right device id.
+
+### 6.4 Run the agent
+
+```bash
+sudo mkdir -p /etc/fleet-agent
+sudo cp ~/.local/share/harmony/fleet/agents/device-laptop-01.json \
+       /etc/fleet-agent/zitadel-key.json
+sudo chown $(id -u):$(id -g) /etc/fleet-agent/zitadel-key.json
+sudo chmod 0400 /etc/fleet-agent/zitadel-key.json
+
+FLEET_AGENT_CONFIG=/tmp/fleet-agent-config.toml \
+  RUST_LOG=info \
+  ./target/release/harmony-fleet-agent
+```
+
+Watch the log:
+
+```
+fleet-agent-v0 starting device_id=device-laptop-01
+podman socket ready
+inventory loaded hostname=...
+connecting to NATS ["wss://nats-fleet-staging.cb1.nationtech.io"]
+minted fresh Zitadel access token audience=<project_id>
+connected successfully server=...
+NATS connected
+fleet publisher ready
+watching KV keys filter=device-laptop-01.>
+```
+
+If you hit `Permissions Violation` errors after `connected`:
+- check `oc -n fleet-staging logs deploy/fleet-callout --tail=20` —
+  it'll show why the JWT was rejected (audience, role claim,
+  device_id format).
+
+### 6.5 Verify the operator created a Device CR
+
+```bash
+oc get devices
+# expect:
+#   NAME                AGE
+#   device-laptop-01    Xs
+oc describe device device-laptop-01
+# labels block reflects what the agent sent in [labels]
+```
+
+## 7. Drive a deployment end to end
+
+```bash
+cat > /tmp/hello-web.yaml <<'EOF'
+apiVersion: fleet.nationtech.io/v1alpha1
+kind: Deployment
+metadata:
+  name: hello-web
+spec:
+  score:
+    type: PodmanV0
+    data:
+      services:
+        - name: testnginx
+          image: docker.io/nginx:latest
+          ports:
+            - "8080:80"
+  targetSelector:
+    matchLabels:
+      env: staging
+  rollout:
+    strategy: Immediate
+EOF
+
+oc apply -f /tmp/hello-web.yaml
+
+# Status reflect-back from the agent (takes ~5-15s)
+oc get deployment.fleet.nationtech.io hello-web -o yaml | yq '.status'
+# expect:
+#   aggregate:
+#     matchedDeviceCount: 1
+#     succeeded: 1
+#     failed: 0
+#     pending: 0
+
+# On the device:
+podman ps
+# expect: testnginx running, port 8080→80
+curl -sS http://localhost:8080 | head -3
+```
+
+## 8. Common failure modes
+
+| Symptom | Cause / fix |
+| --- | --- |
+| `cert-manager` Certificate stuck `False` for 5+ min | DNS for the host doesn't resolve to the OKD router yet. `dig sso-staging.<base> +short` should match the cluster's ingress IP. Or your `letsencrypt-prod` ClusterIssuer is using HTTP01 and the route isn't reachable from Let's Encrypt. |
+| Operator pod `Error: constructing CredentialSource` | The credentials TOML in the Secret is malformed. `oc -n fleet-staging get secret harmony-fleet-operator-secrets -o jsonpath='{.data.credentials\.toml}' \| base64 -d` and inspect; the `key_json` field must be a valid JSON keyfile string (multi-line triple-quoted in TOML is fine). |
+| Operator pod `Permissions Violation` after `NATS connected` | Issuer pubkey or project_id mismatch between callout and NATS chart values, or Zitadel was reset and the operator's machine key no longer authenticates. Re-run `cargo run -p example_fleet_staging_install`. |
+| Agent: `Zitadel token endpoint returned 400: invalid_grant_type` | TOML scope assembly bug or wrong `audience`. Confirm `audience` matches `oc exec deploy/fleet-callout -- printenv OIDC_AUDIENCE`. |
+| Agent: connects, then `Permissions Violation for Publish to "$KV.device-info..."` | The device's machine user has no `device` role grant. Add via Zitadel UI → user → Authorizations. |
+| `Deployment.fleet.nationtech.io` CR applied but `matchedDeviceCount: 0` | `targetSelector.matchLabels` doesn't match any Device CR's `metadata.labels`. `oc get devices --show-labels`. |
+| Container redeploys every 30s on the device | Known FIXME — the agent's `matches_spec` returns false for any spec with env or volumes. For the demo, use trivial specs (the hello-web above is fine). Tracked in `harmony/src/modules/podman/topology.rs`. |
+
+## 9. Tear down
+
+The Helm releases own the bulk of the resources, so the cleanest
+recovery from a broken state is:
+
+```bash
+helm -n zitadel-staging uninstall zitadel
+helm -n fleet-staging   uninstall fleet-nats
+oc -n fleet-staging delete deploy/harmony-fleet-operator deploy/fleet-callout
+oc -n fleet-staging delete secret harmony-fleet-operator-secrets fleet-callout-secrets
+oc -n zitadel-staging delete clusters.postgresql.cnpg.io zitadel-pg --ignore-not-found
+oc delete project zitadel-staging fleet-staging
+
+# CRDs persist (helm.sh/resource-policy: keep). Delete by hand if you
+# really want a clean slate:
+oc delete crd deployments.fleet.nationtech.io devices.fleet.nationtech.io
+```
+
+The host-side `~/.local/share/harmony/zitadel/client-config.json`
+caches machine keys + project IDs from this install. Wipe it before
+re-installing against a freshly reset Zitadel:
+
+```bash
+rm -f ~/.local/share/harmony/zitadel/client-config.json
+```
+
+(The cache-vs-live drift bug is fixed — `ZitadelSetupScore` now
+re-queries Zitadel for IDs on every apply — but stale machine-key
+material from a deleted Zitadel project will fail at JWT-bearer
+mint until you delete the cache file and re-run the install so the
+keys are re-created.)
+
+## 10. Cross-reference
+
+- [`fleet-zitadel-faq.md`](./fleet-zitadel-faq.md) — concepts behind
+  Zitadel projects, roles, machine users, audit-trail decisions.
+- [`fleet-manual-token-mint.md`](./fleet-manual-token-mint.md) —
+  worked recipe for minting an admin token by hand and using it
+  with `nats kv` commands.
+- `examples/fleet_staging_install/src/main.rs` — the install code
+  itself; the comments narrate every step.
+- `harmony/src/modules/fleet/server.rs::FleetServerScore` —
+  composable form of the same install for callers that don't need
+  the intermediate read of `ZitadelClientConfig`.
--- a/docs/guides/fleet-zitadel-faq.md
+++ b/docs/guides/fleet-zitadel-faq.md
@@ -0,0 +1,185 @@
+# Fleet × Zitadel FAQ
+
+Technical reference for the Zitadel setup behind the fleet
+auth callout. Describes what exists, why it's that way, and where
+each piece lives in the code.
+
+Code anchors:
+- `examples/fleet_e2e_demo/src/lib.rs` — bring-up flow
+- `harmony/src/modules/zitadel/setup.rs` — `ZitadelSetupScore`
+- `harmony/src/modules/zitadel/mod.rs` — Helm install
+- `nats/callout/src/handler.rs` — auth callout
+- `fleet/harmony-fleet-agent/src/credentials.rs` — JWT-bearer mint
+
+---
+
+## What is an "application" in Zitadel?
+
+An OIDC client config: `clientId`, allowed grant types, redirect
+URIs (browser apps only), PKCE settings (browser apps only).
+
+Apps are not containers for users or roles — users live at the
+org, and roles are defined on the project that owns the app. An
+app is the entry point a service uses to delegate auth to Zitadel.
+
+The `nats` app is **API type**: JWT-bearer / client-credentials
+only, no browser flow. Headless agents never see a login page.
+Tokens carry the **project id** as `aud` (requested via the
+`urn:zitadel:iam:org:project:id:<id>:aud` scope), and that is
+what the auth callout validates against (`OIDC_AUDIENCE` env on
+the callout Deployment).
+
+## Why are users and roles at org level instead of per-project?
+
+Roles are defined inside a project but are essentially labels —
+strings + display names with no inherent permissions. Each app
+enforces them in code (the callout maps `device` → a
+permission template).
+
+Users live at org level so one identity can hold roles across
+multiple projects in the same org and SSO between them. Role
+grants are the join: "user X has roles \[A, B\] on project Y."
+
+The only privilege ladder Zitadel enforces directly is at the
+instance/org level (IAM-Owner, Org-Owner). Project roles say
+nothing about Zitadel admin rights.
+
+## What is each service account for?
+
+| User | Created by | Purpose |
+| --- | --- | --- |
+| `iam-admin` | Helm `FirstInstance.Org.Machine` | IAM-Owner. Its PAT (`iam-admin-pat` k8s Secret) drives the management API from `ZitadelSetupScore`. |
+| `login-client` | Helm `FirstInstance.Org.LoginClient` | Internal — Zitadel's login UI pod uses it to call back into Zitadel. Don't touch. |
+| `fleet-ops` | `fleet_e2e_demo` admin setup | `fleet-admin` role grant, JSON key, used by tests and admin tooling. |
+| `device-vm-device-NN` | `fleet_e2e_demo::provision_device` | One per VM. JSON key copied to `/etc/fleet-agent/zitadel-key.json`. `device` role grant. |
+| `ops-station`, `sensor-a`, `sensor-b`, `intruder` | `fleet_auth_callout` (separate example) | Leftovers from previous runs. Postgres survives cluster recreates. Harmless, deletable. |
+
+The `device-` prefix on per-device usernames is intentional:
+Zitadel emits the username verbatim in the access token's
+`client_id` claim. The callout strips `device-` to recover the
+bare device id used for NATS subject interpolation
+(`DEVICE_ID_PREFIX_STRIP=device-` env var on the callout;
+`nats/callout/src/zitadel.rs::extract_device_id`).
+
+## How does the agent authenticate? Are JWTs / refresh tokens cached?
+
+On disk the agent keeps **only the JSON machine key** (RSA
+private key) at `/etc/fleet-agent/zitadel-key.json`.
+
+It does NOT store:
+- access tokens (in memory only)
+- refresh tokens (the JWT-bearer flow has none — RFC 7523 is
+  stateless by design)
+
+On every NATS (re)connect, `credentials.rs::zitadel_mint`:
+
+1. Builds a JWT assertion with `exp = now + 60s`, signs it with
+   the RSA key
+2. POSTs it to `<zitadel>/oauth/v2/token` with grant type
+   `urn:ietf:params:oauth:grant-type:jwt-bearer`
+3. Receives an access token (~12h validity), caches it in memory
+4. Re-mints when within 5min of expiry
+   (`TOKEN_REFRESH_LEEWAY_SECS`)
+
+## What happens to an offline agent?
+
+| Time offline | Behavior |
+| --- | --- |
+| 0 – ~12 h | Cached access token still valid. Reconnects work transparently. |
+| > ~12 h | Token expired. Agent enters reconnect loop until network returns, then mints a fresh token as soon as Zitadel is reachable. |
+
+The RSA key effectively never expires (its expiry is set to year 9999); it stays valid until rotated server-side.
+
+## Where are the lifetimes set?
+
+- **Access token TTL** — Zitadel UI: Org → Settings → OIDC
+  Settings → "Access Token Lifetime" (default 12 h).
+- **Assertion TTL** — hardcoded 60 s in
+  `credentials.rs::ASSERTION_LIFETIME_SECS`. Zitadel rejects
+  assertions where `exp - iat > 60 s`; this is server-enforced,
+  not a knob.
+- **Machine key TTL** — set when the key is created in
+  `harmony/src/modules/zitadel/setup.rs::create_machine_key`.
+
+## Why is a JSON machine key more secure than a PAT?
+
+Both are "if stolen, full impersonation" — the same blast radius.
+The difference is in leak surface:
+
+- **PAT**: a 60-char bearer string sent on every authenticated
+  request. Every log line, every env dump, every misrouted
+  request is a leak opportunity.
+- **JSON key**: an RSA private key. Only ever signs short-lived
+  (60 s) assertions sent to one endpoint
+  (`<zitadel>/oauth/v2/token`). The bearer token NATS sees is
+  the access token — short-lived (12 h max), scoped, distinct
+  from the long-term secret. A full network capture of the
+  agent ↔ NATS traffic yields only access tokens that expire
+  within 12 h.
+
+Plus: Zitadel allows multiple keys per machine user, so rotation
+is zero-downtime (mint new → push to device → delete old). PATs
+rotate one-at-a-time and are disruptive.
+
+What this does not defend against: a fully compromised device
+where the attacker reads the keyfile. That requires hardware
+(TPM / secure element) and is out of scope.
+
+## The machine keys expire in year 9999. Isn't that effectively forever?
+
+Yes. Currently set in `ZitadelSetupScore::create_machine_key` as
+a known-bad default chosen for demo convenience (re-running tests
+shouldn't produce expired keys mid-run). Tracked as a known issue.
+
+## Why is the IAM-Owner PAT stored as a plain k8s Secret?
+
+K8s Secrets are base64-encoded, **not** encrypted at rest unless
+etcd encryption-at-rest is explicitly enabled with a KMS provider.
+Anyone with `get secrets` in the `zitadel` namespace effectively
+has Zitadel admin.
+
+The PAT exists because `ZitadelSetupScore` calls Zitadel's
+management API (create project, role, machine user, mint key),
+which requires IAM-Owner privileges. A PAT is the simplest
+credential that survives across applies.
+
+This is a known production-hardening gap. Harmony has the
+`harmony_secret` crate (ADR-020) with OpenBao and local-encrypted-file
+backends; the Score is currently wired against a k8s Secret only.
+
+## What lifetime is set for the human admin password — why does the ConfigMap show one that doesn't work?
+
+`ZitadelScore` regenerates a random admin password on every apply
+and writes it to the rendered ConfigMap. Helm's `FirstInstance`
+block only seeds Postgres on the **first** install against an
+empty DB, so re-applies render a new ConfigMap password but leave
+the original Postgres hash untouched. The displayed password is
+stale on every apply after the first.
+
+To recover access: use the `iam-admin-pat` to call Zitadel's
+management API and reset the human admin's password directly.
+Tracked as a known bug.
+
+## Quick reference — tokens on the wire
+
+| Token | Lives where | Lifetime | Signed by | Purpose |
+| --- | --- | --- | --- | --- |
+| **Assertion** | Agent memory, in-flight | 60 s | Agent (RSA key) | "I'm machine user X — give me an access token" |
+| **Access token** | Agent memory + on-the-wire to NATS | ~12 h | Zitadel | "Zitadel says I'm device X with role `device`" |
+| **NATS user JWT** | NATS server connection state | callout-defined (~30 s) | Auth callout (NKey) | "I have these permissions on these subjects" |
+
+The agent only holds the RSA key on disk and the access token
+in memory. The NATS user JWT is server-internal — agents don't
+see it.
+
+## Code map
+
+| Topic | File |
+| --- | --- |
+| Helm install, masterkey, admin password | `harmony/src/modules/zitadel/mod.rs` |
+| Project/role/machine user provisioning | `harmony/src/modules/zitadel/setup.rs` |
+| Per-device machine user + key handoff | `examples/fleet_e2e_demo/src/lib.rs::provision_device` |
+| JWT-bearer mint | `fleet/harmony-fleet-agent/src/credentials.rs::zitadel_mint` |
+| Auth callout decision tree | `nats/callout/src/handler.rs::decide` |
+| Per-device permission template | `nats/callout/src/permissions.rs::device_default` |
+| End-to-end rehearsal runbook | `examples/fleet_e2e_demo/RUNBOOK.md` |
+| Manual JWT-bearer mint + NATS write recipe | [`fleet-manual-token-mint.md`](./fleet-manual-token-mint.md) |
--- a/docs/guides/kubernetes-ingress.md
+++ b/docs/guides/kubernetes-ingress.md
@@ -146,6 +146,50 @@ For wildcard certificates (e.g. `*.example.com`), HTTP01 cannot be used — conf

 ---

+## Multiple Ingresses on the Same Host
+
+When a single host is fronted by more than one Ingress (e.g. a Helm chart that ships separate Ingresses for an API and a UI under the same hostname), **all of them must reference the same TLS Secret, and only one of them should trigger cert-manager**.
+
+```yaml
+# Ingress 1 — owns the certificate request
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: app-api
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-prod
+spec:
+  rules:
+  - host: app.example.com
+    http: { paths: [{ path: /, pathType: Prefix, backend: { service: { name: app-api, port: { number: 8080 } } } }] }
+  tls:
+  - hosts: [app.example.com]
+    secretName: app-example-com-tls   # cert-manager will populate this
+
+---
+# Ingress 2 — references the same Secret, no cert-manager annotation
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: app-ui
+spec:
+  rules:
+  - host: app.example.com
+    http: { paths: [{ path: /ui, pathType: Prefix, backend: { service: { name: app-ui, port: { number: 3000 } } } }] }
+  tls:
+  - hosts: [app.example.com]
+    secretName: app-example-com-tls   # reuses the cert above
+```
+
+Why this matters — and the failure mode if you don't follow it:
+
+- Two cert-manager-annotated Ingresses on the same host create **two `Certificate` resources** and **two ACME `Order`s** for the same domain.
+- Both Orders launch HTTP01 challenges concurrently; the ingress controller sees two competing challenge Ingresses for `/.well-known/acme-challenge/...` with different tokens — one wins, the other fails.
+- The loser's Certificate stays `Pending`, its Secret is never created.
+- On OKD specifically, the ingress-to-route controller **inlines the TLS cert/key into the generated Route** at creation time. With no Secret it cannot inline anything, and the Route for the second Ingress is silently never admitted — the path becomes unreachable, while the first Ingress's path works fine.
+
+The diagnostic signature: `kubectl get ingress` shows both Ingresses, `kubectl get route` shows only one, the second Ingress's `status.loadBalancer` is `{}`, and the second Certificate is stuck in `Pending`.
+
 ## OKD / OpenShift Notes

 On OKD, standard Ingress resources are automatically translated into OpenShift `Route` objects. The default TLS termination mode is `edge`, which is correct for most HTTP applications. To control this explicitly, add:
--- a/docs/guides/web-auth-security.md
+++ b/docs/guides/web-auth-security.md
@@ -0,0 +1,217 @@
+# Web Authentication and CSRF Security Guidelines
+
+These guidelines define the baseline for Harmony web frontends and future operator dashboards that use browser-based authentication, cookie sessions, Axum, HTMX, or OIDC providers such as Zitadel.
+
+## Goals
+
+- Prevent unauthenticated access.
+- Prevent authenticated users from performing actions they are not authorized to perform.
+- Prevent CSRF on state-changing endpoints.
+- Reduce XSS impact with CSP and safe rendering practices.
+- Keep authentication code understandable and reusable across projects.
+
+## Required Baseline
+
+Every browser-facing authenticated application must implement the following controls before production use:
+
+1. **OIDC Authorization Code + PKCE** for login.
+2. **OIDC nonce validation** on login callback.
+3. **Explicit authorization checks** using roles, groups, claims, or permissions.
+4. **CSRF protection** on all mutating routes.
+5. **Secure cookie settings**: `HttpOnly`, `Secure` in production, constrained `SameSite`, and appropriate path/domain scoping.
+6. **Strict security headers**, especially Content Security Policy.
+7. **No permissive credentialed CORS** for operator dashboards.
+8. **Generic client-facing errors** with detailed errors logged server-side only.
+
+## OIDC Login Requirements
+
+Use Authorization Code flow with PKCE. On login start, generate and persist a short-lived login attempt containing:
+
+- `state`
+- `pkce_code_verifier`
+- `nonce`
+- creation timestamp or cookie expiration
+
+Send `state`, PKCE challenge, and `nonce` to the authorization endpoint.
+
+On callback:
+
+1. Require a valid login-attempt cookie.
+2. Validate returned `state` against the stored state.
+3. Exchange the authorization code using the stored PKCE verifier.
+4. Validate the returned ID token as an OIDC ID token, including:
+   - signature
+   - issuer
+   - audience/client ID
+   - expiration/not-before
+   - nonce
+   - authorized party (`azp`) when applicable
+5. Create the application session only after all checks pass.
+6. Delete the login-attempt cookie.
+
+`state` and `nonce` are not interchangeable:
+
+- `state` binds the callback redirect to the browser login attempt.
+- `nonce` binds the returned ID token to the browser login attempt.
+- PKCE binds the code exchange to the client that started the flow.
+
+## Session Requirements
+
+For small internal dashboards, a verified short-lived ID token in an `HttpOnly` cookie may be acceptable. For higher-risk systems, prefer server-side sessions:
+
+- Store a random session ID in the browser cookie.
+- Store tokens and session metadata server-side.
+- Support revocation, rotation, idle timeout, and absolute timeout.
+
+Session cookies must use:
+
+- `HttpOnly`
+- `Secure` outside local development
+- `SameSite=Lax` or `SameSite=Strict`
+- `Path=/` unless a narrower path is possible
+- No broad `Domain` attribute unless explicitly required
+
+Production services should fail closed if HTTPS/secure-cookie configuration is inconsistent.
+
+## Authorization Requirements
+
+Authentication is not authorization. A valid identity provider token only proves who the user is.
+
+Every protected application must define required permissions for each state-changing or sensitive route. Examples:
+
+- `fleet:viewer` for read-only dashboard access
+- `fleet:operator` for alert acknowledgement and operational actions
+- `fleet:admin` for settings, user management, or destructive actions
+
+Authorization must be enforced server-side. UI hiding is not sufficient.
+
+## CSRF Protection Standard
+
+For Axum + HTMX dashboards, the recommended baseline is:
+
+1. Require a custom header on all mutating requests.
+2. Validate `Origin` or `Referer` against the configured application origin.
+3. Keep cookies `SameSite=Lax` or stricter.
+4. Do not enable permissive credentialed CORS.
+
+Mutating methods are:
+
+- `POST`
+- `PUT`
+- `PATCH`
+- `DELETE`
+
+Recommended behavior:
+
+- Reject mutating requests without `x-csrf-token`.
+- Reject mutating requests whose `Origin` is present and does not match the configured base URL origin.
+- If `Origin` is absent, require `Referer` to match the configured base URL origin.
+- Reject when neither `Origin` nor `Referer` is available, unless the route is explicitly exempted and documented.
+
+The CSRF header value may be static for HTMX dashboards, for example `x-csrf-token: 1`. The protection comes from the fact that cross-origin HTML forms cannot set custom headers, and cross-origin JavaScript that adds a custom header triggers a CORS preflight, which the browser rejects unless the server's CORS policy explicitly allows it.
+
+Do not rely on header presence alone if adding Origin/Referer validation is practical.
+
+## HTMX Integration
+
+Add the CSRF header globally from a static JavaScript file:
+
+```js
+document.body.addEventListener('htmx:configRequest', (event) => {
+  event.detail.headers['x-csrf-token'] = '1';
+});
+```
+
+Serve this as a static asset, for example `/static/app.js`. Avoid inline scripts so that the application can use a strict CSP without `unsafe-inline`.
+
+## Content Security Policy
+
+Every browser-facing dashboard should set a restrictive CSP. A good starting point is:
+
+```http
+Content-Security-Policy: default-src 'self'; script-src 'self'; style-src 'self'; img-src 'self' data:; connect-src 'self'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'
+```
+
+Meaning:
+
+- Only load scripts, styles, and API/SSE/HTMX connections from the same origin.
+- Prevent clickjacking with `frame-ancestors 'none'`.
+- Prevent plugin/object execution with `object-src 'none'`.
+- Prevent injected `<base>` tags from rewriting relative URLs.
+- Prevent forms from submitting to external origins.
+
+If inline scripts or styles are unavoidable, prefer per-response nonces over `unsafe-inline`.
+
+## Other Security Headers
+
+Set these headers on all HTML responses, or globally when safe:
+
+```http
+X-Content-Type-Options: nosniff
+Referrer-Policy: same-origin
+Permissions-Policy: geolocation=(), microphone=(), camera=()
+```
+
+When the service is HTTPS-only, also set HSTS:
+
+```http
+Strict-Transport-Security: max-age=31536000; includeSubDomains
+```
+
+Only enable HSTS when the domain and subdomains are intended to be HTTPS-only.
+
+## CORS Policy
+
+Operator dashboards should normally not enable CORS.
+
+Never combine all of the following unless there is a reviewed, explicit integration need:
+
+- credentialed requests
+- arbitrary or reflected origins
+- custom request headers such as `x-csrf-token`
+
+A permissive credentialed CORS policy can bypass custom-header CSRF protection.
+
+## Error Handling
+
+Client-facing auth errors should be generic, for example:
+
+```text
+Authentication failed. Please start login again.
+```
+
+Detailed causes, provider responses, token validation failures, and stack traces should be logged server-side only.
+
+Avoid returning raw OIDC provider error bodies or JWT validation details to the browser.
+
+## Implementation Checklist
+
+Before shipping a Harmony web frontend:
+
+- [ ] Login uses Authorization Code + PKCE.
+- [ ] Login attempt stores `state`, PKCE verifier, `nonce`, and expires quickly.
+- [ ] Callback validates `state`.
+- [ ] Callback validates ID token nonce.
+- [ ] JWT validation checks signature, issuer, expiry, and exact intended audience/client.
+- [ ] Authorization roles/permissions are enforced server-side.
+- [ ] Mutating routes are protected by CSRF middleware.
+- [ ] CSRF middleware requires custom header and same-origin `Origin`/`Referer`.
+- [ ] Session cookies are `HttpOnly`, `Secure` in production, and `SameSite=Lax` or stricter.
+- [ ] No permissive credentialed CORS is enabled.
+- [ ] CSP is configured without `unsafe-inline` where practical.
+- [ ] Security headers are configured.
+- [ ] Auth errors shown to users are generic.
+- [ ] Detailed auth failures are logged server-side.
+
+## Recommended Default for Harmony Dashboards
+
+For current and future Axum + HTMX dashboards, use this default design:
+
+- Zitadel/OIDC Authorization Code + PKCE + nonce.
+- Short-lived encrypted login-attempt cookie.
+- Server-side authorization middleware based on roles/claims.
+- `HttpOnly`, `Secure`, `SameSite=Lax` or `Strict` session cookie.
+- CSRF middleware requiring `x-csrf-token` and same-origin `Origin`/`Referer`.
+- Static `/static/app.js` that adds the HTMX CSRF header.
+- Strict CSP that allows scripts only from `self`.
+- No CORS unless explicitly reviewed.
--- a/examples/example_linux_vm/Cargo.toml
+++ b/examples/example_linux_vm/Cargo.toml
@@ -9,7 +9,7 @@ name = "example_linux_vm"
 path = "src/main.rs"

 [dependencies]
-harmony = { path = "../../harmony" }
+harmony = { path = "../../harmony", features = ["kvm"] }
 tokio.workspace = true
 log.workspace = true
 env_logger.workspace = true
--- a/examples/fleet_auth_callout/Cargo.toml
+++ b/examples/fleet_auth_callout/Cargo.toml
@@ -0,0 +1,46 @@
+[package]
+name = "example-fleet-auth-callout"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "End-to-end fleet IoT security model: Zitadel + NATS + auth callout on k3d"
+
+[lib]
+name = "example_fleet_auth_callout"
+path = "src/lib.rs"
+
+[[bin]]
+name = "fleet-auth-callout"
+path = "src/main.rs"
+
+[[test]]
+name = "security_model"
+path = "tests/security_model.rs"
+
+[dependencies]
+harmony = { path = "../../harmony" }
+harmony-k8s = { path = "../../harmony-k8s" }
+harmony_types = { path = "../../harmony_types" }
+k3d-rs = { path = "../../k3d" }
+harmony-nats-callout = { path = "../../nats/callout" }
+async-nats.workspace = true
+nkeys = "0.4"
+jsonwebtoken = "9"
+reqwest = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
+tokio-test.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+anyhow.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+log.workspace = true
+env_logger.workspace = true
+futures-util.workspace = true
+k8s-openapi.workspace = true
+kube.workspace = true
+base64 = "0.22"
+tempfile.workspace = true
+url.workspace = true
+directories = "6.0.0"
--- a/examples/fleet_auth_callout/src/lib.rs
+++ b/examples/fleet_auth_callout/src/lib.rs
@@ -0,0 +1,806 @@
+//! End-to-end fleet IoT security model harness.
+//!
+//! Brings up the full stack on a local k3d cluster:
+//! 1. k3d cluster (creates if missing) with HTTP/NATS port mappings.
+//! 2. Zitadel + Postgres (via the official Helm chart).
+//! 3. Project + roles (`fleet-admin`, `device`) + 4 machine users +
+//!    JWT keys via ZitadelSetupScore.
+//! 4. NATS server with `auth_callout` block referencing the issuer NKey.
+//! 5. The harmony-nats-callout binary as a Deployment, sideloaded as a
+//!    container image into k3d.
+//!
+//! `main.rs` calls [`bring_up_stack`] then prints credentials and waits.
+//! Tests under `tests/` share a single cluster via `OnceCell` and exercise
+//! the security model through real `async_nats` clients using JWT-bearer
+//! access tokens minted from the machine keys produced in step 3.
+//!
+//! ## Why this lives in an example, not under `harmony/src/modules/`
+//!
+//! Everything in this crate is a *composition* of reusable Scores plus
+//! test fixtures (the JWT-bearer helper, image-build glue). The Scores
+//! themselves are in `harmony/src/modules/{zitadel,nats_auth_callout}`.
+
+use std::path::PathBuf;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use harmony::inventory::Inventory;
+use harmony::modules::k8s::coredns::{CoreDNSRewrite, CoreDNSRewriteScore};
+use harmony::modules::nats::NatsHelmChartScore;
+use harmony::modules::nats_auth_callout::{NatsAuthCalloutScore, render_auth_callout_block};
+use harmony::modules::zitadel::{
+    MachineKeyType, ZitadelApiApp, ZitadelClientConfig, ZitadelMachineUser, ZitadelRole,
+    ZitadelScore, ZitadelSetupScore,
+};
+use harmony::score::Score;
+use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
+use jsonwebtoken::{Algorithm, EncodingKey, Header as JwtHeader, encode as jwt_encode};
+use k3d_rs::{K3d, PortMapping};
+use log::info;
+use nkeys::KeyPair;
+use serde::{Deserialize, Serialize};
+
+pub const CLUSTER_NAME: &str = "fleet-auth-callout";
+pub const HTTP_PORT: u32 = 8080;
+pub const NATS_NODE_PORT: i32 = 30422;
+pub const ZITADEL_HOST: &str = "sso.fleet.local";
+
+pub const FLEET_NAMESPACE: &str = "fleet-system";
+pub const NATS_NAMESPACE: &str = FLEET_NAMESPACE;
+pub const NATS_RELEASE: &str = "fleet-nats";
+pub const CALLOUT_DEPLOYMENT_NAME: &str = "fleet-callout";
+/// `localhost/` prefix matches what podman tags images as internally —
+/// `podman build -t foo:tag` produces `localhost/foo:tag`. After
+/// `podman save → k3d image import`, the image lands in the k3d node's
+/// containerd under that exact name. Without the prefix, K8s would
+/// treat `foo:tag` as a Docker Hub reference and hit ImagePullBackOff.
+pub const CALLOUT_IMAGE_TAG: &str = "localhost/harmony-nats-callout:dev";
+
+pub const PROJECT_NAME: &str = "fleet";
+pub const API_APP_NAME: &str = "nats";
+pub const ADMIN_ROLE_KEY: &str = "fleet-admin";
+pub const DEVICE_ROLE_KEY: &str = "device";
+
+pub const ADMIN_USERNAME: &str = "ops-station";
+pub const DEVICE_A_USERNAME: &str = "sensor-a";
+pub const DEVICE_B_USERNAME: &str = "sensor-b";
+pub const NO_ROLE_USERNAME: &str = "intruder";
+
+/// Service-side NATS account user that the callout itself authenticates
+/// with (listed in `auth_callout.auth_users` to bypass the callout).
+pub const NATS_AUTH_USER: &str = "auth";
+pub const NATS_AUTH_PASS: &str = "auth-callout-pass";
+pub const NATS_ACCOUNT: &str = "DEVICES";
+pub const NATS_SYSTEM_USER: &str = "sys-admin";
+pub const NATS_SYSTEM_PASS: &str = "sys-admin-pass";
+
+#[derive(Debug, Clone)]
+pub struct StackHandles {
+    pub cluster_name: String,
+    pub nats_url_external: String,
+    pub zitadel_url: String,
+    pub project_id: String,
+    pub admin_machine_key: String,
+    pub device_a_machine_key: String,
+    pub device_b_machine_key: String,
+    pub intruder_machine_key: String,
+    pub issuer_pubkey: String,
+}
+
+/// JSON keyfile content as Zitadel emits it for `KEY_TYPE_JSON` machine keys.
+#[derive(Debug, Deserialize, Serialize)]
+pub struct MachineKeyFile {
+    #[serde(rename = "type")]
+    pub r#type: String,
+    #[serde(rename = "keyId")]
+    pub key_id: String,
+    /// PEM-encoded RSA private key.
+    pub key: String,
+    #[serde(rename = "userId")]
+    pub user_id: String,
+}
+
+fn data_dir() -> PathBuf {
+    let user_dirs = directories::BaseDirs::new();
+    let k3d_dir = user_dirs.map(|d| d.data_dir().join("harmony").join("k3d"));
+    k3d_dir.unwrap_or_else(|| PathBuf::from("/tmp/harmony"))
+}
+
+pub fn create_k3d() -> K3d {
+    let state_dir = data_dir();
+    std::fs::create_dir_all(&state_dir).expect("create k3d data dir");
+    // HTTP_PORT:80 so /etc/hosts entries (or curl --resolve) hit ingress.
+    let ingress = PortMapping::new(HTTP_PORT, 80);
+    // NATS_NODE_PORT lets clients off-cluster talk to the NATS service.
+    let nats_port = NATS_NODE_PORT as u32;
+    let nats = PortMapping::new(nats_port, nats_port);
+    let cluster = K3d::new(state_dir, Some(CLUSTER_NAME.to_string()));
+    cluster.with_port_mappings(vec![ingress, nats])
+}
+
+pub fn create_topology(k3d: &K3d) -> K8sAnywhereTopology {
+    let fallback = || format!("k3d-{CLUSTER_NAME}");
+    let context = k3d.context_name().unwrap_or_else(fallback);
+    // SAFETY(review): sound only if no other thread touches the env concurrently — TODO confirm.
+    unsafe {
+        std::env::set_var("HARMONY_USE_LOCAL_K3D", "false");
+        std::env::set_var("HARMONY_AUTOINSTALL", "false");
+        std::env::set_var("HARMONY_K8S_CONTEXT", &context);
+    }
+    K8sAnywhereTopology::from_env()
+}
+
+/// Build the NATS Helm values that wire `auth_callout` to a callout
+/// service running in the same account, plus a NodePort for off-cluster
+/// access from tests on the host.
+///
+/// **Why the explicit `service.merge.spec.ports` list:** the upstream
+/// chart's `service.ports.<name>.merge` field is *not* a strategic-merge
+/// directive — it gets emitted as-is into the rendered Service (the
+/// chart's `_helpers.tpl` does `merge (dict "name" $k) $v` which leaves
+/// `merge: …` as a literal field on each port). K8s then rejects the
+/// Service with "field not declared in schema". Only the top-level
+/// `service.merge` is actually a `mergeOverwrite` patch; we use that
+/// path and re-state the full ports list so `nats` gets our nodePort.
+pub fn render_nats_values(issuer_pubkey: &str) -> String {
+    let callout_block = render_auth_callout_block(issuer_pubkey, NATS_AUTH_USER, NATS_ACCOUNT);
+    format!(
+        r#"fullnameOverride: {nats_release}
+config:
+  cluster:
+    enabled: false
+  jetstream:
+    enabled: true
+    fileStorage:
+      enabled: true
+      size: 2Gi
+  merge:
+    {auth_callout_indented}
+    accounts:
+      {nats_account}:
+        jetstream: enabled
+        users:
+          - user: "{auth_user}"
+            password: "{auth_pass}"
+      SYS:
+        users:
+          - user: "{sys_user}"
+            password: "{sys_pass}"
+    system_account: SYS
+service:
+  merge:
+    spec:
+      type: NodePort
+      ports:
+        - appProtocol: tcp
+          name: nats
+          port: 4222
+          targetPort: nats
+          nodePort: {node_port}
+        - appProtocol: http
+          name: monitor
+          port: 8222
+          targetPort: monitor
+"#,
+        nats_release = NATS_RELEASE,
+        // The template above already indents the first line of the block
+        // by four spaces. Joining the lines on "\n    " gives every
+        // following line that same four-space indent, so the whole block
+        // nests under `config.merge`. `lines()` drops line terminators,
+        // so the result never ends in a dangling newline; an empty block
+        // renders as an empty string.
+        auth_callout_indented = callout_block
+            .lines()
+            .collect::<Vec<_>>()
+            .join("\n    "),
+        nats_account = NATS_ACCOUNT,
+        auth_user = NATS_AUTH_USER,
+        auth_pass = NATS_AUTH_PASS,
+        sys_user = NATS_SYSTEM_USER,
+        sys_pass = NATS_SYSTEM_PASS,
+        node_port = NATS_NODE_PORT,
+    )
+}
+
+/// Bring the entire stack up on a local k3d cluster. Idempotent —
+/// re-running picks up existing resources.
+///
+/// Returns handles + credentials. The machine key fields contain raw
+/// JSON keyfile content (`MachineKeyFile`) and can be passed straight
+/// to [`mint_access_token`] to authenticate as the corresponding user.
+pub async fn bring_up_stack() -> Result<StackHandles> {
+    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init();
+
+    let k3d = create_k3d();
+
+    info!("[1/8] ensuring k3d cluster '{CLUSTER_NAME}' is up");
+    k3d.ensure_installed()
+        .await
+        .map_err(|e| anyhow::anyhow!("k3d ensure: {e}"))?;
+
+    let topology = create_topology(&k3d);
+    topology.ensure_ready().await.context("topology init")?;
+
+    info!("[2/8] deploying Zitadel (this takes several minutes the first time)");
+    deploy_zitadel(&topology).await?;
+
+    info!("[3/8] CoreDNS rewrite so in-cluster lookups for {ZITADEL_HOST} resolve");
+    CoreDNSRewriteScore {
+        rewrites: vec![CoreDNSRewrite {
+            hostname: ZITADEL_HOST.to_string(),
+            target: "zitadel.zitadel.svc.cluster.local".to_string(),
+        }],
+    }
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("CoreDNS rewrite")?;
+
+    info!("[4/8] waiting for Zitadel HTTP to respond");
+    wait_for_zitadel_ready().await?;
+
+    info!("[5/8] provisioning project + roles + machine users in Zitadel");
+    let setup = ZitadelSetupScore {
+        host: ZITADEL_HOST.to_string(),
+        scheme: Default::default(),
+        port: None,
+        skip_tls: true,
+        endpoint: Some(format!("http://127.0.0.1:{HTTP_PORT}")),
+        admin_org_id: None,
+        namespace: "zitadel".to_string(),
+        applications: vec![],
+        api_apps: vec![ZitadelApiApp {
+            project_name: PROJECT_NAME.to_string(),
+            app_name: API_APP_NAME.to_string(),
+        }],
+        roles: vec![
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: ADMIN_ROLE_KEY.to_string(),
+                display_name: "Fleet Admin".to_string(),
+                group: None,
+            },
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: DEVICE_ROLE_KEY.to_string(),
+                display_name: "Device".to_string(),
+                group: None,
+            },
+        ],
+        machine_users: vec![
+            ZitadelMachineUser {
+                username: ADMIN_USERNAME.to_string(),
+                name: "Ops Station".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![ADMIN_ROLE_KEY.to_string()],
+            },
+            ZitadelMachineUser {
+                username: DEVICE_A_USERNAME.to_string(),
+                name: "Sensor A".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![DEVICE_ROLE_KEY.to_string()],
+            },
+            ZitadelMachineUser {
+                username: DEVICE_B_USERNAME.to_string(),
+                name: "Sensor B".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![DEVICE_ROLE_KEY.to_string()],
+            },
+            ZitadelMachineUser {
+                username: NO_ROLE_USERNAME.to_string(),
+                name: "Intruder".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: None,
+                grant_roles: vec![],
+            },
+        ],
+    };
+    setup
+        .interpret(&Inventory::autoload(), &topology)
+        .await
+        .context("ZitadelSetupScore failed")?;
+
+    let zcfg = ZitadelClientConfig::load()
+        .context("ZitadelSetupScore did not produce a client config cache")?;
+    let project_id = zcfg
+        .project_id_by_name(PROJECT_NAME)
+        .or(zcfg.project_id.as_ref())
+        .context("project_id missing from cache")?
+        .clone();
+
+    info!("[6/8] generating callout issuer NKey + deploying NATS with auth_callout");
+    // Re-use a stable seed across runs by stashing it in a K8s secret
+    // in the fleet namespace. Fall back to a fresh one and persist it.
+    // Keeping it stable means re-runs of the example don't invalidate
+    // previously issued user JWTs in NATS.
+    let issuer_seed = ensure_issuer_seed(&topology).await?;
+    let issuer_kp = KeyPair::from_seed(&issuer_seed)
+        .map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
+    let issuer_pubkey = issuer_kp.public_key();
+
+    NatsHelmChartScore::new(
+        NATS_RELEASE.to_string(),
+        NATS_NAMESPACE.to_string(),
+        render_nats_values(&issuer_pubkey),
+    )
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("NATS deploy")?;
+
+    info!("[7/8] building + sideloading callout image into k3d");
+    build_and_load_callout_image(&k3d).await?;
+
+    info!("[8/8] deploying NatsAuthCalloutScore");
+    let mut callout = NatsAuthCalloutScore::new(
+        CALLOUT_DEPLOYMENT_NAME,
+        FLEET_NAMESPACE,
+        format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
+        format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        // Zitadel emits aud = projectId for tokens issued via the
+        // `urn:zitadel:iam:org:project:id:<projectId>:aud` scope.
+        project_id.clone(),
+        NATS_AUTH_USER,
+        NATS_AUTH_PASS,
+        issuer_seed,
+    )
+    .image(CALLOUT_IMAGE_TAG)
+    .target_account(NATS_ACCOUNT)
+    .admin_role(ADMIN_ROLE_KEY)
+    .device_role(DEVICE_ROLE_KEY)
+    .danger_accept_invalid_certs(true);
+    // Zitadel doesn't emit a custom `device_id` claim by default — that
+    // would require a Zitadel Action to map metadata into an extension
+    // claim. For this example we read the device id from `client_id`
+    // instead. Production deployments that want a separate `device_id`
+    // claim should configure a Zitadel Action and override the
+    // device_id_claim path back to `device_id`.
+    // Why `client_id` and not `preferred_username` — Zitadel access
+    // tokens for machine users:
+    //   * Don't carry `preferred_username` (that's an OIDC ID-token claim);
+    //   * Do carry `client_id` set to the machine user's userName — perfect
+    //     for our device-id-from-username case.
+    //
+    // The project's role claim lives at a *project-scoped* path
+    // `urn:zitadel:iam:org:project:<projectId>:roles` (NOT the unqualified
+    // `urn:zitadel:iam:org:project:roles`) because we request the
+    // `urn:zitadel:iam:org:project:id:<projectId>:aud` scope. The latter
+    // forces Zitadel to scope role claims to the specific project, which
+    // is what we want for tenant isolation.
+    callout.device_id_claim = "client_id".to_string();
+    // Zitadel's `client_id` for a machine user equals its userName, so
+    // a user created as `device-vm-device-00` (matching the
+    // `device_username()` convention used by both fleet_e2e_demo and
+    // fleet_rpi_setup) lands in the JWT verbatim. Strip the `device-`
+    // prefix so the callout interpolates permissions against the bare
+    // device id (`vm-device-00`) the agent uses for KV keys.
+    callout.device_id_prefix_strip = "device-".to_string();
+    callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
+    callout
+        .interpret(&Inventory::autoload(), &topology)
+        .await
+        .context("callout deploy")?;
+
+    info!("waiting for callout pod to be Ready before handing the stack over");
+    wait_for_callout_ready(&topology).await?;
+
+    let admin_machine_key = zcfg
+        .machine_key(ADMIN_USERNAME)
+        .context("admin machine key missing from cache")?
+        .clone();
+    let device_a_machine_key = zcfg
+        .machine_key(DEVICE_A_USERNAME)
+        .context("device A machine key missing from cache")?
+        .clone();
+    let device_b_machine_key = zcfg
+        .machine_key(DEVICE_B_USERNAME)
+        .context("device B machine key missing from cache")?
+        .clone();
+    let intruder_machine_key = zcfg
+        .machine_key(NO_ROLE_USERNAME)
+        .context("intruder machine key missing from cache")?
+        .clone();
+
+    Ok(StackHandles {
+        cluster_name: CLUSTER_NAME.to_string(),
+        nats_url_external: format!("nats://127.0.0.1:{NATS_NODE_PORT}"),
+        zitadel_url: format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        project_id,
+        admin_machine_key,
+        device_a_machine_key,
+        device_b_machine_key,
+        intruder_machine_key,
+        issuer_pubkey,
+    })
+}
+
+pub async fn deploy_zitadel(topology: &K8sAnywhereTopology) -> Result<()> {
+    // `external_port` must match the host-side k3d port mapping so
+    // Zitadel's emitted issuer is `http://sso.fleet.local:8080`.
+    // Without this, JWT-bearer audience validation fails with
+    // `Errors.Internal` (the assertion `aud` doesn't match the
+    // chart-default issuer at port 80).
+    let score = ZitadelScore {
+        host: ZITADEL_HOST.to_string(),
+        zitadel_version: "v4.12.1".to_string(),
+        external_secure: false,
+        external_port: Some(HTTP_PORT),
+        ..Default::default()
+    };
+    let inventory = Inventory::autoload();
+    let outcome = score.interpret(&inventory, topology).await;
+    outcome.context("ZitadelScore deploy")?;
+    Ok(())
+}
+
+/// Block until the callout Deployment's rollout completes (60s timeout),
+/// using the kubectl context derived from `CLUSTER_NAME`.
+pub async fn wait_for_callout_ready(topology: &K8sAnywhereTopology) -> Result<()> {
+    let _ = topology;
+    // `kubectl rollout status deployment` is the canonical "is the new
+    // ReplicaSet's pod up?" check — it handles observed-generation
+    // tracking, terminating-old-replica edge cases, and pod-readiness in
+    // one call. Reproducing that in the kube client is doable but error-
+    // prone; shelling out keeps it short and obviously-correct.
+    let status = tokio::process::Command::new("kubectl")
+        .args([
+            "--context",
+            &format!("k3d-{CLUSTER_NAME}"),
+            "rollout",
+            "status",
+            "-n",
+            FLEET_NAMESPACE,
+            &format!("deployment/{CALLOUT_DEPLOYMENT_NAME}"),
+            "--timeout=60s",
+        ])
+        .status()
+        .await
+        .context("invoke kubectl rollout status")?;
+    anyhow::ensure!(status.success(), "kubectl rollout status timed out / failed");
+    Ok(())
+}
+
+pub async fn wait_for_zitadel_ready() -> Result<()> {
+    let http = reqwest::Client::builder()
+        .timeout(Duration::from_secs(5))
+        .build()?;
+    let url = format!("http://127.0.0.1:{HTTP_PORT}/.well-known/openid-configuration");
+    // Include the port in Host so Zitadel emits a matching issuer URL
+    // — see `mint_access_token` for the underlying mechanism.
+    let host_header = format!("{ZITADEL_HOST}:{HTTP_PORT}");
+    for attempt in 1..=120 {
+        // Only every 15th unsuccessful attempt is logged.
+        let should_log = attempt % 15 == 0;
+        let probe = http
+            .get(url.as_str())
+            .header("Host", host_header.as_str())
+            .send()
+            .await;
+        match probe {
+            Ok(resp) if resp.status().is_success() => return Ok(()),
+            Ok(resp) if should_log => {
+                info!("Zitadel HTTP {} (attempt {attempt}/120)", resp.status())
+            }
+            Err(err) if should_log => info!("Zitadel unreachable: {err} (attempt {attempt}/120)"),
+            _ => {}
+        }
+        tokio::time::sleep(Duration::from_secs(2)).await;
+    }
+    anyhow::bail!("timed out waiting for Zitadel")
+}
+
+/// Persist the callout's issuer NKey seed in a K8s secret so re-runs of
+/// the example don't invalidate previously issued user JWTs in NATS.
+pub async fn ensure_issuer_seed(topology: &K8sAnywhereTopology) -> Result<String> {
+    use k8s_openapi::ByteString;
+    use k8s_openapi::api::core::v1::{Namespace, Secret};
+    use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
+    use std::collections::BTreeMap;
+
+    let k8s = topology
+        .k8s_client()
+        .await
+        .map_err(|e| anyhow::anyhow!("k8s_client: {e}"))?;
+
+    // Ensure namespace exists first — secret creation requires it.
+    if k8s
+        .get_resource::<Namespace>(FLEET_NAMESPACE, None)
+        .await?
+        .is_none()
+    {
+        let mut ns = Namespace::default();
+        ns.metadata.name = Some(FLEET_NAMESPACE.to_string());
+        // Still best-effort, but a failure is logged instead of vanishing.
+        if let Err(e) = k8s.create(&ns, None).await {
+            log::warn!("could not create namespace {FLEET_NAMESPACE}: {e}");
+        }
+    }
+
+    let secret_name = "callout-issuer-seed";
+
+    if let Some(existing) = k8s
+        .get_resource::<Secret>(secret_name, Some(FLEET_NAMESPACE))
+        .await?
+        && let Some(data) = existing.data
+        && let Some(seed_bytes) = data.get("seed")
+    {
+        return Ok(std::str::from_utf8(&seed_bytes.0)?.trim().to_string());
+    }
+
+    let seed = KeyPair::new_account()
+        .seed()
+        .map_err(|e| anyhow::anyhow!("nkey seed: {e}"))?;
+    let mut data = BTreeMap::new();
+    data.insert("seed".to_string(), ByteString(seed.as_bytes().to_vec()));
+    let secret = Secret {
+        metadata: ObjectMeta {
+            name: Some(secret_name.to_string()),
+            namespace: Some(FLEET_NAMESPACE.to_string()),
+            ..Default::default()
+        },
+        data: Some(data),
+        type_: Some("Opaque".to_string()),
+        ..Default::default()
+    };
+    // If this fails the seed is not persisted and a later run generates a new one.
+    if let Err(e) = k8s.create(&secret, Some(FLEET_NAMESPACE)).await {
+        log::warn!("issuer seed not persisted to secret {secret_name}: {e}");
+    }
+    Ok(seed)
+}
+
+/// Build the callout binary, package the container image, and import it
+/// into the running k3d cluster. Mirrors `fleet/scripts/load-test.sh`'s
+/// staging-context pattern (the workspace `.dockerignore` excludes
+/// `target/`).
+///
+/// Steps: `cargo build --release` at the workspace root, `podman build`
+/// from a throw-away context holding only the binary and an inline
+/// Dockerfile, `podman save` to a temp archive, then `k3d image import`
+/// of that archive. The archive is removed once the import has been
+/// attempted, and also when `podman save` exits unsuccessfully.
+///
+/// # Errors
+///
+/// Fails when a spawned command cannot be started or exits
+/// unsuccessfully, when staging the build context fails, or when the
+/// temp archive path is not valid UTF-8.
+pub async fn build_and_load_callout_image(k3d: &K3d) -> Result<()> {
+    let workspace_root = std::env::var("CARGO_MANIFEST_DIR")
+        .map(|d| PathBuf::from(d).join("..").join(".."))
+        .unwrap_or_else(|_| PathBuf::from("."));
+    let workspace_root = workspace_root.canonicalize().unwrap_or(workspace_root);
+
+    info!("cargo build --release -p harmony-nats-callout");
+    let status = tokio::process::Command::new("cargo")
+        .args(["build", "--release", "-p", "harmony-nats-callout"])
+        .current_dir(&workspace_root)
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("cargo build failed");
+    }
+
+    let ctx = tempfile::tempdir()?;
+    let bin_dst = ctx.path().join("target/release");
+    std::fs::create_dir_all(&bin_dst)?;
+    std::fs::copy(
+        workspace_root.join("target/release/harmony-nats-callout"),
+        bin_dst.join("harmony-nats-callout"),
+    )?;
+    // The shipped `nats/callout/Dockerfile` is multi-stage (used by
+    // the production build script — see
+    // `fleet/scripts/build_and_push_images.sh`). The k3d e2e harness
+    // wants the host-built binary copied in directly, so we write a
+    // tiny single-stage Dockerfile inline here. Same runtime image
+    // (archlinux:base for matched glibc — explained in the original
+    // Dockerfile) and same USER directive.
+    std::fs::write(
+        ctx.path().join("Dockerfile"),
+        r#"FROM docker.io/library/archlinux:base
+COPY target/release/harmony-nats-callout /usr/local/bin/harmony-nats-callout
+USER 65532:65532
+ENTRYPOINT ["/usr/local/bin/harmony-nats-callout"]
+"#,
+    )?;
+
+    info!("podman build → {CALLOUT_IMAGE_TAG}");
+    let status = tokio::process::Command::new("podman")
+        .args(["build", "-q", "-t", CALLOUT_IMAGE_TAG, "."])
+        .current_dir(ctx.path())
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("podman build failed");
+    }
+
+    info!("k3d image import {CALLOUT_IMAGE_TAG}");
+    let cluster = k3d.cluster_name().unwrap_or(CLUSTER_NAME).to_string();
+    // Deterministic .tar path with a per-process suffix so concurrent
+    // test crates don't trample each other.
+    let tar_path =
+        std::env::temp_dir().join(format!("harmony-callout-image-{}.tar", std::process::id()));
+    // Both CLIs below receive the path as `&str`; report a non-UTF-8
+    // temp dir as an error up front instead of panicking mid-way.
+    let tar_path_str = tar_path
+        .to_str()
+        .context("temp archive path is not valid UTF-8")?
+        .to_string();
+    // `podman save` (docker-archive format) refuses to overwrite an
+    // existing archive — wipe any leftover from a prior failed run.
+    let _ = std::fs::remove_file(&tar_path);
+    let status = tokio::process::Command::new("podman")
+        .args(["save", "-o", tar_path_str.as_str(), CALLOUT_IMAGE_TAG])
+        .status()
+        .await?;
+    if !status.success() {
+        // Don't leave a partial archive behind for the next run.
+        let _ = std::fs::remove_file(&tar_path);
+        anyhow::bail!("podman save failed");
+    }
+    // The k3d binary lives in `~/.local/share/harmony/k3d/k3d` — it's
+    // managed by k3d-rs, not on the system PATH (the user's interactive
+    // shell typically has it as an alias, but child processes don't
+    // inherit aliases). Run it via k3d-rs's accessor.
+    let joined = tokio::task::spawn_blocking(move || {
+        k3d_rs::K3d::new(data_dir(), Some(cluster.clone())).run_k3d_command([
+            "image",
+            "import",
+            tar_path_str.as_str(),
+            "-c",
+            cluster.as_str(),
+        ])
+    })
+    .await;
+    // The archive has served its purpose whatever the import outcome,
+    // so remove it before any error is propagated.
+    let _ = std::fs::remove_file(&tar_path);
+    let output = joined
+        .context("spawn_blocking k3d image import")?
+        .map_err(|e| anyhow::anyhow!("k3d image import failed: {e}"))?;
+    if !output.status.success() {
+        anyhow::bail!(
+            "k3d image import returned {}: {}",
+            output.status,
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+    Ok(())
+}
+
+/// RFC 7523 JWT-bearer client for Zitadel.
+///
+/// `issuer_url` should be the externally-visible Zitadel URL
+/// (e.g. `http://sso.fleet.local:8080`) — it's used as the JWT
+/// assertion's `aud` claim. The actual HTTP transport hits
+/// `127.0.0.1:HTTP_PORT` and forwards the hostname via the `Host`
+/// header, which is how the k3d ingress routes without requiring a
+/// host-side `/etc/hosts` entry.
+///
+/// `machine_key_json` is the raw keyfile content Zitadel emits
+/// (decoded from `keyDetails`). `scopes` are appended to the standard
+/// set; pass `[format!("urn:zitadel:iam:org:project:id:{project_id}:aud")]`
+/// to make the resulting access token's `aud` include the project ID.
+///
+/// Setting the `FLEET_AUTH_CALLOUT_DEBUG_TOKENS` environment variable
+/// (to any value) logs the decoded claims of the returned token.
+///
+/// # Errors
+///
+/// Fails when the keyfile JSON or its RSA key cannot be parsed, the
+/// system clock is unusable, the assertion cannot be signed, the HTTP
+/// request fails, the token endpoint answers with a non-success status
+/// (the response body is included in the error), or the response lacks
+/// an `access_token`.
+pub async fn mint_access_token(
+    issuer_url: &str,
+    machine_key_json: &str,
+    scopes: &[String],
+) -> Result<String> {
+    let key: MachineKeyFile =
+        serde_json::from_str(machine_key_json).context("machine key JSON parse")?;
+
+    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)?;
+    // Checked conversion instead of a silent `as` cast.
+    let now = i64::try_from(since_epoch.as_secs()).context("system time does not fit in i64")?;
+
+    // The machine user is both issuer and subject of the assertion;
+    // it is valid for 60 seconds from now.
+    let claims = serde_json::json!({
+        "iss": key.user_id,
+        "sub": key.user_id,
+        "aud": issuer_url,
+        "exp": now + 60,
+        "iat": now,
+    });
+
+    let mut header = JwtHeader::new(Algorithm::RS256);
+    header.kid = Some(key.key_id.clone());
+    let assertion = jwt_encode(
+        &header,
+        &claims,
+        &EncodingKey::from_rsa_pem(key.key.as_bytes())
+            .context("parse RSA private key from machine key file")?,
+    )?;
+
+    let scope = {
+        let mut s = vec![
+            "openid".to_string(),
+            "profile".to_string(),
+            "urn:zitadel:iam:org:projects:roles".to_string(),
+        ];
+        s.extend(scopes.iter().cloned());
+        s.join(" ")
+    };
+
+    let client = reqwest::Client::builder()
+        .danger_accept_invalid_certs(true)
+        .timeout(Duration::from_secs(10))
+        .build()?;
+    // The Zitadel chart's ingress routes by Host header. Hitting
+    // 127.0.0.1:HTTP_PORT bypasses the need for an /etc/hosts entry
+    // on the host running the tests (k3d's loadbalancer maps the
+    // port; the ingress controller dispatches by Host header).
+    //
+    // The Host MUST include the port: Zitadel derives the OIDC issuer
+    // string from the request's Host header. With `Host: sso.fleet.local`
+    // it emits `iss: http://sso.fleet.local`; with `Host: sso.fleet.local:8080`
+    // it emits `iss: http://sso.fleet.local:8080`. Our JWT assertion's `aud`
+    // must match Zitadel's issuer exactly, so we always send the port.
+    let host = url::Url::parse(issuer_url)
+        .ok()
+        .and_then(|u| {
+            let h = u.host_str()?;
+            let p = u.port_or_known_default();
+            Some(match p {
+                Some(p) => format!("{h}:{p}"),
+                None => h.to_string(),
+            })
+        })
+        .unwrap_or_else(|| format!("{ZITADEL_HOST}:{HTTP_PORT}"));
+    let token_url = format!("http://127.0.0.1:{HTTP_PORT}/oauth/v2/token");
+
+    let resp = client
+        .post(&token_url)
+        .header("Host", host)
+        .form(&[
+            (
+                "grant_type",
+                "urn:ietf:params:oauth:grant-type:jwt-bearer".to_string(),
+            ),
+            ("assertion", assertion),
+            ("scope", scope),
+        ])
+        .send()
+        .await
+        .context("POST /oauth/v2/token")?;
+
+    if !resp.status().is_success() {
+        let status = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        anyhow::bail!("token endpoint returned {status}: {body}");
+    }
+
+    #[derive(Deserialize)]
+    struct TokenResponse {
+        access_token: String,
+    }
+    let tr: TokenResponse = resp.json().await.context("parse token response")?;
+    if std::env::var("FLEET_AUTH_CALLOUT_DEBUG_TOKENS").is_ok()
+        && let Some(payload_b64) = tr.access_token.split('.').nth(1)
+    {
+        use base64::Engine;
+        // The decoder is the NO_PAD variant, so drop any trailing `=`
+        // rather than adding padding. Decode failures are ignored: this
+        // branch is diagnostics only.
+        if let Ok(bytes) = base64::engine::general_purpose::URL_SAFE_NO_PAD
+            .decode(payload_b64.trim_end_matches('='))
+            && let Ok(claims) = serde_json::from_slice::<serde_json::Value>(&bytes)
+        {
+            log::info!(
+                "[debug] access token claims: {}",
+                serde_json::to_string_pretty(&claims).unwrap_or_default()
+            );
+        }
+    }
+    Ok(tr.access_token)
+}
+
+/// Extra scopes to request for our project: a single project-id
+/// audience scope, so the access token's `aud` matches what the
+/// callout's `oidc_audience` expects. Standard OIDC scopes are not
+/// part of the returned list.
+pub fn scopes_for_project(project_id: &str) -> Vec<String> {
+    let audience_scope = format!("urn:zitadel:iam:org:project:id:{project_id}:aud");
+    vec![audience_scope]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// The rendered NATS values must contain the issuer key passed in
+    /// plus the fixed auth-callout, account and node-port fragments.
+    #[test]
+    fn render_nats_values_inlines_auth_callout_block() {
+        let rendered = render_nats_values("ABCDEF");
+        assert!(rendered.contains("issuer: ABCDEF"));
+        assert!(rendered.contains("auth_users: [ auth ]"));
+        assert!(rendered.contains("account: DEVICES"));
+        assert!(rendered.contains("system_account: SYS"));
+        assert!(rendered.contains("nodePort: 30422"));
+    }
+
+    /// Exactly one scope comes back, and it embeds the project id.
+    #[test]
+    fn scopes_for_project_emits_audience_scope() {
+        let expected = vec!["urn:zitadel:iam:org:project:id:12345:aud"];
+        assert_eq!(scopes_for_project("12345"), expected);
+    }
+}
--- a/examples/fleet_auth_callout/src/main.rs
+++ b/examples/fleet_auth_callout/src/main.rs
@@ -0,0 +1,55 @@
+//! `cargo run -p example-fleet-auth-callout` brings the full Zitadel +
+//! NATS + auth callout stack up on a local k3d cluster, prints the URLs
+//! and credentials, and waits for Ctrl-C.
+//!
+//! Tests under `tests/` exercise the security model. They are `#[ignore]`d and
+//! only run with `cargo test -p example-fleet-auth-callout -- --ignored`,
+//! since they bring up the same heavy stack.
+
+use anyhow::Result;
+use example_fleet_auth_callout::{
+    ADMIN_USERNAME, DEVICE_A_USERNAME, DEVICE_B_USERNAME, NO_ROLE_USERNAME, bring_up_stack,
+};
+
+/// Bring the stack up, print a summary of endpoints and provisioned
+/// machine keys, then block until Ctrl-C.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let stack = bring_up_stack().await?;
+    let rule = "=========================================================";
+
+    println!("\n{rule}");
+    println!(" Fleet Auth Callout — STACK READY");
+    println!("{rule}");
+    println!(" k3d cluster:     {}", stack.cluster_name);
+    println!(" Zitadel:         {}", stack.zitadel_url);
+    println!(
+        "   admin login:   admin / (see Zitadel ConfigMap 'zitadel-config-yaml' for password)"
+    );
+    println!(" NATS (external): {}", stack.nats_url_external);
+    println!("   account:       DEVICES");
+    println!(" Project ID:      {}", stack.project_id);
+    println!(" Issuer pubkey:   {}", stack.issuer_pubkey);
+    println!();
+    println!(" Machine keys provisioned (admin / sensor-a / sensor-b / intruder):");
+
+    let provisioned = [
+        (ADMIN_USERNAME, &stack.admin_machine_key),
+        (DEVICE_A_USERNAME, &stack.device_a_machine_key),
+        (DEVICE_B_USERNAME, &stack.device_b_machine_key),
+        (NO_ROLE_USERNAME, &stack.intruder_machine_key),
+    ];
+    for (name, key_json) in provisioned {
+        // Only the keyId is shown to keep the summary short; the full
+        // keyfile is cached at
+        // ~/.local/share/harmony/zitadel/client-config.json
+        let parsed: Option<serde_json::Value> = serde_json::from_str(key_json).ok();
+        let key_id = parsed
+            .as_ref()
+            .and_then(|doc| doc.get("keyId"))
+            .and_then(|id| id.as_str())
+            .unwrap_or("<unknown>");
+        println!("   {name:14}  keyId={key_id}");
+    }
+
+    println!();
+    println!(" Stack is running. Press Ctrl-C to exit (cluster keeps running).");
+    println!("{rule}");
+
+    tokio::signal::ctrl_c().await?;
+    Ok(())
+}
--- a/examples/fleet_auth_callout/tests/security_model.rs
+++ b/examples/fleet_auth_callout/tests/security_model.rs
@@ -0,0 +1,134 @@
+//! Real cargo tests proving the IoT fleet security model.
+//!
+//! All tests share a single bringup of the stack via [`OnceCell`]. The
+//! cluster keeps running across the suite, with each test using the
+//! cached machine keys to mint Zitadel JWTs and exercise NATS through
+//! the auth callout. Three invariants:
+//!
+//! 1. `admin_can_read_any_device_subject` — fleet-admin sees other devices' state.
+//! 2. `device_can_only_access_own_subjects` — sensor-a is denied access to sensor-b's commands.
+//! 3. `unknown_role_is_rejected` — a Zitadel-authenticated user with no
+//!    fleet role cannot connect to NATS.
+//!
+//! ## Why these tests are real-stack
+//!
+//! Mocking the OIDC issuer or NATS would only re-prove what the unit
+//! tests already cover. The point of this suite is to confirm — in CI, in
+//! cargo — that the **deployed** stack on k3d enforces the security
+//! model end-to-end. Hidden cluster-level misconfiguration (an unset
+//! `auth_callout` block, a wrong issuer pubkey, a CoreDNS rewrite drift,
+//! a permissions YAML typo) only shows up here.
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use async_nats::ConnectOptions;
+use example_fleet_auth_callout::{
+    StackHandles, bring_up_stack, mint_access_token, scopes_for_project,
+};
+use futures_util::StreamExt;
+use tokio::sync::OnceCell;
+
+/// Process-wide handle to the stack, filled in by the first test that
+/// asks for it.
+static STACK: OnceCell<Arc<StackHandles>> = OnceCell::const_new();
+
+/// Return the shared stack, bringing it up on first use. A failed
+/// bring-up is returned as an error and leaves the cell empty.
+async fn shared_stack() -> Result<Arc<StackHandles>> {
+    let bring_up = || async { anyhow::Ok(Arc::new(bring_up_stack().await?)) };
+    let handles = STACK.get_or_try_init(bring_up).await?;
+    Ok(Arc::clone(handles))
+}
+
+/// Mint a Zitadel access token from `key_json` and use it as the NATS
+/// connection token against the stack's external NATS URL.
+///
+/// Errors from either step are returned; the NATS one is prefixed with
+/// `NATS connect:`.
+async fn connect_with_role(stack: &StackHandles, key_json: &str) -> Result<async_nats::Client> {
+    let scopes = scopes_for_project(&stack.project_id);
+    let token = mint_access_token(&stack.zitadel_url, key_json, &scopes)
+        .await
+        .context("mint Zitadel access token")?;
+
+    let options = ConnectOptions::with_token(token).connection_timeout(Duration::from_secs(5));
+    match options.connect(&stack.nats_url_external).await {
+        Ok(client) => Ok(client),
+        Err(e) => Err(anyhow::anyhow!("NATS connect: {e}")),
+    }
+}
+
+/// An admin subscribed to `device-state.>` receives what sensor-a
+/// publishes on its own state subject.
+#[tokio::test]
+#[ignore = "requires k3d + docker environment"]
+async fn admin_can_read_any_device_subject() -> Result<()> {
+    const SUBJECT: &str = "device-state.sensor-a";
+    const PAYLOAD: &str = "telemetry-payload";
+
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+
+    let admin = connect_with_role(&stack, &stack.admin_machine_key).await?;
+    let device = connect_with_role(&stack, &stack.device_a_machine_key).await?;
+
+    // Subscribe and flush before the device publishes.
+    let mut admin_sub = admin.subscribe("device-state.>").await?;
+    admin.flush().await?;
+
+    device.publish(SUBJECT, PAYLOAD.into()).await?;
+    device.flush().await?;
+
+    let next = tokio::time::timeout(Duration::from_secs(5), admin_sub.next()).await;
+    let msg = next
+        .context("admin sub timeout")?
+        .context("admin sub closed")?;
+    assert_eq!(msg.payload.as_ref(), PAYLOAD.as_bytes());
+
+    Ok(())
+}
+
+/// sensor-a subscribes to sensor-b's command subject and must see
+/// nothing when sensor-b publishes there.
+#[tokio::test]
+#[ignore = "requires k3d + docker environment"]
+async fn device_can_only_access_own_subjects() -> Result<()> {
+    const B_COMMANDS: &str = "device-commands.sensor-b";
+
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+
+    let device_a = connect_with_role(&stack, &stack.device_a_machine_key).await?;
+    let device_b = connect_with_role(&stack, &stack.device_b_machine_key).await?;
+
+    // B's own subscription is held for the duration of the test; A's
+    // is the one that must stay silent.
+    let _b_sub = device_b.subscribe(B_COMMANDS).await?;
+    let mut a_wrong = device_a.subscribe(B_COMMANDS).await?;
+    device_a.flush().await?;
+    device_b.flush().await?;
+
+    // We only care that A's subscription does NOT receive B's traffic;
+    // NATS is expected to have rejected A's subscription at SUB time,
+    // so this publish should never reach it.
+    device_b.publish(B_COMMANDS, "should-not-leak".into()).await?;
+    device_b.flush().await?;
+
+    // Passing means the wait timed out without A yielding anything.
+    let waited = tokio::time::timeout(Duration::from_millis(750), a_wrong.next()).await;
+    assert!(
+        waited.is_err(),
+        "device A must not observe device B's commands"
+    );
+
+    Ok(())
+}
+
+/// A user that authenticates with Zitadel but holds no fleet role must
+/// be refused by NATS.
+///
+/// Token minting and the NATS connect are checked separately: minting
+/// has to succeed (otherwise the test errors out), so a Zitadel outage
+/// cannot be mistaken for a rejection by the callout.
+#[tokio::test]
+#[ignore = "requires k3d + docker environment"]
+async fn unknown_role_is_rejected() -> Result<()> {
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+
+    // The intruder has a valid Zitadel JWT but no fleet-admin/device role
+    // grant, so obtaining the token itself is expected to work.
+    let token = mint_access_token(
+        &stack.zitadel_url,
+        &stack.intruder_machine_key,
+        &scopes_for_project(&stack.project_id),
+    )
+    .await
+    .context("mint Zitadel access token for intruder")?;
+
+    // The callout must reject the connection — NATS surfaces that
+    // as `authorization violation` at connect time.
+    let result = ConnectOptions::with_token(token)
+        .connection_timeout(Duration::from_secs(5))
+        .connect(&stack.nats_url_external)
+        .await;
+    assert!(
+        result.is_err(),
+        "JWT without fleet role must not be admitted to NATS"
+    );
+
+    Ok(())
+}
--- a/examples/fleet_device_enroll/Cargo.toml
+++ b/examples/fleet_device_enroll/Cargo.toml
@@ -0,0 +1,35 @@
+[package]
+name = "example_fleet_device_enroll"
+version.workspace = true
+edition = "2024"
+license.workspace = true
+
+[[bin]]
+name = "fleet_device_enroll"
+path = "src/main.rs"
+
+[features]
+default = ["vm-rehearsal"]
+# `--launch-pi-vm` and `--vm-rehearsal` flags. Enables the `kvm`
+# feature on `harmony`, which pulls in libvirt (`libvirt-dev`) and
+# does NOT cross-compile for arm64 (no aarch64 libvirt static libs
+# in most distros). Disable this feature when building the
+# enrollment binary FOR the target device:
+#   cargo build --release --target aarch64-unknown-linux-musl \
+#       -p example_fleet_device_enroll --no-default-features
+# A device-side build leaves out the rehearsal code entirely; the
+# binary is enrollment-only and links with no native dependencies.
+vm-rehearsal = ["harmony/kvm"]
+
+[dependencies]
+# `podman` is required even on device-side builds (the operator CRD
+# definitions in `harmony::modules::fleet::operator` depend on
+# `podman` types via the reconciler-contracts shape). `kvm` is the
+# only feature that pulls libvirt and stays opt-in via `vm-rehearsal`.
+harmony = { path = "../../harmony", default-features = false, features = ["podman"] }
+harmony_types = { path = "../../harmony_types" }
+tokio.workspace = true
+log.workspace = true
+env_logger.workspace = true
+anyhow.workspace = true
+clap.workspace = true
--- a/examples/fleet_device_enroll/README.md
+++ b/examples/fleet_device_enroll/README.md
@@ -0,0 +1,193 @@
+# Example: Fleet Device Enroll
+
+Enrolls a device into the fleet by minting its Zitadel machine user + JSON key inline (browser SSO or pre-acquired admin token), then runs `FleetDeviceSetupScore` against the device to install podman, drop the keyfile + agent config, and bring up the agent under systemd.
+
+Two operator workflows land on the same code path:
+
+- **Dev-on-device** — developer runs the score on a Pi with keyboard + display attached. Browser opens locally, dev signs in with their personal SSO account, the score provisions credentials for that one device.
+- **Production-via-SSH** — operator runs the score from a workstation, targets each device over SSH. Browser opens once on the workstation. (Per-batch token caching is on the roadmap; v0 re-prompts per device but the browser session cookie keeps the click cheap.)
+
+## How to use
+
+### Prerequisites
+
+- A running staging install (Zitadel + NATS + auth callout + operator) — see `examples/fleet_staging_install/`.
+- The Zitadel project ID for `fleet` (from the staging install output).
+- A cross-compiled `fleet-agent` binary for the target arch.
+- For VM rehearsal: libvirt + qemu-system-aarch64 + xorriso installed locally. Run `cargo run -p example_fleet_vm_setup -- --bootstrap-only --arch aarch64` once to prime the asset cache and SSH keys.
+- Your Zitadel SSO account must hold a role permitting machine-user, role-grant, and machine-key creation (typically `IAM_OWNER` or `ORG_OWNER`).
+
+### Build flavors
+
+The crate has two flavors selected by Cargo features:
+
+| Flavor | Command | What it includes |
+|---|---|---|
+| **Workstation** (default) | `cargo build --release -p example_fleet_device_enroll` | Everything: `--launch-pi-vm`, `--vm-rehearsal`, full enrollment. Pulls in libvirt via the `vm-rehearsal` feature. |
+| **Device-side** (cross-compile) | `cargo build --release --target aarch64-unknown-linux-musl -p example_fleet_device_enroll --no-default-features` | Enrollment-only — no VM-rehearsal flags, no libvirt. Builds for arm64. **Use the musl target, not gnu** (see below). |
+
+#### Why musl, not gnu
+
+Building with `--target aarch64-unknown-linux-gnu` links against the host's glibc. On a current Arch / Fedora workstation that's glibc 2.41+; on the device it might be glibc 2.36 (Debian 12) or 2.41 (Debian 13). When the workstation's glibc is newer than the device's, the binary fails to start with:
+
+```
+./fleet_device_enroll: /lib/aarch64-linux-gnu/libc.so.6: version `GLIBC_2.39' not found
+```
+
+`aarch64-unknown-linux-musl` produces a **fully static binary** linked against musl libc, which is bundled in. It runs on any aarch64 Linux regardless of the host's libc generation — Debian 12, 13, Pi OS, Alpine, all the same. That's what we want for a device-side binary that gets shipped onto whatever userland the production line happens to flash.
+
+#### One-time musl setup
+
+```bash
+rustup target add aarch64-unknown-linux-musl
+# Arch:   sudo pacman -S aarch64-linux-musl   (AUR) or use mold-aarch64
+# Fedora: sudo dnf install gcc-aarch64-linux-gnu  (we use musl-cross via rustup)
+```
+
+You may need to point Cargo at the right linker. In `~/.cargo/config.toml`:
+
+```toml
+[target.aarch64-unknown-linux-musl]
+linker = "aarch64-linux-musl-gcc"
+```
+
+Or use `cross` (`cargo install cross`) which handles the toolchain automatically:
+
+```bash
+cross build --release --target aarch64-unknown-linux-musl \
+  -p example_fleet_device_enroll --no-default-features
+```
+
+#### Copying to the device
+
+```bash
+scp target/aarch64-unknown-linux-musl/release/fleet_device_enroll pi@<host>:
+```
+
+Then SSH to the device and run it as documented in [Dev-on-device](#dev-on-device) below.
+
+### Quickstart — Pi-equivalent VM rehearsal
+
+Boot a Pi-equivalent VM (Debian bookworm arm64 generic-cloud — same Debian base Pi OS is built on; Pi OS itself is locked to Pi hardware and won't boot in generic KVM) with one command:
+
+```bash
+cargo run -p example_fleet_device_enroll -- --launch-pi-vm
+```
+
+The command boots the VM and exits, printing the SSH connection details and a suggested next command. From there, enroll the running VM:
+
+```bash
+./target/debug/fleet_device_enroll \
+  --target ssh://fleet-admin@<VM_IP> \
+  --device-id pi-rehearsal-01 \
+  --issuer-url https://sso-staging.cb1.nationtech.io \
+  --audience <PROJECT_ID> \
+  --nats-url wss://nats-fleet-staging.cb1.nationtech.io \
+  --admin-oidc-client-id <CLIENT_ID> \
+  --agent-binary target/aarch64-unknown-linux-gnu/release/fleet-agent
+```
+
+`--device-id` is required and validated against RFC1123 subdomain rules (lowercase alphanumeric + `-`, must start and end with an alphanumeric, ≤253 chars total / ≤63 chars per label). Same id is reused for the agent's TOML, the Zitadel machine username (`device-<id>`), and the Kubernetes Device CR — so anything kube wouldn't accept as a `metadata.name` is rejected upfront here instead of three layers down at operator-reconcile time.
+
+The browser opens to Zitadel's device-code login. Sign in with your SSO account; the score mints the per-device user, drops the keyfile, and brings up the agent.
+
+### Dev-on-device
+
+Run the binary on the Pi itself, omit `--target` entirely. The score uses ansible's local connection and runs everything on the same machine — no SSH, no keypair:
+
+```bash
+fleet_device_enroll \
+  --issuer-url https://sso.example.com \
+  --audience <PROJECT_ID> \
+  --nats-url wss://nats.example.com \
+  --admin-oidc-client-id <CLIENT_ID> \
+  --agent-binary /usr/local/bin/fleet-agent \
+  --device-id pi-001 \
+  --labels group=lab,arch=aarch64
+```
+
+Browser opens on the Pi's local display. The dev signs in once; the score handles the rest. Sudo prompts the operator's password if passwordless sudo isn't configured (which is fine — Debian's default).
+
+The score auto-installs `python3-venv` on first run if it is missing (Debian splits it out of base python3): it detects the failure, runs `sudo apt-get install -y python3-venv`, and retries the venv create.
+
+### Production-via-SSH
+
+Operator runs from a workstation, targeting devices on the LAN:
+
+```bash
+fleet_device_enroll \
+  --target ssh://pi@10.0.0.42 \
+  --issuer-url https://sso.example.com \
+  --audience <PROJECT_ID> \
+  --nats-url wss://nats.example.com \
+  --agent-binary ./build/fleet-agent-aarch64 \
+  --device-id batch7-042 \
+  --labels group=batch7,site=warehouse-east
+```
+
+Each invocation re-prompts the browser. Token caching across runs is tracked in `ROADMAP/fleet_platform/device_enrollment_token_caching.md`.
+
+### Non-interactive (CI / scripted)
+
+Skip the browser by passing a Bearer token:
+
+```bash
+HARMONY_ZITADEL_ADMIN_TOKEN=<pat-or-access-token> \
+fleet_device_enroll \
+  --target ssh://pi@10.0.0.42 \
+  --issuer-url https://sso.example.com \
+  --audience <PROJECT_ID> \
+  --nats-url wss://nats.example.com \
+  --agent-binary ./build/fleet-agent-aarch64
+```
+
+## What the score does on the device
+
+For each invocation the score:
+
+1. Calls Zitadel `/management/v1/*` with the admin token to find-or-create the device's machine user, grant it the `device` role on the fleet project, and mint a JSON key (idempotent on user + grant; always mints a new key because Zitadel doesn't return existing material).
+2. SSHes to the target, ensures `podman` + `systemd-container` packages, creates the `fleet-agent` user with linger, activates the user-scoped podman socket.
+3. Uploads the agent binary to `/usr/local/bin/fleet-agent`.
+4. Drops the JSON keyfile at `/etc/fleet-agent/zitadel-key.json` (mode 0640, owned by `fleet-agent`).
+5. Renders `/etc/fleet-agent/config.toml` with the agent's NATS URLs, labels, and `[credentials]` block pointing at the keyfile.
+6. Installs and starts `fleet-agent.service`. Restarts only if config / binary / unit changed.
+
+The agent then mints NATS JWTs from the keyfile via the auth callout's JWT-bearer flow and registers itself in the `device-info` KV.
+
+## Verification
+
+After enrollment, the device's heartbeat should appear within seconds:
+
+```bash
+nats kv get fleet-device-info <device-id>
+```
+
+Or watch via the operator's dashboard / CRs:
+
+```bash
+kubectl get fleetdev   # devices CRD
+```
+
+## SSO `client_id` — where to get it
+
+`--admin-oidc-client-id` is the **numeric Zitadel-assigned client_id**, not the human-readable app name. When `fleet_staging_install` provisions the `harmony-cli` device-code app, Zitadel generates a numeric client_id like `371639797157987125@fleet`. The staging install prints this value in its final summary block — copy it from there.
+
+If you ever need to look it up after the fact, it's in the staging-install operator's local cache:
+
+```bash
+jq -r '.apps."harmony-cli"' ~/.local/share/harmony/zitadel/client-config.json
+```
+
+That cache is on the **operator's workstation** (the host that ran `fleet_staging_install`). The device itself doesn't have it — the operator must pass `--admin-oidc-client-id <numeric>` explicitly when running enrollment from the device, or set `HARMONY_ZITADEL_ADMIN_TOKEN` to skip SSO entirely.
+
+## Common failure modes
+
+- **`invalid_client: no active client not found`** — `--admin-oidc-client-id` is wrong. Most likely you passed the app name (`harmony-cli`) instead of the numeric client_id. See above.
+- **`Project '<name>' not visible to the current Zitadel token`** — your SSO token's primary org differs from where the project lives. Most common when the staging install created the project as the system iam-admin user (system org) and you're signing in with a personal Zitadel account (your own org). Pass `--admin-org-id <id>` (find it in Zitadel UI → Organization → Resource ID). Alternatively, the score now logs `projects visible in current org context: …` right before the error — that list shows what your token CAN see, which usually pinpoints the org mismatch.
+- **403 on management API** — operator SSO account doesn't hold a role permitting management calls. Grant `IAM_OWNER` (or equivalent scoped permission) in Zitadel admin UI.
+- **`CaUsedAsEndEntity` from rustls** — talking to a dev cluster with a self-signed cert. Pass `--danger-accept-invalid-certs`.
+- **Browser doesn't open over SSH** — `webbrowser` can't find a GUI. The score still prints the URL; copy it into a browser on your workstation.
+
+## CLI flags
+
+Run `fleet_device_enroll --help` for the full surface.
--- a/examples/fleet_device_enroll/src/main.rs
+++ b/examples/fleet_device_enroll/src/main.rs
@@ -0,0 +1,639 @@
+//! Per-device enrollment driver — runs `FleetDeviceSetupScore` with
+//! the new `FleetDeviceAuth::ZitadelEnroll` variant. Two workflows
+//! land on the same code path:
+//!
+//! - **Dev-on-device**: developer runs this on a Pi they have a
+//!   keyboard / display attached to. They either omit `--target`
+//!   (the score is applied to the local machine, no SSH involved) or
+//!   target their own Pi via `--target ssh://<user>@127.0.0.1` (sshd
+//!   is enabled in the factory image so this works out of the box).
+//!   The score opens the local browser to Zitadel SSO, the dev signs
+//!   in with their personal account (must hold the admin role), the
+//!   score mints a per-device user + key, drops the keyfile + config
+//!   in place, and brings the agent up.
+//!
+//! - **Production-via-SSH**: operator runs this from a workstation,
+//!   targets each device over SSH (`--target ssh://pi@10.0.0.42`).
+//!   Browser opens once on the workstation; for v0 the resulting
+//!   token is held in memory only — re-running for the next device
+//!   re-prompts. Token caching is on the roadmap.
+//!
+//! `--vm-rehearsal` boots an aarch64 KVM VM and enrolls it through
+//! the same path, so we can dry-run the whole flow without a Pi.
+
+use std::path::PathBuf;
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use harmony::inventory::Inventory;
+use harmony::modules::fleet::{
+    AdminAuth, FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore,
+    ensure_fleet_ssh_keypair,
+};
+use harmony::modules::linux::{LinuxHostTopology, LinuxLocalhostTopology, SshCredentials};
+use harmony_types::id::Id;
+
+// VM-rehearsal-only imports. Hidden behind a feature so `cargo build
+// --no-default-features` (the device-side / aarch64 cross-compile)
+// doesn't pull in libvirt — `libvirt-dev` doesn't link against arm64
+// targets on most distros.
+#[cfg(feature = "vm-rehearsal")]
+use harmony::modules::fleet::{ProvisionVmScore, check_fleet_smoke_preflight_for_arch};
+#[cfg(feature = "vm-rehearsal")]
+use harmony::modules::kvm::KvmVirtualMachineHost;
+#[cfg(feature = "vm-rehearsal")]
+use harmony::modules::kvm::config::init_executor;
+#[cfg(feature = "vm-rehearsal")]
+use harmony::topology::{VirtualMachineSpec, VmArchitecture, VmFirstBootConfig};
+
+/// Command-line surface of `fleet_device_enroll`.
+///
+/// Most endpoint flags are `Option`s rather than required arguments
+/// because `--launch-pi-vm` only boots a VM and exits; `main` checks
+/// that they are present once it actually starts an enrollment.
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet_device_enroll",
+    about = "Enroll a device into the fleet by minting its Zitadel \
+             credentials inline (browser SSO or pre-acquired token)"
+)]
+struct Cli {
+    // ---- target ----------------------------------------------------------
+    /// Where to apply the score.
+    ///
+    /// - **Omitted** → run on the same machine the binary is invoked
+    ///   on (no SSH, no keypair). Ansible's `-c local` connection
+    ///   does the work; sudo still goes through your normal
+    ///   credentials.
+    /// - **`ssh://user@host`** → drive the score against a remote
+    ///   device over SSH using the harmony fleet SSH key.
+    ///
+    /// Ignored when `--vm-rehearsal` is set (the rehearsal targets
+    /// the freshly-booted VM).
+    #[arg(long)]
+    target: Option<String>,
+
+    /// Spin up a fresh aarch64 libvirt VM and enroll it. Pulls the
+    /// stock Ubuntu cloud image, attaches to the libvirt `default`
+    /// network, waits for SSH, then runs the setup score against it.
+    /// Requires the `vm-rehearsal` feature (enabled by default on
+    /// host builds, disabled on device-side aarch64 builds).
+    #[cfg(feature = "vm-rehearsal")]
+    #[arg(long)]
+    vm_rehearsal: bool,
+
+    /// Boot a Pi-equivalent aarch64 VM (Debian trixie generic-cloud
+    /// image — the same distribution base as Raspberry Pi OS, since
+    /// Pi OS itself is locked to Pi hardware and won't boot in
+    /// generic KVM) and **exit**. Prints the SSH connection details
+    /// so you can connect manually and run `fleet_device_enroll`
+    /// against the booted VM as a separate command. Useful for
+    /// dev-on-device rehearsal: launch once, then iterate with the
+    /// enrollment binary against the running VM. Requires the
+    /// `vm-rehearsal` feature.
+    #[cfg(feature = "vm-rehearsal")]
+    #[arg(long)]
+    launch_pi_vm: bool,
+
+    // ---- Zitadel + NATS endpoints ----------------------------------------
+    /// Zitadel issuer URL — what the agent will use as its OIDC
+    /// issuer and what the score talks to during enrollment.
+    /// Required for enrollment; ignored with `--launch-pi-vm`.
+    #[arg(long)]
+    issuer_url: Option<String>,
+
+    /// Zitadel project ID (the project's numeric id). Becomes the
+    /// agent's `audience` for JWT-bearer mint requests, and tags the
+    /// machine user so the auth callout's `aud` check passes.
+    #[arg(long)]
+    audience: Option<String>,
+
+    /// Project name (human-readable) the device's machine user
+    /// belongs to. Must already exist — created by the staging
+    /// install's `ZitadelSetupScore`.
+    #[arg(long, default_value = "fleet")]
+    project_name: String,
+
+    /// NATS URL the agent should connect to.
+    #[arg(long)]
+    nats_url: Option<String>,
+
+    // ---- device identity -------------------------------------------------
+    /// Device id baked into the agent's TOML, the Zitadel machine
+    /// username (`device-<device_id>`), and the Kubernetes Device CR
+    /// name on the operator side. **Required.**
+    ///
+    /// Must be a valid RFC1123 DNS label / subdomain since the
+    /// operator builds Kubernetes resource names from it. The
+    /// validator in this binary rejects anything else upfront so
+    /// enrollment can't produce a Zitadel machine user that the
+    /// operator will later choke on with `metadata.name: Invalid value`.
+    ///
+    /// Allowed: lowercase alphanumerics + `-`, must start and end with
+    /// an alphanumeric, max 63 chars per segment. Segments separated
+    /// by `.` are accepted (full RFC1123 subdomain) but `-` is the
+    /// usual choice.
+    ///
+    /// Examples that pass: `pi-001`, `lab-rehearsal-3`, `dev-jg-vm`.
+    /// Examples that fail: `pi_001` (underscore), `Pi001` (uppercase),
+    /// `-pi001` (leading dash), `pi001-` (trailing dash).
+    #[arg(long)]
+    device_id: String,
+
+    /// Zitadel machine username for this device. Defaults to
+    /// `device-<device_id>` so re-running with the same device_id
+    /// reuses the same Zitadel user.
+    #[arg(long)]
+    device_username: Option<String>,
+
+    /// Project-scoped Zitadel role to grant the device's user.
+    /// Defaults to `device` — the role the auth callout maps to
+    /// per-device-scoped pub/sub permissions.
+    #[arg(long, default_value = "device")]
+    device_role: String,
+
+    /// Routing labels (`key=value,key=value`) the agent publishes in
+    /// every DeviceInfo heartbeat.
+    #[arg(long, default_value = "group=group-a")]
+    labels: String,
+
+    // ---- admin auth ------------------------------------------------------
+    /// Pre-acquired Bearer token (PAT or out-of-band access token).
+    /// When set, skips the browser device-code flow.
+    #[arg(long, env = "HARMONY_ZITADEL_ADMIN_TOKEN")]
+    admin_token: Option<String>,
+
+    /// Zitadel OIDC `client_id` for the device-code app — the
+    /// **numeric id** Zitadel assigns when the app is created (e.g.
+    /// `371639797157987125@fleet`), NOT the human-readable app name
+    /// (`harmony-cli`). The staging install prints this value in its
+    /// final summary; copy it from there. Required when using SSO
+    /// (omit only when `--admin-token` is set).
+    #[arg(long)]
+    admin_oidc_client_id: Option<String>,
+
+    /// Forward to the agent's HTTP client AND to our admin-side calls
+    /// to Zitadel. Set when talking to a dev cluster with a
+    /// self-signed cert.
+    #[arg(long)]
+    danger_accept_invalid_certs: bool,
+
+    /// Override the Zitadel **org context** (`x-zitadel-orgid` header)
+    /// for management API calls. Set when the SSO operator's primary
+    /// org differs from where the project + device users live —
+    /// typical for human SSO accounts on a Zitadel where the project
+    /// was provisioned by the system iam-admin (their org defaults
+    /// don't match). Symptom: `Project '<name>' not found in
+    /// Zitadel` even though the project clearly exists. Find the
+    /// right value in Zitadel's admin UI → Organization → Resource
+    /// ID, or via `/admin/v1/orgs/_search`.
+    #[arg(long)]
+    admin_org_id: Option<String>,
+
+    // ---- agent binary ----------------------------------------------------
+    /// Path to the cross-compiled fleet-agent binary that gets
+    /// uploaded to the device and installed at /usr/local/bin/fleet-agent.
+    /// Optional when `--launch-pi-vm` is set (no enrollment runs).
+    #[arg(long)]
+    agent_binary: Option<PathBuf>,
+
+    // ---- VM knobs (used by both --vm-rehearsal and --launch-pi-vm) -------
+    /// libvirt domain name for the rehearsal VM. Also used as the
+    /// VM's hostname at first boot.
+    #[cfg(feature = "vm-rehearsal")]
+    #[arg(long, default_value = "fleet-enroll-rehearsal")]
+    vm_name: String,
+
+    /// libvirt network the VM is attached to.
+    #[cfg(feature = "vm-rehearsal")]
+    #[arg(long, default_value = "default")]
+    vm_network: String,
+
+    /// Admin user configured on the VM at first boot. The fleet SSH
+    /// public key is authorized for this user, and enrollment logs in
+    /// as it.
+    #[cfg(feature = "vm-rehearsal")]
+    #[arg(long, default_value = "fleet-admin")]
+    vm_admin_user: String,
+
+    /// Disk size of the VM, in GB.
+    #[cfg(feature = "vm-rehearsal")]
+    #[arg(long, default_value_t = 16)]
+    vm_disk_size_gb: u32,
+}
+
+/// Entry point: parse the CLI, then either boot a Pi-equivalent VM
+/// and exit (`--launch-pi-vm`), or build a `FleetDeviceSetupScore`
+/// and run it against one of three targets — a freshly booted
+/// rehearsal VM (`--vm-rehearsal`), the local machine (no
+/// `--target`), or a remote host (`--target ssh://user@host`).
+///
+/// Returns an error when the device id is invalid, a flag required
+/// for enrollment is missing, or any provisioning / setup step fails.
+#[tokio::main]
+async fn main() -> Result<()> {
+    // `try_init` + `ok()`: tolerate a logger that is already installed.
+    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init()
+        .ok();
+
+    let cli = Cli::parse();
+
+    // Launch-only mode: boot the VM, print how to reach it, and stop
+    // before any enrollment flag is looked at.
+    #[cfg(feature = "vm-rehearsal")]
+    if cli.launch_pi_vm {
+        let vm_ip = boot_pi_rehearsal_vm(&cli).await?;
+        println!();
+        println!("=== Pi-equivalent VM ready ===");
+        println!("VM:   {} (debian-trixie arm64)", cli.vm_name);
+        println!("IP:   {vm_ip}");
+        println!(
+            "SSH:  ssh -i {} {}@{vm_ip}",
+            harmony::modules::fleet::ensure_fleet_ssh_keypair()
+                .await
+                .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?
+                .private_key
+                .display(),
+            cli.vm_admin_user
+        );
+        println!();
+        println!("To enroll this VM, run from your workstation:");
+        // The hint goes on its own line. A `# …` comment placed before
+        // a trailing `\` swallows the line continuation, so the pasted
+        // command would end at `--device-id` and the remaining flags
+        // would run as separate shell commands.
+        println!("  # --device-id is required and must be an RFC1123 name (e.g. pi-001)");
+        println!(
+            "  fleet_device_enroll \\\n    \
+             --target ssh://{}@{vm_ip} \\\n    \
+             --device-id <ID> \\\n    \
+             --issuer-url <ISSUER> \\\n    \
+             --audience <PROJECT_ID> \\\n    \
+             --nats-url <NATS_URL> \\\n    \
+             --admin-oidc-client-id <CLIENT_ID> \\\n    \
+             --agent-binary <AGENT_BIN>",
+            cli.vm_admin_user
+        );
+        return Ok(());
+    }
+
+    // Reject ids the operator could not turn into a Kubernetes name
+    // before anything is created in Zitadel.
+    validate_device_id(&cli.device_id)?;
+    let device_id = Id::from(cli.device_id.clone());
+    let device_username = cli
+        .device_username
+        .clone()
+        .unwrap_or_else(|| format!("device-{device_id}"));
+
+    // These flags are `Option`s on the CLI only so `--launch-pi-vm`
+    // can run without them; enrollment needs every one.
+    let labels = parse_labels(&cli.labels)?;
+    let issuer_url = cli
+        .issuer_url
+        .clone()
+        .context("--issuer-url is required for enrollment (omit only with --launch-pi-vm)")?;
+    let audience = cli
+        .audience
+        .clone()
+        .context("--audience is required for enrollment")?;
+    let nats_url = cli
+        .nats_url
+        .clone()
+        .context("--nats-url is required for enrollment")?;
+    let agent_binary = cli
+        .agent_binary
+        .clone()
+        .context("--agent-binary is required for enrollment")?;
+
+    let auth = FleetDeviceAuth::ZitadelEnroll {
+        oidc_issuer_url: issuer_url,
+        audience,
+        project_name: cli.project_name.clone(),
+        device_username: device_username.clone(),
+        device_display_name: format!("Fleet Device {device_id}"),
+        device_role_keys: vec![cli.device_role.clone()],
+        // A pre-acquired token wins; otherwise fall back to SSO,
+        // which needs the device-code app's client id.
+        admin: match &cli.admin_token {
+            Some(t) => AdminAuth::Token(t.clone()),
+            None => AdminAuth::Sso {
+                client_id: cli.admin_oidc_client_id.clone().context(
+                    "--admin-oidc-client-id is required for SSO login. \
+                     This is the **numeric** Zitadel client_id (e.g. \
+                     `371639797157987125@fleet`), not the app name. \
+                     The staging install prints it in its final summary. \
+                     Alternatively, pass --admin-token <PAT> to skip SSO.",
+                )?,
+            },
+        },
+        admin_org_id: cli.admin_org_id.clone(),
+        danger_accept_invalid_certs: cli.danger_accept_invalid_certs,
+    };
+
+    let setup_config = FleetDeviceSetupConfig {
+        device_id: device_id.clone(),
+        labels,
+        nats_urls: vec![nats_url],
+        auth,
+        agent_binary_path: agent_binary,
+        hosts_entries: vec![],
+    };
+    let setup_score = FleetDeviceSetupScore::new(setup_config);
+
+    // Rehearsal mode: boot a VM and enroll it over SSH; `--target` is
+    // not consulted on this path.
+    #[cfg(feature = "vm-rehearsal")]
+    if cli.vm_rehearsal {
+        let vm_ip = boot_rehearsal_vm(&cli).await?;
+        let ssh = ensure_fleet_ssh_keypair()
+            .await
+            .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
+        let topology = LinuxHostTopology::new(
+            format!("rehearsal-{}", cli.vm_name),
+            vm_ip
+                .parse()
+                .context("rehearsal VM did not yield a valid IP")?,
+            SshCredentials {
+                user: cli.vm_admin_user.clone(),
+                private_key_path: ssh.private_key.clone(),
+                remote_python: Some("/usr/bin/python3".to_string()),
+                sudo_password: None,
+            },
+        );
+        run_setup(&setup_score, &topology).await?;
+        println!(
+            "✅ rehearsal device '{device_id}' enrolled via VM {} ({vm_ip})",
+            cli.vm_name
+        );
+        return Ok(());
+    }
+
+    match cli.target.as_deref() {
+        // No `--target` → run on the same machine. ansible's `-c
+        // local` connection skips SSH entirely; sudo still works the
+        // usual way (operator types the password if not configured
+        // passwordless).
+        None => {
+            let topology = LinuxLocalhostTopology::new("localhost");
+            run_setup(&setup_score, &topology).await?;
+        }
+        // `--target ssh://user@host` → remote device over SSH with
+        // the fleet keypair. The host part must parse as an IP.
+        Some(target) => {
+            let (user, host) = parse_ssh_target(target)?;
+            let ssh = ensure_fleet_ssh_keypair()
+                .await
+                .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
+            let topology = LinuxHostTopology::new(
+                format!("ssh-{host}"),
+                host.parse().context("--target host is not a valid IP")?,
+                SshCredentials {
+                    user,
+                    private_key_path: ssh.private_key.clone(),
+                    remote_python: Some("/usr/bin/python3".to_string()),
+                    sudo_password: None,
+                },
+            );
+            run_setup(&setup_score, &topology).await?;
+        }
+    }
+    println!("✅ device '{device_id}' enrolled");
+    Ok(())
+}
+
+/// Boot the VM used by `--vm-rehearsal` from the Ubuntu cloud image
+/// and return its IP address as reported by [`boot_vm`].
+#[cfg(feature = "vm-rehearsal")]
+async fn boot_rehearsal_vm(cli: &Cli) -> Result<String> {
+    let image = RehearsalImage::Ubuntu;
+    boot_vm(cli, image).await
+}
+
+/// Boot the VM used by `--launch-pi-vm` from the Debian trixie cloud
+/// image and return its IP address as reported by [`boot_vm`].
+#[cfg(feature = "vm-rehearsal")]
+async fn boot_pi_rehearsal_vm(cli: &Cli) -> Result<String> {
+    let image = RehearsalImage::DebianTrixie;
+    boot_vm(cli, image).await
+}
+
+/// Which cloud image `boot_vm` uses as the base disk of the VM.
+#[cfg(feature = "vm-rehearsal")]
+#[derive(Debug, Clone, Copy)]
+enum RehearsalImage {
+    /// Ubuntu 24.04 cloud image — selected by `boot_rehearsal_vm`.
+    Ubuntu,
+    /// Debian trixie arm64 cloud image — selected by
+    /// `boot_pi_rehearsal_vm`.
+    DebianTrixie,
+}
+
+/// Provision an aarch64 libvirt VM from the chosen cloud image and
+/// return the IP address reported by `ProvisionVmScore`.
+///
+/// Runs, in order: the preflight check for the architecture, the
+/// cloud-image helper for `image`, the libvirt pool helper, the fleet
+/// SSH keypair + public-key read, KVM executor init, and finally the
+/// provision score. VM name, network, admin user and disk size come
+/// from the `--vm-*` flags on `cli`.
+///
+/// # Errors
+/// Fails if any of those steps fails, or if the score's outcome
+/// carries no `ip=<addr>` detail.
+#[cfg(feature = "vm-rehearsal")]
+async fn boot_vm(cli: &Cli, image: RehearsalImage) -> Result<String> {
+    use harmony::score::Score;
+
+    let arch = VmArchitecture::Aarch64;
+    check_fleet_smoke_preflight_for_arch(arch)
+        .await
+        .map_err(|e| anyhow::anyhow!("preflight: {e}"))?;
+
+    // Each arm yields a `Result`; the single `?` after the match
+    // propagates whichever one failed.
+    let base_image = match image {
+        RehearsalImage::Ubuntu => {
+            harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
+                .await
+                .map_err(|e| anyhow::anyhow!("cloud image: {e}"))
+        }
+        RehearsalImage::DebianTrixie => {
+            harmony::modules::fleet::ensure_debian_trixie_arm64_cloud_image()
+                .await
+                .map_err(|e| anyhow::anyhow!("debian cloud image: {e}"))
+        }
+    }?;
+
+    let storage_pool = harmony::modules::fleet::ensure_harmony_fleet_pool()
+        .await
+        .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
+    let keypair = ensure_fleet_ssh_keypair()
+        .await
+        .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
+    let authorized_key = harmony::modules::fleet::read_public_key(&keypair)
+        .await
+        .map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?;
+
+    let kvm_executor = init_executor().map_err(|e| anyhow::anyhow!("KVM init: {e}"))?;
+    let host = KvmVirtualMachineHost::new(
+        "kvm-local",
+        kvm_executor,
+        storage_pool.name.clone(),
+        storage_pool.path.clone(),
+        base_image,
+    );
+
+    // First-boot settings: hostname mirrors the domain name, and the
+    // admin user gets the fleet public key (no password is set).
+    let first_boot = VmFirstBootConfig {
+        hostname: Some(cli.vm_name.clone()),
+        admin_user: Some(cli.vm_admin_user.clone()),
+        authorized_keys: vec![authorized_key],
+        admin_password: None,
+    };
+    let provision = ProvisionVmScore {
+        spec: VirtualMachineSpec {
+            name: cli.vm_name.clone(),
+            architecture: arch,
+            cpus: 2,
+            memory_mib: 2048,
+            disk_size_gb: Some(cli.vm_disk_size_gb),
+            network: cli.vm_network.clone(),
+            first_boot: Some(first_boot),
+        },
+    };
+
+    let outcome = Score::<KvmVirtualMachineHost>::create_interpret(&provision)
+        .execute(&Inventory::empty(), &host)
+        .await
+        .map_err(|e| anyhow::anyhow!("ProvisionVmScore: {e}"))?;
+
+    // The score reports the address as an `ip=<addr>` detail; take
+    // the first one.
+    outcome
+        .details
+        .iter()
+        .find_map(|detail| detail.strip_prefix("ip="))
+        .map(|ip| ip.to_string())
+        .ok_or_else(|| anyhow::anyhow!("ProvisionVmScore finished without an IP"))
+}
+
+/// Execute `score` against `topology` with an empty inventory and
+/// print the resulting outcome (message plus details) to stdout.
+///
+/// # Errors
+/// Any failure from the score's interpret is returned, prefixed with
+/// `FleetDeviceSetupScore: `.
+async fn run_setup<T>(score: &FleetDeviceSetupScore, topology: &T) -> Result<()>
+where
+    T: harmony::topology::Topology + harmony::topology::LinuxHostConfiguration,
+{
+    use harmony::score::Score;
+
+    let inventory = Inventory::empty();
+    let interpret = Score::<T>::create_interpret(score);
+    let result = interpret.execute(&inventory, topology).await;
+    let outcome = result.map_err(|e| anyhow::anyhow!("FleetDeviceSetupScore: {e}"))?;
+
+    println!("setup outcome: {} ({:?})", outcome.message, outcome.details);
+    Ok(())
+}
+
+/// Check that `id` is a valid RFC1123 subdomain, so the operator's
+/// later Device CR upsert cannot be rejected with
+/// `metadata.name: Invalid value`. Reference:
+/// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
+///
+/// Rules enforced:
+/// - the id is non-empty and at most 253 chars long
+/// - it consists of one or more `.`-separated labels
+/// - every label passes `validate_dns_label` (1-63 chars, lowercase
+///   alphanumerics and `-`, alphanumeric at both ends)
+///
+/// The same device_id also ends up in NATS subjects through the auth
+/// callout's permission templates, where `_` or uppercase would pass
+/// silently and only break later on the kube path — so it is
+/// rejected here, before anything is created.
+///
+/// # Errors
+/// Returns the first violation found; label errors are wrapped with
+/// the full device id as context.
+fn validate_device_id(id: &str) -> Result<()> {
+    match id.len() {
+        0 => anyhow::bail!("device id is empty"),
+        len if len > 253 => anyhow::bail!(
+            "device id '{id}' is {len} chars, max 253 (RFC1123 subdomain limit)"
+        ),
+        _ => {}
+    }
+    id.split('.').try_for_each(|label| {
+        validate_dns_label(label).with_context(|| format!("device id '{id}'"))
+    })
+}
+
+/// Check a single RFC1123 label: 1-63 bytes, only lowercase `a-z`,
+/// `0-9` and `-`, with an alphanumeric at both ends.
+///
+/// Checks run in a fixed order — empty, length, first char, last
+/// char, then every char — and the first failure is returned. An
+/// uppercase first or last letter passes the "alphanumeric" edge
+/// checks and is reported by the per-char scan instead.
+fn validate_dns_label(label: &str) -> Result<()> {
+    // A label with no first/last char is empty.
+    let (Some(first), Some(last)) = (label.chars().next(), label.chars().last()) else {
+        anyhow::bail!("empty label (consecutive dots or leading/trailing dot)");
+    };
+    if label.len() > 63 {
+        anyhow::bail!(
+            "label '{label}' is {len} chars, max 63 per RFC1123 label",
+            len = label.len()
+        );
+    }
+    if !first.is_ascii_alphanumeric() {
+        anyhow::bail!(
+            "label '{label}' must start with an alphanumeric (got `{}`)",
+            first
+        );
+    }
+    if !last.is_ascii_alphanumeric() {
+        anyhow::bail!(
+            "label '{label}' must end with an alphanumeric (got `{}`)",
+            last
+        );
+    }
+    // Report the first offending character together with its
+    // (char-based) position in the label.
+    let offender = label
+        .chars()
+        .enumerate()
+        .find(|&(_, ch)| !(ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '-'));
+    if let Some((i, c)) = offender {
+        anyhow::bail!(
+            "label '{label}' has invalid char `{c}` at position {i}; \
+             only lowercase a-z, 0-9, and `-` are allowed (no `_`, no uppercase)"
+        );
+    }
+    Ok(())
+}
+
+/// Split a `--target` value of the form `ssh://user@host` into
+/// `(user, host)`. The split happens at the first `@`.
+///
+/// Only the `ssh://` scheme is understood here. Running against the
+/// local machine is selected by omitting `--target` altogether, so a
+/// bare `localhost` is rejected — the error therefore points at the
+/// local mode instead of suggesting a value this parser refuses.
+///
+/// # Errors
+/// Fails when the `ssh://` prefix or the `@` separator is missing, or
+/// when the user or the host part is empty.
+fn parse_ssh_target(target: &str) -> Result<(String, String)> {
+    let rest = target.strip_prefix("ssh://").context(
+        "--target must start with `ssh://` (omit --target to run on the local machine)",
+    )?;
+    let (user, host) = rest
+        .split_once('@')
+        .context("--target must be `ssh://user@host`")?;
+    if user.is_empty() || host.is_empty() {
+        anyhow::bail!("--target ssh:// has empty user or host");
+    }
+    Ok((user.to_string(), host.to_string()))
+}
+
+/// Parse the `--labels` value (`key=value,key=value`) into a map.
+///
+/// Pieces are separated by `,`. Whitespace around each piece, key and
+/// value is trimmed, and empty pieces are skipped. If a key appears
+/// more than once, the last value wins.
+///
+/// # Errors
+/// Fails when a piece has no `=`, when a key or value is empty after
+/// trimming, or when no pair is left at all.
+fn parse_labels(raw: &str) -> Result<std::collections::BTreeMap<String, String>> {
+    use std::collections::BTreeMap;
+
+    let mut labels = BTreeMap::new();
+    for piece in raw.split(',') {
+        let piece = piece.trim();
+        if piece.is_empty() {
+            continue;
+        }
+        let Some((key, value)) = piece.split_once('=') else {
+            anyhow::bail!("label '{piece}' missing '='");
+        };
+        let (key, value) = (key.trim(), value.trim());
+        if key.is_empty() || value.is_empty() {
+            anyhow::bail!("label '{piece}' has empty key or value");
+        }
+        labels.insert(key.to_string(), value.to_string());
+    }
+    if labels.is_empty() {
+        anyhow::bail!("--labels must include at least one key=value pair");
+    }
+    Ok(labels)
+}
+
+// Unit tests for `validate_device_id`: one acceptance test over
+// representative ids, then one rejection test per rule.
+#[cfg(test)]
+mod tests {
+    use super::validate_device_id;
+
+    // Single-label and multi-label ids that satisfy every rule.
+    #[test]
+    fn accepts_simple_labels() {
+        for ok in [
+            "pi",
+            "pi-001",
+            "lab-rehearsal-3",
+            "dev-jg-vm",
+            "a",
+            "0",
+            "fb5310-qm2kpoq",
+            // multi-label subdomain
+            "pi-001.lab-east.fleet",
+        ] {
+            assert!(
+                validate_device_id(ok).is_ok(),
+                "expected '{ok}' to be accepted: {:?}",
+                validate_device_id(ok)
+            );
+        }
+    }
+
+    // Render the whole error chain so assertions can match the cause.
+    fn err_chain(e: anyhow::Error) -> String {
+        // anyhow's `.to_string()` only renders the top-level context;
+        // the validator emits the *cause* message (`invalid char …`,
+        // `max 63`, etc.) further down the chain. `{:#}` renders the
+        // full chain joined by `: ` which is what we want to match.
+        format!("{e:#}")
+    }
+
+    #[test]
+    fn rejects_underscore() {
+        // The original `Id::default()` shape that triggered this fix.
+        let err = err_chain(validate_device_id("fb5310_Qm2kPoQ").unwrap_err());
+        assert!(err.contains("invalid char `_`"), "got: {err}");
+    }
+
+    // An uppercase first letter passes the "starts with an
+    // alphanumeric" check and is caught by the per-char scan.
+    #[test]
+    fn rejects_uppercase() {
+        let err = err_chain(validate_device_id("Pi001").unwrap_err());
+        assert!(err.contains("invalid char"), "got: {err}");
+    }
+
+    #[test]
+    fn rejects_leading_or_trailing_dash() {
+        assert!(validate_device_id("-pi001").is_err());
+        assert!(validate_device_id("pi001-").is_err());
+    }
+
+    #[test]
+    fn rejects_empty() {
+        assert!(validate_device_id("").is_err());
+    }
+
+    // `a..b` splits into an empty middle label.
+    #[test]
+    fn rejects_consecutive_dots() {
+        assert!(validate_device_id("a..b").is_err());
+    }
+
+    // 64 chars in one label: under the 253 total, over the 63 per label.
+    #[test]
+    fn rejects_too_long_label() {
+        let long = "a".repeat(64);
+        let err = err_chain(validate_device_id(&long).unwrap_err());
+        assert!(err.contains("max 63"), "got: {err}");
+    }
+
+    // Every label is a legal 63 chars, so only the total-length rule
+    // can reject this id.
+    #[test]
+    fn rejects_too_long_total() {
+        // 4 × (63 + 1) - 1 = 255 chars total; rejects on >253.
+        let segment = "a".repeat(63);
+        let id = [segment.as_str(); 4].join(".");
+        assert!(id.len() > 253);
+        let err = err_chain(validate_device_id(&id).unwrap_err());
+        assert!(err.contains("max 253"), "got: {err}");
+    }
+}
--- a/examples/fleet_e2e_demo/Cargo.toml
+++ b/examples/fleet_e2e_demo/Cargo.toml
@@ -0,0 +1,48 @@
+[package]
+name = "example-fleet-e2e-demo"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "VM-based end-to-end rehearsal: k3d + Zitadel + NATS auth callout + libvirt VM agents + operator → CR → podman → status"
+
+[lib]
+name = "example_fleet_e2e_demo"
+path = "src/lib.rs"
+
+[[bin]]
+name = "fleet-e2e-demo"
+path = "src/main.rs"
+
+[[test]]
+name = "e2e_walking_skeleton"
+path = "tests/e2e_walking_skeleton.rs"
+
+[dependencies]
+harmony = { path = "../../harmony", features = ["kvm"] }
+harmony-k8s = { path = "../../harmony-k8s" }
+harmony_types = { path = "../../harmony_types" }
+example-fleet-auth-callout = { path = "../fleet_auth_callout" }
+harmony-nats-callout = { path = "../../nats/callout" }
+harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
+harmony-fleet-operator = { path = "../../fleet/harmony-fleet-operator" }
+harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" }
+k3d-rs = { path = "../../k3d" }
+async-nats.workspace = true
+nkeys = "0.4"
+tokio = { workspace = true, features = ["full"] }
+tokio-test.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+anyhow.workspace = true
+log.workspace = true
+env_logger.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+futures-util.workspace = true
+k8s-openapi.workspace = true
+kube.workspace = true
+clap = { version = "4", features = ["derive", "env"] }
+directories = "6.0.0"
+tempfile = "3"
+url.workspace = true
--- a/examples/fleet_e2e_demo/RUNBOOK.md
+++ b/examples/fleet_e2e_demo/RUNBOOK.md
@@ -0,0 +1,312 @@
+# Local fleet rehearsal runbook
+
+End-to-end walkthrough of the IoT fleet platform on your laptop:
+k3d-hosted control plane (Zitadel + NATS + auth callout) plus two
+libvirt VMs running the fleet-agent. Mirrors the production topology
+closely enough that you can watch the auth callout flow, the
+JetStream KV traffic, and the per-device permission boundary in a
+real cluster.
+
+This is not the integration-test harness (that runs unattended). It
+is a step-by-step sequence with inspection points in between. Run
+each section, look at what happened, then continue.
+
+## 0. Prerequisites
+
+- Linux host with KVM (the user running the commands must be in the
+  `libvirt` / `kvm` groups; check with `id`).
+- `podman`, `qemu-system-x86_64` (and `qemu-system-aarch64` if you
+  pick `--arch aarch64`), `mdbook` (optional), `kubectl`, `nats` CLI
+  (optional, for the manual subscribe step). Most other tooling
+  (k3d, ansible venv, cloud images) is auto-provisioned under
+  `~/.local/share/harmony/`.
+- `/etc/hosts`: `127.0.0.1 sso.fleet.local` so you can hit Zitadel
+  from your browser through the cluster's HTTP_PORT (see
+  `examples/fleet_auth_callout/src/lib.rs` for the constant).
+- Free TCP ports `8080` and `30422` on the host.
+
+Source map for the things you'll inspect:
+
+| Component | File |
+| --- | --- |
+| Bring-up flow | `examples/fleet_e2e_demo/src/lib.rs` |
+| Per-device Zitadel + agent install | same, `provision_device()` |
+| NATS Score (auth-callout mode) | `fleet/harmony-fleet-deploy/src/nats.rs::FleetNatsScore::callout` |
+| Shared agent config schema | `fleet/harmony-fleet-auth/src/agent_config.rs` |
+| Auth callout deployment Score | `harmony/src/modules/nats_auth_callout/mod.rs` |
+| Callout decision logic | `nats/callout/src/handler.rs::decide` |
+| Per-device permissions template | `nats/callout/src/permissions.rs::device_default` |
+| Agent NATS auth (JWT-bearer mint) | `fleet/harmony-fleet-auth/src/credentials.rs` |
+| Agent KV publishers + direct pulse | `fleet/harmony-fleet-agent/src/fleet_publisher.rs` |
+| Walking-skeleton tests | `examples/fleet_e2e_demo/tests/e2e_walking_skeleton.rs` |
+
+The NATS server's helm values are rendered from typed Rust structs
+via `serde_yaml::to_string` (see `FleetNatsScore::values_yaml`),
+not by `format!()` string interpolation. Same with the agent's
+`/etc/fleet-agent/config.toml` — typed `AgentConfig` →
+`toml::to_string` → ConfigMap. Per ADR-023 principle 2 the e2e
+demo composes the same `*Score` types the production deploy uses.
+
+## 1. Provision the VMs
+
+Each VM is one libvirt domain on the default network
+(`192.168.122.0/24`). Run `fleet_vm_setup` once per VM. Pass
+`--only-vm` so it stops at the cloud-init step (the agent install
+happens later from the e2e bring-up — keeps the two phases legible).
+
+```bash
+# VM 0
+cargo run --release -p example-fleet-vm-setup -- \
+  --arch aarch64 \
+  --vm-name vm-device-00 \
+  --only-vm
+
+# VM 1
+cargo run --release -p example-fleet-vm-setup -- \
+  --arch aarch64 \
+  --vm-name vm-device-01 \
+  --only-vm
+```
+
+Use `--arch x86_64` for native KVM speed; `aarch64` runs under
+qemu-system-aarch64 TCG emulation on x86_64 hosts and is slower but
+matches Pi targets.
+
+**Inspect:**
+
+```bash
+virsh list --all
+virsh domifaddr vm-device-00
+virsh domifaddr vm-device-01
+```
+
+Note the IPs — you'll pass them in step 2. Confirm SSH works:
+
+```bash
+ssh -i ~/.local/share/harmony/fleet/ssh/id_ed25519 \
+    fleet-admin@<vm0-ip> uptime
+```
+
+The keypair lives under `~/.local/share/harmony/fleet/ssh/`,
+generated on first run.
+
+## 2. Bring up the control-plane stack
+
+This single command does everything: k3d cluster, Zitadel,
+ZitadelSetupScore (project + roles + 2 device machine users +
+`fleet-ops` admin), NATS with `auth_callout`, callout image build &
+sideload, callout Deployment, and finally `FleetDeviceSetupScore`
+over SSH for each VM (packages, agent binary, JWT keyfile,
+systemd unit).
+
+```bash
+FLEET_E2E_VM_0_IP=<vm0-ip> FLEET_E2E_VM_1_IP=<vm1-ip> \
+  cargo run --release -p example-fleet-e2e-demo -- --num-devices 2
+```
+
+The bring-up logs each of its ten steps as `[e2e-demo N/…]`. Read
+along with `examples/fleet_e2e_demo/src/lib.rs::bring_up_full_stack`
+to see what's happening at each line. It stops at `STACK READY` and
+waits for Ctrl-C (the cluster stays up after Ctrl-C — this process is
+just the foreground holder).
+
+**Inspect:**
+
+```bash
+export KUBECONFIG=$(k3d kubeconfig write fleet-auth-callout)
+
+# All workloads up?
+kubectl get pods -n fleet-system
+kubectl get pods -n zitadel
+
+# Callout config the deployment is using:
+kubectl get deployment -n fleet-system fleet-callout \
+  -o jsonpath='{.spec.template.spec.containers[0].env}' | jq
+```
+
+Open Zitadel in the browser: <http://sso.fleet.local:8080/ui/console>
+(login with `root@zitadel.local` / the bootstrap password printed
+during bring-up step 3). Click into the `fleet` project → `Users` to
+see the two `device-vm-device-0X` machine users with `device` role
+grants, plus the `fleet-ops` and `fleet-operator` admin identities.
+
+## 3. Watch the auth callout in action
+
+The callout is the security boundary: every NATS connect attempt
+hits `$SYS.REQ.USER.AUTH`, the callout validates the Zitadel JWT
+in `connect_opts.auth_token`, applies the decision tree in
+`nats/callout/src/handler.rs::decide`, and signs back a user JWT
+with role-scoped permissions.
+
+Tail it while the agents reconnect:
+
+```bash
+kubectl logs -n fleet-system -l app=fleet-callout -f
+```
+
+You'll see one set of lines per (re)connect:
+
+```
+received auth callout request user_nkey=U…
+Zitadel JWT validated, generating user JWT device_id=vm-device-00 role=device
+sending auth response
+```
+
+The `device_id` field is the value AFTER `device_id_prefix_strip`
+runs (Zitadel emits `client_id=device-vm-device-00`; the callout
+strips `device-` so permissions are interpolated against the bare
+device id the agent uses for KV keys). See
+`nats/callout/src/zitadel.rs::extract_device_id` for the strip.
+
+**Force a reconnect to make a callout fire on demand:**
+
+```bash
+ssh -i ~/.local/share/harmony/fleet/ssh/id_ed25519 \
+    fleet-admin@<vm0-ip> 'sudo systemctl restart fleet-agent'
+```
+
+Watch the callout pod log emit one fresh request/response.
+
+## 4. Watch the agent
+
+```bash
+ssh -i ~/.local/share/harmony/fleet/ssh/id_ed25519 \
+    fleet-admin@<vm0-ip> 'sudo journalctl -u fleet-agent -f'
+```
+
+What good looks like, in order:
+
+| Log line | Where it comes from |
+| --- | --- |
+| `minted fresh Zitadel access token audience=…` | `credentials.rs::zitadel_mint` — RFC 7523 JWT-bearer flow, signed with the per-device machine key under `/etc/fleet-agent/zitadel-key.json` |
+| `connected successfully server=4222` | NATS accepted the JWT minted by the callout |
+| `fleet publisher ready` | KV buckets opened; `device-info` write succeeded |
+| `watching KV keys filter=vm-device-00.>` | desired-state subscriber is up |
+
+Absence of `Permissions Violation` lines is the success signal —
+those mean the JWT's perms don't match what the agent tried to
+publish (you'd hit them if `device_id_prefix_strip` were
+misconfigured, for example).
+
+## 5. Observe fleet traffic as admin
+
+The harness mints a `fleet-ops` admin machine user with the
+`fleet-admin` role; the callout maps that role to
+`pub/sub allow: [">"]`. The integration test
+`admin_jwt_reads_any_device_subject` exercises this — easiest path
+to see it live is to run it with output. The test is
+`#[ignore]`d on `cargo test` so a developer box doesn't burn a
+10-minute Zitadel bring-up by accident; `--ignored` opts in:
+
+```bash
+FLEET_E2E_VM_0_IP=<vm0-ip> FLEET_E2E_VM_1_IP=<vm1-ip> \
+  cargo test -p example-fleet-e2e-demo \
+    --test e2e_walking_skeleton \
+    admin_jwt_reads_any_device_subject \
+    -- --test-threads=1 --nocapture --ignored
+```
+
+It subscribes admin to `device-state.>` (the direct, non-JetStream
+fan-out subject the agent emits a pulse on every 30s — see
+`fleet_publisher.rs::publish_state_pulse`) and asserts a message
+arrives within 30s.
+
+**Inspect KV state directly** using a bare admin client. The
+underlying mechanism is in
+`examples/fleet_e2e_demo/tests/e2e_walking_skeleton.rs::admin_nats_client`:
+mint a JWT-bearer token from `stack.admin_machine_key`, hand it to
+`async_nats` as `auth_token`. The test
+`both_devices_heartbeat_within_60s` then reads `device-info` keys
+directly:
+
+```rust
+let js = async_nats::jetstream::new(admin);
+let bucket = js.get_key_value(BUCKET_DEVICE_INFO).await?;
+let entry = bucket.entry(&device_info_key("vm-device-00")).await?;
+```
+
+To do it from a shell, port-forward NATS and use the `nats` CLI
+with admin creds — but creds for an auth-callout server take a
+JWT-bearer token, which the `nats` CLI doesn't speak natively;
+running the test is the path of least friction.
+
+## 6. Verify cross-device isolation (currently `#[ignore]`)
+
+`cross_device_isolation_enforced_in_vm` is an empty test marked
+`#[ignore = "requires E2eHandles::device_machine_key plumbing"]`
+in `e2e_walking_skeleton.rs` — the test is a placeholder. The
+plumbing it's waiting on is straightforward: the existing
+`DeviceHandle` struct (in `examples/fleet_e2e_demo/src/lib.rs`)
+exposes `device_id` + `vm_ip` + `labels` but not the per-device
+Zitadel machine key the test would need to mint a `device`-role
+JWT and try cross-device subjects. `provision_device` already
+obtains the key (the `machine_key_json` local) — wiring it through
+into a new `DeviceHandle.machine_key` field and implementing the
+test body (mint JWT-bearer for vm-device-00, subscribe to
+`device-commands.vm-device-01`, expect `Permissions Violation`)
+is a single follow-up commit. It has not been done yet because
+nothing in this branch's scope required it.
+
+**You can verify the boundary manually right now**, even without
+the test wired up: tail the callout pod, then SSH onto vm-device-00
+and run the agent with a tampered config that points it at
+vm-device-01's keyfile. The callout will issue a JWT for
+`vm-device-01` (because the JWT-bearer assertion is signed with
+that user's key); the agent on vm-device-00 will then publish on
+`$KV.device-info.info.vm-device-00`, which is NOT in the JWT's
+allow list — NATS rejects with `Permissions Violation`. This is
+the same gate the test would automate.
+
+The permissions template is in
+`nats/callout/src/permissions.rs::device_default` — every allowed
+subject contains `{device_id}` and is interpolated per-request, so
+device A's JWT physically cannot publish to device B's subjects.
+
+## 7. Drive the desired-state loop
+
+(Not yet covered by a walking-skeleton test, but the agent's
+reconciler is wired and observable.) From an admin client, write a
+desired state for vm-device-00:
+
+```rust
+// pseudocode — see harmony-reconciler-contracts for the exact types
+let kv = jetstream.create_key_value(kv::Config {
+    bucket: BUCKET_DESIRED_STATE.into(),
+    history: 1,
+    ..Default::default()
+}).await?;
+kv.put(
+    &desired_state_key("vm-device-00", &dn("hello-web")),
+    payload.into(),
+).await?;
+```
+
+What happens, observable from the agent's journal:
+
+1. Agent's KV watcher (filter `vm-device-00.>`) fires.
+2. Reconciler computes the diff and runs the podman create.
+3. `write_deployment_state(&state)` fires:
+   - puts `state.vm-device-00.hello-web` into the `device-state`
+     KV bucket (operator-side watch picks it up)
+   - publishes the same payload on direct subject
+     `device-state.vm-device-00` (admin observers see it live)
+
+You can subscribe to the latter with admin and watch reconcile
+events stream in real time.
+
+## 8. Teardown
+
+The cluster persists across runs (re-running `fleet_e2e_demo`
+converges drift, doesn't recreate). When you want a clean slate:
+
+```bash
+k3d cluster delete fleet-auth-callout
+
+virsh destroy vm-device-00; virsh undefine vm-device-00 --remove-all-storage
+virsh destroy vm-device-01; virsh undefine vm-device-01 --remove-all-storage
+```
+
+Cached assets (cloud images, k3d binary, ansible venv, SSH key,
+fleet secrets) live under `~/.local/share/harmony/` and survive
+cluster/VM destruction by design — the first run after a teardown
+reuses them.
--- a/examples/fleet_e2e_demo/src/lib.rs
+++ b/examples/fleet_e2e_demo/src/lib.rs
@@ -0,0 +1,831 @@
+//! VM-based end-to-end rehearsal of the customer demo flow.
+//!
+//! Goal: prove the JWT-auth chain works on a real-system agent
+//! before pointing the demo at OKD. See
+//! `ROADMAP/fleet_platform/v0_demo_e2e.md` for the full plan.
+//!
+//! Bring-up sequence:
+//! 1. k3d cluster with HTTP + NATS port mappings (re-uses
+//!    fleet_auth_callout's k3d helpers — same cluster name so
+//!    re-runs of either example reuse the same cluster).
+//! 2. Zitadel + Postgres via ZitadelScore.
+//! 3. Wait for Zitadel HTTP and the chart-provisioned `iam-admin-pat`
+//!    secret (the chart's setup job is async).
+//! 4. ZitadelSetupScore for the project + API app + roles + the
+//!    admin and operator machine users (no per-device users yet).
+//! 5. NATS with auth_callout block + callout pod + fleet operator.
+//! 6. For each device i:
+//!    - ZitadelSetupScore minting a per-device machine user with
+//!      the `device` role grant. The JSON keyfile is cached in
+//!      `ZitadelClientConfig` and read back here for the agent.
+//!    - VM IP read from `FLEET_E2E_VM_<i>_IP`; the VM itself is
+//!      provisioned out-of-band (`ProvisionVmScore` is a follow-up).
+//!    - `FleetDeviceSetupScore` with `FleetDeviceAuth::ZitadelJwt`
+//!      carrying the keyfile, plus an `/etc/hosts` entry so the VM
+//!      resolves `sso.fleet.local` to the libvirt host.
+//!
+//! Tests in `tests/e2e_walking_skeleton.rs` share a single bring-up
+//! via `OnceCell` and exercise: heartbeats, label-selector targeting,
+//! status reflect-back, env+volume propagation, admin cross-device
+//! read, per-device isolation, NATS-pod-restart reconnect.
+
+use std::path::PathBuf;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use example_fleet_auth_callout::{
+    ADMIN_ROLE_KEY, API_APP_NAME, CALLOUT_DEPLOYMENT_NAME, CALLOUT_IMAGE_TAG, DEVICE_ROLE_KEY,
+    FLEET_NAMESPACE, HTTP_PORT, NATS_ACCOUNT, NATS_AUTH_PASS, NATS_AUTH_USER, NATS_NAMESPACE,
+    NATS_NODE_PORT, NATS_RELEASE, PROJECT_NAME, ZITADEL_HOST, build_and_load_callout_image,
+    create_k3d, create_topology, deploy_zitadel, ensure_issuer_seed, wait_for_callout_ready,
+    wait_for_zitadel_ready,
+};
+use harmony::inventory::Inventory;
+use harmony::modules::fleet::{
+    FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore, HostsEntry,
+    ensure_fleet_ssh_keypair,
+};
+use harmony::modules::k8s::coredns::{CoreDNSRewrite, CoreDNSRewriteScore};
+use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv};
+use harmony::modules::nats_auth_callout::NatsAuthCalloutScore;
+use harmony::modules::zitadel::{
+    MachineKeyType, ZitadelApiApp, ZitadelClientConfig, ZitadelMachineUser, ZitadelRole,
+    ZitadelSetupScore,
+};
+use harmony::score::Score;
+use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
+use harmony_fleet_deploy::FleetNatsScore;
+use harmony_types::id::Id;
+use log::{info, warn};
+use nkeys::KeyPair;
+
+// ---- constants -------------------------------------------------------------
+
+/// Libvirt's default NAT gateway: the host's IP as seen from any VM on
+/// the `default` libvirt network. Baked in because every smoke-a*
+/// harness assumes it; override via [`E2eDemoOpts::libvirt_host_ip`].
+pub const DEFAULT_LIBVIRT_HOST_IP: &str = "192.168.122.1";
+
+/// Zitadel machine user for manual admin tooling (`fleet-admin` role).
+pub const ADMIN_USERNAME: &str = "fleet-ops";
+/// Machine user for the in-cluster operator. Same `fleet-admin` role
+/// grant as `fleet-ops`, but a distinct identity so the audit trail
+/// can tell operator-driven actions from human operator actions.
+pub const OPERATOR_USERNAME: &str = "fleet-operator";
+/// Operator image tag; presumably what `build_and_load_operator_image` builds — confirm.
+pub const OPERATOR_IMAGE_TAG: &str = "localhost/harmony-fleet-operator:dev";
+
+/// Zitadel machine-user name for a device: `device-` + `device_id`
+/// (`vm-device-00` → `device-vm-device-00`). `bring_up_full_stack`
+/// sets the callout's `device_id_prefix_strip` to this same prefix,
+/// so the two must stay in sync.
+pub fn device_username(device_id: &str) -> String {
+    ["device-", device_id].concat()
+}
+
+// ---- options + handles -----------------------------------------------------
+
+/// Knobs for [`bring_up_full_stack`].
+#[derive(Debug, Clone)]
+pub struct E2eDemoOpts {
+    /// Number of VM-as-device agents to provision.
+    pub num_devices: usize,
+    /// Agent binary uploaded to each VM.
+    pub agent_binary: PathBuf,
+    /// Address the VMs use to reach the libvirt host (their gateway).
+    pub libvirt_host_ip: String,
+}
+
+impl Default for E2eDemoOpts {
+    /// Two devices, `release/harmony-fleet-agent`, [`DEFAULT_LIBVIRT_HOST_IP`].
+    fn default() -> Self {
+        let agent_binary = workspace_target_path("release/harmony-fleet-agent");
+        Self {
+            num_devices: 2,
+            agent_binary,
+            libvirt_host_ip: String::from(DEFAULT_LIBVIRT_HOST_IP),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct DeviceHandle {
+    pub index: usize, // zero-based index given to `provision_device`
+    pub device_id: String, // `vm-device-NN`, NN = zero-padded `index`
+    pub vm_ip: String, // as returned by `discover_vm_ip`
+    pub labels: std::collections::BTreeMap<String, String>, // from `build_device_labels`
+}
+
+#[derive(Debug, Clone)]
+pub struct E2eHandles {
+    pub cluster_name: String, // `example_fleet_auth_callout::CLUSTER_NAME`
+    pub nats_url_external: String, // `nats://127.0.0.1:{NATS_NODE_PORT}`
+    pub zitadel_url: String, // `http://{ZITADEL_HOST}:{HTTP_PORT}`
+    pub project_id: String, // Zitadel project id read from the client-config cache
+    pub issuer_pubkey: String, // public half of the issuer NKey
+    pub admin_machine_key: String, // cached machine key of `ADMIN_USERNAME`
+    pub devices: Vec<DeviceHandle>, // one entry per provisioned device, in index order
+}
+
+// ---- bring up --------------------------------------------------------------
+
+/// Brings up the full rehearsal stack — k3d, Zitadel, NATS + auth
+/// callout, fleet operator — then onboards `opts.num_devices` agents.
+/// Steps run in order; the first failure aborts with a contextual error.
+pub async fn bring_up_full_stack(opts: E2eDemoOpts) -> Result<E2eHandles> {
+    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init();
+
+    info!("[e2e-demo 1/10] ensuring k3d cluster");
+    let k3d = create_k3d();
+    k3d.ensure_installed()
+        .await
+        .map_err(|e| anyhow::anyhow!("k3d ensure: {e}"))?;
+    let topology = create_topology(&k3d);
+    topology.ensure_ready().await.context("topology init")?;
+
+    info!("[e2e-demo 2/10] deploying Zitadel (cold start: ~5 min)");
+    deploy_zitadel(&topology).await?;
+
+    info!("[e2e-demo 3/10] CoreDNS rewrite + waiting for Zitadel HTTP + iam-admin-pat secret");
+    CoreDNSRewriteScore {
+        rewrites: vec![CoreDNSRewrite {
+            hostname: ZITADEL_HOST.to_string(),
+            target: "zitadel.zitadel.svc.cluster.local".to_string(),
+        }],
+    }
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("CoreDNSRewriteScore")?;
+    wait_for_zitadel_ready().await?;
+    wait_for_iam_admin_pat_secret(&topology).await?;
+
+    info!("[e2e-demo 4/10] provisioning project, API app, roles, admin machine user");
+    let admin_setup = ZitadelSetupScore {
+        host: ZITADEL_HOST.to_string(),
+        scheme: Default::default(),
+        port: None,
+        skip_tls: true,
+        endpoint: Some(format!("http://127.0.0.1:{HTTP_PORT}")),
+        admin_org_id: None,
+        namespace: "zitadel".to_string(),
+        applications: vec![],
+        api_apps: vec![ZitadelApiApp {
+            project_name: PROJECT_NAME.to_string(),
+            app_name: API_APP_NAME.to_string(),
+        }],
+        roles: vec![
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: ADMIN_ROLE_KEY.to_string(),
+                display_name: "Fleet Admin".to_string(),
+                group: None,
+            },
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: DEVICE_ROLE_KEY.to_string(),
+                display_name: "Device".to_string(),
+                group: None,
+            },
+        ],
+        machine_users: vec![
+            ZitadelMachineUser {
+                username: ADMIN_USERNAME.to_string(),
+                name: "Fleet Operations".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![ADMIN_ROLE_KEY.to_string()],
+            },
+            // In-cluster operator identity: same `fleet-admin` role grant,
+            // distinct username (see `OPERATOR_USERNAME`).
+            ZitadelMachineUser {
+                username: OPERATOR_USERNAME.to_string(),
+                name: "Fleet Operator (in-cluster)".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![ADMIN_ROLE_KEY.to_string()],
+            },
+        ],
+    };
+    admin_setup
+        .interpret(&Inventory::autoload(), &topology)
+        .await
+        .context("admin ZitadelSetupScore")?;
+
+    let zcfg = ZitadelClientConfig::load()
+        .context("ZitadelSetupScore did not produce a client config cache")?;
+    let project_id = zcfg
+        .project_id_by_name(PROJECT_NAME)
+        .or(zcfg.project_id.as_ref())
+        .context("project_id missing from cache")?
+        .clone();
+    let admin_machine_key = zcfg
+        .machine_key(ADMIN_USERNAME)
+        .context("admin machine key missing from cache")?
+        .clone();
+
+    info!("[e2e-demo 5/10] generating issuer NKey, deploying NATS with auth_callout");
+    let issuer_seed = ensure_issuer_seed(&topology).await?;
+    let issuer_kp = KeyPair::from_seed(&issuer_seed)
+        .map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
+    let issuer_pubkey = issuer_kp.public_key();
+
+    // Per ADR-023 principle 2 — e2e uses the same Scores as production.
+    // `FleetNatsScore::callout` renders the auth-callout values block
+    // typed (serde_yaml) rather than the legacy `render_nats_values`
+    // string interpolation. Same upstream chart, same wire format,
+    // schema-checked at compile time.
+    FleetNatsScore::callout(
+        NATS_NAMESPACE,
+        NATS_NODE_PORT as u16,
+        &issuer_pubkey,
+        NATS_ACCOUNT,
+        NATS_AUTH_USER,
+        NATS_AUTH_PASS,
+    )
+    .release_name(NATS_RELEASE)
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("NATS deploy")?;
+
+    info!("[e2e-demo 6/10] building + sideloading callout image into k3d");
+    build_and_load_callout_image(&k3d).await?;
+
+    info!("[e2e-demo 7/10] deploying NatsAuthCalloutScore");
+    let mut callout = NatsAuthCalloutScore::new(
+        CALLOUT_DEPLOYMENT_NAME,
+        FLEET_NAMESPACE,
+        format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
+        format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        project_id.clone(),
+        NATS_AUTH_USER,
+        NATS_AUTH_PASS,
+        issuer_seed.clone(),
+    )
+    .image(CALLOUT_IMAGE_TAG)
+    .target_account(NATS_ACCOUNT)
+    .admin_role(ADMIN_ROLE_KEY)
+    .device_role(DEVICE_ROLE_KEY)
+    .danger_accept_invalid_certs(true);
+    // Same convention as fleet_auth_callout: the username is in the
+    // access token's `client_id` claim. The role claim path is
+    // project-scoped because the JWT-bearer flow requests project
+    // audience scope.
+    callout.device_id_claim = "client_id".to_string();
+    // Zitadel's `client_id` for a machine user equals its userName, so a
+    // user created as `device-vm-device-00` (the convention shared with
+    // fleet_rpi_setup and fleet_auth_callout) lands in the JWT verbatim.
+    // Strip the `device-` prefix so the callout interpolates permissions
+    // against the bare device id (`vm-device-00`) the agent uses for KV
+    // keys + direct subjects.
+    callout.device_id_prefix_strip = "device-".to_string();
+    callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
+    callout
+        .interpret(&Inventory::autoload(), &topology)
+        .await
+        .context("callout deploy")?;
+    wait_for_callout_ready(&topology).await?;
+
+    info!("[e2e-demo 8/10] building + sideloading operator image into k3d");
+    build_and_load_operator_image(&k3d).await?;
+
+    info!("[e2e-demo 9/10] deploying fleet operator with Zitadel JWT auth");
+    let operator_machine_key = zcfg
+        .machine_key(OPERATOR_USERNAME)
+        .with_context(|| format!("machine key for {OPERATOR_USERNAME} missing from cache"))?
+        .clone();
+    deploy_operator(&topology, &project_id, &operator_machine_key).await?;
+    wait_for_operator_ready(&topology).await?;
+
+    info!(
+        "[e2e-demo 10/10] provisioning {} VM(s) and onboarding agent(s)",
+        opts.num_devices
+    );
+    let mut devices = Vec::with_capacity(opts.num_devices);
+    for i in 0..opts.num_devices {
+        let handle = provision_device(i, &opts, &topology, &project_id).await?;
+        devices.push(handle);
+    }
+
+    info!(
+        "full stack ready: {} device(s), operator + admin role configured",
+        devices.len()
+    );
+
+    Ok(E2eHandles {
+        cluster_name: example_fleet_auth_callout::CLUSTER_NAME.to_string(),
+        nats_url_external: format!("nats://127.0.0.1:{NATS_NODE_PORT}"),
+        zitadel_url: format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        project_id,
+        issuer_pubkey,
+        admin_machine_key,
+        devices,
+    })
+}
+
+// ---- per-device provisioning ----------------------------------------------
+
+/// Onboards device `index`: creates its Zitadel machine user, reads the
+/// VM's IP from the environment, then installs the agent over SSH with
+/// Zitadel JWT auth. Returns the handle describing that device.
+///
+/// Errors carry context naming the step (and user/VM) that failed.
+async fn provision_device(
+    index: usize,
+    opts: &E2eDemoOpts,
+    topology: &K8sAnywhereTopology,
+    project_id: &str,
+) -> Result<DeviceHandle> {
+    let device_id = format!("vm-device-{index:02}");
+    let username = device_username(&device_id);
+    info!("[device {index}] minting Zitadel machine user {username}");
+
+    // Single-user ZitadelSetupScore. It is search-then-create, so an
+    // existing user is reused; the keyfile is still re-minted because
+    // Zitadel doesn't expose the private half of an existing key. Any
+    // key from a previous installation stays "stale until expiry".
+    let machine_user = ZitadelMachineUser {
+        username: username.clone(),
+        name: format!("Fleet Device {device_id}"),
+        create_pat: false,
+        machine_key: Some(MachineKeyType::Json),
+        project_name: Some(PROJECT_NAME.to_string()),
+        grant_roles: vec![DEVICE_ROLE_KEY.to_string()],
+    };
+    ZitadelSetupScore {
+        host: ZITADEL_HOST.to_string(),
+        scheme: Default::default(),
+        port: None,
+        skip_tls: true,
+        endpoint: Some(format!("http://127.0.0.1:{HTTP_PORT}")),
+        admin_org_id: None,
+        namespace: "zitadel".to_string(),
+        applications: vec![],
+        api_apps: vec![],
+        roles: vec![],
+        machine_users: vec![machine_user],
+    }
+    .interpret(&Inventory::autoload(), topology)
+    .await
+    .with_context(|| format!("ZitadelSetupScore for {username}"))?;
+
+    // The score caches the minted keyfile in `ZitadelClientConfig`;
+    // read it back so it can be handed to the agent.
+    let machine_key_json = ZitadelClientConfig::load()
+        .context("ZitadelClientConfig disappeared between admin and device setup")?
+        .machine_key(&username)
+        .with_context(|| format!("machine key for {username} missing from cache"))?
+        .clone();
+
+    // VMs are NOT created here. The kvm path (ProvisionVmScore) needs
+    // root + libvirtd + the cloud image, so for now the harness expects
+    // them to exist already (e.g. via fleet_vm_setup) and only looks up
+    // the IP — see `discover_vm_ip`. That also lets the test driver
+    // iterate on the agent path without paying VM boot on every cycle.
+    // Follow-up: fold ProvisionVmScore::ensure_vm in here.
+    let vm_ip = discover_vm_ip(index)
+        .with_context(|| format!("could not resolve IP for device {index}"))?;
+
+    info!("[device {index}] {device_id} at {vm_ip} — installing agent with Zitadel JWT auth");
+    let labels = build_device_labels(&device_id, index);
+
+    let auth = FleetDeviceAuth::ZitadelJwt {
+        machine_key_json,
+        // Must equal the issuer string Zitadel returns, which it derives
+        // from the request's Host header. The agent reaches Zitadel
+        // through the host's port mapping, hence
+        // `http://sso.fleet.local:<host-port>`.
+        oidc_issuer_url: format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        audience: project_id.to_string(),
+        // Local rehearsal talks plain HTTP through the cluster ingress;
+        // there is no certificate to validate.
+        danger_accept_invalid_certs: true,
+    };
+    // Lets the VM resolve the Zitadel hostname to the libvirt host.
+    let zitadel_hosts_entry = HostsEntry {
+        ip: opts.libvirt_host_ip.clone(),
+        hostname: ZITADEL_HOST.to_string(),
+    };
+    // The agent dials NATS at the libvirt host's IP via the NodePort:
+    // the default libvirt network NATs the VM through the host, so the
+    // host's port mapping is reachable from inside the VM.
+    let nats_url = format!("nats://{}:{NATS_NODE_PORT}", opts.libvirt_host_ip);
+    let agent_score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig {
+        device_id: Id::from(device_id.clone()),
+        labels: labels.clone(),
+        nats_urls: vec![nats_url],
+        auth,
+        agent_binary_path: opts.agent_binary.clone(),
+        hosts_entries: vec![zitadel_hosts_entry],
+    });
+
+    // Run the score over SSH against the VM — the fleet_rpi_setup
+    // pattern, inlined so several VMs can be driven in sequence.
+    apply_fleet_setup_to_vm(index, &vm_ip, agent_score).await?;
+
+    Ok(DeviceHandle {
+        index,
+        device_id,
+        vm_ip,
+        labels,
+    })
+}
+
+/// Runs `score` against the VM at `vm_ip` over SSH as `fleet-admin`,
+/// after calling `ensure_ansible_venv` and `ensure_fleet_ssh_keypair`.
+async fn apply_fleet_setup_to_vm(
+    index: usize,
+    vm_ip: &str,
+    score: FleetDeviceSetupScore,
+) -> Result<()> {
+    ensure_ansible_venv()
+        .await
+        .map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?;
+    let ssh = ensure_fleet_ssh_keypair()
+        .await
+        .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
+    let ip = vm_ip
+        .parse()
+        .with_context(|| format!("VM IP '{vm_ip}' is not a valid IP address"))?;
+    let creds = SshCredentials {
+        // Matches the cloud-init admin user that fleet_vm_setup +
+        // smoke-a4 create. Follow-up: thread the username through
+        // E2eDemoOpts for VMs provisioned with a different one.
+        user: "fleet-admin".to_string(),
+        private_key_path: ssh.private_key.clone(),
+        remote_python: Some("/usr/bin/python3".to_string()),
+        sudo_password: None,
+    };
+    let topology = LinuxHostTopology::new(format!("vm-device-{index:02}"), ip, creds);
+    score
+        .create_interpret()
+        .execute(&Inventory::empty(), &topology)
+        .await
+        .with_context(|| format!("FleetDeviceSetupScore against VM {index} ({vm_ip})"))?;
+    Ok(())
+}
+
+/// Labels for a rehearsal device: `group`, `arch`, `role`, `device-id`.
+///
+/// Device 0 lands in `group-a` and every other index in `group-b`, so
+/// with the default two devices a selector can target exactly one.
+/// `arch` is the architecture this harness was compiled for
+/// (`std::env::consts::ARCH`), not one probed from the VM.
+fn build_device_labels(
+    device_id: &str,
+    index: usize,
+) -> std::collections::BTreeMap<String, String> {
+    let group = if index == 0 { "group-a" } else { "group-b" };
+    [
+        ("group", group),
+        ("arch", std::env::consts::ARCH),
+        ("role", "rehearsal"),
+        ("device-id", device_id),
+    ]
+    .into_iter()
+    .map(|(key, value)| (key.to_string(), value.to_string()))
+    .collect()
+}
+
+/// Reads the IP of pre-provisioned VM `index` from `FLEET_E2E_VM_<index>_IP`.
+///
+/// The operator runs `fleet_vm_setup` once per device out-of-band and
+/// exports the variable, so the harness can be re-run against VMs that
+/// are already booted.
+fn discover_vm_ip(index: usize) -> Result<String> {
+    let var_name = format!("FLEET_E2E_VM_{index}_IP");
+    let lookup = std::env::var(&var_name);
+    lookup.with_context(|| format!("set {var_name} to the libvirt VM's IP (default network)"))
+}
+
+// ---- iam-admin-pat readiness ----------------------------------------------
+
+/// Poll (once per second, up to 120 attempts) until the `iam-admin-pat`
+/// secret in the `zitadel` namespace carries a `pat` key. The chart's
+/// setup job writes it after the Helm release already reports Ready, so
+/// ZitadelSetupScore (which reads the secret to authenticate) would race.
+async fn wait_for_iam_admin_pat_secret(topology: &K8sAnywhereTopology) -> Result<()> {
+    use k8s_openapi::api::core::v1::Secret;
+    let client = match topology.k8s_client().await {
+        Ok(client) => client,
+        Err(e) => anyhow::bail!("k8s_client: {e}"),
+    };
+    for attempt in 1..=120 {
+        let found = client
+            .get_resource::<Secret>("iam-admin-pat", Some("zitadel"))
+            .await?;
+        let has_pat = found
+            .and_then(|secret| secret.data)
+            .is_some_and(|data| data.contains_key("pat"));
+        if has_pat {
+            return Ok(());
+        }
+        if attempt % 10 == 0 {
+            warn!("iam-admin-pat secret not yet present in zitadel ns ({attempt}/120)");
+        }
+        tokio::time::sleep(Duration::from_secs(1)).await;
+    }
+    anyhow::bail!(
+        "timed out waiting for iam-admin-pat secret in 'zitadel' namespace — \
+         is FirstInstance.Org.Machine.Pat configured in ZitadelScore Helm values?"
+    )
+}
+
+// ---- operator deploy -------------------------------------------------------
+
+const OPERATOR_NAMESPACE: &str = FLEET_NAMESPACE;
+
+/// k3d data directory: `<user data dir>/harmony/k3d`, or `/tmp/harmony`
+/// when base directories cannot be resolved. Local copy of the private
+/// `example_fleet_auth_callout::data_dir` to keep operator wiring self-contained.
+fn k3d_data_dir() -> PathBuf {
+    match directories::BaseDirs::new() {
+        Some(dirs) => dirs.data_dir().join("harmony").join("k3d"),
+        None => PathBuf::from("/tmp/harmony"),
+    }
+}
+
+/// Build the operator's release binary, package it into an OCI image
+/// (Dockerfile from the operator crate), and sideload it into the k3d
+/// cluster. Mirrors `build_and_load_callout_image`. The exported image
+/// tarball is deleted afterwards, including when the import fails.
+async fn build_and_load_operator_image(k3d: &k3d_rs::K3d) -> Result<()> {
+    let workspace_root = std::env::var("CARGO_MANIFEST_DIR")
+        .map(|d| PathBuf::from(d).join("..").join(".."))
+        .unwrap_or_else(|_| PathBuf::from("."));
+    let workspace_root = workspace_root.canonicalize().unwrap_or(workspace_root);
+
+    info!("cargo build --release -p harmony-fleet-operator");
+    let status = tokio::process::Command::new("cargo")
+        .args(["build", "--release", "-p", "harmony-fleet-operator"])
+        .current_dir(&workspace_root)
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("cargo build for fleet operator failed");
+    }
+
+    // Stage binary + Dockerfile in a clean temp dir: keeps target/ out of the context.
+    let ctx = tempfile::tempdir()?;
+    let bin_dst = ctx.path().join("target/release");
+    std::fs::create_dir_all(&bin_dst)?;
+    std::fs::copy(
+        workspace_root.join("target/release/harmony-fleet-operator"),
+        bin_dst.join("harmony-fleet-operator"),
+    )
+    .context("staging operator binary into build context")?;
+    let dockerfile_src = workspace_root.join("fleet/harmony-fleet-operator/Dockerfile");
+    if !dockerfile_src.exists() {
+        anyhow::bail!(
+            "missing fleet/harmony-fleet-operator/Dockerfile — operator image staging \
+             expects it next to Cargo.toml; either add it or update the bring-up."
+        );
+    }
+    std::fs::copy(&dockerfile_src, ctx.path().join("Dockerfile"))?;
+
+    info!("podman build → {OPERATOR_IMAGE_TAG}");
+    let status = tokio::process::Command::new("podman")
+        .args(["build", "-q", "-t", OPERATOR_IMAGE_TAG, "."])
+        .current_dir(ctx.path())
+        .stderr(std::process::Stdio::inherit())
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("podman build for operator failed");
+    }
+
+    let tar_path =
+        std::env::temp_dir().join(format!("harmony-operator-image-{}.tar", std::process::id()));
+    let tar_path_str = tar_path
+        .to_str()
+        .context("operator image tar path is not valid UTF-8")?
+        .to_string();
+    let _ = std::fs::remove_file(&tar_path);
+    let status = tokio::process::Command::new("podman")
+        .args(["save", "-o", tar_path_str.as_str(), OPERATOR_IMAGE_TAG])
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("podman save for operator failed");
+    }
+    info!("k3d image import {OPERATOR_IMAGE_TAG}");
+    let cluster_name = k3d
+        .cluster_name()
+        .unwrap_or(example_fleet_auth_callout::CLUSTER_NAME)
+        .to_string();
+    let data_dir = k3d_data_dir();
+    let imported = tokio::task::spawn_blocking(move || {
+        k3d_rs::K3d::new(data_dir, Some(cluster_name.clone())).run_k3d_command([
+            "image",
+            "import",
+            tar_path_str.as_str(),
+            "-c",
+            cluster_name.as_str(),
+        ])
+    })
+    .await;
+    // Delete the tarball before surfacing an import error so it never leaks.
+    let _ = std::fs::remove_file(&tar_path);
+    imported?.map_err(|e| anyhow::anyhow!("k3d image import failed: {e}"))?;
+    Ok(())
+}
+
+/// Apply the operator's CRDs + ServiceAccount + ClusterRole +
+/// ClusterRoleBinding + Secret + Deployment via Harmony's
+/// K8sResourceScore. The Secret carries the `[credentials]` TOML
+/// (consumed by the operator as `FLEET_OPERATOR_CREDENTIALS_TOML`),
+/// with the Zitadel JSON keyfile embedded inline as `key_json`.
+async fn deploy_operator(
+    topology: &K8sAnywhereTopology,
+    project_id: &str,
+    operator_machine_key: &str,
+) -> Result<()> {
+    use harmony::modules::fleet::operator::crd::{Deployment as FleetDeployment, Device};
+    use harmony::modules::k8s::resource::K8sResourceScore;
+    use harmony_fleet_deploy::operator::chart::{
+        ChartOptions, OperatorCredentials, RELEASE_NAME, build_cluster_role,
+        build_cluster_role_binding, build_operator_deployment, build_service_account,
+        operator_secret,
+    };
+    use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
+    use kube::CustomResourceExt;
+
+    // Render the [credentials] TOML the operator pod consumes via the
+    // FLEET_OPERATOR_CREDENTIALS_TOML env var (sourced from a Secret
+    // key). The Zitadel JSON keyfile is embedded inline under
+    // `key_json` as a multi-line *literal* string ('''): a basic
+    // string (""") would expand the JSON's `\n`/`\"` escapes.
+    let credentials_toml = format!(
+        r#"type = "zitadel-jwt"
+oidc_issuer_url = "http://{host}:{port}"
+audience = "{project_id}"
+danger_accept_invalid_certs = true
+key_json = '''
+{key_json}
+'''
+"#,
+        host = ZITADEL_HOST,
+        port = HTTP_PORT,
+        key_json = operator_machine_key,
+    );
+
+    let opts = ChartOptions {
+        output_dir: PathBuf::new(), // unused on this code path
+        image: OPERATOR_IMAGE_TAG.to_string(),
+        image_pull_policy: "IfNotPresent".to_string(),
+        namespace: OPERATOR_NAMESPACE.to_string(),
+        nats_url: format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
+        log_level: "info,kube_runtime=warn".to_string(),
+        credentials: Some(OperatorCredentials { credentials_toml }),
+    };
+
+    // CRDs first — the operator watches them on startup.
+    let crds: Vec<CustomResourceDefinition> = vec![FleetDeployment::crd(), Device::crd()];
+    K8sResourceScore::<CustomResourceDefinition> {
+        resource: crds,
+        namespace: None,
+    }
+    .interpret(&Inventory::autoload(), topology)
+    .await
+    .context("operator CRD apply")?;
+
+    // RBAC.
+    K8sResourceScore::single(
+        build_service_account(&opts),
+        Some(OPERATOR_NAMESPACE.to_string()),
+    )
+    .interpret(&Inventory::autoload(), topology)
+    .await
+    .context("operator ServiceAccount apply")?;
+
+    K8sResourceScore::single(build_cluster_role(), None)
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .context("operator ClusterRole apply")?;
+
+    K8sResourceScore::single(build_cluster_role_binding(&opts), None)
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .context("operator ClusterRoleBinding apply")?;
+
+    // Secret holding the credentials TOML (keyfile embedded inline).
+    let secret = operator_secret(&opts).expect("credentials present in opts");
+    K8sResourceScore::single(secret, Some(OPERATOR_NAMESPACE.to_string()))
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .context("operator Secret apply")?;
+
+    // Deployment last so it pulls the up-to-date Secret.
+    K8sResourceScore::single(
+        build_operator_deployment(&opts),
+        Some(OPERATOR_NAMESPACE.to_string()),
+    )
+    .interpret(&Inventory::autoload(), topology)
+    .await
+    .context("operator Deployment apply")?;
+
+    info!("operator deployment {OPERATOR_NAMESPACE}/{RELEASE_NAME} applied");
+    Ok(())
+}
+
+/// Poll once a second (120 tries) until the operator Deployment has a ready replica.
+async fn wait_for_operator_ready(topology: &K8sAnywhereTopology) -> Result<()> {
+    use harmony_fleet_deploy::operator::chart::RELEASE_NAME;
+    use k8s_openapi::api::apps::v1::Deployment as K8sDeployment;
+    let client = match topology.k8s_client().await {
+        Ok(client) => client,
+        Err(e) => anyhow::bail!("k8s_client: {e}"),
+    };
+    for attempt in 1..=120 {
+        let deployment = client
+            .get_resource::<K8sDeployment>(RELEASE_NAME, Some(OPERATOR_NAMESPACE))
+            .await?;
+        let ready = deployment.and_then(|d| d.status?.ready_replicas).unwrap_or(0);
+        if ready >= 1 {
+            return Ok(());
+        }
+        if attempt % 10 == 0 {
+            warn!("operator Deployment not yet Ready ({attempt}/120)");
+        }
+        tokio::time::sleep(Duration::from_secs(1)).await;
+    }
+    anyhow::bail!("timed out waiting for operator Deployment to become Ready")
+}
+
+// ---- helpers ---------------------------------------------------------------
+
+/// `<CARGO_MANIFEST_DIR or ".">/../../target/<rel>`, not canonicalized.
+fn workspace_target_path(rel: &str) -> PathBuf {
+    let crate_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string());
+    let segments = ["..", "..", "target", rel];
+    segments.iter().fold(PathBuf::from(crate_dir), |path, part| path.join(part))
+}
+
+// ---- next-steps panel ------------------------------------------------------
+
+impl E2eHandles {
+    /// Print the "stack ready" panel: endpoints, devices, and the test command.
+    pub fn print_next_steps(&self) {
+        println!();
+        println!("============================================================");
+        println!(" E2E DEMO REHEARSAL — STACK READY");
+        println!("============================================================");
+        println!(" k3d cluster:    {}", self.cluster_name);
+        println!(" Zitadel:        {}", self.zitadel_url);
+        println!(" NATS (host):    {}", self.nats_url_external);
+        println!(" Project ID:     {}", self.project_id);
+        println!(" Issuer pubkey:  {}", self.issuer_pubkey);
+        println!();
+        println!(" Devices ({}):", self.devices.len());
+        for dev in &self.devices {
+            let label_list = dev
+                .labels
+                .iter()
+                .map(|(k, v)| format!("{k}={v}"))
+                .collect::<Vec<String>>()
+                .join(",");
+            println!("   [{}] {} @ {} ({})", dev.index, dev.device_id, dev.vm_ip, label_list);
+        }
+        println!();
+        println!(" Run the test suite:");
+        println!();
+        println!("   cargo test -p example-fleet-e2e-demo \\");
+        println!("     --test e2e_walking_skeleton -- --test-threads=1 --nocapture");
+        println!();
+        println!(" Ctrl-C exits without tearing the cluster down — re-run");
+        println!(" the bring-up to converge any drift.");
+        println!("============================================================");
+    }
+}
+
+#[cfg(test)]
+mod unit_tests {
+    use super::*;
+
+    #[test]
+    fn device_username_matches_callout_convention() {
+        // The callout's device_id_claim is `client_id`, which Zitadel
+        // fills in from the machine user's username. A later test
+        // asserts that the agent's per-device subjects match its
+        // device_id, so the device_id must equal the username with
+        // the "device-" prefix (known to the callout) stripped.
+        assert_eq!(device_username("vm-device-00"), "device-vm-device-00");
+    }
+
+    #[test]
+    fn device_labels_split_into_distinct_groups() {
+        let l0 = build_device_labels("vm-device-00", 0);
+        let l1 = build_device_labels("vm-device-01", 1);
+        assert_eq!(l0.get("group").unwrap(), "group-a");
+        assert_eq!(l1.get("group").unwrap(), "group-b");
+        assert_ne!(l0.get("group"), l1.get("group"));
+        // Labels common to every device: device-id, arch and role.
+        for l in [&l0, &l1] {
+            assert!(l.contains_key("device-id"));
+            assert!(l.contains_key("arch"));
+            assert_eq!(l.get("role").unwrap(), "rehearsal");
+        }
+    }
+}
--- a/examples/fleet_e2e_demo/src/main.rs
+++ b/examples/fleet_e2e_demo/src/main.rs
@@ -0,0 +1,51 @@
+//! `cargo run -p example-fleet-e2e-demo -- --num-devices 2 ...`
+//!
+//! Brings up the full E2E rehearsal stack: k3d + Zitadel + NATS auth
+//! callout + per-device Zitadel machine users + (out-of-band)
+//! libvirt VMs + agents authenticating via JWT-bearer.
+//!
+//! See `src/lib.rs` and `ROADMAP/fleet_platform/v0_demo_e2e.md`.
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use example_fleet_e2e_demo::{DEFAULT_LIBVIRT_HOST_IP, E2eDemoOpts, bring_up_full_stack};
+use std::path::PathBuf;
+
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet-e2e-demo",
+    about = "VM-based end-to-end rehearsal of the fleet platform demo flow"
+)]
+struct Cli {
+    /// Number of VM-as-device agents to bring up. Each one needs its
+    /// own libvirt domain, provisioned out-of-band today via
+    /// `fleet_vm_setup`, with its IP exported as `FLEET_E2E_VM_<i>_IP`.
+    #[arg(long, default_value_t = 2)]
+    num_devices: usize,
+    /// Path to the cross-compiled `fleet-agent` binary uploaded to
+    /// each VM. Same binary that smoke-a4 produces.
+    #[arg(long, default_value = "target/release/harmony-fleet-agent")]
+    agent_binary: PathBuf,
+    /// Override for the libvirt host IP (the address VMs see as the
+    /// gateway). Defaults to the libvirt default network's gateway.
+    #[arg(long, default_value = DEFAULT_LIBVIRT_HOST_IP)]
+    libvirt_host_ip: String,
+}
+
+/// Bring the stack up, print the next-steps panel, then wait for Ctrl-C.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let args = Cli::parse();
+    let opts = E2eDemoOpts {
+        num_devices: args.num_devices,
+        agent_binary: args.agent_binary,
+        libvirt_host_ip: args.libvirt_host_ip,
+    };
+    let bring_up = bring_up_full_stack(opts);
+    let handles = bring_up.await.context("bring_up_full_stack")?;
+    handles.print_next_steps();
+    println!();
+    println!(" Press Ctrl-C to exit (cluster keeps running).");
+    tokio::signal::ctrl_c().await?;
+    Ok(())
+}
--- a/examples/fleet_e2e_demo/tests/e2e_walking_skeleton.rs
+++ b/examples/fleet_e2e_demo/tests/e2e_walking_skeleton.rs
@@ -0,0 +1,161 @@
+//! End-to-end walking-skeleton tests for the VM-based demo rehearsal.
+//!
+//! Shares one bring-up across the whole suite via `OnceCell`. Run
+//! sequentially — they touch shared k3d + libvirt VM state.
+//!
+//! Pre-flight (manual, before `cargo test`):
+//!
+//! - libvirt + qemu installed; default network active.
+//! - Two cloud-init Ubuntu VMs provisioned (e.g. via
+//!   `cargo run -p example_fleet_vm_setup`). Their IPs exported as
+//!   `FLEET_E2E_VM_0_IP` and `FLEET_E2E_VM_1_IP`.
+//! - SSH keypair the VMs trust at `~/.ssh/id_ed25519` (or
+//!   override path; harness reads the standard pair).
+//!
+//! Run:
+//!
+//! ```bash
+//! FLEET_E2E_VM_0_IP=192.168.122.42 \
+//! FLEET_E2E_VM_1_IP=192.168.122.43 \
+//! cargo test -p example-fleet-e2e-demo --test e2e_walking_skeleton \
+//!   -- --test-threads=1 --nocapture
+//! ```
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use async_nats::ConnectOptions;
+use example_fleet_auth_callout::{mint_access_token, scopes_for_project};
+use example_fleet_e2e_demo::{E2eDemoOpts, E2eHandles, bring_up_full_stack};
+use futures_util::StreamExt;
+use tokio::sync::OnceCell;
+
+static STACK: OnceCell<Arc<E2eHandles>> = OnceCell::const_new();
+
+/// Returns the suite-wide stack, bringing it up on first use only.
+async fn shared_stack() -> Result<Arc<E2eHandles>> {
+    let bring_up = || async {
+        let handles = bring_up_full_stack(E2eDemoOpts::default()).await?;
+        anyhow::Ok(Arc::new(handles))
+    };
+    let stack = STACK.get_or_try_init(bring_up).await?;
+    Ok(Arc::clone(stack))
+}
+
+/// Connects to the externally exposed NATS endpoint as the admin
+/// machine user, authenticating with a freshly minted Zitadel access
+/// token scoped to the stack's project (5 s connection timeout).
+async fn admin_nats_client(stack: &E2eHandles) -> Result<async_nats::Client> {
+    let scopes = scopes_for_project(&stack.project_id);
+    let token = mint_access_token(&stack.zitadel_url, &stack.admin_machine_key, &scopes)
+        .await
+        .context("mint admin Zitadel token")?;
+    let options = ConnectOptions::with_token(token).connection_timeout(Duration::from_secs(5));
+    match options.connect(&stack.nats_url_external).await {
+        Ok(client) => Ok(client),
+        Err(e) => Err(anyhow::anyhow!("admin connect: {e}")),
+    }
+}
+
+// -- Test 1 -------------------------------------------------------------
+
+/// Each provisioned VM publishes a DeviceInfo within the heartbeat
+/// window. Polls the `device-info` KV bucket via the admin client
+/// (admin role can subscribe to anything) every 500 ms for up to 60 s.
+#[tokio::test]
+#[ignore = "requires libvirt VMs + k3d + Zitadel + NATS bring-up — see header"]
+async fn both_devices_heartbeat_within_60s() -> Result<()> {
+    use std::collections::HashSet;
+    use std::time::Instant;
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+    let admin = admin_nats_client(&stack).await?;
+
+    let bucket = async_nats::jetstream::new(admin)
+        .get_key_value(harmony_reconciler_contracts::BUCKET_DEVICE_INFO)
+        .await
+        .context("device-info bucket")?;
+
+    let deadline = Instant::now() + Duration::from_secs(60);
+    let expected: HashSet<String> = stack.devices.iter().map(|d| d.device_id.clone()).collect();
+    let mut seen: HashSet<String> = HashSet::new();
+
+    while Instant::now() < deadline && seen != expected {
+        for device in &stack.devices {
+            let key = harmony_reconciler_contracts::device_info_key(&device.device_id);
+            if bucket.entry(&key).await?.is_some() {
+                seen.insert(device.device_id.clone());
+            }
+        }
+        tokio::time::sleep(Duration::from_millis(500)).await;
+    }
+    assert_eq!(
+        seen, expected,
+        "each provisioned device must publish DeviceInfo within 60s; saw {seen:?}"
+    );
+    Ok(())
+}
+
+// -- Test 5 (admin cross-device read) -----------------------------------
+
+/// The admin's Zitadel JWT carries the `fleet-admin` role, which the
+/// callout maps to `pub/sub allow: [">"]`. A subscription to
+/// `device-state.>` is therefore admitted and sees every device.
+#[tokio::test]
+#[ignore = "requires libvirt VMs + k3d + Zitadel + NATS bring-up — see header"]
+async fn admin_jwt_reads_any_device_subject() -> Result<()> {
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+    let admin = admin_nats_client(&stack).await?;
+
+    let mut subscription = admin.subscribe("device-state.>").await?;
+    admin.flush().await?;
+
+    // No traffic is generated here: the agents publish per-deployment
+    // state on every reconcile tick, so keeping the subscription open
+    // for 30s is enough to catch at least one publication. Silence for
+    // that long means the agents are either not connected or not
+    // publishing, and both conditions are fatal for the demo. A closed
+    // subscription (`None`) counts as a failure as well.
+    let first_message = tokio::time::timeout(Duration::from_secs(30), subscription.next()).await;
+    assert!(
+        matches!(first_message, Ok(Some(_))),
+        "admin must observe at least one device-state.* message in 30s"
+    );
+    Ok(())
+}
+
+// -- Test 6 (per-device isolation) ---------------------------------------
+
+/// A per-device JWT has subject permissions scoped to its own
+/// `device-state.{device_id}` and `device-commands.{device_id}`. The
+/// callout enforces this; subscribing to a sibling device's commands
+/// must fail at NATS connect-time or at SUB-time.
+///
+/// Skipped here because the per-device JWT minting helper (analogous
+/// to `mint_access_token` but for a `device` role user) needs the
+/// per-device machine key to be plumbed back from `bring_up_full_stack`
+/// through `E2eHandles`. Follow-up commit adds
+/// `E2eHandles::device_machine_key(idx)` so this test can be
+/// implemented without re-running `ZitadelSetupScore` from the test
+/// body.
+#[tokio::test]
+#[ignore = "requires E2eHandles::device_machine_key plumbing"]
+async fn cross_device_isolation_enforced_in_vm() {}
+
+// -- Test 7 (load-bearing reconnect) -------------------------------------
+
+/// Kill the NATS pod, wait for the new one to come up, verify both
+/// agents reconnect with fresh JWTs and resume publishing within
+/// 30 seconds. This is the test that validates the "never lose
+/// connectivity to a device" guarantee under realistic disturbance.
+///
+/// Skipped pending operator install in the harness — without the
+/// operator the agents have no `desired-state` to publish status
+/// against, so verifying "publishing resumed" needs a separate
+/// signal. Follow-up commit observes the agents' periodic
+/// heartbeat publication directly via the device-heartbeat KV.
+#[tokio::test]
+#[ignore = "requires NATS-pod-restart driver and heartbeat-presence assertion"]
+async fn agent_recovers_from_nats_pod_restart() {}
--- a/examples/fleet_load_test/Cargo.toml
+++ b/examples/fleet_load_test/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "example_fleet_load_test"
+version.workspace = true
+edition = "2024"
+license.workspace = true
+
+[[bin]]
+name = "fleet_load_test"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony", default-features = false }
+harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
+async-nats = { workspace = true }
+chrono = { workspace = true }
+kube = { workspace = true, features = ["runtime", "derive"] }
+k8s-openapi.workspace = true
+serde_json = { workspace = true }
+tokio = { workspace = true }
+tracing = { workspace = true }
+tracing-subscriber = { workspace = true }
+anyhow = { workspace = true }
+clap = { workspace = true }
+rand = { workspace = true }
--- a/examples/fleet_load_test/src/main.rs
+++ b/examples/fleet_load_test/src/main.rs
@@ -0,0 +1,551 @@
+//! Load test for the IoT operator's `fleet_aggregator`.
+//!
+//! Simulates N devices across M Deployment CRs, each device pushing
+//! a `DeploymentState` update to NATS every `--tick-ms`. Measures
+//! throughput on both sides (devices → NATS and operator → kube
+//! apiserver) and, at the end of the run, verifies each CR's
+//! `.status.aggregate` counters sum to its expected group size (and
+//! that `matched_device_count` equals that size — i.e. every
+//! registered device got picked up by the CR's label selector).
+//!
+//! Assumes an already-running stack:
+//!   - NATS reachable at `--nats-url`
+//!   - k8s cluster with the operator's CRD installed (KUBECONFIG)
+//!   - the operator process running against the same NATS + cluster
+//!
+//! The `fleet/scripts/smoke-a4.sh` script brings all three up — pass
+//! `--hold` to leave them running, then run this binary.
+//!
+//! Typical invocation:
+//!
+//!     cargo run -q -p example_fleet_load_test -- \
+//!         --namespace fleet-load \
+//!         --groups 55,5,5,5,5,5,5,5,5,5 \
+//!         --tick-ms 1000 \
+//!         --duration-s 60
+
+use anyhow::{Context, Result};
+use async_nats::jetstream::{self, kv};
+use chrono::Utc;
+use clap::Parser;
+use harmony::modules::fleet::operator::{Deployment, DeploymentSpec, Rollout, RolloutStrategy};
+use harmony::modules::podman::{PodmanService, PodmanV0Score, ReconcileScore};
+use harmony_reconciler_contracts::{
+    BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, DeploymentName,
+    DeploymentState, DeviceInfo, HeartbeatPayload, Id, Phase, device_heartbeat_key,
+    device_info_key, device_state_key,
+};
+use k8s_openapi::api::core::v1::Namespace;
+use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
+use kube::Client;
+use kube::api::{Api, DeleteParams, Patch, PatchParams, PostParams};
+use rand::Rng;
+use std::collections::BTreeMap;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::time::{Duration, Instant};
+use tokio::task::JoinSet;
+
+#[derive(Parser, Debug, Clone)]
+#[command(
+    name = "fleet_load_test",
+    about = "Synthetic load for the IoT operator's fleet_aggregator"
+)]
+struct Cli {
+    /// NATS URL (same one the operator connects to).
+    #[arg(long, default_value = "nats://localhost:4222")]
+    nats_url: String,
+
+    /// k8s namespace for the load-test Deployment CRs. Created if
+    /// missing.
+    #[arg(long, default_value = "fleet-load")]
+    namespace: String,
+
+    /// Group shape — comma-separated device counts, one per CR.
+    /// Default: 100 devices over 10 groups (1 × 55 + 9 × 5).
+    #[arg(long, default_value = "55,5,5,5,5,5,5,5,5,5")]
+    groups: String,
+
+    /// Per-device tick in ms. Each tick publishes one DeploymentState.
+    #[arg(long, default_value_t = 1000)]
+    tick_ms: u64,
+
+    /// Heartbeat cadence in seconds (separate from the state tick).
+    #[arg(long, default_value_t = 30)]
+    heartbeat_s: u64,
+
+    /// Total run duration in seconds before tearing down.
+    #[arg(long, default_value_t = 60)]
+    duration_s: u64,
+
+    /// Report throughput every N seconds.
+    #[arg(long, default_value_t = 5)]
+    report_s: u64,
+
+    /// Keep the CRs + KV entries in place after the run instead of
+    /// deleting them. Useful with HOLD=1 to inspect the steady-state
+    /// aggregate after the load finishes.
+    #[arg(long)]
+    keep: bool,
+}
+
+/// Metrics collected across all device tasks.
+#[derive(Default)]
+struct Counters {
+    state_writes: AtomicU64,
+    heartbeat_writes: AtomicU64,
+    errors: AtomicU64,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+        .init();
+
+    let cli = Cli::parse();
+    let group_sizes = parse_groups(&cli.groups)?;
+    let total: usize = group_sizes.iter().sum();
+
+    tracing::info!(
+        devices = total,
+        groups = group_sizes.len(),
+        shape = ?group_sizes,
+        tick_ms = cli.tick_ms,
+        duration_s = cli.duration_s,
+        "fleet_load_test starting"
+    );
+
+    // --- NATS setup ----------------------------------------------------------
+    let nc = async_nats::connect(&cli.nats_url)
+        .await
+        .with_context(|| format!("connecting to NATS at {}", cli.nats_url))?;
+    let js = jetstream::new(nc);
+    let info_bucket = open_bucket(&js, BUCKET_DEVICE_INFO).await?;
+    let state_bucket = open_bucket(&js, BUCKET_DEVICE_STATE).await?;
+    let heartbeat_bucket = open_bucket(&js, BUCKET_DEVICE_HEARTBEAT).await?;
+
+    // --- kube setup ----------------------------------------------------------
+    let client = Client::try_default().await.context("kube client")?;
+    ensure_namespace(&client, &cli.namespace).await?;
+    let deployments: Api<Deployment> = Api::namespaced(client.clone(), &cli.namespace);
+
+    // --- plan groups + device ids --------------------------------------------
+    let plan = build_plan(&group_sizes);
+    apply_crs(&deployments, &plan).await?;
+    publish_device_infos(&info_bucket, &plan).await?;
+
+    // --- spawn simulators ----------------------------------------------------
+    let counters = Arc::new(Counters::default());
+    let mut sims = JoinSet::new();
+
+    let tick = Duration::from_millis(cli.tick_ms);
+    let hb_tick = Duration::from_secs(cli.heartbeat_s);
+    for device in &plan.devices {
+        let device = Arc::new(device.clone());
+        sims.spawn(simulate_state_loop(
+            device.clone(),
+            state_bucket.clone(),
+            counters.clone(),
+            tick,
+        ));
+        sims.spawn(simulate_heartbeat_loop(
+            device.clone(),
+            heartbeat_bucket.clone(),
+            counters.clone(),
+            hb_tick,
+        ));
+    }
+
+    // --- metrics reporter ----------------------------------------------------
+    let report_tick = Duration::from_secs(cli.report_s);
+    let reporter_counters = counters.clone();
+    let reporter = tokio::spawn(async move {
+        let mut ticker = tokio::time::interval(report_tick);
+        ticker.tick().await; // skip immediate fire
+        let mut prev_state = 0u64;
+        let mut prev_hb = 0u64;
+        loop {
+            ticker.tick().await;
+            let s = reporter_counters.state_writes.load(Ordering::Relaxed);
+            let h = reporter_counters.heartbeat_writes.load(Ordering::Relaxed);
+            let e = reporter_counters.errors.load(Ordering::Relaxed);
+            let dt = report_tick.as_secs_f64();
+            let ss = (s - prev_state) as f64 / dt;
+            let hh = (h - prev_hb) as f64 / dt;
+            tracing::info!(
+                state_writes_total = s,
+                state_writes_per_s = format!("{ss:.1}"),
+                heartbeats_total = h,
+                heartbeats_per_s = format!("{hh:.1}"),
+                errors = e,
+                "load"
+            );
+            prev_state = s;
+            prev_hb = h;
+        }
+    });
+
+    // --- run for duration ----------------------------------------------------
+    let started = Instant::now();
+    tokio::time::sleep(Duration::from_secs(cli.duration_s)).await;
+    reporter.abort();
+    sims.shutdown().await;
+    let elapsed = started.elapsed();
+
+    let s = counters.state_writes.load(Ordering::Relaxed);
+    let h = counters.heartbeat_writes.load(Ordering::Relaxed);
+    let e = counters.errors.load(Ordering::Relaxed);
+    tracing::info!(
+        elapsed_s = format!("{:.1}", elapsed.as_secs_f64()),
+        state_writes_total = s,
+        state_writes_per_s = format!("{:.1}", s as f64 / elapsed.as_secs_f64()),
+        heartbeats_total = h,
+        errors = e,
+        "run complete"
+    );
+
+    // --- give the aggregator a second to drain --------------------------------
+    tokio::time::sleep(Duration::from_secs(2)).await;
+
+    // --- verify CR status aggregates -----------------------------------------
+    //
+    // With selector-based matching there's a second axis we want to check:
+    // `matched_device_count` must equal the expected group size (selector
+    // actually resolved every registered Device), AND the phase counters
+    // must sum to it.
+    let mut all_ok = true;
+    for group in &plan.groups {
+        let cr = deployments.get(&group.cr_name).await?;
+        let Some(status) = cr.status.as_ref().and_then(|s| s.aggregate.as_ref()) else {
+            tracing::warn!(cr = %group.cr_name, "aggregate missing on CR status");
+            all_ok = false;
+            continue;
+        };
+        let total_reported = status.succeeded + status.failed + status.pending;
+        let expected = group.devices.len() as u32;
+        let ok = status.matched_device_count == expected && total_reported == expected;
+        if !ok {
+            all_ok = false;
+        }
+        tracing::info!(
+            cr = %group.cr_name,
+            expected_devices = expected,
+            matched = status.matched_device_count,
+            succeeded = status.succeeded,
+            failed = status.failed,
+            pending = status.pending,
+            total = total_reported,
+            ok,
+            "cr status"
+        );
+    }
+
+    if !cli.keep {
+        tracing::info!("cleanup: deleting CRs + KV entries");
+        for group in &plan.groups {
+            let _ = deployments
+                .delete(&group.cr_name, &DeleteParams::default())
+                .await;
+        }
+        for device in &plan.devices {
+            let _ = state_bucket
+                .delete(&device_state_key(
+                    &device.device_id,
+                    &DeploymentName::try_new(&device.cr_name).unwrap(),
+                ))
+                .await;
+            let _ = info_bucket
+                .delete(&device_info_key(&device.device_id))
+                .await;
+            let _ = heartbeat_bucket
+                .delete(&device_heartbeat_key(&device.device_id))
+                .await;
+        }
+    }
+
+    if all_ok {
+        tracing::info!("PASS — all CR aggregates match device counts");
+        Ok(())
+    } else {
+        anyhow::bail!("FAIL — at least one CR aggregate did not sum to its target device count")
+    }
+}
+
+/// Parse the `--groups` argument: a comma-separated list of group sizes.
+///
+/// Each token is trimmed before parsing. Empty tokens (an empty argument, a
+/// trailing comma, `,,`) are skipped: `str::split` always yields at least one
+/// item, so without the filter an empty argument would fail with a bare
+/// integer-parse error and the "at least one size" check below could never
+/// trigger.
+///
+/// # Errors
+/// Fails when a token is not a valid `usize` (the offending token is named in
+/// the error context) or when no size is left after skipping empty tokens.
+fn parse_groups(s: &str) -> Result<Vec<usize>> {
+    let out = s
+        .split(',')
+        .map(str::trim)
+        .filter(|t| !t.is_empty())
+        .map(|t| {
+            t.parse::<usize>()
+                .with_context(|| format!("parsing --groups: invalid size '{t}'"))
+        })
+        .collect::<Result<Vec<usize>>>()?;
+    if out.is_empty() {
+        anyhow::bail!("--groups must have at least one size");
+    }
+    Ok(out)
+}
+
+/// A single simulated device and the CR it belongs to.
+#[derive(Clone)]
+struct DevicePlan {
+    /// Device identifier; `build_plan` generates these as `load-dev-<n>`.
+    device_id: String,
+    /// Name of the Deployment CR (group) this device is assigned to.
+    cr_name: String,
+}
+
+/// One Deployment CR of the load test together with the ids of the devices
+/// assigned to it.
+#[derive(Clone)]
+struct GroupPlan {
+    /// CR name; `build_plan` generates these as `load-group-<index>`.
+    cr_name: String,
+    /// Ids of the devices in this group (same strings as
+    /// `DevicePlan::device_id`).
+    devices: Vec<String>,
+}
+
+/// The full synthetic fleet produced by `build_plan`: the same devices seen
+/// once as a flat list and once grouped per CR.
+struct Plan {
+    /// Every simulated device, in generation order across all groups.
+    devices: Vec<DevicePlan>,
+    /// One entry per requested group size, in input order.
+    groups: Vec<GroupPlan>,
+}
+
+/// Lay out the synthetic fleet: one `GroupPlan` per entry of `group_sizes`
+/// and one `DevicePlan` per device, numbered consecutively across groups.
+///
+/// Names are zero-padded so they sort sensibly in kubectl: group names are
+/// `load-group-<index>` (0-based, at least 2 digits) and device ids are
+/// `load-dev-<n>` (1-based, at least 5 digits); the widths grow with the
+/// group count and the total device count respectively.
+fn build_plan(group_sizes: &[usize]) -> Plan {
+    let cr_width = group_sizes.len().to_string().len().max(2);
+    let dev_width = group_sizes
+        .iter()
+        .sum::<usize>()
+        .to_string()
+        .len()
+        .max(5);
+
+    let mut devices = Vec::new();
+    let mut groups = Vec::new();
+    // Running device serial, shared by all groups; the first device is 1.
+    let mut serial = 0usize;
+    for (index, &size) in group_sizes.iter().enumerate() {
+        let cr_name = format!("load-group-{index:0cr_width$}");
+        let members: Vec<String> = (0..size)
+            .map(|_| {
+                serial += 1;
+                format!("load-dev-{serial:0dev_width$}")
+            })
+            .collect();
+        devices.extend(members.iter().map(|device_id| DevicePlan {
+            device_id: device_id.clone(),
+            cr_name: cr_name.clone(),
+        }));
+        groups.push(GroupPlan {
+            cr_name,
+            devices: members,
+        });
+    }
+    Plan { devices, groups }
+}
+
+/// Open the JetStream KV bucket named `bucket` via `create_key_value`,
+/// requesting a history depth of 1.
+///
+/// `bucket` is copied into the config, so it only has to outlive the call;
+/// no `'static` lifetime is demanded from callers.
+///
+/// # Errors
+/// Propagates the JetStream error, annotated with the bucket name.
+async fn open_bucket(js: &jetstream::Context, bucket: &str) -> Result<kv::Store> {
+    js.create_key_value(kv::Config {
+        bucket: bucket.to_string(),
+        history: 1,
+        ..Default::default()
+    })
+    .await
+    .with_context(|| format!("opening KV bucket '{bucket}'"))
+}
+
+/// Make sure the namespace `name` exists, creating it when a lookup finds
+/// nothing.
+///
+/// A 409 response to the create call is treated as success: it means the
+/// namespace appeared between the lookup and the create.
+async fn ensure_namespace(client: &Client, name: &str) -> Result<()> {
+    let namespaces: Api<Namespace> = Api::all(client.clone());
+    let already_there = namespaces.get_opt(name).await?.is_some();
+    if !already_there {
+        let mut wanted = Namespace::default();
+        wanted.metadata.name = Some(name.to_string());
+        if let Err(err) = namespaces.create(&PostParams::default(), &wanted).await {
+            let lost_race = matches!(&err, kube::Error::Api(resp) if resp.code == 409);
+            if !lost_race {
+                return Err(err.into());
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Server-side-apply one Deployment CR per group in `plan`, keeping at most
+/// `CONCURRENCY` applies in flight.
+///
+/// The first failed apply (or panicked/cancelled task) is returned
+/// immediately; on success the CR count and elapsed time are logged.
+async fn apply_crs(api: &Api<Deployment>, plan: &Plan) -> Result<()> {
+    // Cap concurrency so we don't overwhelm the apiserver on large
+    // fleets. 32 in-flight applies is well under typical apiserver
+    // QPS limits and keeps the startup latency predictable.
+    const CONCURRENCY: usize = 32;
+
+    let params = PatchParams::apply("fleet-load-test").force();
+    let started = Instant::now();
+
+    let mut pending = plan.groups.iter();
+    let mut running: JoinSet<Result<String>> = JoinSet::new();
+
+    // Prime the window with the first batch...
+    for group in pending.by_ref().take(CONCURRENCY) {
+        running.spawn(apply_one_cr(api.clone(), group.clone(), params.clone()));
+    }
+    // ...then start one more apply each time one finishes.
+    while let Some(joined) = running.join_next().await {
+        joined??;
+        if let Some(group) = pending.next() {
+            running.spawn(apply_one_cr(api.clone(), group.clone(), params.clone()));
+        }
+    }
+
+    tracing::info!(
+        crs = plan.groups.len(),
+        elapsed_ms = started.elapsed().as_millis() as u64,
+        "applied Deployment CRs"
+    );
+    Ok(())
+}
+
+/// Build the Deployment CR for `group` and server-side-apply it with
+/// `params`. Returns the CR name on success.
+async fn apply_one_cr(
+    api: Api<Deployment>,
+    group: GroupPlan,
+    params: PatchParams,
+) -> Result<String> {
+    let GroupPlan { cr_name, .. } = group;
+
+    // Selector-based targeting: every Device CR in this group carries
+    // a `group=<cr_name>` label (we publish that on DeviceInfo; the
+    // operator reflects it into Device.metadata.labels).
+    let target_selector = LabelSelector {
+        match_labels: Some(BTreeMap::from([("group".to_string(), cr_name.clone())])),
+        match_expressions: None,
+    };
+
+    // Score content doesn't matter — no real agents consume
+    // the desired-state here. The aggregator still writes KV
+    // for each matched device; that's wire noise we accept
+    // as part of the realism.
+    let service = PodmanService {
+        name: cr_name.clone(),
+        image: "docker.io/library/nginx:alpine".to_string(),
+        ports: vec!["8080:80".to_string()],
+        env: vec![],
+        volumes: vec![],
+        restart_policy: Default::default(),
+    };
+    let spec = DeploymentSpec {
+        target_selector,
+        score: ReconcileScore::PodmanV0(PodmanV0Score {
+            services: vec![service],
+        }),
+        rollout: Rollout {
+            strategy: RolloutStrategy::Immediate,
+        },
+    };
+
+    let cr = Deployment::new(&cr_name, spec);
+    api.patch(&cr_name, &params, &Patch::Apply(&cr))
+        .await
+        .with_context(|| format!("applying CR {}", cr_name))?;
+    Ok(cr_name)
+}
+
+/// Write a `DeviceInfo` record for every device in `plan` into `bucket`,
+/// keeping at most `CONCURRENCY` puts in flight.
+///
+/// Stops at the first failed put (or panicked/cancelled task); on success
+/// the device count and elapsed time are logged.
+async fn publish_device_infos(bucket: &kv::Store, plan: &Plan) -> Result<()> {
+    const CONCURRENCY: usize = 64;
+    let started = Instant::now();
+
+    let mut remaining = plan.devices.iter();
+    let mut tasks: JoinSet<Result<()>> = JoinSet::new();
+
+    // Fill the window first, then top it up as individual puts complete.
+    for device in remaining.by_ref().take(CONCURRENCY) {
+        tasks.spawn(publish_one_info(bucket.clone(), device.clone()));
+    }
+    while let Some(finished) = tasks.join_next().await {
+        finished??;
+        if let Some(device) = remaining.next() {
+            tasks.spawn(publish_one_info(bucket.clone(), device.clone()));
+        }
+    }
+
+    tracing::info!(
+        devices = plan.devices.len(),
+        elapsed_ms = started.elapsed().as_millis() as u64,
+        "seeded DeviceInfo"
+    );
+    Ok(())
+}
+
+/// Serialize a `DeviceInfo` for `device` (labelled `group=<cr_name>`, no
+/// inventory, timestamped now) as JSON and put it under the device's info
+/// key in `bucket`.
+async fn publish_one_info(bucket: kv::Store, device: DevicePlan) -> Result<()> {
+    let mut labels = BTreeMap::new();
+    labels.insert("group".to_string(), device.cr_name.clone());
+
+    let record = DeviceInfo {
+        device_id: Id::from(device.device_id.clone()),
+        labels,
+        inventory: None,
+        updated_at: Utc::now(),
+    };
+
+    let key = device_info_key(&device.device_id);
+    let encoded = serde_json::to_vec(&record)?;
+    bucket.put(&key, encoded.into()).await?;
+    Ok(())
+}
+
+/// Simulate one device's deployment-state reporting: on every `tick`, write
+/// a `DeploymentState` with a freshly picked phase to the device's state key
+/// in `bucket`. The loop has no exit of its own; it runs until the task is
+/// dropped or aborted.
+///
+/// Successful puts bump `counters.state_writes`; serialization or put
+/// failures bump `counters.errors`. A `cr_name` rejected by
+/// `DeploymentName::try_new` is also counted as an error and logged before
+/// returning, so a device that never starts reporting shows up in the
+/// metrics instead of silently going missing.
+async fn simulate_state_loop(
+    device: Arc<DevicePlan>,
+    bucket: kv::Store,
+    counters: Arc<Counters>,
+    tick: Duration,
+) {
+    let Ok(deployment) = DeploymentName::try_new(&device.cr_name) else {
+        counters.errors.fetch_add(1, Ordering::Relaxed);
+        tracing::warn!(
+            device = %device.device_id,
+            cr = %device.cr_name,
+            "invalid deployment name; state simulation not started"
+        );
+        return;
+    };
+    let state_key = device_state_key(&device.device_id, &deployment);
+    let mut ticker = tokio::time::interval(tick);
+    // If a put runs long, push later ticks back instead of bursting to
+    // catch up.
+    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+    loop {
+        ticker.tick().await;
+        let phase = pick_phase();
+        let ds = DeploymentState {
+            device_id: Id::from(device.device_id.clone()),
+            deployment: deployment.clone(),
+            phase,
+            last_event_at: Utc::now(),
+            // Only failed states carry an error message.
+            last_error: matches!(phase, Phase::Failed)
+                .then(|| format!("synthetic failure @{}", device.device_id)),
+        };
+        match serde_json::to_vec(&ds) {
+            Ok(payload) => match bucket.put(&state_key, payload.into()).await {
+                Ok(_) => {
+                    counters.state_writes.fetch_add(1, Ordering::Relaxed);
+                }
+                Err(_) => {
+                    counters.errors.fetch_add(1, Ordering::Relaxed);
+                }
+            },
+            Err(_) => {
+                counters.errors.fetch_add(1, Ordering::Relaxed);
+            }
+        }
+    }
+}
+
+/// Simulate one device's heartbeat: on every `tick`, write a
+/// `HeartbeatPayload` stamped with the current time to the device's
+/// heartbeat key in `bucket`. The loop has no exit of its own; it runs until
+/// the task is dropped or aborted.
+///
+/// Successful puts bump `counters.heartbeat_writes`. Both put failures and
+/// serialization failures bump `counters.errors`, matching how
+/// `simulate_state_loop` accounts for its failures.
+async fn simulate_heartbeat_loop(
+    device: Arc<DevicePlan>,
+    bucket: kv::Store,
+    counters: Arc<Counters>,
+    tick: Duration,
+) {
+    let hb_key = device_heartbeat_key(&device.device_id);
+    let mut ticker = tokio::time::interval(tick);
+    // If a put runs long, push later ticks back instead of bursting to
+    // catch up.
+    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+    loop {
+        ticker.tick().await;
+        let hb = HeartbeatPayload {
+            device_id: Id::from(device.device_id.clone()),
+            at: Utc::now(),
+        };
+        let written = match serde_json::to_vec(&hb) {
+            Ok(payload) => bucket.put(&hb_key, payload.into()).await.is_ok(),
+            // A payload that cannot be encoded is a failed heartbeat too.
+            Err(_) => false,
+        };
+        if written {
+            counters.heartbeat_writes.fetch_add(1, Ordering::Relaxed);
+        } else {
+            counters.errors.fetch_add(1, Ordering::Relaxed);
+        }
+    }
+}
+
+/// Draw a random phase for a synthetic state update.
+///
+/// The split mirrors a healthy-ish fleet — 80% `Running`, 10% `Failed`,
+/// 10% `Pending` — so the aggregator's transition handling and
+/// `last_error` logic both get exercised.
+fn pick_phase() -> Phase {
+    let roll: u32 = rand::rng().random_range(0..100);
+    if roll < 80 {
+        Phase::Running
+    } else if roll < 90 {
+        Phase::Failed
+    } else {
+        Phase::Pending
+    }
+}
--- a/examples/fleet_nats_install/Cargo.toml
+++ b/examples/fleet_nats_install/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "example_fleet_nats_install"
+version.workspace = true
+edition = "2024"
+license.workspace = true
+
+[[bin]]
+name = "fleet_nats_install"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony", default-features = false }
+tokio.workspace = true
+anyhow.workspace = true
+clap.workspace = true
--- a/examples/fleet_nats_install/src/main.rs
+++ b/examples/fleet_nats_install/src/main.rs
@@ -0,0 +1,91 @@
+//! Install a single-node NATS server into the cluster `KUBECONFIG`
+//! points at, using harmony's `NatsBasicScore` + `K8sBareTopology`.
+//!
+//! This binary is the glue between the smoke harness (`smoke-a4.sh`)
+//! and the framework Score. Typical usage from a demo script:
+//!
+//!     KUBECONFIG=$KUBECFG cargo run -q -p example_fleet_nats_install \
+//!         -- --namespace fleet-system --name fleet-nats --expose load-balancer
+//!
+//! Behaviour:
+//!   - Ensures the target namespace exists
+//!   - Deploys a single-replica NATS server (JetStream on)
+//!   - Exposes it as a Service (LoadBalancer by default so off-cluster
+//!     clients like a libvirt VM agent can reach it through the
+//!     k3d loadbalancer port mapping; `--expose` selects NodePort or
+//!     ClusterIP instead)
+//!
+//! For production / HA / TLS, graduate to `NatsK8sScore`.
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use harmony::inventory::Inventory;
+use harmony::modules::k8s::K8sBareTopology;
+use harmony::modules::nats::NatsBasicScore;
+use harmony::score::Score;
+
+// Command-line options for the installer. The `///` comments on the fields
+// are picked up by clap as the `--help` text, so they are user-facing and
+// should be edited with that in mind.
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet_nats_install",
+    about = "Install single-node NATS (JetStream) via NatsBasicScore"
+)]
+struct Cli {
+    /// Target namespace. Created if missing.
+    #[arg(long, default_value = "fleet-system")]
+    namespace: String,
+    /// Resource name for the NATS Deployment + Service.
+    #[arg(long, default_value = "fleet-nats")]
+    name: String,
+    /// Service exposure mode. `load-balancer` pairs with k3d's
+    /// `-p PORT:PORT@loadbalancer` port mapping (direct service-
+    /// port routing). `node-port` demands a port in the apiserver's
+    /// nodeport range (default 30000-32767). `cluster-ip` keeps
+    /// NATS in-cluster only.
+    #[arg(long, value_enum, default_value_t = ExposeMode::LoadBalancer)]
+    expose: ExposeMode,
+    /// NodePort when `--expose=node-port`. Must be in the cluster's
+    /// nodeport range (default 30000-32767). Ignored otherwise.
+    #[arg(long, default_value_t = 30422)]
+    node_port: i32,
+    /// Override the NATS container image.
+    #[arg(long)]
+    image: Option<String>,
+}
+
+// How the NATS Service is exposed; selected with `--expose` and translated
+// into `NatsBasicScore` builder calls in `main` (`ClusterIp` leaves the
+// freshly constructed score untouched).
+#[derive(Clone, Debug, clap::ValueEnum)]
+enum ExposeMode {
+    ClusterIp,
+    NodePort,
+    LoadBalancer,
+}
+
+/// Parse the CLI, build a `NatsBasicScore` from it and execute the score's
+/// interpret against the cluster described by the kubeconfig, printing the
+/// outcome on success.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let cli = Cli::parse();
+
+    let topology = K8sBareTopology::from_kubeconfig("fleet-nats-install")
+        .await
+        .map_err(|e| anyhow::anyhow!(e))
+        .context("building K8sBareTopology from KUBECONFIG")?;
+
+    // Layer the optional settings onto the base score one at a time.
+    let base = NatsBasicScore::new(&cli.name, &cli.namespace);
+    let exposed = match cli.expose {
+        ExposeMode::ClusterIp => base,
+        ExposeMode::NodePort => base.node_port(cli.node_port),
+        ExposeMode::LoadBalancer => base.load_balancer(),
+    };
+    let score = match cli.image {
+        Some(image) => exposed.image(image),
+        None => exposed,
+    };
+
+    let outcome = Score::<K8sBareTopology>::create_interpret(&score)
+        .execute(&Inventory::empty(), &topology)
+        .await
+        .map_err(|e| anyhow::anyhow!("execute NatsBasicScore: {e}"))?;
+
+    println!(
+        "NATS installed: namespace={}, name={}, expose={:?}  outcome={outcome:?}",
+        cli.namespace, cli.name, cli.expose
+    );
+    Ok(())
+}
--- a/examples/fleet_rpi_setup/Cargo.toml
+++ b/examples/fleet_rpi_setup/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "example_fleet_rpi_setup"
+version.workspace = true
+edition = "2024"
+license.workspace = true
+
+[[bin]]
+name = "fleet_rpi_setup"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony" }
+harmony_cli = { path = "../../harmony_cli" }
+harmony_secret = { path = "../../harmony_secret" }
+harmony_types = { path = "../../harmony_types" }
+tokio.workspace = true
+log.workspace = true
+anyhow.workspace = true
+clap.workspace = true
+reqwest = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
+base64 = "0.22"
--- a/examples/fleet_rpi_setup/env.sh
+++ b/examples/fleet_rpi_setup/env.sh
@@ -0,0 +1,4 @@
+export HARMONY_SECRET_NAMESPACE=fleet-rpi-setup
+export HARMONY_SECRET_STORE=file
+export HARMONY_DATABASE_URL=sqlite://harmony_fleet_rpi_setup.sqlite
+export RUST_LOG=info
--- a/examples/fleet_rpi_setup/src/main.rs
+++ b/examples/fleet_rpi_setup/src/main.rs
@@ -0,0 +1,272 @@
+//! Onboard a real, already-booted Raspberry Pi into the IoT fleet.
+//!
+//! This is the physical-device sibling of `fleet_vm_setup`: the VM
+//! provisioning step is gone (you booted Pi OS yourself with rpi-imager
+//! and preloaded an SSH key), and we go straight to applying
+//! `FleetDeviceSetupScore` over SSH. That score installs podman +
+//! systemd-container, creates the `fleet-agent` user, drops the agent
+//! binary + config + systemd unit, and starts the service.
+//!
+//! Source `env.sh` first (sets `HARMONY_SECRET_NAMESPACE`,
+//! `HARMONY_SECRET_STORE`, `HARMONY_DATABASE_URL`, `RUST_LOG`), then:
+//!
+//! ```bash
+//! source examples/fleet_rpi_setup/env.sh
+//! cargo run -p example_fleet_rpi_setup -- --pi-host <ip> ...
+//! ```
+//!
+//! Output rendering (per-step traces and the final recap) is handled
+//! by `harmony_cli::run` — same as every other harmony example. The
+//! score's `Outcome.details` is structured for that path.
+//!
+//! Prereqs on the Pi (one-time, via rpi-imager or manual):
+//!   - SSH server enabled
+//!   - An admin user with sudo. Passwordless sudo is detected and
+//!     used silently; otherwise the example prompts for a sudo
+//!     password via `SecretManager` and caches it for next runs.
+//!   - Your driver-machine SSH public key in that user's
+//!     `~/.ssh/authorized_keys`
+//!
+//! Prereqs on the driver machine (where this binary runs):
+//!   - Python 3 + `python3-venv` (Ansible is auto-bootstrapped into a venv)
+//!   - A cross-compiled `fleet-agent` binary for aarch64
+
+mod zitadel_bootstrap;
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use harmony::config::secret::SudoPassword;
+use harmony::inventory::Inventory;
+use harmony::modules::fleet::{FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore};
+use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv, ssh_exec};
+use harmony_secret::SecretManager;
+use harmony_types::id::Id;
+use log::info;
+use std::path::PathBuf;
+
+// Command-line options for the Pi onboarding run. The `///` comments on the
+// fields are picked up by clap as the `--help` text.
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet_rpi_setup",
+    about = "Onboard a physical Raspberry Pi into the IoT fleet"
+)]
+struct Cli {
+    /// IP address of the Pi (e.g. 192.168.1.42).
+    #[arg(long)]
+    pi_host: String,
+    /// SSH admin user on the Pi with sudo rights. Passwordless sudo is
+    /// used when available; otherwise the sudo password is fetched from
+    /// the secret store (prompting on first use).
+    #[arg(long, default_value = "pi")]
+    pi_user: String,
+    /// Path to the SSH private key whose public half is in the Pi
+    /// user's `~/.ssh/authorized_keys`.
+    #[arg(long, default_value = "~/.ssh/id_ed25519")]
+    ssh_key: PathBuf,
+    /// Device id the agent will announce to NATS. Defaults to a fresh
+    /// `Id` (sortable hex timestamp + random suffix).
+    #[arg(long)]
+    device_id: Option<String>,
+    /// Routing labels for `Deployment.spec.targetSelector` matching.
+    /// Comma-separated `key=value` pairs. At least one is required.
+    #[arg(long, default_value = "group=group-a,arch=aarch64")]
+    labels: String,
+    /// Path to the cross-compiled aarch64 fleet-agent binary on the
+    /// driver machine. Uploaded to `/usr/local/bin/fleet-agent`.
+    #[arg(long)]
+    agent_binary: PathBuf,
+    /// NATS URL the agent should connect to.
+    #[arg(long)]
+    nats_url: String,
+    /// Shared NATS username — used in `toml-shared` mode (no SSO).
+    /// Ignored when `--bootstrap-token` is set.
+    #[arg(long, default_value = "smoke")]
+    nats_user: String,
+    /// Shared NATS password — used in `toml-shared` mode (no SSO).
+    /// Ignored when `--bootstrap-token` is set.
+    #[arg(long, default_value = "smoke")]
+    nats_pass: String,
+    /// Zitadel admin Personal Access Token used to provision a
+    /// per-device machine user + role grant + JWT key on this Pi.
+    /// When set, the agent's NATS auth flips from `toml-shared` to
+    /// `zitadel-jwt` and the issued machine key is dropped onto the
+    /// Pi at `/etc/fleet-agent/zitadel-key.json`. The PAT itself is
+    /// used only by this CLI invocation — it never lands on the Pi.
+    #[arg(long, env = "HARMONY_ZITADEL_ADMIN_PAT")]
+    bootstrap_token: Option<String>,
+    /// Externally-visible Zitadel issuer URL (e.g.
+    /// `https://zitadel.customer1.nationtech.io`). Required when
+    /// `--bootstrap-token` is set.
+    #[arg(long)]
+    zitadel_issuer_url: Option<String>,
+    /// Zitadel project ID hosting the fleet roles. Required when
+    /// `--bootstrap-token` is set. Used as both the JWT-bearer
+    /// audience scope target and the role-claim path qualifier.
+    #[arg(long)]
+    zitadel_project_id: Option<String>,
+    /// Zitadel role key to grant the per-device machine user.
+    /// Defaults to `device` (matches the auth callout's
+    /// `device_role` config).
+    #[arg(long, default_value = "device")]
+    zitadel_device_role: String,
+    /// Whether the agent's HTTP client to Zitadel accepts invalid
+    /// TLS certs. Local-dev escape hatch; default false.
+    #[arg(long)]
+    danger_accept_invalid_certs: bool,
+}
+
+/// Onboard one Pi: bootstrap the Ansible venv, work out SSH/sudo
+/// credentials, build the `FleetDeviceSetupScore` from the CLI flags and
+/// hand it to `harmony_cli::run`.
+#[tokio::main]
+async fn main() -> Result<()> {
+    harmony_cli::cli_logger::init();
+    let cli = Cli::parse();
+
+    ensure_ansible_venv()
+        .await
+        .map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?;
+
+    // An explicit --device-id wins; otherwise mint a fresh id.
+    let device_id = match cli.device_id.clone() {
+        Some(raw) => Id::from(raw),
+        None => Id::default(),
+    };
+    let pi_ip = cli
+        .pi_host
+        .parse()
+        .with_context(|| format!("--pi-host '{}' is not a valid IP address", cli.pi_host))?;
+
+    let mut creds = SshCredentials {
+        user: cli.pi_user.clone(),
+        private_key_path: expand_tilde(&cli.ssh_key),
+        // Pi OS Lite ships /usr/bin/python3 — skip auto-discovery.
+        remote_python: Some("/usr/bin/python3".to_string()),
+        sudo_password: None,
+    };
+
+    // If the Pi doesn't have passwordless sudo, fetch the password
+    // through SecretManager (same flow other scores use for SSH keys
+    // etc. — see harmony_secret/src/lib.rs:145). First run prompts;
+    // subsequent runs reuse the cached value. Probe with `sudo -n`
+    // first so we don't prompt the operator for a password they
+    // don't need.
+    let sudo_probe = ssh_exec(pi_ip, &creds, "sudo -n true", None)
+        .await
+        .map_err(|e| anyhow::anyhow!("sudo probe: {e}"))?;
+    let needs_sudo_password = sudo_probe.rc != 0;
+    if needs_sudo_password {
+        info!("device requires a sudo password — fetching from secret store");
+        let stored = SecretManager::get_or_prompt::<SudoPassword>()
+            .await
+            .map_err(|e| anyhow::anyhow!("get sudo password: {e}"))?;
+        creds.sudo_password = Some(stored.password);
+    }
+
+    let topology = LinuxHostTopology::new(format!("rpi-{}", cli.pi_host), pi_ip, creds);
+
+    let labels = parse_labels(&cli.labels)?;
+    let auth = build_auth(&cli, &device_id).await?;
+    let setup = FleetDeviceSetupConfig {
+        device_id: device_id.clone(),
+        labels,
+        nats_urls: vec![cli.nats_url.clone()],
+        auth,
+        agent_binary_path: cli.agent_binary.clone(),
+        hosts_entries: vec![],
+    };
+    let score = FleetDeviceSetupScore::new(setup);
+
+    // We have our own clap CLI, so harmony_cli must NOT call
+    // `Args::parse()` (it would choke on --pi-host etc.). Pass an
+    // explicit Args with `yes: true` — the operator already committed
+    // to the run by typing the command, so the extra confirmation
+    // prompt would just add friction.
+    let harmony_args = harmony_cli::Args {
+        yes: true,
+        filter: None,
+        interactive: false,
+        all: true,
+        number: 0,
+        list: false,
+    };
+
+    harmony_cli::run(
+        Inventory::empty(),
+        topology,
+        vec![Box::new(score)],
+        Some(harmony_args),
+    )
+    .await
+    .map_err(|e| anyhow::anyhow!("{e}"))?;
+    Ok(())
+}
+
+/// Decide how the agent on this device authenticates to NATS.
+///
+/// - With `--bootstrap-token`: use the admin PAT to ensure a per-device
+///   Zitadel machine user (plus role grant and JWT key) and return
+///   `FleetDeviceAuth::ZitadelJwt` carrying the key JSON. Both
+///   `--zitadel-issuer-url` and `--zitadel-project-id` must be present.
+///   The PAT is only handed to the bootstrap client here; it is not part
+///   of the returned auth block.
+/// - Without it: return `FleetDeviceAuth::TomlShared` built from
+///   `--nats-user` / `--nats-pass`.
+async fn build_auth(cli: &Cli, device_id: &Id) -> Result<FleetDeviceAuth> {
+    let pat = match cli.bootstrap_token.as_ref() {
+        Some(token) => token.clone(),
+        None => {
+            info!("no --bootstrap-token; using shared NATS user/pass (toml-shared)");
+            return Ok(FleetDeviceAuth::TomlShared {
+                nats_user: cli.nats_user.clone(),
+                nats_pass: cli.nats_pass.clone(),
+            });
+        }
+    };
+
+    let issuer = cli
+        .zitadel_issuer_url
+        .clone()
+        .context("--bootstrap-token requires --zitadel-issuer-url")?;
+    let project_id = cli
+        .zitadel_project_id
+        .clone()
+        .context("--bootstrap-token requires --zitadel-project-id")?;
+
+    info!("bootstrapping Zitadel machine user device-{device_id} on project {project_id}");
+    let accept_invalid_certs = cli.danger_accept_invalid_certs;
+    let machine_user = format!("device-{device_id}");
+    let device_name = device_id.to_string();
+
+    let client = zitadel_bootstrap::ZitadelBootstrap::new(issuer.clone(), pat, accept_invalid_certs);
+    let machine_key_json = client
+        .ensure_device_machine_user(
+            &machine_user,
+            &device_name,
+            &project_id,
+            &cli.zitadel_device_role,
+        )
+        .await
+        .context("Zitadel device bootstrap failed")?;
+
+    Ok(FleetDeviceAuth::ZitadelJwt {
+        machine_key_json,
+        oidc_issuer_url: issuer,
+        audience: project_id,
+        danger_accept_invalid_certs: accept_invalid_certs,
+    })
+}
+
+/// Parse `--labels`: comma-separated `key=value` pairs into a sorted map.
+///
+/// Chunks are trimmed and blank chunks are skipped. Each remaining chunk is
+/// split at its first `=`; key and value are trimmed and must both be
+/// non-empty. A key given twice keeps its last value.
+///
+/// # Errors
+/// Fails on a chunk without `=`, on an empty key or value, and when no pair
+/// is left at all.
+fn parse_labels(raw: &str) -> Result<std::collections::BTreeMap<String, String>> {
+    let mut labels = std::collections::BTreeMap::new();
+    for chunk in raw.split(',') {
+        let piece = chunk.trim();
+        if piece.is_empty() {
+            continue;
+        }
+        let Some((key, value)) = piece.split_once('=') else {
+            anyhow::bail!("label chunk '{piece}' missing '='");
+        };
+        let (key, value) = (key.trim(), value.trim());
+        if key.is_empty() || value.is_empty() {
+            anyhow::bail!("label chunk '{piece}' has empty key or value");
+        }
+        labels.insert(key.to_string(), value.to_string());
+    }
+    if labels.is_empty() {
+        anyhow::bail!("--labels must include at least one key=value pair");
+    }
+    Ok(labels)
+}
+
+/// Expand a leading `~/` in `p` to the value of `$HOME`.
+///
+/// The path is returned unchanged when it does not start with `~/` (a bare
+/// `~` or `~user/...` is left alone) or when `HOME` cannot be read.
+fn expand_tilde(p: &std::path::Path) -> PathBuf {
+    let text = p.to_string_lossy();
+    text.strip_prefix("~/")
+        .and_then(|rest| {
+            std::env::var("HOME")
+                .ok()
+                .map(|home| PathBuf::from(home).join(rest))
+        })
+        .unwrap_or_else(|| p.to_path_buf())
+}
--- a/examples/fleet_rpi_setup/src/zitadel_bootstrap.rs
+++ b/examples/fleet_rpi_setup/src/zitadel_bootstrap.rs
@@ -0,0 +1,247 @@
+//! Per-device Zitadel bootstrap for the Pi onboarding flow.
+//!
+//! Invoked once per Pi from the operator's machine. Uses the admin PAT
+//! given on the CLI to:
+//!
+//! 1. Find or create a machine user `device-${device_id}` in Zitadel.
+//! 2. Find or create a project grant for that user on the device role.
+//! 3. Mint a fresh JSON-typed JWT signing key for that user.
+//!
+//! Returns the JSON keyfile content. The caller drops it onto the Pi
+//! via `FleetDeviceSetupScore`. The admin PAT is held in CLI memory
+//! for the duration of the run only — it never lands on the Pi.
+//!
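+//! User and grant provisioning are idempotent (search, then create), so
+//! re-running for the same device id reuses them. The key is the
+//! exception: every run mints a new one, because Zitadel does not hand
+//! back the private half of an existing key.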
+//!
+//! NOTE: This is intentionally a minimal Management-API client. It
+//! duplicates a small slice of `harmony::modules::zitadel::setup` (the
+//! in-cluster ZitadelSetupScore) because `fleet_rpi_setup` runs on the
+//! operator's machine without a kubeconfig pointing at the Zitadel
+//! cluster. Refactoring the in-cluster Score's HTTP layer into a
+//! reusable client crate is a follow-up.
+
+use anyhow::{Context, Result};
+use base64::Engine;
+use serde::Deserialize;
+
+/// Minimal Zitadel Management-API client used to provision one device's
+/// machine user, project grant and signing key.
+pub struct ZitadelBootstrap {
+    // Base URL of the Zitadel instance. A trailing `/` is tolerated:
+    // `url()` trims it before appending an API path.
+    issuer_url: String,
+    // Admin personal access token, sent as the bearer token on every call.
+    admin_pat: String,
+    // HTTP client built once in `new` and reused for all requests.
+    http: reqwest::Client,
+}
+
+impl ZitadelBootstrap {
+    /// Build a client for the Zitadel instance at `issuer_url`,
+    /// authenticating with `admin_pat`.
+    ///
+    /// Every request gets a 10-second timeout. TLS certificate validation
+    /// is disabled when `danger_accept_invalid_certs` is true.
+    ///
+    /// # Panics
+    /// Panics if the HTTP client cannot be constructed.
+    pub fn new(issuer_url: String, admin_pat: String, danger_accept_invalid_certs: bool) -> Self {
+        let request_timeout = std::time::Duration::from_secs(10);
+        let builder = reqwest::Client::builder()
+            .timeout(request_timeout)
+            .danger_accept_invalid_certs(danger_accept_invalid_certs);
+        Self {
+            issuer_url,
+            admin_pat,
+            http: builder
+                .build()
+                .expect("reqwest client builder is infallible for these settings"),
+        }
+    }
+
+    /// Provision the Zitadel identity for one device and return its
+    /// keyfile JSON (decoded from the base64 `keyDetails` Zitadel returns).
+    ///
+    /// Steps, in order:
+    /// 1. Look the machine user up by `username`; create it when the
+    ///    search finds nothing.
+    /// 2. Look for a grant of that user on `project_id`; create one with
+    ///    `role_key` when none exists.
+    /// 3. Mint a new JSON key for the user.
+    ///
+    /// Steps 1 and 2 are search-then-create, so a re-run reuses what is
+    /// already there. Step 3 is not: a new key is created on every call.
+    ///
+    /// # Errors
+    /// Returns the first failing lookup or creation call; the three
+    /// creation calls carry context naming the step.
+    pub async fn ensure_device_machine_user(
+        &self,
+        username: &str,
+        device_id: &str,
+        project_id: &str,
+        role_key: &str,
+    ) -> Result<String> {
+        let existing_user = self.find_user_by_name(username).await?;
+        let user_id = if let Some(id) = existing_user {
+            id
+        } else {
+            self.create_machine_user(username, device_id)
+                .await
+                .with_context(|| format!("creating machine user {username}"))?
+        };
+        log::info!("[zitadel-bootstrap] machine user {username} → {user_id}");
+
+        // Duplicate grants are rejected by the API (code 6, ALREADY_EXISTS),
+        // so search first and only create when nothing is there.
+        let existing_grant = self.find_user_grant(&user_id, project_id).await?;
+        if existing_grant.is_some() {
+            log::info!("[zitadel-bootstrap] role grant already present");
+        } else {
+            self.create_user_grant(&user_id, project_id, role_key)
+                .await
+                .with_context(|| {
+                    format!("granting role {role_key} on project {project_id} to {username}")
+                })?;
+            log::info!("[zitadel-bootstrap] granted role {role_key} on project {project_id}");
+        }
+
+        // A key is minted unconditionally: Zitadel never returns the private
+        // half of an existing key, so there is nothing to reuse. Older keys
+        // stay valid until they expire; the Pi stops using them because its
+        // keyfile is overwritten on each setup run.
+        self.create_machine_key(&user_id)
+            .await
+            .with_context(|| format!("minting machine key for {username}"))
+    }
+
+    /// Join `path` onto the issuer URL, dropping any trailing `/` from the
+    /// issuer first. `path` is appended verbatim.
+    fn url(&self, path: &str) -> String {
+        let base = self.issuer_url.trim_end_matches('/');
+        [base, path].concat()
+    }
+
+    /// Search Zitadel for a user whose `userName` equals `username`.
+    ///
+    /// Returns the id of the first result whose `userName` matches exactly,
+    /// or `None` when no result matches. A non-success HTTP status is an
+    /// error that includes the response body.
+    async fn find_user_by_name(&self, username: &str) -> Result<Option<String>> {
+        #[derive(Deserialize)]
+        struct SearchResponse {
+            #[serde(default)]
+            result: Vec<UserEntry>,
+        }
+        #[derive(Deserialize)]
+        struct UserEntry {
+            id: String,
+            #[serde(rename = "userName", default)]
+            user_name: Option<String>,
+        }
+
+        let query = serde_json::json!({
+            "queries": [{
+                "userNameQuery": {
+                    "userName": username,
+                    "method": "TEXT_QUERY_METHOD_EQUALS"
+                }
+            }]
+        });
+        let endpoint = self.url("/management/v1/users/_search");
+        let resp = self
+            .http
+            .post(endpoint)
+            .bearer_auth(&self.admin_pat)
+            .json(&query)
+            .send()
+            .await
+            .context("POST users/_search")?;
+
+        let status = resp.status();
+        if !status.is_success() {
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("users/_search returned {status}: {body}");
+        }
+
+        let parsed: SearchResponse = resp.json().await.context("parse users/_search")?;
+        // Re-check the name client-side instead of trusting the server filter.
+        for entry in parsed.result {
+            if entry.user_name.as_deref() == Some(username) {
+                return Ok(Some(entry.id));
+            }
+        }
+        Ok(None)
+    }
+
+    /// Create a machine user named `username` and return its `userId`.
+    ///
+    /// `device_id` is only used for the human-readable name and description.
+    /// The user is created with the JWT access-token type. A non-success
+    /// HTTP status is an error that includes the response body.
+    async fn create_machine_user(&self, username: &str, device_id: &str) -> Result<String> {
+        #[derive(Deserialize)]
+        struct Created {
+            #[serde(rename = "userId")]
+            user_id: String,
+        }
+
+        let display_name = format!("Fleet Device {device_id}");
+        let description = format!("Provisioned by fleet_rpi_setup for device {device_id}");
+        let payload = serde_json::json!({
+            "userName": username,
+            "name": display_name,
+            "description": description,
+            "accessTokenType": "ACCESS_TOKEN_TYPE_JWT"
+        });
+
+        let resp = self
+            .http
+            .post(self.url("/management/v1/users/machine"))
+            .bearer_auth(&self.admin_pat)
+            .json(&payload)
+            .send()
+            .await
+            .context("POST users/machine")?;
+
+        let status = resp.status();
+        if !status.is_success() {
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("create machine user returned {status}: {body}");
+        }
+
+        let created: Created = resp.json().await.context("parse machine user response")?;
+        Ok(created.user_id)
+    }
+
+    /// Mint a new JSON-typed key for `user_id` and return the keyfile text.
+    ///
+    /// Zitadel answers with a base64 `keyDetails` field; it is decoded with
+    /// the standard alphabet and must be valid UTF-8.
+    ///
+    /// # Errors
+    /// Fails on a transport error, a non-success status (the body is
+    /// included), an unparsable response, or an undecodable `keyDetails`.
+    async fn create_machine_key(&self, user_id: &str) -> Result<String> {
+        let resp = self
+            .http
+            .post(self.url(&format!("/management/v1/users/{user_id}/keys")))
+            .bearer_auth(&self.admin_pat)
+            .json(&serde_json::json!({ "type": "KEY_TYPE_JSON" }))
+            .send()
+            .await
+            // Interpolate the id: a plain `.context("…{}…")` string is not
+            // formatted and would print a literal `{}`.
+            .with_context(|| format!("POST users/{user_id}/keys"))?;
+        if !resp.status().is_success() {
+            let s = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("create machine key returned {s}: {body}");
+        }
+        #[derive(Deserialize)]
+        struct R {
+            #[serde(rename = "keyDetails")]
+            key_details: String,
+        }
+        let r: R = resp.json().await.context("parse machine key response")?;
+        let bytes = base64::engine::general_purpose::STANDARD
+            .decode(&r.key_details)
+            .context("decode keyDetails base64")?;
+        String::from_utf8(bytes).context("keyDetails is non-UTF-8")
+    }
+
+    /// Look for an existing grant of `user_id` on `project_id`.
+    ///
+    /// Sends an empty search query and filters the results client-side on
+    /// `projectId`; returns the id of the first matching grant, or `None`.
+    /// Role keys are not compared, so any grant on the project counts.
+    ///
+    /// NOTE(review): Zitadel's Management API documents user-grant search
+    /// at `/management/v1/users/grants/_search` (filtered by `userIdQuery`);
+    /// confirm that the per-user path used here is actually served.
+    ///
+    /// # Errors
+    /// Fails on a transport error, a non-success status (the body is
+    /// included), or an unparsable response.
+    async fn find_user_grant(&self, user_id: &str, project_id: &str) -> Result<Option<String>> {
+        let resp = self
+            .http
+            .post(self.url(&format!("/management/v1/users/{user_id}/grants/_search")))
+            .bearer_auth(&self.admin_pat)
+            .json(&serde_json::json!({}))
+            .send()
+            .await
+            // Interpolate the id: a plain `.context("…{}…")` string is not
+            // formatted and would print a literal `{}`.
+            .with_context(|| format!("POST users/{user_id}/grants/_search"))?;
+        if !resp.status().is_success() {
+            let s = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("grants/_search returned {s}: {body}");
+        }
+        #[derive(Deserialize)]
+        struct R {
+            #[serde(default)]
+            result: Vec<E>,
+        }
+        #[derive(Deserialize)]
+        struct E {
+            id: String,
+            #[serde(rename = "projectId")]
+            project_id: String,
+        }
+        let r: R = resp.json().await.context("parse grants/_search")?;
+        Ok(r.result
+            .into_iter()
+            .find(|e| e.project_id == project_id)
+            .map(|e| e.id))
+    }
+
+    /// Grant `role_key` on `project_id` to `user_id`.
+    ///
+    /// # Errors
+    /// Fails on a transport error or a non-success status (the response
+    /// body is included in the message).
+    async fn create_user_grant(
+        &self,
+        user_id: &str,
+        project_id: &str,
+        role_key: &str,
+    ) -> Result<()> {
+        let resp = self
+            .http
+            .post(self.url(&format!("/management/v1/users/{user_id}/grants")))
+            .bearer_auth(&self.admin_pat)
+            .json(&serde_json::json!({
+                "projectId": project_id,
+                "roleKeys": [role_key]
+            }))
+            .send()
+            .await
+            // Interpolate the id: a plain `.context("…{}…")` string is not
+            // formatted and would print a literal `{}`.
+            .with_context(|| format!("POST users/{user_id}/grants"))?;
+        if !resp.status().is_success() {
+            let s = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("create grant returned {s}: {body}");
+        }
+        Ok(())
+    }
+}
--- a/examples/fleet_server_install/Cargo.toml
+++ b/examples/fleet_server_install/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "example_fleet_server_install"
+version.workspace = true
+edition = "2024"
+license.workspace = true
+
+[[bin]]
+name = "fleet_server_install"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony", default-features = false }
+harmony_cli = { path = "../../harmony_cli" }
+harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" }
+tokio.workspace = true
+anyhow.workspace = true
+clap.workspace = true
--- a/examples/fleet_server_install/env.sh
+++ b/examples/fleet_server_install/env.sh
@@ -0,0 +1,19 @@
+# Environment for the fleet_server_install example. This file only
+# exports variables, so it is presumably meant to be sourced into the
+# shell that then launches the example (TODO confirm against the caller).
+export HARMONY_SECRET_NAMESPACE=fleet-server-install
+export HARMONY_SECRET_STORE=file
+export HARMONY_DATABASE_URL=sqlite://fleet_server_install.sqlite
+export RUST_LOG=harmony=info,kube_runtime=warn
+
+# Required: the kubeconfig for the cluster the score installs into.
+# K8sAnywhereTopology::from_env() reads KUBECONFIG; without it, the
+# topology will fall back to autoinstall logic that may try to spin
+# up a local k3d cluster.
+# Left commented out: set it to your own kubeconfig path.
+#export KUBECONFIG=/tmp/kubeconfig
+
+export HARMONY_USE_LOCAL_K3D=false
+
+# Zitadel install knobs (used by fleet/scripts/run_server_install.sh).
+# Zitadel is installed by default; the script's defaults assume a
+# `.localhost` hostname and HTTP ingress.
+# export NO_ZITADEL=1                      # skip the Zitadel install entirely
+# export ZITADEL_HOST=zitadel.example.com  # override the default zitadel.localhost
+# export ZITADEL_VERSION=v4.12.1           # override the chart version
--- a/examples/fleet_server_install/run.sh
+++ b/examples/fleet_server_install/run.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+# Convenience launcher: builds and runs the example_fleet_server_install
+# package in release mode from the repository root, forwarding every
+# argument untouched to the binary. See src/main.rs for the flag
+# surface, or pass --help.
+#
+# Requires KUBECONFIG to point at a reachable cluster + helm on PATH.
+
+set -euo pipefail
+
+# This script lives two directories below the workspace root.
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+workspace="$(cd "$here/../.." && pwd)"
+cd "$workspace"
+
+exec cargo run -q --release -p example_fleet_server_install -- "$@"
--- a/examples/fleet_server_install/src/main.rs
+++ b/examples/fleet_server_install/src/main.rs
@@ -0,0 +1,192 @@
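+//! Install the harmony fleet server-side stack into the cluster
+//! `KUBECONFIG` points at: NATS + the harmony fleet operator (CRDs +
+//! RBAC + Deployment), and optionally a central Zitadel OIDC
+//! identity provider. The individual Scores (`NatsBasicScore`,
+//! `FleetOperatorScore`, `ZitadelScore`) are registered directly
+//! rather than through `FleetServerScore`.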
+//!
+//! This is the framework-side replacement for the
+//! `example_fleet_nats_install`, `harmony-fleet-operator chart`,
+//! and `helm install` chain that the load-test harness used to
+//! drive by hand.
+//!
+//! Typical usage (operator + NATS only):
+//!
+//!     KUBECONFIG=$KUBECFG cargo run -q -p example_fleet_server_install -- \
+//!         --operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev
+//!
+//! Including Zitadel:
+//!
+//!     KUBECONFIG=$KUBECFG cargo run -q -p example_fleet_server_install -- \
+//!         --operator-image … \
+//!         --zitadel-host zitadel.localhost
+//!
+//! Behaviour:
+//!   - Installs single-node NATS (JetStream) into `--nats-namespace`
+//!     using `NatsBasicScore`, exposed per `--nats-expose`.
+//!   - Installs the operator chart into `--operator-namespace` via
+//!     `FleetOperatorScore` (which renders the chart in a tempdir
+//!     and helm-installs it).
+//!   - When `--zitadel-host` is set, also runs `ZitadelScore`:
+//!     provisions a CNPG PostgreSQL cluster + the upstream
+//!     `zitadel/zitadel` helm chart with distribution-aware ingress.
+//!     Defaults to HTTPS unless the host ends with `.localhost` or
+//!     `.test`, or `--zitadel-insecure` is passed.
+//!   - Idempotent: re-running on an existing install short-circuits
+//!     at `HelmChartScore::find_installed_release`.
+//!
+//! Topology: `K8sAnywhereTopology::from_env()`. This requires `KUBECONFIG`
+//! to be set and runs `CertificateManagementScore` as part of
+//! `ensure_ready` — i.e. it installs cert-manager into the cluster on
+//! first run. Cert-manager is needed for Zitadel's ingress TLS in
+//! production; for k3d dev it's still installed but unused.
+//!
+//! Output is driven by `harmony_cli::run`, which wires up the
+//! framework's standard logger + reporter — emoji-tagged progress
+//! lines per Score, plus an end-of-run summary listing the
+//! `Outcome.details` from each Score.
+
+use anyhow::Result;
+use clap::Parser;
+use harmony::inventory::Inventory;
+use harmony::modules::nats::NatsBasicScore;
+use harmony::modules::zitadel::ZitadelScore;
+use harmony::score::Score;
+use harmony::topology::K8sAnywhereTopology;
+use harmony_fleet_deploy::FleetOperatorScore;
+
+// Command-line surface of the installer. The `///` comments on the fields
+// double as clap's `--help` text, so they are user-facing strings.
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet_server_install",
+    about = "Install the harmony fleet server-side stack (NATS + operator [+ Zitadel])"
+)]
+struct Cli {
+    /// Namespace for the NATS Deployment + Service.
+    #[arg(long, default_value = "fleet-system")]
+    nats_namespace: String,
+    /// Resource name for the NATS release.
+    #[arg(long, default_value = "fleet-nats")]
+    nats_name: String,
+    /// NATS service exposure mode. `load-balancer` pairs with k3d's
+    /// `-p PORT:PORT@loadbalancer`. `node-port` requires the port be
+    /// in the apiserver's nodeport range (default 30000-32767).
+    #[arg(long, value_enum, default_value_t = NatsExpose::LoadBalancer)]
+    nats_expose: NatsExpose,
+    /// NodePort when `--nats-expose=node-port`. Ignored otherwise.
+    #[arg(long, default_value_t = 30422)]
+    nats_node_port: i32,
+    /// Optional NATS image override (`repository:tag`).
+    #[arg(long)]
+    nats_image: Option<String>,
+
+    /// Namespace the operator runs in.
+    #[arg(long, default_value = "fleet-system")]
+    operator_namespace: String,
+    /// Helm release name for the operator chart.
+    #[arg(long, default_value = "harmony-fleet-operator")]
+    operator_release: String,
+    /// Operator container image (`repository:tag`).
+    #[arg(
+        long,
+        default_value = "hub.nationtech.io/harmony/harmony-fleet-operator:dev"
+    )]
+    operator_image: String,
+    /// Image pull policy for the operator Deployment.
+    #[arg(long, default_value = "IfNotPresent")]
+    operator_image_pull_policy: String,
+    /// `RUST_LOG` value injected into the operator pod's env.
+    #[arg(long, default_value = "info,kube_runtime=warn")]
+    log_level: String,
+
+    /// Hostname Zitadel should answer on. When set, Zitadel + its
+    /// PostgreSQL cluster are installed alongside the operator.
+    /// When unset, the Zitadel install is skipped entirely.
+    #[arg(long)]
+    zitadel_host: Option<String>,
+    /// Zitadel chart version (matches `zitadel/zitadel` upstream tags).
+    #[arg(long, default_value = "v4.12.1")]
+    zitadel_version: String,
+    /// Force HTTP instead of HTTPS for the Zitadel ingress. HTTP is
+    /// also selected automatically when `--zitadel-host` ends with
+    /// `.localhost` or `.test`; every other host defaults to HTTPS.
+    #[arg(long)]
+    zitadel_insecure: bool,
+}
+
+// How the NATS service is exposed; selected with `--nats-expose`.
+// Plain `//` comments are used on purpose: `///` on the variants would be
+// picked up by clap as possible-value help and change the CLI output.
+#[derive(Clone, Debug, clap::ValueEnum)]
+enum NatsExpose {
+    // Leaves the NatsBasicScore exposure untouched (no builder call in `main`).
+    ClusterIp,
+    // Calls `.node_port(..)` with the `--nats-node-port` value.
+    NodePort,
+    // Calls `.load_balancer()`; this is the CLI default.
+    LoadBalancer,
+}
+
+/// Parse the CLI, assemble the NATS, operator and (optional) Zitadel
+/// Scores, and hand them to `harmony_cli::run` against the topology
+/// described by the environment.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let Cli {
+        nats_namespace,
+        nats_name,
+        nats_expose,
+        nats_node_port,
+        nats_image,
+        operator_namespace,
+        operator_release,
+        operator_image,
+        operator_image_pull_policy,
+        log_level,
+        zitadel_host,
+        zitadel_version,
+        zitadel_insecure,
+    } = Cli::parse();
+
+    let topology = K8sAnywhereTopology::from_env();
+
+    // NATS: start from the basic score, then apply exposure and image.
+    let base_nats = NatsBasicScore::new(&nats_name, &nats_namespace);
+    let exposed_nats = match nats_expose {
+        NatsExpose::ClusterIp => base_nats,
+        NatsExpose::NodePort => base_nats.node_port(nats_node_port),
+        NatsExpose::LoadBalancer => base_nats.load_balancer(),
+    };
+    let nats = match nats_image {
+        Some(image) => exposed_nats.image(image),
+        None => exposed_nats,
+    };
+
+    // The operator reaches NATS through the in-cluster service DNS name
+    // that the NatsBasicScore install creates: `<release>.<namespace>:4222`,
+    // the same for ClusterIP and LoadBalancer exposure.
+    let nats_url = format!("nats://{nats_name}.{nats_namespace}:4222");
+
+    let operator = FleetOperatorScore::new()
+        .namespace(&operator_namespace)
+        .release_name(&operator_release)
+        .image(&operator_image)
+        .image_pull_policy(&operator_image_pull_policy)
+        .nats_url(&nats_url)
+        .log_level(&log_level);
+
+    // Zitadel is only installed when a host was given. HTTPS is the
+    // default; HTTP is used for `.localhost` / `.test` development
+    // hostnames or when --zitadel-insecure was passed.
+    let zitadel = zitadel_host.map(|host| {
+        let dev_host = host.ends_with(".localhost") || host.ends_with(".test");
+        let external_secure = !(zitadel_insecure || dev_host);
+        ZitadelScore {
+            host,
+            zitadel_version,
+            external_secure,
+            external_port: None,
+            ..Default::default()
+        }
+    });
+
+    // FleetServerScore now takes NatsK8sScore (auth-callout-aware,
+    // OKD-Route-aware) — see `fleet_staging_install` for the production
+    // composition. This simpler example registers the inner Scores
+    // directly so it can keep using the basic NATS helm chart for
+    // k3d-style local installs.
+    let mut scores: Vec<Box<dyn Score<K8sAnywhereTopology>>> =
+        vec![Box::new(nats), Box::new(operator)];
+    if let Some(zitadel) = zitadel {
+        scores.push(Box::new(zitadel));
+    }
+
+    // Our own Cli is already parsed, so pass explicit harmony_cli args
+    // (no confirmation prompt, run every registered score) rather than
+    // letting harmony_cli re-parse argv.
+    let run_args = harmony_cli::Args {
+        yes: true,
+        filter: None,
+        interactive: false,
+        all: true,
+        number: 0,
+        list: false,
+    };
+    harmony_cli::run(Inventory::empty(), topology, scores, Some(run_args))
+        .await
+        .map_err(|e| anyhow::anyhow!("{e}"))
+}
--- a/examples/fleet_sso_login/Cargo.toml
+++ b/examples/fleet_sso_login/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "example-fleet-sso-login"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "Developer-side CLI: log in to a fleet platform staging instance via Zitadel device-code OIDC"
+
+[[bin]]
+name = "fleet-sso-login"
+path = "src/main.rs"
+
+[dependencies]
+reqwest = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
+anyhow.workspace = true
+clap = { version = "4", features = ["derive", "env"] }
+base64 = "0.22"
+log.workspace = true
+env_logger.workspace = true
+directories = "6.0.0"
--- a/examples/fleet_sso_login/src/main.rs
+++ b/examples/fleet_sso_login/src/main.rs
@@ -0,0 +1,266 @@
+//! Developer-side CLI: log in to a fleet platform staging instance via
+//! Zitadel's OIDC Device Authorization Grant (RFC 8628).
+//!
+//! Usage:
+//!
+//! ```text
+//! cargo run -p example-fleet-sso-login -- \
+//!   --base-domain customer1.nationtech.io \
+//!   --client-id 366378028009259038
+//! ```
+//!
+//! Flow:
+//! 1. POST to `/oauth/v2/device_authorization` with the CLI client_id —
+//!    receive a `verification_uri_complete`, `user_code`, `device_code`
+//!    and a polling interval.
+//! 2. Print the URL the user opens in their browser. They authenticate
+//!    via Zitadel (username/password, MFA, SSO chain — Zitadel handles
+//!    that part).
+//! 3. Poll `/oauth/v2/token` with `grant_type=urn:ietf:params:oauth:
+//!    grant-type:device_code` until the access token is issued.
+//! 4. Decode the access token's claims, print a welcome line using the
+//!    `name` claim (falling back to `preferred_username`), and persist
+//!    the session at `$DATA_DIR/harmony/sso-session.json`.
+//!
+//! No K8s API call yet — for the demo, this CLI proves the SSO works.
+//! Future: a `harmony fleet apply` subcommand uses the persisted token
+//! to talk to a fleet-platform API gateway. That gateway is post-demo.
+
+use std::path::PathBuf;
+use std::time::Duration;
+
+use anyhow::{Context, Result, bail};
+use base64::Engine;
+use clap::Parser;
+use serde::{Deserialize, Serialize};
+
+// Command-line arguments of the login tool. The `///` comments on the
+// fields double as clap's `--help` text, so they are left untouched.
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet-sso-login",
+    about = "Log in to a fleet platform staging instance via Zitadel device-code OIDC"
+)]
+struct Cli {
+    /// Base DNS domain — same value the operator passed to
+    /// fleet-staging-deploy. The Zitadel issuer derives as
+    /// `https://zitadel.<base>`.
+    // Can also be supplied through the FLEET_BASE_DOMAIN env var.
+    #[arg(long, env = "FLEET_BASE_DOMAIN")]
+    base_domain: String,
+    /// OIDC client_id of the `harmony-cli` Device Code app on the
+    /// Zitadel project. Printed by `fleet-staging-deploy` at the end
+    /// of a successful run.
+    // Can also be supplied through the FLEET_CLI_CLIENT_ID env var.
+    #[arg(long, env = "FLEET_CLI_CLIENT_ID")]
+    client_id: String,
+    /// Override the polling interval suggested by Zitadel
+    /// (defaults to whatever the device-authorization endpoint returned;
+    /// pass to short-circuit during testing).
+    // When neither this flag nor the server supplies a value, `main`
+    // falls back to 5 seconds.
+    #[arg(long)]
+    poll_interval_secs: Option<u64>,
+}
+
+/// Body returned by the device-authorization endpoint (step 1 of the flow).
+#[derive(Debug, Deserialize)]
+struct DeviceAuthResponse {
+    /// Opaque code sent back to the token endpoint on every poll.
+    device_code: String,
+    /// Short code shown to the user in case the URL does not pre-fill it.
+    user_code: String,
+    /// Verification URL; shown only when no "complete" URL was returned.
+    verification_uri: String,
+    /// Verification URL preferred for display when present.
+    #[serde(default)]
+    verification_uri_complete: Option<String>,
+    /// Seconds until the device code expires; sets the polling deadline.
+    expires_in: u64,
+    /// Server-suggested polling interval in seconds, if any.
+    #[serde(default)]
+    interval: Option<u64>,
+}
+
+/// Successful token-endpoint response. Only `access_token` is read by
+/// `main`; the optional fields are accepted when present.
+#[derive(Debug, Deserialize, Serialize)]
+struct TokenResponse {
+    /// The issued access token; the only mandatory field.
+    access_token: String,
+    #[serde(default)]
+    id_token: Option<String>,
+    #[serde(default)]
+    refresh_token: Option<String>,
+    #[serde(default)]
+    expires_in: Option<u64>,
+    #[serde(default)]
+    token_type: Option<String>,
+}
+
+/// Error body returned by the token endpoint while polling.
+#[derive(Debug, Deserialize)]
+struct TokenError {
+    /// Error code; `authorization_pending` and `slow_down` keep the poll
+    /// loop running, anything else ends it.
+    error: String,
+    /// Optional detail, included in the failure message.
+    #[serde(default)]
+    error_description: Option<String>,
+}
+
+/// Run the OIDC device-authorization flow against `https://zitadel.<base>`:
+/// request a device code, print the verification URL, poll the token
+/// endpoint until a token is issued or the code expires, then persist the
+/// session and print a welcome banner.
+///
+/// # Errors
+/// Fails on transport errors, a non-success device-authorization response,
+/// expiry of the device code, any terminal token-endpoint error, or a
+/// failure to write the session file.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init();
+    let cli = Cli::parse();
+
+    let issuer = format!("https://zitadel.{}", cli.base_domain);
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(15))
+        .build()?;
+
+    // -- Step 1: kick off the device flow ----------------------------
+    let device_auth_url = format!("{issuer}/oauth/v2/device_authorization");
+    let scope =
+        "openid profile email urn:zitadel:iam:user:resourceowner urn:zitadel:iam:org:project:roles";
+    let resp = client
+        .post(&device_auth_url)
+        .form(&[("client_id", cli.client_id.as_str()), ("scope", scope)])
+        .send()
+        .await
+        .with_context(|| format!("POST {device_auth_url}"))?;
+    if !resp.status().is_success() {
+        let s = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        bail!("device_authorization returned {s}: {body}");
+    }
+    let auth: DeviceAuthResponse = resp.json().await.context("parse device_authorization")?;
+
+    let display_url = auth
+        .verification_uri_complete
+        .clone()
+        .unwrap_or_else(|| auth.verification_uri.clone());
+    println!();
+    println!("============================================================");
+    println!(" Open this URL in your browser to log in:");
+    println!();
+    println!("   {display_url}");
+    println!();
+    println!(" If the URL doesn't pre-fill the code, enter:");
+    println!();
+    println!("   user_code: {}", auth.user_code);
+    println!();
+    println!(
+        " Waiting for browser-side completion (expires in {}s)...",
+        auth.expires_in
+    );
+    println!("============================================================");
+    println!();
+
+    // -- Step 2: poll the token endpoint -----------------------------
+    let token_url = format!("{issuer}/oauth/v2/token");
+    // Precedence: CLI override, then the server's suggestion, then 5 s.
+    // Mutable because `slow_down` responses lengthen it for later polls.
+    let mut interval = Duration::from_secs(cli.poll_interval_secs.or(auth.interval).unwrap_or(5));
+    let deadline = std::time::Instant::now() + Duration::from_secs(auth.expires_in);
+
+    let access_token = loop {
+        if std::time::Instant::now() > deadline {
+            bail!("device-code expired before user completed login");
+        }
+        tokio::time::sleep(interval).await;
+        let resp = client
+            .post(&token_url)
+            .form(&[
+                ("grant_type", "urn:ietf:params:oauth:grant-type:device_code"),
+                ("device_code", auth.device_code.as_str()),
+                ("client_id", cli.client_id.as_str()),
+            ])
+            .send()
+            .await
+            .context("POST token")?;
+        let status = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        if status.is_success() {
+            let tr: TokenResponse =
+                serde_json::from_str(&body).context("parse token success body")?;
+            break tr.access_token;
+        }
+        // Per RFC 8628, the token endpoint returns specific error
+        // codes during polling — `authorization_pending` and
+        // `slow_down` are NOT terminal, every other error is.
+        // A body that is not a JSON error object becomes a synthetic
+        // `http_<status>` error carrying the raw body.
+        let err: TokenError = serde_json::from_str(&body).unwrap_or_else(|_| TokenError {
+            error: format!("http_{}", status.as_u16()),
+            error_description: Some(body.clone()),
+        });
+        match err.error.as_str() {
+            "authorization_pending" => {
+                log::debug!("authorization_pending — user hasn't approved yet");
+                continue;
+            }
+            "slow_down" => {
+                log::info!("server requested slow_down — increasing poll interval");
+                // RFC 8628 §3.5: the interval MUST grow by 5 seconds for
+                // this and all subsequent requests, not just the next one.
+                interval += Duration::from_secs(5);
+                continue;
+            }
+            other => bail!(
+                "token endpoint refused: {other} ({})",
+                err.error_description.unwrap_or_default()
+            ),
+        }
+    };
+
+    // -- Step 3: introspect + persist --------------------------------
+    // An undecodable token yields `Value::Null`; the lookups below then
+    // fall through to their placeholders.
+    let claims = decode_jwt_claims(&access_token).unwrap_or_default();
+    let display_name = claims
+        .get("name")
+        .or_else(|| claims.get("preferred_username"))
+        .and_then(|v| v.as_str())
+        .unwrap_or("(unknown)");
+    let email = claims
+        .get("email")
+        .and_then(|v| v.as_str())
+        .unwrap_or("(no email)");
+
+    persist_session(&issuer, &cli.client_id, &access_token, &claims)?;
+
+    println!();
+    println!("============================================================");
+    println!(" SSO LOGIN SUCCESSFUL");
+    println!("============================================================");
+    println!(" Welcome, {display_name} <{email}>");
+    println!(" Session stored at: {}", session_path().display());
+    println!("============================================================");
+    Ok(())
+}
+
+/// Decode the claims (second dot-separated segment) of a JWT without
+/// verifying its signature.
+///
+/// Returns `None` when the token has no second segment, when the segment
+/// is not valid URL-safe base64, or when the decoded bytes are not JSON.
+fn decode_jwt_claims(jwt: &str) -> Option<serde_json::Value> {
+    let payload_b64 = jwt.split('.').nth(1)?;
+    // The no-pad engine rejects `=`, so strip any padding that is present.
+    // (Building padding first and trimming it again was a wasted allocation.)
+    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
+        .decode(payload_b64.trim_end_matches('='))
+        .ok()?;
+    serde_json::from_slice(&bytes).ok()
+}
+
+/// Borrowed view of the data written to the session file by
+/// `persist_session`; serialized as pretty-printed JSON.
+#[derive(Serialize)]
+struct PersistedSession<'a> {
+    /// Issuer URL the token was obtained from.
+    issuer: &'a str,
+    /// OIDC client id used for the login.
+    client_id: &'a str,
+    /// The raw access token — the sensitive part of the file.
+    access_token: &'a str,
+    /// Claims decoded from the access token.
+    claims: &'a serde_json::Value,
+}
+
+/// Write the session (issuer, client id, access token, claims) as
+/// pretty-printed JSON to `session_path()`, creating the parent directory
+/// when needed.
+///
+/// On Unix the file is created with mode 0600 *before* the token is
+/// written, so there is no window in which other users can read it. A
+/// file left over from an earlier run is tightened to 0600 as well; if
+/// that fails a warning is logged and the write still proceeds.
+///
+/// # Errors
+/// Fails when the directory cannot be created, the session cannot be
+/// serialized, or the file cannot be opened or written.
+fn persist_session(
+    issuer: &str,
+    client_id: &str,
+    access_token: &str,
+    claims: &serde_json::Value,
+) -> Result<()> {
+    let path = session_path();
+    if let Some(parent) = path.parent() {
+        std::fs::create_dir_all(parent)
+            .with_context(|| format!("create session dir {}", parent.display()))?;
+    }
+    let s = PersistedSession {
+        issuer,
+        client_id,
+        access_token,
+        claims,
+    };
+    let json = serde_json::to_string_pretty(&s)?;
+
+    #[cfg(unix)]
+    {
+        use std::io::Write;
+        use std::os::unix::fs::{OpenOptionsExt, PermissionsExt};
+
+        let mut file = std::fs::OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .mode(0o600)
+            .open(&path)
+            .with_context(|| format!("write session to {}", path.display()))?;
+        // `mode` only applies when the file is newly created; a file kept
+        // from an earlier run may still carry wider permissions. It is
+        // empty at this point (truncated), so tightening here is in time.
+        if let Err(e) = file.set_permissions(std::fs::Permissions::from_mode(0o600)) {
+            log::warn!("could not restrict {} to 0600: {e}", path.display());
+        }
+        file.write_all(json.as_bytes())
+            .with_context(|| format!("write session to {}", path.display()))?;
+    }
+    #[cfg(not(unix))]
+    {
+        std::fs::write(&path, json)
+            .with_context(|| format!("write session to {}", path.display()))?;
+    }
+    Ok(())
+}
+
+/// Location of the persisted SSO session: `harmony/sso-session.json`
+/// under the platform data directory reported by `directories`, or
+/// `/tmp/harmony-sso-session.json` when no base directories are found.
+fn session_path() -> PathBuf {
+    match directories::BaseDirs::new() {
+        Some(dirs) => {
+            let mut path = dirs.data_dir().to_path_buf();
+            path.push("harmony");
+            path.push("sso-session.json");
+            path
+        }
+        None => PathBuf::from("/tmp/harmony-sso-session.json"),
+    }
+}
--- a/examples/fleet_staging_deploy/Cargo.toml
+++ b/examples/fleet_staging_deploy/Cargo.toml
@@ -0,0 +1,36 @@
+[package]
+name = "example-fleet-staging-deploy"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "Deploy the fleet platform stack (Zitadel + NATS + auth callout) onto an OKD/Kubernetes cluster. Operator-side, run-once-per-customer."
+
+[lib]
+name = "example_fleet_staging_deploy"
+path = "src/lib.rs"
+
+[[bin]]
+name = "fleet-staging-deploy"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony" }
+harmony-k8s = { path = "../../harmony-k8s" }
+harmony_types = { path = "../../harmony_types" }
+harmony-nats-callout = { path = "../../nats/callout" }
+nkeys = "0.4"
+async-nats.workspace = true
+reqwest = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
+serde.workspace = true
+serde_json.workspace = true
+anyhow.workspace = true
+log.workspace = true
+env_logger.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+clap = { version = "4", features = ["derive", "env"] }
+k8s-openapi.workspace = true
+kube.workspace = true
+url.workspace = true
--- a/examples/fleet_staging_deploy/src/lib.rs
+++ b/examples/fleet_staging_deploy/src/lib.rs
@@ -0,0 +1,577 @@
+//! Operator-side staging deploy harness.
+//!
+//! Runs once per customer instance against an OKD / Kubernetes cluster
+//! to bring up the fleet platform's central services:
+//!
+//! 1. Zitadel + Postgres (HTTPS via OKD HAProxy ingress, edge TLS).
+//! 2. The fleet project + roles (`fleet-admin`, `device`) + an API app
+//!    (so the project ID can be the JWT-bearer audience).
+//! 3. NATS with `auth_callout` and a WSS ingress (so Pis on a customer
+//!    LAN connect through `wss://nats.<base>/`).
+//! 4. The auth callout Deployment, configured to validate Zitadel JWTs
+//!    and emit per-device permissions on user JWTs to NATS.
+//!
+//! Everything keys off [`FleetDomainConfig::base_domain`] —
+//! `zitadel.<base>`, `nats.<base>`, `api.<base>` are the only
+//! customer-visible hostnames. Pi-side onboarding (see
+//! `examples/fleet_rpi_setup/`) consumes the Zitadel admin PAT plus
+//! the project ID this harness prints, so the operator's flow is:
+//!
+//! ```text
+//! cargo run -p example-fleet-staging-deploy -- --base-domain customer1.nationtech.io
+//!   ↓ prints PROJECT_ID, NATS WSS URL, instructions to extract iam-admin-pat
+//! HARMONY_ZITADEL_ADMIN_PAT=$(kubectl -n zitadel-staging get secret iam-admin-pat -o jsonpath='{.data.pat}' | base64 -d) \
+//! cargo run -p example-fleet-rpi-setup -- \
+//!   --pi-host 192.168.1.42 \
+//!   --bootstrap-token "$HARMONY_ZITADEL_ADMIN_PAT" \
+//!   --zitadel-issuer-url https://zitadel.customer1.nationtech.io \
+//!   --zitadel-project-id <PROJECT_ID printed above> \
+//!   --nats-url wss://nats.customer1.nationtech.io/ \
+//!   --agent-binary ./target/aarch64-unknown-linux-gnu/release/fleet-agent
+//! ```
+//!
+//! The harness is **idempotent** by design — re-running picks up
+//! existing resources via the new helm-upgrade-by-default behavior +
+//! ZitadelSetupScore's search-then-create flow + a persisted issuer
+//! NKey in a K8s secret so user JWTs survive restarts.
+
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use harmony::inventory::Inventory;
+use harmony::modules::nats::NatsHelmChartScore;
+use harmony::modules::nats_auth_callout::{NatsAuthCalloutScore, render_auth_callout_block};
+use harmony::modules::zitadel::{
+    ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig, ZitadelRole,
+    ZitadelScore, ZitadelSetupScore,
+};
+use harmony::score::Score;
+use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
+use log::info;
+use nkeys::KeyPair;
+
+// ---- domain config ---------------------------------------------------------
+
+/// Single source of truth for all customer-visible hostnames. Every
+/// `<app>.<base_domain>` host and URL the staging deploy emits is derived
+/// from this one base domain — no hostnames are hardcoded, so the same
+/// code runs across customers / staging / canary instances.
+#[derive(Debug, Clone)]
+pub struct FleetDomainConfig {
+    /// e.g. `customer1.nationtech.io`. The accessors on this type prepend
+    /// the service label (`zitadel.`, `nats.`) to it.
+    pub base_domain: String,
+}
+
+impl FleetDomainConfig {
+    /// Wrap a base domain such as `customer1.nationtech.io`.
+    pub fn new(base_domain: impl Into<String>) -> Self {
+        let base_domain = base_domain.into();
+        Self { base_domain }
+    }
+
+    /// Hostname Zitadel is served on: `zitadel.<base_domain>`.
+    pub fn zitadel_host(&self) -> String {
+        self.subdomain("zitadel")
+    }
+
+    /// Hostname of the NATS WebSocket endpoint: `nats.<base_domain>`.
+    pub fn nats_wss_host(&self) -> String {
+        self.subdomain("nats")
+    }
+
+    /// Issuer URL: `https://zitadel.<base_domain>` (no port, no trailing slash).
+    pub fn zitadel_issuer_url(&self) -> String {
+        ["https://", self.zitadel_host().as_str()].concat()
+    }
+
+    /// NATS WebSocket URL: `wss://nats.<base_domain>/` (trailing slash included).
+    pub fn nats_wss_url(&self) -> String {
+        format!("wss://{host}/", host = self.nats_wss_host())
+    }
+
+    /// Build `<label>.<base_domain>`.
+    fn subdomain(&self, label: &str) -> String {
+        [label, self.base_domain.as_str()].join(".")
+    }
+}
+
+// ---- naming + constants ----------------------------------------------------
+
+/// Namespace that receives the NATS release, the auth callout and the
+/// issuer-seed secret.
+pub const FLEET_NAMESPACE: &str = "fleet-staging";
+/// Namespace passed to both `ZitadelScore` and `ZitadelSetupScore`.
+pub const ZITADEL_NAMESPACE: &str = "zitadel-staging";
+/// Name given to the NATS chart score and used as `fullnameOverride`; also
+/// the first label of the in-cluster NATS address handed to the callout.
+pub const NATS_RELEASE: &str = "fleet-nats";
+/// Name passed to `NatsAuthCalloutScore` for the callout workload.
+pub const CALLOUT_DEPLOYMENT_NAME: &str = "fleet-callout";
+/// Zitadel project that owns the apps and roles provisioned here.
+pub const PROJECT_NAME: &str = "fleet";
+/// Name of the Zitadel API app created inside [`PROJECT_NAME`].
+pub const API_APP_NAME: &str = "nats";
+/// Name of the Zitadel Device Code app used for CLI login.
+pub const CLI_APP_NAME: &str = "harmony-cli";
+/// Role key registered as "Fleet Admin" and passed to the callout as its admin role.
+pub const ADMIN_ROLE_KEY: &str = "fleet-admin";
+/// Role key registered as "Device" and passed to the callout as its device role.
+pub const DEVICE_ROLE_KEY: &str = "device";
+/// NATS user the callout service itself connects as; listed under the
+/// [`NATS_ACCOUNT`] account's static users in the rendered values.
+pub const NATS_AUTH_USER: &str = "auth";
+/// NATS account that holds the callout user and is the callout's target account.
+pub const NATS_ACCOUNT: &str = "DEVICES";
+/// User rendered under the `SYS` account in the NATS values.
+pub const NATS_SYSTEM_USER: &str = "sys-admin";
+/// Name of the K8s secret (in [`FLEET_NAMESPACE`]) whose `seed` key stores
+/// the callout's issuer NKey seed.
+pub const ISSUER_SEED_SECRET: &str = "callout-issuer-seed";
+
+// ---- handles ---------------------------------------------------------------
+
+/// Everything a successful [`bring_up_staging`] run hands back to the
+/// caller; consumed by `print_next_steps` to render the follow-up commands.
+#[derive(Debug, Clone)]
+pub struct StagingHandles {
+    /// Domain configuration the deploy ran with.
+    pub domain: FleetDomainConfig,
+    /// Zitadel project ID read back from the local `ZitadelClientConfig`
+    /// cache after provisioning.
+    pub project_id: String,
+    /// Public key of the callout's issuer NKey (derived from the persisted
+    /// seed) that was rendered into the NATS values.
+    pub issuer_pubkey: String,
+    /// Tag of the callout image expected to exist in a registry the
+    /// cluster pulls from. The operator pushes it before running the
+    /// deploy; this field is just the name we put on the Deployment
+    /// for traceability.
+    pub callout_image: String,
+    /// OIDC client_id of the `harmony-cli` Device Code app — what the
+    /// `fleet_sso_login` CLI sends in its device-authorization request.
+    /// `None` if the app pre-existed without the cache picking it up
+    /// (re-running the staging deploy after `rm -rf
+    /// ~/.local/share/harmony/zitadel/`).
+    pub cli_client_id: Option<String>,
+}
+
+// ---- bring up --------------------------------------------------------------
+
+/// Inputs to [`bring_up_staging`].
+pub struct StagingDeployOpts {
+    /// Base domain every public hostname is derived from.
+    pub domain: FleetDomainConfig,
+    /// When `Some`, exported as `HARMONY_K8S_CONTEXT` before the topology
+    /// is built from the environment; when `None`, the environment is
+    /// left untouched.
+    pub kubeconfig_context: Option<String>,
+    /// Image reference the cluster will pull. Operator must have
+    /// pushed this beforehand (e.g. `quay.io/customer/harmony-nats-callout:demo`).
+    pub callout_image: String,
+    /// Per-NATS-account password for the callout's own NATS connection.
+    /// Stored in a K8s secret + listed in the chart's
+    /// `accounts.<account>.users` so the callout bypasses callout to
+    /// connect (otherwise it'd deadlock authenticating itself).
+    pub nats_auth_pass: String,
+    /// SYS account password (for `kubectl exec nats-box` debugging).
+    pub nats_system_pass: String,
+}
+
+/// Run the full staging deploy and return the handles needed for the
+/// operator's follow-up steps.
+///
+/// Steps, in order: deploy Zitadel, poll its OIDC discovery endpoint until
+/// it answers, provision the project / apps / roles, load-or-create the
+/// issuer NKey seed and deploy NATS, then deploy the auth callout.
+///
+/// # Errors
+///
+/// Returns the first failure of any step, with a context string naming it.
+pub async fn bring_up_staging(opts: StagingDeployOpts) -> Result<StagingHandles> {
+    // `try_init` fails when a logger is already installed; that is fine here.
+    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init();
+
+    if let Some(ctx) = &opts.kubeconfig_context {
+        // NOTE(review): `set_var` is `unsafe` in edition 2024 because mutating
+        // the environment while another thread reads it is undefined
+        // behaviour. This runs inside an async fn — confirm no other thread
+        // touches the environment at this point. The variables are
+        // presumably consumed by `K8sAnywhereTopology::from_env()` below.
+        unsafe {
+            std::env::set_var("HARMONY_K8S_CONTEXT", ctx);
+            std::env::set_var("HARMONY_USE_LOCAL_K3D", "false");
+            std::env::set_var("HARMONY_AUTOINSTALL", "false");
+        }
+    }
+    let topology = K8sAnywhereTopology::from_env();
+    topology.ensure_ready().await.context("topology init")?;
+
+    info!(
+        "[1/5] deploying Zitadel at https://{}",
+        opts.domain.zitadel_host()
+    );
+    deploy_zitadel(&opts.domain, &topology).await?;
+
+    info!("[2/5] waiting for Zitadel HTTPS to respond");
+    wait_for_zitadel_ready(&opts.domain).await?;
+
+    info!("[3/5] provisioning project '{PROJECT_NAME}', api app, CLI device-code app, and roles");
+    provision_zitadel_project(&opts.domain, &topology).await?;
+    // Both IDs come from the local client-config cache, not from the
+    // score's return value. A missing project ID is fatal; a missing CLI
+    // client_id only produces a warning.
+    let project_id = read_project_id()?;
+    let cli_client_id = read_cli_client_id();
+    info!(" → project_id = {project_id}");
+    if let Some(cid) = &cli_client_id {
+        info!(" → cli_client_id = {cid}");
+    } else {
+        log::warn!(
+            " → cli_client_id missing from cache; CLI login won't work until you reset the local zitadel cache"
+        );
+    }
+
+    info!("[4/5] generating issuer NKey + deploying NATS with auth_callout + WSS ingress");
+    let issuer_seed = ensure_issuer_seed(&topology).await?;
+    // Parsing the seed doubles as validation of whatever was stored in the secret.
+    let issuer_kp = KeyPair::from_seed(&issuer_seed)
+        .map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
+    let issuer_pubkey = issuer_kp.public_key();
+
+    NatsHelmChartScore::new(
+        NATS_RELEASE.to_string(),
+        FLEET_NAMESPACE.to_string(),
+        render_nats_values(
+            &opts.domain,
+            &issuer_pubkey,
+            &opts.nats_auth_pass,
+            &opts.nats_system_pass,
+        ),
+    )
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("NATS deploy")?;
+
+    info!(
+        "[5/5] deploying NatsAuthCalloutScore (image: {})",
+        opts.callout_image
+    );
+    NatsAuthCalloutScore::new(
+        CALLOUT_DEPLOYMENT_NAME,
+        FLEET_NAMESPACE,
+        format!("nats://{NATS_RELEASE}.{FLEET_NAMESPACE}.svc.cluster.local:4222"),
+        opts.domain.zitadel_issuer_url(),
+        // The aud the callout validates against is the project ID —
+        // Zitadel emits it in access tokens minted via the
+        // project-id-audience scope.
+        project_id.clone(),
+        NATS_AUTH_USER,
+        opts.nats_auth_pass.clone(),
+        issuer_seed,
+    )
+    .image(&opts.callout_image)
+    .target_account(NATS_ACCOUNT)
+    .admin_role(ADMIN_ROLE_KEY)
+    .device_role(DEVICE_ROLE_KEY)
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("callout deploy")?;
+
+    Ok(StagingHandles {
+        domain: opts.domain,
+        project_id,
+        issuer_pubkey,
+        callout_image: opts.callout_image,
+        cli_client_id,
+    })
+}
+
+/// Fetch the `harmony-cli` app's client_id from the local
+/// `ZitadelClientConfig` cache.
+///
+/// Yields `None` when the cache cannot be loaded or has no entry for
+/// [`CLI_APP_NAME`].
+fn read_cli_client_id() -> Option<String> {
+    let cache = ZitadelClientConfig::load()?;
+    let client_id = cache.client_id(CLI_APP_NAME)?;
+    Some(client_id.clone())
+}
+
+/// Apply `ZitadelScore` for `zitadel.<base>` in [`ZITADEL_NAMESPACE`].
+///
+/// # Errors
+///
+/// Any failure from the score is returned with the context `ZitadelScore`.
+async fn deploy_zitadel(domain: &FleetDomainConfig, topology: &K8sAnywhereTopology) -> Result<()> {
+    // TLS is edge-terminated by the OKD HAProxy router, so clients reach
+    // the issuer at `https://zitadel.<base>` on the implied port 443.
+    // Keeping `external_port` unset makes Zitadel omit the port from the
+    // issuer it emits, so both sides agree on the URL.
+    let score = ZitadelScore {
+        namespace: ZITADEL_NAMESPACE.to_owned(),
+        host: domain.zitadel_host(),
+        zitadel_version: String::from("v4.12.1"),
+        external_secure: true,
+        external_port: None,
+        ..Default::default()
+    };
+
+    let inventory = Inventory::autoload();
+    score
+        .interpret(&inventory, topology)
+        .await
+        .context("ZitadelScore")?;
+    Ok(())
+}
+
+/// Apply `ZitadelSetupScore` to create the fleet project, its Device Code
+/// CLI app, its API app and the two roles.
+///
+/// # Errors
+///
+/// Any failure from the score is returned with the context
+/// `ZitadelSetupScore`.
+async fn provision_zitadel_project(
+    domain: &FleetDomainConfig,
+    topology: &K8sAnywhereTopology,
+) -> Result<()> {
+    // Every role hangs off the same project and carries no group.
+    let role = |key: &str, display_name: &str| ZitadelRole {
+        project_name: PROJECT_NAME.to_owned(),
+        key: key.to_owned(),
+        display_name: display_name.to_owned(),
+        group: None,
+    };
+
+    // Device Code is the browser-driven OIDC flow that suits a CLI: it
+    // prints a verification URL plus user code and polls for the token,
+    // so no embedded web server or open listener is needed.
+    let cli_app = ZitadelApplication {
+        project_name: PROJECT_NAME.to_owned(),
+        app_name: CLI_APP_NAME.to_owned(),
+        app_type: ZitadelAppType::DeviceCode,
+    };
+    let api_app = ZitadelApiApp {
+        project_name: PROJECT_NAME.to_owned(),
+        app_name: API_APP_NAME.to_owned(),
+    };
+
+    let score = ZitadelSetupScore {
+        host: domain.zitadel_host(),
+        // Talk to Zitadel directly over HTTPS through OKD's HAProxy
+        // ingress; the defaults resolve to `https://<host>` on port 443,
+        // so the operator only needs kubeconfig + DNS access.
+        scheme: Default::default(),
+        port: None,
+        skip_tls: false,
+        endpoint: None,
+        admin_org_id: None,
+        namespace: ZITADEL_NAMESPACE.to_owned(),
+        applications: vec![cli_app],
+        api_apps: vec![api_app],
+        roles: vec![
+            role(ADMIN_ROLE_KEY, "Fleet Admin"),
+            role(DEVICE_ROLE_KEY, "Device"),
+        ],
+        // Machine users are minted per device by `fleet_rpi_setup`, which
+        // keeps this deploy independent of the device count.
+        machine_users: Vec::new(),
+    };
+
+    let inventory = Inventory::autoload();
+    score
+        .interpret(&inventory, topology)
+        .await
+        .context("ZitadelSetupScore")?;
+    Ok(())
+}
+
+/// Read the fleet project's ID from the local `ZitadelClientConfig` cache.
+///
+/// Prefers the entry keyed by [`PROJECT_NAME`] and falls back to the
+/// cache's `project_id` field.
+///
+/// # Errors
+///
+/// Fails when the cache cannot be loaded or holds neither value.
+fn read_project_id() -> Result<String> {
+    let cache = ZitadelClientConfig::load()
+        .context("ZitadelSetupScore did not produce a client config cache")?;
+    let id = match cache.project_id_by_name(PROJECT_NAME) {
+        Some(by_name) => by_name,
+        None => cache
+            .project_id
+            .as_ref()
+            .context("project_id missing from ZitadelClientConfig cache")?,
+    };
+    Ok(id.clone())
+}
+
+/// Return the callout's issuer NKey seed, generating and persisting it on
+/// first use.
+///
+/// The seed lives under the `seed` key of the [`ISSUER_SEED_SECRET`] secret
+/// in [`FLEET_NAMESPACE`] so re-runs of the staging deploy don't invalidate
+/// previously-issued user JWTs already in flight on customer Pis.
+///
+/// # Errors
+///
+/// Fails when the cluster cannot be queried, when a stored seed is not
+/// valid UTF-8, when a new seed cannot be generated, or when the freshly
+/// generated seed cannot be written to the secret. The last case used to be
+/// ignored; returning an unpersisted seed would let the next run mint a
+/// different issuer key. It also covers a pre-existing secret that lacks a
+/// `seed` key, since creating over it is rejected by the cluster.
+async fn ensure_issuer_seed(topology: &K8sAnywhereTopology) -> Result<String> {
+    use k8s_openapi::ByteString;
+    use k8s_openapi::api::core::v1::{Namespace, Secret};
+    use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
+    use std::collections::BTreeMap;
+
+    let k8s = topology
+        .k8s_client()
+        .await
+        .map_err(|e| anyhow::anyhow!("k8s_client: {e}"))?;
+
+    if k8s
+        .get_resource::<Namespace>(FLEET_NAMESPACE, None)
+        .await?
+        .is_none()
+    {
+        let ns = Namespace {
+            metadata: ObjectMeta {
+                name: Some(FLEET_NAMESPACE.to_string()),
+                ..Default::default()
+            },
+            ..Default::default()
+        };
+        // Best-effort: a concurrent creator may have won the race. A real
+        // failure resurfaces when the secret is written below.
+        if let Err(e) = k8s.create(&ns, None).await {
+            log::warn!("could not create namespace {FLEET_NAMESPACE}: {e}");
+        }
+    }
+
+    // Reuse the stored seed whenever the secret and its `seed` key exist.
+    if let Some(existing) = k8s
+        .get_resource::<Secret>(ISSUER_SEED_SECRET, Some(FLEET_NAMESPACE))
+        .await?
+        && let Some(data) = existing.data
+        && let Some(seed_bytes) = data.get("seed")
+    {
+        let seed = String::from_utf8(seed_bytes.0.clone())?;
+        return Ok(seed.trim().to_string());
+    }
+
+    let seed = KeyPair::new_account()
+        .seed()
+        .map_err(|e| anyhow::anyhow!("nkey seed: {e}"))?;
+    let mut data = BTreeMap::new();
+    data.insert("seed".to_string(), ByteString(seed.as_bytes().to_vec()));
+    let secret = Secret {
+        metadata: ObjectMeta {
+            name: Some(ISSUER_SEED_SECRET.to_string()),
+            namespace: Some(FLEET_NAMESPACE.to_string()),
+            ..Default::default()
+        },
+        data: Some(data),
+        type_: Some("Opaque".to_string()),
+        ..Default::default()
+    };
+    // Persisting is the whole point of this function, so a failed write is
+    // an error rather than something to swallow.
+    k8s.create(&secret, Some(FLEET_NAMESPACE))
+        .await
+        .map_err(|e| {
+            anyhow::anyhow!(
+                "persist issuer seed in secret {FLEET_NAMESPACE}/{ISSUER_SEED_SECRET}: {e}"
+            )
+        })?;
+    Ok(seed)
+}
+
+// ---- NATS values -----------------------------------------------------------
+
+/// Render NATS Helm values for an OKD-flavored deployment with WSS
+/// ingress + auth callout + JetStream.
+///
+/// **Why WSS rather than plain NATS-on-TLS:** OKD's default ingress
+/// controller (HAProxy) is HTTP-aware and edge-terminates TLS. NATS
+/// over WebSocket goes through that ingress unchanged; native NATS
+/// TCP would require a TCP loadbalancer service or a passthrough
+/// Route, both of which are extra infra the customer's cluster may
+/// not have. WSS is also the default async-nats client transport on
+/// `wss://...` URLs — no special agent code needed.
+///
+/// * `domain` — supplies the ingress host (`nats.<base>`).
+/// * `issuer_pubkey` — handed to `render_auth_callout_block`.
+/// * `nats_auth_pass` / `nats_system_pass` — passwords for the callout
+///   user and the `SYS` user. Both are escaped for YAML double-quoted
+///   scalars, so a `"` or `\` in a password no longer corrupts the file.
+///
+/// Returns the values document as a YAML string.
+pub fn render_nats_values(
+    domain: &FleetDomainConfig,
+    issuer_pubkey: &str,
+    nats_auth_pass: &str,
+    nats_system_pass: &str,
+) -> String {
+    let auth_callout = render_auth_callout_block(issuer_pubkey, NATS_AUTH_USER, NATS_ACCOUNT);
+    // The template already indents the placeholder by four spaces, so only
+    // the continuation lines of the block need the extra indent.
+    let auth_callout_indented = auth_callout
+        .lines()
+        .enumerate()
+        .map(|(i, l)| {
+            if i == 0 {
+                l.to_string()
+            } else {
+                format!("    {l}")
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("\n");
+    format!(
+        r#"fullnameOverride: {nats_release}
+config:
+  cluster:
+    enabled: false
+  jetstream:
+    enabled: true
+    fileStorage:
+      enabled: true
+      size: 5Gi
+  websocket:
+    enabled: true
+    port: 8443
+    ingress:
+      enabled: true
+      className: openshift-default
+      pathType: Prefix
+      hosts:
+        - {nats_wss_host}
+      annotations:
+        # OKD HAProxy edge-terminates TLS — the chart's default Route
+        # generation needs `route.openshift.io/termination: edge` so
+        # the Route's TLS block is "edge", matching the cluster's wildcard
+        # cert behavior. Switch to `reencrypt` if you need TLS all the
+        # way to the NATS pod.
+        route.openshift.io/termination: edge
+        haproxy.router.openshift.io/timeout: "1h"
+  merge:
+    {auth_callout_indented}
+    accounts:
+      {nats_account}:
+        jetstream: enabled
+        users:
+          - user: "{auth_user}"
+            password: "{auth_pass}"
+      SYS:
+        users:
+          - user: "{sys_user}"
+            password: "{sys_pass}"
+    system_account: SYS
+service:
+  ports:
+    nats:
+      enabled: true
+"#,
+        nats_release = NATS_RELEASE,
+        nats_wss_host = domain.nats_wss_host(),
+        nats_account = NATS_ACCOUNT,
+        auth_user = NATS_AUTH_USER,
+        auth_pass = escape_yaml_double_quoted(nats_auth_pass),
+        sys_user = NATS_SYSTEM_USER,
+        sys_pass = escape_yaml_double_quoted(nats_system_pass),
+    )
+}
+
+/// Escape `raw` for use inside a YAML double-quoted scalar.
+fn escape_yaml_double_quoted(raw: &str) -> String {
+    let mut escaped = String::with_capacity(raw.len());
+    for ch in raw.chars() {
+        match ch {
+            '\\' => escaped.push_str("\\\\"),
+            '"' => escaped.push_str("\\\""),
+            '\n' => escaped.push_str("\\n"),
+            '\r' => escaped.push_str("\\r"),
+            '\t' => escaped.push_str("\\t"),
+            other => escaped.push(other),
+        }
+    }
+    escaped
+}
+
+// ---- readiness -------------------------------------------------------------
+
+/// Poll Zitadel's OIDC discovery document until it returns a success status.
+///
+/// Makes up to 180 attempts with a 5-second request timeout and a 2-second
+/// pause after each failed attempt; every 30th failed attempt is logged.
+///
+/// # Errors
+///
+/// Fails when the HTTP client cannot be built or when no attempt succeeds.
+async fn wait_for_zitadel_ready(domain: &FleetDomainConfig) -> Result<()> {
+    let well_known = format!(
+        "{}/.well-known/openid-configuration",
+        domain.zitadel_issuer_url()
+    );
+    let http = reqwest::Client::builder()
+        .timeout(Duration::from_secs(5))
+        .build()?;
+
+    for attempt in 1..=180 {
+        // Keep the log quiet: report only every 30th failed attempt.
+        let report = attempt % 30 == 0;
+        match http.get(&well_known).send().await {
+            Ok(response) => {
+                if response.status().is_success() {
+                    return Ok(());
+                }
+                if report {
+                    info!("Zitadel HTTPS {} (attempt {attempt}/180)", response.status());
+                }
+            }
+            Err(e) => {
+                if report {
+                    info!("Zitadel unreachable: {e} (attempt {attempt}/180)");
+                }
+            }
+        }
+        tokio::time::sleep(Duration::from_secs(2)).await;
+    }
+
+    anyhow::bail!("timed out waiting for Zitadel at {well_known}")
+}
+
+// ---- helpful printout ------------------------------------------------------
+
+impl StagingHandles {
+    /// Print the operator's "what to do next" panel after a successful
+    /// staging deploy. Pasted at the end of the binary's run.
+    ///
+    /// The CLI SSO login section is printed only when `cli_client_id` is
+    /// known. The PAT extraction command targets [`ZITADEL_NAMESPACE`],
+    /// the namespace this harness deploys Zitadel into, instead of a
+    /// hard-coded `zitadel`.
+    pub fn print_next_steps(&self) {
+        let zitadel = self.domain.zitadel_issuer_url();
+        let nats = self.domain.nats_wss_url();
+        println!();
+        println!("============================================================");
+        println!(" STAGING DEPLOY COMPLETE");
+        println!("============================================================");
+        println!(" Base domain:      {}", self.domain.base_domain);
+        println!(" Zitadel:          {zitadel}");
+        println!(" NATS (WSS):       {nats}");
+        println!(" Project ID:       {}", self.project_id);
+        println!(" Callout image:    {}", self.callout_image);
+        println!(" Issuer pubkey:    {}", self.issuer_pubkey);
+        if let Some(cid) = &self.cli_client_id {
+            println!(" CLI client_id:    {cid}");
+            println!();
+            println!(" CLI SSO login (developer-side):");
+            println!();
+            println!("   cargo run -p example-fleet-sso-login -- \\");
+            println!("     --base-domain {} \\", self.domain.base_domain);
+            println!("     --client-id {cid}");
+        }
+        println!();
+        println!(" Onboard a Pi:");
+        println!();
+        println!("   PAT=$(kubectl -n {ZITADEL_NAMESPACE} get secret iam-admin-pat \\");
+        println!("       -o jsonpath='{{.data.pat}}' | base64 -d)");
+        println!();
+        println!("   cargo run -p example-fleet-rpi-setup -- \\");
+        println!("     --pi-host <PI_IP> \\");
+        println!("     --bootstrap-token \"$PAT\" \\");
+        println!("     --zitadel-issuer-url {zitadel} \\");
+        println!("     --zitadel-project-id {} \\", self.project_id);
+        println!("     --nats-url {nats} \\");
+        println!("     --agent-binary <path-to-aarch64-fleet-agent>");
+        println!();
+        println!("============================================================");
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Each accessor prefixes or wraps the base domain as documented.
+    #[test]
+    fn domain_config_derives_hostnames() {
+        let cfg = FleetDomainConfig::new("customer1.nationtech.io");
+        let derived = [
+            (cfg.zitadel_host(), "zitadel.customer1.nationtech.io"),
+            (cfg.nats_wss_host(), "nats.customer1.nationtech.io"),
+            (
+                cfg.zitadel_issuer_url(),
+                "https://zitadel.customer1.nationtech.io",
+            ),
+            (cfg.nats_wss_url(), "wss://nats.customer1.nationtech.io/"),
+        ];
+        for (actual, expected) in derived {
+            assert_eq!(actual, expected);
+        }
+    }
+
+    /// The rendered values carry the WSS plumbing, the OKD annotations,
+    /// the auth callout block and the static account user.
+    #[test]
+    fn nats_values_render_includes_wss_ingress_and_auth_callout() {
+        let cfg = FleetDomainConfig::new("acme.io");
+        let rendered = render_nats_values(&cfg, "ABCDEF", "auth-pass", "sys-pass");
+        let expected_fragments = [
+            // WSS plumbing.
+            "websocket:",
+            "port: 8443",
+            "nats.acme.io",
+            // OKD edge-TLS annotations.
+            "openshift-default",
+            "route.openshift.io/termination: edge",
+            // Auth callout wired through with the issuer pubkey.
+            "auth_callout",
+            "issuer: ABCDEF",
+            "auth_users: [ auth ]",
+            "system_account: SYS",
+            // Account user.
+            "password: \"auth-pass\"",
+        ];
+        for fragment in expected_fragments {
+            assert!(rendered.contains(fragment));
+        }
+    }
+
+    /// Guards against the auth_callout block escaping the `merge:` level;
+    /// the chart expects it under `config.merge`.
+    #[test]
+    fn nats_values_inline_account_block_under_merge() {
+        let cfg = FleetDomainConfig::new("x.io");
+        let rendered = render_nats_values(&cfg, "K", "p", "s");
+        let merge_at = rendered
+            .find("\n  merge:\n")
+            .expect("merge block present");
+        let callout_at = rendered
+            .find("auth_callout:")
+            .expect("auth_callout present");
+        assert!(callout_at > merge_at, "auth_callout must follow merge:");
+    }
+}
--- a/examples/fleet_staging_deploy/src/main.rs
+++ b/examples/fleet_staging_deploy/src/main.rs
@@ -0,0 +1,71 @@
+//! `cargo run -p example-fleet-staging-deploy -- --base-domain customer1.nationtech.io ...`
+//!
+//! Operator-side, run-once-per-customer-instance harness. Brings up
+//! the central fleet platform services (Zitadel + NATS + auth callout)
+//! against an OKD/K8s cluster pointed to by `KUBECONFIG`. Prints the
+//! exact follow-up command the operator runs against a Pi to onboard
+//! the first device.
+//!
+//! See `src/lib.rs` for the architectural notes.
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use example_fleet_staging_deploy::{FleetDomainConfig, StagingDeployOpts, bring_up_staging};
+
+// Command-line / environment configuration for the staging deploy.
+// The `///` comments on the fields below are left untouched on purpose:
+// clap's derive turns them into `--help` text, so editing them changes
+// what the program prints.
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet-staging-deploy",
+    about = "Deploy Zitadel + NATS + auth callout onto an OKD cluster"
+)]
+struct Cli {
+    /// Base DNS domain. All cluster-visible services derive from this:
+    /// `zitadel.<base>`, `nats.<base>`. The customer's wildcard cert /
+    /// CoreDNS / DNS provider must already point this at the cluster.
+    #[arg(long, env = "FLEET_BASE_DOMAIN")]
+    base_domain: String,
+    /// kubeconfig context to deploy against. Defaults to the
+    /// kubeconfig's current-context. Set this when your kubeconfig
+    /// has multiple contexts and you don't want to rely on the
+    /// global current.
+    #[arg(long, env = "FLEET_KUBE_CONTEXT")]
+    kube_context: Option<String>,
+    /// Container image reference for the harmony-nats-callout binary.
+    /// The cluster pulls this; operator must have pushed it before
+    /// running the deploy. Defaults to a quay.io path that the
+    /// customer should override per their registry.
+    #[arg(
+        long,
+        env = "FLEET_CALLOUT_IMAGE",
+        default_value = "quay.io/nationtech/harmony-nats-callout:demo"
+    )]
+    callout_image: String,
+    /// Password for the NATS service-account user the callout uses on
+    /// its own NATS connection. Stored in a K8s secret + listed in
+    /// the chart's `accounts.DEVICES.users` (which bypass callout —
+    /// otherwise the callout would deadlock authenticating itself).
+    // Required: no default is declared, so it must come from the flag or
+    // from FLEET_NATS_AUTH_PASS.
+    #[arg(long, env = "FLEET_NATS_AUTH_PASS")]
+    nats_auth_pass: String,
+    /// Password for the NATS SYS account (used for nats-box debugging
+    /// inside the cluster).
+    // Required, same as above, via the flag or FLEET_NATS_SYSTEM_PASS.
+    #[arg(long, env = "FLEET_NATS_SYSTEM_PASS")]
+    nats_system_pass: String,
+}
+
+// Entry point: parse the CLI, run the staging deploy, then print the
+// operator's follow-up commands.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let Cli {
+        base_domain,
+        kube_context,
+        callout_image,
+        nats_auth_pass,
+        nats_system_pass,
+    } = Cli::parse();
+
+    let opts = StagingDeployOpts {
+        domain: FleetDomainConfig::new(base_domain),
+        kubeconfig_context: kube_context,
+        callout_image,
+        nats_auth_pass,
+        nats_system_pass,
+    };
+
+    bring_up_staging(opts)
+        .await
+        .context("staging deploy")?
+        .print_next_steps();
+    Ok(())
+}
--- a/examples/fleet_staging_install/Cargo.toml
+++ b/examples/fleet_staging_install/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "example_fleet_staging_install"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "Production-shape fleet install: Zitadel + NATS + auth callout + operator on OKD"
+
+[[bin]]
+name = "fleet_staging_install"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony" }
+harmony_cli = { path = "../../harmony_cli" }
+harmony-k8s = { path = "../../harmony-k8s" }
+harmony-nats-callout = { path = "../../nats/callout" }
+harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" }
+nkeys = "0.4"
+rand = "0.9"
+anyhow.workspace = true
+clap = { version = "4", features = ["derive", "env"] }
+tokio.workspace = true
+log.workspace = true
+env_logger.workspace = true
--- a/examples/fleet_staging_install/src/main.rs
+++ b/examples/fleet_staging_install/src/main.rs
@@ -0,0 +1,433 @@
+//! Production-shape fleet install for OKD (or any cluster with the
+//! same capabilities). Composes:
+//!
+//! 1. Zitadel + Postgres helm install in `--zitadel-namespace`,
+//!    edge-TLS Route at `sso-stg.<base>` via cert-manager.
+//! 2. ZitadelSetupScore in the same call so we have the
+//!    `fleet-operator` machine key BEFORE the operator pod starts.
+//! 3. Single-instance NATS (JetStream) in `--fleet-namespace` with
+//!    the auth_callout block wired to the callout's issuer NKey
+//!    pubkey + WebSocket listener (no_tls — Route owns TLS).
+//! 4. NATS WebSocket Route at `nats-fleet-stg.<base>`,
+//!    edge-TLS, cert-manager-managed cert.
+//! 5. NatsAuthCalloutScore deployment (Secret-based env vars only,
+//!    no volume mounts — OKD restricted-v2 SCC compat).
+//! 6. FleetOperatorScore with credentials TOML inlining the
+//!    `fleet-operator` JSON keyfile (env-var-from-Secret only).
+//!
+//! `--base-domain` drives every public hostname. It is required, as
+//! are `--operator-image` and `--callout-image`; the cluster issuer
+//! name, namespaces and Zitadel naming have overridable defaults.
+//!
+//! Usage:
+//!
+//! ```text
+//! KUBECONFIG=$ADMIN_KUBECONFIG cargo run -p example_fleet_staging_install -- \
+//!   --base-domain cb1.nationtech.io \
+//!   --operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev \
+//!   --callout-image hub.nationtech.io/harmony/harmony-nats-callout:dev
+//! ```
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use harmony::inventory::Inventory;
+use harmony::modules::nats::capability::NatsCluster;
+use harmony::modules::nats::score_nats_k8s::{AuthCalloutCfg, NatsK8sScore, WebSocketRouteCfg};
+use harmony::modules::nats_auth_callout::NatsAuthCalloutScore;
+use harmony::modules::zitadel::{
+    MachineKeyType, ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig,
+    ZitadelMachineUser, ZitadelRole, ZitadelScore, ZitadelSetupScore,
+};
+use harmony::score::Score;
+use harmony::topology::{K8sAnywhereTopology, Topology};
+use harmony_fleet_deploy::{FleetOperatorScore, OperatorCredentials};
+use harmony_k8s::KubernetesDistribution;
+use nkeys::KeyPair;
+
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet_staging_install",
+    about = "Install fleet staging stack (Zitadel + NATS + callout + operator) on OKD"
+)]
+struct Cli {
+    /// Cluster's public base domain. Hostnames are derived from it:
+    ///   sso-stg.<base>            ← Zitadel
+    ///   nats-fleet-stg.<base>     ← NATS WebSocket
+    ///
+    /// To deploy on a different cluster, change this and re-run.
+    #[arg(long)]
+    base_domain: String,
+
+    /// cert-manager `ClusterIssuer` name. Drives the
+    /// `cert-manager.io/cluster-issuer` annotation on the Zitadel
+    /// and NATS Routes. Override per cluster if your operator uses
+    /// a different issuer name.
+    #[arg(long, default_value = "letsencrypt-prod")]
+    cluster_issuer: String,
+
+    /// Namespace for NATS, callout, operator.
+    #[arg(long, default_value = "fleet-staging")]
+    fleet_namespace: String,
+
+    /// Namespace for Zitadel + Postgres.
+    #[arg(long, default_value = "zitadel-staging")]
+    zitadel_namespace: String,
+
+    /// Operator container image (`repository:tag`). Public on
+    /// hub.nationtech.io for the demo; ImagePullSecret for that
+    /// registry must already be present in `--fleet-namespace`.
+    #[arg(long)]
+    operator_image: String,
+
+    /// Auth callout container image (`repository:tag`).
+    #[arg(long)]
+    callout_image: String,
+
+    /// NATS account name auth-callout-issued users land in. Must
+    /// match the NATS Helm `auth_callout.account` field. Default
+    /// `FLEET` matches the rest of the staging conventions.
+    #[arg(long, default_value = "FLEET")]
+    nats_account: String,
+
+    /// Zitadel chart version pin.
+    #[arg(long, default_value = "v4.12.1")]
+    zitadel_version: String,
+
+    /// Project name created inside Zitadel for fleet auth.
+    #[arg(long, default_value = "fleet")]
+    project_name: String,
+
+    /// Role name granting full admin (operator + manual ops). The
+    /// callout maps this role to `pub/sub: [">"]`.
+    #[arg(long, default_value = "fleet-admin")]
+    admin_role: String,
+
+    /// Role name granting per-device scoped permissions.
+    #[arg(long, default_value = "device")]
+    device_role: String,
+
+    /// Username of the operator's Zitadel machine user. Distinct
+    /// from `fleet-ops` (manual admin tooling) for audit trail.
+    #[arg(long, default_value = "fleet-operator")]
+    operator_username: String,
+
+    /// Username of the manual-admin Zitadel machine user (the one
+    /// you mint tokens with from your laptop).
+    #[arg(long, default_value = "fleet-ops")]
+    admin_username: String,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init()
+        .ok();
+
+    let cli = Cli::parse();
+    let topology = K8sAnywhereTopology::from_env();
+    topology.ensure_ready().await?;
+
+    let zitadel_host = format!("sso-stg.{}", cli.base_domain);
+    let nats_ws_host = format!("nats-fleet-stg.{}", cli.base_domain);
+
+    // ---- 1. Zitadel helm install ----------------------------------------
+    let zitadel = ZitadelScore {
+        host: zitadel_host.clone(),
+        zitadel_version: cli.zitadel_version.clone(),
+        external_secure: true,
+        external_port: None,
+        namespace: cli.zitadel_namespace.clone(),
+        cluster_issuer: cli.cluster_issuer.clone(),
+    };
+    log::info!(
+        "[1/6] Zitadel helm: ns={} host={}",
+        cli.zitadel_namespace,
+        zitadel_host
+    );
+    zitadel
+        .interpret(&Inventory::empty(), &topology)
+        .await
+        .context("Zitadel helm install")?;
+
+    // ---- 2. ZitadelSetupScore: project + roles + machine users ----------
+    // Run this BEFORE building the operator score so we have the
+    // `fleet-operator` machine key in hand when filling
+    // OperatorCredentials. The Score caches keys to
+    // ZitadelClientConfig on disk; we read them back here.
+    log::info!(
+        "[2/6] Zitadel setup: project={} admin={} operator={}",
+        cli.project_name,
+        cli.admin_username,
+        cli.operator_username
+    );
+    let api_app_name = "nats";
+    let cli_app_name = "harmony-cli";
+    let zitadel_setup = ZitadelSetupScore {
+        host: zitadel_host.clone(),
+        scheme: Default::default(),
+        port: None,
+        skip_tls: false,
+        endpoint: None,
+        admin_org_id: None,
+        namespace: cli.zitadel_namespace.clone(),
+        // Device-code OIDC app for human admin login from
+        // `fleet_device_enroll`'s SSO flow. Operators sign in here
+        // with their personal Zitadel account; their resulting
+        // access token is what `mint_device_credentials` uses to
+        // create per-device users + keys. The numeric `client_id`
+        // generated by Zitadel for this app is what gets passed to
+        // `--admin-oidc-client-id`; we read it back from the
+        // ZitadelClientConfig cache below and print it in the
+        // success banner.
+        applications: vec![ZitadelApplication {
+            project_name: cli.project_name.clone(),
+            app_name: cli_app_name.to_string(),
+            app_type: ZitadelAppType::DeviceCode,
+        }],
+        api_apps: vec![ZitadelApiApp {
+            project_name: cli.project_name.clone(),
+            app_name: api_app_name.to_string(),
+        }],
+        roles: vec![
+            ZitadelRole {
+                project_name: cli.project_name.clone(),
+                key: cli.admin_role.clone(),
+                display_name: "Fleet Admin".to_string(),
+                group: None,
+            },
+            ZitadelRole {
+                project_name: cli.project_name.clone(),
+                key: cli.device_role.clone(),
+                display_name: "Device".to_string(),
+                group: None,
+            },
+        ],
+        machine_users: vec![
+            ZitadelMachineUser {
+                username: cli.admin_username.clone(),
+                name: "Fleet Operations".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(cli.project_name.clone()),
+                grant_roles: vec![cli.admin_role.clone()],
+            },
+            ZitadelMachineUser {
+                username: cli.operator_username.clone(),
+                name: "Fleet Operator (in-cluster)".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(cli.project_name.clone()),
+                grant_roles: vec![cli.admin_role.clone()],
+            },
+        ],
+    };
+    zitadel_setup
+        .interpret(&Inventory::empty(), &topology)
+        .await
+        .context("Zitadel setup (project + roles + machine users)")?;
+
+    // Read back the project_id + operator key from cache.
+    let zcfg = ZitadelClientConfig::load()
+        .context("ZitadelSetupScore did not produce a client config cache")?;
+    let project_id = zcfg
+        .project_id_by_name(&cli.project_name)
+        .or(zcfg.project_id.as_ref())
+        .context("project_id missing from cache after setup")?
+        .clone();
+    let operator_machine_key = zcfg
+        .machine_key(&cli.operator_username)
+        .with_context(|| {
+            format!(
+                "machine key for {} missing from cache after setup",
+                cli.operator_username
+            )
+        })?
+        .clone();
+    let cli_client_id = zcfg
+        .client_id(cli_app_name)
+        .with_context(|| {
+            format!(
+                "OIDC client_id for app '{cli_app_name}' missing from cache — \
+                 ZitadelSetupScore should have created the app and populated \
+                 ZitadelClientConfig.apps"
+            )
+        })?
+        .clone();
+    log::info!("[2/6] project_id resolved: {project_id}");
+    log::info!("[2/6] device-code client_id for '{cli_app_name}' resolved: {cli_client_id}");
+
+    // ---- 3a. Issuer NKey + auth callout credentials ---------------------
+    // The callout signs user JWTs with this account NKey; NatsK8sScore
+    // renders the matching pubkey into the auth_callout helm block.
+    // Both the keypair and the auth password are regenerated on every run.
+    let issuer_kp = KeyPair::new_account();
+    let issuer_seed = issuer_kp
+        .seed()
+        .map_err(|e| anyhow::anyhow!("issuer NKey seed: {e}"))?;
+    let issuer_pubkey = issuer_kp.public_key();
+    let nats_auth_user = "auth";
+    let nats_auth_pass = generate_alphanum(24);
+
+    // ---- 3b. NATS install (+ WebSocket Route), logged as [3/6] ----------
+    let nats_release = "fleet-nats";
+    log::info!(
+        "[3/6] NATS install: ns={} release={} ws={}",
+        cli.fleet_namespace,
+        nats_release,
+        nats_ws_host
+    );
+    let nats_cluster = NatsCluster {
+        namespace: cli.fleet_namespace.clone(),
+        // `domain` is unused in single-instance mode (gateway off).
+        // Kept here for the legacy supercluster code path which the
+        // staging install doesn't take.
+        domain: cli.base_domain.clone(),
+        replicas: 1,
+        name: nats_release.to_string(),
+        gateway_advertise: String::new(),
+        dns_name: nats_ws_host.clone(),
+        // Static-string fields the NatsCluster shape requires; only
+        // referenced when `gateway` is Some, which it isn't here.
+        supercluster_ca_secret_name: "fleet-nats-supercluster-ca",
+        tls_cert_name: "fleet-nats-tls",
+        jetstream_enabled: "true",
+    };
+    let nats = NatsK8sScore {
+        distribution: KubernetesDistribution::OpenshiftFamily,
+        cluster: nats_cluster,
+        peers: None,
+        ca_bundle: None,
+        gateway: None, // single-instance — drop the gateway block
+        auth_callout: Some(AuthCalloutCfg {
+            issuer_pubkey: issuer_pubkey.clone(),
+            auth_user: nats_auth_user.to_string(),
+            auth_pass: nats_auth_pass.clone(),
+            account: cli.nats_account.clone(),
+        }),
+        websocket: Some(WebSocketRouteCfg {
+            host: nats_ws_host.clone(),
+            cluster_issuer: cli.cluster_issuer.clone(),
+        }),
+    };
+    nats.interpret(&Inventory::empty(), &topology)
+        .await
+        .context("NATS install (single-instance + auth_callout + WS Route)")?;
+
+    // ---- 4. Auth callout deployment, logged as [4/6] --------------------
+    log::info!(
+        "[4/6] Auth callout: image={} project_id={}",
+        cli.callout_image,
+        project_id
+    );
+    let mut callout = NatsAuthCalloutScore::new(
+        "fleet-callout",
+        &cli.fleet_namespace,
+        format!(
+            "nats://{nats_release}.{}.svc.cluster.local:4222",
+            cli.fleet_namespace
+        ),
+        format!("https://{zitadel_host}"),
+        project_id.clone(),
+        nats_auth_user,
+        &nats_auth_pass,
+        &issuer_seed,
+    )
+    .image(&cli.callout_image)
+    .target_account(&cli.nats_account)
+    .admin_role(&cli.admin_role)
+    .device_role(&cli.device_role)
+    .danger_accept_invalid_certs(false);
+    callout.device_id_claim = "client_id".to_string();
+    callout.device_id_prefix_strip = "device-".to_string();
+    callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
+    callout
+        .interpret(&Inventory::empty(), &topology)
+        .await
+        .context("auth callout deploy")?;
+
+    // ---- 5. Operator deployment with credentials, logged as [5/6] -------
+    log::info!("[5/6] Operator: image={}", cli.operator_image);
+    // `key_json` MUST use TOML literal multi-line strings (`'''...'''`),
+    // not basic multi-line (`"""..."""`). Basic strings interpret
+    // backslash escapes, which corrupts the JSON keyfile: every `\n`
+    // inside the embedded RSA private key gets expanded to a literal
+    // newline (0x0A) before JSON parsing sees it, and JSON disallows
+    // raw control chars inside strings ("control character found while
+    // parsing a string"). Literal strings preserve `\n` as-is so the
+    // downstream JSON parser interprets it as an escape and decodes
+    // the multi-line PEM correctly.
+    let credentials_toml = format!(
+        r#"type = "zitadel-jwt"
+oidc_issuer_url = "https://{zitadel_host}"
+audience = "{project_id}"
+key_json = '''{operator_key}'''
+"#,
+        zitadel_host = zitadel_host,
+        project_id = project_id,
+        operator_key = operator_machine_key,
+    );
+    let mut operator = FleetOperatorScore::new()
+        .namespace(&cli.fleet_namespace)
+        .release_name("harmony-fleet-operator")
+        .image(&cli.operator_image)
+        .image_pull_policy("Always")
+        .nats_url(format!(
+            "nats://{nats_release}.{}.svc.cluster.local:4222",
+            cli.fleet_namespace
+        ))
+        .log_level("info,kube_runtime=warn");
+    operator.credentials = Some(OperatorCredentials { credentials_toml });
+    operator
+        .interpret(&Inventory::empty(), &topology)
+        .await
+        .context("operator deploy")?;
+
+    log::info!("[6/6] Stack installed.");
+    println!("\n=== fleet-staging install complete ===");
+    println!("Zitadel:           https://{zitadel_host}/");
+    println!("NATS WS public:    wss://{nats_ws_host}/");
+    println!(
+        "NATS in-cluster:   nats://{nats_release}.{}.svc.cluster.local:4222",
+        cli.fleet_namespace
+    );
+    println!(
+        "Operator:          oc -n {} get deploy/harmony-fleet-operator",
+        cli.fleet_namespace
+    );
+    println!(
+        "Auth callout:      oc -n {} get deploy/fleet-callout",
+        cli.fleet_namespace
+    );
+    println!("Project id:        {project_id}");
+    println!(
+        "Admin user:        {} (machine key in ~/.local/share/harmony/zitadel/client-config.json)",
+        cli.admin_username
+    );
+    println!(
+        "Operator user:     {} (machine key embedded in operator's Secret)",
+        cli.operator_username
+    );
+    println!("SSO client_id:     {cli_client_id}  (app '{cli_app_name}', device-code grant)");
+    println!();
+    println!("To enroll a device, pass the SSO client_id explicitly:");
+    println!(
+        "  fleet_device_enroll \\\n    \
+         --target ssh://<user>@<device> \\\n    \
+         --issuer-url https://{zitadel_host} \\\n    \
+         --audience {project_id} \\\n    \
+         --nats-url wss://{nats_ws_host} \\\n    \
+         --admin-oidc-client-id {cli_client_id} \\\n    \
+         --agent-binary <path>"
+    );
+
+    Ok(())
+}
+
+/// Builds a `len`-character string of random ASCII letters and digits.
+fn generate_alphanum(len: usize) -> String {
+    use rand::Rng;
+    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+    let mut source = rand::rng();
+    let pick = || char::from(ALPHABET[source.random_range(0..ALPHABET.len())]);
+    std::iter::repeat_with(pick).take(len).collect()
+}
--- a/examples/fleet_vm_setup/Cargo.toml
+++ b/examples/fleet_vm_setup/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "example_fleet_vm_setup"
+version.workspace = true
+edition = "2024"
+license.workspace = true
+
+[[bin]]
+name = "fleet_vm_setup"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony", features = ["kvm"] }
+harmony_types = { path = "../../harmony_types" }
+tokio.workspace = true
+log.workspace = true
+env_logger.workspace = true
+anyhow.workspace = true
+clap.workspace = true
--- a/examples/fleet_vm_setup/README.md
+++ b/examples/fleet_vm_setup/README.md
@@ -0,0 +1,69 @@
+# example_fleet_vm_setup
+
+End-to-end driver for the IoT walking-skeleton VM-as-device flow. Runs two
+Harmony Scores in sequence:
+
+1. **`ProvisionVmScore`** (libvirt-backed here) — provision a VM from an
+   Ubuntu 24.04 cloud image with a cloud-init seed ISO that authorizes
+   one SSH key. Returns the booted VM's IP.
+2. **`FleetDeviceSetupScore`** — SSH into the VM (via the Ansible-backed
+   `HostConfigurationProvider`) and install podman + the `fleet-agent`
+   binary, drop the TOML config, bring up the systemd unit.
+
+After a successful run, the VM is a fleet member reporting to NATS under
+the `--device-id` you chose, carrying the `--labels` you passed.
+
+## One-time setup
+
+```bash
+WORK=/var/tmp/harmony-iot-smoke
+mkdir -p "$WORK/ssh"
+
+# 1. Ubuntu 24.04 cloud image (~700 MB) — cached between runs.
+curl -o "$WORK/ubuntu-24.04-server-cloudimg-amd64.img" \
+     https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img
+
+# 2. SSH keypair the VM will trust.
+ssh-keygen -t ed25519 -N '' -f "$WORK/ssh/id_ed25519"
+
+# 3. Runtime deps — Harmony self-installs Ansible into a managed venv
+#    under $HARMONY_DATA_DIR/ansible-venv on first run, so you only need
+#    python3 + venv on the runner. No system-wide `ansible` needed.
+# On Arch:
+#   sudo pacman -S libvirt qemu-full xorriso python
+# On Debian/Ubuntu:
+#   sudo apt install libvirt-daemon-system qemu-kvm xorriso python3 python3-venv
+
+# 4. libvirt default network.
+sudo virsh net-start default
+sudo virsh net-autostart default
+```
+
+## Run
+
+```bash
+cargo build -p fleet-agent-v0
+
+cargo run -p example_fleet_vm_setup -- \
+  --vm-name fleet-vm-01 \
+  --network default \
+  --admin-user fleet-admin \
+  --labels group=group-a \
+  --agent-binary target/debug/fleet-agent-v0 \
+  --nats-url nats://192.168.122.1:4222
+```
+
+## Changing groups
+
+Re-running with different `--labels` rewrites
+`/etc/fleet-agent/config.toml` on the VM and restarts the agent. The VM
+itself is untouched.
+
+```bash
+cargo run -p example_fleet_vm_setup -- ... --labels group=group-b
+```
+
+## Full end-to-end via smoke test
+
+See `fleet/scripts/smoke-a3.sh` — stands up NATS in a podman container,
+runs this example, asserts the agent's status lands in NATS.
--- a/examples/fleet_vm_setup/src/main.rs
+++ b/examples/fleet_vm_setup/src/main.rs
@@ -0,0 +1,284 @@
+//! End-to-end driver for the IoT walking-skeleton VM-as-device flow.
+//!
+//! Runs two Scores back-to-back:
+//!   1. `ProvisionVmScore` — bound to the generic `VirtualMachineHost`
+//!      capability. Here we satisfy it with `KvmVirtualMachineHost`
+//!      (libvirt). Swapping to VMware/Proxmox/cloud would be a
+//!      different topology injection with the same Score code.
+//!   2. `FleetDeviceSetupScore` — SSHes into the booted VM and installs
+//!      podman + fleet-agent via the split Linux-host capabilities.
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use harmony::inventory::Inventory;
+use harmony::modules::fleet::{
+    FleetDeviceSetupConfig, FleetDeviceSetupScore, ProvisionVmScore,
+    check_fleet_smoke_preflight_for_arch, ensure_fleet_ssh_keypair,
+};
+use harmony::modules::kvm::KvmVirtualMachineHost;
+use harmony::modules::kvm::config::init_executor;
+use harmony::modules::linux::{LinuxHostTopology, SshCredentials};
+use harmony::topology::{VirtualMachineSpec, VmArchitecture, VmFirstBootConfig};
+use harmony_types::id::Id;
+use std::path::PathBuf;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
+enum CliArch {
+    /// Native KVM on x86_64 hosts.
+    X86_64,
+    /// Aarch64 guest. Runs on native KVM on arm64 hosts and under
+    /// qemu-system-aarch64 TCG emulation on x86_64 hosts (slower).
+    Aarch64,
+}
+
+impl From<CliArch> for VmArchitecture {
+    fn from(choice: CliArch) -> Self {
+        match choice {
+            CliArch::Aarch64 => Self::Aarch64,
+            CliArch::X86_64 => Self::X86_64,
+        }
+    }
+}
+
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet_vm_setup",
+    about = "Provision one VM + onboard it into the IoT fleet"
+)]
+struct Cli {
+    /// Guest CPU architecture. Selects the cloud image, qemu
+    /// emulator, and firmware model.
+    #[arg(long, value_enum, default_value_t = CliArch::X86_64)]
+    arch: CliArch,
+    /// libvirt domain name for the VM.
+    #[arg(long, default_value = "fleet-vm-01")]
+    vm_name: String,
+    /// Device id the agent will announce to NATS. Defaults to a
+    /// fresh `Id` (hex timestamp + random suffix).
+    #[arg(long)]
+    device_id: Option<String>,
+    /// Routing labels to write into the agent's TOML config.
+    /// Comma-separated list of `key=value` pairs. Published in every
+    /// DeviceInfo heartbeat; the operator resolves Deployment
+    /// `spec.targetSelector` against this map. At least one label
+    /// is required so the device is targetable — the default
+    /// `group=group-a` satisfies that.
+    #[arg(long, default_value = "group=group-a")]
+    labels: String,
+    /// libvirt network name to attach the VM to.
+    #[arg(long, default_value = "default")]
+    network: String,
+    /// Admin username created on first boot.
+    #[arg(long, default_value = "fleet-admin")]
+    admin_user: String,
+    /// Optional plaintext password for the admin user. Enables SSH
+    /// password auth on the guest — intended for interactive
+    /// debugging / reliability-testing sessions where the operator
+    /// wants to break things on purpose. Leave unset for key-only
+    /// auth (production default).
+    #[arg(long, env = "FLEET_VM_ADMIN_PASSWORD")]
+    admin_password: Option<String>,
+    /// Path to the cross-compiled fleet-agent binary.
+    /// Required unless `--bootstrap-only` or `--only-vm` is set.
+    #[arg(long)]
+    agent_binary: Option<PathBuf>,
+    /// NATS URL the agent should connect to.
+    #[arg(long, default_value = "nats://192.168.122.1:4222")]
+    nats_url: String,
+    #[arg(long, default_value = "smoke")]
+    nats_user: String,
+    #[arg(long, default_value = "smoke")]
+    nats_pass: String,
+    /// Only run the VM-provisioning step; skip device setup.
+    #[arg(long)]
+    only_vm: bool,
+    /// Run preflight + asset bootstrap (ansible venv, cloud image,
+    /// SSH key, libvirt pool) and exit.
+    #[arg(long)]
+    bootstrap_only: bool,
+    /// Virtual disk size in GiB. The stock Ubuntu cloud image has
+    /// only ~2 GiB of root — resized on first boot by
+    /// cloud-initramfs-growroot. Bump this to 16 GiB by default so
+    /// podman can sideload a couple of container images without
+    /// running out of space.
+    #[arg(long, default_value_t = 16)]
+    disk_size_gb: u32,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    env_logger::init();
+    let cli = Cli::parse();
+    let arch: VmArchitecture = cli.arch.into();
+
+    check_fleet_smoke_preflight_for_arch(arch)
+        .await
+        .map_err(|e| anyhow::anyhow!("{e}"))?;
+
+    if cli.bootstrap_only {
+        harmony::modules::linux::ensure_ansible_venv()
+            .await
+            .map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?;
+        harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
+            .await
+            .map_err(|e| anyhow::anyhow!("cloud image: {e}"))?;
+        ensure_fleet_ssh_keypair()
+            .await
+            .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
+        harmony::modules::fleet::ensure_harmony_fleet_pool()
+            .await
+            .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
+        println!("bootstrap complete");
+        return Ok(());
+    }
+
+    // --- Step 1: provision the VM ---
+    let base_image = harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
+        .await
+        .map_err(|e| anyhow::anyhow!("cloud image: {e}"))?;
+    let pool = harmony::modules::fleet::ensure_harmony_fleet_pool()
+        .await
+        .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
+    let ssh = ensure_fleet_ssh_keypair()
+        .await
+        .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
+    let authorized_key = harmony::modules::fleet::read_public_key(&ssh)
+        .await
+        .map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?;
+
+    let executor = init_executor().map_err(|e| anyhow::anyhow!("KVM init: {e}"))?;
+    let vm_host = KvmVirtualMachineHost::new(
+        "kvm-local",
+        executor,
+        pool.name.clone(),
+        pool.path.clone(),
+        base_image,
+    );
+
+    let vm_score = ProvisionVmScore {
+        spec: VirtualMachineSpec {
+            name: cli.vm_name.clone(),
+            architecture: arch,
+            cpus: 2,
+            memory_mib: 2048,
+            disk_size_gb: Some(cli.disk_size_gb),
+            network: cli.network.clone(),
+            first_boot: Some(VmFirstBootConfig {
+                hostname: Some(cli.vm_name.clone()),
+                admin_user: Some(cli.admin_user.clone()),
+                authorized_keys: vec![authorized_key],
+                admin_password: cli.admin_password.clone(),
+            }),
+        },
+    };
+    let vm_ip = run_vm_score(&vm_score, &vm_host).await?;
+    println!("VM '{}' up at {vm_ip}", cli.vm_name);
+
+    if cli.only_vm {
+        return Ok(());
+    }
+
+    // --- Step 2: onboard the VM (NOTE: its flags are only validated here, after boot) ---
+    let agent_binary = cli
+        .agent_binary
+        .clone()
+        .context("--agent-binary is required (e.g. target/release/fleet-agent-v0)")?;
+    let device_id = cli
+        .device_id
+        .clone()
+        .map(Id::from)
+        .unwrap_or_else(Id::default);
+
+    let linux_topology = LinuxHostTopology::new(
+        format!("linux-{}", cli.vm_name),
+        vm_ip.parse().context("VM IP is not a valid IP address")?,
+        SshCredentials {
+            user: cli.admin_user.clone(),
+            private_key_path: ssh.private_key.clone(),
+            remote_python: Some("/usr/bin/python3".to_string()),
+            sudo_password: None,
+        },
+    );
+
+    let labels = parse_labels(&cli.labels)?;
+    let labels_display = labels
+        .iter()
+        .map(|(k, v)| format!("{k}={v}"))
+        .collect::<Vec<_>>()
+        .join(",");
+
+    let setup_score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig {
+        device_id: device_id.clone(),
+        labels,
+        nats_urls: vec![cli.nats_url.clone()],
+        // VM smoke harness keeps shared-creds for v0; the customer-
+        // facing Pi flow uses Zitadel JWT (see fleet_rpi_setup).
+        auth: harmony::modules::fleet::FleetDeviceAuth::TomlShared {
+            nats_user: cli.nats_user.clone(),
+            nats_pass: cli.nats_pass.clone(),
+        },
+        agent_binary_path: agent_binary,
+        hosts_entries: vec![],
+    });
+
+    run_setup_score(&setup_score, &linux_topology).await?;
+    println!("device '{device_id}' ({labels_display}) onboarded via {vm_ip}");
+    Ok(())
+}
+
+/// Parses `key=value,key=value` into a sorted map, skipping blank chunks.
+/// Fails on a chunk without `=`, on an empty key or value, and on an
+/// empty result: an unlabeled device could not be targeted at all.
+fn parse_labels(raw: &str) -> anyhow::Result<std::collections::BTreeMap<String, String>> {
+    let mut labels = std::collections::BTreeMap::new();
+    for chunk in raw.split(',') {
+        let chunk = chunk.trim();
+        if chunk.is_empty() {
+            continue;
+        }
+        let Some((key, value)) = chunk.split_once('=') else {
+            anyhow::bail!("label chunk '{chunk}' missing '='");
+        };
+        let (key, value) = (key.trim(), value.trim());
+        if key.is_empty() || value.is_empty() {
+            anyhow::bail!("label chunk '{chunk}' has empty key or value");
+        }
+        labels.insert(key.to_owned(), value.to_owned());
+    }
+    anyhow::ensure!(!labels.is_empty(), "--labels must include at least one key=value pair");
+    Ok(labels)
+}
+
+/// Executes the VM-provisioning score against `topology` and returns the
+/// value of the first outcome detail that starts with `ip=`.
+async fn run_vm_score(
+    score: &ProvisionVmScore,
+    topology: &KvmVirtualMachineHost,
+) -> Result<String> {
+    use harmony::score::Score;
+    let inventory = Inventory::empty();
+    let outcome = Score::<KvmVirtualMachineHost>::create_interpret(score)
+        .execute(&inventory, topology)
+        .await
+        .map_err(|err| anyhow::anyhow!("ProvisionVmScore execute: {err}"))?;
+    for detail in &outcome.details {
+        let Some(ip) = detail.strip_prefix("ip=") else { continue };
+        return Ok(ip.to_owned());
+    }
+    anyhow::bail!("ProvisionVmScore finished without reporting an IP: {outcome:?}")
+}
+
+/// Executes the device-setup score against `topology` and prints its outcome.
+async fn run_setup_score(
+    score: &FleetDeviceSetupScore,
+    topology: &LinuxHostTopology,
+) -> Result<()> {
+    use harmony::score::Score;
+    let inventory = Inventory::empty();
+    let outcome = Score::<LinuxHostTopology>::create_interpret(score)
+        .execute(&inventory, topology)
+        .await
+        .map_err(|err| anyhow::anyhow!("FleetDeviceSetupScore execute: {err}"))?;
+    println!("setup: {} ({:?})", outcome.message, outcome.details);
+    Ok(())
+}
--- a/examples/harmony_apply_deployment/Cargo.toml
+++ b/examples/harmony_apply_deployment/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "example_harmony_apply_deployment"
+version.workspace = true
+edition = "2024"
+license.workspace = true
+
+[[bin]]
+name = "harmony_apply_deployment"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony", default-features = false, features = ["podman"] }
+kube = { workspace = true, features = ["runtime", "derive"] }
+k8s-openapi = { workspace = true }
+serde_json.workspace = true
+tokio.workspace = true
+anyhow.workspace = true
+clap.workspace = true
--- a/examples/harmony_apply_deployment/src/main.rs
+++ b/examples/harmony_apply_deployment/src/main.rs
@@ -0,0 +1,239 @@
+//! Typed-Rust applier for the harmony fleet `Deployment` CR.
+//!
+//! Builds a `Deployment` CR via the typed `DeploymentSpec` +
+//! `PodmanV0Score` + `kube::Api`, then either applies it directly
+//! through the kube client or prints it to stdout so the user can
+//! pipe into `kubectl apply -f -`.
+//!
+//! The CRD is domain-agnostic — it's "declarative reconcile intent
+//! for a set of devices matched by label selector," which is the
+//! same shape whether the fleet is Pi podman, OKD clusters, or
+//! KVM VMs. The name `harmony_apply_deployment` reflects that
+//! (not `iot_`-anything), in line with the review call to position
+//! the operator as a generic fleet/reconcile tool.
+//!
+//! The CRD types live in `harmony::modules::fleet::operator`; the score types
+//! live in `harmony::modules::podman` (PodmanV0 being the first
+//! reconciler variant — future variants drop in alongside).
+//!
+//! Typical demo-driver usage:
+//!
+//!     # apply an nginx deployment
+//!     cargo run -q -p example_harmony_apply_deployment -- \
+//!         --target-device fleet-smoke-vm-arm \
+//!         --image nginx:latest
+//!
+//!     # print the CR JSON (lets the user kubectl-apply it manually)
+//!     cargo run -q -p example_harmony_apply_deployment -- \
+//!         --target-device fleet-smoke-vm-arm \
+//!         --image nginx:latest --print | kubectl apply -f -
+//!
+//!     # upgrade the same deployment to a newer image
+//!     cargo run -q -p example_harmony_apply_deployment -- \
+//!         --target-device fleet-smoke-vm-arm \
+//!         --image nginx:1.26
+//!
+//!     # delete the deployment
+//!     cargo run -q -p example_harmony_apply_deployment -- --delete
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use harmony::modules::fleet::operator::crd::{
+    Deployment, DeploymentSpec, Rollout, RolloutStrategy,
+};
+use harmony::modules::podman::{PodmanService, PodmanV0Score, ReconcileScore};
+use harmony::topology::{EnvVar, RestartPolicy, VolumeMount};
+use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
+use kube::Client;
+use kube::api::{Api, DeleteParams, Patch, PatchParams};
+use std::collections::BTreeMap;
+
+// Command-line surface of the applier. Field `///` comments double as the
+// `--help` text generated by clap, so they are written for the end user.
+#[derive(Parser, Debug)]
+#[command(
+    name = "harmony_apply_deployment",
+    about = "Build + apply a harmony fleet Deployment CR from typed Rust (no yaml)"
+)]
+struct Cli {
+    /// Kubernetes namespace for the Deployment CR.
+    #[arg(long, default_value = "fleet-demo")]
+    namespace: String,
+    /// Deployment CR name. Also used as the KV key suffix and
+    /// podman container name on the device.
+    #[arg(long, default_value = "hello-world")]
+    name: String,
+    /// Shortcut: if set, picks a single device by id. Shorthand for
+    /// `--selector device-id=<target_device>` — the agent publishes
+    /// a `device-id=<id>` label on its DeviceInfo by default so this
+    /// works without any cluster-side label pre-wiring.
+    #[arg(long, default_value = "fleet-smoke-vm")]
+    target_device: String,
+    /// Repeatable `key=value` label selector. Takes precedence over
+    /// `--target-device` when provided. All pairs AND together.
+    #[arg(long = "selector", value_name = "KEY=VALUE")]
+    selectors: Vec<String>,
+    /// Container image to run.
+    #[arg(long, default_value = "docker.io/library/nginx:latest")]
+    image: String,
+    /// `host:container` port mapping exposed on the device.
+    #[arg(long, default_value = "8080:80")]
+    port: String,
+    /// Repeatable `KEY=VALUE` env var injected into the container.
+    #[arg(long = "env", value_name = "KEY=VALUE")]
+    envs: Vec<String>,
+    /// Repeatable bind-mount in `host_path:container_path[:ro|rw]` form.
+    /// Append `:ro` for read-only; `:rw` (the default when omitted) is
+    /// also accepted.
+    // Help text kept in sync with `parse_volume`, which accepts both modes.
+    #[arg(long = "volume", value_name = "HOST:CONTAINER[:ro|rw]")]
+    volumes: Vec<String>,
+    /// Container restart policy.
+    #[arg(long, value_enum, default_value_t = CliRestart::UnlessStopped)]
+    restart: CliRestart,
+    /// Delete the Deployment CR instead of applying it.
+    #[arg(long)]
+    delete: bool,
+    /// Print the CR as JSON to stdout instead of applying it.
+    /// Useful for piping into `kubectl apply -f -`.
+    #[arg(long)]
+    print: bool,
+}
+
+/// Entry point: parses the CLI, then prints, deletes, or server-side-applies
+/// the `Deployment` CR.
+///
+/// `--print` is handled first and returns before any cluster access, so it
+/// takes precedence over `--delete`.
+///
+/// # Errors
+///
+/// Returns an error for malformed `--env` / `--volume` / `--selector`
+/// values, when the kube client cannot be built, or when the delete/apply
+/// call fails (a 404 on delete is reported as "already gone", not an error).
+#[tokio::main]
+async fn main() -> Result<()> {
+    let cli = Cli::parse();
+
+    // `build_cr` panics on malformed `--env` / `--volume` / `--selector`
+    // values; reject them here first so the user gets a regular error
+    // message instead of a panic.
+    for raw in &cli.envs {
+        parse_env(raw)?;
+    }
+    for raw in &cli.volumes {
+        parse_volume(raw)?;
+    }
+    if let Some(bad) = cli.selectors.iter().find(|kv| !kv.contains('=')) {
+        anyhow::bail!("--selector expects KEY=VALUE, got '{bad}'");
+    }
+
+    let cr = build_cr(&cli);
+
+    if cli.print {
+        println!("{}", serde_json::to_string_pretty(&cr)?);
+        return Ok(());
+    }
+
+    let client = Client::try_default()
+        .await
+        .context("building kube client (is KUBECONFIG set?)")?;
+    let api: Api<Deployment> = Api::namespaced(client, &cli.namespace);
+
+    if cli.delete {
+        match api.delete(&cli.name, &DeleteParams::default()).await {
+            Ok(_) => println!("deleted deployment '{}/{}'", cli.namespace, cli.name),
+            // Deleting something that is already absent is treated as success.
+            Err(kube::Error::Api(ae)) if ae.code == 404 => {
+                println!(
+                    "deployment '{}/{}' not found (already gone)",
+                    cli.namespace, cli.name
+                )
+            }
+            Err(e) => anyhow::bail!("delete failed: {e}"),
+        }
+        return Ok(());
+    }
+
+    // Server-side apply so repeated invocations (upgrades) patch
+    // the existing CR instead of erroring with "already exists."
+    let params = PatchParams::apply("harmony-apply-deployment").force();
+    let applied = api
+        .patch(&cli.name, &params, &Patch::Apply(&cr))
+        .await
+        .context("applying Deployment CR")?;
+    let meta = applied.metadata;
+    println!(
+        "applied deployment '{}/{}' (resourceVersion={}, image={})",
+        cli.namespace,
+        meta.name.as_deref().unwrap_or("?"),
+        meta.resource_version.as_deref().unwrap_or("?"),
+        cli.image,
+    );
+    Ok(())
+}
+
+/// Mirrors `harmony::topology::RestartPolicy` so we can keep the CLI
+/// schema stable even if the underlying enum gains variants.
+//
+// Used as the type of the `--restart` flag (default `UnlessStopped`) and
+// converted into `RestartPolicy` through the `From` impl that follows.
+// NOTE(review): `///` comments on the variants would presumably surface in
+// clap's generated help for the possible values, so notes here use `//`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
+enum CliRestart {
+    No,
+    UnlessStopped,
+    OnFailure,
+    Always,
+}
+
+/// Maps each CLI restart choice onto the `RestartPolicy` variant of the
+/// same name.
+impl From<CliRestart> for RestartPolicy {
+    fn from(choice: CliRestart) -> Self {
+        // Deliberately exhaustive (no `_` arm): adding a `CliRestart`
+        // variant must fail to compile until it is mapped here.
+        match choice {
+            CliRestart::Always => Self::Always,
+            CliRestart::OnFailure => Self::OnFailure,
+            CliRestart::UnlessStopped => Self::UnlessStopped,
+            CliRestart::No => Self::No,
+        }
+    }
+}
+
+/// Splits a `KEY=VALUE` argument at the first `=` into an owned pair.
+///
+/// Everything after the first `=` (including any further `=` characters)
+/// belongs to the value.
+///
+/// # Errors
+///
+/// Returns an error when `s` contains no `=` at all.
+fn parse_env(s: &str) -> Result<(String, String)> {
+    match s.split_once('=') {
+        Some((key, value)) => Ok((key.to_string(), value.to_string())),
+        None => Err(anyhow::anyhow!("--env expects KEY=VALUE, got {s:?}")),
+    }
+}
+
+/// Parses a `HOST:CONTAINER[:ro|rw]` bind-mount argument.
+///
+/// The input is split on every `:`. Two fields give a read-write mount; a
+/// third field of `ro` makes it read-only, and `rw` keeps it read-write.
+///
+/// # Errors
+///
+/// Returns an error for any other field count or any other mode string.
+fn parse_volume(s: &str) -> Result<VolumeMount> {
+    let fields: Vec<&str> = s.split(':').collect();
+    // Decide the mode first; the arms also pin the field count to 2 or 3,
+    // which makes the indexing below safe.
+    let read_only = match fields.as_slice() {
+        [_, _] | [_, _, "rw"] => false,
+        [_, _, "ro"] => true,
+        _ => anyhow::bail!("--volume expects HOST:CONTAINER[:ro|rw], got {s:?}"),
+    };
+    Ok(VolumeMount {
+        host_path: fields[0].to_string(),
+        container_path: fields[1].to_string(),
+        read_only,
+    })
+}
+
+/// Assembles the `Deployment` CR described by the CLI flags.
+///
+/// The CR wraps a single-service `PodmanV0Score` (name, image, one port
+/// mapping, env vars, volumes, restart policy) and targets devices by
+/// label: the `--selector` pairs when any are given, otherwise
+/// `device-id=<target_device>`. The rollout strategy is always
+/// `Immediate`.
+///
+/// The CR's `metadata.namespace` is set from `--namespace`, so the JSON
+/// emitted by `--print` carries the namespace with it when piped into
+/// `kubectl apply -f -`.
+///
+/// # Panics
+///
+/// Panics when an `--env`, `--volume` or `--selector` value is malformed.
+fn build_cr(cli: &Cli) -> Deployment {
+    let env: Vec<EnvVar> = cli
+        .envs
+        .iter()
+        .map(|s| EnvVar::from(parse_env(s).expect("--env validated")))
+        .collect();
+    let volumes: Vec<VolumeMount> = cli
+        .volumes
+        .iter()
+        .map(|s| parse_volume(s).expect("--volume validated"))
+        .collect();
+
+    let score = PodmanV0Score {
+        services: vec![PodmanService {
+            name: cli.name.clone(),
+            image: cli.image.clone(),
+            ports: vec![cli.port.clone()],
+            env,
+            volumes,
+            restart_policy: cli.restart.into(),
+        }],
+    };
+
+    let payload = ReconcileScore::PodmanV0(score);
+
+    // Explicit selectors win; `--target-device` is only the fallback.
+    let mut match_labels = BTreeMap::new();
+    if cli.selectors.is_empty() {
+        match_labels.insert("device-id".to_string(), cli.target_device.clone());
+    } else {
+        for kv in &cli.selectors {
+            let (k, v) = kv
+                .split_once('=')
+                .unwrap_or_else(|| panic!("--selector expects KEY=VALUE, got '{kv}'"));
+            match_labels.insert(k.to_string(), v.to_string());
+        }
+    }
+
+    let mut cr = Deployment::new(
+        &cli.name,
+        DeploymentSpec {
+            target_selector: LabelSelector {
+                match_labels: Some(match_labels),
+                match_expressions: None,
+            },
+            score: payload,
+            rollout: Rollout {
+                strategy: RolloutStrategy::Immediate,
+            },
+        },
+    );
+    // The constructor is given only a name and a spec. Without an explicit
+    // namespace the printed CR would be applied to whatever namespace
+    // kubectl currently defaults to, silently ignoring `--namespace`.
+    cr.metadata.namespace = Some(cli.namespace.clone());
+    cr
+}
--- a/examples/harmony_sso/src/main.rs
+++ b/examples/harmony_sso/src/main.rs
@@ -118,6 +118,8 @@ async fn deploy_zitadel(k3d: &K3d) -> anyhow::Result<()> {
        host: ZITADEL_HOST.to_string(),
        zitadel_version: "v4.12.1".to_string(),
        external_secure: false,
+        external_port: None,
+        ..Default::default()
    };

    let topology = create_topology(k3d);
@@ -294,13 +296,19 @@ async fn main() -> anyhow::Result<()> {
    // Provision Zitadel project + device-code application
    ZitadelSetupScore {
        host: ZITADEL_HOST.to_string(),
-        port: HTTP_PORT as u16,
+        scheme: Default::default(),
+        port: None,
        skip_tls: true,
+        endpoint: Some(format!("http://127.0.0.1:{HTTP_PORT}")),
+        admin_org_id: None,
+        namespace: "zitadel".to_string(),
        applications: vec![ZitadelApplication {
            project_name: PROJECT_NAME.to_string(),
            app_name: APP_NAME.to_string(),
            app_type: ZitadelAppType::DeviceCode,
        }],
+        api_apps: vec![],
+        roles: vec![],
        machine_users: vec![],
    }
    .interpret(&Inventory::autoload(), &topology)
--- a/examples/k8s_drain_node/src/main.rs
+++ b/examples/k8s_drain_node/src/main.rs
@@ -27,7 +27,7 @@ async fn main() {
    if drain {
        let mut options = DrainOptions::default_ignore_daemonset_delete_emptydir_data();
        options.timeout = Duration::from_secs(1);
-        k8s.drain_node(&node_name, &options).await.unwrap();
+        k8s.drain_node(node_name, &options).await.unwrap();

        info!("Node {node_name} successfully drained");
    }
@@ -49,7 +49,7 @@ async fn main() {

    if reboot {
        k8s.reboot_node(
-            &node_name,
+            node_name,
            &DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
            Duration::from_secs(3600),
        )
--- a/examples/k8s_write_file_on_node/src/main.rs
+++ b/examples/k8s_write_file_on_node/src/main.rs
@@ -22,19 +22,19 @@ async fn main() {
    let content = inquire::Text::new("File content").prompt().unwrap();

    let node_file = NodeFile {
-        path: path,
-        content: content,
+        path,
+        content,
        mode: 0o600,
    };

-    k8s.write_files_to_node(&node, &vec![node_file.clone()])
+    k8s.write_files_to_node(node, std::slice::from_ref(&node_file))
        .await
        .unwrap();

    let cmd = inquire::Text::new("Command to run on node")
        .prompt()
        .unwrap();
-    k8s.run_privileged_command_on_node(&node, &cmd)
+    k8s.run_privileged_command_on_node(node, &cmd)
        .await
        .unwrap();

--- a/examples/kube-rs/src/main.rs
+++ b/examples/kube-rs/src/main.rs
@@ -66,12 +66,12 @@ async fn main() {
        Ok(_d) => println!("Deployment success"),
        Err(e) => {
            println!("Error creating deployment {}", e);
-            if let kube::Error::Api(error_response) = &e {
-                if error_response.code == http::StatusCode::CONFLICT.as_u16() {
+            if let kube::Error::Api(error_response) = &e
+                && error_response.code == http::StatusCode::CONFLICT.as_u16()
+            {
                println!("Already exists");
                return;
            }
-            }
            panic!("{}", e)
        }
    };
--- a/examples/kvm_okd_ha_cluster/Cargo.toml
+++ b/examples/kvm_okd_ha_cluster/Cargo.toml
@@ -9,7 +9,7 @@ name = "kvm_okd_ha_cluster"
 path = "src/main.rs"

 [dependencies]
-harmony = { path = "../../harmony" }
+harmony = { path = "../../harmony", features = ["kvm"] }
 tokio.workspace = true
 log.workspace = true
 env_logger.workspace = true
--- a/examples/kvm_vm_examples/Cargo.toml
+++ b/examples/kvm_vm_examples/Cargo.toml
@@ -9,7 +9,7 @@ name = "kvm-vm-examples"
 path = "src/main.rs"

 [dependencies]
-harmony = { path = "../../harmony" }
+harmony = { path = "../../harmony", features = ["kvm"] }
 tokio.workspace = true
 log.workspace = true
 env_logger.workspace = true
--- a/examples/kvm_vm_examples/src/main.rs
+++ b/examples/kvm_vm_examples/src/main.rs
@@ -42,7 +42,7 @@
 use clap::{Parser, Subcommand};
 use harmony::modules::kvm::config::init_executor;
 use harmony::modules::kvm::{
-    BootDevice, ForwardMode, KvmExecutor, NetworkConfig, NetworkRef, VmConfig, VmStatus,
+    BootDevice, ForwardMode, KvmExecutor, NetworkConfig, NetworkRef, VmConfig,
 };
 use log::info;

@@ -345,7 +345,7 @@ async fn status(executor: &KvmExecutor, scenario: &str) -> Result<(), Box<dyn st
        }
    };

-    println!("{:<20} {}", "VM", "STATUS");
+    println!("{:<20} STATUS", "VM");
    println!("{}", "-".repeat(35));
    for vm in &vms {
        let status = match executor.vm_status(vm).await {
--- a/examples/nats-module/src/main.rs
+++ b/examples/nats-module/src/main.rs
@@ -22,9 +22,9 @@ async fn main() {
        name: site_1_name.clone(),
        gateway_advertise: format!("{site_1_name}-gw.{site_1_domain}:443"),
        dns_name: format!("{site_1_name}-gw.{site_1_domain}"),
-        supercluster_ca_secret_name: supercluster_ca_secret_name,
-        tls_cert_name: tls_cert_name,
-        jetstream_enabled: jetstream_enabled,
+        supercluster_ca_secret_name,
+        tls_cert_name,
+        jetstream_enabled,
    };

    let site_2_name = "site-2".to_string();
@@ -38,9 +38,9 @@ async fn main() {
        name: site_2_name.clone(),
        gateway_advertise: format!("{site_2_name}-gw.{site_2_domain}:443"),
        dns_name: format!("{site_2_name}-gw.{site_2_domain}"),
-        supercluster_ca_secret_name: supercluster_ca_secret_name,
-        tls_cert_name: tls_cert_name,
-        jetstream_enabled: jetstream_enabled,
+        supercluster_ca_secret_name,
+        tls_cert_name,
+        jetstream_enabled,
    };

    let site_3_name = "site-3".to_string();
@@ -54,9 +54,9 @@ async fn main() {
        name: site_3_name.clone(),
        gateway_advertise: format!("{site_3_name}-gw.{site_3_domain}:443"),
        dns_name: format!("{site_3_name}-gw.{site_3_domain}"),
-        supercluster_ca_secret_name: supercluster_ca_secret_name,
-        tls_cert_name: tls_cert_name,
-        jetstream_enabled: jetstream_enabled,
+        supercluster_ca_secret_name,
+        tls_cert_name,
+        jetstream_enabled,
    };

    let clusters = vec![nats_site_1, nats_site_2, nats_site_3];
--- a/examples/nats-supercluster/src/main.rs
+++ b/examples/nats-supercluster/src/main.rs
@@ -253,10 +253,7 @@ async fn create_nats_certs<T: Topology + CertificateManagement>(

    debug!("creating issuer '{}'", self_signed_issuer_name);
    topology
-        .create_issuer(
-            self_signed_issuer_name.to_string(),
-            &self_signed_cert_config,
-        )
+        .create_issuer(self_signed_issuer_name.to_string(), self_signed_cert_config)
        .await?;

    debug!("creating certificate {root_ca_cert_name}");
@@ -294,7 +291,7 @@ async fn create_nats_certs<T: Topology + CertificateManagement>(
 async fn build_ca_bundle_secret(
    namespace: &str,
    nats_cluster: &NatsCluster,
-    bundle: &Vec<String>,
+    bundle: &[String],
 ) -> Secret {
    Secret {
        metadata: ObjectMeta {
@@ -309,7 +306,7 @@ async fn build_ca_bundle_secret(
    }
 }

-async fn build_secret_data(bundle: &Vec<String>) -> BTreeMap<String, ByteString> {
+async fn build_secret_data(bundle: &[String]) -> BTreeMap<String, ByteString> {
    let mut data = BTreeMap::new();

    data.insert(
@@ -323,7 +320,7 @@ async fn build_secret_data(bundle: &Vec<String>) -> BTreeMap<String, ByteString>
 async fn build_ca_bundle_secret_score<T: Topology + K8sclient + 'static>(
    _topology: T,
    nats_cluster: &NatsCluster,
-    ca_bundle: &Vec<String>,
+    ca_bundle: &[String],
    namespace: String,
 ) -> Box<dyn Score<T>> {
    let bundle_secret = build_ca_bundle_secret(&namespace, nats_cluster, ca_bundle).await;
@@ -343,6 +340,7 @@ async fn build_route_score<T: Topology + K8sclient + 'static>(
    let route = OKDRouteScore {
        name: cluster.name.to_string(),
        namespace,
+        annotations: Default::default(),
        spec: RouteSpec {
            to: RouteTargetReference {
                kind: "Service".to_string(),
@@ -383,6 +381,7 @@ async fn build_deploy_nats_score<T: Topology + HelmCommand + TlsRouter + 'static
    let domain = topology.get_internal_domain().await.unwrap().unwrap();

    // Inject gateway config into the 'merge' block to comply with chart structure
+    let tls_secret_name = format!("{}-tls", cluster.tls_cert_name);
    let values_yaml = Some(format!(
        r#"config:
  merge:
@@ -455,7 +454,7 @@ natsBox:
        domain = domain,
        gateway_gateways = gateway_gateways,
        gateway_advertise = cluster.gateway_advertise,
-        tls_secret_name = format!("{}-tls", cluster.tls_cert_name),
+        tls_secret_name = tls_secret_name,
        jetstream_enabled = cluster.jetstream_enabled,
        supercluster_ca_secret_name = cluster.supercluster_ca_secret_name,
    ));
@@ -463,7 +462,7 @@ natsBox:
    debug!("Prepared Helm Chart values : \n{values_yaml:#?}");
    let nats = HelmChartScore {
        namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
-        release_name: NonBlankString::from_str(&cluster.name).unwrap(),
+        release_name: NonBlankString::from_str(cluster.name).unwrap(),
        chart_name: NonBlankString::from_str("nats/nats").unwrap(),
        chart_version: None,
        values_overrides: None,
--- a/examples/okd_cluster_alerts/src/main.rs
+++ b/examples/okd_cluster_alerts/src/main.rs
@@ -28,7 +28,7 @@ async fn main() {
            receivers: vec![Box::new(DiscordWebhook {
                name: K8sName("wills-discord-webhook-example".to_string()),
                url: hurl!("https://something.io"),
-                selectors: selectors,
+                selectors,
            })],
        })],
        None,
--- a/examples/opnsense_pair_integration/Cargo.toml
+++ b/examples/opnsense_pair_integration/Cargo.toml
@@ -9,7 +9,7 @@ name = "opnsense-pair-integration"
 path = "src/main.rs"

 [dependencies]
-harmony = { path = "../../harmony" }
+harmony = { path = "../../harmony", features = ["kvm"] }
 harmony_cli = { path = "../../harmony_cli" }
 harmony_inventory_agent = { path = "../../harmony_inventory_agent" }
 harmony_macros = { path = "../../harmony_macros" }
--- a/examples/opnsense_pair_integration/src/main.rs
+++ b/examples/opnsense_pair_integration/src/main.rs
@@ -312,11 +312,11 @@ async fn run_pair_test() -> Result<(), Box<dyn std::error::Error>> {

    // Build FirewallPairTopology
    let primary_host = LogicalHost {
-        ip: primary_ip.into(),
+        ip: primary_ip,
        name: VM_PRIMARY.to_string(),
    };
    let backup_host = LogicalHost {
-        ip: backup_ip.into(),
+        ip: backup_ip,
        name: VM_BACKUP.to_string(),
    };
    let primary_api_creds = OPNSenseApiCredentials {
--- a/examples/opnsense_vm_integration/Cargo.toml
+++ b/examples/opnsense_vm_integration/Cargo.toml
@@ -9,7 +9,7 @@ name = "opnsense-vm-integration"
 path = "src/main.rs"

 [dependencies]
-harmony = { path = "../../harmony" }
+harmony = { path = "../../harmony", features = ["kvm"] }
 harmony_cli = { path = "../../harmony_cli" }
 harmony_inventory_agent = { path = "../../harmony_inventory_agent" }
 harmony_macros = { path = "../../harmony_macros" }
--- a/examples/opnsense_vm_integration/src/main.rs
+++ b/examples/opnsense_vm_integration/src/main.rs
@@ -253,7 +253,7 @@ async fn run_integration() -> Result<(), Box<dyn std::error::Error>> {

    // Build topology
    let firewall_host = LogicalHost {
-        ip: vm_ip.into(),
+        ip: vm_ip,
        name: VM_NAME.to_string(),
    };
    let api_creds = OPNSenseApiCredentials {
@@ -343,7 +343,7 @@ async fn run_integration() -> Result<(), Box<dyn std::error::Error>> {
    info!("=== IDEMPOTENCY TEST: Running all Scores a SECOND time ===");
    let scores_round2 = build_all_scores()?;
    let firewall_host2 = LogicalHost {
-        ip: vm_ip.into(),
+        ip: vm_ip,
        name: VM_NAME.to_string(),
    };
    let opnsense2 =
@@ -562,8 +562,11 @@ async fn verify_state(
    })
 }

+type FirewallScore = Box<dyn Score<OPNSenseFirewall>>;
+type BuildScoresResult = Result<Vec<FirewallScore>, Box<dyn std::error::Error>>;
+
 /// Build all test Scores — extracted so we can call it for both run 1 and run 2.
-fn build_all_scores() -> Result<Vec<Box<dyn Score<OPNSenseFirewall>>>, Box<dyn std::error::Error>> {
+fn build_all_scores() -> BuildScoresResult {
    let lb_score = LoadBalancerScore {
        public_services: vec![
            LoadBalancerService {
--- a/examples/penpot/src/main.rs
+++ b/examples/penpot/src/main.rs
@@ -1,12 +1,3 @@
-use std::{collections::HashMap, str::FromStr};
-
-use harmony::{
-    inventory::Inventory,
-    modules::helm::chart::{HelmChartScore, HelmRepository, NonBlankString},
-    topology::K8sAnywhereTopology,
-};
-use harmony_macros::hurl;
-
 #[tokio::main]
 async fn main() {
    // let mut chart_values = HashMap::new();
--- a/examples/try_rust_webapp/Cargo.toml
+++ b/examples/try_rust_webapp/Cargo.toml
@@ -4,6 +4,7 @@ edition = "2024"
 version.workspace = true
 readme.workspace = true
 license.workspace = true
+autobins = false

 [[example]]
 name = "try_rust_webapp"
--- a/examples/zitadel/src/main.rs
+++ b/examples/zitadel/src/main.rs
@@ -8,6 +8,8 @@ async fn main() {
        host: "sso.sto1.nationtech.io".to_string(),
        zitadel_version: "v4.12.1".to_string(),
        external_secure: true,
+        external_port: None,
+        ..Default::default()
    };

    harmony_cli::run(
--- a/fleet/ARCHITECTURE.html
+++ b/fleet/ARCHITECTURE.html
@@ -0,0 +1,599 @@
+<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+<title>Harmony Fleet — Architecture</title>
+<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
+<script>
+  mermaid.initialize({
+    startOnLoad: true,
+    theme: "base",
+    themeVariables: {
+      fontFamily: "ui-sans-serif, -apple-system, Segoe UI, Inter, sans-serif",
+      primaryColor: "#eef3fb",
+      primaryBorderColor: "#7a93b7",
+      primaryTextColor: "#1f2937",
+      lineColor: "#5b6b80",
+      tertiaryColor: "#fafbfd",
+      clusterBkg: "#f6f8fc",
+      clusterBorder: "#c6d2e2",
+      noteBkgColor: "#fff8e1",
+      noteTextColor: "#3a2f00",
+      actorBkg: "#eef3fb",
+      actorBorder: "#7a93b7",
+      sequenceNumberColor: "#1f2937"
+    }
+  });
+</script>
+<style>
+  :root {
+    --ink: #1f2937;
+    --ink-soft: #4b5563;
+    --paper: #ffffff;
+    --paper-tint: #f6f8fc;
+    --rule: #e3e8ef;
+    --accent: #2c5282;
+    --accent-soft: #ebf2fb;
+    --warn: #b7791f;
+    --warn-soft: #fff8e1;
+    --mono: ui-monospace, SFMono-Regular, "JetBrains Mono", Menlo, Consolas, monospace;
+    --sans: ui-sans-serif, -apple-system, "Segoe UI", Inter, system-ui, sans-serif;
+  }
+  * { box-sizing: border-box; }
+  html, body {
+    margin: 0;
+    background: var(--paper);
+    color: var(--ink);
+    font-family: var(--sans);
+    line-height: 1.6;
+    font-size: 16px;
+  }
+  main {
+    max-width: 880px;
+    margin: 0 auto;
+    padding: 4rem 1.5rem 6rem;
+  }
+  header.hero {
+    margin-bottom: 3rem;
+    border-bottom: 1px solid var(--rule);
+    padding-bottom: 2rem;
+  }
+  header.hero h1 {
+    font-size: 2.4rem;
+    line-height: 1.15;
+    letter-spacing: -0.02em;
+    margin: 0 0 1rem;
+    color: var(--ink);
+  }
+  header.hero p.subtitle {
+    margin: 0;
+    color: var(--ink-soft);
+    font-size: 1.1rem;
+  }
+  header.hero p.subtitle b { color: var(--ink); font-weight: 600; }
+  h2 {
+    margin-top: 3.5rem;
+    margin-bottom: 1rem;
+    font-size: 1.55rem;
+    letter-spacing: -0.01em;
+    color: var(--ink);
+    display: flex;
+    align-items: baseline;
+    gap: 0.75rem;
+  }
+  h2 .layer {
+    font-size: 0.7rem;
+    text-transform: uppercase;
+    letter-spacing: 0.08em;
+    color: var(--accent);
+    background: var(--accent-soft);
+    padding: 0.15rem 0.55rem;
+    border-radius: 999px;
+    font-weight: 600;
+    line-height: 1.6;
+    flex-shrink: 0;
+  }
+  h3 {
+    margin-top: 2rem;
+    font-size: 1.1rem;
+    color: var(--ink);
+  }
+  p, li { color: var(--ink); }
+  a { color: var(--accent); text-decoration: none; border-bottom: 1px solid transparent; }
+  a:hover { border-bottom-color: var(--accent); }
+  code {
+    font-family: var(--mono);
+    font-size: 0.92em;
+    background: var(--paper-tint);
+    padding: 0.08em 0.35em;
+    border-radius: 4px;
+    border: 1px solid var(--rule);
+  }
+  pre {
+    background: var(--paper-tint);
+    border: 1px solid var(--rule);
+    border-radius: 8px;
+    padding: 1rem 1.2rem;
+    overflow-x: auto;
+    font-family: var(--mono);
+    font-size: 0.88rem;
+    line-height: 1.5;
+  }
+  pre code {
+    background: none;
+    border: none;
+    padding: 0;
+  }
+  blockquote {
+    margin: 1.5rem 0;
+    padding: 0.6rem 1.2rem;
+    border-left: 3px solid var(--accent);
+    background: var(--accent-soft);
+    color: var(--ink);
+    border-radius: 0 6px 6px 0;
+  }
+  blockquote p { margin: 0.3rem 0; }
+  .callout {
+    margin: 1.5rem 0;
+    padding: 0.8rem 1.2rem;
+    border-left: 3px solid var(--warn);
+    background: var(--warn-soft);
+    border-radius: 0 6px 6px 0;
+    color: #4a3c10;
+    font-size: 0.95rem;
+  }
+  .callout b { color: #3a2f00; }
+  table {
+    border-collapse: collapse;
+    width: 100%;
+    margin: 1.5rem 0;
+    font-size: 0.95rem;
+  }
+  th, td {
+    text-align: left;
+    padding: 0.6rem 0.8rem;
+    border-bottom: 1px solid var(--rule);
+    vertical-align: top;
+  }
+  th {
+    background: var(--paper-tint);
+    font-weight: 600;
+    color: var(--ink);
+    border-bottom: 2px solid var(--rule);
+  }
+  tr:hover td { background: var(--paper-tint); }
+  details {
+    margin: 1.2rem 0;
+    border: 1px solid var(--rule);
+    border-radius: 8px;
+    background: var(--paper-tint);
+    padding: 0;
+    overflow: hidden;
+  }
+  details summary {
+    cursor: pointer;
+    padding: 0.75rem 1.1rem;
+    font-weight: 600;
+    color: var(--ink);
+    list-style: none;
+    user-select: none;
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    transition: background 80ms ease;
+  }
+  details summary::-webkit-details-marker { display: none; }
+  details summary::before {
+    content: "▸";
+    color: var(--accent);
+    transition: transform 120ms ease;
+    display: inline-block;
+    font-size: 0.85em;
+  }
+  details[open] summary::before { transform: rotate(90deg); }
+  details summary:hover { background: rgba(0,0,0,0.02); }
+  details > *:not(summary) {
+    padding: 0 1.1rem;
+  }
+  details > *:not(summary):last-child {
+    padding-bottom: 1rem;
+  }
+  details[open] summary {
+    border-bottom: 1px solid var(--rule);
+  }
+  .mermaid {
+    background: var(--paper);
+    border: 1px solid var(--rule);
+    border-radius: 8px;
+    padding: 1.2rem;
+    margin: 1.5rem 0;
+    text-align: center;
+    overflow-x: auto;
+  }
+  hr {
+    border: none;
+    border-top: 1px solid var(--rule);
+    margin: 3rem 0;
+  }
+  ul, ol { padding-left: 1.4rem; }
+  ul li, ol li { margin: 0.25rem 0; }
+  .stop-here {
+    margin: 2rem 0;
+    text-align: center;
+    color: var(--ink-soft);
+    font-style: italic;
+    font-size: 0.95rem;
+  }
+  .stop-here::before, .stop-here::after {
+    content: " — ";
+    color: var(--rule);
+  }
+  footer {
+    margin-top: 5rem;
+    padding-top: 2rem;
+    border-top: 1px solid var(--rule);
+    color: var(--ink-soft);
+    font-size: 0.9rem;
+  }
+</style>
+</head>
+<body>
+<main>
+
+<header class="hero">
+  <h1>Harmony Fleet — Architecture</h1>
+  <p class="subtitle">
+    An operator declares <b>what</b> to run, in Kubernetes.
+    Agents on devices make it real, in their own containers.
+    NATS is the bus between them. Zitadel signs the agent's passport.
+  </p>
+</header>
+
+<p>This document walks the system in layers. Read until you stop having questions —
+each layer adds one idea on top of the previous one.</p>
+
+<hr>
+
+<h2><span class="layer">Layer 0</span> One picture</h2>
+
+<div class="mermaid">
+flowchart LR
+  subgraph K8S [Kubernetes cluster]
+    OP[Harmony Fleet Operator]
+  end
+  subgraph BUS [NATS JetStream]
+    KV[(KV buckets)]
+  end
+  subgraph DEV [Fleet device]
+    AG[Fleet Agent + Podman]
+  end
+  OP -- writes desired state --> KV
+  KV -- watches --> AG
+  AG -- reports state --> KV
+  KV -- watches --> OP
+</div>
+
+<p>That's it. The rest of the document explains the boxes.</p>
+
+<hr>
+
+<h2><span class="layer">Layer 1</span> The three planes</h2>
+
+<p>The fleet system has three planes that are deliberately decoupled:</p>
+
+<table>
+  <thead><tr><th>Plane</th><th>What lives here</th><th>Why</th></tr></thead>
+  <tbody>
+    <tr>
+      <td><b>Control</b></td>
+      <td>Kubernetes (k3d, OKD, vanilla — anything) + the <b>Fleet Operator</b></td>
+      <td>Operators already know how to talk to k8s. <code>kubectl apply</code> is the API.</td>
+    </tr>
+    <tr>
+      <td><b>Bus</b></td>
+      <td>A NATS server with JetStream + an auth callout that talks to Zitadel</td>
+      <td>Edge devices come and go; the bus tolerates that. KV gives us last-writer-wins state without bespoke sync.</td>
+    </tr>
+    <tr>
+      <td><b>Edge</b></td>
+      <td>Each device runs the <b>Fleet Agent</b> binary, which drives <b>Podman</b></td>
+      <td>Devices don't speak k8s — they speak NATS and run containers locally.</td>
+    </tr>
+  </tbody>
+</table>
+
+<div class="mermaid">
+flowchart LR
+  subgraph control [Control plane — Kubernetes]
+    direction TB
+    API[API Server + etcd]
+    OP[Fleet Operator]
+    DASH[/Dashboard — optional, feature-gated/]
+    API <--> OP
+    OP --- DASH
+  end
+  subgraph bus [Bus — NATS]
+    direction TB
+    NATS[NATS + JetStream KV]
+    CALLOUT[Auth Callout]
+    ZIT[Zitadel OIDC]
+    NATS -. token check .-> CALLOUT
+    CALLOUT -. validate JWT .-> ZIT
+  end
+  subgraph edge [Edge — fleet device]
+    direction TB
+    AGENT[Fleet Agent]
+    PODMAN[Podman]
+    AGENT --> PODMAN
+  end
+  OP <-->|KV| NATS
+  AGENT <-->|KV + commands| NATS
+</div>
+
+<div class="stop-here">Stop here if you only needed to know the shape</div>
+
+<hr>
+
+<h2><span class="layer">Layer 2</span> A deployment, end-to-end</h2>
+
+<p>Walk through what happens when a user (the SRE in the diagram below) runs <code>kubectl apply -f my-deployment.yaml</code>:</p>
+
+<div class="mermaid">
+sequenceDiagram
+  autonumber
+  actor User as Actor (SRE)
+  participant K8s as API Server
+  participant Op as Fleet Operator
+  participant Bus as NATS KV
+  participant Ag as Agent (on device)
+  participant Pm as Podman
+
+  User->>K8s: kubectl apply Deployment CR
+  K8s-->>Op: watch event (Deployment added)
+  Op->>Op: evaluate spec.targetSelector against Device CR labels
+  Op->>Bus: PUT desired-state.&lt;dev&gt;.&lt;dep&gt; = ReconcileScore JSON
+  Bus-->>Ag: KV watch event
+  Ag->>Ag: deserialize Score, build Interpret
+  Ag->>Pm: pull image, create/update container
+  Pm-->>Ag: container Running
+  Ag->>Bus: PUT device-state.state.&lt;dev&gt;.&lt;dep&gt; = Running
+  Bus-->>Op: KV watch event
+  Op->>K8s: PATCH Deployment.status.aggregate
+</div>
+
+<p>Things to notice:</p>
+<ul>
+  <li><b>The agent never talks to the API server.</b> Only the operator does. Everything edge-bound flows through NATS.</li>
+  <li><b>The flow is one-way for desired state, one-way for reported state.</b> The two paths cross at NATS, never at k8s.</li>
+  <li><b>The aggregator coalesces</b> — status patches fire at 1 Hz, not on every event, so high-frequency churn doesn't beat up the API server.</li>
+</ul>
+
+<details>
+  <summary>The CRDs in detail</summary>
+  <p>Group: <code>fleet.nationtech.io</code> · Version: <code>v1alpha1</code></p>
+  <ul>
+    <li>
+      <b><code>Deployment</code></b> (kind), plural <code>deployments</code>, short <code>fleetdep</code>, <b>namespaced</b><br>
+      Spec: <code>targetSelector: LabelSelector</code>, <code>score: ReconcileScore</code>, <code>rollout: Rollout</code><br>
+      Status: <code>aggregate: { matchedDeviceCount, succeeded, failed, pending, lastError }</code>
+    </li>
+    <li>
+      <b><code>Device</code></b> (kind), plural <code>devices</code>, short <code>fleetdev</code>, <b>cluster-scoped</b><br>
+      Spec: <code>inventory: InventorySnapshot</code><br>
+      Cluster-scoped because devices are infrastructure — the same way <code>Node</code> is cluster-scoped.
+    </li>
+  </ul>
+  <p>Devices in k8s are <b>created by the operator</b> from agent-published <code>device-info</code> KV entries. Agents never touch the API server.</p>
+  <p>Source: <code>harmony/src/modules/fleet/operator/crd.rs</code></p>
+</details>
+
+<hr>
+
+<h2><span class="layer">Layer 3</span> The four KV buckets</h2>
+
+<p>The bus is more granular than "a NATS KV". The fleet contract pins <b>four</b> named buckets, each with its own write/read direction.</p>
+
+<table>
+  <thead><tr><th>Bucket</th><th>Writer</th><th>Reader(s)</th><th>Key format</th><th>Purpose</th></tr></thead>
+  <tbody>
+    <tr>
+      <td><code>desired-state</code></td>
+      <td>Operator</td>
+      <td>Agent (watch)</td>
+      <td><code>&lt;device&gt;.&lt;deployment&gt;</code></td>
+      <td>The score the agent should reconcile to</td>
+    </tr>
+    <tr>
+      <td><code>device-state</code></td>
+      <td>Agent</td>
+      <td>Operator (watch + aggregator)</td>
+      <td><code>state.&lt;device&gt;.&lt;deployment&gt;</code></td>
+      <td>Current reconcile phase per (device, deployment)</td>
+    </tr>
+    <tr>
+      <td><code>device-info</code></td>
+      <td>Agent</td>
+      <td>Operator (reflects to <code>Device</code> CR)</td>
+      <td><code>info.&lt;device&gt;</code></td>
+      <td>Routing labels, inventory snapshot, agent version</td>
+    </tr>
+    <tr>
+      <td><code>device-heartbeat</code></td>
+      <td>Agent</td>
+      <td>Operator (liveness)</td>
+      <td><code>heartbeat.&lt;device&gt;</code></td>
+      <td>Tiny liveness ping every N seconds, kept off the state bucket to avoid churn</td>
+    </tr>
+  </tbody>
+</table>
+
+<div class="mermaid">
+flowchart LR
+  OP[Operator]
+  AG[Agent]
+  DS[(desired-state)]
+  ST[(device-state)]
+  IN[(device-info)]
+  HB[(device-heartbeat)]
+
+  OP -- writes --> DS
+  DS -- watches --> AG
+  AG -- writes --> ST
+  AG -- writes --> IN
+  AG -- writes --> HB
+  ST -- watches --> OP
+  IN -- reflects to Device CR --> OP
+  HB -. queries .- OP
+</div>
+
+<p>These four bucket names are <b>the contract</b> between agent and operator. They live in one place to keep cross-component drift from happening:</p>
+
+<pre><code>// harmony-reconciler-contracts/src/kv.rs
+pub const BUCKET_DESIRED_STATE: &amp;str    = "desired-state";
+pub const BUCKET_DEVICE_INFO: &amp;str      = "device-info";
+pub const BUCKET_DEVICE_STATE: &amp;str     = "device-state";
+pub const BUCKET_DEVICE_HEARTBEAT: &amp;str = "device-heartbeat";</code></pre>
+
+<p>There's also a <b>commands</b> path for request/response RPCs (ping today; logs/exec planned) on core-NATS subjects <code>device-commands.&lt;device-id&gt;.&lt;verb&gt;</code>, separate from JetStream KV.</p>
+
+<hr>
+
+<h2><span class="layer">Layer 4</span> Identity &amp; auth</h2>
+
+<p>Agents authenticate to NATS with a <b>Zitadel-signed JWT bearer token</b>. NATS doesn't validate the JWT itself; it delegates to a NATS <b>auth callout</b>, which is just another connected client running our <code>harmony-fleet-auth</code> binary.</p>
+
+<div class="mermaid">
+sequenceDiagram
+  autonumber
+  participant Ag as Agent
+  participant Z as Zitadel
+  participant N as NATS server
+  participant C as Auth Callout (harmony-fleet-auth)
+
+  Note over Ag,Z: One-time bootstrap (or before token expiry)
+  Ag->>Z: JWT assertion (RFC 7523, signed with device key)
+  Z-->>Ag: short-lived access token
+
+  Note over Ag,N: Every (re)connect
+  Ag->>N: CONNECT with bearer = access token
+  N->>C: auth callout request
+  C->>Z: introspect / validate signature
+  Z-->>C: token valid, claims = { device_id, ... }
+  C-->>N: ALLOW, permissions scoped to device_id
+  N-->>Ag: connection accepted
+</div>
+
+<p><b>Per-device scoping</b> — the callout derives NATS subject permissions from the JWT's <code>device_id</code> claim, so a compromised device key can only touch its own subjects.</p>
+<p><b>Token rotation</b> — the agent's auth callback is invoked by <code>async-nats</code> on every reconnect; the token cache mints a fresh one within a 5-minute leeway window. This is how the "never lose connectivity across token rollovers" guarantee holds.</p>
+
+<div class="callout">
+  <b>Today vs. target.</b> The CLI in <code>harmony-fleet-deploy/src/main.rs</code> defaults to <b>user/pass NATS</b> (<code>FleetNatsScore::user_pass</code>) for the v1 walking skeleton. The Zitadel/callout path is wired through <code>FleetServerScore</code>'s optional fields and is the production target — the diagram describes the target, not what the dev <code>main.rs</code> lights up by default.
+</div>
+
+<details>
+  <summary>Where this lives in code</summary>
+  <ul>
+    <li>Auth callout binary: <code>fleet/harmony-fleet-auth/src/lib.rs</code></li>
+    <li>Credential source + JWT minting: <code>fleet/harmony-fleet-auth/src/credentials.rs</code></li>
+    <li>Composing it into a server install: <code>FleetServerScore { auth_callout: Some(...) }</code> in <code>fleet/harmony-fleet-deploy/src/server.rs</code></li>
+  </ul>
+</details>
+
+<hr>
+
+<h2><span class="layer">Layer 5</span> Device enrollment (one-time setup)</h2>
+
+<p>A device joins the fleet through <code>FleetDeviceSetupScore</code> (in <code>harmony/src/modules/fleet/setup_score.rs</code>). Three flavours, in order of seriousness:</p>
+
+<ol>
+  <li><b>Dev / lab</b> — <code>FleetDeviceAuth::TomlShared</code>: a shared NATS user/pass baked into config. Zero auth infra. Don't ship this to a real device.</li>
+  <li><b>Production A</b> — <code>FleetDeviceAuth::ZitadelJwt</code>: an admin pre-creates a Zitadel machine user, exports its key JSON, and drops it at <code>/etc/fleet-agent/zitadel-key.json</code> on the device.</li>
+  <li><b>Production B (recommended)</b> — <code>FleetDeviceAuth::ZitadelEnroll</code>: the setup score itself talks to Zitadel's management API to mint a per-device machine key. No pre-provisioning. Works either developer-on-device (Zitadel device-code flow opens a browser) or operator-via-SSH.</li>
+</ol>
+
+<p>What the setup score does, in order:</p>
+<ol>
+  <li>Renders <code>/etc/fleet-agent/config.toml</code> (device id, NATS URL, auth credentials).</li>
+  <li>Drops the agent binary at <code>/usr/local/bin/fleet-agent</code>.</li>
+  <li>Enables <code>fleet-agent.service</code> (systemd).</li>
+  <li>Agent boots, connects to NATS with bearer token from the keyfile.</li>
+  <li>Agent publishes initial DeviceInfo into the <code>device-info</code> bucket under key <code>info.&lt;device_id&gt;</code>.</li>
+  <li>Agent starts watching <code>desired-state.&lt;device_id&gt;.&gt;</code>.</li>
+  <li>Agent answers <code>device-commands.&lt;device_id&gt;.ping</code>.</li>
+</ol>
+
+<p>After step 5 the operator reflects the agent-published DeviceInfo into a cluster-scoped <code>Device</code> CR. From that moment, a new <code>Deployment</code> CR whose <code>targetSelector</code> matches the Device's labels will land on the device automatically.</p>
+
+<hr>
+
+<h2><span class="layer">Layer 6</span> What runs where</h2>
+
+<div class="mermaid">
+flowchart TB
+  subgraph cluster [Kubernetes — fleet-system namespace]
+    direction TB
+    OP["Pod: harmony-fleet-operator
+    watches CRDs, writes desired-state KV,
+    aggregates device-state into CR status,
+    optional dashboard on :18080"]
+    NATS["Pod: NATS + JetStream
+    4 KV buckets, command subjects"]
+    CO["Pod: harmony-fleet-auth
+    NATS auth callout — validates JWTs"]
+    ZT["Pods: Zitadel + Postgres
+    OIDC, JWT signing"]
+  end
+  subgraph device [Edge — a Raspberry Pi or any podman host]
+    direction TB
+    AG["systemd: fleet-agent.service
+    watches desired-state.&lt;id&gt;.&gt;
+    writes device-state, device-info, device-heartbeat
+    handles device-commands.&lt;id&gt;.&lt;verb&gt;"]
+    PM[podman socket]
+    AG --> PM
+  end
+  AG <-->|NATS over WSS / TLS| NATS
+  OP <-->|in-cluster NATS| NATS
+  NATS -. callout .- CO
+  CO -. JWT introspect .- ZT
+  OP --- ZT
+</div>
+
+<hr>
+
+<h2>Cheat sheet — where to start reading</h2>
+
+<table>
+  <thead><tr><th>If you want to understand…</th><th>Open this file</th></tr></thead>
+  <tbody>
+    <tr><td>What a Deployment / Device CR looks like</td><td><code>harmony/src/modules/fleet/operator/crd.rs</code></td></tr>
+    <tr><td>The names of the KV buckets and key formats</td><td><code>harmony-reconciler-contracts/src/kv.rs</code></td></tr>
+    <tr><td>Operator: how CR → KV reconciliation works</td><td><code>fleet/harmony-fleet-operator/src/fleet_aggregator.rs</code></td></tr>
+    <tr><td>Agent: how KV → Podman reconciliation works</td><td><code>fleet/harmony-fleet-agent/src/reconciler.rs</code></td></tr>
+    <tr><td>Auth: JWT minting and NATS callout protocol</td><td><code>fleet/harmony-fleet-auth/src/credentials.rs</code></td></tr>
+    <tr><td>Deploying the whole server-side stack</td><td><code>fleet/harmony-fleet-deploy/src/server.rs</code></td></tr>
+    <tr><td>One-time device enrollment</td><td><code>harmony/src/modules/fleet/setup_score.rs</code></td></tr>
+    <tr><td>Why it's shaped this way (philosophy)</td><td><code>docs/adr/016-…</code> and <code>docs/adr/023-deploy-architecture.md</code></td></tr>
+  </tbody>
+</table>
+
+<h2>Glossary, for quick reference</h2>
+
+<ul>
+  <li><b>Score</b> — a Rust struct describing desired state (declarative). <code>ReconcileScore</code> is the variant agents apply.</li>
+  <li><b>Topology</b> — what the environment can do (capabilities exposed as traits). The agent uses <code>PodmanTopology</code>; the deploy CLI uses <code>K8sAnywhereTopology</code>.</li>
+  <li><b>Interpret</b> — the glue that drives a Topology to fulfil a Score. Agents call <code>score.create_interpret().execute(&amp;inv, &amp;PodmanTopology)</code>.</li>
+  <li><b>Auth callout</b> — a NATS feature where the server delegates AuthN to a connected client; here, that client is <code>harmony-fleet-auth</code>.</li>
+  <li><b>K8sAnywhere</b> — single Topology implementation that targets any reachable cluster (k3d, OKD, vanilla) via the kubeconfig. Today the only topology wired into <code>harmony-fleet-deploy</code>; <code>K8sBareTopology</code> is planned.</li>
+</ul>
+
+<footer>
+  Source of truth lives in the repo. This document was validated against
+  <code>fleet/</code> and <code>harmony/src/modules/fleet/</code> as of the commit on
+  <code>feat/iot-walking-skeleton</code>. If a layer looks wrong to you, it probably is — open a PR.
+</footer>
+
+</main>
+</body>
+</html>
--- a/fleet/PLAN_requests_over_nats.md
+++ b/fleet/PLAN_requests_over_nats.md
@@ -0,0 +1,427 @@
+# Plan — Request/Reply over NATS, TDD via in-cluster e2e harness
+
+Two intertwined deliverables:
+
+1. **`fleet/harmony-fleet-e2e`** — a new harness crate that brings up the full stack (NATS + auth-callout + fleet-operator + fleet-agent-as-pod) in a fresh k3d namespace and tears it down at process exit. Fast (target ≤15s bring-up when the cluster is already running, ≤5s teardown). Works against k3d locally or any cluster with a kubeconfig (incl. OKD).
+2. **First feature, TDD-style**: `Verb::Ping`. Failing test in the harness, then the wire types + agent handler + operator client to make it green. Subsequent verbs (logs, exec) follow the same pattern in follow-up PRs.
+
+Both land together because the harness is what proves cohesion: every fleet feature from now on gets its e2e test in the same crate, and the scattered bring-up code in `examples/fleet_e2e_demo` and `examples/fleet_auth_callout` becomes a thin layer over this harness.
+
+## Goals & non-goals
+
+| Goal | In v1 |
+|---|---|
+| `cargo test -p harmony-fleet-e2e` brings up the stack, runs ping test, tears down | ✅ |
+| Per-test namespace isolation; multiple test runs can coexist in the same cluster | ✅ |
+| Images built once and sideloaded into k3d (no registry push) | ✅ |
+| Cluster reused across runs; only namespace is recreated | ✅ |
+| Agent runs as a Pod (no VMs, no SSH, no libvirt) | ✅ |
+| Harness prints NATS URL + admin creds so the developer can poke during a hung test | ✅ |
+| First feature: `ping` (operator-side `FleetCommandsClient::ping`, agent-side handler, wire types) | ✅ |
+| Runs against a remote OKD cluster via `KUBECONFIG` | ✅ (image-import step is conditional) |
+
+| Non-goal (v1) | Reason |
+|---|---|
+| `logs` / `exec` implementations | Same wiring; covered in follow-up commits using the same harness |
+| PTY | Doc Pattern B; defer |
+| JetStream audit log | Defer; sidecar consumer added later |
+| Zitadel in the harness | Cold-start cost is 30-60s; the harness's default `AuthMode::Callout` uses a mock OIDC fixture for the callout to keep bring-up fast. Real Zitadel stays in `fleet_e2e_demo` (manual rehearsal). |
+
+## Crate layout
+
+New workspace member at `fleet/harmony-fleet-e2e/`:
+
+```
+fleet/harmony-fleet-e2e/
+├── Cargo.toml
+├── README.md                    # How to run, debug, point at remote clusters
+├── src/
+│   ├── lib.rs                   # Public surface: Stack, StackHandle, bring_up()
+│   ├── images.rs                # Build + sideload (callout, operator, agent)
+│   ├── namespace.rs             # Unique-namespace generation + RAII cleanup
+│   ├── stack.rs                 # Compose Scores against K8sBareTopology
+│   ├── nats.rs                  # NatsHelmChartScore preset with callout + mock-issuer block
+│   ├── mock_oidc.rs             # Tiny in-cluster OIDC fixture (issues JWTs the callout accepts)
+│   ├── agent_pod.rs             # New Score: agent as a Pod (no VM/SSH)
+│   ├── observability.rs         # NodePort + admin creds, helper to mint admin JWT
+│   └── client.rs                # FleetCommandsClient (operator-side wrapper for tests)
+└── tests/
+    └── ping.rs                  # **First TDD test** — failing until the protocol lands
+```
+
+Crate kind: library + `[[test]]` integration tests. Not a binary; harness is consumed by tests via `harmony_fleet_e2e::Stack::bring_up().await`.
+
+Cargo workspace: add to root `members`. Build deps: `harmony` (k8s+nats helpers), `harmony-fleet-auth`, `harmony-reconciler-contracts`, `k3d`, `async-nats`, `kube`, `k8s-openapi`, `tokio`, `anyhow`, `tracing`, `uuid` (for namespace ID), `tempfile`, `serde_json`.
+
+### Why a separate crate (not an example)
+
+Examples currently are bring-up scripts. The e2e harness is **infrastructure for tests** consumed by multiple callers (the new `tests/ping.rs`, future `tests/logs.rs`, `tests/exec.rs`, and eventually a slimmed-down `examples/fleet_e2e_demo` that just calls into it for the manual rehearsal). A library crate lets us expose `Stack`, `StackHandle`, `FleetCommandsClient` as proper types, with `cargo test` discovery and parallel-friendly per-namespace isolation.
+
+## Agent-side prerequisite: gate podman behind config
+
+The agent currently `panic`s if the podman socket isn't ready (`fleet/harmony-fleet-agent/src/main.rs:200`). For the in-cluster harness we need the agent to run on a node that doesn't expose podman.
+
+Add to `agent-config.toml`:
+
+```toml
+[agent]
+device_id = "vm-device-00"
+# NEW: when false, skip podman init and the reconciler loop.
+# Command server still runs (ping/exec-via-fallback are still useful).
+runtime_enabled = true   # default true; e2e harness sets false
+```
+
+Wire-up in `main.rs`:
+
+- When `runtime_enabled = false`, the agent skips `PodmanTopology::from_default_socket()`, skips the reconciler periodic tick, but still subscribes to desired-state (KV watch) and runs the command server. KV deliveries with a non-podman Score variant get logged + rejected with `ErrorKind::BadRequest` (today we'd just drop them silently).
+
+Small, contained change (~30 lines). Unlocks pod-based agents and unblocks future verbs (exec/logs add their own runtime requirements).
+
+Alternative considered: mount `/var/run/podman/podman.sock` into the pod. Rejected — k3d nodes run containerd, not podman; mount would dangle.
+
+## Harness public API
+
+```rust
+// fleet/harmony-fleet-e2e/src/lib.rs
+pub struct Stack {
+    pub namespace: String,            // e2e-<uuid8>
+    pub nats_url: String,             // nats://localhost:<nodeport>
+    pub admin_token: String,          // JWT for the mock OIDC, callout-accepted
+    pub device_ids: Vec<Id>,          // ["vm-device-00", "vm-device-01", …]
+    pub operator_client: async_nats::Client,  // pre-authed admin client
+    _guard: NamespaceGuard,           // Drop impl deletes the namespace
+}
+
+pub struct StackOptions {
+    pub kubeconfig: Option<PathBuf>,   // default: $KUBECONFIG, fall back to k3d-managed
+    pub k3d_cluster_name: Option<String>, // None = pick the harness default; required if not using k3d
+    pub num_devices: usize,            // default 1; ping test uses 1
+    pub image_rebuild: bool,           // env var FLEET_E2E_FORCE_REBUILD
+    pub keep_namespace: bool,          // env var FLEET_E2E_KEEP=1 — skip teardown for debugging
+    pub auth_mode: AuthMode,           // Callout (default) | UserPass (fastest)
+}
+
+pub enum AuthMode {
+    /// Real auth-callout + mock OIDC fixture. Exercises the production code path.
+    Callout,
+    /// NATS user/pass via TomlShared credentials. Skips callout entirely.
+    /// ~3-5s faster bring-up; use for tests that don't care about auth.
+    UserPass,
+}
+
+impl Stack {
+    pub async fn bring_up(opts: StackOptions) -> anyhow::Result<Self>;
+    pub fn print_debug_info(&self);     // logs URL, token, namespace, kubectl shortcuts
+}
+```
+
+`Drop for NamespaceGuard`: spawns a blocking task that runs `kubectl delete namespace <name> --wait=false`. Doesn't block process exit; the namespace garbage-collects asynchronously. If `keep_namespace = true`, just logs the name.
+
+## TDD test order
+
+### Test 1 (first to land): ping
+
+```rust
+// fleet/harmony-fleet-e2e/tests/ping.rs
+#[tokio::test(flavor = "multi_thread")]
+async fn operator_can_ping_agent() -> anyhow::Result<()> {
+    let stack = Stack::bring_up(StackOptions::default()).await?;
+    let device_id = &stack.device_ids[0];
+
+    let client = FleetCommandsClient::new(stack.operator_client.clone());
+    let reply = tokio::time::timeout(
+        Duration::from_secs(10),
+        client.ping(device_id.as_str()),
+    ).await??;
+
+    assert_eq!(reply.device_id.as_str(), device_id.as_str());
+    assert!(!reply.agent_version.is_empty());
+    Ok(())
+}
+```
+
+**Failing → green sequence:**
+
+1. **Red**: write the test above. It can't even compile because `FleetCommandsClient`, `Stack`, `bring_up` don't exist.
+2. **Scaffold the harness**: stub `Stack::bring_up` that just returns an error. Test compiles, fails at runtime.
+3. **Bring up the cluster bits incrementally**:
+   - Namespace creation + RAII guard.
+   - NATS deploy via `NatsHelmChartScore` (UserPass mode first for speed).
+   - Operator deploy via `FleetOperatorScore` (image sideloaded).
+   - Agent pod deploy via new `FleetAgentPodScore`.
+   - Wait for pod readiness.
+   - Build operator admin NATS client.
+4. **Implement the wire types** in `harmony-reconciler-contracts/src/commands.rs` (just `Verb::Ping` + `CommandRequest::Ping` + `PingReply` for now).
+5. **Implement agent command server** with only the ping handler (`fleet/harmony-fleet-agent/src/command_server.rs`).
+6. **Implement `FleetCommandsClient::ping`** in `fleet/harmony-fleet-operator/src/commands.rs`.
+7. **Test goes green.**
+8. **Add Callout auth mode** to the harness (mock OIDC fixture deployed alongside NATS), re-run test in both modes.
+
+### Test 2 (follow-up PR): no-responders → DeviceOffline
+
+```rust
+#[tokio::test]
+async fn ping_to_offline_device_returns_immediately() -> anyhow::Result<()> {
+    let stack = Stack::bring_up(StackOptions::default()).await?;
+    let client = FleetCommandsClient::new(stack.operator_client.clone());
+    let started = Instant::now();
+    let err = client.ping("nonexistent-device").await.unwrap_err();
+    assert!(matches!(err, CommandError::DeviceOffline));
+    assert!(started.elapsed() < Duration::from_secs(1));
+    Ok(())
+}
+```
+
+### Test 3+ (follow-up PR, same harness): logs + exec — same pattern.
+
+## Image build & sideload
+
+`src/images.rs` exposes:
+
+```rust
+pub struct Images {
+    pub callout: String,           // e.g. harmony-nats-callout:e2e-<contenthash>
+    pub operator: String,
+    pub agent: String,
+}
+
+pub async fn build_and_sideload(cluster: &K3dCluster, opts: BuildOpts) -> Result<Images>;
+```
+
+Implementation:
+
+- For each of (callout, operator, agent):
+  - Hash the crate's source tree + `Cargo.lock`.
+  - If `podman images` already contains `<image>:<hash>` and `FLEET_E2E_FORCE_REBUILD != 1`, skip the rebuild.
+  - Otherwise: `cargo build --release -p <crate>` + `podman build -f Dockerfile -t <image>:<hash>`.
+  - `podman save | k3d image import -c <cluster>` (or `--volumes` if `--import` doesn't accept stdin; use the existing pattern from `examples/fleet_e2e_demo`).
+
+Dockerfiles:
+- Callout: exists at `nats/callout/Dockerfile` (used by the demo).
+- Operator: exists at `fleet/harmony-fleet-operator/Dockerfile`.
+- **Agent**: doesn't exist yet — add `fleet/harmony-fleet-agent/Dockerfile`. Distroless base, single static binary, ~5MB image.
+
+Sideload bypass for remote clusters: if `opts.registry` is set, push to that registry and skip sideload. Out of scope for v1 (the user said defer); v1 just panics if running against a non-k3d cluster.
+
+## Per-namespace isolation
+
+Today the demo hardcodes `fleet-system` and `zitadel`. The harness:
+
+- Picks namespace `e2e-<uuid8>` per `Stack::bring_up` call.
+- Every Score in the harness is parametrized on `namespace`; nothing is hardcoded.
+- The `FleetOperatorScore` already takes a `namespace` (verified in `harmony/src/modules/fleet/operator/score.rs`). The `NatsHelmChartScore` too. The `NatsAuthCalloutScore` too. Good.
+- The CRD definitions (`Deployment`, `Device`) are cluster-scoped objects — but they're created once per cluster (idempotent apply) and shared across e2e runs. `Deployment` CRs themselves are namespaced; the operator filters by namespace via its `kube::Api::namespaced()` calls.
+- `Device` CRs, however, are cluster-scoped, so two simultaneous e2e runs would collide on `Device` CR names. Two mitigations:
+  - **Option A** (simpler): per-test device IDs include the namespace suffix (`vm-device-00-e2e-abc12345`). No collision.
+  - **Option B**: scope the `Device` CR to a namespace. Bigger change to the operator. Out of scope.
+  - Plan picks A.
+
+## Auth mode story
+
+Default `AuthMode::Callout` because the user explicitly asked for "nats + callout + operator + agent". To avoid Zitadel's bring-up cost, the harness ships a `mock_oidc.rs` fixture: a tiny single-Pod HTTP service that:
+
+- Serves `/.well-known/openid-configuration` and `/jwks.json` from a process-generated keypair.
+- Mints JWTs for `device-<id>` and `fleet-ops` machine users on demand via a `/token` endpoint the harness calls.
+- ~200 LOC, no external deps. Lives inside `harmony-fleet-e2e` (not exposed elsewhere).
+
+The callout points its `oidc_issuer_url` at the mock service's in-cluster URL. From the callout's perspective this is indistinguishable from Zitadel.
+
+`AuthMode::UserPass` skips the callout entirely: NATS deploys with two static accounts (`device` + `admin`) and the agent's `TomlShared` credential variant connects directly. ~3-5s faster bring-up. Useful when iterating on the command protocol itself, where auth isn't being tested.
+
+Both modes go through the same `Stack::operator_client` surface — tests don't see the difference.
+
+## Observability — what the harness prints
+
+On bring-up success, `print_debug_info()` logs:
+
+```
+[e2e] namespace: e2e-7d3a91f4 (will be deleted on exit unless FLEET_E2E_KEEP=1)
+[e2e] kubectl -n e2e-7d3a91f4 get pods
+[e2e] NATS: nats://localhost:30422
+[e2e] admin token: eyJhbGc... (use as auth_token)
+[e2e] devices: vm-device-00-e2e-7d3a91f4
+[e2e] tail agent: kubectl -n e2e-7d3a91f4 logs deploy/fleet-agent-vm-device-00 -f
+[e2e] tail callout: kubectl -n e2e-7d3a91f4 logs deploy/fleet-callout -f
+```
+
+When a test fails, set `FLEET_E2E_KEEP=1` and the namespace persists so you can poke around. The next run uses a different namespace, so leaks don't compound.
+
+## Reuse / cohesion plan
+
+The existing `examples/fleet_e2e_demo/src/lib.rs` is the original bring-up Frankenstein. Once `harmony-fleet-e2e` exists, refactor `fleet_e2e_demo` to delegate:
+
+```rust
+// examples/fleet_e2e_demo/src/lib.rs (after refactor)
+pub async fn bring_up_full_stack(...) -> ... {
+    let stack = harmony_fleet_e2e::Stack::bring_up(StackOptions {
+        auth_mode: AuthMode::Callout,           // real
+        num_devices: cfg.num_devices,
+        oidc_provider: OidcProvider::RealZitadel(zitadel_config),  // adapter for real Zitadel
+        agent_target: AgentTarget::Vm(vm_ips),  // SSH-based, for the rehearsal flow
+        ..
+    }).await?;
+    // ...
+}
+```
+
+This requires the harness to support **multiple agent targets** (Pod vs VM/SSH) and **multiple OIDC providers** (mock vs real Zitadel). Architecture-wise this is a `trait AgentTarget` and a `trait OidcProvider`, both with mock + real impls. The v1 PR ships only the Pod + mock-OIDC impls; the demo refactor is a follow-up PR.
+
+Cohesion deliverables this PR closes:
+- Single home for "bring up a fleet stack" logic (currently scattered across 3 examples).
+- Single home for image-build invocation (today inline `cargo build --release` + `podman build` calls live in `fleet_e2e_demo/src/lib.rs` lines 553–623).
+- Single home for "issue NATS test client" plumbing (the `admin_nats_client` helper in `e2e_walking_skeleton.rs` should be a Stack method).
+
+## Wire types (same as previous plan, reduced for ping-only first pass)
+
+In `harmony-reconciler-contracts/src/commands.rs` — add only what `ping` needs in PR 1:
+
+```rust
+pub enum Verb { Ping }
+pub fn device_command_subject(device_id: &str, verb: Verb) -> String;
+
+pub enum CommandRequest { Ping }
+pub struct PingReply {
+    pub device_id: Id,
+    pub agent_version: String,
+    pub uptime_s: u64,
+}
+
+pub const HDR_REQUEST_ID: &str = "X-Harmony-Request-Id";
+pub const HDR_DEADLINE: &str = "X-Harmony-Deadline";
+pub const HDR_OPERATOR_SUB: &str = "X-Harmony-Operator-Sub";
+```
+
+`Verb::Exec` / `Verb::Logs` and their payloads are added in follow-up PRs alongside their tests.
+
+## Agent-side command server (ping-only scaffold)
+
+`fleet/harmony-fleet-agent/src/command_server.rs`:
+
+```rust
+pub struct CommandServer {
+    device_id: Id,
+    client: async_nats::Client,
+    agent_version: &'static str,
+    started_at: Instant,
+}
+
+impl CommandServer {
+    pub async fn run(self: Arc<Self>) -> Result<()> {
+        let subject = format!("device-commands.{}.>", self.device_id);
+        let mut sub = self.client.subscribe(subject).await?;
+        while let Some(msg) = sub.next().await {
+            self.dispatch(msg).await;
+        }
+        Ok(())
+    }
+
+    async fn dispatch(&self, msg: async_nats::Message) {
+        let verb = msg.subject.rsplit('.').next();
+        match verb {
+            Some("ping") => self.reply_ping(&msg).await,
+            _ => self.reply_error(&msg, ErrorKind::BadRequest, "unknown verb").await,
+        }
+    }
+
+    async fn reply_ping(&self, msg: &async_nats::Message) {
+        let reply = PingReply {
+            device_id: self.device_id.clone(),
+            agent_version: env!("CARGO_PKG_VERSION").to_string(),
+            uptime_s: self.started_at.elapsed().as_secs(),
+        };
+        if let Some(inbox) = &msg.reply {
+            let _ = self.client.publish(inbox.clone(), serde_json::to_vec(&reply)?.into()).await;
+        }
+    }
+}
+```
+
+Wired into `main.rs` as a new arm of the existing `tokio::select!`. Future verbs slot into `dispatch`.
+
+## Operator-side client (ping-only scaffold)
+
+`fleet/harmony-fleet-operator/src/commands.rs`:
+
+```rust
+pub struct FleetCommandsClient {
+    nc: async_nats::Client,
+    default_timeout: Duration,
+}
+
+pub enum CommandError {
+    DeviceOffline,   // 503 no_responders
+    Timeout,
+    BadReply(serde_json::Error),
+    Nats(async_nats::Error),
+}
+
+impl FleetCommandsClient {
+    pub fn new(nc: async_nats::Client) -> Self;
+    pub async fn ping(&self, device_id: &str) -> Result<PingReply, CommandError>;
+}
+```
+
+`ping` uses `nc.request()` (relies on `no_responders` default-on in async-nats). Timeout: 5s. Decodes JSON reply into `PingReply`.
+
+## Test ordering & PR slicing
+
+**PR 1 (this plan):**
+- `harmony-fleet-e2e` crate scaffolding
+- `harmony-reconciler-contracts::commands` (ping types only)
+- Agent: `runtime_enabled` config flag + `command_server.rs` (ping only)
+- Operator: `commands.rs` (ping only)
+- New `FleetAgentPodScore` (or inline manifest) for pod-based agents
+- New `MockOidcScore` for the auth callout's issuer
+- `tests/ping.rs` — passing
+- Agent Dockerfile (new)
+
+**PR 2** (after PR 1 merges):
+- `tests/ping_offline.rs` (no_responders → DeviceOffline)
+- Refactor `fleet_e2e_demo` to delegate to `harmony-fleet-e2e` with `AgentTarget::Vm` + `OidcProvider::RealZitadel`
+
+**PR 3 (logs):**
+- Wire types for `Verb::Logs` + `LogsReq` + `LogChunk`
+- Agent handler invoking `podman_api::Containers::logs`
+- Operator client streaming method
+- `tests/logs.rs`
+
+**PR 4 (exec):**
+- Wire types for `Verb::Exec` + `ExecReq` + `ExecReply`
+- Agent handler with container-only default + host-exec policy gate
+- Operator client
+- `tests/exec.rs`
+
+**PR 5+**: web frontend wiring, CLI subcommands.
+
+## Open questions for review
+
+1. **Auth mode default** — Callout-with-mock-OIDC (slower, exercises real auth path), or UserPass (faster, doesn't test auth)? Plan picks Callout. UserPass available via env or `StackOptions`.
+2. **Mock OIDC fixture** — build into the harness, or use an existing crate? I haven't found a small-enough off-the-shelf one; recommend hand-rolled ~200 LOC (uses `jsonwebtoken`).
+3. **Image hash strategy** — content-hash of `Cargo.lock` + crate source (skip rebuild if matching tag exists)? Or always rebuild and rely on Docker layer cache? Plan: content-hash, with `FLEET_E2E_FORCE_REBUILD=1` escape hatch.
+4. **Cluster lifecycle** — harness assumes the k3d cluster already exists (or auto-creates one named `fleet-e2e`). Should it also offer a `Stack::bring_up_isolated_cluster()` that creates+destroys the whole cluster per test? Plan: no, namespace isolation is enough; clusters are heavy.
+5. **Ping reply shape** — `PingReply { device_id, agent_version, uptime_s }` minimal. Add anything else useful for a health-check (memory, podman socket status, current desired-state revision)? Easy to extend later; v1 keeps it minimal.
+6. **Subject choice** — `device-commands.<id>.ping` (matches the existing callout permission template). Alternative `harmony.device.<id>.cmd.ping` (matches the doc's verbatim suggestion) would require updating the callout permissions. Plan picks the existing `device-commands.<id>.ping` subject and notes the doc's `harmony.device.*` is the same idea with different prefix; no callout change needed.
+
+## What you'll see when you run the green ping test
+
+```
+$ cargo test -p harmony-fleet-e2e --test ping
+   Compiling harmony-fleet-e2e v0.1.0
+    Finished test [unoptimized + debuginfo] target(s) in 12.4s
+     Running tests/ping.rs
+
+running 1 test
+[e2e] building images: callout, operator, agent (cached, skipping rebuild)
+[e2e] sideloading 3 images into k3d cluster fleet-e2e
+[e2e] namespace: e2e-7d3a91f4
+[e2e] deploying mock-oidc, nats, callout, operator, agent
+[e2e] all pods ready in 7.2s
+[e2e] NATS: nats://localhost:30422
+[e2e] admin token: eyJhbGc...
+test operator_can_ping_agent ... ok
+
+test result: ok. 1 passed; 0 failed; 0 ignored; finished in 9.8s
+[e2e] tearing down namespace e2e-7d3a91f4
+```
+
+Target: green test in <15s end-to-end, with subsequent runs hitting <10s thanks to image cache + cluster reuse.
--- a/fleet/README.md
+++ b/fleet/README.md
@@ -0,0 +1,159 @@
+# Harmony Fleet
+
+IoT / decentralized-edge orchestration for harmony. A fleet stack is:
+
+| Component | Crate | Role |
+|---|---|---|
+| **Operator** | [`harmony-fleet-operator`](harmony-fleet-operator/) | Watches `Deployment` CRs, writes desired state into NATS JetStream KV, aggregates device state back into CR status. Runtime binary; no `harmony` dep. |
+| **Agent** | [`harmony-fleet-agent`](harmony-fleet-agent/) | One per device. Watches the desired-state KV, drives the local runtime (podman today), publishes heartbeats + per-deployment state, answers `device-commands.*` request/reply. |
+| **Auth** | [`harmony-fleet-auth`](harmony-fleet-auth/) | Shared NATS credential plumbing — `TomlShared` (dev) and `ZitadelJwt` (prod with auth-callout). |
+| **Deploy** | [`harmony-fleet-deploy`](harmony-fleet-deploy/) | The canonical deploy crate. Imports `harmony` and exposes one `*Score` per component (`FleetOperatorScore`, `FleetAgentScore`, `FleetNatsScore`, `FleetServerScore`). Both the production CLI and the e2e harness compose these — see [ADR-023](../docs/adr/023-deploy-architecture.md). |
+| **E2E harness** | [`harmony-fleet-e2e`](harmony-fleet-e2e/) | Brings the stack up in a fresh k3d namespace and runs integration tests against it. |
+
+The on-the-wire types both ends agree on (KV bucket names, key formats, command-protocol payloads) live in [`../harmony-reconciler-contracts`](../harmony-reconciler-contracts/).
+
+## Architecture in one line
+
+`FleetOperatorScore`, `FleetAgentScore`, etc. are real Rust types with capability-bound `Topology` parameters. Production deploys, the e2e harness, and any future control-plane tool all compose the **same** Scores; the only thing that changes is the `Topology` instance. **No handrolled YAML or imperative manifest factories anywhere.** Read [ADR-023](../docs/adr/023-deploy-architecture.md) before adding deploy logic.
+
+---
+
+## Quickstart — run the e2e ping test
+
+The fastest path to a green fleet stack on your laptop. Requires `podman`, `kubectl`, and `helm` on `$PATH`; everything else (`k3d`, the NATS chart, all images) is fetched / built on demand.
+
+```bash
+HARMONY_FLEET_E2E=1 cargo test -p harmony-fleet-e2e --test ping -- --nocapture
+```
+
+What it does, in order:
+
+1. Ensures a `fleet-e2e` k3d cluster exists (creates one if not). NodePort `30423` on the host forwards to NATS inside the cluster.
+2. Builds `harmony-fleet-agent` in release mode, packages it into `localhost/harmony-fleet-agent:e2e`, and sideloads the image into the k3d cluster's containerd store.
+3. Mints a per-bring-up namespace `e2e-<uuid8>` and prunes any leftover `e2e-*` namespaces from prior runs (NodePort `30423` is cluster-scoped, so a stuck `Terminating` namespace would block the new bring-up — the prune waits up to 90 s for full cleanup before proceeding).
+4. Deploys NATS via `FleetNatsScore` (helm chart, JetStream on, static admin/device users, NodePort Service).
+5. Waits for NATS to be reachable from the host on `nats://localhost:30423` (admin/e2e-admin).
+6. Deploys one `FleetAgentScore { target: Pod }` — runs with `runtime_enabled = false` so it skips podman and only runs the command-server + heartbeat loop.
+7. Waits for the agent Deployment to be Ready.
+8. The test publishes `device-commands.<device_id>.ping` via `FleetCommandsClient::ping` and asserts the agent replies with `{ device_id, agent_version, uptime_s }`.
+
+Cold first run: ~80 s (release build of the agent dominates). Warm: ~25 s.
+
+### Useful env knobs
+
+| Var | Effect |
+|---|---|
+| `HARMONY_FLEET_E2E=1` | Required. Without it the test is skipped — keeps `cargo test --workspace` cheap on machines without k3d. |
+| `FLEET_E2E_KEEP=1` | Skip namespace teardown on Drop. Lets you `kubectl -n e2e-<…> logs deploy/…` after a failure. The next run prunes it. |
+| `RUST_LOG=info` | Or `debug` for the per-message `command dispatch` traces inside `harmony-fleet-agent::command_server`. |
+
+### Connecting to NATS while the stack is up
+
+```bash
+# Host-side, via the NodePort
+nats://localhost:30423           # user=admin pass=e2e-admin (full access)
+nats://localhost:30423           # user=device pass=e2e-device (device permissions)
+```
+
+```bash
+# In-cluster, from any Pod in the same namespace
+nats://fleet-nats.e2e-<uuid8>.svc.cluster.local:4222
+```
+
+`FLEET_E2E_KEEP=1` + the harness's stdout line `[e2e] NATS: nats://127.0.0.1:30423 …` is the path most tests will take — leave the harness running, point a NATS client at that URL.
+
+### Inspecting the agent
+
+```bash
+# Find your namespace
+kubectl get ns -l harmony.io/managed-by=fleet-e2e
+
+# Tail the agent
+kubectl -n e2e-<uuid8> logs deploy/fleet-agent-<device-id> -f
+
+# Tail NATS (StatefulSet, not Deployment)
+kubectl -n e2e-<uuid8> logs sts/fleet-nats -c nats -f
+
+# Send a ping by hand (requires the `nats` CLI:
+#   https://github.com/nats-io/natscli/releases)
+nats --server nats://localhost:30423 --user admin --password e2e-admin \
+     request "device-commands.vm-device-00-<uuid8>.ping" ""
+```
+
+Or, if you don't want to install the `nats` binary, alias a containerized nats-box and run `natsbox request …` instead:
+
+```bash
+alias natsbox='podman run --network=host --rm docker.io/natsio/nats-box:latest nats --server nats://localhost:30423 --user admin --password e2e-admin'
+```
+
+You should see something like `{"device_id":"vm-device-00-<uuid8>","agent_version":"0.1.0","uptime_s":12}`.
+
+### Cleaning up
+
+The shared `OnceCell` in `harmony-fleet-e2e` lives for the test binary's lifetime, so namespaces survive a `cargo test` exit (the static is never explicitly dropped). The next `cargo test` invocation prunes them. To force a manual cleanup:
+
+```bash
+kubectl delete ns -l harmony.io/managed-by=fleet-e2e
+# wipe the whole cluster:
+k3d cluster delete fleet-e2e
+```
+
+---
+
+## Production deploys
+
+`harmony-fleet-deploy` is the binary that puts the fleet stack on a real cluster (OKD, vanilla k8s, anywhere `K8sAnywhereTopology` can reach). It composes `FleetNatsScore` + `FleetOperatorScore` + `FleetAgentScore` against the topology you point it at.
+
+```bash
+# Default: K8sAnywhereTopology against whatever KUBECONFIG points at
+cargo run -p harmony-fleet-deploy -- \
+  --namespace fleet-system \
+  --operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev \
+  --agent-image   hub.nationtech.io/harmony/harmony-fleet-agent:dev \
+  --agent-device-id fleet-agent-01
+
+# Pick a single component with the harmony_cli filter
+cargo run -p harmony-fleet-deploy -- \
+  --namespace fleet-system \
+  -- --filter FleetOperatorScore --all
+```
+
+`harmony-fleet-deploy` reads its full config from CLI flags + env vars (`FLEET_NAMESPACE`, `FLEET_OPERATOR_IMAGE`, …). The minimal-CLI surface is deliberate — per ADR-023 the long-term answer is a plugin-discovery layer over `harmony-*` binaries; until that lands, deploy crates stay small and use the existing `harmony_cli`.
+
+### Connecting to the operator
+
+The operator runs as a single-replica Deployment in `--namespace` (default `fleet-system`).
+
+```bash
+# Tail logs
+kubectl -n fleet-system logs deploy/harmony-fleet-operator -f
+
+# Port-forward the embedded web dashboard (web-frontend feature)
+kubectl -n fleet-system port-forward deploy/harmony-fleet-operator 18080:18080
+
+# Or run the dashboard standalone with seeded fake data — no NATS, no cluster
+cargo run -p harmony-fleet-operator --features web-frontend -- serve-web --mock
+# browse http://127.0.0.1:18080
+```
+
+---
+
+## Existing manual rehearsal — `examples/fleet_e2e_demo`
+
+`examples/fleet_e2e_demo` brings up a *fuller* stack than the e2e harness — real Zitadel, the auth-callout, libvirt VM agents over SSH — at the cost of a 5-min cold start. It's the manual rehearsal flow; not what you want during the dev loop. See the example's [`RUNBOOK.md`](../examples/fleet_e2e_demo/RUNBOOK.md).
+
+The harness and the rehearsal will converge: the [follow-up PR](#whats-next) lifts `FleetCalloutScore` + a mock-OIDC fixture into `harmony-fleet-deploy`, at which point the harness can run the full production auth path in ~30 s instead of 5 min, and `fleet_e2e_demo` thins down to a caller over the same Scores.
+
+---
+
+## What's next
+
+This branch lands the deploy-architecture cleanup (ADR-023), the per-component Scores, and the ping path. Slated immediately after:
+
+1. **Zitadel + auth callout in `harmony-fleet-deploy`.** New `FleetCalloutScore` (preset over `NatsAuthCalloutScore`) plus an in-cluster mock-OIDC fixture so the e2e harness can exercise the real auth-callout code path without paying Zitadel's 5-min cold-start cost. The harness's `AuthMode::Callout` variant is already on the public API for this.
+2. **Operator pod in the e2e harness.** `FleetOperatorScore` is already in the deploy crate; wiring it into the harness gives integration tests against the actual `Deployment` / `Device` reconcile loops.
+3. **`Verb::Logs` and `Verb::Exec`** — the next two verbs on the `device-commands.*` protocol. Same harness, same TDD shape as `ping`.
+4. **CRD types out of `harmony` core.** `harmony::modules::fleet::operator::crd` is the last fleet-deploy thing still living in `harmony`. The `ReconcileScore` payload coupling is the only blocker.
+5. **Smoke-test contract.** ADR-023 principle 4 — every Score blocks on a smoke test before `deploy` returns success. Today the e2e suite plays that role; the trait/companion shape lands once it's been validated in practice.
+
+See [`PLAN_requests_over_nats.md`](PLAN_requests_over_nats.md) for the full TDD-style plan this branch implements.
--- a/fleet/harmony-fleet-agent/Cargo.toml
+++ b/fleet/harmony-fleet-agent/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "harmony-fleet-agent"
+version = "0.1.0"
+edition = "2024"
+rust-version = "1.85"
+
+[dependencies]
+harmony-fleet-auth = { path = "../harmony-fleet-auth" }
+harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
+harmony = { path = "../../harmony", default-features = false, features = ["podman"] }
+async-nats = { workspace = true }
+async-trait = { workspace = true }
+chrono = { workspace = true }
+futures-util = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true }
+tracing = { workspace = true }
+tracing-subscriber = { workspace = true }
+anyhow = { workspace = true }
+clap = { workspace = true }
+toml = { workspace = true }
+thiserror = { workspace = true }
--- a/fleet/harmony-fleet-agent/Dockerfile
+++ b/fleet/harmony-fleet-agent/Dockerfile
@@ -0,0 +1,49 @@
+# Multi-stage container build for harmony-fleet-agent.
+#
+# Build context is the workspace root (the agent's Cargo.toml has
+# `path = "../../harmony"` deps that only resolve when the whole
+# workspace is in scope). Invoke from the repo root:
+#
+#   docker build -f fleet/harmony-fleet-agent/Dockerfile \
+#       -t hub.nationtech.io/harmony/harmony-fleet-agent:<tag> .
+#
+# Both stages are pinned to bookworm for a matched glibc — the
+# rust:slim image follows Debian's latest stable, and a binary built
+# against trixie's glibc 2.40 fails to start on a bookworm runtime
+# (`GLIBC_2.39 not found`). This is the same lesson the operator
+# Dockerfile encodes; keep the two pinned to the same Debian release.
+#
+# The e2e harness uses a faster host-build + single-stage path
+# (`fleet/harmony-fleet-e2e/src/images.rs`); this Dockerfile is the
+# canonical recipe for production registries.
+
+FROM docker.io/rust:1.94-slim-bookworm AS builder
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        pkg-config \
+        ca-certificates \
+        libssl-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY . .
+
+RUN cargo build --release --locked -p harmony-fleet-agent
+
+FROM docker.io/library/debian:bookworm-slim
+
+# ca-certificates: outbound TLS to NATS over wss:// when the agent is
+# configured against a TLS-terminated NATS endpoint. kube-rs is not
+# used at runtime on the agent; async-nats uses rustls.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /app/target/release/harmony-fleet-agent /usr/local/bin/harmony-fleet-agent
+
+# Non-root runtime. 65532 is the `nonroot` UID convention from
+# distroless. Pairs with `securityContext.runAsNonRoot: true` in
+# whatever Pod spec the harness or production helm chart applies.
+USER 65532:65532
+
+ENTRYPOINT ["/usr/local/bin/harmony-fleet-agent"]
--- a/fleet/harmony-fleet-agent/src/command_server.rs
+++ b/fleet/harmony-fleet-agent/src/command_server.rs
@@ -0,0 +1,153 @@
+//! Agent-side request/reply command server.
+//!
+//! Subscribes to `device-commands.<device_id>.>` and dispatches one
+//! handler per verb. Single-shot replies for v1; streaming verbs
+//! (logs, exec follow-up) will reuse this loop and write multiple
+//! frames to the inbox, terminating with the `X-Harmony-Final`
+//! header.
+//!
+//! Runs alongside the KV reconciler in the agent's top-level
+//! `tokio::select!`. Independent of the podman runtime: when
+//! `[agent] runtime_enabled = false`, the reconciler is skipped but
+//! the command server still runs (ping is useful for "is this device
+//! online" health-checks regardless).
+
+use std::sync::Arc;
+use std::time::Instant;
+
+use async_nats::Client;
+use async_nats::Subject;
+use futures_util::StreamExt;
+use harmony_reconciler_contracts::{
+    HDR_REQUEST_ID, Id, PingReply, Verb, device_command_subscription,
+};
+use serde::Serialize;
+use thiserror::Error;
+
+pub struct CommandServer {
+    device_id: Id,
+    client: Client,
+    agent_version: &'static str,
+    started_at: Instant,
+}
+
+impl CommandServer {
+    pub fn new(device_id: Id, client: Client) -> Self {
+        Self {
+            device_id,
+            client,
+            agent_version: env!("CARGO_PKG_VERSION"),
+            started_at: Instant::now(),
+        }
+    }
+
+    pub async fn run(self: Arc<Self>) -> Result<(), CommandServerError> {
+        let subject = device_command_subscription(&self.device_id.to_string());
+        tracing::info!(subject = %subject, "command server subscribing");
+        let mut sub = self.client.subscribe(subject.clone()).await.map_err(|e| {
+            CommandServerError::Subscribe {
+                subject: subject.clone(),
+                source: e,
+            }
+        })?;
+        while let Some(msg) = sub.next().await {
+            let me = self.clone();
+            tokio::spawn(async move {
+                match me.dispatch(msg).await {
+                    Ok(()) => tracing::debug!("command handled"),
+                    Err(e) => {
+                        tracing::error!(command_error = %e, "failed to handle command")
+                    }
+                };
+            });
+        }
+        tracing::warn!("command server subscription ended");
+        Ok(())
+    }
+
+    /// Routes one inbound command to the handler for its verb.
+    ///
+    /// The verb is the last `.`-separated token of the subject
+    /// (`device-commands.<id>.<verb>`). Nothing here unwraps: a malformed subject,
+    /// missing reply inbox, or unsupported verb comes back as a `CommandError`.
+    async fn dispatch(&self, msg: async_nats::Message) -> Result<(), CommandError> {
+        // `rsplit_once` (unlike `rsplit('.').next()`, which is always `Some`)
+        // returns `None` for a dot-less subject, so `InvalidFormat` is reachable.
+        let Some((_, verb_token)) = msg.subject.rsplit_once('.') else {
+            return Err(CommandError::InvalidFormat(msg.subject.to_string()));
+        };
+        let request_id = msg
+            .headers
+            .as_ref()
+            .and_then(|h| h.get(HDR_REQUEST_ID))
+            .map(|v| v.as_str().to_string());
+        tracing::debug!(
+            subject = %msg.subject,
+            verb = %verb_token,
+            request_id = ?request_id,
+            "command dispatch",
+        );
+
+        let reply_to = match msg.reply.clone() {
+            Some(inbox) => inbox,
+            None => {
+                tracing::warn!(verb = %verb_token, "command without reply inbox; ignoring");
+                return Err(CommandError::MissingReplyInbox);
+            }
+        };
+
+        if verb_token == Verb::Ping.as_subject_token() {
+            self.reply_ping(reply_to).await?;
+            Ok(())
+        } else {
+            tracing::warn!(verb = %verb_token, "unknown command verb");
+            Err(CommandError::UnknownVerb(verb_token.to_string()))
+        }
+    }
+
+    async fn reply_ping(&self, reply_to: Subject) -> Result<(), CommandError> {
+        let reply = PingReply {
+            device_id: self.device_id.clone(),
+            agent_version: self.agent_version.to_string(),
+            uptime_s: self.started_at.elapsed().as_secs(),
+        };
+        let payload = serde_json::to_vec(&reply).map_err(CommandError::SerializeReply)?;
+        self.client
+            .publish(reply_to, payload.into())
+            .await
+            .map_err(|e| CommandError::PublishReply(e.to_string()))
+    }
+}
+
+/// Failure modes the per-message dispatcher can report. Stays
+/// `pub(crate)` for now — the run loop logs and continues on each
+/// variant rather than surfacing them to a caller.
+#[derive(Debug, Error, Serialize)]
+pub(crate) enum CommandError {
+    #[error("invalid command subject: {0}")]
+    InvalidFormat(String),
+    #[error("unknown verb: {0}")]
+    UnknownVerb(String),
+    #[error("command message had no reply inbox")]
+    MissingReplyInbox,
+    #[error("serializing reply: {0}")]
+    // `serde_json::Error` is not `Serialize`, so this variant is skipped by
+    // the derived `Serialize` impl. The original error stays in `Display`.
+    #[serde(skip)]
+    SerializeReply(serde_json::Error),
+    #[error("publishing reply: {0}")]
+    PublishReply(String),
+}
+
+/// Error surface returned by [`CommandServer::run`]. The initial
+/// subscribe is the only operation that can currently fail; per-message
+/// errors stay inside the loop and are logged.
+#[derive(Debug, Error)]
+pub enum CommandServerError {
+    #[error("subscribing to {subject}")]
+    Subscribe {
+        subject: String,
+        #[source]
+        source: async_nats::SubscribeError,
+    },
+}
--- a/fleet/harmony-fleet-agent/src/config.rs
+++ b/fleet/harmony-fleet-agent/src/config.rs
@@ -0,0 +1,9 @@
+//! Agent-side config loading.
+//!
+//! The schema (`AgentConfig`, `AgentSection`, `NatsSection`,
+//! `CredentialsSection`) lives in `harmony-fleet-auth` so it is shared
+//! with the deploy crate by type, not by string interpolation. This
+//! file re-exports those types so existing `crate::config::*` call
+//! sites in the agent binary keep working.
+
+pub use harmony_fleet_auth::{AgentConfig, load_config};
--- a/fleet/harmony-fleet-agent/src/fleet_publisher.rs
+++ b/fleet/harmony-fleet-agent/src/fleet_publisher.rs
@@ -0,0 +1,163 @@
+//! Agent-side publish surface.
+//!
+//! Thin wrapper around three KV buckets: [`BUCKET_DEVICE_INFO`],
+//! [`BUCKET_DEVICE_STATE`], [`BUCKET_DEVICE_HEARTBEAT`].
+//!
+//! Failure mode: log and swallow. The KV is the source of truth —
+//! a dropped put gets corrected on the next reconcile transition
+//! or operator watch reconnection.
+
+use async_nats::jetstream::{self, kv};
+use harmony_reconciler_contracts::{
+    BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, DeploymentName,
+    DeploymentState, DeviceInfo, HeartbeatPayload, Id, InventorySnapshot, device_heartbeat_key,
+    device_info_key, device_state_key,
+};
+use std::collections::BTreeMap;
+
+pub struct FleetPublisher {
+    device_id: Id,
+    /// Raw NATS client kept around so we can publish on direct
+    /// (non-JetStream) subjects like `device-state.<device_id>` for
+    /// live observers — the KV writes are storage-and-watch, the
+    /// direct subject is fan-out.
+    client: async_nats::Client,
+    info_bucket: kv::Store,
+    state_bucket: kv::Store,
+    heartbeat_bucket: kv::Store,
+}
+
+impl FleetPublisher {
+    /// Open every bucket the agent needs, creating those that don't
+    /// exist yet. Idempotent with operator-side creation.
+    pub async fn connect(client: async_nats::Client, device_id: Id) -> anyhow::Result<Self> {
+        let jetstream = jetstream::new(client.clone());
+
+        let info_bucket = jetstream
+            .create_key_value(kv::Config {
+                bucket: BUCKET_DEVICE_INFO.to_string(),
+                // TODO(review): `history: 1` presumably keeps only the latest value per key
+                // (confirm); retaining the last ~10 timestamped device infos may be useful.
+                history: 1,
+                ..Default::default()
+            })
+            .await?;
+        let state_bucket = jetstream
+            .create_key_value(kv::Config {
+                bucket: BUCKET_DEVICE_STATE.to_string(),
+                // TODO(review): `history: 1` presumably keeps only the latest value per key
+                // (confirm); retaining a device's last ~10 timestamped states may be useful.
+                history: 1,
+                ..Default::default()
+            })
+            .await?;
+        let heartbeat_bucket = jetstream
+            .create_key_value(kv::Config {
+                bucket: BUCKET_DEVICE_HEARTBEAT.to_string(),
+                history: 1,
+                ..Default::default()
+            })
+            .await?;
+
+        Ok(Self {
+            device_id,
+            client,
+            info_bucket,
+            state_bucket,
+            heartbeat_bucket,
+        })
+    }
+
+    /// Publish the agent's static-ish facts. Called at startup and
+    /// on label change.
+    pub async fn publish_device_info(
+        &self,
+        labels: BTreeMap<String, String>,
+        inventory: Option<InventorySnapshot>,
+    ) {
+        let info = DeviceInfo {
+            device_id: self.device_id.clone(),
+            labels,
+            inventory,
+            updated_at: chrono::Utc::now(),
+        };
+        let key = device_info_key(&self.device_id.to_string());
+        match serde_json::to_vec(&info) {
+            Ok(payload) => {
+                if let Err(e) = self.info_bucket.put(&key, payload.into()).await {
+                    tracing::warn!(%key, error = %e, "publish_device_info: kv put failed");
+                }
+            }
+            Err(e) => tracing::warn!(error = %e, "publish_device_info: serialize failed"),
+        }
+    }
+
+    /// Tiny liveness ping. Called every 30s.
+    pub async fn publish_heartbeat(&self) {
+        let hb = HeartbeatPayload {
+            device_id: self.device_id.clone(),
+            at: chrono::Utc::now(),
+        };
+        let key = device_heartbeat_key(&self.device_id.to_string());
+        match serde_json::to_vec(&hb) {
+            Ok(payload) => {
+                if let Err(e) = self.heartbeat_bucket.put(&key, payload.into()).await {
+                    tracing::debug!(%key, error = %e, "publish_heartbeat: kv put failed");
+                }
+            }
+            Err(e) => tracing::warn!(error = %e, "publish_heartbeat: serialize failed"),
+        }
+    }
+
+    /// Persist the authoritative current phase for a `(device,
+    /// deployment)` pair. The operator's watch on the `device-state`
+    /// bucket picks up this put and updates CR status counters.
+    /// Also fans out the same payload on `device-state.<device_id>`
+    /// for live observers that don't want to consume the KV stream.
+    pub async fn write_deployment_state(&self, state: &DeploymentState) {
+        let key = device_state_key(&self.device_id.to_string(), &state.deployment);
+        match serde_json::to_vec(state) {
+            Ok(payload) => {
+                if let Err(e) = self.state_bucket.put(&key, payload.clone().into()).await {
+                    tracing::warn!(%key, error = %e, "write_deployment_state: kv put failed");
+                }
+                self.publish_direct_state(payload).await;
+            }
+            Err(e) => tracing::warn!(error = %e, "write_deployment_state: serialize failed"),
+        }
+    }
+
+    /// Emit a tiny presence pulse on `device-state.<device_id>` so live
+    /// observers (admin tooling, dashboards) see the device is alive
+    /// without subscribing to JetStream. Called from the heartbeat
+    /// loop alongside the KV heartbeat write — same cadence, two
+    /// transports.
+    pub async fn publish_state_pulse(&self) {
+        let pulse = serde_json::json!({
+            "device_id": self.device_id.to_string(),
+            "kind": "heartbeat",
+            "at": chrono::Utc::now(),
+        });
+        match serde_json::to_vec(&pulse) {
+            Ok(payload) => self.publish_direct_state(payload).await,
+            Err(e) => tracing::warn!(error = %e, "publish_state_pulse: serialize failed"),
+        }
+    }
+
+    async fn publish_direct_state(&self, payload: Vec<u8>) {
+        let subject = format!("device-state.{}", self.device_id);
+        if let Err(e) = self.client.publish(subject.clone(), payload.into()).await {
+            tracing::debug!(%subject, error = %e, "publish_direct_state: publish failed");
+        }
+    }
+
+    /// Delete the authoritative current-phase entry, e.g. when the
+    /// Deployment CR is removed and the agent has torn down the
+    /// container.
+    pub async fn delete_deployment_state(&self, deployment: &DeploymentName) {
+        let key = device_state_key(&self.device_id.to_string(), deployment);
+        if let Err(e) = self.state_bucket.delete(&key).await {
+            tracing::debug!(%key, error = %e, "delete_deployment_state: kv delete failed");
+        }
+    }
+}
--- a/fleet/harmony-fleet-agent/src/main.rs
+++ b/fleet/harmony-fleet-agent/src/main.rs
@@ -0,0 +1,320 @@
+mod command_server;
+mod config;
+mod fleet_publisher;
+mod reconciler;
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::{Context, Error, Result};
+use clap::Parser;
+use config::AgentConfig;
+use harmony_fleet_auth::{
+    CredentialSource, connect_options_with_credentials, credential_source_from_config,
+};
+// Type alias to keep function signatures readable. The auth callback
+// captures one `Arc<CredentialSource>` and clones it per invocation.
+type Creds = Arc<CredentialSource>;
+use futures_util::StreamExt;
+use harmony_reconciler_contracts::{
+    BUCKET_DESIRED_STATE, Id, InventorySnapshot, desired_state_watch_filter,
+};
+
+use harmony::inventory::Inventory;
+use harmony::modules::podman::PodmanTopology;
+use harmony::topology::Topology;
+
+use crate::command_server::CommandServer;
+use crate::fleet_publisher::FleetPublisher;
+use crate::reconciler::Reconciler;
+
+/// ROADMAP §5.6 — agent polls podman every 30s as ground truth; KV watch
+/// events are accelerators.
+const RECONCILE_INTERVAL: Duration = Duration::from_secs(30);
+
+// Command-line interface of the agent binary. Plain `//` comments are
+// used here on purpose: clap's derive turns `///` doc comments into
+// help text, which would change the `--help` output.
+#[derive(Parser)]
+#[command(name = "fleet-agent-v0", about = "IoT agent for Raspberry Pi devices")]
+struct Cli {
+    // Path of the TOML config file. Settable with `--config` or the
+    // `FLEET_AGENT_CONFIG` environment variable; falls back to the
+    // default below when neither is given.
+    #[arg(
+        long,
+        env = "FLEET_AGENT_CONFIG",
+        // FIXME this should be a constant from a config, not just hardcoded here as we need the
+        // installation scripts and other bits to know about this file location.
+        default_value = "/etc/fleet-agent/config.toml"
+    )]
+    config: std::path::PathBuf,
+}
+
+/// Connect to the NATS servers listed in `cfg.nats.urls`,
+/// authenticating through `creds`, and return the connected client.
+///
+/// The connection pings every 10 seconds and reports its lifecycle
+/// events through [`log_nats_event`].
+///
+/// # Errors
+/// Returns the connect error when the initial connection fails.
+async fn connect_nats(cfg: &AgentConfig, creds: Creds) -> Result<async_nats::Client> {
+    let urls = &cfg.nats.urls;
+    tracing::info!(device_id = %cfg.agent.device_id, "connecting to NATS {urls:?}");
+    // The auth callback is invoked on every (re)connect, so a fresh
+    // Zitadel access token is minted automatically when the cached one
+    // is near-expiry — that's how we hold the "never lose connectivity"
+    // guarantee even across token rollovers and NATS pod restarts.
+    let options = connect_options_with_credentials(creds)
+        .ping_interval(Duration::from_secs(10))
+        .event_callback(log_nats_event);
+    let client = options.connect(cfg.nats.urls.as_slice()).await?;
+    tracing::info!(urls = ?cfg.nats.urls, "connected to NATS");
+    Ok(client)
+}
+
+/// Surface async-nats's connection lifecycle in our logs. This is
+/// load-bearing for ops: a device that quietly disconnects is exactly
+/// the failure mode we promise won't happen, and operators need to
+/// see the reconnect attempts to debug.
+async fn log_nats_event(event: async_nats::Event) {
+    use async_nats::Event;
+    match event {
+        Event::Connected => tracing::info!("NATS connected"),
+        Event::Disconnected => tracing::warn!("NATS disconnected, will reconnect"),
+        Event::LameDuckMode => tracing::warn!("NATS server entered lame-duck mode"),
+        Event::SlowConsumer(sid) => {
+            tracing::warn!(sid = %sid, "NATS slow consumer")
+        }
+        Event::ServerError(e) => tracing::error!(error = %e, "NATS server error"),
+        Event::ClientError(e) => tracing::error!(error = %e, "NATS client error"),
+        Event::Closed => tracing::error!("NATS connection closed"),
+        other => tracing::debug!(?other, "NATS event"),
+    }
+}
+
+/// Watch this device's keys in the desired-state KV bucket and hand
+/// every change to the reconciler: `Put` entries go to
+/// [`Reconciler::apply`], `Delete` and `Purge` entries to
+/// [`Reconciler::remove`].
+///
+/// Errors from the watch stream and from the reconciler are logged at
+/// warn level and the loop keeps going. The function returns `Ok(())`
+/// once the watch stream ends.
+///
+/// # Errors
+/// Fails if the bucket cannot be opened/created or the watch cannot
+/// be started.
+async fn watch_desired_state(
+    client: async_nats::Client,
+    device_id: Id,
+    reconciler: Arc<Reconciler>,
+) -> Result<()> {
+    use async_nats::jetstream::kv::{Config as KvConfig, Operation};
+
+    let kv_config = KvConfig {
+        bucket: BUCKET_DESIRED_STATE.to_string(),
+        ..Default::default()
+    };
+    let bucket = async_nats::jetstream::new(client)
+        .create_key_value(kv_config)
+        .await?;
+
+    let key_filter = desired_state_watch_filter(&device_id.to_string());
+    tracing::info!(filter = %key_filter, "watching KV keys");
+
+    let mut watch = bucket.watch(&key_filter).await?;
+    while let Some(result) = watch.next().await {
+        let entry = match result {
+            Ok(entry) => entry,
+            Err(e) => {
+                tracing::warn!(error = %e, "watch error");
+                continue;
+            }
+        };
+
+        tracing::debug!(key = %entry.key, "bucket watch new value {entry:?}");
+
+        match entry.operation {
+            Operation::Put => {
+                let applied = reconciler.apply(&entry.key, &entry.value).await;
+                if let Err(e) = applied {
+                    tracing::warn!(key = %entry.key, error = %e, "apply failed");
+                }
+            }
+            Operation::Delete | Operation::Purge => {
+                let removed = reconciler.remove(&entry.key).await;
+                if let Err(e) = removed {
+                    tracing::warn!(key = %entry.key, error = %e, "remove failed");
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Liveness loop that never returns. Every 30 seconds it calls
+/// `publish_heartbeat` (the `device-heartbeat` bucket write) and then
+/// `publish_state_pulse`, which fans the same signal out on
+/// `device-state.<device_id>` for live (non-JetStream) observers.
+///
+/// Kept separate from per-deployment state writes so routine pings
+/// don't churn the device-state bucket or its watch subscribers; the
+/// direct-subject pulse is ordinary core NATS pub/sub and doesn't
+/// accumulate state anywhere.
+async fn publish_heartbeat_loop(fleet: Arc<FleetPublisher>) {
+    const HEARTBEAT_PERIOD: Duration = Duration::from_secs(30);
+
+    let mut ticker = tokio::time::interval(HEARTBEAT_PERIOD);
+    // After a late tick, space the following ticks out from that point
+    // rather than bursting to catch up.
+    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+    loop {
+        ticker.tick().await;
+        fleet.publish_heartbeat().await;
+        fleet.publish_state_pulse().await;
+    }
+}
+
+/// Build a one-shot inventory snapshot of the local host. Cheap: two
+/// small `/proc` reads plus compile-time constants.
+///
+/// Fields that cannot be determined fall back to an empty string
+/// (`kernel`) or `0` (`cpu_cores`, `memory_mb`) instead of failing.
+///
+/// NOTE(review): an earlier version of this comment said the snapshot
+/// is published alongside every heartbeat until the agent restarts.
+/// That feels like noise: it should be published on heartbeat only
+/// when something changed. It is fine to *check* the state on every
+/// heartbeat, but not to always send it over the wire. Confirm what
+/// the publisher actually does with the snapshot.
+fn local_inventory(inventory: &Inventory) -> InventorySnapshot {
+    let kernel = match std::fs::read_to_string("/proc/sys/kernel/osrelease") {
+        Ok(release) => release.trim().to_string(),
+        Err(_) => String::new(),
+    };
+    let cpu_cores = match std::thread::available_parallelism() {
+        Ok(n) => n.get() as u32,
+        Err(_) => 0,
+    };
+    let memory_mb = sys_memory_total_mb().unwrap_or(0);
+
+    InventorySnapshot {
+        hostname: inventory.location.name.clone(),
+        arch: std::env::consts::ARCH.to_string(),
+        os: std::env::consts::OS.to_string(),
+        kernel,
+        cpu_cores,
+        memory_mb,
+        agent_version: env!("CARGO_PKG_VERSION").to_string(),
+    }
+}
+
+/// Total RAM in MiB, taken from the `MemTotal:` line of
+/// `/proc/meminfo` (a kB figure, integer-divided by 1024).
+///
+/// Returns `None` when the file cannot be read (non-Linux, `/proc`
+/// not mounted), when no `MemTotal:` line exists, or when the first
+/// such line has no parseable number. Hand-rolled to avoid a sys-info
+/// crate dependency for a single field.
+fn sys_memory_total_mb() -> Option<u64> {
+    let meminfo = std::fs::read_to_string("/proc/meminfo").ok()?;
+    let mem_total = meminfo
+        .lines()
+        .find_map(|line| line.strip_prefix("MemTotal:"))?;
+    let kb = mem_total.split_whitespace().next()?.parse::<u64>().ok()?;
+    Some(kb / 1024)
+}
+
+/// Agent entry point: load the config, optionally open podman,
+/// connect to NATS, publish the startup DeviceInfo, then run the
+/// desired-state watch, periodic reconcile, heartbeat and command
+/// server side by side until one of them finishes or a SIGINT/SIGTERM
+/// arrives.
+///
+/// # Errors
+/// Returns an error when startup fails (config, podman socket,
+/// credentials, NATS connect, publisher connect), or when the SIGTERM
+/// handler, the watch, or the command server ends with an error.
+#[tokio::main]
+async fn main() -> Result<()> {
+    // Default to `info` so the agent produces useful output without
+    // requiring `RUST_LOG` to be set anywhere — the systemd unit
+    // installed by `FleetDeviceSetupScore` does set it, but a
+    // hand-launched binary or a user who's overridden the unit
+    // shouldn't have to know that. `RUST_LOG` still overrides
+    // when set (e.g. `RUST_LOG=debug` for troubleshooting).
+    let filter = tracing_subscriber::EnvFilter::try_from_default_env()
+        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
+    tracing_subscriber::fmt().with_env_filter(filter).init();
+
+    let cli = Cli::parse();
+    let cfg = config::load_config(&cli.config)?;
+    tracing::info!(
+        device_id = %cfg.agent.device_id,
+        runtime_enabled = cfg.agent.runtime_enabled,
+        "fleet-agent-v0 starting",
+    );
+
+    let device_id = cfg.agent.device_id.clone();
+
+    // Podman is the agent's runtime backend for deploying workloads.
+    // When `runtime_enabled = false`, skip the socket entirely so the
+    // agent can run on hosts that don't ship podman (the in-cluster
+    // e2e harness deploys the agent as a Pod on containerd-only k3d
+    // nodes). The command server + heartbeat still run; only the
+    // reconciler depends on the topology.
+    let topology = if cfg.agent.runtime_enabled {
+        let t = Arc::new(
+            PodmanTopology::from_default_socket()
+                .map_err(|e| anyhow::anyhow!("failed to open podman socket: {e}"))?,
+        );
+        t.ensure_ready().await.context("podman socket not ready")?;
+        tracing::info!("podman socket ready");
+        Some(t)
+    } else {
+        tracing::warn!(
+            "runtime_enabled=false; skipping podman + reconciler. \
+             Desired-state KV deliveries will be logged and dropped."
+        );
+        None
+    };
+
+    let inventory = Arc::new(Inventory::from_localhost());
+    tracing::info!(hostname = %inventory.location.name, "inventory loaded");
+    let inventory_snapshot = local_inventory(&inventory);
+
+    let creds = credential_source_from_config(&cfg.credentials)
+        .context("building NATS credential source")?;
+
+    let client = connect_nats(&cfg, creds).await.map_err(|e| {
+        let msg = format!("Nats connection FAILED : {e}");
+        tracing::error!(msg);
+        Error::msg(msg)
+    })?;
+
+    // Publish surface. Opens the three KV buckets (idempotent
+    // creates). Must be live before the reconciler starts so
+    // writes on the first desired-state KV watch land on the wire.
+    let fleet = Arc::new(
+        FleetPublisher::connect(client.clone(), device_id.clone())
+            .await
+            .context("fleet publisher connect")?,
+    );
+    tracing::info!("fleet publisher ready");
+
+    // Publish DeviceInfo once at startup. Merge the config-declared
+    // labels with an always-on `device-id=<id>` default so every
+    // device is targetable by id even without explicit labels.
+    // Config labels win on key conflicts — operators can override
+    // `device-id` if they really want to (unusual but legal).
+    let mut startup_labels = cfg.labels.clone();
+    startup_labels
+        .entry("device-id".to_string())
+        .or_insert_with(|| device_id.to_string());
+    // This is the snapshot's only consumer, so it is moved in rather
+    // than cloned.
+    fleet
+        .publish_device_info(startup_labels, Some(inventory_snapshot))
+        .await;
+
+    // Reconciler exists only when a podman topology is available.
+    // Without it, the desired-state watch + periodic reconcile arms
+    // are replaced by pending-forever futures so `select!` only sees
+    // heartbeat + command server.
+    let reconciler: Option<Arc<Reconciler>> = topology.as_ref().map(|t| {
+        Arc::new(Reconciler::new(
+            device_id.clone(),
+            t.clone(),
+            inventory.clone(),
+            Some(fleet.clone()),
+        ))
+    });
+
+    let command_server = Arc::new(CommandServer::new(device_id.clone(), client.clone()));
+
+    let ctrlc = async {
+        tokio::signal::ctrl_c().await.ok();
+        tracing::info!("received SIGINT, shutting down");
+    };
+    let sigterm = async {
+        tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())?
+            .recv()
+            .await;
+        tracing::info!("received SIGTERM, shutting down");
+        Ok::<(), anyhow::Error>(())
+    };
+
+    let watch: std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send>> =
+        match reconciler.as_ref() {
+            Some(r) => Box::pin(watch_desired_state(
+                client.clone(),
+                device_id.clone(),
+                r.clone(),
+            )),
+            None => Box::pin(async {
+                std::future::pending::<()>().await;
+                Ok(())
+            }),
+        };
+    let reconcile: std::pin::Pin<Box<dyn std::future::Future<Output = ()> + Send>> =
+        match reconciler.as_ref() {
+            Some(r) => Box::pin(r.clone().run_periodic(RECONCILE_INTERVAL)),
+            None => Box::pin(std::future::pending::<()>()),
+        };
+    let heartbeat = publish_heartbeat_loop(fleet);
+    let commands = command_server.run();
+
+    tokio::select! {
+        // Waiting on ctrlc in a select will automatically terminate other branches when
+        // ctrlc happens.
+        _ = ctrlc => {},
+        r = sigterm => { r?; }
+        r = watch => { r?; }
+        _ = reconcile => {}
+        _ = heartbeat => {}
+        r = commands => { r?; }
+    }
+
+    Ok(())
+}
--- a/fleet/harmony-fleet-agent/src/reconciler.rs
+++ b/fleet/harmony-fleet-agent/src/reconciler.rs
@@ -0,0 +1,349 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::Result;
+use chrono::Utc;
+use harmony_reconciler_contracts::{DeploymentName, DeploymentState, Id, Phase};
+use tokio::sync::Mutex;
+
+use harmony::inventory::Inventory;
+use harmony::modules::podman::{PodmanTopology, PodmanV0Score, ReconcileScore};
+use harmony::score::Score;
+
+use crate::fleet_publisher::FleetPublisher;
+
+/// Last-seen desired state for one NATS KV key. Entries are inserted
+/// by `apply` after a successful run, re-run by the periodic `tick`,
+/// and taken out by `remove`, which uses the cached score to know
+/// which services to delete.
+struct CachedEntry {
+    /// Serialized score JSON. Used for string-compare idempotency per
+    /// ROADMAP §5.5 — cheaper and more deterministic than a hash.
+    serialized: String,
+    /// Parsed score. Cached so the periodic reconcile tick and delete
+    /// handlers don't have to re-parse the JSON.
+    score: PodmanV0Score,
+}
+
+/// Applies desired-state scores delivered over NATS KV to the local
+/// podman topology and reports the resulting per-deployment phase
+/// through an optional [`FleetPublisher`].
+pub struct Reconciler {
+    /// Identity of this device; copied into every published
+    /// [`DeploymentState`].
+    device_id: Id,
+    /// Podman topology the scores are executed against.
+    topology: Arc<PodmanTopology>,
+    /// Inventory handed to each score's interpret on execution.
+    inventory: Arc<Inventory>,
+    /// Keyed by NATS KV key (`<device>.<deployment>`). A single entry per
+    /// KV key — in v0 there is no fan-out from one key to many scores.
+    state: Mutex<HashMap<String, CachedEntry>>,
+    /// Current phase per deployment, used to decide whether a new
+    /// write to the `device-state` KV is needed.
+    ///
+    /// NOTE : this feels dangerous, conflict on deployment name could be a problem
+    /// We must explore this and clarify it in the design and decide if it is a constraint
+    deployments: Mutex<HashMap<DeploymentName, Phase>>,
+    /// Publish surface. Optional so unit tests without a live NATS
+    /// client still work; always populated in the real agent runtime.
+    fleet: Option<Arc<FleetPublisher>>,
+}
+
+impl Reconciler {
+    /// Create a reconciler with an empty score cache and no recorded
+    /// phases. Pass `None` for `fleet` to run without publishing
+    /// deployment state.
+    pub fn new(
+        device_id: Id,
+        topology: Arc<PodmanTopology>,
+        inventory: Arc<Inventory>,
+        fleet: Option<Arc<FleetPublisher>>,
+    ) -> Self {
+        let state = Mutex::new(HashMap::new());
+        let deployments = Mutex::new(HashMap::new());
+        Self {
+            device_id,
+            topology,
+            inventory,
+            state,
+            deployments,
+            fleet,
+        }
+    }
+
+    /// Track `phase` as the current phase of `deployment` and, when it
+    /// differs from the phase already recorded, write a fresh
+    /// [`DeploymentState`] through the publisher (if one is
+    /// configured). Re-confirming the recorded phase returns early, so
+    /// the periodic reconcile tick doesn't churn the bucket.
+    ///
+    /// Only the phase is compared: a repeated call with the same phase
+    /// but a different `last_error` publishes nothing.
+    async fn apply_phase(
+        &self,
+        deployment: &DeploymentName,
+        phase: Phase,
+        last_error: Option<String>,
+    ) {
+        {
+            // The comparison and the insert share one lock acquisition;
+            // the guard is dropped at the end of this scope, before any
+            // publish happens.
+            let mut phases = self.deployments.lock().await;
+            let unchanged = matches!(phases.get(deployment), Some(current) if *current == phase);
+            if unchanged {
+                return;
+            }
+            phases.insert(deployment.clone(), phase);
+        }
+
+        let Some(publisher) = &self.fleet else {
+            return;
+        };
+        let state = DeploymentState {
+            device_id: self.device_id.clone(),
+            deployment: deployment.clone(),
+            phase,
+            last_event_at: Utc::now(),
+            last_error,
+        };
+        publisher.write_deployment_state(&state).await;
+    }
+
+    /// Forget the in-memory phase of `deployment` and, if one was
+    /// recorded, delete its KV entry through the publisher.
+    ///
+    /// Idempotent: for a deployment with no recorded phase this
+    /// returns early and sends nothing on the wire.
+    ///
+    /// NOTE(review): because of that early return, a KV entry written
+    /// before an agent restart is not deleted by this path — confirm
+    /// whether that is intended.
+    async fn drop_phase(&self, deployment: &DeploymentName) {
+        // The guard is a temporary of this statement, so the lock is
+        // released before the publisher is awaited.
+        let previous = self.deployments.lock().await.remove(deployment);
+        if previous.is_none() {
+            return;
+        }
+        if let Some(publisher) = self.fleet.as_ref() {
+            publisher.delete_deployment_state(deployment).await;
+        }
+    }
+
+    /// Handle a Put event (new or updated score on NATS KV). No-ops if the
+    /// serialized score is byte-identical to the last-seen value for this
+    /// key.
+    ///
+    /// Phase reporting along the way (only when the key yields a
+    /// deployment name): `Failed` for an undecodable payload, then
+    /// `Pending` before the score runs, then `Running` or `Failed`
+    /// depending on the outcome.
+    ///
+    /// # Errors
+    /// Returns the error from running the score. An undecodable
+    /// payload is reported through the phase and yields `Ok(())`.
+    pub async fn apply(&self, key: &str, value: &[u8]) -> Result<()> {
+        // `None` when the key has no `.` or the suffix is not a valid
+        // deployment name; the score is still applied, just without
+        // phase reporting.
+        let deployment = deployment_from_key(key);
+        let incoming = match serde_json::from_slice::<ReconcileScore>(value) {
+            Ok(ReconcileScore::PodmanV0(s)) => s,
+            Err(e) => {
+                tracing::warn!(key, error = %e, "failed to deserialize score");
+                if let Some(name) = &deployment {
+                    self.apply_phase(name, Phase::Failed, Some(format!("bad payload: {e}")))
+                        .await;
+                }
+                return Ok(());
+            }
+        };
+        let serialized = String::from_utf8_lossy(value).into_owned();
+
+        // Idempotency check in its own scope so the cache lock is
+        // released before the score runs.
+        {
+            let state = self.state.lock().await;
+            if let Some(existing) = state.get(key) {
+                if existing.serialized == serialized {
+                    tracing::debug!(key, "score unchanged — noop");
+                    return Ok(());
+                }
+            }
+        }
+
+        if let Some(name) = &deployment {
+            self.apply_phase(name, Phase::Pending, None).await;
+        }
+
+        match self.run_score(key, &incoming).await {
+            Ok(()) => {
+                if let Some(name) = &deployment {
+                    self.apply_phase(name, Phase::Running, None).await;
+                }
+            }
+            Err(e) => {
+                if let Some(name) = &deployment {
+                    self.apply_phase(name, Phase::Failed, Some(short(&e.to_string())))
+                        .await;
+                }
+                // NOTE(review): returning here skips the cache insert
+                // below, so a previously cached score for this key (if
+                // any) stays in place and the periodic tick keeps
+                // re-running that older score — confirm this is intended.
+                return Err(e);
+            }
+        }
+
+        // Cache only scores that ran successfully.
+        let mut state = self.state.lock().await;
+        state.insert(
+            key.to_string(),
+            CachedEntry {
+                serialized,
+                score: incoming,
+            },
+        );
+        Ok(())
+    }
+
+    /// Handle a Delete/Purge event. Removes every service referenced
+    /// by the last cached score for this key, then drops the
+    /// deployment's recorded phase.
+    ///
+    /// Idempotent: if we never saw a Put for this key (agent restart
+    /// after delete), logs and returns ok. A failure to remove an
+    /// individual service is logged at warn level and does not stop
+    /// the remaining removals.
+    ///
+    /// The cache lock is held only long enough to take the entry out;
+    /// it is released before any service removal or phase drop is
+    /// awaited, so the watch and periodic-tick paths are never blocked
+    /// behind that I/O.
+    ///
+    /// # Errors
+    /// Currently always returns `Ok(())`.
+    pub async fn remove(&self, key: &str) -> Result<()> {
+        use harmony::topology::ContainerRuntime;
+
+        let deployment = deployment_from_key(key);
+        // The guard is a temporary of this statement and is dropped
+        // as soon as the entry has been taken out of the map.
+        let cached = self.state.lock().await.remove(key);
+
+        match cached {
+            None => {
+                tracing::info!(key, "delete for unknown key — nothing to remove");
+            }
+            Some(entry) => {
+                for service in &entry.score.services {
+                    if let Err(e) = self.topology.remove_service(&service.name).await {
+                        tracing::warn!(
+                            key,
+                            service = %service.name,
+                            error = %e,
+                            "failed to remove container"
+                        );
+                    } else {
+                        tracing::info!(key, service = %service.name, "removed container");
+                    }
+                }
+            }
+        }
+
+        if let Some(name) = &deployment {
+            self.drop_phase(name).await;
+        }
+        Ok(())
+    }
+
+    /// One pass of the periodic ground-truth reconcile (ROADMAP §5.6:
+    /// "polling instead of event-driven PLEG. Agent polls podman every
+    /// 30s as ground truth; KV watch events are accelerators.").
+    ///
+    /// Copies the cached scores out of the cache, then re-runs each
+    /// one with the cache lock released. A successful run reports
+    /// `Running`; a failed run is logged at warn level and reports
+    /// `Failed` with a shortened error message. The underlying
+    /// `ensure_service_running` is described as idempotent, so a
+    /// converged state should produce no log noise.
+    ///
+    /// # Errors
+    /// Currently always returns `Ok(())`; per-score failures are
+    /// reported through the phase instead.
+    pub async fn tick(&self) -> Result<()> {
+        let cached: Vec<(String, PodmanV0Score)> = self
+            .state
+            .lock()
+            .await
+            .iter()
+            .map(|(key, entry)| (key.clone(), entry.score.clone()))
+            .collect();
+
+        for (key, score) in cached {
+            let outcome = self.run_score(&key, &score).await;
+            if let Err(e) = &outcome {
+                tracing::warn!(key, error = %e, "periodic reconcile failed");
+            }
+            // Keys that don't map to a deployment name are still
+            // reconciled; they just have no phase to report.
+            let Some(name) = deployment_from_key(&key) else {
+                continue;
+            };
+            match outcome {
+                Ok(()) => self.apply_phase(&name, Phase::Running, None).await,
+                Err(e) => {
+                    self.apply_phase(&name, Phase::Failed, Some(short(&e.to_string())))
+                        .await
+                }
+            }
+        }
+        Ok(())
+    }
+
+    pub async fn run_periodic(self: Arc<Self>, interval: Duration) {
+        let mut ticker = tokio::time::interval(interval);
+        ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+        loop {
+            ticker.tick().await;
+            if let Err(e) = self.tick().await {
+                tracing::warn!(error = %e, "reconcile tick error");
+            }
+        }
+    }
+
+    /// Build the interpret for `score` and execute it against this
+    /// reconciler's inventory and topology. Logs the outcome at info
+    /// level on success.
+    ///
+    /// # Errors
+    /// Wraps an execution failure in an error message that names `key`.
+    async fn run_score(&self, key: &str, score: &PodmanV0Score) -> Result<()> {
+        let interpret = Score::<PodmanTopology>::create_interpret(score);
+        match interpret.execute(&self.inventory, &self.topology).await {
+            Ok(outcome) => {
+                tracing::info!(key, outcome = ?outcome, "reconciled");
+                Ok(())
+            }
+            Err(e) => Err(anyhow::anyhow!(
+                "PodmanV0Score interpret failed for {key}: {e}"
+            )),
+        }
+    }
+}
+
+/// Parse the deployment name out of a NATS KV key shaped like
+/// `<device>.<deployment>`: everything after the first `.` is handed
+/// to [`DeploymentName::try_new`].
+///
+/// Returns `None` when the key contains no `.` or the remainder is
+/// rejected by `try_new`.
+fn deployment_from_key(key: &str) -> Option<DeploymentName> {
+    key.split_once('.')
+        .and_then(|(_device, deployment)| DeploymentName::try_new(deployment).ok())
+}
+
+/// Truncate a long error message so the DeploymentState payload stays
+/// comfortably below NATS JetStream's per-message limit.
+///
+/// Messages of at most 512 bytes are returned unchanged. Longer ones
+/// are cut to at most 512 bytes and get a trailing `…`. The cut
+/// always lands on a UTF-8 character boundary: if byte 512 falls
+/// inside a multi-byte character, that whole character is dropped
+/// rather than slicing through it (which would panic).
+fn short(s: &str) -> String {
+    const MAX: usize = 512;
+    if s.len() <= MAX {
+        return s.to_string();
+    }
+
+    // Walk back to the nearest character boundary at or below MAX.
+    // Index 0 is always a boundary, so the loop terminates.
+    let mut end = MAX;
+    while !s.is_char_boundary(end) {
+        end -= 1;
+    }
+
+    let mut cut = String::with_capacity(end + '…'.len_utf8());
+    cut.push_str(&s[..end]);
+    cut.push('…');
+    cut
+}
+
+#[cfg(test)]
+mod tests {
+    //! Focused tests for transition detection. Drive `apply_phase` /
+    //! `drop_phase` directly with an inert topology (no real podman
+    //! socket) and a `None` FleetPublisher.
+    use super::*;
+    use harmony::inventory::Inventory;
+    use harmony::modules::podman::PodmanTopology;
+    use std::path::PathBuf;
+
+    // Reconciler wired to a socket path that does not exist and to no
+    // publisher, so the tests only exercise the in-memory phase map.
+    fn reconciler() -> Reconciler {
+        let topology = Arc::new(
+            PodmanTopology::from_unix_socket(PathBuf::from("/nonexistent/for-tests")).unwrap(),
+        );
+        let inventory = Arc::new(Inventory::empty());
+        Reconciler::new(
+            Id::from("test-device".to_string()),
+            topology,
+            inventory,
+            None,
+        )
+    }
+
+    // Shorthand for a deployment name that is known to be valid.
+    fn dn(s: &str) -> DeploymentName {
+        DeploymentName::try_new(s).expect("valid test name")
+    }
+
+    // A first call records the phase for the deployment.
+    #[tokio::test]
+    async fn apply_phase_records_new_phase() {
+        let r = reconciler();
+        r.apply_phase(&dn("hello"), Phase::Running, None).await;
+        let phases = r.deployments.lock().await;
+        assert_eq!(phases.get(&dn("hello")), Some(&Phase::Running));
+    }
+
+    // Repeating the same phase leaves a single map entry.
+    #[tokio::test]
+    async fn apply_phase_idempotent_for_same_phase() {
+        let r = reconciler();
+        r.apply_phase(&dn("hello"), Phase::Running, None).await;
+        r.apply_phase(&dn("hello"), Phase::Running, None).await;
+        let phases = r.deployments.lock().await;
+        assert_eq!(phases.len(), 1);
+    }
+
+    // The last phase applied wins.
+    #[tokio::test]
+    async fn apply_phase_transitions_update_phase() {
+        let r = reconciler();
+        r.apply_phase(&dn("hello"), Phase::Pending, None).await;
+        r.apply_phase(&dn("hello"), Phase::Running, None).await;
+        r.apply_phase(&dn("hello"), Phase::Failed, Some("oom".to_string()))
+            .await;
+        let phases = r.deployments.lock().await;
+        assert_eq!(phases.get(&dn("hello")), Some(&Phase::Failed));
+    }
+
+    // Dropping a recorded deployment removes its map entry.
+    #[tokio::test]
+    async fn drop_phase_clears_known_deployment() {
+        let r = reconciler();
+        r.apply_phase(&dn("hello"), Phase::Running, None).await;
+        r.drop_phase(&dn("hello")).await;
+        let phases = r.deployments.lock().await;
+        assert!(!phases.contains_key(&dn("hello")));
+    }
+
+    // Dropping a deployment that was never recorded changes nothing.
+    #[tokio::test]
+    async fn drop_phase_on_unknown_deployment_is_noop() {
+        let r = reconciler();
+        r.drop_phase(&dn("never-existed")).await;
+        let phases = r.deployments.lock().await;
+        assert!(phases.is_empty());
+    }
+}
--- a/fleet/harmony-fleet-auth/Cargo.toml
+++ b/fleet/harmony-fleet-auth/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "harmony-fleet-auth"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "Shared NATS credential plumbing for the fleet agent + operator (Zitadel JWT-bearer + dev-only username/password)"
+
+[lib]
+path = "src/lib.rs"
+
+[dependencies]
+harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
+async-nats = { workspace = true }
+anyhow = { workspace = true }
+chrono = { workspace = true }
+jsonwebtoken = "9"
+reqwest = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+tokio = { workspace = true, features = ["sync"] }
+toml = { workspace = true }
+tracing = { workspace = true }
+serde_json = { workspace = true }
+
+[dev-dependencies]
+tokio = { workspace = true, features = ["macros", "rt"] }
--- a/fleet/harmony-fleet-auth/src/agent_config.rs
+++ b/fleet/harmony-fleet-auth/src/agent_config.rs
@@ -0,0 +1,222 @@
+//! Shared agent-config schema.
+//!
+//! `harmony-fleet-agent` reads this from `/etc/fleet-agent/config.toml`
+//! at startup; `harmony-fleet-deploy` constructs the same shape when it
+//! emits a `ConfigMap` mounted into the agent's container. Keeping the
+//! schema in one place — typed — means the deploy crate cannot drift
+//! away from what the agent can parse without a compile error.
+
+use crate::CredentialsSection;
+use harmony_reconciler_contracts::Id;
+use serde::{Deserialize, Serialize};
+use std::collections::BTreeMap;
+use std::path::Path;
+
+/// Root of the agent's TOML configuration: the `[agent]`, `[nats]`
+/// and `[credentials]` tables plus an optional `[labels]` table.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct AgentConfig {
+    /// `[agent]` table: device identity and runtime toggle.
+    pub agent: AgentSection,
+    /// `[nats]` table: server URLs to connect to.
+    pub nats: NatsSection,
+    /// `[credentials]` table: how the process authenticates to NATS.
+    pub credentials: CredentialsSection,
+    /// Routing labels published verbatim in every DeviceInfo
+    /// heartbeat. The operator reflects them into
+    /// `Device.metadata.labels` so Deployment `spec.targetSelector`
+    /// resolves against them (K8s-Node-analogue flow). Empty by
+    /// default — a device with no labels is targetable only by its
+    /// auto-published `device-id` label.
+    ///
+    /// An empty map is omitted on serialization, so no `[labels]`
+    /// table is emitted for it.
+    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
+    pub labels: BTreeMap<String, String>,
+}
+
+/// Contents of the `[agent]` table.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct AgentSection {
+    /// Cross-boundary device identity. TOML deserializes the field
+    /// as a bare string thanks to `#[serde(transparent)]` on `Id`.
+    pub device_id: Id,
+    /// When false, skip the podman socket + reconciler loop and run
+    /// only the heartbeat + command-server arms. Lets the agent run
+    /// on hosts without podman (e.g. the in-cluster e2e harness on
+    /// containerd-only k3d nodes). Default true so existing RPi
+    /// configs are unaffected.
+    #[serde(default = "default_runtime_enabled")]
+    pub runtime_enabled: bool,
+}
+
+/// Serde default for `AgentSection::runtime_enabled`: a config that
+/// omits the key keeps the runtime enabled.
+fn default_runtime_enabled() -> bool {
+    true
+}
+
+/// Contents of the `[nats]` table.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct NatsSection {
+    /// NATS server URLs, e.g. `nats://nats:4222`.
+    pub urls: Vec<String>,
+}
+
+/// Read the file at `path` and parse it as an [`AgentConfig`] TOML
+/// document.
+///
+/// # Errors
+/// Fails when the file cannot be read or is not a valid agent
+/// config. Both errors name the offending path, so a bare "No such
+/// file or directory" can be traced back to the config location.
+pub fn load_config(path: &Path) -> anyhow::Result<AgentConfig> {
+    use anyhow::Context as _;
+
+    let content = std::fs::read_to_string(path)
+        .with_context(|| format!("reading agent config {}", path.display()))?;
+    toml::from_str(&content)
+        .with_context(|| format!("parsing agent config {}", path.display()))
+}
+
+#[cfg(test)]
+mod tests {
+    //! Parsing and serialization tests for the shared agent-config
+    //! schema, all driven through the `toml` crate on in-memory
+    //! strings (no files are read).
+    use super::*;
+
+    // A complete config, including `[labels]`, parses and exposes the
+    // `toml-shared` credential fields.
+    #[test]
+    fn parses_toml_shared_credentials() {
+        let raw = r#"
+[agent]
+device_id = "pi-42"
+runtime_enabled = true
+
+[credentials]
+type = "toml-shared"
+nats_user = "u"
+nats_pass = "p"
+
+[nats]
+urls = ["nats://nats:4222"]
+
+[labels]
+group = "site-a"
+arch = "aarch64"
+"#;
+        let cfg: AgentConfig = toml::from_str(raw).expect("valid config");
+        assert_eq!(cfg.labels.get("group"), Some(&"site-a".to_string()));
+        match &cfg.credentials {
+            CredentialsSection::TomlShared {
+                nats_user,
+                nats_pass,
+            } => {
+                assert_eq!(nats_user, "u");
+                assert_eq!(nats_pass, "p");
+            }
+            _ => panic!("expected TomlShared"),
+        }
+    }
+
+    // Leaving out `[labels]` yields an empty label map.
+    #[test]
+    fn labels_section_optional_defaults_empty() {
+        let raw = r#"
+[agent]
+device_id = "pi-42"
+
+[credentials]
+type = "toml-shared"
+nats_user = "u"
+nats_pass = "p"
+
+[nats]
+urls = ["nats://nats:4222"]
+"#;
+        let cfg: AgentConfig = toml::from_str(raw).expect("valid config");
+        assert!(cfg.labels.is_empty());
+    }
+
+    #[test]
+    fn runtime_enabled_defaults_to_true_when_omitted() {
+        // Existing RPi configs predate the runtime_enabled flag.
+        // Omitting it must keep podman+reconciler turned on — anything
+        // else silently downgrades a production agent.
+        let raw = r#"
+[agent]
+device_id = "pi-42"
+
+[credentials]
+type = "toml-shared"
+nats_user = "u"
+nats_pass = "p"
+
+[nats]
+urls = ["nats://nats:4222"]
+"#;
+        let cfg: AgentConfig = toml::from_str(raw).expect("valid config");
+        assert!(cfg.agent.runtime_enabled);
+    }
+
+    // An explicit `runtime_enabled = false` overrides the default.
+    #[test]
+    fn runtime_enabled_false_is_honored() {
+        let raw = r#"
+[agent]
+device_id = "pi-42"
+runtime_enabled = false
+
+[credentials]
+type = "toml-shared"
+nats_user = "u"
+nats_pass = "p"
+
+[nats]
+urls = ["nats://nats:4222"]
+"#;
+        let cfg: AgentConfig = toml::from_str(raw).expect("valid config");
+        assert!(!cfg.agent.runtime_enabled);
+    }
+
+    #[test]
+    fn round_trips_via_toml_serialize_with_labels() {
+        // The deploy crate emits this same schema to a `ConfigMap`
+        // via `toml::to_string`. The round-trip is the contract — a
+        // deploy that emits something the agent can't parse is a
+        // compile error today, but this test guards the serde
+        // attributes that make the round-trip behave (skip-empty,
+        // tagged credentials, etc.).
+        //
+        // The values deliberately contain quotes and backslashes so
+        // the round-trip also covers TOML string escaping.
+        let original = AgentConfig {
+            agent: AgentSection {
+                device_id: Id::from("vm-device-01"),
+                runtime_enabled: false,
+            },
+            nats: NatsSection {
+                urls: vec!["nats://fleet-nats.e2e-x.svc.cluster.local:4222".to_string()],
+            },
+            credentials: CredentialsSection::TomlShared {
+                nats_user: "device\"with\"quotes".to_string(),
+                nats_pass: "p@ss\\with\\backslash".to_string(),
+            },
+            labels: BTreeMap::from([
+                ("group".to_string(), "site\"a".to_string()),
+                ("arch".to_string(), "aarch64".to_string()),
+            ]),
+        };
+        let rendered = toml::to_string(&original).expect("serialize");
+        let parsed: AgentConfig = toml::from_str(&rendered).expect("deserialize");
+        assert_eq!(parsed.agent.device_id, original.agent.device_id);
+        assert_eq!(parsed.agent.runtime_enabled, original.agent.runtime_enabled);
+        assert_eq!(parsed.nats.urls, original.nats.urls);
+        match (&parsed.credentials, &original.credentials) {
+            (
+                CredentialsSection::TomlShared {
+                    nats_user: pu,
+                    nats_pass: pp,
+                },
+                CredentialsSection::TomlShared {
+                    nats_user: ou,
+                    nats_pass: op,
+                },
+            ) => {
+                assert_eq!(pu, ou);
+                assert_eq!(pp, op);
+            }
+            _ => panic!("expected TomlShared round-trip"),
+        }
+        assert_eq!(parsed.labels, original.labels);
+    }
+
+    // An empty label map must not produce a `[labels]` table.
+    #[test]
+    fn empty_labels_omit_section_on_serialize() {
+        let cfg = AgentConfig {
+            agent: AgentSection {
+                device_id: Id::from("vm-device-01"),
+                runtime_enabled: false,
+            },
+            nats: NatsSection {
+                urls: vec!["nats://nats:4222".to_string()],
+            },
+            credentials: CredentialsSection::TomlShared {
+                nats_user: "u".to_string(),
+                nats_pass: "p".to_string(),
+            },
+            labels: BTreeMap::new(),
+        };
+        let rendered = toml::to_string(&cfg).expect("serialize");
+        assert!(!rendered.contains("[labels]"), "got:\n{rendered}");
+    }
+}
--- a/fleet/harmony-fleet-auth/src/config.rs
+++ b/fleet/harmony-fleet-auth/src/config.rs
@@ -0,0 +1,186 @@
+use serde::{Deserialize, Serialize};
+use std::path::PathBuf;
+
+/// Internally-tagged credential definition shared between the fleet
+/// agent and the fleet operator. The `type` field selects the variant;
+/// each variant's other fields are flatly mixed into the
+/// `[credentials]` TOML table for human-friendly editing.
+///
+/// **Why one type for both processes**: the agent reads this from
+/// `/etc/fleet-agent/config.toml`; the operator reads it from a single
+/// env var (`FLEET_OPERATOR_CREDENTIALS_TOML`) whose value is a TOML
+/// snippet shaped exactly like the `[credentials]` table. Identical
+/// deserialization, identical downstream code path. The only thing
+/// that differs is the byte source.
+///
+/// Adding a new mode is additive — emit `type = "<new>"` from the
+/// installer side, decode here, instantiate the matching
+/// `CredentialSource`.
+///
+/// NOTE(review): the derived `Debug` prints `nats_pass` and `key_json`
+/// verbatim — avoid formatting this value with `{:?}` in logs.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(tag = "type", rename_all = "kebab-case")]
+pub enum CredentialsSection {
+    /// Shared username + password baked into the agent config. Only
+    /// suitable for v0/development scenarios where every device shares
+    /// a single NATS account user. Not used in production.
+    TomlShared {
+        nats_user: String,
+        nats_pass: String,
+    },
+    /// Zitadel machine-user JWT-bearer (RFC 7523) flow. The keyfile
+    /// (the JSON blob Zitadel emits for `KEY_TYPE_JSON`) is the only
+    /// durable secret on the process — the access token is short-lived
+    /// and re-minted before expiry by the auth callback registered on
+    /// each NATS (re)connect.
+    ///
+    /// Two ways to point the loader at the keyfile contents — the
+    /// loader prefers `key_json` when present (operator on
+    /// OKD-restricted-v2 SCC, no volume mounts allowed), falls back to
+    /// `key_path` (agent on a VM, file on disk):
+    ///
+    /// * `key_json` — the JSON keyfile content embedded inline. Lets
+    ///   the operator pod consume the entire credentials block from a
+    ///   single env-var-from-Secret without a Secret volume mount.
+    /// * `key_path` — filesystem path the loader reads. The agent's
+    ///   `FleetDeviceSetupScore` drops the keyfile here at install
+    ///   time. Default path is the agent convention.
+    ///
+    /// Setting both is explicitly allowed — the loader picks
+    /// `key_json` and ignores `key_path`. Setting neither makes the
+    /// loader read the default `key_path`; if that file cannot be
+    /// read, the factory returns an error.
+    ZitadelJwt {
+        /// Filesystem path to the keyfile. Falls back to the agent
+        /// default when omitted (file expected to exist there).
+        #[serde(default = "default_zitadel_key_path")]
+        key_path: PathBuf,
+        /// Inline JSON keyfile content. When `Some` and not blank,
+        /// takes precedence over `key_path`; an empty or
+        /// whitespace-only value is treated like `None`. Used by the
+        /// operator pod (env-var-from-Secret deployment) where
+        /// mounting Secret volumes conflicts with OKD's restricted-v2
+        /// SCC.
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        key_json: Option<String>,
+        /// Externally-visible Zitadel issuer URL — must match Zitadel's
+        /// emitted `iss` claim exactly (including port if non-default).
+        oidc_issuer_url: String,
+        /// `aud` value for token-bearer requests. Typically the Zitadel
+        /// project ID (the auth callout side validates against this).
+        audience: String,
+        /// Whether the HTTP client accepts invalid TLS certs. Local-dev
+        /// escape hatch for self-signed staging Zitadels. Omitted from
+        /// serialized output when `false`.
+        #[serde(default, skip_serializing_if = "std::ops::Not::not")]
+        danger_accept_invalid_certs: bool,
+    },
+}
+
+/// Serde default for `ZitadelJwt::key_path`: the keyfile location used
+/// when the config omits `key_path`.
+fn default_zitadel_key_path() -> PathBuf {
+    "/etc/fleet-agent/zitadel-key.json".into()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Deserialize a `[credentials]`-shaped TOML snippet, panicking on
+    /// malformed input.
+    fn parse(raw: &str) -> CredentialsSection {
+        toml::from_str(raw).expect("valid credentials TOML")
+    }
+
+    /// `type = "toml-shared"` decodes into the shared user/pass variant.
+    #[test]
+    fn parses_toml_shared() {
+        let raw = r#"
+type = "toml-shared"
+nats_user = "u"
+nats_pass = "p"
+"#;
+        let CredentialsSection::TomlShared {
+            nats_user,
+            nats_pass,
+        } = parse(raw)
+        else {
+            panic!("expected TomlShared");
+        };
+        assert_eq!(nats_user, "u");
+        assert_eq!(nats_pass, "p");
+    }
+
+    /// Agent-side shape: every field spelled out, key read from a path.
+    #[test]
+    fn parses_zitadel_jwt_with_key_path() {
+        let raw = r#"
+type = "zitadel-jwt"
+key_path = "/var/lib/fleet-agent/zitadel-key.json"
+oidc_issuer_url = "https://zitadel.staging.example.com"
+audience = "366378028009259037"
+danger_accept_invalid_certs = false
+"#;
+        let CredentialsSection::ZitadelJwt {
+            key_path,
+            key_json,
+            oidc_issuer_url,
+            audience,
+            danger_accept_invalid_certs,
+        } = parse(raw)
+        else {
+            panic!("expected ZitadelJwt");
+        };
+        assert_eq!(
+            key_path.to_str(),
+            Some("/var/lib/fleet-agent/zitadel-key.json")
+        );
+        assert!(key_json.is_none());
+        assert_eq!(oidc_issuer_url, "https://zitadel.staging.example.com");
+        assert_eq!(audience, "366378028009259037");
+        assert!(!danger_accept_invalid_certs);
+    }
+
+    /// Operator-side shape: the entire credentials block plus the JSON
+    /// keyfile content as a TOML triple-quoted string. Used by the
+    /// operator's env-var-from-Secret deployment.
+    #[test]
+    fn parses_zitadel_jwt_with_inline_key_json() {
+        let raw = r#"
+type = "zitadel-jwt"
+oidc_issuer_url = "https://sso-staging.cb1.nationtech.io"
+audience = "371584906720968725"
+key_json = """
+{"type":"serviceaccount","keyId":"k1","key":"-----BEGIN RSA PRIVATE KEY-----\nABC\n-----END RSA PRIVATE KEY-----\n","userId":"u1"}
+"""
+"#;
+        let CredentialsSection::ZitadelJwt {
+            key_json, audience, ..
+        } = parse(raw)
+        else {
+            panic!("expected ZitadelJwt");
+        };
+        let inline = key_json.expect("key_json present");
+        assert!(inline.contains("BEGIN RSA PRIVATE KEY"));
+        assert!(inline.contains("\"keyId\":\"k1\""));
+        assert_eq!(audience, "371584906720968725");
+    }
+
+    /// Omitting `key_path` yields the agent's conventional location.
+    #[test]
+    fn zitadel_jwt_key_path_defaults_when_omitted() {
+        let raw = r#"
+type = "zitadel-jwt"
+oidc_issuer_url = "https://zitadel.staging.example.com"
+audience = "366378028009259037"
+"#;
+        let CredentialsSection::ZitadelJwt {
+            key_path, key_json, ..
+        } = parse(raw)
+        else {
+            panic!("expected ZitadelJwt");
+        };
+        assert_eq!(key_path.to_str(), Some("/etc/fleet-agent/zitadel-key.json"));
+        assert!(key_json.is_none());
+    }
+}
--- a/fleet/harmony-fleet-auth/src/credentials.rs
+++ b/fleet/harmony-fleet-auth/src/credentials.rs
@@ -0,0 +1,553 @@
+//! NATS credential sources for fleet processes (agent + operator).
+//!
+//! `CredentialSource::next_credential()` is invoked from async-nats's
+//! `with_auth_callback` on every (re)connect attempt — including the
+//! first connect. The callback shape means an expired token is
+//! automatically replaced when async-nats reconnects after a transient
+//! NATS outage / pod restart / network blip: the caller doesn't need
+//! a separate refresh task to "never lose connectivity."
+//!
+//! Two variants:
+//!
+//! - [`CredentialSource::TomlShared`] — username + password baked into
+//!   the config (v0/dev only).
+//! - [`CredentialSource::ZitadelJwt`] — Zitadel machine-user JWT-bearer
+//!   flow (RFC 7523). The keyfile is the only durable secret on the
+//!   process; the bearer token is short-lived and re-minted
+//!   transparently when a cached token is within 5 minutes of expiry.
+//!
+//! Modeled as an enum (rather than a `dyn Trait`) because async-nats's
+//! auth-callback bounds (`Future: Send + Sync`) are incompatible with
+//! `Pin<Box<dyn Future + Send>>` returned by an object-safe trait. Two
+//! variants is a small enough cardinality that enum dispatch is
+//! cleaner than a Trait + factory.
+
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use jsonwebtoken::{Algorithm, EncodingKey, Header as JwtHeader};
+use serde::Deserialize;
+
+use crate::config::CredentialsSection;
+
+/// Material the NATS connector needs to authenticate. Returned per
+/// (re)connect attempt — the source decides whether to mint fresh.
+///
+/// NOTE(review): the derived `Debug` prints the password / bearer
+/// token verbatim — avoid formatting values with `{:?}` in logs.
+#[derive(Debug, Clone)]
+pub enum NatsCredential {
+    /// Username + password pair (produced by the `TomlShared` source).
+    UserPass { user: String, pass: String },
+    /// Access token string (produced by the `ZitadelJwt` source).
+    BearerToken(String),
+}
+
+/// Runtime credential source, one variant per `[credentials]` mode.
+/// Constructed once at startup from the parsed `[credentials]`
+/// section; cloned via Arc into the async-nats auth callback.
+pub enum CredentialSource {
+    /// Static username + password, returned unchanged on every call.
+    TomlShared {
+        user: String,
+        pass: String,
+    },
+    /// Zitadel JWT-bearer flow with an in-memory access-token cache.
+    ZitadelJwt {
+        /// Parsed machine keyfile used to sign the JWT assertion.
+        key: MachineKeyFile,
+        /// Issuer URL; also the base of the token endpoint URL.
+        oidc_issuer_url: String,
+        /// Audience embedded in the requested scope string.
+        audience: String,
+        /// HTTP client used for the token endpoint POST.
+        http: reqwest::Client,
+        /// Most recently minted token; `None` until the first mint.
+        cache: Mutex<Option<CachedToken>>,
+    },
+}
+
+impl CredentialSource {
+    /// Return current valid credentials, minting fresh material when any
+    /// cached value is within its safety window of expiry. Called on
+    /// every NATS (re)connect.
+    ///
+    /// # Errors
+    ///
+    /// Only the `ZitadelJwt` variant can fail: signing the assertion,
+    /// reaching the token endpoint, or decoding its response.
+    pub async fn next_credential(&self) -> Result<NatsCredential> {
+        match self {
+            Self::TomlShared { user, pass } => Ok(NatsCredential::UserPass {
+                user: user.clone(),
+                pass: pass.clone(),
+            }),
+            Self::ZitadelJwt { .. } => self.zitadel_next().await,
+        }
+    }
+
+    /// Serve the cached bearer token when it is still fresh; otherwise
+    /// mint a new one, store it in the cache, and return it.
+    async fn zitadel_next(&self) -> Result<NatsCredential> {
+        // Fast path: lock the cache synchronously, copy out the token if
+        // it's comfortably valid, drop the lock. Holding a MutexGuard
+        // across `.await` would make this future !Sync, which
+        // async-nats's `with_auth_callback` rejects at compile time.
+        if let Some(token) = self.cached_if_fresh() {
+            return Ok(NatsCredential::BearerToken(token));
+        }
+        // Slow path: mint outside any lock. Two concurrent (re)connect
+        // attempts could both reach here and both mint; that's a wasted
+        // HTTP round-trip in a rare race, not a correctness issue —
+        // the second writer wins and replaces the first's value.
+        let fresh = self.zitadel_mint().await?;
+        let token = fresh.access_token.clone();
+        if let Self::ZitadelJwt {
+            cache, audience, ..
+        } = self
+        {
+            // A poisoned mutex only means another thread panicked while
+            // holding the guard; the slot is a plain `Option`, so it is
+            // safe to take the guard back and overwrite it. Skipping
+            // the store instead would disable caching for good.
+            *cache
+                .lock()
+                .unwrap_or_else(std::sync::PoisonError::into_inner) = Some(fresh);
+            tracing::info!(audience = %audience, "minted fresh Zitadel access token");
+        }
+        Ok(NatsCredential::BearerToken(token))
+    }
+
+    /// Return the cached access token if it is still more than
+    /// `TOKEN_REFRESH_LEEWAY_SECS` away from expiry. `None` for an
+    /// empty cache, a token inside the leeway window, or a
+    /// non-`ZitadelJwt` source.
+    fn cached_if_fresh(&self) -> Option<String> {
+        let Self::ZitadelJwt { cache, .. } = self else {
+            return None;
+        };
+        let now = chrono::Utc::now().timestamp();
+        // Recover from poisoning rather than reporting a permanent miss.
+        let guard = cache
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        let cached = guard.as_ref()?;
+        let refresh_at = cached
+            .expires_at_unix
+            .saturating_sub(TOKEN_REFRESH_LEEWAY_SECS);
+        (refresh_at > now).then(|| cached.access_token.clone())
+    }
+
+    /// Perform one JWT-bearer token exchange against the issuer's token
+    /// endpoint and return the access token with its computed expiry.
+    ///
+    /// # Errors
+    ///
+    /// Fails on a non-`ZitadelJwt` source, an unsignable assertion, a
+    /// transport error, a non-success HTTP status (the response body
+    /// is included in the message), or an undecodable response.
+    async fn zitadel_mint(&self) -> Result<CachedToken> {
+        let Self::ZitadelJwt {
+            key,
+            oidc_issuer_url,
+            audience,
+            http,
+            ..
+        } = self
+        else {
+            anyhow::bail!("zitadel_mint called on non-ZitadelJwt variant");
+        };
+
+        // Sampled before the request, so the stored expiry errs on the
+        // early side by the duration of the round-trip.
+        let now = chrono::Utc::now().timestamp();
+        let assertion = build_assertion(key, oidc_issuer_url, now)?;
+        let scope = build_scope(audience);
+        let token_url = build_token_url(oidc_issuer_url);
+
+        let resp = http
+            .post(&token_url)
+            .form(&[
+                (
+                    "grant_type",
+                    "urn:ietf:params:oauth:grant-type:jwt-bearer".to_string(),
+                ),
+                ("assertion", assertion),
+                ("scope", scope),
+            ])
+            .send()
+            .await
+            .with_context(|| format!("POST {token_url}"))?;
+
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("Zitadel token endpoint returned {status}: {body}");
+        }
+
+        #[derive(Deserialize)]
+        struct TokenResponse {
+            access_token: String,
+            #[serde(default)]
+            expires_in: Option<i64>,
+        }
+        let tr: TokenResponse = resp.json().await.context("parsing token response")?;
+        // Zitadel typically returns 12h (43200s); be defensive against
+        // a missing field by assuming a conservative 1h.
+        let expires_in = tr.expires_in.unwrap_or(3600);
+        Ok(CachedToken {
+            access_token: tr.access_token,
+            // `expires_in` is server-supplied; saturate so an absurd
+            // value cannot overflow the addition.
+            expires_at_unix: now.saturating_add(expires_in),
+        })
+    }
+}
+
+/// Sign the JWT-bearer assertion presented to the token endpoint.
+///
+/// Kept separate from the network path, and itself composed from the
+/// pure claim/header builders, so the claim and header shapes can be
+/// unit-tested without an HTTP server or an RSA private key fixture.
+///
+/// # Errors
+///
+/// Fails when `key.key` is not a parseable RSA PEM or signing fails.
+pub(crate) fn build_assertion(
+    key: &MachineKeyFile,
+    oidc_issuer_url: &str,
+    now: i64,
+) -> Result<String> {
+    let jwt_header = build_assertion_header(key);
+    let payload = build_assertion_claims(key, oidc_issuer_url, now);
+    let signing_key = EncodingKey::from_rsa_pem(key.key.as_bytes())
+        .context("parsing RSA private key from machine key file")?;
+    jsonwebtoken::encode(&jwt_header, &payload, &signing_key).context("signing JWT assertion")
+}
+
+/// Assemble the claim set of the JWT-bearer assertion (no signing, no
+/// I/O). Zitadel requires `iss == sub == userId`; `aud` is Zitadel
+/// itself, i.e. `oidc_issuer_url`; and `exp - iat` MUST be ≤ 60 s or
+/// Zitadel rejects the assertion.
+pub(crate) fn build_assertion_claims(
+    key: &MachineKeyFile,
+    oidc_issuer_url: &str,
+    now: i64,
+) -> serde_json::Value {
+    // Issuer and subject are both the machine user's id.
+    let machine_user = key.user_id.as_str();
+    let expires_at = now + ASSERTION_LIFETIME_SECS;
+    serde_json::json!({
+        "iss": machine_user,
+        "sub": machine_user,
+        "aud": oidc_issuer_url,
+        "exp": expires_at,
+        "iat": now,
+    })
+}
+
+/// Produce the RS256 JWT header for the assertion. Its `kid` names
+/// which of the machine user's registered keys Zitadel should verify
+/// the signature against.
+pub(crate) fn build_assertion_header(key: &MachineKeyFile) -> JwtHeader {
+    let key_id = key.key_id.clone();
+    let mut jwt_header = JwtHeader::new(Algorithm::RS256);
+    jwt_header.kid = Some(key_id);
+    jwt_header
+}
+
+/// Compose the space-separated OAuth `scope` value for the
+/// token-bearer request.
+///
+/// The access token is only useful here with all three scopes:
+///
+///   * `openid` — base OIDC requirement.
+///   * `urn:zitadel:iam:org:projects:roles` (PLURAL "projects") —
+///     asks Zitadel to include the role-claim block in the access
+///     token. Without it the callout reports "no authorized role in
+///     token" even when the user holds a project role grant.
+///   * `urn:zitadel:iam:org:project:id:<aud>:aud` (SINGULAR
+///     "project") — adds <aud> to the access token's `aud` claim so
+///     the callout's audience validation accepts the project ID used
+///     as the JWT-bearer audience.
+///
+/// Plural vs. singular is a Zitadel convention, not a typo; both
+/// scopes are required.
+pub(crate) fn build_scope(audience: &str) -> String {
+    let audience_scope = format!("urn:zitadel:iam:org:project:id:{audience}:aud");
+    [
+        "openid",
+        "urn:zitadel:iam:org:projects:roles",
+        audience_scope.as_str(),
+    ]
+    .join(" ")
+}
+
+/// Derive the token endpoint URL from `oidc_issuer_url`. Every
+/// trailing slash is stripped first: an issuer configured as
+/// `https://sso.example.com/` would otherwise yield
+/// `…//oauth/v2/token`, which 404s.
+pub(crate) fn build_token_url(oidc_issuer_url: &str) -> String {
+    let issuer = oidc_issuer_url.trim_end_matches('/');
+    [issuer, "/oauth/v2/token"].concat()
+}
+
+// ---- helper types ----------------------------------------------------------
+
+/// JSON keyfile content as Zitadel emits it for a `KEY_TYPE_JSON`
+/// machine key. The `key` is a PEM-encoded RSA private key.
+///
+/// `Debug` is implemented by hand so the private key is never written
+/// to logs or panic messages.
+#[derive(Clone, Deserialize)]
+pub struct MachineKeyFile {
+    #[serde(rename = "type")]
+    pub _type: String,
+    #[serde(rename = "keyId")]
+    pub key_id: String,
+    pub key: String,
+    #[serde(rename = "userId")]
+    pub user_id: String,
+}
+
+impl std::fmt::Debug for MachineKeyFile {
+    /// Same shape as the derived output, with `key` redacted.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("MachineKeyFile")
+            .field("_type", &self._type)
+            .field("key_id", &self.key_id)
+            .field("key", &"<redacted>")
+            .field("user_id", &self.user_id)
+            .finish()
+    }
+}
+
+/// Access token held in the `ZitadelJwt` source's cache, together
+/// with the moment it stops being served.
+///
+/// NOTE(review): the derived `Debug` prints `access_token` verbatim —
+/// avoid formatting values with `{:?}` in logs.
+#[derive(Debug, Clone)]
+pub struct CachedToken {
+    /// Bearer token string returned by the token endpoint.
+    pub(crate) access_token: String,
+    /// Unix seconds at which the token is no longer trusted by
+    /// `cached_if_fresh`. Computed from the OAuth response's `expires_in`
+    /// and the local clock at mint time.
+    pub(crate) expires_at_unix: i64,
+}
+
+/// Refresh tokens this many seconds before their advertised expiry.
+/// Five minutes leaves headroom for clock skew, slow networks, and
+/// the round-trip cost of re-minting against Zitadel.
+pub const TOKEN_REFRESH_LEEWAY_SECS: i64 = 5 * 60;
+
+/// Lifetime of the JWT *assertion* (the client-side bearer JWT we sign
+/// to authenticate to Zitadel's token endpoint). Zitadel rejects
+/// assertions with `exp - iat > 60s`; one minute is the safe ceiling.
+pub const ASSERTION_LIFETIME_SECS: i64 = 60;
+
+// ---- factory ---------------------------------------------------------------
+
+/// Build the appropriate `CredentialSource` from the parsed config.
+///
+/// For [`CredentialsSection::ZitadelJwt`] the machine key comes from
+/// one of two places: a non-blank inline `key_json` is parsed
+/// directly; otherwise the keyfile at `key_path` is read from disk.
+/// The HTTP client for the token endpoint is built here with a 10 s
+/// timeout and the configured `danger_accept_invalid_certs` setting.
+///
+/// # Errors
+///
+/// Fails when the key JSON cannot be parsed, the keyfile cannot be
+/// read, or the HTTP client cannot be built.
+pub fn credential_source_from_config(creds: &CredentialsSection) -> Result<Arc<CredentialSource>> {
+    match creds {
+        CredentialsSection::TomlShared {
+            nats_user,
+            nats_pass,
+        } => Ok(Arc::new(CredentialSource::TomlShared {
+            user: nats_user.clone(),
+            pass: nats_pass.clone(),
+        })),
+        CredentialsSection::ZitadelJwt {
+            key_path,
+            key_json,
+            oidc_issuer_url,
+            audience,
+            danger_accept_invalid_certs,
+        } => {
+            // `key_json` (inline) wins over `key_path` (file). The
+            // operator pod uses inline because OKD's restricted-v2
+            // SCC + env-var-from-Secret deployment shape can't
+            // reliably mount Secret volumes; the agent uses the file
+            // path because it lives on a VM and a real file is the
+            // more natural rotation target.
+            // An empty or whitespace-only `key_json` counts as absent.
+            let key = match key_json.as_deref().map(str::trim) {
+                Some(json) if !json.is_empty() => parse_machine_key(json)?,
+                _ => load_machine_key(key_path)?,
+            };
+            Ok(Arc::new(CredentialSource::ZitadelJwt {
+                key,
+                oidc_issuer_url: oidc_issuer_url.clone(),
+                audience: audience.clone(),
+                http: reqwest::Client::builder()
+                    .danger_accept_invalid_certs(*danger_accept_invalid_certs)
+                    .timeout(Duration::from_secs(10))
+                    .build()
+                    .context("building HTTP client for Zitadel token endpoint")?,
+                // Starts empty; the first (re)connect mints a token.
+                cache: Mutex::new(None),
+            }))
+        }
+    }
+}
+
+/// Read and decode the machine keyfile at `key_path`.
+///
+/// Decodes with `serde_json` directly instead of going through
+/// `parse_machine_key`, whose error context says "inline" and would
+/// mislabel a malformed on-disk keyfile.
+///
+/// # Errors
+///
+/// Fails when the file cannot be read or is not valid keyfile JSON;
+/// both messages include the path.
+fn load_machine_key(key_path: &Path) -> Result<MachineKeyFile> {
+    let raw = std::fs::read_to_string(key_path)
+        .with_context(|| format!("reading machine key file at {}", key_path.display()))?;
+    serde_json::from_str(&raw)
+        .with_context(|| format!("parsing machine key file at {}", key_path.display()))
+}
+
+/// Decode keyfile JSON supplied as a string into a `MachineKeyFile`.
+fn parse_machine_key(raw: &str) -> Result<MachineKeyFile> {
+    let decoded = serde_json::from_str::<MachineKeyFile>(raw);
+    decoded.context("parsing inline machine key JSON")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Keyfile fixture with realistic ids and a placeholder key.
+    fn fake_key() -> MachineKeyFile {
+        MachineKeyFile {
+            _type: "serviceaccount".to_string(),
+            key_id: "kid-371358469099356247".to_string(),
+            // Real PEM not required for the pure-builder tests; the
+            // signing path that needs a parseable key is exercised
+            // end-to-end in the e2e harness.
+            key: "PEM-PLACEHOLDER".to_string(),
+            user_id: "uid-371358469065801815".to_string(),
+        }
+    }
+
+    /// `ZitadelJwt` source with an empty cache and a default HTTP client.
+    fn zjwt_source() -> CredentialSource {
+        CredentialSource::ZitadelJwt {
+            key: fake_key(),
+            oidc_issuer_url: "http://sso.fleet.local:8080".to_string(),
+            audience: "366378028009259037".to_string(),
+            http: reqwest::Client::new(),
+            cache: Mutex::new(None),
+        }
+    }
+
+    // ---- next_credential / cache state -------------------------------------
+
+    #[tokio::test]
+    async fn toml_shared_returns_userpass_each_call() {
+        let s = CredentialSource::TomlShared {
+            user: "u".to_string(),
+            pass: "p".to_string(),
+        };
+        let c = s.next_credential().await.unwrap();
+        match c {
+            NatsCredential::UserPass { user, pass } => {
+                assert_eq!(user, "u");
+                assert_eq!(pass, "p");
+            }
+            other => panic!("expected UserPass, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn cached_token_within_leeway_is_treated_as_expired() {
+        // Exercise `cached_if_fresh` itself (not a copy of its
+        // arithmetic) so an inverted or off-by-one leeway comparison
+        // in production code fails here.
+        let src = zjwt_source();
+        let now = chrono::Utc::now().timestamp();
+        if let CredentialSource::ZitadelJwt { cache, .. } = &src {
+            *cache.lock().unwrap() = Some(CachedToken {
+                access_token: "about-to-expire".to_string(),
+                expires_at_unix: now + TOKEN_REFRESH_LEEWAY_SECS - 1,
+            });
+        }
+        assert_eq!(
+            src.cached_if_fresh(),
+            None,
+            "tokens within the leeway window must be considered expired"
+        );
+    }
+
+    #[test]
+    fn cached_if_fresh_returns_some_when_outside_leeway() {
+        let src = zjwt_source();
+        let now = chrono::Utc::now().timestamp();
+        if let CredentialSource::ZitadelJwt { cache, .. } = &src {
+            *cache.lock().unwrap() = Some(CachedToken {
+                access_token: "fresh".to_string(),
+                expires_at_unix: now + TOKEN_REFRESH_LEEWAY_SECS + 60,
+            });
+        }
+        assert_eq!(src.cached_if_fresh(), Some("fresh".to_string()));
+    }
+
+    #[test]
+    fn cached_if_fresh_returns_none_when_no_cache() {
+        // Brand-new ZitadelJwt source — no token has been minted yet.
+        // Forces the slow path on first connect.
+        let src = zjwt_source();
+        assert_eq!(src.cached_if_fresh(), None);
+    }
+
+    #[test]
+    fn cached_if_fresh_returns_none_for_toml_shared() {
+        // Defensive: cached_if_fresh is only meaningful for ZitadelJwt;
+        // TomlShared has no cache. A nonsensical call must return None,
+        // not panic, so the cold-path can degrade gracefully.
+        let src = CredentialSource::TomlShared {
+            user: "u".into(),
+            pass: "p".into(),
+        };
+        assert_eq!(src.cached_if_fresh(), None);
+    }
+
+    // ---- assertion claims / header (pure builders) ------------------------
+
+    #[test]
+    fn assertion_claims_carry_iss_sub_aud_exp_iat() {
+        let now = 1_700_000_000;
+        let claims = build_assertion_claims(&fake_key(), "http://sso.fleet.local:8080", now);
+        assert_eq!(claims["iss"], "uid-371358469065801815");
+        assert_eq!(claims["sub"], "uid-371358469065801815");
+        assert_eq!(claims["aud"], "http://sso.fleet.local:8080");
+        assert_eq!(claims["iat"].as_i64(), Some(now));
+        assert_eq!(claims["exp"].as_i64(), Some(now + ASSERTION_LIFETIME_SECS));
+    }
+
+    #[test]
+    fn assertion_lifetime_locked_at_60_seconds() {
+        // Zitadel rejects assertions where exp - iat > 60s. If anyone
+        // bumps ASSERTION_LIFETIME_SECS thinking "more is safer", the
+        // mints will silently start failing in prod with no helpful
+        // error. Lock the constant.
+        assert_eq!(ASSERTION_LIFETIME_SECS, 60);
+    }
+
+    #[test]
+    fn assertion_header_carries_kid_and_rs256() {
+        let header = build_assertion_header(&fake_key());
+        assert_eq!(header.alg, jsonwebtoken::Algorithm::RS256);
+        assert_eq!(header.kid.as_deref(), Some("kid-371358469099356247"));
+    }
+
+    // ---- scope string ------------------------------------------------------
+
+    #[test]
+    fn scope_includes_plural_projects_roles() {
+        // The plural-projects URN is what tells Zitadel to emit the
+        // role claim. Day-one bug; lock it.
+        let s = build_scope("366378028009259037");
+        assert!(
+            s.contains("urn:zitadel:iam:org:projects:roles"),
+            "scope must include the PLURAL projects-roles URN; got {s:?}"
+        );
+    }
+
+    #[test]
+    fn scope_audience_uses_singular_project_id_urn() {
+        // The singular-project URN tells Zitadel to put <id> into the
+        // access token's aud claim. Different URN entirely from the
+        // plural one above; both required.
+        let s = build_scope("366378028009259037");
+        assert!(
+            s.contains("urn:zitadel:iam:org:project:id:366378028009259037:aud"),
+            "scope must include the SINGULAR project:id:<aud>:aud URN; got {s:?}"
+        );
+    }
+
+    #[test]
+    fn scope_includes_openid_base() {
+        let s = build_scope("any");
+        assert!(
+            s.split_whitespace().any(|tok| tok == "openid"),
+            "scope must include `openid` as a standalone token; got {s:?}"
+        );
+    }
+
+    // ---- token URL ---------------------------------------------------------
+
+    #[test]
+    fn token_url_appends_oauth_endpoint() {
+        assert_eq!(
+            build_token_url("http://sso.fleet.local:8080"),
+            "http://sso.fleet.local:8080/oauth/v2/token"
+        );
+    }
+
+    #[test]
+    fn token_url_strips_single_trailing_slash() {
+        // A trailing slash would yield `…//oauth/v2/token`, which 404s.
+        // Common configuration drift; the trim guards against it.
+        assert_eq!(
+            build_token_url("http://sso.fleet.local:8080/"),
+            "http://sso.fleet.local:8080/oauth/v2/token"
+        );
+    }
+
+    #[test]
+    fn token_url_strips_multiple_trailing_slashes() {
+        // Defensive — `trim_end_matches('/')` peels all of them, not
+        // just the first. Locks that semantics.
+        assert_eq!(
+            build_token_url("http://sso.fleet.local:8080///"),
+            "http://sso.fleet.local:8080/oauth/v2/token"
+        );
+    }
+
+    // ---- MachineKeyFile JSON parsing --------------------------------------
+
+    #[test]
+    fn machine_key_file_parses_zitadel_json_shape() {
+        // The serde renames (`type`, `keyId`, `userId`) are easy to
+        // break. This is the literal JSON shape Zitadel's
+        // /management/v1/users/.../keys endpoint emits.
+        let raw = r#"{
+            "type":   "serviceaccount",
+            "keyId":  "371358469099356247",
+            "key":    "-----BEGIN RSA PRIVATE KEY-----\nABC\n-----END RSA PRIVATE KEY-----\n",
+            "userId": "371358469065801815"
+        }"#;
+        let parsed: MachineKeyFile = serde_json::from_str(raw).expect("valid keyfile");
+        assert_eq!(parsed._type, "serviceaccount");
+        assert_eq!(parsed.key_id, "371358469099356247");
+        assert_eq!(parsed.user_id, "371358469065801815");
+        assert!(parsed.key.contains("BEGIN RSA PRIVATE KEY"));
+    }
+}
--- a/fleet/harmony-fleet-auth/src/lib.rs
+++ b/fleet/harmony-fleet-auth/src/lib.rs
@@ -0,0 +1,65 @@
+//! Shared NATS auth plumbing for fleet processes.
+//!
+//! Two consumers today:
+//!
+//! - **`harmony-fleet-agent`** — reads `[credentials]` from
+//!   `/etc/fleet-agent/config.toml`. Per-device Zitadel machine user
+//!   with the `device` role.
+//! - **`harmony-fleet-operator`** — reads the same TOML shape from a
+//!   single env var (the env var's value is the TOML snippet for the
+//!   `[credentials]` table). Singleton machine user with the
+//!   `fleet-admin` role.
+//!
+//! Both deserialize into the **same** [`CredentialsSection`], factory
+//! into the **same** [`CredentialSource`], and use the **same**
+//! [`connect_options_with_credentials`] helper to build a NATS client.
+//! The only thing that differs between processes is where the bytes of
+//! the TOML config come from and which Zitadel user signs the
+//! JWT-bearer assertion.
+//!
+//! Adding a new mode (e.g. user JWT from a CLI session) is one new
+//! variant on `CredentialsSection` + `CredentialSource`; everything
+//! else flows through unchanged.
+
+mod agent_config;
+mod config;
+mod credentials;
+
+pub use agent_config::{AgentConfig, AgentSection, NatsSection, load_config};
+pub use config::CredentialsSection;
+pub use credentials::{
+    ASSERTION_LIFETIME_SECS, CachedToken, CredentialSource, MachineKeyFile, NatsCredential,
+    TOKEN_REFRESH_LEEWAY_SECS, credential_source_from_config,
+};
+
+use std::sync::Arc;
+
+/// Build `async_nats::ConnectOptions` wired with the auth callback
+/// that pulls fresh credentials from `creds` on every (re)connect.
+///
+/// A `UserPass` credential fills the username/password fields of the
+/// `async_nats::Auth`; a `BearerToken` fills its token field. When the
+/// credential source fails, the callback returns an `AuthError` whose
+/// message carries the full error chain, not just the outermost
+/// context.
+///
+/// Caller chains additional options (`ping_interval`, `event_callback`,
+/// …) before invoking `.connect(urls)`.
+pub fn connect_options_with_credentials(
+    creds: Arc<CredentialSource>,
+) -> async_nats::ConnectOptions {
+    async_nats::ConnectOptions::with_auth_callback(move |_nonce| {
+        let cs = creds.clone();
+        async move {
+            // `{e:#}` renders every anyhow context layer. Plain `{e}`
+            // would show only the outermost one (e.g. "POST <url>")
+            // and drop the root cause.
+            let cred = cs
+                .next_credential()
+                .await
+                .map_err(|e| async_nats::AuthError::new(format!("credential source: {e:#}")))?;
+            let mut auth = async_nats::Auth::new();
+            match cred {
+                NatsCredential::UserPass { user, pass } => {
+                    auth.username = Some(user);
+                    auth.password = Some(pass);
+                }
+                NatsCredential::BearerToken(token) => {
+                    auth.token = Some(token);
+                }
+            }
+            Ok(auth)
+        }
+    })
+}
--- a/Show More
+++ b/Show More