2026-05-05 13:46:15 +00:00
128 changed files with 19363 additions and 840 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,9 @@ ignore

 # Generated book
 book
+
+# Scratch and agent worktrees — never commit
+.claude/
+ui-idea.md
+ROADMAP/00-priority-matrix.md
+fleet/harmony-fleet-agent/agent-config.toml
--- a/.sqlx/query-165b944d13c8f7810b4e3ef891e5cd256d74f572629b8c0764782066e705c50c.json
+++ b/.sqlx/query-165b944d13c8f7810b4e3ef891e5cd256d74f572629b8c0764782066e705c50c.json
@@ -1,12 +1,12 @@
 {
  "db_name": "SQLite",
-  "query": "\n        INSERT INTO host_role_mapping (host_id, role, installation_device)\n        VALUES (?, ?, ?)\n        ",
+  "query": "\n        INSERT INTO host_role_mapping (host_id, role, installation_device, network_config)\n        VALUES (?, ?, ?, ?)\n        ",
  "describe": {
    "columns": [],
    "parameters": {
-      "Right": 3
+      "Right": 4
    },
    "nullable": []
  },
-  "hash": "6fcc29cfdbdf3b2cee94a4844e227f09b245dd8f079832a9a7b774151cb03af6"
+  "hash": "165b944d13c8f7810b4e3ef891e5cd256d74f572629b8c0764782066e705c50c"
 }
--- a/.sqlx/query-3b71d7d7ae75e75ec3ef1df2cd3c4d18520b9d56dd328b7edf576af9dac3c2c0.json
+++ b/.sqlx/query-3b71d7d7ae75e75ec3ef1df2cd3c4d18520b9d56dd328b7edf576af9dac3c2c0.json
@@ -0,0 +1,32 @@
+{
+  "db_name": "SQLite",
+  "query": "SELECT role as \"role: HostRole\", installation_device, network_config FROM host_role_mapping WHERE host_id = ? ORDER BY id DESC LIMIT 1",
+  "describe": {
+    "columns": [
+      {
+        "name": "role: HostRole",
+        "ordinal": 0,
+        "type_info": "Text"
+      },
+      {
+        "name": "installation_device",
+        "ordinal": 1,
+        "type_info": "Text"
+      },
+      {
+        "name": "network_config",
+        "ordinal": 2,
+        "type_info": "Text"
+      }
+    ],
+    "parameters": {
+      "Right": 1
+    },
+    "nullable": [
+      false,
+      true,
+      true
+    ]
+  },
+  "hash": "3b71d7d7ae75e75ec3ef1df2cd3c4d18520b9d56dd328b7edf576af9dac3c2c0"
+}
--- a/.sqlx/query-43cfa7b6dda8b9745ef74eb45f3f52a9193dcb09a4b917f0fde9f39058e0f276.json
+++ b/.sqlx/query-43cfa7b6dda8b9745ef74eb45f3f52a9193dcb09a4b917f0fde9f39058e0f276.json
@@ -1,6 +1,6 @@
 {
  "db_name": "SQLite",
-  "query": "SELECT host_id, installation_device FROM host_role_mapping WHERE role = ?",
+  "query": "SELECT host_id, installation_device, network_config FROM host_role_mapping WHERE role = ?",
  "describe": {
    "columns": [
      {
@@ -12,6 +12,11 @@
        "name": "installation_device",
        "ordinal": 1,
        "type_info": "Text"
+      },
+      {
+        "name": "network_config",
+        "ordinal": 2,
+        "type_info": "Text"
      }
    ],
    "parameters": {
@@ -19,8 +24,9 @@
    },
    "nullable": [
      false,
+      true,
      true
    ]
  },
-  "hash": "24f719d57144ecf4daa55f0aa5836c165872d70164401c0388e8d625f1b72d7b"
+  "hash": "43cfa7b6dda8b9745ef74eb45f3f52a9193dcb09a4b917f0fde9f39058e0f276"
 }
--- a/.sqlx/query-779c5aa1643e714051ba141e5cc5788846925324bfb7d79662026fdc3e33c0ca.json
+++ b/.sqlx/query-779c5aa1643e714051ba141e5cc5788846925324bfb7d79662026fdc3e33c0ca.json
@@ -0,0 +1,12 @@
+{
+  "db_name": "SQLite",
+  "query": "DELETE FROM host_role_mapping WHERE host_id = ?",
+  "describe": {
+    "columns": [],
+    "parameters": {
+      "Right": 1
+    },
+    "nullable": []
+  },
+  "hash": "779c5aa1643e714051ba141e5cc5788846925324bfb7d79662026fdc3e33c0ca"
+}
--- a/.sqlx/query-8d247918eca10a88b784ee353db090c94a222115c543231f2140cba27bd0f067.json
+++ b/.sqlx/query-8d247918eca10a88b784ee353db090c94a222115c543231f2140cba27bd0f067.json
@@ -16,7 +16,7 @@
      {
        "name": "data: Json<PhysicalHost>",
        "ordinal": 2,
-        "type_info": "Blob"
+        "type_info": "Null"
      }
    ],
    "parameters": {
--- a/.sqlx/query-c7ca191faaa23b3ec5019f8c4910f666db9c6c2be22ffe563be4b7caef645bd1.json
+++ b/.sqlx/query-c7ca191faaa23b3ec5019f8c4910f666db9c6c2be22ffe563be4b7caef645bd1.json
@@ -0,0 +1,20 @@
+{
+  "db_name": "SQLite",
+  "query": "SELECT data as \"data!: Vec<u8>\" FROM physical_hosts WHERE id = ? ORDER BY version_id DESC LIMIT 1",
+  "describe": {
+    "columns": [
+      {
+        "name": "data!: Vec<u8>",
+        "ordinal": 0,
+        "type_info": "Null"
+      }
+    ],
+    "parameters": {
+      "Right": 1
+    },
+    "nullable": [
+      false
+    ]
+  },
+  "hash": "c7ca191faaa23b3ec5019f8c4910f666db9c6c2be22ffe563be4b7caef645bd1"
+}
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1512,6 +1512,7 @@ checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
 dependencies = [
 "crypto-common",
 "inout",
+ "zeroize",
 ]

 [[package]]
@@ -1968,6 +1969,35 @@ dependencies = [
 "typenum",
 ]

+[[package]]
+name = "crypto_box"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16182b4f39a82ec8a6851155cc4c0cda3065bb1db33651726a29e1951de0f009"
+dependencies = [
+ "aead",
+ "crypto_secretbox",
+ "curve25519-dalek",
+ "salsa20",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "crypto_secretbox"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d6cf87adf719ddf43a805e92c6870a531aedda35ff640442cbaf8674e141e1"
+dependencies = [
+ "aead",
+ "cipher",
+ "generic-array",
+ "poly1305",
+ "salsa20",
+ "subtle",
+ "zeroize",
+]
+
 [[package]]
 name = "ctr"
 version = "0.9.2"
@@ -2682,6 +2712,110 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "example-fleet-auth-callout"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-nats",
+ "base64 0.22.1",
+ "directories",
+ "env_logger",
+ "futures-util",
+ "harmony",
+ "harmony-k8s",
+ "harmony-nats-callout",
+ "harmony_types",
+ "jsonwebtoken",
+ "k3d-rs",
+ "k8s-openapi",
+ "kube",
+ "log",
+ "nkeys",
+ "reqwest 0.12.28",
+ "serde",
+ "serde_json",
+ "tempfile",
+ "tokio",
+ "tokio-test",
+ "tracing",
+ "tracing-subscriber",
+ "url",
+]
+
+[[package]]
+name = "example-fleet-e2e-demo"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-nats",
+ "clap",
+ "directories",
+ "env_logger",
+ "example-fleet-auth-callout",
+ "futures-util",
+ "harmony",
+ "harmony-fleet-operator",
+ "harmony-k8s",
+ "harmony-nats-callout",
+ "harmony-reconciler-contracts",
+ "harmony_types",
+ "k3d-rs",
+ "k8s-openapi",
+ "kube",
+ "log",
+ "nkeys",
+ "serde",
+ "serde_json",
+ "tempfile",
+ "tokio",
+ "tokio-test",
+ "tracing",
+ "tracing-subscriber",
+ "url",
+]
+
+[[package]]
+name = "example-fleet-sso-login"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "base64 0.22.1",
+ "clap",
+ "directories",
+ "env_logger",
+ "log",
+ "reqwest 0.12.28",
+ "serde",
+ "serde_json",
+ "tokio",
+]
+
+[[package]]
+name = "example-fleet-staging-deploy"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-nats",
+ "clap",
+ "env_logger",
+ "harmony",
+ "harmony-k8s",
+ "harmony-nats-callout",
+ "harmony_types",
+ "k8s-openapi",
+ "kube",
+ "log",
+ "nkeys",
+ "reqwest 0.12.28",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+ "url",
+]
+
 [[package]]
 name = "example-grafana"
 version = "0.1.0"
@@ -2909,6 +3043,17 @@ dependencies = [
 "url",
 ]

+[[package]]
+name = "example-okd-ceph-alerts"
+version = "0.1.0"
+dependencies = [
+ "harmony",
+ "harmony_cli",
+ "harmony_types",
+ "log",
+ "tokio",
+]
+
 [[package]]
 name = "example-okd-cluster-alerts"
 version = "0.1.0"
@@ -3199,12 +3344,16 @@ name = "example_fleet_rpi_setup"
 version = "0.1.0"
 dependencies = [
 "anyhow",
+ "base64 0.22.1",
 "clap",
 "harmony",
 "harmony_cli",
 "harmony_secret",
 "harmony_types",
 "log",
+ "reqwest 0.12.28",
+ "serde",
+ "serde_json",
 "tokio",
 ]

@@ -3753,10 +3902,12 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-nats",
+ "async-trait",
 "chrono",
 "clap",
 "futures-util",
 "harmony",
+ "harmony-fleet-auth",
 "harmony-reconciler-contracts",
 "serde",
 "serde_json",
@@ -3766,6 +3917,22 @@ dependencies = [
 "tracing-subscriber",
 ]

+[[package]]
+name = "harmony-fleet-auth"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-nats",
+ "chrono",
+ "jsonwebtoken",
+ "reqwest 0.12.28",
+ "serde",
+ "serde_json",
+ "tokio",
+ "toml",
+ "tracing",
+]
+
 [[package]]
 name = "harmony-fleet-operator"
 version = "0.1.0"
@@ -3776,6 +3943,7 @@ dependencies = [
 "clap",
 "futures-util",
 "harmony",
+ "harmony-fleet-auth",
 "harmony-reconciler-contracts",
 "k8s-openapi",
 "kube",
@@ -3784,6 +3952,7 @@ dependencies = [
 "serde_json",
 "thiserror 2.0.18",
 "tokio",
+ "toml",
 "tracing",
 "tracing-subscriber",
 ]
@@ -3807,6 +3976,26 @@ dependencies = [
 "url",
 ]

+[[package]]
+name = "harmony-nats-callout"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-nats",
+ "futures-util",
+ "harmony-reconciler-contracts",
+ "jsonwebtoken",
+ "nats-jwt",
+ "nkeys",
+ "reqwest 0.12.28",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "harmony-node-readiness-endpoint"
 version = "0.1.0"
@@ -3981,6 +4170,19 @@ dependencies = [
 "thiserror 2.0.18",
 ]

+[[package]]
+name = "harmony_host_discovery"
+version = "0.1.0"
+dependencies = [
+ "cidr",
+ "harmony",
+ "harmony_cli",
+ "harmony_macros",
+ "harmony_types",
+ "tokio",
+ "url",
+]
+
 [[package]]
 name = "harmony_i18n"
 version = "0.1.0"
@@ -4794,6 +4996,30 @@ dependencies = [
 "syn 2.0.117",
 ]

+[[package]]
+name = "integration-test-callout"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-nats",
+ "base64 0.22.1",
+ "futures-util",
+ "harmony-nats-callout",
+ "hex",
+ "jsonwebtoken",
+ "nats-jwt",
+ "nkeys",
+ "reqwest 0.12.28",
+ "rsa",
+ "serde",
+ "serde_json",
+ "tempfile",
+ "tokio",
+ "tokio-test",
+ "tracing",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "interactive-parse"
 version = "0.1.5"
@@ -5330,6 +5556,17 @@ dependencies = [
 "windows-sys 0.61.2",
 ]

+[[package]]
+name = "nats-jwt"
+version = "0.1.0"
+dependencies = [
+ "base64 0.22.1",
+ "nkeys",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+]
+
 [[package]]
 name = "neli"
 version = "0.7.4"
@@ -5406,6 +5643,7 @@ version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "879011babc47a1c7fdf5a935ae3cfe94f34645ca0cac1c7f6424b36fc743d1bf"
 dependencies = [
+ "crypto_box",
 "data-encoding",
 "ed25519",
 "ed25519-dalek",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,7 +30,11 @@ members = [
  "harmony_assets", "opnsense-codegen", "opnsense-api",
  "fleet/harmony-fleet-operator",
  "fleet/harmony-fleet-agent",
+  "fleet/harmony-fleet-auth",
  "harmony-reconciler-contracts",
+  "nats/jwt",
+  "nats/callout",
+  "nats/integration-test-callout",
 ]

 [workspace.package]
--- a/ROADMAP/fleet_platform/demo_runbook.md
+++ b/ROADMAP/fleet_platform/demo_runbook.md
@@ -0,0 +1,221 @@
+# Fleet Platform Demo Runbook
+
+48-hour-demo edition. Covers the operator-side (NationTech) and the
+customer-developer-side (two devs onboarding two Pis, applying a
+container deployment to them). Hands-on, no UI yet.
+
+## Roles
+
+- **NationTech operator** — runs `fleet-staging-deploy` once against the
+  customer's OKD cluster.
+- **Customer developer** — runs `fleet-sso-login` to prove auth works,
+  then runs `fleet-rpi-setup` for each Pi, then applies their workload
+  via the existing `harmony-apply-deployment` example.
+
+## Prerequisites
+
+### Cluster (operator-side)
+
+- OKD ≥ 4.10 (HAProxy ingress, edge-TLS).
+- Wildcard DNS `*.<base-domain>` pointing at the cluster ingress IP
+  (e.g. `*.customer1.nationtech.io`).
+- Wildcard cert that the HAProxy router serves for that domain (the
+  default OKD pattern).
+- `cert-manager`, `cloudnative-pg` operators installed (Zitadel chart
+  depends on them via `K8sAnywhereTopology`'s ensure_ready).
+- Access to a container registry the cluster can pull from. Customer
+  may have their own; the default in `fleet-staging-deploy` is
+  `quay.io/nationtech/harmony-nats-callout:demo`.
+
+### Driver machine (operator + developers)
+
+- `kubectl` with kubeconfig wired up.
+- `cargo` (Rust toolchain).
+- `podman` (used to build the agent image / fleet-callout image).
+- `ssh` into the Pis from the developers' machines.
+
+### Pis
+
+- Pi OS Lite booted, SSH server enabled, developer's SSH pubkey in
+  `~/.ssh/authorized_keys`. `fleet-rpi-setup` handles the rest.
+
+## Operator: deploy the staging stack
+
+```bash
+# 1. Build the callout image and push it to the customer's registry.
+cargo build --release -p harmony-nats-callout
+podman build -t quay.io/nationtech/harmony-nats-callout:demo \
+  -f nats/callout/Dockerfile .
+podman push quay.io/nationtech/harmony-nats-callout:demo
+
+# 2. Deploy the central stack.
+cargo run -p example-fleet-staging-deploy -- \
+  --base-domain customer1.nationtech.io \
+  --kube-context customer1-prod \
+  --callout-image quay.io/nationtech/harmony-nats-callout:demo \
+  --nats-auth-pass "$(openssl rand -hex 16)" \
+  --nats-system-pass "$(openssl rand -hex 16)"
+```
+
+Expected output ends with a "next steps" panel containing the project
+ID, the `harmony-cli` client_id, the NATS WSS URL, and the exact
+follow-up commands. Save those — both developers will need them.
+
+## Developer: prove SSO works
+
+```bash
+cargo run -p example-fleet-sso-login -- \
+  --base-domain customer1.nationtech.io \
+  --client-id <CLI_CLIENT_ID printed by staging deploy>
+```
+
+Browser opens, developer logs into Zitadel, CLI prints
+`Welcome <name> <email>` and persists `~/.local/share/harmony/sso-session.json`.
+
+Two developers each do this once with their own Zitadel accounts.
+
+## Operator (or developer with an admin PAT): onboard a Pi
+
+```bash
+# Extract the Zitadel admin PAT once (it's in a K8s secret on the
+# staging cluster).
+PAT=$(kubectl --context customer1-prod \
+  -n zitadel get secret iam-admin-pat \
+  -o jsonpath='{.data.pat}' | base64 -d)
+
+# Cross-compile the agent for aarch64 (one-time per agent rev).
+cargo build --release --target aarch64-unknown-linux-gnu -p harmony-fleet-agent
+
+# Onboard Pi #1 — sensor on the floor with arch=aarch64, group=group-a.
+cargo run -p example_fleet_rpi_setup -- \
+  --pi-host 192.168.1.42 \
+  --pi-user pi \
+  --device-id sensor-floor-01 \
+  --labels "group=group-a,arch=aarch64,role=sensor" \
+  --bootstrap-token "$PAT" \
+  --zitadel-issuer-url https://zitadel.customer1.nationtech.io \
+  --zitadel-project-id <PROJECT_ID printed by staging deploy> \
+  --nats-url wss://nats.customer1.nationtech.io/ \
+  --agent-binary ./target/aarch64-unknown-linux-gnu/release/fleet-agent
+
+# Onboard Pi #2 — different group label so we can target by selector.
+cargo run -p example_fleet_rpi_setup -- \
+  --pi-host 192.168.1.43 \
+  --pi-user pi \
+  --device-id sensor-shelf-02 \
+  --labels "group=group-b,arch=aarch64,role=sensor" \
+  --bootstrap-token "$PAT" \
+  --zitadel-issuer-url https://zitadel.customer1.nationtech.io \
+  --zitadel-project-id <PROJECT_ID> \
+  --nats-url wss://nats.customer1.nationtech.io/ \
+  --agent-binary ./target/aarch64-unknown-linux-gnu/release/fleet-agent
+```
+
+Each Pi onboarding does the following on the device:
+
+- Installs podman + systemd-container.
+- Creates the `fleet-agent` user (with subuid/subgid for rootless
+  podman + linger).
+- Drops the per-device Zitadel JSON key at
+  `/etc/fleet-agent/zitadel-key.json` (mode 0640, owner fleet-agent).
+- Renders `/etc/fleet-agent/config.toml` with `type = "zitadel-jwt"`
+  pointing at the keyfile.
+- Starts `fleet-agent.service` under systemd.
+
+The agent connects to NATS over WSS using the JWT-bearer token it
+mints from its keyfile. async-nats's auto-reconnect runs the auth
+callback, which re-mints the token on every reconnect attempt — the
+"never lose connectivity" property holds across:
+
+- Token expiry (12h Zitadel default → re-minted ~5 minutes before).
+- NATS pod restart (chart upgrade, drain, etc.).
+- Pi network blip (DHCP renewal, Wi-Fi roam).
+
+## Verify the fleet from the operator side
+
+```bash
+kubectl --context customer1-prod -n fleet-system get device.fleet.nationtech.io
+# NAME                LABELS
+# sensor-floor-01     arch=aarch64,group=group-a,role=sensor
+# sensor-shelf-02     arch=aarch64,group=group-b,role=sensor
+
+kubectl --context customer1-prod -n fleet-system logs deployment/fleet-callout
+# ... received auth callout request
+# ... Zitadel JWT validated, generating user JWT  device_id=sensor-floor-01  role=device
+```
+
+## Developer: deploy a container to a labeled subset
+
+```bash
+# Apply the customer's backend (single service + sqlite volume + envs)
+# to every device with group=group-a.
+cargo run -p example_harmony_apply_deployment -- \
+  --namespace fleet-demo \
+  --name customer-backend \
+  --selector group=group-a \
+  --image registry.example.com/customer/backend:1.4 \
+  --port 8080:8080 \
+  --env DATABASE_URL=sqlite:///data/app.db \
+  --env LOG_LEVEL=info \
+  --volume /var/lib/customer-backend:/data \
+  --restart unless-stopped
+```
+
+The operator sees one Deployment CR materialized, NATS KV gets a
+`desired-state.<device-id>.customer-backend` entry per matched
+device, and each Pi's agent reconciles podman to match. The
+container's data persists across agent restarts and Pi reboots
+because the bind mount survives both.
+
+`kubectl get device` shows the agents heartbeating; their per-deployment
+state shows up on `Device.status.aggregate` (Chapter 2 reflect-back
+already in place).
+
+### Translating a docker-compose to a Deployment CR
+
+For the call: walk through the customer's compose file once, paste
+the equivalent `--env`/`--volume`/`--port` flags. Bind mounts only;
+named volumes need a separate decision per service. Most compose
+shapes translate mechanically; depends_on / startup ordering does
+not (PodmanV0 has no ordering primitive — design out of scope for
+the demo).
+
+## Cross-device security model (worth showing)
+
+- Pi A's NATS connection has a user JWT permissioned to
+  `device-state.sensor-floor-01.>` and `device-commands.sensor-floor-01.>`.
+- Pi A *cannot* publish or subscribe to `sensor-shelf-02`'s
+  subjects — the auth callout never grants them.
+- An admin user (Zitadel role `fleet-admin`) gets `>` on both
+  publish + subscribe — they observe every device.
+- A user with no fleet role is rejected at NATS connect time.
+
+This is the same security model the local `examples/fleet_auth_callout`
+suite (3 cargo tests sharing a OnceCell k3d cluster) verifies in CI.
+
+## What's NOT in the demo
+
+- Compose-to-Deployment auto-translation (low priority — manual
+  translation during the call works).
+- A web UI for `harmony fleet apply` (post-demo).
+- Tailscale/Headscale-based SSH backdoor to the Pis (separate daemon,
+  out of scope).
+- Device-join-request + admin-approve flow (would replace
+  bootstrap-PAT pattern; out of scope).
+- OpenBao for non-NATS secrets (env-var-only is fine for demo).
+- K8s OIDC integration so kubectl accepts Zitadel JWTs (post-demo).
+
+## Re-run idempotency
+
+Every harness in this runbook is idempotent.
+
+- `fleet-staging-deploy` rides helm-upgrade-by-default, the
+  ZitadelSetupScore search-then-create loop, and a persisted issuer
+  NKey in a K8s secret.
+- `fleet-rpi-setup` byte-compares the rendered TOML against the
+  device's existing config and only reapplies on drift; the keyfile
+  drop + agent restart only happen when something actually changed.
+- `harmony-apply-deployment` is a `kube::Api::patch(...)` apply, so
+  re-running with the same fields is a server-side no-op.
+EOF
+)
--- a/ROADMAP/fleet_platform/nats-sso.md
+++ b/ROADMAP/fleet_platform/nats-sso.md
@@ -0,0 +1,52 @@
+-- documentation : https://docs.nats.io/running-a-nats-service/configuration/securing_nats/auth_intro/nkey_auth
+https://docs.nats.io/running-a-nats-service/configuration/securing_nats/auth_intro/jwt
+https://docs.nats.io/running-a-nats-service/nats_admin/security/jwt
+
+Context: OpenBao can integrate with Zitadel directly (via JWKS or whatever protocol is required), but NATS cannot. See the documentation above and the analysis below:
+
+
+These are notes taken from this video 
+
+https://www.youtube.com/watch?v=VvGxrT-jv64
+https://github.com/synadia-io/rethink_connectivity/tree/main/19-auth-callout
+
+
+
+1. `nsc generate nkey --account`
+
+generates nsc key pair for the auth callout service
+
+2. nats.conf
+
+add
+
+```
+authorization {
+    auth_callout {
+        issuer: <pubkey of the new nsc key pair>
+        auth_users: [ auth, user ] # list of users we can discover on the account. (something I don't get here, I want dynamic users management through the jwt)
+        account: CHAT # Name of the account we want to discover users on, this account exists in the accounts block
+    }
+}
+```
+
+
+3. Write the auth callout service, full code example here https://github.com/synadia-io/rethink_connectivity/tree/main/19-auth-callout
+  3.1 This service will be the app authorized by the SSO provider (google in the example, zitadel in our case)
+  3.2 Load the NKeySeed (private key from the pair above)
+  3.3 connect to nats. We will communicate with the nats server through nats protocol itself to handle auth callout requests
+  3.4 Subscribe to the KV workspace (not sure why yet)
+  3.5 start forging the nats jwt token using the request nkey (each new client connection comes with an nkey which will be used for the session)
+  3.6 setup the audience (nats account from above, CHAT in the example)
+  3.7 Validate and decode the jwt (nats passes the user jwt as request connectionoptions token)
+  3.8 Add user to the workspace (wtf this is completely dynamic?, how do we remove it?)
+  3.9 Attach permissions inside the nats jwt such as `Allow : [ "$JS.API.INFO", format!("chat.*.{userId}") ]` where userId is read from the google jwt, our case zitadel jwt.
+
+
+Now, synadia provides a small SDK to ease writing auth callout services in Go. But we're in rust. It might be worth writing this thing in go to benefit from synadia's stuff but from what I gathered, only the nats jwt minting is maybe something that we would benefit a lot from. But then again I think that crafting a jwt is something standard?
+
+Interacting with Zitadel and everything else would likely be the same amount of work in Go, or more, since our entire ecosystem is in Rust. Let's analyze this properly.
+
+https://github.com/synadia-io/callout.go/tree/main
+
+https://github.com/synadia-io/callout.go/tree/main/examples/dynamic_accounts
--- a/ROADMAP/fleet_platform/v0_1_plan.md
+++ b/ROADMAP/fleet_platform/v0_1_plan.md
@@ -360,6 +360,24 @@ auth from Chapter 4.

 ---

+## Chapter 6 — Customer demo rehearsal **[in progress]**
+
+48-hour customer demo prep. PO assessment concluded that promising a
+real-OKD deployment without first proving the JWT-auth chain is
+reckless. **VM-based rehearsal first**, OKD second.
+
+The rehearsal extends `smoke-a4` (k3d + libvirt VM + agent + apply
+CR + reconcile podman) with **Zitadel + auth callout + agent JWT
+auth**. Two devices + one admin. Same code paths as production —
+only the cluster topology differs.
+
+Detailed plan: [`v0_demo_e2e.md`](v0_demo_e2e.md).
+
+Once the VM rehearsal is green (success criteria in that doc), the
+residual deltas to ship to real OKD are configuration, not new code.
+
+---
+
 ## Principles — what we've learned and want to keep doing

 - **No yaml in framework code paths.** Every kube-rs type is
--- a/ROADMAP/fleet_platform/v0_demo_e2e.md
+++ b/ROADMAP/fleet_platform/v0_demo_e2e.md
@@ -0,0 +1,239 @@
+# V0 Demo End-to-End — VM-Based Rehearsal
+
+48-hour customer demo prep. The PO assessment from
+`memory/feedback_*` and the prior planning discussion concluded that
+shipping the customer demo against an untested OKD path is reckless.
+This doc plans the **VM-based rehearsal** that proves the JWT-auth
+chain end-to-end before we touch a real cluster.
+
+## Why VM, not OKD
+
+Smoke-a4 already greens the chain `k3d + in-cluster NATS + libvirt
+ARM VM + agent + apply CR + reconcile podman + status reflect-back`
+on x86_64 and aarch64. Zero new infra; we extend the existing
+harness with **Zitadel + auth callout + agent JWT auth**.
+
+Same Helm charts, same Scores, same agent code paths as production.
+Only the cluster topology differs (k3d/traefik vs OKD/HAProxy). The
+remaining OKD-specific deltas — Route annotations, edge-TLS, real DNS
+— are small and testable in isolation **after** the VM smoke is
+green.
+
+Compared to validating directly against OKD:
+
+- **Local + reproducible**: same `cargo run` runs on any dev machine
+  with podman + libvirt + k3d.
+- **Fast iteration**: bring-up is ~12-15 min cold, ~30s warm. We
+  fix integration bugs in minutes, not "wait for cluster admin"
+  hours.
+- **CI-able**: greens in a single `cargo test` invocation, so we
+  prevent regressions post-demo.
+
+## What this rehearsal proves
+
+- `ZitadelScore`'s `FirstInstance.Org.Machine.Pat` block actually
+  causes the chart to provision the `iam-admin-pat` secret (we
+  added the Helm config, never confirmed the secret materialises).
+- `ZitadelSetupScore::ensure_machine_user` reaches a working JSON
+  keyfile when called outside its k3d unit tests.
+- The agent's `CredentialSource::ZitadelJwt` mints a token, that
+  token actually authenticates against the auth callout, and the
+  callout admits it into the `DEVICES` account.
+- async-nats's auto-reconnect-with-auth-callback fires fresh tokens
+  on real NATS pod restart — the **load-bearing** "never lose
+  connectivity to a device" guarantee.
+- The full operator → NATS KV → agent → podman → status-back-to-CR
+  loop survives the credential-source rewrite.
+- Container env / volumes / restart policy land on the real podman
+  instance, not just in unit tests.
+
+## What it does NOT prove (deferred, accepted)
+
+- OKD HAProxy edge-TLS termination on the Zitadel and NATS-WSS
+  Routes. Tested separately in a follow-up smoke once the VM smoke
+  is green.
+- Real DNS resolution from a customer LAN. We inject `/etc/hosts`
+  entries on each VM so `sso.fleet.local` resolves to the libvirt
+  host.
+- Browser-driven device-code SSO (`fleet_sso_login` is compile-only
+  today). Out of scope for this rehearsal — admin verification uses
+  an injected machine-user token via JWT-bearer (same as
+  `examples/fleet_auth_callout`).
+- Customer's docker-compose translation. Manual at the call.
+
+## Architecture
+
+```
+                   k3d cluster (host)
+   ┌─────────────────────────────────────────────────┐
+   │  Zitadel + Postgres   http://sso.fleet.local    │
+   │      │                     (host:8080)          │
+   │      │  project + roles + per-device users      │
+   │      ▼                                           │
+   │  ZitadelSetupScore cache  → keyfiles (per VM)   │
+   │                                                  │
+   │  NATS (auth_callout)   nats://<host>:30422      │
+   │      ▲                                           │
+   │      │  JWT-bearer via callout                   │
+   │  fleet-callout pod                               │
+   │                                                  │
+   │  fleet-operator → KV writes desired-state       │
+   │      ▲                                           │
+   │      │  kube apply Deployment CR                 │
+   └──────┼──────────────────────────────────────────┘
+          │
+   ┌──────┼──────────────────────────────────────────┐
+   │   libvirt default NAT (host = 192.168.122.1)    │
+   └──────┼──────────────────────────────────────────┘
+          ▼
+   ┌──────────────┐    ┌──────────────┐
+   │  device-A    │    │  device-B    │   (cloud-init Ubuntu VMs)
+   │  fleet-agent │    │  fleet-agent │
+   │  + Zitadel   │    │  + Zitadel   │
+   │   JWT key    │    │   JWT key    │
+   │  + podman    │    │  + podman    │
+   └──────────────┘    └──────────────┘
+```
+
+## Bring-up sequence
+
+1. Ensure k3d cluster `fleet-e2e-demo` (port mappings 8080→80,
+   30422→30422; same as fleet_auth_callout).
+2. Reuse `fleet_auth_callout::bring_up_stack` constituent functions:
+   - Deploy Zitadel + Postgres
+   - Wait for `iam-admin-pat` secret to materialise
+   - Provision project `fleet`, API app, roles `fleet-admin` +
+     `device`
+3. Install fleet operator from its Helm chart (Chapter 3 ships this).
+4. Generate issuer NKey, deploy NATS with `auth_callout` block, deploy
+   `NatsAuthCalloutScore` (image side-loaded into k3d).
+5. **For each device i in 1..=num_devices**:
+   - Mint Zitadel machine user `device-${device_id_i}` with the
+     `device` role grant via `ZitadelSetupScore`. Cache the JSON key.
+   - Provision libvirt VM via `ProvisionVmScore` (cloud-init
+     Ubuntu, x86_64).
+   - SSH in via `LinuxHostTopology`. Inject `/etc/hosts`:
+     `<host_ip> sso.fleet.local`.
+   - Run `FleetDeviceSetupScore` with
+     `FleetDeviceAuth::ZitadelJwt { machine_key_json, ... }`.
+6. Mint admin Zitadel machine user with `fleet-admin` role (one-off
+   for verification — separate from the per-device users).
+7. Hand off / run tests.
+
+Idempotent across re-runs:
+- k3d cluster create skipped if exists.
+- ZitadelSetupScore is search-then-create.
+- VM creation: `ProvisionVmScore` reports NOOP if domain exists.
+- FleetDeviceSetupScore byte-compares the rendered TOML.
+
+## Tests
+
+Real `#[tokio::test]` functions sharing a `OnceCell`-bringup. Run
+sequentially (`--test-threads=1` because they share the cluster +
+VMs):
+
+| # | Name | What it asserts |
+|---|---|---|
+| 1 | `both_devices_heartbeat_within_60s` | `Device` CRs for A and B materialise with their labels. |
+| 2 | `deployment_targets_only_matching_device` | Apply CR with `group=group-a` selector → A reconciles, B doesn't. |
+| 3 | `deployment_status_aggregates_back_to_cr` | `.status.aggregate.succeeded == 1` within 60s. |
+| 4 | `env_vars_and_volume_propagate_to_container` | SSH into A, `podman inspect` confirms env + bind mount. |
+| 5 | `admin_jwt_reads_any_device_subject` | Admin token sees A's heartbeat. |
+| 6 | `cross_device_isolation_enforced_in_vm` | A's per-device JWT cannot subscribe to B's command subject. |
+| 7 | `agent_recovers_from_nats_pod_restart` | Kill NATS pod, both agents reconnect with fresh tokens within 30s. |
+
+Test 7 is the load-bearing one — it's the only one that exercises
+the auto-reconnect + auth-callback re-mint path under realistic
+disturbance. Asserted by: kill nats-0 pod via kube API, wait for
+new pod ready, then publish a message from admin and verify both
+agents pick it up.
+
+## Implementation order
+
+1. ✏️ Roadmap doc (this file).
+2. 🆕 `examples/fleet_e2e_demo/` crate skeleton.
+3. ♻️ Refactor `fleet_auth_callout::bring_up_stack` constituent
+   functions to be `pub` so they're individually re-usable.
+4. ➕ `/etc/hosts` injection step in `FleetDeviceSetupScore`.
+5. ➕ Operator install via Helm in the new harness.
+6. 🔗 Compose `bring_up_full_stack(num_devices)`.
+7. 🧪 Write the 7 tests.
+8. 🚦 Cold-start the bring-up. Fix what breaks (expected: ≥3 things).
+9. 🧪 Run tests. Fix what breaks (expected: ≥1 thing).
+10. 💥 Run test 7 in isolation; verify reconnect timing.
+11. 📝 Update `demo_runbook.md` with VM-rehearsal commands.
+
+## Known risks / debugging traps
+
+- **`iam-admin-pat` secret timing.** Chart's setup job runs on first
+  install but may take 30-90s after Helm reports the chart Ready.
+  Need a wait-for-secret loop before invoking ZitadelSetupScore.
+  (Today the `bring_up_stack` in `fleet_auth_callout` doesn't have
+  this — it works because we re-run after the secret has settled.
+  First-cold-run will likely fail.)
+- **Per-device machine keys are returned ONCE.** ZitadelClientConfig
+  caches them locally. If the cache file is missing/corrupt
+  mid-bring-up, devices fail at TOML render. Persist the cache
+  atomically.
+- **VM /etc/hosts mutation.** Cloud-init can do this, but
+  FleetDeviceSetupScore doesn't currently touch /etc/hosts. Add a
+  step before package install (low risk: idempotent line-in-file).
+- **k3d port collision.** Existing `harmony` and `harmony-example`
+  clusters from prior sessions may collide on host ports. Either
+  pick unique ports or fail loudly when in use.
+- **NATS pod restart test is non-deterministic.** async-nats's
+  reconnect timing depends on backoff schedule. Assert via "publish
+  succeeds within 30s after restart" rather than literal reconnect
+  events; the latter is implementation-detail-dependent.
+- **Bring-up time.** Cold: ~15 min (Zitadel + Postgres dominate).
+  Set test runner timeout accordingly. Warm: ~30s. The OnceCell
+  pattern means the cost is amortised across the test suite.
+- **Agent reconciler is non-idempotent for env / volume specs.**
+  `harmony/src/modules/podman/topology.rs::matches_spec` returns
+  false (forcing destroy + recreate) for any `ContainerSpec` with
+  non-empty env or volumes — by deliberate "fail-safe" choice the
+  original author made because podman's list endpoint doesn't
+  surface env/mount data. With the periodic reconcile firing every
+  30s, this becomes a destroy-and-recreate loop for any
+  non-trivial Deployment. Demo workaround: keep demo specs free of
+  env + volumes (the hello-web nginx demo already is). Real fix
+  (out of scope for the demo, in scope for delivery): switch the
+  drift check to `containers.get(name).inspect()` which returns
+  env + mounts, do a structural compare, lock with an integration
+  test asserting container ID is stable across two consecutive
+  applies. FIXME tag at the offending line.
+
+## Success criteria for the rehearsal day
+
+Tomorrow's all-day testing is "green" if:
+
+1. Cold `cargo run -p example-fleet-e2e-demo` brings up the full
+   stack and prints credentials in under 20 minutes.
+2. `cargo test -p example-fleet-e2e-demo --test e2e_walking_skeleton`
+   greens all 7 tests on a clean machine.
+3. `cargo test ... --test e2e_walking_skeleton agent_recovers_from_nats_pod_restart`
+   greens reliably 5 runs in a row.
+
+Anything below this and we don't show up to the customer call with a
+"staging deployed" promise — we reframe to "architecture walkthrough +
+local k3d security-model demo + pilot scheduled in 1-2 weeks."
+
+## What follows after greens
+
+Once the VM rehearsal is green, the residual deltas to ship to
+real OKD are:
+
+1. Replace `K8sAnywhereTopology` (which falls back to k3d via
+   `HARMONY_USE_LOCAL_K3D`) with a real-OKD profile. The Score code
+   doesn't change; only the topology bootstrap.
+2. Verify the Route annotations actually yield edge-TLS termination for
+   both Zitadel and NATS-WSS in the customer's cluster. ~30 min smoke.
+3. Push the callout image to a registry the customer's cluster
+   pulls from. Mechanical.
+4. Real wildcard DNS for `*.<base-domain>` pointed at the cluster
+   ingress.
+
+None of those four require new code; they're configuration. The
+heavy lifting (the JWT auth chain, the agent's reconnect loop, the
+operator → KV → agent → podman → status loop) is what the VM
+rehearsal proves.
--- a/docs/guides/fleet-manual-token-mint.md
+++ b/docs/guides/fleet-manual-token-mint.md
@@ -0,0 +1,189 @@
+# Manual Zitadel token mint + NATS write
+
+Operator-side recipe for talking to a callout-protected NATS by
+hand: sign a JWT-bearer assertion with a Zitadel machine user's
+private key, exchange it for an access token, drive `nats` CLI
+commands with the token. Useful for debugging the auth chain,
+poking the desired-state KV without the operator running, and
+validating that a deployed callout is actually accepting what
+you think it should.
+
+Read [fleet-zitadel-faq.md](./fleet-zitadel-faq.md) first for the
+underlying mechanism (RFC 7523 JWT-bearer flow, why we sign
+locally, what each claim means).
+
+## Inputs you need
+
+Five strings:
+
+| Input | Where to find it |
+| --- | --- |
+| `OIDC_ISSUER_URL` (the Zitadel base URL) | callout Deployment env: `kubectl exec -n fleet-system deploy/fleet-callout -- printenv OIDC_ISSUER_URL` |
+| `project_id` (becomes the access token's `aud`) | callout Deployment env: `OIDC_AUDIENCE` |
+| Machine user's `userId` | the JSON keyfile's `userId` field |
+| Machine user's `keyId` | the JSON keyfile's `keyId` field |
+| Private RSA key (PEM) | the JSON keyfile's `key` field |
+
+Get the `fleet-ops` (admin role) JSON keyfile from the cache:
+
+```bash
+jq -r '.machine_keys["fleet-ops"]' \
+  ~/.local/share/harmony/zitadel/client-config.json \
+  > /tmp/fleet-ops.json
+
+jq -r '.userId' /tmp/fleet-ops.json    # → user_id
+jq -r '.keyId'  /tmp/fleet-ops.json    # → key_id
+jq -r '.key'    /tmp/fleet-ops.json    > /tmp/fleet-ops.pem
+```
+
+The cache may drift from the deployed Zitadel state if Zitadel has
+been re-seeded; **always pull `OIDC_AUDIENCE` from the running
+callout**, not from the cache. The cache fix landed in commit
+`f4d6fb94` but older entries can still trip you up.
+
+## Mint script (PyJWT)
+
+```python
+# pip install PyJWT requests   ← MUST be PyJWT, not the `jwt` package.
+# The two share `import jwt`; `jwt` (the package) refuses raw PEM
+# strings and demands an AbstractJWKBase wrapper. PyJWT takes PEM
+# directly. If you ever see `TypeError: key must be an instance of
+# a class implements jwt.AbstractJWKBase`, you have the wrong one.
+
+import jwt, time, requests
+
+# These come from the running callout + Zitadel. Don't reuse stale
+# values from a checked-in note; verify against the live cluster.
+OIDC_ISSUER_URL = "http://sso.fleet.local:8080"
+PROJECT_ID      = "371158654839160853"   # = OIDC_AUDIENCE on callout
+USER_ID         = "..."                  # from machine keyfile
+KEY_ID          = "..."                  # from machine keyfile
+
+key = open("/tmp/fleet-ops.pem").read()
+now = int(time.time())
+
+assertion = jwt.encode(
+    {
+        "iss": USER_ID,
+        "sub": USER_ID,
+        "aud": OIDC_ISSUER_URL,   # for Zitadel itself, NOT the project_id
+        "exp": now + 60,          # Zitadel rejects exp - iat > 60s
+        "iat": now,
+    },
+    key,
+    algorithm="RS256",
+    headers={"kid": KEY_ID},      # PyJWT spelling — `headers=`, not `optional_headers=`
+)
+
+r = requests.post(
+    f"{OIDC_ISSUER_URL}/oauth/v2/token",
+    data={
+        "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
+        "assertion":  assertion,
+        # Three scopes:
+        #   openid                                     — base OIDC
+        #   urn:zitadel:iam:org:projects:roles         — PLURAL.
+        #     Without this, Zitadel omits the role claim and the
+        #     callout rejects with "no authorized role in token".
+        #   urn:zitadel:iam:org:project:id:<id>:aud    — singular.
+        #     Tells Zitadel to put <id> into the access token's
+        #     `aud` claim, which the callout's audience check
+        #     compares against OIDC_AUDIENCE.
+        "scope": (
+            "openid "
+            "urn:zitadel:iam:org:projects:roles "
+            f"urn:zitadel:iam:org:project:id:{PROJECT_ID}:aud"
+        ),
+    },
+)
+r.raise_for_status()
+token = r.json()["access_token"]
+
+# Sanity check — decode without verifying signature so you can see
+# what Zitadel actually emitted. If anything below is wrong, the
+# callout will reject your token.
+print(jwt.decode(token, options={"verify_signature": False}))
+print(token)
+```
+
+Expected decoded claims (the parts the callout will check):
+
+| Claim | What it should be | Why |
+| --- | --- | --- |
+| `iss` | `OIDC_ISSUER_URL` (byte-equal) | Callout: `validation.set_issuer(&[&self.issuer_url])` |
+| `aud` | `["<PROJECT_ID>"]` | Callout: `validation.set_audience(&[&self.audience])`; the array form is Zitadel's default |
+| `exp` | ~now + 12h | Zitadel default access token TTL |
+| `client_id` | the machine user's username (`fleet-ops`, `device-vm-device-00`, …) | Callout uses this as `device_id_claim` (with optional `DEVICE_ID_PREFIX_STRIP` applied) |
+| `urn:zitadel:iam:org:project:<PROJECT_ID>:roles` | object with role names as keys (e.g. `{"fleet-admin": {"<orgId>": "<orgName>"}}`) | Callout uses this as `roles_claim` and admits the role if `fleet-admin` or `device` is present |
+
+If any of these is wrong, fix the script before bothering with NATS.
+
+## Drive NATS with the token
+
+`nats --token=<bearer>` puts the value into the CONNECT frame's
+`auth_token`, which is what the callout expects.
+
+```bash
+NATS_SERVER=192.168.122.1:30422       # libvirt host's port mapping
+TOKEN=$(python3 mint.py | tail -1)    # last line is the raw token
+
+# Read everything (admin role allows >):
+nats --server "$NATS_SERVER" --token "$TOKEN" kv ls device-info
+nats --server "$NATS_SERVER" --token "$TOKEN" kv get device-info info.vm-device-00
+
+# Write a desired state — agent's KV watcher fires within 1s,
+# reconciler creates the podman container.
+nats --server "$NATS_SERVER" --token "$TOKEN" \
+  kv put desired-state vm-device-00.hello-web '{
+    "name": "hello-web",
+    "type": "PodmanV0",
+    "data": {
+      "services": [{
+        "name":  "testnginx",
+        "image": "docker.io/nginx:latest",
+        "ports": ["8080:80"]
+      }]
+    }
+  }'
+```
+
+The exact JSON shape comes from
+`harmony-reconciler-contracts/src/fleet.rs` — read that crate when
+in doubt about field names, NOT this doc; this doc is a worked
+example and may drift.
+
+## Common failures and what they mean
+
+| Symptom | Likely cause |
+| --- | --- |
+| `TypeError: key must be an instance of … AbstractJWKBase` | Wrong PyPI package. `pip uninstall jwt && pip install PyJWT`. |
+| HTTP 400 from `/oauth/v2/token`: `"invalid_grant_type"` | Request body was not sent form-encoded (`application/x-www-form-urlencoded`; with `requests`, pass `data=`, not `json=`), OR `grant_type` value mistyped. The full URN is `urn:ietf:params:oauth:grant-type:jwt-bearer`. |
+| HTTP 400: `"jwt: token is expired"` | Your assertion's `exp` is in the past. Wall-clock skew between your laptop and the cluster — sync NTP. |
+| Token mints but no `urn:zitadel:…:roles` claim | Missing the **plural** `urn:zitadel:iam:org:projects:roles` in scope. |
+| Token mints but `aud` is the issuer URL instead of the project id | Forgot the `urn:zitadel:iam:org:project:id:<id>:aud` scope. |
+| NATS CLI: `nats: Authorization Violation` | Token is good but callout rejected it — check `kubectl logs -n fleet-system -l app=fleet-callout` for the actual reason. The most common ones are "InvalidAudience" (your `aud` ≠ deployed `OIDC_AUDIENCE`) and "no authorized role in token". |
+| Callout log: `JWT validation failed: InvalidIssuer` | Trailing slash drift. `OIDC_ISSUER_URL=http://sso.fleet.local:8080/` ≠ `http://sso.fleet.local:8080`. Match exactly. |
+
+When the callout rejects, **its log is the source of truth**, not
+your decoded claims. The validation error includes which check
+failed; work backwards from there.
+
+## Rotating the deployed `OIDC_AUDIENCE`
+
+If Zitadel was re-seeded and `OIDC_AUDIENCE` on the callout now
+points at a non-existent project:
+
+```bash
+# 1. Confirm the live project id
+oc -n zitadel exec -ti deploy/zitadel -- /bin/sh -c \
+  'curl -s -H "Authorization: Bearer $PAT" \
+        $ZITADEL_URL/management/v1/projects/_search \
+   | jq ".result[] | select(.name == \"fleet\") | .id"'
+
+# 2. Re-run the bring-up — the live-query fix in f4d6fb94 will
+#    refresh OIDC_AUDIENCE on the next NatsAuthCalloutScore apply.
+```
+
+The shape of `mint.py` doesn't change between regular operation
+and post-recovery — you just plug in the fresh project id as
+`PROJECT_ID` (it must equal the callout's current `OIDC_AUDIENCE`).
--- a/docs/guides/fleet-zitadel-faq.md
+++ b/docs/guides/fleet-zitadel-faq.md
@@ -0,0 +1,185 @@
+# Fleet × Zitadel FAQ
+
+Technical reference for the Zitadel setup behind the fleet
+auth callout. Describes what exists, why it's that way, and where
+each piece lives in the code.
+
+Code anchors:
+- `examples/fleet_e2e_demo/src/lib.rs` — bring-up flow
+- `harmony/src/modules/zitadel/setup.rs` — `ZitadelSetupScore`
+- `harmony/src/modules/zitadel/mod.rs` — Helm install
+- `nats/callout/src/handler.rs` — auth callout
+- `fleet/harmony-fleet-agent/src/credentials.rs` — JWT-bearer mint
+
+---
+
+## What is an "application" in Zitadel?
+
+An OIDC client config: `clientId`, allowed grant types, redirect
+URIs (browser apps only), PKCE settings (browser apps only).
+
+Apps are not containers for users or roles — roles are defined on
+the enclosing project and users live at the org. An app is the
+entry point a service uses to delegate auth to Zitadel.
+
+The `nats` app is **API type**: JWT-bearer / client-credentials
+only, no browser flow. Headless agents never see a login page.
+Tokens carry the **project id** (not the app's `clientId`) as `aud`,
+and that is what the auth callout validates against (`OIDC_AUDIENCE`
+env on the callout Deployment).
+
+## Why are users and roles at org level instead of per-project?
+
+Roles are defined inside a project but are essentially labels —
+strings + display names with no inherent permissions. Each app
+enforces them in code (the callout maps `device` → a
+permission template).
+
+Users live at org level so one identity can hold roles across
+multiple projects in the same org and SSO between them. Role
+grants are the join: "user X has roles \[A, B\] on project Y."
+
+The only privilege ladder Zitadel enforces directly is at the
+instance/org level (IAM-Owner, Org-Owner). Project roles say
+nothing about Zitadel admin rights.
+
+## What is each service account for?
+
+| User | Created by | Purpose |
+| --- | --- | --- |
+| `iam-admin` | Helm `FirstInstance.Org.Machine` | IAM-Owner. Its PAT (`iam-admin-pat` k8s Secret) drives the management API from `ZitadelSetupScore`. |
+| `login-client` | Helm `FirstInstance.Org.LoginClient` | Internal — Zitadel's login UI pod uses it to call back into Zitadel. Don't touch. |
+| `fleet-ops` | `fleet_e2e_demo` admin setup | `fleet-admin` role grant, JSON key, used by tests and admin tooling. |
+| `device-vm-device-NN` | `fleet_e2e_demo::provision_device` | One per VM. JSON key copied to `/etc/fleet-agent/zitadel-key.json`. `device` role grant. |
+| `ops-station`, `sensor-a`, `sensor-b`, `intruder` | `fleet_auth_callout` (separate example) | Leftovers from previous runs. Postgres survives cluster recreates. Harmless, deletable. |
+
+The `device-` prefix on per-device usernames is intentional:
+Zitadel emits the username verbatim in the access token's
+`client_id` claim. The callout strips `device-` to recover the
+bare device id used for NATS subject interpolation
+(`DEVICE_ID_PREFIX_STRIP=device-` env var on the callout;
+`nats/callout/src/zitadel.rs::extract_device_id`).
+
+## How does the agent authenticate? Are JWTs / refresh tokens cached?
+
+On disk the agent keeps **only the JSON machine key** (RSA
+private key) at `/etc/fleet-agent/zitadel-key.json`.
+
+It does NOT store:
+- access tokens (in memory only)
+- refresh tokens (the JWT-bearer flow has none — RFC 7523 is
+  stateless by design)
+
+On every NATS (re)connect, `credentials.rs::zitadel_mint`:
+
+1. Builds a JWT assertion with `exp = now + 60s`, signs it with
+   the RSA key
+2. POSTs it to `<zitadel>/oauth/v2/token` with grant type
+   `urn:ietf:params:oauth:grant-type:jwt-bearer`
+3. Receives an access token (~12h validity), caches it in memory
+4. Re-mints when within 5min of expiry
+   (`TOKEN_REFRESH_LEEWAY_SECS`)
+
+## What happens to an offline agent?
+
+| Time offline | Behavior |
+| --- | --- |
+| 0 – ~12 h | Cached access token still valid. Reconnects work transparently. |
+| > ~12 h | Token expired. Agent enters reconnect loop until network returns, then mints fresh on first successful reach. |
+
+The RSA key never expires until rotated server-side.
+
+## Where are the lifetimes set?
+
+- **Access token TTL** — Zitadel UI: Org → Settings → OIDC
+  Settings → "Access Token Lifetime" (default 12 h).
+- **Assertion TTL** — hardcoded 60 s in
+  `credentials.rs::ASSERTION_LIFETIME_SECS`. Zitadel rejects
+  assertions where `exp - iat > 60 s`; this is server-enforced,
+  not a knob.
+- **Machine key TTL** — set when the key is created in
+  `harmony/src/modules/zitadel/setup.rs::create_machine_key`.
+
+## Why is a JSON machine key more secure than a PAT?
+
+Both are "if stolen, full impersonation" — the same blast radius.
+The difference is in leak surface:
+
+- **PAT**: a 60-char bearer string sent on every authenticated
+  request. Every log line, every env dump, every misrouted
+  request is a leak opportunity.
+- **JSON key**: an RSA private key. Only ever signs short-lived
+  (60 s) assertions sent to one endpoint
+  (`<zitadel>/oauth/v2/token`). The bearer token NATS sees is
+  the access token — short-lived (12 h max), scoped, distinct
+  from the long-term secret. A full network capture of the
+  agent ↔ NATS traffic yields only access tokens that expire
+  within 12 h.
+
+Plus: Zitadel allows multiple keys per machine user, so rotation
+is zero-downtime (mint new → push to device → delete old). PATs
+rotate one-at-a-time and are disruptive.
+
+What this does not defend against: a fully compromised device
+where the attacker reads the keyfile. That requires hardware
+(TPM / secure element) and is out of scope.
+
+## The machine keys expire in year 9999. Isn't that effectively forever?
+
+Yes. Currently set in `ZitadelSetupScore::create_machine_key` as
+a known-bad default chosen for demo convenience (re-running tests
+shouldn't produce expired keys mid-run). Tracked as a known issue.
+
+## Why is the IAM-Owner PAT stored as a plain k8s Secret?
+
+K8s Secrets are base64-encoded, **not** encrypted at rest unless
+etcd encryption-at-rest is explicitly enabled with a KMS provider.
+Anyone with `get secrets` in the `zitadel` namespace effectively
+has Zitadel admin.
+
+The PAT exists because `ZitadelSetupScore` calls Zitadel's
+management API (create project, role, machine user, mint key),
+which requires IAM-Owner privileges. A PAT is the simplest
+credential that survives across applies.
+
+This is a known production-hardening gap. Harmony has the
+`harmony_secret` crate (ADR-020) with OpenBao and local-encrypted-file
+backends; the Score is currently wired against a k8s Secret only.
+
+## Why does the ConfigMap show a human admin password that doesn't work?
+
+`ZitadelScore` regenerates a random admin password on every apply
+and writes it to the rendered ConfigMap. Helm's `FirstInstance`
+block only seeds Postgres on the **first** install against an
+empty DB, so re-applies render a new ConfigMap password but leave
+the original Postgres hash untouched. The displayed password is
+stale on every apply after the first.
+
+To recover access: use the `iam-admin-pat` to call Zitadel's
+management API and reset the human admin's password directly.
+Tracked as a known bug.
+
+## Quick reference — tokens on the wire
+
+| Token | Lives where | Lifetime | Signed by | Purpose |
+| --- | --- | --- | --- | --- |
+| **Assertion** | Agent memory, in-flight | 60 s | Agent (RSA key) | "I'm machine user X — give me an access token" |
+| **Access token** | Agent memory + on-the-wire to NATS | ~12 h | Zitadel | "Zitadel says I'm device X with role `device`" |
+| **NATS user JWT** | NATS server connection state | callout-defined (~30 s) | Auth callout (NKey) | "I have these permissions on these subjects" |
+
+The agent only holds the RSA key on disk and the access token
+in memory. The NATS user JWT is server-internal — agents don't
+see it.
+
+## Code map
+
+| Topic | File |
+| --- | --- |
+| Helm install, masterkey, admin password | `harmony/src/modules/zitadel/mod.rs` |
+| Project/role/machine user provisioning | `harmony/src/modules/zitadel/setup.rs` |
+| Per-device machine user + key handoff | `examples/fleet_e2e_demo/src/lib.rs::provision_device` |
+| JWT-bearer mint | `fleet/harmony-fleet-agent/src/credentials.rs::zitadel_mint` |
+| Auth callout decision tree | `nats/callout/src/handler.rs::decide` |
+| Per-device permission template | `nats/callout/src/permissions.rs::device_default` |
+| End-to-end rehearsal runbook | `examples/fleet_e2e_demo/RUNBOOK.md` |
+| Manual JWT-bearer mint + NATS write recipe | [`fleet-manual-token-mint.md`](./fleet-manual-token-mint.md) |
--- a/examples/fleet_auth_callout/Cargo.toml
+++ b/examples/fleet_auth_callout/Cargo.toml
@@ -0,0 +1,46 @@
+[package]
+name = "example-fleet-auth-callout"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "End-to-end fleet IoT security model: Zitadel + NATS + auth callout on k3d"
+
+[lib]
+name = "example_fleet_auth_callout"
+path = "src/lib.rs"
+
+[[bin]]
+name = "fleet-auth-callout"
+path = "src/main.rs"
+
+[[test]]
+name = "security_model"
+path = "tests/security_model.rs"
+
+[dependencies]
+harmony = { path = "../../harmony" }
+harmony-k8s = { path = "../../harmony-k8s" }
+harmony_types = { path = "../../harmony_types" }
+k3d-rs = { path = "../../k3d" }
+harmony-nats-callout = { path = "../../nats/callout" }
+async-nats.workspace = true
+nkeys = "0.4"
+jsonwebtoken = "9"
+reqwest = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
+tokio-test.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+anyhow.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+log.workspace = true
+env_logger.workspace = true
+futures-util.workspace = true
+k8s-openapi.workspace = true
+kube.workspace = true
+base64 = "0.22"
+tempfile.workspace = true
+url.workspace = true
+directories = "6.0.0"
--- a/examples/fleet_auth_callout/src/lib.rs
+++ b/examples/fleet_auth_callout/src/lib.rs
@@ -0,0 +1,790 @@
+//! End-to-end fleet IoT security model harness.
+//!
+//! Brings up the full stack on a local k3d cluster:
+//! 1. k3d cluster (creates if missing) with HTTP/NATS port mappings.
+//! 2. Zitadel + Postgres (via the official Helm chart).
+//! 3. Project + roles (`fleet-admin`, `device`) + 4 machine users +
+//!    JWT keys via ZitadelSetupScore.
+//! 4. NATS server with `auth_callout` block referencing the issuer NKey.
+//! 5. The harmony-nats-callout binary as a Deployment, sideloaded as a
+//!    container image into k3d.
+//!
+//! `main.rs` calls [`bring_up_stack`] then prints credentials and waits.
+//! Tests under `tests/` share a single cluster via `OnceCell` and exercise
+//! the security model through real `async_nats` clients using JWT-bearer
+//! access tokens minted from the machine keys produced in step 3.
+//!
+//! ## Why this lives in an example, not under `harmony/src/modules/`
+//!
+//! Everything in this crate is a *composition* of reusable Scores plus
+//! test fixtures (the JWT-bearer helper, image-build glue). The Scores
+//! themselves are in `harmony/src/modules/{zitadel,nats_auth_callout}`.
+
+use std::path::PathBuf;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use harmony::inventory::Inventory;
+use harmony::modules::k8s::coredns::{CoreDNSRewrite, CoreDNSRewriteScore};
+use harmony::modules::nats::NatsHelmChartScore;
+use harmony::modules::nats_auth_callout::{NatsAuthCalloutScore, render_auth_callout_block};
+use harmony::modules::zitadel::{
+    MachineKeyType, ZitadelApiApp, ZitadelClientConfig, ZitadelMachineUser, ZitadelRole,
+    ZitadelScore, ZitadelSetupScore,
+};
+use harmony::score::Score;
+use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
+use jsonwebtoken::{Algorithm, EncodingKey, Header as JwtHeader, encode as jwt_encode};
+use k3d_rs::{K3d, PortMapping};
+use log::info;
+use nkeys::KeyPair;
+use serde::{Deserialize, Serialize};
+
+pub const CLUSTER_NAME: &str = "fleet-auth-callout";
+pub const HTTP_PORT: u32 = 8080;
+pub const NATS_NODE_PORT: i32 = 30422;
+pub const ZITADEL_HOST: &str = "sso.fleet.local";
+
+pub const FLEET_NAMESPACE: &str = "fleet-system";
+pub const NATS_NAMESPACE: &str = FLEET_NAMESPACE;
+pub const NATS_RELEASE: &str = "fleet-nats";
+pub const CALLOUT_DEPLOYMENT_NAME: &str = "fleet-callout";
+/// `localhost/` prefix matches what podman tags images as internally —
+/// `podman build -t foo:tag` produces `localhost/foo:tag`. After
+/// `podman save → k3d image import`, the image lands in the k3d node's
+/// containerd under that exact name. Without the prefix, K8s would
+/// treat `foo:tag` as a Docker Hub reference and ImagePullBackOff.
+pub const CALLOUT_IMAGE_TAG: &str = "localhost/harmony-nats-callout:dev";
+
+pub const PROJECT_NAME: &str = "fleet";
+pub const API_APP_NAME: &str = "nats";
+pub const ADMIN_ROLE_KEY: &str = "fleet-admin";
+pub const DEVICE_ROLE_KEY: &str = "device";
+
+pub const ADMIN_USERNAME: &str = "ops-station";
+pub const DEVICE_A_USERNAME: &str = "sensor-a";
+pub const DEVICE_B_USERNAME: &str = "sensor-b";
+pub const NO_ROLE_USERNAME: &str = "intruder";
+
+/// Service-side NATS account user that the callout itself authenticates
+/// with (listed in `auth_callout.auth_users` to bypass the callout).
+pub const NATS_AUTH_USER: &str = "auth";
+pub const NATS_AUTH_PASS: &str = "auth-callout-pass";
+pub const NATS_ACCOUNT: &str = "DEVICES";
+pub const NATS_SYSTEM_USER: &str = "sys-admin";
+pub const NATS_SYSTEM_PASS: &str = "sys-admin-pass";
+
+#[derive(Debug, Clone)]
+pub struct StackHandles {
+    pub cluster_name: String,
+    pub nats_url_external: String,
+    pub zitadel_url: String,
+    pub project_id: String,
+    pub admin_machine_key: String,
+    pub device_a_machine_key: String,
+    pub device_b_machine_key: String,
+    pub intruder_machine_key: String,
+    pub issuer_pubkey: String,
+}
+
+/// JSON keyfile content as Zitadel emits it for `KEY_TYPE_JSON` machine keys.
+#[derive(Debug, Deserialize, Serialize)]
+pub struct MachineKeyFile {
+    #[serde(rename = "type")]
+    pub r#type: String,
+    #[serde(rename = "keyId")]
+    pub key_id: String,
+    /// PEM-encoded RSA private key.
+    pub key: String,
+    #[serde(rename = "userId")]
+    pub user_id: String,
+}
+
+fn data_dir() -> PathBuf {
+    directories::BaseDirs::new()
+        .map(|dirs| dirs.data_dir().join("harmony").join("k3d"))
+        .unwrap_or_else(|| PathBuf::from("/tmp/harmony"))
+}
+
+pub fn create_k3d() -> K3d {
+    let base = data_dir();
+    std::fs::create_dir_all(&base).expect("create k3d data dir");
+    K3d::new(base, Some(CLUSTER_NAME.to_string()))
+        // HTTP_PORT:80 so /etc/hosts entries (or curl --resolve) hit ingress.
+        // NATS_NODE_PORT lets clients off-cluster talk to the NATS service.
+        .with_port_mappings(vec![
+            PortMapping::new(HTTP_PORT, 80),
+            PortMapping::new(NATS_NODE_PORT as u32, NATS_NODE_PORT as u32),
+        ])
+}
+
+pub fn create_topology(k3d: &K3d) -> K8sAnywhereTopology {
+    let context = k3d
+        .context_name()
+        .unwrap_or_else(|| format!("k3d-{CLUSTER_NAME}"));
+    unsafe {
+        std::env::set_var("HARMONY_USE_LOCAL_K3D", "false");
+        std::env::set_var("HARMONY_AUTOINSTALL", "false");
+        std::env::set_var("HARMONY_K8S_CONTEXT", &context);
+    }
+    K8sAnywhereTopology::from_env()
+}
+
+/// Build the NATS Helm values that wire `auth_callout` to a callout
+/// service running in the same account, plus a NodePort for off-cluster
+/// access from tests on the host.
+///
+/// **Why the explicit `service.merge.spec.ports` list:** the upstream
+/// chart's `service.ports.<name>.merge` field is *not* a strategic-merge
+/// directive — it gets emitted as-is into the rendered Service (the
+/// chart's `_helpers.tpl` does `merge (dict "name" $k) $v` which leaves
+/// `merge: …` as a literal field on each port). K8s then rejects the
+/// Service with "field not declared in schema". Only the top-level
+/// `service.merge` is actually a `mergeOverwrite` patch; we use that
+/// path and re-state the full ports list so `nats` gets our nodePort.
+pub fn render_nats_values(issuer_pubkey: &str) -> String {
+    let auth_callout = render_auth_callout_block(issuer_pubkey, NATS_AUTH_USER, NATS_ACCOUNT);
+    format!(
+        r#"fullnameOverride: {nats_release}
+config:
+  cluster:
+    enabled: false
+  jetstream:
+    enabled: true
+    fileStorage:
+      enabled: true
+      size: 2Gi
+  merge:
+    {auth_callout_indented}
+    accounts:
+      {nats_account}:
+        jetstream: enabled
+        users:
+          - user: "{auth_user}"
+            password: "{auth_pass}"
+      SYS:
+        users:
+          - user: "{sys_user}"
+            password: "{sys_pass}"
+    system_account: SYS
+service:
+  merge:
+    spec:
+      type: NodePort
+      ports:
+        - appProtocol: tcp
+          name: nats
+          port: 4222
+          targetPort: nats
+          nodePort: {node_port}
+        - appProtocol: http
+          name: monitor
+          port: 8222
+          targetPort: monitor
+"#,
+        nats_release = NATS_RELEASE,
+        auth_callout_indented = auth_callout
+            .lines()
+            .enumerate()
+            .map(|(i, l)| if i == 0 {
+                l.to_string()
+            } else {
+                format!("    {l}")
+            })
+            .collect::<Vec<_>>()
+            .join("\n"),
+        nats_account = NATS_ACCOUNT,
+        auth_user = NATS_AUTH_USER,
+        auth_pass = NATS_AUTH_PASS,
+        sys_user = NATS_SYSTEM_USER,
+        sys_pass = NATS_SYSTEM_PASS,
+        node_port = NATS_NODE_PORT,
+    )
+}
+
+/// Bring the entire stack up on a local k3d cluster. Idempotent —
+/// re-running picks up existing resources.
+///
+/// Returns handles + credentials. The machine key fields contain raw
+/// JSON keyfile content (`MachineKeyFile`) and can be passed straight
+/// to [`mint_access_token`] to authenticate as the corresponding user.
+pub async fn bring_up_stack() -> Result<StackHandles> {
+    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init();
+
+    let k3d = create_k3d();
+
+    info!("[1/8] ensuring k3d cluster '{CLUSTER_NAME}' is up");
+    k3d.ensure_installed()
+        .await
+        .map_err(|e| anyhow::anyhow!("k3d ensure: {e}"))?;
+
+    let topology = create_topology(&k3d);
+    topology.ensure_ready().await.context("topology init")?;
+
+    info!("[2/8] deploying Zitadel (this takes several minutes the first time)");
+    deploy_zitadel(&topology).await?;
+
+    info!("[3/8] CoreDNS rewrite so in-cluster lookups for {ZITADEL_HOST} resolve");
+    CoreDNSRewriteScore {
+        rewrites: vec![CoreDNSRewrite {
+            hostname: ZITADEL_HOST.to_string(),
+            target: "zitadel.zitadel.svc.cluster.local".to_string(),
+        }],
+    }
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("CoreDNS rewrite")?;
+
+    info!("[4/8] waiting for Zitadel HTTP to respond");
+    wait_for_zitadel_ready().await?;
+
+    info!("[5/8] provisioning project + roles + machine users in Zitadel");
+    let setup = ZitadelSetupScore {
+        host: ZITADEL_HOST.to_string(),
+        port: HTTP_PORT as u16,
+        skip_tls: true,
+        applications: vec![],
+        api_apps: vec![ZitadelApiApp {
+            project_name: PROJECT_NAME.to_string(),
+            app_name: API_APP_NAME.to_string(),
+        }],
+        roles: vec![
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: ADMIN_ROLE_KEY.to_string(),
+                display_name: "Fleet Admin".to_string(),
+                group: None,
+            },
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: DEVICE_ROLE_KEY.to_string(),
+                display_name: "Device".to_string(),
+                group: None,
+            },
+        ],
+        machine_users: vec![
+            ZitadelMachineUser {
+                username: ADMIN_USERNAME.to_string(),
+                name: "Ops Station".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![ADMIN_ROLE_KEY.to_string()],
+            },
+            ZitadelMachineUser {
+                username: DEVICE_A_USERNAME.to_string(),
+                name: "Sensor A".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![DEVICE_ROLE_KEY.to_string()],
+            },
+            ZitadelMachineUser {
+                username: DEVICE_B_USERNAME.to_string(),
+                name: "Sensor B".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![DEVICE_ROLE_KEY.to_string()],
+            },
+            ZitadelMachineUser {
+                username: NO_ROLE_USERNAME.to_string(),
+                name: "Intruder".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: None,
+                grant_roles: vec![],
+            },
+        ],
+    };
+    setup
+        .interpret(&Inventory::autoload(), &topology)
+        .await
+        .context("ZitadelSetupScore failed")?;
+
+    let zcfg = ZitadelClientConfig::load()
+        .context("ZitadelSetupScore did not produce a client config cache")?;
+    let project_id = zcfg
+        .project_id_by_name(PROJECT_NAME)
+        .or(zcfg.project_id.as_ref())
+        .context("project_id missing from cache")?
+        .clone();
+
+    info!("[6/8] generating callout issuer NKey + deploying NATS with auth_callout");
+    // Re-use a deterministic seed across runs by stashing it in a
+    // K8s secret in the fleet namespace. Fall back to a fresh one
+    // and persist it. Keeping it stable lets us reuse the cached
+    // user JWTs Zitadel issued.
+    let issuer_seed = ensure_issuer_seed(&topology).await?;
+    let issuer_kp = KeyPair::from_seed(&issuer_seed)
+        .map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
+    let issuer_pubkey = issuer_kp.public_key();
+
+    NatsHelmChartScore::new(
+        NATS_RELEASE.to_string(),
+        NATS_NAMESPACE.to_string(),
+        render_nats_values(&issuer_pubkey),
+    )
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("NATS deploy")?;
+
+    info!("[7/8] building + sideloading callout image into k3d");
+    build_and_load_callout_image(&k3d).await?;
+
+    info!("[8/8] deploying NatsAuthCalloutScore");
+    let mut callout = NatsAuthCalloutScore::new(
+        CALLOUT_DEPLOYMENT_NAME,
+        FLEET_NAMESPACE,
+        format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
+        format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        // Zitadel emits aud = projectId for tokens issued via the
+        // `urn:zitadel:iam:org:project:id:<projectId>:aud` scope.
+        project_id.clone(),
+        NATS_AUTH_USER,
+        NATS_AUTH_PASS,
+        issuer_seed.clone(),
+    )
+    .image(CALLOUT_IMAGE_TAG)
+    .target_account(NATS_ACCOUNT)
+    .admin_role(ADMIN_ROLE_KEY)
+    .device_role(DEVICE_ROLE_KEY)
+    .danger_accept_invalid_certs(true);
+    // Zitadel doesn't emit a custom `device_id` claim by default — that
+    // would require a Zitadel Action to map metadata into an extension
+    // claim. For this example we derive the device id from `client_id`,
+    // which carries the machine user's username (`sensor-a`,
+    // `ops-station`, …). Production deployments that want a separate
+    // `device_id` claim should configure a Zitadel Action and override
+    // the device_id_claim path back to `device_id`.
+    // Zitadel access tokens for machine users:
+    //   * Don't carry `preferred_username` (that's an OIDC ID-token claim);
+    //   * Do carry `client_id` set to the machine user's userName — perfect
+    //     for our device-id-from-username case.
+    //
+    // The project's role claim lives at a *project-scoped* path
+    // `urn:zitadel:iam:org:project:<projectId>:roles` (NOT the unqualified
+    // `urn:zitadel:iam:org:project:roles`) because we request the
+    // `urn:zitadel:iam:org:project:id:<projectId>:aud` scope. The latter
+    // forces Zitadel to scope role claims to the specific project, which
+    // is what we want for tenant isolation.
+    callout.device_id_claim = "client_id".to_string();
+    // Zitadel's `client_id` for a machine user equals its userName, so
+    // a user created as `device-vm-device-00` (matching the
+    // `device_username()` convention used by both fleet_e2e_demo and
+    // fleet_rpi_setup) lands in the JWT verbatim. Strip the `device-`
+    // prefix so the callout interpolates permissions against the bare
+    // device id (`vm-device-00`) the agent uses for KV keys.
+    callout.device_id_prefix_strip = "device-".to_string();
+    callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
+    callout
+        .interpret(&Inventory::autoload(), &topology)
+        .await
+        .context("callout deploy")?;
+
+    info!("waiting for callout pod to be Ready before handing the stack over");
+    wait_for_callout_ready(&topology).await?;
+
+    let admin_machine_key = zcfg
+        .machine_key(ADMIN_USERNAME)
+        .context("admin machine key missing from cache")?
+        .clone();
+    let device_a_machine_key = zcfg
+        .machine_key(DEVICE_A_USERNAME)
+        .context("device A machine key missing from cache")?
+        .clone();
+    let device_b_machine_key = zcfg
+        .machine_key(DEVICE_B_USERNAME)
+        .context("device B machine key missing from cache")?
+        .clone();
+    let intruder_machine_key = zcfg
+        .machine_key(NO_ROLE_USERNAME)
+        .context("intruder machine key missing from cache")?
+        .clone();
+
+    Ok(StackHandles {
+        cluster_name: CLUSTER_NAME.to_string(),
+        nats_url_external: format!("nats://127.0.0.1:{NATS_NODE_PORT}"),
+        zitadel_url: format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        project_id,
+        admin_machine_key,
+        device_a_machine_key,
+        device_b_machine_key,
+        intruder_machine_key,
+        issuer_pubkey,
+    })
+}
+
+/// Deploy Zitadel through `ZitadelScore` with `external_secure` off and port `HTTP_PORT`.
+pub async fn deploy_zitadel(topology: &K8sAnywhereTopology) -> Result<()> {
+    let score = ZitadelScore {
+        host: ZITADEL_HOST.to_string(),
+        zitadel_version: "v4.12.1".to_string(),
+        external_secure: false,
+        // Match the host-side k3d port mapping so Zitadel's emitted issuer is
+        // `http://sso.fleet.local:8080`; otherwise JWT-bearer audience validation fails
+        // with `Errors.Internal` (assertion `aud` ≠ chart-default issuer at port 80).
+        external_port: Some(HTTP_PORT),
+    };
+    score
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .map(drop)
+        .context("ZitadelScore deploy")
+}
+
+/// Waits on the `k3d-{CLUSTER_NAME}` kube context until the callout Deployment has rolled out.
+pub async fn wait_for_callout_ready(topology: &K8sAnywhereTopology) -> Result<()> {
+    let _ = topology;
+    // `kubectl rollout status deployment` is the canonical "is the new
+    // ReplicaSet's pod up?" check — it handles observed-generation tracking,
+    // terminating-old-replica edge cases, and pod-readiness in one call, so
+    // shelling out is shorter and less error-prone than the kube client.
+    let status = tokio::process::Command::new("kubectl")
+        .args([
+            "--context",
+            &format!("k3d-{CLUSTER_NAME}"),
+            "rollout",
+            "status",
+            "-n",
+            FLEET_NAMESPACE,
+            &format!("deployment/{CALLOUT_DEPLOYMENT_NAME}"),
+            "--timeout=60s",
+        ])
+        .status()
+        .await
+        .context("invoke kubectl rollout status")?;
+    if !status.success() {
+        anyhow::bail!("kubectl rollout status timed out / failed");
+    }
+    Ok(())
+}
+
+/// Poll Zitadel's OIDC discovery document on 127.0.0.1 until it returns a success status.
+pub async fn wait_for_zitadel_ready() -> Result<()> {
+    let http = reqwest::Client::builder()
+        .timeout(Duration::from_secs(5))
+        .build()?;
+    let discovery_url = format!("http://127.0.0.1:{HTTP_PORT}/.well-known/openid-configuration");
+    // Host carries the port so Zitadel emits a matching issuer URL (see `mint_access_token`).
+    let host_header = format!("{ZITADEL_HOST}:{HTTP_PORT}");
+    for attempt in 1..=120 {
+        let outcome = http
+            .get(discovery_url.as_str())
+            .header("Host", host_header.as_str())
+            .send()
+            .await;
+        match outcome {
+            Ok(resp) if resp.status().is_success() => return Ok(()),
+            Ok(resp) if attempt % 15 == 0 => {
+                info!("Zitadel HTTP {} (attempt {attempt}/120)", resp.status())
+            }
+            Err(e) if attempt % 15 == 0 => {
+                info!("Zitadel unreachable: {e} (attempt {attempt}/120)")
+            }
+            _ => {}
+        }
+        tokio::time::sleep(Duration::from_secs(2)).await;
+    }
+    anyhow::bail!("timed out waiting for Zitadel")
+}
+
+/// Persist the callout's issuer NKey seed in a K8s secret so re-runs don't
+/// invalidate previously issued user JWTs in NATS; errors if it can't be stored.
+pub async fn ensure_issuer_seed(topology: &K8sAnywhereTopology) -> Result<String> {
+    use k8s_openapi::ByteString;
+    use k8s_openapi::api::core::v1::{Namespace, Secret};
+    use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
+    use std::collections::BTreeMap;
+
+    let k8s = topology
+        .k8s_client()
+        .await
+        .map_err(|e| anyhow::anyhow!("k8s_client: {e}"))?;
+    // Ensure namespace exists first — secret creation requires it.
+    if k8s
+        .get_resource::<Namespace>(FLEET_NAMESPACE, None)
+        .await?
+        .is_none()
+    {
+        let ns = Namespace {
+            metadata: ObjectMeta {
+                name: Some(FLEET_NAMESPACE.to_string()),
+                ..Default::default()
+            },
+            ..Default::default()
+        };
+        k8s.create(&ns, None).await.ok(); // best-effort; a failure resurfaces at the secret write
+    }
+
+    let secret_name = "callout-issuer-seed";
+    if let Some(existing) = k8s
+        .get_resource::<Secret>(secret_name, Some(FLEET_NAMESPACE))
+        .await?
+        && let Some(data) = existing.data
+        && let Some(seed_bytes) = data.get("seed")
+    {
+        let seed = String::from_utf8(seed_bytes.0.clone())?;
+        return Ok(seed.trim().to_string());
+    }
+    let seed = KeyPair::new_account()
+        .seed()
+        .map_err(|e| anyhow::anyhow!("nkey seed: {e}"))?;
+    let mut data = BTreeMap::new();
+    data.insert("seed".to_string(), ByteString(seed.as_bytes().to_vec()));
+    let secret = Secret {
+        metadata: ObjectMeta {
+            name: Some(secret_name.to_string()),
+            namespace: Some(FLEET_NAMESPACE.to_string()),
+            ..Default::default()
+        },
+        data: Some(data),
+        type_: Some("Opaque".to_string()),
+        ..Default::default()
+    };
+    // Fail loudly: an unpersisted seed would silently change on the next run.
+    k8s.create(&secret, Some(FLEET_NAMESPACE))
+        .await
+        .map_err(|e| anyhow::anyhow!("persist issuer seed secret {secret_name}: {e}"))?;
+    Ok(seed)
+}
+
+/// Build the callout binary, package the container image, and import it
+/// into the running k3d cluster. Mirrors `fleet/scripts/load-test.sh`'s
+/// staging-context pattern (the workspace `.dockerignore` excludes
+/// `target/`).
+pub async fn build_and_load_callout_image(k3d: &K3d) -> Result<()> {
+    let workspace_root = std::env::var("CARGO_MANIFEST_DIR")
+        .map(|d| PathBuf::from(d).join("..").join(".."))
+        .unwrap_or_else(|_| PathBuf::from("."));
+    let workspace_root = workspace_root.canonicalize().unwrap_or(workspace_root);
+
+    info!("cargo build --release -p harmony-nats-callout");
+    let status = tokio::process::Command::new("cargo")
+        .args(["build", "--release", "-p", "harmony-nats-callout"])
+        .current_dir(&workspace_root)
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("cargo build failed");
+    }
+
+    let ctx = tempfile::tempdir()?;
+    let bin_dst = ctx.path().join("target/release");
+    std::fs::create_dir_all(&bin_dst)?;
+    std::fs::copy(
+        workspace_root.join("target/release/harmony-nats-callout"),
+        bin_dst.join("harmony-nats-callout"),
+    )?;
+    std::fs::copy(
+        workspace_root.join("nats/callout/Dockerfile"),
+        ctx.path().join("Dockerfile"),
+    )?;
+
+    info!("podman build → {CALLOUT_IMAGE_TAG}");
+    let status = tokio::process::Command::new("podman")
+        .args(["build", "-q", "-t", CALLOUT_IMAGE_TAG, "."])
+        .current_dir(ctx.path())
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("podman build failed");
+    }
+
+    info!("k3d image import {CALLOUT_IMAGE_TAG}");
+    let cluster = k3d.cluster_name().unwrap_or(CLUSTER_NAME).to_string();
+    // Deterministic .tar path with a per-process suffix so concurrent
+    // test crates don't trample each other.
+    let tar_path =
+        std::env::temp_dir().join(format!("harmony-callout-image-{}.tar", std::process::id()));
+    let tar_path_str = tar_path
+        .to_str()
+        .context("image archive path is not valid UTF-8")?
+        .to_string();
+    // `podman save` (docker-archive format) refuses to overwrite an
+    // existing archive — wipe any leftover from a prior failed run.
+    let _ = std::fs::remove_file(&tar_path);
+    let status = tokio::process::Command::new("podman")
+        .args(["save", "-o", tar_path_str.as_str(), CALLOUT_IMAGE_TAG])
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("podman save failed");
+    }
+    // The k3d binary lives in `~/.local/share/harmony/k3d/k3d` — managed by k3d-rs,
+    // not on the system PATH (an interactive shell may alias it, but child
+    // processes don't inherit aliases). Run it via k3d-rs's accessor.
+    let result = tokio::task::spawn_blocking(move || {
+        k3d_rs::K3d::new(data_dir(), Some(cluster.clone())).run_k3d_command([
+            "image",
+            "import",
+            tar_path_str.as_str(),
+            "-c",
+            cluster.as_str(),
+        ])
+    })
+    .await
+    .context("spawn_blocking k3d image import")?;
+    let _ = std::fs::remove_file(&tar_path);
+    let output = result.map_err(|e| anyhow::anyhow!("k3d image import failed: {e}"))?;
+    if !output.status.success() {
+        anyhow::bail!(
+            "k3d image import returned {}: {}",
+            output.status,
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+    Ok(())
+}
+
+/// RFC 7523 JWT-bearer client for Zitadel.
+///
+/// `issuer_url` should be the externally-visible Zitadel URL
+/// (e.g. `http://sso.fleet.local:8080`) — it's used as the JWT
+/// assertion's `aud` claim. The actual HTTP transport hits
+/// `127.0.0.1:HTTP_PORT` and forwards the hostname via the `Host`
+/// header, which is how the k3d ingress routes without requiring a
+/// host-side `/etc/hosts` entry.
+///
+/// `machine_key_json` is the raw keyfile content Zitadel emits
+/// (decoded from `keyDetails`). `scopes` are appended to the standard
+/// set; pass `[format!("urn:zitadel:iam:org:project:id:{project_id}:aud")]`
+/// to make the resulting access token's `aud` include the project ID.
+pub async fn mint_access_token(
+    issuer_url: &str,
+    machine_key_json: &str,
+    scopes: &[String],
+) -> Result<String> {
+    let key: MachineKeyFile =
+        serde_json::from_str(machine_key_json).context("machine key JSON parse")?;
+
+    let now = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)?
+        .as_secs() as i64;
+    // Self-issued assertion: the machine user is both `iss` and `sub`, valid for 60 seconds.
+    let claims = serde_json::json!({
+        "iss": key.user_id,
+        "sub": key.user_id,
+        "aud": issuer_url,
+        "exp": now + 60,
+        "iat": now,
+    });
+
+    let mut header = JwtHeader::new(Algorithm::RS256);
+    header.kid = Some(key.key_id.clone());
+    let assertion = jwt_encode(
+        &header,
+        &claims,
+        &EncodingKey::from_rsa_pem(key.key.as_bytes())
+            .context("parse RSA private key from machine key file")?,
+    )?;
+
+    let scope = {
+        let mut s = vec![
+            "openid".to_string(),
+            "profile".to_string(),
+            "urn:zitadel:iam:org:projects:roles".to_string(),
+        ];
+        s.extend(scopes.iter().cloned());
+        s.join(" ")
+    };
+
+    let client = reqwest::Client::builder()
+        .danger_accept_invalid_certs(true)
+        .timeout(Duration::from_secs(10))
+        .build()?;
+    // The Zitadel chart's ingress routes by Host header. Hitting
+    // 127.0.0.1:HTTP_PORT bypasses the need for an /etc/hosts entry
+    // on the host running the tests (k3d's loadbalancer maps the
+    // port; the ingress controller dispatches by Host header).
+    //
+    // The Host MUST include the port: Zitadel derives the OIDC issuer
+    // string from the request's Host header. With `Host: sso.fleet.local`
+    // it emits `iss: http://sso.fleet.local`; with `Host: sso.fleet.local:8080`
+    // it emits `iss: http://sso.fleet.local:8080`. Our JWT assertion's `aud`
+    // must match Zitadel's issuer exactly, so we always send the port.
+    let host = url::Url::parse(issuer_url)
+        .ok()
+        .and_then(|u| {
+            let h = u.host_str()?;
+            let p = u.port_or_known_default();
+            Some(match p {
+                Some(p) => format!("{h}:{p}"),
+                None => h.to_string(),
+            })
+        })
+        .unwrap_or_else(|| format!("{ZITADEL_HOST}:{HTTP_PORT}"));
+    let token_url = format!("http://127.0.0.1:{HTTP_PORT}/oauth/v2/token");
+
+    let resp = client
+        .post(&token_url)
+        .header("Host", host)
+        .form(&[
+            (
+                "grant_type",
+                "urn:ietf:params:oauth:grant-type:jwt-bearer".to_string(),
+            ),
+            ("assertion", assertion),
+            ("scope", scope),
+        ])
+        .send()
+        .await
+        .context("POST /oauth/v2/token")?;
+
+    if !resp.status().is_success() {
+        let status = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        anyhow::bail!("token endpoint returned {status}: {body}");
+    }
+
+    #[derive(Deserialize)]
+    struct TokenResponse {
+        access_token: String,
+    }
+    let tr: TokenResponse = resp.json().await.context("parse token response")?;
+    if std::env::var("FLEET_AUTH_CALLOUT_DEBUG_TOKENS").is_ok()
+        && let Some(payload_b64) = tr.access_token.split('.').nth(1)
+    {
+        use base64::Engine;
+        // The decoder is the no-pad variant, so only strip stray `=`; never add padding.
+        if let Ok(bytes) = base64::engine::general_purpose::URL_SAFE_NO_PAD
+            .decode(payload_b64.trim_end_matches('='))
+            && let Ok(claims) = serde_json::from_slice::<serde_json::Value>(&bytes)
+        {
+            log::info!(
+                "[debug] access token claims: {}",
+                serde_json::to_string_pretty(&claims).unwrap_or_default()
+            );
+        }
+    }
+    Ok(tr.access_token)
+}
+
+/// Build the extra scope for our project: a project-id audience scope so
+/// the access token's `aud` matches what the callout's `oidc_audience`
+/// expects. `mint_access_token` prepends the standard scopes itself.
+pub fn scopes_for_project(project_id: &str) -> Vec<String> {
+    vec![format!("urn:zitadel:iam:org:project:id:{project_id}:aud")]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn render_nats_values_inlines_auth_callout_block() {
+        let rendered = render_nats_values("ABCDEF");
+        assert!(rendered.contains("issuer: ABCDEF"));
+        assert!(rendered.contains("auth_users: [ auth ]"));
+        assert!(rendered.contains("account: DEVICES"));
+        assert!(rendered.contains("system_account: SYS"));
+        assert!(rendered.contains("nodePort: 30422"));
+    }
+
+    #[test]
+    fn scopes_for_project_emits_audience_scope() {
+        let expected = vec!["urn:zitadel:iam:org:project:id:12345:aud".to_string()];
+        assert_eq!(scopes_for_project("12345"), expected);
+    }
+}
--- a/examples/fleet_auth_callout/src/main.rs
+++ b/examples/fleet_auth_callout/src/main.rs
@@ -0,0 +1,55 @@
+//! `cargo run -p example-fleet-auth-callout` brings the full Zitadel +
+//! NATS + auth callout stack up on a local k3d cluster, prints the URLs
+//! and credentials, and waits for Ctrl-C.
+//!
+//! Tests under `tests/` exercise the security model. They do NOT run
+//! unless explicitly requested with `cargo test -p example-fleet-auth-callout`
+//! since they bring up the same heavy stack.
+
+use anyhow::Result;
+use example_fleet_auth_callout::{
+    ADMIN_USERNAME, DEVICE_A_USERNAME, DEVICE_B_USERNAME, NO_ROLE_USERNAME, bring_up_stack,
+};
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    let stack = bring_up_stack().await?;
+    let rule = "=========================================================";
+
+    println!("\n{rule}");
+    println!(" Fleet Auth Callout — STACK READY");
+    println!("{rule}");
+    println!(" k3d cluster:     {}", stack.cluster_name);
+    println!(" Zitadel:         {}", stack.zitadel_url);
+    println!(
+        "   admin login:   admin / (see Zitadel ConfigMap 'zitadel-config-yaml' for password)"
+    );
+    println!(" NATS (external): {}", stack.nats_url_external);
+    println!("   account:       DEVICES");
+    println!(" Project ID:      {}", stack.project_id);
+    println!(" Issuer pubkey:   {}", stack.issuer_pubkey);
+    println!();
+    println!(" Machine keys provisioned (admin / sensor-a / sensor-b / intruder):");
+    for (name, key_json) in [
+        (ADMIN_USERNAME, &stack.admin_machine_key),
+        (DEVICE_A_USERNAME, &stack.device_a_machine_key),
+        (DEVICE_B_USERNAME, &stack.device_b_machine_key),
+        (NO_ROLE_USERNAME, &stack.intruder_machine_key),
+    ] {
+        // Show just the keyId to keep the banner short; the complete keyfile
+        // is cached at ~/.local/share/harmony/zitadel/client-config.json
+        let key_id = serde_json::from_str::<serde_json::Value>(key_json)
+            .ok()
+            .as_ref()
+            .and_then(|doc| doc.get("keyId"))
+            .and_then(serde_json::Value::as_str)
+            .map_or_else(|| "<unknown>".to_string(), str::to_owned);
+        println!("   {name:14}  keyId={key_id}");
+    }
+    println!();
+    println!(" Stack is running. Press Ctrl-C to exit (cluster keeps running).");
+    println!("{rule}");
+
+    tokio::signal::ctrl_c().await?;
+    Ok(())
+}
--- a/examples/fleet_auth_callout/tests/security_model.rs
+++ b/examples/fleet_auth_callout/tests/security_model.rs
@@ -0,0 +1,131 @@
+//! Real cargo tests proving the IoT fleet security model.
+//!
+//! All tests share a single bringup of the stack via [`OnceCell`]. The
+//! cluster keeps running across the suite, with each test using the
+//! cached machine keys to mint Zitadel JWTs and exercise NATS through
+//! the auth callout. Three invariants:
+//!
+//! 1. `admin_can_read_any_device_subject` — fleet-admin sees other devices' state.
+//! 2. `device_can_only_access_own_subjects` — sensor-a is denied access to sensor-b's commands.
+//! 3. `unknown_role_is_rejected` — a Zitadel-authenticated user with no
+//!    fleet role cannot connect to NATS.
+//!
+//! ## Why these tests are real-stack
+//!
+//! Mocking the OIDC issuer or NATS would only re-prove what the unit tests
+//! already cover. The point of this suite is to confirm — in CI, in
+//! cargo — that the **deployed** stack on k3d enforces the security
+//! model end-to-end. Hidden cluster-level misconfiguration (an unset
+//! `auth_callout` block, a wrong issuer pubkey, a CoreDNS rewrite drift,
+//! a permissions YAML typo) only shows up here.
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use async_nats::ConnectOptions;
+use example_fleet_auth_callout::{
+    StackHandles, bring_up_stack, mint_access_token, scopes_for_project,
+};
+use futures_util::StreamExt;
+use tokio::sync::OnceCell;
+
+static STACK: OnceCell<Arc<StackHandles>> = OnceCell::const_new();
+
+/// Bring the stack up at most once per test binary and hand out a shared
+/// handle. The fallible initializer means a failed bring-up is returned as
+/// an error to the caller instead of being stored in the cell.
+async fn shared_stack() -> Result<Arc<StackHandles>> {
+    let handles = STACK
+        .get_or_try_init(|| async { anyhow::Ok(Arc::new(bring_up_stack().await?)) })
+        .await?;
+    Ok(Arc::clone(handles))
+}
+
+/// Mint a Zitadel access token from `key_json` (with the project audience
+/// scope) and connect to the external NATS URL presenting it as the token.
+async fn connect_with_role(stack: &StackHandles, key_json: &str) -> Result<async_nats::Client> {
+    let scopes = scopes_for_project(&stack.project_id);
+    let token = mint_access_token(&stack.zitadel_url, key_json, &scopes)
+        .await
+        .context("mint Zitadel access token")?;
+
+    let options = ConnectOptions::with_token(token)
+        .connection_timeout(Duration::from_secs(5));
+    match options.connect(&stack.nats_url_external).await {
+        Ok(client) => Ok(client),
+        Err(e) => Err(anyhow::anyhow!("NATS connect: {e}")),
+    }
+}
+
+#[tokio::test]
+async fn admin_can_read_any_device_subject() -> Result<()> {
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+
+    let admin = connect_with_role(&stack, &stack.admin_machine_key).await?;
+    let device = connect_with_role(&stack, &stack.device_a_machine_key).await?;
+    // Subscribe and flush before the device publishes, so the admin SUB is sent first.
+    let mut admin_sub = admin.subscribe("device-state.>").await?;
+    admin.flush().await?;
+
+    device
+        .publish("device-state.sensor-a", "telemetry-payload".into())
+        .await?;
+    device.flush().await?;
+    // The admin's wildcard subscription must deliver the device's message within 5 seconds.
+    let msg = tokio::time::timeout(Duration::from_secs(5), admin_sub.next())
+        .await
+        .context("admin sub timeout")?
+        .context("admin sub closed")?;
+    assert_eq!(msg.payload.as_ref(), b"telemetry-payload");
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn device_can_only_access_own_subjects() -> Result<()> {
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+
+    let device_a = connect_with_role(&stack, &stack.device_a_machine_key).await?;
+    let device_b = connect_with_role(&stack, &stack.device_b_machine_key).await?;
+
+    let _b_sub = device_b.subscribe("device-commands.sensor-b").await?;
+    let mut a_wrong = device_a.subscribe("device-commands.sensor-b").await?;
+    device_a.flush().await?;
+    device_b.flush().await?;
+
+    // Device B publishes on its own command subject. Device A's SUB on
+    // that subject is expected to have been refused by NATS at SUB time,
+    // so `a_wrong` must stay silent for the whole timeout below.
+    device_b
+        .publish("device-commands.sensor-b", "should-not-leak".into())
+        .await?;
+    device_b.flush().await?;
+    // NOTE(review): `_b_sub` is never read, so a denied publish would also pass — confirm.
+    let result = tokio::time::timeout(Duration::from_millis(750), a_wrong.next()).await;
+    assert!(
+        result.is_err(),
+        "device A must not observe device B's commands"
+    );
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn unknown_role_is_rejected() -> Result<()> {
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+    // The intruder has a valid Zitadel JWT but no fleet role grant, so NATS must
+    // refuse it; a token-minting failure must not count as that refusal.
+    let err = connect_with_role(&stack, &stack.intruder_machine_key)
+        .await
+        .err()
+        .context("JWT without fleet role must not be admitted to NATS")?;
+    assert!(
+        err.to_string().starts_with("NATS connect:"),
+        "expected a NATS rejection, got: {err:#}"
+    );
+    Ok(())
+}
--- a/examples/fleet_e2e_demo/Cargo.toml
+++ b/examples/fleet_e2e_demo/Cargo.toml
@@ -0,0 +1,47 @@
+[package]
+name = "example-fleet-e2e-demo"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "VM-based end-to-end rehearsal: k3d + Zitadel + NATS auth callout + libvirt VM agents + operator → CR → podman → status"
+
+[lib]
+name = "example_fleet_e2e_demo"
+path = "src/lib.rs"
+
+[[bin]]
+name = "fleet-e2e-demo"
+path = "src/main.rs"
+
+[[test]]
+name = "e2e_walking_skeleton"
+path = "tests/e2e_walking_skeleton.rs"
+
+[dependencies]
+harmony = { path = "../../harmony", features = ["kvm"] }
+harmony-k8s = { path = "../../harmony-k8s" }
+harmony_types = { path = "../../harmony_types" }
+example-fleet-auth-callout = { path = "../fleet_auth_callout" }
+harmony-nats-callout = { path = "../../nats/callout" }
+harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
+harmony-fleet-operator = { path = "../../fleet/harmony-fleet-operator" }
+k3d-rs = { path = "../../k3d" }
+async-nats.workspace = true
+nkeys = "0.4"
+tokio = { workspace = true, features = ["full"] }
+tokio-test.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+anyhow.workspace = true
+log.workspace = true
+env_logger.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+futures-util.workspace = true
+k8s-openapi.workspace = true
+kube.workspace = true
+clap = { version = "4", features = ["derive", "env"] }
+directories = "6.0.0"
+tempfile = "3"
+url.workspace = true
--- a/examples/fleet_e2e_demo/src/lib.rs
+++ b/examples/fleet_e2e_demo/src/lib.rs
@@ -0,0 +1,815 @@
+//! VM-based end-to-end rehearsal of the customer demo flow.
+//!
+//! Goal: prove the JWT-auth chain works on a real-system agent
+//! before pointing the demo at OKD. See
+//! `ROADMAP/fleet_platform/v0_demo_e2e.md` for the full plan.
+//!
+//! Bring-up sequence:
+//! 1. k3d cluster with HTTP + NATS port mappings (re-uses
+//!    fleet_auth_callout's k3d helpers — same cluster name so
+//!    re-runs of either example reuse the same cluster).
+//! 2. Zitadel + Postgres via ZitadelScore.
+//! 3. Wait for Zitadel HTTP and the chart-provisioned `iam-admin-pat`
+//!    secret (the chart's setup job is async).
+//! 4. ZitadelSetupScore for the project + API app + roles + admin
+//!    machine user (no per-device users yet).
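+//! 5. NATS with auth_callout block + the callout pod.
+//! 6. Fleet operator: image built and sideloaded into k3d, then
+//!    deployed with Zitadel JWT credentials for its own machine user.
+//! 7. For each device i:
+//!    - ZitadelSetupScore minting a per-device machine user with
+//!      the `device` role grant. The JSON keyfile is cached in
+//!      `ZitadelClientConfig` and read back here for the agent.
+//!    - libvirt VM: provisioned out-of-band today; the harness reads
+//!      its IP from `FLEET_E2E_VM_<i>_IP`. Folding `ProvisionVmScore`
+//!      into the bring-up is a follow-up.
+//!    - SSH-inject `/etc/hosts` so the VM resolves
+//!      `sso.fleet.local` to the libvirt host.
+//!    - `FleetDeviceSetupScore` with `FleetDeviceAuth::ZitadelJwt`
+//!      pointing at the dropped keyfile.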
+//!
+//! Tests in `tests/e2e_walking_skeleton.rs` share a single bring-up
+//! via `OnceCell` and exercise: heartbeats, label-selector targeting,
+//! status reflect-back, env+volume propagation, admin cross-device
+//! read, per-device isolation, NATS-pod-restart reconnect.
+
+use std::path::PathBuf;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use example_fleet_auth_callout::{
+    ADMIN_ROLE_KEY, API_APP_NAME, CALLOUT_DEPLOYMENT_NAME, CALLOUT_IMAGE_TAG, DEVICE_ROLE_KEY,
+    FLEET_NAMESPACE, HTTP_PORT, NATS_ACCOUNT, NATS_AUTH_PASS, NATS_AUTH_USER, NATS_NAMESPACE,
+    NATS_NODE_PORT, NATS_RELEASE, PROJECT_NAME, ZITADEL_HOST, build_and_load_callout_image,
+    create_k3d, create_topology, deploy_zitadel, ensure_issuer_seed, render_nats_values,
+    wait_for_callout_ready, wait_for_zitadel_ready,
+};
+use harmony::inventory::Inventory;
+use harmony::modules::fleet::{
+    FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore, HostsEntry,
+    ensure_fleet_ssh_keypair,
+};
+use harmony::modules::k8s::coredns::{CoreDNSRewrite, CoreDNSRewriteScore};
+use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv};
+use harmony::modules::nats::NatsHelmChartScore;
+use harmony::modules::nats_auth_callout::NatsAuthCalloutScore;
+use harmony::modules::zitadel::{
+    MachineKeyType, ZitadelApiApp, ZitadelClientConfig, ZitadelMachineUser, ZitadelRole,
+    ZitadelSetupScore,
+};
+use harmony::score::Score;
+use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
+use harmony_types::id::Id;
+use log::{info, warn};
+use nkeys::KeyPair;
+
+// ---- constants -------------------------------------------------------------
+
+/// Libvirt's default NAT gateway. The host's IP from inside any VM
+/// attached to the `default` libvirt network. We bake this in because
+/// every smoke-a* harness assumes it; if a customer runs their own
+/// libvirt with a different bridge they can override it through
+/// [`E2eDemoOpts::libvirt_host_ip`] (`--libvirt-host-ip` on the CLI).
+pub const DEFAULT_LIBVIRT_HOST_IP: &str = "192.168.122.1";
+
+/// Username of the admin machine user created during bring-up with the
+/// admin role grant. Its machine key is handed back to callers in
+/// [`E2eHandles::admin_machine_key`].
+pub const ADMIN_USERNAME: &str = "fleet-ops";
+/// Separate machine user for the in-cluster operator. Distinct from
+/// `fleet-ops` (manual admin tooling) so the audit trail can tell
+/// operator-driven actions apart from human operator actions. Same
+/// `fleet-admin` role grant — only the identity differs.
+pub const OPERATOR_USERNAME: &str = "fleet-operator";
+/// Image tag the operator is built under with `podman build` and that
+/// the operator Deployment is rendered with.
+pub const OPERATOR_IMAGE_TAG: &str = "localhost/harmony-fleet-operator:dev";
+
+/// Builds the Zitadel machine-user name for a device: the literal
+/// prefix `device-` followed by `device_id`.
+///
+/// The bring-up configures the callout with `device_id_claim =
+/// "client_id"` and strips the `device-` prefix, so the bare device id
+/// is what the callout ends up interpolating. The original author notes
+/// that this is the same convention `fleet_rpi_setup` produces.
+pub fn device_username(device_id: &str) -> String {
+    let mut username = String::from("device-");
+    username.push_str(device_id);
+    username
+}
+
+// ---- options + handles -----------------------------------------------------
+
+/// Knobs for [`bring_up_full_stack`].
+#[derive(Debug, Clone)]
+pub struct E2eDemoOpts {
+    /// Number of VM-as-device agents to provision.
+    pub num_devices: usize,
+    /// Path to the `harmony-fleet-agent` binary uploaded to each VM.
+    /// `Default` resolves it to `target/release/harmony-fleet-agent`
+    /// two directories above `CARGO_MANIFEST_DIR` (the original note
+    /// says this is the same path that smoke-a4 produces).
+    pub agent_binary: PathBuf,
+    /// Override for the libvirt host IP (the address VMs see as the
+    /// gateway). Defaults to [`DEFAULT_LIBVIRT_HOST_IP`].
+    pub libvirt_host_ip: String,
+}
+
+impl Default for E2eDemoOpts {
+    /// Two devices, the release `harmony-fleet-agent` binary from the
+    /// workspace `target/` directory, and the default libvirt gateway.
+    fn default() -> Self {
+        let agent_binary = workspace_target_path("release/harmony-fleet-agent");
+        let libvirt_host_ip = String::from(DEFAULT_LIBVIRT_HOST_IP);
+        E2eDemoOpts {
+            num_devices: 2,
+            agent_binary,
+            libvirt_host_ip,
+        }
+    }
+}
+
+/// What the bring-up knows about one onboarded device.
+#[derive(Debug, Clone)]
+pub struct DeviceHandle {
+    /// Position of the device in the provisioning loop (`0..num_devices`).
+    pub index: usize,
+    /// Device id, formatted as `vm-device-{index:02}`.
+    pub device_id: String,
+    /// VM address as read from the `FLEET_E2E_VM_<index>_IP` env var.
+    pub vm_ip: String,
+    /// Labels handed to the agent setup for this device.
+    pub labels: std::collections::BTreeMap<String, String>,
+}
+
+/// Everything a caller (CLI or test suite) needs to talk to the stack
+/// that [`bring_up_full_stack`] produced.
+#[derive(Debug, Clone)]
+pub struct E2eHandles {
+    /// Name of the k3d cluster (the `fleet_auth_callout` cluster name).
+    pub cluster_name: String,
+    /// NATS URL on the local host: `nats://127.0.0.1:<NATS_NODE_PORT>`.
+    pub nats_url_external: String,
+    /// Zitadel base URL: `http://<ZITADEL_HOST>:<HTTP_PORT>`.
+    pub zitadel_url: String,
+    /// Zitadel project id read back from the client config cache.
+    pub project_id: String,
+    /// Public key of the issuer NKey that NATS was deployed with.
+    pub issuer_pubkey: String,
+    /// Machine key of the [`ADMIN_USERNAME`] user, as cached by the
+    /// Zitadel setup (requested as a JSON key).
+    pub admin_machine_key: String,
+    /// One entry per onboarded device, in provisioning order.
+    pub devices: Vec<DeviceHandle>,
+}
+
+// ---- bring up --------------------------------------------------------------
+
+/// Brings up the whole rehearsal stack and returns handles to it.
+///
+/// Runs ten steps in order, each announced with an `[e2e-demo i/10]`
+/// log line: k3d cluster + topology, Zitadel deploy, CoreDNS rewrite
+/// and readiness waits, Zitadel project/app/roles/machine users, NATS
+/// with auth callout, callout image, callout deployment, operator
+/// image, operator deployment, and finally one agent install per
+/// device (devices are handled sequentially).
+///
+/// `env_logger` is initialised here; the result of `try_init` is
+/// ignored so a caller that already installed a logger is not an error.
+///
+/// # Errors
+/// Returns the first failure of any step, wrapped with context naming
+/// the step that failed.
+pub async fn bring_up_full_stack(opts: E2eDemoOpts) -> Result<E2eHandles> {
+    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init();
+
+    info!("[e2e-demo 1/10] ensuring k3d cluster");
+    let k3d = create_k3d();
+    k3d.ensure_installed()
+        .await
+        .map_err(|e| anyhow::anyhow!("k3d ensure: {e}"))?;
+    let topology = create_topology(&k3d);
+    topology.ensure_ready().await.context("topology init")?;
+
+    info!("[e2e-demo 2/10] deploying Zitadel (cold start: ~5 min)");
+    deploy_zitadel(&topology).await?;
+
+    info!("[e2e-demo 3/10] CoreDNS rewrite + waiting for Zitadel HTTP + iam-admin-pat secret");
+    CoreDNSRewriteScore {
+        rewrites: vec![CoreDNSRewrite {
+            hostname: ZITADEL_HOST.to_string(),
+            target: "zitadel.zitadel.svc.cluster.local".to_string(),
+        }],
+    }
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("CoreDNSRewriteScore")?;
+    wait_for_zitadel_ready().await?;
+    wait_for_iam_admin_pat_secret(&topology).await?;
+
+    info!("[e2e-demo 4/10] provisioning project, API app, roles, admin machine user");
+    let admin_setup = ZitadelSetupScore {
+        host: ZITADEL_HOST.to_string(),
+        port: HTTP_PORT as u16,
+        skip_tls: true,
+        applications: vec![],
+        api_apps: vec![ZitadelApiApp {
+            project_name: PROJECT_NAME.to_string(),
+            app_name: API_APP_NAME.to_string(),
+        }],
+        roles: vec![
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: ADMIN_ROLE_KEY.to_string(),
+                display_name: "Fleet Admin".to_string(),
+                group: None,
+            },
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: DEVICE_ROLE_KEY.to_string(),
+                display_name: "Device".to_string(),
+                group: None,
+            },
+        ],
+        machine_users: vec![
+            ZitadelMachineUser {
+                username: ADMIN_USERNAME.to_string(),
+                name: "Fleet Operations".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![ADMIN_ROLE_KEY.to_string()],
+            },
+            // Separate machine user for the in-cluster operator pod.
+            // Same `fleet-admin` role grant as the manual admin
+            // identity, but distinct username so JWT `client_id` lets
+            // log analysis tell operator-driven actions apart from
+            // human operator actions.
+            ZitadelMachineUser {
+                username: OPERATOR_USERNAME.to_string(),
+                name: "Fleet Operator (in-cluster)".to_string(),
+                create_pat: false,
+                machine_key: Some(MachineKeyType::Json),
+                project_name: Some(PROJECT_NAME.to_string()),
+                grant_roles: vec![ADMIN_ROLE_KEY.to_string()],
+            },
+        ],
+    };
+    admin_setup
+        .interpret(&Inventory::autoload(), &topology)
+        .await
+        .context("admin ZitadelSetupScore")?;
+
+    let zcfg = ZitadelClientConfig::load()
+        .context("ZitadelSetupScore did not produce a client config cache")?;
+    let project_id = zcfg
+        .project_id_by_name(PROJECT_NAME)
+        .or(zcfg.project_id.as_ref())
+        .context("project_id missing from cache")?
+        .clone();
+    let admin_machine_key = zcfg
+        .machine_key(ADMIN_USERNAME)
+        .context("admin machine key missing from cache")?
+        .clone();
+
+    info!("[e2e-demo 5/10] generating issuer NKey, deploying NATS with auth_callout");
+    let issuer_seed = ensure_issuer_seed(&topology).await?;
+    let issuer_kp = KeyPair::from_seed(&issuer_seed)
+        .map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
+    let issuer_pubkey = issuer_kp.public_key();
+
+    NatsHelmChartScore::new(
+        NATS_RELEASE.to_string(),
+        NATS_NAMESPACE.to_string(),
+        render_nats_values(&issuer_pubkey),
+    )
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("NATS deploy")?;
+
+    info!("[e2e-demo 6/10] building + sideloading callout image into k3d");
+    build_and_load_callout_image(&k3d).await?;
+
+    info!("[e2e-demo 7/10] deploying NatsAuthCalloutScore");
+    let mut callout = NatsAuthCalloutScore::new(
+        CALLOUT_DEPLOYMENT_NAME,
+        FLEET_NAMESPACE,
+        format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
+        format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        project_id.clone(),
+        NATS_AUTH_USER,
+        NATS_AUTH_PASS,
+        issuer_seed.clone(),
+    )
+    .image(CALLOUT_IMAGE_TAG)
+    .target_account(NATS_ACCOUNT)
+    .admin_role(ADMIN_ROLE_KEY)
+    .device_role(DEVICE_ROLE_KEY)
+    .danger_accept_invalid_certs(true);
+    // Same convention as fleet_auth_callout: the username is in the
+    // access token's `client_id` claim. The role claim path is
+    // project-scoped because the JWT-bearer flow requests project
+    // audience scope.
+    callout.device_id_claim = "client_id".to_string();
+    // Zitadel's `client_id` for a machine user equals its userName, so a
+    // user created as `device-vm-device-00` (the convention shared with
+    // fleet_rpi_setup and fleet_auth_callout) lands in the JWT verbatim.
+    // Strip the `device-` prefix so the callout interpolates permissions
+    // against the bare device id (`vm-device-00`) the agent uses for KV
+    // keys + direct subjects.
+    callout.device_id_prefix_strip = "device-".to_string();
+    callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
+    callout
+        .interpret(&Inventory::autoload(), &topology)
+        .await
+        .context("callout deploy")?;
+    wait_for_callout_ready(&topology).await?;
+
+    info!("[e2e-demo 8/10] building + sideloading operator image into k3d");
+    build_and_load_operator_image(&k3d).await?;
+
+    info!("[e2e-demo 9/10] deploying fleet operator with Zitadel JWT auth");
+    let operator_machine_key = zcfg
+        .machine_key(OPERATOR_USERNAME)
+        .with_context(|| format!("machine key for {OPERATOR_USERNAME} missing from cache"))?
+        .clone();
+    deploy_operator(&topology, &project_id, &operator_machine_key).await?;
+    wait_for_operator_ready(&topology).await?;
+
+    info!(
+        "[e2e-demo 10/10] provisioning {} VM(s) and onboarding agent(s)",
+        opts.num_devices
+    );
+    let mut devices = Vec::with_capacity(opts.num_devices);
+    for i in 0..opts.num_devices {
+        let handle = provision_device(i, &opts, &topology, &project_id).await?;
+        devices.push(handle);
+    }
+
+    info!(
+        "full stack ready: {} device(s), operator + admin role configured",
+        devices.len()
+    );
+
+    Ok(E2eHandles {
+        cluster_name: example_fleet_auth_callout::CLUSTER_NAME.to_string(),
+        nats_url_external: format!("nats://127.0.0.1:{NATS_NODE_PORT}"),
+        zitadel_url: format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+        project_id,
+        issuer_pubkey,
+        admin_machine_key,
+        devices,
+    })
+}
+
+// ---- per-device provisioning ----------------------------------------------
+
+/// Onboards device number `index`: mints its Zitadel machine user,
+/// resolves the VM's IP, and installs the agent on the VM over SSH.
+///
+/// * `index` — position in the provisioning loop; determines the
+///   device id (`vm-device-{index:02}`), the env var the IP is read
+///   from, and the `group` label.
+/// * `opts` — supplies the agent binary path and the libvirt host IP.
+/// * `topology` — cluster topology the Zitadel setup is interpreted
+///   against.
+/// * `project_id` — Zitadel project id, used as the token audience.
+///
+/// # Errors
+/// Fails if the Zitadel setup fails, the machine key is missing from
+/// the client config cache, the VM IP env var is unset, or the agent
+/// setup against the VM fails.
+async fn provision_device(
+    index: usize,
+    opts: &E2eDemoOpts,
+    topology: &K8sAnywhereTopology,
+    project_id: &str,
+) -> Result<DeviceHandle> {
+    let device_id = format!("vm-device-{index:02}");
+    let username = device_username(&device_id);
+    info!("[device {index}] minting Zitadel machine user {username}");
+
+    // Per-device ZitadelSetupScore (search-then-create — running this
+    // for an existing user is a NOOP that just refreshes the cache
+    // entry pointing at the persisted machine key). The keyfile is
+    // re-minted because Zitadel doesn't expose the private half of
+    // an existing key — accept that any prior key drifts to "stale
+    // until expiry" on the previous device installation.
+    let device_setup = ZitadelSetupScore {
+        host: ZITADEL_HOST.to_string(),
+        port: HTTP_PORT as u16,
+        skip_tls: true,
+        applications: vec![],
+        api_apps: vec![],
+        roles: vec![],
+        machine_users: vec![ZitadelMachineUser {
+            username: username.clone(),
+            name: format!("Fleet Device {device_id}"),
+            create_pat: false,
+            machine_key: Some(MachineKeyType::Json),
+            project_name: Some(PROJECT_NAME.to_string()),
+            grant_roles: vec![DEVICE_ROLE_KEY.to_string()],
+        }],
+    };
+    device_setup
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .with_context(|| format!("ZitadelSetupScore for {username}"))?;
+
+    // Re-load the cache: the setup above is what writes this device's key.
+    let zcfg = ZitadelClientConfig::load()
+        .context("ZitadelClientConfig disappeared between admin and device setup")?;
+    let machine_key_json = zcfg
+        .machine_key(&username)
+        .with_context(|| format!("machine key for {username} missing from cache"))?
+        .clone();
+
+    // -- VM provisioning would go here. Deferred to keep the harness
+    //    cold-start observable in pieces — the kvm bits (ProvisionVmScore)
+    //    require root + libvirtd + the cloud image. Today the harness
+    //    expects the operator to have provisioned VMs out-of-band (e.g.
+    //    via fleet_vm_setup, or a pre-existing libvirt domain). We read
+    //    the IP from a convention path (see `discover_vm_ip`) so the
+    //    test driver can iterate on the agent path without re-paying VM
+    //    boot every test cycle.
+    //
+    //    Follow-up: fold ProvisionVmScore::ensure_vm here once the
+    //    bring-up has been demonstrated end-to-end at least once.
+    let vm_ip = discover_vm_ip(index)
+        .with_context(|| format!("could not resolve IP for device {index}"))?;
+
+    info!("[device {index}] {device_id} at {vm_ip} — installing agent with Zitadel JWT auth");
+    let labels = build_device_labels(&device_id, index);
+    let agent_score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig {
+        device_id: Id::from(device_id.clone()),
+        labels: labels.clone(),
+        // Agent connects to NATS at the libvirt host's IP via the
+        // NodePort. The libvirt default network NATs the VM through
+        // the host so the host's port mapping is reachable.
+        nats_urls: vec![format!("nats://{}:{NATS_NODE_PORT}", opts.libvirt_host_ip)],
+        auth: FleetDeviceAuth::ZitadelJwt {
+            machine_key_json,
+            // Issuer URL the agent uses MUST match the issuer
+            // string Zitadel returns — Zitadel derives that from
+            // the request's Host header. We hit Zitadel via the
+            // host's port mapping, so the agent's URL is
+            // `http://sso.fleet.local:<host-port>`. The /etc/hosts
+            // entry below points sso.fleet.local at the libvirt
+            // host so the VM resolves it.
+            oidc_issuer_url: format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
+            audience: project_id.to_string(),
+            // Local rehearsal hits Zitadel over plain HTTP through
+            // the cluster ingress; no TLS validation needed.
+            danger_accept_invalid_certs: true,
+        },
+        agent_binary_path: opts.agent_binary.clone(),
+        hosts_entries: vec![HostsEntry {
+            ip: opts.libvirt_host_ip.clone(),
+            hostname: ZITADEL_HOST.to_string(),
+        }],
+    });
+
+    // Apply the score over SSH against the VM. Same pattern as
+    // fleet_rpi_setup, but synthesized inline so the harness can drive
+    // multiple VMs in sequence without copying the CLI plumbing.
+    apply_fleet_setup_to_vm(index, &vm_ip, agent_score).await?;
+
+    Ok(DeviceHandle {
+        index,
+        device_id,
+        vm_ip,
+        labels,
+    })
+}
+
+/// Executes `score` against the VM at `vm_ip` through a
+/// `LinuxHostTopology` named `vm-device-{index:02}`.
+///
+/// Ensures the ansible venv and the fleet SSH keypair first, then
+/// parses `vm_ip`, then runs the score's interpret with an empty
+/// inventory.
+///
+/// # Errors
+/// Fails if either prerequisite cannot be ensured, if `vm_ip` does not
+/// parse as an IP address, or if the score execution fails.
+async fn apply_fleet_setup_to_vm(
+    index: usize,
+    vm_ip: &str,
+    score: FleetDeviceSetupScore,
+) -> Result<()> {
+    use harmony::score::Score;
+
+    if let Err(e) = ensure_ansible_venv().await {
+        anyhow::bail!("ansible venv: {e}");
+    }
+    let keypair = match ensure_fleet_ssh_keypair().await {
+        Ok(keypair) => keypair,
+        Err(e) => anyhow::bail!("ssh keypair: {e}"),
+    };
+    let address = vm_ip
+        .parse()
+        .with_context(|| format!("VM IP '{vm_ip}' is not a valid IP address"))?;
+
+    // Matches the cloud-init admin user that fleet_vm_setup +
+    // smoke-a4 create. If the operator overrode that during
+    // out-of-band VM provisioning, follow-up: thread the
+    // username through E2eDemoOpts.
+    let credentials = SshCredentials {
+        user: "fleet-admin".to_string(),
+        private_key_path: keypair.private_key.clone(),
+        remote_python: Some("/usr/bin/python3".to_string()),
+        sudo_password: None,
+    };
+    let host_name = format!("vm-device-{index:02}");
+    let vm_topology = LinuxHostTopology::new(host_name, address, credentials);
+
+    let interpret = score.create_interpret();
+    interpret
+        .execute(&Inventory::empty(), &vm_topology)
+        .await
+        .with_context(|| format!("FleetDeviceSetupScore against VM {index} ({vm_ip})"))?;
+    Ok(())
+}
+
+/// Labels attached to a rehearsal device.
+///
+/// Always four entries: `group` (`group-a` for index 0, `group-b` for
+/// every other index), `arch` (the architecture this harness was
+/// compiled for), `role` (`rehearsal`) and `device-id`. With the
+/// default two devices the `group` label lets selector tests target
+/// exactly one device; the scheme matches the demo runbook.
+fn build_device_labels(
+    device_id: &str,
+    index: usize,
+) -> std::collections::BTreeMap<String, String> {
+    let group = match index {
+        0 => "group-a",
+        _ => "group-b",
+    };
+    let entries = [
+        ("group", group),
+        ("arch", std::env::consts::ARCH),
+        ("role", "rehearsal"),
+        ("device-id", device_id),
+    ];
+    entries
+        .iter()
+        .map(|&(key, value)| (key.to_owned(), value.to_owned()))
+        .collect()
+}
+
+/// Looks up the IP of the pre-provisioned VM for device `index`.
+///
+/// Convention: the `FLEET_E2E_VM_<index>_IP` env var holds the
+/// address. This keeps the harness usable on a workstation where the
+/// operator runs `fleet_vm_setup` once per device out-of-band and then
+/// re-runs the e2e harness against the already-booted VMs. The value
+/// is returned as-is; it is not validated here.
+///
+/// # Errors
+/// Fails when the variable cannot be read, with a hint naming it.
+fn discover_vm_ip(index: usize) -> Result<String> {
+    let var_name = format!("FLEET_E2E_VM_{index}_IP");
+    let hint = || format!("set {var_name} to the libvirt VM's IP (default network)");
+    std::env::var(&var_name).with_context(hint)
+}
+
+// ---- iam-admin-pat readiness ----------------------------------------------
+
+/// Blocks until the `iam-admin-pat` secret in the `zitadel` namespace
+/// exists and carries a `pat` key.
+///
+/// The Zitadel chart's setup job writes that secret, and the Helm
+/// release reports Ready before the job completes, so calling
+/// ZitadelSetupScore right after the deploy races. ZitadelSetupScore
+/// itself reads this secret to authenticate to the management API.
+///
+/// Polls once per second for up to 120 attempts and logs a warning
+/// every tenth attempt.
+///
+/// # Errors
+/// Fails if the k8s client cannot be obtained, if a lookup errors, or
+/// if the secret never shows up within the attempt budget.
+async fn wait_for_iam_admin_pat_secret(topology: &K8sAnywhereTopology) -> Result<()> {
+    use k8s_openapi::api::core::v1::Secret;
+
+    let client = match topology.k8s_client().await {
+        Ok(client) => client,
+        Err(e) => return Err(anyhow::anyhow!("k8s_client: {e}")),
+    };
+
+    for attempt in 1..=120 {
+        let secret = client
+            .get_resource::<Secret>("iam-admin-pat", Some("zitadel"))
+            .await?;
+        let has_pat = secret
+            .and_then(|found| found.data)
+            .is_some_and(|data| data.contains_key("pat"));
+        if has_pat {
+            return Ok(());
+        }
+
+        let should_warn = attempt % 10 == 0;
+        if should_warn {
+            warn!("iam-admin-pat secret not yet present in zitadel ns ({attempt}/120)");
+        }
+        tokio::time::sleep(Duration::from_secs(1)).await;
+    }
+
+    anyhow::bail!(
+        "timed out waiting for iam-admin-pat secret in 'zitadel' namespace — \
+         is FirstInstance.Org.Machine.Pat configured in ZitadelScore Helm values?"
+    )
+}
+
+// ---- operator deploy -------------------------------------------------------
+
+/// Namespace the operator's namespaced resources are applied into;
+/// the same namespace the callout is deployed to.
+const OPERATOR_NAMESPACE: &str = FLEET_NAMESPACE;
+/// Path written as `key_path` in the operator's credentials TOML and
+/// passed to the chart as the keyfile mount path.
+const OPERATOR_KEY_MOUNT_PATH: &str = "/etc/fleet-operator/zitadel-key.json";
+
+/// Directory handed to `k3d_rs::K3d::new` for the image import:
+/// `<user data dir>/harmony/k3d`, or `/tmp/harmony` when no base
+/// directories can be determined.
+///
+/// Mirrors `example_fleet_auth_callout::data_dir` (the latter is
+/// private — duplicated here rather than re-exported so the operator
+/// wiring is self-contained).
+fn k3d_data_dir() -> PathBuf {
+    match directories::BaseDirs::new() {
+        Some(base) => base.data_dir().join("harmony").join("k3d"),
+        None => PathBuf::from("/tmp/harmony"),
+    }
+}
+
+/// Build the operator's release binary, package it into an OCI image,
+/// and sideload into the k3d cluster. Mirrors
+/// `build_and_load_callout_image`. The Dockerfile lives in the
+/// operator crate.
+///
+/// Steps: `cargo build --release -p harmony-fleet-operator` from the
+/// workspace root, stage binary + Dockerfile into a temp build
+/// context, `podman build`, `podman save` to a tarball in the temp
+/// dir, then `k3d image import`. The tarball is removed afterwards,
+/// including when the save or the import fails.
+///
+/// # Errors
+/// Fails if any external command cannot be spawned or exits
+/// unsuccessfully, if the Dockerfile is missing, if staging I/O fails,
+/// or if the temp tarball path is not valid UTF-8.
+async fn build_and_load_operator_image(k3d: &k3d_rs::K3d) -> Result<()> {
+    use std::process::Stdio;
+
+    // Workspace root is two levels above this crate; fall back to the
+    // current directory when CARGO_MANIFEST_DIR is not set at runtime.
+    let workspace_root = std::env::var("CARGO_MANIFEST_DIR")
+        .map(|d| PathBuf::from(d).join("..").join(".."))
+        .unwrap_or_else(|_| PathBuf::from("."));
+    let workspace_root = workspace_root.canonicalize().unwrap_or(workspace_root);
+
+    info!("cargo build --release -p harmony-fleet-operator");
+    let status = tokio::process::Command::new("cargo")
+        .args(["build", "--release", "-p", "harmony-fleet-operator"])
+        .current_dir(&workspace_root)
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("cargo build for fleet operator failed");
+    }
+
+    // Stage the binary + Dockerfile into a clean temp dir so podman
+    // build doesn't drag the whole target/ tree across.
+    let ctx = tempfile::tempdir()?;
+    let bin_dst = ctx.path().join("target/release");
+    std::fs::create_dir_all(&bin_dst)?;
+    std::fs::copy(
+        workspace_root.join("target/release/harmony-fleet-operator"),
+        bin_dst.join("harmony-fleet-operator"),
+    )
+    .context("staging operator binary into build context")?;
+    let dockerfile_src = workspace_root.join("fleet/harmony-fleet-operator/Dockerfile");
+    if !dockerfile_src.exists() {
+        anyhow::bail!(
+            "missing fleet/harmony-fleet-operator/Dockerfile — operator image staging \
+             expects it next to Cargo.toml; either add it or update the bring-up."
+        );
+    }
+    std::fs::copy(&dockerfile_src, ctx.path().join("Dockerfile"))?;
+
+    info!("podman build → {OPERATOR_IMAGE_TAG}");
+    let status = tokio::process::Command::new("podman")
+        .args(["build", "-q", "-t", OPERATOR_IMAGE_TAG, "."])
+        .current_dir(ctx.path())
+        .stderr(Stdio::inherit())
+        .status()
+        .await?;
+    if !status.success() {
+        anyhow::bail!("podman build for operator failed");
+    }
+
+    let tar_path =
+        std::env::temp_dir().join(format!("harmony-operator-image-{}.tar", std::process::id()));
+    // The commands below take the path as a string argument, so it has
+    // to be UTF-8. Report that as an error instead of panicking.
+    let tar_path_str = tar_path
+        .to_str()
+        .with_context(|| {
+            format!(
+                "temp image path {} is not valid UTF-8",
+                tar_path.display()
+            )
+        })?
+        .to_string();
+    let _ = std::fs::remove_file(&tar_path);
+    let save_status = tokio::process::Command::new("podman")
+        .args(["save", "-o", tar_path_str.as_str(), OPERATOR_IMAGE_TAG])
+        .status()
+        .await;
+    // Don't leave a partial tarball behind when the save did not succeed.
+    if !matches!(&save_status, Ok(s) if s.success()) {
+        let _ = std::fs::remove_file(&tar_path);
+    }
+    if !save_status?.success() {
+        anyhow::bail!("podman save for operator failed");
+    }
+
+    info!("k3d image import {OPERATOR_IMAGE_TAG}");
+    let cluster_name = k3d
+        .cluster_name()
+        .unwrap_or(example_fleet_auth_callout::CLUSTER_NAME)
+        .to_string();
+    let data_dir = k3d_data_dir();
+    // The k3d command runs on the blocking pool so it does not stall
+    // the async runtime.
+    let import_result = tokio::task::spawn_blocking(move || {
+        k3d_rs::K3d::new(data_dir, Some(cluster_name.clone())).run_k3d_command([
+            "image",
+            "import",
+            tar_path_str.as_str(),
+            "-c",
+            cluster_name.as_str(),
+        ])
+    })
+    .await;
+    // Remove the tarball whether or not the import succeeded.
+    let _ = std::fs::remove_file(&tar_path);
+    import_result?.map_err(|e| anyhow::anyhow!("k3d image import failed: {e}"))?;
+    Ok(())
+}
+
+/// Apply the operator's CRDs + ServiceAccount + ClusterRole +
+/// ClusterRoleBinding + Secret + Deployment via Harmony's
+/// K8sResourceScore. The Secret carries both the `[credentials]` TOML
+/// (consumed by the operator as `FLEET_OPERATOR_CREDENTIALS_TOML`) and
+/// the Zitadel JSON keyfile that the TOML's `key_path` references.
+///
+/// * `topology` — cluster the resources are applied to.
+/// * `project_id` — Zitadel project id, written as the `audience`.
+/// * `operator_machine_key` — keyfile content for the operator's
+///   machine user, stored in the Secret.
+///
+/// Resources are applied one after another in the order listed above;
+/// the first failing apply aborts with context naming the resource.
+async fn deploy_operator(
+    topology: &K8sAnywhereTopology,
+    project_id: &str,
+    operator_machine_key: &str,
+) -> Result<()> {
+    use harmony::modules::k8s::resource::K8sResourceScore;
+    use harmony_fleet_operator::chart::{
+        ChartOptions, OperatorCredentials, RELEASE_NAME, build_cluster_role,
+        build_cluster_role_binding, build_operator_deployment, build_service_account,
+        operator_secret,
+    };
+    use harmony_fleet_operator::crd::{Deployment as FleetDeployment, Device};
+    use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
+    use kube::CustomResourceExt;
+
+    // Render the [credentials] TOML the operator pod will consume via
+    // env var. Same shape as the agent's [credentials] block —
+    // `harmony_fleet_auth::CredentialsSection` parses both verbatim.
+    let credentials_toml = format!(
+        r#"type = "zitadel-jwt"
+key_path = "{key_path}"
+oidc_issuer_url = "http://{host}:{port}"
+audience = "{project_id}"
+danger_accept_invalid_certs = true
+"#,
+        key_path = OPERATOR_KEY_MOUNT_PATH,
+        host = ZITADEL_HOST,
+        port = HTTP_PORT,
+    );
+
+    let opts = ChartOptions {
+        output_dir: PathBuf::new(), // unused on this code path
+        image: OPERATOR_IMAGE_TAG.to_string(),
+        image_pull_policy: "IfNotPresent".to_string(),
+        namespace: OPERATOR_NAMESPACE.to_string(),
+        nats_url: format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
+        log_level: "info,kube_runtime=warn".to_string(),
+        credentials: Some(OperatorCredentials {
+            credentials_toml,
+            zitadel_keyfile_json: operator_machine_key.to_string(),
+            key_mount_path: OPERATOR_KEY_MOUNT_PATH.to_string(),
+        }),
+    };
+
+    // CRDs first — the operator watches them on startup.
+    let crds: Vec<CustomResourceDefinition> = vec![FleetDeployment::crd(), Device::crd()];
+    K8sResourceScore::<CustomResourceDefinition> {
+        resource: crds,
+        namespace: None,
+    }
+    .interpret(&Inventory::autoload(), topology)
+    .await
+    .context("operator CRD apply")?;
+
+    // RBAC.
+    K8sResourceScore::single(
+        build_service_account(&opts),
+        Some(OPERATOR_NAMESPACE.to_string()),
+    )
+    .interpret(&Inventory::autoload(), topology)
+    .await
+    .context("operator ServiceAccount apply")?;
+
+    K8sResourceScore::single(build_cluster_role(), None)
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .context("operator ClusterRole apply")?;
+
+    K8sResourceScore::single(build_cluster_role_binding(&opts), None)
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .context("operator ClusterRoleBinding apply")?;
+
+    // Secret holding both the credentials TOML and the keyfile.
+    // `opts.credentials` is set to `Some` above, which is what the
+    // `expect` message relies on.
+    let secret = operator_secret(&opts).expect("credentials present in opts");
+    K8sResourceScore::single(secret, Some(OPERATOR_NAMESPACE.to_string()))
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .context("operator Secret apply")?;
+
+    // Deployment last so it pulls the up-to-date Secret.
+    K8sResourceScore::single(
+        build_operator_deployment(&opts),
+        Some(OPERATOR_NAMESPACE.to_string()),
+    )
+    .interpret(&Inventory::autoload(), topology)
+    .await
+    .context("operator Deployment apply")?;
+
+    info!("operator deployment {OPERATOR_NAMESPACE}/{RELEASE_NAME} applied");
+    Ok(())
+}
+
+/// Blocks until the operator Deployment reports at least one ready
+/// replica.
+///
+/// Polls the Deployment named `RELEASE_NAME` in the operator namespace
+/// once per second for up to 120 attempts, warning every tenth
+/// attempt. A missing Deployment, missing status, or missing
+/// `ready_replicas` all count as zero ready replicas.
+///
+/// # Errors
+/// Fails if the k8s client cannot be obtained, if a lookup errors, or
+/// if the Deployment is not ready within the attempt budget.
+async fn wait_for_operator_ready(topology: &K8sAnywhereTopology) -> Result<()> {
+    use harmony_fleet_operator::chart::RELEASE_NAME;
+    use k8s_openapi::api::apps::v1::Deployment as K8sDeployment;
+
+    let client = match topology.k8s_client().await {
+        Ok(client) => client,
+        Err(e) => return Err(anyhow::anyhow!("k8s_client: {e}")),
+    };
+
+    for attempt in 1..=120 {
+        let deployment = client
+            .get_resource::<K8sDeployment>(RELEASE_NAME, Some(OPERATOR_NAMESPACE))
+            .await?;
+        let ready_replicas = deployment
+            .and_then(|found| found.status)
+            .and_then(|status| status.ready_replicas)
+            .unwrap_or(0);
+        if ready_replicas >= 1 {
+            return Ok(());
+        }
+
+        let should_warn = attempt % 10 == 0;
+        if should_warn {
+            warn!("operator Deployment not yet Ready ({attempt}/120)");
+        }
+        tokio::time::sleep(Duration::from_secs(1)).await;
+    }
+
+    anyhow::bail!("timed out waiting for operator Deployment to become Ready")
+}
+
+// ---- helpers ---------------------------------------------------------------
+
+/// Resolves `rel` inside the workspace `target/` directory, taken to
+/// be two levels above `CARGO_MANIFEST_DIR`. When that variable cannot
+/// be read at runtime the current directory (`.`) is used as the base.
+/// The path is not canonicalized.
+fn workspace_target_path(rel: &str) -> PathBuf {
+    let mut path = match std::env::var("CARGO_MANIFEST_DIR") {
+        Ok(dir) => PathBuf::from(dir),
+        Err(_) => PathBuf::from("."),
+    };
+    for component in ["..", "..", "target", rel].iter() {
+        path.push(component);
+    }
+    path
+}
+
+// ---- next-steps panel ------------------------------------------------------
+
+impl E2eHandles {
+    /// Prints a summary panel to stdout: cluster, Zitadel and NATS
+    /// endpoints, project id, issuer public key, one line per device
+    /// with its labels as `key=value` pairs, and the command to run the
+    /// test suite.
+    pub fn print_next_steps(&self) {
+        println!();
+        println!("============================================================");
+        println!(" E2E DEMO REHEARSAL — STACK READY");
+        println!("============================================================");
+        println!(" k3d cluster:    {}", self.cluster_name);
+        println!(" Zitadel:        {}", self.zitadel_url);
+        println!(" NATS (host):    {}", self.nats_url_external);
+        println!(" Project ID:     {}", self.project_id);
+        println!(" Issuer pubkey:  {}", self.issuer_pubkey);
+        println!();
+        println!(" Devices ({}):", self.devices.len());
+        for device in self.devices.iter() {
+            let mut pairs: Vec<String> = Vec::with_capacity(device.labels.len());
+            for (key, value) in &device.labels {
+                pairs.push(format!("{key}={value}"));
+            }
+            let rendered_labels = pairs.join(",");
+            println!(
+                "   [{}] {} @ {} ({})",
+                device.index, device.device_id, device.vm_ip, rendered_labels
+            );
+        }
+        println!();
+        println!(" Run the test suite:");
+        println!();
+        println!("   cargo test -p example-fleet-e2e-demo \\");
+        println!("     --test e2e_walking_skeleton -- --test-threads=1 --nocapture");
+        println!();
+        println!(" Ctrl-C exits without tearing the cluster down — re-run");
+        println!(" the bring-up to converge any drift.");
+        println!("============================================================");
+    }
+}
+
+#[cfg(test)]
+mod unit_tests {
+    use super::*;
+
+    /// The username is the device id with the `device-` prefix that the
+    /// callout is configured to strip from the `client_id` claim.
+    #[test]
+    fn device_username_matches_callout_convention() {
+        let username = device_username("vm-device-00");
+        assert_eq!(username, "device-vm-device-00");
+    }
+
+    /// Index 0 and index 1 land in different groups, and both carry the
+    /// ubiquitous `device-id`, `arch` and `role` labels.
+    #[test]
+    fn device_labels_split_into_distinct_groups() {
+        let first = build_device_labels("vm-device-00", 0);
+        let second = build_device_labels("vm-device-01", 1);
+
+        assert_eq!(first.get("group").map(String::as_str), Some("group-a"));
+        assert_eq!(second.get("group").map(String::as_str), Some("group-b"));
+        assert_ne!(first.get("group"), second.get("group"));
+
+        for labels in [&first, &second] {
+            for required in ["device-id", "arch"] {
+                assert!(labels.contains_key(required));
+            }
+            assert_eq!(labels.get("role").map(String::as_str), Some("rehearsal"));
+        }
+    }
+}
--- a/examples/fleet_e2e_demo/src/main.rs
+++ b/examples/fleet_e2e_demo/src/main.rs
@@ -0,0 +1,51 @@
+//! `cargo run -p example-fleet-e2e-demo -- --num-devices 2 ...`
+//!
+//! Brings up the full E2E rehearsal stack: k3d + Zitadel + NATS auth
+//! callout + per-device Zitadel machine users + (out-of-band)
+//! libvirt VMs + agents authenticating via JWT-bearer.
+//!
+//! See `src/lib.rs` and `ROADMAP/fleet_platform/v0_demo_e2e.md`.
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use example_fleet_e2e_demo::{DEFAULT_LIBVIRT_HOST_IP, E2eDemoOpts, bring_up_full_stack};
+use std::path::PathBuf;
+
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet-e2e-demo",
+    about = "VM-based end-to-end rehearsal of the fleet platform demo flow"
+)]
+struct Cli {
+    /// Number of VM-as-device agents to bring up. Each one needs its
+    /// own libvirt domain (provisioned out-of-band today via
+    /// `fleet_vm_setup`; see the `FLEET_E2E_VM_<i>_IP` env vars).
+    #[arg(long, default_value_t = 2)]
+    num_devices: usize,
+    /// Path to the cross-compiled `fleet-agent` binary uploaded to
+    /// each VM. Same binary that smoke-a4 produces.
+    #[arg(long, default_value = "target/release/harmony-fleet-agent")]
+    agent_binary: PathBuf,
+    /// Override for the libvirt host IP (the address VMs see as the
+    /// gateway). Defaults to the libvirt default network's gateway.
+    #[arg(long, default_value = DEFAULT_LIBVIRT_HOST_IP)]
+    libvirt_host_ip: String,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    let cli = Cli::parse();
+    let handles = bring_up_full_stack(E2eDemoOpts {
+        num_devices: cli.num_devices,
+        agent_binary: cli.agent_binary,
+        libvirt_host_ip: cli.libvirt_host_ip,
+    })
+    .await
+    .context("bring_up_full_stack")?;
+    handles.print_next_steps();
+
+    println!();
+    println!(" Press Ctrl-C to exit (cluster keeps running).");
+    tokio::signal::ctrl_c().await?;
+    Ok(())
+}
--- a/examples/fleet_e2e_demo/tests/e2e_walking_skeleton.rs
+++ b/examples/fleet_e2e_demo/tests/e2e_walking_skeleton.rs
@@ -0,0 +1,159 @@
+//! End-to-end walking-skeleton tests for the VM-based demo rehearsal.
+//!
+//! Shares one bring-up across the whole suite via `OnceCell`. Run
+//! sequentially — they touch shared k3d + libvirt VM state.
+//!
+//! Pre-flight (manual, before `cargo test`):
+//!
+//! - libvirt + qemu installed; default network active.
+//! - Two cloud-init Ubuntu VMs provisioned (e.g. via
+//!   `cargo run -p example_fleet_vm_setup`). Their IPs exported as
+//!   `FLEET_E2E_VM_0_IP` and `FLEET_E2E_VM_1_IP`.
+//! - SSH keypair the VMs trust at `~/.ssh/id_ed25519` (or
+//!   override path; harness reads the standard pair).
+//!
+//! Run:
+//!
+//! ```bash
+//! FLEET_E2E_VM_0_IP=192.168.122.42 \
+//! FLEET_E2E_VM_1_IP=192.168.122.43 \
+//! cargo test -p example-fleet-e2e-demo --test e2e_walking_skeleton \
+//!   -- --test-threads=1 --nocapture
+//! ```
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use async_nats::ConnectOptions;
+use example_fleet_auth_callout::{mint_access_token, scopes_for_project};
+use example_fleet_e2e_demo::{E2eDemoOpts, E2eHandles, bring_up_full_stack};
+use futures_util::StreamExt;
+use tokio::sync::OnceCell;
+
+static STACK: OnceCell<Arc<E2eHandles>> = OnceCell::const_new();
+
+async fn shared_stack() -> Result<Arc<E2eHandles>> {
+    let cell = STACK
+        .get_or_try_init(|| async {
+            let h = bring_up_full_stack(E2eDemoOpts::default()).await?;
+            anyhow::Ok(Arc::new(h))
+        })
+        .await?;
+    Ok(cell.clone())
+}
+
+async fn admin_nats_client(stack: &E2eHandles) -> Result<async_nats::Client> {
+    let token = mint_access_token(
+        &stack.zitadel_url,
+        &stack.admin_machine_key,
+        &scopes_for_project(&stack.project_id),
+    )
+    .await
+    .context("mint admin Zitadel token")?;
+    ConnectOptions::with_token(token)
+        .connection_timeout(Duration::from_secs(5))
+        .connect(&stack.nats_url_external)
+        .await
+        .map_err(|e| anyhow::anyhow!("admin connect: {e}"))
+}
+
+// -- Test 1 -------------------------------------------------------------
+
+/// Each provisioned VM publishes a DeviceInfo within the heartbeat
+/// window. Reads from the `device-info` KV bucket via the admin
+/// client (admin role can subscribe to anything).
+#[tokio::test]
+async fn both_devices_heartbeat_within_60s() -> Result<()> {
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+    let admin = admin_nats_client(&stack).await?;
+
+    let js = async_nats::jetstream::new(admin);
+    let bucket = js
+        .get_key_value(harmony_reconciler_contracts::BUCKET_DEVICE_INFO)
+        .await
+        .context("device-info bucket")?;
+
+    let deadline = std::time::Instant::now() + Duration::from_secs(60);
+    let expected: std::collections::HashSet<String> =
+        stack.devices.iter().map(|d| d.device_id.clone()).collect();
+    let mut seen = std::collections::HashSet::new();
+
+    while std::time::Instant::now() < deadline && seen != expected {
+        for d in &stack.devices {
+            let key = harmony_reconciler_contracts::device_info_key(&d.device_id);
+            if bucket.entry(&key).await?.is_some() {
+                seen.insert(d.device_id.clone());
+            }
+        }
+        tokio::time::sleep(Duration::from_millis(500)).await;
+    }
+    assert_eq!(
+        seen, expected,
+        "each provisioned device must publish DeviceInfo within 60s; saw {seen:?}"
+    );
+    Ok(())
+}
+
+// -- Test 5 (admin cross-device read) -----------------------------------
+
+/// The admin's Zitadel JWT carries `fleet-admin` role. Callout maps
+/// that to `pub/sub allow: [">"]`, so subscribing to `device-state.>`
+/// is admitted and observes every device's traffic.
+#[tokio::test]
+async fn admin_jwt_reads_any_device_subject() -> Result<()> {
+    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
+    let stack = shared_stack().await?;
+    let admin = admin_nats_client(&stack).await?;
+
+    let mut sub = admin.subscribe("device-state.>").await?;
+    admin.flush().await?;
+
+    // Hold the subscription open long enough that any device's
+    // periodic state publication should land. We don't pump traffic
+    // ourselves — the agents themselves publish per-deployment state
+    // on every reconcile tick. If no traffic arrives in 30s it means
+    // either the agents aren't connected or they're not publishing,
+    // both of which are fatal for the demo.
+    let result = tokio::time::timeout(Duration::from_secs(30), sub.next()).await;
+    assert!(
+        matches!(result, Ok(Some(_))),
+        "admin must observe at least one device-state.* message in 30s"
+    );
+    Ok(())
+}
+
+// -- Test 6 (per-device isolation) ---------------------------------------
+
+/// A per-device JWT has subject permissions scoped to its own
+/// `device-state.{device_id}` and `device-commands.{device_id}`. The
+/// callout enforces this; subscribing to a sibling device's commands
+/// must fail at NATS connect-time or at SUB-time.
+///
+/// Skipped here because the per-device JWT minting helper (analogous
+/// to `mint_access_token` but for a `device` role user) needs the
+/// per-device machine key to be plumbed back from `bring_up_full_stack`
+/// through `E2eHandles`. Follow-up commit adds
+/// `E2eHandles::device_machine_key(idx)` so this test can be
+/// implemented without re-running `ZitadelSetupScore` from the test
+/// body.
+#[tokio::test]
+#[ignore = "requires E2eHandles::device_machine_key plumbing"]
+async fn cross_device_isolation_enforced_in_vm() {}
+
+// -- Test 7 (load-bearing reconnect) -------------------------------------
+
+/// Kill the NATS pod, wait for the new one to come up, verify both
+/// agents reconnect with fresh JWTs and resume publishing within
+/// 30 seconds. This is the test that validates the "never lose
+/// connectivity to a device" guarantee under realistic disturbance.
+///
+/// Skipped pending operator install in the harness — without the
+/// operator the agents have no `desired-state` to publish status
+/// against, so verifying "publishing resumed" needs a separate
+/// signal. Follow-up commit observes the agents' periodic
+/// heartbeat publication directly via the device-heartbeat KV.
+#[tokio::test]
+#[ignore = "requires NATS-pod-restart driver and heartbeat-presence assertion"]
+async fn agent_recovers_from_nats_pod_restart() {}
--- a/examples/fleet_rpi_setup/Cargo.toml
+++ b/examples/fleet_rpi_setup/Cargo.toml
@@ -17,3 +17,7 @@ tokio.workspace = true
 log.workspace = true
 anyhow.workspace = true
 clap.workspace = true
+reqwest = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
+base64 = "0.22"
--- a/examples/fleet_rpi_setup/src/main.rs
+++ b/examples/fleet_rpi_setup/src/main.rs
@@ -31,11 +31,13 @@
 //!   - Python 3 + `python3-venv` (Ansible is auto-bootstrapped into a venv)
 //!   - A cross-compiled `fleet-agent` binary for aarch64

+mod zitadel_bootstrap;
+
 use anyhow::{Context, Result};
 use clap::Parser;
 use harmony::config::secret::SudoPassword;
 use harmony::inventory::Inventory;
-use harmony::modules::fleet::{FleetDeviceSetupConfig, FleetDeviceSetupScore};
+use harmony::modules::fleet::{FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore};
 use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv, ssh_exec};
 use harmony_secret::SecretManager;
 use harmony_types::id::Id;
@@ -73,10 +75,41 @@ struct Cli {
    /// NATS URL the agent should connect to.
    #[arg(long)]
    nats_url: String,
+    /// Shared NATS username — used in `toml-shared` mode (no SSO).
+    /// Ignored when `--bootstrap-token` is set.
    #[arg(long, default_value = "smoke")]
    nats_user: String,
+    /// Shared NATS password — used in `toml-shared` mode (no SSO).
+    /// Ignored when `--bootstrap-token` is set.
    #[arg(long, default_value = "smoke")]
    nats_pass: String,
+    /// Zitadel admin Personal Access Token used to provision a
+    /// per-device machine user + role grant + JWT key on this Pi.
+    /// When set, the agent's NATS auth flips from `toml-shared` to
+    /// `zitadel-jwt` and the issued machine key is dropped onto the
+    /// Pi at `/etc/fleet-agent/zitadel-key.json`. The PAT itself is
+    /// used only by this CLI invocation — it never lands on the Pi.
+    #[arg(long, env = "HARMONY_ZITADEL_ADMIN_PAT")]
+    bootstrap_token: Option<String>,
+    /// Externally-visible Zitadel issuer URL (e.g.
+    /// `https://zitadel.customer1.nationtech.io`). Required when
+    /// `--bootstrap-token` is set.
+    #[arg(long)]
+    zitadel_issuer_url: Option<String>,
+    /// Zitadel project ID hosting the fleet roles. Required when
+    /// `--bootstrap-token` is set. Used as both the JWT-bearer
+    /// audience scope target and the role-claim path qualifier.
+    #[arg(long)]
+    zitadel_project_id: Option<String>,
+    /// Zitadel role key to grant the per-device machine user.
+    /// Defaults to `device` (matches the auth callout's
+    /// `device_role` config).
+    #[arg(long, default_value = "device")]
+    zitadel_device_role: String,
+    /// Whether the agent's HTTP client to Zitadel accepts invalid
+    /// TLS certs. Local-dev escape hatch; default false.
+    #[arg(long)]
+    danger_accept_invalid_certs: bool,
 }

 #[tokio::main]
@@ -127,13 +160,14 @@ async fn main() -> Result<()> {
    let topology = LinuxHostTopology::new(format!("rpi-{}", cli.pi_host), pi_ip, creds);

    let labels = parse_labels(&cli.labels)?;
+    let auth = build_auth(&cli, &device_id).await?;
    let score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig {
-        device_id,
+        device_id: device_id.clone(),
        labels,
        nats_urls: vec![cli.nats_url.clone()],
-        nats_user: cli.nats_user.clone(),
-        nats_pass: cli.nats_pass.clone(),
+        auth,
        agent_binary_path: cli.agent_binary.clone(),
+        hosts_entries: vec![],
    });

    // We have our own clap CLI, so harmony_cli must NOT call
@@ -161,6 +195,53 @@ async fn main() -> Result<()> {
    Ok(())
 }

+/// Build the per-device auth block. Either:
+/// - `--bootstrap-token` is set → mint a per-device Zitadel machine
+///   user + role grant + JWT key via the Management API and embed the
+///   key JSON in `FleetDeviceAuth::ZitadelJwt`. The bootstrap PAT
+///   never leaves this CLI invocation.
+/// - Otherwise → fall back to `--nats-user`/`--nats-pass` shared creds.
+async fn build_auth(cli: &Cli, device_id: &Id) -> Result<FleetDeviceAuth> {
+    let Some(pat) = cli.bootstrap_token.clone() else {
+        info!("no --bootstrap-token; using shared NATS user/pass (toml-shared)");
+        return Ok(FleetDeviceAuth::TomlShared {
+            nats_user: cli.nats_user.clone(),
+            nats_pass: cli.nats_pass.clone(),
+        });
+    };
+    let issuer = cli
+        .zitadel_issuer_url
+        .clone()
+        .context("--bootstrap-token requires --zitadel-issuer-url")?;
+    let project_id = cli
+        .zitadel_project_id
+        .clone()
+        .context("--bootstrap-token requires --zitadel-project-id")?;
+
+    info!("bootstrapping Zitadel machine user device-{device_id} on project {project_id}");
+    let bootstrap = zitadel_bootstrap::ZitadelBootstrap::new(
+        issuer.clone(),
+        pat,
+        cli.danger_accept_invalid_certs,
+    );
+    let key_json = bootstrap
+        .ensure_device_machine_user(
+            &format!("device-{device_id}"),
+            &device_id.to_string(),
+            &project_id,
+            &cli.zitadel_device_role,
+        )
+        .await
+        .context("Zitadel device bootstrap failed")?;
+
+    Ok(FleetDeviceAuth::ZitadelJwt {
+        machine_key_json: key_json,
+        oidc_issuer_url: issuer,
+        audience: project_id,
+        danger_accept_invalid_certs: cli.danger_accept_invalid_certs,
+    })
+}
+
 fn parse_labels(raw: &str) -> Result<std::collections::BTreeMap<String, String>> {
    let mut out = std::collections::BTreeMap::new();
    for piece in raw.split(',').map(str::trim).filter(|p| !p.is_empty()) {
--- a/examples/fleet_rpi_setup/src/zitadel_bootstrap.rs
+++ b/examples/fleet_rpi_setup/src/zitadel_bootstrap.rs
@@ -0,0 +1,247 @@
+//! Per-device Zitadel bootstrap for the Pi onboarding flow.
+//!
+//! Invoked once per Pi from the operator's machine. Uses the admin PAT
+//! given on the CLI to:
+//!
+//! 1. Find or create a machine user `device-${device_id}` in Zitadel.
+//! 2. Find or create a user grant for the device role on the project.
+//! 3. Mint a new JSON-typed JWT signing key for that user (every run).
+//!
+//! Returns the JSON keyfile content. The caller drops it onto the Pi
+//! via `FleetDeviceSetupScore`. The admin PAT is held in CLI memory
+//! for the duration of the run only — it never lands on the Pi.
+//!
+//! User and grant creation are idempotent; the key is not: every run
+//! mints a fresh key, and keys from earlier runs stay valid in Zitadel.
+//!
+//! NOTE: This is intentionally a minimal Management-API client. It
+//! duplicates a small slice of `harmony::modules::zitadel::setup` (the
+//! in-cluster ZitadelSetupScore) because `fleet_rpi_setup` runs on the
+//! operator's machine without a kubeconfig pointing at the Zitadel
+//! cluster. Refactoring the in-cluster Score's HTTP layer into a
+//! reusable client crate is a follow-up.
+
+use anyhow::{Context, Result};
+use base64::Engine;
+use serde::Deserialize;
+
+pub struct ZitadelBootstrap {
+    issuer_url: String,
+    admin_pat: String,
+    http: reqwest::Client,
+}
+
+impl ZitadelBootstrap {
+    pub fn new(issuer_url: String, admin_pat: String, danger_accept_invalid_certs: bool) -> Self {
+        let http = reqwest::Client::builder()
+            .danger_accept_invalid_certs(danger_accept_invalid_certs)
+            .timeout(std::time::Duration::from_secs(10))
+            .build()
+            .expect("reqwest client builder is infallible for these settings");
+        Self {
+            issuer_url,
+            admin_pat,
+            http,
+        }
+    }
+
+    /// Ensure machine user + role grant for one device, then mint a key.
+    /// Returns the JSON keyfile content (raw, decoded from Zitadel's
+    /// base64 `keyDetails`). Re-running with the same `username` reuses
+    /// the existing user and an existing grant on `project_id`, but a
+    /// fresh key is always minted and returned — the private half of an
+    /// earlier key cannot be read back from Zitadel.
+    pub async fn ensure_device_machine_user(
+        &self,
+        username: &str,
+        device_id: &str,
+        project_id: &str,
+        role_key: &str,
+    ) -> Result<String> {
+        let user_id = match self.find_user_by_name(username).await? {
+            Some(id) => id,
+            None => self
+                .create_machine_user(username, device_id)
+                .await
+                .with_context(|| format!("creating machine user {username}"))?,
+        };
+        log::info!("[zitadel-bootstrap] machine user {username} → {user_id}");
+
+        // The grant API rejects duplicates (code 6, ALREADY_EXISTS), so search first.
+        // NOTE(review): any grant on `project_id` counts, even one lacking `role_key`.
+        if self.find_user_grant(&user_id, project_id).await?.is_none() {
+            self.create_user_grant(&user_id, project_id, role_key)
+                .await
+                .with_context(|| {
+                    format!("granting role {role_key} on project {project_id} to {username}")
+                })?;
+            log::info!("[zitadel-bootstrap] granted role {role_key} on project {project_id}");
+        } else {
+            log::info!("[zitadel-bootstrap] role grant already present");
+        }
+
+        // Always mint a fresh key — Zitadel doesn't expose the private
+        // half of existing keys, so we can't reuse one. Stale keys
+        // remain valid until expiry but never get reused on this Pi
+        // because the agent's keyfile is overwritten on each setup run.
+        let key_json = self
+            .create_machine_key(&user_id)
+            .await
+            .with_context(|| format!("minting machine key for {username}"))?;
+        Ok(key_json)
+    }
+
+    fn url(&self, path: &str) -> String {
+        format!("{}{path}", self.issuer_url.trim_end_matches('/'))
+    }
+
+    async fn find_user_by_name(&self, username: &str) -> Result<Option<String>> {
+        let resp = self
+            .http
+            .post(self.url("/management/v1/users/_search"))
+            .bearer_auth(&self.admin_pat)
+            .json(&serde_json::json!({
+                "queries": [{
+                    "userNameQuery": {
+                        "userName": username,
+                        "method": "TEXT_QUERY_METHOD_EQUALS"
+                    }
+                }]
+            }))
+            .send()
+            .await
+            .context("POST users/_search")?;
+        if !resp.status().is_success() {
+            let s = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("users/_search returned {s}: {body}");
+        }
+        #[derive(Deserialize)]
+        struct R {
+            #[serde(default)]
+            result: Vec<E>,
+        }
+        #[derive(Deserialize)]
+        struct E {
+            id: String,
+            #[serde(rename = "userName", default)]
+            user_name: Option<String>,
+        }
+        let r: R = resp.json().await.context("parse users/_search")?;
+        Ok(r.result
+            .into_iter()
+            .find(|e| e.user_name.as_deref() == Some(username))
+            .map(|e| e.id))
+    }
+
+    async fn create_machine_user(&self, username: &str, device_id: &str) -> Result<String> {
+        let resp = self
+            .http
+            .post(self.url("/management/v1/users/machine"))
+            .bearer_auth(&self.admin_pat)
+            .json(&serde_json::json!({
+                "userName": username,
+                "name": format!("Fleet Device {device_id}"),
+                "description": format!("Provisioned by fleet_rpi_setup for device {device_id}"),
+                "accessTokenType": "ACCESS_TOKEN_TYPE_JWT"
+            }))
+            .send()
+            .await
+            .context("POST users/machine")?;
+        if !resp.status().is_success() {
+            let s = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("create machine user returned {s}: {body}");
+        }
+        #[derive(Deserialize)]
+        struct R {
+            #[serde(rename = "userId")]
+            user_id: String,
+        }
+        let r: R = resp.json().await.context("parse machine user response")?;
+        Ok(r.user_id)
+    }
+
+    async fn create_machine_key(&self, user_id: &str) -> Result<String> {
+        let resp = self
+            .http
+            .post(self.url(&format!("/management/v1/users/{user_id}/keys")))
+            .bearer_auth(&self.admin_pat)
+            .json(&serde_json::json!({ "type": "KEY_TYPE_JSON" }))
+            .send()
+            .await
+            .with_context(|| format!("POST users/{user_id}/keys"))?; // name the actual user
+        if !resp.status().is_success() {
+            let s = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("create machine key returned {s}: {body}");
+        }
+        #[derive(Deserialize)]
+        struct R {
+            #[serde(rename = "keyDetails")]
+            key_details: String,
+        }
+        let r: R = resp.json().await.context("parse machine key response")?;
+        let bytes = base64::engine::general_purpose::STANDARD
+            .decode(&r.key_details)
+            .context("decode keyDetails base64")?;
+        String::from_utf8(bytes).context("keyDetails is non-UTF-8")
+    }
+
+    async fn find_user_grant(&self, user_id: &str, project_id: &str) -> Result<Option<String>> {
+        let resp = self
+            .http
+            .post(self.url(&format!("/management/v1/users/{user_id}/grants/_search")))
+            .bearer_auth(&self.admin_pat)
+            .json(&serde_json::json!({}))
+            .send()
+            .await
+            .with_context(|| format!("POST users/{user_id}/grants/_search"))?; // name the user
+        if !resp.status().is_success() {
+            let s = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("grants/_search returned {s}: {body}");
+        }
+        #[derive(Deserialize)]
+        struct R {
+            #[serde(default)]
+            result: Vec<E>,
+        }
+        #[derive(Deserialize)]
+        struct E {
+            id: String,
+            #[serde(rename = "projectId")]
+            project_id: String,
+        }
+        let r: R = resp.json().await.context("parse grants/_search")?;
+        Ok(r.result
+            .into_iter()
+            .find(|e| e.project_id == project_id)
+            .map(|e| e.id))
+    }
+
+    async fn create_user_grant(
+        &self,
+        user_id: &str,
+        project_id: &str,
+        role_key: &str,
+    ) -> Result<()> {
+        let resp = self
+            .http
+            .post(self.url(&format!("/management/v1/users/{user_id}/grants")))
+            .bearer_auth(&self.admin_pat)
+            .json(&serde_json::json!({
+                "projectId": project_id,
+                "roleKeys": [role_key]
+            }))
+            .send()
+            .await
+            .with_context(|| format!("POST users/{user_id}/grants"))?; // name the actual user
+        if !resp.status().is_success() {
+            let s = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("create grant returned {s}: {body}");
+        }
+        Ok(())
+    }
+}
--- a/examples/fleet_sso_login/Cargo.toml
+++ b/examples/fleet_sso_login/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "example-fleet-sso-login"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "Developer-side CLI: log in to a fleet platform staging instance via Zitadel device-code OIDC"
+
+[[bin]]
+name = "fleet-sso-login"
+path = "src/main.rs"
+
+[dependencies]
+reqwest = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
+anyhow.workspace = true
+clap = { version = "4", features = ["derive", "env"] }
+base64 = "0.22"
+log.workspace = true
+env_logger.workspace = true
+directories = "6.0.0"
--- a/examples/fleet_sso_login/src/main.rs
+++ b/examples/fleet_sso_login/src/main.rs
@@ -0,0 +1,266 @@
+//! Developer-side CLI: log in to a fleet platform staging instance via
+//! Zitadel's OIDC Device Authorization Grant (RFC 8628).
+//!
+//! Usage:
+//!
+//! ```text
+//! cargo run -p example-fleet-sso-login -- \
+//!   --base-domain customer1.nationtech.io \
+//!   --client-id 366378028009259038
+//! ```
+//!
+//! Flow:
+//! 1. POST to `/oauth/v2/device_authorization` with the CLI client_id —
+//!    receive a `verification_uri_complete`, `user_code`, `device_code`
+//!    and a polling interval.
+//! 2. Print the URL the user opens in their browser. They authenticate
+//!    via Zitadel (username/password, MFA, SSO chain — Zitadel handles
+//!    that part).
+//! 3. Poll `/oauth/v2/token` with grant type
+//!    `urn:ietf:params:oauth:grant-type:device_code` until the access token is issued.
+//! 4. Decode the access token's claims, print "Welcome <preferred
+//!    username>", and persist the session at
+//!    `$DATA_DIR/harmony/sso-session.json`.
+//!
+//! No K8s API call yet — for the demo, this CLI proves the SSO works.
+//! Future: a `harmony fleet apply` subcommand uses the persisted token
+//! to talk to a fleet-platform API gateway. That gateway is post-demo.
+
+use std::path::PathBuf;
+use std::time::Duration;
+
+use anyhow::{Context, Result, bail};
+use base64::Engine;
+use clap::Parser;
+use serde::{Deserialize, Serialize};
+
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet-sso-login",
+    about = "Log in to a fleet platform staging instance via Zitadel device-code OIDC"
+)]
+struct Cli {
+    /// Base DNS domain — same value the operator passed to
+    /// fleet-staging-deploy. The Zitadel issuer derives as
+    /// `https://zitadel.<base>`.
+    #[arg(long, env = "FLEET_BASE_DOMAIN")]
+    base_domain: String,
+    /// OIDC client_id of the `harmony-cli` Device Code app on the
+    /// Zitadel project. Printed by `fleet-staging-deploy` at the end
+    /// of a successful run.
+    #[arg(long, env = "FLEET_CLI_CLIENT_ID")]
+    client_id: String,
+    /// Override the polling interval suggested by Zitadel
+    /// (defaults to whatever the device-authorization endpoint returned;
+    /// pass to short-circuit during testing).
+    #[arg(long)]
+    poll_interval_secs: Option<u64>,
+}
+
+#[derive(Debug, Deserialize)]
+struct DeviceAuthResponse {
+    device_code: String,
+    user_code: String,
+    verification_uri: String,
+    #[serde(default)]
+    verification_uri_complete: Option<String>,
+    expires_in: u64,
+    #[serde(default)]
+    interval: Option<u64>,
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+struct TokenResponse {
+    access_token: String,
+    #[serde(default)]
+    id_token: Option<String>,
+    #[serde(default)]
+    refresh_token: Option<String>,
+    #[serde(default)]
+    expires_in: Option<u64>,
+    #[serde(default)]
+    token_type: Option<String>,
+}
+
+#[derive(Debug, Deserialize)]
+struct TokenError {
+    error: String,
+    #[serde(default)]
+    error_description: Option<String>,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init();
+    let cli = Cli::parse();
+
+    let issuer = format!("https://zitadel.{}", cli.base_domain);
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(15))
+        .build()?;
+
+    // -- Step 1: kick off the device flow ----------------------------
+    let device_auth_url = format!("{issuer}/oauth/v2/device_authorization");
+    let scope =
+        "openid profile email urn:zitadel:iam:user:resourceowner urn:zitadel:iam:org:project:roles";
+    let resp = client
+        .post(&device_auth_url)
+        .form(&[("client_id", cli.client_id.as_str()), ("scope", scope)])
+        .send()
+        .await
+        .with_context(|| format!("POST {device_auth_url}"))?;
+    if !resp.status().is_success() {
+        let s = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        bail!("device_authorization returned {s}: {body}");
+    }
+    let auth: DeviceAuthResponse = resp.json().await.context("parse device_authorization")?;
+
+    let display_url = auth
+        .verification_uri_complete
+        .clone()
+        .unwrap_or_else(|| auth.verification_uri.clone());
+    println!();
+    println!("============================================================");
+    println!(" Open this URL in your browser to log in:");
+    println!();
+    println!("   {display_url}");
+    println!();
+    println!(" If the URL doesn't pre-fill the code, enter:");
+    println!();
+    println!("   user_code: {}", auth.user_code);
+    println!();
+    println!(
+        " Waiting for browser-side completion (expires in {}s)...",
+        auth.expires_in
+    );
+    println!("============================================================");
+    println!();
+
+    // -- Step 2: poll the token endpoint -----------------------------
+    let token_url = format!("{issuer}/oauth/v2/token");
+    let mut interval =
+        Duration::from_secs(cli.poll_interval_secs.unwrap_or(auth.interval.unwrap_or(5)));
+    let deadline = std::time::Instant::now() + Duration::from_secs(auth.expires_in);
+
+    let access_token = loop {
+        if std::time::Instant::now() > deadline {
+            bail!("device-code expired before user completed login");
+        }
+        tokio::time::sleep(interval).await;
+        let resp = client
+            .post(&token_url)
+            .form(&[
+                ("grant_type", "urn:ietf:params:oauth:grant-type:device_code"),
+                ("device_code", auth.device_code.as_str()),
+                ("client_id", cli.client_id.as_str()),
+            ])
+            .send()
+            .await
+            .context("POST token")?;
+        let status = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        if status.is_success() {
+            let tr: TokenResponse =
+                serde_json::from_str(&body).context("parse token success body")?;
+            break tr.access_token;
+        }
+        // Per RFC 8628, the token endpoint returns specific error
+        // codes during polling — `authorization_pending` and
+        // `slow_down` are NOT terminal, every other error is.
+        let err: TokenError = serde_json::from_str(&body).unwrap_or_else(|_| TokenError {
+            error: format!("http_{}", status.as_u16()),
+            error_description: Some(body.clone()),
+        });
+        match err.error.as_str() {
+            "authorization_pending" => {
+                log::debug!("authorization_pending — user hasn't approved yet");
+                continue;
+            }
+            "slow_down" => {
+                log::info!("server requested slow_down — increasing poll interval");
+                interval += Duration::from_secs(5); // RFC 8628 §3.5: +5s for this and all later polls
+                continue;
+            }
+            other => bail!(
+                "token endpoint refused: {other} ({})",
+                err.error_description.unwrap_or_default()
+            ),
+        }
+    };
+
+    // -- Step 3: introspect + persist --------------------------------
+    let claims = decode_jwt_claims(&access_token).unwrap_or_default();
+    let display_name = claims
+        .get("name")
+        .or_else(|| claims.get("preferred_username"))
+        .and_then(|v| v.as_str())
+        .unwrap_or("(unknown)");
+    let email = claims
+        .get("email")
+        .and_then(|v| v.as_str())
+        .unwrap_or("(no email)");
+
+    persist_session(&issuer, &cli.client_id, &access_token, &claims)?;
+
+    println!();
+    println!("============================================================");
+    println!(" SSO LOGIN SUCCESSFUL");
+    println!("============================================================");
+    println!(" Welcome, {display_name} <{email}>");
+    println!(" Session stored at: {}", session_path().display());
+    println!("============================================================");
+    Ok(())
+}
+
+fn decode_jwt_claims(jwt: &str) -> Option<serde_json::Value> {
+    // JWT segments are unpadded base64url; tolerate stray `=` padding rather than fail.
+    let payload_b64 = jwt.split('.').nth(1)?.trim_end_matches('=');
+    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
+        .decode(payload_b64)
+        .ok()?;
+    serde_json::from_slice(&bytes).ok()
+}
+
+#[derive(Serialize)]
+struct PersistedSession<'a> {
+    issuer: &'a str,
+    client_id: &'a str,
+    access_token: &'a str,
+    claims: &'a serde_json::Value,
+}
+
+fn persist_session(
+    issuer: &str,
+    client_id: &str,
+    access_token: &str,
+    claims: &serde_json::Value,
+) -> Result<()> {
+    let path = session_path();
+    if let Some(parent) = path.parent() {
+        std::fs::create_dir_all(parent)
+            .with_context(|| format!("create session dir {}", parent.display()))?;
+    }
+    let json = serde_json::to_string_pretty(&PersistedSession {
+        issuer,
+        client_id,
+        access_token,
+        claims,
+    })?;
+    let mut opts = std::fs::OpenOptions::new();
+    opts.write(true).create(true).truncate(true);
+    #[cfg(unix)] // 0600 from creation, so the token is never readable by other users
+    std::os::unix::fs::OpenOptionsExt::mode(&mut opts, 0o600);
+    let mut file = opts.open(&path).with_context(|| format!("open session {}", path.display()))?;
+    #[cfg(unix)] // best-effort: re-tighten a pre-existing file BEFORE the token is written
+    file.set_permissions(std::os::unix::fs::PermissionsExt::from_mode(0o600)).ok();
+    std::io::Write::write_all(&mut file, json.as_bytes())
+        .with_context(|| format!("write session to {}", path.display()))
+}
+
+fn session_path() -> PathBuf {
+    directories::BaseDirs::new()
+        .map(|d| d.data_dir().join("harmony").join("sso-session.json"))
+        .unwrap_or_else(|| PathBuf::from("/tmp/harmony-sso-session.json"))
+}
--- a/examples/fleet_staging_deploy/Cargo.toml
+++ b/examples/fleet_staging_deploy/Cargo.toml
@@ -0,0 +1,36 @@
+[package]
+name = "example-fleet-staging-deploy"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "Deploy the fleet platform stack (Zitadel + NATS + auth callout) onto an OKD/Kubernetes cluster. Operator-side, run-once-per-customer."
+
+[lib]
+name = "example_fleet_staging_deploy"
+path = "src/lib.rs"
+
+[[bin]]
+name = "fleet-staging-deploy"
+path = "src/main.rs"
+
+[dependencies]
+harmony = { path = "../../harmony" }
+harmony-k8s = { path = "../../harmony-k8s" }
+harmony_types = { path = "../../harmony_types" }
+harmony-nats-callout = { path = "../../nats/callout" }
+nkeys = "0.4"
+async-nats.workspace = true
+reqwest = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
+serde.workspace = true
+serde_json.workspace = true
+anyhow.workspace = true
+log.workspace = true
+env_logger.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+clap = { version = "4", features = ["derive", "env"] }
+k8s-openapi.workspace = true
+kube.workspace = true
+url.workspace = true
--- a/examples/fleet_staging_deploy/src/lib.rs
+++ b/examples/fleet_staging_deploy/src/lib.rs
@@ -0,0 +1,572 @@
+//! Operator-side staging deploy harness.
+//!
+//! Runs once per customer instance against an OKD / Kubernetes cluster
+//! to bring up the fleet platform's central services:
+//!
+//! 1. Zitadel + Postgres (HTTPS via OKD HAProxy ingress, edge TLS).
+//! 2. The fleet project + roles (`fleet-admin`, `device`) + an API app
+//!    (so the project ID can be the JWT-bearer audience).
+//! 3. NATS with `auth_callout` and a WSS ingress (so Pis on a customer
+//!    LAN connect through `wss://nats.<base>/`).
+//! 4. The auth callout Deployment, configured to validate Zitadel JWTs
+//!    and emit per-device permissions on user JWTs to NATS.
+//!
+//! Everything keys off [`FleetDomainConfig::base_domain`] —
+//! `zitadel.<base>` and `nats.<base>` are the only
+//! customer-visible hostnames. Pi-side onboarding (see
+//! `examples/fleet_rpi_setup/`) consumes the Zitadel admin PAT plus
+//! the project ID this harness prints, so the operator's flow is:
+//!
+//! ```text
+//! cargo run -p example-fleet-staging-deploy -- --base-domain customer1.nationtech.io
+//!   ↓ prints PROJECT_ID, NATS WSS URL, instructions to extract iam-admin-pat
+//! HARMONY_ZITADEL_ADMIN_PAT=$(kubectl -n zitadel get secret iam-admin-pat -o jsonpath='{.data.pat}' | base64 -d) \
+//! cargo run -p example-fleet-rpi-setup -- \
+//!   --pi-host 192.168.1.42 \
+//!   --bootstrap-token "$HARMONY_ZITADEL_ADMIN_PAT" \
+//!   --zitadel-issuer-url https://zitadel.customer1.nationtech.io \
+//!   --zitadel-project-id <PROJECT_ID printed above> \
+//!   --nats-url wss://nats.customer1.nationtech.io/ \
+//!   --agent-binary ./target/aarch64-unknown-linux-gnu/release/fleet-agent
+//! ```
+//!
+//! The harness is **idempotent** by design — re-running picks up
+//! existing resources via the new helm-upgrade-by-default behavior +
+//! ZitadelSetupScore's search-then-create flow + a persisted issuer
+//! NKey in a K8s secret so user JWTs survive restarts.
+
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use harmony::inventory::Inventory;
+use harmony::modules::nats::NatsHelmChartScore;
+use harmony::modules::nats_auth_callout::{NatsAuthCalloutScore, render_auth_callout_block};
+use harmony::modules::zitadel::{
+    ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig, ZitadelRole,
+    ZitadelScore, ZitadelSetupScore,
+};
+use harmony::score::Score;
+use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
+use log::info;
+use nkeys::KeyPair;
+
+// ---- domain config ---------------------------------------------------------
+
+/// Single source of truth for all customer-visible hostnames. Every
+/// `<app>.<base>` URL the staging deploy emits derives from the one
+/// base domain — no hostnames are hardcoded so the same code runs
+/// across customers / staging / canary instances.
+#[derive(Debug, Clone)]
+pub struct FleetDomainConfig {
+    /// e.g. `customer1.nationtech.io`. The deploy derives
+    /// `zitadel.<base>` and `nats.<base>` from it.
+    pub base_domain: String,
+}
+
+impl FleetDomainConfig {
+    pub fn new(base_domain: impl Into<String>) -> Self {
+        Self {
+            base_domain: base_domain.into(),
+        }
+    }
+    pub fn zitadel_host(&self) -> String {
+        format!("zitadel.{}", self.base_domain)
+    }
+    pub fn nats_wss_host(&self) -> String {
+        format!("nats.{}", self.base_domain)
+    }
+    pub fn zitadel_issuer_url(&self) -> String {
+        format!("https://{}", self.zitadel_host())
+    }
+    pub fn nats_wss_url(&self) -> String {
+        format!("wss://{}/", self.nats_wss_host())
+    }
+}
+
+// ---- naming + constants ----------------------------------------------------
+
+pub const FLEET_NAMESPACE: &str = "fleet-system";
+pub const NATS_RELEASE: &str = "fleet-nats";
+pub const CALLOUT_DEPLOYMENT_NAME: &str = "fleet-callout";
+pub const PROJECT_NAME: &str = "fleet";
+pub const API_APP_NAME: &str = "nats";
+pub const CLI_APP_NAME: &str = "harmony-cli";
+pub const ADMIN_ROLE_KEY: &str = "fleet-admin";
+pub const DEVICE_ROLE_KEY: &str = "device";
+pub const NATS_AUTH_USER: &str = "auth";
+pub const NATS_ACCOUNT: &str = "DEVICES";
+pub const NATS_SYSTEM_USER: &str = "sys-admin";
+pub const ISSUER_SEED_SECRET: &str = "callout-issuer-seed";
+
+// ---- handles ---------------------------------------------------------------
+
+#[derive(Debug, Clone)]
+pub struct StagingHandles {
+    pub domain: FleetDomainConfig,
+    pub project_id: String,
+    pub issuer_pubkey: String,
+    /// Reference of the callout image, expected to already exist in a
+    /// registry the cluster pulls from. The operator pushes it before
+    /// running the deploy; it is echoed here so the final printout
+    /// shows which image was deployed.
+    pub callout_image: String,
+    /// OIDC client_id of the `harmony-cli` Device Code app — what the
+    /// `fleet_sso_login` CLI sends in its device-authorization request.
+    /// `None` when the local Zitadel client-config cache has no entry
+    /// for the app, e.g. the app already existed and the deploy was
+    /// re-run after `rm -rf ~/.local/share/harmony/zitadel/`.
+    pub cli_client_id: Option<String>,
+}
+
+// ---- bring up --------------------------------------------------------------
+
+pub struct StagingDeployOpts {
+    pub domain: FleetDomainConfig,
+    pub kubeconfig_context: Option<String>,
+    /// Image reference the cluster will pull. Operator must have
+    /// pushed this beforehand (e.g. `quay.io/customer/harmony-nats-callout:demo`).
+    pub callout_image: String,
+    /// Password for the callout's own NATS connection (user `auth`).
+    /// Stored in a K8s secret + listed in the chart's
+    /// `accounts.<account>.users` so that connection is not itself sent
+    /// through auth callout (otherwise it'd deadlock authenticating itself).
+    pub nats_auth_pass: String,
+    /// SYS account password (for `kubectl exec nats-box` debugging).
+    pub nats_system_pass: String,
+}
+
+pub async fn bring_up_staging(opts: StagingDeployOpts) -> Result<StagingHandles> {
+    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+        .try_init();
+
+    if let Some(ctx) = &opts.kubeconfig_context {
+        unsafe {
+            std::env::set_var("HARMONY_K8S_CONTEXT", ctx);
+            std::env::set_var("HARMONY_USE_LOCAL_K3D", "false");
+            std::env::set_var("HARMONY_AUTOINSTALL", "false");
+        }
+    }
+    let topology = K8sAnywhereTopology::from_env();
+    topology.ensure_ready().await.context("topology init")?;
+
+    info!(
+        "[1/5] deploying Zitadel at https://{}",
+        opts.domain.zitadel_host()
+    );
+    deploy_zitadel(&opts.domain, &topology).await?;
+
+    info!("[2/5] waiting for Zitadel HTTPS to respond");
+    wait_for_zitadel_ready(&opts.domain).await?;
+
+    info!("[3/5] provisioning project '{PROJECT_NAME}', api app, CLI device-code app, and roles");
+    provision_zitadel_project(&opts.domain, &topology).await?;
+    let project_id = read_project_id()?;
+    let cli_client_id = read_cli_client_id();
+    info!(" → project_id = {project_id}");
+    if let Some(cid) = &cli_client_id {
+        info!(" → cli_client_id = {cid}");
+    } else {
+        log::warn!(
+            " → cli_client_id missing from cache; CLI login won't work until you reset the local zitadel cache"
+        );
+    }
+
+    info!("[4/5] generating issuer NKey + deploying NATS with auth_callout + WSS ingress");
+    let issuer_seed = ensure_issuer_seed(&topology).await?;
+    let issuer_kp = KeyPair::from_seed(&issuer_seed)
+        .map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
+    let issuer_pubkey = issuer_kp.public_key();
+
+    NatsHelmChartScore::new(
+        NATS_RELEASE.to_string(),
+        FLEET_NAMESPACE.to_string(),
+        render_nats_values(
+            &opts.domain,
+            &issuer_pubkey,
+            &opts.nats_auth_pass,
+            &opts.nats_system_pass,
+        ),
+    )
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("NATS deploy")?;
+
+    info!(
+        "[5/5] deploying NatsAuthCalloutScore (image: {})",
+        opts.callout_image
+    );
+    NatsAuthCalloutScore::new(
+        CALLOUT_DEPLOYMENT_NAME,
+        FLEET_NAMESPACE,
+        format!("nats://{NATS_RELEASE}.{FLEET_NAMESPACE}.svc.cluster.local:4222"),
+        opts.domain.zitadel_issuer_url(),
+        // The aud the callout validates against is the project ID —
+        // Zitadel emits it in access tokens minted via the
+        // project-id-audience scope.
+        project_id.clone(),
+        NATS_AUTH_USER,
+        opts.nats_auth_pass.clone(),
+        issuer_seed,
+    )
+    .image(&opts.callout_image)
+    .target_account(NATS_ACCOUNT)
+    .admin_role(ADMIN_ROLE_KEY)
+    .device_role(DEVICE_ROLE_KEY)
+    .interpret(&Inventory::autoload(), &topology)
+    .await
+    .context("callout deploy")?;
+
+    Ok(StagingHandles {
+        domain: opts.domain,
+        project_id,
+        issuer_pubkey,
+        callout_image: opts.callout_image,
+        cli_client_id,
+    })
+}
+
+fn read_cli_client_id() -> Option<String> {
+    ZitadelClientConfig::load()?
+        .client_id(CLI_APP_NAME)
+        .cloned()
+}
+
+async fn deploy_zitadel(domain: &FleetDomainConfig, topology: &K8sAnywhereTopology) -> Result<()> {
+    let z = ZitadelScore {
+        host: domain.zitadel_host(),
+        zitadel_version: "v4.12.1".to_string(),
+        // OKD HAProxy edge-terminates TLS for us, so the issuer URL
+        // is `https://zitadel.<base>` (port 443 implied) — leave
+        // external_port at None so Zitadel's emitted issuer omits the
+        // port, matching what clients reach.
+        external_secure: true,
+        external_port: None,
+    };
+    z.interpret(&Inventory::autoload(), topology)
+        .await
+        .context("ZitadelScore")?;
+    Ok(())
+}
+
+async fn provision_zitadel_project(
+    domain: &FleetDomainConfig,
+    topology: &K8sAnywhereTopology,
+) -> Result<()> {
+    let setup = ZitadelSetupScore {
+        host: domain.zitadel_host(),
+        // OKD HAProxy listens on 443; ZitadelSetupScore talks to
+        // 127.0.0.1:<port> with Host header + skip_tls — but for
+        // staging we go through the real ingress so the operator can
+        // run this from anywhere with kubeconfig + DNS access. 443 is
+        // the externally-visible port.
+        port: 443,
+        skip_tls: false,
+        applications: vec![ZitadelApplication {
+            project_name: PROJECT_NAME.to_string(),
+            app_name: CLI_APP_NAME.to_string(),
+            // Device Code grant — the only browser-driven OIDC flow
+            // that fits a CLI tool: prints a verification URL + user
+            // code, polls for a token, no embedded web server / open
+            // listener required.
+            app_type: ZitadelAppType::DeviceCode,
+        }],
+        api_apps: vec![ZitadelApiApp {
+            project_name: PROJECT_NAME.to_string(),
+            app_name: API_APP_NAME.to_string(),
+        }],
+        roles: vec![
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: ADMIN_ROLE_KEY.to_string(),
+                display_name: "Fleet Admin".to_string(),
+                group: None,
+            },
+            ZitadelRole {
+                project_name: PROJECT_NAME.to_string(),
+                key: DEVICE_ROLE_KEY.to_string(),
+                display_name: "Device".to_string(),
+                group: None,
+            },
+        ],
+        // No machine users provisioned here — `fleet_rpi_setup` mints
+        // them on demand per device, so the staging deploy stays
+        // device-count-agnostic.
+        machine_users: vec![],
+    };
+    setup
+        .interpret(&Inventory::autoload(), topology)
+        .await
+        .context("ZitadelSetupScore")?;
+    Ok(())
+}
+
+fn read_project_id() -> Result<String> {
+    let cfg = ZitadelClientConfig::load()
+        .context("ZitadelSetupScore did not produce a client config cache")?;
+    cfg.project_id_by_name(PROJECT_NAME)
+        .or(cfg.project_id.as_ref())
+        .context("project_id missing from ZitadelClientConfig cache")
+        .cloned()
+}
+
+/// Load the callout's issuer NKey seed from its K8s secret, minting and
+/// persisting one on first run so re-runs don't invalidate user JWTs already
+/// in flight on customer Pis. Errors if a fresh seed cannot be stored.
+async fn ensure_issuer_seed(topology: &K8sAnywhereTopology) -> Result<String> {
+    use k8s_openapi::ByteString;
+    use k8s_openapi::api::core::v1::{Namespace, Secret};
+    use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
+
+    let k8s = topology
+        .k8s_client()
+        .await
+        .map_err(|e| anyhow::anyhow!("k8s_client: {e}"))?;
+
+    if k8s
+        .get_resource::<Namespace>(FLEET_NAMESPACE, None)
+        .await?
+        .is_none()
+    {
+        let ns = Namespace {
+            metadata: ObjectMeta {
+                name: Some(FLEET_NAMESPACE.to_string()),
+                ..Default::default()
+            },
+            ..Default::default()
+        };
+        // Best-effort: a real failure resurfaces when the Secret is created below.
+        k8s.create(&ns, None).await.ok();
+    }
+
+    if let Some(existing) = k8s
+        .get_resource::<Secret>(ISSUER_SEED_SECRET, Some(FLEET_NAMESPACE))
+        .await?
+        && let Some(data) = existing.data
+        && let Some(seed_bytes) = data.get("seed")
+    {
+        let seed = String::from_utf8(seed_bytes.0.clone())?;
+        return Ok(seed.trim().to_string());
+    }
+
+    let seed = KeyPair::new_account()
+        .seed()
+        .map_err(|e| anyhow::anyhow!("nkey seed: {e}"))?;
+    let secret = Secret {
+        metadata: ObjectMeta {
+            name: Some(ISSUER_SEED_SECRET.to_string()),
+            namespace: Some(FLEET_NAMESPACE.to_string()),
+            ..Default::default()
+        },
+        data: Some([("seed".to_string(), ByteString(seed.as_bytes().to_vec()))].into()),
+        type_: Some("Opaque".to_string()),
+        ..Default::default()
+    };
+    k8s.create(&secret, Some(FLEET_NAMESPACE))
+        .await
+        .map_err(|e| anyhow::anyhow!("persist issuer seed secret {ISSUER_SEED_SECRET}: {e}"))?;
+    Ok(seed)
+}
+
+// ---- NATS values -----------------------------------------------------------
+
+/// Render NATS Helm values for an OKD-flavored deployment with WSS ingress +
+/// auth callout + JetStream. Passwords are emitted as escaped YAML strings.
+///
+/// **Why WSS rather than plain NATS-on-TLS:** OKD's default ingress controller
+/// (HAProxy) is HTTP-aware and edge-terminates TLS. NATS over WebSocket goes
+/// through that ingress unchanged; native NATS TCP would require a TCP
+/// loadbalancer service or a passthrough Route, both of which are extra infra
+/// the customer's cluster may not have. WSS is also the default async-nats
+/// client transport on `wss://...` URLs — no special agent code needed.
+pub fn render_nats_values(
+    domain: &FleetDomainConfig,
+    issuer_pubkey: &str,
+    nats_auth_pass: &str,
+    nats_system_pass: &str,
+) -> String {
+    let auth_callout = render_auth_callout_block(issuer_pubkey, NATS_AUTH_USER, NATS_ACCOUNT);
+    let auth_callout_indented = auth_callout
+        .lines()
+        .enumerate()
+        .map(|(i, l)| {
+            if i == 0 {
+                l.to_string()
+            } else {
+                format!("    {l}")
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("\n");
+    format!(
+        r#"fullnameOverride: {nats_release}
+config:
+  cluster:
+    enabled: false
+  jetstream:
+    enabled: true
+    fileStorage:
+      enabled: true
+      size: 5Gi
+  websocket:
+    enabled: true
+    port: 8443
+    ingress:
+      enabled: true
+      className: openshift-default
+      pathType: Prefix
+      hosts:
+        - {nats_wss_host}
+      annotations:
+        # OKD HAProxy edge-terminates TLS — the chart's default Route
+        # generation needs `route.openshift.io/termination: edge` so
+        # the Route's TLS block is "edge", matching the cluster's wildcard
+        # cert behavior. Switch to `reencrypt` if you need TLS all the
+        # way to the NATS pod.
+        route.openshift.io/termination: edge
+        haproxy.router.openshift.io/timeout: "1h"
+  merge:
+    {auth_callout_indented}
+    accounts:
+      {nats_account}:
+        jetstream: enabled
+        users:
+          - user: "{auth_user}"
+            password: {auth_pass}
+      SYS:
+        users:
+          - user: "{sys_user}"
+            password: {sys_pass}
+    system_account: SYS
+service:
+  ports:
+    nats:
+      enabled: true
+"#,
+        nats_release = NATS_RELEASE,
+        nats_wss_host = domain.nats_wss_host(),
+        nats_account = NATS_ACCOUNT,
+        auth_user = NATS_AUTH_USER,
+        // JSON string literals are valid YAML, so `"` / `\` in a password stay data.
+        auth_pass = serde_json::Value::from(nats_auth_pass),
+        sys_user = NATS_SYSTEM_USER,
+        sys_pass = serde_json::Value::from(nats_system_pass),
+    )
+}
+
+// ---- readiness -------------------------------------------------------------
+
+async fn wait_for_zitadel_ready(domain: &FleetDomainConfig) -> Result<()> {
+    let issuer = domain.zitadel_issuer_url();
+    let well_known = format!("{issuer}/.well-known/openid-configuration");
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(5))
+        .build()?;
+    for attempt in 1..=180 {
+        match client.get(&well_known).send().await {
+            Ok(r) if r.status().is_success() => return Ok(()),
+            Ok(r) if attempt % 30 == 0 => {
+                info!("Zitadel HTTPS {} (attempt {attempt}/180)", r.status());
+            }
+            Err(e) if attempt % 30 == 0 => {
+                info!("Zitadel unreachable: {e} (attempt {attempt}/180)");
+            }
+            _ => {}
+        }
+        tokio::time::sleep(Duration::from_secs(2)).await;
+    }
+    anyhow::bail!("timed out waiting for Zitadel at {well_known}")
+}
+
+// ---- helpful printout ------------------------------------------------------
+
+impl StagingHandles {
+    /// Print the operator's "what to do next" panel after a successful
+    /// staging deploy. Pasted at the end of the binary's run.
+    pub fn print_next_steps(&self) {
+        let zitadel = self.domain.zitadel_issuer_url();
+        let nats = self.domain.nats_wss_url();
+        println!();
+        println!("============================================================");
+        println!(" STAGING DEPLOY COMPLETE");
+        println!("============================================================");
+        println!(" Base domain:      {}", self.domain.base_domain);
+        println!(" Zitadel:          {zitadel}");
+        println!(" NATS (WSS):       {nats}");
+        println!(" Project ID:       {}", self.project_id);
+        println!(" Callout image:    {}", self.callout_image);
+        println!(" Issuer pubkey:    {}", self.issuer_pubkey);
+        if let Some(cid) = &self.cli_client_id {
+            println!(" CLI client_id:    {cid}");
+            println!();
+            println!(" CLI SSO login (developer-side):");
+            println!();
+            println!("   cargo run -p example-fleet-sso-login -- \\");
+            println!("     --base-domain {} \\", self.domain.base_domain);
+            println!("     --client-id {cid}");
+        }
+        println!();
+        println!(" Onboard a Pi:");
+        println!();
+        println!("   PAT=$(kubectl -n zitadel get secret iam-admin-pat \\");
+        println!("       -o jsonpath='{{.data.pat}}' | base64 -d)");
+        println!();
+        println!("   cargo run -p example-fleet-rpi-setup -- \\");
+        println!("     --pi-host <PI_IP> \\");
+        println!("     --bootstrap-token \"$PAT\" \\");
+        println!("     --zitadel-issuer-url {zitadel} \\");
+        println!("     --zitadel-project-id {} \\", self.project_id);
+        println!("     --nats-url {nats} \\");
+        println!("     --agent-binary <path-to-aarch64-fleet-agent>");
+        println!();
+        println!("============================================================");
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn domain_config_derives_hostnames() {
+        let d = FleetDomainConfig::new("customer1.nationtech.io");
+        assert_eq!(d.zitadel_host(), "zitadel.customer1.nationtech.io");
+        assert_eq!(d.nats_wss_host(), "nats.customer1.nationtech.io");
+        assert_eq!(
+            d.zitadel_issuer_url(),
+            "https://zitadel.customer1.nationtech.io"
+        );
+        assert_eq!(d.nats_wss_url(), "wss://nats.customer1.nationtech.io/");
+    }
+
+    #[test]
+    fn nats_values_render_includes_wss_ingress_and_auth_callout() {
+        let d = FleetDomainConfig::new("acme.io");
+        let yaml = render_nats_values(&d, "ABCDEF", "auth-pass", "sys-pass");
+        // WSS plumbing.
+        assert!(yaml.contains("websocket:"));
+        assert!(yaml.contains("port: 8443"));
+        assert!(yaml.contains("nats.acme.io"));
+        // OKD edge-TLS annotations.
+        assert!(yaml.contains("openshift-default"));
+        assert!(yaml.contains("route.openshift.io/termination: edge"));
+        // Auth callout wired through with the issuer pubkey.
+        assert!(yaml.contains("auth_callout"));
+        assert!(yaml.contains("issuer: ABCDEF"));
+        assert!(yaml.contains("auth_users: [ auth ]"));
+        assert!(yaml.contains("system_account: SYS"));
+        // Account user.
+        assert!(yaml.contains("password: \"auth-pass\""));
+    }
+
+    #[test]
+    fn nats_values_inline_account_block_under_merge() {
+        // Prevent regressions where the auth_callout block leaks
+        // outside the `merge:` indentation level — chart expects it
+        // under config.merge.
+        let d = FleetDomainConfig::new("x.io");
+        let yaml = render_nats_values(&d, "K", "p", "s");
+        let idx_merge = yaml.find("\n  merge:\n").expect("merge block present");
+        let idx_callout = yaml.find("auth_callout:").expect("auth_callout present");
+        assert!(idx_callout > idx_merge, "auth_callout must follow merge:");
+    }
+}
--- a/examples/fleet_staging_deploy/src/main.rs
+++ b/examples/fleet_staging_deploy/src/main.rs
@@ -0,0 +1,71 @@
+//! `cargo run -p example-fleet-staging-deploy -- --base-domain customer1.nationtech.io ...`
+//!
+//! Operator-side, run-once-per-customer-instance harness. Brings up
+//! the central fleet platform services (Zitadel + NATS + auth callout)
+//! against an OKD/K8s cluster pointed to by `KUBECONFIG`. Prints the
+//! exact follow-up command the operator runs against a Pi to onboard
+//! the first device.
+//!
+//! See `src/lib.rs` for the architectural notes.
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use example_fleet_staging_deploy::{FleetDomainConfig, StagingDeployOpts, bring_up_staging};
+
+#[derive(Parser, Debug)]
+#[command(
+    name = "fleet-staging-deploy",
+    about = "Deploy Zitadel + NATS + auth callout onto an OKD cluster"
+)]
+struct Cli {
+    /// Base DNS domain. All cluster-visible services derive from this:
+    /// `zitadel.<base>`, `nats.<base>`. The customer's wildcard cert /
+    /// CoreDNS / DNS provider must already point this at the cluster.
+    #[arg(long, env = "FLEET_BASE_DOMAIN")]
+    base_domain: String,
+    /// kubeconfig context to deploy against. Defaults to the
+    /// kubeconfig's current-context. Set this when your kubeconfig
+    /// has multiple contexts and you don't want to rely on the
+    /// global current.
+    #[arg(long, env = "FLEET_KUBE_CONTEXT")]
+    kube_context: Option<String>,
+    /// Container image reference for the harmony-nats-callout binary.
+    /// The cluster pulls this; operator must have pushed it before
+    /// running the deploy. Defaults to a quay.io path that the
+    /// customer should override per their registry.
+    #[arg(
+        long,
+        env = "FLEET_CALLOUT_IMAGE",
+        default_value = "quay.io/nationtech/harmony-nats-callout:demo"
+    )]
+    callout_image: String,
+    /// Password for the NATS service-account user the callout uses on
+    /// its own NATS connection. Stored in a K8s secret + listed in
+    /// the chart's `accounts.DEVICES.users` (which bypass callout —
+    /// otherwise the callout would deadlock authenticating itself).
+    #[arg(long, env = "FLEET_NATS_AUTH_PASS")]
+    nats_auth_pass: String,
+    /// Password for the NATS SYS account (used for nats-box debugging
+    /// inside the cluster).
+    #[arg(long, env = "FLEET_NATS_SYSTEM_PASS")]
+    nats_system_pass: String,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    let cli = Cli::parse();
+    let domain = FleetDomainConfig::new(cli.base_domain);
+
+    let handles = bring_up_staging(StagingDeployOpts {
+        domain,
+        kubeconfig_context: cli.kube_context,
+        callout_image: cli.callout_image,
+        nats_auth_pass: cli.nats_auth_pass,
+        nats_system_pass: cli.nats_system_pass,
+    })
+    .await
+    .context("staging deploy")?;
+
+    handles.print_next_steps();
+    Ok(())
+}
--- a/examples/fleet_vm_setup/src/main.rs
+++ b/examples/fleet_vm_setup/src/main.rs
@@ -211,9 +211,14 @@ async fn main() -> Result<()> {
        device_id: device_id.clone(),
        labels,
        nats_urls: vec![cli.nats_url.clone()],
-        nats_user: cli.nats_user.clone(),
-        nats_pass: cli.nats_pass.clone(),
+        // VM smoke harness keeps shared-creds for v0; the customer-
+        // facing Pi flow uses Zitadel JWT (see fleet_rpi_setup).
+        auth: harmony::modules::fleet::FleetDeviceAuth::TomlShared {
+            nats_user: cli.nats_user.clone(),
+            nats_pass: cli.nats_pass.clone(),
+        },
        agent_binary_path: agent_binary,
+        hosts_entries: vec![],
    });

    run_setup_score(&setup_score, &linux_topology).await?;
--- a/examples/harmony_apply_deployment/src/main.rs
+++ b/examples/harmony_apply_deployment/src/main.rs
@@ -39,6 +39,7 @@
 use anyhow::{Context, Result};
 use clap::Parser;
 use harmony::modules::podman::{PodmanService, PodmanV0Score};
+use harmony::topology::{RestartPolicy, VolumeMount};
 use harmony_fleet_operator::crd::{
    Deployment, DeploymentSpec, Rollout, RolloutStrategy, ScorePayload,
 };
@@ -76,6 +77,16 @@ struct Cli {
    /// `host:container` port mapping exposed on the device.
    #[arg(long, default_value = "8080:80")]
    port: String,
+    /// Repeatable `KEY=VALUE` env var injected into the container.
+    #[arg(long = "env", value_name = "KEY=VALUE")]
+    envs: Vec<String>,
+    /// Repeatable bind-mount in `host_path:container_path[:ro]` form.
+    /// Append `:ro` for read-only.
+    #[arg(long = "volume", value_name = "HOST:CONTAINER[:ro]")]
+    volumes: Vec<String>,
+    /// Container restart policy.
+    #[arg(long, value_enum, default_value_t = CliRestart::UnlessStopped)]
+    restart: CliRestart,
    /// Delete the Deployment CR instead of applying it.
    #[arg(long)]
    delete: bool,
@@ -132,12 +143,69 @@ async fn main() -> Result<()> {
    Ok(())
 }

+/// Mirrors `harmony::topology::RestartPolicy` so we can keep the CLI
+/// schema stable even if the underlying enum gains variants.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
+enum CliRestart {
+    No,
+    UnlessStopped,
+    OnFailure,
+    Always,
+}
+
+impl From<CliRestart> for RestartPolicy {
+    fn from(c: CliRestart) -> Self {
+        match c {
+            CliRestart::No => RestartPolicy::No,
+            CliRestart::UnlessStopped => RestartPolicy::UnlessStopped,
+            CliRestart::OnFailure => RestartPolicy::OnFailure,
+            CliRestart::Always => RestartPolicy::Always,
+        }
+    }
+}
+
+/// Parse a `KEY=VALUE` pair; an empty key is rejected, an empty value is not.
+fn parse_env(s: &str) -> Result<(String, String)> {
+    let pair = s.split_once('=').filter(|(k, _)| !k.is_empty());
+    let (k, v) = pair.ok_or_else(|| anyhow::anyhow!("--env expects KEY=VALUE, got {s:?}"))?;
+    Ok((k.to_string(), v.to_string()))
+}
+
+/// Parse a `HOST:CONTAINER[:ro|rw]` bind-mount spec; no mode means read-write.
+fn parse_volume(s: &str) -> Result<VolumeMount> {
+    let parts: Vec<&str> = s.split(':').collect();
+    let (host, cont, ro) = match parts.as_slice() {
+        [host, cont] | [host, cont, "rw"] => (host, cont, false),
+        [host, cont, "ro"] => (host, cont, true),
+        _ => anyhow::bail!("--volume expects HOST:CONTAINER[:ro|rw], got {s:?}"),
+    };
+    Ok(VolumeMount {
+        host_path: host.to_string(),
+        container_path: cont.to_string(),
+        read_only: ro,
+    })
+}
+
 fn build_cr(cli: &Cli) -> Deployment {
+    let env: Vec<(String, String)> = cli
+        .envs
+        .iter()
+        .map(|s| parse_env(s).expect("--env validated"))
+        .collect();
+    let volumes: Vec<VolumeMount> = cli
+        .volumes
+        .iter()
+        .map(|s| parse_volume(s).expect("--volume validated"))
+        .collect();
+
    let score = PodmanV0Score {
        services: vec![PodmanService {
            name: cli.name.clone(),
            image: cli.image.clone(),
            ports: vec![cli.port.clone()],
+            env,
+            volumes,
+            restart_policy: cli.restart.into(),
        }],
    };

--- a/examples/harmony_host_discovery/Cargo.toml
+++ b/examples/harmony_host_discovery/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "harmony_host_discovery"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+
+[dependencies]
+harmony = { path = "../../harmony" }
+harmony_cli = { path = "../../harmony_cli" }
+harmony_macros = { path = "../../harmony_macros" }
+harmony_types = { path = "../../harmony_types" }
+tokio.workspace = true
+url.workspace = true
+cidr.workspace = true
--- a/examples/harmony_host_discovery/env.sh
+++ b/examples/harmony_host_discovery/env.sh
@@ -0,0 +1,4 @@
+export HARMONY_SECRET_NAMESPACE=host-discovery
+export HARMONY_SECRET_STORE=file
+export HARMONY_DATABASE_URL=sqlite://harmony_host_discovery.sqlite
+export RUST_LOG=harmony=debug
--- a/examples/harmony_host_discovery/src/main.rs
+++ b/examples/harmony_host_discovery/src/main.rs
@@ -0,0 +1,27 @@
+use harmony::{
+    inventory::{HostRole, Inventory},
+    modules::inventory::{DiscoverHostForRoleScore, HarmonyDiscoveryStrategy},
+    topology::LocalhostTopology,
+};
+use harmony_macros::cidrv4;
+
+#[tokio::main]
+async fn main() {
+    let discover_one_host = DiscoverHostForRoleScore {
+        role: HostRole::Worker,
+        number_desired_hosts: 1,
+        discovery_strategy: HarmonyDiscoveryStrategy::SUBNET {
+            cidr: cidrv4!("192.168.40.0/24"),
+            port: 25000,
+        },
+    };
+
+    harmony_cli::run(
+        Inventory::autoload(),
+        LocalhostTopology::new(),
+        vec![Box::new(discover_one_host)],
+        None,
+    )
+    .await
+    .unwrap();
+}
--- a/examples/harmony_sso/src/main.rs
+++ b/examples/harmony_sso/src/main.rs
@@ -118,6 +118,7 @@ async fn deploy_zitadel(k3d: &K3d) -> anyhow::Result<()> {
        host: ZITADEL_HOST.to_string(),
        zitadel_version: "v4.12.1".to_string(),
        external_secure: false,
+        external_port: None,
    };

    let topology = create_topology(k3d);
@@ -301,6 +302,8 @@ async fn main() -> anyhow::Result<()> {
            app_name: APP_NAME.to_string(),
            app_type: ZitadelAppType::DeviceCode,
        }],
+        api_apps: vec![],
+        roles: vec![],
        machine_users: vec![],
    }
    .interpret(&Inventory::autoload(), &topology)
--- a/examples/okd_ceph_alerts/Cargo.toml
+++ b/examples/okd_ceph_alerts/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "example-okd-ceph-alerts"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+publish = false
+
+[dependencies]
+harmony = { path = "../../harmony" }
+harmony_cli = { path = "../../harmony_cli" }
+harmony_types = { path = "../../harmony_types" }
+tokio = { workspace = true }
+log = { workspace = true }
--- a/examples/okd_ceph_alerts/env.sh
+++ b/examples/okd_ceph_alerts/env.sh
@@ -0,0 +1,4 @@
+export HARMONY_SECRET_NAMESPACE=okd_ceph_alerts_example
+export HARMONY_SECRET_STORE=file
+export HARMONY_DATABASE_URL=sqlite://harmony_okd_ceph_alerts_example.sqlite
+export RUST_LOG=harmony=debug
--- a/examples/okd_ceph_alerts/src/main.rs
+++ b/examples/okd_ceph_alerts/src/main.rs
@@ -0,0 +1,28 @@
+use harmony::{
+    inventory::Inventory,
+    modules::monitoring::{
+        ceph_alerts::ceph_alert_rule_groups, okd::cluster_alert_rules::OpenshiftPrometheusRuleScore,
+    },
+    topology::K8sAnywhereTopology,
+};
+
+#[tokio::main]
+async fn main() {
+    harmony_cli::cli_logger::init();
+
+    let ceph_rules = OpenshiftPrometheusRuleScore {
+        namespace: "rook-ceph".to_string(),
+        name: "ceph-alerts".to_string(),
+        rule_groups: ceph_alert_rule_groups(),
+        labels: None,
+    };
+
+    harmony_cli::run(
+        Inventory::autoload(),
+        K8sAnywhereTopology::from_env(),
+        vec![Box::new(ceph_rules)],
+        None,
+    )
+    .await
+    .unwrap();
+}
--- a/examples/zitadel/src/main.rs
+++ b/examples/zitadel/src/main.rs
@@ -8,6 +8,7 @@ async fn main() {
        host: "sso.sto1.nationtech.io".to_string(),
        zitadel_version: "v4.12.1".to_string(),
        external_secure: true,
+        external_port: None,
    };

    harmony_cli::run(
--- a/fleet/harmony-fleet-agent/Cargo.toml
+++ b/fleet/harmony-fleet-agent/Cargo.toml
@@ -5,9 +5,11 @@ edition = "2024"
 rust-version = "1.85"

 [dependencies]
+harmony-fleet-auth = { path = "../harmony-fleet-auth" }
 harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
 harmony = { path = "../../harmony", default-features = false, features = ["podman"] }
 async-nats = { workspace = true }
+async-trait = { workspace = true }
 chrono = { workspace = true }
 futures-util = { workspace = true }
 serde = { workspace = true }
@@ -17,4 +19,4 @@ tracing = { workspace = true }
 tracing-subscriber = { workspace = true }
 anyhow = { workspace = true }
 clap = { workspace = true }
-toml = { workspace = true }
+toml = { workspace = true }
--- a/fleet/harmony-fleet-agent/src/config.rs
+++ b/fleet/harmony-fleet-agent/src/config.rs
@@ -3,6 +3,11 @@ use serde::Deserialize;
 use std::collections::BTreeMap;
 use std::path::Path;

+// Re-export the shared credential types so existing call sites keep
+// working with `crate::config::CredentialsSection`. The struct itself
+// lives in `harmony_fleet_auth` and is shared with the operator.
+pub use harmony_fleet_auth::CredentialsSection;
+
 #[derive(Debug, Clone, Deserialize)]
 pub struct AgentConfig {
    pub agent: AgentSection,
@@ -30,49 +35,6 @@ pub struct NatsSection {
    pub urls: Vec<String>,
 }

-#[derive(Debug, Clone, Deserialize)]
-pub struct CredentialsSection {
-    #[serde(rename = "type")]
-    pub source_type: String,
-    pub nats_user: Option<String>,
-    pub nats_pass: Option<String>,
-}
-
-pub trait CredentialSource: Send + Sync {
-    fn nats_credentials(&self) -> anyhow::Result<(String, String)>;
-}
-
-pub struct TomlFileCredentialSource<'a> {
-    config: &'a AgentConfig,
-}
-
-impl<'a> TomlFileCredentialSource<'a> {
-    pub fn new(config: &'a AgentConfig) -> Self {
-        Self { config }
-    }
-}
-
-impl CredentialSource for TomlFileCredentialSource<'_> {
-    fn nats_credentials(&self) -> anyhow::Result<(String, String)> {
-        let creds = &self.config.credentials;
-        if creds.source_type != "toml-shared" {
-            anyhow::bail!(
-                "unsupported credentials.type '{}' (v0 only supports 'toml-shared')",
-                creds.source_type
-            );
-        }
-        let user = creds
-            .nats_user
-            .as_deref()
-            .ok_or_else(|| anyhow::anyhow!("missing nats_user in credentials"))?;
-        let pass = creds
-            .nats_pass
-            .as_deref()
-            .ok_or_else(|| anyhow::anyhow!("missing nats_pass in credentials"))?;
-        Ok((user.to_string(), pass.to_string()))
-    }
-}
-
 pub fn load_config(path: &Path) -> anyhow::Result<AgentConfig> {
    let content = std::fs::read_to_string(path)?;
    let config: AgentConfig = toml::from_str(&content)?;
@@ -84,7 +46,7 @@ mod tests {
    use super::*;

    #[test]
-    fn parses_config_with_labels_section() {
+    fn parses_toml_shared_credentials() {
        let raw = r#"
 [agent]
 device_id = "pi-42"
@@ -103,7 +65,16 @@ arch = "aarch64"
 "#;
        let cfg: AgentConfig = toml::from_str(raw).expect("valid config");
        assert_eq!(cfg.labels.get("group"), Some(&"site-a".to_string()));
-        assert_eq!(cfg.labels.get("arch"), Some(&"aarch64".to_string()));
+        match &cfg.credentials {
+            CredentialsSection::TomlShared {
+                nats_user,
+                nats_pass,
+            } => {
+                assert_eq!(nats_user, "u");
+                assert_eq!(nats_pass, "p");
+            }
+            _ => panic!("expected TomlShared"),
+        }
    }

    #[test]
--- a/fleet/harmony-fleet-agent/src/fleet_publisher.rs
+++ b/fleet/harmony-fleet-agent/src/fleet_publisher.rs
@@ -17,6 +17,11 @@ use std::collections::BTreeMap;

 pub struct FleetPublisher {
    device_id: Id,
+    /// Raw NATS client kept around so we can publish on direct
+    /// (non-JetStream) subjects like `device-state.<device_id>` for
+    /// live observers — the KV writes are storage-and-watch, the
+    /// direct subject is fan-out.
+    client: async_nats::Client,
    info_bucket: kv::Store,
    state_bucket: kv::Store,
    heartbeat_bucket: kv::Store,
@@ -26,11 +31,13 @@ impl FleetPublisher {
    /// Open every bucket the agent needs, creating those that don't
    /// exist yet. Idempotent with operator-side creation.
    pub async fn connect(client: async_nats::Client, device_id: Id) -> anyhow::Result<Self> {
-        let jetstream = jetstream::new(client);
+        let jetstream = jetstream::new(client.clone());

        let info_bucket = jetstream
            .create_key_value(kv::Config {
                bucket: BUCKET_DEVICE_INFO.to_string(),
+                // If this works as I think it does, it would be useful to keep a history of the
+                // last 10 device-info entries, each with a timestamp
                history: 1,
                ..Default::default()
            })
@@ -38,6 +45,8 @@ impl FleetPublisher {
        let state_bucket = jetstream
            .create_key_value(kv::Config {
                bucket: BUCKET_DEVICE_STATE.to_string(),
+                // If this works as I think it does, it would be useful to keep a history of the
+                // last 10 states a device had, each with a timestamp
                history: 1,
                ..Default::default()
            })
@@ -52,6 +61,7 @@ impl FleetPublisher {

        Ok(Self {
            device_id,
+            client,
            info_bucket,
            state_bucket,
            heartbeat_bucket,
@@ -102,18 +112,45 @@ impl FleetPublisher {
    /// Persist the authoritative current phase for a `(device,
    /// deployment)` pair. The operator's watch on the `device-state`
    /// bucket picks up this put and updates CR status counters.
+    /// Also fans out the same payload on `device-state.<device_id>`
+    /// for live observers that don't want to consume the KV stream.
    pub async fn write_deployment_state(&self, state: &DeploymentState) {
        let key = device_state_key(&self.device_id.to_string(), &state.deployment);
        match serde_json::to_vec(state) {
            Ok(payload) => {
-                if let Err(e) = self.state_bucket.put(&key, payload.into()).await {
+                if let Err(e) = self.state_bucket.put(&key, payload.clone().into()).await {
                    tracing::warn!(%key, error = %e, "write_deployment_state: kv put failed");
                }
+                self.publish_direct_state(payload).await;
            }
            Err(e) => tracing::warn!(error = %e, "write_deployment_state: serialize failed"),
        }
    }

+    /// Emit a tiny presence pulse on `device-state.<device_id>` so live
+    /// observers (admin tooling, dashboards) see the device is alive
+    /// without subscribing to JetStream. Called from the heartbeat
+    /// loop alongside the KV heartbeat write — same cadence, two
+    /// transports.
+    pub async fn publish_state_pulse(&self) {
+        let pulse = serde_json::json!({
+            "device_id": self.device_id.to_string(),
+            "kind": "heartbeat",
+            "at": chrono::Utc::now(),
+        });
+        match serde_json::to_vec(&pulse) {
+            Ok(payload) => self.publish_direct_state(payload).await,
+            Err(e) => tracing::warn!(error = %e, "publish_state_pulse: serialize failed"),
+        }
+    }
+
+    async fn publish_direct_state(&self, payload: Vec<u8>) {
+        let subject = format!("device-state.{}", self.device_id);
+        if let Err(e) = self.client.publish(subject.clone(), payload.into()).await {
+            tracing::debug!(%subject, error = %e, "publish_direct_state: publish failed");
+        }
+    }
+
    /// Delete the authoritative current-phase entry, e.g. when the
    /// Deployment CR is removed and the agent has torn down the
    /// container.
--- a/fleet/harmony-fleet-agent/src/main.rs
+++ b/fleet/harmony-fleet-agent/src/main.rs
@@ -5,9 +5,15 @@ mod reconciler;
 use std::sync::Arc;
 use std::time::Duration;

-use anyhow::{Context, Result};
+use anyhow::{Context, Error, Result};
 use clap::Parser;
-use config::{AgentConfig, CredentialSource, TomlFileCredentialSource};
+use config::AgentConfig;
+use harmony_fleet_auth::{
+    CredentialSource, connect_options_with_credentials, credential_source_from_config,
+};
+// Type alias to keep function signatures readable. The auth callback
+// captures one `Arc<CredentialSource>` and clones it per invocation.
+type Creds = Arc<CredentialSource>;
 use futures_util::StreamExt;
 use harmony_reconciler_contracts::{BUCKET_DESIRED_STATE, Id, InventorySnapshot};

@@ -28,15 +34,41 @@ struct Cli {
    #[arg(
        long,
        env = "FLEET_AGENT_CONFIG",
+        // FIXME: this should be a constant coming from a config rather than hardcoded here, since
+        // the installation scripts and other components also need to know this file location.
        default_value = "/etc/fleet-agent/config.toml"
    )]
    config: std::path::PathBuf,
 }

-async fn connect_nats(cfg: &AgentConfig) -> Result<async_nats::Client> {
-    let (user, pass) = TomlFileCredentialSource::new(cfg).nats_credentials()?;
-    let client = async_nats::ConnectOptions::with_user_and_password(user, pass)
+async fn connect_nats(cfg: &AgentConfig, creds: Creds) -> Result<async_nats::Client> {
+    let urls = &cfg.nats.urls;
+    tracing::info!(device_id = %cfg.agent.device_id, "connecting to NATS {urls:?}");
+    // The auth callback is invoked on every (re)connect, so a fresh
+    // Zitadel access token is minted automatically when the cached one
+    // is near-expiry — that's how we hold the "never lose connectivity"
+    // guarantee even across token rollovers and NATS pod restarts.
+    let client = connect_options_with_credentials(creds)
        .ping_interval(Duration::from_secs(10))
+        // Surface async-nats's connection lifecycle in our logs. This
+        // is load-bearing for ops: a device that quietly disconnects
+        // is exactly the failure mode we promise won't happen, and
+        // operators need to see the reconnect attempts to debug.
+        .event_callback(|event| async move {
+            use async_nats::Event;
+            match event {
+                Event::Connected => tracing::info!("NATS connected"),
+                Event::Disconnected => tracing::warn!("NATS disconnected, will reconnect"),
+                Event::LameDuckMode => tracing::warn!("NATS server entered lame-duck mode"),
+                Event::SlowConsumer(sid) => {
+                    tracing::warn!(sid = %sid, "NATS slow consumer")
+                }
+                Event::ServerError(e) => tracing::error!(error = %e, "NATS server error"),
+                Event::ClientError(e) => tracing::error!(error = %e, "NATS client error"),
+                Event::Closed => tracing::error!("NATS connection closed"),
+                other => tracing::debug!(?other, "NATS event"),
+            }
+        })
        .connect(cfg.nats.urls.as_slice())
        .await?;
    tracing::info!(urls = ?cfg.nats.urls, "connected to NATS");
@@ -68,6 +100,9 @@ async fn watch_desired_state(
                continue;
            }
        };
+
+        tracing::debug!(key = %entry.key, "bucket watch new value {entry:?}");
+
        match entry.operation {
            async_nats::jetstream::kv::Operation::Put => {
                if let Err(e) = reconciler.apply(&entry.key, &entry.value).await {
@@ -86,20 +121,27 @@ async fn watch_desired_state(
 }

 /// Tiny liveness-only loop: push a `HeartbeatPayload` into the
-/// `device-heartbeat` bucket every N seconds. Stays separate from
-/// per-deployment state writes so routine pings don't churn the
-/// device-state bucket or its watch subscribers.
+/// `device-heartbeat` bucket every N seconds, and fan out the same
+/// pulse on `device-state.<device_id>` for live (non-JetStream)
+/// observers. Stays separate from per-deployment state writes so
+/// routine pings don't churn the device-state bucket or its watch
+/// subscribers — but the direct-subject pulse uses ordinary core
+/// NATS pub/sub and doesn't accumulate state anywhere.
 async fn publish_heartbeat_loop(fleet: Arc<FleetPublisher>) {
    let mut interval = tokio::time::interval(Duration::from_secs(30));
    interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
    loop {
        interval.tick().await;
        fleet.publish_heartbeat().await;
+        fleet.publish_state_pulse().await;
    }
 }

 /// Build a one-shot inventory snapshot at agent startup. Cheap,
 /// published alongside every heartbeat until the agent restarts.
+/// NOTE: I don't see why this is *published* with every heartbeat; it feels like noise.
+/// It should be published on heartbeat only when something has changed. It is OK to *check* the
+/// state on heartbeat, but not to always send it over the wire.
 fn local_inventory(inventory: &Inventory) -> InventorySnapshot {
    InventorySnapshot {
        hostname: inventory.location.name.clone(),
@@ -156,7 +198,14 @@ async fn main() -> Result<()> {
    tracing::info!(hostname = %inventory.location.name, "inventory loaded");
    let inventory_snapshot = local_inventory(&inventory);

-    let client = connect_nats(&cfg).await?;
+    let creds = credential_source_from_config(&cfg.credentials)
+        .context("building NATS credential source")?;
+
+    let client = connect_nats(&cfg, creds).await.map_err(|e| {
+        let msg = format!("Nats connection FAILED : {e}");
+        tracing::error!(msg);
+        Error::msg(msg)
+    })?;

    // Publish surface. Opens the three KV buckets (idempotent
    // creates). Must be live before the reconciler starts so
--- a/fleet/harmony-fleet-agent/src/reconciler.rs
+++ b/fleet/harmony-fleet-agent/src/reconciler.rs
@@ -33,7 +33,10 @@ pub struct Reconciler {
    state: Mutex<HashMap<String, CachedEntry>>,
    /// Current phase per deployment, used to decide whether a new
    /// write to the `device-state` KV is needed.
-    phases: Mutex<HashMap<DeploymentName, Phase>>,
+    ///
+    /// NOTE : this feels dangerous, conflict on deployment name could be a problem
+    /// We must explore this and clarify it in the design and decide if it is a constraint
+    deployments: Mutex<HashMap<DeploymentName, Phase>>,
    /// Publish surface. Optional so unit tests without a live NATS
    /// client still work; always populated in the real agent runtime.
    fleet: Option<Arc<FleetPublisher>>,
@@ -51,7 +54,7 @@ impl Reconciler {
            topology,
            inventory,
            state: Mutex::new(HashMap::new()),
-            phases: Mutex::new(HashMap::new()),
+            deployments: Mutex::new(HashMap::new()),
            fleet,
        }
    }
@@ -67,7 +70,9 @@ impl Reconciler {
        last_error: Option<String>,
    ) {
        {
-            let mut phases = self.phases.lock().await;
+            let mut phases = self.deployments.lock().await;
+            // performance nitpick: `deployments` is a plain `Mutex`, so this check always takes the
+            // exclusive lock; an `RwLock` would let us check under a read lock before writing
            if phases.get(deployment).copied() == Some(phase) {
                return;
            }
@@ -91,7 +96,7 @@ impl Reconciler {
    /// a no-op in memory and a harmless tombstone write on the wire.
    async fn drop_phase(&self, deployment: &DeploymentName) {
        let was_known = {
-            let mut phases = self.phases.lock().await;
+            let mut phases = self.deployments.lock().await;
            phases.remove(deployment).is_some()
        };
        if !was_known {
@@ -301,7 +306,7 @@ mod tests {
    async fn apply_phase_records_new_phase() {
        let r = reconciler();
        r.apply_phase(&dn("hello"), Phase::Running, None).await;
-        let phases = r.phases.lock().await;
+        let phases = r.deployments.lock().await;
        assert_eq!(phases.get(&dn("hello")), Some(&Phase::Running));
    }

@@ -310,7 +315,7 @@ mod tests {
        let r = reconciler();
        r.apply_phase(&dn("hello"), Phase::Running, None).await;
        r.apply_phase(&dn("hello"), Phase::Running, None).await;
-        let phases = r.phases.lock().await;
+        let phases = r.deployments.lock().await;
        assert_eq!(phases.len(), 1);
    }

@@ -321,7 +326,7 @@ mod tests {
        r.apply_phase(&dn("hello"), Phase::Running, None).await;
        r.apply_phase(&dn("hello"), Phase::Failed, Some("oom".to_string()))
            .await;
-        let phases = r.phases.lock().await;
+        let phases = r.deployments.lock().await;
        assert_eq!(phases.get(&dn("hello")), Some(&Phase::Failed));
    }

@@ -330,7 +335,7 @@ mod tests {
        let r = reconciler();
        r.apply_phase(&dn("hello"), Phase::Running, None).await;
        r.drop_phase(&dn("hello")).await;
-        let phases = r.phases.lock().await;
+        let phases = r.deployments.lock().await;
        assert!(!phases.contains_key(&dn("hello")));
    }

@@ -338,7 +343,7 @@ mod tests {
    async fn drop_phase_on_unknown_deployment_is_noop() {
        let r = reconciler();
        r.drop_phase(&dn("never-existed")).await;
-        let phases = r.phases.lock().await;
+        let phases = r.deployments.lock().await;
        assert!(phases.is_empty());
    }
 }
--- a/fleet/harmony-fleet-auth/Cargo.toml
+++ b/fleet/harmony-fleet-auth/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "harmony-fleet-auth"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "Shared NATS credential plumbing for the fleet agent + operator (Zitadel JWT-bearer + dev-only username/password)"
+
+[lib]
+path = "src/lib.rs"
+
+[dependencies]
+async-nats = { workspace = true }
+anyhow = { workspace = true }
+chrono = { workspace = true }
+jsonwebtoken = "9"
+reqwest = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+tokio = { workspace = true, features = ["sync"] }
+tracing = { workspace = true }
+serde_json = { workspace = true }
+
+[dev-dependencies]
+toml = { workspace = true }
+tokio = { workspace = true, features = ["macros", "rt"] }
--- a/fleet/harmony-fleet-auth/src/config.rs
+++ b/fleet/harmony-fleet-auth/src/config.rs
@@ -0,0 +1,133 @@
+use serde::Deserialize;
+use std::path::PathBuf;
+
+/// Internally-tagged credential definition shared between the fleet
+/// agent and the fleet operator. The `type` field selects the variant;
+/// each variant's other fields are flatly mixed into the
+/// `[credentials]` TOML table for human-friendly editing.
+///
+/// **Why one struct for both processes**: the agent reads this from
+/// `/etc/fleet-agent/config.toml`; the operator reads it from a single
+/// env var (`FLEET_OPERATOR_CREDENTIALS_TOML`) whose value is a TOML
+/// snippet shaped exactly like the `[credentials]` table. Identical
+/// deserialization, identical downstream code path. The only thing
+/// that differs is the byte source.
+///
+/// Adding a new mode is additive — emit `type = "<new>"` from the
+/// installer side, decode here, instantiate the matching
+/// `CredentialSource`.
+#[derive(Debug, Clone, Deserialize)]
+#[serde(tag = "type", rename_all = "kebab-case")]
+pub enum CredentialsSection {
+    /// Shared username + password baked into the agent config. Only
+    /// suitable for v0/development scenarios where every device shares
+    /// a single NATS account user. Not used in production.
+    TomlShared {
+        nats_user: String,
+        nats_pass: String,
+    },
+    /// Per-device Zitadel machine-user JWT-bearer (RFC 7523) flow. The
+    /// keyfile at `key_path` is the only durable secret on the device —
+    /// the access token is short-lived and re-minted before expiry by
+    /// the auth callback registered on each NATS (re)connect.
+    ZitadelJwt {
+        /// Path to the machine-user JSON key file Zitadel emits for
+        /// `KEY_TYPE_JSON`. Defaults to
+        /// `/etc/fleet-agent/zitadel-key.json` for the agent; the
+        /// operator's deploy mounts the keyfile at a path it sets
+        /// explicitly in the env-var TOML.
+        #[serde(default = "default_zitadel_key_path")]
+        key_path: PathBuf,
+        /// Externally-visible Zitadel issuer URL — must match Zitadel's
+        /// emitted `iss` claim exactly (including port if non-default).
+        oidc_issuer_url: String,
+        /// `aud` value for token-bearer requests. Typically the Zitadel
+        /// project ID (the auth callout side validates against this).
+        audience: String,
+        /// Whether the HTTP client accepts invalid TLS certs. Local-dev
+        /// escape hatch for self-signed staging Zitadels.
+        #[serde(default)]
+        danger_accept_invalid_certs: bool,
+    },
+}
+
+fn default_zitadel_key_path() -> PathBuf {
+    PathBuf::from("/etc/fleet-agent/zitadel-key.json")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn parse(raw: &str) -> CredentialsSection {
+        toml::from_str(raw).expect("valid credentials TOML")
+    }
+
+    #[test]
+    fn parses_toml_shared() {
+        let cs = parse(
+            r#"
+type = "toml-shared"
+nats_user = "u"
+nats_pass = "p"
+"#,
+        );
+        match cs {
+            CredentialsSection::TomlShared {
+                nats_user,
+                nats_pass,
+            } => {
+                assert_eq!(nats_user, "u");
+                assert_eq!(nats_pass, "p");
+            }
+            _ => panic!("expected TomlShared"),
+        }
+    }
+
+    #[test]
+    fn parses_zitadel_jwt() {
+        let cs = parse(
+            r#"
+type = "zitadel-jwt"
+key_path = "/var/lib/fleet-agent/zitadel-key.json"
+oidc_issuer_url = "https://zitadel.staging.example.com"
+audience = "366378028009259037"
+danger_accept_invalid_certs = false
+"#,
+        );
+        match cs {
+            CredentialsSection::ZitadelJwt {
+                key_path,
+                oidc_issuer_url,
+                audience,
+                danger_accept_invalid_certs,
+            } => {
+                assert_eq!(
+                    key_path.to_str(),
+                    Some("/var/lib/fleet-agent/zitadel-key.json")
+                );
+                assert_eq!(oidc_issuer_url, "https://zitadel.staging.example.com");
+                assert_eq!(audience, "366378028009259037");
+                assert!(!danger_accept_invalid_certs);
+            }
+            _ => panic!("expected ZitadelJwt"),
+        }
+    }
+
+    #[test]
+    fn zitadel_jwt_key_path_defaults_when_omitted() {
+        let cs = parse(
+            r#"
+type = "zitadel-jwt"
+oidc_issuer_url = "https://zitadel.staging.example.com"
+audience = "366378028009259037"
+"#,
+        );
+        match cs {
+            CredentialsSection::ZitadelJwt { key_path, .. } => {
+                assert_eq!(key_path.to_str(), Some("/etc/fleet-agent/zitadel-key.json"));
+            }
+            _ => panic!("expected ZitadelJwt"),
+        }
+    }
+}
--- a/fleet/harmony-fleet-auth/src/credentials.rs
+++ b/fleet/harmony-fleet-auth/src/credentials.rs
@@ -0,0 +1,536 @@
+//! NATS credential sources for fleet processes (agent + operator).
+//!
+//! `CredentialSource::next_credential()` is invoked from async-nats's
+//! `with_auth_callback` on every (re)connect attempt — including the
+//! first connect. The callback shape means an expired token is
+//! automatically replaced when async-nats reconnects after a transient
+//! NATS outage / pod restart / network blip: the caller doesn't need
+//! a separate refresh task to "never lose connectivity."
+//!
+//! Two variants:
+//!
+//! - [`CredentialSource::TomlShared`] — username + password baked into
+//!   the config (v0/dev only).
+//! - [`CredentialSource::ZitadelJwt`] — Zitadel machine-user JWT-bearer
+//!   flow (RFC 7523). The keyfile is the only durable secret on the
+//!   process; the bearer token is short-lived and re-minted
+//!   transparently when a cached token is within 5 minutes of expiry.
+//!
+//! Modeled as an enum (rather than a `dyn Trait`) because async-nats's
+//! auth-callback bounds (`Future: Send + Sync`) are incompatible with
+//! `Pin<Box<dyn Future + Send>>` returned by an object-safe trait. Two
+//! variants is a small enough cardinality that enum dispatch is
+//! cleaner than a Trait + factory.
+
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use jsonwebtoken::{Algorithm, EncodingKey, Header as JwtHeader};
+use serde::Deserialize;
+
+use crate::config::CredentialsSection;
+
+/// Material the NATS connector needs to authenticate. Returned per
+/// (re)connect attempt — the source decides whether to mint fresh.
+#[derive(Debug, Clone)]
+pub enum NatsCredential {
+    UserPass { user: String, pass: String },
+    BearerToken(String),
+}
+
+/// Enum-dispatched credential source. Constructed once at startup
+/// from the parsed `[credentials]` section; cloned via Arc into the
+/// async-nats auth callback.
+pub enum CredentialSource {
+    TomlShared {
+        user: String,
+        pass: String,
+    },
+    ZitadelJwt {
+        key: MachineKeyFile,
+        oidc_issuer_url: String,
+        audience: String,
+        http: reqwest::Client,
+        cache: Mutex<Option<CachedToken>>,
+    },
+}
+
+impl CredentialSource {
+    /// Return current valid credentials, minting fresh material when any
+    /// cached value is within its safety window of expiry. Called on
+    /// every NATS (re)connect.
+    pub async fn next_credential(&self) -> Result<NatsCredential> {
+        match self {
+            Self::TomlShared { user, pass } => Ok(NatsCredential::UserPass {
+                user: user.clone(),
+                pass: pass.clone(),
+            }),
+            Self::ZitadelJwt { .. } => self.zitadel_next().await,
+        }
+    }
+
+    async fn zitadel_next(&self) -> Result<NatsCredential> {
+        // Fast path: lock the cache synchronously, copy out the token if
+        // it's comfortably valid, drop the lock. Holding a MutexGuard
+        // across `.await` would make this future !Sync, which
+        // async-nats's `with_auth_callback` rejects at compile time.
+        if let Some(token) = self.cached_if_fresh() {
+            return Ok(NatsCredential::BearerToken(token));
+        }
+        // Slow path: mint outside any lock. Two concurrent (re)connect
+        // attempts could both reach here and both mint; that's a wasted
+        // HTTP round-trip in a rare race, not a correctness issue —
+        // the second writer wins and replaces the first's value.
+        let fresh = self.zitadel_mint().await?;
+        let token = fresh.access_token.clone();
+        if let Self::ZitadelJwt {
+            cache, audience, ..
+        } = self
+            && let Ok(mut guard) = cache.lock()
+        {
+            *guard = Some(fresh);
+            tracing::info!(audience = %audience, "minted fresh Zitadel access token");
+        }
+        Ok(NatsCredential::BearerToken(token))
+    }
+
+    fn cached_if_fresh(&self) -> Option<String> {
+        let Self::ZitadelJwt { cache, .. } = self else {
+            return None;
+        };
+        let now = chrono::Utc::now().timestamp();
+        let guard = cache.lock().ok()?;
+        let cached = guard.as_ref()?;
+        if cached.expires_at_unix - TOKEN_REFRESH_LEEWAY_SECS > now {
+            Some(cached.access_token.clone())
+        } else {
+            None
+        }
+    }
+
+    async fn zitadel_mint(&self) -> Result<CachedToken> {
+        let Self::ZitadelJwt {
+            key,
+            oidc_issuer_url,
+            audience,
+            http,
+            ..
+        } = self
+        else {
+            anyhow::bail!("zitadel_mint called on non-ZitadelJwt variant");
+        };
+
+        let now = chrono::Utc::now().timestamp();
+        let assertion = build_assertion(key, oidc_issuer_url, now)?;
+        let scope = build_scope(audience);
+        let token_url = build_token_url(oidc_issuer_url);
+
+        let resp = http
+            .post(&token_url)
+            .form(&[
+                (
+                    "grant_type",
+                    "urn:ietf:params:oauth:grant-type:jwt-bearer".to_string(),
+                ),
+                ("assertion", assertion),
+                ("scope", scope),
+            ])
+            .send()
+            .await
+            .with_context(|| format!("POST {token_url}"))?;
+
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            anyhow::bail!("Zitadel token endpoint returned {status}: {body}");
+        }
+
+        #[derive(Deserialize)]
+        struct TokenResponse {
+            access_token: String,
+            #[serde(default)]
+            expires_in: Option<i64>,
+        }
+        let tr: TokenResponse = resp.json().await.context("parsing token response")?;
+        // Zitadel typically returns 12h (43200s); be defensive against
+        // a missing field by assuming a conservative 1h.
+        let expires_in = tr.expires_in.unwrap_or(3600);
+        Ok(CachedToken {
+            access_token: tr.access_token,
+            expires_at_unix: now + expires_in,
+        })
+    }
+}
+
+/// Build and sign (RS256) the JWT-bearer assertion. Kept apart from the
+/// HTTP path, and split into pure claim/header builders, so the payload
+/// shape is unit-testable without a server or an RSA key fixture. Errors
+/// when `key.key` is not a parseable RSA PEM or when signing fails.
+pub(crate) fn build_assertion(
+    key: &MachineKeyFile,
+    oidc_issuer_url: &str,
+    now: i64,
+) -> Result<String> {
+    let claims = build_assertion_claims(key, oidc_issuer_url, now);
+    let header = build_assertion_header(key);
+    let assertion = jsonwebtoken::encode(
+        &header,
+        &claims,
+        &EncodingKey::from_rsa_pem(key.key.as_bytes())
+            .context("parsing RSA private key from machine key file")?,
+    )
+    .context("signing JWT assertion")?;
+    Ok(assertion)
+}
+
+/// Pure claim payload for the JWT-bearer assertion. `iss == sub == userId`
+/// is a Zitadel requirement; `aud` is Zitadel itself (`oidc_issuer_url`
+/// with trailing slashes trimmed, mirroring `build_token_url`, so a
+/// configured `https://sso/` still matches); `exp - iat` MUST be ≤ 60 s.
+pub(crate) fn build_assertion_claims(
+    key: &MachineKeyFile,
+    oidc_issuer_url: &str,
+    now: i64,
+) -> serde_json::Value {
+    serde_json::json!({
+        "iss": key.user_id,
+        "sub": key.user_id,
+        "aud": oidc_issuer_url.trim_end_matches('/'),
+        "exp": now + ASSERTION_LIFETIME_SECS,
+        "iat": now,
+    })
+}
+
+/// RS256 JWT header for the assertion. `kid` (the keyfile's `keyId`) tells
+/// Zitadel which of the machine user's registered keys verifies the signature.
+pub(crate) fn build_assertion_header(key: &MachineKeyFile) -> JwtHeader {
+    let mut header = JwtHeader::new(Algorithm::RS256);
+    header.kid = Some(key.key_id.clone());
+    header
+}
+
+/// Build the OAuth `scope` string for the JWT-bearer token request.
+///
+/// Three scopes are needed for the access token to be useful here:
+///
+///   * `openid` — base OIDC requirement.
+///   * `urn:zitadel:iam:org:projects:roles` (PLURAL "projects") —
+///     tells Zitadel to include the role-claim block in the access
+///     token. Without this, the callout sees "no authorized role
+///     in token" even when the user has a project role grant.
+///   * `urn:zitadel:iam:org:project:id:<aud>:aud` (SINGULAR
+///     "project") — adds <aud> to the access token's `aud` claim
+///     so the callout's audience validation accepts the project
+///     ID we're using as the JWT-bearer audience.
+///
+/// The plural-vs-singular distinction is a Zitadel convention,
+/// not a typo. Both Zitadel-specific scopes are required.
+pub(crate) fn build_scope(audience: &str) -> String {
+    format!(
+        "openid \
+         urn:zitadel:iam:org:projects:roles \
+         urn:zitadel:iam:org:project:id:{audience}:aud"
+    )
+}
+
+/// Resolve the token endpoint URL, tolerating any number of trailing
+/// slashes on `oidc_issuer_url`. Without trimming, a configured issuer of
+/// `https://sso.example.com/` produces `…//oauth/v2/token` which 404s.
+pub(crate) fn build_token_url(oidc_issuer_url: &str) -> String {
+    format!("{}/oauth/v2/token", oidc_issuer_url.trim_end_matches('/'))
+}
+
+// ---- helper types ----------------------------------------------------------
+
+/// JSON keyfile content as Zitadel emits it for a `KEY_TYPE_JSON` machine key. `key` is a
+/// PEM-encoded RSA private key; the derived `Debug` prints it verbatim, so never log this.
+#[derive(Debug, Clone, Deserialize)]
+pub struct MachineKeyFile {
+    #[serde(rename = "type")]
+    pub _type: String,
+    #[serde(rename = "keyId")]
+    pub key_id: String,
+    pub key: String,
+    #[serde(rename = "userId")]
+    pub user_id: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct CachedToken {
+    pub(crate) access_token: String,
+    /// Unix seconds at which the token expires: the local clock at mint
+    /// time plus the OAuth response's `expires_in`. Read by
+    /// `cached_if_fresh` when deciding whether to reuse the token.
+    pub(crate) expires_at_unix: i64,
+}
+
+/// Refresh tokens this many seconds before their advertised expiry.
+/// Five minutes leaves headroom for clock skew, slow networks, and
+/// the round-trip cost of re-minting against Zitadel.
+pub const TOKEN_REFRESH_LEEWAY_SECS: i64 = 5 * 60;
+
+/// Lifetime of the JWT *assertion* (the client-side bearer JWT we sign
+/// to authenticate to Zitadel's token endpoint). Zitadel rejects
+/// assertions with `exp - iat > 60s`; one minute is the safe ceiling.
+pub const ASSERTION_LIFETIME_SECS: i64 = 60;
+
+// ---- factory ---------------------------------------------------------------
+
+/// Build the appropriate `CredentialSource` from the parsed config.
+///
+/// For [`CredentialsSection::ZitadelJwt`] this reads the keyfile from disk
+/// (Secret volume in the operator's Pod; dropped by `FleetDeviceSetupScore`
+/// on the agent's VM — only the path differs) and builds an HTTP client with
+/// a 10 s timeout honoring `danger_accept_invalid_certs`. Errors if the
+/// keyfile cannot be read or parsed, or if the client cannot be built.
+pub fn credential_source_from_config(creds: &CredentialsSection) -> Result<Arc<CredentialSource>> {
+    match creds {
+        CredentialsSection::TomlShared {
+            nats_user,
+            nats_pass,
+        } => Ok(Arc::new(CredentialSource::TomlShared {
+            user: nats_user.clone(),
+            pass: nats_pass.clone(),
+        })),
+        CredentialsSection::ZitadelJwt {
+            key_path,
+            oidc_issuer_url,
+            audience,
+            danger_accept_invalid_certs,
+        } => Ok(Arc::new(CredentialSource::ZitadelJwt {
+            key: load_machine_key(key_path)?,
+            oidc_issuer_url: oidc_issuer_url.clone(),
+            audience: audience.clone(),
+            http: reqwest::Client::builder()
+                .danger_accept_invalid_certs(*danger_accept_invalid_certs)
+                .timeout(Duration::from_secs(10))
+                .build()
+                .context("building HTTP client for Zitadel token endpoint")?,
+            cache: Mutex::new(None),
+        })),
+    }
+}
+
+fn load_machine_key(key_path: &Path) -> Result<MachineKeyFile> {
+    let raw = std::fs::read_to_string(key_path)
+        .with_context(|| format!("reading machine key file at {}", key_path.display()))?;
+    serde_json::from_str(&raw)
+        .with_context(|| format!("parsing machine key file at {}", key_path.display()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn fake_key() -> MachineKeyFile {
+        MachineKeyFile {
+            _type: "serviceaccount".to_string(),
+            key_id: "kid-371358469099356247".to_string(),
+            // Real PEM not required for the pure-builder tests; the
+            // signing path that needs a parseable key is exercised
+            // end-to-end in the e2e harness.
+            key: "PEM-PLACEHOLDER".to_string(),
+            user_id: "uid-371358469065801815".to_string(),
+        }
+    }
+
+    fn zjwt_source() -> CredentialSource {
+        CredentialSource::ZitadelJwt {
+            key: fake_key(),
+            oidc_issuer_url: "http://sso.fleet.local:8080".to_string(),
+            audience: "366378028009259037".to_string(),
+            http: reqwest::Client::new(),
+            cache: Mutex::new(None),
+        }
+    }
+
+    // ---- next_credential / cache state -------------------------------------
+
+    #[tokio::test]
+    async fn toml_shared_returns_userpass_each_call() {
+        let s = CredentialSource::TomlShared {
+            user: "u".to_string(),
+            pass: "p".to_string(),
+        };
+        let c = s.next_credential().await.unwrap();
+        match c {
+            NatsCredential::UserPass { user, pass } => {
+                assert_eq!(user, "u");
+                assert_eq!(pass, "p");
+            }
+            other => panic!("expected UserPass, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn cached_token_within_leeway_is_treated_as_expired() {
+        // Sanity-check the comparison so refactors don't accidentally
+        // invert the leeway window.
+        let now = chrono::Utc::now().timestamp();
+        let about_to_expire = CachedToken {
+            access_token: "x".to_string(),
+            expires_at_unix: now + TOKEN_REFRESH_LEEWAY_SECS - 1,
+        };
+        assert!(
+            about_to_expire.expires_at_unix - TOKEN_REFRESH_LEEWAY_SECS <= now,
+            "tokens within the leeway window must be considered expired"
+        );
+
+        let comfortable = CachedToken {
+            access_token: "x".to_string(),
+            expires_at_unix: now + TOKEN_REFRESH_LEEWAY_SECS + 60,
+        };
+        assert!(
+            comfortable.expires_at_unix - TOKEN_REFRESH_LEEWAY_SECS > now,
+            "tokens with comfortable headroom must be cache-hits"
+        );
+    }
+
+    #[test]
+    fn cached_if_fresh_returns_some_when_outside_leeway() {
+        let src = zjwt_source();
+        let now = chrono::Utc::now().timestamp();
+        if let CredentialSource::ZitadelJwt { cache, .. } = &src {
+            *cache.lock().unwrap() = Some(CachedToken {
+                access_token: "fresh".to_string(),
+                expires_at_unix: now + TOKEN_REFRESH_LEEWAY_SECS + 60,
+            });
+        }
+        assert_eq!(src.cached_if_fresh(), Some("fresh".to_string()));
+    }
+
+    #[test]
+    fn cached_if_fresh_returns_none_when_no_cache() {
+        // Brand-new ZitadelJwt source — no token has been minted yet.
+        // Forces the slow path on first connect.
+        let src = zjwt_source();
+        assert_eq!(src.cached_if_fresh(), None);
+    }
+
+    #[test]
+    fn cached_if_fresh_returns_none_for_toml_shared() {
+        // Defensive: cached_if_fresh is only meaningful for ZitadelJwt;
+        // TomlShared has no cache. A nonsensical call must return None,
+        // not panic, so the cold-path can degrade gracefully.
+        let src = CredentialSource::TomlShared {
+            user: "u".into(),
+            pass: "p".into(),
+        };
+        assert_eq!(src.cached_if_fresh(), None);
+    }
+
+    // ---- assertion claims / header (pure builders) ------------------------
+
+    #[test]
+    fn assertion_claims_carry_iss_sub_aud_exp_iat() {
+        let now = 1_700_000_000;
+        let claims = build_assertion_claims(&fake_key(), "http://sso.fleet.local:8080", now);
+        assert_eq!(claims["iss"], "uid-371358469065801815");
+        assert_eq!(claims["sub"], "uid-371358469065801815");
+        assert_eq!(claims["aud"], "http://sso.fleet.local:8080");
+        assert_eq!(claims["iat"].as_i64(), Some(now));
+        assert_eq!(claims["exp"].as_i64(), Some(now + ASSERTION_LIFETIME_SECS));
+    }
+
+    #[test]
+    fn assertion_lifetime_locked_at_60_seconds() {
+        // Zitadel rejects assertions where exp - iat > 60s. If anyone
+        // bumps ASSERTION_LIFETIME_SECS thinking "more is safer", the
+        // mints will silently start failing in prod with no helpful
+        // error. Lock the constant.
+        assert_eq!(ASSERTION_LIFETIME_SECS, 60);
+    }
+
+    #[test]
+    fn assertion_header_carries_kid_and_rs256() {
+        let header = build_assertion_header(&fake_key());
+        assert_eq!(header.alg, jsonwebtoken::Algorithm::RS256);
+        assert_eq!(header.kid.as_deref(), Some("kid-371358469099356247"));
+    }
+
+    // ---- scope string ------------------------------------------------------
+
+    #[test]
+    fn scope_includes_plural_projects_roles() {
+        // The plural-projects URN is what tells Zitadel to emit the
+        // role claim. Day-one bug; lock it.
+        let s = build_scope("366378028009259037");
+        assert!(
+            s.contains("urn:zitadel:iam:org:projects:roles"),
+            "scope must include the PLURAL projects-roles URN; got {s:?}"
+        );
+    }
+
+    #[test]
+    fn scope_audience_uses_singular_project_id_urn() {
+        // The singular-project URN tells Zitadel to put <id> into the
+        // access token's aud claim. Different URN entirely from the
+        // plural one above; both required.
+        let s = build_scope("366378028009259037");
+        assert!(
+            s.contains("urn:zitadel:iam:org:project:id:366378028009259037:aud"),
+            "scope must include the SINGULAR project:id:<aud>:aud URN; got {s:?}"
+        );
+    }
+
+    #[test]
+    fn scope_includes_openid_base() {
+        let s = build_scope("any");
+        assert!(
+            s.split_whitespace().any(|tok| tok == "openid"),
+            "scope must include `openid` as a standalone token; got {s:?}"
+        );
+    }
+
+    // ---- token URL ---------------------------------------------------------
+
+    #[test]
+    fn token_url_appends_oauth_endpoint() {
+        assert_eq!(
+            build_token_url("http://sso.fleet.local:8080"),
+            "http://sso.fleet.local:8080/oauth/v2/token"
+        );
+    }
+
+    #[test]
+    fn token_url_strips_single_trailing_slash() {
+        // A trailing slash would yield `…//oauth/v2/token`, which 404s.
+        // Common configuration drift; the trim guards against it.
+        assert_eq!(
+            build_token_url("http://sso.fleet.local:8080/"),
+            "http://sso.fleet.local:8080/oauth/v2/token"
+        );
+    }
+
+    #[test]
+    fn token_url_strips_multiple_trailing_slashes() {
+        // Defensive — `trim_end_matches('/')` peels all of them, not
+        // just the first. Locks that semantics.
+        assert_eq!(
+            build_token_url("http://sso.fleet.local:8080///"),
+            "http://sso.fleet.local:8080/oauth/v2/token"
+        );
+    }
+
+    // ---- MachineKeyFile JSON parsing --------------------------------------
+
+    #[test]
+    fn machine_key_file_parses_zitadel_json_shape() {
+        // The serde renames (`type`, `keyId`, `userId`) are easy to
+        // break. This is the literal JSON shape Zitadel's
+        // /management/v1/users/.../keys endpoint emits.
+        let raw = r#"{
+            "type":   "serviceaccount",
+            "keyId":  "371358469099356247",
+            "key":    "-----BEGIN RSA PRIVATE KEY-----\nABC\n-----END RSA PRIVATE KEY-----\n",
+            "userId": "371358469065801815"
+        }"#;
+        let parsed: MachineKeyFile = serde_json::from_str(raw).expect("valid keyfile");
+        assert_eq!(parsed._type, "serviceaccount");
+        assert_eq!(parsed.key_id, "371358469099356247");
+        assert_eq!(parsed.user_id, "371358469065801815");
+        assert!(parsed.key.contains("BEGIN RSA PRIVATE KEY"));
+    }
+}
--- a/fleet/harmony-fleet-auth/src/lib.rs
+++ b/fleet/harmony-fleet-auth/src/lib.rs
@@ -0,0 +1,63 @@
+//! Shared NATS auth plumbing for fleet processes.
+//!
+//! Two consumers today:
+//!
+//! - **`harmony-fleet-agent`** — reads `[credentials]` from
+//!   `/etc/fleet-agent/config.toml`. Per-device Zitadel machine user
+//!   with the `device` role.
+//! - **`harmony-fleet-operator`** — reads the same TOML shape from a
+//!   single env var (the env var's value is the TOML snippet for the
+//!   `[credentials]` table). Singleton machine user with the
+//!   `fleet-admin` role.
+//!
+//! Both deserialize into the **same** [`CredentialsSection`], factory
+//! into the **same** [`CredentialSource`], and use the **same**
+//! [`connect_options_with_credentials`] helper to build a NATS client.
+//! The only thing that differs between processes is where the bytes of
+//! the TOML config come from and which Zitadel user signs the
+//! JWT-bearer assertion.
+//!
+//! Adding a new mode (e.g. user JWT from a CLI session) is one new
+//! variant on `CredentialsSection` + `CredentialSource`; everything
+//! else flows through unchanged.
+
+mod config;
+mod credentials;
+
+pub use config::CredentialsSection;
+pub use credentials::{
+    ASSERTION_LIFETIME_SECS, CachedToken, CredentialSource, MachineKeyFile, NatsCredential,
+    TOKEN_REFRESH_LEEWAY_SECS, credential_source_from_config,
+};
+
+use std::sync::Arc;
+
+/// Build `async_nats::ConnectOptions` wired with the auth callback
+/// that pulls fresh credentials from `creds` on every (re)connect:
+/// `UserPass` sets username/password, `BearerToken` sets the token, and
+/// a failing source surfaces as an `async_nats::AuthError`. Caller chains
+/// more options (`ping_interval`, `event_callback`, …) before `.connect(urls)`.
+pub fn connect_options_with_credentials(
+    creds: Arc<CredentialSource>,
+) -> async_nats::ConnectOptions {
+    async_nats::ConnectOptions::with_auth_callback(move |_nonce| {
+        let cs = creds.clone();
+        async move {
+            let cred = cs
+                .next_credential()
+                .await
+                .map_err(|e| async_nats::AuthError::new(format!("credential source: {e}")))?;
+            let mut auth = async_nats::Auth::new();
+            match cred {
+                NatsCredential::UserPass { user, pass } => {
+                    auth.username = Some(user);
+                    auth.password = Some(pass);
+                }
+                NatsCredential::BearerToken(token) => {
+                    auth.token = Some(token);
+                }
+            }
+            Ok(auth)
+        }
+    })
+}
--- a/fleet/harmony-fleet-operator/Cargo.toml
+++ b/fleet/harmony-fleet-operator/Cargo.toml
@@ -6,7 +6,9 @@ rust-version = "1.85"

 [dependencies]
 harmony = { path = "../../harmony" }
+harmony-fleet-auth = { path = "../harmony-fleet-auth" }
 harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
+toml = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
 kube = { workspace = true, features = ["runtime", "derive"] }
 k8s-openapi.workspace = true
--- a/fleet/harmony-fleet-operator/src/chart.rs
+++ b/fleet/harmony-fleet-operator/src/chart.rs
@@ -20,12 +20,13 @@ use std::path::{Path, PathBuf};

 use anyhow::{Context, Result};
 use harmony::modules::application::helm::{HelmChart, HelmResourceKind};
+use k8s_openapi::ByteString;
 use k8s_openapi::api::apps::v1::{
    Deployment as K8sDeployment, DeploymentSpec as K8sDeploymentSpec,
 };
 use k8s_openapi::api::core::v1::{
-    Capabilities, Container, EnvVar, PodSpec, PodTemplateSpec, SeccompProfile, SecurityContext,
-    ServiceAccount,
+    Capabilities, Container, EnvVar, EnvVarSource, PodSpec, PodTemplateSpec, SeccompProfile,
+    Secret, SecretKeySelector, SecurityContext, ServiceAccount,
 };
 use k8s_openapi::api::rbac::v1::{ClusterRole, ClusterRoleBinding, PolicyRule, RoleRef, Subject};
 use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
@@ -60,6 +61,41 @@ pub struct ChartOptions {
    pub nats_url: String,
    /// `RUST_LOG` value for the operator process.
    pub log_level: String,
+    /// `[credentials]` TOML payload to inject as
+    /// `FLEET_OPERATOR_CREDENTIALS_TOML` via a Secret. `None` skips the
+    /// Secret entirely and lets the operator connect to NATS without
+    /// auth — only sensible when there's no callout in front of NATS.
+    pub credentials: Option<OperatorCredentials>,
+}
+
+/// What the operator pod needs to authenticate to NATS via the auth
+/// callout: a TOML snippet matching the agent's `[credentials]`
+/// table, plus the JSON keyfile content the TOML references via
+/// `key_path`.
+///
+/// Both payloads go into a single Secret (`harmony-fleet-operator-secrets`).
+/// The TOML is exposed as `FLEET_OPERATOR_CREDENTIALS_TOML` (env var);
+/// the keyfile is mounted as a file at `key_mount_path` (conventionally
+/// `/etc/fleet-operator/zitadel-key.json`), which must equal the
+/// TOML's `key_path`.
+pub struct OperatorCredentials {
+    /// TOML payload, e.g.
+    /// ```text
+    /// type = "zitadel-jwt"
+    /// key_path = "/etc/fleet-operator/zitadel-key.json"
+    /// oidc_issuer_url = "http://sso.fleet.local:8080"
+    /// audience = "<project_id>"
+    /// ```
+    pub credentials_toml: String,
+    /// JSON keyfile content (the Zitadel `KEY_TYPE_JSON` blob). Must be
+    /// the file the `credentials_toml`'s `key_path` resolves to inside
+    /// the Pod. Whoever constructs this is responsible for keeping the
+    /// two in sync.
+    pub zitadel_keyfile_json: String,
+    /// Where in the Pod's filesystem to mount the keyfile. MUST match
+    /// the `key_path` in `credentials_toml`. Conventionally
+    /// `/etc/fleet-operator/zitadel-key.json`.
+    pub key_mount_path: String,
 }

 impl Default for ChartOptions {
@@ -71,14 +107,22 @@ impl Default for ChartOptions {
            namespace: "fleet-system".to_string(),
            nats_url: "nats://fleet-nats.fleet-system:4222".to_string(),
            log_level: "info,kube_runtime=warn".to_string(),
+            credentials: None,
        }
    }
 }

-const RELEASE_NAME: &str = "harmony-fleet-operator";
-const SERVICE_ACCOUNT: &str = "harmony-fleet-operator";
-const CLUSTER_ROLE: &str = "harmony-fleet-operator";
-const CLUSTER_ROLE_BINDING: &str = "harmony-fleet-operator";
+pub const RELEASE_NAME: &str = "harmony-fleet-operator";
+pub const SERVICE_ACCOUNT: &str = "harmony-fleet-operator";
+pub const CLUSTER_ROLE: &str = "harmony-fleet-operator";
+pub const CLUSTER_ROLE_BINDING: &str = "harmony-fleet-operator";
+pub const SECRET_NAME: &str = "harmony-fleet-operator-secrets";
+/// Key inside the Secret holding the `[credentials]` TOML.
+pub const SECRET_KEY_CREDENTIALS_TOML: &str = "credentials.toml";
+/// Key inside the Secret holding the JSON keyfile.
+pub const SECRET_KEY_ZITADEL_KEYFILE: &str = "zitadel-key.json";
+/// Volume name for the keyfile mount. Internal to the Pod spec.
+const KEYFILE_VOLUME_NAME: &str = "zitadel-key";

 /// Build + write the chart to `opts.output_dir`. Returns the full
 /// path to the generated chart directory (which is what `helm
@@ -107,6 +151,12 @@ pub fn build_chart(opts: &ChartOptions) -> Result<PathBuf> {
    chart.add_resource(HelmResourceKind::ClusterRoleBinding(cluster_role_binding(
        &opts.namespace,
    )));
+    // Secret intentionally NOT included in the on-disk helm chart —
+    // credentials are operator-environment-specific and out of scope
+    // for a redistributable chart. The e2e bring-up applies the Secret
+    // directly via `operator_secret()` (used as a `K8sResourceScore`)
+    // and the chart's Deployment expects the Secret to be present in
+    // the namespace at install time.
    chart.add_resource(HelmResourceKind::Deployment(operator_deployment(opts)));

    let written = chart
@@ -115,6 +165,32 @@ pub fn build_chart(opts: &ChartOptions) -> Result<PathBuf> {
    Ok(written)
 }

+/// Build the operator's `Opaque` Secret (`SECRET_NAME`, in `opts.namespace`)
+/// holding the `[credentials]` TOML and the Zitadel JSON keyfile. Returns
+/// `None` when no credentials are configured (no-auth dev mode).
+pub fn operator_secret(opts: &ChartOptions) -> Option<Secret> {
+    let creds = opts.credentials.as_ref()?;
+    let mut data: BTreeMap<String, ByteString> = BTreeMap::new();
+    data.insert(
+        SECRET_KEY_CREDENTIALS_TOML.to_string(),
+        ByteString(creds.credentials_toml.as_bytes().to_vec()),
+    );
+    data.insert(
+        SECRET_KEY_ZITADEL_KEYFILE.to_string(),
+        ByteString(creds.zitadel_keyfile_json.as_bytes().to_vec()),
+    );
+    Some(Secret {
+        metadata: ObjectMeta {
+            name: Some(SECRET_NAME.to_string()),
+            namespace: Some(opts.namespace.clone()),
+            ..Default::default()
+        },
+        data: Some(data),
+        type_: Some("Opaque".to_string()),
+        ..Default::default()
+    })
+}
+
 /// Annotate a CRD with `helm.sh/resource-policy: keep` so
 /// `helm uninstall` **does not** cascade-delete the CRD and its
 /// CRs. Without this, uninstall wipes every `Deployment` + `Device`
@@ -213,12 +289,92 @@ fn cluster_role_binding(namespace: &str) -> ClusterRoleBinding {
 }

 fn operator_deployment(opts: &ChartOptions) -> K8sDeployment {
+    use k8s_openapi::api::core::v1::{KeyToPath, SecretVolumeSource, Volume, VolumeMount};
+
    let mut match_labels = BTreeMap::new();
    match_labels.insert(
        "app.kubernetes.io/name".to_string(),
        RELEASE_NAME.to_string(),
    );

+    let mut env = vec![
+        EnvVar {
+            name: "NATS_URL".to_string(),
+            value: Some(opts.nats_url.clone()),
+            ..Default::default()
+        },
+        EnvVar {
+            name: "RUST_LOG".to_string(),
+            value: Some(opts.log_level.clone()),
+            ..Default::default()
+        },
+    ];
+
+    let mut volume_mounts: Vec<VolumeMount> = Vec::new();
+    let mut volumes: Vec<Volume> = Vec::new();
+
+    if let Some(creds) = opts.credentials.as_ref() {
+        // The whole TOML payload travels as a single env var so the
+        // operator can `toml::from_str(env::var(...))` directly. Same
+        // shape the agent reads from `/etc/fleet-agent/config.toml`.
+        env.push(EnvVar {
+            name: "FLEET_OPERATOR_CREDENTIALS_TOML".to_string(),
+            value_from: Some(EnvVarSource {
+                secret_key_ref: Some(SecretKeySelector {
+                    name: SECRET_NAME.to_string(),
+                    key: SECRET_KEY_CREDENTIALS_TOML.to_string(),
+                    optional: Some(false),
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        });
+
+        // The keyfile must be a real file because
+        // `credential_source_from_config` reads it via `key_path` (same
+        // contract as the agent). Mount the Secret volume on the parent
+        // directory of `key_mount_path`, projecting only the keyfile entry.
+        let mount_path = std::path::Path::new(&creds.key_mount_path);
+        let mount_dir = mount_path
+            .parent()
+            .map(|p| p.to_string_lossy().to_string())
+            .unwrap_or_else(|| "/etc/fleet-operator".to_string());
+        let mount_filename = mount_path
+            .file_name()
+            .map(|n| n.to_string_lossy().to_string())
+            .unwrap_or_else(|| SECRET_KEY_ZITADEL_KEYFILE.to_string());
+
+        volume_mounts.push(VolumeMount {
+            name: KEYFILE_VOLUME_NAME.to_string(),
+            mount_path: mount_dir,
+            read_only: Some(true),
+            ..Default::default()
+        });
+        volumes.push(Volume {
+            name: KEYFILE_VOLUME_NAME.to_string(),
+            secret: Some(SecretVolumeSource {
+                secret_name: Some(SECRET_NAME.to_string()),
+                items: Some(vec![KeyToPath {
+                    key: SECRET_KEY_ZITADEL_KEYFILE.to_string(),
+                    path: mount_filename,
+                    // 0o444 = world-read. The Secret volume is owned by
+                    // root (kubelet default; we don't pin a fsGroup
+                    // because we also don't pin runAsUser for SCC
+                    // compatibility — see container_security_context).
+                    // World-read inside the pod is safe: the pod has a
+                    // single container, the Secret namespace is locked
+                    // down, and the file never escapes the pod
+                    // filesystem. With 0o400 only the owner (root) can read
+                    // it, so the non-root operator process hits EACCES.
+                    mode: Some(0o444),
+                }]),
+                default_mode: Some(0o444),
+                optional: Some(false),
+            }),
+            ..Default::default()
+        });
+    }
+
    K8sDeployment {
        metadata: ObjectMeta {
            name: Some(RELEASE_NAME.to_string()),
@@ -243,21 +399,20 @@ fn operator_deployment(opts: &ChartOptions) -> K8sDeployment {
                        name: "operator".to_string(),
                        image: Some(opts.image.clone()),
                        image_pull_policy: Some(opts.image_pull_policy.clone()),
-                        env: Some(vec![
-                            EnvVar {
-                                name: "NATS_URL".to_string(),
-                                value: Some(opts.nats_url.clone()),
-                                ..Default::default()
-                            },
-                            EnvVar {
-                                name: "RUST_LOG".to_string(),
-                                value: Some(opts.log_level.clone()),
-                                ..Default::default()
-                            },
-                        ]),
+                        env: Some(env),
+                        volume_mounts: if volume_mounts.is_empty() {
+                            None
+                        } else {
+                            Some(volume_mounts)
+                        },
                        security_context: Some(container_security_context()),
                        ..Default::default()
                    }],
+                    volumes: if volumes.is_empty() {
+                        None
+                    } else {
+                        Some(volumes)
+                    },
                    ..Default::default()
                }),
            },
@@ -267,6 +422,21 @@ fn operator_deployment(opts: &ChartOptions) -> K8sDeployment {
    }
 }

+// Public wrappers around the private manifest builders so the e2e bring-up
+// can apply the operator inline (Score-style) without re-implementing them.
+pub fn build_service_account(opts: &ChartOptions) -> ServiceAccount {
+    service_account(&opts.namespace)
+}
+pub fn build_cluster_role() -> ClusterRole {
+    cluster_role()
+}
+pub fn build_cluster_role_binding(opts: &ChartOptions) -> ClusterRoleBinding {
+    cluster_role_binding(&opts.namespace)
+}
+pub fn build_operator_deployment(opts: &ChartOptions) -> K8sDeployment {
+    operator_deployment(opts)
+}
+
 /// Minimum-privilege container security context.
 ///
 /// - `runAsNonRoot: true` — a compromised operator pod with
--- a/fleet/harmony-fleet-operator/src/lib.rs
+++ b/fleet/harmony-fleet-operator/src/lib.rs
@@ -6,6 +6,7 @@
 //! — can import the typed `Deployment`, `DeploymentSpec`,
 //! `ScorePayload`, etc. without duplicating them.

+pub mod chart;
 pub mod crd;
 pub mod device_reconciler;
 pub mod fleet_aggregator;
--- a/fleet/harmony-fleet-operator/src/main.rs
+++ b/fleet/harmony-fleet-operator/src/main.rs
@@ -1,15 +1,18 @@
-mod chart;
 mod controller;
 mod install;

-use harmony_fleet_operator::{crd, device_reconciler, fleet_aggregator};
+use harmony_fleet_operator::{chart, crd, device_reconciler, fleet_aggregator};

-use anyhow::Result;
+use anyhow::{Context, Result};
 use async_nats::jetstream;
 use clap::{Parser, Subcommand};
+use harmony_fleet_auth::{
+    CredentialsSection, connect_options_with_credentials, credential_source_from_config,
+};
 use harmony_reconciler_contracts::BUCKET_DESIRED_STATE;
 use kube::Client;
 use std::path::PathBuf;
+use std::time::Duration;

 #[derive(Parser)]
 #[command(
@@ -35,6 +38,18 @@ struct Cli {
        global = true
    )]
    kv_bucket: String,
+
+    /// `[credentials]` TOML payload (same shape the agent reads from
+    /// `/etc/fleet-agent/config.toml`). Injected into the Pod as an env
+    /// var from the operator's Secret. Empty string means "no auth — bare
+    /// connect" (for local dev without a callout-protected NATS).
+    #[arg(
+        long,
+        env = "FLEET_OPERATOR_CREDENTIALS_TOML",
+        default_value = "",
+        global = true
+    )]
+    credentials_toml: String,
 }

 #[derive(Subcommand)]
@@ -73,7 +88,7 @@ async fn main() -> Result<()> {
    let cli = Cli::parse();
    match cli.command.unwrap_or(Command::Run) {
        Command::Install => install::install_crds().await,
-        Command::Run => run(&cli.nats_url, &cli.kv_bucket).await,
+        Command::Run => run(&cli.nats_url, &cli.kv_bucket, &cli.credentials_toml).await,
        Command::Chart {
            output,
            image,
@@ -89,6 +104,12 @@ async fn main() -> Result<()> {
                namespace,
                nats_url,
                log_level,
+                // The disk-distributed chart never carries operator
+                // credentials — those are environment-specific. The
+                // operator deploys into a namespace where the matching
+                // Secret already exists (provisioned out-of-band, or
+                // by the e2e bring-up's K8sResourceScore path).
+                credentials: None,
            })?;
            println!("{}", written.display());
            Ok(())
@@ -96,10 +117,8 @@ async fn main() -> Result<()> {
    }
 }

-async fn run(nats_url: &str, bucket: &str) -> Result<()> {
-    // Retry on the initial connect — startup races against the NATS
-    // server becoming fully ready.
-    let nats = connect_with_retry(nats_url).await?;
+async fn run(nats_url: &str, bucket: &str, credentials_toml: &str) -> Result<()> {
+    let nats = connect_with_retry(nats_url, credentials_toml).await?;
    tracing::info!(url = %nats_url, "connected to NATS");
    let js = jetstream::new(nats);
    let desired_state_kv = js
@@ -129,18 +148,66 @@ async fn run(nats_url: &str, bucket: &str) -> Result<()> {
    }
 }

-async fn connect_with_retry(nats_url: &str) -> Result<async_nats::Client> {
-    use std::time::Duration;
+/// Connect to NATS, retrying on the initial connect — startup races
+/// against the NATS server becoming fully ready.
+///
+/// `credentials_toml` is the in-memory `[credentials]` TOML snippet
+/// the operator's pod gets via the `FLEET_OPERATOR_CREDENTIALS_TOML`
+/// env var (sourced from a Kubernetes Secret). Same shape as the
+/// agent's `[credentials]` table; same factory; same auth callback.
+/// Empty string means bypass — connect with no creds (only useful
+/// for callout-less local dev).
+async fn connect_with_retry(nats_url: &str, credentials_toml: &str) -> Result<async_nats::Client> {
    let mut last_err: Option<anyhow::Error> = None;
    for attempt in 0..15 {
-        match async_nats::connect(nats_url).await {
+        let attempt_result = if credentials_toml.is_empty() {
+            tracing::warn!(
+                "FLEET_OPERATOR_CREDENTIALS_TOML is empty — connecting to NATS \
+                 without auth. Production deploys MUST mount a credentials Secret."
+            );
+            async_nats::connect(nats_url)
+                .await
+                .map_err(anyhow::Error::from)
+        } else {
+            connect_with_credentials(nats_url, credentials_toml).await
+        };
+        match attempt_result {
            Ok(c) => return Ok(c),
            Err(e) => {
                tracing::warn!(attempt, error = %e, "NATS connect failed; retrying");
-                last_err = Some(e.into());
+                last_err = Some(e);
                tokio::time::sleep(Duration::from_secs(2)).await;
            }
        }
    }
    Err(last_err.unwrap_or_else(|| anyhow::anyhow!("NATS connect failed after retries")))
 }
+
+/// Parse the `[credentials]` TOML, build its credential source, and connect.
+async fn connect_with_credentials(
+    nats_url: &str,
+    credentials_toml: &str,
+) -> Result<async_nats::Client> {
+    let section: CredentialsSection =
+        toml::from_str(credentials_toml).context("parsing FLEET_OPERATOR_CREDENTIALS_TOML")?;
+    let source = credential_source_from_config(&section)
+        .context("constructing CredentialSource from operator credentials")?;
+    connect_options_with_credentials(source)
+        .ping_interval(Duration::from_secs(10))
+        .event_callback(|event| async move {
+            use async_nats::Event;
+            match event {
+                Event::Connected => tracing::info!("NATS connected"),
+                Event::Disconnected => tracing::warn!("NATS disconnected, will reconnect"),
+                Event::LameDuckMode => tracing::warn!("NATS server entered lame-duck mode"),
+                Event::SlowConsumer(sid) => tracing::warn!(sid = %sid, "NATS slow consumer"),
+                Event::ServerError(e) => tracing::error!(error = %e, "NATS server error"),
+                Event::ClientError(e) => tracing::error!(error = %e, "NATS client error"),
+                Event::Closed => tracing::error!("NATS connection closed"),
+                other => tracing::debug!(?other, "NATS event"),
+            }
+        })
+        .connect(nats_url)
+        .await
+        .context("connecting to NATS with operator credentials")
+}
--- a/fleet/scripts/load-test.sh
+++ b/fleet/scripts/load-test.sh
@@ -249,6 +249,7 @@ $(printf '\033[1;32m[load-test]\033[0m stack ready. In another terminal:')

 EOF
 }
+  alias natsbox='podman run --rm docker.io/natsio/nats-box:latest nats --server nats://192.168.12.102:4222' # NOTE(review): hard-coded IP, and bash ignores aliases in non-interactive scripts unless expand_aliases is set; confirm this line is intended

 print_banner

--- a/harmony/src/domain/hardware/mod.rs
+++ b/harmony/src/domain/hardware/mod.rs
@@ -33,6 +33,21 @@ impl PhysicalHost {
    }

    pub fn summary(&self) -> String {
+        let mut parts = self.summary_parts_through_storage();
+        self.append_network_summary(&mut parts);
+        parts.join(" | ")
+    }
+
+    /// [`Self::summary`] minus the network section, for compact contexts such as
+    /// the `Host:` header above `inquire` prompts where the NIC list is too wide.
+    pub fn summary_short(&self) -> String {
+        let parts = self.summary_parts_through_storage();
+        parts.join(" | ")
+    }
+
+    /// Builds the first four sections of the summary (model, CPU, RAM, storage).
+    /// Shared between [`Self::summary`] and [`Self::summary_short`].
+    fn summary_parts_through_storage(&self) -> Vec<String> {
        let mut parts = Vec::new();

        // Part 1: System Model (from labels) or Category as a fallback
@@ -49,15 +64,17 @@ impl PhysicalHost {
            let cpu_count = self.cpus.len();
            let total_cores = self.cpus.iter().map(|c| c.cores).sum::<u32>();
            let total_threads = self.cpus.iter().map(|c| c.threads).sum::<u32>();
-            let model_name = &self.cpus[0].model;
+            let model_name = self.cpus[0].model.trim();

-            let cpu_summary = if cpu_count > 1 {
-                format!(
-                    "{}x {} ({}c/{}t)",
-                    cpu_count, model_name, total_cores, total_threads
-                )
-            } else {
-                format!("{} ({}c/{}t)", model_name, total_cores, total_threads)
+            // Agents sometimes report a blank model (e.g. when /proc/cpuinfo is
+            // unreadable); collapse those cases to avoid stray double-spaces.
+            let cpu_summary = match (cpu_count > 1, model_name.is_empty()) {
+                (true, true) => format!("{cpu_count}x CPU ({total_cores}c/{total_threads}t)"),
+                (true, false) => {
+                    format!("{cpu_count}x {model_name} ({total_cores}c/{total_threads}t)")
+                }
+                (false, true) => format!("{total_cores}c/{total_threads}t"),
+                (false, false) => format!("{model_name} ({total_cores}c/{total_threads}t)"),
            };
            parts.push(cpu_summary);
        }
@@ -94,7 +111,6 @@ impl PhysicalHost {
        if !self.storage.is_empty() {
            let total_storage_bytes = self.storage.iter().map(|d| d.size_bytes).sum::<u64>();
            let drive_count = self.storage.len();
-            let first_drive_model = &self.storage[0].model;

            // Helper to format bytes into TB or GB
            let format_storage = |bytes: u64| {
@@ -115,45 +131,39 @@ impl PhysicalHost {
                    .collect::<Vec<_>>()
                    .join(", ");

-                format!(
-                    "{} Storage ({} Disks [{}])",
-                    format_storage(total_storage_bytes),
-                    drive_count,
-                    drive_sizes
-                )
+                format!("{} [{}]", format_storage(total_storage_bytes), drive_sizes)
            } else {
-                format!(
-                    "{} Storage ({})",
-                    format_storage(total_storage_bytes),
-                    first_drive_model
-                )
+                format_storage(total_storage_bytes)
            };
            parts.push(storage_summary);
        }

-        // Part 5: Network Information
-        // Prioritize an "up" interface with an IPv4 address
-        let best_nic = self
+        parts
+    }
+
+    /// Appends the per-NIC network section to an existing parts list.
+    fn append_network_summary(&self, parts: &mut Vec<String>) {
+        if self.network.is_empty() {
+            return;
+        }
+        let per_nic: Vec<String> = self
            .network
            .iter()
-            .find(|n| n.is_up && !n.ipv4_addresses.is_empty())
-            .or_else(|| self.network.first());
+            .map(|nic| {
+                let mac = nic.mac_address.to_string();
+                match nic.ipv4_addresses.first() {
+                    Some(ip) => format!("[{}, {}]", ip, mac),
+                    None => format!("[{}]", mac),
+                }
+            })
+            .collect();

-        if let Some(nic) = best_nic {
-            let speed = nic
-                .speed_mbps
-                .map(|s| format!("{}Gbps", s / 1000))
-                .unwrap_or_else(|| "N/A".to_string());
-            let mac = nic.mac_address.to_string();
-            let nic_summary = if let Some(ip) = nic.ipv4_addresses.first() {
-                format!("NIC: {} ({}, {})", speed, ip, mac)
-            } else {
-                format!("NIC: {} ({})", speed, mac)
-            };
-            parts.push(nic_summary);
-        }
-
-        parts.join(" | ")
+        let nic_summary = if per_nic.len() == 1 {
+            format!("NIC: {}", per_nic[0])
+        } else {
+            format!("{} NICs: {}", per_nic.len(), per_nic.join(", "))
+        };
+        parts.push(nic_summary);
    }

    pub fn parts_list(&self) -> String {
--- a/harmony/src/domain/inventory/mod.rs
+++ b/harmony/src/domain/inventory/mod.rs
@@ -144,6 +144,16 @@ pub enum HostRole {
    Worker,
 }

+/// A persisted role-to-host assignment: the role that was chosen, plus the
+/// operational config captured at discovery time (install disk, bond +
+/// blacklist). Returned when looking up "does this host already have a
+/// mapping?" so the UI can show what will be replaced before overwriting.
+#[derive(Debug, Clone)]
+pub struct HostRoleMapping {
+    pub role: HostRole,
+    pub host_config: crate::topology::HostConfig,
+}
+
 impl fmt::Display for HostRole {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
--- a/harmony/src/domain/inventory/repository.rs
+++ b/harmony/src/domain/inventory/repository.rs
@@ -1,8 +1,12 @@
 use async_trait::async_trait;

 use crate::{
-    hardware::PhysicalHost, interpret::InterpretError, inventory::HostRole, topology::HostConfig,
+    hardware::PhysicalHost,
+    interpret::InterpretError,
+    inventory::{HostRole, HostRoleMapping},
+    topology::{HostConfig, NetworkConfig},
 };
+use harmony_types::id::Id;

 /// Errors that can occur within the repository layer.
 #[derive(thiserror::Error, Debug)]
@@ -35,10 +39,18 @@ pub trait InventoryRepository: Send + Sync + 'static {
        &self,
        role: &HostRole,
    ) -> Result<Vec<(PhysicalHost, HostConfig)>, RepoError>;
+    /// Insert-or-replace the role mapping for this host. Any prior mapping
+    /// rows for `host.id` are deleted first (in the same transaction) so
+    /// `host_role_mapping` holds at most one row per host.
    async fn save_role_mapping(
        &self,
        role: &HostRole,
        host: &PhysicalHost,
        installation_device: &String,
+        network_config: &NetworkConfig,
    ) -> Result<(), RepoError>;
+
+    /// Return the current role mapping for a host, if any. Used at discovery
+    /// time to ask the operator whether to overwrite or cancel.
+    async fn get_role_mapping(&self, host_id: &Id) -> Result<Option<HostRoleMapping>, RepoError>;
 }
--- a/harmony/src/domain/topology/container_runtime.rs
+++ b/harmony/src/domain/topology/container_runtime.rs
@@ -50,6 +50,18 @@ pub struct ContainerSpec {
    /// labels. Used by Scores to carry grouping information (e.g. the
    /// originating deployment name).
    pub labels: Vec<(String, String)>,
+    /// Environment variables to set inside the container. Order is preserved
+    /// for deterministic spec equality; runtimes apply them as a set.
+    #[serde(default)]
+    pub env: Vec<(String, String)>,
+    /// Bind-mount volumes from the host into the container. Bind mounts only
+    /// in v0; named/anonymous volumes can be added behind the same field
+    /// later (the runtime impls would distinguish on `host_path` shape).
+    #[serde(default)]
+    pub volumes: Vec<VolumeMount>,
+    /// Restart policy on container exit. Mirrors podman/docker semantics.
+    #[serde(default)]
+    pub restart_policy: RestartPolicy,
 }

 impl ContainerSpec {
@@ -61,6 +73,51 @@ impl ContainerSpec {
    pub const MANAGED_BY_VALUE: &'static str = "harmony";
 }

+/// A single host-path → container-path bind mount. Bind mounts are the only
+/// volume kind supported in v0 — they cover ~95% of compose use cases and
+/// don't depend on a runtime-managed volume namespace.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct VolumeMount {
+    /// Absolute path on the host.
+    pub host_path: String,
+    /// Absolute path inside the container.
+    pub container_path: String,
+    /// Mount as read-only. Defaults to false (read-write) to match
+    /// docker-compose's default.
+    #[serde(default)]
+    pub read_only: bool,
+}
+
+/// Restart policy for a managed container. Names follow podman/docker
+/// conventions so docker-compose translation is mechanical.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(rename_all = "kebab-case")]
+pub enum RestartPolicy {
+    /// Don't restart on exit.
+    No,
+    /// Restart unless the user explicitly stopped the container. This is
+    /// the `Default` here (Docker and Compose themselves default to `no`)
+    /// because it is what most fleet workloads want.
+    #[default]
+    UnlessStopped,
+    /// Restart only if the container exits with a non-zero status.
+    OnFailure,
+    /// Always restart, even on clean exits and after host reboot.
+    Always,
+}
+
+impl RestartPolicy {
+    /// The policy's name as the podman and docker CLIs/APIs spell it.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::No => "no",
+            Self::UnlessStopped => "unless-stopped",
+            Self::OnFailure => "on-failure",
+            Self::Always => "always",
+        }
+    }
+}
+
 /// Observed state of a container on the runtime.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct ContainerState {
--- a/harmony/src/domain/topology/host_binding.rs
+++ b/harmony/src/domain/topology/host_binding.rs
@@ -1,5 +1,6 @@
 use derive_new::new;
-use serde::Serialize;
+use harmony_types::firewall::LaggProtocol;
+use serde::{Deserialize, Serialize};

 use crate::hardware::PhysicalHost;

@@ -20,4 +21,23 @@ pub struct HostBinding {
 #[derive(Debug, new, Clone, Serialize)]
 pub struct HostConfig {
    pub installation_device: Option<String>,
+    #[new(default)]
+    pub network_config: NetworkConfig,
+}
+
+/// User-provided networking intent captured at discovery time.
+///
+/// Produced by the interactive discovery flow and persisted alongside the role
+/// mapping so downstream Scores can act on it (e.g. configuring a bond on the
+/// chosen interfaces and avoiding blacklisted ones).
+#[derive(Debug, Default, Clone, Serialize, Deserialize)]
+pub struct NetworkConfig {
+    pub bond: Option<BondConfig>,
+    pub blacklisted_interfaces: Vec<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BondConfig {
+    pub interfaces: Vec<String>,
+    pub mode: LaggProtocol,
 }
--- a/harmony/src/infra/inventory/sqlite.rs
+++ b/harmony/src/infra/inventory/sqlite.rs
@@ -1,12 +1,16 @@
 use crate::{
    hardware::PhysicalHost,
-    inventory::{HostRole, InventoryRepository, RepoError},
-    topology::HostConfig,
+    inventory::{HostRole, HostRoleMapping, InventoryRepository, RepoError},
+    topology::{HostConfig, NetworkConfig},
 };
 use async_trait::async_trait;
 use harmony_types::id::Id;
-use log::info;
-use sqlx::{Pool, Sqlite, SqlitePool, migrate::MigrateDatabase};
+use log::{info, warn};
+use sqlx::{
+    Pool, Sqlite,
+    sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions},
+};
+use std::str::FromStr;

 /// A thread-safe, connection-pooled repository using SQLite.
 #[derive(Debug)]
@@ -16,18 +20,18 @@ pub struct SqliteInventoryRepository {

 impl SqliteInventoryRepository {
    pub async fn new(database_url: &str) -> Result<Self, RepoError> {
-        // Ensure the database file exists for SQLite
-        if database_url.starts_with("sqlite:") {
-            let path = database_url.trim_start_matches("sqlite:");
-            if !path.contains(":memory:") && !std::path::Path::new(path).exists() {
-                sqlx::any::install_default_drivers();
-                sqlx::Sqlite::create_database(database_url)
-                    .await
-                    .map_err(|e| RepoError::ConnectionFailed(e.to_string()))?;
-            }
-        }
+        // Use the classic rollback journal (DELETE) rather than sqlx's WAL
+        // default so we don't leave `.sqlite-wal` / `.sqlite-shm` files next
+        // to the DB: this is a single-process CLI, WAL's concurrent-reader
+        // benefit is wasted. `create_if_missing(true)` replaces the manual
+        // `Sqlite::create_database` dance the code used to do.
+        let options = SqliteConnectOptions::from_str(database_url)
+            .map_err(|e| RepoError::ConnectionFailed(e.to_string()))?
+            .create_if_missing(true)
+            .journal_mode(SqliteJournalMode::Delete);

-        let pool = SqlitePool::connect(database_url)
+        let pool = SqlitePoolOptions::new()
+            .connect_with(options)
            .await
            .map_err(|e| RepoError::ConnectionFailed(e.to_string()))?;

@@ -50,6 +54,24 @@ impl InventoryRepository for SqliteInventoryRepository {
        let id = Id::default().to_string();
        let host_id = host.id.to_string();

+        // Skip the insert if the most recent row for this host is byte-identical:
+        // discovery is naturally a polling activity (mDNS is continuous, CIDR scans get
+        // re-run) and we don't want an unbounded pile of identical version rows. Real
+        // changes still produce a new version row (audit trail for free).
+        let latest = sqlx::query!(
+            r#"SELECT data as "data!: Vec<u8>" FROM physical_hosts WHERE id = ? ORDER BY version_id DESC LIMIT 1"#,
+            host_id
+        )
+        .fetch_optional(&self.pool)
+        .await?;
+
+        if let Some(row) = latest {
+            if row.data == data {
+                info!("Host '{}' unchanged, skipping save", host.id);
+                return Ok(());
+            }
+        }
+
        sqlx::query!(
            "INSERT INTO physical_hosts (id, version_id, data) VALUES (?, ?, ?)",
            host_id,
@@ -109,26 +131,85 @@ impl InventoryRepository for SqliteInventoryRepository {
        role: &HostRole,
        host: &PhysicalHost,
        installation_device: &String,
+        network_config: &NetworkConfig,
    ) -> Result<(), RepoError> {
        let host_id = host.id.to_string();
+        let network_config_json = serde_json::to_string(network_config)
+            .map_err(|e| RepoError::Serialization(e.to_string()))?;
+
+        // Replace atomically: DELETE any prior rows for this host_id (there should
+        // be at most one, but older data may have dups) then INSERT the new one.
+        // Wrapped in a transaction so a concurrent reader never sees zero rows.
+        let mut tx = self.pool.begin().await?;
+
+        sqlx::query!("DELETE FROM host_role_mapping WHERE host_id = ?", host_id)
+            .execute(&mut *tx)
+            .await?;

        sqlx::query!(
            r#"
-        INSERT INTO host_role_mapping (host_id, role, installation_device)
-        VALUES (?, ?, ?)
+        INSERT INTO host_role_mapping (host_id, role, installation_device, network_config)
+        VALUES (?, ?, ?, ?)
        "#,
            host_id,
            role,
-            installation_device
+            installation_device,
+            network_config_json,
        )
-        .execute(&self.pool)
+        .execute(&mut *tx)
        .await?;

+        tx.commit().await?;
+
        info!("Saved role mapping for host '{}' as '{:?}'", host.id, role);

        Ok(())
    }

+    async fn get_role_mapping(&self, host_id: &Id) -> Result<Option<HostRoleMapping>, RepoError> {
+        struct MappingRow {
+            role: HostRole,
+            installation_device: Option<String>,
+            network_config: Option<String>,
+        }
+
+        let key = host_id.to_string();
+        let found = sqlx::query_as!(
+            MappingRow,
+            r#"SELECT role as "role: HostRole", installation_device, network_config FROM host_role_mapping WHERE host_id = ? ORDER BY id DESC LIMIT 1"#,
+            key,
+        )
+        .fetch_optional(&self.pool)
+        .await?;
+
+        let Some(found) = found else { return Ok(None) };
+
+        // A network_config that no longer parses (older enum shapes, accidental
+        // corruption) must not hide the mapping: warn, substitute defaults, and
+        // let the operator see the existing row and choose "Update" to overwrite
+        // the bad data. A NULL column likewise maps to the default config.
+        let network_config = found
+            .network_config
+            .as_deref()
+            .map(|json| {
+                serde_json::from_str::<NetworkConfig>(json).unwrap_or_else(|e| {
+                    warn!(
+                        "Discarding unreadable network_config for host '{host_id}': {e}. The existing mapping will be shown with empty network config; pick 'Update' to replace it."
+                    );
+                    NetworkConfig::default()
+                })
+            })
+            .unwrap_or_default();
+
+        Ok(Some(HostRoleMapping {
+            role: found.role,
+            host_config: HostConfig {
+                installation_device: found.installation_device,
+                network_config,
+            },
+        }))
+    }
+
    async fn get_hosts_for_role(
        &self,
        role: &HostRole,
@@ -136,13 +217,14 @@ impl InventoryRepository for SqliteInventoryRepository {
        struct HostIdRow {
            host_id: String,
            installation_device: Option<String>,
+            network_config: Option<String>,
        }

        let role_str = format!("{:?}", role);

        let host_id_rows = sqlx::query_as!(
            HostIdRow,
-            "SELECT host_id, installation_device FROM host_role_mapping WHERE role = ?",
+            "SELECT host_id, installation_device, network_config FROM host_role_mapping WHERE role = ?",
            role_str
        )
        .fetch_all(&self.pool)
@@ -159,8 +241,14 @@ impl InventoryRepository for SqliteInventoryRepository {
                    )));
                }
            };
+            let network_config = match row.network_config.as_deref() {
+                Some(json) => serde_json::from_str(json)
+                    .map_err(|e| RepoError::Deserialization(e.to_string()))?,
+                None => NetworkConfig::default(),
+            };
            let host_config = HostConfig {
                installation_device: row.installation_device,
+                network_config,
            };
            hosts.push((physical_host, host_config));
        }
--- a/harmony/src/infra/opnsense/load_balancer.rs
+++ b/harmony/src/infra/opnsense/load_balancer.rs
@@ -53,7 +53,12 @@ impl LoadBalancer for OPNSenseFirewall {

    async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
        let lb = self.opnsense_config.load_balancer();
-        if lb.is_installed().await {
+        let installed = lb.is_installed().await.map_err(|e| {
+            ExecutorError::UnexpectedError(format!(
+                "Failed to query HAProxy installation status on OPNsense: {e}"
+            ))
+        })?;
+        if installed {
            debug!("HAProxy is installed");
        } else {
            self.opnsense_config
@@ -141,7 +146,7 @@ fn haproxy_service_to_harmony(svc: &HaproxyService) -> Option<LoadBalancerServic
                let method: HttpMethod = hc.http_method.clone().unwrap_or_default().into();
                let ssl = match hc.ssl.as_deref().unwrap_or("").to_uppercase().as_str() {
                    "SSL" => SSL::SSL,
-                    "SSLNI" => SSL::SNI,
+                    "SSLSNI" => SSL::SNI,
                    "NOSSL" => SSL::Disabled,
                    "" => SSL::Default,
                    other => {
@@ -177,7 +182,7 @@ pub(crate) fn harmony_service_to_lb_types(
        HealthCheck::HTTP(port, path, http_method, _status_code, ssl) => {
            let ssl_str = match ssl {
                SSL::SSL => Some("ssl".to_string()),
-                SSL::SNI => Some("sslni".to_string()),
+                SSL::SNI => Some("sslsni".to_string()),
                SSL::Disabled => Some("nossl".to_string()),
                SSL::Default => Some(String::new()),
                SSL::Other(other) => Some(other.clone()),
--- a/harmony/src/modules/fleet/mod.rs
+++ b/harmony/src/modules/fleet/mod.rs
@@ -35,6 +35,8 @@ pub use assets::{
 #[cfg(feature = "kvm")]
 pub use libvirt_pool::{HARMONY_FLEET_POOL_NAME, HarmonyFleetPool, ensure_harmony_fleet_pool};
 pub use preflight::{check_fleet_smoke_preflight, check_fleet_smoke_preflight_for_arch};
-pub use setup_score::{FleetDeviceSetupConfig, FleetDeviceSetupScore};
+pub use setup_score::{
+    FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore, HostsEntry, merge_hosts_file,
+};
 #[cfg(feature = "kvm")]
 pub use vm_score::ProvisionVmScore;
--- a/harmony/src/modules/fleet/setup_score.rs
+++ b/harmony/src/modules/fleet/setup_score.rs
@@ -34,6 +34,14 @@ use crate::score::Score;
 /// device is moved between fleet partitions: the config file is
 /// regenerated, byte-compare idempotency fires, the agent restarts,
 /// new labels propagate.
+///
+/// **On `auth`.** Two authentication modes:
+/// - [`FleetDeviceAuth::TomlShared`] — shared NATS user/password baked
+///   into the TOML. Suitable for v0/dev only.
+/// - [`FleetDeviceAuth::ZitadelJwt`] — per-device Zitadel machine-user
+///   JWT-bearer. The keyfile is dropped onto the Pi at
+///   `/etc/fleet-agent/zitadel-key.json` (mode 0640, owner
+///   `fleet-agent`). The agent's `[credentials]` block points at it.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FleetDeviceSetupConfig {
    /// Stable device identifier. Written into the agent's TOML and
@@ -49,25 +57,67 @@ pub struct FleetDeviceSetupConfig {
    pub labels: BTreeMap<String, String>,
    /// NATS URLs the agent should connect to. Typically one entry.
    pub nats_urls: Vec<String>,
-    /// Shared v0 credentials (Zitadel-issued per-device tokens in v0.2).
-    pub nats_user: String,
-    pub nats_pass: String,
+    /// Authentication for this device's NATS connection.
+    pub auth: FleetDeviceAuth,
    /// Local filesystem path to the cross-compiled `fleet-agent-v0`
    /// binary. The Score uploads it to the device and installs to
    /// `/usr/local/bin/fleet-agent`. Future v0.1: this becomes a
    /// `DownloadableAsset` pointing at CI-published artifacts.
    pub agent_binary_path: PathBuf,
+    /// `/etc/hosts` entries to add on the device. The fleet rehearsal
+    /// harness uses this so VMs on a libvirt NAT resolve
+    /// `sso.fleet.local` to the host's gateway IP — without it the
+    /// agent's HTTP client to Zitadel can't even DNS-resolve the
+    /// issuer URL. Empty by default; production deployments rely on
+    /// real DNS instead.
+    #[serde(default)]
+    pub hosts_entries: Vec<HostsEntry>,
 }

+/// One line in `/etc/hosts`. Order doesn't matter (the file ends up
+/// being a sorted dedup'd merge of these and any pre-existing
+/// non-managed entries).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct HostsEntry {
+    pub ip: String,
+    pub hostname: String,
+}
+
+/// On-device NATS authentication mode. Variants hold secrets that `Debug` prints verbatim.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum FleetDeviceAuth {
+    /// Username + password baked into the agent's TOML (legacy / dev).
+    TomlShared {
+        nats_user: String,
+        nats_pass: String,
+    },
+    /// Zitadel machine-user JWT-bearer flow. The keyfile content is
+    /// what `ZitadelSetupScore` returns from
+    /// `ZitadelClientConfig::machine_keys.<username>` — JSON keyfile as
+    /// emitted by Zitadel for `KEY_TYPE_JSON`.
+    ZitadelJwt {
+        /// Raw JSON keyfile content (will be written to the device).
+        machine_key_json: String,
+        /// Externally-visible Zitadel issuer URL.
+        oidc_issuer_url: String,
+        /// `aud` value for token-bearer requests. Typically the Zitadel
+        /// project ID.
+        audience: String,
+        /// Whether the agent's HTTP client accepts invalid TLS certs
+        /// (escape hatch for self-signed staging Zitadels).
+        #[serde(default)]
+        danger_accept_invalid_certs: bool,
+    },
+}
+
+/// Path the agent reads its Zitadel machine key from. Must match
+/// `harmony-fleet-agent::config::default_zitadel_key_path`.
+const ZITADEL_KEY_PATH: &str = "/etc/fleet-agent/zitadel-key.json";
+
 impl FleetDeviceSetupConfig {
    /// Render the agent's `/etc/fleet-agent/config.toml` content.
    pub fn render_toml(&self) -> String {
-        // Raw-string template with format! — the TOML escape rules for
-        // double-quoted strings are just `\` and `"`, handled by
-        // [`toml_escape`].
        let device_id = toml_escape(&self.device_id.to_string());
-        let nats_user = toml_escape(&self.nats_user);
-        let nats_pass = toml_escape(&self.nats_pass);
        let urls = self
            .nats_urls
            .iter()
@@ -83,21 +133,46 @@ impl FleetDeviceSetupConfig {
            .map(|(k, v)| format!("{} = \"{}\"", toml_escape(k), toml_escape(v)))
            .collect::<Vec<_>>()
            .join("\n");
+        let credentials = match &self.auth {
+            FleetDeviceAuth::TomlShared {
+                nats_user,
+                nats_pass,
+            } => format!(
+                "[credentials]\n\
+                 type = \"toml-shared\"\n\
+                 nats_user = \"{}\"\n\
+                 nats_pass = \"{}\"\n",
+                toml_escape(nats_user),
+                toml_escape(nats_pass),
+            ),
+            FleetDeviceAuth::ZitadelJwt {
+                oidc_issuer_url,
+                audience,
+                danger_accept_invalid_certs,
+                ..
+            } => format!(
+                "[credentials]\n\
+                 type = \"zitadel-jwt\"\n\
+                 key_path = \"{}\"\n\
+                 oidc_issuer_url = \"{}\"\n\
+                 audience = \"{}\"\n\
+                 danger_accept_invalid_certs = {}\n",
+                ZITADEL_KEY_PATH,
+                toml_escape(oidc_issuer_url),
+                toml_escape(audience),
+                danger_accept_invalid_certs,
+            ),
+        };
        format!(
-            r#"[agent]
-device_id = "{device_id}"
-
-[credentials]
-type = "toml-shared"
-nats_user = "{nats_user}"
-nats_pass = "{nats_pass}"
-
-[nats]
-urls = [{urls}]
-
-[labels]
-{labels}
-"#
+            "[agent]\n\
+             device_id = \"{device_id}\"\n\
+             \n\
+             {credentials}\n\
+             [nats]\n\
+             urls = [{urls}]\n\
+             \n\
+             [labels]\n\
+             {labels}\n"
        )
    }

@@ -214,7 +289,8 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for FleetDeviceSetupInte
                     proceeding will OVERWRITE it"
                );
                warn!("[{tag}]   diff (- existing, + desired):");
-                let diff = similar::TextDiff::from_lines(existing.as_str(), desired_config.as_str());
+                let diff =
+                    similar::TextDiff::from_lines(existing.as_str(), desired_config.as_str());
                let groups = diff.grouped_ops(2);
                for (idx, group) in groups.iter().enumerate() {
                    if idx > 0 {
@@ -250,6 +326,43 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for FleetDeviceSetupInte
            }
        }

+        // 0. /etc/hosts entries (rehearsal-only convenience). Done before
+        // package install so any package-manager mirror lookups that depend
+        // on these entries succeed. Entries are rendered as a managed block
+        // bracketed by markers: re-running is byte-stable, and an entry
+        // dropped from the score disappears on the next run. NOTE: an empty
+        // list skips this step entirely, so a leftover block is not removed.
+        if !cfg.hosts_entries.is_empty() {
+            info!(
+                "[{tag}] Step 1.5/7 — injecting {} /etc/hosts entr{} for rehearsal",
+                cfg.hosts_entries.len(),
+                if cfg.hosts_entries.len() == 1 {
+                    "y"
+                } else {
+                    "ies"
+                }
+            );
+            let existing = FileFetcher::fetch_file(topology, "/etc/hosts")
+                .await
+                .map_err(wrap)?;
+            let merged = merge_hosts_file(existing.as_deref(), &cfg.hosts_entries);
+            let hosts_r = FileDelivery::ensure_file(
+                topology,
+                &FileSpec {
+                    path: "/etc/hosts".to_string(),
+                    source: FileSource::Content(merged),
+                    owner: Some("root".to_string()),
+                    group: Some("root".to_string()),
+                    mode: Some(0o644),
+                },
+            )
+            .await
+            .map_err(wrap)?;
+            if hosts_r.changed {
+                change_count += 1;
+            }
+        }
+
        // 1. Dependencies.
        info!("[{tag}] Step 2/7 — ensuring system packages: podman, systemd-container");
        for pkg in ["podman", "systemd-container"] {
@@ -298,9 +411,10 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for FleetDeviceSetupInte
        // 3. User-scoped podman socket. Required by `PodmanTopology` on
        // the agent so it reaches /run/user/<uid>/podman/podman.sock.
        info!("[{tag}] Step 4/7 — activating user-scoped podman.socket");
-        let socket_r = SystemdManager::ensure_user_unit_active(topology, "fleet-agent", "podman.socket")
-            .await
-            .map_err(wrap)?;
+        let socket_r =
+            SystemdManager::ensure_user_unit_active(topology, "fleet-agent", "podman.socket")
+                .await
+                .map_err(wrap)?;
        if socket_r.changed {
            change_count += 1;
        }
@@ -330,7 +444,38 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for FleetDeviceSetupInte
            change_count += 1;
        }

-        // 5. /etc/fleet-agent/ + config.toml
+        // 5a. Drop the Zitadel machine keyfile when using JWT auth.
+        // Order: keyfile first, then config.toml — if both are new the
+        // agent's first systemd start finds the key already in place.
+        // Mode 0640 + group=fleet-agent so the non-root agent reads it
+        // via group permission (matches the corresponding Pod-side
+        // securityContext we use for the in-cluster callout).
+        let key_r = if let FleetDeviceAuth::ZitadelJwt {
+            machine_key_json, ..
+        } = &cfg.auth
+        {
+            info!("[{tag}] Step 6/7 — dropping Zitadel machine key to {ZITADEL_KEY_PATH}");
+            let r = FileDelivery::ensure_file(
+                topology,
+                &FileSpec {
+                    path: ZITADEL_KEY_PATH.to_string(),
+                    source: FileSource::Content(machine_key_json.clone()),
+                    owner: Some("fleet-agent".to_string()),
+                    group: Some("fleet-agent".to_string()),
+                    mode: Some(0o640),
+                },
+            )
+            .await
+            .map_err(wrap)?;
+            if r.changed {
+                change_count += 1;
+            }
+            r.changed
+        } else {
+            false
+        };
+
+        // 5b. /etc/fleet-agent/ + config.toml
        info!(
            "[{tag}] Step 6/7 — rendering /etc/fleet-agent/config.toml ({} NATS URL{}, {} label{})",
            cfg.nats_urls.len(),
@@ -368,7 +513,7 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for FleetDeviceSetupInte
        }

        // 7. Restart the agent iff anything that affects it changed.
-        let needs_restart = toml_r.changed || unit_r.changed || binary_r.changed;
+        let needs_restart = toml_r.changed || unit_r.changed || binary_r.changed || key_r;
        let service_state = if needs_restart {
            info!("[{tag}] 🔄 Restarting fleet-agent (config/binary/unit changed)");
            SystemdManager::restart_service(topology, "fleet-agent", SystemdScope::System)
@@ -436,6 +581,67 @@ fn wrap(e: crate::executors::ExecutorError) -> InterpretError {
    InterpretError::new(e.to_string())
 }

+const HOSTS_BEGIN_MARKER: &str = "# >>> fleet-agent managed >>>";
+const HOSTS_END_MARKER: &str = "# <<< fleet-agent managed <<<";
+
+/// Render an `/etc/hosts` file whose managed block sits at the end.
+/// `existing` is the file currently on the device; `None` falls back
+/// to a minimal IPv4/IPv6 `localhost` file. The block is bracketed by
+/// marker lines so later runs can find and replace it without disturbing
+/// the rest of the file. Empty `entries` removes the block entirely.
+pub fn merge_hosts_file(existing: Option<&str>, entries: &[HostsEntry]) -> String {
+    let base = existing.unwrap_or("127.0.0.1\tlocalhost\n::1\tlocalhost\n");
+    // Strip any pre-existing managed block.
+    let stripped = strip_managed_block(base);
+
+    if entries.is_empty() {
+        return ensure_trailing_newline(&stripped); // nothing to manage: emit the file without a block
+    }
+
+    let mut out = ensure_trailing_newline(&stripped);
+    out.push_str(HOSTS_BEGIN_MARKER);
+    out.push('\n');
+    for e in entries {
+        out.push_str(&format!("{}\t{}\n", e.ip, e.hostname)); // one tab-separated line per entry
+    }
+    out.push_str(HOSTS_END_MARKER);
+    out.push('\n');
+    out
+}
+
+/// Remove the first marker-delimited managed block from `s` and return the
+/// remaining text. Input without a complete begin/end marker pair is
+/// returned unchanged.
+fn strip_managed_block(s: &str) -> String {
+    let begin = match s.find(HOSTS_BEGIN_MARKER) {
+        Some(i) => i,
+        None => return s.to_string(),
+    };
+    // Search for the end marker only after the begin marker.
+    let end_idx = match s[begin..].find(HOSTS_END_MARKER) {
+        Some(i) => begin + i + HOSTS_END_MARKER.len(),
+        None => return s.to_string(), // malformed; leave alone
+    };
+    // Eat the trailing newline of the end marker if present.
+    let tail_start = end_idx + usize::from(s.as_bytes().get(end_idx) == Some(&b'\n'));
+    // Trim trailing newlines on head so we don't accumulate blanks.
+    let mut head = s[..begin].trim_end_matches('\n').to_string();
+    // Re-terminate only a non-empty head, so a leading block leaves no blank first line.
+    if !head.is_empty() {
+        head.push('\n');
+    }
+    head.push_str(&s[tail_start..]);
+    head
+}
+
+fn ensure_trailing_newline(s: &str) -> String {
+    if s.ends_with('\n') {
+        s.to_string() // already newline-terminated: return an owned copy as-is
+    } else {
+        format!("{s}\n") // otherwise append the missing '\n'
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -445,9 +651,29 @@ mod tests {
            device_id: Id::from("pi-42".to_string()),
            labels,
            nats_urls: vec!["nats://nats:4222".to_string()],
-            nats_user: "admin".to_string(),
-            nats_pass: "pw".to_string(),
+            auth: FleetDeviceAuth::TomlShared {
+                nats_user: "admin".to_string(),
+                nats_pass: "pw".to_string(),
+            },
            agent_binary_path: PathBuf::from("/dev/null"),
+            hosts_entries: vec![],
+        }
+    }
+
+    fn base_config_zitadel(labels: BTreeMap<String, String>) -> FleetDeviceSetupConfig {
+        FleetDeviceSetupConfig {
+            device_id: Id::from("pi-42".to_string()),
+            labels,
+            nats_urls: vec!["wss://nats.staging.example.com/".to_string()], // wss:// URL for the JWT fixture
+            auth: FleetDeviceAuth::ZitadelJwt {
+                machine_key_json: // fake key; "-----PEM-----" is the sentinel the render test looks for
+                    r#"{"type":"sa","keyId":"k1","key":"-----PEM-----","userId":"u1"}"#.to_string(),
+                oidc_issuer_url: "https://zitadel.staging.example.com".to_string(),
+                audience: "366378028009259037".to_string(),
+                danger_accept_invalid_certs: false,
+            },
+            agent_binary_path: PathBuf::from("/dev/null"),
+            hosts_entries: vec![],
        }
    }

@@ -486,4 +712,99 @@ mod tests {
        let toml = base_config(labels).render_toml();
        assert!(toml.contains(r#"group = "has\"quote""#));
    }
+
+    #[test]
+    fn render_toml_emits_zitadel_jwt_block() {
+        let mut labels = BTreeMap::new();
+        labels.insert("group".to_string(), "site-a".to_string());
+        let toml = base_config_zitadel(labels).render_toml();
+        assert!(toml.contains(r#"type = "zitadel-jwt""#)); // credentials type selects the JWT mode
+        assert!(toml.contains(&format!(r#"key_path = "{ZITADEL_KEY_PATH}""#)));
+        assert!(toml.contains(r#"oidc_issuer_url = "https://zitadel.staging.example.com""#));
+        assert!(toml.contains(r#"audience = "366378028009259037""#));
+        // The keyfile content does NOT go in the TOML — only its path does;
+        // the content is dropped separately to ZITADEL_KEY_PATH on the device.
+        assert!(!toml.contains("-----PEM-----"));
+        // toml-shared keys must not appear when zitadel-jwt is selected
+        // (defense-in-depth against an accidental dual-mode rendering).
+        assert!(!toml.contains("nats_user"));
+        assert!(!toml.contains("nats_pass"));
+    }
+
+    #[test]
+    fn merge_hosts_inserts_managed_block() {
+        let entries = vec![HostsEntry {
+            ip: "192.168.122.1".to_string(),
+            hostname: "sso.fleet.local".to_string(),
+        }];
+        let out = merge_hosts_file(None, &entries); // None: no existing file content supplied
+        assert!(out.contains("127.0.0.1\tlocalhost")); // built-in localhost fallback is kept
+        assert!(out.contains("# >>> fleet-agent managed >>>"));
+        assert!(out.contains("192.168.122.1\tsso.fleet.local"));
+        assert!(out.contains("# <<< fleet-agent managed <<<"));
+    }
+
+    #[test]
+    fn merge_hosts_replaces_existing_managed_block() {
+        let existing = "127.0.0.1\tlocalhost\n\
+                        # >>> fleet-agent managed >>>\n\
+                        10.0.0.1\told-host\n\
+                        # <<< fleet-agent managed <<<\n\
+                        192.168.1.5\tunrelated\n";
+        let entries = vec![HostsEntry {
+            ip: "192.168.122.1".to_string(),
+            hostname: "sso.fleet.local".to_string(),
+        }];
+        let out = merge_hosts_file(Some(existing), &entries);
+        assert!(
+            !out.contains("old-host"),
+            "old managed entry must be removed"
+        );
+        assert!(out.contains("192.168.122.1\tsso.fleet.local")); // replacement entry is present
+        // Non-managed entries on both sides of the old block survive.
+        assert!(out.contains("192.168.1.5\tunrelated"));
+        assert!(out.contains("127.0.0.1\tlocalhost"));
+    }
+
+    #[test]
+    fn merge_hosts_empty_entries_strips_managed_block() {
+        let existing = "127.0.0.1\tlocalhost\n\
+                        # >>> fleet-agent managed >>>\n\
+                        10.0.0.1\told-host\n\
+                        # <<< fleet-agent managed <<<\n";
+        let out = merge_hosts_file(Some(existing), &[]); // empty entries: strip, re-add nothing
+        assert!(!out.contains("old-host"));
+        assert!(!out.contains("fleet-agent managed")); // both marker lines are gone too
+        assert!(out.contains("127.0.0.1\tlocalhost"));
+    }
+
+    #[test]
+    fn merge_hosts_byte_stable_across_runs() {
+        // Idempotency invariant: feeding the previous output back in
+        // yields byte-identical output. The Score's drift detection
+        // relies on this.
+        let entries = vec![HostsEntry {
+            ip: "192.168.122.1".to_string(),
+            hostname: "sso.fleet.local".to_string(),
+        }];
+        let out1 = merge_hosts_file(None, &entries); // first run: no existing content
+        let out2 = merge_hosts_file(Some(&out1), &entries); // second run over the first run's output
+        assert_eq!(out1, out2, "merge must be idempotent across re-runs");
+    }
+
+    #[test]
+    fn render_toml_zitadel_emits_danger_flag_inline() {
+        let mut labels = BTreeMap::new();
+        labels.insert("group".to_string(), "x".to_string());
+        let mut cfg = base_config_zitadel(labels);
+        if let FleetDeviceAuth::ZitadelJwt {
+            danger_accept_invalid_certs,
+            ..
+        } = &mut cfg.auth
+        {
+            *danger_accept_invalid_certs = true; // the fixture sets this to false
+        }
+        let toml = cfg.render_toml();
+        assert!(toml.contains("danger_accept_invalid_certs = true")); // bare boolean, not a quoted string
+    }
 }
--- a/harmony/src/modules/helm/chart.rs
+++ b/harmony/src/modules/helm/chart.rs
@@ -39,7 +39,10 @@ pub struct HelmChartScore {
    pub values_yaml: Option<String>,
    pub create_namespace: bool,

-    /// Wether to run `helm upgrade --install` under the hood or only install when not present
+    /// `true` = run `helm install`, but only when the release is absent —
+    /// the interpret skips a release that is already installed;
+    /// `false` = run `helm upgrade --install`, which is idempotent — helm
+    /// diffs the rendered chart against the live release (no-op if unchanged).
    pub install_only: bool,
    pub repository: Option<HelmRepository>,
 }
@@ -206,37 +209,38 @@ impl<T: Topology + HelmCommand> Interpret<T> for HelmChartInterpret {

        let ns_str = ns.to_string();
        if let Some(installed_chart) = self.find_installed_release(topology, &ns_str)? {
-            return match self.expected_chart_field() {
-                Some(expected)
-                    if Self::normalize_chart_field(&expected)
-                        == Self::normalize_chart_field(&installed_chart) =>
-                {
-                    warn!(
-                        "Helm release '{}' already installed at desired version ('{}'); skipping.",
-                        self.score.release_name, installed_chart
-                    );
-                    Ok(Outcome::success(format!(
-                        "Helm Chart {} already at desired version",
-                        self.score.release_name
-                    )))
-                }
-                Some(expected) => Err(InterpretError::new(format!(
+            // `install_only=true` means "deploy once, then leave it alone"
+            // — bootstrap operators (cert-manager, prometheus-operator,
+            // CRDs) use this. Skip the helm call entirely on re-runs.
+            if self.score.install_only {
+                warn!(
+                    "Helm release '{}' already installed as '{}'; \
+                     install_only=true → skipping.",
+                    self.score.release_name, installed_chart
+                );
+                return Ok(Outcome::success(format!(
+                    "Helm Chart {} already installed (install_only)",
+                    self.score.release_name
+                )));
+            }
+            // Pinned-version safety net: if the score pins a *different*
+            // version than what's installed, refuse to silently
+            // upgrade/downgrade — that's a manual decision.
+            if let Some(expected) = self.expected_chart_field()
+                && Self::normalize_chart_field(&expected)
+                    != Self::normalize_chart_field(&installed_chart)
+            {
+                return Err(InterpretError::new(format!(
                    "Helm release '{}' already installed as '{}', but score requests '{}'. \
                     Refusing to upgrade/downgrade; resolve manually.",
                    self.score.release_name, installed_chart, expected
-                ))),
-                None => {
-                    warn!(
-                        "Helm release '{}' already installed as '{}'; score has no pinned \
-                         chart_version so skipping re-install.",
-                        self.score.release_name, installed_chart
-                    );
-                    Ok(Outcome::success(format!(
-                        "Helm Chart {} already installed (version not pinned)",
-                        self.score.release_name
-                    )))
-                }
-            };
+                )));
+            }
+            // Otherwise (no pin, or pinned and matching) fall through to
+            // `helm upgrade --install`. Helm is the source of truth on
+            // whether anything actually changed: a no-op upgrade is
+            // cheap, and changed values_yaml / values_overrides get
+            // applied automatically without the caller needing to opt in.
        }

        self.add_repo(topology)?;
--- a/harmony/src/modules/inventory/discovery.rs
+++ b/harmony/src/modules/inventory/discovery.rs
@@ -1,16 +1,18 @@
 use async_trait::async_trait;
-use harmony_types::id::Id;
+use harmony_inventory_agent::hwinfo::NetworkInterface;
+use harmony_types::{firewall::LaggProtocol, id::Id};
 use log::{error, info};
 use serde::{Deserialize, Serialize};

 use crate::{
    data::Version,
+    hardware::PhysicalHost,
    infra::inventory::InventoryRepositoryFactory,
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
-    inventory::{HostRole, Inventory},
+    inventory::{HostRole, HostRoleMapping, Inventory},
    modules::inventory::{HarmonyDiscoveryStrategy, LaunchDiscoverInventoryAgentScore},
    score::Score,
-    topology::Topology,
+    topology::{BondConfig, NetworkConfig, Topology},
 };

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -68,6 +70,7 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
                continue;
            }

+            println!();
            let ans = inquire::Select::new(
                &format!("Select the node to be used for role {:?}:", self.score.role),
                all_hosts,
@@ -77,6 +80,18 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {

            match ans {
                Ok(choice) => {
+                    // If the host is already mapped, tell the operator what's there
+                    // and let them bail out before re-answering every prompt.
+                    if let Some(existing) = host_repo.get_role_mapping(&choice.id).await? {
+                        if !confirm_overwrite_existing_mapping(&choice, &existing)? {
+                            info!(
+                                "Cancelled: kept existing mapping for host {}",
+                                choice.summary()
+                            );
+                            continue;
+                        }
+                    }
+
                    info!(
                        "Assigned role {:?} for node {}",
                        self.score.role,
@@ -103,11 +118,9 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
                    let display_refs: Vec<&str> =
                        disk_choices.iter().map(|(d, _)| d.as_str()).collect();

-                    let disk_choice = inquire::Select::new(
-                        &format!("Select the disk to use on host {}:", choice.summary()),
-                        display_refs,
-                    )
-                    .prompt();
+                    print_host_header(&choice);
+                    let disk_choice =
+                        inquire::Select::new("Select the disk to use:", display_refs).prompt();

                    match disk_choice {
                        Ok(selected_display) => {
@@ -117,8 +130,20 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
                                .map(|(_, name)| name.clone())
                                .unwrap();
                            info!("Selected disk {} for node {}", disk_name, choice.summary());
+
+                            let network_config = prompt_network_config(&choice)?;
+
+                            // Visual break between the last prompt's answer and the
+                            // logs that follow (save, loop progress, next iteration).
+                            println!();
+
                            host_repo
-                                .save_role_mapping(&self.score.role, &choice, &disk_name)
+                                .save_role_mapping(
+                                    &self.score.role,
+                                    &choice,
+                                    &disk_name,
+                                    &network_config,
+                                )
                                .await?;
                            chosen_hosts.push(choice);
                        }
@@ -179,3 +204,228 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
        todo!()
    }
 }
+
+/// Show the existing role mapping for a host and ask whether to overwrite it.
+///
+/// Returns `Ok(true)` when the operator picks "Update" (caller goes on to the
+/// disk/network prompts and a fresh save), `Ok(false)` on "Cancel" (caller skips
+/// this host), and an `InterpretError` if the prompt itself fails.
+fn confirm_overwrite_existing_mapping(
+    host: &PhysicalHost,
+    existing: &HostRoleMapping,
+) -> Result<bool, InterpretError> {
+    print_host_header(host);
+    println!("This host already has a role mapping:");
+    println!("  Role: {}", existing.role);
+    println!(
+        "  Installation disk: {}",
+        existing
+            .host_config
+            .installation_device
+            .as_deref()
+            .unwrap_or("(none)")
+    );
+    match &existing.host_config.network_config.bond {
+        Some(bond) => println!("  Bond: {} on [{}]", bond.mode, bond.interfaces.join(", ")),
+        None => println!("  Bond: none"),
+    }
+    let blacklist = &existing.host_config.network_config.blacklisted_interfaces;
+    if !blacklist.is_empty() { // the blacklist line is printed only when there is something to show
+        println!("  Blacklisted: {}", blacklist.join(", "));
+    }
+
+    let action = inquire::Select::new(
+        "What do you want to do?",
+        vec!["Update (overwrite the existing mapping)", "Cancel"],
+    )
+    .prompt()
+    .map_err(|e| InterpretError::new(format!("Could not prompt: {e}")))?;
+
+    Ok(action.starts_with("Update")) // only the "Update ..." option overwrites; "Cancel" yields false
+}
+
+/// Print a blank line and a "Host: <short summary>" header above the next prompt.
+///
+/// Harmonizes every host-specific `inquire` question in the discovery flow so
+/// the operator always sees which machine the prompt refers to — the `Host:`
+/// line sits directly above the `? ...` question rendered by inquire. The
+/// short-form summary omits the NIC list so the header fits on one screen
+/// width; full NIC details still appear inside the bond/blacklist pickers.
+fn print_host_header(host: &PhysicalHost) {
+    println!(); // blank separator line before the header
+    println!("Host: {}", host.summary_short());
+}
+
+/// Interactively ask the user how the host's networking should be set up.
+///
+/// Skips both prompts (returning the default config) when the host has fewer
+/// than two network interfaces — bonding requires at least two, and blacklisting
+/// a single NIC would leave the host unreachable. The resulting [`NetworkConfig`]
+/// is persisted alongside the role mapping so downstream Scores can act on it later.
+fn prompt_network_config(host: &PhysicalHost) -> Result<NetworkConfig, InterpretError> {
+    if host.network.len() < 2 {
+        info!(
+            "Host {} has {} network interface(s); skipping bond/blacklist prompts",
+            host.summary(),
+            host.network.len()
+        );
+        return Ok(NetworkConfig::default());
+    }
+
+    let format_iface = |nic: &NetworkInterface| -> String {
+        let speed = nic
+            .speed_mbps
+            .map(|s| format!("{}Mbps", s))
+            .unwrap_or_else(|| "?Mbps".to_string());
+        let state = if nic.is_up { "up" } else { "down" };
+        let ips = if nic.ipv4_addresses.is_empty() {
+            String::new()
+        } else {
+            format!(" [{}]", nic.ipv4_addresses.join(","))
+        };
+        format!(
+            "{} ({}) - {} - {} - driver {}{}",
+            nic.name, nic.mac_address, speed, state, nic.driver, ips
+        )
+    };
+
+    let options: Vec<(String, String)> = host // (display line shown in the picker, interface name)
+        .network
+        .iter()
+        .map(|nic| (format_iface(nic), nic.name.clone()))
+        .collect();
+
+    // --- Bond ---
+    print_host_header(host);
+    let wants_bond = inquire::Confirm::new("Configure a network bond?")
+        .with_default(false)
+        .prompt()
+        .map_err(|e| InterpretError::new(format!("Could not ask about bond: {e}")))?;
+
+    let bond = if wants_bond {
+        let display_refs: Vec<&str> = options.iter().map(|(d, _)| d.as_str()).collect();
+        print_host_header(host);
+        let selected = inquire::MultiSelect::new(
+            "Select the interfaces to include in the bond:",
+            display_refs,
+        )
+        .with_validator(|choices: &[inquire::list_option::ListOption<&&str>]| {
+            if choices.len() < 2 {
+                Ok(inquire::validator::Validation::Invalid(
+                    "Select at least two interfaces for a bond".into(),
+                ))
+            } else {
+                Ok(inquire::validator::Validation::Valid)
+            }
+        })
+        .prompt()
+        .map_err(|e| InterpretError::new(format!("Could not select bond interfaces: {e}")))?;
+
+        let interfaces: Vec<String> = options // map the picked display lines back to interface names
+            .iter()
+            .filter(|(display, _)| selected.iter().any(|s| *s == display.as_str()))
+            .map(|(_, name)| name.clone())
+            .collect();
+
+        // Tuple-based picker so we can render fuller descriptions than the
+        // plain `Display` gives. Keep LACP first — it's the HA default.
+        let mode_choices: Vec<(String, LaggProtocol)> = vec![
+            (
+                "LACP (802.3ad) — negotiated aggregation with the switch".to_string(),
+                LaggProtocol::Lacp,
+            ),
+            (
+                "Failover — single active link, others standby".to_string(),
+                LaggProtocol::Failover,
+            ),
+            (
+                "Load Balance — distribute traffic across links".to_string(),
+                LaggProtocol::LoadBalance,
+            ),
+            (
+                "Round Robin — rotate through links per packet".to_string(),
+                LaggProtocol::RoundRobin,
+            ),
+        ];
+        let display_refs: Vec<&str> = mode_choices.iter().map(|(d, _)| d.as_str()).collect();
+        print_host_header(host);
+        let selected_display = inquire::Select::new("Select the bond mode:", display_refs)
+            .with_starting_cursor(0)
+            .prompt()
+            .map_err(|e| InterpretError::new(format!("Could not select bond mode: {e}")))?;
+        let mode = mode_choices
+            .iter()
+            .find(|(d, _)| d.as_str() == selected_display)
+            .map(|(_, p)| p.clone())
+            .expect("selected display must map back to a LaggProtocol");
+
+        info!(
+            "Bond configured for host {} on interfaces [{}] with mode {}",
+            host.summary(),
+            interfaces.join(", "),
+            mode
+        );
+        Some(BondConfig { interfaces, mode })
+    } else {
+        None
+    };
+
+    // --- Blacklist ---
+    // Candidates exclude any interface already claimed by the bond.
+    let bond_members: Vec<&String> = bond
+        .as_ref()
+        .map(|b| b.interfaces.iter().collect())
+        .unwrap_or_default();
+
+    let blacklist_candidates: Vec<(String, String)> = options
+        .iter()
+        .filter(|(_, name)| !bond_members.iter().any(|b| *b == name))
+        .cloned()
+        .collect();
+
+    let blacklisted_interfaces = if blacklist_candidates.is_empty() {
+        Vec::new() // every interface is in the bond: nothing left to blacklist
+    } else {
+        print_host_header(host);
+        let wants_blacklist = inquire::Confirm::new("Blacklist any remaining interface?")
+            .with_default(false)
+            .prompt()
+            .map_err(|e| InterpretError::new(format!("Could not ask about blacklist: {e}")))?;
+
+        if wants_blacklist {
+            let display_refs: Vec<&str> = blacklist_candidates
+                .iter()
+                .map(|(d, _)| d.as_str())
+                .collect();
+            print_host_header(host);
+            let selected =
+                inquire::MultiSelect::new("Select the interfaces to blacklist:", display_refs)
+                    .prompt() // NOTE(review): no validator here — with no bond, every NIC can be picked; confirm intended
+                    .map_err(|e| {
+                        InterpretError::new(format!("Could not select blacklisted interfaces: {e}"))
+                    })?;
+
+            let names: Vec<String> = blacklist_candidates
+                .iter()
+                .filter(|(display, _)| selected.iter().any(|s| *s == display.as_str()))
+                .map(|(_, name)| name.clone())
+                .collect();
+
+            if !names.is_empty() {
+                info!(
+                    "Blacklisted interfaces on host {}: {}",
+                    host.summary(),
+                    names.join(", ")
+                );
+            }
+            names
+        } else {
+            Vec::new()
+        }
+    };
+
+    Ok(NetworkConfig {
+        bond,
+        blacklisted_interfaces,
+    })
+}
--- a/harmony/src/modules/inventory/mod.rs
+++ b/harmony/src/modules/inventory/mod.rs
@@ -35,6 +35,37 @@ use crate::{
 };
 use harmony_types::id::Id;

+/// Build the `labels` list for a host discovered via the inventory agent.
+///
+/// Always includes the `discovered-by` provenance label. Also promotes the
+/// agent's `Chipset { vendor, name }` into a `system-product-name` label so
+/// `PhysicalHost::summary()` can show something like "LENOVO 3136" instead of
+/// falling back to the generic "Server" category string. Skips that label when
+/// both chipset fields are blank (empty or whitespace-only after trimming).
+fn build_discovered_host_labels(chipset: &harmony_inventory_agent::hwinfo::Chipset) -> Vec<Label> {
+    let mut labels = vec![Label {
+        name: "discovered-by".to_string(),
+        value: "harmony-inventory-agent".to_string(),
+    }];
+
+    let vendor = chipset.vendor.trim();
+    let name = chipset.name.trim();
+    let product = match (vendor.is_empty(), name.is_empty()) {
+        (true, true) => None, // nothing usable: omit the label
+        (true, false) => Some(name.to_string()), // name only
+        (false, true) => Some(vendor.to_string()), // vendor only
+        (false, false) => Some(format!("{vendor} {name}")), // "<vendor> <name>"
+    };
+    if let Some(value) = product {
+        labels.push(Label {
+            name: "system-product-name".to_string(),
+            value,
+        });
+    }
+
+    labels
+}
+
 /// This launches an harmony_inventory_agent discovery process
 /// This will allow us to register/update hosts running harmony_inventory_agent
 /// from LAN in the Harmony inventory
@@ -154,27 +185,27 @@ impl DiscoverInventoryAgentInterpret {
                                storage_controller: _,
                                memory_modules,
                                cpus,
-                                chipset: _,
-                                network_interfaces,
+                                chipset,
+                                mut network_interfaces,
                                management_interface: _,
                                host_uuid,
                            } = host;

+                            // Sort NICs by name for deterministic display (e.g. f0 before f1)
+                            // and stable serialization — keeps save()'s byte-equality dedup
+                            // correct when the agent reports NICs in different sysfs-walk order.
+                            network_interfaces.sort_by(|a, b| a.name.cmp(&b.name));
+
                            let host = PhysicalHost {
                                id: Id::from(host_uuid),
                                category: HostCategory::Server,
                                network: network_interfaces,
                                storage: storage_drives,
-                                labels: vec![Label {
-                                    name: "discovered-by".to_string(),
-                                    value: "harmony-inventory-agent".to_string(),
-                                }],
+                                labels: build_discovered_host_labels(&chipset),
                                memory_modules,
                                cpus,
                            };

-                            // FIXME only save the host when it is new or something changed in it.
-                            // we currently are saving the host every time it is discovered.
                            let repo = InventoryRepositoryFactory::build()
                                .await
                                .map_err(|e| format!("Could not build repository : {e}"))
@@ -183,11 +214,7 @@ impl DiscoverInventoryAgentInterpret {
                                .await
                                .map_err(|e| format!("Could not save host : {e}"))
                                .unwrap();
-                            info!(
-                                "Saved new host id {}, summary : {}",
-                                host.id,
-                                host.summary()
-                            );
+                            info!("Discovered host {}, summary : {}", host.id, host.summary());
                        });
                    }
                    _ => debug!("Unhandled event {event:?}"),
@@ -248,24 +275,24 @@ impl DiscoverInventoryAgentInterpret {
                            // Reuse the same conversion to PhysicalHost as MDNS flow
                            let harmony_inventory_agent::hwinfo::PhysicalHost {
                                storage_drives,
-                                storage_controller,
+                                storage_controller: _,
                                memory_modules,
                                cpus,
                                chipset,
-                                network_interfaces,
-                                management_interface,
+                                mut network_interfaces,
+                                management_interface: _,
                                host_uuid,
                            } = host;

+                            // Sort NICs by name for deterministic ordering (see MDNS flow above).
+                            network_interfaces.sort_by(|a, b| a.name.cmp(&b.name));
+
                            let host = PhysicalHost {
                                id: Id::from(host_uuid),
                                category: HostCategory::Server,
                                network: network_interfaces,
                                storage: storage_drives,
-                                labels: vec![Label {
-                                    name: "discovered-by".to_string(),
-                                    value: "harmony-inventory-agent".to_string(),
-                                }],
+                                labels: build_discovered_host_labels(&chipset),
                                memory_modules,
                                cpus,
                            };
@@ -278,7 +305,7 @@ impl DiscoverInventoryAgentInterpret {
                            if let Err(e) = repo.save(&host).await {
                                log::debug!("Failed to save host {}: {e}", host.id);
                            } else {
-                                info!("Saved host id {}, summary : {}", host.id, host.summary());
+                                info!("Discovered host {}, summary : {}", host.id, host.summary());
                            }
                        }
                        Ok(Err(e)) => {
--- a/harmony/src/modules/linux/ansible_configurator.rs
+++ b/harmony/src/modules/linux/ansible_configurator.rs
@@ -500,8 +500,8 @@ impl AnsibleHostConfigurator {
            // adds debug signal: an unparseable stdout (real protocol
            // mismatch) or a non-empty stderr.
            let stderr = stderr.trim();
-            let already_parsed = parse_err.starts_with("UNREACHABLE!")
-                || parse_err.starts_with("FAILED!");
+            let already_parsed =
+                parse_err.starts_with("UNREACHABLE!") || parse_err.starts_with("FAILED!");
            if already_parsed && stderr.is_empty() {
                return exec(format!(
                    "ansible module {module} failed against {host}: {parse_err}"
--- a/harmony/src/modules/linux/topology.rs
+++ b/harmony/src/modules/linux/topology.rs
@@ -1,4 +1,4 @@
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};

 use async_trait::async_trait;
 use harmony_types::net::IpAddress;
@@ -66,6 +66,17 @@ pub struct SshCredentials {
    pub sudo_password: Option<String>,
 }

+impl SshCredentials {
+    pub fn default_ubuntu_aws() -> Self { // preset credentials for a stock Ubuntu AMI on EC2
+        Self {
+            user: "ubuntu".to_string(), // Ubuntu AMIs log in as `ubuntu`, not `ec2-user`
+            private_key_path: Path::new("~/.ssh/id_rsa").to_path_buf(), // `~` is stored as-is
+            remote_python: Default::default(),
+            sudo_password: None,
+        }
+    }
+}
+
 impl LinuxHostTopology {
    pub fn new(name: impl Into<String>, host: IpAddress, credentials: SshCredentials) -> Self {
        let configurator = AnsibleHostConfigurator::new();
--- a/harmony/src/modules/mod.rs
+++ b/harmony/src/modules/mod.rs
@@ -18,6 +18,7 @@ pub mod linux;
 pub mod load_balancer;
 pub mod monitoring;
 pub mod nats;
+pub mod nats_auth_callout;
 pub mod network;
 pub mod node_health;
 pub mod okd;
--- a/harmony/src/modules/monitoring/ceph_alerts.rs
+++ b/harmony/src/modules/monitoring/ceph_alerts.rs
@@ -0,0 +1,167 @@
+use std::collections::BTreeMap;
+
+use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{Rule, RuleGroup};
+
+pub fn ceph_alert_rule_groups() -> Vec<RuleGroup> { // returns five rule groups; every rule is built by `alert`
+    vec![
+        RuleGroup {
+            name: "ceph-cluster-health".to_string(), // health status, monitor quorum, manager presence
+            rules: vec![
+                alert(
+                    "CephHealthWarn", // ceph_health_status == 1 is reported as WARN, == 2 as ERR
+                    "max(ceph_health_status) == 1",
+                    Some("15m"),
+                    "warning",
+                    "Ceph cluster health is WARN",
+                    "Ceph reports HEALTH_WARN for more than 15 minutes. Run `ceph -s` or check the Ceph dashboard to see active health checks.",
+                ),
+                alert(
+                    "CephHealthErr",
+                    "max(ceph_health_status) == 2",
+                    Some("5m"),
+                    "critical",
+                    "Ceph cluster health is ERR",
+                    "Ceph reports HEALTH_ERR for more than 5 minutes. Immediate investigation required.",
+                ),
+                alert(
+                    "CephMonDown",
+                    "count(max by (ceph_daemon) (ceph_mon_quorum_status == 0)) > 0",
+                    Some("5m"),
+                    "critical",
+                    "Ceph monitor is out of quorum",
+                    "One or more Ceph monitors are not in quorum. Quorum loss risks cluster availability.",
+                ),
+                alert(
+                    "CephMgrAbsent", // NOTE(review): yields no sample if ceph_mgr_status is missing entirely — confirm that case is covered
+                    "sum(max by (ceph_daemon) (ceph_mgr_status)) < 1",
+                    Some("5m"),
+                    "critical",
+                    "No active Ceph manager",
+                    "No Ceph manager daemon is currently active. Dashboards and orchestration will be unavailable.",
+                ),
+            ],
+        },
+        RuleGroup {
+            name: "ceph-osd".to_string(), // OSD down state and per-OSD fill level
+            rules: vec![
+                alert(
+                    "CephOSDDown",
+                    "count(max by (ceph_daemon) (ceph_osd_up == 0)) > 0",
+                    Some("5m"),
+                    "warning",
+                    "One or more Ceph OSDs are down",
+                    "At least one OSD daemon is reporting down for 5 minutes. Data redundancy may be reduced.",
+                ),
+                alert(
+                    "CephOSDNearFull", // same expression as CephOSDFull, lower threshold (80 vs 90)
+                    "max by (ceph_daemon) (100 * ceph_osd_stat_bytes_used / ceph_osd_stat_bytes) > 80",
+                    Some("15m"),
+                    "warning",
+                    "Ceph OSD is near full",
+                    "OSD {{ $labels.ceph_daemon }} is above 80% utilization. Rebalance or add capacity.",
+                ),
+                alert(
+                    "CephOSDFull",
+                    "max by (ceph_daemon) (100 * ceph_osd_stat_bytes_used / ceph_osd_stat_bytes) > 90",
+                    Some("5m"),
+                    "critical",
+                    "Ceph OSD is critically full",
+                    "OSD {{ $labels.ceph_daemon }} is above 90% utilization. Writes may block. Act immediately.",
+                ),
+            ],
+        },
+        RuleGroup {
+            name: "ceph-capacity".to_string(), // cluster and pool fill level, plus a fill forecast
+            rules: vec![
+                alert(
+                    "CephClusterNearFull", // `a or b` uses the second metric when the first has no series
+                    "100 * max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes) / max(ceph_cluster_total_bytes) > 75",
+                    Some("15m"),
+                    "warning",
+                    "Ceph cluster is near full",
+                    "Cluster raw utilization is above 75% for 15 minutes.",
+                ),
+                alert(
+                    "CephClusterCriticallyFull",
+                    "100 * max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes) / max(ceph_cluster_total_bytes) > 85",
+                    Some("5m"),
+                    "critical",
+                    "Ceph cluster is critically full",
+                    "Cluster raw utilization is above 85%. Imminent risk of write unavailability.",
+                ),
+                alert(
+                    "CephPoolNearFull", // usage ratio is used / (used + max_avail), per pool_id
+                    "100 * max by (pool_id) (ceph_pool_bytes_used) / (max by (pool_id) (ceph_pool_bytes_used) + max by (pool_id) (ceph_pool_max_avail)) > 80",
+                    Some("15m"),
+                    "warning",
+                    "Ceph pool is near full",
+                    "Pool (pool_id {{ $labels.pool_id }}) is above 80% usage.",
+                ),
+                alert(
+                    "CephDaysUntilFull", // clamp_min(.., 1) keeps the divisor >= 1 when usage is flat or shrinking
+                    "(max(ceph_cluster_total_bytes) - max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)) / clamp_min(deriv(max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)[7d:1h]), 1) / 86400 < 30",
+                    Some("1h"),
+                    "warning",
+                    "Ceph cluster predicted to fill within 30 days",
+                    "Based on the 7-day usage trend, the cluster will reach capacity in less than 30 days.",
+                ),
+            ],
+        },
+        RuleGroup {
+            name: "ceph-placement-groups".to_string(), // non-clean placement groups and slow ops
+            rules: vec![
+                alert(
+                    "CephPGsNotActiveClean",
+                    "max(ceph_pg_total) - max(ceph_pg_clean) > 0",
+                    Some("15m"),
+                    "warning",
+                    "Some placement groups are not active+clean",
+                    "{{ $value }} PGs have been in a non-clean state for more than 15 minutes.",
+                ),
+                alert(
+                    "CephSlowOps",
+                    "max(ceph_healthcheck_slow_ops) > 0",
+                    Some("5m"),
+                    "warning",
+                    "Ceph reports slow ops",
+                    "Ceph has {{ $value }} slow operations outstanding for more than 5 minutes.",
+                ),
+            ],
+        },
+        RuleGroup {
+            name: "ceph-nodes".to_string(), // filesystem usage of `/` and `/var` per instance
+            rules: vec![alert(
+                "CephNodeRootDiskUsage",
+                "100 * (1 - (max by (instance, mountpoint) (node_filesystem_avail_bytes{mountpoint=~\"/|/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"}) / max by (instance, mountpoint) (node_filesystem_size_bytes{mountpoint=~\"/|/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"}))) > 85",
+                Some("10m"),
+                "warning",
+                "Ceph node root/var disk above 85%",
+                "Node {{ $labels.instance }} mountpoint {{ $labels.mountpoint }} is above 85% disk usage. OSDs on this node may be at risk.",
+            )],
+        },
+    ]
+}
+
+/// Assembles one alerting [`Rule`]: `severity` becomes the sole label, `summary` and
+/// `description` become annotations, and `for_` is copied through unchanged when present.
+fn alert(
+    name: &str,
+    expr: &str,
+    for_: Option<&str>,
+    severity: &str,
+    summary: &str,
+    description: &str,
+) -> Rule {
+    let labels = BTreeMap::from([("severity".to_owned(), severity.to_owned())]);
+    let annotations = BTreeMap::from([
+        ("summary".to_owned(), summary.to_owned()),
+        ("description".to_owned(), description.to_owned()),
+    ]);
+    Rule {
+        alert: Some(name.to_owned()),
+        expr: Some(expr.to_owned()),
+        for_: for_.map(str::to_owned),
+        labels: Some(labels),
+        annotations: Some(annotations),
+    }
+}
--- a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/ceph.json
+++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/ceph.json
@@ -0,0 +1,892 @@
+{
+  "title": "Ceph Cluster",
+  "uid": "ceph-cluster",
+  "schemaVersion": 36,
+  "version": 1,
+  "refresh": "30s",
+  "time": { "from": "now-1h", "to": "now" },
+
+  "templating": {
+    "list": [
+      {
+        "name": "pool",
+        "type": "query",
+        "label": "Pool",
+        "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+        "query": { "query": "label_values(ceph_pool_metadata, name)", "refId": "Pool" },
+        "definition": "label_values(ceph_pool_metadata, name)",
+        "multi": true,
+        "includeAll": true,
+        "current": { "text": "All", "value": "$__all", "selected": false },
+        "refresh": 1,
+        "sort": 1
+      },
+      {
+        "name": "osd",
+        "type": "query",
+        "label": "OSD",
+        "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+        "query": { "query": "label_values(ceph_osd_metadata, ceph_daemon)", "refId": "OSD" },
+        "definition": "label_values(ceph_osd_metadata, ceph_daemon)",
+        "multi": true,
+        "includeAll": true,
+        "current": { "text": "All", "value": "$__all", "selected": false },
+        "refresh": 1,
+        "sort": 1
+      }
+    ]
+  },
+
+  "panels": [
+
+    {
+      "type": "row", "id": 1, "title": "Cluster Status", "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }
+    },
+
+    {
+      "type": "stat", "id": 2, "title": "Health",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "max(ceph_health_status)", "refId": "A" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "yellow", "value": 1 },
+            { "color": "red", "value": 2 }
+          ]},
+          "mappings": [{
+            "type": "value",
+            "options": {
+              "0": { "text": "HEALTH_OK", "index": 0 },
+              "1": { "text": "HEALTH_WARN", "index": 1 },
+              "2": { "text": "HEALTH_ERR", "index": 2 }
+            }
+          }]
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "none", "textMode": "value"
+      },
+      "gridPos": { "h": 5, "w": 4, "x": 0, "y": 1 }
+    },
+
+    {
+      "type": "stat", "id": 3, "title": "Mon Quorum",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "count(max by (ceph_daemon) (ceph_mon_quorum_status == 1)) or vector(0)", "refId": "A", "legendFormat": "In Quorum" },
+        { "expr": "count(max by (ceph_daemon) (ceph_mon_metadata)) or vector(0)", "refId": "B", "legendFormat": "Total" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "none", "textMode": "auto", "orientation": "horizontal"
+      },
+      "gridPos": { "h": 5, "w": 4, "x": 4, "y": 1 }
+    },
+
+    {
+      "type": "stat", "id": 4, "title": "MGR Active",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "sum(max by (ceph_daemon) (ceph_mgr_status)) or vector(0)", "refId": "A" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "red", "value": null },
+            { "color": "green", "value": 1 }
+          ]}
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "none", "textMode": "auto"
+      },
+      "gridPos": { "h": 5, "w": 3, "x": 8, "y": 1 }
+    },
+
+    {
+      "type": "stat", "id": 5, "title": "OSDs Up / In / Total",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(max by (ceph_daemon) (ceph_osd_up)) or vector(0)", "refId": "A", "legendFormat": "Up" },
+        { "expr": "sum(max by (ceph_daemon) (ceph_osd_in)) or vector(0)", "refId": "B", "legendFormat": "In" },
+        { "expr": "count(max by (ceph_daemon) (ceph_osd_metadata)) or vector(0)", "refId": "C", "legendFormat": "Total" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "none", "textMode": "auto", "orientation": "horizontal"
+      },
+      "gridPos": { "h": 5, "w": 5, "x": 11, "y": 1 }
+    },
+
+    {
+      "type": "stat", "id": 6, "title": "Pools",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "count(max by (pool_id) (ceph_pool_metadata)) or vector(0)", "refId": "A" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "none", "textMode": "auto"
+      },
+      "gridPos": { "h": 5, "w": 3, "x": 16, "y": 1 }
+    },
+
+    {
+      "type": "stat", "id": 7, "title": "PGs Active+Clean / Total",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "max(ceph_pg_clean) or vector(0)", "refId": "A", "legendFormat": "Active+Clean" },
+        { "expr": "max(ceph_pg_total) or vector(0)", "refId": "B", "legendFormat": "Total" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "none", "textMode": "auto", "orientation": "horizontal"
+      },
+      "gridPos": { "h": 5, "w": 5, "x": 19, "y": 1 }
+    },
+
+    {
+      "type": "row", "id": 100, "title": "Active Issues", "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 6 }
+    },
+
+    {
+      "type": "stat", "id": 101, "title": "Critical Ceph alerts firing",
+      "description": "Count of Ceph alert rules currently in firing state with severity=critical. Drives the red tile on the Health stat to concrete action. 0 when the cluster is healthy.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"Ceph.*\",severity=\"critical\"}) or vector(0)",
+        "refId": "A", "legendFormat": ""
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "red",   "value": 1 }
+          ]},
+          "unit": "short", "noValue": "0"
+        }
+      },
+      "options": {
+        "colorMode": "background", "graphMode": "none", "justifyMode": "center", "textMode": "auto",
+        "reduceOptions": { "calcs": ["lastNotNull"] }
+      },
+      "gridPos": { "h": 4, "w": 12, "x": 0, "y": 7 }
+    },
+
+    {
+      "type": "stat", "id": 102, "title": "Warning Ceph alerts firing",
+      "description": "Count of Ceph alert rules currently in firing state with severity=warning. Matches what drives the yellow HEALTH_WARN tile on this dashboard.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"Ceph.*\",severity=\"warning\"}) or vector(0)",
+        "refId": "A", "legendFormat": ""
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green",  "value": null },
+            { "color": "yellow", "value": 1 }
+          ]},
+          "unit": "short", "noValue": "0"
+        }
+      },
+      "options": {
+        "colorMode": "background", "graphMode": "none", "justifyMode": "center", "textMode": "auto",
+        "reduceOptions": { "calcs": ["lastNotNull"] }
+      },
+      "gridPos": { "h": 4, "w": 12, "x": 12, "y": 7 }
+    },
+
+    {
+      "type": "row", "id": 104, "title": "Issue details — click to expand", "collapsed": true,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 11 },
+      "panels": [
+
+        {
+          "type": "table", "id": 105, "title": "Active Ceph health checks (ceph health detail)",
+          "description": "Exactly what `ceph health detail` would show. One row per active health check; the Check column is the Ceph check code (OSD_DOWN, POOL_NEARFULL, PG_DEGRADED, MON_CLOCK_SKEW, etc.). Severity is the Ceph-native HEALTH_WARN / HEALTH_ERR label emitted by the mgr prometheus module. An empty table means Ceph reports no active health checks — the Health tile above should be HEALTH_OK. This is the primary answer to 'why isn't it green?'.",
+          "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+          "targets": [{
+            "expr": "ceph_health_detail == 1",
+            "refId": "A", "instant": true, "legendFormat": ""
+          }],
+          "transformations": [
+            { "id": "labelsToFields", "options": { "mode": "columns" } },
+            {
+              "id": "organize",
+              "options": {
+                "excludeByName": {
+                  "__name__":          true,
+                  "Value":             true,
+                  "ceph_health_detail":true,
+                  "Time":              true,
+                  "prometheus":        true,
+                  "container":         true,
+                  "endpoint":          true,
+                  "job":               true,
+                  "service":           true,
+                  "instance":          true,
+                  "pod":               true,
+                  "namespace":         true
+                },
+                "renameByName": {
+                  "name":     "Check",
+                  "severity": "Severity"
+                },
+                "indexByName": {
+                  "severity": 0,
+                  "name":     1
+                }
+              }
+            }
+          ],
+          "fieldConfig": {
+            "defaults": {
+              "custom": { "align": "left" },
+              "noValue": "— HEALTH_OK, no active checks —"
+            },
+            "overrides": [
+              {
+                "matcher": { "id": "byName", "options": "Severity" },
+                "properties": [
+                  { "id": "custom.cellOptions", "value": { "type": "color-background", "mode": "basic" } },
+                  { "id": "custom.width", "value": 150 },
+                  {
+                    "id": "mappings",
+                    "value": [{
+                      "type": "value",
+                      "options": {
+                        "HEALTH_ERR":  { "text": "HEALTH_ERR",  "color": "dark-red",    "index": 0 },
+                        "HEALTH_WARN": { "text": "HEALTH_WARN", "color": "dark-yellow", "index": 1 }
+                      }
+                    }]
+                  }
+                ]
+              },
+              { "matcher": { "id": "byName", "options": "Check" }, "properties": [{ "id": "custom.width", "value": 320 }] }
+            ]
+          },
+          "options": {
+            "sortBy": [{ "desc": false, "displayName": "Severity" }],
+            "footer": { "show": false }
+          },
+          "gridPos": { "h": 6, "w": 12, "x": 0, "y": 12 }
+        },
+
+        {
+          "type": "table", "id": 103, "title": "Firing Ceph alerts (Alertmanager view)",
+          "description": "Instant-query view of every Ceph alert currently firing — the same set that pages oncall through Alertmanager. Usually matches the health-checks table above, plus derived alerts that have no direct ceph_health_detail counterpart (CephDaysUntilFull, CephNodeRootDiskUsage). The ALERTS metric carries labels only, not annotations: alert name plus daemon/pool/instance labels should be enough to identify the problem; run `oc -n openshift-monitoring get prometheusrule ceph-alerts -o yaml` or check Alertmanager for the full summary/description.",
+          "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+          "targets": [{
+            "expr": "ALERTS{alertstate=\"firing\",alertname=~\"Ceph.*\"}",
+            "refId": "A", "instant": true, "legendFormat": ""
+          }],
+          "transformations": [
+            { "id": "labelsToFields", "options": { "mode": "columns" } },
+            {
+              "id": "organize",
+              "options": {
+                "excludeByName": {
+                  "alertstate": true,
+                  "__name__":   true,
+                  "Value":      true,
+                  "ALERTS":     true,
+                  "Time":       true,
+                  "prometheus": true,
+                  "container":  true,
+                  "endpoint":   true,
+                  "job":        true,
+                  "service":    true
+                },
+                "renameByName": {
+                  "alertname":   "Alert Name",
+                  "severity":    "Severity",
+                  "ceph_daemon": "Ceph Daemon",
+                  "pool_id":     "Pool",
+                  "instance":    "Node / Instance",
+                  "mountpoint":  "Mountpoint",
+                  "namespace":   "Namespace"
+                },
+                "indexByName": {
+                  "severity":    0,
+                  "alertname":   1,
+                  "ceph_daemon": 2,
+                  "pool_id":     3,
+                  "instance":    4,
+                  "mountpoint":  5,
+                  "namespace":   6
+                }
+              }
+            }
+          ],
+          "fieldConfig": {
+            "defaults": {
+              "custom": { "align": "left" },
+              "noValue": "— no active Ceph issues —"
+            },
+            "overrides": [
+              {
+                "matcher": { "id": "byName", "options": "Severity" },
+                "properties": [
+                  { "id": "custom.cellOptions", "value": { "type": "color-background", "mode": "basic" } },
+                  { "id": "custom.width", "value": 110 },
+                  {
+                    "id": "mappings",
+                    "value": [{
+                      "type": "value",
+                      "options": {
+                        "critical": { "text": "CRITICAL", "color": "dark-red",    "index": 0 },
+                        "warning":  { "text": "WARNING",  "color": "dark-yellow", "index": 1 },
+                        "info":     { "text": "INFO",     "color": "dark-blue",   "index": 2 }
+                      }
+                    }]
+                  }
+                ]
+              },
+              { "matcher": { "id": "byName", "options": "Alert Name"      }, "properties": [{ "id": "custom.width", "value": 280 }] },
+              { "matcher": { "id": "byName", "options": "Ceph Daemon"     }, "properties": [{ "id": "custom.width", "value": 180 }] },
+              { "matcher": { "id": "byName", "options": "Pool"            }, "properties": [{ "id": "custom.width", "value": 120 }] },
+              { "matcher": { "id": "byName", "options": "Node / Instance" }, "properties": [{ "id": "custom.width", "value": 220 }] },
+              { "matcher": { "id": "byName", "options": "Mountpoint"      }, "properties": [{ "id": "custom.width", "value": 180 }] }
+            ]
+          },
+          "options": {
+            "sortBy": [{ "desc": false, "displayName": "Severity" }],
+            "footer": { "show": false }
+          },
+          "gridPos": { "h": 6, "w": 12, "x": 12, "y": 12 }
+        }
+
+      ]
+    },
+
+    {
+      "type": "row", "id": 8, "title": "Capacity", "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 12 }
+    },
+
+    {
+      "type": "gauge", "id": 9, "title": "Cluster Used (%)",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "100 * max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes) / max(ceph_cluster_total_bytes)",
+        "refId": "A"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "yellow", "value": 70 },
+            { "color": "red", "value": 85 }
+          ]}
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "showThresholdLabels": true, "showThresholdMarkers": true
+      },
+      "gridPos": { "h": 8, "w": 5, "x": 0, "y": 13 }
+    },
+
+    {
+      "type": "stat", "id": 10, "title": "Total / Used / Available",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "max(ceph_cluster_total_bytes)", "refId": "A", "legendFormat": "Total" },
+        { "expr": "max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)", "refId": "B", "legendFormat": "Used" },
+        { "expr": "max(ceph_cluster_total_bytes) - max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)", "refId": "C", "legendFormat": "Available" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "bytes",
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "value", "graphMode": "none", "textMode": "auto", "orientation": "vertical"
+      },
+      "gridPos": { "h": 8, "w": 4, "x": 5, "y": 13 }
+    },
+
+    {
+      "type": "timeseries", "id": 11, "title": "Capacity Over Time",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "max(ceph_cluster_total_bytes)", "refId": "A", "legendFormat": "Total" },
+        { "expr": "max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)", "refId": "B", "legendFormat": "Used" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "bytes",
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 8 }
+        }
+      },
+      "gridPos": { "h": 8, "w": 11, "x": 9, "y": 13 }
+    },
+
+    {
+      "type": "stat", "id": 12, "title": "Days Until Full (predicted, 7d trend)",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "(max(ceph_cluster_total_bytes) - max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes))\n/\nclamp_min(deriv(max(ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)[7d:1h]), 1)\n/ 86400",
+        "refId": "A"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "d",
+          "decimals": 1,
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "red", "value": null },
+            { "color": "yellow", "value": 14 },
+            { "color": "green", "value": 60 }
+          ]}
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "none", "textMode": "auto"
+      },
+      "gridPos": { "h": 8, "w": 4, "x": 20, "y": 13 }
+    },
+
+    {
+      "type": "bargauge", "id": 13, "title": "Pool Used (%)",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "(\n  100 * max by (pool_id) (ceph_pool_bytes_used)\n  /\n  (max by (pool_id) (ceph_pool_bytes_used) + max by (pool_id) (ceph_pool_max_avail))\n)\n* on(pool_id) group_left(name) max by (pool_id, name) (ceph_pool_metadata{name=~\"$pool\"})",
+        "refId": "A",
+        "legendFormat": "{{name}}",
+        "instant": true
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "yellow", "value": 70 },
+            { "color": "red", "value": 85 }
+          ]}
+        }
+      },
+      "options": {
+        "orientation": "horizontal",
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "displayMode": "gradient",
+        "showUnfilled": true,
+        "valueMode": "color",
+        "sortBy": "Value",
+        "sortOrder": "desc"
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 21 }
+    },
+
+    {
+      "type": "bargauge", "id": 14, "title": "OSD Utilization (%)",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "100 * max by (ceph_daemon) (ceph_osd_stat_bytes_used{ceph_daemon=~\"$osd\"}) / max by (ceph_daemon) (ceph_osd_stat_bytes{ceph_daemon=~\"$osd\"})",
+        "refId": "A",
+        "legendFormat": "{{ceph_daemon}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "yellow", "value": 70 },
+            { "color": "red", "value": 85 }
+          ]}
+        }
+      },
+      "options": {
+        "orientation": "horizontal",
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "displayMode": "gradient",
+        "showUnfilled": true
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 21 }
+    },
+
+    {
+      "type": "row", "id": 15, "title": "Performance", "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 29 }
+    },
+
+    {
+      "type": "timeseries", "id": 16, "title": "Cluster IOPS (Read / Write)",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(max by (pool_id) (rate(ceph_pool_rd[5m])))", "refId": "A", "legendFormat": "Read" },
+        { "expr": "sum(max by (pool_id) (rate(ceph_pool_wr[5m])))", "refId": "B", "legendFormat": "Write" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 8 }
+        }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 30 }
+    },
+
+    {
+      "type": "timeseries", "id": 17, "title": "Cluster Throughput (Read / Write)",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(max by (pool_id) (rate(ceph_pool_rd_bytes[5m])))", "refId": "A", "legendFormat": "Read" },
+        { "expr": "sum(max by (pool_id) (rate(ceph_pool_wr_bytes[5m])))", "refId": "B", "legendFormat": "Write" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "Bps",
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 8 }
+        }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 30 }
+    },
+
+    {
+      "type": "timeseries", "id": 18, "title": "Client Op Latency (Avg)",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        {
+          "expr": "sum(rate(ceph_osd_op_r_latency_sum[5m])) / clamp_min(sum(rate(ceph_osd_op_r_latency_count[5m])), 1)",
+          "refId": "A", "legendFormat": "Read"
+        },
+        {
+          "expr": "sum(rate(ceph_osd_op_w_latency_sum[5m])) / clamp_min(sum(rate(ceph_osd_op_w_latency_count[5m])), 1)",
+          "refId": "B", "legendFormat": "Write"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 8 }
+        }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 38 }
+    },
+
+    {
+      "type": "timeseries", "id": 19, "title": "Recovery Throughput",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(rate(ceph_osd_recovery_bytes[5m])) or vector(0)", "refId": "A", "legendFormat": "Recovery B/s" },
+        { "expr": "sum(rate(ceph_osd_recovery_ops[5m])) or vector(0)", "refId": "B", "legendFormat": "Recovery ops/s" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 8 }
+        },
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "Recovery B/s" }, "properties": [{ "id": "unit", "value": "Bps" }] },
+          { "matcher": { "id": "byName", "options": "Recovery ops/s" }, "properties": [{ "id": "unit", "value": "ops" }] }
+        ]
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 38 }
+    },
+
+    {
+      "type": "row", "id": 20, "title": "Placement Group Health", "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 46 }
+    },
+
+    {
+      "type": "timeseries", "id": 21, "title": "PG States Over Time",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "max(ceph_pg_clean)", "refId": "A", "legendFormat": "clean" },
+        { "expr": "max(ceph_pg_active)", "refId": "B", "legendFormat": "active" },
+        { "expr": "max(ceph_pg_degraded)", "refId": "C", "legendFormat": "degraded" },
+        { "expr": "max(ceph_pg_undersized)", "refId": "D", "legendFormat": "undersized" },
+        { "expr": "max(ceph_pg_peering)", "refId": "E", "legendFormat": "peering" },
+        { "expr": "max(ceph_pg_recovering)", "refId": "F", "legendFormat": "recovering" },
+        { "expr": "max(ceph_pg_backfilling)", "refId": "G", "legendFormat": "backfilling" },
+        { "expr": "max(ceph_pg_remapped)", "refId": "H", "legendFormat": "remapped" },
+        { "expr": "max(ceph_pg_inconsistent)", "refId": "I", "legendFormat": "inconsistent" },
+        { "expr": "max(ceph_pg_stale)", "refId": "J", "legendFormat": "stale" },
+        { "expr": "max(ceph_pg_unknown)", "refId": "K", "legendFormat": "unknown" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short",
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 0 }
+        }
+      },
+      "options": {
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["max", "lastNotNull"],
+          "showLegend": true,
+          "sortBy": "Max",
+          "sortDesc": true
+        }
+      },
+      "gridPos": { "h": 8, "w": 16, "x": 0, "y": 47 }
+    },
+
+    {
+      "type": "stat", "id": 22, "title": "Slow Ops",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "max(ceph_healthcheck_slow_ops) or vector(0)", "refId": "A" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "yellow", "value": 1 },
+            { "color": "red", "value": 10 }
+          ]}
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "area", "textMode": "auto"
+      },
+      "gridPos": { "h": 4, "w": 8, "x": 16, "y": 47 }
+    },
+
+    {
+      "type": "stat", "id": 23, "title": "Misplaced / Degraded Objects",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "max(ceph_num_objects_misplaced) or vector(0)", "refId": "A", "legendFormat": "Misplaced" },
+        { "expr": "max(ceph_num_objects_degraded) or vector(0)", "refId": "B", "legendFormat": "Degraded" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "yellow", "value": 1 }
+          ]}
+        }
+      },
+      "options": {
+        "reduceOptions": { "calcs": ["lastNotNull"] },
+        "colorMode": "background", "graphMode": "none", "textMode": "auto", "orientation": "horizontal"
+      },
+      "gridPos": { "h": 4, "w": 8, "x": 16, "y": 51 }
+    },
+
+    {
+      "type": "row", "id": 24, "title": "OSD Detail", "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 55 }
+    },
+
+    {
+      "type": "table", "id": 25, "title": "OSDs",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "max by (ceph_daemon) (ceph_osd_up{ceph_daemon=~\"$osd\"})", "refId": "A", "legendFormat": "Up", "format": "table", "instant": true },
+        { "expr": "max by (ceph_daemon) (ceph_osd_in{ceph_daemon=~\"$osd\"})", "refId": "B", "legendFormat": "In", "format": "table", "instant": true },
+        { "expr": "100 * max by (ceph_daemon) (ceph_osd_stat_bytes_used{ceph_daemon=~\"$osd\"}) / max by (ceph_daemon) (ceph_osd_stat_bytes{ceph_daemon=~\"$osd\"})", "refId": "C", "format": "table", "instant": true },
+        { "expr": "max by (ceph_daemon) (ceph_osd_numpg{ceph_daemon=~\"$osd\"})", "refId": "D", "format": "table", "instant": true },
+        { "expr": "max by (ceph_daemon) (ceph_osd_apply_latency_ms{ceph_daemon=~\"$osd\"})", "refId": "E", "format": "table", "instant": true },
+        { "expr": "max by (ceph_daemon) (ceph_osd_commit_latency_ms{ceph_daemon=~\"$osd\"})", "refId": "F", "format": "table", "instant": true }
+      ],
+      "transformations": [
+        { "id": "merge" },
+        {
+          "id": "organize",
+          "options": {
+            "excludeByName": { "Time": true, "__name__": true, "job": true, "instance": true, "endpoint": true },
+            "renameByName": {
+              "ceph_daemon": "OSD",
+              "Value #A": "Up",
+              "Value #B": "In",
+              "Value #C": "Util %",
+              "Value #D": "PGs",
+              "Value #E": "Apply Latency",
+              "Value #F": "Commit Latency"
+            },
+            "indexByName": {
+              "OSD": 0, "Up": 1, "In": 2, "Util %": 3, "PGs": 4, "Apply Latency": 5, "Commit Latency": 6
+            }
+          }
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {},
+        "overrides": [
+          {
+            "matcher": { "id": "byName", "options": "Util %" },
+            "properties": [
+              { "id": "unit", "value": "percent" },
+              { "id": "decimals", "value": 1 },
+              { "id": "custom.cellOptions", "value": { "type": "color-background", "mode": "gradient" } },
+              { "id": "thresholds", "value": { "mode": "absolute", "steps": [
+                { "color": "green", "value": null },
+                { "color": "yellow", "value": 70 },
+                { "color": "red", "value": 85 }
+              ]}}
+            ]
+          },
+          { "matcher": { "id": "byName", "options": "Apply Latency" }, "properties": [{ "id": "unit", "value": "ms" }] },
+          { "matcher": { "id": "byName", "options": "Commit Latency" }, "properties": [{ "id": "unit", "value": "ms" }] },
+          {
+            "matcher": { "id": "byRegexp", "options": "Up|In" },
+            "properties": [
+              { "id": "mappings", "value": [{ "type": "value", "options": { "0": { "text": "✗", "index": 0 }, "1": { "text": "✓", "index": 1 }}}] },
+              { "id": "custom.cellOptions", "value": { "type": "color-text" } },
+              { "id": "thresholds", "value": { "mode": "absolute", "steps": [
+                { "color": "red", "value": null },
+                { "color": "green", "value": 1 }
+              ]}}
+            ]
+          }
+        ]
+      },
+      "gridPos": { "h": 10, "w": 16, "x": 0, "y": 56 }
+    },
+
+    {
+      "type": "timeseries", "id": 26, "title": "OSD Apply + Commit Latency",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "max by (ceph_daemon) (ceph_osd_apply_latency_ms{ceph_daemon=~\"$osd\"})", "refId": "A", "legendFormat": "{{ceph_daemon}} apply" },
+        { "expr": "max by (ceph_daemon) (ceph_osd_commit_latency_ms{ceph_daemon=~\"$osd\"})", "refId": "B", "legendFormat": "{{ceph_daemon}} commit" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 1, "fillOpacity": 0 }
+        }
+      },
+      "gridPos": { "h": 10, "w": 8, "x": 16, "y": 56 }
+    },
+
+    {
+      "type": "row", "id": 27, "title": "Pool Detail", "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 66 }
+    },
+
+    {
+      "type": "table", "id": 28, "title": "Pools",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "max by (pool_id, name) (ceph_pool_metadata{name=~\"$pool\"})", "refId": "A", "format": "table", "instant": true },
+        { "expr": "max by (pool_id) (ceph_pool_objects)", "refId": "B", "format": "table", "instant": true },
+        { "expr": "max by (pool_id) (ceph_pool_bytes_used)", "refId": "C", "format": "table", "instant": true },
+        { "expr": "max by (pool_id) (ceph_pool_max_avail)", "refId": "D", "format": "table", "instant": true },
+        { "expr": "100 * max by (pool_id) (ceph_pool_bytes_used) / (max by (pool_id) (ceph_pool_bytes_used) + max by (pool_id) (ceph_pool_max_avail))", "refId": "E", "format": "table", "instant": true }
+      ],
+      "transformations": [
+        { "id": "merge" },
+        {
+          "id": "organize",
+          "options": {
+            "excludeByName": { "Time": true, "__name__": true, "job": true, "instance": true, "endpoint": true, "Value #A": true },
+            "renameByName": {
+              "pool_id": "ID",
+              "name": "Pool",
+              "Value #B": "Objects",
+              "Value #C": "Used",
+              "Value #D": "Available",
+              "Value #E": "Used %"
+            },
+            "indexByName": { "ID": 0, "Pool": 1, "Objects": 2, "Used": 3, "Available": 4, "Used %": 5 }
+          }
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {},
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "Used" }, "properties": [{ "id": "unit", "value": "bytes" }] },
+          { "matcher": { "id": "byName", "options": "Available" }, "properties": [{ "id": "unit", "value": "bytes" }] },
+          {
+            "matcher": { "id": "byName", "options": "Used %" },
+            "properties": [
+              { "id": "unit", "value": "percent" },
+              { "id": "decimals", "value": 1 },
+              { "id": "custom.cellOptions", "value": { "type": "color-background", "mode": "gradient" } },
+              { "id": "thresholds", "value": { "mode": "absolute", "steps": [
+                { "color": "green", "value": null },
+                { "color": "yellow", "value": 70 },
+                { "color": "red", "value": 85 }
+              ]}}
+            ]
+          }
+        ]
+      },
+      "gridPos": { "h": 10, "w": 14, "x": 0, "y": 67 }
+    },
+
+    {
+      "type": "timeseries", "id": 29, "title": "Pool IOPS (Read / Write) — filtered by $pool",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        {
+          "expr": "max by (pool_id) (rate(ceph_pool_rd[5m]))\n* on(pool_id) group_left(name) max by (pool_id, name) (ceph_pool_metadata{name=~\"$pool\"})",
+          "refId": "A", "legendFormat": "Read — {{name}}"
+        },
+        {
+          "expr": "max by (pool_id) (rate(ceph_pool_wr[5m]))\n* on(pool_id) group_left(name) max by (pool_id, name) (ceph_pool_metadata{name=~\"$pool\"})",
+          "refId": "B", "legendFormat": "Write — {{name}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 8 }
+        }
+      },
+      "options": {
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["max", "lastNotNull"],
+          "showLegend": true,
+          "sortBy": "Max",
+          "sortDesc": true
+        }
+      },
+      "gridPos": { "h": 10, "w": 10, "x": 14, "y": 67 }
+    }
+
+  ]
+}
--- a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/cluster-overview.json
+++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/cluster-overview.json
@@ -368,7 +368,7 @@
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
        {
-          "expr": "100 * (1 - (sum(node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"})))",
+          "expr": "100 * (1 - (\n  sum(node_filesystem_avail_bytes{mountpoint=\"/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"} or node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"})\n  /\n  sum(node_filesystem_size_bytes{mountpoint=\"/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"} or node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"})\n))",
          "refId": "A",
          "legendFormat": "Disk"
        }
--- a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/datadog-15-k8s-metrics.json
+++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/datadog-15-k8s-metrics.json
@@ -0,0 +1,852 @@
+{
+  "title": "Datadog — 15 Key Kubernetes Metrics",
+  "uid": "datadog-15-k8s-metrics",
+  "schemaVersion": 36,
+  "version": 1,
+  "refresh": "30s",
+  "time": { "from": "now-1h", "to": "now" },
+  "tags": ["kubernetes", "datadog", "key-metrics", "cluster", "control-plane"],
+  "templating": {
+    "list": [
+      {
+        "name": "namespace",
+        "type": "query",
+        "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+        "query": { "query": "label_values(kube_pod_info, namespace)", "refId": "A" },
+        "refresh": 2,
+        "includeAll": true,
+        "multi": true,
+        "allValue": ".*",
+        "label": "Namespace",
+        "sort": 1,
+        "current": {},
+        "options": []
+      },
+      {
+        "name": "node",
+        "type": "query",
+        "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+        "query": { "query": "label_values(kube_node_info, node)", "refId": "A" },
+        "refresh": 2,
+        "includeAll": true,
+        "multi": true,
+        "allValue": ".*",
+        "label": "Node",
+        "sort": 1,
+        "current": {},
+        "options": []
+      }
+    ]
+  },
+  "panels": [
+
+    {
+      "id": 100, "type": "row", "title": "Cluster State — metrics 1–3 (Node status, Desired vs current pods, Available vs unavailable pods)",
+      "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }
+    },
+
+    {
+      "id": 1, "type": "stat", "title": "Ready Nodes",
+      "description": "Metric 1 — Node status. Count of nodes with condition Ready=true. A node that drops out of Ready can no longer accept new pods; scheduling onto that node stops until it recovers or is drained.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"true\"} == 1)", "refId": "A", "legendFormat": "" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "red", "value": null },
+            { "color": "green", "value": 1 }
+          ]},
+          "unit": "short", "noValue": "0"
+        }
+      },
+      "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" },
+      "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 }
+    },
+
+    {
+      "id": 2, "type": "stat", "title": "Not Ready Nodes",
+      "description": "Nodes reporting Ready=false. These nodes cannot host new pods and existing pods may be evicted. Alert immediately.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "count(kube_node_status_condition{condition=\"Ready\",status=\"false\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "red", "value": 1 }
+          ]},
+          "unit": "short", "noValue": "0"
+        }
+      },
+      "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" },
+      "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 }
+    },
+
+    {
+      "id": 3, "type": "stat", "title": "MemoryPressure",
+      "description": "Nodes flagged by kubelet as being under memory pressure. The kubelet will begin evicting pods that most exceed their memory request.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "count(kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "red", "value": 1 }
+          ]},
+          "unit": "short", "noValue": "0"
+        }
+      },
+      "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" },
+      "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 }
+    },
+
+    {
+      "id": 4, "type": "stat", "title": "DiskPressure",
+      "description": "Nodes under disk pressure. Kubelet runs GC (removing unused images and dead containers) and, if space stays low, starts evicting pods.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "count(kube_node_status_condition{condition=\"DiskPressure\",status=\"true\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "red", "value": 1 }
+          ]},
+          "unit": "short", "noValue": "0"
+        }
+      },
+      "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" },
+      "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 }
+    },
+
+    {
+      "id": 5, "type": "stat", "title": "PIDPressure",
+      "description": "Nodes where the kubelet reports PID pressure: too many processes are running and available process IDs are running low. New processes / containers on the node may fail to start.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "count(kube_node_status_condition{condition=\"PIDPressure\",status=\"true\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "red", "value": 1 }
+          ]},
+          "unit": "short", "noValue": "0"
+        }
+      },
+      "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" },
+      "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 }
+    },
+
+    {
+      "id": 6, "type": "stat", "title": "NetworkUnavailable",
+      "description": "Nodes whose CNI has not (yet) wired the pod network. Pods cannot schedule onto the node until this clears.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "count(kube_node_status_condition{condition=\"NetworkUnavailable\",status=\"true\"} == 1) or vector(0)", "refId": "A", "legendFormat": "" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "red", "value": 1 }
+          ]},
+          "unit": "short", "noValue": "0"
+        }
+      },
+      "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" },
+      "gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 }
+    },
+
+    {
+      "id": 7, "type": "timeseries", "title": "Deployments — Desired vs Current pods",
+      "description": "Metric 2 — Desired vs current pods (Deployments). A persistent gap means pods cannot be scheduled: check node capacity, PodDisruptionBudgets, and image pull failures.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(kube_deployment_spec_replicas{namespace=~\"$namespace\"})", "refId": "A", "legendFormat": "desired" },
+        { "expr": "sum(kube_deployment_status_replicas{namespace=~\"$namespace\"})", "refId": "B", "legendFormat": "current" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }
+        },
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "desired" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue",  "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "current" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }
+        ]
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 5 }
+    },
+
+    {
+      "id": 8, "type": "timeseries", "title": "Deployments — Available vs Unavailable pods",
+      "description": "Metric 3 — Available/unavailable (Deployments). Spikes in unavailable are customer-visible: crashes, failed readiness probes, or resource shortages.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(kube_deployment_status_replicas_available{namespace=~\"$namespace\"})",   "refId": "A", "legendFormat": "available" },
+        { "expr": "sum(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\"})", "refId": "B", "legendFormat": "unavailable" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false }
+        },
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "available" },   "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "unavailable" }, "properties": [{ "id": "color", "value": { "fixedColor": "red",   "mode": "fixed" } }] }
+        ]
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 5 }
+    },
+
+    {
+      "id": 9, "type": "table", "title": "Top Deployments with unavailable replicas",
+      "description": "Deployments that currently report unavailable replicas. Investigate pod events / readiness probes / resource quotas for these.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "topk(20, max by(namespace, deployment)(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\"}) > 0)",
+        "refId": "A", "legendFormat": "", "format": "table", "instant": true
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "custom": { "align": "auto" },
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null },
+            { "color": "red",   "value": 1 }
+          ]}
+        }
+      },
+      "options": { "showHeader": true },
+      "transformations": [
+        { "id": "organize", "options": { "excludeByName": { "Time": true, "__name__": true, "instance": true, "job": true, "endpoint": true, "service": true, "pod": true, "container": true, "prometheus": true, "container_name": true, "namespace_labels": true }, "renameByName": { "Value": "unavailable" } } }
+      ],
+      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 5 }
+    },
+
+    {
+      "id": 10, "type": "timeseries", "title": "DaemonSets — Desired vs Scheduled",
+      "description": "Metric 2 — Desired vs current pods (DaemonSets). DaemonSets should have one pod per matching node; a gap means the pod cannot be placed (taints, resources, node selectors).",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(kube_daemonset_status_desired_number_scheduled{namespace=~\"$namespace\"})", "refId": "A", "legendFormat": "desired" },
+        { "expr": "sum(kube_daemonset_status_current_number_scheduled{namespace=~\"$namespace\"})", "refId": "B", "legendFormat": "scheduled" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }
+        },
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "desired"   }, "properties": [{ "id": "color", "value": { "fixedColor": "blue",  "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "scheduled" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }
+        ]
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 13 }
+    },
+
+    {
+      "id": 11, "type": "timeseries", "title": "DaemonSets — Available vs Unavailable",
+      "description": "Metric 3 — Available/unavailable (DaemonSets). Unavailable DaemonSet pods often mean per-node infrastructure pods (CNI, logging, monitoring agents) are failing on specific nodes.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(kube_daemonset_status_number_available{namespace=~\"$namespace\"})",   "refId": "A", "legendFormat": "available" },
+        { "expr": "sum(kube_daemonset_status_number_unavailable{namespace=~\"$namespace\"})", "refId": "B", "legendFormat": "unavailable" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false }
+        },
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "available"   }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "unavailable" }, "properties": [{ "id": "color", "value": { "fixedColor": "red",   "mode": "fixed" } }] }
+        ]
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 13 }
+    },
+
+    {
+      "id": 200, "type": "row", "title": "Resources — Memory (metrics 4–6)",
+      "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 }
+    },
+
+    {
+      "id": 20, "type": "timeseries", "title": "Cluster memory — usage vs requests vs limits",
+      "description": "Metrics 4–5 — aggregate. Compares how much memory containers actually consume (working set) to what they requested and what they are limited to. A pod that crosses its limit is OOMKilled.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"})", "refId": "A", "legendFormat": "usage" },
+        { "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\",resource=\"memory\",container!=\"\"})", "refId": "B", "legendFormat": "requests" },
+        { "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\",resource=\"memory\",container!=\"\"})",   "refId": "C", "legendFormat": "limits" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "bytes", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }
+        },
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "usage"    }, "properties": [{ "id": "color", "value": { "fixedColor": "green",  "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "requests" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue",   "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "limits"   }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }
+        ]
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 22 }
+    },
+
+    {
+      "id": 21, "type": "timeseries", "title": "Top 15 pods — memory usage / memory limit (%)",
+      "description": "Metric 4 — pod-level. Pods approaching 100% of their memory limit will be OOMKilled. If a pod persistently sits near the limit, either raise the limit or optimize memory use.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "topk(15,\n  100 * sum by(namespace, pod)(container_memory_working_set_bytes{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"})\n  /\n  sum by(namespace, pod)(kube_pod_container_resource_limits{namespace=~\"$namespace\",resource=\"memory\",container!=\"\"})\n)",
+        "refId": "A", "legendFormat": "{{namespace}}/{{pod}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 22 }
+    },
+
+    {
+      "id": 22, "type": "timeseries", "title": "Node memory — requests vs allocatable",
+      "description": "Metric 6 — per node. Compares the sum of pod memory requests placed on each node to the node's allocatable memory. If requests approach allocatable, the scheduler can no longer place new pods on that node.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum by(node)(kube_pod_container_resource_requests{resource=\"memory\",container!=\"\",node=~\"$node\"})", "refId": "A", "legendFormat": "{{node}} — requested" },
+        { "expr": "sum by(node)(kube_node_status_allocatable{resource=\"memory\",node=~\"$node\"})",                          "refId": "B", "legendFormat": "{{node}} — allocatable" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "bytes", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 30 }
+    },
+
+    {
+      "id": 23, "type": "bargauge", "title": "Node memory commitment (requests / allocatable)",
+      "description": "How full each node is in terms of scheduled (requested) memory. ≥ 100% means no further pods requesting memory can be scheduled there.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "100 *\n  sum by(node)(kube_pod_container_resource_requests{resource=\"memory\",container!=\"\",node=~\"$node\"})\n  /\n  sum by(node)(kube_node_status_allocatable{resource=\"memory\",node=~\"$node\"})",
+        "refId": "A", "legendFormat": "{{node}}", "instant": true
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green",  "value": null },
+            { "color": "yellow", "value": 70 },
+            { "color": "red",    "value": 90 }
+          ]}
+        }
+      },
+      "options": {
+        "orientation": "horizontal",
+        "displayMode": "gradient",
+        "showUnfilled": true,
+        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 30 }
+    },
+
+    {
+      "id": 300, "type": "row", "title": "Resources — CPU (metrics 8–10)",
+      "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 38 }
+    },
+
+    {
+      "id": 30, "type": "timeseries", "title": "Cluster CPU — usage vs requests vs limits",
+      "description": "Metrics 9–10 — aggregate. Unlike memory, CPU is compressible: hitting a limit causes throttling (slower execution), not an OOMKill. Usage staying well below limits is fine; usage staying well below requests means reserved CPU is sitting idle and schedulable capacity is being wasted.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"}[5m]))", "refId": "A", "legendFormat": "usage" },
+        { "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\",resource=\"cpu\",container!=\"\"})",           "refId": "B", "legendFormat": "requests" },
+        { "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\",resource=\"cpu\",container!=\"\"})",             "refId": "C", "legendFormat": "limits" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }
+        },
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "usage"    }, "properties": [{ "id": "color", "value": { "fixedColor": "green",  "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "requests" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue",   "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "limits"   }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }
+        ]
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 39 }
+    },
+
+    {
+      "id": 31, "type": "timeseries", "title": "Top 15 pods — CPU usage / CPU limit (%)",
+      "description": "Metric 9 — pod-level. Pods that sit at or near 100% for long windows are being throttled by the kernel (the CPU limit caps usage, so the ratio rarely goes meaningfully above 100%), which causes latency spikes even though the pod is not killed.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "topk(15,\n  100 * sum by(namespace, pod)(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container!=\"\",container!=\"POD\"}[5m]))\n  /\n  sum by(namespace, pod)(kube_pod_container_resource_limits{namespace=~\"$namespace\",resource=\"cpu\",container!=\"\"})\n)",
+        "refId": "A", "legendFormat": "{{namespace}}/{{pod}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 39 }
+    },
+
+    {
+      "id": 32, "type": "timeseries", "title": "Node CPU — requests vs allocatable",
+      "description": "Metric 8 — per node. Same shape as memory: once requests saturate allocatable CPU, no more pods requesting CPU can be placed on the node.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "sum by(node)(kube_pod_container_resource_requests{resource=\"cpu\",container!=\"\",node=~\"$node\"})", "refId": "A", "legendFormat": "{{node}} — requested" },
+        { "expr": "sum by(node)(kube_node_status_allocatable{resource=\"cpu\",node=~\"$node\"})",                          "refId": "B", "legendFormat": "{{node}} — allocatable" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 5, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 47 }
+    },
+
+    {
+      "id": 33, "type": "bargauge", "title": "Node CPU commitment (requests / allocatable)",
+      "description": "How full each node is in terms of scheduled (requested) CPU. ≥ 100% means no further pods requesting CPU can be scheduled there.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "100 *\n  sum by(node)(kube_pod_container_resource_requests{resource=\"cpu\",container!=\"\",node=~\"$node\"})\n  /\n  sum by(node)(kube_node_status_allocatable{resource=\"cpu\",node=~\"$node\"})",
+        "refId": "A", "legendFormat": "{{node}}", "instant": true
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green",  "value": null },
+            { "color": "yellow", "value": 70 },
+            { "color": "red",    "value": 90 }
+          ]}
+        }
+      },
+      "options": {
+        "orientation": "horizontal",
+        "displayMode": "gradient",
+        "showUnfilled": true,
+        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 47 }
+    },
+
+    {
+      "id": 400, "type": "row", "title": "Resources — Disk (metric 7)",
+      "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 55 }
+    },
+
+    {
+      "id": 40, "type": "timeseries", "title": "Node root filesystem usage (%)",
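+      "description": "Metric 7 — node level. Combined usage of the / and /var filesystems on each node (tmpfs, overlay, squashfs and ramfs mounts are excluded). Disk is non-compressible: when it runs low, kubelet raises DiskPressure and evicts pods. Alert well before 100%.",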
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "100 * (1 - (\n  sum by(instance)(node_filesystem_avail_bytes{mountpoint=~\"/|/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"})\n  /\n  sum by(instance)(node_filesystem_size_bytes{mountpoint=~\"/|/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"})\n))",
+        "refId": "A", "legendFormat": "{{instance}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green",  "value": null },
+            { "color": "yellow", "value": 70 },
+            { "color": "red",    "value": 85 }
+          ]}
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 56 }
+    },
+
+    {
+      "id": 41, "type": "table", "title": "Top 20 PVC usage (%)",
+      "description": "Metric 7 — volume level. Persistent volumes that fill up cause write errors inside applications. Alert at ~80% so there is time to expand or free space.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "topk(20,\n  100 * max by(namespace, persistentvolumeclaim)(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\"})\n  /\n  max by(namespace, persistentvolumeclaim)(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\"})\n)",
+        "refId": "A", "legendFormat": "", "format": "table", "instant": true
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "custom": { "align": "auto", "cellOptions": { "type": "color-background" } },
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green",  "value": null },
+            { "color": "yellow", "value": 70 },
+            { "color": "red",    "value": 85 }
+          ]}
+        }
+      },
+      "options": { "showHeader": true },
+      "transformations": [
+        { "id": "organize", "options": { "excludeByName": { "Time": true, "__name__": true, "instance": true, "job": true, "endpoint": true, "service": true, "pod": true, "container": true, "prometheus": true }, "renameByName": { "Value": "usage %" } } }
+      ],
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 56 }
+    },
+
+    {
+      "id": 500, "type": "row", "title": "Control plane — etcd (metrics 11–12)",
+      "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 64 }
+    },
+
+    {
+      "id": 50, "type": "stat", "title": "etcd has leader",
+      "description": "Metric 11 — etcd_server_has_leader. Minimum across members. 0 means at least one member does not see a leader — the cluster may be partitioned or mid-election.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "min(etcd_server_has_leader)", "refId": "A", "legendFormat": "" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "red",   "value": null },
+            { "color": "green", "value": 1 }
+          ]},
+          "mappings": [{
+            "type": "value",
+            "options": {
+              "0": { "text": "NO LEADER", "color": "red" },
+              "1": { "text": "LEADER OK", "color": "green" }
+            }
+          }],
+          "unit": "short", "noValue": "?"
+        }
+      },
+      "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" },
+      "gridPos": { "h": 4, "w": 6, "x": 0, "y": 65 }
+    },
+
+    {
+      "id": 51, "type": "stat", "title": "Leader changes (last 1h)",
+      "description": "Metric 12 — etcd_server_leader_changes_seen_total increase over 1h. Frequent elections usually mean network flapping or resource exhaustion on a member.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "sum(increase(etcd_server_leader_changes_seen_total[1h]))", "refId": "A", "legendFormat": "" }],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green",  "value": null },
+            { "color": "yellow", "value": 1 },
+            { "color": "red",    "value": 3 }
+          ]},
+          "unit": "short", "noValue": "0", "decimals": 0
+        }
+      },
+      "options": { "colorMode": "background", "graphMode": "area", "justifyMode": "center", "reduceOptions": { "calcs": ["lastNotNull"] }, "textMode": "auto" },
+      "gridPos": { "h": 4, "w": 6, "x": 6, "y": 65 }
+    },
+
+    {
+      "id": 52, "type": "timeseries", "title": "Leader changes rate per etcd member",
+      "description": "Per-member rate of leader transitions. A steady drumbeat on a single member points to that node specifically (its disk, its network).",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "rate(etcd_server_leader_changes_seen_total[5m])",
+        "refId": "A", "legendFormat": "{{instance}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 65 }
+    },
+
+    {
+      "id": 53, "type": "timeseries", "title": "etcd has-leader per member",
+      "description": "Per-member value of etcd_server_has_leader. Any dip to 0 is the start of a leader election; frequent dips warrant investigation.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{ "expr": "etcd_server_has_leader", "refId": "A", "legendFormat": "{{instance}}" }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0, "max": 1,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false, "drawStyle": "line", "lineInterpolation": "stepAfter" }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["min", "lastNotNull"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 69 }
+    },
+
+    {
+      "id": 600, "type": "row", "title": "Control plane — API Server (metric 13)",
+      "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 77 }
+    },
+
+    {
+      "id": 60, "type": "timeseries", "title": "API server request rate by verb",
+      "description": "Metric 13 — request count. Non-streaming calls per second by verb. Read-heavy (GET/LIST) load is usually controllers; write-heavy (POST/PUT/PATCH/DELETE) is user activity or autoscaling.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "sum by(verb)(rate(apiserver_request_total{verb!~\"WATCH|CONNECT\"}[5m]))",
+        "refId": "A", "legendFormat": "{{verb}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "reqps", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 78 }
+    },
+
+    {
+      "id": 61, "type": "timeseries", "title": "API server latency p50 / p95 / p99",
+      "description": "Metric 13 — request duration. Rising p99 with flat p50 is classic tail-latency degradation — look at a single slow resource or an overloaded admission webhook.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "histogram_quantile(0.50, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "A", "legendFormat": "p50" },
+        { "expr": "histogram_quantile(0.95, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "B", "legendFormat": "p95" },
+        { "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"WATCH|CONNECT\"}[5m])) by (le))", "refId": "C", "legendFormat": "p99" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 0, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 78 }
+    },
+
+    {
+      "id": 62, "type": "timeseries", "title": "API server error rate (HTTP 4xx / 5xx)",
+      "description": "Error rate by code. 429 = inflight-limit/throttling; 422 = admission-webhook rejections / invalid objects; 500/503 = apiserver faults or etcd unavailability.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "sum by(code)(rate(apiserver_request_total{code=~\"[45]..\"}[5m]))",
+        "refId": "A", "legendFormat": "HTTP {{code}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "reqps", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 86 }
+    },
+
+    {
+      "id": 63, "type": "timeseries", "title": "API server p99 latency by resource",
+      "description": "Latency broken down by Kubernetes resource — helps identify which object kind (pods, secrets, events…) is the slow one.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "histogram_quantile(0.99,\n  sum by(resource, le)(rate(apiserver_request_duration_seconds_bucket{verb!~\"WATCH|CONNECT\"}[5m]))\n)",
+        "refId": "A", "legendFormat": "{{resource}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 1, "fillOpacity": 0, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 86 }
+    },
+
+    {
+      "id": 700, "type": "row", "title": "Control plane — Controller Manager & Scheduler (metrics 14–15)",
+      "collapsed": false,
+      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 94 }
+    },
+
+    {
+      "id": 70, "type": "timeseries", "title": "Workqueue wait (queue_duration) — p99 by queue",
+      "description": "Metric 14 — how long items sit in each controller's workqueue before being picked up. A rising line indicates the controller can no longer keep up with cluster changes.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "histogram_quantile(0.99,\n  sum by(name, le)(rate(workqueue_queue_duration_seconds_bucket[5m]))\n)",
+        "refId": "A", "legendFormat": "{{name}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 1, "fillOpacity": 0, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 95 }
+    },
+
+    {
+      "id": 71, "type": "timeseries", "title": "Workqueue work (work_duration) — p99 by queue",
+      "description": "Metric 14 — how long each reconcile actually takes. A rising line points at slow API calls or a slow reconcile loop.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "histogram_quantile(0.99,\n  sum by(name, le)(rate(workqueue_work_duration_seconds_bucket[5m]))\n)",
+        "refId": "A", "legendFormat": "{{name}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 1, "fillOpacity": 0, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 95 }
+    },
+
+    {
+      "id": 72, "type": "timeseries", "title": "Scheduler — attempts per second by result",
+      "description": "Metric 15 — scheduler_schedule_attempts_total. 'unschedulable' = no node meets the pod's requirements (resources, taints, selectors); 'error' = a bug or stale cache in the scheduler.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [{
+        "expr": "sum by(result)(rate(scheduler_schedule_attempts_total[5m]))",
+        "refId": "A", "legendFormat": "{{result}}"
+      }],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 15, "showPoints": "never", "spanNulls": false }
+        },
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "scheduled"     }, "properties": [{ "id": "color", "value": { "fixedColor": "green",  "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "unschedulable" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] },
+          { "matcher": { "id": "byName", "options": "error"         }, "properties": [{ "id": "color", "value": { "fixedColor": "red",    "mode": "fixed" } }] }
+        ]
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 103 }
+    },
+
+    {
+      "id": 73, "type": "timeseries", "title": "Scheduler — scheduling attempt latency (p50 / p95 / p99)",
+      "description": "Metric 15 — scheduler attempt duration. The PDF's scheduler_e2e_scheduling_duration_seconds was removed in Kubernetes 1.23; the modern equivalent is scheduler_scheduling_attempt_duration_seconds (time from picking a pod off the queue to binding it). A rising p99 often correlates with an overloaded apiserver or large, highly-constrained pod fleets.",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        { "expr": "histogram_quantile(0.50, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "A", "legendFormat": "p50" },
+        { "expr": "histogram_quantile(0.95, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "B", "legendFormat": "p95" },
+        { "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket[5m])) by (le))", "refId": "C", "legendFormat": "p99" }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s", "min": 0,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 0, "showPoints": "never", "spanNulls": false }
+        }
+      },
+      "options": {
+        "tooltip": { "mode": "multi", "sort": "desc" },
+        "legend": { "displayMode": "list", "placement": "bottom", "calcs": ["mean", "max"] }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 103 }
+    }
+  ]
+}
--- a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/nodes-health.json
+++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/nodes-health.json
@@ -440,7 +440,7 @@
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
        {
-          "expr": "100 * (1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}))",
+          "expr": "100 * (1 - (\n  max by (instance, mountpoint) (node_filesystem_avail_bytes{mountpoint=~\"/|/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"})\n  /\n  max by (instance, mountpoint) (node_filesystem_size_bytes{mountpoint=~\"/|/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"})\n))",
          "refId": "A",
          "legendFormat": "{{instance}}"
        }
@@ -467,7 +467,7 @@
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
        {
-          "expr": "100 * (1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}))",
+          "expr": "100 * (1 - (\n  max by (instance, mountpoint) (node_filesystem_avail_bytes{mountpoint=~\"/|/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"})\n  /\n  max by (instance, mountpoint) (node_filesystem_size_bytes{mountpoint=~\"/|/var\",fstype!~\"tmpfs|overlay|squashfs|ramfs\"})\n))",
          "refId": "A",
          "legendFormat": "{{instance}}"
        }
--- a/harmony/src/modules/monitoring/cluster_dashboards/dashboards/storage.json
+++ b/harmony/src/modules/monitoring/cluster_dashboards/dashboards/storage.json
@@ -1,6 +1,6 @@
 {
-  "title": "Storage Health",
-  "uid": "storage-health",
+  "title": "Persistent Storage",
+  "uid": "persistent-storage",
  "schemaVersion": 36,
  "version": 1,
  "refresh": "30s",
@@ -21,25 +21,17 @@
      "title": "Bound PVCs",
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
-        {
-          "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}) or vector(0)",
-          "refId": "A"
-        }
+        { "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}) or vector(0)", "refId": "A" }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [{ "color": "green", "value": null }]
-          }
+          "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }
        }
      },
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "colorMode": "background",
-        "graphMode": "none",
-        "textMode": "auto"
+        "colorMode": "background", "graphMode": "none", "textMode": "auto"
      },
      "gridPos": { "h": 5, "w": 4, "x": 0, "y": 1 }
    },
@@ -50,28 +42,19 @@
      "title": "Pending PVCs",
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
-        {
-          "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Pending\"}) or vector(0)",
-          "refId": "A"
-        }
+        { "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Pending\"}) or vector(0)", "refId": "A" }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green",  "value": null },
-              { "color": "yellow", "value": 1 }
-            ]
-          }
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null }, { "color": "yellow", "value": 1 }
+          ]}
        }
      },
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "colorMode": "background",
-        "graphMode": "none",
-        "textMode": "auto"
+        "colorMode": "background", "graphMode": "none", "textMode": "auto"
      },
      "gridPos": { "h": 5, "w": 4, "x": 4, "y": 1 }
    },
@@ -82,28 +65,19 @@
      "title": "Lost PVCs",
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
-        {
-          "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Lost\"}) or vector(0)",
-          "refId": "A"
-        }
+        { "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Lost\"}) or vector(0)", "refId": "A" }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red",   "value": 1 }
-            ]
-          }
+          "thresholds": { "mode": "absolute", "steps": [
+            { "color": "green", "value": null }, { "color": "red", "value": 1 }
+          ]}
        }
      },
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "colorMode": "background",
-        "graphMode": "none",
-        "textMode": "auto"
+        "colorMode": "background", "graphMode": "none", "textMode": "auto"
      },
      "gridPos": { "h": 5, "w": 4, "x": 8, "y": 1 }
    },
@@ -114,201 +88,57 @@
      "title": "Bound PVs / Available PVs",
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
-        {
-          "expr": "sum(kube_persistentvolume_status_phase{phase=\"Bound\"}) or vector(0)",
-          "refId": "A",
-          "legendFormat": "Bound"
-        },
-        {
-          "expr": "sum(kube_persistentvolume_status_phase{phase=\"Available\"}) or vector(0)",
-          "refId": "B",
-          "legendFormat": "Available"
-        }
+        { "expr": "sum(kube_persistentvolume_status_phase{phase=\"Bound\"}) or vector(0)", "refId": "A", "legendFormat": "Bound" },
+        { "expr": "sum(kube_persistentvolume_status_phase{phase=\"Available\"}) or vector(0)", "refId": "B", "legendFormat": "Available" }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [{ "color": "blue", "value": null }]
-          }
+          "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }
        }
      },
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "colorMode": "background",
-        "graphMode": "none",
-        "textMode": "auto"
+        "colorMode": "background", "graphMode": "none", "textMode": "auto"
      },
-      "gridPos": { "h": 5, "w": 4, "x": 12, "y": 1 }
+      "gridPos": { "h": 5, "w": 6, "x": 12, "y": 1 }
    },

    {
-      "type": "stat",
+      "type": "piechart",
      "id": 6,
-      "title": "Ceph Cluster Health",
+      "title": "PVC Phase Distribution",
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
-        {
-          "expr": "ceph_health_status",
-          "refId": "A"
-        }
+        { "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}) or vector(0)", "refId": "A", "legendFormat": "Bound" },
+        { "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Pending\"}) or vector(0)", "refId": "B", "legendFormat": "Pending" },
+        { "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Lost\"}) or vector(0)", "refId": "C", "legendFormat": "Lost" }
      ],
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green",  "value": null },
-              { "color": "yellow", "value": 1 },
-              { "color": "red",    "value": 2 }
-            ]
-          },
-          "mappings": [
-            {
-              "type": "value",
-              "options": {
-                "0": { "text": "HEALTH_OK",   "index": 0 },
-                "1": { "text": "HEALTH_WARN", "index": 1 },
-                "2": { "text": "HEALTH_ERR",  "index": 2 }
-              }
-            }
-          ]
-        }
-      },
+      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" } } },
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "colorMode": "background",
-        "graphMode": "none",
-        "textMode": "value"
+        "pieType": "pie",
+        "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] }
      },
-      "gridPos": { "h": 5, "w": 4, "x": 16, "y": 1 }
-    },
-
-    {
-      "type": "stat",
-      "id": 7,
-      "title": "OSDs Up / Total",
-      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
-      "targets": [
-        {
-          "expr": "sum(ceph_osd_up) or vector(0)",
-          "refId": "A",
-          "legendFormat": "Up"
-        },
-        {
-          "expr": "count(ceph_osd_metadata) or vector(0)",
-          "refId": "B",
-          "legendFormat": "Total"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [{ "color": "green", "value": null }]
-          }
-        }
-      },
-      "options": {
-        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "colorMode": "background",
-        "graphMode": "none",
-        "textMode": "auto"
-      },
-      "gridPos": { "h": 5, "w": 4, "x": 20, "y": 1 }
+      "gridPos": { "h": 5, "w": 6, "x": 18, "y": 1 }
    },

    {
      "type": "row",
-      "id": 8,
-      "title": "Cluster Capacity",
+      "id": 7,
+      "title": "Capacity by Storage Class",
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 6 }
    },

-    {
-      "type": "gauge",
-      "id": 9,
-      "title": "Ceph Cluster Used (%)",
-      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
-      "targets": [
-        {
-          "expr": "100 * (ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes) / ceph_cluster_total_bytes",
-          "refId": "A"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "unit": "percent",
-          "min": 0,
-          "max": 100,
-          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green",  "value": null },
-              { "color": "yellow", "value": 70 },
-              { "color": "red",    "value": 85 }
-            ]
-          }
-        }
-      },
-      "options": {
-        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "showThresholdLabels": true,
-        "showThresholdMarkers": true
-      },
-      "gridPos": { "h": 8, "w": 5, "x": 0, "y": 7 }
-    },
-
-    {
-      "type": "stat",
-      "id": 10,
-      "title": "Ceph Capacity — Total / Available",
-      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
-      "targets": [
-        {
-          "expr": "ceph_cluster_total_bytes",
-          "refId": "A",
-          "legendFormat": "Total"
-        },
-        {
-          "expr": "ceph_cluster_total_bytes - (ceph_cluster_total_used_raw_bytes or ceph_cluster_total_used_bytes)",
-          "refId": "B",
-          "legendFormat": "Available"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "unit": "bytes",
-          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [{ "color": "blue", "value": null }]
-          }
-        }
-      },
-      "options": {
-        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "colorMode": "value",
-        "graphMode": "none",
-        "textMode": "auto",
-        "orientation": "vertical"
-      },
-      "gridPos": { "h": 8, "w": 4, "x": 5, "y": 7 }
-    },
-
    {
      "type": "bargauge",
-      "id": 11,
+      "id": 8,
      "title": "PV Allocated Capacity by Storage Class (Bound)",
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
        {
-          "expr": "sum by (storageclass) (\n  kube_persistentvolume_capacity_bytes\n  * on(persistentvolume) group_left(storageclass)\n  kube_persistentvolume_status_phase{phase=\"Bound\"}\n)",
+          "expr": "sum by (storageclass) (\n  kube_persistentvolume_capacity_bytes\n  * on(persistentvolume) group_left() (kube_persistentvolume_status_phase{phase=\"Bound\"} == 1)\n  * on(persistentvolume) group_left(storageclass) kube_persistentvolume_info\n)",
          "refId": "A",
          "legendFormat": "{{storageclass}}"
        }
@@ -316,11 +146,7 @@
      "fieldConfig": {
        "defaults": {
          "unit": "bytes",
-          "color": { "mode": "palette-classic" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [{ "color": "blue", "value": null }]
-          }
+          "color": { "mode": "palette-classic" }
        }
      },
      "options": {
@@ -329,267 +155,214 @@
        "displayMode": "gradient",
        "showUnfilled": true
      },
-      "gridPos": { "h": 8, "w": 7, "x": 9, "y": 7 }
+      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 7 }
    },

    {
-      "type": "piechart",
-      "id": 12,
-      "title": "PVC Phase Distribution",
+      "type": "bargauge",
+      "id": 9,
+      "title": "PVC Count by Storage Class",
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
        {
-          "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}) or vector(0)",
+          "expr": "count by (storageclass) (kube_persistentvolumeclaim_info{storageclass!=\"\"})",
          "refId": "A",
-          "legendFormat": "Bound"
-        },
-        {
-          "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Pending\"}) or vector(0)",
-          "refId": "B",
-          "legendFormat": "Pending"
-        },
-        {
-          "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Lost\"}) or vector(0)",
-          "refId": "C",
-          "legendFormat": "Lost"
+          "legendFormat": "{{storageclass}}"
        }
      ],
      "fieldConfig": {
-        "defaults": { "color": { "mode": "palette-classic" } }
+        "defaults": {
+          "unit": "short",
+          "color": { "mode": "palette-classic" }
+        }
      },
      "options": {
+        "orientation": "horizontal",
        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "pieType": "pie",
-        "legend": {
-          "displayMode": "table",
-          "placement": "right",
-          "values": ["value", "percent"]
+        "displayMode": "gradient",
+        "showUnfilled": true
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 7 }
+    },
+
+    {
+      "type": "table",
+      "id": 10,
+      "title": "Storage Classes Summary",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        {
+          "expr": "count by (storageclass) (kube_persistentvolume_info)",
+          "refId": "A",
+          "legendFormat": "PVs",
+          "format": "table",
+          "instant": true
+        },
+        {
+          "expr": "sum by (storageclass) (kube_persistentvolume_capacity_bytes * on(persistentvolume) group_left(storageclass) kube_persistentvolume_info)",
+          "refId": "B",
+          "legendFormat": "Capacity",
+          "format": "table",
+          "instant": true
        }
+      ],
+      "transformations": [
+        { "id": "merge" },
+        {
+          "id": "organize",
+          "options": {
+            "excludeByName": { "Time": true, "__name__": true, "job": true, "instance": true },
+            "renameByName": { "storageclass": "StorageClass", "Value #A": "PV Count", "Value #B": "Total Capacity" }
+          }
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {},
+        "overrides": [
+          { "matcher": { "id": "byName", "options": "Total Capacity" }, "properties": [{ "id": "unit", "value": "bytes" }] }
+        ]
      },
      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 7 }
    },

    {
      "type": "row",
-      "id": 13,
-      "title": "Ceph Performance",
+      "id": 11,
+      "title": "PVC Usage (kubelet volume stats)",
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 15 }
    },

    {
-      "type": "timeseries",
-      "id": 14,
-      "title": "Ceph Pool IOPS (Read / Write)",
+      "type": "table",
+      "id": 12,
+      "title": "Top 20 PVCs by % Used",
      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
      "targets": [
        {
-          "expr": "rate(ceph_pool_rd[5m])",
+          "expr": "topk(20,\n  100 * max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes)\n  /\n  max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes)\n)",
          "refId": "A",
-          "legendFormat": "Read — pool {{pool_id}}"
-        },
+          "format": "table",
+          "instant": true
+        }
+      ],
+      "transformations": [
        {
-          "expr": "rate(ceph_pool_wr[5m])",
-          "refId": "B",
-          "legendFormat": "Write — pool {{pool_id}}"
+          "id": "organize",
+          "options": {
+            "excludeByName": { "Time": true, "__name__": true, "job": true, "instance": true, "endpoint": true },
+            "renameByName": {
+              "namespace": "Namespace",
+              "persistentvolumeclaim": "PVC",
+              "Value": "Used %"
+            },
+            "indexByName": { "Namespace": 0, "PVC": 1, "Used %": 2 }
+          }
        }
      ],
      "fieldConfig": {
-        "defaults": {
-          "unit": "ops",
-          "color": { "mode": "palette-classic" },
-          "custom": { "lineWidth": 2, "fillOpacity": 8 }
-        }
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }
-    },
-
-    {
-      "type": "timeseries",
-      "id": 15,
-      "title": "Ceph Pool Throughput (Read / Write)",
-      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
-      "targets": [
-        {
-          "expr": "rate(ceph_pool_rd_bytes[5m])",
-          "refId": "A",
-          "legendFormat": "Read — pool {{pool_id}}"
-        },
-        {
-          "expr": "rate(ceph_pool_wr_bytes[5m])",
-          "refId": "B",
-          "legendFormat": "Write — pool {{pool_id}}"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "unit": "Bps",
-          "color": { "mode": "palette-classic" },
-          "custom": { "lineWidth": 2, "fillOpacity": 8 }
-        }
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }
-    },
-
-    {
-      "type": "row",
-      "id": 16,
-      "title": "Ceph OSD & Pool Details",
-      "collapsed": false,
-      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 24 }
-    },
-
-    {
-      "type": "timeseries",
-      "id": 17,
-      "title": "Ceph Pool Space Used (%)",
-      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
-      "targets": [
-        {
-          "expr": "100 * ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail)",
-          "refId": "A",
-          "legendFormat": "Pool {{pool_id}}"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "unit": "percent",
-          "min": 0,
-          "max": 100,
-          "color": { "mode": "palette-classic" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green",  "value": null },
-              { "color": "yellow", "value": 70 },
-              { "color": "red",    "value": 85 }
-            ]
-          },
-          "custom": { "lineWidth": 2, "fillOpacity": 10 }
-        }
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 25 }
-    },
-
-    {
-      "type": "bargauge",
-      "id": 18,
-      "title": "OSD Status per Daemon (green = Up, red = Down)",
-      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
-      "targets": [
-        {
-          "expr": "ceph_osd_up",
-          "refId": "A",
-          "legendFormat": "{{ceph_daemon}}"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "min": 0,
-          "max": 1,
-          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "red",   "value": null },
-              { "color": "green", "value": 1 }
-            ]
-          },
-          "mappings": [
-            {
-              "type": "value",
-              "options": {
-                "0": { "text": "DOWN", "index": 0 },
-                "1": { "text": "UP",   "index": 1 }
+        "defaults": {},
+        "overrides": [
+          {
+            "matcher": { "id": "byName", "options": "Used %" },
+            "properties": [
+              { "id": "unit", "value": "percent" },
+              { "id": "decimals", "value": 1 },
+              {
+                "id": "custom.cellOptions",
+                "value": { "type": "color-background", "mode": "gradient" }
+              },
+              {
+                "id": "thresholds",
+                "value": {
+                  "mode": "absolute",
+                  "steps": [
+                    { "color": "green", "value": null },
+                    { "color": "yellow", "value": 70 },
+                    { "color": "red", "value": 85 }
+                  ]
+                }
              }
-            }
-          ]
-        }
-      },
-      "options": {
-        "orientation": "horizontal",
-        "reduceOptions": { "calcs": ["lastNotNull"] },
-        "displayMode": "basic",
-        "showUnfilled": true
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 25 }
-    },
-
-    {
-      "type": "row",
-      "id": 19,
-      "title": "Node Disk Usage",
-      "collapsed": false,
-      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 33 }
-    },
-
-    {
-      "type": "timeseries",
-      "id": 20,
-      "title": "Node Root Disk Usage Over Time (%)",
-      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
-      "targets": [
-        {
-          "expr": "100 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} * 100)",
-          "refId": "A",
-          "legendFormat": "{{instance}}"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "unit": "percent",
-          "min": 0,
-          "max": 100,
-          "color": { "mode": "palette-classic" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green",  "value": null },
-              { "color": "yellow", "value": 70 },
-              { "color": "red",    "value": 85 }
-            ]
-          },
-          "custom": { "lineWidth": 2, "fillOpacity": 10 }
-        }
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 34 }
-    },
-
-    {
-      "type": "bargauge",
-      "id": 21,
-      "title": "Current Disk Usage — All Nodes & Mountpoints",
-      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
-      "targets": [
-        {
-          "expr": "100 - (node_filesystem_avail_bytes{fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{fstype!~\"tmpfs|overlay|squashfs\"} * 100)",
-          "refId": "A",
-          "legendFormat": "{{instance}} — {{mountpoint}}"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "unit": "percent",
-          "min": 0,
-          "max": 100,
-          "color": { "mode": "thresholds" },
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green",  "value": null },
-              { "color": "yellow", "value": 70 },
-              { "color": "red",    "value": 85 }
            ]
          }
+        ]
+      },
+      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 16 }
+    },
+
+    {
+      "type": "bargauge",
+      "id": 13,
+      "title": "Top 20 PVCs by Used Bytes",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        {
+          "expr": "topk(20, max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes))",
+          "refId": "A",
+          "legendFormat": "{{namespace}} / {{persistentvolumeclaim}}",
+          "instant": true
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "bytes",
+          "color": { "mode": "palette-classic" }
        }
      },
      "options": {
        "orientation": "horizontal",
        "reduceOptions": { "calcs": ["lastNotNull"] },
        "displayMode": "gradient",
-        "showUnfilled": true
+        "showUnfilled": true,
+        "valueMode": "color",
+        "sortBy": "Value",
+        "sortOrder": "desc"
      },
-      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 34 }
+      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 16 }
+    },
+
+    {
+      "type": "timeseries",
+      "id": 14,
+      "title": "Top 5 PVCs Usage Over Time (%)",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        {
+          "expr": "topk(5,\n  100 * max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes)\n  /\n  max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes)\n)",
+          "refId": "A",
+          "legendFormat": "{{namespace}} / {{persistentvolumeclaim}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 2, "fillOpacity": 8 }
+        }
+      },
+      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 26 }
+    },
+
+    {
+      "type": "timeseries",
+      "id": 15,
+      "title": "PVC Inode Usage (%) — Top 20",
+      "datasource": { "type": "prometheus", "uid": "Prometheus-Cluster" },
+      "targets": [
+        {
+          "expr": "topk(20,\n  100 * max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_used)\n  /\n  max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes)\n)",
+          "refId": "A",
+          "legendFormat": "{{namespace}} / {{persistentvolumeclaim}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent", "min": 0, "max": 100,
+          "color": { "mode": "palette-classic" },
+          "custom": { "lineWidth": 1, "fillOpacity": 5 }
+        }
+      },
+      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 34 }
    }

  ]
--- a/harmony/src/modules/monitoring/cluster_dashboards/score.rs
+++ b/harmony/src/modules/monitoring/cluster_dashboards/score.rs
@@ -101,7 +101,7 @@ impl<T: Topology + K8sclient> Interpret<T> for ClusterDashboardsInterpret {

        Ok(Outcome::success(format!(
            "Cluster dashboards resources in namespace '{}' with {} dashboards successfully created",
-            self.namespace, 8
+            self.namespace, 10
        )))
    }

@@ -494,7 +494,11 @@ impl ClusterDashboardsInterpret {
                include_str!("dashboards/workloads-health.json"),
            ),
            ("okd-networking", include_str!("dashboards/networking.json")),
-            ("storage-health", include_str!("dashboards/storage.json")),
+            (
+                "persistent-storage",
+                include_str!("dashboards/storage.json"),
+            ),
+            ("ceph-cluster", include_str!("dashboards/ceph.json")),
            ("okd-etcd", include_str!("dashboards/etcd.json")),
            (
                "okd-control-plane",
@@ -504,6 +508,10 @@ impl ClusterDashboardsInterpret {
                "okd-alerts-events",
                include_str!("dashboards/alerts-events-problems.json"),
            ),
+            (
+                "datadog-15-k8s-metrics",
+                include_str!("dashboards/datadog-15-k8s-metrics.json"),
+            ),
        ];

        for (dashboard_name, json_content) in dashboards {
--- a/harmony/src/modules/monitoring/mod.rs
+++ b/harmony/src/modules/monitoring/mod.rs
@@ -1,6 +1,7 @@
 pub mod alert_channel;
 pub mod alert_rule;
 pub mod application_monitoring;
+pub mod ceph_alerts;
 pub mod cluster_dashboards;
 pub mod grafana;
 pub mod kube_prometheus;
--- a/harmony/src/modules/monitoring/okd/cluster_alert_rules.rs
+++ b/harmony/src/modules/monitoring/okd/cluster_alert_rules.rs
@@ -0,0 +1,130 @@
+//! Score and interpret that apply a single `PrometheusRule` resource through the
+//! topology's Kubernetes client.
+
+use std::collections::BTreeMap;
+
+use async_trait::async_trait;
+use harmony_types::id::Id;
+use kube::api::ObjectMeta;
+use serde::Serialize;
+
+use crate::{
+    data::Version,
+    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
+    inventory::Inventory,
+    modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{
+        PrometheusRule, PrometheusRuleSpec, RuleGroup,
+    },
+    score::Score,
+    topology::{K8sclient, Topology},
+};
+
+/// Describes one `PrometheusRule` to apply: its namespace, name, rule groups and labels.
+#[derive(Clone, Debug, Serialize)]
+pub struct OpenshiftPrometheusRuleScore {
+    pub namespace: String,
+    pub name: String,
+    pub rule_groups: Vec<RuleGroup>,
+    /// Metadata labels for the rule; `None` selects `default_rule_labels()` at execution.
+    pub labels: Option<BTreeMap<String, String>>,
+}
+
+impl<T: Topology + K8sclient> Score<T> for OpenshiftPrometheusRuleScore {
+    /// Returns `OpenshiftPrometheusRuleScore(<namespace>/<name>)`.
+    fn name(&self) -> String {
+        format!(
+            "OpenshiftPrometheusRuleScore({}/{})",
+            self.namespace, self.name
+        )
+    }
+
+    /// Builds the interpret from clones of this score's four fields.
+    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
+        let interpret = OpenshiftPrometheusRuleInterpret {
+            namespace: self.namespace.clone(),
+            name: self.name.clone(),
+            rule_groups: self.rule_groups.clone(),
+            labels: self.labels.clone(),
+        };
+        Box::new(interpret)
+    }
+}
+
+/// Interpret produced by `OpenshiftPrometheusRuleScore::create_interpret`.
+#[derive(Debug, Clone)]
+pub struct OpenshiftPrometheusRuleInterpret {
+    namespace: String,
+    name: String,
+    rule_groups: Vec<RuleGroup>,
+    labels: Option<BTreeMap<String, String>>,
+}
+
+#[async_trait]
+impl<T: Topology + K8sclient> Interpret<T> for OpenshiftPrometheusRuleInterpret {
+    /// Builds the `PrometheusRule` and applies it in `self.namespace`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `InterpretError` when the topology cannot provide a Kubernetes client or
+    /// when the apply call fails.
+    async fn execute(
+        &self,
+        _inventory: &Inventory,
+        topology: &T,
+    ) -> Result<Outcome, InterpretError> {
+        // Labels given on the score replace the defaults entirely; the two sets are not merged.
+        let metadata = ObjectMeta {
+            name: Some(self.name.clone()),
+            namespace: Some(self.namespace.clone()),
+            labels: Some(self.labels.clone().unwrap_or_else(default_rule_labels)),
+            ..ObjectMeta::default()
+        };
+        let spec = PrometheusRuleSpec {
+            groups: self.rule_groups.clone(),
+        };
+        let prometheus_rule = PrometheusRule { metadata, spec };
+
+        let client = topology
+            .k8s_client()
+            .await
+            .map_err(|e| InterpretError::new(format!("Failed to get k8s client: {e}")))?;
+
+        client
+            .apply(&prometheus_rule, Some(&self.namespace))
+            .await
+            .map_err(|e| InterpretError::new(e.to_string()))?;
+
+        let summary = format!(
+            "PrometheusRule '{}' applied to namespace '{}' with {} rule group(s)",
+            self.name,
+            self.namespace,
+            self.rule_groups.len()
+        );
+        Ok(Outcome::success(summary))
+    }
+
+    fn get_name(&self) -> InterpretName {
+        InterpretName::Custom("OpenshiftPrometheusRule")
+    }
+
+    // Not implemented yet: the three accessors below panic through `todo!()` when called.
+    fn get_version(&self) -> Version {
+        todo!()
+    }
+
+    fn get_status(&self) -> InterpretStatus {
+        todo!()
+    }
+
+    fn get_children(&self) -> Vec<Id> {
+        todo!()
+    }
+}
+
+/// Labels used when the score carries none: `prometheus=k8s` and `role=alert-rules`.
+fn default_rule_labels() -> BTreeMap<String, String> {
+    BTreeMap::from([
+        ("prometheus".to_string(), "k8s".to_string()),
+        ("role".to_string(), "alert-rules".to_string()),
+    ])
+}
--- a/harmony/src/modules/monitoring/okd/mod.rs
+++ b/harmony/src/modules/monitoring/okd/mod.rs
@@ -1,5 +1,6 @@
 use crate::topology::oberservability::monitoring::AlertSender;

+pub mod cluster_alert_rules;
 pub mod cluster_monitoring;
 pub(crate) mod config;
 pub mod enable_user_workload;
--- a/harmony/src/modules/nats_auth_callout/mod.rs
+++ b/harmony/src/modules/nats_auth_callout/mod.rs
@@ -0,0 +1,501 @@
+//! NATS auth callout deployment Score.
+//!
+//! Deploys the `harmony-nats-callout` binary as a single-replica
+//! Kubernetes Deployment that authenticates inbound NATS clients
+//! against Zitadel-issued JWTs. See `nats/callout/` for the binary.
+//!
+//! ## Composition
+//!
+//! This Score only deploys the *callout side*. The NATS server itself
+//! must be configured separately to delegate auth to this service:
+//!
+//! ```yaml
+//! authorization:
+//!   auth_callout:
+//!     issuer: <pubkey of issuer_nkey_seed>
+//!     auth_users: [<nats_auth_user>]
+//!     account: <target_account>
+//! accounts:
+//!   <target_account>:
+//!     users:
+//!       - user: <nats_auth_user>
+//!         password: <nats_auth_pass>
+//! ```
+//!
+//! Use [`render_auth_callout_block`] to render the `authorization` block of
+//! this snippet from the same parameters as the Score, so the two halves stay
+//! in sync; the `accounts` block is not rendered and must be written separately.
+//!
+//! ## Why a Score and not just a YAML manifest?
+//!
+//! The Score gives compile-time safety on the topology trait bounds
+//! (`T: Topology + K8sclient`), idempotent apply via `K8sResourceScore`,
+//! and a single place to evolve the deployment shape (resource limits,
+//! pod security, image override, etc.).
+
+use std::collections::BTreeMap;
+
+use async_trait::async_trait;
+use k8s_openapi::ByteString;
+use k8s_openapi::api::apps::v1::Deployment;
+use k8s_openapi::api::core::v1::Secret;
+use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
+use serde::Serialize;
+use serde_json::json;
+
+use crate::data::Version;
+use crate::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome};
+use crate::inventory::Inventory;
+use crate::modules::k8s::resource::K8sResourceScore;
+use crate::score::Score;
+use crate::topology::{K8sclient, Topology};
+use harmony_types::id::Id;
+
+/// Default container image. The example harness builds and side-loads
+/// this tag into k3d before invoking the Score.
+pub const DEFAULT_IMAGE: &str = "harmony-nats-callout:dev";
+
+/// Default Zitadel roles claim path. Mirrors
+/// `harmony_nats_callout::DEFAULT_ROLES_CLAIM` (kept as a string literal
+/// here to avoid a dep on the callout crate).
+pub const DEFAULT_ROLES_CLAIM: &str = "urn:zitadel:iam:org:project:roles";
+
+pub const DEFAULT_ADMIN_ROLE: &str = "fleet-admin";
+pub const DEFAULT_DEVICE_ROLE: &str = "device";
+
+#[derive(Debug, Clone, Serialize)]
+pub struct NatsAuthCalloutScore {
+    /// Resource name. Used for the Deployment + Secret + label selectors.
+    pub name: String,
+    /// Target namespace. Must already exist (the example creates it).
+    pub namespace: String,
+    /// Container image reference. The image must contain the
+    /// `harmony-nats-callout` binary at `/usr/local/bin/harmony-nats-callout`.
+    pub image: String,
+    /// NATS URL the callout itself connects to (e.g. cluster-internal
+    /// `nats://fleet-nats.fleet-system.svc.cluster.local:4222`).
+    pub nats_url: String,
+    /// NATS account name issued users land in. Must match the NATS
+    /// server's `auth_callout.account`.
+    pub target_account: String,
+    /// Username the callout uses on its own NATS connection. Listed in
+    /// `auth_callout.auth_users` so it bypasses callout (otherwise it
+    /// would deadlock authenticating itself).
+    pub nats_auth_user: String,
+    /// Password for the callout's NATS connection. Stored in a K8s Secret.
+    pub nats_auth_pass: String,
+    /// NKey account seed used to sign user JWTs. The corresponding
+    /// public key MUST be configured as `auth_callout.issuer` on the
+    /// NATS server; otherwise NATS will reject every response we sign.
+    /// Stored in a K8s Secret.
+    pub issuer_nkey_seed: String,
+    /// OIDC issuer URL (e.g. `http://zitadel.zitadel.svc.cluster.local:8080`).
+    pub oidc_issuer_url: String,
+    /// Expected `aud` claim in inbound user JWTs.
+    pub oidc_audience: String,
+    /// JSON path to the device id claim.
+    pub device_id_claim: String,
+    /// Optional prefix stripped from the extracted device id before
+    /// permission interpolation. Empty string disables. Set to `device-`
+    /// to consume Zitadel's `client_id` claim with the convention used
+    /// by `fleet_rpi_setup` and `fleet_e2e_demo`.
+    pub device_id_prefix_strip: String,
+    /// JSON path to the roles claim.
+    pub roles_claim: String,
+    /// Role name granting admin permissions.
+    pub admin_role: String,
+    /// Role name granting per-device permissions.
+    pub device_role: String,
+    /// Whether the callout's HTTP client accepts invalid TLS certs (only
+    /// for local dev — Zitadel-on-k3d typically uses HTTP, but in
+    /// development with a self-signed Zitadel cert this is the escape hatch).
+    pub danger_accept_invalid_certs: bool,
+}
+
+impl NatsAuthCalloutScore {
+    /// Sane defaults; required fields are passed positionally.
+    pub fn new(
+        name: impl Into<String>,
+        namespace: impl Into<String>,
+        nats_url: impl Into<String>,
+        oidc_issuer_url: impl Into<String>,
+        oidc_audience: impl Into<String>,
+        nats_auth_user: impl Into<String>,
+        nats_auth_pass: impl Into<String>,
+        issuer_nkey_seed: impl Into<String>,
+    ) -> Self {
+        Self {
+            name: name.into(),
+            namespace: namespace.into(),
+            image: DEFAULT_IMAGE.to_string(),
+            nats_url: nats_url.into(),
+            target_account: "DEVICES".to_string(),
+            nats_auth_user: nats_auth_user.into(),
+            nats_auth_pass: nats_auth_pass.into(),
+            issuer_nkey_seed: issuer_nkey_seed.into(),
+            oidc_issuer_url: oidc_issuer_url.into(),
+            oidc_audience: oidc_audience.into(),
+            device_id_claim: "device_id".to_string(),
+            device_id_prefix_strip: String::new(),
+            roles_claim: DEFAULT_ROLES_CLAIM.to_string(),
+            admin_role: DEFAULT_ADMIN_ROLE.to_string(),
+            device_role: DEFAULT_DEVICE_ROLE.to_string(),
+            danger_accept_invalid_certs: false,
+        }
+    }
+
+    pub fn image(mut self, image: impl Into<String>) -> Self {
+        self.image = image.into();
+        self
+    }
+
+    pub fn target_account(mut self, account: impl Into<String>) -> Self {
+        self.target_account = account.into();
+        self
+    }
+
+    pub fn admin_role(mut self, role: impl Into<String>) -> Self {
+        self.admin_role = role.into();
+        self
+    }
+
+    pub fn device_role(mut self, role: impl Into<String>) -> Self {
+        self.device_role = role.into();
+        self
+    }
+
+    pub fn danger_accept_invalid_certs(mut self, accept: bool) -> Self {
+        self.danger_accept_invalid_certs = accept;
+        self
+    }
+
+    fn secret_name(&self) -> String {
+        format!("{}-secrets", self.name)
+    }
+
+    /// Builds the Opaque Secret; its keys are the file names the `*_FILE` env vars point at.
+    fn build_secret(&self) -> Secret {
+        let entries = [
+            ("issuer-nkey-seed", &self.issuer_nkey_seed),
+            ("nats-auth-pass", &self.nats_auth_pass),
+        ];
+        let payload: BTreeMap<String, ByteString> = entries
+            .into_iter()
+            .map(|(key, value)| (key.to_string(), ByteString(value.as_bytes().to_vec())))
+            .collect();
+        let metadata = ObjectMeta {
+            name: Some(self.secret_name()),
+            namespace: Some(self.namespace.clone()),
+            ..Default::default()
+        };
+        Secret {
+            metadata,
+            data: Some(payload),
+            type_: Some(String::from("Opaque")),
+            ..Default::default()
+        }
+    }
+
+    fn build_deployment(&self) -> Deployment {
+        let secret_name = self.secret_name();
+
+        // Mounting the secret as a volume (rather than env-var-from-secret)
+        // means rotating the seed is a kubectl edit + restart, not a
+        // rolling Pod recreation. Pairs with the binary's `*_FILE` env
+        // var convention for secrets.
+        let manifest = json!({
+            "metadata": {
+                "name": self.name,
+                "namespace": self.namespace,
+                "labels": { "app": self.name }
+            },
+            "spec": {
+                "replicas": 1,
+                "selector": { "matchLabels": { "app": self.name } },
+                "template": {
+                    "metadata": { "labels": { "app": self.name } },
+                    "spec": {
+                        // fsGroup makes secret-volume files group-owned
+                        // by the runtime UID's group. Without it, the
+                        // mounted secret stays root:root and a non-root
+                        // container fails to read it (Permission denied).
+                        // 65532 matches the `nonroot` UID convention used
+                        // by the Dockerfile (and by distroless images).
+                        "securityContext": {
+                            "runAsNonRoot": true,
+                            "runAsUser": 65532,
+                            "runAsGroup": 65532,
+                            "fsGroup": 65532
+                        },
+                        "containers": [{
+                            "name": "callout",
+                            "image": self.image,
+                            "imagePullPolicy": "IfNotPresent",
+                            "env": [
+                                { "name": "NATS_URL", "value": self.nats_url },
+                                { "name": "TARGET_ACCOUNT", "value": self.target_account },
+                                { "name": "NATS_AUTH_USER", "value": self.nats_auth_user },
+                                { "name": "NATS_AUTH_PASS_FILE", "value": "/etc/callout/nats-auth-pass" },
+                                { "name": "ISSUER_NKEY_SEED_FILE", "value": "/etc/callout/issuer-nkey-seed" },
+                                { "name": "OIDC_ISSUER_URL", "value": self.oidc_issuer_url },
+                                { "name": "OIDC_AUDIENCE", "value": self.oidc_audience },
+                                { "name": "DEVICE_ID_CLAIM", "value": self.device_id_claim },
+                                { "name": "DEVICE_ID_PREFIX_STRIP", "value": self.device_id_prefix_strip },
+                                { "name": "ROLES_CLAIM", "value": self.roles_claim },
+                                { "name": "ADMIN_ROLE", "value": self.admin_role },
+                                { "name": "DEVICE_ROLE", "value": self.device_role },
+                                { "name": "DANGER_ACCEPT_INVALID_CERTS",
+                                  "value": if self.danger_accept_invalid_certs { "true" } else { "false" } },
+                                { "name": "RUST_LOG", "value": "info" }
+                            ],
+                            "volumeMounts": [{
+                                "name": "secrets",
+                                "mountPath": "/etc/callout",
+                                "readOnly": true
+                            }],
+                            "securityContext": {
+                                "allowPrivilegeEscalation": false,
+                                "readOnlyRootFilesystem": true,
+                                "capabilities": { "drop": ["ALL"] }
+                            }
+                        }],
+                        "volumes": [{
+                            "name": "secrets",
+                            "secret": {
+                                "secretName": secret_name,
+                                // 0o440 = owner+group read. The Pod's
+                                // fsGroup (65532) is the volume group;
+                                // the runtime user (also 65532) reads
+                                // via group permission.
+                                "defaultMode": 0o440
+                            }
+                        }]
+                    }
+                }
+            }
+        });
+
+        serde_json::from_value(manifest).expect("static deployment manifest must parse")
+    }
+}
+
+impl<T: Topology + K8sclient> Score<T> for NatsAuthCalloutScore {
+    fn name(&self) -> String {
+        ["NatsAuthCalloutScore(", self.name.as_str(), ")"].concat()
+    }
+
+    /// Hands a clone of this score to a fresh `NatsAuthCalloutInterpret`.
+    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
+        let score = self.clone();
+        Box::new(NatsAuthCalloutInterpret { score })
+    }
+}
+
+#[derive(Debug, Clone)]
+struct NatsAuthCalloutInterpret {
+    score: NatsAuthCalloutScore,
+}
+
+#[async_trait]
+impl<T: Topology + K8sclient> Interpret<T> for NatsAuthCalloutInterpret {
+    /// Applies the Secret first, then the Deployment that mounts it.
+    async fn execute(
+        &self,
+        inventory: &Inventory,
+        topology: &T,
+    ) -> Result<Outcome, InterpretError> {
+        // Both manifests are built up front, before anything is applied.
+        let score = &self.score;
+        let (secret, deployment) = (score.build_secret(), score.build_deployment());
+
+        K8sResourceScore::single(secret, Some(score.namespace.clone()))
+            .interpret(inventory, topology)
+            .await?;
+
+        K8sResourceScore::single(deployment, Some(score.namespace.clone()))
+            .interpret(inventory, topology)
+            .await?;
+
+        let applied = format!("callout deployment {}/{} applied", score.namespace, score.name);
+        Ok(Outcome::success(applied))
+    }
+
+    fn get_name(&self) -> InterpretName {
+        InterpretName::Custom("NatsAuthCallout")
+    }
+
+    fn get_version(&self) -> Version {
+        Version::from("0.1.0").expect("static version")
+    }
+
+    fn get_status(&self) -> InterpretStatus {
+        InterpretStatus::QUEUED
+    }
+
+    fn get_children(&self) -> Vec<Id> {
+        Vec::new()
+    }
+}
+
+/// Renders the `authorization.auth_callout` YAML block that NATS needs in
+/// `config.merge` to delegate authentication to this callout service.
+///
+/// * `issuer_pubkey` - written as `issuer`.
+/// * `auth_user` - written as the only entry of `auth_users`.
+/// * `account` - written as `account`.
+///
+/// All three values are interpolated verbatim, without quoting or escaping.
+/// Only the `authorization` block is produced, and it ends with a newline.
+pub fn render_auth_callout_block(issuer_pubkey: &str, auth_user: &str, account: &str) -> String {
+    format!(
+        "authorization:\n  auth_callout:\n    issuer: {issuer_pubkey}\n    \
+         auth_users: [ {auth_user} ]\n    \
+         account: {account}\n"
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn sample_score() -> NatsAuthCalloutScore {
+        NatsAuthCalloutScore::new(
+            "fleet-callout",
+            "fleet-system",
+            "nats://fleet-nats.fleet-system.svc:4222",
+            "http://zitadel.zitadel.svc:8080",
+            "harmony-iot-devices",
+            "auth",
+            "auth-pass-123",
+            "SAANYDXMOXMQOFP6UAOR5VHTAGHE6RAJG7FVBAOJTPLE7AQ56TXRSBQO5Q",
+        )
+    }
+
+    #[test]
+    fn defaults_are_sensible() {
+        let s = sample_score();
+        assert_eq!(s.image, DEFAULT_IMAGE);
+        assert_eq!(s.target_account, "DEVICES");
+        assert_eq!(s.admin_role, DEFAULT_ADMIN_ROLE);
+        assert_eq!(s.device_role, DEFAULT_DEVICE_ROLE);
+        assert_eq!(s.roles_claim, DEFAULT_ROLES_CLAIM);
+        assert_eq!(s.device_id_claim, "device_id");
+        assert!(!s.danger_accept_invalid_certs);
+    }
+
+    #[test]
+    fn builders_override_fields() {
+        let s = sample_score()
+            .image("custom:tag")
+            .target_account("ACME")
+            .admin_role("super-user")
+            .device_role("iot-thing")
+            .danger_accept_invalid_certs(true);
+        assert_eq!(s.image, "custom:tag");
+        assert_eq!(s.target_account, "ACME");
+        assert_eq!(s.admin_role, "super-user");
+        assert_eq!(s.device_role, "iot-thing");
+        assert!(s.danger_accept_invalid_certs);
+    }
+
+    #[test]
+    fn secret_carries_seed_and_password_at_expected_keys() {
+        let s = sample_score();
+        let secret = s.build_secret();
+        assert_eq!(
+            secret.metadata.name.as_deref(),
+            Some("fleet-callout-secrets")
+        );
+        assert_eq!(secret.metadata.namespace.as_deref(), Some("fleet-system"));
+        assert_eq!(secret.type_.as_deref(), Some("Opaque"));
+        let data = secret.data.expect("secret data set");
+        let seed = std::str::from_utf8(&data["issuer-nkey-seed"].0).unwrap();
+        let pass = std::str::from_utf8(&data["nats-auth-pass"].0).unwrap();
+        assert!(seed.starts_with("SAA"));
+        assert_eq!(pass, "auth-pass-123");
+    }
+
+    #[test]
+    fn device_id_prefix_strip_lands_as_env_value() {
+        // Regression: a non-empty prefix-strip must serialize as
+        // EnvVar { name, value: Some("...") }, not be elided.
+        let mut s = sample_score();
+        s.device_id_prefix_strip = "device-".to_string();
+        let dep = s.build_deployment();
+        let pod = dep.spec.unwrap().template.spec.unwrap();
+        let container = &pod.containers[0];
+        let env = container.env.as_ref().unwrap();
+        let prefix_env = env
+            .iter()
+            .find(|e| e.name == "DEVICE_ID_PREFIX_STRIP")
+            .expect("DEVICE_ID_PREFIX_STRIP must be present");
+        assert_eq!(prefix_env.value.as_deref(), Some("device-"));
+    }
+
+    #[test]
+    fn deployment_wires_secret_via_file_mount_not_env() {
+        // We mount the secret as a volume so binary uses the *_FILE env
+        // contract. This avoids Pod-spec churn on rotation and keeps the
+        // raw seed out of the Pod's env block (which shows up in
+        // `kubectl describe`).
+        let s = sample_score();
+        let dep = s.build_deployment();
+        let pod = dep.spec.unwrap().template.spec.unwrap();
+
+        let container = &pod.containers[0];
+        let env: Vec<&str> = container
+            .env
+            .as_ref()
+            .unwrap()
+            .iter()
+            .map(|e| e.name.as_str())
+            .collect();
+        assert!(env.contains(&"ISSUER_NKEY_SEED_FILE"));
+        assert!(env.contains(&"NATS_AUTH_PASS_FILE"));
+        // Raw values must not be set as env (otherwise both forms would
+        // be present and the file form would win, but the env form would
+        // leak the seed into the Pod descriptor).
+        assert!(!env.contains(&"ISSUER_NKEY_SEED"));
+        assert!(!env.contains(&"NATS_AUTH_PASS"));
+
+        let volumes = pod.volumes.unwrap();
+        assert_eq!(volumes.len(), 1);
+        assert_eq!(volumes[0].name, "secrets");
+        assert_eq!(
+            volumes[0].secret.as_ref().unwrap().secret_name.as_deref(),
+            Some("fleet-callout-secrets")
+        );
+    }
+
+    #[test]
+    fn deployment_runs_as_nonroot_with_dropped_caps() {
+        // Defense in depth: even if the binary were exploited, the Pod
+        // can't escalate privileges or write its own root filesystem.
+        let s = sample_score();
+        let dep = s.build_deployment();
+        let pod_spec = dep.spec.unwrap().template.spec.unwrap();
+        assert_eq!(
+            pod_spec
+                .security_context
+                .as_ref()
+                .and_then(|sc| sc.run_as_non_root),
+            Some(true)
+        );
+        let c_sec = pod_spec.containers[0].security_context.as_ref().unwrap();
+        assert_eq!(c_sec.allow_privilege_escalation, Some(false));
+        assert_eq!(c_sec.read_only_root_filesystem, Some(true));
+        assert_eq!(
+            c_sec.capabilities.as_ref().unwrap().drop.as_deref(),
+            Some(&["ALL".to_string()][..])
+        );
+    }
+
+    #[test]
+    fn render_auth_callout_block_emits_consistent_yaml() {
+        let yaml = render_auth_callout_block("ABCDEF1234567890", "auth", "DEVICES");
+        assert!(yaml.contains("issuer: ABCDEF1234567890"));
+        assert!(yaml.contains("auth_users: [ auth ]"));
+        assert!(yaml.contains("account: DEVICES"));
+        assert!(yaml.starts_with("authorization:"));
+    }
+}
--- a/harmony/src/modules/okd/crd/machine_config.rs
+++ b/harmony/src/modules/okd/crd/machine_config.rs
@@ -0,0 +1,132 @@
+use std::collections::BTreeMap;
+
+use base64::prelude::*;
+use kube::{CustomResource, api::ObjectMeta};
+use serde::{Deserialize, Serialize};
+
+#[derive(CustomResource, Deserialize, Serialize, Clone, Debug, Default)]
+#[kube(
+    group = "machineconfiguration.openshift.io",
+    version = "v1",
+    kind = "MachineConfig",
+    plural = "machineconfigs",
+    namespaced = false,
+    schema = "disabled"
+)]
+#[serde(rename_all = "camelCase")]
+pub struct MachineConfigSpec {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub config: Option<IgnitionConfig>,
+}
+
+impl Default for MachineConfig {
+    fn default() -> Self {
+        Self {
+            metadata: ObjectMeta::default(),
+            spec: MachineConfigSpec::default(),
+        }
+    }
+}
+
+#[derive(Deserialize, Serialize, Clone, Debug, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct IgnitionConfig {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ignition: Option<Ignition>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub storage: Option<Storage>,
+}
+
+#[derive(Deserialize, Serialize, Clone, Debug, Default)]
+pub struct Ignition {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub version: Option<String>,
+}
+
+#[derive(Deserialize, Serialize, Clone, Debug, Default)]
+pub struct Storage {
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    pub files: Vec<IgnitionFile>,
+}
+
+#[derive(Deserialize, Serialize, Clone, Debug)]
+pub struct IgnitionFile {
+    pub path: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub mode: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub overwrite: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub contents: Option<IgnitionFileContents>,
+}
+
+#[derive(Deserialize, Serialize, Clone, Debug)]
+pub struct IgnitionFileContents {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub source: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub compression: Option<String>,
+}
+
+impl MachineConfig {
+    /// Builds a `MachineConfig` named `<pool>-<resource_name>`, labelled for `pool`, whose
+    /// Ignition 3.2.0 config holds one file at `path` with `content` as a base64 `data:` URL.
+    pub fn with_file(
+        pool: MachineConfigPoolRole,
+        resource_name: &str,
+        path: &str,
+        content: &str,
+        mode: Option<u32>,
+    ) -> Self {
+        let payload = BASE64_STANDARD.encode(content);
+        let file = IgnitionFile {
+            path: path.to_string(),
+            mode,
+            // `overwrite` is always requested for the target path.
+            overwrite: Some(true),
+            contents: Some(IgnitionFileContents {
+                source: Some(format!("data:text/plain;charset=utf-8;base64,{payload}")),
+                compression: None,
+            }),
+        };
+        let config = IgnitionConfig {
+            ignition: Some(Ignition {
+                version: Some("3.2.0".to_string()),
+            }),
+            storage: Some(Storage { files: vec![file] }),
+        };
+        let metadata = ObjectMeta {
+            name: Some(format!("{}-{}", pool.label_value(), resource_name)),
+            labels: Some(pool.labels()),
+            ..Default::default()
+        };
+        let spec = MachineConfigSpec {
+            config: Some(config),
+        };
+        Self { metadata, spec }
+    }
+}
+
+#[derive(Debug, Clone, Copy, Serialize)]
+pub enum MachineConfigPoolRole {
+    Master,
+    Worker,
+}
+
+impl MachineConfigPoolRole {
+    /// Lower-case pool name: `"master"` or `"worker"`.
+    pub fn label_value(&self) -> &'static str {
+        match *self {
+            MachineConfigPoolRole::Master => "master",
+            MachineConfigPoolRole::Worker => "worker",
+        }
+    }
+
+    /// Single-entry label map: `machineconfiguration.openshift.io/role` -> pool name.
+    /// Used as the metadata labels of `MachineConfig` objects built by `with_file`.
+    pub fn labels(&self) -> BTreeMap<String, String> {
+        let role_key = String::from("machineconfiguration.openshift.io/role");
+        let role_value = String::from(self.label_value());
+        BTreeMap::from([(role_key, role_value)])
+    }
+}
--- a/harmony/src/modules/okd/crd/mod.rs
+++ b/harmony/src/modules/okd/crd/mod.rs
@@ -1,4 +1,5 @@
 pub mod ingresses_config;
 pub mod kubelet_config;
+pub mod machine_config;
 pub mod nmstate;
 pub mod route;
--- a/harmony/src/modules/okd/disable_dad_score.rs
+++ b/harmony/src/modules/okd/disable_dad_score.rs
@@ -0,0 +1,44 @@
+use serde::Serialize;
+
+use crate::{
+    interpret::Interpret,
+    modules::okd::{crd::machine_config::MachineConfigPoolRole, node_file_score::NodeFileScore},
+    score::Score,
+    topology::{K8sclient, Topology},
+};
+
+#[derive(Debug, Clone, Serialize)]
+pub struct DisableDadScore {
+    pub pool: MachineConfigPoolRole,
+}
+
+impl Default for DisableDadScore {
+    fn default() -> Self {
+        Self {
+            pool: MachineConfigPoolRole::Worker,
+        }
+    }
+}
+
+impl<T: Topology + K8sclient> Score<T> for DisableDadScore {
+    fn name(&self) -> String {
+        format!("DisableDadScore({})", self.pool.label_value())
+    }
+
+    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
+        let score = NodeFileScore {
+            pool: self.pool,
+            resource_name: "disable-dad".to_string(),
+            path: "/etc/NetworkManager/conf.d/99-disable-ipv4-dad.conf".to_string(),
+            content: "# Disable IPv4 Address Conflict Detection (ACD/DAD)\n\
+# Workaround for false positive conflict detection on\n\
+# 802.3ad LACP bonds where the second member's permanent\n\
+# MAC address triggers a spurious duplicate detection.\n\
+[connection]\n\
+ipv4.dad-timeout=0\n"
+                .to_string(),
+            mode: Some(0o644),
+        };
+        score.create_interpret()
+    }
+}
--- a/harmony/src/modules/okd/mod.rs
+++ b/harmony/src/modules/okd/mod.rs
@@ -25,5 +25,7 @@ pub use bootstrap_05_sanity_check::*;
 pub use bootstrap_06_installation_report::*;
 pub use bootstrap_persist_network_bond::*;
 pub mod crd;
+pub mod disable_dad_score;
 pub mod host_network;
+pub mod node_file_score;
 pub mod system_reserved_score;
--- a/harmony/src/modules/okd/node_file_score.rs
+++ b/harmony/src/modules/okd/node_file_score.rs
@@ -0,0 +1,49 @@
+use serde::Serialize;
+
+use crate::{
+    interpret::Interpret,
+    modules::{
+        k8s::resource::K8sResourceScore,
+        okd::crd::machine_config::{MachineConfig, MachineConfigPoolRole},
+    },
+    score::Score,
+    topology::{K8sclient, Topology},
+};
+
+#[derive(Debug, Clone, Serialize)]
+pub struct NodeFileScore {
+    pub pool: MachineConfigPoolRole,
+    pub resource_name: String,
+    pub path: String,
+    pub content: String,
+    pub mode: Option<u32>,
+}
+
+impl Default for NodeFileScore {
+    fn default() -> Self {
+        Self {
+            pool: MachineConfigPoolRole::Worker,
+            resource_name: "generic-file".to_string(),
+            path: "/etc/placeholder".to_string(),
+            content: "".to_string(),
+            mode: None,
+        }
+    }
+}
+
+impl<T: Topology + K8sclient> Score<T> for NodeFileScore {
+    fn name(&self) -> String {
+        let target = &self.path;
+        format!("NodeFileScore({target})")
+    }
+
+    /// Wraps the configured file in a `MachineConfig` and returns the interpret of a
+    /// `K8sResourceScore` holding that single resource.
+    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
+        let (name, path, content) = (&self.resource_name, &self.path, &self.content);
+        let machine_config = MachineConfig::with_file(self.pool, name, path, content, self.mode);
+        // `None`: no namespace is passed along with the resource.
+        let resource_score = K8sResourceScore::single(machine_config, None);
+        resource_score.create_interpret()
+    }
+}
--- a/harmony/src/modules/okd/system_reserved_score.rs
+++ b/harmony/src/modules/okd/system_reserved_score.rs
@@ -61,7 +61,7 @@ impl Default for SystemReservedScore {

 impl<T: Topology + K8sclient> Score<T> for SystemReservedScore {
    fn name(&self) -> String {
-        "SystemReservedScore".to_string()
+        format!("SystemReservedScore({})", self.pool.label_key())
    }

    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
--- a/harmony/src/modules/podman/interpret.rs
+++ b/harmony/src/modules/podman/interpret.rs
@@ -59,6 +59,9 @@ impl<T: Topology + ContainerRuntime> Interpret<T> for PodmanV0Interpret {
                image: service.image.clone(),
                ports: service.ports.clone(),
                labels: vec![(DEPLOYMENT_LABEL.to_string(), self.score.deployment_label())],
+                env: service.env.clone(),
+                volumes: service.volumes.clone(),
+                restart_policy: service.restart_policy,
            };
            topology.ensure_service_running(&spec).await.map_err(|e| {
                InterpretError::new(format!(
--- a/harmony/src/modules/podman/score.rs
+++ b/harmony/src/modules/podman/score.rs
@@ -12,17 +12,33 @@ use serde::{Deserialize, Serialize};
 use crate::{
    interpret::Interpret,
    score::Score,
-    topology::{ContainerRuntime, Topology},
+    topology::{ContainerRuntime, RestartPolicy, Topology, VolumeMount},
 };

 use super::interpret::PodmanV0Interpret;

 /// A single container managed by podman on the target host.
+///
+/// Wire-compatible with prior releases: the new `env`, `volumes`, and
+/// `restart_policy` fields all default to empty / `unless-stopped` so older
+/// Deployment CRs without them deserialize unchanged.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct PodmanService {
    pub name: String,
    pub image: String,
    pub ports: Vec<String>,
+    /// Environment variables passed to the container. Order is preserved so
+    /// `PartialEq` is deterministic for drift detection.
+    #[serde(default)]
+    pub env: Vec<(String, String)>,
+    /// Bind-mount volumes. Bind-only in v0; `host_path` is an absolute path
+    /// on the device's filesystem.
+    #[serde(default)]
+    pub volumes: Vec<VolumeMount>,
+    /// Restart policy on container exit. Defaults to `unless-stopped` —
+    /// matching docker-compose's typical behavior for long-running services.
+    #[serde(default)]
+    pub restart_policy: RestartPolicy,
 }

 /// v0 Score for podman-based workloads.
@@ -87,13 +103,23 @@ impl<T: Topology + ContainerRuntime> Score<T> for ReconcileScore {
 mod tests {
    use super::*;

+    fn svc(name: &str, image: &str) -> PodmanService {
+        PodmanService {
+            name: name.to_string(),
+            image: image.to_string(),
+            ports: vec![],
+            env: vec![],
+            volumes: vec![],
+            restart_policy: RestartPolicy::default(),
+        }
+    }
+
    #[test]
    fn podman_v0_score_serializes_with_adjacent_tag() {
        let score = ReconcileScore::PodmanV0(PodmanV0Score {
            services: vec![PodmanService {
-                name: "web".to_string(),
-                image: "nginx:latest".to_string(),
                ports: vec!["8080:80".to_string()],
+                ..svc("web", "nginx:latest")
            }],
        });
        let json = serde_json::to_string(&score).unwrap();
@@ -106,14 +132,19 @@ mod tests {
        let score = ReconcileScore::PodmanV0(PodmanV0Score {
            services: vec![
                PodmanService {
-                    name: "web".to_string(),
-                    image: "nginx:latest".to_string(),
                    ports: vec!["8080:80".to_string()],
+                    env: vec![("LOG_LEVEL".to_string(), "info".to_string())],
+                    volumes: vec![VolumeMount {
+                        host_path: "/var/lib/web/data".to_string(),
+                        container_path: "/data".to_string(),
+                        read_only: false,
+                    }],
+                    restart_policy: RestartPolicy::Always,
+                    ..svc("web", "nginx:latest")
                },
                PodmanService {
-                    name: "api".to_string(),
-                    image: "myapp:1.0".to_string(),
                    ports: vec!["3000:3000".to_string(), "9090:9090".to_string()],
+                    ..svc("api", "myapp:1.0")
                },
            ],
        });
@@ -122,21 +153,59 @@ mod tests {
        assert_eq!(score, deserialized);
    }

+    #[test]
+    fn legacy_payload_without_env_volumes_or_restart_deserializes() {
+        // Wire-compat: a Deployment CR built before these fields existed
+        // still round-trips into the new PodmanService.
+        let legacy = r#"{
+            "type": "PodmanV0",
+            "data": { "services": [
+                { "name": "web", "image": "nginx", "ports": ["8080:80"] }
+            ]}
+        }"#;
+        let parsed: ReconcileScore = serde_json::from_str(legacy).unwrap();
+        let ReconcileScore::PodmanV0(score) = parsed;
+        assert_eq!(score.services.len(), 1);
+        assert!(score.services[0].env.is_empty());
+        assert!(score.services[0].volumes.is_empty());
+        assert_eq!(
+            score.services[0].restart_policy,
+            RestartPolicy::UnlessStopped
+        );
+    }
+
+    #[test]
+    fn restart_policy_serializes_kebab_case() {
+        // docker-compose users expect `unless-stopped`, `on-failure` —
+        // verify our serde rename produces that.
+        let s = serde_json::to_string(&RestartPolicy::UnlessStopped).unwrap();
+        assert_eq!(s, "\"unless-stopped\"");
+        let s = serde_json::to_string(&RestartPolicy::OnFailure).unwrap();
+        assert_eq!(s, "\"on-failure\"");
+    }
+
+    #[test]
+    fn env_ordering_is_preserved_across_roundtrip() {
+        // Deterministic equality is what `matches_spec` drift detection
+        // relies on. If env reordered on roundtrip, agents would loop
+        // on recreate.
+        let svc = PodmanService {
+            env: vec![
+                ("B".to_string(), "2".to_string()),
+                ("A".to_string(), "1".to_string()),
+                ("C".to_string(), "3".to_string()),
+            ],
+            ..svc("api", "myapp")
+        };
+        let json = serde_json::to_string(&svc).unwrap();
+        let back: PodmanService = serde_json::from_str(&json).unwrap();
+        assert_eq!(back.env, svc.env);
+    }
+
    #[test]
    fn deployment_label_joins_service_names() {
        let score = PodmanV0Score {
-            services: vec![
-                PodmanService {
-                    name: "web".to_string(),
-                    image: "nginx".to_string(),
-                    ports: vec![],
-                },
-                PodmanService {
-                    name: "api".to_string(),
-                    image: "myapp".to_string(),
-                    ports: vec![],
-                },
-            ],
+            services: vec![svc("web", "nginx"), svc("api", "myapp")],
        };
        assert_eq!(score.deployment_label(), "web,api");
    }
--- a/harmony/src/modules/podman/topology.rs
+++ b/harmony/src/modules/podman/topology.rs
@@ -5,14 +5,15 @@ use std::time::Duration;
 use async_trait::async_trait;
 use futures_util::StreamExt;
 use podman_api::Podman;
-use podman_api::models::PortMapping;
+use podman_api::models::{ContainerMount, PortMapping};
 use podman_api::opts::{
    ContainerCreateOpts, ContainerDeleteOpts, ContainerListFilter, ContainerListOpts,
-    ContainerStopOpts, PullOpts,
+    ContainerRestartPolicy, ContainerStopOpts, PullOpts,
 };

 use crate::domain::topology::{
-    ContainerRuntime, ContainerSpec, ContainerState, PreparationError, PreparationOutcome, Topology,
+    ContainerRuntime, ContainerSpec, ContainerState, PreparationError, PreparationOutcome,
+    RestartPolicy, Topology, VolumeMount,
 };
 use crate::executors::ExecutorError;

@@ -155,12 +156,21 @@ impl ContainerRuntime for PodmanTopology {
            port_mappings.push(parse_port_mapping(raw)?);
        }

-        let opts = ContainerCreateOpts::builder()
+        let env_map: HashMap<String, String> = spec.env.iter().cloned().collect();
+
+        let mounts: Vec<ContainerMount> = spec.volumes.iter().map(volume_to_mount).collect();
+
+        let mut builder = ContainerCreateOpts::builder()
            .name(&spec.name)
            .image(&spec.image)
            .labels(labels)
            .portmappings(port_mappings)
-            .build();
+            .env(env_map)
+            .restart_policy(map_restart_policy(spec.restart_policy));
+        if !mounts.is_empty() {
+            builder = builder.mounts(mounts);
+        }
+        let opts = builder.build();

        let created = self
            .containers()
@@ -277,9 +287,77 @@ fn matches_spec(observed: &podman_api::models::ListContainer, spec: &ContainerSp
            return false;
        }
    }
+    // FIXME(redeploy-loop): this branch makes the agent's periodic
+    // reconcile non-idempotent for any non-trivial Deployment.
+    // Symptom: a service with env or volumes is destroyed and
+    // recreated every 30s tick (RECONCILE_INTERVAL), even when the
+    // observed container is already correct — operators see flapping
+    // container IDs, intermittent connectivity blips, log noise.
+    //
+    // Root cause: `podman list` (v5.x) doesn't surface env or mounts,
+    // so we can't compare them; the original author chose to declare
+    // "any spec with env/volumes is drifted" as a fail-safe. That's
+    // the wrong default for a polling reconciler — it weaponizes the
+    // poll into a re-creation loop.
+    //
+    // Right fix (out of scope for the demo, in scope for delivery):
+    //   1. Switch this code path to `containers.get(name).inspect()`
+    //      which DOES return env + mounts. Compare structurally.
+    //   2. Treat absent fields on the inspect response as "unchanged",
+    //      not "drifted".
+    //   3. Add an integration test that runs ensure_service_running
+    //      twice on the same spec and asserts the container ID is
+    //      unchanged.
+    //
+    // Layered next: the upcoming health-check addition to
+    // ContainerSpec gives the agent a separate signal to decide
+    // when to recreate (failed health checks → unhealthy → recreate)
+    // independent of the spec-drift check.
+    //
+    // Until fixed: avoid env / volumes in demo-time deployments to
+    // dodge the loop. The hello-web nginx demo doesn't have either,
+    // which is why it's stable.
+    if !spec.env.is_empty() || !spec.volumes.is_empty() {
+        return false;
+    }
+    // Restart policy: ListContainer doesn't surface it, so it can't be
+    // compared against the observed container. Any spec whose policy is not
+    // RestartPolicy::default() is therefore reported as drifted on EVERY
+    // call — the same recreate loop as the env/volumes branch above.
+    if spec.restart_policy != RestartPolicy::default() {
+        return false;
+    }
    true
 }

+fn volume_to_mount(v: &VolumeMount) -> ContainerMount {
+    // ContainerMount carries mount options as a Vec<String>; the `ro`/`rw`
+    // access mode is passed there. Only bind mounts are supported in v0.
+    let mut options: Vec<String> = Vec::new();
+    options.push(if v.read_only {
+        "ro".to_string()
+    } else {
+        "rw".to_string()
+    });
+    ContainerMount {
+        _type: Some("bind".to_string()),
+        source: Some(v.host_path.clone()),
+        destination: Some(v.container_path.clone()),
+        options: Some(options),
+        uid_mappings: None,
+        gid_mappings: None,
+    }
+}
+
+fn map_restart_policy(p: RestartPolicy) -> ContainerRestartPolicy {
+    match p {
+        RestartPolicy::No => ContainerRestartPolicy::No,
+        RestartPolicy::UnlessStopped => ContainerRestartPolicy::UnlessStopped,
+        RestartPolicy::OnFailure => ContainerRestartPolicy::OnFailure,
+        RestartPolicy::Always => ContainerRestartPolicy::Always,
+    }
+}
+
 fn from_list_container(c: podman_api::models::ListContainer) -> ContainerState {
    ContainerState {
        name: c
--- a/harmony/src/modules/zitadel/mod.rs
+++ b/harmony/src/modules/zitadel/mod.rs
@@ -1,7 +1,8 @@
 pub mod setup;

 pub use setup::{
-    ZitadelAppType, ZitadelApplication, ZitadelClientConfig, ZitadelMachineUser, ZitadelSetupScore,
+    MachineKeyType, ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig,
+    ZitadelMachineUser, ZitadelRole, ZitadelSetupScore,
 };

 use harmony_k8s::KubernetesDistribution;
@@ -73,6 +74,17 @@ pub struct ZitadelScore {
    /// Defaults to true for production deployments.
    #[serde(default)]
    pub external_secure: bool,
+    /// External port advertised by Zitadel in its OIDC discovery document.
+    ///
+    /// Zitadel uses this to construct the issuer URL it returns to clients
+    /// (and that client JWTs must match in `aud` for JWT-bearer flows).
+    /// `None` lets the chart pick the default (80 for HTTP, 443 for HTTPS).
+    /// On k3d where the host port mapping isn't 80/443, set this to the
+    /// host-side port — otherwise Zitadel's emitted issuer (`http://host`)
+    /// won't match the URL clients actually reach (`http://host:8080`),
+    /// and JWT-bearer audience validation will 500 with `Errors.Internal`.
+    #[serde(default)]
+    pub external_port: Option<u32>,
 }

 impl Default for ZitadelScore {
@@ -81,6 +93,7 @@ impl Default for ZitadelScore {
            host: Default::default(),
            zitadel_version: "v4.12.1".to_string(),
            external_secure: true,
+            external_port: None,
        }
    }
 }
@@ -96,6 +109,7 @@ impl<T: Topology + K8sclient + HelmCommand + PostgreSQL> Score<T> for ZitadelSco
            host: self.host.clone(),
            zitadel_version: self.zitadel_version.clone(),
            external_secure: self.external_secure,
+            external_port: self.external_port,
        })
    }
 }
@@ -107,6 +121,7 @@ struct ZitadelInterpret {
    host: String,
    zitadel_version: String,
    external_secure: bool,
+    external_port: Option<u32>,
 }

 #[async_trait]
@@ -342,6 +357,12 @@ zitadel:
          LastName: "Admin"
          Email: "admin@zitadel.example.com"
          PasswordChangeRequired: true
+        Machine:
+          Machine:
+            Username: "iam-admin"
+            Name: "IAM Admin (Machine User)"
+          Pat:
+            ExpirationDate: "2099-01-01T00:00:00Z"
    TLS:
      Enabled: false
    Database:
@@ -489,6 +510,10 @@ login:
                // The Zitadel image defines User: "zitadel" (non-numeric).
                // With runAsNonRoot: true, kubelet needs a numeric UID to verify
                // the user is non-root. The "zitadel" user maps to UID 1000.
+                let external_port_line = self
+                    .external_port
+                    .map(|p| format!("\n    ExternalPort: {p}"))
+                    .unwrap_or_default();
                format!(
                    r#"image:
  tag: {zitadel_version}
@@ -496,7 +521,7 @@ zitadel:
  masterkeySecretName: "{MASTERKEY_SECRET_NAME}"
  configmapConfig:
    ExternalDomain: "{host}"
-    ExternalSecure: false
+    ExternalSecure: false{external_port_line}
    FirstInstance:
      Org:
        Human:
@@ -506,6 +531,12 @@ zitadel:
          LastName: "Admin"
          Email: "admin@zitadel.example.com"
          PasswordChangeRequired: true
+        Machine:
+          Machine:
+            Username: "iam-admin"
+            Name: "IAM Admin (Machine User)"
+          Pat:
+            ExpirationDate: "2099-01-01T00:00:00Z"
    TLS:
      Enabled: false
    Database:
--- a/harmony/src/modules/zitadel/setup.rs
+++ b/harmony/src/modules/zitadel/setup.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::path::PathBuf;

 use async_trait::async_trait;
@@ -17,7 +18,7 @@ const ADMIN_PAT_SECRET: &str = "iam-admin-pat";
 const ZITADEL_NAMESPACE: &str = "zitadel";

 /// Type of OIDC application to create.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum ZitadelAppType {
    /// OAuth 2.0 Device Authorization Grant (RFC 8628).
    /// For CLI tools, SSH sessions, containers, and headless environments.
@@ -25,33 +26,97 @@ pub enum ZitadelAppType {
 }

 /// An OIDC application to create in a Zitadel project.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ZitadelApplication {
    pub project_name: String,
    pub app_name: String,
    pub app_type: ZitadelAppType,
 }

-/// A machine user for service-to-service automation.
-#[derive(Debug, Clone, Serialize)]
+/// An API application — represents a "resource server" that machine users
+/// can request audience for via the JWT-bearer flow. Creating one is what
+/// makes the project's ID a valid `aud` claim in access tokens. Required
+/// when downstream services (e.g. the auth callout) want to validate the
+/// `aud` of an access token against a stable identifier.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ZitadelApiApp {
+    pub project_name: String,
+    pub app_name: String,
+}
+
+/// A role to provision in a project. Role keys are project-scoped and
+/// what the access-token's `urn:zitadel:iam:org:project:roles` claim
+/// uses as the role identifier.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ZitadelRole {
+    pub project_name: String,
+    pub key: String,
+    pub display_name: String,
+    #[serde(default)]
+    pub group: Option<String>,
+}
+
+/// Format of the machine key issued for a service user.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub enum MachineKeyType {
+    /// JSON keyfile (Zitadel `KEY_TYPE_JSON`, internal type code 1).
+    /// Contains `{type, keyId, key, userId}` — the JWT-bearer flow needs all
+    /// four. This is the format we use for our test fleet clients.
+    Json,
+}
+
+impl MachineKeyType {
+    fn api_value(self) -> &'static str {
+        match self {
+            MachineKeyType::Json => "KEY_TYPE_JSON",
+        }
+    }
+}
+
+/// A machine (service-account) user for service-to-service automation.
+///
+/// When `machine_key` is set, a key is provisioned and cached in
+/// [`ZitadelClientConfig::machine_keys`] under the user's username. The
+/// returned material is the *only* way to authenticate as this user via
+/// the JWT-bearer flow — Zitadel does not expose key material on
+/// subsequent reads, so the cache is the source of truth.
+///
+/// `grant_roles` enumerates project-scoped role keys to grant the user.
+/// The grant is created on first run; subsequent runs detect it and skip.
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ZitadelMachineUser {
    pub username: String,
    pub name: String,
-    /// If true, creates a Personal Access Token and includes it in the Outcome details.
+    /// Legacy flag that used to request a Personal Access Token. Currently
+    /// a no-op — kept for API compatibility with existing examples.
+    #[serde(default)]
    pub create_pat: bool,
+    /// If set, provision a JWT signing key in this format. The private
+    /// key material is stored in `ZitadelClientConfig::machine_keys`.
+    #[serde(default)]
+    pub machine_key: Option<MachineKeyType>,
+    /// Project name for `grant_roles`. Required when `grant_roles` is non-empty.
+    #[serde(default)]
+    pub project_name: Option<String>,
+    /// Project-scoped role keys to grant the user.
+    #[serde(default)]
+    pub grant_roles: Vec<String>,
 }

 /// Score that provisions identity resources in a deployed Zitadel instance.
 ///
-/// This is the "day two" counterpart to [`ZitadelScore`] (which handles Helm
-/// deployment). It creates projects, OIDC applications, and machine users
-/// via Zitadel's Management API, authenticated with the admin PAT from the
-/// `iam-admin-pat` K8s secret (provisioned by the Helm chart).
+/// This is the "day two" counterpart to [`super::ZitadelScore`] (which
+/// handles Helm deployment). It creates projects, applications, roles,
+/// machine users + keys, and role grants via Zitadel's Management API,
+/// authenticated with the admin PAT from the `iam-admin-pat` K8s secret
+/// (provisioned by the Helm chart when `FirstInstance.Org.Machine.Pat`
+/// is configured, which [`super::ZitadelScore`]'s Helm values now do).
 ///
 /// All operations are idempotent: existing resources are detected and skipped.
-/// The `client_id` for created applications is cached locally at
-/// `~/.local/share/harmony/zitadel/client-config.json`.
-#[derive(Debug, Clone, Serialize)]
+/// Cached state lives at `~/.local/share/harmony/zitadel/client-config.json`
+/// — which now also holds the **private key material** of any provisioned
+/// machine keys. Treat that file as a secret.
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ZitadelSetupScore {
    /// Zitadel instance hostname (must match the ZitadelScore's `host`).
    pub host: String,
@@ -59,19 +124,41 @@ pub struct ZitadelSetupScore {
    pub port: u16,
    /// Whether to skip TLS verification (default: true for local dev).
    pub skip_tls: bool,
-    /// OIDC applications to create.
+    /// OIDC applications to create (typically Device Code clients).
    #[serde(default)]
    pub applications: Vec<ZitadelApplication>,
-    /// Machine users to create.
+    /// API applications. Create one per project that should appear in
+    /// `aud` of access tokens issued via JWT-bearer.
+    #[serde(default)]
+    pub api_apps: Vec<ZitadelApiApp>,
+    /// Project roles to provision.
+    #[serde(default)]
+    pub roles: Vec<ZitadelRole>,
+    /// Machine users to provision (with optional keys + role grants).
    #[serde(default)]
    pub machine_users: Vec<ZitadelMachineUser>,
 }

 /// Cached Zitadel provisioning results.
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Default, Serialize, Deserialize)]
 pub struct ZitadelClientConfig {
    pub project_id: Option<String>,
-    pub apps: std::collections::HashMap<String, String>, // app_name -> client_id
+    /// `app_name` → `clientId` (for OIDC apps that have one).
+    #[serde(default)]
+    pub apps: HashMap<String, String>,
+    /// `project_name` → `project_id`. Lets multiple projects coexist.
+    #[serde(default)]
+    pub projects: HashMap<String, String>,
+    /// `username` → machine `userId`.
+    #[serde(default)]
+    pub machine_user_ids: HashMap<String, String>,
+    /// `username` → JSON keyfile content (private key material).
+    #[serde(default)]
+    pub machine_keys: HashMap<String, String>,
+    /// `(username, project_name)` → grant `id`. Encoded as a single string
+    /// `<username>::<project_name>` for serde simplicity.
+    #[serde(default)]
+    pub user_grants: HashMap<String, String>,
 }

 impl ZitadelClientConfig {
@@ -109,6 +196,20 @@ impl ZitadelClientConfig {
    pub fn client_id(&self, app_name: &str) -> Option<&String> {
        self.apps.get(app_name)
    }
+
+    /// Get the JSON machine key (raw keyfile content) for a username.
+    pub fn machine_key(&self, username: &str) -> Option<&String> {
+        self.machine_keys.get(username)
+    }
+
+    /// Get the project ID by project name.
+    pub fn project_id_by_name(&self, project_name: &str) -> Option<&String> {
+        self.projects.get(project_name)
+    }
+
+    fn user_grant_key(username: &str, project_name: &str) -> String {
+        format!("{username}::{project_name}")
+    }
 }

 impl<T: Topology + K8sclient> Score<T> for ZitadelSetupScore {
@@ -174,6 +275,67 @@ struct OidcConfig {
    client_id: Option<String>,
 }

+#[derive(Deserialize)]
+struct RoleSearchResult {
+    result: Option<Vec<RoleEntry>>,
+}
+
+#[derive(Deserialize)]
+struct RoleEntry {
+    key: String,
+}
+
+#[derive(Deserialize)]
+struct UserSearchResult {
+    result: Option<Vec<UserSearchEntry>>,
+}
+
+#[derive(Deserialize)]
+struct UserSearchEntry {
+    id: String,
+    #[serde(rename = "userName", default)]
+    user_name: Option<String>,
+    #[serde(rename = "preferredLoginName", default)]
+    preferred_login_name: Option<String>,
+}
+
+#[derive(Deserialize)]
+struct UserCreateResponse {
+    #[serde(rename = "userId")]
+    user_id: String,
+}
+
+/// Response when creating a machine key. Zitadel returns the keyId plus
+/// a `keyDetails` JSON that we round-trip as the keyfile content.
+#[derive(Deserialize)]
+struct MachineKeyResponse {
+    #[serde(rename = "keyId")]
+    #[allow(dead_code)]
+    key_id: String,
+    /// Base64-encoded JSON keyfile content (Zitadel returns the file as
+    /// a single base64 blob).
+    #[serde(rename = "keyDetails")]
+    key_details: String,
+}
+
+#[derive(Deserialize)]
+struct UserGrantSearchResult {
+    result: Option<Vec<UserGrantEntry>>,
+}
+
+#[derive(Deserialize)]
+struct UserGrantEntry {
+    id: String,
+    #[serde(rename = "projectId")]
+    project_id: String,
+}
+
+#[derive(Deserialize)]
+struct UserGrantCreateResponse {
+    #[serde(rename = "userGrantId")]
+    user_grant_id: String,
+}
+
 impl ZitadelSetupInterpret {
    fn api_url(&self, path: &str) -> String {
        format!("http://127.0.0.1:{}{}", self.score.port, path)
@@ -198,7 +360,8 @@ impl ZitadelSetupInterpret {
            .map_err(|e| InterpretError::new(format!("Failed to get {ADMIN_PAT_SECRET}: {e}")))?
            .ok_or_else(|| {
                InterpretError::new(format!(
-                    "Secret '{ADMIN_PAT_SECRET}' not found in namespace '{ZITADEL_NAMESPACE}'"
+                    "Secret '{ADMIN_PAT_SECRET}' not found in namespace '{ZITADEL_NAMESPACE}' — \
+                     ensure ZitadelScore Helm values configure FirstInstance.Org.Machine.Pat"
                ))
            })?;

@@ -215,6 +378,10 @@ impl ZitadelSetupInterpret {
        Ok(pat.trim().to_string())
    }

+    // ------------------------------------------------------------------
+    // Projects
+    // ------------------------------------------------------------------
+
    async fn find_project(
        &self,
        client: &reqwest::Client,
@@ -255,7 +422,9 @@ impl ZitadelSetupInterpret {
            .bearer_auth(pat)
            .json(&serde_json::json!({
                "name": name,
-                "projectRoleAssertion": true
+                "projectRoleAssertion": true,
+                "projectRoleCheck": false,
+                "hasProjectCheck": false
            }))
            .send()
            .await
@@ -273,6 +442,47 @@ impl ZitadelSetupInterpret {
        Ok(result.id)
    }

+    /// Find or create the project, refreshing the cache with the live
+    /// id every call.
+    ///
+    /// The cache is **never trusted as a source of truth for IDs** —
+    /// only as a fallback key store (machine keys, which Zitadel won't
+    /// return on subsequent reads). Trusting the cache for project IDs
+    /// silently breaks the deploy when Zitadel is reset out from under
+    /// us: the Score returns a stale id, the callout deploys with a
+    /// stale `OIDC_AUDIENCE`, and agents authenticate against a
+    /// non-existent project. Always-live lookup eliminates that drift
+    /// class at the cost of one HTTP per project per apply.
+    async fn ensure_project(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        project_name: &str,
+        config: &mut ZitadelClientConfig,
+    ) -> Result<String, InterpretError> {
+        let id = match self.find_project(client, pat, project_name).await {
+            Ok(Some(id)) => id,
+            Ok(None) => self
+                .create_project(client, pat, project_name)
+                .await
+                .map_err(InterpretError::new)?,
+            Err(e) => return Err(InterpretError::new(e)),
+        };
+
+        config.projects.insert(project_name.to_string(), id.clone());
+        // Legacy single-project field used by older ZitadelClientConfig
+        // consumers (e.g. harmony_sso). Always overwrite with the live
+        // value rather than `is_none`-guarding — guarding lets a stale
+        // cached id from a wiped Zitadel instance survive forever.
+        config.project_id = Some(id.clone());
+        info!("[ZitadelSetup] Project '{project_name}' resolved: {id}");
+        Ok(id)
+    }
+
+    // ------------------------------------------------------------------
+    // OIDC apps (DeviceCode)
+    // ------------------------------------------------------------------
+
    async fn find_app(
        &self,
        client: &reqwest::Client,
@@ -348,45 +558,14 @@ impl ZitadelSetupInterpret {
        app: &ZitadelApplication,
        config: &mut ZitadelClientConfig,
    ) -> Result<String, InterpretError> {
-        // Check cache first
-        if let Some(client_id) = config.client_id(&app.app_name) {
-            debug!(
-                "[ZitadelSetup] App '{}' found in cache: {}",
-                app.app_name, client_id
-            );
-            return Ok(client_id.clone());
-        }
+        // Always live-query — `ensure_project` and `find_app` below resolve
+        // the project and app, and the cache is only refreshed from those
+        // results. Trusting a cached client_id from a wiped Zitadel would
+        // propagate a stale id into downstream Scores (e.g. the callout's audience).
+        let project_id = self
+            .ensure_project(client, pat, &app.project_name, config)
+            .await?;

-        // Ensure project exists
-        let project_id = if let Some(id) = &config.project_id {
-            id.clone()
-        } else {
-            let id = match self.find_project(client, pat, &app.project_name).await {
-                Ok(Some(id)) => {
-                    info!(
-                        "[ZitadelSetup] Project '{}' already exists: {}",
-                        app.project_name, id
-                    );
-                    id
-                }
-                Ok(None) => {
-                    let id = self
-                        .create_project(client, pat, &app.project_name)
-                        .await
-                        .map_err(InterpretError::new)?;
-                    info!(
-                        "[ZitadelSetup] Project '{}' created: {}",
-                        app.project_name, id
-                    );
-                    id
-                }
-                Err(e) => return Err(InterpretError::new(e)),
-            };
-            config.project_id = Some(id.clone());
-            id
-        };
-
-        // Check if app already exists
        if let Some(client_id) = self
            .find_app(client, pat, &project_id, &app.app_name)
            .await
@@ -400,7 +579,6 @@ impl ZitadelSetupInterpret {
            return Ok(client_id);
        }

-        // Create app
        let client_id = match &app.app_type {
            ZitadelAppType::DeviceCode => self
                .create_device_code_app(client, pat, &project_id, &app.app_name)
@@ -415,6 +593,473 @@ impl ZitadelSetupInterpret {
        config.apps.insert(app.app_name.clone(), client_id.clone());
        Ok(client_id)
    }
+
+    // ------------------------------------------------------------------
+    // API apps (resource servers — provide audience for JWT-bearer)
+    // ------------------------------------------------------------------
+
+    /// Creates an API (resource-server) application named `app_name` in
+    /// project `project_id`; a non-success reply becomes an `Err` with its body.
+    async fn create_api_app(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        project_id: &str,
+        app_name: &str,
+    ) -> Result<(), String> {
+        let response = client
+            .post(self.api_url(&format!("/management/v1/projects/{project_id}/apps/api")))
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&serde_json::json!({
+                "name": app_name,
+                // PRIVATE_JWT: machine users authenticate via JWT-bearer (RFC 7523).
+                "authMethodType": "API_AUTH_METHOD_TYPE_PRIVATE_JWT"
+            }))
+            .send()
+            .await
+            .map_err(|e| format!("Failed to create API app: {e}"))?;
+        if response.status().is_success() {
+            return Ok(());
+        }
+        let detail = response.text().await.unwrap_or_default();
+        Err(format!("Create API app failed: {detail}"))
+    }
+
+    /// Is *any* application with `app_name` present in the project,
+    /// regardless of its OIDC/API/SAML kind. `find_app` only matches OIDC
+    /// apps (it pulls a `clientId`), so API apps must use this when
+    /// checking idempotency. Fails on a non-success HTTP status instead
+    /// of misreading an error body as "no such app".
+    async fn app_present(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        project_id: &str,
+        app_name: &str,
+    ) -> Result<bool, String> {
+        let url = self.api_url(&format!("/management/v1/projects/{project_id}/apps/_search"));
+        let resp = client
+            .post(url)
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&serde_json::json!({}))
+            .send()
+            .await
+            .map_err(|e| format!("Failed to search apps: {e}"))?;
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            return Err(format!("app search returned {status}: {body}"));
+        }
+        let result: AppSearchResult = resp
+            .json()
+            .await
+            .map_err(|e| format!("Failed to parse app search: {e}"))?;
+        let apps = result.result.unwrap_or_default();
+        Ok(apps.iter().any(|a| a.name == app_name))
+    }
+
+    /// Idempotently provisions `app`: resolves its project through
+    /// `ensure_project`, then creates the API app only if none has its name.
+    async fn ensure_api_app(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        app: &ZitadelApiApp,
+        config: &mut ZitadelClientConfig,
+    ) -> Result<(), InterpretError> {
+        let project_id = self
+            .ensure_project(client, pat, &app.project_name, config)
+            .await?;
+        let already_there = self
+            .app_present(client, pat, &project_id, &app.app_name)
+            .await
+            .map_err(InterpretError::new)?;
+        if already_there {
+            info!("[ZitadelSetup] API app '{}' already exists", app.app_name);
+            return Ok(());
+        }
+        self.create_api_app(client, pat, &project_id, &app.app_name)
+            .await
+            .map_err(InterpretError::new)?;
+        info!("[ZitadelSetup] API app '{}' created", app.app_name);
+        Ok(())
+    }
+
+    // ------------------------------------------------------------------
+    // Roles
+    // ------------------------------------------------------------------
+
+    /// Whether the project defines a role keyed `role_key`; errors on a non-2xx reply.
+    async fn role_exists(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        project_id: &str,
+        role_key: &str,
+    ) -> Result<bool, String> {
+        let url = self.api_url(&format!("/management/v1/projects/{project_id}/roles/_search"));
+        let resp = client
+            .post(url)
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&serde_json::json!({}))
+            .send()
+            .await
+            .map_err(|e| format!("Failed to search roles: {e}"))?;
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            return Err(format!("role search returned {status}: {body}"));
+        }
+        let result: RoleSearchResult = resp
+            .json()
+            .await
+            .map_err(|e| format!("Failed to parse role search: {e}"))?;
+        let roles = result.result.unwrap_or_default();
+        Ok(roles.iter().any(|r| r.key == role_key))
+    }
+
+    /// Creates `role` in `project_id`, sending its key, display name and
+    /// (only when set) its group.
+    async fn create_role(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        project_id: &str,
+        role: &ZitadelRole,
+    ) -> Result<(), String> {
+        let mut payload = serde_json::json!({
+            "roleKey": role.key,
+            "displayName": role.display_name,
+        });
+        if let Some(group) = &role.group {
+            payload["group"] = serde_json::Value::String(group.clone());
+        }
+        let response = client
+            .post(self.api_url(&format!("/management/v1/projects/{project_id}/roles")))
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&payload)
+            .send()
+            .await
+            .map_err(|e| format!("Failed to create role: {e}"))?;
+        if response.status().is_success() {
+            return Ok(());
+        }
+        let detail = response.text().await.unwrap_or_default();
+        Err(format!("Create role '{}' failed: {detail}", role.key))
+    }
+
+    /// Idempotently provisions `role`: resolves its project through
+    /// `ensure_project`, then creates the role only if its key is not taken.
+    async fn ensure_role(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        role: &ZitadelRole,
+        config: &mut ZitadelClientConfig,
+    ) -> Result<(), InterpretError> {
+        let project_id = self
+            .ensure_project(client, pat, &role.project_name, config)
+            .await?;
+        let known = self
+            .role_exists(client, pat, &project_id, &role.key)
+            .await
+            .map_err(InterpretError::new)?;
+        if known {
+            debug!("[ZitadelSetup] Role '{}' already exists", role.key);
+            return Ok(());
+        }
+        self.create_role(client, pat, &project_id, role)
+            .await
+            .map_err(InterpretError::new)?;
+        info!(
+            "[ZitadelSetup] Role '{}' created in project '{}'",
+            role.key, role.project_name
+        );
+        Ok(())
+    }
+
+    // ------------------------------------------------------------------
+    // Machine users + machine keys + grants
+    // ------------------------------------------------------------------
+
+    /// Returns the id of the user whose user name or preferred login name
+    /// equals `username`, if any. The equality filter runs server-side and
+    /// no pagination is handled. Errors on a non-success HTTP status.
+    async fn find_machine_user(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        username: &str,
+    ) -> Result<Option<String>, String> {
+        let resp = client
+            .post(self.api_url("/management/v1/users/_search"))
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&serde_json::json!({
+                "queries": [{
+                    "userNameQuery": {
+                        "userName": username,
+                        "method": "TEXT_QUERY_METHOD_EQUALS"
+                    }
+                }]
+            }))
+            .send()
+            .await
+            .map_err(|e| format!("Failed to search users: {e}"))?;
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            return Err(format!("user search returned {status}: {body}"));
+        }
+        let result: UserSearchResult = resp
+            .json()
+            .await
+            .map_err(|e| format!("Failed to parse user search: {e}"))?;
+        let users = result.result.unwrap_or_default();
+        let hit = users.into_iter().find(|u| {
+            u.user_name.as_deref() == Some(username)
+                || u.preferred_login_name.as_deref() == Some(username)
+        });
+        Ok(hit.map(|u| u.id))
+    }
+
+    /// Creates a machine user whose access tokens are JWTs and returns
+    /// the `user_id` parsed from the creation response.
+    async fn create_machine_user(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        user: &ZitadelMachineUser,
+    ) -> Result<String, String> {
+        let resp = client
+            .post(self.api_url("/management/v1/users/machine"))
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&serde_json::json!({
+                "userName": user.username,
+                "name": user.name,
+                "description": "Provisioned by Harmony ZitadelSetupScore",
+                "accessTokenType": "ACCESS_TOKEN_TYPE_JWT"
+            }))
+            .send()
+            .await
+            .map_err(|e| format!("Failed to create machine user: {e}"))?;
+        if !resp.status().is_success() {
+            let body = resp.text().await.unwrap_or_default();
+            return Err(format!(
+                "Create machine user '{}' failed: {body}",
+                user.username
+            ));
+        }
+        let parsed: UserCreateResponse =
+            serde_json::from_str(&resp.text().await.map_err(|e| format!("Read body: {e}"))?)
+                .map_err(|e| format!("Parse machine user response: {e}"))?;
+        Ok(parsed.user_id)
+    }
+
+    /// Requests a new key of `key_type` for `user_id` and returns the
+    /// keyfile text obtained by base64-decoding the response's
+    /// `key_details`; decoding and UTF-8 failures are reported as `Err`.
+    async fn create_machine_key(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        user_id: &str,
+        key_type: MachineKeyType,
+    ) -> Result<String, String> {
+        let payload = serde_json::json!({ "type": key_type.api_value() });
+        let response = client
+            .post(self.api_url(&format!("/management/v1/users/{user_id}/keys")))
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&payload)
+            .send()
+            .await
+            .map_err(|e| format!("Failed to create machine key: {e}"))?;
+        if !response.status().is_success() {
+            let detail = response.text().await.unwrap_or_default();
+            return Err(format!("Create machine key failed: {detail}"));
+        }
+        let minted: MachineKeyResponse =
+            serde_json::from_str(&response.text().await.map_err(|e| format!("Read body: {e}"))?)
+                .map_err(|e| format!("Parse machine key response: {e}"))?;
+        // `keyDetails` is base64-encoded JSON keyfile content.
+        use base64::Engine;
+        let raw = base64::engine::general_purpose::STANDARD
+            .decode(&minted.key_details)
+            .map_err(|e| format!("Decode keyDetails base64: {e}"))?;
+        let keyfile = String::from_utf8(raw);
+        keyfile.map_err(|e| format!("keyDetails contained non-UTF8 bytes: {e}"))
+    }
+
+    /// Returns the id of `user_id`'s grant on `project_id`, if one exists.
+    /// Grants are filtered by user server-side and by project locally; a
+    /// non-success HTTP status is reported as an error.
+    async fn find_user_grant(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        user_id: &str,
+        project_id: &str,
+    ) -> Result<Option<String>, String> {
+        // The per-user `/management/v1/users/{userId}/grants/_search`
+        // endpoint Zitadel's docs hint at returns 405 Method Not Allowed
+        // in current Zitadel (verified against v3.x). The collection
+        // endpoint `/management/v1/users/grants/_search` accepts query
+        // filters and is what works in practice — filter by userIdQuery
+        // server-side, then narrow to the matching project_id locally.
+        let filter = serde_json::json!({
+            "queries": [
+                { "userIdQuery": { "userId": user_id } }
+            ]
+        });
+        let resp = client
+            .post(self.api_url("/management/v1/users/grants/_search"))
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&filter)
+            .send()
+            .await
+            .map_err(|e| format!("Failed to search user grants: {e}"))?;
+
+        let status = resp.status();
+        if !status.is_success() {
+            let body = resp.text().await.unwrap_or_default();
+            return Err(format!("user-grant search returned {status}: {body}"));
+        }
+
+        let result: UserGrantSearchResult = resp
+            .json()
+            .await
+            .map_err(|e| format!("Failed to parse user grant search: {e}"))?;
+        let grants = result.result.unwrap_or_default();
+        let hit = grants.into_iter().find(|g| g.project_id == project_id);
+        Ok(hit.map(|g| g.id))
+    }
+
+    /// Grants `user_id` the roles `role_keys` on `project_id` and returns
+    /// the id of the new grant as parsed from the response.
+    async fn create_user_grant(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        user_id: &str,
+        project_id: &str,
+        role_keys: &[String],
+    ) -> Result<String, String> {
+        let response = client
+            .post(self.api_url(&format!("/management/v1/users/{user_id}/grants")))
+            .header("Host", &self.score.host)
+            .bearer_auth(pat)
+            .json(&serde_json::json!({
+                "projectId": project_id,
+                "roleKeys": role_keys
+            }))
+            .send()
+            .await
+            .map_err(|e| format!("Failed to create user grant: {e}"))?;
+        if !response.status().is_success() {
+            let detail = response.text().await.unwrap_or_default();
+            return Err(format!("Create user grant failed: {detail}"));
+        }
+        let raw = response.text().await.map_err(|e| format!("Read body: {e}"))?;
+        serde_json::from_str::<UserGrantCreateResponse>(&raw)
+            .map(|created| created.user_grant_id)
+            .map_err(|e| format!("Parse user grant response: {e}"))
+    }
+
+    /// Idempotently provisions one machine user: the user itself, an
+    /// optional machine key and an optional project grant. Resolved ids
+    /// and key material are recorded in `config`; a cached key is reused
+    /// only while the user id it was minted for is unchanged.
+    async fn ensure_machine_user(
+        &self,
+        client: &reqwest::Client,
+        pat: &str,
+        user: &ZitadelMachineUser,
+        config: &mut ZitadelClientConfig,
+    ) -> Result<(), InterpretError> {
+        let username = &user.username;
+        // 1. Ensure the user exists. Always live-query Zitadel: a cached
+        //    id pointing at a user that was deleted server-side would
+        //    otherwise be propagated through the rest of the apply.
+        let user_id = match self
+            .find_machine_user(client, pat, username)
+            .await
+            .map_err(InterpretError::new)?
+        {
+            Some(id) => id,
+            None => self
+                .create_machine_user(client, pat, user)
+                .await
+                .map_err(InterpretError::new)?,
+        };
+        let previous_id = config
+            .machine_user_ids
+            .insert(username.clone(), user_id.clone());
+        info!("[ZitadelSetup] Machine user '{username}' resolved: {user_id}");
+
+        // 2. Ensure a key exists if requested. Zitadel doesn't return key
+        //    material on subsequent reads, so the cache MUST hold it. The
+        //    cached key is reused only while the cached user id still
+        //    matches: a user recreated after a server-side wipe gets a new
+        //    id and needs a new key. A replaced key stays valid until expiry.
+        if let Some(key_type) = user.machine_key {
+            let same_user = previous_id.as_deref() == Some(user_id.as_str());
+            if !same_user || !config.machine_keys.contains_key(username) {
+                let key_json = self
+                    .create_machine_key(client, pat, &user_id, key_type)
+                    .await
+                    .map_err(InterpretError::new)?;
+                info!("[ZitadelSetup] Machine key created for '{username}'");
+                config.machine_keys.insert(username.clone(), key_json);
+            }
+        }
+
+        // 3. Ensure user grants for the requested project + roles.
+        if !user.grant_roles.is_empty() {
+            let project_name = user.project_name.as_ref().ok_or_else(|| {
+                InterpretError::new(format!(
+                    "machine user '{}' has grant_roles but no project_name",
+                    user.username
+                ))
+            })?;
+            let project_id = self
+                .ensure_project(client, pat, project_name, config)
+                .await?;
+            // Always live-query the grant; the cache is a record of
+            // last-known reality, not a substitute for it. NOTE: an existing
+            // grant is reused as-is; its roles are not compared to `grant_roles`.
+            let grant_key = ZitadelClientConfig::user_grant_key(username, project_name);
+            let grant_id = if let Some(id) = self
+                .find_user_grant(client, pat, &user_id, &project_id)
+                .await
+                .map_err(InterpretError::new)?
+            {
+                debug!(
+                    "[ZitadelSetup] Grant for '{}' on project '{}' already exists: {id}",
+                    user.username, project_name
+                );
+                id
+            } else {
+                let id = self
+                    .create_user_grant(client, pat, &user_id, &project_id, &user.grant_roles)
+                    .await
+                    .map_err(InterpretError::new)?;
+                info!(
+                    "[ZitadelSetup] Grant created: '{}' → project '{}' with roles {:?}",
+                    user.username, project_name, user.grant_roles
+                );
+                id
+            };
+            config.user_grants.insert(grant_key, grant_id);
+        }
+        Ok(())
+    }
 }

 #[async_trait]
@@ -434,21 +1079,36 @@ impl<T: Topology + K8sclient> Interpret<T> for ZitadelSetupInterpret {

        let client = self.http_client().map_err(InterpretError::new)?;

-        let mut config = ZitadelClientConfig::load().unwrap_or(ZitadelClientConfig {
-            project_id: None,
-            apps: std::collections::HashMap::new(),
-        });
+        let mut config = ZitadelClientConfig::load().unwrap_or_default();

        let mut details = Vec::new();

        for app in &self.score.applications {
            let client_id = self.ensure_app(&client, &pat, app, &mut config).await?;
-            details.push(format!("{}={}", app.app_name, client_id));
+            details.push(format!("oidc_app:{}={}", app.app_name, client_id));
        }

-        // TODO: machine user provisioning (future iteration)
-        if !self.score.machine_users.is_empty() {
-            warn!("[ZitadelSetup] Machine user provisioning not yet implemented");
+        for api_app in &self.score.api_apps {
+            self.ensure_api_app(&client, &pat, api_app, &mut config)
+                .await?;
+            details.push(format!(
+                "api_app:{}@{}",
+                api_app.app_name, api_app.project_name
+            ));
+        }
+
+        for role in &self.score.roles {
+            self.ensure_role(&client, &pat, role, &mut config).await?;
+            details.push(format!("role:{}@{}", role.key, role.project_name));
+        }
+
+        for user in &self.score.machine_users {
+            self.ensure_machine_user(&client, &pat, user, &mut config)
+                .await?;
+            details.push(format!("machine_user:{}", user.username));
+            if user.create_pat {
+                warn!("[ZitadelSetup] create_pat is currently a no-op for machine users");
+            }
        }

        config.save().map_err(InterpretError::new)?;
@@ -465,14 +1125,61 @@ impl<T: Topology + K8sclient> Interpret<T> for ZitadelSetupInterpret {
    }

    fn get_version(&self) -> Version {
-        todo!()
+        Version::from("0.2.0").expect("static version")
    }

    fn get_status(&self) -> InterpretStatus {
-        todo!()
+        InterpretStatus::QUEUED
    }

    fn get_children(&self) -> Vec<Id> {
        vec![]
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn user_grant_key_round_trips_uniquely() {
+        let alice_fleet = ZitadelClientConfig::user_grant_key("alice", "fleet");
+        let alice_platform = ZitadelClientConfig::user_grant_key("alice", "platform");
+        let bob_fleet = ZitadelClientConfig::user_grant_key("bob", "fleet");
+        assert_ne!(alice_fleet, alice_platform);
+        assert_ne!(alice_fleet, bob_fleet);
+        assert_ne!(alice_platform, bob_fleet);
+    }
+
+    #[test]
+    fn config_serialises_with_default_empty_collections() {
+        // A cache file written by an older version lacks the newer
+        // collections; `#[serde(default)]` should fill them in as empty
+        // maps while the legacy fields keep their stored values.
+        let legacy_json = r#"{"project_id":"abc","apps":{"x":"client-1"}}"#;
+        let parsed = serde_json::from_str::<ZitadelClientConfig>(legacy_json).unwrap();
+        assert!(parsed.projects.is_empty());
+        assert!(parsed.machine_user_ids.is_empty());
+        assert!(parsed.machine_keys.is_empty());
+        assert!(parsed.user_grants.is_empty());
+        assert_eq!(parsed.project_id.as_deref(), Some("abc"));
+        assert_eq!(parsed.apps.get("x").map(String::as_str), Some("client-1"));
+    }
+
+    #[test]
+    fn machine_key_type_maps_to_zitadel_api_value() {
+        assert_eq!("KEY_TYPE_JSON", MachineKeyType::Json.api_value());
+    }
+
+    #[test]
+    fn machine_keys_accessor_returns_cached_material() {
+        let keyfile = r#"{"type":"sa"}"#;
+        let mut cache = ZitadelClientConfig::default();
+        cache
+            .machine_keys
+            .insert(String::from("svc"), String::from(keyfile));
+        assert!(cache.machine_key("nope").is_none());
+        let found = cache.machine_key("svc").map(String::as_str);
+        assert_eq!(found, Some(keyfile));
+    }
+}
--- a/harmony_types/src/firewall.rs
+++ b/harmony_types/src/firewall.rs
@@ -1,6 +1,6 @@
 //! Vendor-neutral firewall and network types for infrastructure-as-code.

-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use std::fmt;

 /// Firewall rule action.
@@ -99,7 +99,7 @@ impl fmt::Display for VipMode {
 }

 /// Link aggregation protocol.
-#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum LaggProtocol {
    /// LACP (802.3ad) — negotiated aggregation with the switch.
    Lacp,
--- a/migrations/20260421000000_add_network_config_to_host_role_mapping.sql
+++ b/migrations/20260421000000_add_network_config_to_host_role_mapping.sql
@@ -0,0 +1,3 @@
+-- Add network_config column to host_role_mapping.
+-- Stores a JSON-encoded NetworkConfig (bond selection + interface blacklist).
+ALTER TABLE host_role_mapping ADD COLUMN network_config TEXT;
--- a/nats/callout/Cargo.toml
+++ b/nats/callout/Cargo.toml
@@ -0,0 +1,34 @@
+[package]
+name = "harmony-nats-callout"
+edition = "2024"
+version.workspace = true
+readme.workspace = true
+license.workspace = true
+description = "NATS auth callout service for Zitadel SSO with per-device permissions"
+rust-version = "1.85"
+
+[lib]
+name = "harmony_nats_callout"
+path = "src/lib.rs"
+
+[[bin]]
+name = "harmony-nats-callout"
+path = "src/main.rs"
+
+[dependencies]
+nats-jwt = { path = "../jwt" }
+async-nats.workspace = true
+nkeys = "0.4"
+jsonwebtoken = "9"
+reqwest = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+thiserror.workspace = true
+anyhow.workspace = true
+tokio = { workspace = true, features = ["rt", "rt-multi-thread", "macros", "signal", "sync", "time"] }
+futures-util.workspace = true
+
+[dev-dependencies]
+harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
--- a/nats/callout/Dockerfile
+++ b/nats/callout/Dockerfile
@@ -0,0 +1,26 @@
+# Minimal runtime container for the NATS auth callout service.
+# Assumes `target/release/harmony-nats-callout` has already been built on
+# the host (the deployment Score / example harness does this). Same
+# convention as `fleet/harmony-fleet-operator/Dockerfile` to keep local
+# k3d iteration fast — multi-stage cargo-in-Docker rebuilds the entire
+# workspace and is reserved for the release pipeline.
+#
+# Base image is archlinux:base to guarantee the host's glibc (ABI-
+# matched) — debian:bookworm-slim ships an older glibc and would error
+# at startup with "version `GLIBC_2.x' not found".
+FROM docker.io/library/archlinux:base
+
+# archlinux:base already ships ca-certificates, which the OIDC client
+# needs for HTTPS to the Zitadel issuer, so no extra package is installed.
+
+COPY target/release/harmony-nats-callout /usr/local/bin/harmony-nats-callout
+
+# Non-root runtime, matching the harmony-fleet-operator convention.
+# 65532 is the `nonroot` UID used by distroless + security-hardened
+# base images. The Pod manifest sets `runAsNonRoot: true`; the image's
+# USER directive is the portable mechanism that pairs with that flag
+# without pinning a specific UID at the Pod level (OpenShift's
+# restricted-v2 SCC assigns its own namespace-scoped UIDs).
+USER 65532:65532
+
+ENTRYPOINT ["/usr/local/bin/harmony-nats-callout"]
--- a/nats/callout/src/config.rs
+++ b/nats/callout/src/config.rs
@@ -0,0 +1,325 @@
+use nkeys::KeyPair;
+
+use crate::permissions::PermissionsConfig;
+
+/// Default JWT claim path for Zitadel project roles.
+///
+/// Zitadel emits roles under this URN as a map of `{role-name: {org-id: org-name}}`.
+/// The handler accepts both map and array shapes at this path.
+pub const DEFAULT_ROLES_CLAIM: &str = "urn:zitadel:iam:org:project:roles";
+
+/// Default role name granting unrestricted access (read+write on all subjects).
+pub const DEFAULT_ADMIN_ROLE: &str = "fleet-admin";
+
+/// Default role name granting per-device scoped access.
+pub const DEFAULT_DEVICE_ROLE: &str = "device";
+
+/// Configuration for the NATS auth callout service.
+#[derive(Debug, Clone)]
+pub struct AuthCalloutConfig {
+    /// NATS server URL to connect to.
+    pub nats_url: String,
+    /// Username for the auth callout service's own NATS connection.
+    pub auth_user: String,
+    /// Password for the auth callout service's own NATS connection.
+    pub auth_pass: String,
+    /// NKey pair used to sign user JWTs returned to NATS.
+    pub issuer_kp: KeyPair,
+    /// Account name to place authenticated users into. Must match the NATS
+    /// `auth_callout.account` setting.
+    pub target_account: String,
+    /// OIDC issuer URL (e.g. Zitadel).
+    pub oidc_issuer_url: String,
+    /// Expected OIDC audience.
+    pub oidc_audience: String,
+    /// JSON path to the device identifier claim (e.g. "device_id" or "custom.claim.path").
+    pub device_id_claim: String,
+    /// Prefix stripped from the extracted device-id claim before permission
+    /// interpolation. Supports the common Zitadel pattern where the machine
+    /// user's `client_id` is namespaced (`device-vm-device-00`) but the agent's
+    /// KV keys use the bare id (`vm-device-00`). Empty string means no strip.
+    pub device_id_prefix_strip: String,
+    /// JSON path to the roles claim (e.g. Zitadel's `urn:zitadel:iam:org:project:roles`).
+    pub roles_claim: String,
+    /// Role name that, when present, grants the [`AuthCalloutConfig::admin_permissions`] block.
+    pub admin_role: String,
+    /// Role name that, when present, grants the [`AuthCalloutConfig::device_permissions`] block.
+    pub device_role: String,
+    /// Permissions issued for users carrying the [`AuthCalloutConfig::admin_role`].
+    pub admin_permissions: PermissionsConfig,
+    /// Permissions issued for users carrying the [`AuthCalloutConfig::device_role`]. May contain
+    /// `{device_id}` placeholders that the handler interpolates per request.
+    pub device_permissions: PermissionsConfig,
+    /// Whether to accept invalid TLS certificates (useful for local testing).
+    pub danger_accept_invalid_certs: bool,
+}
+
+impl AuthCalloutConfig {
+    /// Starts an [`AuthCalloutConfigBuilder`] with every option unset.
+    pub fn builder() -> AuthCalloutConfigBuilder {
+        AuthCalloutConfigBuilder::default()
+    }
+}
+
+/// Builder for [`AuthCalloutConfig`]; obtain one via [`AuthCalloutConfig::builder`].
+#[derive(Default)]
+pub struct AuthCalloutConfigBuilder {
+    nats_url: Option<String>,
+    auth_user: Option<String>,
+    auth_pass: Option<String>,
+    issuer_kp: Option<KeyPair>,
+    target_account: Option<String>,
+    oidc_issuer_url: Option<String>,
+    oidc_audience: Option<String>,
+    device_id_claim: Option<String>,
+    device_id_prefix_strip: Option<String>,
+    roles_claim: Option<String>,
+    admin_role: Option<String>,
+    device_role: Option<String>,
+    admin_permissions: Option<PermissionsConfig>,
+    device_permissions: Option<PermissionsConfig>,
+    danger_accept_invalid_certs: bool,
+}
+
+impl AuthCalloutConfigBuilder {
+    /// Sets the NATS server URL (required).
+    pub fn nats_url(mut self, url: impl Into<String>) -> Self {
+        self.nats_url = Some(url.into());
+        self
+    }
+    /// Sets the callout's own NATS username (default: `"auth"`).
+    pub fn auth_user(mut self, user: impl Into<String>) -> Self {
+        self.auth_user = Some(user.into());
+        self
+    }
+    /// Sets the callout's own NATS password (default: `"auth"`).
+    pub fn auth_pass(mut self, pass: impl Into<String>) -> Self {
+        self.auth_pass = Some(pass.into());
+        self
+    }
+    /// Sets the NKey pair that signs issued user JWTs (required).
+    pub fn issuer_kp(mut self, kp: KeyPair) -> Self {
+        self.issuer_kp = Some(kp);
+        self
+    }
+    /// Sets the account authenticated users are placed into (default: `"DEVICES"`).
+    pub fn target_account(mut self, account: impl Into<String>) -> Self {
+        self.target_account = Some(account.into());
+        self
+    }
+    /// Sets the OIDC issuer URL (required).
+    pub fn oidc_issuer_url(mut self, url: impl Into<String>) -> Self {
+        self.oidc_issuer_url = Some(url.into());
+        self
+    }
+    /// Sets the expected OIDC audience (required).
+    pub fn oidc_audience(mut self, aud: impl Into<String>) -> Self {
+        self.oidc_audience = Some(aud.into());
+        self
+    }
+    /// Sets the JSON path of the device-id claim (default: `"device_id"`).
+    pub fn device_id_claim(mut self, claim: impl Into<String>) -> Self {
+        self.device_id_claim = Some(claim.into());
+        self
+    }
+    /// Sets the prefix stripped from the device id (default: empty, i.e. no strip).
+    pub fn device_id_prefix_strip(mut self, prefix: impl Into<String>) -> Self {
+        self.device_id_prefix_strip = Some(prefix.into());
+        self
+    }
+    /// Sets the JSON path of the roles claim (default: [`DEFAULT_ROLES_CLAIM`]).
+    pub fn roles_claim(mut self, claim: impl Into<String>) -> Self {
+        self.roles_claim = Some(claim.into());
+        self
+    }
+    /// Sets the role granting admin permissions (default: [`DEFAULT_ADMIN_ROLE`]).
+    pub fn admin_role(mut self, role: impl Into<String>) -> Self {
+        self.admin_role = Some(role.into());
+        self
+    }
+    /// Sets the role granting device permissions (default: [`DEFAULT_DEVICE_ROLE`]).
+    pub fn device_role(mut self, role: impl Into<String>) -> Self {
+        self.device_role = Some(role.into());
+        self
+    }
+    /// Sets the admin permission block (default: [`PermissionsConfig::admin_default`]).
+    pub fn admin_permissions(mut self, perms: PermissionsConfig) -> Self {
+        self.admin_permissions = Some(perms);
+        self
+    }
+    /// Sets the device permission block (default: [`PermissionsConfig::device_default`]).
+    pub fn device_permissions(mut self, perms: PermissionsConfig) -> Self {
+        self.device_permissions = Some(perms);
+        self
+    }
+    /// Allows invalid TLS certificates when `true` (default: `false`).
+    pub fn danger_accept_invalid_certs(mut self, allow: bool) -> Self {
+        self.danger_accept_invalid_certs = allow;
+        self
+    }
+    /// Builds the config; the error names the first required field left unset.
+    pub fn build(self) -> anyhow::Result<AuthCalloutConfig> {
+        Ok(AuthCalloutConfig {
+            nats_url: self
+                .nats_url
+                .ok_or_else(|| anyhow::anyhow!("nats_url is required"))?,
+            auth_user: self.auth_user.unwrap_or_else(|| "auth".to_string()),
+            auth_pass: self.auth_pass.unwrap_or_else(|| "auth".to_string()),
+            issuer_kp: self
+                .issuer_kp
+                .ok_or_else(|| anyhow::anyhow!("issuer_kp is required"))?,
+            target_account: self.target_account.unwrap_or_else(|| "DEVICES".to_string()),
+            oidc_issuer_url: self
+                .oidc_issuer_url
+                .ok_or_else(|| anyhow::anyhow!("oidc_issuer_url is required"))?,
+            oidc_audience: self
+                .oidc_audience
+                .ok_or_else(|| anyhow::anyhow!("oidc_audience is required"))?,
+            device_id_claim: self
+                .device_id_claim
+                .unwrap_or_else(|| "device_id".to_string()),
+            device_id_prefix_strip: self.device_id_prefix_strip.unwrap_or_default(),
+            roles_claim: self
+                .roles_claim
+                .unwrap_or_else(|| DEFAULT_ROLES_CLAIM.to_string()),
+            admin_role: self
+                .admin_role
+                .unwrap_or_else(|| DEFAULT_ADMIN_ROLE.to_string()),
+            device_role: self
+                .device_role
+                .unwrap_or_else(|| DEFAULT_DEVICE_ROLE.to_string()),
+            admin_permissions: self
+                .admin_permissions
+                .unwrap_or_else(PermissionsConfig::admin_default),
+            device_permissions: self
+                .device_permissions
+                .unwrap_or_else(PermissionsConfig::device_default),
+            danger_accept_invalid_certs: self.danger_accept_invalid_certs,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::permissions::PermissionSubjects;
+    use nkeys::KeyPair;
+
+    /// Builder with only the four required fields set; optional ones untouched.
+    fn full_builder() -> AuthCalloutConfigBuilder {
+        AuthCalloutConfig::builder()
+            .nats_url("nats://localhost:4222")
+            .issuer_kp(KeyPair::new_account())
+            .oidc_issuer_url("https://issuer.example")
+            .oidc_audience("aud-1")
+    }
+
+    #[test]
+    fn defaults_are_applied_when_optional_fields_omitted() {
+        let built = full_builder().build().expect("build should succeed");
+        assert_eq!(built.auth_user, "auth");
+        assert_eq!(built.auth_pass, "auth");
+        assert_eq!(built.target_account, "DEVICES");
+        assert_eq!(built.device_id_claim, "device_id");
+        assert_eq!(built.roles_claim, DEFAULT_ROLES_CLAIM);
+        assert_eq!(built.admin_role, DEFAULT_ADMIN_ROLE);
+        assert_eq!(built.device_role, DEFAULT_DEVICE_ROLE);
+        assert!(!built.danger_accept_invalid_certs);
+
+        // The admin default must allow publishing on `>`; the device default
+        // must stay scoped through the `{device_id}` placeholder.
+        let admin_pub = &built.admin_permissions.r#pub.allow;
+        assert!(admin_pub.iter().any(|s| s == ">"));
+        let device_pub = &built.device_permissions.r#pub.allow;
+        assert!(device_pub.iter().any(|s| s.contains("{device_id}")));
+    }
+
+    #[test]
+    fn missing_nats_url_errors() {
+        let outcome = AuthCalloutConfig::builder()
+            .issuer_kp(KeyPair::new_account())
+            .oidc_issuer_url("https://x")
+            .oidc_audience("y")
+            .build();
+        let message = outcome.unwrap_err().to_string();
+        assert!(message.contains("nats_url"));
+    }
+
+    #[test]
+    fn missing_issuer_kp_errors() {
+        let outcome = AuthCalloutConfig::builder()
+            .nats_url("nats://x")
+            .oidc_issuer_url("https://x")
+            .oidc_audience("y")
+            .build();
+        let message = outcome.unwrap_err().to_string();
+        assert!(message.contains("issuer_kp"));
+    }
+
+    #[test]
+    fn missing_oidc_issuer_url_errors() {
+        let outcome = AuthCalloutConfig::builder()
+            .nats_url("nats://x")
+            .issuer_kp(KeyPair::new_account())
+            .oidc_audience("y")
+            .build();
+        let message = outcome.unwrap_err().to_string();
+        assert!(message.contains("oidc_issuer_url"));
+    }
+
+    #[test]
+    fn missing_oidc_audience_errors() {
+        let outcome = AuthCalloutConfig::builder()
+            .nats_url("nats://x")
+            .issuer_kp(KeyPair::new_account())
+            .oidc_issuer_url("https://x")
+            .build();
+        let message = outcome.unwrap_err().to_string();
+        assert!(message.contains("oidc_audience"));
+    }
+
+    #[test]
+    fn explicit_overrides_take_effect() {
+        // Each overridden value must come back verbatim, with no default leaking in.
+        let built = full_builder()
+            .auth_user("svc")
+            .auth_pass("hunter2")
+            .target_account("ACME")
+            .device_id_claim("custom.path")
+            .roles_claim("custom_roles")
+            .admin_role("super-user")
+            .device_role("iot-thing")
+            .danger_accept_invalid_certs(true)
+            .build()
+            .unwrap();
+        assert_eq!(built.auth_user, "svc");
+        assert_eq!(built.auth_pass, "hunter2");
+        assert_eq!(built.target_account, "ACME");
+        assert_eq!(built.device_id_claim, "custom.path");
+        assert_eq!(built.roles_claim, "custom_roles");
+        assert_eq!(built.admin_role, "super-user");
+        assert_eq!(built.device_role, "iot-thing");
+        assert!(built.danger_accept_invalid_certs);
+    }
+
+    #[test]
+    fn permissions_overrides_take_effect() {
+        // The same custom block is installed for both roles; the admin side is
+        // read back through `pub`, the device side through `sub`.
+        let allow_only = |subject: &str| PermissionSubjects {
+            allow: vec![subject.to_string()],
+            deny: vec![],
+        };
+        let custom = PermissionsConfig {
+            r#pub: allow_only("custom.>"),
+            sub: allow_only("custom.<"),
+        };
+        let built = full_builder()
+            .device_permissions(custom.clone())
+            .admin_permissions(custom)
+            .build()
+            .unwrap();
+        assert_eq!(built.admin_permissions.r#pub.allow, vec!["custom.>"]);
+        assert_eq!(built.device_permissions.sub.allow, vec!["custom.<"]);
+    }
+}
--- a/nats/callout/src/handler.rs
+++ b/nats/callout/src/handler.rs
@@ -0,0 +1,744 @@
+use async_nats::Client;
+use nats_jwt::algorithm::decode_unverified;
+use nats_jwt::builder::{AuthorizationResponseBuilder, UserClaimsBuilder};
+use nats_jwt::claims::auth_request::AuthorizationRequestClaims;
+use tracing::{info, warn};
+
+use crate::config::AuthCalloutConfig;
+use crate::permissions::{InterpolatedPermissions, interpolate_permissions};
+use crate::roles::{DeviceIdError, ResolvedRole, resolve as resolve_role, validate_device_id};
+use crate::zitadel::{ZitadelClaims, ZitadelValidationError, ZitadelValidator};
+
+/// Outcome of [`decide`]: the **pure** authorization decision applied to a
+/// validated Zitadel JWT. This is the security-critical decision point —
+/// every branch is exhaustively unit-tested in `mod tests` below.
+#[derive(Debug)]
+pub enum Decision {
+    Authorize {
+        device_id: String, // already accepted by `validate_device_id`
+        role: ResolvedRole, // selects the admin or device permission template
+        perms: InterpolatedPermissions, // that template, interpolated with `device_id`
+    },
+    Reject(RejectReason), // carries the category of the refusal
+}
+
+/// Why [`decide`] refused a token; its `Display` text is the rejection message.
+#[derive(Debug, PartialEq, Eq)]
+pub enum RejectReason {
+    /// The configured `device_id_claim` path is not present in the JWT.
+    DeviceIdMissing(String),
+    /// The configured `device_id_claim` is present but not a string.
+    DeviceIdNotString(String),
+    /// The device_id failed the NATS-subject-safe character whitelist: it was
+    /// empty, or it could inject metacharacters via the `{device_id}` placeholder.
+    DeviceIdUnsafe(DeviceIdError),
+    /// No configured role (admin or device) is present on the JWT.
+    NoAuthorizedRole,
+}
+
+/// Renders each [`RejectReason`] as a one-line message. `handle_auth_request`
+/// logs this text and sends it back as the error of the authorization
+/// response.
+impl std::fmt::Display for RejectReason {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Claim paths are quoted so they stand out in the message.
+        match self {
+            Self::DeviceIdMissing(p) => write!(f, "device_id claim '{p}' missing from token"),
+            Self::DeviceIdNotString(p) => write!(f, "device_id claim '{p}' is not a string"),
+            Self::DeviceIdUnsafe(e) => write!(f, "device_id rejected: {e}"),
+            Self::NoAuthorizedRole => f.write_str("no authorized role in token"),
+        }
+    }
+}
+
+/// Pure authorization decision against a verified Zitadel JWT.
+///
+/// **Does no I/O and no signature checking.** The caller must already have
+/// validated the JWT signature, issuer, audience, and expiry through
+/// [`ZitadelValidator::validate`].
+///
+/// Checks run in a fixed order — device id extraction, device id safety,
+/// then role resolution — and the first failure becomes the rejection.
+/// Tests below cover every reachable outcome.
+pub fn decide(
+    claims: &ZitadelClaims,
+    config: &AuthCalloutConfig,
+    validator: &ZitadelValidator,
+) -> Decision {
+    let device_id = match validator.extract_device_id(claims) {
+        Ok(id) => id,
+        Err(err) => {
+            let reason = match err {
+                ZitadelValidationError::ClaimNotFound(p) => RejectReason::DeviceIdMissing(p),
+                ZitadelValidationError::ClaimNotString(p) => RejectReason::DeviceIdNotString(p),
+                // extract_device_id is only expected to produce the two
+                // variants above; any other error is treated as a bug and
+                // fails closed instead of being silently allowed.
+                _ => RejectReason::DeviceIdMissing(config.device_id_claim.clone()),
+            };
+            return Decision::Reject(reason);
+        }
+    };
+
+    // Gate the id before it can reach `{device_id}` interpolation.
+    if let Err(unsafe_id) = validate_device_id(&device_id) {
+        return Decision::Reject(RejectReason::DeviceIdUnsafe(unsafe_id));
+    }
+
+    let granted = validator.extract_roles(claims, &config.roles_claim);
+    let Some(role) = resolve_role(&granted, config) else {
+        return Decision::Reject(RejectReason::NoAuthorizedRole);
+    };
+
+    let template = match role {
+        ResolvedRole::Admin => &config.admin_permissions,
+        ResolvedRole::Device => &config.device_permissions,
+    };
+    // Interpolate before moving `device_id` into the result: no clone needed.
+    let perms = interpolate_permissions(template, &device_id);
+
+    Decision::Authorize {
+        device_id,
+        role,
+        perms,
+    }
+}
+
+/// Handle a single NATS auth callout request.
+///
+/// 1. Decode the auth request JWT (signed by NATS server, trusted).
+/// 2. Extract the Zitadel JWT from `connect_opts.auth_token`, falling back
+///    to `connect_opts.jwt`.
+/// 3. Verify the Zitadel JWT signature/issuer/audience/exp/nbf.
+/// 4. Extract `device_id` and **validate** it against NATS subject syntax —
+///    a critical security gate: an issuer that emits `device_id = "x.>"`
+///    would otherwise escalate via the `{device_id}` placeholder.
+/// 5. Extract roles and pick admin/device permissions accordingly. Reject
+///    when no configured role is present.
+/// 6. Build a user JWT with the interpolated permissions and respond.
+///
+/// # Errors
+///
+/// Rejections are answered on the reply subject and return `Ok(())` once
+/// sent. `Err` means decoding, signing, or publishing failed, or the
+/// request carried no reply subject.
+pub async fn handle_auth_request(
+    nc: &Client,
+    msg: &async_nats::Message,
+    config: &AuthCalloutConfig,
+    validator: &ZitadelValidator,
+) -> anyhow::Result<()> {
+    // The payload is the server's auth request JWT; per the contract above
+    // it is trusted and decoded without signature verification.
+    let raw = String::from_utf8_lossy(&msg.payload);
+    let request: AuthorizationRequestClaims = decode_unverified(raw.trim())
+        .map_err(|e| anyhow::anyhow!("failed to decode auth request JWT: {e}"))?;
+    let nats = &request.nats;
+
+    info!(
+        user_nkey = %nats.user_nkey,
+        "received auth callout request"
+    );
+
+    // Prefer `auth_token`; fall back to the `jwt` connect option.
+    let opts = &nats.connect_opts;
+    let presented = opts.auth_token.as_deref().or(opts.jwt.as_deref());
+
+    // Without a reply subject there is nowhere to send even a rejection.
+    let Some(reply) = msg.reply.clone() else {
+        anyhow::bail!("no reply subject on auth request");
+    };
+
+    // No credential at all: answer with a rejection rather than erroring.
+    let Some(token) = presented else {
+        info!("no auth token in request, rejecting");
+        return reject(nc, &request, config, reply, "no auth token provided").await;
+    };
+
+    // Everything after this point operates on verified claims only.
+    let oidc_claims = match validator.validate(token).await {
+        Ok(verified) => verified,
+        Err(e) => {
+            warn!(error = %e, "Zitadel JWT validation failed");
+            let reason = format!("invalid credentials: {e}");
+            return reject(nc, &request, config, reply, &reason).await;
+        }
+    };
+
+    // Pure decision step: device id safety and role resolution.
+    let (device_id, role, perms) = match decide(&oidc_claims, config, validator) {
+        Decision::Reject(reason) => {
+            warn!(reason = %reason, "rejecting auth callout");
+            return reject(nc, &request, config, reply, &reason.to_string()).await;
+        }
+        Decision::Authorize {
+            device_id,
+            role,
+            perms,
+        } => (device_id, role, perms),
+    };
+
+    // Log the configured role name rather than the enum variant.
+    let role_name = match role {
+        ResolvedRole::Admin => config.admin_role.as_str(),
+        ResolvedRole::Device => config.device_role.as_str(),
+    };
+    info!(
+        device_id = %device_id,
+        role = %role_name,
+        "Zitadel JWT validated, generating user JWT"
+    );
+
+    // The user JWT and the response wrapping it are both signed with
+    // `config.issuer_kp`.
+    let user_jwt = build_user_jwt(&nats.user_nkey, &device_id, &perms, config)?;
+
+    let response = AuthorizationResponseBuilder::new(&nats.user_nkey)
+        .audience(&nats.server_id.id)
+        .issuer(&config.issuer_kp)
+        .with_jwt(&user_jwt)
+        .sign(&config.issuer_kp)?;
+
+    info!("sending auth response");
+    // Publish, then flush instead of leaving the response buffered.
+    nc.publish(reply, response.into()).await?;
+    nc.flush().await?;
+
+    Ok(())
+}
+
+/// Build a NATS user JWT for `user_nkey` carrying the resolved permissions.
+///
+/// The token is issued and signed by `config.issuer_kp`, uses
+/// `config.target_account` as audience and `device_id` as name. Pure
+/// function — no I/O. Tested standalone in unit tests; the live handler
+/// path is covered by the integration test suite.
+///
+/// # Errors
+///
+/// Fails only if signing with `config.issuer_kp` fails.
+pub(crate) fn build_user_jwt(
+    user_nkey: &str,
+    device_id: &str,
+    perms: &InterpolatedPermissions,
+    config: &AuthCalloutConfig,
+) -> anyhow::Result<String> {
+    let builder = UserClaimsBuilder::new(user_nkey)
+        .issuer(&config.issuer_kp)
+        .audience(&config.target_account)
+        .name(device_id);
+
+    // Subjects are appended list by list: pub allow/deny, then sub allow/deny.
+    let builder = perms.pub_allow.iter().fold(builder, |b, s| b.pub_allow(s));
+    let builder = perms.pub_deny.iter().fold(builder, |b, s| b.pub_deny(s));
+    let builder = perms.sub_allow.iter().fold(builder, |b, s| b.sub_allow(s));
+    let builder = perms.sub_deny.iter().fold(builder, |b, s| b.sub_deny(s));
+
+    let signed = builder.sign(&config.issuer_kp)?;
+    Ok(signed)
+}
+
+/// Answer `reply` with a signed authorization response whose error is `reason`.
+async fn reject(
+    nc: &Client,
+    request_claims: &AuthorizationRequestClaims,
+    config: &AuthCalloutConfig,
+    reply: async_nats::Subject,
+    reason: &str,
+) -> anyhow::Result<()> {
+    let denial = AuthorizationResponseBuilder::new(&request_claims.nats.user_nkey)
+        .audience(&request_claims.nats.server_id.id)
+        .issuer(&config.issuer_kp)
+        .with_error(reason)
+        .sign(&config.issuer_kp)?;
+    nc.publish(reply, denial.into()).await?;
+    nc.flush().await.map_err(anyhow::Error::from)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::permissions::{PermissionSubjects, PermissionsConfig};
+    use crate::zitadel::ZitadelValidator;
+    use nats_jwt::algorithm::decode;
+    use nats_jwt::claims::user::UserClaims;
+    use nkeys::KeyPair;
+    use serde_json::json;
+    use std::collections::HashMap;
+    use std::sync::Arc;
+    use tokio::sync::RwLock;
+
+    fn test_config() -> AuthCalloutConfig {
+        AuthCalloutConfig::builder()
+            .nats_url("nats://localhost:4222")
+            .issuer_kp(KeyPair::new_account())
+            .target_account("DEVICES")
+            .oidc_issuer_url("http://localhost")
+            .oidc_audience("test-aud")
+            .build()
+            .unwrap()
+    }
+
+    #[test]
+    fn build_user_jwt_carries_interpolated_permissions() {
+        let config = test_config();
+        let user_kp = KeyPair::new_user();
+        let perms = InterpolatedPermissions {
+            pub_allow: vec!["device-state.sensor-1".into()],
+            pub_deny: vec![],
+            sub_allow: vec!["device-commands.sensor-1".into()],
+            sub_deny: vec![],
+        };
+
+        let jwt =
+            build_user_jwt(&user_kp.public_key(), "sensor-1", &perms, &config).expect("sign user");
+
+        let claims: UserClaims = decode(&jwt).expect("decode user jwt");
+        assert_eq!(claims.claims_data.sub, user_kp.public_key());
+        assert_eq!(claims.claims_data.aud, "DEVICES");
+        assert_eq!(claims.claims_data.name.as_deref(), Some("sensor-1"));
+        assert_eq!(
+            claims.nats.pub_perm.allow.as_ref().expect("pub_allow set")[0],
+            "device-state.sensor-1"
+        );
+        assert_eq!(
+            claims.nats.sub_perm.allow.as_ref().expect("sub_allow set")[0],
+            "device-commands.sensor-1"
+        );
+    }
+
+    #[test]
+    fn build_user_jwt_with_deny_lists_emits_them() {
+        let config = test_config();
+        let user_kp = KeyPair::new_user();
+        let perms = InterpolatedPermissions {
+            pub_allow: vec![">".into()],
+            pub_deny: vec!["secret.>".into()],
+            sub_allow: vec![">".into()],
+            sub_deny: vec!["secret.>".into()],
+        };
+
+        let jwt =
+            build_user_jwt(&user_kp.public_key(), "ignored", &perms, &config).expect("sign user");
+        let claims: UserClaims = decode(&jwt).expect("decode");
+
+        assert_eq!(
+            claims.nats.pub_perm.deny.as_ref().expect("pub_deny set")[0],
+            "secret.>"
+        );
+        assert_eq!(
+            claims.nats.sub_perm.deny.as_ref().expect("sub_deny set")[0],
+            "secret.>"
+        );
+    }
+
+    #[test]
+    fn build_user_jwt_target_account_drives_audience() {
+        // The audience MUST match the NATS server's configured callout
+        // account; otherwise NATS rejects the response.
+        let mut cfg = test_config();
+        cfg.target_account = "ACME".to_string();
+
+        let user_kp = KeyPair::new_user();
+        let jwt = build_user_jwt(
+            &user_kp.public_key(),
+            "x",
+            &InterpolatedPermissions {
+                pub_allow: vec![],
+                pub_deny: vec![],
+                sub_allow: vec![],
+                sub_deny: vec![],
+            },
+            &cfg,
+        )
+        .unwrap();
+        let claims: UserClaims = decode(&jwt).unwrap();
+        assert_eq!(claims.claims_data.aud, "ACME");
+    }
+
+    #[test]
+    fn admin_default_grants_full_access_after_interpolation() {
+        // Admin permissions don't carry `{device_id}` placeholders, so
+        // interpolation must be a no-op and the resulting subjects must
+        // be `>` (NATS wildcard for "everything").
+        let perms = interpolate_permissions(&PermissionsConfig::admin_default(), "any-id");
+        assert_eq!(perms.pub_allow, vec![">"]);
+        assert_eq!(perms.sub_allow, vec![">"]);
+    }
+
+    #[test]
+    fn empty_permissions_block_results_in_no_allow_or_deny() {
+        let empty = PermissionsConfig {
+            r#pub: PermissionSubjects::default(),
+            sub: PermissionSubjects::default(),
+        };
+        let perms = interpolate_permissions(&empty, "x");
+        assert!(perms.pub_allow.is_empty());
+        assert!(perms.pub_deny.is_empty());
+        assert!(perms.sub_allow.is_empty());
+        assert!(perms.sub_deny.is_empty());
+    }
+
+    #[test]
+    fn multiple_device_id_placeholders_in_one_subject_are_all_replaced() {
+        let cfg = PermissionsConfig {
+            r#pub: PermissionSubjects {
+                allow: vec!["{device_id}.{device_id}.event".to_string()],
+                deny: vec![],
+            },
+            sub: PermissionSubjects::default(),
+        };
+        let perms = interpolate_permissions(&cfg, "abc");
+        assert_eq!(perms.pub_allow, vec!["abc.abc.event"]);
+    }
+
+    // ----------------------------------------------------------------
+    // decide() — every reachable branch of the security decision tree
+    // ----------------------------------------------------------------
+
+    /// Build a `ZitadelValidator` that is sufficient for `decide`, which only
+    /// calls `extract_device_id`/`extract_roles` and therefore needs no
+    /// network access or signing keys. The fields are filled in directly,
+    /// with placeholder issuer/audience values and an empty `keys` map.
+    fn validator_for_decide(device_id_claim: &str) -> ZitadelValidator {
+        ZitadelValidator {
+            issuer_url: "https://issuer.example".to_string(),
+            audience: "aud".to_string(),
+            device_id_claim: device_id_claim.to_string(),
+            device_id_prefix_strip: String::new(),
+            http: reqwest::Client::new(),
+            keys: Arc::new(RwLock::new(HashMap::new())),
+        }
+    }
+
+    fn claims_with(device_id: serde_json::Value, roles: serde_json::Value) -> ZitadelClaims {
+        let mut extra = HashMap::new();
+        if !device_id.is_null() {
+            extra.insert("device_id".to_string(), device_id);
+        }
+        if !roles.is_null() {
+            extra.insert("urn:zitadel:iam:org:project:roles".to_string(), roles);
+        }
+        ZitadelClaims {
+            iss: "https://issuer.example".to_string(),
+            sub: "user-1".to_string(),
+            aud: json!("aud"),
+            exp: 0,
+            iat: 0,
+            extra,
+        }
+    }
+
+    fn cfg_with_defaults() -> AuthCalloutConfig {
+        AuthCalloutConfig::builder()
+            .nats_url("nats://x")
+            .issuer_kp(KeyPair::new_account())
+            .oidc_issuer_url("https://issuer.example")
+            .oidc_audience("aud")
+            .build()
+            .unwrap()
+    }
+
+    fn role_map(role: &str) -> serde_json::Value {
+        json!({ role: { "test-org": "Org" } })
+    }
+
+    #[test]
+    fn decide_authorizes_admin_role_with_full_perms() {
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let claims = claims_with(json!("ops-1"), role_map("fleet-admin"));
+
+        match decide(&claims, &cfg, &v) {
+            Decision::Authorize {
+                device_id,
+                role,
+                perms,
+            } => {
+                assert_eq!(device_id, "ops-1");
+                assert_eq!(role, ResolvedRole::Admin);
+                assert_eq!(perms.pub_allow, vec![">"]);
+                assert_eq!(perms.sub_allow, vec![">"]);
+            }
+            other => panic!("expected Authorize(admin), got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn decide_authorizes_device_role_with_interpolated_perms() {
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let claims = claims_with(json!("sensor-7"), role_map("device"));
+
+        match decide(&claims, &cfg, &v) {
+            Decision::Authorize {
+                device_id,
+                role,
+                perms,
+            } => {
+                assert_eq!(device_id, "sensor-7");
+                assert_eq!(role, ResolvedRole::Device);
+                assert!(
+                    perms.pub_allow.iter().any(|s| s == "device-state.sensor-7"),
+                    "device_id must be interpolated into pub_allow: {:?}",
+                    perms.pub_allow
+                );
+                assert!(
+                    perms
+                        .sub_allow
+                        .iter()
+                        .any(|s| s == "device-commands.sensor-7"),
+                    "device_id must be interpolated into sub_allow: {:?}",
+                    perms.sub_allow
+                );
+            }
+            other => panic!("expected Authorize(device), got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn decide_admin_wins_when_user_has_both_roles() {
+        // Privilege escalation invariant: a user enrolled as both
+        // fleet-admin and device must not be silently downgraded.
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let roles = json!({
+            "fleet-admin": { "org": "Org" },
+            "device": { "org": "Org" }
+        });
+        let claims = claims_with(json!("ops-and-device"), roles);
+
+        match decide(&claims, &cfg, &v) {
+            Decision::Authorize { role, perms, .. } => {
+                assert_eq!(role, ResolvedRole::Admin);
+                assert_eq!(perms.pub_allow, vec![">"]);
+            }
+            other => panic!("expected Authorize(admin), got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn decide_rejects_when_no_role_present() {
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let claims = claims_with(json!("user-1"), role_map("some-other-role"));
+
+        assert!(matches!(
+            decide(&claims, &cfg, &v),
+            Decision::Reject(RejectReason::NoAuthorizedRole)
+        ));
+    }
+
+    #[test]
+    fn decide_rejects_when_roles_claim_absent_entirely() {
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let claims = claims_with(json!("user-1"), serde_json::Value::Null);
+
+        assert!(matches!(
+            decide(&claims, &cfg, &v),
+            Decision::Reject(RejectReason::NoAuthorizedRole)
+        ));
+    }
+
+    #[test]
+    fn decide_rejects_when_device_id_claim_missing() {
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let claims = claims_with(serde_json::Value::Null, role_map("device"));
+
+        match decide(&claims, &cfg, &v) {
+            Decision::Reject(RejectReason::DeviceIdMissing(p)) => assert_eq!(p, "device_id"),
+            other => panic!("expected DeviceIdMissing, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn decide_rejects_when_device_id_is_not_a_string() {
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let claims = claims_with(json!(42), role_map("device"));
+
+        assert!(matches!(
+            decide(&claims, &cfg, &v),
+            Decision::Reject(RejectReason::DeviceIdNotString(_))
+        ));
+    }
+
+    #[test]
+    fn decide_rejects_device_id_with_subject_metacharacters() {
+        // Critical security gate: a malicious or buggy issuer that emits
+        // device_id="x.>" must NOT reach permissions interpolation, where it
+        // would grant wildcard access on `device-state.x.<anything>`. The
+        // empty id is covered too; Unsafe and Missing both count as rejected.
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        for evil in [".", "*", ">", " ", "a.b", "a*b", "a>b", "a b", ""] {
+            let claims = claims_with(json!(evil), role_map("device"));
+            let decision = decide(&claims, &cfg, &v);
+            assert!(
+                matches!(
+                    decision,
+                    Decision::Reject(
+                        RejectReason::DeviceIdUnsafe(_) | RejectReason::DeviceIdMissing(_)
+                    )
+                ),
+                "evil device_id {evil:?} must reject, got {decision:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn decide_rejects_runs_first_on_unsafe_device_id_even_when_role_is_admin() {
+        // Defense in depth: the device_id validation runs even for admin
+        // role, so a Zitadel mis-mapping that puts subject metacharacters
+        // (here "ops.>") into a fleet-admin user's device_id can't elevate
+        // via the {device_id} template (admin perms don't use it today, but
+        // the assertion protects future configurations that might).
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let claims = claims_with(json!("ops.>"), role_map("fleet-admin"));
+
+        assert!(matches!(
+            decide(&claims, &cfg, &v),
+            Decision::Reject(RejectReason::DeviceIdUnsafe(_))
+        ));
+    }
+
+    #[test]
+    fn decide_honours_custom_role_names_from_config() {
+        let cfg = AuthCalloutConfig::builder()
+            .nats_url("nats://x")
+            .issuer_kp(KeyPair::new_account())
+            .oidc_issuer_url("https://x")
+            .oidc_audience("y")
+            .admin_role("super-user")
+            .device_role("iot-thing")
+            .build()
+            .unwrap();
+        let v = validator_for_decide("device_id");
+
+        let su = claims_with(json!("svc"), role_map("super-user"));
+        match decide(&su, &cfg, &v) {
+            Decision::Authorize { role, .. } => assert_eq!(role, ResolvedRole::Admin),
+            other => panic!("expected Admin, got {other:?}"),
+        }
+
+        let iot = claims_with(json!("svc"), role_map("iot-thing"));
+        match decide(&iot, &cfg, &v) {
+            Decision::Authorize { role, .. } => assert_eq!(role, ResolvedRole::Device),
+            other => panic!("expected Device, got {other:?}"),
+        }
+
+        // The default role names must NOT match when custom names are set.
+        let stale = claims_with(json!("svc"), role_map("fleet-admin"));
+        assert!(matches!(
+            decide(&stale, &cfg, &v),
+            Decision::Reject(RejectReason::NoAuthorizedRole)
+        ));
+    }
+
+    #[test]
+    fn decide_handles_array_shape_roles_claim() {
+        // Some OIDC providers emit roles as a plain string array instead of
+        // Zitadel's role-keyed object. extract_roles is expected to handle
+        // both shapes; this test confirms decide() accepts the array form.
+        let cfg = cfg_with_defaults();
+        let v = validator_for_decide("device_id");
+        let mut extra = HashMap::new();
+        extra.insert("device_id".to_string(), json!("sensor-1"));
+        extra.insert(
+            "urn:zitadel:iam:org:project:roles".to_string(),
+            json!(["device", "viewer"]),
+        );
+        let claims = ZitadelClaims {
+            iss: "https://issuer.example".to_string(),
+            sub: "user".to_string(),
+            aud: json!("aud"),
+            exp: 0,
+            iat: 0,
+            extra,
+        };
+
+        match decide(&claims, &cfg, &v) {
+            Decision::Authorize { role, .. } => assert_eq!(role, ResolvedRole::Device),
+            other => panic!("expected Device from array roles, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn decide_uses_sub_claim_when_device_id_claim_path_is_sub() {
+        let mut cfg = cfg_with_defaults();
+        cfg.device_id_claim = "sub".to_string();
+        let v = validator_for_decide("sub");
+        // No device_id key in extra; sub is the JWT subject.
+        let mut extra = HashMap::new();
+        extra.insert(
+            "urn:zitadel:iam:org:project:roles".to_string(),
+            role_map("device"),
+        );
+        let claims = ZitadelClaims {
+            iss: "https://issuer.example".to_string(),
+            sub: "sensor-from-sub".to_string(),
+            aud: json!("aud"),
+            exp: 0,
+            iat: 0,
+            extra,
+        };
+
+        match decide(&claims, &cfg, &v) {
+            Decision::Authorize { device_id, .. } => assert_eq!(device_id, "sensor-from-sub"),
+            other => panic!("expected Authorize, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn decide_uses_nested_dotted_device_id_path() {
+        let mut cfg = cfg_with_defaults();
+        cfg.device_id_claim = "metadata.hardware.id".to_string();
+        let v = validator_for_decide("metadata.hardware.id");
+        let mut extra = HashMap::new();
+        extra.insert(
+            "metadata".to_string(),
+            json!({ "hardware": { "id": "esp32-1" } }),
+        );
+        extra.insert(
+            "urn:zitadel:iam:org:project:roles".to_string(),
+            role_map("device"),
+        );
+        let claims = ZitadelClaims {
+            iss: "https://issuer.example".to_string(),
+            sub: "user".to_string(),
+            aud: json!("aud"),
+            exp: 0,
+            iat: 0,
+            extra,
+        };
+
+        match decide(&claims, &cfg, &v) {
+            Decision::Authorize { device_id, .. } => assert_eq!(device_id, "esp32-1"),
+            other => panic!("expected Authorize, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn reject_reason_display_is_actionable() {
+        // Operators read this string in NATS server logs when a callout
+        // rejects. It must name the failure category clearly.
+        assert_eq!(
+            RejectReason::NoAuthorizedRole.to_string(),
+            "no authorized role in token"
+        );
+        assert!(
+            RejectReason::DeviceIdMissing("device_id".to_string())
+                .to_string()
+                .contains("device_id")
+        );
+        let unsafe_msg =
+            RejectReason::DeviceIdUnsafe(crate::roles::DeviceIdError::Empty).to_string();
+        assert!(
+            unsafe_msg.contains("empty"),
+            "unsafe message must explain why: {unsafe_msg}"
+        );
+    }
+}
--- a/nats/callout/src/lib.rs
+++ b/nats/callout/src/lib.rs
@@ -0,0 +1,17 @@
+//! Crate root of the NATS auth callout library code.
+//!
+//! Declares the crate's public modules and re-exports a selection of their
+//! items so callers can import them directly from the crate root.
+
+pub mod config;
+pub mod handler;
+pub mod permissions;
+pub mod roles;
+pub mod service;
+pub mod zitadel;
+
+// Configuration type, its builder, and the default role / roles-claim constants.
+pub use config::{
+    AuthCalloutConfig, AuthCalloutConfigBuilder, DEFAULT_ADMIN_ROLE, DEFAULT_DEVICE_ROLE,
+    DEFAULT_ROLES_CLAIM,
+};
+// Permission types and the `interpolate_permissions` function.
+pub use permissions::{
+    InterpolatedPermissions, PermissionSubjects, PermissionsConfig, interpolate_permissions,
+};
+// `roles::resolve` is re-exported under the name `resolve_role`.
+pub use roles::{DeviceIdError, ResolvedRole, resolve as resolve_role, validate_device_id};
+pub use service::AuthCalloutService;
+pub use zitadel::{ZitadelClaims, ZitadelValidationError, ZitadelValidator};
--- a/nats/callout/src/main.rs
+++ b/nats/callout/src/main.rs
@@ -0,0 +1,153 @@
+//! Standalone NATS auth callout service binary.
+//!
+//! Configuration is read from environment variables. The service runs until
+//! it receives SIGINT or SIGTERM, or its NATS subscription closes.
+//!
+//! ## Required env vars
+//!
+//! - `NATS_URL` — NATS server to connect to (e.g. `nats://nats:4222`).
+//! - `OIDC_ISSUER_URL` — OIDC issuer (e.g. `https://auth.example.com`).
+//! - `OIDC_AUDIENCE` — expected `aud` claim in inbound user JWTs.
+//! - One of `ISSUER_NKEY_SEED_FILE` (path to a file containing the seed) or
+//!   `ISSUER_NKEY_SEED` (raw seed string `SAA...`). The file form is preferred
+//!   when running in K8s with a mounted secret.
+//!
+//! ## Optional env vars
+//!
+//! - `NATS_AUTH_USER` (default `auth`) — service's NATS account user.
+//! - `NATS_AUTH_PASS_FILE` / `NATS_AUTH_PASS` (default `auth`) — service's password.
+//! - `TARGET_ACCOUNT` (default `DEVICES`) — account name issued users land in.
+//! - `DEVICE_ID_CLAIM` (default `device_id`) — JSON path to device identifier.
+//! - `DEVICE_ID_PREFIX_STRIP` (default empty) — prefix stripped from the
+//!   extracted device id before permission interpolation. Set to `device-`
+//!   when consuming Zitadel's `client_id` claim with the
+//!   `device-{device_id}` machine-user naming convention.
+//! - `ROLES_CLAIM` (default Zitadel URN) — JSON path to roles claim.
+//! - `ADMIN_ROLE` (default `fleet-admin`) — role granting unrestricted perms.
+//! - `DEVICE_ROLE` (default `device`) — role granting per-device perms.
+//! - `DANGER_ACCEPT_INVALID_CERTS` (default `false`) — set to `1`, `true`, or `yes`
+//!   (case-insensitive) for local dev with self-signed certs.
+//! - `RUST_LOG` (default `info`) — tracing filter.
+
+use std::env;
+use std::fs;
+
+use anyhow::{Context, Result};
+use harmony_nats_callout::{
+    AuthCalloutConfig, AuthCalloutService, DEFAULT_ADMIN_ROLE, DEFAULT_DEVICE_ROLE,
+    DEFAULT_ROLES_CLAIM,
+};
+use nkeys::KeyPair;
+use tracing::{error, info};
+use tracing_subscriber::EnvFilter;
+
+/// Binary entry point: set up tracing, load configuration from the
+/// environment, then run the callout service until it finishes or a
+/// shutdown signal arrives.
+///
+/// # Errors
+///
+/// Fails when the configuration cannot be loaded or when `service.run()`
+/// returns an error. A shutdown signal yields `Ok(())`.
+#[tokio::main]
+async fn main() -> Result<()> {
+    // Use the filter from the default environment variable when it parses;
+    // otherwise log at `info`.
+    let env_filter = match EnvFilter::try_from_default_env() {
+        Ok(parsed) => parsed,
+        Err(_) => EnvFilter::new("info"),
+    };
+    tracing_subscriber::fmt().with_env_filter(env_filter).init();
+
+    let cfg = load_config_from_env().context("loading auth callout config from environment")?;
+
+    info!(
+        nats_url = %cfg.nats_url,
+        oidc_issuer = %cfg.oidc_issuer_url,
+        target_account = %cfg.target_account,
+        admin_role = %cfg.admin_role,
+        device_role = %cfg.device_role,
+        "starting harmony NATS auth callout"
+    );
+
+    let service = AuthCalloutService::new(cfg);
+
+    // Whichever future completes first decides the exit status.
+    tokio::select! {
+        outcome = service.run() => match outcome {
+            Ok(_) => Ok(()),
+            Err(e) => {
+                error!(error = %e, "auth callout service exited with error");
+                Err(e)
+            }
+        },
+        _ = shutdown_signal() => {
+            info!("shutdown signal received, exiting");
+            Ok(())
+        }
+    }
+}
+
+/// Build the service configuration from environment variables.
+///
+/// `NATS_URL`, `OIDC_ISSUER_URL`, `OIDC_AUDIENCE` and an issuer NKey seed are
+/// mandatory; every other setting falls back to a default when its variable
+/// cannot be read.
+///
+/// # Errors
+///
+/// Fails when a mandatory variable is missing, when no issuer seed is
+/// available, when the seed is rejected by `KeyPair::from_seed`, or when the
+/// builder's `build()` fails.
+fn load_config_from_env() -> Result<AuthCalloutConfig> {
+    // --- mandatory connection / OIDC settings ---
+    let nats_url = require_env("NATS_URL")?;
+    let oidc_issuer_url = require_env("OIDC_ISSUER_URL")?;
+    let oidc_audience = require_env("OIDC_AUDIENCE")?;
+
+    // --- credentials this service uses to log in to NATS ---
+    let auth_user = env::var("NATS_AUTH_USER").unwrap_or_else(|_| "auth".into());
+    let auth_pass = match read_secret("NATS_AUTH_PASS") {
+        Some(pass) => pass,
+        None => "auth".to_string(),
+    };
+
+    // --- issuer signing key ---
+    let Some(seed_text) = read_secret("ISSUER_NKEY_SEED") else {
+        return Err(anyhow::anyhow!(
+            "issuer NKey seed is required: set ISSUER_NKEY_SEED_FILE (preferred) or ISSUER_NKEY_SEED"
+        ));
+    };
+    // The seed is trimmed before parsing, so surrounding whitespace is ignored.
+    let issuer_kp = KeyPair::from_seed(seed_text.trim())
+        .map_err(|e| anyhow::anyhow!("invalid ISSUER_NKEY_SEED: {e}"))?;
+
+    // --- optional claim / role settings ---
+    let target_account = env::var("TARGET_ACCOUNT").unwrap_or_else(|_| "DEVICES".into());
+    let device_id_claim = env::var("DEVICE_ID_CLAIM").unwrap_or_else(|_| "device_id".into());
+    let device_id_prefix_strip = env::var("DEVICE_ID_PREFIX_STRIP").unwrap_or_default();
+    let roles_claim = env::var("ROLES_CLAIM").unwrap_or_else(|_| DEFAULT_ROLES_CLAIM.to_string());
+    let admin_role = env::var("ADMIN_ROLE").unwrap_or_else(|_| DEFAULT_ADMIN_ROLE.to_string());
+    let device_role = env::var("DEVICE_ROLE").unwrap_or_else(|_| DEFAULT_DEVICE_ROLE.to_string());
+
+    // Only `1`, `true` or `yes` (trimmed, case-insensitive) turn this on.
+    let danger_accept_invalid_certs = match env::var("DANGER_ACCEPT_INVALID_CERTS") {
+        Ok(raw) => matches!(raw.trim().to_ascii_lowercase().as_str(), "1" | "true" | "yes"),
+        Err(_) => false,
+    };
+
+    AuthCalloutConfig::builder()
+        .nats_url(nats_url)
+        .auth_user(auth_user)
+        .auth_pass(auth_pass)
+        .issuer_kp(issuer_kp)
+        .target_account(target_account)
+        .oidc_issuer_url(oidc_issuer_url)
+        .oidc_audience(oidc_audience)
+        .device_id_claim(device_id_claim)
+        .device_id_prefix_strip(device_id_prefix_strip)
+        .roles_claim(roles_claim)
+        .admin_role(admin_role)
+        .device_role(device_role)
+        .danger_accept_invalid_certs(danger_accept_invalid_certs)
+        .build()
+}
+
+/// Fetch a mandatory environment variable.
+///
+/// # Errors
+///
+/// Returns an error naming the variable when it is unset, and a distinct
+/// error when it is set but its value is not valid Unicode, so the operator
+/// is not told a variable is missing when it is actually malformed.
+fn require_env(name: &str) -> Result<String> {
+    match env::var(name) {
+        Ok(value) => Ok(value),
+        Err(env::VarError::NotPresent) => {
+            Err(anyhow::anyhow!("required env var {name} is not set"))
+        }
+        Err(env::VarError::NotUnicode(_)) => Err(anyhow::anyhow!(
+            "required env var {name} is set but its value is not valid Unicode"
+        )),
+    }
+}
+
+/// Read a secret-style value: prefer `<NAME>_FILE` (path to a mounted secret)
+/// over `<NAME>` (raw value) so K8s secret mounts are first-class.
+///
+/// Values read from a file have trailing `\n` / `\r` characters removed, so a
+/// secret file that ends with a newline does not yield a secret containing
+/// that newline. Values taken from `<NAME>` itself are returned unchanged.
+///
+/// If `<NAME>_FILE` is set but the file cannot be read, the failure is logged
+/// and the raw `<NAME>` variable is used instead. Returns `None` when neither
+/// source yields a value.
+fn read_secret(name: &str) -> Option<String> {
+    if let Ok(path) = env::var(format!("{name}_FILE")) {
+        match fs::read_to_string(&path) {
+            Ok(contents) => {
+                // Strip only line terminators at the end; leading and inner
+                // whitespace is left intact.
+                let secret = contents.trim_end_matches(|c: char| matches!(c, '\n' | '\r'));
+                return Some(secret.to_string());
+            }
+            Err(e) => {
+                // Best effort: report the problem, then fall through to the
+                // raw variable below.
+                error!(path = %path, error = %e, "failed to read secret file");
+            }
+        }
+    }
+    env::var(name).ok()
+}
+
+/// Unix shutdown trigger: resolves as soon as `recv()` completes on either
+/// the SIGTERM or the SIGINT stream.
+///
+/// # Panics
+///
+/// Panics if either signal handler cannot be installed.
+#[cfg(unix)]
+async fn shutdown_signal() {
+    use tokio::signal::unix::{SignalKind, signal};
+    let mut term_stream = signal(SignalKind::terminate()).expect("install SIGTERM handler");
+    let mut int_stream = signal(SignalKind::interrupt()).expect("install SIGINT handler");
+    tokio::select! {
+        _ = term_stream.recv() => (),
+        _ = int_stream.recv() => (),
+    }
+}
+
+/// Non-Unix shutdown trigger: resolves once `tokio::signal::ctrl_c()`
+/// completes, whether it reports success or an error.
+#[cfg(not(unix))]
+async fn shutdown_signal() {
+    match tokio::signal::ctrl_c().await {
+        Ok(()) | Err(_) => {}
+    }
+}
--- a/Show More
+++ b/Show More