feat: scaffold IoT walking skeleton — podman module, operator, and agent #264

Merged
johnride merged 210 commits from feat/iot-walking-skeleton into master 2026-05-22 22:16:18 +00:00
402 changed files with 46428 additions and 1915 deletions

View File

@@ -6,3 +6,6 @@ rustflags = ["-C", "link-arg=-Wl,--stack,8000000"]
[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"
[profile.test]
debug = 0

View File

@@ -1,6 +1,64 @@
# Build context filter for `podman build`. The bare invocations in
# fleet/scripts/build_and_push_images.sh use the workspace root as
# context (cargo workspace path-deps require it). Without this list,
# the context tar would carry tens of GB of build artifacts, agent
# worktrees, and demo blobs to the build daemon for every image.
#
# Pattern semantics (Docker/Podman): Docker documents patterns as
# anchored at the context root, so a bare `foo` is only guaranteed to
# match at the top level. `**/foo` is the explicit recursive form
# (some implementations require it to match at any depth).
# ---- Cargo build outputs (the bulk: ~100 GB combined) ---------------
target/
**/target/
# ---- VCS + tooling caches (4-40 GB) --------------------------------
.git/
.gitignore
.gitattributes
.claude/
.idea/
.vscode/
.cargo/
# ---- Local-only debug / demo artifacts -----------------------------
data/
demos/
manual_mint/
# ---- Cluster + cloud-image blobs (.qcow2 etc. easily exceed 1 GB) ---
*.qcow2
*.iso
*.img
*.tar
*.tar.gz
*.tgz
**/cloud-images/
**/kvm/pool/
# ---- Test outputs / databases --------------------------------------
*.sqlite
*.sqlite-journal
*.log
**/previous_runs/
**/reports/
# ---- Python venvs that may sneak in via the manual-mint helper -----
venv/
.venv/
__pycache__/
*.pyc
# ---- JS that could land via a docs/site preview --------------------
node_modules/
# ---- Build context noise -------------------------------------------
Dockerfile
.git
data
target
demos
**/Dockerfile.dev
docker-compose*.yml
.dockerignore
# ---- OS / editor ----------------------------------------------------
.DS_Store
*.swp
*~

7
.env.example Normal file
View File

@@ -0,0 +1,7 @@
FLEET_AUTH_ISSUER_URL=
FLEET_AUTH_AUTHORIZE_URL=
FLEET_AUTH_TOKEN_URL=
FLEET_AUTH_CLIENT_ID=
FLEET_AUTH_REDIRECT_URI=
FLEET_AUTH_SCOPE=
FLEET_AUTH_TRUSTED_AUDIENCES=

View File

@@ -0,0 +1,44 @@
name: Build and push harmony-fleet-operator image
on:
push:
branches:
- master
workflow_dispatch:
jobs:
build_and_push:
container:
image: hub.nationtech.io/harmony/harmony_composer:latest
runs-on: dind
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Log in to hub.nationtech.io
uses: docker/login-action@v3
with:
registry: hub.nationtech.io
username: ${{ secrets.HUB_BOT_USER }}
password: ${{ secrets.HUB_BOT_PASSWORD }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
# Build context is the workspace root because the operator's
# Cargo.toml has `path = "../../harmony"` deps. The multi-stage
# Dockerfile runs `cargo build` itself inside a pinned rust
# image, so no host-side cargo step is needed.
#
# TODO: add buildx layer caching. Each run currently recompiles
# the whole `harmony` workspace from scratch in the builder
# stage. Add `cache-from: type=gha` + `cache-to: type=gha,mode=max`
# below once build time becomes the bottleneck. If layer cache
# alone isn't enough, consider splitting the Dockerfile with
# cargo-chef (no other crate in this repo does that yet).
- name: Build and push
uses: docker/build-push-action@v6
with:
context: .
file: fleet/harmony-fleet-operator/Dockerfile
push: true
tags: hub.nationtech.io/harmony/harmony-fleet-operator:latest

11
.gitignore vendored
View File

@@ -1,10 +1,15 @@
### General ###
private_repos/
.env
### Harmony ###
harmony.log
data/okd/installation_files*
# Compiled tailwind output for the operator's maud+htmx frontend.
# Source is `fleet/harmony-fleet-operator/style/input.css`.
fleet/harmony-fleet-operator/style/dist/
### Helm ###
# Chart dependencies
**/charts/*.tgz
@@ -32,3 +37,9 @@ ignore
# Generated book
book
# Scratch and agent worktrees — never commit
.claude/
ui-idea.md
ROADMAP/00-priority-matrix.md
fleet/harmony-fleet-agent/agent-config.toml

171
AGENTS.md Normal file
View File

@@ -0,0 +1,171 @@
# AGENTS.md
This file provides guidance to coding agents such as Claude Code (claude.ai/code) when working with code in this repository. `CLAUDE.md` is a symlink to this file.
## Build & Test Commands
```bash
# Full CI check (check + fmt + clippy + test)
./build/check.sh
# Individual commands
cargo check --all-targets --all-features --keep-going
cargo fmt --check # Check formatting
cargo clippy # Lint
cargo test # Run all tests
# Run a single test
cargo test -p <crate_name> <test_name>
# Run a specific example
cargo run -p <example_crate_name>
# Build the mdbook documentation
mdbook build
```
## What Harmony Is
Harmony is the orchestration framework powering NationTech's vision of **decentralized micro datacenters** — small computing clusters deployed in homes, offices, and community spaces instead of hyperscaler facilities. The goal: make computing cleaner, more resilient, locally beneficial, and resistant to centralized points of failure (including geopolitical threats).
Harmony exists because existing IaC tools (Terraform, Ansible, Helm) are trapped in a **YAML mud pit**: static configuration files validated only at runtime, fragmented across tools, with errors surfacing at 3 AM instead of at compile time. Harmony replaces this entire class of tools with a single Rust codebase where **the compiler catches infrastructure misconfigurations before anything is deployed**.
This is not a wrapper around existing tools. It is a paradigm shift: infrastructure-as-real-code with compile-time safety guarantees that no YAML/HCL/DSL-based tool can provide.
## The Score-Topology-Interpret Pattern
This is the core design pattern. Understand it before touching the codebase.
**Score** — declarative desired state. A Rust struct generic over `T: Topology` that describes *what* you want (e.g., "a PostgreSQL cluster", "DNS records for these hosts"). Scores are serializable, cloneable, idempotent.
**Topology** — infrastructure capabilities. Represents *where* things run and *what the environment can do*. Exposes capabilities as traits (`DnsServer`, `K8sclient`, `HelmCommand`, `LoadBalancer`, `Firewall`, etc.). Examples: `K8sAnywhereTopology` (local K3D or any K8s cluster), `HAClusterTopology` (bare-metal HA with redundant firewalls/switches).
**Interpret** — execution glue. Translates a Score into concrete operations against a Topology's capabilities. Returns an `Outcome` (SUCCESS, NOOP, FAILURE, RUNNING, QUEUED, BLOCKED).
**The key insight — compile-time safety through trait bounds:**
```rust
impl<T: Topology + DnsServer + DhcpServer> Score<T> for DnsScore { ... }
```
The compiler rejects any attempt to use `DnsScore` with a Topology that doesn't implement `DnsServer` and `DhcpServer`. Invalid infrastructure configurations become compilation errors, not runtime surprises.
**Higher-order topologies** compose transparently:
- `FailoverTopology<T>` — primary/replica orchestration
- `DecentralizedTopology<T>` — multi-site coordination
If `T: PostgreSQL`, then `FailoverTopology<T>: PostgreSQL` automatically via blanket impls. Zero boilerplate.
## Architecture (Hexagonal)
```
harmony/src/
├── domain/ # Core domain — the heart of the framework
│ ├── score.rs # Score trait (desired state)
│ ├── topology/ # Topology trait + implementations
│ ├── interpret/ # Interpret trait + InterpretName enum (25+ variants)
│ ├── inventory/ # Physical infrastructure metadata (hosts, switches, mgmt interfaces)
│ ├── executors/ # Executor trait definitions
│ └── maestro/ # Orchestration engine (registers scores, manages topology state, executes)
├── infra/ # Infrastructure adapters (driven ports)
│ ├── opnsense/ # OPNsense firewall adapter
│ ├── brocade.rs # Brocade switch adapter
│ ├── kube.rs # Kubernetes executor
│ └── sqlx.rs # Database executor
└── modules/ # Concrete deployment modules (23+)
├── k8s/ # Kubernetes (namespaces, deployments, ingress)
├── postgresql/ # CloudNativePG clusters + multi-site failover
├── okd/ # OpenShift bare-metal from scratch
├── helm/ # Helm chart inflation → vanilla K8s YAML
├── opnsense/ # OPNsense (DHCP, DNS, etc.)
├── monitoring/ # Prometheus, Alertmanager, Grafana
├── kvm/ # KVM virtual machine management
├── network/ # Network services (iPXE, TFTP, bonds)
└── ...
```
Domain types to know: `Inventory` (read-only physical infra context), `Maestro<T>` (orchestrator — calls `topology.ensure_ready()` then executes scores), `Outcome` / `InterpretError` (execution results).
## Key Crates
| Crate | Purpose |
|---|---|
| `harmony` | Core framework: domain, infra adapters, deployment modules |
| `harmony_cli` | CLI + optional TUI (`--features tui`) |
| `harmony_config` | Unified config+secret management (env → SQLite → OpenBao → interactive prompt) |
| `harmony_secret` / `harmony_secret_derive` | Secret backends (LocalFile, OpenBao, Infisical) |
| `harmony_execution` | Execution engine |
| `harmony_agent` / `harmony_inventory_agent` | Persistent agent framework (NATS JetStream mesh), hardware discovery |
| `harmony_assets` | Asset management (URLs, local cache, S3) |
| `harmony_composer` | Infrastructure composition tool |
| `harmony-k8s` | Kubernetes utilities |
| `k3d` | Local K3D cluster management |
| `brocade` | Brocade network switch integration |
## OPNsense Crates
The `opnsense-codegen` and `opnsense-api` crates exist because OPNsense's automation ecosystem is poor — no typed API client exists. These are support crates, not the core of Harmony.
- `opnsense-codegen`: XML model files → IR → Rust structs with serde helpers for OPNsense wire format quirks (`opn_bool` for "0"/"1" strings, `opn_u16`/`opn_u32` for string-encoded numbers). Vendor sources are git submodules under `opnsense-codegen/vendor/`.
- `opnsense-api`: Hand-written `OpnsenseClient` + generated model types in `src/generated/`.
## Key Design Decisions (ADRs in docs/adr/)
- **ADR-001**: Rust chosen for type system, refactoring safety, and performance
- **ADR-002**: Hexagonal architecture — domain isolated from adapters
- **ADR-003**: Infrastructure abstractions at domain level, not provider level (no vendor lock-in)
- **ADR-005**: Custom Rust DSL over YAML/Score-spec — real language, Cargo deps, composable
- **ADR-007**: K3D as default runtime (K8s-certified, lightweight, cross-platform)
- **ADR-009**: Helm charts inflated to vanilla K8s YAML, then deployed via existing code paths
- **ADR-015**: Higher-order topologies via blanket trait impls (zero-cost composition)
- **ADR-016**: Agent-based architecture with NATS JetStream for real-time failover and distributed consensus
- **ADR-020**: Unified config+secret management — Rust struct is the schema, resolution chain: env → store → prompt
- **ADR-023**: Deploy architecture — Scores everywhere (incl. tests), per-component `*-deploy` crates, deploy blocks on smoke-test, topologies are compile-time
## Capability and Score Design Rules
**Capabilities are industry concepts, not tools.** A capability trait represents a standard infrastructure need (e.g., `DnsServer`, `LoadBalancer`, `Router`, `CertificateManagement`) that can be fulfilled by different products. OPNsense provides `DnsServer` today; CoreDNS or Route53 could provide it tomorrow. Scores must not break when the backend changes.
**Exception:** When the developer fundamentally needs to know the implementation. `PostgreSQL` is a capability (not `Database`) because the developer writes PostgreSQL-specific SQL and replication configs. Swapping to MariaDB would break the application, not just the infrastructure.
**Test:** If you could swap the underlying tool without rewriting any Score that uses the capability, the boundary is correct.
**Don't name capabilities after tools.** `SecretVault` not `OpenbaoStore`. `IdentityProvider` not `ZitadelAuth`. Think: what is the core developer need that leads to using this tool?
**Scores encapsulate operational complexity.** Move procedural knowledge (init sequences, retry logic, distribution-specific config) into Scores. A high-level example should be ~15 lines, not ~400 lines of imperative orchestration.
**Scores must be idempotent.** Running twice = same result as once. Use create-or-update, handle "already exists" gracefully.
**Scores must not depend on execution order.** Declare capability requirements via trait bounds, don't assume another Score ran first. If Score B needs what Score A provides, Score B should declare that capability as a trait bound.
See `docs/guides/writing-a-score.md` for the full guide.
## Deploy Architecture (ADR-023)
The Score-Topology-Interpret pattern above tells you how to **describe** a deployment. The rules below tell you how to **ship** one. These are non-negotiable.
**Deploy with Scores, not handrolled manifests.** No `k8s_openapi::api::*` structs outside of `Score::interpret` bodies. CLIs, examples, and **test harnesses** all compose `*Score` types — they never reimplement deploys. If you find yourself building `Deployment` / `Service` / `ConfigMap` structs in a test harness, stop: that's the YAML-mud-pit anti-pattern in Rust clothing. Reach for the existing Score, or write a missing Score in the right deploy crate.
**E2E uses the same Scores as production.** Only the `Topology` instance changes (local k3d, remote OKD, bare-metal HA). A test harness is a Score-composer running against a test Topology. If e2e needs something prod doesn't, add the knob to the Score — don't fork the manifest in the harness.
**One Score per deployable component.** Composition is the user-facing primitive: `MyAppScore` pulls in `PostgresScore`, `HttpServerScore`, etc. Don't build monolithic "deploy everything" Scores; build small testable ones and compose upward.
**Deploy returns only after smoke-test success.** Every Score owns a readiness + smoke-test contract that the framework runs and blocks on. `helm install && hope` is the anti-pattern harmony exists to fix. Convergence errors must be actionable in the style of `rustc`'s error messages, not "exit code 1 from helm". (The implementation shape of the smoke-test contract is open; the principle is locked in.)
**Deploy logic lives in a `*-deploy` crate** that depends on both `harmony` and the runtime crate. Runtime binaries (the thing that ships to constrained devices and to in-cluster pods) stay free of the `harmony` dep. Pattern: `harmony_agent/deploy`, `fleet/harmony-fleet-deploy`. *Each app area gets one deploy crate that holds every component's Score plus a `main.rs` driven by `harmony_cli` that selects which component to deploy.*
**Topologies are compile-time, selected at runtime.** A deploy binary statically lists its supported topologies; the user picks one at deploy time. Adding a new topology backend is a rebuild — that's an acceptable cost because dynamic-discovery topologies like `K8sAnywhere` already cover "any physical place that runs k8s". No `Box<dyn Topology>` plugin loaders.
**Extend Scores with companions, not API changes.** New capabilities the framework wants to attach to Scores (planning, dry-run, observability, eventually smoke-test) default to a *companion* type or trait that wraps a Score rather than a new method on `Score`/`Interpret`. The base public API stays simple.
**CLI: hybrid, staged.** Today (B): first-party tools ship as separate `harmony-*` binaries built on the existing `harmony_cli` crate. Tomorrow (C): a top-level `harmony` binary discovers `harmony-*` plugin binaries on `$PATH` (`kubectl`-style). The plugin protocol is **not** in scope for any current PR — dedicated future effort.
**Use `thiserror` almost everywhere; `anyhow` only at binary glue.** Library code, public crate boundaries, anything callers might want to match on — typed errors via `thiserror`. `anyhow` is reserved for `main.rs`-level glue where the error is just printed.
See `docs/adr/023-deploy-architecture.md` for the full rationale, including what's explicitly deferred (Score derive macro, Score registry, plugin CLI discovery, inventory redesign, smoke-test contract shape).
## Conventions
- **Rust edition 2024**, resolver v2
- **Conventional commits**: `feat:`, `fix:`, `chore:`, `docs:`, `refactor:`
- **Small PRs**: max ~200 lines (excluding generated code), single-purpose
- **License**: GNU AGPL v3
- **Quality bar**: This framework demands high-quality engineering. The type system is a feature, not a burden. Leverage it. Prefer compile-time guarantees over runtime checks. Abstractions should be domain-level, not provider-specific.

146
CLAUDE.md
View File

@@ -1,146 +0,0 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Build & Test Commands
```bash
# Full CI check (check + fmt + clippy + test)
./build/check.sh
# Individual commands
cargo check --all-targets --all-features --keep-going
cargo fmt --check # Check formatting
cargo clippy # Lint
cargo test # Run all tests
# Run a single test
cargo test -p <crate_name> <test_name>
# Run a specific example
cargo run -p <example_crate_name>
# Build the mdbook documentation
mdbook build
```
## What Harmony Is
Harmony is the orchestration framework powering NationTech's vision of **decentralized micro datacenters** — small computing clusters deployed in homes, offices, and community spaces instead of hyperscaler facilities. The goal: make computing cleaner, more resilient, locally beneficial, and resistant to centralized points of failure (including geopolitical threats).
Harmony exists because existing IaC tools (Terraform, Ansible, Helm) are trapped in a **YAML mud pit**: static configuration files validated only at runtime, fragmented across tools, with errors surfacing at 3 AM instead of at compile time. Harmony replaces this entire class of tools with a single Rust codebase where **the compiler catches infrastructure misconfigurations before anything is deployed**.
This is not a wrapper around existing tools. It is a paradigm shift: infrastructure-as-real-code with compile-time safety guarantees that no YAML/HCL/DSL-based tool can provide.
## The Score-Topology-Interpret Pattern
This is the core design pattern. Understand it before touching the codebase.
**Score** — declarative desired state. A Rust struct generic over `T: Topology` that describes *what* you want (e.g., "a PostgreSQL cluster", "DNS records for these hosts"). Scores are serializable, cloneable, idempotent.
**Topology** — infrastructure capabilities. Represents *where* things run and *what the environment can do*. Exposes capabilities as traits (`DnsServer`, `K8sclient`, `HelmCommand`, `LoadBalancer`, `Firewall`, etc.). Examples: `K8sAnywhereTopology` (local K3D or any K8s cluster), `HAClusterTopology` (bare-metal HA with redundant firewalls/switches).
**Interpret** — execution glue. Translates a Score into concrete operations against a Topology's capabilities. Returns an `Outcome` (SUCCESS, NOOP, FAILURE, RUNNING, QUEUED, BLOCKED).
**The key insight — compile-time safety through trait bounds:**
```rust
impl<T: Topology + DnsServer + DhcpServer> Score<T> for DnsScore { ... }
```
The compiler rejects any attempt to use `DnsScore` with a Topology that doesn't implement `DnsServer` and `DhcpServer`. Invalid infrastructure configurations become compilation errors, not runtime surprises.
**Higher-order topologies** compose transparently:
- `FailoverTopology<T>` — primary/replica orchestration
- `DecentralizedTopology<T>` — multi-site coordination
If `T: PostgreSQL`, then `FailoverTopology<T>: PostgreSQL` automatically via blanket impls. Zero boilerplate.
## Architecture (Hexagonal)
```
harmony/src/
├── domain/ # Core domain — the heart of the framework
│ ├── score.rs # Score trait (desired state)
│ ├── topology/ # Topology trait + implementations
│ ├── interpret/ # Interpret trait + InterpretName enum (25+ variants)
│ ├── inventory/ # Physical infrastructure metadata (hosts, switches, mgmt interfaces)
│ ├── executors/ # Executor trait definitions
│ └── maestro/ # Orchestration engine (registers scores, manages topology state, executes)
├── infra/ # Infrastructure adapters (driven ports)
│ ├── opnsense/ # OPNsense firewall adapter
│ ├── brocade.rs # Brocade switch adapter
│ ├── kube.rs # Kubernetes executor
│ └── sqlx.rs # Database executor
└── modules/ # Concrete deployment modules (23+)
├── k8s/ # Kubernetes (namespaces, deployments, ingress)
├── postgresql/ # CloudNativePG clusters + multi-site failover
├── okd/ # OpenShift bare-metal from scratch
├── helm/ # Helm chart inflation → vanilla K8s YAML
├── opnsense/ # OPNsense (DHCP, DNS, etc.)
├── monitoring/ # Prometheus, Alertmanager, Grafana
├── kvm/ # KVM virtual machine management
├── network/ # Network services (iPXE, TFTP, bonds)
└── ...
```
Domain types to know: `Inventory` (read-only physical infra context), `Maestro<T>` (orchestrator — calls `topology.ensure_ready()` then executes scores), `Outcome` / `InterpretError` (execution results).
## Key Crates
| Crate | Purpose |
|---|---|
| `harmony` | Core framework: domain, infra adapters, deployment modules |
| `harmony_cli` | CLI + optional TUI (`--features tui`) |
| `harmony_config` | Unified config+secret management (env → SQLite → OpenBao → interactive prompt) |
| `harmony_secret` / `harmony_secret_derive` | Secret backends (LocalFile, OpenBao, Infisical) |
| `harmony_execution` | Execution engine |
| `harmony_agent` / `harmony_inventory_agent` | Persistent agent framework (NATS JetStream mesh), hardware discovery |
| `harmony_assets` | Asset management (URLs, local cache, S3) |
| `harmony_composer` | Infrastructure composition tool |
| `harmony-k8s` | Kubernetes utilities |
| `k3d` | Local K3D cluster management |
| `brocade` | Brocade network switch integration |
## OPNsense Crates
The `opnsense-codegen` and `opnsense-api` crates exist because OPNsense's automation ecosystem is poor — no typed API client exists. These are support crates, not the core of Harmony.
- `opnsense-codegen`: XML model files → IR → Rust structs with serde helpers for OPNsense wire format quirks (`opn_bool` for "0"/"1" strings, `opn_u16`/`opn_u32` for string-encoded numbers). Vendor sources are git submodules under `opnsense-codegen/vendor/`.
- `opnsense-api`: Hand-written `OpnsenseClient` + generated model types in `src/generated/`.
## Key Design Decisions (ADRs in docs/adr/)
- **ADR-001**: Rust chosen for type system, refactoring safety, and performance
- **ADR-002**: Hexagonal architecture — domain isolated from adapters
- **ADR-003**: Infrastructure abstractions at domain level, not provider level (no vendor lock-in)
- **ADR-005**: Custom Rust DSL over YAML/Score-spec — real language, Cargo deps, composable
- **ADR-007**: K3D as default runtime (K8s-certified, lightweight, cross-platform)
- **ADR-009**: Helm charts inflated to vanilla K8s YAML, then deployed via existing code paths
- **ADR-015**: Higher-order topologies via blanket trait impls (zero-cost composition)
- **ADR-016**: Agent-based architecture with NATS JetStream for real-time failover and distributed consensus
- **ADR-020**: Unified config+secret management — Rust struct is the schema, resolution chain: env → store → prompt
## Capability and Score Design Rules
**Capabilities are industry concepts, not tools.** A capability trait represents a standard infrastructure need (e.g., `DnsServer`, `LoadBalancer`, `Router`, `CertificateManagement`) that can be fulfilled by different products. OPNsense provides `DnsServer` today; CoreDNS or Route53 could provide it tomorrow. Scores must not break when the backend changes.
**Exception:** When the developer fundamentally needs to know the implementation. `PostgreSQL` is a capability (not `Database`) because the developer writes PostgreSQL-specific SQL and replication configs. Swapping to MariaDB would break the application, not just the infrastructure.
**Test:** If you could swap the underlying tool without rewriting any Score that uses the capability, the boundary is correct.
**Don't name capabilities after tools.** `SecretVault` not `OpenbaoStore`. `IdentityProvider` not `ZitadelAuth`. Think: what is the core developer need that leads to using this tool?
**Scores encapsulate operational complexity.** Move procedural knowledge (init sequences, retry logic, distribution-specific config) into Scores. A high-level example should be ~15 lines, not ~400 lines of imperative orchestration.
**Scores must be idempotent.** Running twice = same result as once. Use create-or-update, handle "already exists" gracefully.
**Scores must not depend on execution order.** Declare capability requirements via trait bounds, don't assume another Score ran first. If Score B needs what Score A provides, Score B should declare that capability as a trait bound.
See `docs/guides/writing-a-score.md` for the full guide.
## Conventions
- **Rust edition 2024**, resolver v2
- **Conventional commits**: `feat:`, `fix:`, `chore:`, `docs:`, `refactor:`
- **Small PRs**: max ~200 lines (excluding generated code), single-purpose
- **License**: GNU AGPL v3
- **Quality bar**: This framework demands high-quality engineering. The type system is a feature, not a burden. Leverage it. Prefer compile-time guarantees over runtime checks. Abstractions should be domain-level, not provider-specific.

1
CLAUDE.md Symbolic link
View File

@@ -0,0 +1 @@
AGENTS.md

2333
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,7 @@ members = [
"examples/*",
"private_repos/*",
"harmony",
"harmony_zitadel_auth",
"harmony_types",
"harmony_macros",
"harmony_tui",
@@ -28,6 +29,17 @@ members = [
"harmony_node_readiness",
"harmony-k8s",
"harmony_assets", "opnsense-codegen", "opnsense-api",
"fleet/harmony-fleet-operator",
"fleet/harmony-fleet-agent",
"fleet/harmony-fleet-auth",
"fleet/harmony-fleet-deploy",
"fleet/harmony-fleet-e2e",
"harmony-reconciler-contracts",
"examples/fleet_server_install",
"examples/fleet_staging_install",
"nats/jwt",
"nats/callout",
"nats/integration-test-callout",
]
[workspace.package]
@@ -63,7 +75,7 @@ kube = { version = "1.1.0", features = [
"ws",
"jsonpatch",
] }
k8s-openapi = { version = "0.25", features = ["v1_30"] }
k8s-openapi = { version = "0.25", features = ["v1_30", "schemars"] }
# TODO: serde_yaml is deprecated — replace it with serde-saphyr (https://github.com/bourumir-wyngs/serde-saphyr). See also https://github.com/sebastienrousseau/serde_yml
serde_yaml = "0.9"
serde-value = "0.7"
@@ -96,4 +108,14 @@ reqwest = { version = "0.12", features = [
assertor = "0.0.4"
tokio-test = "0.4"
anyhow = "1.0"
clap = { version = "4", features = ["derive"] }
clap = { version = "4", features = ["derive", "env"] }
# `websockets` enables `ws://` / `wss://` URL schemes. Without it the
# connector parses the URL but treats it as a raw TCP connect (no TLS,
# no HTTP Upgrade), so the agent against the OKD edge-TLS Route hangs
# 30s on `expected INFO, got nothing` because the router only speaks
# TLS+HTTPS on 443. The operator works without this feature because
# it talks to NATS in-cluster on `nats://...:4222` (raw TCP).
async-nats = { version = "0.45.0", features = ["websockets"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
toml = "0.8"

View File

@@ -0,0 +1,116 @@
# Phase 12: Code Review Items (April 2026)
Items identified during the `feat/opnsense-codegen` PR review that require further design or cross-cutting work.
## Completed in this PR
- **1.1** Remove panic in `haproxy_service_to_harmony` — returns `None` with `warn!()` instead of panicking on invalid bind address
- **1.2** Use `MacAddress` type from `harmony_types` in KVM module — replaced `String` MAC fields in `VmInterface`, `NetworkRef`, `DhcpHost`, and `set_interface_link`
- **1.3** Compare both firewalls in `FirewallPairTopology::list_static_mappings` — warns on mismatch between primary and backup
- **1.4** Remove no-op default for `LoadBalancer::ensure_wan_access` — now a required trait method
- **2.1** Remove `wan_firewall_ports` from `LoadBalancerScore` — callers handle WAN access separately
- **2.2** Add timeout to OKD bootstrap wait — 90min default, configurable via `HARMONY_OKD_BOOTSTRAP_TIMEOUT_MINUTES`
## Tasks (deferred)
### 12.1 Phased topology: LinuxHostTopology → KvmHostTopology
**Priority**: HIGH
**Status**: Not started
**Related**: Phase 6 (KVM E2E tests)
The `examples/opnsense_vm_integration/setup-libvirt.sh` shell script should be a Score using a phased topology approach. A `LinuxHostTopology` would be "promoted" to a `KvmHostTopology` after KVM packages are installed and libvirtd is running.
Key design challenges:
- Type-safe phase transitions (how does a topology gain new capabilities at runtime?)
- Package installation as a Score (distro-agnostic or trait-based)
- Service management (systemd enable/start) as a Score primitive
This is a major architectural feature that enables the full bare-metal-to-VM pipeline without shell scripts.
### 12.2 KvmHost validated type with compile-time macro
**Priority**: MEDIUM
**Status**: Not started
**Related**: 12.1
`KvmConnectionUri::RemoteSsh { host: String, username: String }` should become a validated `KvmHost` type with:
- A `kvm_host!("root@hypervisor1")` macro for compile-time validation
- Proper SSH URI parsing and validation
- Integration with the phased topology (12.1)
### 12.3 Unified directory module
**Priority**: LOW
**Status**: Not started
**Related**: Phase 9 (SSO + Config Hardening)
Currently four different directory patterns exist:
- `HARMONY_DATA_DIR` in `harmony/src/domain/config/mod.rs` (lazy_static, `BaseDirs`)
- `harmony_config` uses `ProjectDirs::from("io", "NationTech", "Harmony")`
- `harmony_secret` uses `BaseDirs::data_dir().join("harmony")`
- `openbao/setup.rs` has its own `keys_dir()` function
Unify into a single `harmony_dirs` module providing: `data_dir()`, `cache_dir()`, `secrets_dir()`, `keys_dir(namespace)`.
### 12.4 OpenBao unseal key storage — bootstrap secret management
**Priority**: MEDIUM
**Status**: Research needed
**Related**: Phase 9 (SSO + Config Hardening), task 9.8 (auto-unseal)
The chicken-and-egg problem: OpenBao needs to be initialized before it can be used as a secret store, but its unseal keys need to be stored somewhere. Current approach stores them as a local JSON file with 0600 permissions.
Industry solutions to evaluate:
- Upstream OpenBao/Vault storing downstream seal keys (transit auto-unseal)
- HSM-backed auto-unseal (cloud KMS or on-prem HSM)
- TPM-based local encryption
- Shamir-split recovery with multiple administrators
- TOTP-based vault (mentioned in review)
No perfect solution exists. This requires threat modeling specific to the decentralized micro-datacenter use case.
### 12.5 Use `vaultrs` crate for type-safe OpenBao provisioning
**Priority**: MEDIUM
**Status**: Not started
**Related**: Phase 9
Replace `kubectl exec bao ...` shell commands in `openbao/setup.rs` with typed `vaultrs` API calls. The `vaultrs` 0.7.4 crate (already a dependency in `harmony_secret`) provides full coverage:
| Current shell command | vaultrs equivalent |
|---|---|
| `bao operator init` | `vaultrs::sys::start_initialization()` |
| `bao operator unseal` | `vaultrs::sys::unseal()` |
| `bao secrets enable kv-v2` | `vaultrs::sys::mount::enable()` |
| `bao auth enable userpass` | `vaultrs::sys::auth::enable()` |
| `bao policy write` | `vaultrs::sys::policy::set()` |
| `bao write auth/userpass/users/...` | `vaultrs::auth::userpass::user::set()` |
| `bao auth enable jwt` | `vaultrs::sys::auth::enable()` |
| JWT config + role | `vaultrs::auth::oidc::config::set()` + `role::set()` |
**Prerequisite**: Requires port-forward or ingress to OpenBao (currently uses `kubectl exec` into the pod). Consider adding a `K8sPortForward` utility to `harmony-k8s`.
### 12.6 Topology proliferation — opinionated topologies leaking into narrow use cases
**Priority**: MEDIUM
**Status**: Not started
**Related**: 12.1 (phased topology), `feat/install-reconcile-operator-score`
`K8sAnywhereTopology` and `HAClusterTopology` have accumulated opinions — cert-manager install, tenant manager setup, helm probes, TLS passthrough, SSO wiring — that make them unfit for narrow, ad-hoc Score execution. Calling `ensure_ready()` on `K8sAnywhereTopology` to apply a single CRD installs a full product stack as a side effect; that's the opposite of what "make me ready" should mean.
Concrete example: `fleet/harmony-fleet-operator/src/install.rs` needed a topology that satisfies `K8sclient` for a single `K8sResourceScore::<CustomResourceDefinition>` apply. `K8sAnywhereTopology` was wrong (too heavy); `HAClusterTopology` was wrong (bare-metal). Work-around: a 30-line inline `InstallTopology` that wraps a pre-built `K8sClient` and has a noop `ensure_ready`. That file flags the architectural smell in its doc comment and points back to this entry.
If every narrow Score ends up vendoring its own ad-hoc topology, we get exactly the proliferation this entry is meant to prevent.
**Design direction (to be refined, not prescribed):**
- A **minimal ad-hoc topology** in harmony — `K8sBareTopology` or similar — that carries a `K8sClient` and implements `K8sclient` + noop `ensure_ready`. One screen of code. Consumed by any Score that just needs to apply a typed resource against an existing cluster.
- Existing opinionated topologies (`K8sAnywhereTopology`) stay, but grow a clear doctrine: `ensure_ready` is for *their* product setup, callers who don't need that product use the bare topology.
- Longer-term: unbundle the product-setup logic from `K8sAnywhereTopology::ensure_ready` into discrete Scores the product compositions explicitly run — so the distinction between "I'm installing a cluster" and "I'm using a cluster" is a composition choice, not a topology choice.
**What "good" looks like:**
- Adding a new ad-hoc Score against k8s doesn't require inventing a new topology.
- `K8sAnywhereTopology` stops being the default reach and starts being a deliberate product choice.
- Test: can we delete the inline `InstallTopology` in `fleet/harmony-fleet-operator/src/install.rs` by replacing it with a one-liner `K8sBareTopology::from_env()`? That's the smoke test for "we fixed the proliferation."

View File

@@ -0,0 +1,399 @@
# IoT Platform v0.1 and beyond — forward plan
Authoritative forward plan for the NationTech decentralized-infra /
IoT platform, written after the v0 walking skeleton shipped
(see `v0_walking_skeleton.md` for the historical diary). Organized as
five chapters in execution order.
## State of the world (as of 2026-04-23)
**Green, end-to-end:**
- CRD → operator → NATS JetStream KV write path (`smoke-a1.sh`).
- Agent watches KV, reconciles podman containers (`smoke-a1.sh`).
- VM-as-device provisioning: cloud-init + fleet-agent install + NATS
smoke (`smoke-a3.sh`), x86_64 (native KVM) and aarch64 (TCG).
- Power-cycle / reboot resilience (`smoke-a3.sh` phase 5).
- aarch64 cross-compile of the agent (no Harmony modules need to
feature-gate aarch64).
- Operator installed via a harmony Score (typed Rust, no yaml).
- `harmony-reconciler-contracts` crate — cross-boundary types
(bucket names, key helpers, `DeviceInfo`, `DeploymentState`,
`HeartbeatPayload`, `DeploymentName`, `Id` re-export).
**Chapter 1 shipped** (2026-04-21): composed end-to-end demo
(`smoke-a4.sh`) — operator in k3d + in-cluster NATS + ARM VM +
typed-Rust CR applier + hand-off menu + `--auto` regression. Green
on x86_64 (native KVM) and aarch64 (TCG).
**Chapter 2 shipped** (2026-04-23): selector-based targeting +
Device CRD + `.status.aggregate` reflect-back.
`Deployment.spec.targetSelector: LabelSelector` resolves against cluster-scoped
`Device` CRs materialized from NATS `device-info`. Operator writes
`desired-state` KV per matched pair, patches
`.status.aggregate` (matchedDeviceCount / succeeded / failed /
pending / lastError) at 1 Hz. Load-tested to 10 000 devices ×
1 000 Deployments at 10 000 KV writes/s sustained, zero errors.
**Not yet wired (real v0.1 work still to go):**
- Helm packaging of the operator (Chapter 3).
- Zitadel + OpenBao auth (per-device credentials, SSO for
operator users). Placeholder `CredentialSource` trait on the
agent side (Chapter 4).
- Any frontend (Chapter 5).
- Small quality items (not blockers): agent config-driven labels,
`matchExpressions` in selectors, `Device.status.conditions`
populated from heartbeat staleness.
**Verified during planning** (so future implementation doesn't
have to re-litigate):
- **Upgrade already works.** `reconciler.rs::apply` byte-compares
serialized score payloads; drift triggers re-reconcile.
`PodmanTopology::ensure_service_running` removes then re-creates
containers on spec drift. No "stale + new" window.
- **The polymorphism stays.** `ReconcileScore` is an externally-tagged
enum; adding `OkdApplyV0` later is additive.
**Surprises since v0 started** (for context, none architectural):
- Arch `edk2-aarch64-202602-2` shipped empty firmware blobs;
`202508-1` ships unpadded edk2 that needs 64 MiB pflash padding.
Fixed via runtime discovery + padding in `modules/kvm/firmware.rs`.
- MTTCG isn't default for cross-arch TCG on QEMU 10.2; force via
`qemu:commandline` override. `pauth-impdef=on` likewise a
qemu:commandline opt-in.
- `ensure_vm` is idempotent on "domain exists" — re-apply of a
changed XML requires manual `undefine --nvram --remove-all-storage`.
Noted as a follow-up in the code comments.
---
## Chapter 1 — Hands-on end-to-end demo **[SHIPPED 2026-04-21]**
**Goal:** the user runs one command, watches operator + NATS + ARM
VM come up, then drives a CRD through the full loop by hand:
`kubectl apply` it (manually or via a typed Rust applier), watch the
operator log "acquired," check the NATS KV store with `natsbox`,
SSH/console into the VM, `curl` the running nginx container from
the workstation.
### User-facing requirements (explicit)
- **No yaml fixtures.** Sample `Deployment` CRs constructed in
typed Rust using `DeploymentSpec` + `PodmanV0Score`. Same
discipline as the `install` Score that replaced `gen-crd | kubectl
apply`.
- **ArgoCD deferred.** User's production clusters have it; bringing
it into the smoke harness adds setup overhead without validating
anything `helm install` doesn't. Chapter 3 produces the chart;
ArgoCD integration is a later operational concern.
- **Operator logs every CR it acquires** — `controller.rs` already
does `tracing::info!(%ns, %name, "reconcile")`; verify the output
reads well in the command-menu hand-off.
- **natsbox debugging is first-class.** Script prints exact
natsbox one-liners at hand-off so the user can inspect KV state.
- **In-cluster NATS.** Not a side-by-side podman container (as
smoke-a1 does today). Expose to the libvirt VM via k3d
loadbalancer port mapping.
### Design decisions
- **Rust CR applier.** New binary `examples/harmony_apply_deployment/`.
CLI flags `--name --namespace --target-device --image --port
--delete`. Constructs the `Deployment` CR via
`kube::Api<Deployment>` + typed `DeploymentSpec`; calls
`api.apply(...)`. Can also `--print` the CR JSON to stdout so
`kubectl apply -f -` still works from the terminal.
- **smoke-a4.sh orchestration stays bash for now.** User agreed
this is test-harness scope, not framework path; converting it
to Rust is "not as important right now."
- **Hand-off is the default mode**, not `--keep`. The whole point
of Chapter 1 is that the user drives the last stage interactively.
`smoke-a4.sh` brings everything up, applies *nothing*, prints
the command menu, waits on `INT/TERM` to tear down. `--auto`
runs the full apply/curl/upgrade/delete regression for CI.
- **In-cluster NATS path.** Preferred: use `harmony::modules::nats`
if it has a lightweight single-node / no-supercluster mode.
Fallback: typed `K8sResourceScore` applying a minimal Deployment
+ NodePort Service. 15-min research task before committing.
### Composed smoke phases (`smoke-a4.sh`)
1. k3d cluster up with `-p "4222:4222@loadbalancer"` so the host
port 4222 forwards into the cluster. Reachable from the
libvirt VM via the gateway IP (typically `192.168.122.1:4222`).
2. NATS in-cluster via the chosen path (harmony module or direct
K8sResourceScore). Wait for readiness.
3. Install CRD via the operator's `install` subcommand (typed Rust).
4. Spawn operator as a host-side process (same pattern as
smoke-a1). Operator connects to `nats://localhost:4222`.
5. Provision ARM VM via `example_iot_vm_setup` (same entry point
smoke-a3 uses). Agent configured to connect to
`nats://<libvirt_gateway>:4222` — discover the gateway IP via
`virsh net-dumpxml default`, as smoke-a3 already does.
6. Sanity: `kubectl wait ... crd Established`, operator logged
"KV bucket ready", agent logged "watching KV keys",
`status.<device>` present in `agent-status` bucket.
7. Hand off. Print the command menu below. Exit 0 with a cleanup
trap on `INT/TERM`.
### Command menu at hand-off
- `kubectl get deployments.fleet.nationtech.io -A -w` — watch CR
reconcile reactively.
- `cargo run -q -p example_harmony_apply_deployment -- --image
nginx:latest --target-device $TARGET_DEVICE` — apply an nginx
deployment via typed Rust.
- `cargo run -q -p example_harmony_apply_deployment -- --print
--image nginx:latest --target-device $TARGET_DEVICE |
kubectl apply -f -` — same thing, through kubectl.
- `ssh -i $SSH_KEY fleet-admin@$VM_IP` — connect to the VM.
- `virsh console $VM_NAME --force` — serial console alternative.
- `podman --url ssh://fleet-admin@$VM_IP/... ps` or ssh + `podman ps`
— list containers on the VM from the workstation.
- `podman run --rm docker.io/natsio/nats-box nats --server
nats://localhost:4222 kv ls desired-state` — list desired
state keys (from the host).
- `podman run --rm ... nats kv get desired-state
'<device>.<deployment>' --raw` — dump a specific desired state.
- `podman run --rm ... nats kv get agent-status
'status.<device>' --raw` — dump the heartbeat.
- `curl http://$VM_IP:8080/` — hit the deployed nginx.
### `--auto` path (for regression)
1. Apply `nginx:latest`, wait for container on VM, `curl` 200.
2. Apply `nginx:1.26` (upgrade), wait for container *id* to change,
`curl` 200 against the new container.
3. Apply `--delete`, wait for container gone from VM.
### Files
- **NEW** `examples/harmony_apply_deployment/Cargo.toml` +
`src/main.rs` — typed applier.
- **NEW** `fleet/scripts/smoke-a4.sh`.
- **NO yaml fixtures.** Rust CLI flags cover the shape.
- Optional: factor shared smoke phases (NATS up, k3d up, operator
spawn, VM provision) into `fleet/scripts/lib/` if the duplication
across a1/a3/a4 becomes obvious. Don't force it.
### NATS exposure — implementation-time notes
- k3d `@loadbalancer` port mapping binds the host's `0.0.0.0:4222`
by default; libvirt VMs on `virbr0` can reach it via the gateway
IP. No special NAT config required.
- Fallback if environmental snag: keep the side-by-side podman
container on an opt-in `NATS_MODE=podman` flag. Don't default
to that — user explicitly asked for in-cluster.
### Verification
- Fresh host: `ARCH=aarch64 ./fleet/scripts/smoke-a4.sh` completes
in 8-15 min, prints the command menu.
- `ARCH=aarch64 ./fleet/scripts/smoke-a4.sh --auto` PASSes
end-to-end including upgrade id-change assertion.
- x86_64 (`ARCH=x86-64`) completes in 2-5 min.
### Explicitly out of scope
- `AgentStatus` / `DeploymentStatus` enrichment — Chapter 2.
- Helm chart, ArgoCD, auth, frontend — later chapters.
- Lifting the applier into a reusable `ApplyDeploymentScore` —
only if a second consumer appears.
---
## Chapter 2 — Status reflect-back + selector-based targeting **[SHIPPED 2026-04-23]**
**Goal:** CRD `.status` reflects fleet reality — per-deployment
success/failure/pending counts, last-error surface, freshness. The
Deployment CR targets devices by label selector, not by id list.
> The shipped design replaces the original `AgentStatus` + list-of-ids
> proposal wholesale. See `chapter_4_aggregation_scale.md` for the
> superseded design-doc archaeology. Commits:
> `refactor(iot): delete legacy AgentStatus path`,
> `refactor(iot): operator watches device-state KV directly; drop event stream`,
> `refactor(iot): Deployment.targetSelector + Device CRD (DaemonSet-like)`.
### What shipped
**Wire format** (in `harmony-reconciler-contracts`): four per-concern
payloads on dedicated NATS KV buckets. No monolithic per-device blob,
no separate event stream.
| Type | Bucket | Cadence |
|------|--------|---------|
| `DeviceInfo` | `device-info` | on startup + label/inventory change |
| `DeploymentState` | `device-state` | on reconcile phase transition |
| `HeartbeatPayload` | `device-heartbeat` | every 30 s |
**CRDs.** Two cluster resources:
- `Deployment` (namespaced) — `spec.targetSelector: LabelSelector`
(standard K8s `matchLabels` / `matchExpressions`). No device list
on spec. `.status.aggregate` carries `matchedDeviceCount`,
`succeeded`, `failed`, `pending`, `lastError`.
- `Device` (cluster-scoped, like `Node`) — `metadata.labels` carries
the device's routing labels; `spec.inventory` holds the hardware/OS
snapshot; `status.conditions` is reserved for liveness (populated
lazily by a future heartbeat-freshness reconciler, not every ping).
**Operator tasks** (three concurrent loops in one process):
1. `controller` — validates Deployment CR names, holds the finalizer
that cleans `desired-state.<device>.<deployment>` KV entries on
delete. No writes on apply (aggregator handles that).
2. `device_reconciler` — watches the `device-info` KV;
server-side-applies a `Device` CR per `DeviceInfo` payload, with label
sanitization. Agents remain kube-unaware.
3. `fleet_aggregator` — three caches driven by watches (Deployment
CRs, Device CRs, `device-state` KV). On any change, resolves
each selector against the Device cache, writes/deletes
`desired-state` KV entries for diffed matches, and patches
`.status.aggregate` at 1 Hz for the CRs whose counters moved.
**Agents** publish `device-id=<id>` as a default DeviceInfo label, so
targeting a single device with `matchLabels: {device-id: pi-42}` is
zero-config. User-defined labels layer on from agent config (scoped
out of this chapter; follow-up item).
### Scale proof
`fleet/scripts/load-test.sh` + `examples/fleet_load_test` simulate N
devices across M Deployments, driving `device-state` KV updates at a
configurable cadence while the full operator stack runs against a
local k3d apiserver. Verified:
- 100 devices / 10 groups / 1 Hz / 60 s — 100 writes/s sustained,
all 10 CR aggregates converge.
- 10 000 devices / 1 000 groups / 1 Hz / 120 s — ~10 000 writes/s
sustained, 0 errors, all 1 000 CR aggregates correct
(`matchedDeviceCount == expected`, `succeeded + failed + pending
== matched`). Same envelope before and after the selector rewrite.
### Out of scope in this chapter (follow-ups)
- Agent config-driven labels (`[labels]` in agent toml → DeviceInfo).
~30 lines; deferred until a concrete need lands.
- `matchExpressions` evaluator. Operator currently supports
`matchLabels` only and logs a warning for expression-bearing
selectors. ~50 lines; deferred.
- `Device.status.conditions` populated from heartbeat staleness
(Reachable / Stale transitions). Liveness is computable today by
reading `device-heartbeat` directly; CR-side reflection is a
convenience. ~100 lines; deferred.
- Full journald log streaming. The `.status.aggregate.lastError`
surface covers the user's reflect-back requirement for now.
- Multi-device regression smoke — defer until real hardware or a
second VM is around.
---
## Chapter 3 — Helm chart (ArgoCD deferred)
**Goal:** operator ships as a versioned helm chart with CRD
version-locked inside.
User clarified this session: ArgoCD exists in production; all it
does is apply resources from the chart. Standing up ArgoCD in the
smoke adds setup overhead with no incremental validation value.
Chapter 3 produces the chart + validates `helm install / helm
upgrade` lifecycles. ArgoCD consumption is a user operational
concern downstream.
### Sketch
- Chart location: `fleet/harmony-fleet-operator/chart/` (or sibling repo —
defer decision to implementation time).
- Templates: Namespace, SA, ClusterRole, ClusterRoleBinding,
Deployment (operator pod), CRD.
- **CRD yaml in the chart is generated at chart-publish time** from
the Rust `Deployment::crd()`. One-off release artifact, not
framework path — consistent with "no yaml in framework code."
- Values: operator image tag, NATS URL, log level.
- Smoke: `helm install` into k3d → CR apply → same assertions as
Chapter 1.
### Open questions
- Chart repo: subdir vs. separate git repo.
- CRD install mechanism: chart hook vs. templates directory.
Drives CRD upgrade story.
---
## Chapter 4 — Auth: Zitadel + OpenBao + per-device identity
**Goal:** per-device granular NATS credentials; SSO for operator
users; OpenBao policy per device; JWT bootstrap from Zitadel.
Zitadel + OpenBao are already ~99% integrated in harmony; this
chapter is wiring the IoT-specific flows.
### Sketch
- Agent's `CredentialSource` trait (already abstract in agent
`config.rs`) gets a Zitadel-JWT-backed implementation. Mints
short-lived NATS creds via OpenBao auth callout.
- Remove the shared-credentials `toml-shared` variant (v0 demo
leftover).
- Availability: auth-callout caches policies, tolerates OpenBao
outages.
- SSO for operator users (separate flow): Zitadel groups →
Kubernetes RBAC subjects on the `Deployment` CRD.
---
## Chapter 5 — Frontend (last)
**Goal:** operator-friendly UI for the decentralized platform.
Form factor undecided: Leptos web dashboard, CLI extension to
`harmony_cli`, or a TUI. Minimum viable product: read-only view of
fleet state (devices + deployments + aggregated status) powered by
the CRD `.status` from Chapter 2. Aspiration: write operations with
auth from Chapter 4.
---
## Chapter 6 — Customer demo rehearsal **[in progress]**
48-hour customer demo prep. PO assessment concluded that promising a
real-OKD deployment without first proving the JWT-auth chain is
reckless. **VM-based rehearsal first**, OKD second.
The rehearsal extends `smoke-a4` (k3d + libvirt VM + agent + apply
CR + reconcile podman) with **Zitadel + auth callout + agent JWT
auth**. Two devices + one admin. Same code paths as production —
only the cluster topology differs.
Detailed plan: [`v0_demo_e2e.md`](v0_demo_e2e.md).
Once the VM rehearsal is green (success criteria in that doc), the
residual deltas to ship to real OKD are configuration, not new code.
---
## Principles — what we've learned and want to keep doing
- **No yaml in framework code paths.** Every kube-rs type is
typed; every Score apply goes through typed Rust. Yaml generation
happens only at chart-publish time, never at runtime.
- **Scores describe desired state; topologies expose capabilities.**
Prefer adding capability traits over thickening a single topology.
- **Minimal topologies for ad-hoc Score execution.** `K8sAnywhereTopology`
has too many opinions (cert-manager install, tenant-manager bootstrap,
helm probes) for narrow apply-a-CRD use cases. See ROADMAP
§12.6 — a lean shared `K8sBareTopology` is the durable fix.
- **Cross-boundary wire types in `harmony-reconciler-contracts`**,
everything else in its natural crate.
- **Never ship untested code.** Every commit that changes runtime
behavior is verified against a smoke script before landing.
Cargo check + unit tests aren't enough.
- **Prove claims about upstream before blaming upstream.** The
Arch edk2 investigation showed this matters; see
`memory/feedback_prove_before_blaming_upstream.md`.

View File

@@ -0,0 +1,231 @@
# Fleet Platform v0.2 — 3-day production push
Authoritative plan for the next three days. Picks up where
`v0_1_plan.md` left the chapter structure and supersedes its forward
chapters where they conflict. Written 2026-05-06, end of the
`feat/iot-walking-skeleton` branch (31 996 LOC, 184 commits).
## State coming in
- Skeleton end-to-end works against an OKD staging cluster: Zitadel
+ NATS + auth callout + operator + agent (one VM today, real Pi
tomorrow). Verified by hand 2026-05-06.
- ~10 ancillary PRs still open across the team. Branch graph is
noisy.
- `harmony/modules/fleet/` is the wrong long-term home for the fleet
code. Flagged in the April 2026 code review. Reasons we kept it
there during bring-up are subtle (cross-module dependencies on
`K8sAnywhereTopology`, `HelmChartScore`, `K8sResourceScore`,
`harmony_secret`, the `Topology` capability traits) — those need
to be written down before the file move, not after. **ADR
pending; not started yet.**
- Agent upgrade path is undefined. Without it we cannot ship a
v0.1 agent into the field.
- ~408 compilation warnings. Not blocking but needs to be 0 before
we put `-Dwarnings` in CI.
## Strategy
This isn't 10 weeks of scaffolding. It's three days of locking the
**API surface** so the inevitable refactor — moving fleet out of
`harmony/modules/fleet/` into `fleet/harmony-fleet/`, splitting
`K8sAnywhereTopology` into `K8sBareTopology`, etc. — is mechanical
when we get to it.
The frame from JG's *Pour l'amour des compilateurs* talk applies
directly: **design the brick before moving the brick.** Physical
relocation is cheap. Redesigning a public API after customers
depend on it is expensive. We use these three days to make sure
the type-level contract is what we want it to be at v1.0, even if
the file paths still smell like v0.1.
## Day 1 — Lock the brick design
**Goal:** a fleet façade stable enough to ship to production and
refactor freely afterwards.
### 1.1 Decompose `FleetDeviceAuth` to *resolved states only*
Today: `TomlShared | ZitadelJwt | ZitadelEnroll`. Cardinality 3.
After: `ZitadelJwt`-shape only. Cardinality 1.
- `TomlShared` — v0 dev cruft, no production caller. Delete.
- `ZitadelEnroll` — a *pre-resolution* state (carries unresolved
admin credentials). Doesn't belong in a type that represents
"the agent's NATS auth on disk". Move to its own type
(`DeviceEnrollmentIntent`) used only by the enrollment Score
+ binary. Resolution produces a `ZitadelJwt` and that's what
the agent sees.
The `render_toml` match on `&self.auth` collapses to one arm. The
"is this resolved yet?" branch class disappears. Test
`render_toml_zitadel_enroll_renders_same_as_zitadel_jwt` becomes
unnecessary (the question is undefined; you can't render an
unresolved auth).
### 1.2 Define the `fleet` façade
What does code outside the fleet module see? Today that's a deep
walk into `harmony::modules::fleet::operator::chart::ChartOptions`.
Leakage. Lock the seam:
```text
harmony::modules::fleet::
FleetServerScore (existing — composed install)
FleetDeviceEnrollScore (new — wraps fleet_device_enroll)
FleetDeviceSetupScore (existing — keeps API)
FleetDeviceAuth (resolved-only, per 1.1)
AdminAuth (existing)
// sealed:
operator:: pub(crate)
setup_score's internals pub(crate)
chart:: pub(crate)
```
Once locked, the *file location* doesn't matter. `pub use`
re-exports preserve callers' imports across the eventual physical
move.
### 1.3 Defer the placement ADR
JG isn't satisfied with the design yet. ADR-021 stays in *proposed*
limbo until the seam from 1.2 is committed and we've lived with it
for a sprint.
**Day 1 done when:** fleet façade committed, `TomlShared` and
`ZitadelEnroll` removed from `FleetDeviceAuth`, every existing
caller compiles unchanged, no file moves.
---
## Day 2 — Polish E2E + ship the upgrade ADR
Two streams in parallel.
### Stream A — E2E hardening (~½ day)
- **A.1 Operator graceful degradation on bad device_id.** The CLI
now rejects bad ids upfront, but a stray bad KV entry shouldn't
take the operator down. Log + skip, don't restart-loop.
- **A.2 Persist `nats_auth_pass` and the issuer NKey via
`harmony_secret`.** The regenerate-every-run footgun bit us
twice on 2026-05-06. Make these `Secret`s the same way `NatsAdmin`
and `ZitadelAdmin` already are.
- **A.3 Single regression script.** `fleet/scripts/e2e-prod-shape.sh`.
Full bring-up + enroll + assert against a target cluster. Same
shape as the existing `smoke-a*.sh`. CI consumes this later.
### Stream B — ADR-022: Agent upgrade procedure (~½ day)
The ADR is the deliverable, not the implementation. Specifies the
mechanism so anyone can implement it later without inventing the
design. See `docs/adr/022-fleet-agent-upgrade.md`.
Summary of the design (full detail in the ADR):
- **K8s rolling-update shape, single-host.** Wait for in-flight
reconciles to complete + all managed services healthy + a
scheduling lock from the operator before swapping.
- **Versioned binary layout on disk:**
```
/usr/bin/fleet-agent-v0.1.1
/usr/bin/fleet-agent-v0.1.2
/usr/local/bin/fleet-agent → symlink to current
```
No version is ever erased — N-history is the rollback target.
- **Old verifies new + reports up.** Old agent stages new,
smoke-tests it (`--self-test`), starts it, watches for the new
agent's heartbeat to land in NATS with the new version. Only then
does the operator know the upgrade succeeded.
- **Operator drives the cutover.** Operator sends an explicit stop
signal to the old agent over NATS. Old agent exits cleanly. New
agent is already running and takes over.
- **Reverse path is identical.** Roll back = operator publishes
desired_version = previous; new agent does the same dance to
hand off to old.
**Day 2 done when:** A.1–A.3 committed, ADR-022 landed, regression
script green against staging.
---
## Day 3 — Production deploy
**Goal:** customer cluster on v0.1, runbook accurate, signed off.
- **3.1** Tag `v0.1.0` from `master` after `feat/iot-walking-skeleton`
is merged.
- **3.2** Run `e2e-prod-shape.sh` against the customer's prod OKD
cluster. Every diff between scripted and reality goes back into
the script — so the script *is* the runbook.
- **3.3** Production-shape doc twin of
`docs/guides/fleet-staging-install.md`. Deltas only, ~50 lines.
- **3.4** `docs/guides/fleet-device-enrollment.md` — operator-facing
enrollment runbook. Captures the SSO `--admin-oidc-client-id`
resolution and the `--device-id` RFC1123 validation we locked in
on 2026-05-06.
- **3.5** Operational basics: revoke a device, rotate a key, read
the operator's logs, read NATS. Bullet lists are fine —
bullet-list-quality docs beat missing docs.
**Day 3 done when:** customer's prod cluster runs real workloads,
the runbook is what we actually used, and we'd hand operations to
someone else.
---
## In parallel — frontend (junior, ~1 week, target Day 5 merge)
Junior owns end-to-end. Spec:
- **F.1** Read-only Leptos SPA. Devices + Deployments + per-device
drilldown (DeviceInfo + last-heartbeat + agent version).
- **F.2** NATS tail panel. SSE stream of `device-info` and
`device-state` updates, plain text.
- **F.3** Served by the operator pod itself (one less Deployment).
SSO via the existing Zitadel device-code app (`harmony-cli`).
- **F.4** **Not** in v0.1: write paths, metrics dashboards,
fleet-wide rollout views, NATS GUI. None of those.
This validates the platform is observable from outside the
operator's logs — the customer's specific ask.
---
## What slips to v0.2+ (post-prod backlog)
No calendar pressure on these; sequence after we see real customer
usage.
| Item | Why deferred | Cost when we do it |
|---|---|---|
| Pluggable `harmony` CLI (kubectl-style PATH discovery) + `harmony-fleet` plugin | Customer doesn't run it themselves yet; we do. Examples are good enough. | ~1 week, mostly rename/restructure given Day 1's API freeze. |
| Physical refactor of `harmony/modules/fleet/` → `fleet/harmony-fleet/` | The Day-1 façade settles the design; the move is mechanical and the ADR for it is still in draft. | ~2 days. |
| Agent upgrade implementation (ADR ships Day 2; impl later) | First customer fleet is small enough to hand-upgrade if needed. | ~1 week. |
| ArgoCD chart publishing | Customer uses ArgoCD downstream but their initial deploy goes through harmony directly. | ~3 days. |
| Full CI e2e (k3d nightly + libvirt + OKD daily) | Manual rehearsal works for one customer. | ~1 week + runner capacity. |
| OpenBao integration (replaces `ZitadelClientConfig` cache file) | Cache file works for single-operator use; OpenBao is the multi-operator answer. | ~1 week. |
| `harmony run <ScoreName> --field=value` ad-hoc Score CLI | No v0.1 customer flow needs it. | ~2 weeks (Score-flag derive macro is the hard part). |
| Fleet-wide rollout strategies (canary, %-based) on top of the agent-upgrade primitive | Single-device upgrade is sufficient until >100-device fleets. | ~1 week. |
| Drop `K8sAnywhereTopology` for ad-hoc Score execution; introduce `K8sBareTopology` | Per the existing v0_1 §"Principles". Not blocking prod. | ~3 days. |
---
## Principles (kept verbatim from v0_1, still load-bearing)
- **No yaml in framework code paths.** Typed kube-rs everywhere.
- **Scores describe desired state; topologies expose capabilities.**
- **Cross-boundary wire types in `harmony-reconciler-contracts`.**
- **Never ship untested code.**
- **Prove claims about upstream before blaming upstream.**
Adding one for v0.2:
- **Design the brick before moving the brick.** Lock the public API
contract first; physical relocation later. Cardinality-matched
types, "make impossible states impossible" — the type system is
the deterministic feedback loop that scales with LLM-era code
generation throughput. (See JG's *Pour l'amour des compilateurs*,
Botpress Meetup, 2026-04-30.)

View File

@@ -58,6 +58,8 @@ async fn main() {
}
println!("--------------");
#[allow(unreachable_code)]
{
todo!();
let channel_name = "1";
brocade.clear_port_channel(channel_name).await.unwrap();
@@ -72,4 +74,5 @@ async fn main() {
.create_port_channel(channel_id, channel_name, &ports)
.await
.unwrap();
}
}

View File

@@ -140,7 +140,7 @@ impl BrocadeClient for FastIronClient {
async fn configure_interfaces(
&self,
_interfaces: &Vec<(String, PortOperatingMode)>,
_interfaces: &[(String, PortOperatingMode)],
) -> Result<(), Error> {
todo!()
}

View File

@@ -208,7 +208,7 @@ pub trait BrocadeClient: std::fmt::Debug {
/// Configures a set of interfaces to be operated with a specified mode (access ports, ISL, etc.).
async fn configure_interfaces(
&self,
interfaces: &Vec<(String, PortOperatingMode)>,
interfaces: &[(String, PortOperatingMode)],
) -> Result<(), Error>;
/// Scans the existing configuration to find the next available (unused)

View File

@@ -115,8 +115,8 @@ impl NetworkOperatingSystemClient {
fn map_configure_interfaces_error(&self, err: Error) -> Error {
debug!("[Brocade] {err}");
if let Error::CommandError(message) = &err {
if message.contains("switchport")
if let Error::CommandError(message) = &err
&& message.contains("switchport")
&& message.contains("Cannot configure aggregator member")
{
let re = Regex::new(r"\(conf-if-([a-zA-Z]+)-([\d/]+)\)#").unwrap();
@@ -131,7 +131,6 @@ impl NetworkOperatingSystemClient {
));
}
}
}
err
}
@@ -187,7 +186,7 @@ impl BrocadeClient for NetworkOperatingSystemClient {
async fn configure_interfaces(
&self,
interfaces: &Vec<(String, PortOperatingMode)>,
interfaces: &[(String, PortOperatingMode)],
) -> Result<(), Error> {
info!("[Brocade] Configuring {} interface(s)...", interfaces.len());

View File

@@ -35,7 +35,6 @@ impl SshOptions {
..Default::default()
},
port,
..Default::default()
}
}
@@ -47,7 +46,6 @@ impl SshOptions {
..Default::default()
},
port,
..Default::default()
}
}
}
@@ -72,8 +70,10 @@ pub async fn try_init_client(
ip: &std::net::IpAddr,
base_options: &BrocadeOptions,
) -> Result<BrocadeOptions, Error> {
let mut default = SshOptions::default();
default.port = base_options.ssh.port;
let default = SshOptions {
port: base_options.ssh.port,
..Default::default()
};
let ssh_options = vec![
default,
SshOptions::ecdhsa_sha2_nistp256(base_options.ssh.port),

127
docs/ARCHITECTURE.md Normal file
View File

@@ -0,0 +1,127 @@
# Architecture
Starting point for a human-readable architecture overview of Harmony.
The `docs/` directory has multiple overlapping documents
(`concepts.md`, `architecture-challenges.md`, `cyborg-metaphor.md`,
the `concepts/` subdirectory, ADRs under `docs/adr/`, the in-repo
`CLAUDE.md`). Cohesion work is scheduled for a follow-up PR — this
file is the new front door and the placeholder that work will
build from.
## What Harmony is
An orchestration framework for **decentralized micro datacenters**:
small computing clusters deployed in homes, offices, and community
spaces instead of hyperscaler facilities. The framework's goal is
to make infrastructure-as-code **compile-time-safe** — invalid
configurations become Rust compile errors, not 3AM YAML
surprises.
Not a wrapper around existing tools. A single Rust codebase that
replaces Terraform/Ansible/Helm in its target domain by making the
Rust type system the configuration language.
## The framework primer
The Score-Topology-Interpret pattern, the hexagonal architecture,
the module layout, and the conventions are all documented in
`CLAUDE.md` at the repo root (also available as `AGENTS.md`). That
file is kept current as the canonical entry point. Read it first.
Key ADRs that lock the foundational decisions:
- **ADR-001** — Rust chosen for type system + refactoring safety.
- **ADR-002** — Hexagonal architecture; domain isolated from
adapters.
- **ADR-003** — Infrastructure abstractions at the domain level,
not the provider level (no vendor lock-in).
- **ADR-005** — Real Rust DSL over YAML/HCL.
- **ADR-009** — Helm charts inflate into vanilla K8s YAML and
flow through the Score pipeline.
- **ADR-015** — Higher-order topologies via blanket trait impls.
- **ADR-016** — Agent-based architecture with NATS JetStream for
the global mesh.
- **ADR-020** — Unified config + secret management.
- **ADR-023** — Deploy architecture: Scores everywhere (including
tests), per-app `*-deploy` crates, deploy blocks on smoke-test,
topologies are compile-time.
The full ADR set lives under `docs/adr/`.
## Why Harmony (the framework choice)
Three load-bearing reasons that shape every other decision:
1. **The compiler is the validator.** Existing IaC tools validate
at runtime, after a deploy has already been kicked off. Harmony
validates at `cargo check`. The cost of a bad configuration
drops from "1 AM page" to "red squiggle in your editor."
2. **Decentralized by design.** The target deployment surface is
thousands of small clusters in homes, offices, partner sites,
field-deployed devices — not three hyperscaler regions. The
framework's primitives reflect that: topologies are
parameterized over physical placement, the agent mesh is
NATS-based with strict-ordered supercluster semantics, and the
capability traits never assume a centralized control plane.
3. **The team is its own largest customer.** NationTech runs
multiple OKD clusters already and uses Harmony to manage them.
Every dogfooded primitive is a primitive that's been
pressure-tested against real operational pain before it ships to
external customers.
## Why custom over k3s + ArgoCD (the fleet-platform choice)
A specific instance of the framework-choice reasoning, decided
during the fleet platform v0 work:
- **End-customer engineers are mechanical / electrical /
chemical, not Kubernetes-literate.** A k3s device forces them
to learn `kubectl` / CRDs / CNI. A single Rust binary plus
`podman` is debuggable with `systemctl`, `journalctl`, `ps` —
tools they already use daily.
- **The platform bet is strategic, not technical.** Building a
custom platform on the "no vendor lock-in, decentralized,
open-source" positioning differentiates NationTech as a platform
company; an ArgoCD-on-k3s integration positions it as an
integration shop on someone else's runtime.
- **NATS is a coordination fabric, not a queue.** Federation
across regions, strict ordering across the supercluster, and
the "operator in multiple clusters, deployments coming from
everywhere" topology all depend on this choice. ArgoCD doesn't
federate naturally; that's a fundamental shape problem, not a
feature gap.
- **Harmony's daemon-mode `Score::interpret()` is already
production**, running CNPG PostgreSQL failover today via
`harmony_agent`. The fleet agent is the same pattern at a
smaller scale.
## Decision hierarchy when contributing
When the framework is silent on a question, resolve in this
order:
1. **Does this preserve the compile-time-safety guarantee?** If
the answer involves "we'll validate it at runtime," reach for
a type instead.
2. **Does this preserve a capability boundary?** Capability traits
(`DnsServer`, `LoadBalancer`, `IdentityProvider`, …) are the
seam between domain and adapters. If unsure, favor the
boundary.
3. **Is this in the smallest possible PR?** Two ~200-line PRs
beat one 400-line PR. ADR-002 placement and convention rules
live in `CLAUDE.md`.
4. **Would this introduce a string where a type would do?** Pull
the type. The `ScoreEnvelope` mistake (a string-wrapped
discriminator that re-implemented `serde` tagged enums by
hand) is the canonical anti-pattern.
5. **Is this aligned with the existing module layout?** Use the
existing patterns (`*-deploy` crates per ADR-023,
`harmony/src/modules/<thing>/` for framework primitives).
Don't invent placement; ask if you can't fit the change into
the current shape.
If after all of the above the answer is still unclear, surface
the question in a small ADR draft under `docs/adr/drafts/`
rather than guessing in code.

View File

@@ -23,6 +23,7 @@
- [Writing a Score](./guides/writing-a-score.md)
- [Writing a Topology](./guides/writing-a-topology.md)
- [Adding Capabilities](./guides/adding-capabilities.md)
- [Web Authentication and CSRF Security](./guides/web-auth-security.md)
## Configuration

View File

@@ -0,0 +1,356 @@
# Architecture Decision Record: Fleet Agent Upgrade Procedure
Initial Author: Jean-Gabriel Gill-Couture
Initial Date: 2026-05-06
Last Updated Date: 2026-05-06
## Status
Accepted (design); implementation deferred — see roadmap
`ROADMAP/fleet_platform/v0_2_plan.md`.
## Context
The v0.1 fleet agent ships as a single static aarch64-musl binary
sitting at `/usr/local/bin/fleet-agent`, started by a systemd
unit dropped at install time by `FleetDeviceSetupScore`. Every
managed device runs one. Today the only "upgrade procedure" is
`scp` + `systemctl restart` — fine for the bring-up phase, not
fine once paying customers run real workloads on the fleet.
Without a defined upgrade story we cannot ship a v0.1 agent into
the field. The contract a customer needs is:
1. New agent versions can be rolled out without operator-side
manual intervention per device.
2. Workloads currently reconciled on the device do not flap
(start/stop/start) during the upgrade.
3. A failed new version automatically reverts to the last
known-good version, on its own, without paging anyone.
4. The operator (the central one in the cluster, not the human)
sees what version each device is running, can drive a target
version per device, and observes upgrade progress.
The agent itself is the only process on-device with full context
on what's reconciling and what's healthy. Anything centralized
(Ansible-pushed, OS-package-managed) doesn't have that signal.
The agent must be the one driving its own swap, with the
operator coordinating but not executing.
## Decision
We adopt a **K8s rolling-update-shaped upgrade**, single-host,
agent-driven, operator-coordinated. Old version stays alive until
new is verified healthy from the operator's vantage point; only
then does the operator signal old to exit. **No version is ever
erased from disk.** Symlinks select the active binary.
### On-disk layout
```
/usr/bin/fleet-agent-v0.1.1 ← versioned binary, immutable
/usr/bin/fleet-agent-v0.1.2 ← versioned binary, immutable
/usr/bin/fleet-agent-v0.1.3 ← versioned binary, immutable
/usr/local/bin/fleet-agent → symlink to current versioned binary
```
- Versioned binaries are the source of truth. They live forever
(history-preserving, no GC). Disk use is bounded by humans
cleaning up explicitly, not by the upgrade procedure.
- The systemd unit installed by `FleetDeviceSetupScore` references
`/usr/local/bin/fleet-agent`. Symlink swap is the cutover
primitive — atomic on POSIX (`rename(2)`; `renameat2` on Linux).
- Naming convention: exact crate version string, `v<MAJOR>.<MINOR>.<PATCH>`,
no build metadata in the path. Build metadata lives in the agent's
reported version string but not in the file path (otherwise you
can't predict the path from a version pin).
### State machine on the agent side
```
Running ──[operator publishes desired_version != current]──▶ Draining
▲ │
│ │
│ ▼
│ Staging
│ │
│ ▼
│ Verifying
│ │
│ ▼
│ ┌──────────────────────────────[smoke fails]────────┤
│ │ │
│ [revert: symlink → previous, ▼
│ stay at current] Cutover-Ready
│ │
│ [Cutover-Ready persists ≥ T_OPERATOR_OBSERVE │
│ until operator publishes stop_signal] │
│ ▼
└────────────────────────────────────────────────────── Stopping
(exit)
```
States in detail:
- **Running** — normal reconcile loop.
- **Draining** — refuses to start new podman services for new
desired-state writes. In-flight reconciles complete and report
their final state to the operator. Existing services stay
running. Heartbeat continues. State is published as part of the
agent's heartbeat (`agent_state: "draining"`).
- **Staging** — fetch the new versioned binary from its URL (signed,
hash-pinned), verify it, and place it at `/usr/bin/fleet-agent-v<new>`.
Set file mode and ownership. No other state mutation.
- **Verifying** — invoke the staged binary with `--self-test`. New
binary parses its config, opens NATS connection, validates JWT,
prints version + "ok", exits 0. **No state mutation.** Catches
obvious breakage (missing dependency, wrong arch, corrupt
download, broken config-schema migration) before swap.
- **Cutover-Ready** — staged binary is healthy. Old agent updates
the symlink atomically:
```
ln -sfn /usr/bin/fleet-agent-v0.1.2 /usr/local/bin/fleet-agent.new
mv -T /usr/local/bin/fleet-agent.new /usr/local/bin/fleet-agent
```
Old agent then runs `systemctl start fleet-agent-v0.1.2.service` (a
parallel transient service, not `systemctl restart` of itself).
Both old and new are now running. New publishes its first
heartbeat with `version=v0.1.2`. Operator sees two heartbeats
per device for a brief window.
- **Stopping** — operator publishes a stop signal to the old
agent's NATS subject. Old agent receives, gracefully exits.
systemd's `Restart=on-failure` does *not* trigger because the
exit is `success` (rc=0, code-path-driven). New agent is now
the only one running. systemd unit is reconfigured to point at
the *current* symlink target on its next restart, but that's
cosmetic — the symlink already does the job.
### Operator-side coordination
The operator is the only source of truth for "what version should
this device run". Two new fields per device, two new subjects.
**New on `Device` CR / KV `device-info`:**
- `current_version` — what the agent is running right now.
Reported in heartbeat; reflected to the CR.
- `desired_version` — what the operator wants the agent to run.
Set by operator-side logic (default: latest published; eventually
canary / %-based).
**New NATS subjects (per-device, scoped by callout permissions):**
- `device-cmd.<device_id>.upgrade-stop` — operator → old agent.
Payload: `{"reason": "...", "deadline_ms": ...}`. Sent only after
operator has observed a heartbeat from the new version with
`current_version == desired_version` AND `agent_state == "running"`.
- `device-state.<device_id>.upgrade` — agent → operator. Status
events: `staging`, `verifying`, `cutover-ready`, `failed`, `done`.
Drives `Device.status.upgrade.{phase, last_error, ...}`.
The operator only emits `upgrade-stop` after it has independently
verified the new agent is up. **Old agent does not stop itself
based on its own observations.** This is the load-bearing
property: the same operator that disagreed with the upgrade
("haven't seen new version's heartbeat") would never have sent
the stop signal. Single-source-of-truth handoff.
### Failure modes and rollback
- **Staging fails (download / hash mismatch):** Agent stays in
`Running`. Reports `phase: "failed"`, `last_error`. Operator
sees the failure; can fix the artifact + retry by re-publishing
the same desired_version (any change to desired_version
re-triggers the state machine).
- **Verifying fails (smoke test rc != 0):** Agent stays in
`Running`. Reports failure. Staged binary stays on disk for
inspection. Operator can collect, debug, ship a fixed version.
- **Cutover-ready, but new agent never publishes a heartbeat
with the new version within T_HEARTBEAT_TIMEOUT (suggested
60s):** Old agent reverts the symlink, stops the parallel
systemd transient service, transitions back to Running with
the old version. Reports `failed`. Same recovery path.
- **Operator never sends stop signal (e.g., operator-side
outage):** Old agent stays in Cutover-Ready indefinitely. Both
agents are running; only the new one is publishing as the
active one (the old one's writes are gated on its state). This
is expensive (2× resource use) but safe — the operator is the
authoritative coordinator and any other behavior would risk
losing both agents at once.
- **Both agents alive but new agent crashes:** systemd's
`Restart=on-failure` on the new agent's transient unit retries.
If it can't come back, the operator never sends the stop signal,
the old agent stays Cutover-Ready, and a human investigates.
The fleet keeps working on the old version — the rollback is
implicit.
- **Operator publishes an older `desired_version`:** Reverse
rollout. Same mechanism, just with old/new swapped. The "new"
binary is older, but the procedure is identical. The fact that
no version is ever GC'd is what makes this work.
### What this isn't
- **Not fleet-wide.** Per-device. Fleet-wide canary / %-based
rollouts are operator-side orchestration **on top of** this
primitive. The operator would publish `desired_version` to a
rolling subset of devices and watch heartbeats. Out of scope
for v0.2 — single-device upgrade is sufficient for a 100-Pi
fleet, which is more than the 12-month customer roadmap calls for.
- **Not blue/green of the entire OS.** We swap one userspace
binary. The OS, podman, the systemd unit text, the kernel — all
unchanged. Out of scope.
- **Not a package manager.** Versioned binaries land at fixed
paths because we control them. apt / dpkg / OSTree are
orthogonal and not in the loop.
## Rationale
- **No version ever erased.** Trivializes rollback (the previous
binary is a `ln -sfn` away). Simplifies the failure tree:
every "what if" branch resolves to "old still on disk". Disk
cost on aarch64-musl is ~5–10 MB per version — at 12 versions
/ year, that's roughly 1 GB after a decade of upgrades. Negligible
compared to Pi storage.
- **Symlink swap as cutover.** POSIX-atomic. No daemon state.
Cheap to revert. Compatible with systemd unit references that
point at a stable path.
- **Old verifies new, then reports up.** This is the load-bearing
property: it places the verification at the agent (which has
the only complete view of its own runtime state) but the
*commitment* at the operator (which is the only thing safe to
trust as the cluster-wide source of truth). Either side alone
can fail safe; only consensus advances the upgrade.
- **Operator-driven stop, not agent self-stop.** A self-stopping
agent could decide to exit before the operator agrees, leaving
the cluster blind. Forcing the stop through the operator means
any disagreement keeps the old agent alive — which is the
desired bias.
- **Drains in-flight work first.** Mirrors K8s pod-shutdown
semantics. A workload reconciling at the moment of swap
finishes its current step, reports state, then queues. New
agent picks up the queue once it's the active version. No
observable flap on the workload.
- **Heartbeat-driven version reporting.** The agent already
publishes heartbeats; adding the version field is one line.
No new transport.
## Consequences
**Pros:**
- Bounded blast radius per upgrade (one device).
- Rollback is the same code path as upgrade — no special-case
bug class.
- Operator's view is monotonic: heartbeats with versions are
immutable history; there's no "did the upgrade really happen"
state.
- Old agent never decides to exit on its own. The most dangerous
failure mode in self-upgrading software (premature exit) is
designed out.
- Compatible with eventual fleet-wide rollouts (canary, %-based)
which become operator-side orchestration on top of this
primitive.
**Cons:**
- Briefly runs two agents per device (Cutover-Ready window).
Memory and connection-count both ~2× during that window.
Acceptable for the upgrade duration (typically <60s).
- Requires reliable connectivity between agent and operator to
complete the handoff. A device whose NATS link fails
mid-upgrade stays in Cutover-Ready until the link recovers.
- Disk grows monotonically with version count. Bounded by human
cleanup. We do not GC.
- New NATS subjects, new heartbeat fields, new `Device.status`
fields. Schema bump that operators-in-the-field need to handle
(the operator must understand "old agent reporting no version
field" as `version: unknown`, not crash).
## Alternatives considered
1. **OS-package upgrade (apt / dpkg / OSTree).** *Pros:* zero
custom code, standard toolchain, GPG-signed.
*Cons:* Loses the "agent verifies the new agent before swap"
property. apt's restart hook flips the symlink and `systemctl
restart`s; if the new binary is broken, the device is bricked
until human intervention. Doesn't drain in-flight work. Doesn't
know about NATS-managed pause states. Couples the upgrade
schedule to the distro's repo, not to the cluster operator's
intent. Rejected.
2. **Pull-from-OCI-registry on each agent restart.** *Pros:* same
primitive as podman / kube node-image-rotation.
*Cons:* Coupling to a registry the device must reach — many
customer fleets are on private subnets without registry
access. Would mean shipping a registry mirror per fleet. Adds
a dependency for a problem we can solve with a signed binary
on a CDN.
3. **Two systemd units, blue/green at the unit level.**
`fleet-agent-v0.1.1.service` and `fleet-agent-v0.1.2.service`,
ratchet via systemctl enable/disable. *Pros:* no symlink dance.
*Cons:* duplicates a lot of unit-file content; harder to
reason about what the "active" unit is (you have to ask
systemd, not `readlink`); doesn't compose well with the
`ExecStart=/usr/local/bin/fleet-agent` line we already ship.
Symlink swap is the lighter primitive.
4. **Self-stopping agent (no operator stop signal).** New agent
tells old agent "I'm up, you can go" via NATS. *Pros:* one
fewer subject.
*Cons:* The new agent is also the agent we're least sure of
— putting it in charge of the old one's lifecycle inverts the
trust model. If the new agent has a bug that causes it to
announce ready prematurely, the cluster goes blind. The
operator path is the conservative choice.
5. **Operator-pushed binary (instead of agent-pulled).** The
operator sshes / executes a one-off command per device.
*Pros:* operator controls timing precisely.
*Cons:* Reintroduces SSH as a control plane (we just spent a
month getting rid of it for the enrollment flow). Doesn't
scale to fleets where most devices are NATted away from the
operator.
## Implementation milestones
(For a future implementer; not committed to a date here. Lives
in the v0.2+ backlog.)
1. **M1** — Versioned binary layout: builds produce
`fleet-agent-v<version>` artifacts; install Score writes them
to `/usr/bin/fleet-agent-v<version>` + creates
`/usr/local/bin/fleet-agent` symlink. Existing tests cover the
rest.
2. **M2** — Version field in heartbeat + `Device.status.current_version`
reflection on the operator side. No upgrade behavior yet.
3. **M3** — `desired_version` field on the device-info KV +
operator setter. No agent-side action yet.
4. **M4** — Agent state machine, end to end, gated by a feature
flag. Operator publishes desired_version → agent does the
dance → operator sends stop signal → done. Includes
failure-mode tests (download fail, smoke fail, heartbeat-timeout
revert).
5. **M5** — Remove the feature flag. Default-on.
6. **M6** — Operator-side rollout strategies (canary, %-based) —
only after M5 has been in production for 30 days against a
real fleet.
## Additional Notes
- Binary signing + signature verification is in scope for the
`Staging` step, but the choice of signing scheme (cosign / Rekor
/ minisign) is deferred until the M1 implementation. Whatever
we pick must work on aarch64-musl Pi devices without
additional system dependencies.
- The N-versions-on-disk policy is "all of them, forever" per
the constraint above. If disk pressure becomes real on some
customer fleet, a manual GC tool can prune `/usr/bin/fleet-agent-v*`
by date — never automatic, never as part of the upgrade
itself.
- See JG's *Pour l'amour des compilateurs* talk (Botpress
Meetup, 2026-04-30) for the framing applied here:
cardinality-matched types and operator-as-coordinator are the
same idea, applied to one function and to one platform.

View File

@@ -0,0 +1,193 @@
# Architecture Decision Record: Deploy Architecture — Scores, Deploy Crates, and the E2E Contract
Initial Author: Jean-Gabriel Gill-Couture
Initial Date: 2026-05-18
Last Updated Date: 2026-05-20
## Status
Accepted. Extends the Score-Topology-Interpret pattern documented
in `CLAUDE.md` (ADR-002, ADR-003) with the *deploy* side of the
contract: what a deploy crate is, how e2e harnesses relate to
production deploys, how the CLI surface is shaped, and the
smoke-test-on-deploy semantics.
## Context
Three failure modes recur in tooling that ships infrastructure as
code, and Harmony exists in part to defeat them. This ADR locks
the deploy-time discipline that keeps them out.
1. **Manifests outside the type system.** YAML/HCL configurations
are validated at runtime, not compile time — the original
"YAML mud pit" that ADR-005 names. A Rust framework that
re-introduces raw `Deployment` / `Service` / `ConfigMap`
structs in test harnesses, examples, or CLI helpers has only
dressed up the same anti-pattern in Rust syntax: the typed
Scores get a clean sample size of one (production), and
everything else can silently diverge.
2. **Deploy logic with no canonical home.** A "how to apply
component X end-to-end" routine that lives in three places
(an example crate, a CLI subcommand, ad-hoc orchestration in
a test harness) will drift. The framework needs one address
per deployable component, and every consumer of that
component composes from there.
3. **"Applied" is not "working."** `helm install` returns
success the moment the API server accepts the manifest, and
leaves the operator to debug downstream. Harmony's whole
reason for existing is to shorten that feedback loop — a
deploy primitive that doesn't itself verify the result keeps
the loop open.
## Decision
Nine principles, grouped.
### Deployment as Scores
1. **Deploy with Scores, not handrolled manifests.** Capability
traits + compile-time bounds are the contract. No
`k8s_openapi::api::*` structs outside of `Score::interpret`
bodies. Test harnesses, examples, and CLI helpers compose
`*Score` types — they never reimplement deploys.
2. **E2E uses the same Scores as production.** Only the
`Topology` instance changes (local k3d, remote OKD,
bare-metal HA, …). A test harness is a `Score`-composer
running against a test Topology. If e2e needs something prod
doesn't, add the knob to the Score — don't fork the manifest
in the harness.
3. **One Score per deployable component.** Composition is the
user-facing primitive: a `MyAppScore` pulls in
`PostgresScore`, `HttpServerScore`, etc. Don't build
monolithic "deploy everything" Scores. Each primitive Score
must be independently testable and substitutable.
4. **Deploy returns only after smoke-test success.** Every Score
owns a readiness + smoke-test contract that the framework
runs and blocks on. Convergence errors must be actionable, in
the style of `rustc`'s error messages, not "exit code 1 from
helm". The implementation shape of the smoke-test contract is
deferred (see §Out of scope); the principle is locked in.
### Where deploy logic lives
5. **Deploy logic lives in a `*-deploy` crate** that depends on
both `harmony` and the runtime crate it deploys. Runtime
binaries (the artefacts that ship to constrained devices and
to in-cluster pods) stay free of the `harmony` dep. One
deploy crate per app area, holding every component-Score for
that app plus the `main.rs` that drives them via
`harmony_cli`. The same crate is the single import for any
consumer — CLI, e2e harness, future control planes.
`harmony` core stays focused on framework primitives and
reusable provider modules (DNS, K8s resources, Helm charts,
NATS, PostgreSQL, …). It is not a parking lot for
application-specific deploy Scores.
### Topology selection
6. **Topologies are compile-time, selected at runtime.** A
deploy binary statically lists its supported topologies; the
operator picks one at deploy time. Adding a brand-new
topology backend (AWS, GCP, …) is a rebuild — acceptable
cost, because dynamic-discovery topologies like
`K8sAnywhereTopology` already cover "any physical place that
runs k8s". No `Box<dyn Topology>` plugin loaders.
### Framework evolution
7. **Extend Scores with companions, not API changes.** New
capabilities the framework wants to attach to Scores
(planning, dry-run, observability, eventually smoke-test)
default to a *companion* type or trait that wraps a Score
rather than a new method on `Score` / `Interpret`. The base
public API stays simple. The exception is principles every
Score must honor (which may force a required method) — but
only after the principle has been validated in practice via
the companion-first iteration.
### CLI
8. **CLI: hybrid, staged.** Today (B): first-party tools ship as
separate `harmony-*` binaries built on the existing
`harmony_cli` crate. Tomorrow (C): a top-level `harmony`
binary discovers `harmony-*` plugin binaries on `$PATH`
(`kubectl`-style) so a third-party `MyAppScore` author gets
`harmony deploy my-app` for free. The plugin protocol is
deferred (see §Out of scope).
### Error handling
9. **`thiserror` almost everywhere; `anyhow` only at binary
glue.** Library code, public crate boundaries, anything a
caller might want to match on — typed errors via `thiserror`.
`anyhow` is reserved for `main.rs`-level glue where the error
is just printed.
## Out of scope (deferred, not rejected)
- **Score derive macro / deployment DSL.** Strategic intent from
day one; the framework's value-add concentrates here. Separate
design effort.
- **Score registry** (Crichton-style:
<https://willcrichton.net/rust-api-type-patterns/registries.html>).
Real itch — examples and Scores are hard to discover today.
Research + ADR first.
- **Inventory as capability-defined physical assets.** Inventory
is under-engineered today; the original idea is to represent
physical infrastructure (building → cable → switch port → MAC)
but most use cases ignore it. Decomposing inventory into a
capability set is a deep redesign.
- **Plug-in CLI discovery layer (C in principle 8).** The fix
for the "too many disconnected CLIs" cohesion problem.
Roadmap item, dedicated future effort.
- **`Application features` ↔ `capabilities` relationship.** An
in-progress concept the project lead is personally unsure
about. Not resolved in this ADR.
- **Concrete smoke-test contract shape (principle 4).** Whether
smoke-test lives as a separate trait, a required method on
`Score`, a companion struct, or a typestate is open. Until
it's locked, deploy crates implement per-Score readiness
checks inside `interpret` bodies — the principle is what
travels with the Score, not yet the trait shape.
## Consequences
- New deployable components are authored as `*Score` types in a
`*-deploy` crate, not in `harmony` core. `harmony` core is
framework primitives plus reusable provider modules; it does
not accumulate application-specific deploy logic.
- Test harnesses are Score-composers. A harness that finds
itself building `Deployment` / `Service` / `ConfigMap` structs
is the signal that a Score is missing, not that the harness
needs a special path.
- Every Score owns its readiness story. Whatever shape the
smoke-test contract eventually takes, the Score is the home
for the logic — not a parallel test fixture.
- Adding a new deploy backend (a new topology) is a
deploy-binary rebuild. Dynamic loading of topologies is rejected by
this ADR, and that posture is load-bearing for the
compile-time-safety guarantees in CLAUDE.md.
- New framework-level capabilities (dry-run, observability,
smoke-test) ride in on companion types first. Only after a
companion proves out does it earn a place in the `Score` /
`Interpret` public API.
## References
- `CLAUDE.md` — Score-Topology-Interpret pattern, capability
design rules.
- `docs/adr/002-hexagonal-architecture.md` — domain/adapter split
this builds on.
- `docs/adr/005-interactive-project.md` — the original "no
YAML-mud-pit" call (Rust DSL over YAML/HCL).
- `docs/adr/009-helm-and-kustomize-handling.md` — established
pattern: external charts inflate into the same Score pipeline.
- `harmony_agent/deploy` — `*-deploy` crate exemplar.

View File

@@ -0,0 +1,182 @@
# Architecture Decision Record: Fleet Platform — Capability Decomposition
Initial Author: Jean-Gabriel Gill-Couture (with research by Claude)
Initial Date: 2026-05-20
Last Updated Date: 2026-05-20
## Status
**Draft — under review.** Captures the proposed shape for review;
not yet locked. If accepted, supersedes the as-built layout of
`harmony/src/modules/fleet/` documented in ADR-023's first
revision.
## Context
The fleet platform shipped under `feat/iot-walking-skeleton`
spans three concerns that today share two locations:
1. **Domain logic** — what a `FleetDevice` is, what a
`FleetDeployment` looks like, what the reconciler-contracts
wire types mean.
2. **Adapters** — concrete NATS, Zitadel, Kubernetes, Helm
integrations that drive the domain.
3. **Deploy procedures** — how to bring up the operator, agent,
NATS, Zitadel as Scores against a Topology.
Today these live in `harmony/src/modules/fleet/` (mixed), the
`harmony-reconciler-contracts` crate (wire types only), the
`harmony-fleet-deploy` crate (Scores for deploy), and the
`harmony-fleet-operator`/`harmony-fleet-agent` binaries
(runtime). The boundary between domain and adapter is not
type-level: `harmony/src/modules/fleet/setup_score.rs` for
example reaches into Zitadel, NATS, Kube, and Helm directly.
Anyone wanting to swap NATS for a different transport would
touch every fleet file.
ADR-023 already addressed the *deploy*-side of this (deploy
Scores live in `*-deploy` crates, not in `harmony` core). This
ADR proposes the *domain*-side decomposition: pull a thin
fleet-domain crate above the existing reconciler-contracts, push
provider-specific code into adapter crates, and re-direct the
deploy crate to consume the domain rather than the framework
primitives directly.
## Decision (proposed)
Five crates, layered by dependency direction:
```
harmony-reconciler-contracts (existing — wire types only)
harmony-fleet-domain (new — domain records + capability traits)
harmony-fleet-adapters-* (new — one crate per provider)
▲ (nats, zitadel, kube)
harmony-fleet-deploy (existing — bring-up Scores)
harmony-fleet-operator (existing — daemon)
harmony-fleet-agent (existing — daemon)
```
### `harmony-fleet-domain`
The domain crate. Depends only on `harmony-reconciler-contracts`
and `harmony_types`. Holds:
- **Domain records**: `FleetDevice`, `FleetDeployment`,
`FleetState`, `EnrollmentIntent`, `DeviceCredential`.
- **Capability traits**: `DeviceRegistry`,
`DesiredStatePublisher`, `ObservedStateConsumer`,
`IdentityProvider`, `AgentLifecycle`. These are the seam
between domain logic and provider-specific implementations.
### `harmony-fleet-adapters-nats`, `-zitadel`, `-kube`
One crate per provider. Each implements the capability traits
above for its specific backend:
- `nats` — `NatsDeviceRegistry`, `NatsDesiredStatePublisher`,
`NatsObservedStateConsumer`.
- `zitadel` — `ZitadelIdentityProvider`, machine-user
provisioning, JWT-bearer minting.
- `kube` — `KubeFleetReflector` writes `Device` and
`Deployment` CRDs as a *reflection* of domain state, not as
the source of truth. CRD types move here from
`harmony-fleet-operator`.
### `harmony-fleet-deploy`
Stays as the home for `FleetOperatorScore`, `FleetAgentScore`,
`FleetNatsScore`, `FleetCalloutScore`. Updates: imports
`harmony-fleet-domain` for types, uses
`harmony-fleet-adapters-*` to compose Scores against capability
traits rather than reaching directly into NATS/Zitadel client
crates.
### Direction of dependency
The fleet *domain* doesn't depend on the framework. The
framework's *deploy procedures* depend on the fleet's domain.
Inversion of today's direction, where `harmony::modules::fleet`
imports from `harmony_secret`, `harmony_zitadel_auth`, NATS
client crates, kube client crates, etc.
After this ADR is implemented, `harmony::modules::fleet`
disappears entirely. `harmony` core stays focused on framework
primitives.
## Open questions
These are the decision points pending review — flagged so the
review has concrete pivots:
- **Q1.** Is `IdentityProvider` the right capability name, or
should we name the two distinct concerns separately
(`DeviceCredentialMinter`, `OperatorTokenProvider`)? CLAUDE.md
rule says "if reality has two distinct concerns, two
traits."
- **Q2.** Should the `Device` CRD exist at all, or should the
agent publish to a kube `Node` (per the alternative-D
direction)? Today's mid-ground (own CRD that mirrors `Node`)
is arguably the worst of both worlds.
- **Q3.** Where does `ReconcileScore`'s adjacently-tagged enum
live? It's the canonical wire seam between operator and
agent. Should sit in `harmony-reconciler-contracts` (so both
binaries import only that crate); confirm before the move.
- **Q4.** Does this redesign block the v0.1 production push, or
does it land in v0.2 alongside the agent-upgrade work
(ADR-022)? Public API churn after a customer is on it is more
expensive than a 3-day delay before they are. Recommendation:
ship the redesign first.
- **Q5.** Where do runtime tools (the `harmony-fleet` CLI plugin,
the operator's frontend) sit in the dependency graph? If they
depend on `harmony-fleet-domain` only, they build without
pulling in helm/kube/ansible at compile time — which is also
the right shape for the device-side enrollment binary
(currently feature-gated).
## Out of scope
- **Alternative D (kube-native devices).** A future v2.0
destination, not v0.1 or v0.2 work. Captured as the long-term
direction; the capability traits in this ADR are the
intentional seam that makes the migration possible later.
- **Topology decomposition.** Whether `K8sBareTopology` /
`K8sAnywhereTopology` should themselves be capability sets is a
separate concern. Tracked as a working draft at
`docs/adr/drafts/topology-proliferation.md`.
## Consequences
If accepted:
- New deployable fleet components author their Scores against
capability traits in `harmony-fleet-domain`, not against
provider clients directly. Swapping NATS for a different
transport becomes a single-crate change.
- CRD types move out of operator code and into
`harmony-fleet-adapters-kube`. Operator depends on adapter
crate; runtime binary stays slim.
- `harmony` core has no fleet code. The framework's `modules/`
directory is reserved for general-purpose primitives (DNS,
K8s, Helm, NATS, PostgreSQL, …); domain-specific code lives
in its own crate tree.
- Future fleet adapters (a different transport, a different
identity provider) are additive: one new crate, no changes to
domain or deploy.
## References
- `ROADMAP/fleet_platform/architecture_review.md` §§4–5 —
comparison matrix and Alternative-B rationale from which this
ADR is extracted.
- `docs/adr/023-deploy-architecture.md` — companion ADR for the
deploy-side rules. This ADR is the domain-side companion.
- `docs/adr/022-fleet-agent-upgrade.md` — the agent-upgrade
procedure, which sits cleanly on top of the
`AgentLifecycle` capability proposed here.

View File

@@ -52,6 +52,7 @@ Every ADR follows this structure:
| [019](./019-Network-bond-setup.md) | Network Bond Setup | Proposed |
| [020-1](./020-1-zitadel-openbao-secure-config-store.md) | Zitadel + OpenBao Secure Config Store | Accepted |
| [020](./020-interactive-configuration-crate.md) | Interactive Configuration Crate | Proposed |
| [022](./022-fleet-agent-upgrade.md) | Fleet Agent Upgrade Procedure | Accepted |
## Contributing

View File

@@ -0,0 +1,886 @@
# Fleet platform — architecture review
Working document for the architectural redesign of the fleet platform
before v0.1 ships to production. Started 2026-05-07.
This is a research + design document, not a plan to execute. The
output of this work is an ADR (or set of ADRs) that lock the new
shape; the v0.2 roadmap will reference whichever option we pick.
## Why now
- Three days from production. No customers depend on the API yet
→ API/UX/DX is still cheap to change. After ship, every breaking
change costs us a week of customer-coordination overhead.
- The `harmony/modules/fleet/` placement is wrong — already flagged
in code review. The reasons it ended up there are subtle (cross-
module imports of `K8sAnywhereTopology`, `HelmChartScore`,
`K8sResourceScore`, `harmony_secret`, `Topology` capability
traits). Those need to be written down before the file move,
not after.
- The plumbing — NATS + Zitadel + auth callout + operator + agent
— is sound. Highly secure, scalable by design, low resource
footprint. The redesign is about **moving code** and **better
data structures**, not rebuilding mechanisms.
- The frame from JG's *Pour l'amour des compilateurs* talk:
cardinality-matched types, "make impossible states impossible",
expressive types as the deterministic feedback loop that scales
with LLM-era code generation throughput. Apply that frame here.
## Working plan
1. **Inventory.** Map every public type, trait, score, module, and
crate that participates in the fleet domain. Markdown-bullet
shape; no diagrams.
2. **Read the room.** Pull principles from JG's talk, its
references, and harmony's existing ADRs (002 hexagonal, 003
infrastructure abstractions, 015 higher-order topologies, 016
harmony agent + global mesh, 017 NATS interconnection, 018
template hydration). Note where the existing fleet design
already follows them and where it doesn't.
3. **Identify the design problems.** Not bugs — *shape* problems.
Cardinality mismatches, leaky boundaries, "is this resolved
yet" branches, location/dependency loops.
4. **Sketch alternatives.** Three to five. At least one
conventional cleanup, at least one out-of-the-box that
reframes the domain. Compare on the same axes (cardinality,
placement, ergonomics, extensibility).
5. **Pick (or recommend) one.** Land as ADR.
This document covers steps 1–4. The pick happens in conversation
with JG before the ADR.
---
## §1 — Current state inventory
### §1.1 — Where the code lives
The fleet domain spans **three concerns** that today live in
**three locations**:
- **Framework-side scoring** (what runs on the operator's
workstation when they `cargo run` the install) → lives in
`harmony/src/modules/fleet/`. This is the wrong home; it's the
thing this review is about moving.
- `mod.rs` — re-exports
- `assets.rs` — Ubuntu/Debian cloud image fetchers, libvirt SSH
keypair management
- `libvirt_pool.rs` — libvirt storage pool bring-up
- `setup_score.rs` (1053 LOC, the monster) — `FleetDeviceSetupScore`,
`FleetDeviceSetupConfig`, `FleetDeviceAuth`
(TomlShared|ZitadelJwt|ZitadelEnroll), `AdminAuth`, `HostsEntry`,
`merge_hosts_file`
- `vm_score.rs` — `ProvisionVmScore` (libvirt VM bring-up)
- `preflight.rs` — `check_fleet_smoke_preflight*` (host system
checks)
- `server.rs` — `FleetServerScore`, `FleetServerInterpret`
(composed bring-up of Zitadel + NATS + callout + operator)
- `operator/`
- `mod.rs`, `score.rs` — `FleetOperatorScore`,
`FleetOperatorInterpret` (operator helm install)
- `chart.rs` (453 LOC) — chart rendering (`ChartOptions`,
`OperatorCredentials`, `build_chart`, `operator_secret`,
`build_operator_deployment`, `build_cluster_role`)
- `crd.rs` — `Deployment` CRD type (`DeploymentSpec`,
`Rollout`, `RolloutStrategy`, `DeploymentStatus`,
`DeploymentAggregate`, `AggregateLastError`); `Device` CRD type
(`DeviceSpec`)
- **Cross-boundary wire types** (the "contract" agent and operator
both have to agree on) → lives in `harmony-reconciler-contracts/`.
- `fleet.rs` — `DeviceInfo`, `DeploymentState`, `HeartbeatPayload`,
`DeploymentName`, `InvalidDeploymentName`
- `kv.rs` — bucket name constants + key-builder functions
- `status.rs` — `Phase`, `InventorySnapshot`
- re-exports `harmony_types::id::Id`
- **Runtime binaries** (what runs in the cluster + on devices) →
lives in `fleet/`.
- `harmony-fleet-operator/` — the operator pod. `controller.rs`,
`device_reconciler.rs`, `fleet_aggregator.rs` (833 LOC),
`install.rs`, `main.rs`. Pulls `Deployment`/`Device` CRDs from
`harmony::modules::fleet::operator::crd` (cross-crate import
that should give us pause).
- `harmony-fleet-agent/` — the on-device daemon. `config.rs`,
`reconciler.rs`, `fleet_publisher.rs`, `main.rs`.
- `harmony-fleet-auth/` — JWT-bearer / NATS-credentials helpers
used by both the operator AND the agent. `config.rs`,
`credentials.rs` (553 LOC). Sits between contracts and the
runtime crates.
### §1.2 — Public types, sorted by domain meaning (not location)
#### Identity & devices
- `harmony_types::id::Id` — opaque, sortable, collision-safe
identifier. Used as device id, deployment id, …
- `DeploymentName` (newtype with validation, `harmony-reconciler-contracts`)
- `DeviceInfo` — heartbeat payload that materializes into a
`Device` CR
- `DeviceSpec` — kube CRD, holds an optional `InventorySnapshot`
- `InventorySnapshot` — hardware/OS facts published once at
registration
#### Deployment desired-state
- `DeploymentSpec` — kube CRD: `target_selector: LabelSelector`,
`score: ReconcileScore`, `rollout: Rollout`
- `ReconcileScore` (in `harmony::modules::podman`, re-exported
from `harmony::modules::fleet::operator::crd`) — externally-tagged
enum, today only `PodmanV0(PodmanV0Score)`
- `PodmanV0Score`, `PodmanService`, `EnvVar`, `VolumeMount`,
`RestartPolicy`
- `Rollout`, `RolloutStrategy::Immediate`
#### Deployment observed-state
- `DeploymentState` — what the agent publishes per device per
deployment after reconcile
- `DeploymentStatus` (kube CRD) — operator-side rollup of all
device states for one Deployment CR
- `DeploymentAggregate` — counts (matched, succeeded, failed,
pending) + `last_error: Option<AggregateLastError>`
- `Phase` — `Pending | Running | Failed`
#### Authentication / identity provider
- `FleetDeviceAuth` — sum type with `TomlShared | ZitadelJwt |
ZitadelEnroll`. **The `ZitadelEnroll` arm carries
unresolved-state — admin credentials that must be turned into a
device JSON key at execute time. Mixes resolved and unresolved
states in one type, which is the cardinality bug we keep hitting.**
- `AdminAuth` — `Sso { client_id } | Token(String)` (used inside
`ZitadelEnroll`)
- `CredentialsSection` — TOML-on-disk shape (in
`harmony-fleet-auth`, parallel to `FleetDeviceAuth`)
- `CredentialSource` — runtime credential factory
- `NatsCredential` — what async-nats actually consumes
- `MachineKeyFile`, `CachedToken`
#### Setup procedures (Scores)
- `FleetDeviceSetupScore` (`FleetDeviceSetupConfig`) — the workhorse:
installs podman, drops the agent binary, drops the credentials
TOML, drops the keyfile, brings up the systemd unit.
- `FleetServerScore` — orchestrates Zitadel install + identity
setup + NATS install + callout install + operator install. Wraps
five other scores.
- `FleetOperatorScore` — operator helm chart render + install + the
credentials Secret apply.
- `ProvisionVmScore` — libvirt VM bring-up. Used by VM rehearsals.
- (External, not in fleet/) `ZitadelScore`, `ZitadelSetupScore`,
`NatsK8sScore`, `NatsAuthCalloutScore` — all consumed by the
composed install.
#### Operator-internal types
- `FleetState`, `SharedFleetState`, `DeploymentKey`, `DevicePair`,
`CachedDeployment`, `Context`, `Error` (the controller's local
error type), `selector_matches`, `apply_state`, `drop_state`,
`compute_aggregate`
#### Agent-internal types
- `AgentConfig`, `AgentSection`, `NatsSection`, `CredentialsSection`
- `FleetPublisher`, `Reconciler`
#### Fleet plumbing for development
- `FleetSshKeypair`, the cloud-image consts, `HarmonyFleetPool`,
`merge_hosts_file`, `HostsEntry`, `check_fleet_smoke_preflight*`
#### NATS subjects + KV buckets (the wire seam)
- `BUCKET_DESIRED_STATE` = `"desired-state"`
- `BUCKET_DEVICE_INFO` = `"device-info"`
- `BUCKET_DEVICE_STATE` = `"device-state"`
- `BUCKET_DEVICE_HEARTBEAT` = `"device-heartbeat"`
- Key builders: `desired_state_key(device_id, deployment_name)`,
`device_info_key(device_id)`, `device_state_key(device_id,
deployment_name)`, `device_heartbeat_key(device_id)`
### §1.3 — Concept clusters
When you squint at the inventory, the domain falls into **five
clusters**:
1. **Identity** — who is this device, who is this deployment, who
is the operator, what auth do they have.
2. **Desired state** — what should be running where.
3. **Observed state** — what is actually running where.
4. **Setup** — bringing all this into existence on a fresh
cluster + fresh device.
5. **Plumbing** — the NATS/kube/Zitadel mechanisms that make 1–4
work.
The current code does not cleanly separate these. Examples:
- `setup_score.rs` mixes **Setup** (drop binary, run systemd) with
**Identity** (`FleetDeviceAuth`). 1053 LOC.
- `FleetDeviceAuth` mixes resolved-Identity (`ZitadelJwt` —
here's a key) with Setup-time-Identity-resolution-intent
(`ZitadelEnroll` — here's how to mint a key).
- The chart-render helpers (`build_operator_deployment`, etc.) are
`pub` from `harmony::modules::fleet::operator::chart` so the
composed-install scores can pluck the secret out before helm
install. Plumbing leaking through Setup.
- `harmony::modules::fleet::operator::crd::DeploymentSpec` is the
CRD definition AND it's the type the operator daemon imports to
reconcile. Cross-crate import from a runtime crate
(`harmony-fleet-operator`) into a framework crate (`harmony`).
This is the placement bug.
### §1.4 — The shape problem in one diagram (text)
```
framework/operator workstation
harmony::modules::fleet ──┤ Scores: FleetServerScore, FleetDeviceSetupScore,
│ FleetOperatorScore, ProvisionVmScore
│ CRD types: Deployment, Device, DeploymentSpec, ...
│ Chart rendering helpers (operator/chart.rs)
harmony-reconciler-contracts ── wire types: DeviceInfo, DeploymentState,
│ HeartbeatPayload, KV constants
│ ▲ ▲
│ │ │
│ │ imports imports│
│ │ │
fleet/harmony-fleet-agent fleet/harmony-fleet-operator
▲ ▲
│ │
│ ALSO imports ALSO imports│
│ from harmony::modules:: from harmony::modules::
│ podman (PodmanV0Score) fleet::operator::crd
```
Two problematic edges:
1. `harmony-fleet-operator` imports `harmony::modules::fleet::operator::crd::Deployment`. The runtime daemon depends on the framework crate just for CRD type definitions.
2. `harmony-fleet-agent` imports `harmony::modules::podman::{PodmanV0Score, PodmanTopology, ReconcileScore}`. The agent depends on the framework crate's *podman module* for the score it deserializes off the wire.
Both edges should run *through* `harmony-reconciler-contracts`, not around it. That's the placement bug surfaced.
---
## §2 — Theory review
### §2.1 — From the talk
Pulling the load-bearing principles, ranked by relevance to this
redesign:
1. **Cardinality matters.** Types should match the cardinality of
the real-world concept. `&str` for "primary color" admits
infinite invalid inputs; `enum { Red, Yellow, Blue }` admits
exactly three. Friction is proportional to mismatch.
2. **Make impossible states impossible.** Don't comment the
constraint, code it. Push runtime errors to the design phase.
3. **Representations matter.** Same data, different shapes ↔
different operations are cheap. Roman numerals ↔ addition; Arabic
↔ multiplication. "An API is a computational representation of
real-world concepts."
4. **The compiler is a deterministic feedback channel.** In an era
when LLMs generate code at 5–10K LOC/day, the only sensor that
keeps up runs in milliseconds and is deterministic. Lean on it.
5. **Strong types reduce code volume + test boilerplate + token
waste + review burden + CI time + production incidents** — and
*increase* refactoring confidence and velocity-over-time. The
bet is asymmetric.
### §2.2 — From the references
Grouping by what they imply for *this* redesign:
#### Will Crichton — *Type-Driven API Design* + *Rust API Type Patterns*
- **Typestate.** Encode "phase of an operation" in the type
parameter. A `ProgressBar<Bounded>` exposes `.with_eta()`; a
`ProgressBar<Unbounded>` doesn't. The contradictory call doesn't
compile.
- Direct application: **`FleetDeviceAuth` mixes phases.** The
`ZitadelEnroll` arm is unresolved, the `ZitadelJwt` arm is
resolved, the `TomlShared` arm doesn't even need resolution. A
typestate would model these as distinct types; only one of them
has `agent.write_to_disk()`.
#### Richard Feldman — *Making Impossible States Impossible*
- Slogan-as-tool. Look at every `Option<T>` and ask *"can two of
these be inconsistent at once?"* If yes, that's an impossible
state — refactor.
- Direct application: `FleetDeviceSetupConfig` has `auth:
FleetDeviceAuth` AND `agent_binary_path: PathBuf`. Today nothing
prevents `auth = TomlShared` (no Zitadel) with
`agent_binary_path` pointing at the wrong-arch binary. We could
encode the agent binary's target arch as a typestate parameter
and refuse to deploy to a device with a known-different arch
inventory.
#### Sandy Maguire — *Protos Are Wrong*
- Protocol buffers throw away information real type systems
preserve. Sum types, exhaustiveness, parametric polymorphism,
Maybe/Result — protos can't express any of them precisely. The
"loose contract" sells you weak invariants.
- Direct application: `harmony-reconciler-contracts` is JSON-shaped
at the wire (matched on `type` tag for `ReconcileScore`).
We're already paying the proto-class tax: any new variant
requires both ends to know about it; the wire format doesn't
enforce a schema; old agents see new variants as parse errors.
This is an honest constraint — wire formats need to be permissive
by design — but it argues for keeping the **wire types small and
obviously evolvable** while letting in-memory types be
cardinality-matched.
#### Sean Goedecke — *Invalid States*
- The skeptic's case: making impossible states impossible *can be
over-applied*. Sometimes a `String` is the right cardinality
even when an enum exists, because the enum binds you to a
closed world.
- Direct application: **Don't make `device_id` a closed enum.**
The newtype + RFC1123 validation we just added is the right
cardinality match: it's a string-like, but only valid strings.
Over-modeling would have us build `enum DeviceId {
Pi(PiSerial), Vm(VmName), …}` — closed world, breaks first time
a customer plugs in an x86 box.
- Useful guardrail: **type-driven** ≠ **type-everything**. The
question to ask each time is "what's the cardinality of this
concept in reality" — not "can I model this".
#### Martin Fowler — *Harness Engineering* (April 2026)
- Computational sensors (compilers, type checkers, linters) over
inferential ones (tests, code review). Compiler runs on every
change; tests don't.
- Direct application: prefer compiler-checked invariants over
doc-comment invariants. If the docs say "this Score's `auth`
field must be resolved at the call site of `execute()`", the
compiler should enforce it.
### §2.3 — From harmony's own ADRs
Reading the existing ADRs *as design language already in use* —
what vocabulary should the new fleet shape stay consistent with?
#### ADR-002 (hexagonal architecture)
- "Domain isolated from adapters." Domain types own the
vocabulary; adapters (k8s client, NATS, helm) translate at the
edge.
- **Implication for fleet:** the *domain* is identity + desired
state + observed state. The *adapters* are NATS-KV, kube-CRD,
helm-chart, ansible-over-SSH. The current
`harmony::modules::fleet` mixes both. Pulling adapters out is the
refactor.
#### ADR-003 (infrastructure abstractions)
- "Abstractions at domain level, not provider level. `DnsServer`
not `OPNsenseDns`."
- **Implication for fleet:** capability traits like
`DeviceRegistry`, `DesiredStatePublisher`, `ObservedStateConsumer`
— each a standard infrastructure need that NATS-KV happens to
fulfill today, that another transport (gRPC streaming, MQTT,
Redis streams) could fulfill tomorrow.
#### ADR-015 (higher-order topologies)
- Higher-order topologies (`FailoverTopology<T>`,
`DecentralizedTopology<T>`) compose via blanket trait impls.
`T: PostgreSQL` ⇒ `FailoverTopology<T>: PostgreSQL`. Zero
boilerplate.
- **Implication for fleet:** `FleetTopology<T>` could compose with
a base `K8sTopology<T>` rather than being a parallel concept.
"A fleet is a thing that is *both* a kube cluster *and* a
device registry."
#### ADR-016 (Harmony Agent + Global Mesh)
- Agents are processes that observe + reconcile per a desired
state published into a NATS mesh. Mesh is the reliable hop;
agents are stateless processors at the edge.
- **Implication for fleet:** the IoT fleet is a *specialization*
of the agent + mesh ADR — devices are agents, the operator is
a coordinator. The fleet domain types should fit ADR-016's
vocabulary, not invent a parallel one.
#### ADR-017 (NATS clusters interconnection)
- Trust topology: per-cluster account isolation, gateway-mediated
cross-cluster traffic. Per-device permissions are a
specialization of per-account.
- **Implication for fleet:** the auth callout's per-device permission
templates should compose with the cluster-interconnection
account model — currently they're treated as orthogonal, which
is fine until we actually cross fleets.
#### ADR-018 (template hydration)
- Hydrating templates at the edge of the framework, not in the
middle. Same pattern as our generated chart YAML: render once,
apply via typed code.
- **Implication for fleet:** chart-rendering helpers
(`build_operator_deployment` et al.) are template-hydration
edges. They *should* be hidden from domain code. Today they're
`pub` — visible to consumers like `fleet_staging_install` who
reach in and grab `operator_secret(opts)`. That's adapter
leakage.
### §2.4 — Synthesis: principles for the redesign
A short list, ordered. Each line is something the new shape
should satisfy:
1. **Domain types in `harmony-reconciler-contracts` (or a sibling
crate)**, with no dependency on `harmony` framework types.
2. **Resolved types only at the API surface.** Pre-resolution
intent is a separate type, used only by the resolver.
3. **Capabilities as traits**, not concrete types. `DeviceRegistry`,
`DesiredStatePublisher`, etc. The NATS-backed impl is one of
several allowed.
4. **Closed cardinality where reality is closed; open where reality
is open.** Goedecke's check, not Feldman's.
5. **Higher-order topology, not parallel topology.** A fleet is a
`FleetTopology<T>` over a base K8s topology, not a separate
capability hierarchy.
6. **Adapters hidden behind capabilities.** Helm chart rendering,
k8s resource apply, NATS subjects — none of these surface from
the fleet's public API.
7. **No yaml in framework code paths.** Existing principle from
v0_1; keep.
8. **Keep wire types minimal + permissive.** Not because they're
the canonical model, but because they're the
evolvability seam (Maguire's protos critique applies in
reverse — *embrace* the loose contract on the wire, *reject* it
in-memory).
---
## §3 — Design problems with the current shape
Concrete issues the redesign needs to fix. Not "bugs" — *shape*
problems. Each numbered so we can refer back when comparing
alternatives.
- **P1. `harmony/modules/fleet/` is in the wrong crate.** It pulls
framework dependencies (`HelmChartScore`, `K8sResourceScore`,
`K8sAnywhereTopology`, `harmony_secret`, etc.) and the runtime
daemons import *from it*. This makes the operator/agent depend
transitively on every harmony module — including the OPNsense
XML codegen, OKD bootstrap stuff, etc. Compile times suffer; the
release surface is wrong (you can't `cargo install
harmony-fleet-operator` without all of harmony).
- **P2. `FleetDeviceAuth` mixes resolved + unresolved states.**
`ZitadelEnroll` is pre-resolution intent; `ZitadelJwt` is
post-resolution credential. A single match arm has to handle
both. The "render TOML for both" hack we wrote works but is a
symptom — the TOML for an unresolved auth should be undefined,
not "same as resolved".
- **P3. `setup_score.rs` is a 1053-LOC monolith.** Eight responsibilities
in one file: ssh-vs-local connection, ansible orchestration,
systemd unit text, hosts-file merging, podman package install,
fleet-agent user provisioning, keyfile writing, agent restart.
Readability is poor; testability is per-orchestration not
per-step.
- **P4. CRD types live in framework crate.** `Deployment` and
`Device` CRDs are defined in
`harmony::modules::fleet::operator::crd`. The runtime operator
crate (`harmony-fleet-operator`) imports them from there. This
is the most visible symptom of P1.
- **P5. `ReconcileScore` polymorphism is anemic.** Today there's
exactly one variant, `PodmanV0`. The wire format is set up for
evolution but no second variant exists, and the cross-crate
import from `harmony::modules::podman` makes adding one
expensive (re-export dance).
- **P6. Adapter leakage from chart rendering.**
`build_operator_deployment`, `operator_secret`, `build_chart`
are `pub`. Consumers in `examples/` reach in to compose helm
releases by hand. Domain code should not see "what does the
operator's helm chart look like".
- **P7. Composed scores wrap composed scores wrap composed scores.**
`FleetServerScore` wraps {ZitadelScore, ZitadelSetupScore,
NatsK8sScore, NatsAuthCalloutScore, FleetOperatorScore}. Each
of those does its own k8s resource apply + helm install.
Failure modes are deep: a problem in one score's interpret
surfaces wrapped through five layers of "context()". Hard to
debug; hard to reason about ordering.
- **P8. Topology assumptions are everywhere.** Every `Score`
bound is a hand-rolled union of capability traits — `T:
Topology + HelmCommand + K8sclient + TlsRouter + 'static`. Add
a new capability and every callsite has to be updated. Higher-
order topology composition (ADR-015) would let us name "a
thing that is a fleet-capable cluster" once.
- **P9. `Id` is overloaded.** Same type for device IDs, machine
user IDs, deployment IDs, topology names. Newtype-ing each
would catch arg-order swaps at compile time.
- **P10. Configuration is a staircase.** Operator workstation has
`ZitadelClientConfig` cache file. Operator pod has env-var-from-
Secret. Agent has TOML on disk. Three different shapes for
fundamentally the same data (issuer URL, audience, key
material). Maguire's protos critique applies internally — we're
using *several* loose-contract serializations of the same
domain object.
---
## §4 — Design alternatives
Five sketches. The first three are increasingly principled
cleanups; the last two are deliberately weird, included to force
us to recognize where the *core* of the domain actually is.
For each: one paragraph of premise, the resulting top-level types,
how it answers each of P1–P10 (✓ / ✗ / partial), and the
honest pros + cons.
### Alternative A — Move + thin façade (the conservative cleanup)
**Premise:** the existing types are mostly right; the location is
wrong and the façade leaks. Move `harmony/modules/fleet/` to
`fleet/harmony-fleet/`. Re-export only what's intended public.
Don't redesign types.
**Top-level types:** unchanged. `FleetDeviceSetupScore`,
`FleetServerScore`, `FleetOperatorScore`, `FleetDeviceAuth`,
`AdminAuth`, `Deployment` CRD, `Device` CRD. Same shapes, new
location.
**P1 ✓** (location fix is the goal). **P2 ✗** (auth still mixes
resolved/unresolved). **P3 ✗** (monolith preserved). **P4 ✓**
(CRDs co-located with operator). **P5 ✗**. **P6 partial** (we
can `pub(crate)` the chart helpers but the underlying coupling
remains). **P7 ✗**. **P8 ✗**. **P9 ✗**. **P10 ✗**.
**Pros:** small, safe, mechanical. Two days of work. No customer-
visible breakage. Unblocks P4 cleanup naturally.
**Cons:** doesn't actually fix the shape. We'd be back here in
six weeks. JG's review already said this isn't enough. Not the
right answer for v0.1 timing — *would* be the right answer if
we'd already shipped to two customers and couldn't break their
code.
### Alternative B — Resolved-only at boundaries + capability traits (the principled cleanup)
**Premise:** Crichton's typestate + ADR-003's domain capabilities
applied to the existing shape. Split resolved vs. unresolved
auth into separate types. Define capability traits for the
adapters. Move into the right crate. **No wholesale rewrite.**
**Top-level types:**
- New crate `harmony-fleet/` (sibling to `harmony-fleet-operator`,
-agent, -auth). Domain types live here.
- `FleetIdentity`, `FleetDevice`, `FleetDeployment` — domain
records. Plain data.
- `DeviceCredential` — *resolved* only (a JSON keyfile + issuer
URL + audience). Replaces `FleetDeviceAuth::ZitadelJwt`.
- `EnrollmentIntent` — pre-resolution. Carries `AdminAuth` and
what to mint. Method `resolve(&self) -> Result<DeviceCredential>`.
- `Score`s become small + single-responsibility:
- `EnrollDeviceScore` — runs `EnrollmentIntent::resolve` then
publishes to NATS.
- `InstallAgentScore` — drops binary + config + systemd unit.
Takes a `DeviceCredential`. Doesn't know about Zitadel.
- `InstallOperatorScore` — helm chart + Secret. Doesn't know
about devices.
- `BringUpFleetScore` — composes the above. Single layer of
composition, not five.
- Capability traits:
- `DeviceRegistry` — list/get/upsert/delete a `FleetDevice`.
Implementations: `NatsKvDeviceRegistry`,
(later) `RedisStreamsDeviceRegistry`.
- `DesiredStatePublisher`, `ObservedStateConsumer` — same
shape.
- `IdentityProvider` — mint a device credential, issue an
admin token. Today: Zitadel. Tomorrow: something else.
**P1 ✓ P2 ✓ P3 ✓** (split into 4–5 small Scores). **P4 ✓ P5 ✓**
(resolve in the runtime crate, contracts stay neutral).
**P6 ✓** (chart helpers `pub(crate)`, surfaced via `IdentityProvider`
+ `DeploymentReleaseManager` traits). **P7 ✓** (one composer,
not five). **P8 partial** (capability traits defined but bound
unions still get long). **P9 ✓** with newtypes. **P10 partial**
(still three on-disk shapes for credentials, but unified by
trait).
**Pros:** highest-leverage incremental redesign. Buys us most of
the principles without rebuilding plumbing. Customer-visible
breakage is contained to public API renames + import path
moves — no behavior change. Three days is realistic.
**Cons:** we still have a `Score`-shaped mental model where the
*unit of execution* is "a Score". If the right primitive turns
out to be smaller (an effect, an event, a capability call), this
choice wastes some leverage.
### Alternative C — The dataflow reframe (events in, state out)
**Premise:** the fleet platform is, in essence, a **stream
processor**. Events flow in (heartbeats, intent CR creates,
agent reconcile reports). State materializes out (Device CRs,
DeploymentAggregate counters, KV desired-state writes). Today
we model it imperatively as a series of `Score`s; the dataflow
shape is fighting that.
**Top-level types:**
- `FleetEvent` — sum type. `DeviceHeartbeat | DeviceFirstSeen |
DeploymentDesired | DeploymentObserved | DeploymentDeleted | …`
- `FleetStateSnapshot` — what the operator currently knows. Pure
data, derivable.
- `Reducer` — `(state, event) → state`. Pure function. Tests
trivially.
- `Effect` — sum type of side-effects the reducer wants done:
`WriteKv(bucket, key, value) | UpsertCr(cr) | EmitMetric(...)`.
Reducer returns `(new_state, Vec<Effect>)`.
- `EffectRunner` — adapter that performs effects. The only thing
that touches NATS / kube. One implementation per environment.
- The operator pod's main loop: `for event in stream { (state,
effects) = reduce(state, event); runner.run_all(effects) }`.
~50 lines.
**P1 ✓ P2 ✓ P3 ✓ P4 ✓ P5 ✓ P6 ✓ P7 ✓ P8 ✓** (capabilities
collapse into the `EffectRunner` trait). **P9 ✓ P10 partial**.
**Pros:** dramatically simpler operator code. Reducer is pure →
property-test-friendly. The dataflow is the platform. Aligns
with how Kafka / Materialize / Flink-class systems are
structured. Easy to add a new event type — the compiler shows
you every reducer arm to update.
**Cons:** large rewrite of the operator. Three days is
unrealistic. The current `fleet_aggregator.rs` (833 LOC) already
roughly does this but in a less disciplined shape — maybe the
incremental version of this is "make `apply_state` a real
reducer and split `compute_aggregate` into pure pieces". That's
more like Alternative B with extra discipline. The full effect-
typed version is a nice end-state but not a sprint goal.
**Cite:** Materialize's dataflow paper; Kent Beck's *Augmented
Coding* on factoring; Gergely Orosz on event-sourcing; the talk's
"good Lego bricks" framing applies — *events* are the bricks.
### Alternative D — The fleet as a **kube control plane**, period (deliberately weird)
**Premise:** strip the design to one observation. **A fleet is a
Kubernetes cluster whose Nodes happen to be devices, not
servers.** Stop modelling Devices and Deployments separately
from kube primitives. Use Kubernetes itself as the data model.
The operator is one CRD reconciler. NATS is just the transport
between the API server (in the cluster) and the device-side
kubelet-equivalent.
**Top-level types:**
- `Device` is a Node CR. Already exists; we stop wrapping it.
- `Deployment` is a `DaemonSet` (one pod per matching node) or a
`Deployment` (count: N targeted nodes). We stop inventing a
CRD; we use the standard one.
- `DeviceInfo` is the Node's `.status` (capacity, allocatable,
conditions). We stop publishing parallel data; we update
Node status from the agent's NATS messages.
- The agent on the device is a custom kubelet that speaks NATS to
the operator instead of HTTPS to the API server.
- The auth callout still exists; it gates NATS access.
- No `harmony-fleet-operator`-specific CRDs. No `Deployment` /
`Device` CRs of our own.
**P1 ✓ P2 ✓ P3 ✓ P4 N/A** (no CRDs of our own to misplace).
**P5 ✓ P6 ✓ P7 ✓ P8 ✓ P9 ✓ P10 ✓**.
**Pros:** the simplest *conceptual* answer. We stop fighting kube
+ inventing parallel concepts. Customers already understand
DaemonSets, Node selectors, and `kubectl get nodes`. The agent
becomes a known kind of thing (a kubelet variant) with shoulders
to stand on (k3s-iot, kine, virtual-kubelet projects already
prove this works).
**Cons:** *a lot* of plumbing changes. Devices need to register
as Nodes (which means either a real kubelet on each Pi, or a
virtual-kubelet façade). The agent's reconcile loop becomes
"watch a CR via NATS, render manifests, run pods" — bigger than
"watch a KV value, run podman". JetStream KV becomes redundant
with the kube API server. **Probably the right end-state for
v2.0, wrong for v0.1.** Worth noting, though, because comparing
A/B/C to D pulls out which of our current invented concepts are
load-bearing (very few — DeviceInfo is mostly just Node.status;
DeploymentAggregate is mostly just kube's
.status.observedGeneration / .status.conditions stuff).
**Cite:** virtual-kubelet, k3s-iot, KubeEdge, OpenYurt. They've
walked this path; the lessons are public.
### Alternative E — Algebra of fleets (deliberately weird, mathematical)
**Premise:** model the platform as a small algebra. A fleet is a
**set of devices** + an **assignment function** (selector → set
of deployments). Operations on fleets are set-theoretic +
function composition. Treat the API as a query language over
this algebra.
**Top-level types:**
- `Fleet` ::= `Set<Device>`. With operations: union, intersection,
filter-by-selector, partition.
- `Selector` ::= a pure predicate `Device → bool`. Built from
primitives `label("k") = "v"`, `arch = aarch64`, …, combined
with `&`, `|`, `!`.
- `Assignment` ::= `Selector → Set<Deployment>`. Pure function.
- `World` ::= `(Fleet, Assignment)`. Pure data. The operator's job
is to make reality match the World.
- `Diff(World, Reality) → Vec<Action>`. Pure function. Closed
form — given the algebra, you can prove what actions are
*necessary* and *sufficient*.
**P1–P10 ✓** (in principle). **Code volume probably 30% of
current.**
**Pros:** clarity. Properties become provable: "no device gets
an unassigned deployment", "removing a label removes the
assignment", "two operators can edit independently and the merge
is well-defined" (because functions compose). The "make
impossible states impossible" principle, applied to the *fleet
shape itself*, not to individual types.
**Cons:** **almost certainly an over-fit.** The real platform has
dirty edges (devices that fail, network partitions, half-applied
state) that don't sit naturally in a pure algebra. Most teams
that go down this road end up bolting "real-world" escape hatches
back on, ending up with the original design plus extra category
theory. **Useful as a north star** for the cardinality choices,
**not as the platform's actual shape.**
**Cite:** Hillel Wayne *Using Formal Methods at Work*; Conal
Elliott on functional reactive programming; the classic "set
theory for systems people" talks.
### Comparison matrix
| | A. Move | B. Capabilities | C. Dataflow | D. Kube-native | E. Algebra |
|---|---|---|---|---|---|
| Fixes P1 (location) | ✓ | ✓ | ✓ | ✓ | ✓ |
| Fixes P2 (auth states) | ✗ | ✓ | ✓ | ✓ | ✓ |
| Fixes P3 (monolith) | ✗ | ✓ | ✓ | ✓ | ✓ |
| Fixes P4 (CRD placement) | ✓ | ✓ | ✓ | N/A | N/A |
| Fixes P5 (anemic enum) | ✗ | ✓ | ✓ | N/A | partial |
| Fixes P6 (adapter leak) | partial | ✓ | ✓ | ✓ | ✓ |
| Fixes P7 (deep wrap) | ✗ | ✓ | ✓ | ✓ | ✓ |
| Fixes P8 (trait union) | ✗ | partial | ✓ | ✓ | ✓ |
| Fixes P9 (Id overload) | ✗ | ✓ | ✓ | ✓ | ✓ |
| Fixes P10 (config staircase) | ✗ | partial | partial | ✓ | partial |
| Fits 3-day window | ✓ | ✓ (tight) | ✗ | ✗ | ✗ |
| Customer-visible breakage | low | medium | medium | very high | high |
| Risk to demo schedule | very low | low | medium | very high | high |
| Long-term ceiling | low | high | high | very high | very high |
---
## §5 — Recommendation (preliminary)
Read the matrix as: **B is the right answer for now**, with
**explicit awareness of D as the v2.0 destination**.
- A is too little. We'd be back here.
- C and E are right in shape but wrong in timing — we don't have a
week to rebuild the operator's reconcile loop, and the platform
isn't in production yet, so there's no urgent "we have to
refactor anyway" pressure.
- D is conceptually the cleanest, but a v0.1 production push
is the wrong moment to start running custom kubelets.
- B captures most of the leverage of C/D within the 3-day window,
with a clean migration path to either of them later (the
capability traits are the seam — swap the implementation, not the
callers).
**One concrete shape** to pursue under Alternative B (worth
sketching as the strawman ADR):
- New crate `harmony-fleet/` (the domain crate). Depends on
`harmony-reconciler-contracts` only.
- Domain records: `FleetDevice`, `FleetDeployment`, `FleetState`.
- Capability traits: `DeviceRegistry`, `DesiredStatePublisher`,
`ObservedStateConsumer`, `IdentityProvider`,
`AgentLifecycle`.
- `harmony-fleet-adapters-nats/` — `NatsDeviceRegistry`,
`NatsDesiredStatePublisher`, etc. NATS-specific.
- `harmony-fleet-adapters-zitadel/` — `ZitadelIdentityProvider`.
- `harmony-fleet-adapters-kube/` — `KubeFleetReflector` (writes
`Device` and `Deployment` CRs as a *reflection* of the domain
state, not as the source of truth).
- `harmony-fleet-operator/` — daemon. Wires adapters together.
- `harmony-fleet-agent/` — daemon. Wires adapters together.
- `harmony-fleet-cli/` — tomorrow's `harmony-fleet` plugin.
- `harmony/modules/fleet/` is **deleted**. The framework `harmony`
crate gets a thin `harmony::modules::fleet` *re-export only*
module that points at `harmony-fleet`. After v0.2 is shipped,
the re-export module goes away too.
CRDs (`Deployment`, `Device`) move to
`harmony-fleet-adapters-kube/` because they're a kube-specific
projection of the domain, not the domain itself. The agent
imports `harmony-fleet`'s domain types, not the CRDs.
The setup-side scores stay in `harmony` (because they need the
framework's `HelmCommand`, `K8sclient`, etc.) but they consume
`harmony-fleet`'s domain types. The fleet's *domain* doesn't
depend on the framework; the framework's *deploy procedures*
depend on the fleet's domain. Direction of dependency is the
inverse of today.
## §6 — Open questions before we lock this
These are real questions; pulling them out so JG's review has
something concrete to react to:
- **Q1.** Is `IdentityProvider` the right capability name, or is
it more honest to name it after what we actually need
(`DeviceCredentialMinter`, `OperatorTokenProvider`)? The talk
argues against generic names — if reality has two distinct
concerns, two traits.
- **Q2.** Should `Device` CRD live in adapters-kube, or should it
not exist at all (replaced by reading kube-API node info, per
alternative D)? The middle ground (own CRD that mirrors kube
Node) is what we have today, and it's the worst of both.
- **Q3.** The agent's wire-format for `ReconcileScore` —
externally tagged enum, today only `PodmanV0`. Move it to
`harmony-reconciler-contracts` (canonical wire seam) and let
*both* the agent and the operator import only that crate. This
removes the `harmony::modules::podman` cross-crate dependency.
Worth doing in any of A/B/C.
- **Q4.** Does the v0.1 prod push wait for this redesign, or does
it ship on the current shape with the redesign happening in
v0.2? Tradeoff: shipping now means committing to *some* public
API; shipping after means slipping the customer date.
Recommendation: **ship the redesign first, slip 3 days**, on
the grounds that public API churn after a customer is on it
costs more than a 3-day delay before they're on it.
- **Q5.** Where do the *runtime tools* (the `harmony-fleet` CLI
plugin, future frontend) sit in the dependency graph? If they
depend on `harmony-fleet`'s domain crate only, we can build
them without pulling in helm / kube / ansible at compile time.
This is what we want for the device-side enrollment binary too
(already feature-gated; the redesign should make the gate
unnecessary).
---
## §7 — Next steps
1. Sit with this document. Walk away from it for an hour.
2. Round-table on §3 — do P1–P10 capture *the* problems, or are
we missing one?
3. Round-table on §4 — does the comparison matrix feel honest,
or is it tilted?
4. Pick one alternative as the working hypothesis.
5. Spike: take one slice through the chosen alternative
(suggested: `EnrollmentIntent::resolve` + `DeviceCredential` +
the `IdentityProvider` trait — the smallest end-to-end shape
that touches every layer). Commit it on a branch. Eyeball:
does the resulting code feel better?
6. Either: commit to the alternative as ADR-023, or back out
and try another.
This document gets updated as we go. It is NOT meant to be
locked at first draft.

View File

@@ -0,0 +1,83 @@
# Working draft: Topology proliferation
**Not an ADR yet.** A concern noted in many places that doesn't
yet have a clean answer. Parked here so it doesn't get lost.
When the answer is clear, promote to a numbered ADR under
`docs/adr/`.
## The concern
Harmony has accumulated topology structs without a clear rule
for when a *new topology* is the right answer versus when a
*capability trait on an existing topology* is the right answer:
- `K8sAnywhereTopology` — dynamic discovery, the "any place
that runs k8s" default.
- `HAClusterTopology` — bare-metal HA with redundant
firewalls/switches.
- `K8sBareTopology` — minimal topology for ad-hoc Score
execution (introduced in this branch).
- `FailoverTopology<T>` — higher-order, primary/replica.
- `DecentralizedTopology<T>` — higher-order, multi-site.
- `FirewallPairTopology`, `SwitchTopology`, …
There is no written framework rule for which path to take. The
result is a slow drift toward "every new deploy shape becomes a
new topology struct," which is the opposite of ADR-015's
zero-cost higher-order composition direction.
## Where the concern is already noted
- `ROADMAP/12-code-review-april-2026.md` task 12.6 — "topology
proliferation."
- `ROADMAP/12-code-review-april-2026.md` task 12.1 — phased
topology (`LinuxHostTopology` → `KvmHostTopology` after
package install). Different angle on the same shape problem:
how does a topology *gain* capabilities at runtime?
- ADR-015 ("Higher-order topologies via blanket trait impls")
argues for capability composition, but doesn't lock the rule
"prefer capabilities over new topology structs."
- ADR-023 §6 references topology as compile-time-static —
closing the door on `Box<dyn Topology>` plugins but leaving
open which axis of variability is a new type and which is a
capability impl.
## Open question
Is the right rule:
- (a) **Capability-first**: a new deploy shape adds a capability
trait to an existing topology when possible; new topology
structs are reserved for genuinely new physical-shape
categories (single-host vs. HA vs. cloud). `K8sBareTopology`
arguably should have been a constructor on
`K8sAnywhereTopology`.
- (b) **Topology-first**: every coherent deploy environment is
its own topology. `K8sBareTopology` is correct; the concern
is just naming and documentation.
- (c) **A test-driven middle ground**: capability sets are
primary, topologies are bundles of capability-set + physical
context. Re-frames the existing types but doesn't break
anything.
The fleet platform work (ADR-024 draft, capability decomposition)
points toward (a) or (c). The framework as it stands runs on
(b)-by-default.
## What's needed before this can be an ADR
- A worked example of converting one of the existing topology
structs to its capability-first equivalent (probably
`K8sBareTopology` → constructor on `K8sAnywhereTopology` with
a feature-flag capability set).
- A look at whether ADR-024's capability decomposition for the
fleet platform suggests a generalization of the same
decomposition for the framework's topology layer.
- A look at how phased topology (task 12.1) fits — phase
transition is conceptually a *gain of capabilities*, which is
hard to express in (b) but natural in (a)/(c).
## Owner
Unassigned. Next time someone touches topology code and feels the
itch, this draft is the place to add their notes.

View File

@@ -0,0 +1,189 @@
# Manual Zitadel token mint + NATS write
Operator-side recipe for talking to a callout-protected NATS by
hand: sign a JWT-bearer assertion with a Zitadel machine user's
private key, exchange it for an access token, drive `nats` CLI
commands with the token. Useful for debugging the auth chain,
poking the desired-state KV without the operator running, and
validating that a deployed callout is actually accepting what
you think it should.
Read [fleet-zitadel-faq.md](./fleet-zitadel-faq.md) first for the
underlying mechanism (RFC 7523 JWT-bearer flow, why we sign
locally, what each claim means).
## Inputs you need
Five strings:
| Input | Where to find it |
| --- | --- |
| `OIDC_ISSUER_URL` (the Zitadel base URL) | callout Deployment env: `kubectl exec -n fleet-system deploy/fleet-callout -- printenv OIDC_ISSUER_URL` |
| `project_id` (becomes the access token's `aud`) | callout Deployment env: `OIDC_AUDIENCE` |
| Machine user's `userId` | the JSON keyfile's `userId` field |
| Machine user's `keyId` | the JSON keyfile's `keyId` field |
| Private RSA key (PEM) | the JSON keyfile's `key` field |
Get the `fleet-ops` (admin role) JSON keyfile from the cache:
```bash
jq -r '.machine_keys["fleet-ops"]' \
~/.local/share/harmony/zitadel/client-config.json \
> /tmp/fleet-ops.json
jq -r '.userId' /tmp/fleet-ops.json # → user_id
jq -r '.keyId' /tmp/fleet-ops.json # → key_id
jq -r '.key' /tmp/fleet-ops.json > /tmp/fleet-ops.pem
```
The cache may drift from the deployed Zitadel state if Zitadel has
been re-seeded; **always pull `OIDC_AUDIENCE` from the running
callout**, not from the cache. The cache fix landed in commit
`f4d6fb94` but older entries can still trip you up.
## Mint script (PyJWT)
```python
# pip install PyJWT requests ← MUST be PyJWT, not the `jwt` package.
# The two share `import jwt`; `jwt` (the package) refuses raw PEM
# strings and demands an AbstractJWKBase wrapper. PyJWT takes PEM
# directly. If you ever see `TypeError: key must be an instance of
# a class implements jwt.AbstractJWKBase`, you have the wrong one.
import jwt, time, requests
# These come from the running callout + Zitadel. Don't reuse stale
# values from a checked-in note; verify against the live cluster.
OIDC_ISSUER_URL = "http://sso.fleet.local:8080"
PROJECT_ID = "371158654839160853" # = OIDC_AUDIENCE on callout
USER_ID = "..." # from machine keyfile
KEY_ID = "..." # from machine keyfile
key = open("/tmp/fleet-ops.pem").read()
now = int(time.time())
assertion = jwt.encode(
{
"iss": USER_ID,
"sub": USER_ID,
"aud": OIDC_ISSUER_URL, # for Zitadel itself, NOT the project_id
"exp": now + 60, # Zitadel rejects exp - iat > 60s
"iat": now,
},
key,
algorithm="RS256",
headers={"kid": KEY_ID}, # PyJWT spelling — `headers=`, not `optional_headers=`
)
r = requests.post(
f"{OIDC_ISSUER_URL}/oauth/v2/token",
data={
"grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
"assertion": assertion,
# Three scopes:
# openid — base OIDC
# urn:zitadel:iam:org:projects:roles — PLURAL.
# Without this, Zitadel omits the role claim and the
# callout rejects with "no authorized role in token".
# urn:zitadel:iam:org:project:id:<id>:aud — singular.
# Tells Zitadel to put <id> into the access token's
# `aud` claim, which the callout's audience check
# compares against OIDC_AUDIENCE.
"scope": (
"openid "
"urn:zitadel:iam:org:projects:roles "
f"urn:zitadel:iam:org:project:id:{PROJECT_ID}:aud"
),
},
)
r.raise_for_status()
token = r.json()["access_token"]
# Sanity check — decode without verifying signature so you can see
# what Zitadel actually emitted. If anything below is wrong, the
# callout will reject your token.
print(jwt.decode(token, options={"verify_signature": False}))
print(token)
```
Expected decoded claims (the parts the callout will check):
| Claim | What it should be | Why |
| --- | --- | --- |
| `iss` | `OIDC_ISSUER_URL` (byte-equal) | Callout: `validation.set_issuer(&[&self.issuer_url])` |
| `aud` | `["<PROJECT_ID>"]` | Callout: `validation.set_audience(&[&self.audience])`; the array form is Zitadel's default |
| `exp` | ~now + 12h | Zitadel default access token TTL |
| `client_id` | the machine user's username (`fleet-ops`, `device-vm-device-00`, …) | Callout uses this as `device_id_claim` (with optional `DEVICE_ID_PREFIX_STRIP` applied) |
| `urn:zitadel:iam:org:project:<PROJECT_ID>:roles` | object with role names as keys (e.g. `{"fleet-admin": {"<orgId>": "<orgName>"}}`) | Callout uses this as `roles_claim` and admits the role if `fleet-admin` or `device` is present |
If any of these is wrong, fix the script before bothering with NATS.
## Drive NATS with the token
`nats --token=<bearer>` puts the value into the CONNECT frame's
`auth_token`, which is what the callout expects.
```bash
NATS_SERVER=192.168.122.1:30422 # libvirt host's port mapping
TOKEN=$(python3 mint.py | tail -1) # last line is the raw token
# Read everything (admin role allows >):
nats --server "$NATS_SERVER" --token "$TOKEN" kv ls device-info
nats --server "$NATS_SERVER" --token "$TOKEN" kv get device-info info.vm-device-00
# Write a desired state — agent's KV watcher fires within 1s,
# reconciler creates the podman container.
nats --server "$NATS_SERVER" --token "$TOKEN" \
kv put desired-state vm-device-00.hello-web '{
"name": "hello-web",
"type": "PodmanV0",
"data": {
"services": [{
"name": "testnginx",
"image": "docker.io/nginx:latest",
"ports": ["8080:80"]
}]
}
}'
```
The exact JSON shape comes from
`harmony-reconciler-contracts/src/fleet.rs` — read that crate when
in doubt about field names, NOT this doc; this doc is a worked
example and may drift.
## Common failures and what they mean
| Symptom | Likely cause |
| --- | --- |
| `TypeError: key must be an instance of … AbstractJWKBase` | Wrong PyPI package. `pip uninstall jwt && pip install PyJWT`. |
| HTTP 400 from `/oauth/v2/token`: `"invalid_grant_type"` | The request body was not sent form-encoded (`application/x-www-form-urlencoded`), OR the `grant_type` value is mistyped. The full URN is `urn:ietf:params:oauth:grant-type:jwt-bearer`. |
| HTTP 400: `"jwt: token is expired"` | Your assertion's `exp` is in the past. Wall-clock skew between your laptop and the cluster — sync NTP. |
| Token mints but no `urn:zitadel:…:roles` claim | Missing the **plural** `urn:zitadel:iam:org:projects:roles` in scope. |
| Token mints but `aud` is the issuer URL instead of the project id | Forgot the `urn:zitadel:iam:org:project:id:<id>:aud` scope. |
| NATS CLI: `nats: Authorization Violation` | Token is good but callout rejected it — check `kubectl logs -n fleet-system -l app=fleet-callout` for the actual reason. The most common ones are "InvalidAudience" (your `aud` ≠ deployed `OIDC_AUDIENCE`) and "no authorized role in token". |
| Callout log: `JWT validation failed: InvalidIssuer` | Trailing slash drift. `OIDC_ISSUER_URL=http://sso.fleet.local:8080/` ≠ `http://sso.fleet.local:8080`. Match exactly. |
When the callout rejects, **its log is the source of truth**, not
your decoded claims. The validation error includes which check
failed; work backwards from there.
## Rotating the deployed `OIDC_AUDIENCE`
If Zitadel was re-seeded and `OIDC_AUDIENCE` on the callout now
points at a non-existent project:
```bash
# 1. Confirm the live project id
oc -n zitadel exec -ti deploy/zitadel -- /bin/sh -c \
'curl -s -H "Authorization: Bearer $PAT" \
$ZITADEL_URL/management/v1/projects/_search \
| jq ".result[] | select(.name == \"fleet\") | .id"'
# 2. Re-run the bring-up — the live-query fix in f4d6fb94 will
# refresh OIDC_AUDIENCE on the next NatsAuthCalloutScore apply.
```
The shape of `mint.py` doesn't change between regular operation
and post-recovery — you just plug in the fresh project id as
`PROJECT_ID` (it must equal the callout's refreshed `OIDC_AUDIENCE`).

View File

@@ -0,0 +1,460 @@
# Fleet staging install on OKD
End-to-end runbook for deploying the fleet stack (Zitadel + NATS +
auth callout + operator) on an OKD cluster, with a remote agent
connecting through the public WSS endpoint. Targets the staging
shape — single-instance NATS, public Zitadel + NATS WS Routes with
edge-TLS via cert-manager, env-only Secret config (no volume
mounts) so default `restricted-v2` SCC is enough.
Time budget: ~30 min on a warm cluster, ~60 min cold.
## 0. Prereqs
- `oc` CLI logged in with cluster-admin (or at least
cluster-scoped privileges on the namespaces below — namespace
create, CRD apply, ClusterRole create).
- `podman` on your laptop, authenticated to the destination registry
(default `hub.nationtech.io/harmony` — `podman login` if needed).
- `helm` on PATH (used by Harmony's helm chart Scores).
- The staging cluster has:
- cert-manager installed and a `ClusterIssuer` ready for the
cluster's base domain (default name: `letsencrypt-prod` —
override with `--cluster-issuer` if yours differs).
- CNPG (cloudnative-pg) operator installed (Zitadel relies on it
for its Postgres cluster).
- DNS: the chosen `--base-domain` resolves to the OKD ingress
router. For `cb1.nationtech.io`, that means `*.cb1.nationtech.io`
or at least `sso-staging.cb1.nationtech.io` and
`nats-fleet-staging.cb1.nationtech.io` must point at the OKD
router VIP. If you're using the cluster's apps domain
(`apps.cb1.nationtech.io`), set `--base-domain` accordingly.
- Access to write a `[credentials]` TOML on whichever machine will
run the agent (your laptop is fine for the demo).
## 1. Build and push images
The staging install pulls operator + auth-callout images from your
registry. The helper script builds both, tags them, and pushes:
```bash
cd /path/to/harmony
./fleet/scripts/build_and_push_images.sh
```
Defaults: `REGISTRY=hub.nationtech.io/harmony`, `IMAGE_TAG=dev`,
`PUSH=1`. Override with environment variables. Skip the push (e.g.
to inspect the images locally first) with `PUSH=0`.
Output ends with the exact `--operator-image` / `--callout-image`
flags to paste into step 4.
**Verify:**
```bash
podman images | grep harmony # both refs present locally
podman pull hub.nationtech.io/harmony/harmony-fleet-operator:dev # registry confirmed
```
## 2. Create namespaces
```bash
oc new-project zitadel-staging
oc new-project fleet-staging
```
If `hub.nationtech.io` requires authentication, add the imagePullSecret
to both namespaces (each pod that pulls from the registry needs it):
```bash
# adjust to whatever you have for hub.nationtech.io
oc -n fleet-staging secrets link default <hub-pull-secret> --for=pull
oc -n zitadel-staging secrets link default <hub-pull-secret> --for=pull
```
(For Zitadel + Postgres the chart pulls from public registries, so
the secret is only strictly required in `fleet-staging` for the
operator + callout images. Linking both is safest.)
## 3. Set KUBECONFIG and verify cluster context
```bash
export KUBECONFIG=$ADMIN_KUBECONFIG
oc whoami
oc config current-context
oc get clusterversion # confirm OKD reachable + healthy
```
The install runs with this `KUBECONFIG`. **Double-check** before
running step 4 — Harmony's `K8sAnywhereTopology::from_env()` honors
this and there's no second confirmation prompt.
## 4. Run `fleet_staging_install`
```bash
cargo run --release -p example_fleet_staging_install -- \
--base-domain cb1.nationtech.io \
--operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev \
--callout-image hub.nationtech.io/harmony/harmony-nats-callout:dev
```
Optional flags (defaults shown):
```
--cluster-issuer letsencrypt-prod
--fleet-namespace fleet-staging
--zitadel-namespace zitadel-staging
--nats-account FLEET
--zitadel-version v4.12.1
--project-name fleet
--admin-role fleet-admin
--device-role device
--operator-username fleet-operator
--admin-username fleet-ops
```
Step-by-step the binary does:
1. **Zitadel helm install** — Postgres (CNPG) + Zitadel chart into
`--zitadel-namespace`. Edge-TLS Route at `sso-staging.<base>`
with cert-manager-driven certificate.
2. **Zitadel setup** — project, two roles (`fleet-admin`, `device`),
API app `nats`, and two machine users (`fleet-ops` for manual
admin work, `fleet-operator` for the operator pod). Both get
JSON keys cached at `~/.local/share/harmony/zitadel/client-config.json`.
3. **NATS install** — single-instance JetStream, `auth_callout`
block referencing the issuer NKey pubkey, WebSocket listener on
8080. Edge-TLS Route at `nats-fleet-staging.<base>`.
4. **Auth callout deployment** — env-only Secret config (no mounts),
wired to the same issuer + Zitadel project audience.
5. **Operator deployment** — single Secret holding the credentials
TOML (with the operator's JSON keyfile inlined). One env var,
`FLEET_OPERATOR_CREDENTIALS_TOML`, no volumes.
The binary prints the URLs + project_id at the end. Save that block
— you'll need the project_id for the agent config.
**Expected output tail:**
```
=== fleet-staging install complete ===
Zitadel: https://sso-staging.cb1.nationtech.io/
NATS WS public: wss://nats-fleet-staging.cb1.nationtech.io/
NATS in-cluster: nats://fleet-nats.fleet-staging.svc.cluster.local:4222
Operator: oc -n fleet-staging get deploy/harmony-fleet-operator
Auth callout: oc -n fleet-staging get deploy/fleet-callout
Project id: 371xxxxxxxxxxxxxxx
Admin user: fleet-ops (machine key in ~/.local/share/harmony/zitadel/client-config.json)
Operator user: fleet-operator (machine key embedded in operator's Secret)
```
## 5. Verify each layer
### 5.1 Zitadel reachable, certificate provisioned
```bash
# pod up
oc -n zitadel-staging get pods
# expect: zitadel-* Running, zitadel-pg-1/2 Running
# Route + certificate (cert-manager creates the secret)
oc -n zitadel-staging get route
oc -n zitadel-staging get certificate
# OIDC discovery from the public URL
curl -s https://sso-staging.cb1.nationtech.io/.well-known/openid-configuration | jq .issuer
# expect: "https://sso-staging.cb1.nationtech.io"
```
If `curl` fails with TLS errors, the cert-manager certificate isn't
ready yet. Watch its status:
```bash
oc -n zitadel-staging describe certificate
oc -n cert-manager logs deploy/cert-manager --tail=50
```
A `Ready` condition `True` + `secretName: zitadel-tls` populated
means the Route can serve HTTPS.
### 5.2 NATS pod up, callout connected
```bash
oc -n fleet-staging get pods
# expect:
# fleet-nats-0 2/2 Running (NATS + reloader sidecar)
# fleet-callout-... 1/1 Running
oc -n fleet-staging logs deploy/fleet-callout --tail=30 | grep -E "starting|JWKS|listening"
# expect:
# starting harmony NATS auth callout
# JWKS refreshed count=2
# auth callout service listening subject="$SYS.REQ.USER.AUTH"
```
If the callout pod is in CrashLoopBackOff:
```bash
oc -n fleet-staging logs deploy/fleet-callout --previous --tail=30
```
Most common: OIDC issuer URL mismatch. The callout's
`OIDC_ISSUER_URL` env must byte-equal what Zitadel emits as `iss` in
its discovery doc. Check both:
```bash
oc -n fleet-staging exec deploy/fleet-callout -- printenv OIDC_ISSUER_URL
# vs.
curl -s https://sso-staging.cb1.nationtech.io/.well-known/openid-configuration | jq .issuer
```
### 5.3 Operator authenticated and running
```bash
oc -n fleet-staging get pods -l app.kubernetes.io/name=harmony-fleet-operator
oc -n fleet-staging logs deploy/harmony-fleet-operator --tail=30
```
Look for, in order:
```
minted fresh Zitadel access token audience=<project_id>
connected successfully server=4222
NATS connected
KV bucket ready bucket=desired-state
starting Deployment controller
device-reconciler: watching device-info KV
aggregator: startup complete
```
If you see `Permissions Violation` errors, the callout's
`OIDC_AUDIENCE` (project_id at deploy time) doesn't match the
project_id in Zitadel today. Re-run step 4 — the Zitadel setup now
re-queries live IDs on every apply, so the callout's `OIDC_AUDIENCE` is refreshed.
### 5.4 NATS WSS reachable from outside the cluster
```bash
curl -sSI https://nats-fleet-staging.cb1.nationtech.io/ | head -5
```
Expect a 4xx (NATS doesn't speak HTTP, but the TLS handshake should
succeed and you'll get back a WebSocket-upgrade-related response).
A connection refused or TLS handshake error means the Route or
cert-manager is unhappy.
### 5.5 CRDs registered
```bash
oc get crd | grep fleet.nationtech.io
# expect:
# deployments.fleet.nationtech.io
# devices.fleet.nationtech.io
```
## 6. Connect a remote agent
The fleet agent runs on the device (laptop, Pi, anywhere with
outbound HTTPS). It needs:
- Its own Zitadel machine user with the `device` role grant.
- The JSON keyfile from that user.
- A `[credentials]` TOML pointing at the public Zitadel + the WSS
NATS URL.
### 6.1 Mint a per-device machine user
Use `oc port-forward` or a helper to call Zitadel's API. Easier
path: drop a quick Score that adds one machine user. For tonight,
do it from the Zitadel UI:
1. Browse to `https://sso-staging.cb1.nationtech.io/ui/console/`,
log in as the human admin (password from Zitadel ConfigMap on
first install — see `docs/guides/fleet-zitadel-faq.md`).
2. Pick the `Default` org → `fleet` project → Roles → confirm
`device` exists.
3. Org → Users → Service Users → New: name `device-laptop-01`,
userName `device-laptop-01`. Save.
4. The user's "Personal Information" tab → Authorizations or
"Authorization" → "+New" — grant the `fleet` project's `device`
role to this user.
5. The user's "Keys" tab → "+New", type `JSON`, expiration set to a
future date. **Download the keyfile JSON** — Zitadel only shows the
private half once. Save as `~/.local/share/harmony/fleet/agents/device-laptop-01.json`.
### 6.2 Build the agent locally
```bash
cargo build --release -p harmony-fleet-agent
ls -la target/release/harmony-fleet-agent
```
### 6.3 Render the agent's config TOML
```bash
PROJECT_ID=$(oc -n fleet-staging exec deploy/fleet-callout -- printenv OIDC_AUDIENCE)
cat > /tmp/fleet-agent-config.toml <<EOF
[agent]
device_id = "device-laptop-01"
[nats]
urls = ["wss://nats-fleet-staging.cb1.nationtech.io"]
[credentials]
type = "zitadel-jwt"
key_path = "/etc/fleet-agent/zitadel-key.json"
oidc_issuer_url = "https://sso-staging.cb1.nationtech.io"
audience = "$PROJECT_ID"
[labels]
env = "staging"
location = "laptop"
arch = "$(uname -m)"
EOF
```
The agent's username convention is `device-<device_id>`, matching
the callout's `DEVICE_ID_PREFIX_STRIP=device-`. The Zitadel machine
user must literally be `device-laptop-01` for the JWT-bearer flow
to extract the right device id.
### 6.4 Run the agent
```bash
sudo mkdir -p /etc/fleet-agent
sudo cp ~/.local/share/harmony/fleet/agents/device-laptop-01.json \
/etc/fleet-agent/zitadel-key.json
sudo chown $(id -u):$(id -g) /etc/fleet-agent/zitadel-key.json
sudo chmod 0400 /etc/fleet-agent/zitadel-key.json
FLEET_AGENT_CONFIG=/tmp/fleet-agent-config.toml \
RUST_LOG=info \
./target/release/harmony-fleet-agent
```
Watch the log:
```
fleet-agent-v0 starting device_id=device-laptop-01
podman socket ready
inventory loaded hostname=...
connecting to NATS ["wss://nats-fleet-staging.cb1.nationtech.io"]
minted fresh Zitadel access token audience=<project_id>
connected successfully server=...
NATS connected
fleet publisher ready
watching KV keys filter=device-laptop-01.>
```
If you hit `Permissions Violation` errors after `connected`:
- check `oc -n fleet-staging logs deploy/fleet-callout --tail=20` —
it'll show why the JWT was rejected (audience, role claim,
device_id format).
### 6.5 Verify the operator created a Device CR
```bash
oc get devices
# expect:
# NAME AGE
# device-laptop-01 Xs
oc describe device device-laptop-01
# labels block reflects what the agent sent in [labels]
```
## 7. Drive a deployment end to end
```bash
cat > /tmp/hello-web.yaml <<'EOF'
apiVersion: fleet.nationtech.io/v1alpha1
kind: Deployment
metadata:
name: hello-web
spec:
score:
type: PodmanV0
data:
services:
- name: testnginx
image: docker.io/nginx:latest
ports:
- "8080:80"
targetSelector:
matchLabels:
env: staging
rollout:
strategy: Immediate
EOF
oc apply -f /tmp/hello-web.yaml
# Status reflect-back from the agent (takes ~5-15s)
oc get deployment.fleet.nationtech.io hello-web -o yaml | yq '.status'
# expect:
# aggregate:
# matchedDeviceCount: 1
# succeeded: 1
# failed: 0
# pending: 0
# On the device:
podman ps
# expect: testnginx running, port 8080→80
curl -sS http://localhost:8080 | head -3
```
## 8. Common failure modes
| Symptom | Cause / fix |
| --- | --- |
| `cert-manager` Certificate stuck `False` for 5+ min | DNS for the host doesn't resolve to the OKD router yet. `dig sso-staging.<base> +short` should match the cluster's ingress IP. Or your `letsencrypt-prod` ClusterIssuer is using HTTP01 and the route isn't reachable from Let's Encrypt. |
| Operator pod `Error: constructing CredentialSource` | The credentials TOML in the Secret is malformed. `oc -n fleet-staging get secret harmony-fleet-operator-secrets -o jsonpath='{.data.credentials\.toml}' \| base64 -d` and inspect; the `key_json` field must be a valid JSON keyfile string (multi-line triple-quoted in TOML is fine). |
| Operator pod `Permissions Violation` after `NATS connected` | Issuer pubkey or project_id mismatch between callout and NATS chart values, or Zitadel was reset and the operator's machine key no longer authenticates. Re-run `cargo run -p example_fleet_staging_install`. |
| Agent: `Zitadel token endpoint returned 400: invalid_grant_type` | TOML scope assembly bug or wrong `audience`. Confirm `audience` matches `oc exec deploy/fleet-callout -- printenv OIDC_AUDIENCE`. |
| Agent: connects, then `Permissions Violation for Publish to "$KV.device-info..."` | The device's machine user has no `device` role grant. Add via Zitadel UI → user → Authorizations. |
| `Deployment.fleet.nationtech.io` CR applied but `matchedDeviceCount: 0` | `targetSelector.matchLabels` doesn't match any Device CR's `metadata.labels`. `oc get devices --show-labels`. |
| Container redeploys every 30s on the device | Known FIXME — the agent's `matches_spec` returns false for any spec with env or volumes. For the demo, use trivial specs (the hello-web above is fine). Tracked in `harmony/src/modules/podman/topology.rs`. |
## 9. Tear down
The Helm releases own the bulk of the resources, so the cleanest
recovery from a broken state is:
```bash
helm -n zitadel-staging uninstall zitadel
helm -n fleet-staging uninstall fleet-nats
oc -n fleet-staging delete deploy/harmony-fleet-operator deploy/fleet-callout
oc -n fleet-staging delete secret harmony-fleet-operator-secrets fleet-callout-secrets
oc -n zitadel-staging delete pgcluster zitadel-pg --ignore-not-found
oc delete project zitadel-staging fleet-staging
# CRDs persist (helm.sh/resource-policy: keep). Delete by hand if you
# really want a clean slate:
oc delete crd deployments.fleet.nationtech.io devices.fleet.nationtech.io
```
The host-side `~/.local/share/harmony/zitadel/client-config.json`
caches machine keys + project IDs from this install. Wipe it before
re-installing against a freshly reset Zitadel:
```bash
rm -f ~/.local/share/harmony/zitadel/client-config.json
```
(The cache-vs-live drift bug is fixed — `ZitadelSetupScore` now
re-queries Zitadel for IDs on every apply — but stale machine-key
material from a deleted Zitadel project will fail at JWT-bearer
mint until you delete + re-create.)
## 10. Cross-reference
- [`fleet-zitadel-faq.md`](./fleet-zitadel-faq.md) — concepts behind
Zitadel projects, roles, machine users, audit-trail decisions.
- [`fleet-manual-token-mint.md`](./fleet-manual-token-mint.md) —
worked recipe for minting an admin token by hand and using it
with `nats kv` commands.
- `examples/fleet_staging_install/src/main.rs` — the install code
itself; the comments narrate every step.
- `harmony/src/modules/fleet/server.rs::FleetServerScore` —
composable form of the same install for callers that don't need
the intermediate read of `ZitadelClientConfig`.

View File

@@ -0,0 +1,185 @@
# Fleet × Zitadel FAQ
Technical reference for the Zitadel setup behind the fleet
auth callout. Describes what exists, why it's that way, and where
each piece lives in the code.
Code anchors:
- `examples/fleet_e2e_demo/src/lib.rs` — bring-up flow
- `harmony/src/modules/zitadel/setup.rs` — `ZitadelSetupScore`
- `harmony/src/modules/zitadel/mod.rs` — Helm install
- `nats/callout/src/handler.rs` — auth callout
- `fleet/harmony-fleet-agent/src/credentials.rs` — JWT-bearer mint
---
## What is an "application" in Zitadel?
An OIDC client config: `clientId`, allowed grant types, redirect
URIs (browser apps only), PKCE settings (browser apps only).
Apps are not containers for users or roles — those live one
level up at the org. An app is the entry point a service uses to
delegate auth to Zitadel.
The `nats` app is **API type**: JWT-bearer / client-credentials
only, no browser flow. Headless agents never see a login page.
The app's `clientId` is what tokens carry as `aud` and what the
auth callout validates against (`OIDC_AUDIENCE` env on the callout
Deployment).
## Why are users and roles at org level instead of per-project?
Roles are defined inside a project but are essentially labels —
strings + display names with no inherent permissions. Each app
enforces them in code (the callout maps `device` → a
permission template).
Users live at org level so one identity can hold roles across
multiple projects in the same org and SSO between them. Role
grants are the join: "user X has roles \[A, B\] on project Y."
The only privilege ladder Zitadel enforces directly is at the
instance/org level (IAM-Owner, Org-Owner). Project roles say
nothing about Zitadel admin rights.
## What is each service account for?
| User | Created by | Purpose |
| --- | --- | --- |
| `iam-admin` | Helm `FirstInstance.Org.Machine` | IAM-Owner. Its PAT (`iam-admin-pat` k8s Secret) drives the management API from `ZitadelSetupScore`. |
| `login-client` | Helm `FirstInstance.Org.LoginClient` | Internal — Zitadel's login UI pod uses it to call back into Zitadel. Don't touch. |
| `fleet-ops` | `fleet_e2e_demo` admin setup | `fleet-admin` role grant, JSON key, used by tests and admin tooling. |
| `device-vm-device-NN` | `fleet_e2e_demo::provision_device` | One per VM. JSON key copied to `/etc/fleet-agent/zitadel-key.json`. `device` role grant. |
| `ops-station`, `sensor-a`, `sensor-b`, `intruder` | `fleet_auth_callout` (separate example) | Leftovers from previous runs. Postgres survives cluster recreates. Harmless, deletable. |
The `device-` prefix on per-device usernames is intentional:
Zitadel emits the username verbatim in the access token's
`client_id` claim. The callout strips `device-` to recover the
bare device id used for NATS subject interpolation
(`DEVICE_ID_PREFIX_STRIP=device-` env var on the callout;
`nats/callout/src/zitadel.rs::extract_device_id`).
## How does the agent authenticate? Are JWTs / refresh tokens cached?
On disk the agent keeps **only the JSON machine key** (RSA
private key) at `/etc/fleet-agent/zitadel-key.json`.
It does NOT store:
- access tokens (in memory only)
- refresh tokens (the JWT-bearer flow has none — RFC 7523 is
stateless by design)
On every NATS (re)connect, `credentials.rs::zitadel_mint`:
1. Builds a JWT assertion with `exp = now + 60s`, signs it with
the RSA key
2. POSTs it to `<zitadel>/oauth/v2/token` with grant type
`urn:ietf:params:oauth:grant-type:jwt-bearer`
3. Receives an access token (~12h validity), caches it in memory
4. Re-mints when within 5min of expiry
(`TOKEN_REFRESH_LEEWAY_SECS`)
## What happens to an offline agent?
| Time offline | Behavior |
| --- | --- |
| 0 – ~12 h | Cached access token still valid. Reconnects work transparently. |
| > ~12 h | Token expired. Agent enters reconnect loop until network returns, then mints a fresh token on the first successful connection. |
The RSA key never expires until rotated server-side.
## Where are the lifetimes set?
- **Access token TTL** — Zitadel UI: Org → Settings → OIDC
Settings → "Access Token Lifetime" (default 12 h).
- **Assertion TTL** — hardcoded 60 s in
`credentials.rs::ASSERTION_LIFETIME_SECS`. Zitadel rejects
assertions where `exp - iat > 60 s`; this is server-enforced,
not a knob.
- **Machine key TTL** — set when the key is created in
`harmony/src/modules/zitadel/setup.rs::create_machine_key`.
## Why is a JSON machine key more secure than a PAT?
Both are "if stolen, full impersonation" — the same blast radius.
The difference is in leak surface:
- **PAT**: a 60-char bearer string sent on every authenticated
request. Every log line, every env dump, every misrouted
request is a leak opportunity.
- **JSON key**: an RSA private key. Only ever signs short-lived
(60 s) assertions sent to one endpoint
(`<zitadel>/oauth/v2/token`). The bearer token NATS sees is
the access token — short-lived (12 h max), scoped, distinct
from the long-term secret. A full network capture of the
agent ↔ NATS traffic yields only access tokens that expire
within 12 h.
Plus: Zitadel allows multiple keys per machine user, so rotation
is zero-downtime (mint new → push to device → delete old). PATs
rotate one-at-a-time and are disruptive.
What this does not defend against: a fully compromised device
where the attacker reads the keyfile. That requires hardware
(TPM / secure element) and is out of scope.
## The machine keys expire in year 9999. Isn't that effectively forever?
Yes. Currently set in `ZitadelSetupScore::create_machine_key` as
a known-bad default chosen for demo convenience (re-running tests
shouldn't produce expired keys mid-run). Tracked as a known issue.
## Why is the IAM-Owner PAT stored as a plain k8s Secret?
K8s Secrets are base64-encoded, **not** encrypted at rest unless
etcd encryption-at-rest is explicitly enabled with a KMS provider.
Anyone with `get secrets` in the `zitadel` namespace effectively
has Zitadel admin.
The PAT exists because `ZitadelSetupScore` calls Zitadel's
management API (create project, role, machine user, mint key),
which requires IAM-Owner privileges. A PAT is the simplest
credential that survives across applies.
This is a known production-hardening gap. Harmony has the
`harmony_secret` crate (ADR-020) with OpenBao and local-encrypted-file
backends; the Score is currently wired against a k8s Secret only.
## What lifetime is set for the human admin password — why does the ConfigMap show one that doesn't work?
`ZitadelScore` regenerates a random admin password on every apply
and writes it to the rendered ConfigMap. Helm's `FirstInstance`
block only seeds Postgres on the **first** install against an
empty DB, so re-applies render a new ConfigMap password but leave
the original Postgres hash untouched. The displayed password is
stale on every apply after the first.
To recover access: use the `iam-admin-pat` to call Zitadel's
management API and reset the human admin's password directly.
Tracked as a known bug.
## Quick reference — tokens on the wire
| Token | Lives where | Lifetime | Signed by | Purpose |
| --- | --- | --- | --- | --- |
| **Assertion** | Agent memory, in-flight | 60 s | Agent (RSA key) | "I'm machine user X — give me an access token" |
| **Access token** | Agent memory + on-the-wire to NATS | ~12 h | Zitadel | "Zitadel says I'm device X with role `device`" |
| **NATS user JWT** | NATS server connection state | callout-defined (~30 s) | Auth callout (NKey) | "I have these permissions on these subjects" |
The agent only holds the RSA key on disk and the access token
in memory. The NATS user JWT is server-internal — agents don't
see it.
## Code map
| Topic | File |
| --- | --- |
| Helm install, masterkey, admin password | `harmony/src/modules/zitadel/mod.rs` |
| Project/role/machine user provisioning | `harmony/src/modules/zitadel/setup.rs` |
| Per-device machine user + key handoff | `examples/fleet_e2e_demo/src/lib.rs::provision_device` |
| JWT-bearer mint | `fleet/harmony-fleet-agent/src/credentials.rs::zitadel_mint` |
| Auth callout decision tree | `nats/callout/src/handler.rs::decide` |
| Per-device permission template | `nats/callout/src/permissions.rs::device_default` |
| End-to-end rehearsal runbook | `examples/fleet_e2e_demo/RUNBOOK.md` |
| Manual JWT-bearer mint + NATS write recipe | [`fleet-manual-token-mint.md`](./fleet-manual-token-mint.md) |

View File

@@ -146,6 +146,50 @@ For wildcard certificates (e.g. `*.example.com`), HTTP01 cannot be used — conf
---
## Multiple Ingresses on the Same Host
When a single host is fronted by more than one Ingress (e.g. a Helm chart that ships separate Ingresses for an API and a UI under the same hostname), **all of them must reference the same TLS Secret, and only one of them should trigger cert-manager**.
```yaml
# Ingress 1 — owns the certificate request
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: app-api
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
rules:
- host: app.example.com
http: { paths: [{ path: /, pathType: Prefix, backend: { service: { name: app-api, port: { number: 8080 } } } }] }
tls:
- hosts: [app.example.com]
secretName: app-example-com-tls # cert-manager will populate this
---
# Ingress 2 — references the same Secret, no cert-manager annotation
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: app-ui
spec:
rules:
- host: app.example.com
http: { paths: [{ path: /ui, pathType: Prefix, backend: { service: { name: app-ui, port: { number: 3000 } } } }] }
tls:
- hosts: [app.example.com]
secretName: app-example-com-tls # reuses the cert above
```
Why this matters — and the failure mode if you don't:
- Two cert-manager-annotated Ingresses on the same host create **two `Certificate` resources** and **two ACME `Order`s** for the same domain.
- Both Orders launch HTTP01 challenges concurrently; the ingress controller sees two competing challenge Ingresses for `/.well-known/acme-challenge/...` with different tokens — one wins, the other fails.
- The loser's Certificate stays `Pending`, its Secret is never created.
- On OKD specifically, the ingress-to-route controller **inlines the TLS cert/key into the generated Route** at creation time. With no Secret it cannot inline anything, and the Route for the second Ingress is silently never admitted — the path becomes unreachable, while the first Ingress's path works fine.
The diagnostic signature: `kubectl get ingress` shows both Ingresses, `kubectl get route` shows only one, the second Ingress's `status.loadBalancer` is `{}`, and the second Certificate is stuck in `Pending`.
## OKD / OpenShift Notes
On OKD, standard Ingress resources are automatically translated into OpenShift `Route` objects. The default TLS termination mode is `edge`, which is correct for most HTTP applications. To control this explicitly, add:

View File

@@ -0,0 +1,217 @@
# Web Authentication and CSRF Security Guidelines
These guidelines define the baseline for Harmony web frontends and future operator dashboards that use browser-based authentication, cookie sessions, Axum, HTMX, or OIDC providers such as Zitadel.
## Goals
- Prevent unauthenticated access.
- Prevent authenticated users from performing actions they are not authorized to perform.
- Prevent CSRF on state-changing endpoints.
- Reduce XSS impact with CSP and safe rendering practices.
- Keep authentication code understandable and reusable across projects.
## Required Baseline
Every browser-facing authenticated application must implement the following controls before production use:
1. **OIDC Authorization Code + PKCE** for login.
2. **OIDC nonce validation** on login callback.
3. **Explicit authorization checks** using roles, groups, claims, or permissions.
4. **CSRF protection** on all mutating routes.
5. **Secure cookie settings**: `HttpOnly`, `Secure` in production, constrained `SameSite`, and appropriate path/domain scoping.
6. **Strict security headers**, especially Content Security Policy.
7. **No permissive credentialed CORS** for operator dashboards.
8. **Generic client-facing errors** with detailed errors logged server-side only.
## OIDC Login Requirements
Use Authorization Code flow with PKCE. On login start, generate and persist a short-lived login attempt containing:
- `state`
- `pkce_code_verifier`
- `nonce`
- creation timestamp or cookie expiration
Send `state`, PKCE challenge, and `nonce` to the authorization endpoint.
On callback:
1. Require a valid login-attempt cookie.
2. Validate returned `state` against the stored state.
3. Exchange the authorization code using the stored PKCE verifier.
4. Validate the returned ID token as an OIDC ID token, including:
- signature
- issuer
- audience/client ID
- expiration/not-before
- nonce
- authorized party (`azp`) when applicable
5. Create the application session only after all checks pass.
6. Delete the login-attempt cookie.
`state` and `nonce` are not interchangeable:
- `state` binds the callback redirect to the browser login attempt.
- `nonce` binds the returned ID token to the browser login attempt.
- PKCE binds the code exchange to the client that started the flow.
## Session Requirements
For small internal dashboards, a verified short-lived ID token in an `HttpOnly` cookie may be acceptable. For higher-risk systems, prefer server-side sessions:
- Store a random session ID in the browser cookie.
- Store tokens and session metadata server-side.
- Support revocation, rotation, idle timeout, and absolute timeout.
Session cookies must use:
- `HttpOnly`
- `Secure` outside local development
- `SameSite=Lax` or `SameSite=Strict`
- `Path=/` unless a narrower path is possible
- No broad `Domain` attribute unless explicitly required
Production services should fail closed if HTTPS/secure-cookie configuration is inconsistent.
## Authorization Requirements
Authentication is not authorization. A valid identity provider token only proves who the user is.
Every protected application must define required permissions for each state-changing or sensitive route. Examples:
- `fleet:viewer` for read-only dashboard access
- `fleet:operator` for alert acknowledgement and operational actions
- `fleet:admin` for settings, user management, or destructive actions
Authorization must be enforced server-side. UI hiding is not sufficient.
## CSRF Protection Standard
For Axum + HTMX dashboards, the recommended baseline is:
1. Require a custom header on all mutating requests.
2. Validate `Origin` or `Referer` against the configured application origin.
3. Keep cookies `SameSite=Lax` or stricter.
4. Do not enable permissive credentialed CORS.
Mutating methods are:
- `POST`
- `PUT`
- `PATCH`
- `DELETE`
Recommended behavior:
- Reject mutating requests without `x-csrf-token`.
- Reject mutating requests whose `Origin` is present and does not match the configured base URL origin.
- If `Origin` is absent, require `Referer` to match the configured base URL origin.
- Reject when neither `Origin` nor `Referer` is available, unless the route is explicitly exempted and documented.
The CSRF header value may be static for HTMX dashboards, for example `x-csrf-token: 1`. The protection comes from the fact that cross-origin HTML forms cannot set custom headers, and cross-origin JavaScript cannot send custom headers with credentials unless CORS allows it.
Do not rely on header presence alone if adding Origin/Referer validation is practical.
## HTMX Integration
Add the CSRF header globally from a static JavaScript file:
```js
document.body.addEventListener('htmx:configRequest', (event) => {
event.detail.headers['x-csrf-token'] = '1';
});
```
Serve this as a static asset, for example `/static/app.js`. Avoid inline scripts so that the application can use a strict CSP without `unsafe-inline`.
## Content Security Policy
Every browser-facing dashboard should set a restrictive CSP. A good starting point is:
```http
Content-Security-Policy: default-src 'self'; script-src 'self'; style-src 'self'; img-src 'self' data:; connect-src 'self'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'
```
Meaning:
- Only load scripts, styles, and API/SSE/HTMX connections from the same origin.
- Prevent clickjacking with `frame-ancestors 'none'`.
- Prevent plugin/object execution with `object-src 'none'`.
- Prevent injected `<base>` tags from rewriting relative URLs.
- Prevent forms from submitting to external origins.
If inline scripts or styles are unavoidable, prefer per-response nonces over `unsafe-inline`.
## Other Security Headers
Set these headers on all HTML responses, or globally when safe:
```http
X-Content-Type-Options: nosniff
Referrer-Policy: same-origin
Permissions-Policy: geolocation=(), microphone=(), camera=()
```
When the service is HTTPS-only, also set HSTS:
```http
Strict-Transport-Security: max-age=31536000; includeSubDomains
```
Only enable HSTS when the domain and subdomains are intended to be HTTPS-only.
## CORS Policy
Operator dashboards should normally not enable CORS.
Never combine all of the following unless there is a reviewed, explicit integration need:
- credentialed requests
- arbitrary or reflected origins
- custom request headers such as `x-csrf-token`
A permissive credentialed CORS policy can bypass custom-header CSRF protection.
## Error Handling
Client-facing auth errors should be generic, for example:
```text
Authentication failed. Please start login again.
```
Detailed causes, provider responses, token validation failures, and stack traces should be logged server-side only.
Avoid returning raw OIDC provider error bodies or JWT validation details to the browser.
## Implementation Checklist
Before shipping a Harmony web frontend:
- [ ] Login uses Authorization Code + PKCE.
- [ ] Login attempt stores `state`, PKCE verifier, `nonce`, and expires quickly.
- [ ] Callback validates `state`.
- [ ] Callback validates ID token nonce.
- [ ] JWT validation checks issuer and exact intended audience/client.
- [ ] Authorization roles/permissions are enforced server-side.
- [ ] Mutating routes are protected by CSRF middleware.
- [ ] CSRF middleware requires custom header and same-origin `Origin`/`Referer`.
- [ ] Session cookies are `HttpOnly`, `Secure` in production, and `SameSite=Lax` or stricter.
- [ ] No permissive credentialed CORS is enabled.
- [ ] CSP is configured without `unsafe-inline` where practical.
- [ ] Security headers are configured.
- [ ] Auth errors shown to users are generic.
- [ ] Detailed auth failures are logged server-side.
## Recommended Default for Harmony Dashboards
For current and future Axum + HTMX dashboards, use this default design:
- Zitadel/OIDC Authorization Code + PKCE + nonce.
- Short-lived encrypted login-attempt cookie.
- Server-side authorization middleware based on roles/claims.
- `HttpOnly`, `Secure`, `SameSite=Lax` or `Strict` session cookie.
- CSRF middleware requiring `x-csrf-token` and same-origin `Origin`/`Referer`.
- Static `/static/app.js` that adds the HTMX CSRF header.
- Strict CSP that allows scripts only from `self`.
- No CORS unless explicitly reviewed.

View File

@@ -9,7 +9,7 @@ name = "example_linux_vm"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony = { path = "../../harmony", features = ["kvm"] }
tokio.workspace = true
log.workspace = true
env_logger.workspace = true

View File

@@ -0,0 +1,46 @@
[package]
name = "example-fleet-auth-callout"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
description = "End-to-end fleet IoT security model: Zitadel + NATS + auth callout on k3d"
[lib]
name = "example_fleet_auth_callout"
path = "src/lib.rs"
[[bin]]
name = "fleet-auth-callout"
path = "src/main.rs"
[[test]]
name = "security_model"
path = "tests/security_model.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony-k8s = { path = "../../harmony-k8s" }
harmony_types = { path = "../../harmony_types" }
k3d-rs = { path = "../../k3d" }
harmony-nats-callout = { path = "../../nats/callout" }
async-nats.workspace = true
nkeys = "0.4"
jsonwebtoken = "9"
reqwest = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tokio-test.workspace = true
serde.workspace = true
serde_json.workspace = true
anyhow.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
log.workspace = true
env_logger.workspace = true
futures-util.workspace = true
k8s-openapi.workspace = true
kube.workspace = true
base64 = "0.22"
tempfile.workspace = true
url.workspace = true
directories = "6.0.0"

View File

@@ -0,0 +1,806 @@
//! End-to-end fleet IoT security model harness.
//!
//! Brings up the full stack on a local k3d cluster:
//! 1. k3d cluster (creates if missing) with HTTP/NATS port mappings.
//! 2. Zitadel + Postgres (via the official Helm chart).
//! 3. Project + roles (`fleet-admin`, `device`) + 4 machine users +
//! JWT keys via ZitadelSetupScore.
//! 4. NATS server with `auth_callout` block referencing the issuer NKey.
//! 5. The harmony-nats-callout binary as a Deployment, sideloaded as a
//! container image into k3d.
//!
//! `main.rs` calls [`bring_up_stack`] then prints credentials and waits.
//! Tests under `tests/` share a single cluster via `OnceCell` and exercise
//! the security model through real `async_nats` clients using JWT-bearer
//! access tokens minted from the machine keys produced in step 3.
//!
//! ## Why this lives in an example, not under `harmony/src/modules/`
//!
//! Everything in this crate is a *composition* of reusable Scores plus
//! test fixtures (the JWT-bearer helper, image-build glue). The Scores
//! themselves are in `harmony/src/modules/{zitadel,nats_auth_callout}`.
use std::path::PathBuf;
use std::time::Duration;
use anyhow::{Context, Result};
use harmony::inventory::Inventory;
use harmony::modules::k8s::coredns::{CoreDNSRewrite, CoreDNSRewriteScore};
use harmony::modules::nats::NatsHelmChartScore;
use harmony::modules::nats_auth_callout::{NatsAuthCalloutScore, render_auth_callout_block};
use harmony::modules::zitadel::{
MachineKeyType, ZitadelApiApp, ZitadelClientConfig, ZitadelMachineUser, ZitadelRole,
ZitadelScore, ZitadelSetupScore,
};
use harmony::score::Score;
use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
use jsonwebtoken::{Algorithm, EncodingKey, Header as JwtHeader, encode as jwt_encode};
use k3d_rs::{K3d, PortMapping};
use log::info;
use nkeys::KeyPair;
use serde::{Deserialize, Serialize};
/// Name of the k3d cluster this example creates or reuses.
pub const CLUSTER_NAME: &str = "fleet-auth-callout";
/// Host-side port that k3d maps to port 80 inside the cluster
/// (see `create_k3d`); also used as Zitadel's external port.
pub const HTTP_PORT: u32 = 8080;
/// Port used both as the NATS Service `nodePort` and as the host-side
/// k3d port mapping, so off-cluster clients can reach NATS.
pub const NATS_NODE_PORT: i32 = 30422;
/// Hostname Zitadel is deployed under; sent as the `Host` header when
/// this crate talks to Zitadel through `127.0.0.1`.
pub const ZITADEL_HOST: &str = "sso.fleet.local";
/// Namespace holding the callout Deployment and the issuer-seed Secret.
pub const FLEET_NAMESPACE: &str = "fleet-system";
/// Namespace of the NATS Helm release (same as [`FLEET_NAMESPACE`]).
pub const NATS_NAMESPACE: &str = FLEET_NAMESPACE;
/// NATS Helm release name; also rendered as `fullnameOverride` and used
/// to build the in-cluster NATS URL.
pub const NATS_RELEASE: &str = "fleet-nats";
/// Name given to the callout score and used as the Deployment name when
/// waiting for its rollout.
pub const CALLOUT_DEPLOYMENT_NAME: &str = "fleet-callout";
/// `localhost/` prefix matches what podman tags images as internally —
/// `podman build -t foo:tag` produces `localhost/foo:tag`. After
/// `podman save → k3d image import`, the image lands in the k3d node's
/// containerd under that exact name. Without the prefix, K8s would
/// treat `foo:tag` as a Docker Hub reference and ImagePullBackOff.
pub const CALLOUT_IMAGE_TAG: &str = "localhost/harmony-nats-callout:dev";
/// Zitadel project that owns the roles and the API app below.
pub const PROJECT_NAME: &str = "fleet";
/// Name of the Zitadel API app registered in [`PROJECT_NAME`].
pub const API_APP_NAME: &str = "nats";
/// Role key configured as the callout's admin role.
pub const ADMIN_ROLE_KEY: &str = "fleet-admin";
/// Role key configured as the callout's device role.
pub const DEVICE_ROLE_KEY: &str = "device";
/// Machine user granted [`ADMIN_ROLE_KEY`].
pub const ADMIN_USERNAME: &str = "ops-station";
/// First machine user granted [`DEVICE_ROLE_KEY`].
pub const DEVICE_A_USERNAME: &str = "sensor-a";
/// Second machine user granted [`DEVICE_ROLE_KEY`].
pub const DEVICE_B_USERNAME: &str = "sensor-b";
/// Machine user created without any project or role grant.
pub const NO_ROLE_USERNAME: &str = "intruder";
/// Service-side NATS account user that the callout itself authenticates
/// with (listed in `auth_callout.auth_users` to bypass the callout).
pub const NATS_AUTH_USER: &str = "auth";
/// Password for [`NATS_AUTH_USER`]. Hard-coded: this is a local example stack.
pub const NATS_AUTH_PASS: &str = "auth-callout-pass";
/// NATS account named in the `auth_callout` block and set as the
/// callout's target account.
pub const NATS_ACCOUNT: &str = "DEVICES";
/// User defined in the NATS `SYS` (system) account.
pub const NATS_SYSTEM_USER: &str = "sys-admin";
/// Password for [`NATS_SYSTEM_USER`]. Hard-coded: this is a local example stack.
pub const NATS_SYSTEM_PASS: &str = "sys-admin-pass";
/// Everything a caller needs to talk to the stack that `bring_up_stack`
/// produced: endpoints, the Zitadel project id, and per-user credentials.
///
/// NOTE(review): this type derives `Debug` and carries private key
/// material in the `*_machine_key` fields — avoid logging it wholesale.
#[derive(Debug, Clone)]
pub struct StackHandles {
/// Name of the k3d cluster (always [`CLUSTER_NAME`]).
pub cluster_name: String,
/// NATS URL reachable from the host, `nats://127.0.0.1:<NATS_NODE_PORT>`.
pub nats_url_external: String,
/// Externally visible Zitadel URL, `http://<ZITADEL_HOST>:<HTTP_PORT>`.
pub zitadel_url: String,
/// Zitadel id of the [`PROJECT_NAME`] project, read from the client config cache.
pub project_id: String,
/// Raw JSON keyfile content ([`MachineKeyFile`]) for [`ADMIN_USERNAME`].
pub admin_machine_key: String,
/// Raw JSON keyfile content ([`MachineKeyFile`]) for [`DEVICE_A_USERNAME`].
pub device_a_machine_key: String,
/// Raw JSON keyfile content ([`MachineKeyFile`]) for [`DEVICE_B_USERNAME`].
pub device_b_machine_key: String,
/// Raw JSON keyfile content ([`MachineKeyFile`]) for [`NO_ROLE_USERNAME`].
pub intruder_machine_key: String,
/// Public key of the callout's issuer NKey, as rendered into the NATS config.
pub issuer_pubkey: String,
}
/// JSON keyfile content as Zitadel emits it for `KEY_TYPE_JSON` machine keys.
///
/// Field names are mapped to the camelCase keys of the JSON document via
/// the `serde(rename = …)` attributes.
#[derive(Debug, Deserialize, Serialize)]
pub struct MachineKeyFile {
/// Value of the keyfile's `type` field (raw identifier because `type`
/// is a Rust keyword).
#[serde(rename = "type")]
pub r#type: String,
/// Key identifier; `mint_access_token` sends it as the JWT `kid` header.
#[serde(rename = "keyId")]
pub key_id: String,
/// PEM-encoded RSA private key.
pub key: String,
/// Id of the machine user owning the key; `mint_access_token` uses it
/// as both `iss` and `sub` of the JWT assertion.
#[serde(rename = "userId")]
pub user_id: String,
}
/// Directory handed to k3d-rs as its working directory.
///
/// Resolves to `<user data dir>/harmony/k3d`, falling back to
/// `/tmp/harmony` when the platform's base directories cannot be
/// determined.
fn data_dir() -> PathBuf {
    match directories::BaseDirs::new() {
        Some(base_dirs) => base_dirs.data_dir().join("harmony").join("k3d"),
        None => PathBuf::from("/tmp/harmony"),
    }
}
/// Build the [`K3d`] handle for the example cluster, creating its data
/// directory on disk first.
///
/// Two host port mappings are registered:
/// * `HTTP_PORT` → 80, so `/etc/hosts` entries (or `curl --resolve`) hit
///   the ingress;
/// * `NATS_NODE_PORT` → `NATS_NODE_PORT`, so clients off-cluster can talk
///   to the NATS service.
///
/// # Panics
///
/// Panics if the data directory cannot be created.
pub fn create_k3d() -> K3d {
    let k3d_home = data_dir();
    std::fs::create_dir_all(&k3d_home).expect("create k3d data dir");

    let nats_port = NATS_NODE_PORT as u32;
    let port_mappings = vec![
        PortMapping::new(HTTP_PORT, 80),
        PortMapping::new(nats_port, nats_port),
    ];

    K3d::new(k3d_home, Some(CLUSTER_NAME.to_string())).with_port_mappings(port_mappings)
}
/// Build a [`K8sAnywhereTopology`] pointed at the example's k3d cluster.
///
/// The topology is configured through environment variables: local-k3d
/// and auto-install are switched off, and the kube context is set to
/// the one reported by `k3d` (or `k3d-<CLUSTER_NAME>` when it reports
/// none). The variables are set process-wide before `from_env()` reads
/// them.
pub fn create_topology(k3d: &K3d) -> K8sAnywhereTopology {
    let kube_context = match k3d.context_name() {
        Some(name) => name,
        None => format!("k3d-{CLUSTER_NAME}"),
    };

    let overrides = [
        ("HARMONY_USE_LOCAL_K3D", "false"),
        ("HARMONY_AUTOINSTALL", "false"),
        ("HARMONY_K8S_CONTEXT", kube_context.as_str()),
    ];
    for (key, value) in overrides {
        // NOTE(review): `set_var` is only sound while no other thread is
        // reading or writing the process environment. This function is
        // reachable from inside an async runtime — confirm nothing else
        // touches the environment concurrently at this point.
        unsafe {
            std::env::set_var(key, value);
        }
    }

    K8sAnywhereTopology::from_env()
}
/// Build the NATS Helm values that wire `auth_callout` to a callout
/// service running in the same account, plus a NodePort for off-cluster
/// access from tests on the host.
///
/// `issuer_pubkey` is the public key of the callout's issuer NKey; it is
/// passed to `render_auth_callout_block` together with `NATS_AUTH_USER`
/// and `NATS_ACCOUNT`. Returns the values document as a YAML string.
///
/// **Why the explicit `service.merge.spec.ports` list:** the upstream
/// chart's `service.ports.<name>.merge` field is *not* a strategic-merge
/// directive — it gets emitted as-is into the rendered Service (the
/// chart's `_helpers.tpl` does `merge (dict "name" $k) $v` which leaves
/// `merge: …` as a literal field on each port). K8s then rejects the
/// Service with "field not declared in schema". Only the top-level
/// `service.merge` is actually a `mergeOverwrite` patch; we use that
/// path and re-state the full ports list so `nats` gets our nodePort.
pub fn render_nats_values(issuer_pubkey: &str) -> String {
// Rendered separately, then spliced into the template under `config.merge`.
let auth_callout = render_auth_callout_block(issuer_pubkey, NATS_AUTH_USER, NATS_ACCOUNT);
format!(
r#"fullnameOverride: {nats_release}
config:
cluster:
enabled: false
jetstream:
enabled: true
fileStorage:
enabled: true
size: 2Gi
merge:
{auth_callout_indented}
accounts:
{nats_account}:
jetstream: enabled
users:
- user: "{auth_user}"
password: "{auth_pass}"
SYS:
users:
- user: "{sys_user}"
password: "{sys_pass}"
system_account: SYS
service:
merge:
spec:
type: NodePort
ports:
- appProtocol: tcp
name: nats
port: 4222
targetPort: nats
nodePort: {node_port}
- appProtocol: http
name: monitor
port: 8222
targetPort: monitor
"#,
nats_release = NATS_RELEASE,
// The first line of the block takes its position from the
// `{auth_callout_indented}` placeholder in the template; every
// following line gets the prefix below so the multi-line block
// stays nested under the same parent key.
auth_callout_indented = auth_callout
.lines()
.enumerate()
.map(|(i, l)| if i == 0 {
l.to_string()
} else {
format!(" {l}")
})
.collect::<Vec<_>>()
.join("\n"),
nats_account = NATS_ACCOUNT,
auth_user = NATS_AUTH_USER,
auth_pass = NATS_AUTH_PASS,
sys_user = NATS_SYSTEM_USER,
sys_pass = NATS_SYSTEM_PASS,
node_port = NATS_NODE_PORT,
)
}
/// Bring the entire stack up on a local k3d cluster. Idempotent —
/// re-running picks up existing resources.
///
/// Steps, in order: k3d cluster, Zitadel, CoreDNS rewrite for
/// [`ZITADEL_HOST`], Zitadel readiness probe, project/roles/machine-user
/// provisioning, NATS with the `auth_callout` block, callout image build
/// and import, callout deployment and rollout wait.
///
/// Returns handles + credentials. The machine key fields contain raw
/// JSON keyfile content (`MachineKeyFile`) and can be passed straight
/// to [`mint_access_token`] to authenticate as the corresponding user.
///
/// # Errors
///
/// Fails on the first step that fails; each error carries context
/// naming the step.
pub async fn bring_up_stack() -> Result<StackHandles> {
    // `try_init` so a logger installed by the caller (or a previous
    // call) is not an error.
    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
        .try_init();

    // Externally visible Zitadel URL; used both as the callout's OIDC
    // issuer and as the URL handed back to the caller.
    let zitadel_url = format!("http://{ZITADEL_HOST}:{HTTP_PORT}");

    let k3d = create_k3d();
    info!("[1/8] ensuring k3d cluster '{CLUSTER_NAME}' is up");
    k3d.ensure_installed()
        .await
        .map_err(|e| anyhow::anyhow!("k3d ensure: {e}"))?;
    let topology = create_topology(&k3d);
    topology.ensure_ready().await.context("topology init")?;

    info!("[2/8] deploying Zitadel (this takes several minutes the first time)");
    deploy_zitadel(&topology).await?;

    info!("[3/8] CoreDNS rewrite so in-cluster lookups for {ZITADEL_HOST} resolve");
    CoreDNSRewriteScore {
        rewrites: vec![CoreDNSRewrite {
            hostname: ZITADEL_HOST.to_string(),
            target: "zitadel.zitadel.svc.cluster.local".to_string(),
        }],
    }
    .interpret(&Inventory::autoload(), &topology)
    .await
    .context("CoreDNS rewrite")?;

    info!("[4/8] waiting for Zitadel HTTP to respond");
    wait_for_zitadel_ready().await?;

    info!("[5/8] provisioning project + roles + machine users in Zitadel");
    let setup = ZitadelSetupScore {
        host: ZITADEL_HOST.to_string(),
        scheme: Default::default(),
        port: None,
        skip_tls: true,
        endpoint: Some(format!("http://127.0.0.1:{HTTP_PORT}")),
        admin_org_id: None,
        namespace: "zitadel".to_string(),
        applications: vec![],
        api_apps: vec![ZitadelApiApp {
            project_name: PROJECT_NAME.to_string(),
            app_name: API_APP_NAME.to_string(),
        }],
        roles: vec![
            ZitadelRole {
                project_name: PROJECT_NAME.to_string(),
                key: ADMIN_ROLE_KEY.to_string(),
                display_name: "Fleet Admin".to_string(),
                group: None,
            },
            ZitadelRole {
                project_name: PROJECT_NAME.to_string(),
                key: DEVICE_ROLE_KEY.to_string(),
                display_name: "Device".to_string(),
                group: None,
            },
        ],
        machine_users: vec![
            example_machine_user(ADMIN_USERNAME, "Ops Station", Some(ADMIN_ROLE_KEY)),
            example_machine_user(DEVICE_A_USERNAME, "Sensor A", Some(DEVICE_ROLE_KEY)),
            example_machine_user(DEVICE_B_USERNAME, "Sensor B", Some(DEVICE_ROLE_KEY)),
            // Authenticates against Zitadel but holds no project role.
            example_machine_user(NO_ROLE_USERNAME, "Intruder", None),
        ],
    };
    setup
        .interpret(&Inventory::autoload(), &topology)
        .await
        .context("ZitadelSetupScore failed")?;
    let zcfg = ZitadelClientConfig::load()
        .context("ZitadelSetupScore did not produce a client config cache")?;
    let project_id = zcfg
        .project_id_by_name(PROJECT_NAME)
        .or(zcfg.project_id.as_ref())
        .context("project_id missing from cache")?
        .clone();

    info!("[6/8] generating callout issuer NKey + deploying NATS with auth_callout");
    // The seed is stashed in a K8s secret in the fleet namespace and
    // re-used across runs (a fresh one is generated and persisted when
    // none exists). Keeping it stable keeps the issuer public key that
    // is rendered into the NATS config the same from run to run.
    let issuer_seed = ensure_issuer_seed(&topology).await?;
    let issuer_kp = KeyPair::from_seed(&issuer_seed)
        .map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
    let issuer_pubkey = issuer_kp.public_key();
    NatsHelmChartScore::new(
        NATS_RELEASE.to_string(),
        NATS_NAMESPACE.to_string(),
        render_nats_values(&issuer_pubkey),
    )
    .interpret(&Inventory::autoload(), &topology)
    .await
    .context("NATS deploy")?;

    info!("[7/8] building + sideloading callout image into k3d");
    build_and_load_callout_image(&k3d).await?;

    info!("[8/8] deploying NatsAuthCalloutScore");
    let mut callout = NatsAuthCalloutScore::new(
        CALLOUT_DEPLOYMENT_NAME,
        FLEET_NAMESPACE,
        format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
        zitadel_url.clone(),
        // Zitadel emits aud = projectId for tokens issued via the
        // `urn:zitadel:iam:org:project:id:<projectId>:aud` scope.
        project_id.clone(),
        NATS_AUTH_USER,
        NATS_AUTH_PASS,
        // Last use of the seed, so it is moved rather than cloned.
        issuer_seed,
    )
    .image(CALLOUT_IMAGE_TAG)
    .target_account(NATS_ACCOUNT)
    .admin_role(ADMIN_ROLE_KEY)
    .device_role(DEVICE_ROLE_KEY)
    .danger_accept_invalid_certs(true);
    // Zitadel doesn't emit a custom `device_id` claim by default — that
    // would require a Zitadel Action to map metadata into an extension
    // claim. Zitadel access tokens for machine users:
    //   * don't carry `preferred_username` (that's an OIDC ID-token claim);
    //   * do carry `client_id` set to the machine user's userName
    //     (`sensor-a`, `ops-station`, …) — perfect for our
    //     device-id-from-username case.
    // Production deployments that want a separate `device_id` claim
    // should configure a Zitadel Action and point the device_id_claim
    // path back at `device_id`.
    callout.device_id_claim = "client_id".to_string();
    // Zitadel's `client_id` for a machine user equals its userName, so
    // a user created as `device-vm-device-00` (matching the
    // `device_username()` convention used by both fleet_e2e_demo and
    // fleet_rpi_setup) lands in the JWT verbatim. Strip the `device-`
    // prefix so the callout interpolates permissions against the bare
    // device id (`vm-device-00`) the agent uses for KV keys.
    callout.device_id_prefix_strip = "device-".to_string();
    // The project's role claim lives at a *project-scoped* path
    // `urn:zitadel:iam:org:project:<projectId>:roles` (NOT the unqualified
    // `urn:zitadel:iam:org:project:roles`) because we request the
    // `urn:zitadel:iam:org:project:id:<projectId>:aud` scope. The latter
    // forces Zitadel to scope role claims to the specific project, which
    // is what we want for tenant isolation.
    callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
    callout
        .interpret(&Inventory::autoload(), &topology)
        .await
        .context("callout deploy")?;

    info!("waiting for callout pod to be Ready before handing the stack over");
    wait_for_callout_ready(&topology).await?;

    let admin_machine_key = zcfg
        .machine_key(ADMIN_USERNAME)
        .context("admin machine key missing from cache")?
        .clone();
    let device_a_machine_key = zcfg
        .machine_key(DEVICE_A_USERNAME)
        .context("device A machine key missing from cache")?
        .clone();
    let device_b_machine_key = zcfg
        .machine_key(DEVICE_B_USERNAME)
        .context("device B machine key missing from cache")?
        .clone();
    let intruder_machine_key = zcfg
        .machine_key(NO_ROLE_USERNAME)
        .context("intruder machine key missing from cache")?
        .clone();

    Ok(StackHandles {
        cluster_name: CLUSTER_NAME.to_string(),
        nats_url_external: format!("nats://127.0.0.1:{NATS_NODE_PORT}"),
        zitadel_url,
        project_id,
        admin_machine_key,
        device_a_machine_key,
        device_b_machine_key,
        intruder_machine_key,
        issuer_pubkey,
    })
}

/// Build the [`ZitadelMachineUser`] entry for one of the example's
/// machine users: JSON machine key, no PAT. With `role = Some(key)` the
/// user is attached to [`PROJECT_NAME`] and granted that role; with
/// `None` it gets neither a project nor any role grant.
fn example_machine_user(
    username: &str,
    display_name: &str,
    role: Option<&str>,
) -> ZitadelMachineUser {
    ZitadelMachineUser {
        username: username.to_string(),
        name: display_name.to_string(),
        create_pat: false,
        machine_key: Some(MachineKeyType::Json),
        project_name: role.map(|_| PROJECT_NAME.to_string()),
        grant_roles: role.into_iter().map(str::to_string).collect(),
    }
}
/// Deploy Zitadel onto `topology` via [`ZitadelScore`].
///
/// The score is configured for plain HTTP (`external_secure: false`)
/// under [`ZITADEL_HOST`], with every other field left at its default.
///
/// # Errors
///
/// Returns the score's failure wrapped with "ZitadelScore deploy".
pub async fn deploy_zitadel(topology: &K8sAnywhereTopology) -> Result<()> {
    let score = ZitadelScore {
        host: ZITADEL_HOST.to_string(),
        zitadel_version: "v4.12.1".to_string(),
        external_secure: false,
        // Match the host-side k3d port mapping so Zitadel's emitted
        // issuer is `http://sso.fleet.local:8080`. Without this, JWT-bearer
        // audience validation fails with `Errors.Internal` (the assertion
        // `aud` doesn't match the chart-default issuer at port 80).
        external_port: Some(HTTP_PORT),
        ..Default::default()
    };
    let inventory = Inventory::autoload();
    score
        .interpret(&inventory, topology)
        .await
        .map(drop)
        .context("ZitadelScore deploy")
}
pub async fn wait_for_callout_ready(topology: &K8sAnywhereTopology) -> Result<()> {
let _ = topology;
// `kubectl rollout status deployment` is the canonical "is the new
// ReplicaSet's pod up?" check — it handles observed-generation
// tracking, terminating-old-replica edge cases, and pod-readiness in
// one call. Reproducing that in the kube client is doable but error-
// prone; shelling out keeps it short and obviously-correct.
let status = tokio::process::Command::new("kubectl")
.args([
"--context",
"k3d-fleet-auth-callout",
"rollout",
"status",
"-n",
FLEET_NAMESPACE,
&format!("deployment/{CALLOUT_DEPLOYMENT_NAME}"),
"--timeout=60s",
])
.status()
.await
.context("invoke kubectl rollout status")?;
if !status.success() {
anyhow::bail!("kubectl rollout status timed out / failed");
}
Ok(())
}
/// Poll Zitadel's OIDC discovery document until it answers with a
/// success status.
///
/// Up to 120 attempts are made, 2 s apart, each with a 5 s request
/// timeout. Progress is logged on every 15th attempt only.
///
/// # Errors
///
/// Fails if the HTTP client cannot be built or if no attempt succeeds.
pub async fn wait_for_zitadel_ready() -> Result<()> {
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(5))
        .build()?;
    let discovery_url = format!("http://127.0.0.1:{HTTP_PORT}/.well-known/openid-configuration");
    // Include the port in Host so Zitadel emits a matching issuer URL
    // — see `mint_access_token` for the underlying mechanism.
    let host_header = format!("{ZITADEL_HOST}:{HTTP_PORT}");

    for attempt in 1..=120 {
        let outcome = client
            .get(discovery_url.as_str())
            .header("Host", host_header.as_str())
            .send()
            .await;
        let should_log = attempt % 15 == 0;
        match outcome {
            Ok(resp) if resp.status().is_success() => return Ok(()),
            Ok(resp) => {
                if should_log {
                    info!("Zitadel HTTP {} (attempt {attempt}/120)", resp.status())
                }
            }
            Err(err) => {
                if should_log {
                    info!("Zitadel unreachable: {err} (attempt {attempt}/120)")
                }
            }
        }
        tokio::time::sleep(Duration::from_secs(2)).await;
    }
    anyhow::bail!("timed out waiting for Zitadel")
}
/// Persist the callout's issuer NKey seed in a K8s secret so re-runs of
/// the example don't invalidate previously issued user JWTs in NATS.
pub async fn ensure_issuer_seed(topology: &K8sAnywhereTopology) -> Result<String> {
use k8s_openapi::ByteString;
use k8s_openapi::api::core::v1::{Namespace, Secret};
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
use std::collections::BTreeMap;
let k8s = topology
.k8s_client()
.await
.map_err(|e| anyhow::anyhow!("k8s_client: {e}"))?;
// Ensure namespace exists first — secret creation requires it.
if k8s
.get_resource::<Namespace>(FLEET_NAMESPACE, None)
.await?
.is_none()
{
let ns = Namespace {
metadata: ObjectMeta {
name: Some(FLEET_NAMESPACE.to_string()),
..Default::default()
},
..Default::default()
};
k8s.create(&ns, None).await.ok();
}
let secret_name = "callout-issuer-seed";
if let Some(existing) = k8s
.get_resource::<Secret>(secret_name, Some(FLEET_NAMESPACE))
.await?
&& let Some(data) = existing.data
&& let Some(seed_bytes) = data.get("seed")
{
let seed = String::from_utf8(seed_bytes.0.clone())?;
return Ok(seed.trim().to_string());
}
let seed = KeyPair::new_account()
.seed()
.map_err(|e| anyhow::anyhow!("nkey seed: {e}"))?;
let mut data = BTreeMap::new();
data.insert("seed".to_string(), ByteString(seed.as_bytes().to_vec()));
let secret = Secret {
metadata: ObjectMeta {
name: Some(secret_name.to_string()),
namespace: Some(FLEET_NAMESPACE.to_string()),
..Default::default()
},
data: Some(data),
type_: Some("Opaque".to_string()),
..Default::default()
};
k8s.create(&secret, Some(FLEET_NAMESPACE)).await.ok();
Ok(seed)
}
/// Build the callout binary, package the container image, and import it
/// into the running k3d cluster. Mirrors `fleet/scripts/load-test.sh`'s
/// staging-context pattern (the workspace `.dockerignore` excludes
/// `target/`).
pub async fn build_and_load_callout_image(k3d: &K3d) -> Result<()> {
let workspace_root = std::env::var("CARGO_MANIFEST_DIR")
.map(|d| PathBuf::from(d).join("..").join(".."))
.unwrap_or_else(|_| PathBuf::from("."));
let workspace_root = workspace_root.canonicalize().unwrap_or(workspace_root);
info!("cargo build --release -p harmony-nats-callout");
let status = tokio::process::Command::new("cargo")
.args(["build", "--release", "-p", "harmony-nats-callout"])
.current_dir(&workspace_root)
.status()
.await?;
if !status.success() {
anyhow::bail!("cargo build failed");
}
let ctx = tempfile::tempdir()?;
let bin_dst = ctx.path().join("target/release");
std::fs::create_dir_all(&bin_dst)?;
std::fs::copy(
workspace_root.join("target/release/harmony-nats-callout"),
bin_dst.join("harmony-nats-callout"),
)?;
// The shipped `nats/callout/Dockerfile` is multi-stage (used by
// the production build script — see
// `fleet/scripts/build_and_push_images.sh`). The k3d e2e harness
// wants the host-built binary copied in directly, so we write a
// tiny single-stage Dockerfile inline here. Same runtime image
// (archlinux:base for matched glibc — explained in the original
// Dockerfile) and same USER directive.
std::fs::write(
ctx.path().join("Dockerfile"),
r#"FROM docker.io/library/archlinux:base
COPY target/release/harmony-nats-callout /usr/local/bin/harmony-nats-callout
USER 65532:65532
ENTRYPOINT ["/usr/local/bin/harmony-nats-callout"]
"#,
)?;
info!("podman build → {CALLOUT_IMAGE_TAG}");
let status = tokio::process::Command::new("podman")
.args(["build", "-q", "-t", CALLOUT_IMAGE_TAG, "."])
.current_dir(ctx.path())
.status()
.await?;
if !status.success() {
anyhow::bail!("podman build failed");
}
info!("k3d image import {CALLOUT_IMAGE_TAG}");
let cluster = k3d.cluster_name().unwrap_or(CLUSTER_NAME).to_string();
// Deterministic .tar path with a per-process suffix so concurrent
// test crates don't trample each other.
let tar_path =
std::env::temp_dir().join(format!("harmony-callout-image-{}.tar", std::process::id()));
// `podman save` (docker-archive format) refuses to overwrite an
// existing archive — wipe any leftover from a prior failed run.
let _ = std::fs::remove_file(&tar_path);
let status = tokio::process::Command::new("podman")
.args(["save", "-o", tar_path.to_str().unwrap(), CALLOUT_IMAGE_TAG])
.status()
.await?;
if !status.success() {
anyhow::bail!("podman save failed");
}
// The k3d binary lives in `~/.local/share/harmony/k3d/k3d` — it's
// managed by k3d-rs, not on the system PATH (the user's interactive
// shell typically has it as an alias, but child processes don't
// inherit aliases). Run it via k3d-rs's accessor.
let tar_path_str = tar_path.to_str().unwrap().to_string();
let cluster_for_blocking = cluster.clone();
let tar_path_clone = tar_path.clone();
let result = tokio::task::spawn_blocking(move || {
k3d_rs::K3d::new(data_dir(), Some(cluster_for_blocking.clone())).run_k3d_command([
"image",
"import",
tar_path_str.as_str(),
"-c",
cluster_for_blocking.as_str(),
])
})
.await
.context("spawn_blocking k3d image import")?;
let _ = std::fs::remove_file(&tar_path_clone);
let output = result.map_err(|e| anyhow::anyhow!("k3d image import failed: {e}"))?;
if !output.status.success() {
anyhow::bail!(
"k3d image import returned {}: {}",
output.status,
String::from_utf8_lossy(&output.stderr)
);
}
Ok(())
}
/// RFC 7523 JWT-bearer client for Zitadel.
///
/// `issuer_url` should be the externally-visible Zitadel URL
/// (e.g. `http://sso.fleet.local:8080`) — it's used as the JWT
/// assertion's `aud` claim. The actual HTTP transport hits
/// `127.0.0.1:HTTP_PORT` and forwards the hostname via the `Host`
/// header, which is how the k3d ingress routes without requiring a
/// host-side `/etc/hosts` entry.
///
/// `machine_key_json` is the raw keyfile content Zitadel emits
/// (decoded from `keyDetails`). `scopes` are appended to the standard
/// set; pass `[format!("urn:zitadel:iam:org:project:id:{project_id}:aud")]`
/// to make the resulting access token's `aud` include the project ID.
///
/// Returns the `access_token` field of the token response. When the
/// `FLEET_AUTH_CALLOUT_DEBUG_TOKENS` environment variable is set, the
/// token's decoded claims are additionally logged at info level.
///
/// # Errors
///
/// Fails if the keyfile cannot be parsed, the assertion cannot be
/// signed, the request fails, the endpoint answers with a non-success
/// status (the response body is included in the error), or the response
/// is not the expected JSON.
pub async fn mint_access_token(
    issuer_url: &str,
    machine_key_json: &str,
    scopes: &[String],
) -> Result<String> {
    let key: MachineKeyFile =
        serde_json::from_str(machine_key_json).context("machine key JSON parse")?;
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)?
        .as_secs() as i64;
    // Self-issued assertion: the machine user is both issuer and
    // subject; valid for 60 seconds.
    let claims = serde_json::json!({
        "iss": key.user_id,
        "sub": key.user_id,
        "aud": issuer_url,
        "exp": now + 60,
        "iat": now,
    });
    let mut header = JwtHeader::new(Algorithm::RS256);
    header.kid = Some(key.key_id.clone());
    let assertion = jwt_encode(
        &header,
        &claims,
        &EncodingKey::from_rsa_pem(key.key.as_bytes())
            .context("parse RSA private key from machine key file")?,
    )?;
    let scope = {
        let mut s = vec![
            "openid".to_string(),
            "profile".to_string(),
            "urn:zitadel:iam:org:projects:roles".to_string(),
        ];
        s.extend(scopes.iter().cloned());
        s.join(" ")
    };
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .timeout(Duration::from_secs(10))
        .build()?;
    // The Zitadel chart's ingress routes by Host header. Hitting
    // 127.0.0.1:HTTP_PORT bypasses the need for an /etc/hosts entry
    // on the host running the tests (k3d's loadbalancer maps the
    // port; the ingress controller dispatches by Host header).
    //
    // The Host MUST include the port: Zitadel derives the OIDC issuer
    // string from the request's Host header. With `Host: sso.fleet.local`
    // it emits `iss: http://sso.fleet.local`; with `Host: sso.fleet.local:8080`
    // it emits `iss: http://sso.fleet.local:8080`. Our JWT assertion's `aud`
    // must match Zitadel's issuer exactly, so we always send the port.
    let host = url::Url::parse(issuer_url)
        .ok()
        .and_then(|u| {
            let h = u.host_str()?;
            let p = u.port_or_known_default();
            Some(match p {
                Some(p) => format!("{h}:{p}"),
                None => h.to_string(),
            })
        })
        // Unparseable issuer URL: fall back to the example's defaults.
        .unwrap_or_else(|| format!("{ZITADEL_HOST}:{HTTP_PORT}"));
    let token_url = format!("http://127.0.0.1:{HTTP_PORT}/oauth/v2/token");
    let resp = client
        .post(&token_url)
        .header("Host", host)
        .form(&[
            (
                "grant_type",
                "urn:ietf:params:oauth:grant-type:jwt-bearer".to_string(),
            ),
            ("assertion", assertion),
            ("scope", scope),
        ])
        .send()
        .await
        .context("POST /oauth/v2/token")?;
    if !resp.status().is_success() {
        let status = resp.status();
        let body = resp.text().await.unwrap_or_default();
        anyhow::bail!("token endpoint returned {status}: {body}");
    }
    #[derive(Deserialize)]
    struct TokenResponse {
        access_token: String,
    }
    let tr: TokenResponse = resp.json().await.context("parse token response")?;
    // Opt-in debugging aid: decode the JWT payload (second dot-separated
    // segment) and log its claims. Decoding problems are ignored.
    if std::env::var("FLEET_AUTH_CALLOUT_DEBUG_TOKENS").is_ok()
        && let Some(payload_b64) = tr.access_token.split('.').nth(1)
    {
        use base64::Engine;
        // The engine is the no-padding one, so strip any `=` a producer
        // may have left on rather than adding padding.
        if let Ok(bytes) = base64::engine::general_purpose::URL_SAFE_NO_PAD
            .decode(payload_b64.trim_end_matches('='))
            && let Ok(claims) = serde_json::from_slice::<serde_json::Value>(&bytes)
        {
            log::info!(
                "[debug] access token claims: {}",
                serde_json::to_string_pretty(&claims).unwrap_or_default()
            );
        }
    }
    Ok(tr.access_token)
}
/// Extra scopes to request for `project_id`.
///
/// Returns a single-element list holding the project-id audience scope,
/// `urn:zitadel:iam:org:project:id:<project_id>:aud`, so the access
/// token's `aud` matches what the callout's `oidc_audience` expects.
/// The standard scopes themselves are added by the token-minting code.
pub fn scopes_for_project(project_id: &str) -> Vec<String> {
    let audience_scope = format!("urn:zitadel:iam:org:project:id:{project_id}:aud");
    vec![audience_scope]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The rendered Helm values must carry the auth-callout settings, the
    /// system account, and the fixed NodePort.
    #[test]
    fn render_nats_values_inlines_auth_callout_block() {
        let rendered = render_nats_values("ABCDEF");
        let expected_fragments = [
            "issuer: ABCDEF",
            "auth_users: [ auth ]",
            "account: DEVICES",
            "system_account: SYS",
            "nodePort: 30422",
        ];
        for fragment in expected_fragments {
            assert!(
                rendered.contains(fragment),
                "rendered values are missing `{fragment}`"
            );
        }
    }

    /// The project scope list is exactly the project-id audience scope.
    #[test]
    fn scopes_for_project_emits_audience_scope() {
        let scopes = scopes_for_project("12345");
        assert_eq!(scopes, vec!["urn:zitadel:iam:org:project:id:12345:aud"]);
    }
}

View File

@@ -0,0 +1,55 @@
//! `cargo run -p example-fleet-auth-callout` brings the full Zitadel +
//! NATS + auth callout stack up on a local k3d cluster, prints the URLs
//! and credentials, and waits for Ctrl-C.
//!
//! Tests under `tests/` exercise the security model. They are marked
//! `#[ignore]` and only run when explicitly requested with
//! `cargo test -p example-fleet-auth-callout -- --ignored`,
//! since they bring up the same heavy stack.
use anyhow::Result;
use example_fleet_auth_callout::{
ADMIN_USERNAME, DEVICE_A_USERNAME, DEVICE_B_USERNAME, NO_ROLE_USERNAME, bring_up_stack,
};
/// Bring the stack up, print a summary of endpoints and provisioned
/// machine keys, then block until Ctrl-C. The cluster is left running
/// on exit.
#[tokio::main]
async fn main() -> Result<()> {
    let stack = bring_up_stack().await?;

    println!("\n=========================================================");
    println!(" Fleet Auth Callout — STACK READY");
    println!("=========================================================");
    println!(" k3d cluster: {}", stack.cluster_name);
    println!(" Zitadel: {}", stack.zitadel_url);
    println!(
        " admin login: admin / (see Zitadel ConfigMap 'zitadel-config-yaml' for password)"
    );
    println!(" NATS (external): {}", stack.nats_url_external);
    println!(" account: DEVICES");
    println!(" Project ID: {}", stack.project_id);
    println!(" Issuer pubkey: {}", stack.issuer_pubkey);
    println!();
    println!(" Machine keys provisioned (admin / sensor-a / sensor-b / intruder):");

    let provisioned = [
        (ADMIN_USERNAME, &stack.admin_machine_key),
        (DEVICE_A_USERNAME, &stack.device_a_machine_key),
        (DEVICE_B_USERNAME, &stack.device_b_machine_key),
        (NO_ROLE_USERNAME, &stack.intruder_machine_key),
    ];
    for (name, key_json) in provisioned {
        // Print only the keyId so the output is tidy; the full keyfile is
        // cached at ~/.local/share/harmony/zitadel/client-config.json
        let parsed_id = match serde_json::from_str::<serde_json::Value>(key_json) {
            Ok(keyfile) => keyfile
                .get("keyId")
                .and_then(|id| id.as_str())
                .map(|id| id.to_string()),
            // An unparseable keyfile is shown as unknown, not as an error.
            Err(_) => None,
        };
        let key_id = parsed_id.unwrap_or_else(|| "<unknown>".to_string());
        println!(" {name:14} keyId={key_id}");
    }

    println!();
    println!(" Stack is running. Press Ctrl-C to exit (cluster keeps running).");
    println!("=========================================================");
    tokio::signal::ctrl_c().await?;
    Ok(())
}

View File

@@ -0,0 +1,134 @@
//! Real cargo tests proving the IoT fleet security model.
//!
//! All tests share a single bringup of the stack via [`OnceCell`]. The
//! cluster keeps running across the suite, with each test using the
//! cached machine keys to mint Zitadel JWTs and exercise NATS through
//! the auth callout. Three invariants:
//!
//! 1. `admin_can_read_any_device_subject` — fleet-admin sees other devices' state.
//! 2. `device_can_only_access_own_subjects` — sensor-a is denied access to sensor-b's commands.
//! 3. `unknown_role_is_rejected` — a Zitadel-authenticated user with no
//! fleet role cannot connect to NATS.
//!
//! ## Why these tests are real-stack
//!
//! Mocking the OIDC issuer or NATS would only re-prove the unit tests
//! already cover. The point of this suite is to confirm — in CI, in
//! cargo — that the **deployed** stack on k3d enforces the security
//! model end-to-end. Hidden cluster-level misconfiguration (an unset
//! `auth_callout` block, a wrong issuer pubkey, a CoreDNS rewrite drift,
//! a permissions YAML typo) only shows up here.
use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, Result};
use async_nats::ConnectOptions;
use example_fleet_auth_callout::{
StackHandles, bring_up_stack, mint_access_token, scopes_for_project,
};
use futures_util::StreamExt;
use tokio::sync::OnceCell;
/// Process-wide handle to the stack; filled by the first test that asks.
static STACK: OnceCell<Arc<StackHandles>> = OnceCell::const_new();

/// Return the shared stack, bringing it up on first use.
///
/// If bring-up fails the cell stays empty, so a later caller will try
/// again.
async fn shared_stack() -> Result<Arc<StackHandles>> {
    let handles = STACK
        .get_or_try_init(|| async { bring_up_stack().await.map(Arc::new) })
        .await?;
    Ok(Arc::clone(handles))
}
/// Mint a Zitadel access token for the machine key in `key_json` and
/// open a NATS connection that presents it as the auth token.
///
/// The token is requested with the project-audience scope for
/// `stack.project_id`; the connection goes to `stack.nats_url_external`
/// with a 5 s connect timeout.
async fn connect_with_role(stack: &StackHandles, key_json: &str) -> Result<async_nats::Client> {
    let scopes = scopes_for_project(&stack.project_id);
    let token = mint_access_token(&stack.zitadel_url, key_json, &scopes)
        .await
        .context("mint Zitadel access token")?;

    let options =
        ConnectOptions::with_token(token).connection_timeout(Duration::from_secs(5));
    match options.connect(&stack.nats_url_external).await {
        Ok(client) => Ok(client),
        Err(e) => Err(anyhow::anyhow!("NATS connect: {e}")),
    }
}
/// Invariant 1: a fleet-admin connection receives what a device
/// publishes on its own `device-state.*` subject.
#[tokio::test]
#[ignore = "requires k3d + docker environment"]
async fn admin_can_read_any_device_subject() -> Result<()> {
    const PAYLOAD: &str = "telemetry-payload";

    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
    let stack = shared_stack().await?;
    let admin_client = connect_with_role(&stack, &stack.admin_machine_key).await?;
    let sensor_client = connect_with_role(&stack, &stack.device_a_machine_key).await?;

    // Flush so the subscription is sent to the server before the device publishes.
    let mut state_feed = admin_client.subscribe("device-state.>").await?;
    admin_client.flush().await?;

    sensor_client
        .publish("device-state.sensor-a", PAYLOAD.into())
        .await?;
    sensor_client.flush().await?;

    let received = tokio::time::timeout(Duration::from_secs(5), state_feed.next())
        .await
        .context("admin sub timeout")?
        .context("admin sub closed")?;
    assert_eq!(received.payload.as_ref(), PAYLOAD.as_bytes());
    Ok(())
}
/// Device A must never receive traffic addressed to device B's command
/// subject, even when it explicitly subscribes to it.
#[tokio::test]
#[ignore = "requires k3d + docker environment"]
async fn device_can_only_access_own_subjects() -> Result<()> {
    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
    let stack = shared_stack().await?;
    let device_a = connect_with_role(&stack, &stack.device_a_machine_key).await?;
    let device_b = connect_with_role(&stack, &stack.device_b_machine_key).await?;

    // B's own subscription is kept alive for the duration of the test.
    let _b_sub = device_b.subscribe("device-commands.sensor-b").await?;
    // A asks for B's subject; the server is expected to refuse it.
    let mut a_wrong = device_a.subscribe("device-commands.sensor-b").await?;
    device_a.flush().await?;
    device_b.flush().await?;

    // Publish real traffic on B's subject so that a leak, if the
    // permissions were misconfigured, would actually be observable on
    // A's subscription.
    device_b
        .publish("device-commands.sensor-b", "should-not-leak".into())
        .await?;
    device_b.flush().await?;

    let outcome = tokio::time::timeout(Duration::from_millis(750), a_wrong.next()).await;
    // Err(_)      -> nothing arrived within the window: isolated.
    // Ok(None)    -> A's subscription stream ended without a message:
    //                still no leak. NOTE(review): whether the client
    //                ends the stream on a rejected SUB depends on the
    //                async-nats version — confirm.
    // Ok(Some(_)) -> B's command reached A: isolation is broken.
    assert!(
        !matches!(outcome, Ok(Some(_))),
        "device A must not observe device B's commands"
    );
    Ok(())
}
/// A principal that authenticates with Zitadel but holds no fleet role
/// must be refused at NATS connect time.
#[tokio::test]
#[ignore = "requires k3d + docker environment"]
async fn unknown_role_is_rejected() -> Result<()> {
    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
    let stack = shared_stack().await?;

    // The intruder has a valid Zitadel JWT but no fleet-admin/device role
    // grant. The callout must reject the connection — NATS surfaces that
    // as `authorization violation` at connect time.
    let err = match connect_with_role(&stack, &stack.intruder_machine_key).await {
        Ok(_) => panic!("JWT without fleet role must not be admitted to NATS"),
        Err(e) => e,
    };

    // `connect_with_role` reports NATS-side failures as `NATS connect: …`
    // and token-mint failures under a different context. Require the
    // former so a broken mint step cannot make this test pass without
    // the auth callout ever being exercised.
    let chain = format!("{err:#}");
    assert!(
        chain.contains("NATS connect"),
        "expected the rejection to come from the NATS connect step, got: {chain}"
    );
    Ok(())
}

View File

@@ -0,0 +1,35 @@
[package]
name = "example_fleet_device_enroll"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "fleet_device_enroll"
path = "src/main.rs"
[features]
default = ["vm-rehearsal"]
# `--launch-pi-vm` and `--vm-rehearsal` flags. Enables the `kvm`
# feature on `harmony`, which pulls in libvirt (`libvirt-dev`) and
# does NOT cross-compile for arm64 (no aarch64 libvirt static libs
# in most distros). Disable this feature when building the
# enrollment binary FOR the target device:
# cargo build --release --target aarch64-unknown-linux-musl \
# -p example_fleet_device_enroll --no-default-features
# A device-side build leaves out the rehearsal code entirely; the
# binary is enrollment-only and links with no native dependencies.
vm-rehearsal = ["harmony/kvm"]
[dependencies]
# `podman` is required even on device-side builds (the operator CRD
# definitions in `harmony::modules::fleet::operator` depend on
# `podman` types via the reconciler-contracts shape). `kvm` is the
# only feature that pulls libvirt and stays opt-in via `vm-rehearsal`.
harmony = { path = "../../harmony", default-features = false, features = ["podman"] }
harmony_types = { path = "../../harmony_types" }
tokio.workspace = true
log.workspace = true
env_logger.workspace = true
anyhow.workspace = true
clap.workspace = true

View File

@@ -0,0 +1,193 @@
# Example: Fleet Device Enroll
Enrolls a device into the fleet by minting its Zitadel machine user + JSON key inline (browser SSO or pre-acquired admin token), then runs `FleetDeviceSetupScore` against the device to install podman, drop the keyfile + agent config, and bring up the agent under systemd.
Two operator workflows land on the same code path:
- **Dev-on-device** — developer runs the score on a Pi with keyboard + display attached. Browser opens locally, dev signs in with their personal SSO account, the score provisions credentials for that one device.
- **Production-via-SSH** — operator runs the score from a workstation, targets each device over SSH. Browser opens once on the workstation. (Per-batch token caching is on the roadmap; v0 re-prompts per device but the browser session cookie keeps the click cheap.)
## How to use
### Prerequisites
- A running staging install (Zitadel + NATS + auth callout + operator) — see `examples/fleet_staging_install/`.
- The Zitadel project ID for `fleet` (from the staging install output).
- A cross-compiled `fleet-agent` binary for the target arch.
- For VM rehearsal: libvirt + qemu-system-aarch64 + xorriso installed locally. Run `cargo run -p example_fleet_vm_setup -- --bootstrap-only --arch aarch64` once to prime the asset cache and SSH keys.
- Your Zitadel SSO account must hold a role permitting machine-user, role-grant, and machine-key creation (typically `IAM_OWNER` or `ORG_OWNER`).
### Build flavors
The crate has two flavors selected by Cargo features:
| Flavor | Command | What it includes |
|---|---|---|
| **Workstation** (default) | `cargo build --release -p example_fleet_device_enroll` | Everything: `--launch-pi-vm`, `--vm-rehearsal`, full enrollment. Pulls in libvirt via the `vm-rehearsal` feature. |
| **Device-side** (cross-compile) | `cargo build --release --target aarch64-unknown-linux-musl -p example_fleet_device_enroll --no-default-features` | Enrollment-only — no VM-rehearsal flags, no libvirt. Builds for arm64. **Use the musl target, not gnu** (see below). |
#### Why musl, not gnu
Building with `--target aarch64-unknown-linux-gnu` links against the host's glibc. On a current Arch / Fedora workstation that's glibc 2.41+; on the device it might be glibc 2.36 (Debian 12) or 2.41 (Debian 13). When the workstation's glibc is newer than the device's, the binary fails to start with:
```
./fleet_device_enroll: /lib/aarch64-linux-gnu/libc.so.6: version `GLIBC_2.39' not found
```
`aarch64-unknown-linux-musl` produces a **fully static binary** linked against musl libc, which is bundled in. It runs on any aarch64 Linux regardless of the host's libc generation — Debian 12, 13, Pi OS, Alpine, all the same. That's what we want for a device-side binary that gets shipped onto whatever userland the production line happens to flash.
#### One-time musl setup
```bash
rustup target add aarch64-unknown-linux-musl
# Arch: sudo pacman -S aarch64-linux-musl (AUR) or use mold-aarch64
# Fedora: sudo dnf install gcc-aarch64-linux-gnu (we use musl-cross via rustup)
```
You may need to point Cargo at the right linker. In `~/.cargo/config.toml`:
```toml
[target.aarch64-unknown-linux-musl]
linker = "aarch64-linux-musl-gcc"
```
Or use `cross` (`cargo install cross`) which handles the toolchain automatically:
```bash
cross build --release --target aarch64-unknown-linux-musl \
-p example_fleet_device_enroll --no-default-features
```
#### Copying to the device
```bash
scp target/aarch64-unknown-linux-musl/release/fleet_device_enroll pi@<host>:
```
Then SSH to the device and run it as documented in [Dev-on-device](#dev-on-device) below.
### Quickstart — Pi-equivalent VM rehearsal
Boot a Pi-equivalent VM (Debian trixie arm64 generic-cloud — same Debian base Pi OS is built on; Pi OS itself is locked to Pi hardware and won't boot in generic KVM) with one command:
```bash
cargo run -p example_fleet_device_enroll -- --launch-pi-vm
```
The command boots the VM and exits, printing the SSH connection details and a suggested next command. From there, enroll the running VM:
```bash
./target/debug/fleet_device_enroll \
--target ssh://fleet-admin@<VM_IP> \
--device-id pi-rehearsal-01 \
--issuer-url https://sso-staging.cb1.nationtech.io \
--audience <PROJECT_ID> \
--nats-url wss://nats-fleet-staging.cb1.nationtech.io \
--admin-oidc-client-id <CLIENT_ID> \
--agent-binary target/aarch64-unknown-linux-gnu/release/fleet-agent
```
`--device-id` is required and validated against RFC1123 subdomain rules (lowercase alphanumeric + `-`, must start and end with an alphanumeric, ≤253 chars total / ≤63 chars per label). Same id is reused for the agent's TOML, the Zitadel machine username (`device-<id>`), and the Kubernetes Device CR — so anything kube wouldn't accept as a `metadata.name` is rejected upfront here instead of three layers down at operator-reconcile time.
The browser opens to Zitadel's device-code login. Sign in with your SSO account; the score mints the per-device user, drops the keyfile, and brings up the agent.
### Dev-on-device
Run the binary on the Pi itself, omit `--target` entirely. The score uses ansible's local connection and runs everything on the same machine — no SSH, no keypair:
```bash
fleet_device_enroll \
--issuer-url https://sso.example.com \
--audience <PROJECT_ID> \
--nats-url wss://nats.example.com \
--admin-oidc-client-id <CLIENT_ID> \
--agent-binary /usr/local/bin/fleet-agent \
--device-id pi-001 \
--labels group=lab,arch=aarch64
```
Browser opens on the Pi's local display. The dev signs in once; the score handles the rest. Sudo prompts the operator's password if passwordless sudo isn't configured (which is fine — Debian's default).
Auto-installs `python3-venv` on first run if missing (Debian splits it out of base python3); the score detects the failure, runs `sudo apt-get install -y python3-venv`, and retries the venv create.
### Production-via-SSH
Operator runs from a workstation, targeting devices on the LAN:
```bash
fleet_device_enroll \
--target ssh://pi@10.0.0.42 \
--issuer-url https://sso.example.com \
--audience <PROJECT_ID> \
--nats-url wss://nats.example.com \
--admin-oidc-client-id <CLIENT_ID> \
--agent-binary ./build/fleet-agent-aarch64 \
--device-id batch7-042 \
--labels group=batch7,site=warehouse-east
```
Each invocation re-prompts the browser. Token caching across runs is tracked in `ROADMAP/fleet_platform/device_enrollment_token_caching.md`.
### Non-interactive (CI / scripted)
Skip the browser by passing a Bearer token:
```bash
HARMONY_ZITADEL_ADMIN_TOKEN=<pat-or-access-token> \
fleet_device_enroll \
--target ssh://pi@10.0.0.42 \
--issuer-url https://sso.example.com \
--audience <PROJECT_ID> \
--nats-url wss://nats.example.com \
--agent-binary ./build/fleet-agent-aarch64 \
--device-id batch7-042
```
## What the score does on the device
For each invocation the score:
1. Calls Zitadel `/management/v1/*` with the admin token to find-or-create the device's machine user, grant it the `device` role on the fleet project, and mint a JSON key (idempotent on user + grant; always mints a new key because Zitadel doesn't return existing material).
2. SSHes to the target, ensures `podman` + `systemd-container` packages, creates the `fleet-agent` user with linger, activates the user-scoped podman socket.
3. Uploads the agent binary to `/usr/local/bin/fleet-agent`.
4. Drops the JSON keyfile at `/etc/fleet-agent/zitadel-key.json` (mode 0640, owned by `fleet-agent`).
5. Renders `/etc/fleet-agent/config.toml` with the agent's NATS URLs, labels, and `[credentials]` block pointing at the keyfile.
6. Installs and starts `fleet-agent.service`. Restarts only if config / binary / unit changed.
The agent then mints NATS JWTs from the keyfile via the auth callout's JWT-bearer flow and registers itself in the `device-info` KV.
## Verification
After enrollment, the device's heartbeat should appear within seconds:
```bash
nats kv get fleet-device-info <device-id>
```
Or watch via the operator's dashboard / CRs:
```bash
kubectl get fleetdev # devices CRD
```
## SSO `client_id` — where to get it
`--admin-oidc-client-id` is the **numeric Zitadel-assigned client_id**, not the human-readable app name. When `fleet_staging_install` provisions the `harmony-cli` device-code app, Zitadel generates a numeric client_id like `371639797157987125@fleet`. The staging install prints this value in its final summary block — copy it from there.
If you ever need to look it up after the fact, it's in the staging-install operator's local cache:
```bash
jq -r '.apps."harmony-cli"' ~/.local/share/harmony/zitadel/client-config.json
```
That cache is on the **operator's workstation** (the host that ran `fleet_staging_install`). The device itself doesn't have it — the operator must pass `--admin-oidc-client-id <numeric>` explicitly when running enrollment from the device, or set `HARMONY_ZITADEL_ADMIN_TOKEN` to skip SSO entirely.
## Common failure modes
- **`invalid_client: no active client not found`** — `--admin-oidc-client-id` is wrong. Most likely you passed the app name (`harmony-cli`) instead of the numeric client_id. See above.
- **`Project '<name>' not visible to the current Zitadel token`** — your SSO token's primary org differs from where the project lives. Most common when the staging install created the project as the system iam-admin user (system org) and you're signing in with a personal Zitadel account (your own org). Pass `--admin-org-id <id>` (find it in Zitadel UI → Organization → Resource ID). Alternatively, the score now logs `projects visible in current org context: …` right before the error — that list shows what your token CAN see, which usually pinpoints the org mismatch.
- **403 on management API** — operator SSO account doesn't hold a role permitting management calls. Grant `IAM_OWNER` (or equivalent scoped permission) in Zitadel admin UI.
- **`CaUsedAsEndEntity` from rustls** — talking to a dev cluster with a self-signed cert. Pass `--danger-accept-invalid-certs`.
- **Browser doesn't open over SSH** — `webbrowser` can't find a GUI. The score still prints the URL; copy it into a browser on your workstation.
## CLI flags
Run `fleet_device_enroll --help` for the full surface.

View File

@@ -0,0 +1,639 @@
//! Per-device enrollment driver — runs `FleetDeviceSetupScore` with
//! the new `FleetDeviceAuth::ZitadelEnroll` variant. Two workflows
//! land on the same code path:
//!
//! - **Dev-on-device**: developer runs this on a Pi they have a
//! keyboard / display attached to. They omit `--target`, so the
//! score runs against the local machine (no SSH, no keypair). The
//! score opens the local browser to Zitadel SSO, the dev signs in
//! with their personal account (must hold the admin role), the
//! score mints a per-device user + key, drops the keyfile + config
//! in place, and brings the agent up.
//!
//! - **Production-via-SSH**: operator runs this from a workstation,
//! targets each device over SSH (`--target ssh://pi@10.0.0.42`).
//! Browser opens once on the workstation; for v0 the resulting
//! token is held in memory only — re-running for the next device
//! re-prompts. Token caching is on the roadmap.
//!
//! `--vm-rehearsal` boots an aarch64 KVM VM and enrolls it through
//! the same path, so we can dry-run the whole flow without a Pi.
use std::path::PathBuf;
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::fleet::{
AdminAuth, FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore,
ensure_fleet_ssh_keypair,
};
use harmony::modules::linux::{LinuxHostTopology, LinuxLocalhostTopology, SshCredentials};
use harmony_types::id::Id;
// VM-rehearsal-only imports. Hidden behind a feature so `cargo build
// --no-default-features` (the device-side / aarch64 cross-compile)
// doesn't pull in libvirt — `libvirt-dev` doesn't link against arm64
// targets on most distros.
#[cfg(feature = "vm-rehearsal")]
use harmony::modules::fleet::{ProvisionVmScore, check_fleet_smoke_preflight_for_arch};
#[cfg(feature = "vm-rehearsal")]
use harmony::modules::kvm::KvmVirtualMachineHost;
#[cfg(feature = "vm-rehearsal")]
use harmony::modules::kvm::config::init_executor;
#[cfg(feature = "vm-rehearsal")]
use harmony::topology::{VirtualMachineSpec, VmArchitecture, VmFirstBootConfig};
/// Command-line surface of the enrollment binary.
///
/// Every flag that only matters for enrollment is optional at parse
/// time and checked in `main`, so that `--launch-pi-vm` (which boots a
/// VM and exits) can be used on its own.
#[derive(Parser, Debug)]
#[command(
    name = "fleet_device_enroll",
    about = "Enroll a device into the fleet by minting its Zitadel \
             credentials inline (browser SSO or pre-acquired token)"
)]
struct Cli {
    // ---- target ----------------------------------------------------------
    /// Where to apply the score.
    ///
    /// - **Omitted** → run on the same machine the binary is invoked
    /// on (no SSH, no keypair). Ansible's `-c local` connection
    /// does the work; sudo still goes through your normal
    /// credentials.
    /// - **`ssh://user@host`** → drive the score against a remote
    /// device over SSH using the harmony fleet SSH key.
    ///
    /// Ignored when `--vm-rehearsal` is set (the rehearsal targets
    /// the freshly-booted VM).
    #[arg(long)]
    target: Option<String>,
    /// Spin up a fresh aarch64 libvirt VM and enroll it. Pulls the
    /// stock Ubuntu cloud image, attaches to the libvirt `default`
    /// network, waits for SSH, then runs the setup score against it.
    /// Requires the `vm-rehearsal` feature (enabled by default on
    /// host builds, disabled on device-side aarch64 builds).
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long)]
    vm_rehearsal: bool,
    /// Boot a Pi-equivalent aarch64 VM (Debian trixie generic-cloud
    /// image — the same distribution base as Raspberry Pi OS, since
    /// Pi OS itself is locked to Pi hardware and won't boot in
    /// generic KVM) and **exit**. Prints the SSH connection details
    /// so you can connect manually and run `fleet_device_enroll`
    /// against the booted VM as a separate command. Useful for
    /// dev-on-device rehearsal: launch once, then iterate with the
    /// enrollment binary against the running VM. Requires the
    /// `vm-rehearsal` feature.
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long)]
    launch_pi_vm: bool,

    // ---- Zitadel + NATS endpoints ----------------------------------------
    /// Zitadel issuer URL — what the agent will use as its OIDC
    /// issuer and what the score talks to during enrollment.
    /// Required for enrollment; ignored with `--launch-pi-vm`.
    #[arg(long)]
    issuer_url: Option<String>,
    /// Zitadel project ID (the project's numeric id). Becomes the
    /// agent's `audience` for JWT-bearer mint requests, and tags the
    /// machine user so the auth callout's `aud` check passes.
    #[arg(long)]
    audience: Option<String>,
    /// Project name (human-readable) the device's machine user
    /// belongs to. Must already exist — created by the staging
    /// install's `ZitadelSetupScore`.
    #[arg(long, default_value = "fleet")]
    project_name: String,
    /// NATS URL the agent should connect to.
    #[arg(long)]
    nats_url: Option<String>,

    // ---- device identity -------------------------------------------------
    /// Device id baked into the agent's TOML, the Zitadel machine
    /// username (`device-<device_id>`), and the Kubernetes Device CR
    /// name on the operator side. **Required for enrollment**;
    /// ignored with `--launch-pi-vm`.
    ///
    /// Must be a valid RFC1123 DNS label / subdomain since the
    /// operator builds Kubernetes resource names from it. The
    /// validator in this binary rejects anything else upfront so
    /// enrollment can't produce a Zitadel machine user that the
    /// operator will later choke on with `metadata.name: Invalid value`.
    ///
    /// Allowed: lowercase alphanumerics + `-`, must start and end with
    /// an alphanumeric, max 63 chars per segment. Segments separated
    /// by `.` are accepted (full RFC1123 subdomain) but `-` is the
    /// usual choice.
    ///
    /// Examples that pass: `pi-001`, `lab-rehearsal-3`, `dev-jg-vm`.
    /// Examples that fail: `pi_001` (underscore), `Pi001` (uppercase),
    /// `-pi001` (leading dash), `pi001-` (trailing dash).
    // Optional at parse time (like `--issuer-url` and friends) so that
    // `--launch-pi-vm` works on its own; `main` enforces presence on
    // the enrollment path.
    #[arg(long)]
    device_id: Option<String>,
    /// Zitadel machine username for this device. Defaults to
    /// `device-<device_id>` so re-running with the same device_id
    /// reuses the same Zitadel user.
    #[arg(long)]
    device_username: Option<String>,
    /// Project-scoped Zitadel role to grant the device's user.
    /// Defaults to `device` — the role the auth callout maps to
    /// per-device-scoped pub/sub permissions.
    #[arg(long, default_value = "device")]
    device_role: String,
    /// Routing labels (`key=value,key=value`) the agent publishes in
    /// every DeviceInfo heartbeat.
    #[arg(long, default_value = "group=group-a")]
    labels: String,

    // ---- admin auth ------------------------------------------------------
    /// Pre-acquired Bearer token (PAT or out-of-band access token).
    /// When set, skips the browser device-code flow.
    #[arg(long, env = "HARMONY_ZITADEL_ADMIN_TOKEN")]
    admin_token: Option<String>,
    /// Zitadel OIDC `client_id` for the device-code app — the
    /// **numeric id** Zitadel assigns when the app is created (e.g.
    /// `371639797157987125@fleet`), NOT the human-readable app name
    /// (`harmony-cli`). The staging install prints this value in its
    /// final summary; copy it from there. Required when using SSO
    /// (omit only when `--admin-token` is set).
    #[arg(long)]
    admin_oidc_client_id: Option<String>,
    /// Forward to the agent's HTTP client AND to our admin-side calls
    /// to Zitadel. Set when talking to a dev cluster with a
    /// self-signed cert.
    #[arg(long)]
    danger_accept_invalid_certs: bool,
    /// Override the Zitadel **org context** (`x-zitadel-orgid` header)
    /// for management API calls. Set when the SSO operator's primary
    /// org differs from where the project + device users live —
    /// typical for human SSO accounts on a Zitadel where the project
    /// was provisioned by the system iam-admin (their org defaults
    /// don't match). Symptom: `Project '<name>' not found in
    /// Zitadel` even though the project clearly exists. Find the
    /// right value in Zitadel's admin UI → Organization → Resource
    /// ID, or via `/admin/v1/orgs/_search`.
    #[arg(long)]
    admin_org_id: Option<String>,

    // ---- agent binary ----------------------------------------------------
    /// Path to the cross-compiled fleet-agent binary that gets
    /// uploaded to the device and installed at /usr/local/bin/fleet-agent.
    /// Optional when `--launch-pi-vm` is set (no enrollment runs).
    #[arg(long)]
    agent_binary: Option<PathBuf>,

    // ---- VM rehearsal knobs (only relevant with --vm-rehearsal) ----------
    /// libvirt domain name for the rehearsal VM.
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long, default_value = "fleet-enroll-rehearsal")]
    vm_name: String,
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long, default_value = "default")]
    vm_network: String,
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long, default_value = "fleet-admin")]
    vm_admin_user: String,
    #[cfg(feature = "vm-rehearsal")]
    #[arg(long, default_value_t = 16)]
    vm_disk_size_gb: u32,
}

/// Entry point.
///
/// 1. With `--launch-pi-vm` (feature `vm-rehearsal`): boot the
///    Pi-equivalent VM, print how to reach and enroll it, and exit.
/// 2. Otherwise validate the enrollment flags, build a
///    `FleetDeviceSetupScore`, and run it against — in priority order —
///    the freshly booted rehearsal VM (`--vm-rehearsal`), the remote
///    `--target ssh://user@host`, or the local machine when `--target`
///    is omitted.
#[tokio::main]
async fn main() -> Result<()> {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
        .try_init()
        .ok();
    let cli = Cli::parse();

    // Boot-only mode: none of the enrollment flags are consulted here.
    #[cfg(feature = "vm-rehearsal")]
    if cli.launch_pi_vm {
        let vm_ip = boot_pi_rehearsal_vm(&cli).await?;
        let ssh = ensure_fleet_ssh_keypair()
            .await
            .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
        println!();
        println!("=== Pi-equivalent VM ready ===");
        println!("VM: {} (debian-trixie arm64)", cli.vm_name);
        println!("IP: {vm_ip}");
        println!(
            "SSH: ssh -i {} {}@{vm_ip}",
            ssh.private_key.display(),
            cli.vm_admin_user
        );
        println!();
        println!("To enroll this VM, run from your workstation:");
        // No inline `# comment` inside the command: a comment would
        // swallow the trailing line-continuation backslash and break
        // copy-paste. The note about --device-id goes on its own line.
        println!(
            "  fleet_device_enroll \\\n    \
             --target ssh://{}@{vm_ip} \\\n    \
             --device-id <ID> \\\n    \
             --issuer-url <ISSUER> \\\n    \
             --audience <PROJECT_ID> \\\n    \
             --nats-url <NATS_URL> \\\n    \
             --admin-oidc-client-id <CLIENT_ID> \\\n    \
             --agent-binary <AGENT_BIN>",
            cli.vm_admin_user
        );
        println!("(--device-id is required and must be RFC1123, e.g. pi-001)");
        return Ok(());
    }

    // ---- enrollment: validate everything before touching any host -------
    let raw_device_id = cli
        .device_id
        .as_deref()
        .context("--device-id is required for enrollment")?;
    validate_device_id(raw_device_id)?;
    let device_id = Id::from(raw_device_id.to_string());
    let device_username = cli
        .device_username
        .clone()
        .unwrap_or_else(|| format!("device-{device_id}"));
    let labels = parse_labels(&cli.labels)?;
    let issuer_url = cli
        .issuer_url
        .clone()
        .context("--issuer-url is required for enrollment (omit only with --launch-pi-vm)")?;
    let audience = cli
        .audience
        .clone()
        .context("--audience is required for enrollment")?;
    let nats_url = cli
        .nats_url
        .clone()
        .context("--nats-url is required for enrollment")?;
    let agent_binary = cli
        .agent_binary
        .clone()
        .context("--agent-binary is required for enrollment")?;

    // A pre-acquired token wins; otherwise fall back to browser SSO,
    // which needs the device-code app's client_id.
    let admin = match &cli.admin_token {
        Some(t) => AdminAuth::Token(t.clone()),
        None => AdminAuth::Sso {
            client_id: cli.admin_oidc_client_id.clone().context(
                "--admin-oidc-client-id is required for SSO login. \
                 This is the **numeric** Zitadel client_id (e.g. \
                 `371639797157987125@fleet`), not the app name. \
                 The staging install prints it in its final summary. \
                 Alternatively, pass --admin-token <PAT> to skip SSO.",
            )?,
        },
    };
    let auth = FleetDeviceAuth::ZitadelEnroll {
        oidc_issuer_url: issuer_url,
        audience,
        project_name: cli.project_name.clone(),
        device_username: device_username.clone(),
        device_display_name: format!("Fleet Device {device_id}"),
        device_role_keys: vec![cli.device_role.clone()],
        admin,
        admin_org_id: cli.admin_org_id.clone(),
        danger_accept_invalid_certs: cli.danger_accept_invalid_certs,
    };
    let setup_config = FleetDeviceSetupConfig {
        device_id: device_id.clone(),
        labels,
        nats_urls: vec![nats_url],
        auth,
        agent_binary_path: agent_binary,
        hosts_entries: vec![],
    };
    let setup_score = FleetDeviceSetupScore::new(setup_config);

    // ---- rehearsal: boot a VM and enroll it over SSH ---------------------
    #[cfg(feature = "vm-rehearsal")]
    if cli.vm_rehearsal {
        let vm_ip = boot_rehearsal_vm(&cli).await?;
        let ssh = ensure_fleet_ssh_keypair()
            .await
            .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
        let topology = LinuxHostTopology::new(
            format!("rehearsal-{}", cli.vm_name),
            vm_ip
                .parse()
                .context("rehearsal VM did not yield a valid IP")?,
            SshCredentials {
                user: cli.vm_admin_user.clone(),
                private_key_path: ssh.private_key.clone(),
                remote_python: Some("/usr/bin/python3".to_string()),
                sudo_password: None,
            },
        );
        run_setup(&setup_score, &topology).await?;
        println!(
            "✅ rehearsal device '{device_id}' enrolled via VM {} ({vm_ip})",
            cli.vm_name
        );
        return Ok(());
    }

    match cli.target.as_deref() {
        // No `--target` → run on the same machine. ansible's `-c
        // local` connection skips SSH entirely; sudo still works the
        // usual way (operator types the password if not configured
        // passwordless).
        None => {
            let topology = LinuxLocalhostTopology::new("localhost");
            run_setup(&setup_score, &topology).await?;
        }
        Some(target) => {
            let (user, host) = parse_ssh_target(target)?;
            let ssh = ensure_fleet_ssh_keypair()
                .await
                .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
            let topology = LinuxHostTopology::new(
                format!("ssh-{host}"),
                host.parse().context("--target host is not a valid IP")?,
                SshCredentials {
                    user,
                    private_key_path: ssh.private_key.clone(),
                    remote_python: Some("/usr/bin/python3".to_string()),
                    sudo_password: None,
                },
            );
            run_setup(&setup_score, &topology).await?;
        }
    }
    println!("✅ device '{device_id}' enrolled");
    Ok(())
}
/// Boots the `--vm-rehearsal` VM from the Ubuntu cloud image and
/// returns the value `boot_vm` yields for it.
#[cfg(feature = "vm-rehearsal")]
async fn boot_rehearsal_vm(cli: &Cli) -> Result<String> {
    let image = RehearsalImage::Ubuntu;
    boot_vm(cli, image).await
}
/// Boots the `--launch-pi-vm` VM from the Debian trixie cloud image and
/// returns the value `boot_vm` yields for it.
#[cfg(feature = "vm-rehearsal")]
async fn boot_pi_rehearsal_vm(cli: &Cli) -> Result<String> {
    let image = RehearsalImage::DebianTrixie;
    boot_vm(cli, image).await
}
/// Which base cloud image `boot_vm` provisions the rehearsal VM from.
#[cfg(feature = "vm-rehearsal")]
#[derive(Debug, Clone, Copy)]
enum RehearsalImage {
    /// Ubuntu 24.04 cloud image; selected by `boot_rehearsal_vm`.
    Ubuntu,
    /// Debian trixie arm64 cloud image; selected by `boot_pi_rehearsal_vm`.
    DebianTrixie,
}
/// Provisions an aarch64 KVM VM from the requested base image and
/// returns the address reported by `ProvisionVmScore`.
///
/// Steps, in order: preflight check, base image, libvirt pool, fleet
/// SSH keypair + public key, KVM executor, then the provisioning score.
/// The VM gets 2 vCPUs, 2048 MiB of memory, the disk size / network /
/// name / admin user from `cli`, and the fleet public key as its only
/// authorized key.
///
/// # Errors
///
/// Each step's failure is reported with a short prefix naming the step.
/// If the score's outcome has no detail starting with `ip=`, the error
/// is `ProvisionVmScore finished without an IP`.
#[cfg(feature = "vm-rehearsal")]
async fn boot_vm(cli: &Cli, image: RehearsalImage) -> Result<String> {
    use harmony::score::Score;

    let arch = VmArchitecture::Aarch64;
    check_fleet_smoke_preflight_for_arch(arch)
        .await
        .map_err(|e| anyhow::anyhow!("preflight: {e}"))?;

    let base_image = match image {
        RehearsalImage::Ubuntu => {
            harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
                .await
                .map_err(|e| anyhow::anyhow!("cloud image: {e}"))?
        }
        RehearsalImage::DebianTrixie => {
            harmony::modules::fleet::ensure_debian_trixie_arm64_cloud_image()
                .await
                .map_err(|e| anyhow::anyhow!("debian cloud image: {e}"))?
        }
    };
    let pool = harmony::modules::fleet::ensure_harmony_fleet_pool()
        .await
        .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
    let keypair = ensure_fleet_ssh_keypair()
        .await
        .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
    let authorized_key = harmony::modules::fleet::read_public_key(&keypair)
        .await
        .map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?;
    let executor = init_executor().map_err(|e| anyhow::anyhow!("KVM init: {e}"))?;

    let vm_host = KvmVirtualMachineHost::new(
        "kvm-local",
        executor,
        pool.name.clone(),
        pool.path.clone(),
        base_image,
    );

    let first_boot = VmFirstBootConfig {
        hostname: Some(cli.vm_name.clone()),
        admin_user: Some(cli.vm_admin_user.clone()),
        authorized_keys: vec![authorized_key],
        admin_password: None,
    };
    let spec = VirtualMachineSpec {
        name: cli.vm_name.clone(),
        architecture: arch,
        cpus: 2,
        memory_mib: 2048,
        disk_size_gb: Some(cli.vm_disk_size_gb),
        network: cli.vm_network.clone(),
        first_boot: Some(first_boot),
    };
    let vm_score = ProvisionVmScore { spec };

    let outcome = Score::<KvmVirtualMachineHost>::create_interpret(&vm_score)
        .execute(&Inventory::empty(), &vm_host)
        .await
        .map_err(|e| anyhow::anyhow!("ProvisionVmScore: {e}"))?;

    // The score reports the address as an `ip=<addr>` detail entry;
    // the first such entry wins.
    match outcome.details.iter().find_map(|d| d.strip_prefix("ip=")) {
        Some(ip) => Ok(ip.to_string()),
        None => anyhow::bail!("ProvisionVmScore finished without an IP"),
    }
}
/// Executes `score` against `topology` with an empty inventory and
/// prints the outcome's message and details on success.
///
/// # Errors
///
/// An execution failure is returned as `FleetDeviceSetupScore: <cause>`.
async fn run_setup<T>(score: &FleetDeviceSetupScore, topology: &T) -> Result<()>
where
    T: harmony::topology::Topology + harmony::topology::LinuxHostConfiguration,
{
    use harmony::score::Score;

    let interpret = Score::<T>::create_interpret(score);
    let inventory = Inventory::empty();
    let outcome = match interpret.execute(&inventory, topology).await {
        Ok(outcome) => outcome,
        Err(e) => anyhow::bail!("FleetDeviceSetupScore: {e}"),
    };
    println!("setup outcome: {} ({:?})", outcome.message, outcome.details);
    Ok(())
}
/// Checks that `id` is a valid RFC1123 subdomain, the shape Kubernetes
/// requires for `metadata.name`, so the operator's Device CR upsert
/// cannot later fail with `metadata.name: Invalid value`. See
/// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
///
/// Rules enforced:
/// - the id is non-empty and at most 253 bytes long;
/// - it is split on `.` into labels, and every label must pass
///   `validate_dns_label` (1-63 chars, lowercase alphanumeric or `-`,
///   alphanumeric at both ends).
///
/// The same id is also embedded in NATS subjects through the auth
/// callout's permission templates, where `_` or uppercase would pass
/// silently but then break the kube path — hence rejecting upfront.
///
/// # Errors
///
/// Returns the first violation found; label errors are wrapped with
/// the context `device id '<id>'`.
fn validate_device_id(id: &str) -> Result<()> {
    match id.len() {
        0 => anyhow::bail!("device id is empty"),
        len if len > 253 => {
            anyhow::bail!("device id '{id}' is {len} chars, max 253 (RFC1123 subdomain limit)")
        }
        _ => {}
    }
    id.split('.').try_for_each(|label| {
        validate_dns_label(label).with_context(|| format!("device id '{id}'"))
    })
}
fn validate_dns_label(label: &str) -> Result<()> {
if label.is_empty() {
anyhow::bail!("empty label (consecutive dots or leading/trailing dot)");
}
if label.len() > 63 {
anyhow::bail!(
"label '{label}' is {len} chars, max 63 per RFC1123 label",
len = label.len()
);
}
let bytes = label.as_bytes();
if !bytes[0].is_ascii_alphanumeric() {
anyhow::bail!(
"label '{label}' must start with an alphanumeric (got `{}`)",
label.chars().next().unwrap()
);
}
if !bytes[bytes.len() - 1].is_ascii_alphanumeric() {
anyhow::bail!(
"label '{label}' must end with an alphanumeric (got `{}`)",
label.chars().last().unwrap()
);
}
for (i, c) in label.chars().enumerate() {
let ok = c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-';
if !ok {
anyhow::bail!(
"label '{label}' has invalid char `{c}` at position {i}; \
only lowercase a-z, 0-9, and `-` are allowed (no `_`, no uppercase)"
);
}
}
Ok(())
}
/// Splits a `--target` value of the form `ssh://user@host` into
/// `(user, host)`.
///
/// The split happens at the first `@` after the scheme. Anything after
/// it is returned verbatim as the host; no port or path handling is
/// done here.
///
/// # Errors
///
/// Fails when the `ssh://` prefix is missing, when there is no `@`, or
/// when either side of the `@` is empty. There is no `localhost`
/// keyword: local runs are selected by omitting `--target`, and the
/// error message says so.
fn parse_ssh_target(target: &str) -> Result<(String, String)> {
    let rest = target.strip_prefix("ssh://").context(
        "--target must be `ssh://user@host` (omit --target to run on the local machine)",
    )?;
    let (user, host) = rest
        .split_once('@')
        .context("--target must be `ssh://user@host`")?;
    if user.is_empty() || host.is_empty() {
        anyhow::bail!("--target ssh:// has empty user or host");
    }
    Ok((user.to_string(), host.to_string()))
}
/// Parses a `key=value,key=value` string into a sorted label map.
///
/// Pieces are split on `,` and trimmed; empty pieces are skipped. Each
/// remaining piece is split at its first `=`, and both halves are
/// trimmed. A later duplicate key overwrites an earlier one.
///
/// # Errors
///
/// Fails when a piece has no `=`, when a key or value is empty after
/// trimming, or when no label remains at all.
fn parse_labels(raw: &str) -> Result<std::collections::BTreeMap<String, String>> {
    let mut labels = std::collections::BTreeMap::new();
    let pieces = raw
        .split(',')
        .map(str::trim)
        .filter(|piece| !piece.is_empty());
    for piece in pieces {
        let Some((key, value)) = piece.split_once('=') else {
            return Err(anyhow::anyhow!("label '{piece}' missing '='"));
        };
        let (key, value) = (key.trim(), value.trim());
        if key.is_empty() || value.is_empty() {
            anyhow::bail!("label '{piece}' has empty key or value");
        }
        labels.insert(key.to_owned(), value.to_owned());
    }
    if labels.is_empty() {
        anyhow::bail!("--labels must include at least one key=value pair");
    }
    Ok(labels)
}
#[cfg(test)]
mod tests {
    use super::validate_device_id;

    /// Ids that must pass validation, including a multi-label subdomain.
    const ACCEPTED: [&str; 8] = [
        "pi",
        "pi-001",
        "lab-rehearsal-3",
        "dev-jg-vm",
        "a",
        "0",
        "fb5310-qm2kpoq",
        "pi-001.lab-east.fleet",
    ];

    /// Render the full anyhow chain.
    ///
    /// `.to_string()` shows only the outermost context, while the validator
    /// puts the interesting cause (`invalid char …`, `max 63`, …) deeper in
    /// the chain. `{:#}` joins every level with `: `, which is what the
    /// assertions below match against.
    fn err_chain(e: anyhow::Error) -> String {
        format!("{e:#}")
    }

    /// Validate `id`, require a rejection, and return the rendered chain.
    fn rejection(id: &str) -> String {
        err_chain(validate_device_id(id).unwrap_err())
    }

    #[test]
    fn accepts_simple_labels() {
        for ok in ACCEPTED {
            let outcome = validate_device_id(ok);
            assert!(
                outcome.is_ok(),
                "expected '{ok}' to be accepted: {:?}",
                outcome
            );
        }
    }

    #[test]
    fn rejects_underscore() {
        // The original `Id::default()` shape that triggered this fix.
        let err = rejection("fb5310_Qm2kPoQ");
        assert!(err.contains("invalid char `_`"), "got: {err}");
    }

    #[test]
    fn rejects_uppercase() {
        let err = rejection("Pi001");
        assert!(err.contains("invalid char"), "got: {err}");
    }

    #[test]
    fn rejects_leading_or_trailing_dash() {
        assert!(validate_device_id("-pi001").is_err());
        assert!(validate_device_id("pi001-").is_err());
    }

    #[test]
    fn rejects_empty() {
        assert!(validate_device_id("").is_err());
    }

    #[test]
    fn rejects_consecutive_dots() {
        assert!(validate_device_id("a..b").is_err());
    }

    #[test]
    fn rejects_too_long_label() {
        let err = rejection(&"a".repeat(64));
        assert!(err.contains("max 63"), "got: {err}");
    }

    #[test]
    fn rejects_too_long_total() {
        // Four 63-char labels plus three dots: 4 × 63 + 3 = 255 chars, over
        // the 253 limit while every individual label stays legal.
        let id = vec!["a".repeat(63); 4].join(".");
        assert!(id.len() > 253);
        let err = rejection(&id);
        assert!(err.contains("max 253"), "got: {err}");
    }
}

View File

@@ -0,0 +1,48 @@
[package]
name = "example-fleet-e2e-demo"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
description = "VM-based end-to-end rehearsal: k3d + Zitadel + NATS auth callout + libvirt VM agents + operator → CR → podman → status"
[lib]
name = "example_fleet_e2e_demo"
path = "src/lib.rs"
[[bin]]
name = "fleet-e2e-demo"
path = "src/main.rs"
[[test]]
name = "e2e_walking_skeleton"
path = "tests/e2e_walking_skeleton.rs"
[dependencies]
harmony = { path = "../../harmony", features = ["kvm"] }
harmony-k8s = { path = "../../harmony-k8s" }
harmony_types = { path = "../../harmony_types" }
example-fleet-auth-callout = { path = "../fleet_auth_callout" }
harmony-nats-callout = { path = "../../nats/callout" }
harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
harmony-fleet-operator = { path = "../../fleet/harmony-fleet-operator" }
harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" }
k3d-rs = { path = "../../k3d" }
async-nats.workspace = true
nkeys = "0.4"
tokio = { workspace = true, features = ["full"] }
tokio-test.workspace = true
serde.workspace = true
serde_json.workspace = true
anyhow.workspace = true
log.workspace = true
env_logger.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
futures-util.workspace = true
k8s-openapi.workspace = true
kube.workspace = true
clap = { version = "4", features = ["derive", "env"] }
directories = "6.0.0"
tempfile = "3"
url.workspace = true

View File

@@ -0,0 +1,312 @@
# Local fleet rehearsal runbook
End-to-end walkthrough of the IoT fleet platform on your laptop:
k3d-hosted control plane (Zitadel + NATS + auth callout) plus two
libvirt VMs running the fleet-agent. Mirrors the production topology
closely enough that you can watch the auth callout flow, the
JetStream KV traffic, and the per-device permission boundary in a
real cluster.
This is not the integration-test harness (that runs unattended). It
is a step-by-step sequence with inspection points in between. Run
each section, look at what happened, then continue.
## 0. Prerequisites
- Linux host with KVM (the user running the commands must be in the
`libvirt` / `kvm` groups; check with `id`).
- `podman`, `qemu-system-x86_64` (and `qemu-system-aarch64` if you
pick `--arch aarch64`), `mdbook` (optional), `kubectl`, `nats` CLI
(optional, for the manual subscribe step). Most other tooling
(k3d, ansible venv, cloud images) is auto-provisioned under
`~/.local/share/harmony/`.
- `/etc/hosts`: `127.0.0.1 sso.fleet.local` so you can hit Zitadel
from your browser through the cluster's HTTP_PORT (see
`examples/fleet_auth_callout/src/lib.rs` for the constant).
- Free TCP ports `8080` and `30422` on the host.
Source map for the things you'll inspect:
| Component | File |
| --- | --- |
| Bring-up flow | `examples/fleet_e2e_demo/src/lib.rs` |
| Per-device Zitadel + agent install | same, `provision_device()` |
| NATS Score (auth-callout mode) | `fleet/harmony-fleet-deploy/src/nats.rs::FleetNatsScore::callout` |
| Shared agent config schema | `fleet/harmony-fleet-auth/src/agent_config.rs` |
| Auth callout deployment Score | `harmony/src/modules/nats_auth_callout/mod.rs` |
| Callout decision logic | `nats/callout/src/handler.rs::decide` |
| Per-device permissions template | `nats/callout/src/permissions.rs::device_default` |
| Agent NATS auth (JWT-bearer mint) | `fleet/harmony-fleet-auth/src/credentials.rs` |
| Agent KV publishers + direct pulse | `fleet/harmony-fleet-agent/src/fleet_publisher.rs` |
| Walking-skeleton tests | `examples/fleet_e2e_demo/tests/e2e_walking_skeleton.rs` |
The NATS server's helm values are rendered from typed Rust structs
via `serde_yaml::to_string` (see `FleetNatsScore::values_yaml`),
not by `format!()` string interpolation. Same with the agent's
`/etc/fleet-agent/config.toml` — typed `AgentConfig` →
`toml::to_string` → ConfigMap. Per ADR-023 principle 2 the e2e
demo composes the same `*Score` types the production deploy uses.
## 1. Provision the VMs
Each VM is one libvirt domain on the default network
(`192.168.122.0/24`). Run `fleet_vm_setup` once per VM. Pass
`--only-vm` so it stops at the cloud-init step (the agent install
happens later from the e2e bring-up — keeps the two phases legible).
```bash
# VM 0
cargo run --release -p example-fleet-vm-setup -- \
--arch aarch64 \
--vm-name vm-device-00 \
--only-vm
# VM 1
cargo run --release -p example-fleet-vm-setup -- \
--arch aarch64 \
--vm-name vm-device-01 \
--only-vm
```
Use `--arch x86_64` for native KVM speed; `aarch64` runs under
qemu-system-aarch64 TCG emulation on x86_64 hosts and is slower but
matches Pi targets.
**Inspect:**
```bash
virsh list --all
virsh domifaddr vm-device-00
virsh domifaddr vm-device-01
```
Note the IPs — you'll pass them in step 2. Confirm SSH works:
```bash
ssh -i ~/.local/share/harmony/fleet/ssh/id_ed25519 \
fleet-admin@<vm0-ip> uptime
```
The keypair lives under `~/.local/share/harmony/fleet/ssh/`,
generated on first run.
## 2. Bring up the control-plane stack
This single command does everything: k3d cluster, Zitadel,
ZitadelSetupScore (project + roles + 2 device machine users +
`fleet-ops` admin + `fleet-operator` machine user), NATS with
`auth_callout`, callout image build & sideload, callout Deployment,
operator image build & sideload, fleet operator deployment, and
finally `FleetDeviceSetupScore` over SSH for each VM (packages, agent
binary, JWT keyfile, systemd unit).
```bash
FLEET_E2E_VM_0_IP=<vm0-ip> FLEET_E2E_VM_1_IP=<vm1-ip> \
cargo run --release -p example-fleet-e2e-demo -- --num-devices 2
```
The bring-up logs each step as `[e2e-demo <step>/<total>]`. Read along with
`examples/fleet_e2e_demo/src/lib.rs::bring_up_full_stack` to see
what's happening at each line. Stops at `STACK READY` and waits on
Ctrl-C (the cluster stays up after Ctrl-C — this is just the
foreground holder).
**Inspect:**
```bash
export KUBECONFIG=$(k3d kubeconfig write fleet-auth-callout)
# All workloads up?
kubectl get pods -n fleet-system
kubectl get pods -n zitadel
# Callout config the deployment is using:
kubectl get deployment -n fleet-system fleet-callout \
-o jsonpath='{.spec.template.spec.containers[0].env}' | jq
```
Open Zitadel in the browser: <http://sso.fleet.local:8080/ui/console>
(login with `root@zitadel.local` / the bootstrap password printed
during bring-up step 3, logged as `[e2e-demo 3/…]`). Click into the `fleet` project →
`Users` to see the two `device-vm-device-0X` machine users with
`device` role grants and the `fleet-ops` admin.
## 3. Watch the auth callout in action
The callout is the security boundary: every NATS connect attempt
hits `$SYS.REQ.USER.AUTH`, the callout validates the Zitadel JWT
in `connect_opts.auth_token`, applies the decision tree in
`nats/callout/src/handler.rs::decide`, and signs back a user JWT
with role-scoped permissions.
Tail it while the agents reconnect:
```bash
kubectl logs -n fleet-system -l app=fleet-callout -f
```
You'll see one set of lines per (re)connect:
```
received auth callout request user_nkey=U…
Zitadel JWT validated, generating user JWT device_id=vm-device-00 role=device
sending auth response
```
The `device_id` field is the value AFTER `device_id_prefix_strip`
runs (Zitadel emits `client_id=device-vm-device-00`; the callout
strips `device-` so permissions are interpolated against the bare
device id the agent uses for KV keys). See
`nats/callout/src/zitadel.rs::extract_device_id` for the strip.
**Force a reconnect to make a callout fire on demand:**
```bash
ssh -i ~/.local/share/harmony/fleet/ssh/id_ed25519 \
fleet-admin@<vm0-ip> 'sudo systemctl restart fleet-agent'
```
Watch the callout pod log emit one fresh request/response.
## 4. Watch the agent
```bash
ssh -i ~/.local/share/harmony/fleet/ssh/id_ed25519 \
fleet-admin@<vm0-ip> 'sudo journalctl -u fleet-agent -f'
```
What good looks like, in order:
| Log line | Where it comes from |
| --- | --- |
| `minted fresh Zitadel access token audience=…` | `credentials.rs::zitadel_mint` — RFC 7523 JWT-bearer flow, signed with the per-device machine key under `/etc/fleet-agent/zitadel-key.json` |
| `connected successfully server=4222` | NATS accepted the JWT minted by the callout |
| `fleet publisher ready` | KV buckets opened; `device-info` write succeeded |
| `watching KV keys filter=vm-device-00.>` | desired-state subscriber is up |
Absence of `Permissions Violation` lines is the success signal —
those mean the JWT's perms don't match what the agent tried to
publish (you'd hit them if `device_id_prefix_strip` were
misconfigured, for example).
## 5. Observe fleet traffic as admin
The harness mints a `fleet-ops` admin machine user with the
`fleet-admin` role; the callout maps that role to
`pub/sub allow: [">"]`. The integration test
`admin_jwt_reads_any_device_subject` exercises this — easiest path
to see it live is to run it with output. The test is
`#[ignore]`d on `cargo test` so a developer box doesn't burn a
10-minute Zitadel bring-up by accident; `--ignored` opts in:
```bash
FLEET_E2E_VM_0_IP=<vm0-ip> FLEET_E2E_VM_1_IP=<vm1-ip> \
cargo test -p example-fleet-e2e-demo \
--test e2e_walking_skeleton \
admin_jwt_reads_any_device_subject \
-- --test-threads=1 --nocapture --ignored
```
It subscribes admin to `device-state.>` (the direct, non-JetStream
fan-out subject the agent emits a pulse on every 30s — see
`fleet_publisher.rs::publish_state_pulse`) and asserts a message
arrives within 30s.
**Inspect KV state directly** using a bare admin client. The
underlying mechanism is in
`examples/fleet_e2e_demo/tests/e2e_walking_skeleton.rs::admin_nats_client`:
mint a JWT-bearer token from `stack.admin_machine_key`, hand it to
`async_nats` as `auth_token`. The test
`both_devices_heartbeat_within_60s` then reads `device-info` keys
directly:
```rust
let js = async_nats::jetstream::new(admin);
let bucket = js.get_key_value(BUCKET_DEVICE_INFO).await?;
let entry = bucket.entry(&device_info_key("vm-device-00")).await?;
```
To do it from a shell, port-forward NATS and use the `nats` CLI
with admin creds — but creds for an auth-callout server take a
JWT-bearer token, which the `nats` CLI doesn't speak natively;
running the test is the path of least friction.
## 6. Verify cross-device isolation (currently `#[ignore]`)
`cross_device_isolation_enforced_in_vm` is an empty test marked
`#[ignore = "requires E2eHandles::device_machine_key plumbing"]`
in `e2e_walking_skeleton.rs` — the test is a placeholder. The
plumbing it's waiting on is straightforward: the existing
`DeviceHandle` struct (`examples/fleet_e2e_demo/src/lib.rs:106`)
exposes `device_id` + `vm_ip` + `labels` but not the per-device
Zitadel machine key the test would need to mint a `device`-role
JWT and try cross-device subjects. `provision_device` already
creates the key (line ~324, `machine_key_json`) — wiring it through
into `DeviceHandle.machine_key` and implementing the test body
(mint JWT-bearer for vm-device-00, sub to
`device-commands.vm-device-01`, expect `Permissions Violation`)
is a single follow-up commit. It has been left out so far because
nothing in this branch's scope required it.
**You can verify the boundary manually right now**, even without
the test wired up: tail the callout pod, then SSH onto vm-device-00
and run the agent with a tampered config that points it at
vm-device-01's keyfile. The callout will issue a JWT for
`vm-device-01` (because the JWT-bearer assertion is signed with
that user's key); the agent on vm-device-00 will then publish on
`$KV.device-info.info.vm-device-00`, which is NOT in the JWT's
allow list — NATS rejects with `Permissions Violation`. This is
the same gate the test would automate.
The permissions template is in
`nats/callout/src/permissions.rs::device_default` — every allowed
subject contains `{device_id}` and is interpolated per-request, so
device A's JWT physically cannot publish to device B's subjects.
## 7. Drive the desired-state loop
(Not yet covered by a walking-skeleton test, but the agent's
reconciler is wired and observable.) From an admin client, write a
desired state for vm-device-00:
```rust
// pseudocode — see harmony-reconciler-contracts for the exact types
let kv = jetstream.create_key_value(kv::Config {
bucket: BUCKET_DESIRED_STATE.into(),
history: 1,
..Default::default()
}).await?;
kv.put(
&desired_state_key("vm-device-00", &dn("hello-web")),
payload.into(),
).await?;
```
What happens, observable from the agent's journal:
1. Agent's KV watcher (filter `vm-device-00.>`) fires.
2. Reconciler computes the diff and runs the podman create.
3. `write_deployment_state(&state)` fires:
- puts `state.vm-device-00.hello-web` into the `device-state`
KV bucket (operator-side watch picks it up)
- publishes the same payload on direct subject
`device-state.vm-device-00` (admin observers see it live)
You can subscribe to the latter with admin and watch reconcile
events stream in real time.
## 8. Teardown
The cluster persists across runs (re-running `fleet_e2e_demo`
converges drift, doesn't recreate). When you want a clean slate:
```bash
k3d cluster delete fleet-auth-callout
virsh destroy vm-device-00; virsh undefine vm-device-00 --remove-all-storage
virsh destroy vm-device-01; virsh undefine vm-device-01 --remove-all-storage
```
Cached assets (cloud images, k3d binary, ansible venv, SSH key,
fleet secrets) live under `~/.local/share/harmony/` and survive
cluster/VM destruction by design — the first run after a teardown
reuses them.

View File

@@ -0,0 +1,831 @@
//! VM-based end-to-end rehearsal of the customer demo flow.
//!
//! Goal: prove the JWT-auth chain works on a real-system agent
//! before pointing the demo at OKD. See
//! `ROADMAP/fleet_platform/v0_demo_e2e.md` for the full plan.
//!
//! Bring-up sequence:
//! 1. k3d cluster with HTTP + NATS port mappings (re-uses
//! fleet_auth_callout's k3d helpers — same cluster name so
//! re-runs of either example reuse the same cluster).
//! 2. Zitadel + Postgres via ZitadelScore.
//! 3. Wait for Zitadel HTTP and the chart-provisioned `iam-admin-pat`
//! secret (the chart's setup job is async).
//! 4. ZitadelSetupScore for the project + API app + roles + the admin
//! and operator machine users (no per-device users yet).
//! 5. NATS with auth_callout block + the callout pod, followed by the
//! fleet operator image and deployment.
//! 6. For each device i:
//! - ZitadelSetupScore minting a per-device machine user with
//! the `device` role grant. The JSON keyfile is cached in
//! `ZitadelClientConfig` and read back here for the agent.
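//! - libvirt VM: currently provisioned out-of-band (e.g. via
//! `fleet_vm_setup`) and located through `FLEET_E2E_VM_<i>_IP`;
//! folding `ProvisionVmScore` in here is a planned follow-up.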
//! - SSH-inject `/etc/hosts` so the VM resolves
//! `sso.fleet.local` to the libvirt host.
//! - `FleetDeviceSetupScore` with `FleetDeviceAuth::ZitadelJwt`
//! pointing at the dropped keyfile.
//!
//! Tests in `tests/e2e_walking_skeleton.rs` share a single bring-up
//! via `OnceCell` and exercise: heartbeats, label-selector targeting,
//! status reflect-back, env+volume propagation, admin cross-device
//! read, per-device isolation, NATS-pod-restart reconnect.
use std::path::PathBuf;
use std::time::Duration;
use anyhow::{Context, Result};
use example_fleet_auth_callout::{
ADMIN_ROLE_KEY, API_APP_NAME, CALLOUT_DEPLOYMENT_NAME, CALLOUT_IMAGE_TAG, DEVICE_ROLE_KEY,
FLEET_NAMESPACE, HTTP_PORT, NATS_ACCOUNT, NATS_AUTH_PASS, NATS_AUTH_USER, NATS_NAMESPACE,
NATS_NODE_PORT, NATS_RELEASE, PROJECT_NAME, ZITADEL_HOST, build_and_load_callout_image,
create_k3d, create_topology, deploy_zitadel, ensure_issuer_seed, wait_for_callout_ready,
wait_for_zitadel_ready,
};
use harmony::inventory::Inventory;
use harmony::modules::fleet::{
FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore, HostsEntry,
ensure_fleet_ssh_keypair,
};
use harmony::modules::k8s::coredns::{CoreDNSRewrite, CoreDNSRewriteScore};
use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv};
use harmony::modules::nats_auth_callout::NatsAuthCalloutScore;
use harmony::modules::zitadel::{
MachineKeyType, ZitadelApiApp, ZitadelClientConfig, ZitadelMachineUser, ZitadelRole,
ZitadelSetupScore,
};
use harmony::score::Score;
use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
use harmony_fleet_deploy::FleetNatsScore;
use harmony_types::id::Id;
use log::{info, warn};
use nkeys::KeyPair;
// ---- constants -------------------------------------------------------------
/// Libvirt's default NAT gateway. The host's IP from inside any VM
/// attached to the `default` libvirt network. We bake this in because
/// every smoke-a* harness assumes it; callers with a different bridge
/// can override it through `E2eDemoOpts::libvirt_host_ip`.
/// NOTE(review): the original note said the override is "via env"; no
/// env lookup for it is visible in this part of the file — confirm
/// where (or whether) that wiring exists.
pub const DEFAULT_LIBVIRT_HOST_IP: &str = "192.168.122.1";
/// Username of the admin machine user that `bring_up_full_stack`
/// creates with the `ADMIN_ROLE_KEY` grant. Its machine key is handed
/// back to callers as `E2eHandles::admin_machine_key`.
pub const ADMIN_USERNAME: &str = "fleet-ops";
/// Separate machine user for the in-cluster operator. Distinct from
/// `fleet-ops` (manual admin tooling) so the audit trail can tell
/// operator-driven actions apart from human operator actions. Same
/// `fleet-admin` role grant — only the identity differs.
pub const OPERATOR_USERNAME: &str = "fleet-operator";
/// Container image tag for the fleet operator.
/// NOTE(review): presumably the tag `build_and_load_operator_image`
/// builds and `deploy_operator` runs — both are defined outside this
/// section; confirm.
pub const OPERATOR_IMAGE_TAG: &str = "localhost/harmony-fleet-operator:dev";
/// Zitadel username for a device: the literal prefix `device-` followed
/// by the device id (e.g. `vm-device-00` → `device-vm-device-00`).
///
/// Same convention as `fleet_rpi_setup`. The bring-up configures the
/// callout with `device_id_claim = "client_id"` and
/// `device_id_prefix_strip = "device-"`, so the prefix added here is what
/// the callout removes again to recover the bare device id.
pub fn device_username(device_id: &str) -> String {
    ["device-", device_id].concat()
}
// ---- options + handles -----------------------------------------------------
/// Options for `bring_up_full_stack`. `Default` gives a two-device
/// setup on the default libvirt network.
#[derive(Debug, Clone)]
pub struct E2eDemoOpts {
/// Number of VM-as-device agents to provision.
pub num_devices: usize,
/// Path to the cross-compiled agent binary uploaded to each VM.
/// The `Default` impl resolves it with
/// `workspace_target_path("release/harmony-fleet-agent")`.
/// NOTE(review): this doc previously named `target/release/fleet-agent`
/// ("the same path that smoke-a4 produces"); the binary name in the
/// `Default` impl differs — confirm which name smoke-a4 emits.
pub agent_binary: PathBuf,
/// Override for the libvirt host IP (the address VMs see as the
/// gateway). Defaults to [`DEFAULT_LIBVIRT_HOST_IP`].
pub libvirt_host_ip: String,
}
impl Default for E2eDemoOpts {
    /// Two devices, the release agent binary resolved via
    /// `workspace_target_path`, and the default libvirt gateway address.
    fn default() -> Self {
        let agent_binary = workspace_target_path("release/harmony-fleet-agent");
        let libvirt_host_ip = String::from(DEFAULT_LIBVIRT_HOST_IP);
        Self {
            num_devices: 2,
            agent_binary,
            libvirt_host_ip,
        }
    }
}
/// What `provision_device` reports back for one onboarded device.
#[derive(Debug, Clone)]
pub struct DeviceHandle {
/// Zero-based device index, as passed to `provision_device`.
pub index: usize,
/// Device id, built as `vm-device-{index:02}`.
pub device_id: String,
/// IP of the VM, as returned by `discover_vm_ip` for this index.
pub vm_ip: String,
/// Labels from `build_device_labels`; the same map is passed to the
/// agent setup config.
pub labels: std::collections::BTreeMap<String, String>,
}
/// Everything `bring_up_full_stack` returns once the stack is up.
#[derive(Debug, Clone)]
pub struct E2eHandles {
/// Cluster name, taken from `example_fleet_auth_callout::CLUSTER_NAME`.
pub cluster_name: String,
/// NATS URL as seen from the host: `nats://127.0.0.1:{NATS_NODE_PORT}`.
pub nats_url_external: String,
/// Zitadel base URL: `http://{ZITADEL_HOST}:{HTTP_PORT}`.
pub zitadel_url: String,
/// Zitadel project id read from the `ZitadelClientConfig` cache.
pub project_id: String,
/// Public key of the issuer NKey pair whose seed is passed to the callout.
pub issuer_pubkey: String,
/// Machine key of the `ADMIN_USERNAME` user, read from the client
/// config cache (the user is created with `MachineKeyType::Json`).
pub admin_machine_key: String,
/// One handle per provisioned device, in index order.
pub devices: Vec<DeviceHandle>,
}
// ---- bring up --------------------------------------------------------------
pub async fn bring_up_full_stack(opts: E2eDemoOpts) -> Result<E2eHandles> {
let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
.try_init();
info!("[e2e-demo 1/9] ensuring k3d cluster");
let k3d = create_k3d();
k3d.ensure_installed()
.await
.map_err(|e| anyhow::anyhow!("k3d ensure: {e}"))?;
let topology = create_topology(&k3d);
topology.ensure_ready().await.context("topology init")?;
info!("[e2e-demo 2/9] deploying Zitadel (cold start: ~5 min)");
deploy_zitadel(&topology).await?;
info!("[e2e-demo 3/9] CoreDNS rewrite + waiting for Zitadel HTTP + iam-admin-pat secret");
CoreDNSRewriteScore {
rewrites: vec![CoreDNSRewrite {
hostname: ZITADEL_HOST.to_string(),
target: "zitadel.zitadel.svc.cluster.local".to_string(),
}],
}
.interpret(&Inventory::autoload(), &topology)
.await
.context("CoreDNSRewriteScore")?;
wait_for_zitadel_ready().await?;
wait_for_iam_admin_pat_secret(&topology).await?;
info!("[e2e-demo 4/9] provisioning project, API app, roles, admin machine user");
let admin_setup = ZitadelSetupScore {
host: ZITADEL_HOST.to_string(),
scheme: Default::default(),
port: None,
skip_tls: true,
endpoint: Some(format!("http://127.0.0.1:{HTTP_PORT}")),
admin_org_id: None,
namespace: "zitadel".to_string(),
applications: vec![],
api_apps: vec![ZitadelApiApp {
project_name: PROJECT_NAME.to_string(),
app_name: API_APP_NAME.to_string(),
}],
roles: vec![
ZitadelRole {
project_name: PROJECT_NAME.to_string(),
key: ADMIN_ROLE_KEY.to_string(),
display_name: "Fleet Admin".to_string(),
group: None,
},
ZitadelRole {
project_name: PROJECT_NAME.to_string(),
key: DEVICE_ROLE_KEY.to_string(),
display_name: "Device".to_string(),
group: None,
},
],
machine_users: vec![
ZitadelMachineUser {
username: ADMIN_USERNAME.to_string(),
name: "Fleet Operations".to_string(),
create_pat: false,
machine_key: Some(MachineKeyType::Json),
project_name: Some(PROJECT_NAME.to_string()),
grant_roles: vec![ADMIN_ROLE_KEY.to_string()],
},
// Separate machine user for the in-cluster operator pod.
// Same `fleet-admin` role grant as the manual admin
// identity, but distinct username so JWT `client_id` lets
// log analysis tell operator-driven actions apart from
// human operator actions.
ZitadelMachineUser {
username: OPERATOR_USERNAME.to_string(),
name: "Fleet Operator (in-cluster)".to_string(),
create_pat: false,
machine_key: Some(MachineKeyType::Json),
project_name: Some(PROJECT_NAME.to_string()),
grant_roles: vec![ADMIN_ROLE_KEY.to_string()],
},
],
};
admin_setup
.interpret(&Inventory::autoload(), &topology)
.await
.context("admin ZitadelSetupScore")?;
let zcfg = ZitadelClientConfig::load()
.context("ZitadelSetupScore did not produce a client config cache")?;
let project_id = zcfg
.project_id_by_name(PROJECT_NAME)
.or(zcfg.project_id.as_ref())
.context("project_id missing from cache")?
.clone();
let admin_machine_key = zcfg
.machine_key(ADMIN_USERNAME)
.context("admin machine key missing from cache")?
.clone();
info!("[e2e-demo 5/9] generating issuer NKey, deploying NATS with auth_callout");
let issuer_seed = ensure_issuer_seed(&topology).await?;
let issuer_kp = KeyPair::from_seed(&issuer_seed)
.map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
let issuer_pubkey = issuer_kp.public_key();
// Per ADR-023 principle 2 — e2e uses the same Scores as production.
// `FleetNatsScore::callout` renders the auth-callout values block
// typed (serde_yaml) rather than the legacy `render_nats_values`
// string interpolation. Same upstream chart, same wire format,
// schema-checked at compile time.
FleetNatsScore::callout(
NATS_NAMESPACE,
NATS_NODE_PORT as u16,
&issuer_pubkey,
NATS_ACCOUNT,
NATS_AUTH_USER,
NATS_AUTH_PASS,
)
.release_name(NATS_RELEASE)
.interpret(&Inventory::autoload(), &topology)
.await
.context("NATS deploy")?;
info!("[e2e-demo 6/9] building + sideloading callout image into k3d");
build_and_load_callout_image(&k3d).await?;
info!("[e2e-demo 7/9] deploying NatsAuthCalloutScore");
let mut callout = NatsAuthCalloutScore::new(
CALLOUT_DEPLOYMENT_NAME,
FLEET_NAMESPACE,
format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
project_id.clone(),
NATS_AUTH_USER,
NATS_AUTH_PASS,
issuer_seed.clone(),
)
.image(CALLOUT_IMAGE_TAG)
.target_account(NATS_ACCOUNT)
.admin_role(ADMIN_ROLE_KEY)
.device_role(DEVICE_ROLE_KEY)
.danger_accept_invalid_certs(true);
// Same convention as fleet_auth_callout: the username is in the
// access token's `client_id` claim. The role claim path is
// project-scoped because the JWT-bearer flow requests project
// audience scope.
callout.device_id_claim = "client_id".to_string();
// Zitadel's `client_id` for a machine user equals its userName, so a
// user created as `device-vm-device-00` (the convention shared with
// fleet_rpi_setup and fleet_auth_callout) lands in the JWT verbatim.
// Strip the `device-` prefix so the callout interpolates permissions
// against the bare device id (`vm-device-00`) the agent uses for KV
// keys + direct subjects.
callout.device_id_prefix_strip = "device-".to_string();
callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
callout
.interpret(&Inventory::autoload(), &topology)
.await
.context("callout deploy")?;
wait_for_callout_ready(&topology).await?;
info!("[e2e-demo 8/10] building + sideloading operator image into k3d");
build_and_load_operator_image(&k3d).await?;
info!("[e2e-demo 9/10] deploying fleet operator with Zitadel JWT auth");
let operator_machine_key = zcfg
.machine_key(OPERATOR_USERNAME)
.with_context(|| format!("machine key for {OPERATOR_USERNAME} missing from cache"))?
.clone();
deploy_operator(&topology, &project_id, &operator_machine_key).await?;
wait_for_operator_ready(&topology).await?;
info!(
"[e2e-demo 10/10] provisioning {} VM(s) and onboarding agent(s)",
opts.num_devices
);
let mut devices = Vec::with_capacity(opts.num_devices);
for i in 0..opts.num_devices {
let handle = provision_device(i, &opts, &topology, &project_id).await?;
devices.push(handle);
}
info!(
"full stack ready: {} device(s), operator + admin role configured",
devices.len()
);
Ok(E2eHandles {
cluster_name: example_fleet_auth_callout::CLUSTER_NAME.to_string(),
nats_url_external: format!("nats://127.0.0.1:{NATS_NODE_PORT}"),
zitadel_url: format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
project_id,
issuer_pubkey,
admin_machine_key,
devices,
})
}
// ---- per-device provisioning ----------------------------------------------
/// Onboard device number `index`.
///
/// Creates the per-device Zitadel machine user, reads its machine key back
/// from the client-config cache, resolves the VM's IP, and applies
/// `FleetDeviceSetupScore` to the VM over SSH. Returns a handle with the
/// device id, IP and labels.
async fn provision_device(
    index: usize,
    opts: &E2eDemoOpts,
    topology: &K8sAnywhereTopology,
    project_id: &str,
) -> Result<DeviceHandle> {
    let device_id = format!("vm-device-{index:02}");
    let username = device_username(&device_id);
    info!("[device {index}] minting Zitadel machine user {username}");

    // One machine user per device, granted only the device role.
    // Per the original author's note (not verifiable from this file):
    // the score is search-then-create, and a re-run mints a fresh key
    // because Zitadel never returns the private half of an existing one,
    // so a key installed by an earlier run goes stale until it expires.
    let machine_user = ZitadelMachineUser {
        username: username.clone(),
        name: format!("Fleet Device {device_id}"),
        create_pat: false,
        machine_key: Some(MachineKeyType::Json),
        project_name: Some(PROJECT_NAME.to_string()),
        grant_roles: vec![DEVICE_ROLE_KEY.to_string()],
    };
    let zitadel_score = ZitadelSetupScore {
        host: ZITADEL_HOST.to_string(),
        scheme: Default::default(),
        port: None,
        skip_tls: true,
        endpoint: Some(format!("http://127.0.0.1:{HTTP_PORT}")),
        admin_org_id: None,
        namespace: "zitadel".to_string(),
        applications: vec![],
        api_apps: vec![],
        roles: vec![],
        machine_users: vec![machine_user],
    };
    zitadel_score
        .interpret(&Inventory::autoload(), topology)
        .await
        .with_context(|| format!("ZitadelSetupScore for {username}"))?;

    let client_config = ZitadelClientConfig::load()
        .context("ZitadelClientConfig disappeared between admin and device setup")?;
    let device_key = client_config
        .machine_key(&username)
        .with_context(|| format!("machine key for {username} missing from cache"))?
        .clone();

    // The VM itself is not created here. The harness expects it to exist
    // already (e.g. from fleet_vm_setup) and only looks up its IP, so the
    // agent path can be iterated on without paying for a VM boot each time.
    // Follow-up: fold ProvisionVmScore::ensure_vm in once the bring-up has
    // been demonstrated end-to-end.
    let vm_ip = discover_vm_ip(index)
        .with_context(|| format!("could not resolve IP for device {index}"))?;
    info!("[device {index}] {device_id} at {vm_ip} — installing agent with Zitadel JWT auth");

    let device_labels = build_device_labels(&device_id, index);

    // The issuer URL must match what Zitadel reports, which it derives from
    // the request's Host header. The agent reaches Zitadel through the
    // host's port mapping, hence `http://<ZITADEL_HOST>:<HTTP_PORT>`; the
    // hosts entry below makes that name resolve to the libvirt host.
    let auth = FleetDeviceAuth::ZitadelJwt {
        machine_key_json: device_key,
        oidc_issuer_url: format!("http://{ZITADEL_HOST}:{HTTP_PORT}"),
        audience: project_id.to_string(),
        // Plain HTTP in the local rehearsal, so no TLS validation applies.
        danger_accept_invalid_certs: true,
    };
    let zitadel_hosts_entry = HostsEntry {
        ip: opts.libvirt_host_ip.clone(),
        hostname: ZITADEL_HOST.to_string(),
    };
    let setup_score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig {
        device_id: Id::from(device_id.clone()),
        labels: device_labels.clone(),
        // The agent reaches NATS on the libvirt host's IP via the NodePort.
        nats_urls: vec![format!("nats://{}:{NATS_NODE_PORT}", opts.libvirt_host_ip)],
        auth,
        agent_binary_path: opts.agent_binary.clone(),
        hosts_entries: vec![zitadel_hosts_entry],
    });

    // Applied over SSH, one VM at a time.
    apply_fleet_setup_to_vm(index, &vm_ip, setup_score).await?;

    Ok(DeviceHandle {
        index,
        device_id,
        vm_ip,
        labels: device_labels,
    })
}
/// Run `score` against the VM at `vm_ip` over SSH.
///
/// Ensures the ansible venv and the fleet SSH keypair exist, builds a
/// `LinuxHostTopology` named `vm-device-{index:02}` for the VM, and
/// executes the score's interpret against it with an empty inventory.
async fn apply_fleet_setup_to_vm(
    index: usize,
    vm_ip: &str,
    score: FleetDeviceSetupScore,
) -> Result<()> {
    use harmony::score::Score;

    ensure_ansible_venv()
        .await
        .map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?;
    let keypair = ensure_fleet_ssh_keypair()
        .await
        .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;

    let addr = vm_ip
        .parse()
        .with_context(|| format!("VM IP '{vm_ip}' is not a valid IP address"))?;

    // The user matches the cloud-init admin account that fleet_vm_setup and
    // smoke-a4 create. Follow-up: thread the username through E2eDemoOpts
    // for VMs provisioned with a different account.
    let ssh_creds = SshCredentials {
        user: "fleet-admin".to_string(),
        private_key_path: keypair.private_key.clone(),
        remote_python: Some("/usr/bin/python3".to_string()),
        sudo_password: None,
    };
    let host_name = format!("vm-device-{index:02}");
    let vm_topology = LinuxHostTopology::new(host_name, addr, ssh_creds);

    score
        .create_interpret()
        .execute(&Inventory::empty(), &vm_topology)
        .await
        .with_context(|| format!("FleetDeviceSetupScore against VM {index} ({vm_ip})"))?;
    Ok(())
}
/// Build the label set for the rehearsal device `device_id`.
///
/// Every device carries `arch` (the build host's `std::env::consts::ARCH`),
/// `role` (`rehearsal`) and `device-id`. The `group` label is `group-a` for
/// index 0 and `group-b` for every other index, so that with the default two
/// devices a selector test can target exactly one of them.
fn build_device_labels(
    device_id: &str,
    index: usize,
) -> std::collections::BTreeMap<String, String> {
    let group = match index {
        0 => "group-a",
        _ => "group-b",
    };
    [
        ("group", group),
        ("arch", std::env::consts::ARCH),
        ("role", "rehearsal"),
        ("device-id", device_id),
    ]
    .iter()
    .map(|&(key, value)| (key.to_string(), value.to_string()))
    .collect()
}
/// Look up the IP of the pre-provisioned VM number `index`.
///
/// By convention the address comes from the `FLEET_E2E_VM_<index>_IP`
/// environment variable. That keeps the harness usable on a workstation where
/// `fleet_vm_setup` was run once per device out-of-band and the e2e bring-up
/// is re-run against the already-booted VMs.
///
/// # Errors
/// Fails when the variable is unset or is not valid Unicode. The value itself
/// is not validated here.
fn discover_vm_ip(index: usize) -> Result<String> {
    let key = format!("FLEET_E2E_VM_{index}_IP");
    let lookup = std::env::var(&key);
    lookup.with_context(|| format!("set {key} to the libvirt VM's IP (default network)"))
}
// ---- iam-admin-pat readiness ----------------------------------------------
/// Block until the `iam-admin-pat` Secret exists in the `zitadel` namespace
/// and contains a `pat` key.
///
/// The Zitadel Helm release reports Ready before its setup job has written
/// this secret, and ZitadelSetupScore reads the secret to authenticate to the
/// management API, so running that score right after the deploy would race.
///
/// Polls once per second for up to 120 attempts and logs a warning every
/// tenth attempt.
///
/// # Errors
/// Fails if the Kubernetes client cannot be obtained, if a lookup errors, or
/// if the secret is still missing after the last attempt.
async fn wait_for_iam_admin_pat_secret(topology: &K8sAnywhereTopology) -> Result<()> {
    use k8s_openapi::api::core::v1::Secret;

    let k8s = match topology.k8s_client().await {
        Ok(client) => client,
        Err(e) => return Err(anyhow::anyhow!("k8s_client: {e}")),
    };

    for attempt in 1..=120 {
        let secret = k8s
            .get_resource::<Secret>("iam-admin-pat", Some("zitadel"))
            .await?;
        let has_pat = secret
            .and_then(|s| s.data)
            .is_some_and(|data| data.contains_key("pat"));
        if has_pat {
            return Ok(());
        }

        if attempt % 10 == 0 {
            warn!("iam-admin-pat secret not yet present in zitadel ns ({attempt}/120)");
        }
        tokio::time::sleep(Duration::from_secs(1)).await;
    }

    anyhow::bail!(
        "timed out waiting for iam-admin-pat secret in 'zitadel' namespace — \
is FirstInstance.Org.Machine.Pat configured in ZitadelScore Helm values?"
    )
}
// ---- operator deploy -------------------------------------------------------
/// Namespace the operator's namespaced resources (ServiceAccount, Secret,
/// Deployment) are applied into; an alias for `FLEET_NAMESPACE`.
const OPERATOR_NAMESPACE: &str = FLEET_NAMESPACE;
/// Directory handed to `k3d_rs::K3d::new` as its data dir:
/// `<user data dir>/harmony/k3d`.
///
/// Mirrors `example_fleet_auth_callout::data_dir`, which is private; the logic
/// is duplicated here instead of re-exported so the operator wiring stays
/// self-contained. When the platform base directories cannot be determined the
/// function falls back to `/tmp/harmony`.
fn k3d_data_dir() -> PathBuf {
    match directories::BaseDirs::new() {
        Some(base) => base.data_dir().join("harmony").join("k3d"),
        None => PathBuf::from("/tmp/harmony"),
    }
}
/// Build the operator's release binary, package it into an OCI image,
/// and sideload into the k3d cluster. Mirrors
/// `build_and_load_callout_image`. The Dockerfile lives in the
/// operator crate.
async fn build_and_load_operator_image(k3d: &k3d_rs::K3d) -> Result<()> {
use std::process::Stdio;
let workspace_root = std::env::var("CARGO_MANIFEST_DIR")
.map(|d| PathBuf::from(d).join("..").join(".."))
.unwrap_or_else(|_| PathBuf::from("."));
let workspace_root = workspace_root.canonicalize().unwrap_or(workspace_root);
info!("cargo build --release -p harmony-fleet-operator");
let status = tokio::process::Command::new("cargo")
.args(["build", "--release", "-p", "harmony-fleet-operator"])
.current_dir(&workspace_root)
.status()
.await?;
if !status.success() {
anyhow::bail!("cargo build for fleet operator failed");
}
// Stage the binary + Dockerfile into a clean temp dir so podman
// build doesn't drag the whole target/ tree across.
let ctx = tempfile::tempdir()?;
let bin_dst = ctx.path().join("target/release");
std::fs::create_dir_all(&bin_dst)?;
std::fs::copy(
workspace_root.join("target/release/harmony-fleet-operator"),
bin_dst.join("harmony-fleet-operator"),
)
.context("staging operator binary into build context")?;
let dockerfile_src = workspace_root.join("fleet/harmony-fleet-operator/Dockerfile");
if !dockerfile_src.exists() {
anyhow::bail!(
"missing fleet/harmony-fleet-operator/Dockerfile — operator image staging \
expects it next to Cargo.toml; either add it or update the bring-up."
);
}
std::fs::copy(&dockerfile_src, ctx.path().join("Dockerfile"))?;
info!("podman build → {OPERATOR_IMAGE_TAG}");
let status = tokio::process::Command::new("podman")
.args(["build", "-q", "-t", OPERATOR_IMAGE_TAG, "."])
.current_dir(ctx.path())
.stderr(Stdio::inherit())
.status()
.await?;
if !status.success() {
anyhow::bail!("podman build for operator failed");
}
let tar_path =
std::env::temp_dir().join(format!("harmony-operator-image-{}.tar", std::process::id()));
let _ = std::fs::remove_file(&tar_path);
let status = tokio::process::Command::new("podman")
.args(["save", "-o", tar_path.to_str().unwrap(), OPERATOR_IMAGE_TAG])
.status()
.await?;
if !status.success() {
anyhow::bail!("podman save for operator failed");
}
info!("k3d image import {OPERATOR_IMAGE_TAG}");
let cluster_name = k3d
.cluster_name()
.unwrap_or(example_fleet_auth_callout::CLUSTER_NAME)
.to_string();
let tar_path_str = tar_path.to_str().unwrap().to_string();
let cluster_for_blocking = cluster_name.clone();
let data_dir = k3d_data_dir();
tokio::task::spawn_blocking(move || {
k3d_rs::K3d::new(data_dir, Some(cluster_for_blocking.clone())).run_k3d_command([
"image",
"import",
tar_path_str.as_str(),
"-c",
cluster_for_blocking.as_str(),
])
})
.await?
.map_err(|e| anyhow::anyhow!("k3d image import failed: {e}"))?;
let _ = std::fs::remove_file(&tar_path);
Ok(())
}
/// Apply the operator's CRDs + ServiceAccount + ClusterRole +
/// ClusterRoleBinding + Secret + Deployment via Harmony's
/// K8sResourceScore, in that order.
///
/// The Secret is built by `operator_secret` from the `[credentials]` TOML
/// rendered here (consumed by the operator as
/// `FLEET_OPERATOR_CREDENTIALS_TOML`). That TOML embeds the Zitadel JSON
/// keyfile (`operator_machine_key`) inline under `key_json`, points
/// `oidc_issuer_url` at `http://{ZITADEL_HOST}:{HTTP_PORT}` and uses
/// `project_id` as the audience.
///
/// # Errors
/// Returns the first apply failure, tagged with the resource kind that
/// failed.
///
/// # Panics
/// Panics if `operator_secret` returns `None`, which is not expected because
/// `opts.credentials` is always set here.
async fn deploy_operator(
topology: &K8sAnywhereTopology,
project_id: &str,
operator_machine_key: &str,
) -> Result<()> {
use harmony::modules::fleet::operator::crd::{Deployment as FleetDeployment, Device};
use harmony::modules::k8s::resource::K8sResourceScore;
use harmony_fleet_deploy::operator::chart::{
ChartOptions, OperatorCredentials, RELEASE_NAME, build_cluster_role,
build_cluster_role_binding, build_operator_deployment, build_service_account,
operator_secret,
};
use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
use kube::CustomResourceExt;
// Render the [credentials] TOML the operator pod consumes via the
// FLEET_OPERATOR_CREDENTIALS_TOML env var (sourced from a Secret
// key). The Zitadel JSON keyfile is embedded inline under
// `key_json`; the operator never sees a file. Triple-quoted TOML
// string keeps the JSON's `"`s untouched.
// NOTE(review): a TOML basic multi-line string (`"""`) still processes
// backslash escapes, so any `\n` / `\"` inside the JSON would be unescaped
// by the TOML parser before the JSON is parsed — confirm the operator's
// loader copes with that, or consider a literal `'''` string instead.
// `project_id` is captured implicitly by the `{project_id}` placeholder.
let credentials_toml = format!(
r#"type = "zitadel-jwt"
oidc_issuer_url = "http://{host}:{port}"
audience = "{project_id}"
danger_accept_invalid_certs = true
key_json = """
{key_json}
"""
"#,
host = ZITADEL_HOST,
port = HTTP_PORT,
key_json = operator_machine_key,
);
// Shared options for every chart builder below.
let opts = ChartOptions {
output_dir: PathBuf::new(), // unused on this code path
image: OPERATOR_IMAGE_TAG.to_string(),
image_pull_policy: "IfNotPresent".to_string(),
namespace: OPERATOR_NAMESPACE.to_string(),
nats_url: format!("nats://{NATS_RELEASE}.{NATS_NAMESPACE}.svc.cluster.local:4222"),
log_level: "info,kube_runtime=warn".to_string(),
credentials: Some(OperatorCredentials { credentials_toml }),
};
// CRDs first — the operator watches them on startup.
let crds: Vec<CustomResourceDefinition> = vec![FleetDeployment::crd(), Device::crd()];
K8sResourceScore::<CustomResourceDefinition> {
resource: crds,
namespace: None,
}
.interpret(&Inventory::autoload(), topology)
.await
.context("operator CRD apply")?;
// RBAC: the ServiceAccount is namespaced, the ClusterRole and its binding
// are cluster-scoped (applied with no namespace).
K8sResourceScore::single(
build_service_account(&opts),
Some(OPERATOR_NAMESPACE.to_string()),
)
.interpret(&Inventory::autoload(), topology)
.await
.context("operator ServiceAccount apply")?;
K8sResourceScore::single(build_cluster_role(), None)
.interpret(&Inventory::autoload(), topology)
.await
.context("operator ClusterRole apply")?;
K8sResourceScore::single(build_cluster_role_binding(&opts), None)
.interpret(&Inventory::autoload(), topology)
.await
.context("operator ClusterRoleBinding apply")?;
// Secret holding the credentials TOML (which embeds the keyfile).
let secret = operator_secret(&opts).expect("credentials present in opts");
K8sResourceScore::single(secret, Some(OPERATOR_NAMESPACE.to_string()))
.interpret(&Inventory::autoload(), topology)
.await
.context("operator Secret apply")?;
// Deployment last so it pulls the up-to-date Secret.
K8sResourceScore::single(
build_operator_deployment(&opts),
Some(OPERATOR_NAMESPACE.to_string()),
)
.interpret(&Inventory::autoload(), topology)
.await
.context("operator Deployment apply")?;
info!("operator deployment {OPERATOR_NAMESPACE}/{RELEASE_NAME} applied");
Ok(())
}
/// Block until the operator Deployment (`RELEASE_NAME` in
/// `OPERATOR_NAMESPACE`) reports at least one ready replica.
///
/// Polls once per second for up to 120 attempts and logs a warning every
/// tenth attempt.
///
/// # Errors
/// Fails if the Kubernetes client cannot be obtained, if a lookup errors, or
/// if the Deployment is still not ready after the last attempt.
async fn wait_for_operator_ready(topology: &K8sAnywhereTopology) -> Result<()> {
    use harmony_fleet_deploy::operator::chart::RELEASE_NAME;
    use k8s_openapi::api::apps::v1::Deployment as K8sDeployment;

    let k8s = match topology.k8s_client().await {
        Ok(client) => client,
        Err(e) => return Err(anyhow::anyhow!("k8s_client: {e}")),
    };

    for attempt in 1..=120 {
        let deployment = k8s
            .get_resource::<K8sDeployment>(RELEASE_NAME, Some(OPERATOR_NAMESPACE))
            .await?;
        // A missing Deployment, status or replica count all count as zero.
        let ready_replicas = deployment
            .and_then(|d| d.status)
            .and_then(|status| status.ready_replicas)
            .unwrap_or(0);
        if ready_replicas >= 1 {
            return Ok(());
        }

        if attempt % 10 == 0 {
            warn!("operator Deployment not yet Ready ({attempt}/120)");
        }
        tokio::time::sleep(Duration::from_secs(1)).await;
    }

    anyhow::bail!("timed out waiting for operator Deployment to become Ready")
}
// ---- helpers ---------------------------------------------------------------
/// Resolve `rel` inside the workspace `target/` directory.
///
/// The workspace root is taken to be two levels above `CARGO_MANIFEST_DIR`;
/// when that variable is not set the current directory (`.`) is used as the
/// starting point. The path is returned without being canonicalised.
fn workspace_target_path(rel: &str) -> PathBuf {
    let mut path = match std::env::var("CARGO_MANIFEST_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from("."),
    };
    for segment in ["..", "..", "target"].iter() {
        path.push(segment);
    }
    path.push(rel);
    path
}
// ---- next-steps panel ------------------------------------------------------
impl E2eHandles {
    /// Print the "stack ready" panel to stdout: cluster and endpoint summary,
    /// one line per provisioned device (index, id, IP, comma-joined labels)
    /// and the command that runs the walking-skeleton test suite.
    pub fn print_next_steps(&self) {
        const RULE: &str = "============================================================";

        println!();
        println!("{RULE}");
        println!(" E2E DEMO REHEARSAL — STACK READY");
        println!("{RULE}");
        println!(" k3d cluster: {}", self.cluster_name);
        println!(" Zitadel: {}", self.zitadel_url);
        println!(" NATS (host): {}", self.nats_url_external);
        println!(" Project ID: {}", self.project_id);
        println!(" Issuer pubkey: {}", self.issuer_pubkey);
        println!();

        println!(" Devices ({}):", self.devices.len());
        for device in &self.devices {
            let rendered: Vec<String> = device
                .labels
                .iter()
                .map(|(key, value)| format!("{key}={value}"))
                .collect();
            let joined = rendered.join(",");
            println!(
                " [{}] {} @ {} ({})",
                device.index, device.device_id, device.vm_ip, joined
            );
        }
        println!();

        println!(" Run the test suite:");
        println!();
        println!(" cargo test -p example-fleet-e2e-demo \\");
        println!(" --test e2e_walking_skeleton -- --test-threads=1 --nocapture");
        println!();
        println!(" Ctrl-C exits without tearing the cluster down — re-run");
        println!(" the bring-up to converge any drift.");
        println!("{RULE}");
    }
}
#[cfg(test)]
mod unit_tests {
    use super::*;

    /// The callout's device_id_claim is `client_id`, which Zitadel fills from
    /// the machine user's username. A later test asserts that the agent's
    /// per-device subjects match its device_id, so the username has to be the
    /// device id with the "device-" prefix the callout knows about.
    #[test]
    fn device_username_matches_callout_convention() {
        let username = device_username("vm-device-00");
        assert_eq!(username, "device-vm-device-00");
    }

    /// Index 0 and index 1 land in different groups, and both carry the
    /// labels every device is expected to have.
    #[test]
    fn device_labels_split_into_distinct_groups() {
        let first = build_device_labels("vm-device-00", 0);
        let second = build_device_labels("vm-device-01", 1);

        assert_eq!(first.get("group").unwrap(), "group-a");
        assert_eq!(second.get("group").unwrap(), "group-b");
        assert_ne!(first.get("group"), second.get("group"));

        // Ubiquitous labels: device-id + arch + role on both.
        for labels in [&first, &second] {
            for key in ["device-id", "arch"] {
                assert!(labels.contains_key(key));
            }
            assert_eq!(labels.get("role").unwrap(), "rehearsal");
        }
    }
}

View File

@@ -0,0 +1,51 @@
//! `cargo run -p example-fleet-e2e-demo -- --num-devices 2 ...`
//!
//! Brings up the full E2E rehearsal stack: k3d + Zitadel + NATS auth
//! callout + per-device Zitadel machine users + (out-of-band)
//! libvirt VMs + agents authenticating via JWT-bearer.
//!
//! See `src/lib.rs` and `ROADMAP/fleet_platform/v0_demo_e2e.md`.
use anyhow::{Context, Result};
use clap::Parser;
use example_fleet_e2e_demo::{DEFAULT_LIBVIRT_HOST_IP, E2eDemoOpts, bring_up_full_stack};
use std::path::PathBuf;
// Command-line options for the rehearsal bring-up. Each field is forwarded
// unchanged into `E2eDemoOpts` by `main`.
// NOTE(review): the `///` comments on the fields are deliberately left
// untouched — with `#[derive(Parser)]` clap presumably renders them as the
// `--help` text, so rewording them would change program output; confirm
// before editing.
#[derive(Parser, Debug)]
#[command(
name = "fleet-e2e-demo",
about = "VM-based end-to-end rehearsal of the fleet platform demo flow"
)]
struct Cli {
/// Number of VM-as-device agents to bring up. Each one needs its
/// own libvirt domain (provisioned out-of-band today via
/// `fleet_vm_setup` — see `FLEET_E2E_VM_<i>_IP` env vars below).
#[arg(long, default_value_t = 2)]
num_devices: usize,
/// Path to the cross-compiled `fleet-agent` binary uploaded to
/// each VM. Same binary that smoke-a4 produces.
// The default is a relative path, so it resolves against the directory the
// demo is launched from.
#[arg(long, default_value = "target/release/harmony-fleet-agent")]
agent_binary: PathBuf,
/// Override for the libvirt host IP (the address VMs see as the
/// gateway). Defaults to the libvirt default network's gateway.
#[arg(long, default_value = DEFAULT_LIBVIRT_HOST_IP)]
libvirt_host_ip: String,
}
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
let handles = bring_up_full_stack(E2eDemoOpts {
num_devices: cli.num_devices,
agent_binary: cli.agent_binary,
libvirt_host_ip: cli.libvirt_host_ip,
})
.await
.context("bring_up_full_stack")?;
handles.print_next_steps();
println!();
println!(" Press Ctrl-C to exit (cluster keeps running).");
tokio::signal::ctrl_c().await?;
Ok(())
}

View File

@@ -0,0 +1,161 @@
//! End-to-end walking-skeleton tests for the VM-based demo rehearsal.
//!
//! Shares one bring-up across the whole suite via `OnceCell`. Run
//! sequentially — they touch shared k3d + libvirt VM state.
//!
//! Pre-flight (manual, before `cargo test`):
//!
//! - libvirt + qemu installed; default network active.
//! - Two cloud-init Ubuntu VMs provisioned (e.g. via
//! `cargo run -p example_fleet_vm_setup`). Their IPs exported as
//! `FLEET_E2E_VM_0_IP` and `FLEET_E2E_VM_1_IP`.
//! - SSH keypair the VMs trust at `~/.ssh/id_ed25519` (or
//! override path; harness reads the standard pair).
//!
//! Run:
//!
//! ```bash
//! FLEET_E2E_VM_0_IP=192.168.122.42 \
//! FLEET_E2E_VM_1_IP=192.168.122.43 \
//! cargo test -p example-fleet-e2e-demo --test e2e_walking_skeleton \
//! -- --test-threads=1 --nocapture
//! ```
use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, Result};
use async_nats::ConnectOptions;
use example_fleet_auth_callout::{mint_access_token, scopes_for_project};
use example_fleet_e2e_demo::{E2eDemoOpts, E2eHandles, bring_up_full_stack};
use futures_util::StreamExt;
use tokio::sync::OnceCell;
/// Stack handles shared by every test in this file; filled on first use.
static STACK: OnceCell<Arc<E2eHandles>> = OnceCell::const_new();

/// Return the shared stack, running `bring_up_full_stack` with default
/// options the first time it is requested.
///
/// A failed bring-up leaves the cell empty, so a later caller retries it.
async fn shared_stack() -> Result<Arc<E2eHandles>> {
    let bring_up = || async {
        let handles = bring_up_full_stack(E2eDemoOpts::default()).await?;
        anyhow::Ok(Arc::new(handles))
    };
    let stack = STACK.get_or_try_init(bring_up).await?;
    Ok(Arc::clone(stack))
}
/// Connect to the stack's externally reachable NATS endpoint as the admin.
///
/// Mints a Zitadel access token from the admin machine key with the project's
/// scopes and uses it as the NATS auth token. The connection attempt times
/// out after five seconds.
async fn admin_nats_client(stack: &E2eHandles) -> Result<async_nats::Client> {
    let scopes = scopes_for_project(&stack.project_id);
    let token = mint_access_token(&stack.zitadel_url, &stack.admin_machine_key, &scopes)
        .await
        .context("mint admin Zitadel token")?;

    let options =
        ConnectOptions::with_token(token).connection_timeout(Duration::from_secs(5));
    match options.connect(&stack.nats_url_external).await {
        Ok(client) => Ok(client),
        Err(e) => Err(anyhow::anyhow!("admin connect: {e}")),
    }
}
// -- Test 1 -------------------------------------------------------------
/// Every provisioned VM must publish a DeviceInfo entry within 60 seconds.
///
/// Polls the `device-info` KV bucket through the admin client (the admin role
/// may read anything) every 500 ms until each device id has an entry or the
/// deadline passes.
#[tokio::test]
#[ignore = "requires libvirt VMs + k3d + Zitadel + NATS bring-up — see header"]
async fn both_devices_heartbeat_within_60s() -> Result<()> {
    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
    let stack = shared_stack().await?;
    let admin = admin_nats_client(&stack).await?;
    let jetstream = async_nats::jetstream::new(admin);
    let bucket = jetstream
        .get_key_value(harmony_reconciler_contracts::BUCKET_DEVICE_INFO)
        .await
        .context("device-info bucket")?;

    let deadline = std::time::Instant::now() + Duration::from_secs(60);
    let mut expected: std::collections::HashSet<String> = std::collections::HashSet::new();
    for device in &stack.devices {
        expected.insert(device.device_id.clone());
    }
    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();

    loop {
        if std::time::Instant::now() >= deadline || seen == expected {
            break;
        }
        for device in &stack.devices {
            let key = harmony_reconciler_contracts::device_info_key(&device.device_id);
            if bucket.entry(&key).await?.is_some() {
                seen.insert(device.device_id.clone());
            }
        }
        tokio::time::sleep(Duration::from_millis(500)).await;
    }

    assert_eq!(
        seen, expected,
        "each provisioned device must publish DeviceInfo within 60s; saw {seen:?}"
    );
    Ok(())
}
// -- Test 5 (admin cross-device read) -----------------------------------
/// The admin's Zitadel JWT carries the `fleet-admin` role, which the callout
/// maps to `pub/sub allow: [">"]`. Subscribing to `device-state.>` must
/// therefore be admitted and must see traffic from the devices.
#[tokio::test]
#[ignore = "requires libvirt VMs + k3d + Zitadel + NATS bring-up — see header"]
async fn admin_jwt_reads_any_device_subject() -> Result<()> {
    let _ = tracing_subscriber::fmt().with_env_filter("info").try_init();
    let stack = shared_stack().await?;
    let admin = admin_nats_client(&stack).await?;

    let mut sub = admin.subscribe("device-state.>").await?;
    admin.flush().await?;

    // The test publishes nothing itself: the agents emit per-deployment state
    // on every reconcile tick, so one message should arrive while the
    // subscription is open. Silence for 30s means the agents are either not
    // connected or not publishing — both fatal for the demo.
    let outcome = tokio::time::timeout(Duration::from_secs(30), sub.next()).await;
    let saw_message = matches!(outcome, Ok(Some(_)));
    assert!(
        saw_message,
        "admin must observe at least one device-state.* message in 30s"
    );
    Ok(())
}
// -- Test 6 (per-device isolation) ---------------------------------------
/// A per-device JWT has subject permissions scoped to its own
/// `device-state.{device_id}` and `device-commands.{device_id}`. The
/// callout enforces this; subscribing to a sibling device's commands
/// must fail at NATS connect-time or at SUB-time.
///
/// Not implemented yet: the body is an empty placeholder and the test is
/// `#[ignore]`d. The per-device JWT minting helper (analogous to
/// `mint_access_token` but for a `device` role user) needs the
/// per-device machine key to be plumbed back from `bring_up_full_stack`
/// through `E2eHandles`. A follow-up commit is expected to add
/// `E2eHandles::device_machine_key(idx)` so this test can be
/// implemented without re-running `ZitadelSetupScore` from the test
/// body.
#[tokio::test]
#[ignore = "requires E2eHandles::device_machine_key plumbing"]
async fn cross_device_isolation_enforced_in_vm() {
// Intentionally empty — asserts nothing until the plumbing above exists.
}
// -- Test 7 (load-bearing reconnect) -------------------------------------
/// Kill the NATS pod, wait for the new one to come up, verify both
/// agents reconnect with fresh JWTs and resume publishing within
/// 30 seconds. This is the test that validates the "never lose
/// connectivity to a device" guarantee under realistic disturbance.
///
/// Not implemented yet: the body is an empty placeholder and the test is
/// `#[ignore]`d. It is skipped pending operator install in the harness —
/// without the operator the agents have no `desired-state` to publish
/// status against, so verifying "publishing resumed" needs a separate
/// signal. A follow-up commit is expected to observe the agents' periodic
/// heartbeat publication directly via the device-heartbeat KV.
#[tokio::test]
#[ignore = "requires NATS-pod-restart driver and heartbeat-presence assertion"]
async fn agent_recovers_from_nats_pod_restart() {
// Intentionally empty — asserts nothing until the restart driver exists.
}

View File

@@ -0,0 +1,24 @@
[package]
name = "example_fleet_load_test"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "fleet_load_test"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony", default-features = false }
harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
async-nats = { workspace = true }
chrono = { workspace = true }
kube = { workspace = true, features = ["runtime", "derive"] }
k8s-openapi.workspace = true
serde_json = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true }
rand = { workspace = true }

View File

@@ -0,0 +1,551 @@
//! Load test for the IoT operator's `fleet_aggregator`.
//!
//! Simulates N devices across M Deployment CRs, each device pushing
//! a `DeploymentState` update to NATS every `--tick-ms`. Measures
//! throughput on both sides (devices → NATS and operator → kube
//! apiserver) and, at the end of the run, verifies each CR's
//! `.status.aggregate` counters sum to its expected group size (and
//! that `matched_device_count` equals that size — i.e. every
//! registered device got picked up by the CR's label selector).
//!
//! Assumes an already-running stack:
//! - NATS reachable at `--nats-url`
//! - k8s cluster with the operator's CRD installed (KUBECONFIG)
//! - the operator process running against the same NATS + cluster
//!
//! The `fleet/scripts/smoke-a4.sh` script brings all three up — pass
//! `--hold` to leave them running, then run this binary.
//!
//! Typical invocation:
//!
//! cargo run -q -p example_fleet_load_test -- \
//! --namespace fleet-load \
//! --groups 55,5,5,5,5,5,5,5,5,5 \
//! --tick-ms 1000 \
//! --duration-s 60
use anyhow::{Context, Result};
use async_nats::jetstream::{self, kv};
use chrono::Utc;
use clap::Parser;
use harmony::modules::fleet::operator::{Deployment, DeploymentSpec, Rollout, RolloutStrategy};
use harmony::modules::podman::{PodmanService, PodmanV0Score, ReconcileScore};
use harmony_reconciler_contracts::{
BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, DeploymentName,
DeploymentState, DeviceInfo, HeartbeatPayload, Id, Phase, device_heartbeat_key,
device_info_key, device_state_key,
};
use k8s_openapi::api::core::v1::Namespace;
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::Client;
use kube::api::{Api, DeleteParams, Patch, PatchParams, PostParams};
use rand::Rng;
use std::collections::BTreeMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant};
use tokio::task::JoinSet;
// Command-line options for the load generator.
// NOTE(review): the `///` comments on the fields are left byte-identical —
// with `#[derive(Parser)]` clap presumably renders them as `--help` text, so
// rewording them would change program output; confirm before editing.
#[derive(Parser, Debug, Clone)]
#[command(
name = "fleet_load_test",
about = "Synthetic load for the IoT operator's fleet_aggregator"
)]
struct Cli {
/// NATS URL (same one the operator connects to).
#[arg(long, default_value = "nats://localhost:4222")]
nats_url: String,
/// k8s namespace for the load-test Deployment CRs. Created if
/// missing.
#[arg(long, default_value = "fleet-load")]
namespace: String,
/// Group shape — comma-separated device counts, one per CR.
/// Default: 100 devices over 10 groups (1 × 55 + 9 × 5).
// Parsed by `parse_groups`; `build_plan` turns each entry into one CR.
#[arg(long, default_value = "55,5,5,5,5,5,5,5,5,5")]
groups: String,
/// Per-device tick in ms. Each tick publishes one DeploymentState.
#[arg(long, default_value_t = 1000)]
tick_ms: u64,
/// Heartbeat cadence in seconds (separate from the state tick).
#[arg(long, default_value_t = 30)]
heartbeat_s: u64,
/// Total run duration in seconds before tearing down.
#[arg(long, default_value_t = 60)]
duration_s: u64,
/// Report throughput every N seconds.
#[arg(long, default_value_t = 5)]
report_s: u64,
/// Keep the CRs + KV entries in place after the run instead of
/// deleting them. Useful with HOLD=1 to inspect the steady-state
/// aggregate after the load finishes.
#[arg(long)]
keep: bool,
}
/// Metrics collected across all device tasks.
///
/// Shared behind an `Arc` between the simulator tasks, the periodic reporter
/// and the final summary in `main`; every read in `main` uses
/// `Ordering::Relaxed`.
#[derive(Default)]
struct Counters {
// Reported as `state_writes_total`; presumably bumped once per
// DeploymentState write by the state simulator — confirm in
// `simulate_state_loop`.
state_writes: AtomicU64,
// Reported as `heartbeats_total`; presumably bumped once per heartbeat
// write — confirm in `simulate_heartbeat_loop`.
heartbeat_writes: AtomicU64,
// Reported as `errors`; presumably counts failed writes from either
// simulator loop — confirm at the increment sites.
errors: AtomicU64,
}
/// Entry point of the load test.
///
/// Phases, in order: parse the CLI and group shape; open the three KV
/// buckets; make sure the namespace exists; apply one Deployment CR per group
/// and publish a DeviceInfo per device; spawn a state loop and a heartbeat
/// loop per device plus a throughput reporter; run for `--duration-s`; stop
/// everything; verify each CR's `.status.aggregate`; clean up unless
/// `--keep` was given.
///
/// # Errors
/// Returns an error when setup fails, when a CR cannot be fetched during
/// verification (in which case cleanup is skipped), or when at least one CR's
/// aggregate does not match its expected device count.
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let cli = Cli::parse();
let group_sizes = parse_groups(&cli.groups)?;
let total: usize = group_sizes.iter().sum();
tracing::info!(
devices = total,
groups = group_sizes.len(),
shape = ?group_sizes,
tick_ms = cli.tick_ms,
duration_s = cli.duration_s,
"fleet_load_test starting"
);
// --- NATS setup ----------------------------------------------------------
let nc = async_nats::connect(&cli.nats_url)
.await
.with_context(|| format!("connecting to NATS at {}", cli.nats_url))?;
let js = jetstream::new(nc);
let info_bucket = open_bucket(&js, BUCKET_DEVICE_INFO).await?;
let state_bucket = open_bucket(&js, BUCKET_DEVICE_STATE).await?;
let heartbeat_bucket = open_bucket(&js, BUCKET_DEVICE_HEARTBEAT).await?;
// --- kube setup ----------------------------------------------------------
let client = Client::try_default().await.context("kube client")?;
ensure_namespace(&client, &cli.namespace).await?;
let deployments: Api<Deployment> = Api::namespaced(client.clone(), &cli.namespace);
// --- plan groups + device ids --------------------------------------------
// CRs are applied before the DeviceInfo entries are published.
let plan = build_plan(&group_sizes);
apply_crs(&deployments, &plan).await?;
publish_device_infos(&info_bucket, &plan).await?;
// --- spawn simulators ----------------------------------------------------
// Two tasks per device: one state loop and one heartbeat loop, all owned by
// a single JoinSet so they can be shut down together.
let counters = Arc::new(Counters::default());
let mut sims = JoinSet::new();
let tick = Duration::from_millis(cli.tick_ms);
let hb_tick = Duration::from_secs(cli.heartbeat_s);
for device in &plan.devices {
let device = Arc::new(device.clone());
sims.spawn(simulate_state_loop(
device.clone(),
state_bucket.clone(),
counters.clone(),
tick,
));
sims.spawn(simulate_heartbeat_loop(
device.clone(),
heartbeat_bucket.clone(),
counters.clone(),
hb_tick,
));
}
// --- metrics reporter ----------------------------------------------------
let report_tick = Duration::from_secs(cli.report_s);
let reporter_counters = counters.clone();
let reporter = tokio::spawn(async move {
// NOTE(review): `tokio::time::interval` panics on a zero period, so
// `--report-s 0` would presumably kill this reporter task — confirm or
// validate the flag.
let mut ticker = tokio::time::interval(report_tick);
ticker.tick().await; // skip immediate fire
let mut prev_state = 0u64;
let mut prev_hb = 0u64;
loop {
ticker.tick().await;
let s = reporter_counters.state_writes.load(Ordering::Relaxed);
let h = reporter_counters.heartbeat_writes.load(Ordering::Relaxed);
let e = reporter_counters.errors.load(Ordering::Relaxed);
// Rates are the delta since the previous report divided by the nominal
// report period, not by measured wall-clock time.
let dt = report_tick.as_secs_f64();
let ss = (s - prev_state) as f64 / dt;
let hh = (h - prev_hb) as f64 / dt;
tracing::info!(
state_writes_total = s,
state_writes_per_s = format!("{ss:.1}"),
heartbeats_total = h,
heartbeats_per_s = format!("{hh:.1}"),
errors = e,
"load"
);
prev_state = s;
prev_hb = h;
}
});
// --- run for duration ----------------------------------------------------
let started = Instant::now();
tokio::time::sleep(Duration::from_secs(cli.duration_s)).await;
// Stop the reporter and every simulator task before reading final totals.
reporter.abort();
sims.shutdown().await;
let elapsed = started.elapsed();
let s = counters.state_writes.load(Ordering::Relaxed);
let h = counters.heartbeat_writes.load(Ordering::Relaxed);
let e = counters.errors.load(Ordering::Relaxed);
tracing::info!(
elapsed_s = format!("{:.1}", elapsed.as_secs_f64()),
state_writes_total = s,
state_writes_per_s = format!("{:.1}", s as f64 / elapsed.as_secs_f64()),
heartbeats_total = h,
errors = e,
"run complete"
);
// --- give the aggregator two seconds to drain ------------------------------
tokio::time::sleep(Duration::from_secs(2)).await;
// --- verify CR status aggregates -----------------------------------------
//
// With selector-based matching there's a second axis we want to check:
// `matched_device_count` must equal the expected group size (selector
// actually resolved every registered Device), AND the phase counters
// must sum to it.
let mut all_ok = true;
for group in &plan.groups {
let cr = deployments.get(&group.cr_name).await?;
let Some(status) = cr.status.as_ref().and_then(|s| s.aggregate.as_ref()) else {
tracing::warn!(cr = %group.cr_name, "aggregate missing on CR status");
all_ok = false;
continue;
};
let total_reported = status.succeeded + status.failed + status.pending;
let expected = group.devices.len() as u32;
let ok = status.matched_device_count == expected && total_reported == expected;
if !ok {
all_ok = false;
}
tracing::info!(
cr = %group.cr_name,
expected_devices = expected,
matched = status.matched_device_count,
succeeded = status.succeeded,
failed = status.failed,
pending = status.pending,
total = total_reported,
ok,
"cr status"
);
}
// Cleanup is best-effort: every delete result is deliberately ignored so a
// missing CR or key does not turn a passing run into a failure.
if !cli.keep {
tracing::info!("cleanup: deleting CRs + KV entries");
for group in &plan.groups {
let _ = deployments
.delete(&group.cr_name, &DeleteParams::default())
.await;
}
for device in &plan.devices {
let _ = state_bucket
.delete(&device_state_key(
&device.device_id,
&DeploymentName::try_new(&device.cr_name).unwrap(),
))
.await;
let _ = info_bucket
.delete(&device_info_key(&device.device_id))
.await;
let _ = heartbeat_bucket
.delete(&device_heartbeat_key(&device.device_id))
.await;
}
}
// The verdict is reported only after cleanup so a failing run still tidies up.
if all_ok {
tracing::info!("PASS — all CR aggregates match device counts");
Ok(())
} else {
anyhow::bail!("FAIL — at least one CR aggregate did not sum to its target device count")
}
}
/// Parse the `--groups` value: comma-separated device counts, one per CR.
///
/// Whitespace around each count is ignored.
///
/// # Errors
/// - `--groups must have at least one size` when the value is empty or blank.
///   `str::split` always yields at least one token, so this is checked on the
///   raw input; testing the parsed vector for emptiness can never fire.
/// - `parsing --groups` (wrapping the integer parse error) when any token is
///   not a valid `usize`, including the empty tokens produced by stray or
///   trailing commas.
fn parse_groups(s: &str) -> Result<Vec<usize>> {
    if s.trim().is_empty() {
        anyhow::bail!("--groups must have at least one size");
    }
    s.split(',')
        .map(|token| token.trim().parse::<usize>())
        .collect::<Result<Vec<usize>, _>>()
        .context("parsing --groups")
}
/// One simulated device together with the name of the CR it belongs to.
#[derive(Clone)]
struct DevicePlan {
    device_id: String,
    cr_name: String,
}

/// One Deployment CR and the ids of the devices assigned to it.
#[derive(Clone)]
struct GroupPlan {
    cr_name: String,
    devices: Vec<String>,
}

/// The whole run: every device (flat, in id order) and every group.
struct Plan {
    devices: Vec<DevicePlan>,
    groups: Vec<GroupPlan>,
}

/// Lay out CR names and device ids for the requested group sizes.
///
/// Group `i` is named `load-group-<i>` and devices are numbered consecutively
/// from 1 across all groups as `load-dev-<n>`. Both numbers are zero-padded:
/// CR indices to the digit count of the number of groups (at least 2), device
/// numbers to the digit count of the total device count (at least 5), so
/// large runs still sort sensibly in kubectl.
fn build_plan(group_sizes: &[usize]) -> Plan {
    let digits = |n: usize| n.to_string().len();
    let cr_width = digits(group_sizes.len()).max(2);
    let device_total: usize = group_sizes.iter().sum();
    let dev_width = digits(device_total).max(5);

    let mut plan = Plan {
        devices: Vec::new(),
        groups: Vec::new(),
    };
    // One counter shared by all groups keeps device ids globally unique.
    let mut serial = 1usize..;
    for (i, &size) in group_sizes.iter().enumerate() {
        let cr_name = format!("load-group-{i:0cr_width$}");
        let ids: Vec<String> = serial
            .by_ref()
            .take(size)
            .map(|n| format!("load-dev-{n:0dev_width$}"))
            .collect();
        plan.devices.extend(ids.iter().map(|id| DevicePlan {
            device_id: id.clone(),
            cr_name: cr_name.clone(),
        }));
        plan.groups.push(GroupPlan {
            cr_name,
            devices: ids,
        });
    }
    plan
}
/// Open the JetStream KV bucket `bucket` through `create_key_value`,
/// requesting a history depth of 1 and defaults for everything else.
async fn open_bucket(js: &jetstream::Context, bucket: &'static str) -> Result<kv::Store> {
    let config = kv::Config {
        bucket: bucket.to_string(),
        history: 1,
        ..Default::default()
    };
    let store = js.create_key_value(config).await?;
    Ok(store)
}
async fn ensure_namespace(client: &Client, name: &str) -> Result<()> {
let api: Api<Namespace> = Api::all(client.clone());
if api.get_opt(name).await?.is_some() {
return Ok(());
}
let ns = Namespace {
metadata: kube::api::ObjectMeta {
name: Some(name.to_string()),
..Default::default()
},
..Default::default()
};
match api.create(&PostParams::default(), &ns).await {
Ok(_) => Ok(()),
Err(kube::Error::Api(ae)) if ae.code == 409 => Ok(()),
Err(e) => Err(e.into()),
}
}
/// Server-side-apply one Deployment CR per planned group, keeping at most
/// `CONCURRENCY` applies in flight.
///
/// Applies use the `fleet-load-test` field manager with `force`. The first
/// failure (apply error or task join error) aborts the function; the elapsed
/// time is logged on success.
async fn apply_crs(api: &Api<Deployment>, plan: &Plan) -> Result<()> {
    // 32 in-flight applies is well under typical apiserver QPS limits, so a
    // large fleet neither overwhelms the apiserver nor gets an unpredictable
    // startup latency.
    const CONCURRENCY: usize = 32;

    let params = PatchParams::apply("fleet-load-test").force();
    let started = Instant::now();
    let mut in_flight: JoinSet<Result<String>> = JoinSet::new();
    let mut pending = plan.groups.iter();

    // Prime the window ...
    for group in pending.by_ref().take(CONCURRENCY) {
        in_flight.spawn(apply_one_cr(api.clone(), group.clone(), params.clone()));
    }
    // ... then start one new apply for each one that finishes.
    while let Some(joined) = in_flight.join_next().await {
        joined??;
        if let Some(group) = pending.next() {
            in_flight.spawn(apply_one_cr(api.clone(), group.clone(), params.clone()));
        }
    }

    let elapsed_ms = started.elapsed().as_millis() as u64;
    tracing::info!(
        crs = plan.groups.len(),
        elapsed_ms = elapsed_ms,
        "applied Deployment CRs"
    );
    Ok(())
}
/// Server-side-apply the Deployment CR for a single group and return the
/// CR name on success.
///
/// The CR targets devices through a `group=<cr_name>` label selector.
/// Every Device CR in the group carries that label: we publish it on
/// DeviceInfo and the operator reflects it into Device.metadata.labels.
async fn apply_one_cr(
    api: Api<Deployment>,
    group: GroupPlan,
    params: PatchParams,
) -> Result<String> {
    let name = group.cr_name;

    let selector = LabelSelector {
        match_labels: Some(BTreeMap::from([("group".to_string(), name.clone())])),
        match_expressions: None,
    };
    // The score payload is filler: no real agent consumes the desired
    // state in this harness. The aggregator still writes KV for every
    // matched device, which is wire noise we accept for realism.
    let service = PodmanService {
        name: name.clone(),
        image: "docker.io/library/nginx:alpine".to_string(),
        ports: vec!["8080:80".to_string()],
        env: vec![],
        volumes: vec![],
        restart_policy: Default::default(),
    };
    let spec = DeploymentSpec {
        target_selector: selector,
        score: ReconcileScore::PodmanV0(PodmanV0Score {
            services: vec![service],
        }),
        rollout: Rollout {
            strategy: RolloutStrategy::Immediate,
        },
    };
    let cr = Deployment::new(&name, spec);

    api.patch(&name, &params, &Patch::Apply(&cr))
        .await
        .with_context(|| format!("applying CR {}", name))?;
    Ok(name)
}
/// Write a DeviceInfo entry to `bucket` for every planned device,
/// keeping at most `CONCURRENCY` puts in flight, then log the count and
/// elapsed time.
///
/// The first failed put (or panicked task) is returned immediately.
async fn publish_device_infos(bucket: &kv::Store, plan: &Plan) -> Result<()> {
    const CONCURRENCY: usize = 64;

    let started = Instant::now();
    let mut in_flight: JoinSet<Result<()>> = JoinSet::new();
    let mut pending = plan.devices.iter();

    // Fill the window, then top it up by one for each completed put.
    for device in pending.by_ref().take(CONCURRENCY) {
        in_flight.spawn(publish_one_info(bucket.clone(), device.clone()));
    }
    while let Some(joined) = in_flight.join_next().await {
        joined??;
        if let Some(device) = pending.next() {
            in_flight.spawn(publish_one_info(bucket.clone(), device.clone()));
        }
    }

    tracing::info!(
        devices = plan.devices.len(),
        elapsed_ms = started.elapsed().as_millis() as u64,
        "seeded DeviceInfo"
    );
    Ok(())
}
/// Serialize a DeviceInfo for `device` as JSON and put it into `bucket`
/// under the device's info key.
///
/// The only label published is `group=<cr_name>`; no inventory is
/// attached and `updated_at` is the current UTC time.
async fn publish_one_info(bucket: kv::Store, device: DevicePlan) -> Result<()> {
    let labels = BTreeMap::from([("group".to_string(), device.cr_name.clone())]);
    let info = DeviceInfo {
        device_id: Id::from(device.device_id.clone()),
        labels,
        inventory: None,
        updated_at: Utc::now(),
    };
    let key = device_info_key(&device.device_id);
    let encoded = serde_json::to_vec(&info)?;
    bucket.put(&key, encoded.into()).await?;
    Ok(())
}
/// Endlessly publish a synthetic DeploymentState for `device`, once per
/// `tick`.
///
/// Returns immediately (without looping) when the device's CR name is
/// not a valid `DeploymentName`. Each iteration bumps
/// `counters.state_writes` on a successful put and `counters.errors`
/// when either serialization or the put fails.
async fn simulate_state_loop(
    device: Arc<DevicePlan>,
    bucket: kv::Store,
    counters: Arc<Counters>,
    tick: Duration,
) {
    let deployment = match DeploymentName::try_new(&device.cr_name) {
        Ok(name) => name,
        Err(_) => return,
    };
    let state_key = device_state_key(&device.device_id, &deployment);

    let mut ticker = tokio::time::interval(tick);
    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);

    loop {
        ticker.tick().await;

        let phase = pick_phase();
        // Only failed phases carry an error message.
        let last_error = if matches!(phase, Phase::Failed) {
            Some(format!("synthetic failure @{}", device.device_id))
        } else {
            None
        };
        let ds = DeploymentState {
            device_id: Id::from(device.device_id.clone()),
            deployment: deployment.clone(),
            phase,
            last_event_at: Utc::now(),
            last_error,
        };

        let stored = match serde_json::to_vec(&ds) {
            Ok(payload) => bucket.put(&state_key, payload.into()).await.is_ok(),
            Err(_) => false,
        };
        if stored {
            counters.state_writes.fetch_add(1, Ordering::Relaxed);
        } else {
            counters.errors.fetch_add(1, Ordering::Relaxed);
        }
    }
}
/// Endlessly publish a heartbeat for `device`, once per `tick`.
///
/// Each iteration bumps `counters.heartbeat_writes` on a successful put
/// and `counters.errors` otherwise. A serialization failure counts as an
/// error too, matching `simulate_state_loop`, so a tick can never vanish
/// from both counters.
async fn simulate_heartbeat_loop(
    device: Arc<DevicePlan>,
    bucket: kv::Store,
    counters: Arc<Counters>,
    tick: Duration,
) {
    let hb_key = device_heartbeat_key(&device.device_id);
    let mut ticker = tokio::time::interval(tick);
    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
    loop {
        ticker.tick().await;
        let hb = HeartbeatPayload {
            device_id: Id::from(device.device_id.clone()),
            at: Utc::now(),
        };
        // `written` is false for both a failed serialization and a
        // failed put; either way the tick is accounted for as an error.
        let written = match serde_json::to_vec(&hb) {
            Ok(payload) => bucket.put(&hb_key, payload.into()).await.is_ok(),
            Err(_) => false,
        };
        if written {
            counters.heartbeat_writes.fetch_add(1, Ordering::Relaxed);
        } else {
            counters.errors.fetch_add(1, Ordering::Relaxed);
        }
    }
}
/// Draw a random phase for one simulated state write.
///
/// The split imitates a mostly healthy fleet: 80 of every 100 draws are
/// `Running`, 10 are `Failed` and 10 are `Pending`, which is enough
/// churn to exercise the aggregator's transition handling and its
/// last_error logic.
fn pick_phase() -> Phase {
    let roll: u32 = rand::rng().random_range(0..100);
    if roll < 80 {
        Phase::Running
    } else if roll < 90 {
        Phase::Failed
    } else {
        Phase::Pending
    }
}

View File

@@ -0,0 +1,15 @@
[package]
name = "example_fleet_nats_install"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "fleet_nats_install"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony", default-features = false }
tokio.workspace = true
anyhow.workspace = true
clap.workspace = true

View File

@@ -0,0 +1,91 @@
//! Install a single-node NATS server into the cluster `KUBECONFIG`
//! points at, using harmony's `NatsBasicScore` + `K8sBareTopology`.
//!
//! This binary is the glue between the smoke harness (`smoke-a4.sh`)
//! and the framework Score. Typical usage from a demo script:
//!
//! KUBECONFIG=$KUBECFG cargo run -q -p example_fleet_nats_install \
//! -- --namespace fleet-system --name fleet-nats --expose load-balancer
//!
//! Behaviour:
//! - Ensures the target namespace exists
//! - Deploys a single-replica NATS server (JetStream on)
//! - Exposes it as a Service (`LoadBalancer` by default, which pairs
//! with the k3d loadbalancer port mapping so off-cluster clients
//! like a libvirt VM agent can reach it)
//!
//! For production / HA / TLS, graduate to `NatsK8sScore`.
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::k8s::K8sBareTopology;
use harmony::modules::nats::NatsBasicScore;
use harmony::score::Score;
// Command-line surface of `fleet_nats_install`. The `///` comments on the
// fields below are the user-facing `--help` text, so maintainer notes in
// this struct are written as plain `//` comments.
#[derive(Parser, Debug)]
#[command(
name = "fleet_nats_install",
about = "Install single-node NATS (JetStream) via NatsBasicScore"
)]
struct Cli {
/// Target namespace. Created if missing.
#[arg(long, default_value = "fleet-system")]
namespace: String,
/// Resource name for the NATS Deployment + Service.
#[arg(long, default_value = "fleet-nats")]
name: String,
// Mapped onto the score in `main`: `ClusterIp` leaves the score as
// built, `NodePort` calls `.node_port(..)`, `LoadBalancer` calls
// `.load_balancer()`.
/// Service exposure mode. `load-balancer` pairs with k3d's
/// `-p PORT:PORT@loadbalancer` port mapping (direct service-
/// port routing). `node-port` demands a port in the apiserver's
/// nodeport range (default 30000-32767). `cluster-ip` keeps
/// NATS in-cluster only.
#[arg(long, value_enum, default_value_t = ExposeMode::LoadBalancer)]
expose: ExposeMode,
// Only read by `main` when `expose` is `ExposeMode::NodePort`.
/// NodePort when `--expose=node-port`. Must be in the cluster's
/// nodeport range (default 30000-32767). Ignored otherwise.
#[arg(long, default_value_t = 30422)]
node_port: i32,
// When `None`, `main` never calls `.image(..)` on the score.
/// Override the NATS container image.
#[arg(long)]
image: Option<String>,
}
// How the NATS Service is exposed; chosen with `--expose`.
#[derive(Clone, Debug, clap::ValueEnum)]
enum ExposeMode {
// `main` leaves the score's exposure as `NatsBasicScore::new` built it.
ClusterIp,
// `main` calls `.node_port(cli.node_port)` on the score.
NodePort,
// `main` calls `.load_balancer()` on the score. This is the CLI default.
LoadBalancer,
}
/// Parse the CLI, build a topology from `KUBECONFIG`, configure a
/// `NatsBasicScore` from the flags, run it, and print the outcome.
#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();

    let topology = K8sBareTopology::from_kubeconfig("fleet-nats-install")
        .await
        .map_err(|e| anyhow::anyhow!(e))
        .context("building K8sBareTopology from KUBECONFIG")?;

    // Layer the optional settings onto the base score one at a time.
    let base = NatsBasicScore::new(&cli.name, &cli.namespace);
    let exposed = match cli.expose {
        ExposeMode::ClusterIp => base,
        ExposeMode::NodePort => base.node_port(cli.node_port),
        ExposeMode::LoadBalancer => base.load_balancer(),
    };
    let score = match cli.image {
        Some(image) => exposed.image(image),
        None => exposed,
    };

    let outcome = Score::<K8sBareTopology>::create_interpret(&score)
        .execute(&Inventory::empty(), &topology)
        .await
        .map_err(|e| anyhow::anyhow!("execute NatsBasicScore: {e}"))?;

    println!(
        "NATS installed: namespace={}, name={}, expose={:?} outcome={outcome:?}",
        cli.namespace, cli.name, cli.expose
    );
    Ok(())
}

View File

@@ -0,0 +1,23 @@
[package]
name = "example_fleet_rpi_setup"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "fleet_rpi_setup"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_secret = { path = "../../harmony_secret" }
harmony_types = { path = "../../harmony_types" }
tokio.workspace = true
log.workspace = true
anyhow.workspace = true
clap.workspace = true
reqwest = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
base64 = "0.22"

View File

@@ -0,0 +1,4 @@
# Environment for the `fleet_rpi_setup` example. Source this file before
# `cargo run -p example_fleet_rpi_setup` (see the module docs in src/main.rs).
export HARMONY_SECRET_NAMESPACE=fleet-rpi-setup
# NOTE(review): `file` presumably selects a file-backed secret store; confirm in harmony_secret.
export HARMONY_SECRET_STORE=file
export HARMONY_DATABASE_URL=sqlite://harmony_fleet_rpi_setup.sqlite
export RUST_LOG=info

View File

@@ -0,0 +1,272 @@
//! Onboard a real, already-booted Raspberry Pi into the IoT fleet.
//!
//! This is the physical-device sibling of `fleet_vm_setup`: the VM
//! provisioning step is gone (you booted Pi OS yourself with rpi-imager
//! and preloaded an SSH key), and we go straight to applying
//! `FleetDeviceSetupScore` over SSH. That score installs podman +
//! systemd-container, creates the `fleet-agent` user, drops the agent
//! binary + config + systemd unit, and starts the service.
//!
//! Source `env.sh` first (sets `HARMONY_SECRET_NAMESPACE`,
//! `HARMONY_SECRET_STORE`, `HARMONY_DATABASE_URL`, `RUST_LOG`), then:
//!
//! ```bash
//! source examples/fleet_rpi_setup/env.sh
//! cargo run -p example_fleet_rpi_setup -- --pi-host <ip> ...
//! ```
//!
//! Output rendering (per-step traces and the final recap) is handled
//! by `harmony_cli::run` — same as every other harmony example. The
//! score's `Outcome.details` is structured for that path.
//!
//! Prereqs on the Pi (one-time, via rpi-imager or manual):
//! - SSH server enabled
//! - An admin user with sudo. Passwordless sudo is detected and
//! used silently; otherwise the example prompts for a sudo
//! password via `SecretManager` and caches it for next runs.
//! - Your driver-machine SSH public key in that user's
//! `~/.ssh/authorized_keys`
//!
//! Prereqs on the driver machine (where this binary runs):
//! - Python 3 + `python3-venv` (Ansible is auto-bootstrapped into a venv)
//! - A cross-compiled `fleet-agent` binary for aarch64
mod zitadel_bootstrap;
use anyhow::{Context, Result};
use clap::Parser;
use harmony::config::secret::SudoPassword;
use harmony::inventory::Inventory;
use harmony::modules::fleet::{FleetDeviceAuth, FleetDeviceSetupConfig, FleetDeviceSetupScore};
use harmony::modules::linux::{LinuxHostTopology, SshCredentials, ensure_ansible_venv, ssh_exec};
use harmony_secret::SecretManager;
use harmony_types::id::Id;
use log::info;
use std::path::PathBuf;
// Command-line surface of `fleet_rpi_setup`. The `///` comments on the
// fields are the user-facing `--help` text; keep them accurate with
// respect to what `main` and `build_auth` actually do.
#[derive(Parser, Debug)]
#[command(
    name = "fleet_rpi_setup",
    about = "Onboard a physical Raspberry Pi into the IoT fleet"
)]
struct Cli {
    /// IP address of the Pi (e.g. 192.168.1.42).
    #[arg(long)]
    pi_host: String,
    /// SSH user on the Pi with sudo rights. Passwordless sudo is used
    /// when available; otherwise a sudo password is fetched from the
    /// secret store (prompting on first use).
    #[arg(long, default_value = "pi")]
    pi_user: String,
    /// Path to the SSH private key whose public half is in the Pi
    /// user's `~/.ssh/authorized_keys`.
    #[arg(long, default_value = "~/.ssh/id_ed25519")]
    ssh_key: PathBuf,
    /// Device id the agent will announce to NATS. Defaults to a fresh
    /// `Id` (sortable hex timestamp + random suffix).
    #[arg(long)]
    device_id: Option<String>,
    /// Routing labels for `Deployment.spec.targetSelector` matching.
    /// Comma-separated `key=value` pairs. At least one is required.
    #[arg(long, default_value = "group=group-a,arch=aarch64")]
    labels: String,
    /// Path to the cross-compiled aarch64 fleet-agent binary on the
    /// driver machine. Uploaded to `/usr/local/bin/fleet-agent`.
    #[arg(long)]
    agent_binary: PathBuf,
    /// NATS URL the agent should connect to.
    #[arg(long)]
    nats_url: String,
    /// Shared NATS username — used in `toml-shared` mode (no SSO).
    /// Ignored when `--bootstrap-token` is set.
    #[arg(long, default_value = "smoke")]
    nats_user: String,
    /// Shared NATS password — used in `toml-shared` mode (no SSO).
    /// Ignored when `--bootstrap-token` is set.
    #[arg(long, default_value = "smoke")]
    nats_pass: String,
    /// Zitadel admin Personal Access Token used to provision a
    /// per-device machine user + role grant + JWT key on this Pi.
    /// When set, the agent's NATS auth flips from `toml-shared` to
    /// `zitadel-jwt` and the issued machine key is dropped onto the
    /// Pi at `/etc/fleet-agent/zitadel-key.json`. The PAT itself is
    /// used only by this CLI invocation — it never lands on the Pi.
    #[arg(long, env = "HARMONY_ZITADEL_ADMIN_PAT")]
    bootstrap_token: Option<String>,
    /// Externally-visible Zitadel issuer URL (e.g.
    /// `https://zitadel.customer1.nationtech.io`). Required when
    /// `--bootstrap-token` is set.
    #[arg(long)]
    zitadel_issuer_url: Option<String>,
    /// Zitadel project ID hosting the fleet roles. Required when
    /// `--bootstrap-token` is set. Used as both the JWT-bearer
    /// audience scope target and the role-claim path qualifier.
    #[arg(long)]
    zitadel_project_id: Option<String>,
    /// Zitadel role key to grant the per-device machine user.
    /// Defaults to `device` (matches the auth callout's
    /// `device_role` config).
    #[arg(long, default_value = "device")]
    zitadel_device_role: String,
    /// Accept invalid TLS certs when talking to Zitadel. Applies to
    /// both this CLI's bootstrap calls and the agent's HTTP client.
    /// Local-dev escape hatch; default false.
    #[arg(long)]
    danger_accept_invalid_certs: bool,
}
/// Onboard one Pi: prepare the Ansible venv, resolve SSH + sudo
/// credentials, build the per-device auth block, and hand a
/// `FleetDeviceSetupScore` to `harmony_cli::run`.
#[tokio::main]
async fn main() -> Result<()> {
    harmony_cli::cli_logger::init();
    let cli = Cli::parse();

    ensure_ansible_venv()
        .await
        .map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?;

    let device_id = match cli.device_id.clone() {
        Some(raw) => Id::from(raw),
        None => Id::default(),
    };
    let private_key_path = expand_tilde(&cli.ssh_key);
    let pi_ip = cli
        .pi_host
        .parse()
        .with_context(|| format!("--pi-host '{}' is not a valid IP address", cli.pi_host))?;

    let mut creds = SshCredentials {
        user: cli.pi_user.clone(),
        private_key_path,
        // Pi OS Lite ships /usr/bin/python3, so auto-discovery is skipped.
        remote_python: Some("/usr/bin/python3".to_string()),
        sudo_password: None,
    };

    // Probe with `sudo -n` before asking for anything: an operator whose
    // Pi has passwordless sudo should never see a password prompt. Only
    // when the probe fails do we go through SecretManager (same flow
    // other scores use for SSH keys etc. — see
    // harmony_secret/src/lib.rs:145), which prompts on the first run and
    // reuses the cached value afterwards.
    let sudo_probe = ssh_exec(pi_ip, &creds, "sudo -n true", None)
        .await
        .map_err(|e| anyhow::anyhow!("sudo probe: {e}"))?;
    let needs_password = sudo_probe.rc != 0;
    if needs_password {
        info!("device requires a sudo password — fetching from secret store");
        let secret = SecretManager::get_or_prompt::<SudoPassword>()
            .await
            .map_err(|e| anyhow::anyhow!("get sudo password: {e}"))?;
        creds.sudo_password = Some(secret.password);
    }

    let topology = LinuxHostTopology::new(format!("rpi-{}", cli.pi_host), pi_ip, creds);
    let labels = parse_labels(&cli.labels)?;
    let auth = build_auth(&cli, &device_id).await?;

    let setup = FleetDeviceSetupConfig {
        device_id: device_id.clone(),
        labels,
        nats_urls: vec![cli.nats_url.clone()],
        auth,
        agent_binary_path: cli.agent_binary.clone(),
        hosts_entries: vec![],
    };
    let score = FleetDeviceSetupScore::new(setup);

    // This binary owns argv through its own clap CLI, so harmony_cli
    // must NOT call `Args::parse()` (it would choke on --pi-host etc.).
    // Explicit Args are passed instead, with `yes: true`: typing the
    // command was already the operator's confirmation, so a second
    // prompt would only add friction.
    harmony_cli::run(
        Inventory::empty(),
        topology,
        vec![Box::new(score)],
        Some(harmony_cli::Args {
            yes: true,
            filter: None,
            interactive: false,
            all: true,
            number: 0,
            list: false,
        }),
    )
    .await
    .map_err(|e| anyhow::anyhow!("{e}"))
}
/// Decide how the agent on this device authenticates to NATS.
///
/// - Without `--bootstrap-token`: shared `--nats-user`/`--nats-pass`
///   credentials (`FleetDeviceAuth::TomlShared`).
/// - With `--bootstrap-token`: a per-device Zitadel machine user, role
///   grant and JWT key are provisioned through the Management API and
///   the key JSON is embedded in `FleetDeviceAuth::ZitadelJwt`. The
///   bootstrap PAT never leaves this CLI invocation.
///
/// # Errors
/// Fails when `--bootstrap-token` is given without
/// `--zitadel-issuer-url` or `--zitadel-project-id`, or when the
/// Zitadel bootstrap itself fails.
async fn build_auth(cli: &Cli, device_id: &Id) -> Result<FleetDeviceAuth> {
    let pat = match &cli.bootstrap_token {
        Some(token) => token.clone(),
        None => {
            info!("no --bootstrap-token; using shared NATS user/pass (toml-shared)");
            return Ok(FleetDeviceAuth::TomlShared {
                nats_user: cli.nats_user.clone(),
                nats_pass: cli.nats_pass.clone(),
            });
        }
    };

    let issuer = cli
        .zitadel_issuer_url
        .clone()
        .context("--bootstrap-token requires --zitadel-issuer-url")?;
    let project_id = cli
        .zitadel_project_id
        .clone()
        .context("--bootstrap-token requires --zitadel-project-id")?;
    let accept_invalid_certs = cli.danger_accept_invalid_certs;

    info!("bootstrapping Zitadel machine user device-{device_id} on project {project_id}");

    let username = format!("device-{device_id}");
    let device_label = device_id.to_string();
    let client = zitadel_bootstrap::ZitadelBootstrap::new(issuer.clone(), pat, accept_invalid_certs);
    let machine_key_json = client
        .ensure_device_machine_user(
            &username,
            &device_label,
            &project_id,
            &cli.zitadel_device_role,
        )
        .await
        .context("Zitadel device bootstrap failed")?;

    Ok(FleetDeviceAuth::ZitadelJwt {
        machine_key_json,
        oidc_issuer_url: issuer,
        audience: project_id,
        danger_accept_invalid_certs: accept_invalid_certs,
    })
}
fn parse_labels(raw: &str) -> Result<std::collections::BTreeMap<String, String>> {
let mut out = std::collections::BTreeMap::new();
for piece in raw.split(',').map(str::trim).filter(|p| !p.is_empty()) {
let (k, v) = piece
.split_once('=')
.ok_or_else(|| anyhow::anyhow!("label chunk '{piece}' missing '='"))?;
let k = k.trim();
let v = v.trim();
if k.is_empty() || v.is_empty() {
anyhow::bail!("label chunk '{piece}' has empty key or value");
}
out.insert(k.to_string(), v.to_string());
}
if out.is_empty() {
anyhow::bail!("--labels must include at least one key=value pair");
}
Ok(out)
}
/// Expand a leading `~` path component to the value of `$HOME`.
///
/// Both `~` and `~/rest` are expanded. Anything else is returned
/// unchanged, including `~user/...` and a `~` that is not the first
/// component. The path is also returned unchanged when `$HOME` is unset
/// or empty.
///
/// Matching is done on path components rather than on a lossy string,
/// so non-UTF-8 paths survive intact and repeated separators such as
/// `~//key` cannot turn the remainder into an absolute path that would
/// replace the home directory when joined.
fn expand_tilde(p: &std::path::Path) -> PathBuf {
    // `strip_prefix` only succeeds when the first component is exactly `~`.
    let Ok(rest) = p.strip_prefix("~") else {
        return p.to_path_buf();
    };
    let home = match std::env::var_os("HOME") {
        Some(home) if !home.is_empty() => PathBuf::from(home),
        _ => return p.to_path_buf(),
    };
    if rest.as_os_str().is_empty() {
        // Bare `~`: return the home directory without a trailing separator.
        home
    } else {
        home.join(rest)
    }
}

View File

@@ -0,0 +1,247 @@
//! Per-device Zitadel bootstrap for the Pi onboarding flow.
//!
//! Invoked once per Pi from the operator's machine. Uses the admin PAT
//! given on the CLI to:
//!
//! 1. Find or create a machine user `device-${device_id}` in Zitadel.
//! 2. Mint a fresh JSON-typed JWT signing key for that user (existing
//! keys cannot be read back, so a new one is created on every run).
//! 3. Find or create a project grant on the `device` role.
//!
//! Returns the JSON keyfile content. The caller drops it onto the Pi
//! via `FleetDeviceSetupScore`. The admin PAT is held in CLI memory
//! for the duration of the run only — it never lands on the Pi.
//!
//! User and grant provisioning are idempotent: re-running for the same
//! device id reuses the existing user and grant. The key is the
//! exception — every run mints a new one.
//!
//! NOTE: This is intentionally a minimal Management-API client. It
//! duplicates a small slice of `harmony::modules::zitadel::setup` (the
//! in-cluster ZitadelSetupScore) because `fleet_rpi_setup` runs on the
//! operator's machine without a kubeconfig pointing at the Zitadel
//! cluster. Refactoring the in-cluster Score's HTTP layer into a
//! reusable client crate is a follow-up.
use anyhow::{Context, Result};
use base64::Engine;
use serde::Deserialize;
/// Minimal Zitadel Management-API client used to provision one device's
/// machine user, role grant and JWT key.
pub struct ZitadelBootstrap {
/// Base URL of the Zitadel instance; request paths are appended to it
/// after trimming any trailing `/`.
issuer_url: String,
/// Admin Personal Access Token, sent as the bearer token on every request.
admin_pat: String,
/// Shared HTTP client, configured in `new` (10 s timeout, optional
/// acceptance of invalid TLS certificates).
http: reqwest::Client,
}
impl ZitadelBootstrap {
    /// Create a bootstrap client for the Zitadel instance at `issuer_url`.
    ///
    /// `admin_pat` is sent as the bearer token on every Management API
    /// call. `danger_accept_invalid_certs` disables TLS certificate
    /// validation on the underlying HTTP client. Requests time out after
    /// 10 seconds.
    ///
    /// # Panics
    /// Panics if the `reqwest` client cannot be built.
    pub fn new(issuer_url: String, admin_pat: String, danger_accept_invalid_certs: bool) -> Self {
        let http = reqwest::Client::builder()
            .danger_accept_invalid_certs(danger_accept_invalid_certs)
            .timeout(std::time::Duration::from_secs(10))
            .build()
            .expect("reqwest client builder is infallible for these settings");
        Self {
            issuer_url,
            admin_pat,
            http,
        }
    }

    /// Ensure machine user + role grant for one device, then mint a key.
    ///
    /// Returns the JSON keyfile content (raw, decoded from Zitadel's
    /// base64 `keyDetails`). The user and the grant are idempotent:
    /// re-running with the same `username` reuses both. The key is not:
    /// a fresh one is minted on every call, because the private half of
    /// an existing key cannot be read back from Zitadel.
    ///
    /// # Errors
    /// Fails when any Management API call fails, returns a non-2xx
    /// status, or returns a body that cannot be parsed.
    pub async fn ensure_device_machine_user(
        &self,
        username: &str,
        device_id: &str,
        project_id: &str,
        role_key: &str,
    ) -> Result<String> {
        let user_id = match self.find_user_by_name(username).await? {
            Some(id) => id,
            None => self
                .create_machine_user(username, device_id)
                .await
                .with_context(|| format!("creating machine user {username}"))?,
        };
        log::info!("[zitadel-bootstrap] machine user {username} → {user_id}");

        // The grant API rejects duplicates with code 6 (ALREADY_EXISTS),
        // so the cheapest path is "search → maybe create".
        if self.find_user_grant(&user_id, project_id).await?.is_none() {
            self.create_user_grant(&user_id, project_id, role_key)
                .await
                .with_context(|| {
                    format!("granting role {role_key} on project {project_id} to {username}")
                })?;
            log::info!("[zitadel-bootstrap] granted role {role_key} on project {project_id}");
        } else {
            log::info!("[zitadel-bootstrap] role grant already present");
        }

        // Always mint a fresh key — Zitadel doesn't expose the private
        // half of existing keys, so we can't reuse one. Stale keys
        // remain valid until expiry but never get reused on this Pi
        // because the agent's keyfile is overwritten on each setup run.
        let key_json = self
            .create_machine_key(&user_id)
            .await
            .with_context(|| format!("minting machine key for {username}"))?;
        Ok(key_json)
    }

    /// Join `path` onto the issuer URL, tolerating a trailing `/` on the
    /// configured issuer.
    fn url(&self, path: &str) -> String {
        format!("{}{path}", self.issuer_url.trim_end_matches('/'))
    }

    /// POST `body` as JSON to `path`, authenticated with the admin PAT.
    ///
    /// Returns the response when the status is 2xx. Otherwise fails with
    /// `"{what} returned {status}: {body}"`. Transport errors are
    /// annotated with the actual request path.
    async fn post_json(
        &self,
        path: &str,
        body: &serde_json::Value,
        what: &str,
    ) -> Result<reqwest::Response> {
        let resp = self
            .http
            .post(self.url(path))
            .bearer_auth(&self.admin_pat)
            .json(body)
            .send()
            .await
            .with_context(|| format!("POST {path}"))?;
        let status = resp.status();
        if !status.is_success() {
            // Best effort: an unreadable error body is reported as empty.
            let body = resp.text().await.unwrap_or_default();
            anyhow::bail!("{what} returned {status}: {body}");
        }
        Ok(resp)
    }

    /// Look up a user by exact user name and return its id, if any.
    async fn find_user_by_name(&self, username: &str) -> Result<Option<String>> {
        #[derive(Deserialize)]
        struct R {
            #[serde(default)]
            result: Vec<E>,
        }
        #[derive(Deserialize)]
        struct E {
            id: String,
            #[serde(rename = "userName", default)]
            user_name: Option<String>,
        }

        let query = serde_json::json!({
            "queries": [{
                "userNameQuery": {
                    "userName": username,
                    "method": "TEXT_QUERY_METHOD_EQUALS"
                }
            }]
        });
        let resp = self
            .post_json("/management/v1/users/_search", &query, "users/_search")
            .await?;
        let r: R = resp.json().await.context("parse users/_search")?;
        // Re-check the name client-side rather than trusting the first hit.
        Ok(r.result
            .into_iter()
            .find(|e| e.user_name.as_deref() == Some(username))
            .map(|e| e.id))
    }

    /// Create a machine user with JWT access tokens and return its id.
    async fn create_machine_user(&self, username: &str, device_id: &str) -> Result<String> {
        #[derive(Deserialize)]
        struct R {
            #[serde(rename = "userId")]
            user_id: String,
        }

        let payload = serde_json::json!({
            "userName": username,
            "name": format!("Fleet Device {device_id}"),
            "description": format!("Provisioned by fleet_rpi_setup for device {device_id}"),
            "accessTokenType": "ACCESS_TOKEN_TYPE_JWT"
        });
        let resp = self
            .post_json(
                "/management/v1/users/machine",
                &payload,
                "create machine user",
            )
            .await?;
        let r: R = resp.json().await.context("parse machine user response")?;
        Ok(r.user_id)
    }

    /// Mint a new JSON-typed key for `user_id` and return the decoded
    /// keyfile content.
    async fn create_machine_key(&self, user_id: &str) -> Result<String> {
        #[derive(Deserialize)]
        struct R {
            #[serde(rename = "keyDetails")]
            key_details: String,
        }

        let path = format!("/management/v1/users/{user_id}/keys");
        let payload = serde_json::json!({ "type": "KEY_TYPE_JSON" });
        let resp = self
            .post_json(&path, &payload, "create machine key")
            .await?;
        let r: R = resp.json().await.context("parse machine key response")?;
        // `keyDetails` arrives base64-encoded; callers want the raw JSON.
        let bytes = base64::engine::general_purpose::STANDARD
            .decode(&r.key_details)
            .context("decode keyDetails base64")?;
        String::from_utf8(bytes).context("keyDetails is non-UTF-8")
    }

    /// Return the id of the user's grant on `project_id`, if one exists.
    ///
    /// NOTE(review): the match is on project only; the role keys carried
    /// by an existing grant are not inspected.
    async fn find_user_grant(&self, user_id: &str, project_id: &str) -> Result<Option<String>> {
        #[derive(Deserialize)]
        struct R {
            #[serde(default)]
            result: Vec<E>,
        }
        #[derive(Deserialize)]
        struct E {
            id: String,
            #[serde(rename = "projectId")]
            project_id: String,
        }

        let path = format!("/management/v1/users/{user_id}/grants/_search");
        let resp = self
            .post_json(&path, &serde_json::json!({}), "grants/_search")
            .await?;
        let r: R = resp.json().await.context("parse grants/_search")?;
        Ok(r.result
            .into_iter()
            .find(|e| e.project_id == project_id)
            .map(|e| e.id))
    }

    /// Grant `role_key` on `project_id` to `user_id`.
    async fn create_user_grant(
        &self,
        user_id: &str,
        project_id: &str,
        role_key: &str,
    ) -> Result<()> {
        let path = format!("/management/v1/users/{user_id}/grants");
        let payload = serde_json::json!({
            "projectId": project_id,
            "roleKeys": [role_key]
        });
        self.post_json(&path, &payload, "create grant").await?;
        Ok(())
    }
}

View File

@@ -0,0 +1,17 @@
[package]
name = "example_fleet_server_install"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "fleet_server_install"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony", default-features = false }
harmony_cli = { path = "../../harmony_cli" }
harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" }
tokio.workspace = true
anyhow.workspace = true
clap.workspace = true

View File

@@ -0,0 +1,19 @@
export HARMONY_SECRET_NAMESPACE=fleet-server-install
export HARMONY_SECRET_STORE=file
export HARMONY_DATABASE_URL=sqlite://fleet_server_install.sqlite
export RUST_LOG=harmony=info,kube_runtime=warn
# Required: the kubeconfig for the cluster the score installs into.
# K8sAnywhereTopology::from_env() reads KUBECONFIG; without it, the
# topology will fall back to autoinstall logic that may try to spin
# up a local k3d cluster.
#export KUBECONFIG=/tmp/kubeconfig
export HARMONY_USE_LOCAL_K3D=false
# Zitadel install knobs (used by fleet/scripts/run_server_install.sh).
# Zitadel is installed by default; the script's defaults assume a
# `.localhost` hostname and HTTP ingress.
# export NO_ZITADEL=1 # skip the Zitadel install entirely
# export ZITADEL_HOST=zitadel.example.com # override the default zitadel.localhost
# export ZITADEL_VERSION=v4.12.1 # override the chart version

View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Thin wrapper around `cargo run -p example_fleet_server_install`. All
# flags pass through to the binary — see src/main.rs for the surface,
# or run with --help.
#
# Requires KUBECONFIG to point at a reachable cluster + helm on PATH.
# Abort on any error, unset variable, or failed pipeline stage.
set -euo pipefail
# Resolve this script's own directory, then the directory two levels up,
# and run cargo from there.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$REPO_ROOT"
# `exec` replaces this shell with cargo, so cargo's exit status becomes
# the script's exit status.
exec cargo run -q --release -p example_fleet_server_install -- "$@"

View File

@@ -0,0 +1,192 @@
//! Install the harmony fleet server-side stack into the cluster
//! `KUBECONFIG` points at: NATS + the harmony fleet operator (CRDs +
//! RBAC + Deployment), and optionally a central Zitadel OIDC
//! identity provider, by registering `NatsBasicScore`,
//! `FleetOperatorScore` and (optionally) `ZitadelScore` directly.
//!
//! This is the framework-side replacement for the
//! `example_fleet_nats_install`, `harmony-fleet-operator chart`,
//! and `helm install` chain that the load-test harness used to
//! drive by hand.
//!
//! Typical usage (operator + NATS only):
//!
//! KUBECONFIG=$KUBECFG cargo run -q -p example_fleet_server_install -- \
//! --operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev
//!
//! Including Zitadel:
//!
//! KUBECONFIG=$KUBECFG cargo run -q -p example_fleet_server_install -- \
//! --operator-image … \
//! --zitadel-host zitadel.localhost
//!
//! Behaviour:
//! - Installs single-node NATS (JetStream) into `--nats-namespace`
//! using `NatsBasicScore`, exposed per `--nats-expose`.
//! - Installs the operator chart into `--operator-namespace` via
//! `FleetOperatorScore` (which renders the chart in a tempdir
//! and helm-installs it).
//! - When `--zitadel-host` is set, also runs `ZitadelScore`:
//! provisions a CNPG PostgreSQL cluster + the upstream
//! `zitadel/zitadel` helm chart with distribution-aware ingress.
//! Defaults to HTTPS unless the host ends with `.localhost` or
//! `.test`, or `--zitadel-insecure` is passed.
//! - Idempotent: re-running on an existing install short-circuits
//! at `HelmChartScore::find_installed_release`.
//!
//! Topology: `K8sAnywhereTopology::from_env()`. This requires `KUBECONFIG`
//! to be set and runs `CertificateManagementScore` as part of
//! `ensure_ready` — i.e. it installs cert-manager into the cluster on
//! first run. Cert-manager is needed for Zitadel's ingress TLS in
//! production; for k3d dev it's still installed but unused.
//!
//! Output is driven by `harmony_cli::run`, which wires up the
//! framework's standard logger + reporter — emoji-tagged progress
//! lines per Score, plus an end-of-run summary listing the
//! `Outcome.details` from each Score.
use anyhow::Result;
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::nats::NatsBasicScore;
use harmony::modules::zitadel::ZitadelScore;
use harmony::score::Score;
use harmony::topology::K8sAnywhereTopology;
use harmony_fleet_deploy::FleetOperatorScore;
// Command-line surface of `fleet_server_install`. The `///` comments on
// the fields are the user-facing `--help` text; keep them accurate with
// respect to what `main` actually does.
#[derive(Parser, Debug)]
#[command(
    name = "fleet_server_install",
    about = "Install the harmony fleet server-side stack (NATS + operator [+ Zitadel])"
)]
struct Cli {
    /// Namespace for the NATS Deployment + Service.
    #[arg(long, default_value = "fleet-system")]
    nats_namespace: String,
    /// Resource name for the NATS release.
    #[arg(long, default_value = "fleet-nats")]
    nats_name: String,
    /// NATS service exposure mode. `load-balancer` pairs with k3d's
    /// `-p PORT:PORT@loadbalancer`. `node-port` requires the port be
    /// in the apiserver's nodeport range (default 30000-32767).
    #[arg(long, value_enum, default_value_t = NatsExpose::LoadBalancer)]
    nats_expose: NatsExpose,
    /// NodePort when `--nats-expose=node-port`. Ignored otherwise.
    #[arg(long, default_value_t = 30422)]
    nats_node_port: i32,
    /// Optional NATS image override (`repository:tag`).
    #[arg(long)]
    nats_image: Option<String>,
    /// Namespace the operator runs in.
    #[arg(long, default_value = "fleet-system")]
    operator_namespace: String,
    /// Helm release name for the operator chart.
    #[arg(long, default_value = "harmony-fleet-operator")]
    operator_release: String,
    /// Operator container image (`repository:tag`).
    #[arg(
        long,
        default_value = "hub.nationtech.io/harmony/harmony-fleet-operator:dev"
    )]
    operator_image: String,
    /// Image pull policy for the operator Deployment.
    #[arg(long, default_value = "IfNotPresent")]
    operator_image_pull_policy: String,
    /// `RUST_LOG` value injected into the operator pod's env.
    #[arg(long, default_value = "info,kube_runtime=warn")]
    log_level: String,
    /// Hostname Zitadel should answer on. When set, Zitadel + its
    /// PostgreSQL cluster are installed alongside the operator.
    /// When unset, the Zitadel install is skipped entirely.
    #[arg(long)]
    zitadel_host: Option<String>,
    /// Zitadel chart version (matches `zitadel/zitadel` upstream tags).
    #[arg(long, default_value = "v4.12.1")]
    zitadel_version: String,
    /// Force HTTP instead of HTTPS for the Zitadel ingress. HTTP is
    /// also used automatically when `--zitadel-host` ends with
    /// `.localhost` or `.test`; every other host defaults to HTTPS.
    #[arg(long)]
    zitadel_insecure: bool,
}
// How the NATS Service is exposed; chosen with `--nats-expose`.
#[derive(Clone, Debug, clap::ValueEnum)]
enum NatsExpose {
// `main` leaves the score's exposure as `NatsBasicScore::new` built it.
ClusterIp,
// `main` calls `.node_port(cli.nats_node_port)` on the score.
NodePort,
// `main` calls `.load_balancer()` on the score. This is the CLI default.
LoadBalancer,
}
/// Assemble the NATS, operator and (optionally) Zitadel Scores from the
/// CLI flags and run them all through `harmony_cli::run`.
#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();
    let topology = K8sAnywhereTopology::from_env();

    // NATS: start from the base score and layer the optional settings.
    let base = NatsBasicScore::new(&cli.nats_name, &cli.nats_namespace);
    let exposed = match cli.nats_expose {
        NatsExpose::ClusterIp => base,
        NatsExpose::NodePort => base.node_port(cli.nats_node_port),
        NatsExpose::LoadBalancer => base.load_balancer(),
    };
    let nats = match cli.nats_image {
        Some(image) => exposed.image(image),
        None => exposed,
    };

    // The operator reaches NATS through the in-cluster service DNS that
    // the NatsBasicScore install creates. ClusterIP and LoadBalancer
    // both expose the same `<release>.<namespace>:4222` to in-cluster
    // callers.
    let nats_url = format!("nats://{}.{}:4222", cli.nats_name, cli.nats_namespace);
    let operator = FleetOperatorScore::new()
        .namespace(&cli.operator_namespace)
        .release_name(&cli.operator_release)
        .image(&cli.operator_image)
        .image_pull_policy(&cli.operator_image_pull_policy)
        .nats_url(&nats_url)
        .log_level(&cli.log_level);

    // FleetServerScore now takes NatsK8sScore (auth-callout-aware,
    // OKD-Route-aware) — see `fleet_staging_install` for the production
    // composition. This simpler example registers the inner Scores
    // directly so it can keep using the basic NATS helm chart for
    // k3d-style local installs.
    let mut scores: Vec<Box<dyn Score<K8sAnywhereTopology>>> =
        vec![Box::new(nats), Box::new(operator)];

    if let Some(host) = cli.zitadel_host {
        // HTTPS is the default; it is switched off for `.localhost` /
        // `.test` development hostnames or when --zitadel-insecure was
        // passed explicitly.
        let is_dev_host = host.ends_with(".localhost") || host.ends_with(".test");
        let external_secure = !(cli.zitadel_insecure || is_dev_host);
        let zitadel = ZitadelScore {
            host,
            zitadel_version: cli.zitadel_version,
            external_secure,
            external_port: None,
            ..Default::default()
        };
        scores.push(Box::new(zitadel));
    }

    // argv already belongs to our own Cli, so harmony_cli gets explicit
    // `Args` with dev-friendly defaults (no confirmation prompt, run
    // every registered score) instead of re-parsing the command line.
    let harmony_args = harmony_cli::Args {
        yes: true,
        filter: None,
        interactive: false,
        all: true,
        number: 0,
        list: false,
    };
    harmony_cli::run(Inventory::empty(), topology, scores, Some(harmony_args))
        .await
        .map_err(|e| anyhow::anyhow!("{e}"))
}

View File

@@ -0,0 +1,23 @@
[package]
name = "example-fleet-sso-login"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
description = "Developer-side CLI: log in to a fleet platform staging instance via Zitadel device-code OIDC"
[[bin]]
name = "fleet-sso-login"
path = "src/main.rs"
[dependencies]
reqwest = { workspace = true }
tokio = { workspace = true, features = ["full"] }
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
anyhow.workspace = true
clap = { version = "4", features = ["derive", "env"] }
base64 = "0.22"
log.workspace = true
env_logger.workspace = true
directories = "6.0.0"

View File

@@ -0,0 +1,266 @@
//! Developer-side CLI: log in to a fleet platform staging instance via
//! Zitadel's OIDC Device Authorization Grant (RFC 8628).
//!
//! Usage:
//!
//! ```text
//! cargo run -p example-fleet-sso-login -- \
//! --base-domain customer1.nationtech.io \
//! --client-id 366378028009259038
//! ```
//!
//! Flow:
//! 1. POST to `/oauth/v2/device_authorization` with the CLI client_id —
//! receive a `verification_uri_complete`, `user_code`, `device_code`
//! and a polling interval.
//! 2. Print the URL the user opens in their browser. They authenticate
//! via Zitadel (username/password, MFA, SSO chain — Zitadel handles
//! that part).
//! 3. Poll `/oauth/v2/token` with `grant_type=urn:ietf:params:oauth:
//! grant-type:device_code` until the access token is issued.
//! 4. Decode the access token's claims, print "Welcome <preferred
//! username>", and persist the session at
//! `$DATA_DIR/harmony/sso-session.json`.
//!
//! No K8s API call yet — for the demo, this CLI proves the SSO works.
//! Future: a `harmony fleet apply` subcommand uses the persisted token
//! to talk to a fleet-platform API gateway. That gateway is post-demo.
use std::path::PathBuf;
use std::time::Duration;
use anyhow::{Context, Result, bail};
use base64::Engine;
use clap::Parser;
use serde::{Deserialize, Serialize};
// Command-line arguments of the `fleet-sso-login` binary, parsed with clap.
// `base_domain` and `client_id` may also be supplied through the
// `FLEET_BASE_DOMAIN` / `FLEET_CLI_CLIENT_ID` environment variables; neither
// declares a default value. `poll_interval_secs` is optional and, when
// present, takes precedence over the interval returned by the server.
#[derive(Parser, Debug)]
#[command(
name = "fleet-sso-login",
about = "Log in to a fleet platform staging instance via Zitadel device-code OIDC"
)]
struct Cli {
/// Base DNS domain — same value the operator passed to
/// fleet-staging-deploy. The Zitadel issuer derives as
/// `https://zitadel.<base>`.
#[arg(long, env = "FLEET_BASE_DOMAIN")]
base_domain: String,
/// OIDC client_id of the `harmony-cli` Device Code app on the
/// Zitadel project. Printed by `fleet-staging-deploy` at the end
/// of a successful run.
#[arg(long, env = "FLEET_CLI_CLIENT_ID")]
client_id: String,
/// Override the polling interval suggested by Zitadel
/// (defaults to whatever the device-authorization endpoint returned;
/// pass to short-circuit during testing).
#[arg(long)]
poll_interval_secs: Option<u64>,
}
/// JSON body of a successful `/oauth/v2/device_authorization` response, as
/// deserialized by `main`.
#[derive(Debug, Deserialize)]
struct DeviceAuthResponse {
/// Sent back as `device_code` on every token-endpoint poll.
device_code: String,
/// Code printed for the user to type in if the URL does not pre-fill it.
user_code: String,
/// URL shown to the user when `verification_uri_complete` is absent.
verification_uri: String,
/// Preferred URL to display when the server provides it.
#[serde(default)]
verification_uri_complete: Option<String>,
/// Lifetime of the device code in seconds; `main` derives its polling
/// deadline from it.
expires_in: u64,
/// Suggested polling interval in seconds; `main` falls back to 5 when
/// it is missing and no CLI override was given.
#[serde(default)]
interval: Option<u64>,
}
/// JSON body of a successful `/oauth/v2/token` response. `main` only reads
/// `access_token`; the remaining optional fields are parsed when present
/// but not otherwise used by the code in this file.
#[derive(Debug, Deserialize, Serialize)]
struct TokenResponse {
/// The issued access token; persisted and decoded for display claims.
access_token: String,
#[serde(default)]
id_token: Option<String>,
#[serde(default)]
refresh_token: Option<String>,
#[serde(default)]
expires_in: Option<u64>,
#[serde(default)]
token_type: Option<String>,
}
/// JSON error body returned by the token endpoint on a non-success status.
/// When the body cannot be parsed, `main` synthesizes one with
/// `error = "http_<status>"` and the raw body as the description.
#[derive(Debug, Deserialize)]
struct TokenError {
/// Error code; `main` treats `authorization_pending` and `slow_down` as
/// non-terminal and every other value as fatal.
error: String,
/// Optional human-readable detail, included in the fatal error message.
#[serde(default)]
error_description: Option<String>,
}
/// Runs the OIDC Device Authorization Grant (RFC 8628) end to end:
/// requests a device code, prints the verification URL, polls the token
/// endpoint until a token is issued, then persists the session and greets
/// the user.
///
/// # Errors
/// Fails when the device-authorization request is rejected, when the
/// device code expires before the user completes login, when the token
/// endpoint returns a terminal error, or when the session file cannot be
/// written.
#[tokio::main]
async fn main() -> Result<()> {
    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
        .try_init();
    let cli = Cli::parse();
    let issuer = format!("https://zitadel.{}", cli.base_domain);
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(15))
        .build()?;

    // -- Step 1: kick off the device flow ----------------------------
    let device_auth_url = format!("{issuer}/oauth/v2/device_authorization");
    let scope =
        "openid profile email urn:zitadel:iam:user:resourceowner urn:zitadel:iam:org:project:roles";
    let resp = client
        .post(&device_auth_url)
        .form(&[("client_id", cli.client_id.as_str()), ("scope", scope)])
        .send()
        .await
        .with_context(|| format!("POST {device_auth_url}"))?;
    if !resp.status().is_success() {
        let s = resp.status();
        let body = resp.text().await.unwrap_or_default();
        bail!("device_authorization returned {s}: {body}");
    }
    let auth: DeviceAuthResponse = resp.json().await.context("parse device_authorization")?;
    let display_url = auth
        .verification_uri_complete
        .clone()
        .unwrap_or_else(|| auth.verification_uri.clone());
    println!();
    println!("============================================================");
    println!(" Open this URL in your browser to log in:");
    println!();
    println!(" {display_url}");
    println!();
    println!(" If the URL doesn't pre-fill the code, enter:");
    println!();
    println!(" user_code: {}", auth.user_code);
    println!();
    println!(
        " Waiting for browser-side completion (expires in {}s)...",
        auth.expires_in
    );
    println!("============================================================");
    println!();

    // -- Step 2: poll the token endpoint -----------------------------
    let token_url = format!("{issuer}/oauth/v2/token");
    // Mutable because `slow_down` responses permanently widen it.
    let mut interval =
        Duration::from_secs(cli.poll_interval_secs.unwrap_or(auth.interval.unwrap_or(5)));
    let deadline = std::time::Instant::now() + Duration::from_secs(auth.expires_in);
    let access_token = loop {
        if std::time::Instant::now() > deadline {
            bail!("device-code expired before user completed login");
        }
        tokio::time::sleep(interval).await;
        let resp = client
            .post(&token_url)
            .form(&[
                ("grant_type", "urn:ietf:params:oauth:grant-type:device_code"),
                ("device_code", auth.device_code.as_str()),
                ("client_id", cli.client_id.as_str()),
            ])
            .send()
            .await
            .context("POST token")?;
        let status = resp.status();
        let body = resp.text().await.unwrap_or_default();
        if status.is_success() {
            let tr: TokenResponse =
                serde_json::from_str(&body).context("parse token success body")?;
            break tr.access_token;
        }
        // Per RFC 8628, the token endpoint returns specific error
        // codes during polling — `authorization_pending` and
        // `slow_down` are NOT terminal, every other error is.
        let err: TokenError = serde_json::from_str(&body).unwrap_or_else(|_| TokenError {
            error: format!("http_{}", status.as_u16()),
            error_description: Some(body.clone()),
        });
        match err.error.as_str() {
            "authorization_pending" => {
                log::debug!("authorization_pending — user hasn't approved yet");
                continue;
            }
            "slow_down" => {
                // RFC 8628 §3.5: the interval MUST grow by 5 seconds for
                // this and all subsequent requests, not just the next one.
                interval += Duration::from_secs(5);
                log::info!("server requested slow_down — increasing poll interval");
                continue;
            }
            other => bail!(
                "token endpoint refused: {other} ({})",
                err.error_description.unwrap_or_default()
            ),
        }
    };

    // -- Step 3: introspect + persist --------------------------------
    // A token that is not a decodable JWT yields `Value::Null`, so the
    // lookups below fall through to their placeholder strings.
    let claims = decode_jwt_claims(&access_token).unwrap_or_default();
    let display_name = claims
        .get("name")
        .or_else(|| claims.get("preferred_username"))
        .and_then(|v| v.as_str())
        .unwrap_or("(unknown)");
    let email = claims
        .get("email")
        .and_then(|v| v.as_str())
        .unwrap_or("(no email)");
    persist_session(&issuer, &cli.client_id, &access_token, &claims)?;
    println!();
    println!("============================================================");
    println!(" SSO LOGIN SUCCESSFUL");
    println!("============================================================");
    println!(" Welcome, {display_name} <{email}>");
    println!(" Session stored at: {}", session_path().display());
    println!("============================================================");
    Ok(())
}
/// Decodes the payload (second dot-separated segment) of `jwt` into a JSON
/// value. The signature is not verified.
///
/// Returns `None` when the token has no second segment, when the segment is
/// not valid unpadded base64url, or when the decoded bytes are not JSON.
fn decode_jwt_claims(jwt: &str) -> Option<serde_json::Value> {
    let payload_b64 = jwt.split('.').nth(1)?;
    // The decoder is the NO_PAD engine, so any trailing `=` padding is
    // stripped rather than added before decoding.
    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
        .decode(payload_b64.trim_end_matches('='))
        .ok()?;
    serde_json::from_slice(&bytes).ok()
}
/// Borrowed view of the login result that `persist_session` serializes to
/// the session JSON file.
#[derive(Serialize)]
struct PersistedSession<'a> {
/// Issuer URL the token was obtained from.
issuer: &'a str,
/// OIDC client_id used for the device flow.
client_id: &'a str,
/// The raw access token, stored as-is.
access_token: &'a str,
/// Claims decoded from the access token (may be JSON `null`).
claims: &'a serde_json::Value,
}
fn persist_session(
issuer: &str,
client_id: &str,
access_token: &str,
claims: &serde_json::Value,
) -> Result<()> {
let path = session_path();
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)
.with_context(|| format!("create session dir {}", parent.display()))?;
}
let s = PersistedSession {
issuer,
client_id,
access_token,
claims,
};
let json = serde_json::to_string_pretty(&s)?;
std::fs::write(&path, json).with_context(|| format!("write session to {}", path.display()))?;
// 0600 so other users on the box can't read the access token.
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o600)).ok();
}
Ok(())
}
/// Location of the persisted SSO session file:
/// `<data_dir>/harmony/sso-session.json`, or
/// `/tmp/harmony-sso-session.json` when the base directories cannot be
/// determined.
fn session_path() -> PathBuf {
    match directories::BaseDirs::new() {
        Some(dirs) => dirs.data_dir().join("harmony").join("sso-session.json"),
        None => PathBuf::from("/tmp/harmony-sso-session.json"),
    }
}

View File

@@ -0,0 +1,36 @@
[package]
name = "example-fleet-staging-deploy"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
description = "Deploy the fleet platform stack (Zitadel + NATS + auth callout) onto an OKD/Kubernetes cluster. Operator-side, run-once-per-customer."
[lib]
name = "example_fleet_staging_deploy"
path = "src/lib.rs"
[[bin]]
name = "fleet-staging-deploy"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony-k8s = { path = "../../harmony-k8s" }
harmony_types = { path = "../../harmony_types" }
harmony-nats-callout = { path = "../../nats/callout" }
nkeys = "0.4"
async-nats.workspace = true
reqwest = { workspace = true }
tokio = { workspace = true, features = ["full"] }
serde.workspace = true
serde_json.workspace = true
anyhow.workspace = true
log.workspace = true
env_logger.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
clap = { version = "4", features = ["derive", "env"] }
k8s-openapi.workspace = true
kube.workspace = true
url.workspace = true

View File

@@ -0,0 +1,577 @@
//! Operator-side staging deploy harness.
//!
//! Runs once per customer instance against an OKD / Kubernetes cluster
//! to bring up the fleet platform's central services:
//!
//! 1. Zitadel + Postgres (HTTPS via OKD HAProxy ingress, edge TLS).
//! 2. The fleet project + roles (`fleet-admin`, `device`) + an API app
//! (so the project ID can be the JWT-bearer audience).
//! 3. NATS with `auth_callout` and a WSS ingress (so Pis on a customer
//! LAN connect through `wss://nats.<base>/`).
//! 4. The auth callout Deployment, configured to validate Zitadel JWTs
//! and emit per-device permissions on user JWTs to NATS.
//!
//! Everything keys off [`FleetDomainConfig::base_domain`] —
//! `zitadel.<base>`, `nats.<base>`, `api.<base>` are the only
//! customer-visible hostnames. Pi-side onboarding (see
//! `examples/fleet_rpi_setup/`) consumes the Zitadel admin PAT plus
//! the project ID this harness prints, so the operator's flow is:
//!
//! ```text
//! cargo run -p example-fleet-staging-deploy -- --base-domain customer1.nationtech.io
//! ↓ prints PROJECT_ID, NATS WSS URL, instructions to extract iam-admin-pat
//! HARMONY_ZITADEL_ADMIN_PAT=$(kubectl -n zitadel-staging get secret iam-admin-pat -o jsonpath='{.data.pat}' | base64 -d) \
//! cargo run -p example-fleet-rpi-setup -- \
//! --pi-host 192.168.1.42 \
//! --bootstrap-token "$HARMONY_ZITADEL_ADMIN_PAT" \
//! --zitadel-issuer-url https://zitadel.customer1.nationtech.io \
//! --zitadel-project-id <PROJECT_ID printed above> \
//! --nats-url wss://nats.customer1.nationtech.io/ \
//! --agent-binary ./target/aarch64-unknown-linux-gnu/release/fleet-agent
//! ```
//!
//! The harness is **idempotent** by design — re-running picks up
//! existing resources via the new helm-upgrade-by-default behavior +
//! ZitadelSetupScore's search-then-create flow + a persisted issuer
//! NKey in a K8s secret so user JWTs survive restarts.
use std::time::Duration;
use anyhow::{Context, Result};
use harmony::inventory::Inventory;
use harmony::modules::nats::NatsHelmChartScore;
use harmony::modules::nats_auth_callout::{NatsAuthCalloutScore, render_auth_callout_block};
use harmony::modules::zitadel::{
ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig, ZitadelRole,
ZitadelScore, ZitadelSetupScore,
};
use harmony::score::Score;
use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
use log::info;
use nkeys::KeyPair;
// ---- domain config ---------------------------------------------------------
/// Holds the one base domain from which every customer-visible hostname
/// and URL of the staging deploy is derived, so nothing is hardcoded per
/// customer or environment.
#[derive(Debug, Clone)]
pub struct FleetDomainConfig {
    /// e.g. `customer1.nationtech.io`; the accessors below prefix it with
    /// `zitadel.` and `nats.`.
    pub base_domain: String,
}

impl FleetDomainConfig {
    /// Builds a config from anything convertible into a `String`.
    pub fn new(base_domain: impl Into<String>) -> Self {
        let base_domain = base_domain.into();
        Self { base_domain }
    }

    /// Hostname Zitadel answers on: `zitadel.<base>`.
    pub fn zitadel_host(&self) -> String {
        self.subdomain("zitadel")
    }

    /// Hostname of the NATS WebSocket ingress: `nats.<base>`.
    pub fn nats_wss_host(&self) -> String {
        self.subdomain("nats")
    }

    /// Zitadel issuer URL: `https://zitadel.<base>` (no trailing slash).
    pub fn zitadel_issuer_url(&self) -> String {
        let host = self.zitadel_host();
        format!("https://{host}")
    }

    /// NATS WebSocket URL: `wss://nats.<base>/` (with trailing slash).
    pub fn nats_wss_url(&self) -> String {
        let host = self.nats_wss_host();
        format!("wss://{host}/")
    }

    /// Prefixes the base domain with `label` and a dot.
    fn subdomain(&self, label: &str) -> String {
        format!("{label}.{}", self.base_domain)
    }
}
// ---- naming + constants ----------------------------------------------------
/// Namespace used for the NATS release, the callout Deployment and the
/// issuer-seed secret.
pub const FLEET_NAMESPACE: &str = "fleet-staging";
/// Namespace passed to `ZitadelScore` and `ZitadelSetupScore`.
pub const ZITADEL_NAMESPACE: &str = "zitadel-staging";
/// Name given to the NATS chart install; also used as `fullnameOverride`
/// and as the service host in the callout's in-cluster NATS URL.
pub const NATS_RELEASE: &str = "fleet-nats";
/// Name passed to `NatsAuthCalloutScore::new` for the callout.
pub const CALLOUT_DEPLOYMENT_NAME: &str = "fleet-callout";
/// Zitadel project that owns the apps and roles provisioned here.
pub const PROJECT_NAME: &str = "fleet";
/// Name of the Zitadel API app created in the project.
pub const API_APP_NAME: &str = "nats";
/// Name of the Zitadel Device Code app used by the developer CLI.
pub const CLI_APP_NAME: &str = "harmony-cli";
/// Role key handed to the callout as its admin role.
pub const ADMIN_ROLE_KEY: &str = "fleet-admin";
/// Role key handed to the callout as its device role.
pub const DEVICE_ROLE_KEY: &str = "device";
/// NATS user the callout itself connects as.
pub const NATS_AUTH_USER: &str = "auth";
/// NATS account the callout targets and that lists `NATS_AUTH_USER`.
pub const NATS_ACCOUNT: &str = "DEVICES";
/// User rendered under the `SYS` account in the NATS values.
pub const NATS_SYSTEM_USER: &str = "sys-admin";
/// Name of the K8s secret holding the callout issuer NKey seed.
pub const ISSUER_SEED_SECRET: &str = "callout-issuer-seed";
// ---- handles ---------------------------------------------------------------
/// Values produced by a successful `bring_up_staging` run; consumed by
/// `print_next_steps` to tell the operator what to do next.
#[derive(Debug, Clone)]
pub struct StagingHandles {
/// Domain configuration the deploy was run with.
pub domain: FleetDomainConfig,
/// Zitadel project ID read back from the local client-config cache.
pub project_id: String,
/// Public key derived from the persisted issuer NKey seed.
pub issuer_pubkey: String,
/// Tag of the callout image expected to exist in a registry the
/// cluster pulls from. The operator pushes it before running the
/// deploy; this field is just the name we put on the Deployment
/// for traceability.
pub callout_image: String,
/// OIDC client_id of the `harmony-cli` Device Code app — what the
/// `fleet_sso_login` CLI sends in its device-authorization request.
/// `None` if the app pre-existed without the cache picking it up
/// (re-running the staging deploy after `rm -rf
/// ~/.local/share/harmony/zitadel/`).
pub cli_client_id: Option<String>,
}
// ---- bring up --------------------------------------------------------------
/// Inputs to `bring_up_staging`.
pub struct StagingDeployOpts {
/// Base domain every hostname and URL of the deploy derives from.
pub domain: FleetDomainConfig,
/// When `Some`, `bring_up_staging` exports it as `HARMONY_K8S_CONTEXT`
/// (and disables the local-k3d / autoinstall env switches) before the
/// topology is built from the environment.
pub kubeconfig_context: Option<String>,
/// Image reference the cluster will pull. Operator must have
/// pushed this beforehand (e.g. `quay.io/customer/harmony-nats-callout:demo`).
pub callout_image: String,
/// Per-NATS-account password for the callout's own NATS connection.
/// Stored in a K8s secret + listed in the chart's
/// `accounts.<account>.users` so the callout bypasses callout to
/// connect (otherwise it'd deadlock authenticating itself).
pub nats_auth_pass: String,
/// SYS account password (for `kubectl exec nats-box` debugging).
pub nats_system_pass: String,
}
/// Brings up the staging stack in five logged steps: deploy Zitadel, wait
/// for its HTTPS endpoint, provision the Zitadel project/apps/roles, deploy
/// NATS with the auth-callout block, and deploy the auth callout itself.
///
/// Returns the [`StagingHandles`] describing what was deployed.
///
/// # Errors
/// Propagates the first failing step, annotated with a short context string
/// (`"topology init"`, `"NATS deploy"`, `"callout deploy"`, ...).
pub async fn bring_up_staging(opts: StagingDeployOpts) -> Result<StagingHandles> {
// `try_init` fails if a logger is already installed; that is ignored.
let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
.try_init();
if let Some(ctx) = &opts.kubeconfig_context {
// NOTE(review): this mutates the process environment from inside an
// async fn; confirm no other thread reads or writes the environment
// concurrently, which is what makes `set_var` require `unsafe`.
unsafe {
std::env::set_var("HARMONY_K8S_CONTEXT", ctx);
std::env::set_var("HARMONY_USE_LOCAL_K3D", "false");
std::env::set_var("HARMONY_AUTOINSTALL", "false");
}
}
// Built after the env overrides above so `from_env` can observe them.
let topology = K8sAnywhereTopology::from_env();
topology.ensure_ready().await.context("topology init")?;
info!(
"[1/5] deploying Zitadel at https://{}",
opts.domain.zitadel_host()
);
deploy_zitadel(&opts.domain, &topology).await?;
info!("[2/5] waiting for Zitadel HTTPS to respond");
wait_for_zitadel_ready(&opts.domain).await?;
info!("[3/5] provisioning project '{PROJECT_NAME}', api app, CLI device-code app, and roles");
provision_zitadel_project(&opts.domain, &topology).await?;
// Both IDs are read back from the local ZitadelClientConfig cache. A
// missing project ID is fatal; a missing CLI client_id only warns.
let project_id = read_project_id()?;
let cli_client_id = read_cli_client_id();
info!(" → project_id = {project_id}");
if let Some(cid) = &cli_client_id {
info!(" → cli_client_id = {cid}");
} else {
log::warn!(
" → cli_client_id missing from cache; CLI login won't work until you reset the local zitadel cache"
);
}
info!("[4/5] generating issuer NKey + deploying NATS with auth_callout + WSS ingress");
// `ensure_issuer_seed` reuses the seed stored in the cluster secret when
// one exists and only generates a new one otherwise.
let issuer_seed = ensure_issuer_seed(&topology).await?;
let issuer_kp = KeyPair::from_seed(&issuer_seed)
.map_err(|e| anyhow::anyhow!("invalid persisted issuer seed: {e}"))?;
let issuer_pubkey = issuer_kp.public_key();
NatsHelmChartScore::new(
NATS_RELEASE.to_string(),
FLEET_NAMESPACE.to_string(),
render_nats_values(
&opts.domain,
&issuer_pubkey,
&opts.nats_auth_pass,
&opts.nats_system_pass,
),
)
.interpret(&Inventory::autoload(), &topology)
.await
.context("NATS deploy")?;
info!(
"[5/5] deploying NatsAuthCalloutScore (image: {})",
opts.callout_image
);
NatsAuthCalloutScore::new(
CALLOUT_DEPLOYMENT_NAME,
FLEET_NAMESPACE,
format!("nats://{NATS_RELEASE}.{FLEET_NAMESPACE}.svc.cluster.local:4222"),
opts.domain.zitadel_issuer_url(),
// The aud the callout validates against is the project ID —
// Zitadel emits it in access tokens minted via the
// project-id-audience scope.
project_id.clone(),
NATS_AUTH_USER,
opts.nats_auth_pass.clone(),
issuer_seed,
)
.image(&opts.callout_image)
.target_account(NATS_ACCOUNT)
.admin_role(ADMIN_ROLE_KEY)
.device_role(DEVICE_ROLE_KEY)
.interpret(&Inventory::autoload(), &topology)
.await
.context("callout deploy")?;
Ok(StagingHandles {
domain: opts.domain,
project_id,
issuer_pubkey,
callout_image: opts.callout_image,
cli_client_id,
})
}
/// Looks up the `harmony-cli` app's client_id in the local
/// `ZitadelClientConfig` cache. Returns `None` when the cache cannot be
/// loaded or holds no entry for that app.
fn read_cli_client_id() -> Option<String> {
    let cache = ZitadelClientConfig::load()?;
    let client_id = cache.client_id(CLI_APP_NAME)?;
    Some(client_id.clone())
}
/// Interprets a `ZitadelScore` for `zitadel.<base>` in `ZITADEL_NAMESPACE`
/// against `topology`.
///
/// # Errors
/// Returns the interpret failure with the context `"ZitadelScore"`.
async fn deploy_zitadel(domain: &FleetDomainConfig, topology: &K8sAnywhereTopology) -> Result<()> {
    let host = domain.zitadel_host();
    let namespace = ZITADEL_NAMESPACE.to_string();
    let zitadel_version = "v4.12.1".to_string();
    let score = ZitadelScore {
        host,
        zitadel_version,
        namespace,
        // TLS is edge-terminated by OKD's HAProxy, so the issuer URL is
        // `https://zitadel.<base>` on the implied port 443. Keeping
        // `external_port` at `None` makes Zitadel's emitted issuer omit
        // the port, matching what clients actually reach.
        external_secure: true,
        external_port: None,
        ..Default::default()
    };
    let inventory = Inventory::autoload();
    score
        .interpret(&inventory, topology)
        .await
        .context("ZitadelScore")?;
    Ok(())
}
/// Interprets a `ZitadelSetupScore` that declares the fleet project's
/// Device Code CLI app, its API app and the admin/device roles.
///
/// # Errors
/// Returns the interpret failure with the context `"ZitadelSetupScore"`.
async fn provision_zitadel_project(
    domain: &FleetDomainConfig,
    topology: &K8sAnywhereTopology,
) -> Result<()> {
    // Every role lives in the same project and has no group.
    let role = |key: &str, display_name: &str| ZitadelRole {
        project_name: PROJECT_NAME.to_string(),
        key: key.to_string(),
        display_name: display_name.to_string(),
        group: None,
    };

    // Device Code grant — the only browser-driven OIDC flow that fits a
    // CLI tool: prints a verification URL + user code, polls for a token,
    // no embedded web server / open listener required.
    let cli_app = ZitadelApplication {
        project_name: PROJECT_NAME.to_string(),
        app_name: CLI_APP_NAME.to_string(),
        app_type: ZitadelAppType::DeviceCode,
    };
    let api_app = ZitadelApiApp {
        project_name: PROJECT_NAME.to_string(),
        app_name: API_APP_NAME.to_string(),
    };

    let setup = ZitadelSetupScore {
        host: domain.zitadel_host(),
        // Direct HTTPS through OKD's HAProxy ingress — the operator can
        // run anywhere with kubeconfig + DNS access. These defaults give
        // `https://<host>` (port 443).
        scheme: Default::default(),
        port: None,
        skip_tls: false,
        endpoint: None,
        admin_org_id: None,
        namespace: ZITADEL_NAMESPACE.to_string(),
        applications: vec![cli_app],
        api_apps: vec![api_app],
        roles: vec![
            role(ADMIN_ROLE_KEY, "Fleet Admin"),
            role(DEVICE_ROLE_KEY, "Device"),
        ],
        // Machine users are minted per device by `fleet_rpi_setup`, so
        // the staging deploy stays device-count-agnostic.
        machine_users: vec![],
    };

    let inventory = Inventory::autoload();
    setup
        .interpret(&inventory, topology)
        .await
        .context("ZitadelSetupScore")?;
    Ok(())
}
/// Reads the fleet project's ID from the local `ZitadelClientConfig`
/// cache: the entry registered under `PROJECT_NAME` wins, and the cache's
/// top-level `project_id` is the fallback.
///
/// # Errors
/// Fails when the cache cannot be loaded or contains neither value.
fn read_project_id() -> Result<String> {
    let cache = ZitadelClientConfig::load()
        .context("ZitadelSetupScore did not produce a client config cache")?;
    let found = match cache.project_id_by_name(PROJECT_NAME) {
        Some(id) => Some(id),
        None => cache.project_id.as_ref(),
    };
    found
        .cloned()
        .context("project_id missing from ZitadelClientConfig cache")
}
/// Persist the callout's issuer NKey seed in a K8s secret so re-runs
/// of the staging deploy don't invalidate previously-issued user JWTs
/// already in flight on customer Pis.
async fn ensure_issuer_seed(topology: &K8sAnywhereTopology) -> Result<String> {
use k8s_openapi::ByteString;
use k8s_openapi::api::core::v1::{Namespace, Secret};
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
use std::collections::BTreeMap;
let k8s = topology
.k8s_client()
.await
.map_err(|e| anyhow::anyhow!("k8s_client: {e}"))?;
if k8s
.get_resource::<Namespace>(FLEET_NAMESPACE, None)
.await?
.is_none()
{
let ns = Namespace {
metadata: ObjectMeta {
name: Some(FLEET_NAMESPACE.to_string()),
..Default::default()
},
..Default::default()
};
k8s.create(&ns, None).await.ok();
}
if let Some(existing) = k8s
.get_resource::<Secret>(ISSUER_SEED_SECRET, Some(FLEET_NAMESPACE))
.await?
&& let Some(data) = existing.data
&& let Some(seed_bytes) = data.get("seed")
{
let seed = String::from_utf8(seed_bytes.0.clone())?;
return Ok(seed.trim().to_string());
}
let seed = KeyPair::new_account()
.seed()
.map_err(|e| anyhow::anyhow!("nkey seed: {e}"))?;
let mut data = BTreeMap::new();
data.insert("seed".to_string(), ByteString(seed.as_bytes().to_vec()));
let secret = Secret {
metadata: ObjectMeta {
name: Some(ISSUER_SEED_SECRET.to_string()),
namespace: Some(FLEET_NAMESPACE.to_string()),
..Default::default()
},
data: Some(data),
type_: Some("Opaque".to_string()),
..Default::default()
};
k8s.create(&secret, Some(FLEET_NAMESPACE)).await.ok();
Ok(seed)
}
// ---- NATS values -----------------------------------------------------------
/// Render NATS Helm values for an OKD-flavored deployment with WSS
/// ingress + auth callout + JetStream.
///
/// **Why WSS rather than plain NATS-on-TLS:** OKD's default ingress
/// controller (HAProxy) is HTTP-aware and edge-terminates TLS. NATS
/// over WebSocket goes through that ingress unchanged; native NATS
/// TCP would require a TCP loadbalancer service or a passthrough
/// Route, both of which are extra infra the customer's cluster may
/// not have. WSS is also the default async-nats client transport on
/// `wss://...` URLs — no special agent code needed.
///
/// `issuer_pubkey` is forwarded to `render_auth_callout_block`. The two
/// passwords are escaped for YAML double-quoted scalars before being
/// embedded, so a `"` or `\` in a password cannot break or alter the
/// document. Returns the complete values document as a string.
pub fn render_nats_values(
    domain: &FleetDomainConfig,
    issuer_pubkey: &str,
    nats_auth_pass: &str,
    nats_system_pass: &str,
) -> String {
    let auth_callout = render_auth_callout_block(issuer_pubkey, NATS_AUTH_USER, NATS_ACCOUNT);
    // The first line takes its indentation from the template below; every
    // following line is prefixed so the block stays under `merge:`.
    let auth_callout_indented = auth_callout
        .lines()
        .enumerate()
        .map(|(i, l)| {
            if i == 0 {
                l.to_string()
            } else {
                format!(" {l}")
            }
        })
        .collect::<Vec<_>>()
        .join("\n");
    format!(
        r#"fullnameOverride: {nats_release}
config:
cluster:
enabled: false
jetstream:
enabled: true
fileStorage:
enabled: true
size: 5Gi
websocket:
enabled: true
port: 8443
ingress:
enabled: true
className: openshift-default
pathType: Prefix
hosts:
- {nats_wss_host}
annotations:
# OKD HAProxy edge-terminates TLS — the chart's default Route
# generation needs `route.openshift.io/termination: edge` so
# the Route's TLS block is "edge", matching the cluster's wildcard
# cert behavior. Switch to `reencrypt` if you need TLS all the
# way to the NATS pod.
route.openshift.io/termination: edge
haproxy.router.openshift.io/timeout: "1h"
merge:
{auth_callout_indented}
accounts:
{nats_account}:
jetstream: enabled
users:
- user: "{auth_user}"
password: "{auth_pass}"
SYS:
users:
- user: "{sys_user}"
password: "{sys_pass}"
system_account: SYS
service:
ports:
nats:
enabled: true
"#,
        nats_release = NATS_RELEASE,
        nats_wss_host = domain.nats_wss_host(),
        nats_account = NATS_ACCOUNT,
        auth_user = NATS_AUTH_USER,
        auth_pass = yaml_double_quoted_escape(nats_auth_pass),
        sys_user = NATS_SYSTEM_USER,
        sys_pass = yaml_double_quoted_escape(nats_system_pass),
    )
}

/// Escapes `raw` for embedding inside a YAML double-quoted scalar:
/// backslash, double quote, newline, carriage return and tab become their
/// backslash escape sequences; every other character is copied unchanged.
fn yaml_double_quoted_escape(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
// ---- readiness -------------------------------------------------------------
/// Polls Zitadel's OpenID discovery document until it answers with a
/// success status: up to 180 attempts, 2 s apart, each request capped at
/// 5 s. Progress is logged on every 30th attempt.
///
/// # Errors
/// Fails when the HTTP client cannot be built or when no attempt succeeds.
async fn wait_for_zitadel_ready(domain: &FleetDomainConfig) -> Result<()> {
    let well_known = format!(
        "{}/.well-known/openid-configuration",
        domain.zitadel_issuer_url()
    );
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(5))
        .build()?;
    for attempt in 1..=180 {
        let should_log = attempt % 30 == 0;
        match client.get(&well_known).send().await {
            Ok(resp) => {
                let status = resp.status();
                if status.is_success() {
                    return Ok(());
                }
                if should_log {
                    info!("Zitadel HTTPS {} (attempt {attempt}/180)", status);
                }
            }
            Err(e) => {
                if should_log {
                    info!("Zitadel unreachable: {e} (attempt {attempt}/180)");
                }
            }
        }
        tokio::time::sleep(Duration::from_secs(2)).await;
    }
    anyhow::bail!("timed out waiting for Zitadel at {well_known}")
}
// ---- helpful printout ------------------------------------------------------
impl StagingHandles {
/// Print the operator's "what to do next" panel after a successful
/// staging deploy. Pasted at the end of the binary's run.
pub fn print_next_steps(&self) {
let zitadel = self.domain.zitadel_issuer_url();
let nats = self.domain.nats_wss_url();
println!();
println!("============================================================");
println!(" STAGING DEPLOY COMPLETE");
println!("============================================================");
println!(" Base domain: {}", self.domain.base_domain);
println!(" Zitadel: {zitadel}");
println!(" NATS (WSS): {nats}");
println!(" Project ID: {}", self.project_id);
println!(" Callout image: {}", self.callout_image);
println!(" Issuer pubkey: {}", self.issuer_pubkey);
if let Some(cid) = &self.cli_client_id {
println!(" CLI client_id: {cid}");
println!();
println!(" CLI SSO login (developer-side):");
println!();
println!(" cargo run -p example-fleet-sso-login -- \\");
println!(" --base-domain {} \\", self.domain.base_domain);
println!(" --client-id {cid}");
}
println!();
println!(" Onboard a Pi:");
println!();
println!(" PAT=$(kubectl -n zitadel get secret iam-admin-pat \\");
println!(" -o jsonpath='{{.data.pat}}' | base64 -d)");
println!();
println!(" cargo run -p example-fleet-rpi-setup -- \\");
println!(" --pi-host <PI_IP> \\");
println!(" --bootstrap-token \"$PAT\" \\");
println!(" --zitadel-issuer-url {zitadel} \\");
println!(" --zitadel-project-id {} \\", self.project_id);
println!(" --nats-url {nats} \\");
println!(" --agent-binary <path-to-aarch64-fleet-agent>");
println!();
println!("============================================================");
}
}
// Unit tests for the pure helpers in this file: hostname/URL derivation
// and the rendered NATS Helm values. None of them touch a cluster.
#[cfg(test)]
mod tests {
use super::*;
// Every derived host and URL is the base domain with a fixed prefix.
#[test]
fn domain_config_derives_hostnames() {
let d = FleetDomainConfig::new("customer1.nationtech.io");
assert_eq!(d.zitadel_host(), "zitadel.customer1.nationtech.io");
assert_eq!(d.nats_wss_host(), "nats.customer1.nationtech.io");
assert_eq!(
d.zitadel_issuer_url(),
"https://zitadel.customer1.nationtech.io"
);
assert_eq!(d.nats_wss_url(), "wss://nats.customer1.nationtech.io/");
}
// Substring checks only; the YAML is not parsed here.
#[test]
fn nats_values_render_includes_wss_ingress_and_auth_callout() {
let d = FleetDomainConfig::new("acme.io");
let yaml = render_nats_values(&d, "ABCDEF", "auth-pass", "sys-pass");
// WSS plumbing.
assert!(yaml.contains("websocket:"));
assert!(yaml.contains("port: 8443"));
assert!(yaml.contains("nats.acme.io"));
// OKD edge-TLS annotations.
assert!(yaml.contains("openshift-default"));
assert!(yaml.contains("route.openshift.io/termination: edge"));
// Auth callout wired through with the issuer pubkey.
assert!(yaml.contains("auth_callout"));
assert!(yaml.contains("issuer: ABCDEF"));
assert!(yaml.contains("auth_users: [ auth ]"));
assert!(yaml.contains("system_account: SYS"));
// Account user.
assert!(yaml.contains("password: \"auth-pass\""));
}
// Ordering check: `auth_callout:` must appear after the `merge:` key.
#[test]
fn nats_values_inline_account_block_under_merge() {
// Prevent regressions where the auth_callout block leaks
// outside the `merge:` indentation level — chart expects it
// under config.merge.
let d = FleetDomainConfig::new("x.io");
let yaml = render_nats_values(&d, "K", "p", "s");
let idx_merge = yaml.find("\n merge:\n").expect("merge block present");
let idx_callout = yaml.find("auth_callout:").expect("auth_callout present");
assert!(idx_callout > idx_merge, "auth_callout must follow merge:");
}
}

View File

@@ -0,0 +1,71 @@
//! `cargo run -p example-fleet-staging-deploy -- --base-domain customer1.nationtech.io ...`
//!
//! Operator-side, run-once-per-customer-instance harness. Brings up
//! the central fleet platform services (Zitadel + NATS + auth callout)
//! against an OKD/K8s cluster pointed to by `KUBECONFIG`. Prints the
//! exact follow-up command the operator runs against a Pi to onboard
//! the first device.
//!
//! See `src/lib.rs` for the architectural notes.
use anyhow::{Context, Result};
use clap::Parser;
use example_fleet_staging_deploy::{FleetDomainConfig, StagingDeployOpts, bring_up_staging};
// Command-line arguments of `fleet-staging-deploy`, parsed with clap. Every
// option can also be supplied through the `FLEET_*` environment variable
// named in its `env = ...` attribute. Only `callout_image` declares a
// default; `kube_context` is optional.
// NOTE(review): the two NATS passwords can be passed as plain CLI
// arguments; prefer the environment variables if argv exposure is a concern.
#[derive(Parser, Debug)]
#[command(
name = "fleet-staging-deploy",
about = "Deploy Zitadel + NATS + auth callout onto an OKD cluster"
)]
struct Cli {
/// Base DNS domain. All cluster-visible services derive from this:
/// `zitadel.<base>`, `nats.<base>`. The customer's wildcard cert /
/// CoreDNS / DNS provider must already point this at the cluster.
#[arg(long, env = "FLEET_BASE_DOMAIN")]
base_domain: String,
/// kubeconfig context to deploy against. Defaults to the
/// kubeconfig's current-context. Set this when your kubeconfig
/// has multiple contexts and you don't want to rely on the
/// global current.
#[arg(long, env = "FLEET_KUBE_CONTEXT")]
kube_context: Option<String>,
/// Container image reference for the harmony-nats-callout binary.
/// The cluster pulls this; operator must have pushed it before
/// running the deploy. Defaults to a quay.io path that the
/// customer should override per their registry.
#[arg(
long,
env = "FLEET_CALLOUT_IMAGE",
default_value = "quay.io/nationtech/harmony-nats-callout:demo"
)]
callout_image: String,
/// Password for the NATS service-account user the callout uses on
/// its own NATS connection. Stored in a K8s secret + listed in
/// the chart's `accounts.DEVICES.users` (which bypass callout —
/// otherwise the callout would deadlock authenticating itself).
#[arg(long, env = "FLEET_NATS_AUTH_PASS")]
nats_auth_pass: String,
/// Password for the NATS SYS account (used for nats-box debugging
/// inside the cluster).
#[arg(long, env = "FLEET_NATS_SYSTEM_PASS")]
nats_system_pass: String,
}
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
let domain = FleetDomainConfig::new(cli.base_domain);
let handles = bring_up_staging(StagingDeployOpts {
domain,
kubeconfig_context: cli.kube_context,
callout_image: cli.callout_image,
nats_auth_pass: cli.nats_auth_pass,
nats_system_pass: cli.nats_system_pass,
})
.await
.context("staging deploy")?;
handles.print_next_steps();
Ok(())
}

View File

@@ -0,0 +1,25 @@
[package]
name = "example_fleet_staging_install"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
description = "Production-shape fleet install: Zitadel + NATS + auth callout + operator on OKD"
[[bin]]
name = "fleet_staging_install"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony-k8s = { path = "../../harmony-k8s" }
harmony-nats-callout = { path = "../../nats/callout" }
harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" }
nkeys = "0.4"
rand = "0.9"
anyhow.workspace = true
clap = { version = "4", features = ["derive", "env"] }
tokio.workspace = true
log.workspace = true
env_logger.workspace = true

View File

@@ -0,0 +1,433 @@
//! Production-shape fleet install for OKD (or any cluster with the
//! same capabilities). Composes:
//!
//! 1. Zitadel + Postgres helm install in `--zitadel-namespace`,
//! edge-TLS Route at `sso-stg.<base>` via cert-manager.
//! 2. ZitadelSetupScore in the same call so we have the
//! `fleet-operator` machine key BEFORE the operator pod starts.
//! 3. Single-instance NATS (JetStream) in `--fleet-namespace` with
//! the auth_callout block wired to the callout's issuer NKey
//! pubkey + WebSocket listener (no_tls — Route owns TLS).
//! 4. NATS WebSocket Route at `nats-fleet-stg.<base>`,
//! edge-TLS, cert-manager-managed cert.
//! 5. NatsAuthCalloutScore deployment (Secret-based env vars only,
//! no volume mounts — OKD restricted-v2 SCC compat).
//! 6. FleetOperatorScore with credentials TOML inlining the
//! `fleet-operator` JSON keyfile (env-var-from-Secret only).
//!
//! `--base-domain` drives every public hostname. It is required, as are
//! `--operator-image` and `--callout-image`; the cluster issuer name,
//! namespaces and Zitadel naming all have overridable defaults.
//!
//! Usage:
//!
//! ```text
//! KUBECONFIG=$ADMIN_KUBECONFIG cargo run -p example_fleet_staging_install -- \
//! --base-domain cb1.nationtech.io \
//! --operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev \
//! --callout-image hub.nationtech.io/harmony/harmony-nats-callout:dev
//! ```
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::nats::capability::NatsCluster;
use harmony::modules::nats::score_nats_k8s::{AuthCalloutCfg, NatsK8sScore, WebSocketRouteCfg};
use harmony::modules::nats_auth_callout::NatsAuthCalloutScore;
use harmony::modules::zitadel::{
MachineKeyType, ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig,
ZitadelMachineUser, ZitadelRole, ZitadelScore, ZitadelSetupScore,
};
use harmony::score::Score;
use harmony::topology::{K8sAnywhereTopology, Topology};
use harmony_fleet_deploy::{FleetOperatorScore, OperatorCredentials};
use harmony_k8s::KubernetesDistribution;
use nkeys::KeyPair;
// Command-line options for the staging install.
//
// `base_domain`, `operator_image` and `callout_image` have no default and are
// therefore required; everything else falls back to the staging conventions
// encoded in the `default_value`s below.
//
// The `///` comments are rendered by clap as `--help` text, so they must
// describe what `main` really does (in particular the derived hostnames).
#[derive(Parser, Debug)]
#[command(
    name = "fleet_staging_install",
    about = "Install fleet staging stack (Zitadel + NATS + callout + operator) on OKD"
)]
struct Cli {
    /// Cluster's public base domain. Hostnames are derived from it:
    /// sso-stg.<base> ← Zitadel
    /// nats-fleet-stg.<base> ← NATS WebSocket
    ///
    /// To deploy on a different cluster, change this and re-run.
    #[arg(long)]
    base_domain: String,
    /// cert-manager `ClusterIssuer` name. Drives the
    /// `cert-manager.io/cluster-issuer` annotation on the Zitadel
    /// and NATS Routes. Override per cluster if your operator uses
    /// a different issuer name.
    #[arg(long, default_value = "letsencrypt-prod")]
    cluster_issuer: String,
    /// Namespace for NATS, callout, operator.
    #[arg(long, default_value = "fleet-staging")]
    fleet_namespace: String,
    /// Namespace for Zitadel + Postgres.
    #[arg(long, default_value = "zitadel-staging")]
    zitadel_namespace: String,
    /// Operator container image (`repository:tag`). Public on
    /// hub.nationtech.io for the demo; ImagePullSecret for that
    /// registry must already be present in `--fleet-namespace`.
    #[arg(long)]
    operator_image: String,
    /// Auth callout container image (`repository:tag`).
    #[arg(long)]
    callout_image: String,
    /// NATS account name auth-callout-issued users land in. Must
    /// match the NATS Helm `auth_callout.account` field. Default
    /// `FLEET` matches the rest of the staging conventions.
    #[arg(long, default_value = "FLEET")]
    nats_account: String,
    /// Zitadel chart version pin.
    #[arg(long, default_value = "v4.12.1")]
    zitadel_version: String,
    /// Project name created inside Zitadel for fleet auth.
    #[arg(long, default_value = "fleet")]
    project_name: String,
    /// Role name granting full admin (operator + manual ops). The
    /// callout maps this role to `pub/sub: [">"]`.
    #[arg(long, default_value = "fleet-admin")]
    admin_role: String,
    /// Role name granting per-device scoped permissions.
    #[arg(long, default_value = "device")]
    device_role: String,
    /// Username of the operator's Zitadel machine user. Distinct
    /// from `fleet-ops` (manual admin tooling) for audit trail.
    #[arg(long, default_value = "fleet-operator")]
    operator_username: String,
    /// Username of the manual-admin Zitadel machine user (the one
    /// you mint tokens with from your laptop).
    #[arg(long, default_value = "fleet-ops")]
    admin_username: String,
}
/// Installs the fleet staging stack end to end and prints a summary banner.
///
/// The steps run strictly in sequence, each as a Score interpreted against a
/// `K8sAnywhereTopology` built from the environment:
///
/// 1. Zitadel helm install.
/// 2. Zitadel setup (project, roles, apps, machine users); the project id,
///    operator machine key and device-code client id are then read back from
///    the `ZitadelClientConfig` cache.
/// 3. NATS install with the auth-callout block and WebSocket route.
/// 4. Auth callout deployment.
/// 5. Fleet operator deployment with inlined credentials TOML.
/// 6. Summary banner, including a ready-to-paste `fleet_device_enroll` command.
///
/// NOTE(review): a fresh issuer NKey and NATS auth password are generated on
/// every invocation, so a re-run presumably rotates both — confirm that the
/// downstream Scores tolerate that.
///
/// # Errors
/// Returns the first failure of any step, annotated with the step's context.
#[tokio::main]
async fn main() -> Result<()> {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
        .try_init()
        .ok();
    let cli = Cli::parse();
    let topology = K8sAnywhereTopology::from_env();
    topology.ensure_ready().await?;

    // Public hostnames, both derived from `--base-domain`.
    let zitadel_host = format!("sso-stg.{}", cli.base_domain);
    let nats_ws_host = format!("nats-fleet-stg.{}", cli.base_domain);

    // In-cluster NATS endpoint. Built once and shared by the callout, the
    // operator and the summary banner so the three can never drift apart.
    let nats_release = "fleet-nats";
    let nats_in_cluster_url = format!(
        "nats://{nats_release}.{}.svc.cluster.local:4222",
        cli.fleet_namespace
    );

    // ---- [1/6] Zitadel helm install -------------------------------------
    let zitadel = ZitadelScore {
        host: zitadel_host.clone(),
        zitadel_version: cli.zitadel_version.clone(),
        external_secure: true,
        external_port: None,
        namespace: cli.zitadel_namespace.clone(),
        cluster_issuer: cli.cluster_issuer.clone(),
    };
    log::info!(
        "[1/6] Zitadel helm: ns={} host={}",
        cli.zitadel_namespace,
        zitadel_host
    );
    zitadel
        .interpret(&Inventory::empty(), &topology)
        .await
        .context("Zitadel helm install")?;

    // ---- [2/6] ZitadelSetupScore: project + roles + machine users -------
    // Run this BEFORE building the operator score so we have the
    // `fleet-operator` machine key in hand when filling
    // OperatorCredentials. The Score caches keys to
    // ZitadelClientConfig on disk; we read them back here.
    log::info!(
        "[2/6] Zitadel setup: project={} admin={} operator={}",
        cli.project_name,
        cli.admin_username,
        cli.operator_username
    );
    let api_app_name = "nats";
    let cli_app_name = "harmony-cli";
    let zitadel_setup = ZitadelSetupScore {
        host: zitadel_host.clone(),
        scheme: Default::default(),
        port: None,
        skip_tls: false,
        endpoint: None,
        admin_org_id: None,
        namespace: cli.zitadel_namespace.clone(),
        // Device-code OIDC app for human admin login from
        // `fleet_device_enroll`'s SSO flow. Operators sign in here
        // with their personal Zitadel account; their resulting
        // access token is what `mint_device_credentials` uses to
        // create per-device users + keys. The numeric `client_id`
        // generated by Zitadel for this app is what gets passed to
        // `--admin-oidc-client-id`; we read it back from the
        // ZitadelClientConfig cache below and print it in the
        // success banner.
        applications: vec![ZitadelApplication {
            project_name: cli.project_name.clone(),
            app_name: cli_app_name.to_string(),
            app_type: ZitadelAppType::DeviceCode,
        }],
        api_apps: vec![ZitadelApiApp {
            project_name: cli.project_name.clone(),
            app_name: api_app_name.to_string(),
        }],
        roles: vec![
            ZitadelRole {
                project_name: cli.project_name.clone(),
                key: cli.admin_role.clone(),
                display_name: "Fleet Admin".to_string(),
                group: None,
            },
            ZitadelRole {
                project_name: cli.project_name.clone(),
                key: cli.device_role.clone(),
                display_name: "Device".to_string(),
                group: None,
            },
        ],
        machine_users: vec![
            ZitadelMachineUser {
                username: cli.admin_username.clone(),
                name: "Fleet Operations".to_string(),
                create_pat: false,
                machine_key: Some(MachineKeyType::Json),
                project_name: Some(cli.project_name.clone()),
                grant_roles: vec![cli.admin_role.clone()],
            },
            ZitadelMachineUser {
                username: cli.operator_username.clone(),
                name: "Fleet Operator (in-cluster)".to_string(),
                create_pat: false,
                machine_key: Some(MachineKeyType::Json),
                project_name: Some(cli.project_name.clone()),
                grant_roles: vec![cli.admin_role.clone()],
            },
        ],
    };
    zitadel_setup
        .interpret(&Inventory::empty(), &topology)
        .await
        .context("Zitadel setup (project + roles + machine users)")?;

    // Read back the project_id + operator key from cache.
    let zcfg = ZitadelClientConfig::load()
        .context("ZitadelSetupScore did not produce a client config cache")?;
    let project_id = zcfg
        .project_id_by_name(&cli.project_name)
        .or(zcfg.project_id.as_ref())
        .context("project_id missing from cache after setup")?
        .clone();
    let operator_machine_key = zcfg
        .machine_key(&cli.operator_username)
        .with_context(|| {
            format!(
                "machine key for {} missing from cache after setup",
                cli.operator_username
            )
        })?
        .clone();
    let cli_client_id = zcfg
        .client_id(cli_app_name)
        .with_context(|| {
            format!(
                "OIDC client_id for app '{cli_app_name}' missing from cache — \
                 ZitadelSetupScore should have created the app and populated \
                 ZitadelClientConfig.apps"
            )
        })?
        .clone();
    log::info!("[2/6] project_id resolved: {project_id}");
    log::info!("[2/6] device-code client_id for '{cli_app_name}' resolved: {cli_client_id}");

    // ---- Shared by [3/6] and [4/6]: issuer NKey + NATS auth user --------
    // The callout signs user JWTs with this account NKey. NATS server
    // is configured with the matching pubkey via the auth_callout
    // block in the helm values rendered by NatsK8sScore.
    let issuer_kp = KeyPair::new_account();
    let issuer_seed = issuer_kp
        .seed()
        .map_err(|e| anyhow::anyhow!("issuer NKey seed: {e}"))?;
    let issuer_pubkey = issuer_kp.public_key();
    let nats_auth_user = "auth";
    let nats_auth_pass = generate_alphanum(24);

    // ---- [3/6] NATS install ---------------------------------------------
    log::info!(
        "[3/6] NATS install: ns={} release={} ws={}",
        cli.fleet_namespace,
        nats_release,
        nats_ws_host
    );
    let nats_cluster = NatsCluster {
        namespace: cli.fleet_namespace.clone(),
        // `domain` is unused in single-instance mode (gateway off).
        // Kept here for the legacy supercluster code path which the
        // staging install doesn't take.
        domain: cli.base_domain.clone(),
        replicas: 1,
        name: nats_release.to_string(),
        gateway_advertise: String::new(),
        dns_name: nats_ws_host.clone(),
        // Static-string fields the NatsCluster shape requires; only
        // referenced when `gateway` is Some, which it isn't here.
        supercluster_ca_secret_name: "fleet-nats-supercluster-ca",
        tls_cert_name: "fleet-nats-tls",
        jetstream_enabled: "true",
    };
    let nats = NatsK8sScore {
        distribution: KubernetesDistribution::OpenshiftFamily,
        cluster: nats_cluster,
        peers: None,
        ca_bundle: None,
        gateway: None, // single-instance — drop the gateway block
        auth_callout: Some(AuthCalloutCfg {
            issuer_pubkey: issuer_pubkey.clone(),
            auth_user: nats_auth_user.to_string(),
            auth_pass: nats_auth_pass.clone(),
            account: cli.nats_account.clone(),
        }),
        websocket: Some(WebSocketRouteCfg {
            host: nats_ws_host.clone(),
            cluster_issuer: cli.cluster_issuer.clone(),
        }),
    };
    nats.interpret(&Inventory::empty(), &topology)
        .await
        .context("NATS install (single-instance + auth_callout + WS Route)")?;

    // ---- [4/6] Auth callout deployment ----------------------------------
    log::info!(
        "[4/6] Auth callout: image={} project_id={}",
        cli.callout_image,
        project_id
    );
    let mut callout = NatsAuthCalloutScore::new(
        "fleet-callout",
        &cli.fleet_namespace,
        nats_in_cluster_url.clone(),
        format!("https://{zitadel_host}"),
        project_id.clone(),
        nats_auth_user,
        &nats_auth_pass,
        &issuer_seed,
    )
    .image(&cli.callout_image)
    .target_account(&cli.nats_account)
    .admin_role(&cli.admin_role)
    .device_role(&cli.device_role)
    .danger_accept_invalid_certs(false);
    callout.device_id_claim = "client_id".to_string();
    callout.device_id_prefix_strip = "device-".to_string();
    callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
    callout
        .interpret(&Inventory::empty(), &topology)
        .await
        .context("auth callout deploy")?;

    // ---- [5/6] Operator deployment with credentials ---------------------
    log::info!("[5/6] Operator: image={}", cli.operator_image);
    // `key_json` MUST use TOML literal multi-line strings (`'''...'''`),
    // not basic multi-line (`"""..."""`). Basic strings interpret
    // backslash escapes, which corrupts the JSON keyfile: every `\n`
    // inside the embedded RSA private key gets expanded to a literal
    // newline (0x0A) before JSON parsing sees it, and JSON disallows
    // raw control chars inside strings ("control character found while
    // parsing a string"). Literal strings preserve `\n` as-is so the
    // downstream JSON parser interprets it as an escape and decodes
    // the multi-line PEM correctly.
    let credentials_toml = format!(
        r#"type = "zitadel-jwt"
oidc_issuer_url = "https://{zitadel_host}"
audience = "{project_id}"
key_json = '''{operator_key}'''
"#,
        zitadel_host = zitadel_host,
        project_id = project_id,
        operator_key = operator_machine_key,
    );
    let mut operator = FleetOperatorScore::new()
        .namespace(&cli.fleet_namespace)
        .release_name("harmony-fleet-operator")
        .image(&cli.operator_image)
        .image_pull_policy("Always")
        .nats_url(nats_in_cluster_url.clone())
        .log_level("info,kube_runtime=warn");
    operator.credentials = Some(OperatorCredentials { credentials_toml });
    operator
        .interpret(&Inventory::empty(), &topology)
        .await
        .context("operator deploy")?;

    // ---- [6/6] Summary banner -------------------------------------------
    log::info!("[6/6] Stack installed.");
    println!("\n=== fleet-staging install complete ===");
    println!("Zitadel: https://{zitadel_host}/");
    println!("NATS WS public: wss://{nats_ws_host}/");
    println!("NATS in-cluster: {nats_in_cluster_url}");
    println!(
        "Operator: oc -n {} get deploy/harmony-fleet-operator",
        cli.fleet_namespace
    );
    println!(
        "Auth callout: oc -n {} get deploy/fleet-callout",
        cli.fleet_namespace
    );
    println!("Project id: {project_id}");
    println!(
        "Admin user: {} (machine key in ~/.local/share/harmony/zitadel/client-config.json)",
        cli.admin_username
    );
    println!(
        "Operator user: {} (machine key embedded in operator's Secret)",
        cli.operator_username
    );
    println!("SSO client_id: {cli_client_id} (app '{cli_app_name}', device-code grant)");
    println!();
    println!("To enroll a device, pass the SSO client_id explicitly:");
    println!(
        " fleet_device_enroll \\\n \
         --target ssh://<user>@<device> \\\n \
         --issuer-url https://{zitadel_host} \\\n \
         --audience {project_id} \\\n \
         --nats-url wss://{nats_ws_host} \\\n \
         --admin-oidc-client-id {cli_client_id} \\\n \
         --agent-binary <path>"
    );
    Ok(())
}
/// Builds a random string of exactly `len` characters drawn uniformly from
/// `A-Z`, `a-z` and `0-9`, using the thread-local generator from `rand`.
///
/// Returns an empty string when `len` is 0.
fn generate_alphanum(len: usize) -> String {
    use rand::Rng;
    const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    let mut rng = rand::rng();
    // Every charset byte is ASCII, so one pushed char is one byte of output.
    let mut out = String::with_capacity(len);
    for _ in 0..len {
        let pick = rng.random_range(0..CHARSET.len());
        out.push(CHARSET[pick] as char);
    }
    out
}

View File

@@ -0,0 +1,18 @@
[package]
name = "example_fleet_vm_setup"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "fleet_vm_setup"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony", features = ["kvm"] }
harmony_types = { path = "../../harmony_types" }
tokio.workspace = true
log.workspace = true
env_logger.workspace = true
anyhow.workspace = true
clap.workspace = true

View File

@@ -0,0 +1,69 @@
# example_fleet_vm_setup
End-to-end driver for the IoT walking-skeleton VM-as-device flow. Runs two
Harmony Scores in sequence:
1. **`ProvisionVmScore`** (backed here by `KvmVirtualMachineHost`) — provision
   a libvirt VM from an Ubuntu 24.04 cloud image with a cloud-init seed ISO
   that authorizes one SSH key. Returns the booted VM's IP.
2. **`FleetDeviceSetupScore`** — SSH into the VM (via the Ansible-backed
`HostConfigurationProvider`) and install podman + the `fleet-agent`
binary, drop the TOML config, bring up the systemd unit.
After a successful run, the VM is a fleet member reporting to NATS under
the `--device-id` you chose (a fresh id is generated when the flag is
omitted), carrying the `--labels` you passed (default `group=group-a`).
## One-time setup
```bash
WORK=/var/tmp/harmony-iot-smoke
mkdir -p "$WORK/ssh"
# 1. Ubuntu 24.04 cloud image (~700 MB) — cached between runs.
curl -o "$WORK/ubuntu-24.04-server-cloudimg-amd64.img" \
https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img
# 2. SSH keypair the VM will trust.
ssh-keygen -t ed25519 -N '' -f "$WORK/ssh/id_ed25519"
# 3. Runtime deps — Harmony self-installs Ansible into a managed venv
# under $HARMONY_DATA_DIR/ansible-venv on first run, so you only need
# python3 + venv on the runner. No system-wide `ansible` needed.
# On Arch:
# sudo pacman -S libvirt qemu-full xorriso python
# On Debian/Ubuntu:
# sudo apt install libvirt-daemon-system qemu-kvm xorriso python3 python3-venv
# 4. libvirt default network.
sudo virsh net-start default
sudo virsh net-autostart default
```
## Run
```bash
cargo build -p fleet-agent-v0
# The cloud image, SSH keypair and libvirt pool are resolved by the binary
# itself (its `ensure_*` helpers), so no path flags are needed for them.
cargo run -p example_fleet_vm_setup -- \
  --agent-binary target/debug/fleet-agent-v0 \
  --nats-url nats://192.168.122.1:4222
```
## Changing groups
Re-running with different `--labels` rewrites
`/etc/fleet-agent/config.toml` on the VM and restarts the agent. The VM
itself is untouched.
```bash
cargo run -p example_fleet_vm_setup -- ... --labels group=group-b
```
## Full end-to-end via smoke test
See `fleet/scripts/smoke-a3.sh` — stands up NATS in a podman container,
runs this example, asserts the agent's status lands in NATS.

View File

@@ -0,0 +1,284 @@
//! End-to-end driver for the IoT walking-skeleton VM-as-device flow.
//!
//! Runs two Scores back-to-back:
//! 1. `ProvisionVmScore` — bound to the generic `VirtualMachineHost`
//! capability. Here we satisfy it with `KvmVirtualMachineHost`
//! (libvirt). Swapping to VMware/Proxmox/cloud would be a
//! different topology injection with the same Score code.
//! 2. `FleetDeviceSetupScore` — SSHes into the booted VM and installs
//! podman + fleet-agent via the split Linux-host capabilities.
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::fleet::{
FleetDeviceSetupConfig, FleetDeviceSetupScore, ProvisionVmScore,
check_fleet_smoke_preflight_for_arch, ensure_fleet_ssh_keypair,
};
use harmony::modules::kvm::KvmVirtualMachineHost;
use harmony::modules::kvm::config::init_executor;
use harmony::modules::linux::{LinuxHostTopology, SshCredentials};
use harmony::topology::{VirtualMachineSpec, VmArchitecture, VmFirstBootConfig};
use harmony_types::id::Id;
use std::path::PathBuf;
#[derive(Parser, Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
enum CliArch {
/// Native KVM on x86_64 hosts.
X86_64,
/// Aarch64 guest. Runs on native KVM on arm64 hosts and under
/// qemu-system-aarch64 TCG emulation on x86_64 hosts (slower).
Aarch64,
}
/// Translates the CLI-facing architecture choice into the topology-level
/// `VmArchitecture`; the mapping is one-to-one.
impl From<CliArch> for VmArchitecture {
    fn from(cli_arch: CliArch) -> Self {
        match cli_arch {
            CliArch::Aarch64 => Self::Aarch64,
            CliArch::X86_64 => Self::X86_64,
        }
    }
}
// Command-line options for the VM provisioning + onboarding driver.
//
// The `///` comments are rendered by clap as `--help` text, so they describe
// what `main` really does with each value.
#[derive(Parser, Debug)]
#[command(
    name = "fleet_vm_setup",
    about = "Provision one VM + onboard it into the IoT fleet"
)]
struct Cli {
    /// Guest CPU architecture. Selects the cloud image, qemu
    /// emulator, and firmware model.
    #[arg(long, value_enum, default_value_t = CliArch::X86_64)]
    arch: CliArch,
    /// libvirt domain name for the VM.
    #[arg(long, default_value = "fleet-vm-01")]
    vm_name: String,
    /// Device id the agent will announce to NATS. Defaults to a
    /// fresh `Id` (hex timestamp + random suffix).
    #[arg(long)]
    device_id: Option<String>,
    /// Routing labels to write into the agent's TOML config.
    /// Comma-separated list of `key=value` pairs. Published in every
    /// DeviceInfo heartbeat; the operator resolves Deployment
    /// `spec.targetSelector` against this map. At least one label
    /// is required so the device is targetable — the default
    /// `group=group-a` satisfies that.
    #[arg(long, default_value = "group=group-a")]
    labels: String,
    /// libvirt network name to attach the VM to.
    #[arg(long, default_value = "default")]
    network: String,
    /// Admin username created on first boot.
    #[arg(long, default_value = "fleet-admin")]
    admin_user: String,
    /// Optional plaintext password for the admin user. Enables SSH
    /// password auth on the guest — intended for interactive
    /// debugging / reliability-testing sessions where the operator
    /// wants to break things on purpose. Leave unset for key-only
    /// auth (production default).
    #[arg(long, env = "FLEET_VM_ADMIN_PASSWORD")]
    admin_password: Option<String>,
    /// Path to the cross-compiled fleet-agent binary.
    /// Required unless `--bootstrap-only` or `--only-vm` is set.
    #[arg(long)]
    agent_binary: Option<PathBuf>,
    /// NATS URL the agent should connect to.
    #[arg(long, default_value = "nats://192.168.122.1:4222")]
    nats_url: String,
    /// NATS username written into the agent's shared-credentials config.
    #[arg(long, default_value = "smoke")]
    nats_user: String,
    /// NATS password written into the agent's shared-credentials config.
    #[arg(long, default_value = "smoke")]
    nats_pass: String,
    /// Only run the VM-provisioning step; skip device setup.
    #[arg(long)]
    only_vm: bool,
    /// Run preflight + asset bootstrap (ansible venv, cloud image,
    /// SSH key, libvirt pool) and exit.
    #[arg(long)]
    bootstrap_only: bool,
    /// Virtual disk size in GiB. The stock Ubuntu cloud image has
    /// only ~2 GiB of root — resized on first boot by
    /// cloud-initramfs-growroot. Bump this to 16 GiB by default so
    /// podman can sideload a couple of container images without
    /// running out of space.
    #[arg(long, default_value_t = 16)]
    disk_size_gb: u32,
}
#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
let cli = Cli::parse();
let arch: VmArchitecture = cli.arch.into();
check_fleet_smoke_preflight_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("{e}"))?;
if cli.bootstrap_only {
harmony::modules::linux::ensure_ansible_venv()
.await
.map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?;
harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("cloud image: {e}"))?;
ensure_fleet_ssh_keypair()
.await
.map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
harmony::modules::fleet::ensure_harmony_fleet_pool()
.await
.map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
println!("bootstrap complete");
return Ok(());
}
// --- Step 1: provision the VM ---
let base_image = harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("cloud image: {e}"))?;
let pool = harmony::modules::fleet::ensure_harmony_fleet_pool()
.await
.map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
let ssh = ensure_fleet_ssh_keypair()
.await
.map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
let authorized_key = harmony::modules::fleet::read_public_key(&ssh)
.await
.map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?;
let executor = init_executor().map_err(|e| anyhow::anyhow!("KVM init: {e}"))?;
let vm_host = KvmVirtualMachineHost::new(
"kvm-local",
executor,
pool.name.clone(),
pool.path.clone(),
base_image,
);
let vm_score = ProvisionVmScore {
spec: VirtualMachineSpec {
name: cli.vm_name.clone(),
architecture: arch,
cpus: 2,
memory_mib: 2048,
disk_size_gb: Some(cli.disk_size_gb),
network: cli.network.clone(),
first_boot: Some(VmFirstBootConfig {
hostname: Some(cli.vm_name.clone()),
admin_user: Some(cli.admin_user.clone()),
authorized_keys: vec![authorized_key],
admin_password: cli.admin_password.clone(),
}),
},
};
let vm_ip = run_vm_score(&vm_score, &vm_host).await?;
println!("VM '{}' up at {vm_ip}", cli.vm_name);
if cli.only_vm {
return Ok(());
}
// --- Step 2: onboard the VM into the fleet ---
let agent_binary = cli
.agent_binary
.clone()
.context("--agent-binary is required (e.g. target/release/fleet-agent-v0)")?;
let device_id = cli
.device_id
.clone()
.map(Id::from)
.unwrap_or_else(Id::default);
let linux_topology = LinuxHostTopology::new(
format!("linux-{}", cli.vm_name),
vm_ip.parse().context("VM IP is not a valid IP address")?,
SshCredentials {
user: cli.admin_user.clone(),
private_key_path: ssh.private_key.clone(),
remote_python: Some("/usr/bin/python3".to_string()),
sudo_password: None,
},
);
let labels = parse_labels(&cli.labels)?;
let labels_display = labels
.iter()
.map(|(k, v)| format!("{k}={v}"))
.collect::<Vec<_>>()
.join(",");
let setup_score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig {
device_id: device_id.clone(),
labels,
nats_urls: vec![cli.nats_url.clone()],
// VM smoke harness keeps shared-creds for v0; the customer-
// facing Pi flow uses Zitadel JWT (see fleet_rpi_setup).
auth: harmony::modules::fleet::FleetDeviceAuth::TomlShared {
nats_user: cli.nats_user.clone(),
nats_pass: cli.nats_pass.clone(),
},
agent_binary_path: agent_binary,
hosts_entries: vec![],
});
run_setup_score(&setup_score, &linux_topology).await?;
println!("device '{device_id}' ({labels_display}) onboarded via {vm_ip}");
Ok(())
}
/// Parse `key=value,key=value` into a BTreeMap. Errors on any
/// malformed chunk, empty keys/values, or an empty map overall —
/// a device with no labels is practically untargetable, so we'd
/// rather fail at the CLI than silently onboard a ghost.
fn parse_labels(raw: &str) -> anyhow::Result<std::collections::BTreeMap<String, String>> {
let mut out = std::collections::BTreeMap::new();
for piece in raw.split(',').map(str::trim).filter(|p| !p.is_empty()) {
let (k, v) = piece
.split_once('=')
.ok_or_else(|| anyhow::anyhow!("label chunk '{piece}' missing '='"))?;
let k = k.trim();
let v = v.trim();
if k.is_empty() || v.is_empty() {
anyhow::bail!("label chunk '{piece}' has empty key or value");
}
out.insert(k.to_string(), v.to_string());
}
if out.is_empty() {
anyhow::bail!("--labels must include at least one key=value pair");
}
Ok(out)
}
/// Executes `score` against the KVM host and returns the VM's IP address.
///
/// The IP is taken from the first outcome detail that starts with `ip=`.
///
/// # Errors
/// Fails when the interpret's execution fails, or when no detail carries an
/// `ip=` prefix.
async fn run_vm_score(
    score: &ProvisionVmScore,
    topology: &KvmVirtualMachineHost,
) -> Result<String> {
    use harmony::score::Score;

    let inventory = Inventory::empty();
    let outcome = Score::<KvmVirtualMachineHost>::create_interpret(score)
        .execute(&inventory, topology)
        .await
        .map_err(|e| anyhow::anyhow!("ProvisionVmScore execute: {e}"))?;

    let reported_ip = outcome
        .details
        .iter()
        .find_map(|detail| detail.strip_prefix("ip="));

    match reported_ip {
        Some(ip) => Ok(ip.to_string()),
        None => anyhow::bail!("ProvisionVmScore finished without reporting an IP: {outcome:?}"),
    }
}
/// Executes `score` against the Linux host and prints the outcome's message
/// and details on stdout.
///
/// # Errors
/// Fails when the interpret's execution fails.
async fn run_setup_score(
    score: &FleetDeviceSetupScore,
    topology: &LinuxHostTopology,
) -> Result<()> {
    use harmony::score::Score;

    let inventory = Inventory::empty();
    let outcome = Score::<LinuxHostTopology>::create_interpret(score)
        .execute(&inventory, topology)
        .await
        .map_err(|e| anyhow::anyhow!("FleetDeviceSetupScore execute: {e}"))?;

    println!("setup: {} ({:?})", outcome.message, outcome.details);
    Ok(())
}

View File

@@ -0,0 +1,18 @@
[package]
name = "example_harmony_apply_deployment"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "harmony_apply_deployment"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony", default-features = false, features = ["podman"] }
kube = { workspace = true, features = ["runtime", "derive"] }
k8s-openapi = { workspace = true }
serde_json.workspace = true
tokio.workspace = true
anyhow.workspace = true
clap.workspace = true

View File

@@ -0,0 +1,239 @@
//! Typed-Rust applier for the harmony fleet `Deployment` CR.
//!
//! Builds a `Deployment` CR via the typed `DeploymentSpec` +
//! `PodmanV0Score` + `kube::Api`, then either applies it directly
//! through the kube client or prints it to stdout so the user can
//! pipe into `kubectl apply -f -`.
//!
//! The CRD is domain-agnostic — it's "declarative reconcile intent
//! for a set of devices matched by label selector," which is the
//! same shape whether the fleet is Pi podman, OKD clusters, or
//! KVM VMs. The name `harmony_apply_deployment` reflects that
//! (not `iot_`-anything), in line with the review call to position
//! the operator as a generic fleet/reconcile tool.
//!
//! The CRD types live in `harmony::modules::fleet::operator`; the score types
//! live in `harmony::modules::podman` (PodmanV0 being the first
//! reconciler variant — future variants drop in alongside).
//!
//! Typical demo-driver usage:
//!
//! # apply an nginx deployment
//! cargo run -q -p example_harmony_apply_deployment -- \
//! --target-device fleet-smoke-vm-arm \
//! --image nginx:latest
//!
//! # print the CR JSON (lets the user kubectl-apply it manually)
//! cargo run -q -p example_harmony_apply_deployment -- \
//! --target-device fleet-smoke-vm-arm \
//! --image nginx:latest --print | kubectl apply -f -
//!
//! # upgrade the same deployment to a newer image
//! cargo run -q -p example_harmony_apply_deployment -- \
//! --target-device fleet-smoke-vm-arm \
//! --image nginx:1.26
//!
//! # delete the deployment
//! cargo run -q -p example_harmony_apply_deployment -- --delete
use anyhow::{Context, Result};
use clap::Parser;
use harmony::modules::fleet::operator::crd::{
Deployment, DeploymentSpec, Rollout, RolloutStrategy,
};
use harmony::modules::podman::{PodmanService, PodmanV0Score, ReconcileScore};
use harmony::topology::{EnvVar, RestartPolicy, VolumeMount};
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::Client;
use kube::api::{Api, DeleteParams, Patch, PatchParams};
use std::collections::BTreeMap;
// Command-line options for building, applying, printing or deleting one
// fleet `Deployment` CR.
//
// `selectors`, `envs` and `volumes` are collected as raw strings; this
// struct does not validate their `KEY=VALUE` / `HOST:CONTAINER[:ro]` shape.
// NOTE(review): parsing presumably happens while the CR is built — confirm
// against `build_cr`.
//
// `print` and `delete` are independent booleans; `main` checks `print`
// first, so `--print --delete` only prints.
//
// The `///` comments on the fields are consumed by clap's derive macro as
// `--help` text, so editing them changes user-visible output.
#[derive(Parser, Debug)]
#[command(
    name = "harmony_apply_deployment",
    about = "Build + apply a harmony fleet Deployment CR from typed Rust (no yaml)"
)]
struct Cli {
    /// Kubernetes namespace for the Deployment CR.
    #[arg(long, default_value = "fleet-demo")]
    namespace: String,
    /// Deployment CR name. Also used as the KV key suffix and
    /// podman container name on the device.
    #[arg(long, default_value = "hello-world")]
    name: String,
    /// Shortcut: if set, picks a single device by id. Shorthand for
    /// `--selector device-id=<target_device>` — the agent publishes
    /// a `device-id=<id>` label on its DeviceInfo by default so this
    /// works without any cluster-side label pre-wiring.
    #[arg(long, default_value = "fleet-smoke-vm")]
    target_device: String,
    /// Repeatable `key=value` label selector. Takes precedence over
    /// `--target-device` when provided. All pairs AND together.
    #[arg(long = "selector", value_name = "KEY=VALUE")]
    selectors: Vec<String>,
    /// Container image to run.
    #[arg(long, default_value = "docker.io/library/nginx:latest")]
    image: String,
    /// `host:container` port mapping exposed on the device.
    #[arg(long, default_value = "8080:80")]
    port: String,
    /// Repeatable `KEY=VALUE` env var injected into the container.
    #[arg(long = "env", value_name = "KEY=VALUE")]
    envs: Vec<String>,
    /// Repeatable bind-mount in `host_path:container_path[:ro]` form.
    /// Append `:ro` for read-only.
    #[arg(long = "volume", value_name = "HOST:CONTAINER[:ro]")]
    volumes: Vec<String>,
    /// Container restart policy.
    #[arg(long, value_enum, default_value_t = CliRestart::UnlessStopped)]
    restart: CliRestart,
    /// Delete the Deployment CR instead of applying it.
    #[arg(long)]
    delete: bool,
    /// Print the CR as JSON to stdout instead of applying it.
    /// Useful for piping into `kubectl apply -f -`.
    #[arg(long)]
    print: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
let cr = build_cr(&cli);
if cli.print {
println!("{}", serde_json::to_string_pretty(&cr)?);
return Ok(());
}
let client = Client::try_default()
.await
.context("building kube client (is KUBECONFIG set?)")?;
let api: Api<Deployment> = Api::namespaced(client, &cli.namespace);
if cli.delete {
match api.delete(&cli.name, &DeleteParams::default()).await {
Ok(_) => println!("deleted deployment '{}/{}'", cli.namespace, cli.name),
Err(kube::Error::Api(ae)) if ae.code == 404 => {
println!(
"deployment '{}/{}' not found (already gone)",
cli.namespace, cli.name
)
}
Err(e) => anyhow::bail!("delete failed: {e}"),
}
return Ok(());
}
// Server-side apply so repeated invocations (upgrades) patch
// the existing CR instead of erroring with "already exists."
let params = PatchParams::apply("harmony-apply-deployment").force();
let applied = api
.patch(&cli.name, &params, &Patch::Apply(&cr))
.await
.context("applying Deployment CR")?;
let meta = applied.metadata;
println!(
"applied deployment '{}/{}' (resourceVersion={}, image={})",
cli.namespace,
meta.name.as_deref().unwrap_or("?"),
meta.resource_version.as_deref().unwrap_or("?"),
cli.image,
);
Ok(())
}
/// Mirrors `harmony::topology::RestartPolicy` so we can keep the CLI
/// schema stable even if the underlying enum gains variants.
// Value type of the `--restart` flag (`Cli::restart`, declared with
// `value_enum`); the flag defaults to `UnlessStopped`. Converted into
// `RestartPolicy` by the `From<CliRestart>` impl, variant for variant.
// Plain `//` comments are used on the variants on purpose: `///` on a
// `ValueEnum` variant would presumably surface in `--help` — confirm
// against clap's derive docs before switching.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
enum CliRestart {
    // Becomes `RestartPolicy::No`.
    No,
    // Becomes `RestartPolicy::UnlessStopped` (the CLI default).
    UnlessStopped,
    // Becomes `RestartPolicy::OnFailure`.
    OnFailure,
    // Becomes `RestartPolicy::Always`.
    Always,
}
/// One-to-one conversion from the CLI-facing restart choice to the
/// topology-level `RestartPolicy`. The match is exhaustive without a
/// wildcard arm, so adding a `CliRestart` variant fails to compile until
/// it is mapped here.
impl From<CliRestart> for RestartPolicy {
    fn from(choice: CliRestart) -> Self {
        match choice {
            CliRestart::No => Self::No,
            CliRestart::UnlessStopped => Self::UnlessStopped,
            CliRestart::OnFailure => Self::OnFailure,
            CliRestart::Always => Self::Always,
        }
    }
}
/// Splits a `KEY=VALUE` string at the first `=` into an owned
/// `(key, value)` pair.
///
/// Only the first `=` is significant, so the value may itself contain `=`.
///
/// # Errors
/// Returns an error if `s` contains no `=` at all.
fn parse_env(s: &str) -> Result<(String, String)> {
    match s.split_once('=') {
        Some((key, value)) => Ok((key.to_owned(), value.to_owned())),
        None => Err(anyhow::anyhow!("--env expects KEY=VALUE, got {s:?}")),
    }
}
/// Parses a bind-mount spec of the form `HOST:CONTAINER[:ro|rw]` into a
/// `VolumeMount`.
///
/// The optional third field selects the access mode: `ro` makes the mount
/// read-only, `rw` (or no third field) leaves it writable.
///
/// # Errors
/// Returns an error if `s` does not split on `:` into two or three fields,
/// if the third field is anything other than `ro` or `rw`, or if the host
/// or container path is empty (e.g. `:/data` or `/data:`). Empty paths are
/// rejected here so the mistake surfaces at the CLI rather than when the
/// mount is eventually applied on the device.
fn parse_volume(s: &str) -> Result<VolumeMount> {
    let parts: Vec<&str> = s.split(':').collect();
    let (host, cont, read_only) = match parts.as_slice() {
        [host, cont] => (*host, *cont, false),
        [host, cont, "ro"] => (*host, *cont, true),
        [host, cont, "rw"] => (*host, *cont, false),
        _ => anyhow::bail!("--volume expects HOST:CONTAINER[:ro|rw], got {s:?}"),
    };
    if host.is_empty() || cont.is_empty() {
        anyhow::bail!("--volume expects non-empty HOST and CONTAINER paths, got {s:?}");
    }
    Ok(VolumeMount {
        host_path: host.to_string(),
        container_path: cont.to_string(),
        read_only,
    })
}
/// Assembles the fleet `Deployment` CR described by the CLI flags.
///
/// The CR carries a single-service `PodmanV0Score` (named after
/// `cli.name`) wrapped in `ReconcileScore::PodmanV0`, a label selector, and
/// an `Immediate` rollout strategy. The selector is `device-id=<target_device>`
/// when no `--selector` was given; otherwise it is built from the
/// `--selector` pairs, with a later pair overwriting an earlier one that
/// has the same key.
///
/// # Panics
/// Panics if any `--env` value is rejected by `parse_env`, any `--volume`
/// value is rejected by `parse_volume`, or any `--selector` value has no
/// `=`. Inputs are checked in that order.
fn build_cr(cli: &Cli) -> Deployment {
    let mut env: Vec<EnvVar> = Vec::with_capacity(cli.envs.len());
    for raw in &cli.envs {
        let pair = parse_env(raw).expect("--env validated");
        env.push(EnvVar::from(pair));
    }

    let mut volumes: Vec<VolumeMount> = Vec::with_capacity(cli.volumes.len());
    for raw in &cli.volumes {
        volumes.push(parse_volume(raw).expect("--volume validated"));
    }

    let service = PodmanService {
        name: cli.name.clone(),
        image: cli.image.clone(),
        ports: vec![cli.port.clone()],
        env,
        volumes,
        restart_policy: cli.restart.into(),
    };
    let payload = ReconcileScore::PodmanV0(PodmanV0Score {
        services: vec![service],
    });

    let mut match_labels: BTreeMap<String, String> = BTreeMap::new();
    match cli.selectors.as_slice() {
        // No explicit selector: fall back to the single-device shortcut.
        [] => {
            match_labels.insert("device-id".to_string(), cli.target_device.clone());
        }
        pairs => match_labels.extend(pairs.iter().map(|kv| {
            let (key, value) = kv
                .split_once('=')
                .unwrap_or_else(|| panic!("--selector expects KEY=VALUE, got '{kv}'"));
            (key.to_string(), value.to_string())
        })),
    }

    let spec = DeploymentSpec {
        target_selector: LabelSelector {
            match_labels: Some(match_labels),
            match_expressions: None,
        },
        score: payload,
        rollout: Rollout {
            strategy: RolloutStrategy::Immediate,
        },
    };
    Deployment::new(&cli.name, spec)
}

View File

@@ -118,6 +118,8 @@ async fn deploy_zitadel(k3d: &K3d) -> anyhow::Result<()> {
host: ZITADEL_HOST.to_string(),
zitadel_version: "v4.12.1".to_string(),
external_secure: false,
external_port: None,
..Default::default()
};
let topology = create_topology(k3d);
@@ -294,13 +296,19 @@ async fn main() -> anyhow::Result<()> {
// Provision Zitadel project + device-code application
ZitadelSetupScore {
host: ZITADEL_HOST.to_string(),
port: HTTP_PORT as u16,
scheme: Default::default(),
port: None,
skip_tls: true,
endpoint: Some(format!("http://127.0.0.1:{HTTP_PORT}")),
admin_org_id: None,
namespace: "zitadel".to_string(),
applications: vec![ZitadelApplication {
project_name: PROJECT_NAME.to_string(),
app_name: APP_NAME.to_string(),
app_type: ZitadelAppType::DeviceCode,
}],
api_apps: vec![],
roles: vec![],
machine_users: vec![],
}
.interpret(&Inventory::autoload(), &topology)

View File

@@ -27,7 +27,7 @@ async fn main() {
if drain {
let mut options = DrainOptions::default_ignore_daemonset_delete_emptydir_data();
options.timeout = Duration::from_secs(1);
k8s.drain_node(&node_name, &options).await.unwrap();
k8s.drain_node(node_name, &options).await.unwrap();
info!("Node {node_name} successfully drained");
}
@@ -49,7 +49,7 @@ async fn main() {
if reboot {
k8s.reboot_node(
&node_name,
node_name,
&DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
Duration::from_secs(3600),
)

View File

@@ -22,19 +22,19 @@ async fn main() {
let content = inquire::Text::new("File content").prompt().unwrap();
let node_file = NodeFile {
path: path,
content: content,
path,
content,
mode: 0o600,
};
k8s.write_files_to_node(&node, &vec![node_file.clone()])
k8s.write_files_to_node(node, std::slice::from_ref(&node_file))
.await
.unwrap();
let cmd = inquire::Text::new("Command to run on node")
.prompt()
.unwrap();
k8s.run_privileged_command_on_node(&node, &cmd)
k8s.run_privileged_command_on_node(node, &cmd)
.await
.unwrap();

View File

@@ -66,12 +66,12 @@ async fn main() {
Ok(_d) => println!("Deployment success"),
Err(e) => {
println!("Error creating deployment {}", e);
if let kube::Error::Api(error_response) = &e {
if error_response.code == http::StatusCode::CONFLICT.as_u16() {
if let kube::Error::Api(error_response) = &e
&& error_response.code == http::StatusCode::CONFLICT.as_u16()
{
println!("Already exists");
return;
}
}
panic!("{}", e)
}
};

View File

@@ -9,7 +9,7 @@ name = "kvm_okd_ha_cluster"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony = { path = "../../harmony", features = ["kvm"] }
tokio.workspace = true
log.workspace = true
env_logger.workspace = true

View File

@@ -9,7 +9,7 @@ name = "kvm-vm-examples"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony = { path = "../../harmony", features = ["kvm"] }
tokio.workspace = true
log.workspace = true
env_logger.workspace = true

View File

@@ -42,7 +42,7 @@
use clap::{Parser, Subcommand};
use harmony::modules::kvm::config::init_executor;
use harmony::modules::kvm::{
BootDevice, ForwardMode, KvmExecutor, NetworkConfig, NetworkRef, VmConfig, VmStatus,
BootDevice, ForwardMode, KvmExecutor, NetworkConfig, NetworkRef, VmConfig,
};
use log::info;
@@ -345,7 +345,7 @@ async fn status(executor: &KvmExecutor, scenario: &str) -> Result<(), Box<dyn st
}
};
println!("{:<20} {}", "VM", "STATUS");
println!("{:<20} STATUS", "VM");
println!("{}", "-".repeat(35));
for vm in &vms {
let status = match executor.vm_status(vm).await {

View File

@@ -22,9 +22,9 @@ async fn main() {
name: site_1_name.clone(),
gateway_advertise: format!("{site_1_name}-gw.{site_1_domain}:443"),
dns_name: format!("{site_1_name}-gw.{site_1_domain}"),
supercluster_ca_secret_name: supercluster_ca_secret_name,
tls_cert_name: tls_cert_name,
jetstream_enabled: jetstream_enabled,
supercluster_ca_secret_name,
tls_cert_name,
jetstream_enabled,
};
let site_2_name = "site-2".to_string();
@@ -38,9 +38,9 @@ async fn main() {
name: site_2_name.clone(),
gateway_advertise: format!("{site_2_name}-gw.{site_2_domain}:443"),
dns_name: format!("{site_2_name}-gw.{site_2_domain}"),
supercluster_ca_secret_name: supercluster_ca_secret_name,
tls_cert_name: tls_cert_name,
jetstream_enabled: jetstream_enabled,
supercluster_ca_secret_name,
tls_cert_name,
jetstream_enabled,
};
let site_3_name = "site-3".to_string();
@@ -54,9 +54,9 @@ async fn main() {
name: site_3_name.clone(),
gateway_advertise: format!("{site_3_name}-gw.{site_3_domain}:443"),
dns_name: format!("{site_3_name}-gw.{site_3_domain}"),
supercluster_ca_secret_name: supercluster_ca_secret_name,
tls_cert_name: tls_cert_name,
jetstream_enabled: jetstream_enabled,
supercluster_ca_secret_name,
tls_cert_name,
jetstream_enabled,
};
let clusters = vec![nats_site_1, nats_site_2, nats_site_3];

View File

@@ -253,10 +253,7 @@ async fn create_nats_certs<T: Topology + CertificateManagement>(
debug!("creating issuer '{}'", self_signed_issuer_name);
topology
.create_issuer(
self_signed_issuer_name.to_string(),
&self_signed_cert_config,
)
.create_issuer(self_signed_issuer_name.to_string(), self_signed_cert_config)
.await?;
debug!("creating certificate {root_ca_cert_name}");
@@ -294,7 +291,7 @@ async fn create_nats_certs<T: Topology + CertificateManagement>(
async fn build_ca_bundle_secret(
namespace: &str,
nats_cluster: &NatsCluster,
bundle: &Vec<String>,
bundle: &[String],
) -> Secret {
Secret {
metadata: ObjectMeta {
@@ -309,7 +306,7 @@ async fn build_ca_bundle_secret(
}
}
async fn build_secret_data(bundle: &Vec<String>) -> BTreeMap<String, ByteString> {
async fn build_secret_data(bundle: &[String]) -> BTreeMap<String, ByteString> {
let mut data = BTreeMap::new();
data.insert(
@@ -323,7 +320,7 @@ async fn build_secret_data(bundle: &Vec<String>) -> BTreeMap<String, ByteString>
async fn build_ca_bundle_secret_score<T: Topology + K8sclient + 'static>(
_topology: T,
nats_cluster: &NatsCluster,
ca_bundle: &Vec<String>,
ca_bundle: &[String],
namespace: String,
) -> Box<dyn Score<T>> {
let bundle_secret = build_ca_bundle_secret(&namespace, nats_cluster, ca_bundle).await;
@@ -343,6 +340,7 @@ async fn build_route_score<T: Topology + K8sclient + 'static>(
let route = OKDRouteScore {
name: cluster.name.to_string(),
namespace,
annotations: Default::default(),
spec: RouteSpec {
to: RouteTargetReference {
kind: "Service".to_string(),
@@ -383,6 +381,7 @@ async fn build_deploy_nats_score<T: Topology + HelmCommand + TlsRouter + 'static
let domain = topology.get_internal_domain().await.unwrap().unwrap();
// Inject gateway config into the 'merge' block to comply with chart structure
let tls_secret_name = format!("{}-tls", cluster.tls_cert_name);
let values_yaml = Some(format!(
r#"config:
merge:
@@ -455,7 +454,7 @@ natsBox:
domain = domain,
gateway_gateways = gateway_gateways,
gateway_advertise = cluster.gateway_advertise,
tls_secret_name = format!("{}-tls", cluster.tls_cert_name),
tls_secret_name = tls_secret_name,
jetstream_enabled = cluster.jetstream_enabled,
supercluster_ca_secret_name = cluster.supercluster_ca_secret_name,
));
@@ -463,7 +462,7 @@ natsBox:
debug!("Prepared Helm Chart values : \n{values_yaml:#?}");
let nats = HelmChartScore {
namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
release_name: NonBlankString::from_str(&cluster.name).unwrap(),
release_name: NonBlankString::from_str(cluster.name).unwrap(),
chart_name: NonBlankString::from_str("nats/nats").unwrap(),
chart_version: None,
values_overrides: None,

View File

@@ -28,7 +28,7 @@ async fn main() {
receivers: vec![Box::new(DiscordWebhook {
name: K8sName("wills-discord-webhook-example".to_string()),
url: hurl!("https://something.io"),
selectors: selectors,
selectors,
})],
})],
None,

View File

@@ -9,7 +9,7 @@ name = "opnsense-pair-integration"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony = { path = "../../harmony", features = ["kvm"] }
harmony_cli = { path = "../../harmony_cli" }
harmony_inventory_agent = { path = "../../harmony_inventory_agent" }
harmony_macros = { path = "../../harmony_macros" }

View File

@@ -312,11 +312,11 @@ async fn run_pair_test() -> Result<(), Box<dyn std::error::Error>> {
// Build FirewallPairTopology
let primary_host = LogicalHost {
ip: primary_ip.into(),
ip: primary_ip,
name: VM_PRIMARY.to_string(),
};
let backup_host = LogicalHost {
ip: backup_ip.into(),
ip: backup_ip,
name: VM_BACKUP.to_string(),
};
let primary_api_creds = OPNSenseApiCredentials {

View File

@@ -9,7 +9,7 @@ name = "opnsense-vm-integration"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony" }
harmony = { path = "../../harmony", features = ["kvm"] }
harmony_cli = { path = "../../harmony_cli" }
harmony_inventory_agent = { path = "../../harmony_inventory_agent" }
harmony_macros = { path = "../../harmony_macros" }

View File

@@ -253,7 +253,7 @@ async fn run_integration() -> Result<(), Box<dyn std::error::Error>> {
// Build topology
let firewall_host = LogicalHost {
ip: vm_ip.into(),
ip: vm_ip,
name: VM_NAME.to_string(),
};
let api_creds = OPNSenseApiCredentials {
@@ -343,7 +343,7 @@ async fn run_integration() -> Result<(), Box<dyn std::error::Error>> {
info!("=== IDEMPOTENCY TEST: Running all Scores a SECOND time ===");
let scores_round2 = build_all_scores()?;
let firewall_host2 = LogicalHost {
ip: vm_ip.into(),
ip: vm_ip,
name: VM_NAME.to_string(),
};
let opnsense2 =
@@ -562,8 +562,11 @@ async fn verify_state(
})
}
type FirewallScore = Box<dyn Score<OPNSenseFirewall>>;
type BuildScoresResult = Result<Vec<FirewallScore>, Box<dyn std::error::Error>>;
/// Build all test Scores — extracted so we can call it for both run 1 and run 2.
fn build_all_scores() -> Result<Vec<Box<dyn Score<OPNSenseFirewall>>>, Box<dyn std::error::Error>> {
fn build_all_scores() -> BuildScoresResult {
let lb_score = LoadBalancerScore {
public_services: vec![
LoadBalancerService {

View File

@@ -1,12 +1,3 @@
use std::{collections::HashMap, str::FromStr};
use harmony::{
inventory::Inventory,
modules::helm::chart::{HelmChartScore, HelmRepository, NonBlankString},
topology::K8sAnywhereTopology,
};
use harmony_macros::hurl;
#[tokio::main]
async fn main() {
// let mut chart_values = HashMap::new();

View File

@@ -4,6 +4,7 @@ edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
autobins = false
[[example]]
name = "try_rust_webapp"

View File

@@ -8,6 +8,8 @@ async fn main() {
host: "sso.sto1.nationtech.io".to_string(),
zitadel_version: "v4.12.1".to_string(),
external_secure: true,
external_port: None,
..Default::default()
};
harmony_cli::run(

599
fleet/ARCHITECTURE.html Normal file
View File

@@ -0,0 +1,599 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Harmony Fleet — Architecture</title>
<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
<script>
mermaid.initialize({
startOnLoad: true,
theme: "base",
themeVariables: {
fontFamily: "ui-sans-serif, -apple-system, Segoe UI, Inter, sans-serif",
primaryColor: "#eef3fb",
primaryBorderColor: "#7a93b7",
primaryTextColor: "#1f2937",
lineColor: "#5b6b80",
tertiaryColor: "#fafbfd",
clusterBkg: "#f6f8fc",
clusterBorder: "#c6d2e2",
noteBkgColor: "#fff8e1",
noteTextColor: "#3a2f00",
actorBkg: "#eef3fb",
actorBorder: "#7a93b7",
sequenceNumberColor: "#1f2937"
}
});
</script>
<style>
:root {
--ink: #1f2937;
--ink-soft: #4b5563;
--paper: #ffffff;
--paper-tint: #f6f8fc;
--rule: #e3e8ef;
--accent: #2c5282;
--accent-soft: #ebf2fb;
--warn: #b7791f;
--warn-soft: #fff8e1;
--mono: ui-monospace, SFMono-Regular, "JetBrains Mono", Menlo, Consolas, monospace;
--sans: ui-sans-serif, -apple-system, "Segoe UI", Inter, system-ui, sans-serif;
}
* { box-sizing: border-box; }
html, body {
margin: 0;
background: var(--paper);
color: var(--ink);
font-family: var(--sans);
line-height: 1.6;
font-size: 16px;
}
main {
max-width: 880px;
margin: 0 auto;
padding: 4rem 1.5rem 6rem;
}
header.hero {
margin-bottom: 3rem;
border-bottom: 1px solid var(--rule);
padding-bottom: 2rem;
}
header.hero h1 {
font-size: 2.4rem;
line-height: 1.15;
letter-spacing: -0.02em;
margin: 0 0 1rem;
color: var(--ink);
}
header.hero p.subtitle {
margin: 0;
color: var(--ink-soft);
font-size: 1.1rem;
}
header.hero p.subtitle b { color: var(--ink); font-weight: 600; }
h2 {
margin-top: 3.5rem;
margin-bottom: 1rem;
font-size: 1.55rem;
letter-spacing: -0.01em;
color: var(--ink);
display: flex;
align-items: baseline;
gap: 0.75rem;
}
h2 .layer {
font-size: 0.7rem;
text-transform: uppercase;
letter-spacing: 0.08em;
color: var(--accent);
background: var(--accent-soft);
padding: 0.15rem 0.55rem;
border-radius: 999px;
font-weight: 600;
line-height: 1.6;
flex-shrink: 0;
}
h3 {
margin-top: 2rem;
font-size: 1.1rem;
color: var(--ink);
}
p, li { color: var(--ink); }
a { color: var(--accent); text-decoration: none; border-bottom: 1px solid transparent; }
a:hover { border-bottom-color: var(--accent); }
code {
font-family: var(--mono);
font-size: 0.92em;
background: var(--paper-tint);
padding: 0.08em 0.35em;
border-radius: 4px;
border: 1px solid var(--rule);
}
pre {
background: var(--paper-tint);
border: 1px solid var(--rule);
border-radius: 8px;
padding: 1rem 1.2rem;
overflow-x: auto;
font-family: var(--mono);
font-size: 0.88rem;
line-height: 1.5;
}
pre code {
background: none;
border: none;
padding: 0;
}
blockquote {
margin: 1.5rem 0;
padding: 0.6rem 1.2rem;
border-left: 3px solid var(--accent);
background: var(--accent-soft);
color: var(--ink);
border-radius: 0 6px 6px 0;
}
blockquote p { margin: 0.3rem 0; }
.callout {
margin: 1.5rem 0;
padding: 0.8rem 1.2rem;
border-left: 3px solid var(--warn);
background: var(--warn-soft);
border-radius: 0 6px 6px 0;
color: #4a3c10;
font-size: 0.95rem;
}
.callout b { color: #3a2f00; }
table {
border-collapse: collapse;
width: 100%;
margin: 1.5rem 0;
font-size: 0.95rem;
}
th, td {
text-align: left;
padding: 0.6rem 0.8rem;
border-bottom: 1px solid var(--rule);
vertical-align: top;
}
th {
background: var(--paper-tint);
font-weight: 600;
color: var(--ink);
border-bottom: 2px solid var(--rule);
}
tr:hover td { background: var(--paper-tint); }
details {
margin: 1.2rem 0;
border: 1px solid var(--rule);
border-radius: 8px;
background: var(--paper-tint);
padding: 0;
overflow: hidden;
}
details summary {
cursor: pointer;
padding: 0.75rem 1.1rem;
font-weight: 600;
color: var(--ink);
list-style: none;
user-select: none;
display: flex;
align-items: center;
gap: 0.5rem;
transition: background 80ms ease;
}
details summary::-webkit-details-marker { display: none; }
details summary::before {
content: "▸";
color: var(--accent);
transition: transform 120ms ease;
display: inline-block;
font-size: 0.85em;
}
details[open] summary::before { transform: rotate(90deg); }
details summary:hover { background: rgba(0,0,0,0.02); }
details > *:not(summary) {
padding: 0 1.1rem;
}
details > *:not(summary):last-child {
padding-bottom: 1rem;
}
details[open] summary {
border-bottom: 1px solid var(--rule);
}
.mermaid {
background: var(--paper);
border: 1px solid var(--rule);
border-radius: 8px;
padding: 1.2rem;
margin: 1.5rem 0;
text-align: center;
overflow-x: auto;
}
hr {
border: none;
border-top: 1px solid var(--rule);
margin: 3rem 0;
}
ul, ol { padding-left: 1.4rem; }
ul li, ol li { margin: 0.25rem 0; }
.stop-here {
margin: 2rem 0;
text-align: center;
color: var(--ink-soft);
font-style: italic;
font-size: 0.95rem;
}
.stop-here::before, .stop-here::after {
content: " — ";
color: var(--rule);
}
footer {
margin-top: 5rem;
padding-top: 2rem;
border-top: 1px solid var(--rule);
color: var(--ink-soft);
font-size: 0.9rem;
}
</style>
</head>
<body>
<main>
<header class="hero">
<h1>Harmony Fleet — Architecture</h1>
<p class="subtitle">
An operator declares <b>what</b> to run, in Kubernetes.
Agents on devices make it real, in their own containers.
NATS is the bus between them. Zitadel signs the agent's passport.
</p>
</header>
<p>This document walks the system in layers. Read until you stop having questions —
each layer adds one idea on top of the previous one.</p>
<hr>
<h2><span class="layer">Layer 0</span> One picture</h2>
<div class="mermaid">
flowchart LR
subgraph K8S [Kubernetes cluster]
OP[Harmony Fleet Operator]
end
subgraph BUS [NATS JetStream]
KV[(KV buckets)]
end
subgraph DEV [Fleet device]
AG[Fleet Agent + Podman]
end
OP -- writes desired state --> KV
KV -- watches --> AG
AG -- reports state --> KV
KV -- watches --> OP
</div>
<p>That's it. The rest of the document explains the boxes.</p>
<hr>
<h2><span class="layer">Layer 1</span> The three planes</h2>
<p>The fleet system has three planes that are deliberately decoupled:</p>
<table>
<thead><tr><th>Plane</th><th>What lives here</th><th>Why</th></tr></thead>
<tbody>
<tr>
<td><b>Control</b></td>
<td>Kubernetes (k3d, OKD, vanilla — anything) + the <b>Fleet Operator</b></td>
<td>Operators already know how to talk to k8s. <code>kubectl apply</code> is the API.</td>
</tr>
<tr>
<td><b>Bus</b></td>
<td>A NATS server with JetStream + an auth callout that talks to Zitadel</td>
<td>Edge devices come and go; the bus tolerates that. KV gives us last-writer-wins state without bespoke sync.</td>
</tr>
<tr>
<td><b>Edge</b></td>
<td>Each device runs the <b>Fleet Agent</b> binary, which drives <b>Podman</b></td>
<td>Devices don't speak k8s — they speak NATS and run containers locally.</td>
</tr>
</tbody>
</table>
<div class="mermaid">
flowchart LR
subgraph control [Control plane — Kubernetes]
direction TB
API[API Server + etcd]
OP[Fleet Operator]
DASH[/Dashboard — optional, feature-gated/]
API <--> OP
OP --- DASH
end
subgraph bus [Bus — NATS]
direction TB
NATS[NATS + JetStream KV]
CALLOUT[Auth Callout]
ZIT[Zitadel OIDC]
NATS -. token check .-> CALLOUT
CALLOUT -. validate JWT .-> ZIT
end
subgraph edge [Edge — fleet device]
direction TB
AGENT[Fleet Agent]
PODMAN[Podman]
AGENT --> PODMAN
end
OP <-->|KV| NATS
AGENT <-->|KV + commands| NATS
</div>
<div class="stop-here">Stop here if you only needed to know the shape</div>
<hr>
<h2><span class="layer">Layer 2</span> A deployment, end-to-end</h2>
<p>Walk through what happens when a person (the SRE in the diagram below — not the Fleet Operator) runs <code>kubectl apply -f my-deployment.yaml</code>:</p>
<div class="mermaid">
sequenceDiagram
autonumber
actor User as Actor (SRE)
participant K8s as API Server
participant Op as Fleet Operator
participant Bus as NATS KV
participant Ag as Agent (on device)
participant Pm as Podman
User->>K8s: kubectl apply Deployment CR
K8s-->>Op: watch event (Deployment added)
Op->>Op: evaluate spec.targetSelector against Device CR labels
Op->>Bus: PUT desired-state.&lt;dev&gt;.&lt;dep&gt; = ReconcileScore JSON
Bus-->>Ag: KV watch event
Ag->>Ag: deserialize Score, build Interpret
Ag->>Pm: pull image, create/update container
Pm-->>Ag: container Running
Ag->>Bus: PUT device-state.state.&lt;dev&gt;.&lt;dep&gt; = Running
Bus-->>Op: KV watch event
Op->>K8s: PATCH Deployment.status.aggregate
</div>
<p>Things to notice:</p>
<ul>
<li><b>The agent never talks to the API server.</b> Only the operator does. Everything edge-bound flows through NATS.</li>
<li><b>The flow is one-way for desired state, one-way for reported state.</b> The two paths cross at NATS, never at k8s.</li>
<li><b>The aggregator coalesces</b> — status patches fire at 1 Hz, not on every event, so high-frequency churn doesn't beat up the API server.</li>
</ul>
<details>
<summary>The CRDs in detail</summary>
<p>Group: <code>fleet.nationtech.io</code> · Version: <code>v1alpha1</code></p>
<ul>
<li>
<b><code>Deployment</code></b> (kind), plural <code>deployments</code>, short <code>fleetdep</code>, <b>namespaced</b><br>
Spec: <code>targetSelector: LabelSelector</code>, <code>score: ReconcileScore</code>, <code>rollout: Rollout</code><br>
Status: <code>aggregate: { matchedDeviceCount, succeeded, failed, pending, lastError }</code>
</li>
<li>
<b><code>Device</code></b> (kind), plural <code>devices</code>, short <code>fleetdev</code>, <b>cluster-scoped</b><br>
Spec: <code>inventory: InventorySnapshot</code><br>
Cluster-scoped because devices are infrastructure — the same way <code>Node</code> is cluster-scoped.
</li>
</ul>
<p>Devices in k8s are <b>created by the operator</b> from agent-published <code>device-info</code> KV entries. Agents never touch the API server.</p>
<p>Source: <code>harmony/src/modules/fleet/operator/crd.rs</code></p>
</details>
<hr>
<h2><span class="layer">Layer 3</span> The four KV buckets</h2>
<p>The bus is more granular than "a NATS KV". The fleet contract pins <b>four</b> named buckets, each with its own write/read direction.</p>
<table>
<thead><tr><th>Bucket</th><th>Writer</th><th>Reader(s)</th><th>Key format</th><th>Purpose</th></tr></thead>
<tbody>
<tr>
<td><code>desired-state</code></td>
<td>Operator</td>
<td>Agent (watch)</td>
<td><code>&lt;device&gt;.&lt;deployment&gt;</code></td>
<td>The score the agent should reconcile to</td>
</tr>
<tr>
<td><code>device-state</code></td>
<td>Agent</td>
<td>Operator (watch + aggregator)</td>
<td><code>state.&lt;device&gt;.&lt;deployment&gt;</code></td>
<td>Current reconcile phase per (device, deployment)</td>
</tr>
<tr>
<td><code>device-info</code></td>
<td>Agent</td>
<td>Operator (reflects to <code>Device</code> CR)</td>
<td><code>info.&lt;device&gt;</code></td>
<td>Routing labels, inventory snapshot, agent version</td>
</tr>
<tr>
<td><code>device-heartbeat</code></td>
<td>Agent</td>
<td>Operator (liveness)</td>
<td><code>heartbeat.&lt;device&gt;</code></td>
<td>Tiny liveness ping every N seconds, kept off the state bucket to avoid churn</td>
</tr>
</tbody>
</table>
<div class="mermaid">
flowchart LR
OP[Operator]
AG[Agent]
DS[(desired-state)]
ST[(device-state)]
IN[(device-info)]
HB[(device-heartbeat)]
OP -- writes --> DS
DS -- watches --> AG
AG -- writes --> ST
AG -- writes --> IN
AG -- writes --> HB
ST -- watches --> OP
IN -- reflects to Device CR --> OP
HB -. queries .- OP
</div>
<p>These four bucket names are <b>the contract</b> between agent and operator. They live in one place to keep cross-component drift from happening:</p>
<pre><code>// harmony-reconciler-contracts/src/kv.rs
pub const BUCKET_DESIRED_STATE: &amp;str = "desired-state";
pub const BUCKET_DEVICE_INFO: &amp;str = "device-info";
pub const BUCKET_DEVICE_STATE: &amp;str = "device-state";
pub const BUCKET_DEVICE_HEARTBEAT: &amp;str = "device-heartbeat";</code></pre>
<p>There's also a <b>commands</b> path for request/response RPCs (ping today; logs/exec planned) on core-NATS subjects <code>device-commands.&lt;device-id&gt;.&lt;verb&gt;</code>, separate from JetStream KV.</p>
<hr>
<h2><span class="layer">Layer 4</span> Identity &amp; auth</h2>
<p>Agents authenticate to NATS with a <b>Zitadel-signed JWT bearer token</b>. NATS doesn't validate the JWT itself; it delegates to a NATS <b>auth callout</b>, which is just another connected client running our <code>harmony-fleet-auth</code> binary.</p>
<div class="mermaid">
sequenceDiagram
autonumber
participant Ag as Agent
participant Z as Zitadel
participant N as NATS server
participant C as Auth Callout (harmony-fleet-auth)
Note over Ag,Z: One-time bootstrap (or before token expiry)
Ag->>Z: JWT assertion (RFC 7523, signed with device key)
Z-->>Ag: short-lived access token
Note over Ag,N: Every (re)connect
Ag->>N: CONNECT with bearer = access token
N->>C: auth callout request
C->>Z: introspect / validate signature
Z-->>C: token valid, claims = { device_id, ... }
C-->>N: ALLOW, permissions scoped to device_id
N-->>Ag: connection accepted
</div>
<p><b>Per-device scoping</b> — the callout derives NATS subject permissions from the JWT's <code>device_id</code> claim, so a compromised device key can only touch its own subjects.</p>
<p><b>Token rotation</b> — the agent's auth callback is invoked by <code>async-nats</code> on every reconnect; the token cache mints a fresh one within a 5-minute leeway window. This is how the "never lose connectivity across token rollovers" guarantee holds.</p>
<div class="callout">
<b>Today vs. target.</b> The CLI in <code>harmony-fleet-deploy/src/main.rs</code> defaults to <b>user/pass NATS</b> (<code>FleetNatsScore::user_pass</code>) for the v1 walking skeleton. The Zitadel/callout path is wired through <code>FleetServerScore</code>'s optional fields and is the production target — the diagram describes the target, not what the dev <code>main.rs</code> lights up by default.
</div>
<details>
<summary>Where this lives in code</summary>
<ul>
<li>Auth callout binary: <code>fleet/harmony-fleet-auth/src/lib.rs</code></li>
<li>Credential source + JWT minting: <code>fleet/harmony-fleet-auth/src/credentials.rs</code></li>
<li>Composing it into a server install: <code>FleetServerScore { auth_callout: Some(...) }</code> in <code>fleet/harmony-fleet-deploy/src/server.rs</code></li>
</ul>
</details>
<hr>
<h2><span class="layer">Layer 5</span> Device enrollment (one-time setup)</h2>
<p>A device joins the fleet through <code>FleetDeviceSetupScore</code> (in <code>harmony/src/modules/fleet/setup_score.rs</code>). Three flavours, in order of seriousness:</p>
<ol>
<li><b>Dev / lab</b> — <code>FleetDeviceAuth::TomlShared</code>: a shared NATS user/pass baked into config. Zero auth infra. Don't ship this to a real device.</li>
<li><b>Production A</b> — <code>FleetDeviceAuth::ZitadelJwt</code>: an admin pre-creates a Zitadel machine user, exports its key JSON, and drops it at <code>/etc/fleet-agent/zitadel-key.json</code> on the device.</li>
<li><b>Production B (recommended)</b> — <code>FleetDeviceAuth::ZitadelEnroll</code>: the setup score itself talks to Zitadel's management API to mint a per-device machine key. No pre-provisioning. Works either developer-on-device (Zitadel device-code flow opens a browser) or operator-via-SSH.</li>
</ol>
<p>What happens during enrollment, in order (steps 1–3 are performed by the setup score; steps 4–7 are the agent's first boot):</p>
<ol>
<li>Renders <code>/etc/fleet-agent/config.toml</code> (device id, NATS URL, auth credentials).</li>
<li>Drops the agent binary at <code>/usr/local/bin/fleet-agent</code>.</li>
<li>Enables <code>fleet-agent.service</code> (systemd).</li>
<li>Agent boots, connects to NATS with bearer token from the keyfile.</li>
<li>Agent publishes initial DeviceInfo into the <code>device-info</code> bucket under key <code>info.&lt;device_id&gt;</code>.</li>
<li>Agent starts watching <code>desired-state.&lt;device_id&gt;.&gt;</code>.</li>
<li>Agent answers <code>device-commands.&lt;device_id&gt;.ping</code>.</li>
</ol>
<p>After step 5 the operator reflects the agent-published DeviceInfo into a cluster-scoped <code>Device</code> CR. From that moment, a new <code>Deployment</code> CR whose <code>targetSelector</code> matches the Device's labels will land on the device automatically.</p>
<hr>
<h2><span class="layer">Layer 6</span> What runs where</h2>
<div class="mermaid">
flowchart TB
subgraph cluster [Kubernetes — fleet-system namespace]
direction TB
OP["Pod: harmony-fleet-operator
watches CRDs, writes desired-state KV,
aggregates device-state into CR status,
optional dashboard on :18080"]
NATS["Pod: NATS + JetStream
4 KV buckets, command subjects"]
CO["Pod: harmony-fleet-auth
NATS auth callout — validates JWTs"]
ZT["Pods: Zitadel + Postgres
OIDC, JWT signing"]
end
subgraph device [Edge — a Raspberry Pi or any podman host]
direction TB
AG["systemd: fleet-agent.service
watches desired-state.&lt;id&gt;.&gt;
writes device-state, device-info, device-heartbeat
handles device-commands.&lt;id&gt;.&lt;verb&gt;"]
PM[podman socket]
AG --> PM
end
AG <-->|NATS over WSS / TLS| NATS
OP <-->|in-cluster NATS| NATS
NATS -. callout .- CO
CO -. JWT introspect .- ZT
OP --- ZT
</div>
<hr>
<h2>Cheat sheet — where to start reading</h2>
<table>
<thead><tr><th>If you want to understand…</th><th>Open this file</th></tr></thead>
<tbody>
<tr><td>What a Deployment / Device CR looks like</td><td><code>harmony/src/modules/fleet/operator/crd.rs</code></td></tr>
<tr><td>The names of the KV buckets and key formats</td><td><code>harmony-reconciler-contracts/src/kv.rs</code></td></tr>
<tr><td>Operator: how CR → KV reconciliation works</td><td><code>fleet/harmony-fleet-operator/src/fleet_aggregator.rs</code></td></tr>
<tr><td>Agent: how KV → Podman reconciliation works</td><td><code>fleet/harmony-fleet-agent/src/reconciler.rs</code></td></tr>
<tr><td>Auth: JWT minting and NATS callout protocol</td><td><code>fleet/harmony-fleet-auth/src/credentials.rs</code></td></tr>
<tr><td>Deploying the whole server-side stack</td><td><code>fleet/harmony-fleet-deploy/src/server.rs</code></td></tr>
<tr><td>One-time device enrollment</td><td><code>harmony/src/modules/fleet/setup_score.rs</code></td></tr>
<tr><td>Why it's shaped this way (philosophy)</td><td><code>docs/adr/016-…</code> and <code>docs/adr/023-deploy-architecture.md</code></td></tr>
</tbody>
</table>
<h2>Glossary, for quick reference</h2>
<ul>
<li><b>Score</b> — a Rust struct describing desired state (declarative). <code>ReconcileScore</code> is the variant agents apply.</li>
<li><b>Topology</b> — what the environment can do (capabilities exposed as traits). The agent uses <code>PodmanTopology</code>; the deploy CLI uses <code>K8sAnywhereTopology</code>.</li>
<li><b>Interpret</b> — the glue that drives a Topology to fulfil a Score. Agents call <code>score.create_interpret().execute(&amp;inv, &amp;PodmanTopology)</code>.</li>
<li><b>Auth callout</b> — a NATS feature where the server delegates AuthN to a connected client; here, that client is <code>harmony-fleet-auth</code>.</li>
<li><b>K8sAnywhere</b> — single Topology implementation that targets any reachable cluster (k3d, OKD, vanilla) via the kubeconfig. Today the only topology wired into <code>harmony-fleet-deploy</code>; <code>K8sBareTopology</code> is planned.</li>
</ul>
<footer>
Source of truth lives in the repo. This document was validated against
<code>fleet/</code> and <code>harmony/src/modules/fleet/</code> as of the commit on
<code>feat/iot-walking-skeleton</code>. If a layer looks wrong to you, it probably is — open a PR.
</footer>
</main>
</body>
</html>

View File

@@ -0,0 +1,427 @@
# Plan — Request/Reply over NATS, TDD via in-cluster e2e harness
Two intertwined deliverables:
1. **`fleet/harmony-fleet-e2e`** — a new harness crate that brings up the full stack (NATS + auth-callout + fleet-operator + fleet-agent-as-pod) in a fresh k3d namespace and tears it down at process exit. Fast (target ≤15s bring-up when the cluster is already running, ≤5s teardown). Works against k3d locally or any cluster with a kubeconfig (incl. OKD).
2. **First feature, TDD-style**: `Verb::Ping`. Failing test in the harness, then the wire types + agent handler + operator client to make it green. Subsequent verbs (logs, exec) follow the same pattern in follow-up PRs.
Both land together because the harness is what proves cohesion: every fleet feature from now on gets its e2e test in the same crate, and the scattered bring-up code in `examples/fleet_e2e_demo` and `examples/fleet_auth_callout` becomes a thin layer over this harness.
## Goals & non-goals
| Goal | In v1 |
|---|---|
| `cargo test -p harmony-fleet-e2e` brings up the stack, runs ping test, tears down | ✅ |
| Per-test namespace isolation; multiple test runs can coexist in the same cluster | ✅ |
| Images built once and sideloaded into k3d (no registry push) | ✅ |
| Cluster reused across runs; only namespace is recreated | ✅ |
| Agent runs as a Pod (no VMs, no SSH, no libvirt) | ✅ |
| Harness prints NATS URL + admin creds so the developer can poke during a hung test | ✅ |
| First feature: `ping` (operator-side `FleetCommandsClient::ping`, agent-side handler, wire types) | ✅ |
| Runs against a remote OKD cluster via `KUBECONFIG` | ✅ (image-import step is conditional) |
| Non-goal (v1) | Reason |
|---|---|
| `logs` / `exec` implementations | Same wiring; covered in follow-up commits using the same harness |
| PTY | Doc Pattern B; defer |
| JetStream audit log | Defer; sidecar consumer added later |
| Zitadel in the harness | Cold-start cost is 30-60s; harness mode A uses a mock OIDC fixture for the callout to keep bring-up fast. Real Zitadel stays in `fleet_e2e_demo` (manual rehearsal). |
## Crate layout
New workspace member at `fleet/harmony-fleet-e2e/`:
```
fleet/harmony-fleet-e2e/
├── Cargo.toml
├── README.md # How to run, debug, point at remote clusters
├── src/
│ ├── lib.rs # Public surface: Stack, StackHandle, bring_up()
│ ├── images.rs # Build + sideload (callout, operator, agent)
│ ├── namespace.rs # Unique-namespace generation + RAII cleanup
│ ├── stack.rs # Compose Scores against K8sBareTopology
│ ├── nats.rs # NatsHelmChartScore preset with callout + mock-issuer block
│ ├── mock_oidc.rs # Tiny in-cluster OIDC fixture (issues JWTs the callout accepts)
│ ├── agent_pod.rs # New Score: agent as a Pod (no VM/SSH)
│ ├── observability.rs # NodePort + admin creds, helper to mint admin JWT
│ └── client.rs # FleetCommandsClient (operator-side wrapper for tests)
└── tests/
└── ping.rs # **First TDD test** — failing until the protocol lands
```
Crate kind: library + `[[test]]` integration tests. Not a binary; harness is consumed by tests via `harmony_fleet_e2e::Stack::bring_up().await`.
Cargo workspace: add to root `members`. Build deps: `harmony` (k8s+nats helpers), `harmony-fleet-auth`, `harmony-reconciler-contracts`, `k3d`, `async-nats`, `kube`, `k8s-openapi`, `tokio`, `anyhow`, `tracing`, `uuid` (for namespace ID), `tempfile`, `serde_json`.
### Why a separate crate (not an example)
Examples currently are bring-up scripts. The e2e harness is **infrastructure for tests** consumed by multiple callers (the new `tests/ping.rs`, future `tests/logs.rs`, `tests/exec.rs`, and eventually a slimmed-down `examples/fleet_e2e_demo` that just calls into it for the manual rehearsal). A library crate lets us expose `Stack`, `StackHandle`, `FleetCommandsClient` as proper types, with `cargo test` discovery and parallel-friendly per-namespace isolation.
## Agent-side prerequisite: gate podman behind config
The agent currently `panic`s if the podman socket isn't ready (`fleet/harmony-fleet-agent/src/main.rs:200`). For the in-cluster harness we need the agent to run on a node that doesn't expose podman.
Add to `agent-config.toml`:
```toml
[agent]
device_id = "vm-device-00"
# NEW: when false, skip podman init and the reconciler loop.
# Command server still runs (ping/exec-via-fallback are still useful).
runtime_enabled = true # default true; e2e harness sets false
```
Wire-up in `main.rs`:
- When `runtime_enabled = false`, the agent skips `PodmanTopology::from_default_socket()`, skips the reconciler periodic tick, but still subscribes to desired-state (KV watch) and runs the command server. KV deliveries with a non-podman Score variant get logged + rejected with `ErrorKind::BadRequest` (today we'd just drop them silently).
Small, contained change (~30 lines). Unlocks pod-based agents and unblocks future verbs (exec/logs add their own runtime requirements).
Alternative considered: mount `/var/run/podman/podman.sock` into the pod. Rejected — k3d nodes run containerd, not podman; mount would dangle.
## Harness public API
```rust
// fleet/harmony-fleet-e2e/src/lib.rs
pub struct Stack {
pub namespace: String, // e2e-<uuid8>
pub nats_url: String, // nats://localhost:<nodeport>
pub admin_token: String, // JWT for the mock OIDC, callout-accepted
pub device_ids: Vec<Id>, // ["vm-device-00", "vm-device-01", …]
pub operator_client: async_nats::Client, // pre-authed admin client
_guard: NamespaceGuard, // Drop impl deletes the namespace
}
pub struct StackOptions {
pub kubeconfig: Option<PathBuf>, // default: $KUBECONFIG, fall back to k3d-managed
pub k3d_cluster_name: Option<String>, // None = pick the harness default; required if not using k3d
pub num_devices: usize, // default 1; ping test uses 1
pub image_rebuild: bool, // env var FLEET_E2E_FORCE_REBUILD
pub keep_namespace: bool, // env var FLEET_E2E_KEEP=1 — skip teardown for debugging
pub auth_mode: AuthMode, // Callout (default) | UserPass (fastest)
}
pub enum AuthMode {
/// Real auth-callout + mock OIDC fixture. Exercises the production code path.
Callout,
/// NATS user/pass via TomlShared credentials. Skips callout entirely.
/// ~3-5s faster bring-up; use for tests that don't care about auth.
UserPass,
}
impl Stack {
pub async fn bring_up(opts: StackOptions) -> anyhow::Result<Self>;
pub fn print_debug_info(&self); // logs URL, token, namespace, kubectl shortcuts
}
```
`Drop for NamespaceGuard`: spawns a blocking task that runs `kubectl delete namespace <name> --wait=false`. Doesn't block process exit; the namespace garbage-collects asynchronously. If `keep_namespace = true`, just logs the name.
## TDD test order
### Test 1 (first to land): ping
```rust
// fleet/harmony-fleet-e2e/tests/ping.rs
#[tokio::test(flavor = "multi_thread")]
async fn operator_can_ping_agent() -> anyhow::Result<()> {
let stack = Stack::bring_up(StackOptions::default()).await?;
let device_id = &stack.device_ids[0];
let client = FleetCommandsClient::new(stack.operator_client.clone());
let reply = tokio::time::timeout(
Duration::from_secs(10),
client.ping(device_id.as_str()),
).await??;
assert_eq!(reply.device_id.as_str(), device_id.as_str());
assert!(!reply.agent_version.is_empty());
Ok(())
}
```
**Failing → green sequence:**
1. **Red**: write the test above. It can't even compile because `FleetCommandsClient`, `Stack`, `bring_up` don't exist.
2. **Scaffold the harness**: stub `Stack::bring_up` that just returns an error. Test compiles, fails at runtime.
3. **Bring up the cluster bits incrementally**:
- Namespace creation + RAII guard.
- NATS deploy via `NatsHelmChartScore` (UserPass mode first for speed).
- Operator deploy via `FleetOperatorScore` (image sideloaded).
- Agent pod deploy via new `FleetAgentPodScore`.
- Wait for pod readiness.
- Build operator admin NATS client.
4. **Implement the wire types** in `harmony-reconciler-contracts/src/commands.rs` (just `Verb::Ping` + `CommandRequest::Ping` + `PingReply` for now).
5. **Implement agent command server** with only the ping handler (`fleet/harmony-fleet-agent/src/command_server.rs`).
6. **Implement `FleetCommandsClient::ping`** in `fleet/harmony-fleet-operator/src/commands.rs`.
7. **Test goes green.**
8. **Add Callout auth mode** to the harness (mock OIDC fixture deployed alongside NATS), re-run test in both modes.
### Test 2 (follow-up PR): no-responders → DeviceOffline
```rust
#[tokio::test]
async fn ping_to_offline_device_returns_immediately() -> anyhow::Result<()> {
let stack = Stack::bring_up(StackOptions::default()).await?;
let client = FleetCommandsClient::new(stack.operator_client.clone());
let started = Instant::now();
let err = client.ping("nonexistent-device").await.unwrap_err();
assert!(matches!(err, CommandError::DeviceOffline));
assert!(started.elapsed() < Duration::from_secs(1));
Ok(())
}
```
### Test 3+ (follow-up PR, same harness): logs + exec — same pattern.
## Image build & sideload
`src/images.rs` exposes:
```rust
pub struct Images {
pub callout: String, // e.g. harmony-nats-callout:e2e-<contenthash>
pub operator: String,
pub agent: String,
}
pub async fn build_and_sideload(cluster: &K3dCluster, opts: BuildOpts) -> Result<Images>;
```
Implementation:
- For each of (callout, operator, agent):
- Hash the crate's source tree + `Cargo.lock`.
- If `podman images` already contains `<image>:<hash>` and `FLEET_E2E_FORCE_REBUILD != 1`, skip the build.
- Otherwise: `cargo build --release -p <crate>` + `podman build -f Dockerfile -t <image>:<hash>`.
- `podman save | k3d image import -c <cluster>` (or `--volumes` if `--import` doesn't accept stdin; use the existing pattern from `examples/fleet_e2e_demo`).
Dockerfiles:
- Callout: exists at `nats/callout/Dockerfile` (used by the demo).
- Operator: exists at `fleet/harmony-fleet-operator/Dockerfile`.
- **Agent**: doesn't exist yet — add `fleet/harmony-fleet-agent/Dockerfile`. Distroless base, single static binary, ~5MB image.
Sideload bypass for remote clusters: if `opts.registry` is set, push to that registry and skip sideload. Out of scope for v1 (the user said defer); v1 just panics if running against a non-k3d cluster.
## Per-namespace isolation
Today the demo hardcodes `fleet-system` and `zitadel`. The harness:
- Picks namespace `e2e-<uuid8>` per `Stack::bring_up` call.
- Every Score in the harness is parametrized on `namespace`; nothing is hardcoded.
- The `FleetOperatorScore` already takes a `namespace` (verified in `harmony/src/modules/fleet/operator/score.rs`). The `NatsHelmChartScore` too. The `NatsAuthCalloutScore` too. Good.
- The CRDs (`Deployment`, `Device`) are cluster-scoped — but they're created once per cluster (idempotent apply), shared across e2e runs. The operator filters by namespace via its `kube::Api::namespaced()` calls.
- Wait — `Device` is cluster-scoped. Two simultaneous e2e runs would collide on `Device` CR names. Two mitigations:
- **Option A** (simpler): per-test device IDs include the namespace suffix (`vm-device-00-e2e-abc12345`). No collision.
- **Option B**: scope the `Device` CR to a namespace. Bigger change to the operator. Out of scope.
- Plan picks A.
## Auth mode story
Default `AuthMode::Callout` because the user explicitly asked for "nats + callout + operator + agent". To avoid Zitadel's bring-up cost, the harness ships a `mock_oidc.rs` fixture: a tiny single-Pod HTTP service that:
- Serves `/.well-known/openid-configuration` and `/jwks.json` from a process-generated keypair.
- Mints JWTs for `device-<id>` and `fleet-ops` machine users on demand via a `/token` endpoint the harness calls.
- ~200 LOC, no external deps. Lives inside `harmony-fleet-e2e` (not exposed elsewhere).
The callout points its `oidc_issuer_url` at the mock service's in-cluster URL. From the callout's perspective this is indistinguishable from Zitadel.
`AuthMode::UserPass` skips the callout entirely: NATS deploys with two static accounts (`device` + `admin`) and the agent's `TomlShared` credential variant connects directly. ~3-5s faster bring-up. Useful when iterating on the command protocol itself, where auth isn't being tested.
Both modes go through the same `Stack::operator_client` surface — tests don't see the difference.
## Observability — what the harness prints
On bring-up success, `print_debug_info()` logs:
```
[e2e] namespace: e2e-7d3a91f4 (will be deleted on exit unless FLEET_E2E_KEEP=1)
[e2e] kubectl -n e2e-7d3a91f4 get pods
[e2e] NATS: nats://localhost:30422
[e2e] admin token: eyJhbGc... (use as auth_token)
[e2e] devices: vm-device-00-e2e-7d3a91f4
[e2e] tail agent: kubectl -n e2e-7d3a91f4 logs deploy/fleet-agent-vm-device-00 -f
[e2e] tail callout: kubectl -n e2e-7d3a91f4 logs deploy/fleet-callout -f
```
When a test fails, set `FLEET_E2E_KEEP=1` and the namespace persists so you can poke around. The next run uses a different namespace, so leaks don't compound.
## Reuse / cohesion plan
The existing `examples/fleet_e2e_demo/src/lib.rs` is the original bring-up Frankenstein. Once `harmony-fleet-e2e` exists, refactor `fleet_e2e_demo` to delegate:
```rust
// examples/fleet_e2e_demo/src/lib.rs (after refactor)
pub async fn bring_up_full_stack(...) -> ... {
let stack = harmony_fleet_e2e::Stack::bring_up(StackOptions {
auth_mode: AuthMode::Callout, // real
num_devices: cfg.num_devices,
oidc_provider: OidcProvider::RealZitadel(zitadel_config), // adapter for real Zitadel
agent_target: AgentTarget::Vm(vm_ips), // SSH-based, for the rehearsal flow
..
}).await?;
// ...
}
```
This requires the harness to support **multiple agent targets** (Pod vs VM/SSH) and **multiple OIDC providers** (mock vs real Zitadel). Architecture-wise this is a `trait AgentTarget` and a `trait OidcProvider`, both with mock + real impls. The v1 PR ships only the Pod + mock-OIDC impls; the demo refactor is a follow-up PR.
Cohesion deliverables this PR closes:
- Single home for "bring up a fleet stack" logic (currently scattered across 3 examples).
- Single home for image-build invocation (today inline `cargo build --release` + `podman build` calls live in `fleet_e2e_demo/src/lib.rs` lines 553–623).
- Single home for "issue NATS test client" plumbing (the `admin_nats_client` helper in `e2e_walking_skeleton.rs` should be a Stack method).
## Wire types (same as previous plan, reduced for ping-only first pass)
In `harmony-reconciler-contracts/src/commands.rs` — add only what `ping` needs in PR 1:
```rust
pub enum Verb { Ping }
pub fn device_command_subject(device_id: &str, verb: Verb) -> String;
pub enum CommandRequest { Ping }
pub struct PingReply {
pub device_id: Id,
pub agent_version: String,
pub uptime_s: u64,
}
pub const HDR_REQUEST_ID: &str = "X-Harmony-Request-Id";
pub const HDR_DEADLINE: &str = "X-Harmony-Deadline";
pub const HDR_OPERATOR_SUB: &str = "X-Harmony-Operator-Sub";
```
`Verb::Exec` / `Verb::Logs` and their payloads are added in follow-up PRs alongside their tests.
## Agent-side command server (ping-only scaffold)
`fleet/harmony-fleet-agent/src/command_server.rs`:
```rust
pub struct CommandServer {
device_id: Id,
client: async_nats::Client,
agent_version: &'static str,
started_at: Instant,
}
impl CommandServer {
pub async fn run(self: Arc<Self>) -> Result<()> {
let subject = format!("device-commands.{}.>", self.device_id);
let mut sub = self.client.subscribe(subject).await?;
while let Some(msg) = sub.next().await {
self.dispatch(msg).await;
}
Ok(())
}
async fn dispatch(&self, msg: async_nats::Message) {
let verb = msg.subject.rsplit('.').next();
match verb {
Some("ping") => self.reply_ping(&msg).await,
_ => self.reply_error(&msg, ErrorKind::BadRequest, "unknown verb").await,
}
}
async fn reply_ping(&self, msg: &async_nats::Message) {
let reply = PingReply {
device_id: self.device_id.clone(),
agent_version: env!("CARGO_PKG_VERSION").to_string(),
uptime_s: self.started_at.elapsed().as_secs(),
};
if let Some(inbox) = &msg.reply {
let _ = self.client.publish(inbox.clone(), serde_json::to_vec(&reply)?.into()).await;
}
}
}
```
Wired into `main.rs` as a new arm of the existing `tokio::select!`. Future verbs slot into `dispatch`.
## Operator-side client (ping-only scaffold)
`fleet/harmony-fleet-operator/src/commands.rs`:
```rust
pub struct FleetCommandsClient {
nc: async_nats::Client,
default_timeout: Duration,
}
pub enum CommandError {
DeviceOffline, // 503 no_responders
Timeout,
BadReply(serde_json::Error),
Nats(async_nats::Error),
}
impl FleetCommandsClient {
pub fn new(nc: async_nats::Client) -> Self;
pub async fn ping(&self, device_id: &str) -> Result<PingReply, CommandError>;
}
```
`ping` uses `nc.request()` (relies on `no_responders` default-on in async-nats). Timeout: 5s. Decodes JSON reply into `PingReply`.
## Test ordering & PR slicing
**PR 1 (this plan):**
- `harmony-fleet-e2e` crate scaffolding
- `harmony-reconciler-contracts::commands` (ping types only)
- Agent: `runtime_enabled` config flag + `command_server.rs` (ping only)
- Operator: `commands.rs` (ping only)
- New `FleetAgentPodScore` (or inline manifest) for pod-based agents
- New `MockOidcScore` for the auth callout's issuer
- `tests/ping.rs` — passing
- Agent Dockerfile (new)
**PR 2** (after PR 1 merges):
- `tests/ping_offline.rs` (no_responders → DeviceOffline)
- Refactor `fleet_e2e_demo` to delegate to `harmony-fleet-e2e` with `AgentTarget::Vm` + `OidcProvider::RealZitadel`
**PR 3 (logs):**
- Wire types for `Verb::Logs` + `LogsReq` + `LogChunk`
- Agent handler invoking `podman_api::Containers::logs`
- Operator client streaming method
- `tests/logs.rs`
**PR 4 (exec):**
- Wire types for `Verb::Exec` + `ExecReq` + `ExecReply`
- Agent handler with container-only default + host-exec policy gate
- Operator client
- `tests/exec.rs`
**PR 5+**: web frontend wiring, CLI subcommands.
## Open questions for review
1. **Auth mode default** — Callout-with-mock-OIDC (slower, exercises real auth path), or UserPass (faster, doesn't test auth)? Plan picks Callout. UserPass available via env or `StackOptions`.
2. **Mock OIDC fixture** — build into the harness, or use an existing crate? I haven't found a small-enough off-the-shelf one; recommend hand-rolled ~200 LOC (uses `jsonwebtoken`).
3. **Image hash strategy** — content-hash of `Cargo.lock` + crate source (skip rebuild if matching tag exists)? Or always rebuild and rely on Docker layer cache? Plan: content-hash, with `FLEET_E2E_FORCE_REBUILD=1` escape hatch.
4. **Cluster lifecycle** — harness assumes the k3d cluster already exists (or auto-creates one named `fleet-e2e`). Should it also offer a `Stack::bring_up_isolated_cluster()` that creates+destroys the whole cluster per test? Plan: no, namespace isolation is enough; clusters are heavy.
5. **Ping reply shape** — `PingReply { device_id, agent_version, uptime_s }` is minimal. Add anything else useful for a health-check (memory, podman socket status, current desired-state revision)? Easy to extend later; v1 keeps it minimal.
6. **Subject choice** — `device-commands.<id>.ping` (matches the existing callout permission template). Alternative `harmony.device.<id>.cmd.ping` (matches the doc's verbatim suggestion) would require updating the callout permissions. Plan picks the existing `device-commands.<id>.ping` subject and notes the doc's `harmony.device.*` is the same idea with a different prefix; no callout change needed.
## What you'll see when you run the green ping test
```
$ cargo test -p harmony-fleet-e2e --test ping
Compiling harmony-fleet-e2e v0.1.0
Finished test [unoptimized + debuginfo] target(s) in 12.4s
Running tests/ping.rs
running 1 test
[e2e] building images: callout, operator, agent (cached, skipping rebuild)
[e2e] sideloading 3 images into k3d cluster fleet-e2e
[e2e] namespace: e2e-7d3a91f4
[e2e] deploying mock-oidc, nats, callout, operator, agent
[e2e] all pods ready in 7.2s
[e2e] NATS: nats://localhost:30422
[e2e] admin token: eyJhbGc...
test operator_can_ping_agent ... ok
test result: ok. 1 passed; 0 failed; 0 ignored; finished in 9.8s
[e2e] tearing down namespace e2e-7d3a91f4
```
Target: green test in <15s end-to-end, with subsequent runs hitting <10s thanks to image cache + cluster reuse.

159
fleet/README.md Normal file
View File

@@ -0,0 +1,159 @@
# Harmony Fleet
IoT / decentralized-edge orchestration for harmony. A fleet stack is:
| Component | Crate | Role |
|---|---|---|
| **Operator** | [`harmony-fleet-operator`](harmony-fleet-operator/) | Watches `Deployment` CRs, writes desired state into NATS JetStream KV, aggregates device state back into CR status. Runtime binary; no `harmony` dep. |
| **Agent** | [`harmony-fleet-agent`](harmony-fleet-agent/) | One per device. Watches the desired-state KV, drives the local runtime (podman today), publishes heartbeats + per-deployment state, answers `device-commands.*` request/reply. |
| **Auth** | [`harmony-fleet-auth`](harmony-fleet-auth/) | Shared NATS credential plumbing — `TomlShared` (dev) and `ZitadelJwt` (prod with auth-callout). |
| **Deploy** | [`harmony-fleet-deploy`](harmony-fleet-deploy/) | The canonical deploy crate. Imports `harmony` and exposes one `*Score` per component (`FleetOperatorScore`, `FleetAgentScore`, `FleetNatsScore`, `FleetServerScore`). Both the production CLI and the e2e harness compose these — see [ADR-023](../docs/adr/023-deploy-architecture.md). |
| **E2E harness** | [`harmony-fleet-e2e`](harmony-fleet-e2e/) | Brings the stack up in a fresh k3d namespace and runs integration tests against it. |
The on-the-wire types both ends agree on (KV bucket names, key formats, command-protocol payloads) live in [`../harmony-reconciler-contracts`](../harmony-reconciler-contracts/).
## Architecture in one line
`FleetOperatorScore`, `FleetAgentScore`, etc. are real Rust types with capability-bound `Topology` parameters. Production deploys, the e2e harness, and any future control-plane tool all compose the **same** Scores; the only thing that changes is the `Topology` instance. **No handrolled YAML or imperative manifest factories anywhere.** Read [ADR-023](../docs/adr/023-deploy-architecture.md) before adding deploy logic.
---
## Quickstart — run the e2e ping test
The fastest path to a green fleet stack on your laptop. Requires `podman`, `kubectl`, and `helm` on `$PATH`; everything else (`k3d`, the NATS chart, all images) is fetched / built on demand.
```bash
HARMONY_FLEET_E2E=1 cargo test -p harmony-fleet-e2e --test ping -- --nocapture
```
What it does, in order:
1. Ensures a `fleet-e2e` k3d cluster exists (creates one if not). NodePort `30423` on the host forwards to NATS inside the cluster.
2. Builds `harmony-fleet-agent` in release mode, packages it into `localhost/harmony-fleet-agent:e2e`, and sideloads the image into the k3d cluster's containerd store.
3. Mints a per-bring-up namespace `e2e-<uuid8>` and prunes any leftover `e2e-*` namespaces from prior runs (NodePort `30423` is cluster-scoped, so a stuck `Terminating` namespace would block the new bring-up — the prune waits up to 90 s for full cleanup before proceeding).
4. Deploys NATS via `FleetNatsScore` (helm chart, JetStream on, static admin/device users, NodePort Service).
5. Waits for NATS to be reachable from the host on `nats://localhost:30423` (admin/e2e-admin).
6. Deploys one `FleetAgentScore { target: Pod }` — runs with `runtime_enabled = false` so it skips podman and only runs the command-server + heartbeat loop.
7. Waits for the agent Deployment to be Ready.
8. The test publishes `device-commands.<device_id>.ping` via `FleetCommandsClient::ping` and asserts the agent replies with `{ device_id, agent_version, uptime_s }`.
Cold first run: ~80 s (release build of the agent dominates). Warm: ~25 s.
### Useful env knobs
| Var | Effect |
|---|---|
| `HARMONY_FLEET_E2E=1` | Required. Without it the test is skipped — keeps `cargo test --workspace` cheap on machines without k3d. |
| `FLEET_E2E_KEEP=1` | Skip namespace teardown on Drop. Lets you `kubectl -n e2e-<…> logs deploy/…` after a failure. The next run prunes it. |
| `RUST_LOG=info` | Or `debug` for the per-message `command dispatch` traces inside `harmony-fleet-agent::command_server`. |
### Connecting to NATS while the stack is up
```bash
# Host-side, via the NodePort
nats://localhost:30423 # user=admin pass=e2e-admin (full access)
nats://localhost:30423 # user=device pass=e2e-device (device permissions)
```
```bash
# In-cluster, from any Pod in the same namespace
nats://fleet-nats.e2e-<uuid8>.svc.cluster.local:4222
```
`FLEET_E2E_KEEP=1` + the harness's stdout line `[e2e] NATS: nats://127.0.0.1:30423 …` is the path most tests will take — leave the harness running, point a NATS client at that URL.
### Inspecting the agent
```bash
# Find your namespace
kubectl get ns -l harmony.io/managed-by=fleet-e2e
# Tail the agent
kubectl -n e2e-<uuid8> logs deploy/fleet-agent-<device-id> -f
# Tail NATS (StatefulSet, not Deployment)
kubectl -n e2e-<uuid8> logs sts/fleet-nats -c nats -f
# Send a ping by hand (requires the `nats` CLI:
# https://github.com/nats-io/natscli/releases)
nats --server nats://localhost:30423 --user admin --password e2e-admin \
request "device-commands.vm-device-00-<uuid8>.ping" ""
```
Or, if you don't want to install the `nats` binary, run it from the `nats-box` image:
```
alias natsbox='podman run --network=host --rm docker.io/natsio/nats-box:latest nats --server nats://localhost:30423 --user admin --password e2e-admin'
```
You should see something like `{"device_id":"vm-device-00-<uuid8>","agent_version":"0.1.0","uptime_s":12}`.
### Cleaning up
The shared `OnceCell` in `harmony-fleet-e2e` lives for the test binary's lifetime, so namespaces survive a `cargo test` exit (the static is never explicitly dropped). The next `cargo test` invocation prunes them. To force a manual cleanup:
```bash
kubectl delete ns -l harmony.io/managed-by=fleet-e2e
# wipe the whole cluster:
k3d cluster delete fleet-e2e
```
---
## Production deploys
`harmony-fleet-deploy` is the binary that puts the fleet stack on a real cluster (OKD, vanilla k8s, anywhere `K8sAnywhereTopology` can reach). It composes `FleetNatsScore` + `FleetOperatorScore` + `FleetAgentScore` against the topology you point it at.
```bash
# Default: K8sAnywhereTopology against whatever KUBECONFIG points at
cargo run -p harmony-fleet-deploy -- \
--namespace fleet-system \
--operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev \
--agent-image hub.nationtech.io/harmony/harmony-fleet-agent:dev \
--agent-device-id fleet-agent-01
# Pick a single component with the harmony_cli filter
cargo run -p harmony-fleet-deploy -- \
--namespace fleet-system \
-- --filter FleetOperatorScore --all
```
`harmony-fleet-deploy` reads its full config from CLI flags + env vars (`FLEET_NAMESPACE`, `FLEET_OPERATOR_IMAGE`, …). The minimal-CLI surface is deliberate — per ADR-023 the long-term answer is a plugin-discovery layer over `harmony-*` binaries; until that lands, deploy crates stay small and use the existing `harmony_cli`.
### Connecting to the operator
The operator runs as a single-replica Deployment in `--namespace` (default `fleet-system`).
```bash
# Tail logs
kubectl -n fleet-system logs deploy/harmony-fleet-operator -f
# Port-forward the embedded web dashboard (web-frontend feature)
kubectl -n fleet-system port-forward deploy/harmony-fleet-operator 18080:18080
# Or run the dashboard standalone with seeded fake data — no NATS, no cluster
cargo run -p harmony-fleet-operator --features web-frontend -- serve-web --mock
# browse http://127.0.0.1:18080
```
---
## Existing manual rehearsal — `examples/fleet_e2e_demo`
`examples/fleet_e2e_demo` brings up a *fuller* stack than the e2e harness — real Zitadel, the auth-callout, libvirt VM agents over SSH — at the cost of a 5-min cold start. It's the manual rehearsal flow; not what you want during the dev loop. See the example's [`RUNBOOK.md`](../examples/fleet_e2e_demo/RUNBOOK.md).
The harness and the rehearsal will converge: the [follow-up PR](#whats-next) lifts `FleetCalloutScore` + a mock-OIDC fixture into `harmony-fleet-deploy`, at which point the harness can run the full production auth path in ~30 s instead of 5 min, and `fleet_e2e_demo` thins down to a caller over the same Scores.
---
## What's next
This branch lands the deploy-architecture cleanup (ADR-023), the per-component Scores, and the ping path. Slated immediately after:
1. **Zitadel + auth callout in `harmony-fleet-deploy`.** New `FleetCalloutScore` (preset over `NatsAuthCalloutScore`) plus an in-cluster mock-OIDC fixture so the e2e harness can exercise the real auth-callout code path without paying Zitadel's 5-min cold-start cost. The harness's `AuthMode::Callout` variant is already on the public API for this.
2. **Operator pod in the e2e harness.** `FleetOperatorScore` is already in the deploy crate; wiring it into the harness gives integration tests against the actual `Deployment` / `Device` reconcile loops.
3. **`Verb::Logs` and `Verb::Exec`** — the next two verbs on the `device-commands.*` protocol. Same harness, same TDD shape as `ping`.
4. **CRD types out of `harmony` core.** `harmony::modules::fleet::operator::crd` is the last fleet-deploy thing still living in `harmony`. The `ReconcileScore` payload coupling is the only blocker.
5. **Smoke-test contract.** ADR-023 principle 4 — every Score blocks on a smoke test before `deploy` returns success. Today the e2e suite plays that role; the trait/companion shape lands once it's been validated in practice.
See [`PLAN_requests_over_nats.md`](PLAN_requests_over_nats.md) for the full TDD-style plan this branch implements.

View File

@@ -0,0 +1,23 @@
[package]
name = "harmony-fleet-agent"
version = "0.1.0"
edition = "2024"
rust-version = "1.85"
[dependencies]
harmony-fleet-auth = { path = "../harmony-fleet-auth" }
harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
harmony = { path = "../../harmony", default-features = false, features = ["podman"] }
async-nats = { workspace = true }
async-trait = { workspace = true }
chrono = { workspace = true }
futures-util = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true }
toml = { workspace = true }
thiserror = { workspace = true }

View File

@@ -0,0 +1,49 @@
# Multi-stage container build for harmony-fleet-agent.
#
# Build context is the workspace root (the agent's Cargo.toml has
# `path = "../../harmony"` deps that only resolve when the whole
# workspace is in scope). Invoke from the repo root:
#
# docker build -f fleet/harmony-fleet-agent/Dockerfile \
# -t hub.nationtech.io/harmony/harmony-fleet-agent:<tag> .
#
# Both stages are pinned to bookworm for a matched glibc — the
# rust:slim image follows Debian's latest stable, and a binary built
# against trixie's glibc 2.40 fails to start on a bookworm runtime
# (`GLIBC_2.39 not found`). This is the same lesson the operator
# Dockerfile encodes; keep the two pinned to the same Debian release.
#
# The e2e harness uses a faster host-build + single-stage path
# (`fleet/harmony-fleet-e2e/src/images.rs`); this Dockerfile is the
# canonical recipe for production registries.
FROM docker.io/rust:1.94-slim-bookworm AS builder
RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config \
ca-certificates \
libssl-dev \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY . .
RUN cargo build --release --locked -p harmony-fleet-agent
FROM docker.io/library/debian:bookworm-slim
# ca-certificates: outbound TLS to NATS over wss:// when the agent is
# configured against a TLS-terminated NATS endpoint. kube-rs is not
# used at runtime on the agent; async-nats uses rustls.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/target/release/harmony-fleet-agent /usr/local/bin/harmony-fleet-agent
# Non-root runtime. 65532 is the `nonroot` UID convention from
# distroless. Pairs with `securityContext.runAsNonRoot: true` in
# whatever Pod spec the harness or production helm chart applies.
USER 65532:65532
ENTRYPOINT ["/usr/local/bin/harmony-fleet-agent"]

View File

@@ -0,0 +1,153 @@
//! Agent-side request/reply command server.
//!
//! Subscribes to `device-commands.<device_id>.>` and dispatches one
//! handler per verb. Single-shot replies for v1; streaming verbs
//! (logs, exec follow-up) will reuse this loop and write multiple
//! frames to the inbox, terminating with the `X-Harmony-Final`
//! header.
//!
//! Runs alongside the KV reconciler in the agent's top-level
//! `tokio::select!`. Independent of the podman runtime: when
//! `[agent] runtime_enabled = false`, the reconciler is skipped but
//! the command server still runs (ping is useful for "is this device
//! online" health-checks regardless).
use std::sync::Arc;
use std::time::Instant;
use async_nats::Client;
use async_nats::Subject;
use futures_util::StreamExt;
use harmony_reconciler_contracts::{
HDR_REQUEST_ID, Id, PingReply, Verb, device_command_subscription,
};
use serde::Serialize;
use thiserror::Error;
/// Request/reply command server for a single device.
///
/// Owns the NATS client used both to subscribe to this device's command
/// subjects and to publish replies, plus the facts reported by `ping`.
pub struct CommandServer {
    /// Identity of the device this agent runs on. Selects the subscription
    /// subject and is echoed back in ping replies.
    device_id: Id,
    /// Core NATS client used for the subscription and for replies.
    client: Client,
    /// Crate version baked in at compile time (`CARGO_PKG_VERSION`).
    agent_version: &'static str,
    /// When this server was constructed; basis for the reported uptime.
    started_at: Instant,
}

impl CommandServer {
    /// Build a server for `device_id` on top of an already-connected client.
    /// Nothing is subscribed until [`CommandServer::run`] is awaited.
    pub fn new(device_id: Id, client: Client) -> Self {
        Self {
            device_id,
            client,
            agent_version: env!("CARGO_PKG_VERSION"),
            started_at: Instant::now(),
        }
    }

    /// Subscribe to this device's command subjects and handle messages until
    /// the subscription stream ends.
    ///
    /// Each message is dispatched on its own spawned task, so a slow handler
    /// does not delay the next message. Per-message failures are logged and
    /// never stop the loop.
    ///
    /// # Errors
    ///
    /// Returns [`CommandServerError::Subscribe`] when the initial subscribe
    /// fails. A stream that simply ends yields `Ok(())` after a warning.
    pub async fn run(self: Arc<Self>) -> Result<(), CommandServerError> {
        let subject = device_command_subscription(&self.device_id.to_string());
        tracing::info!(subject = %subject, "command server subscribing");
        let mut sub = self.client.subscribe(subject.clone()).await.map_err(|e| {
            CommandServerError::Subscribe {
                subject: subject.clone(),
                source: e,
            }
        })?;
        while let Some(msg) = sub.next().await {
            let me = self.clone();
            tokio::spawn(async move {
                match me.dispatch(msg).await {
                    Ok(()) => tracing::debug!("command handled"),
                    Err(e) => {
                        tracing::error!(command_error = %e, "failed to handle command")
                    }
                };
            });
        }
        tracing::warn!("command server subscription ended");
        Ok(())
    }

    /// Route one command message to the handler for its verb.
    ///
    /// The verb is the last dot-separated token of the subject
    /// (`device-commands.<id>.<verb>`). A malformed subject or an unknown
    /// verb is reported as a [`CommandError`] — never a panic — so a verb
    /// added, removed or renamed on the sending side cannot take the agent
    /// down. `ping` is the only verb currently handled.
    ///
    /// NOTE(review): no reply is published on the error paths, so the
    /// requester presumably only observes a timeout — confirm that is the
    /// intended contract.
    async fn dispatch(&self, msg: async_nats::Message) -> Result<(), CommandError> {
        // `rsplit_once` yields `None` for a subject without any dot, which
        // makes the `InvalidFormat` path reachable; an empty trailing token
        // is rejected the same way.
        let verb_token = match msg.subject.rsplit_once('.') {
            Some((_, verb)) if !verb.is_empty() => verb,
            _ => return Err(CommandError::InvalidFormat(msg.subject.to_string())),
        };
        let request_id = msg
            .headers
            .as_ref()
            .and_then(|h| h.get(HDR_REQUEST_ID))
            .map(|v| v.as_str().to_string());
        tracing::debug!(
            subject = %msg.subject,
            verb = %verb_token,
            request_id = ?request_id,
            "command dispatch",
        );
        // Without a reply inbox there is nowhere to send an answer.
        let reply_to = match msg.reply.clone() {
            Some(inbox) => inbox,
            None => {
                tracing::warn!(verb = %verb_token, "command without reply inbox; ignoring");
                return Err(CommandError::MissingReplyInbox);
            }
        };
        if verb_token == Verb::Ping.as_subject_token() {
            self.reply_ping(reply_to).await?;
            Ok(())
        } else {
            tracing::warn!(verb = %verb_token, "unknown command verb");
            Err(CommandError::UnknownVerb(verb_token.to_string()))
        }
    }

    /// Publish a JSON-encoded [`PingReply`] (device id, agent version,
    /// uptime in seconds) to the requester's inbox.
    async fn reply_ping(&self, reply_to: Subject) -> Result<(), CommandError> {
        let reply = PingReply {
            device_id: self.device_id.clone(),
            agent_version: self.agent_version.to_string(),
            uptime_s: self.started_at.elapsed().as_secs(),
        };
        let payload = serde_json::to_vec(&reply).map_err(CommandError::SerializeReply)?;
        self.client
            .publish(reply_to, payload.into())
            .await
            .map_err(|e| CommandError::PublishReply(e.to_string()))
    }
}
/// Failure modes the per-message dispatcher can report. Stays
/// `pub(crate)` for now — the run loop logs and continues on each
/// variant rather than surfacing them to a caller.
#[derive(Debug, Error, Serialize)]
pub(crate) enum CommandError {
/// The subject did not yield a verb token; carries the offending subject.
#[error("invalid command subject: {0}")]
InvalidFormat(String),
/// The verb token does not match any verb this agent handles.
#[error("unknown verb: {0}")]
UnknownVerb(String),
/// The message carried no reply subject, so no answer can be sent.
#[error("command message had no reply inbox")]
MissingReplyInbox,
/// Encoding the reply payload as JSON failed.
#[error("serializing reply: {0}")]
// `serde_json::Error` is not `Serialize`, so this variant is excluded
// from the derived `Serialize` impl with `#[serde(skip)]`. The original
// error is still rendered by `Display`.
// NOTE(review): serde documents that a skipped variant cannot be
// serialized (attempting it is an error rather than a flattened value) —
// confirm that is acceptable once something serializes `CommandError`.
#[serde(skip)]
SerializeReply(serde_json::Error),
/// Publishing the reply failed; the transport error is kept as text.
#[error("publishing reply: {0}")]
PublishReply(String),
}
/// Surface returned by [`CommandServer::run`]. The only currently
/// failing operation is the initial subscribe; per-message errors
/// stay inside the loop and are logged.
#[derive(Debug, Error)]
pub enum CommandServerError {
/// The initial subscription could not be established.
#[error("subscribing to {subject}")]
Subscribe {
/// Subject pattern the server tried to subscribe to.
subject: String,
/// Underlying async-nats error, exposed through `Error::source`.
#[source]
source: async_nats::SubscribeError,
},
}

View File

@@ -0,0 +1,9 @@
//! Agent-side config loading.
//!
//! The schema (`AgentConfig`, `AgentSection`, `NatsSection`,
//! `CredentialsSection`) lives in `harmony-fleet-auth` so it is shared
//! with the deploy crate by type, not by string interpolation. This
//! file re-exports those types so existing `crate::config::*` call
//! sites in the agent binary keep working.
pub use harmony_fleet_auth::{AgentConfig, load_config};

View File

@@ -0,0 +1,163 @@
//! Agent-side publish surface.
//!
//! Thin wrapper around three KV buckets: [`BUCKET_DEVICE_INFO`],
//! [`BUCKET_DEVICE_STATE`], [`BUCKET_DEVICE_HEARTBEAT`].
//!
//! Failure mode: log and swallow. The KV is the source of truth —
//! a dropped put gets corrected on the next reconcile transition
//! or operator watch reconnection.
use async_nats::jetstream::{self, kv};
use harmony_reconciler_contracts::{
BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, DeploymentName,
DeploymentState, DeviceInfo, HeartbeatPayload, Id, InventorySnapshot, device_heartbeat_key,
device_info_key, device_state_key,
};
use std::collections::BTreeMap;
pub struct FleetPublisher {
device_id: Id,
/// Raw NATS client kept around so we can publish on direct
/// (non-JetStream) subjects like `device-state.<device_id>` for
/// live observers — the KV writes are storage-and-watch, the
/// direct subject is fan-out.
client: async_nats::Client,
info_bucket: kv::Store,
state_bucket: kv::Store,
heartbeat_bucket: kv::Store,
}
impl FleetPublisher {
/// Open every bucket the agent needs, creating those that don't
/// exist yet. Idempotent with operator-side creation.
pub async fn connect(client: async_nats::Client, device_id: Id) -> anyhow::Result<Self> {
let jetstream = jetstream::new(client.clone());
let info_bucket = jetstream
.create_key_value(kv::Config {
bucket: BUCKET_DEVICE_INFO.to_string(),
// If this is as I think, it would be useful to keep a history of the last 10 device
// info, with a timestamp
history: 1,
..Default::default()
})
.await?;
let state_bucket = jetstream
.create_key_value(kv::Config {
bucket: BUCKET_DEVICE_STATE.to_string(),
// If this is as I think, it would be useful to keep a history of the last 10 states
// a device had, with a timestamp
history: 1,
..Default::default()
})
.await?;
let heartbeat_bucket = jetstream
.create_key_value(kv::Config {
bucket: BUCKET_DEVICE_HEARTBEAT.to_string(),
history: 1,
..Default::default()
})
.await?;
Ok(Self {
device_id,
client,
info_bucket,
state_bucket,
heartbeat_bucket,
})
}
/// Publish the agent's static-ish facts. Called at startup and
/// on label change.
pub async fn publish_device_info(
&self,
labels: BTreeMap<String, String>,
inventory: Option<InventorySnapshot>,
) {
let info = DeviceInfo {
device_id: self.device_id.clone(),
labels,
inventory,
updated_at: chrono::Utc::now(),
};
let key = device_info_key(&self.device_id.to_string());
match serde_json::to_vec(&info) {
Ok(payload) => {
if let Err(e) = self.info_bucket.put(&key, payload.into()).await {
tracing::warn!(%key, error = %e, "publish_device_info: kv put failed");
}
}
Err(e) => tracing::warn!(error = %e, "publish_device_info: serialize failed"),
}
}
/// Tiny liveness ping. Called every 30s.
pub async fn publish_heartbeat(&self) {
let hb = HeartbeatPayload {
device_id: self.device_id.clone(),
at: chrono::Utc::now(),
};
let key = device_heartbeat_key(&self.device_id.to_string());
match serde_json::to_vec(&hb) {
Ok(payload) => {
if let Err(e) = self.heartbeat_bucket.put(&key, payload.into()).await {
tracing::debug!(%key, error = %e, "publish_heartbeat: kv put failed");
}
}
Err(e) => tracing::warn!(error = %e, "publish_heartbeat: serialize failed"),
}
}
/// Persist the authoritative current phase for a `(device,
/// deployment)` pair. The operator's watch on the `device-state`
/// bucket picks up this put and updates CR status counters.
/// Also fans out the same payload on `device-state.<device_id>`
/// for live observers that don't want to consume the KV stream.
pub async fn write_deployment_state(&self, state: &DeploymentState) {
let key = device_state_key(&self.device_id.to_string(), &state.deployment);
match serde_json::to_vec(state) {
Ok(payload) => {
if let Err(e) = self.state_bucket.put(&key, payload.clone().into()).await {
tracing::warn!(%key, error = %e, "write_deployment_state: kv put failed");
}
self.publish_direct_state(payload).await;
}
Err(e) => tracing::warn!(error = %e, "write_deployment_state: serialize failed"),
}
}
/// Emit a tiny presence pulse on `device-state.<device_id>` so live
/// observers (admin tooling, dashboards) see the device is alive
/// without subscribing to JetStream. Called from the heartbeat
/// loop alongside the KV heartbeat write — same cadence, two
/// transports.
pub async fn publish_state_pulse(&self) {
let pulse = serde_json::json!({
"device_id": self.device_id.to_string(),
"kind": "heartbeat",
"at": chrono::Utc::now(),
});
match serde_json::to_vec(&pulse) {
Ok(payload) => self.publish_direct_state(payload).await,
Err(e) => tracing::warn!(error = %e, "publish_state_pulse: serialize failed"),
}
}
async fn publish_direct_state(&self, payload: Vec<u8>) {
let subject = format!("device-state.{}", self.device_id);
if let Err(e) = self.client.publish(subject.clone(), payload.into()).await {
tracing::debug!(%subject, error = %e, "publish_direct_state: publish failed");
}
}
/// Delete the authoritative current-phase entry, e.g. when the
/// Deployment CR is removed and the agent has torn down the
/// container.
pub async fn delete_deployment_state(&self, deployment: &DeploymentName) {
let key = device_state_key(&self.device_id.to_string(), deployment);
if let Err(e) = self.state_bucket.delete(&key).await {
tracing::debug!(%key, error = %e, "delete_deployment_state: kv delete failed");
}
}
}

View File

@@ -0,0 +1,320 @@
mod command_server;
mod config;
mod fleet_publisher;
mod reconciler;
use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, Error, Result};
use clap::Parser;
use config::AgentConfig;
use harmony_fleet_auth::{
CredentialSource, connect_options_with_credentials, credential_source_from_config,
};
/// Shared, reference-counted handle to the NATS [`CredentialSource`].
///
/// Type alias to keep function signatures readable. The auth callback
/// captures one `Arc<CredentialSource>` and clones it per invocation.
/// NOTE(review): that callback is presumably built inside
/// `connect_options_with_credentials` (harmony_fleet_auth); it is not
/// visible in this file.
type Creds = Arc<CredentialSource>;
use futures_util::StreamExt;
use harmony_reconciler_contracts::{
BUCKET_DESIRED_STATE, Id, InventorySnapshot, desired_state_watch_filter,
};
use harmony::inventory::Inventory;
use harmony::modules::podman::PodmanTopology;
use harmony::topology::Topology;
use crate::command_server::CommandServer;
use crate::fleet_publisher::FleetPublisher;
use crate::reconciler::Reconciler;
/// ROADMAP §5.6 — agent polls podman every 30s as ground truth; KV watch
/// events are accelerators.
///
/// Passed to `Reconciler::run_periodic` in `main` as the tick period.
const RECONCILE_INTERVAL: Duration = Duration::from_secs(30);
// Command-line interface of the agent binary.
//
// Documented with `//` rather than `///` on purpose: clap's derive turns
// doc comments into help text, so `///` here would change `--help` output.
#[derive(Parser)]
#[command(name = "fleet-agent-v0", about = "IoT agent for Raspberry Pi devices")]
struct Cli {
// Path of the agent's TOML config file. Settable with `--config` or the
// `FLEET_AGENT_CONFIG` environment variable.
#[arg(
long,
env = "FLEET_AGENT_CONFIG",
// FIXME this should be a constant from a config, not just hardcoded here as we need the
// installation scripts and other bits to know about this file location.
default_value = "/etc/fleet-agent/config.toml"
)]
config: std::path::PathBuf,
}
/// Open the agent's NATS connection against the configured server URLs,
/// authenticating through the supplied credential source.
///
/// The client pings the server every 10 seconds and forwards connection
/// lifecycle events to `tracing`, so disconnects and reconnect attempts
/// show up in the agent's logs.
///
/// # Errors
///
/// Returns the connect error when the initial connection cannot be made.
async fn connect_nats(cfg: &AgentConfig, creds: Creds) -> Result<async_nats::Client> {
    /// Translate one connection lifecycle event into a log line.
    fn log_event(event: async_nats::Event) {
        use async_nats::Event;
        match event {
            Event::Connected => tracing::info!("NATS connected"),
            Event::Disconnected => tracing::warn!("NATS disconnected, will reconnect"),
            Event::LameDuckMode => tracing::warn!("NATS server entered lame-duck mode"),
            Event::SlowConsumer(sid) => {
                tracing::warn!(sid = %sid, "NATS slow consumer")
            }
            Event::ServerError(e) => tracing::error!(error = %e, "NATS server error"),
            Event::ClientError(e) => tracing::error!(error = %e, "NATS client error"),
            Event::Closed => tracing::error!("NATS connection closed"),
            other => tracing::debug!(?other, "NATS event"),
        }
    }

    let urls = &cfg.nats.urls;
    tracing::info!(device_id = %cfg.agent.device_id, "connecting to NATS {urls:?}");

    // The auth callback is invoked on every (re)connect, so a fresh
    // Zitadel access token is minted automatically when the cached one
    // is near-expiry — that is what keeps connectivity across token
    // rollovers and NATS pod restarts.
    //
    // The event callback is load-bearing for ops: a device that quietly
    // disconnects is exactly the failure mode we promise won't happen,
    // and operators need to see the reconnect attempts to debug it.
    let options = connect_options_with_credentials(creds)
        .ping_interval(Duration::from_secs(10))
        .event_callback(|event| async move { log_event(event) });

    let client = options.connect(urls.as_slice()).await?;
    tracing::info!(urls = ?cfg.nats.urls, "connected to NATS");
    Ok(client)
}
/// Follow this device's keys in the desired-state KV bucket and hand every
/// change to the reconciler.
///
/// `Put` entries go to [`Reconciler::apply`]; `Delete` and `Purge` entries
/// go to [`Reconciler::remove`]. Failures of either call, and errors
/// yielded by the watch stream itself, are logged at `warn` and the loop
/// carries on. The function returns `Ok(())` only when the stream ends.
///
/// NOTE(review): depending on the async-nats version, `watch` may deliver
/// only updates made after the watch starts. If entries that already exist
/// when the agent boots must be applied, `watch_with_history` may be
/// required — confirm against the pinned async-nats release.
///
/// # Errors
///
/// Fails when the bucket cannot be opened/created or the watch cannot be
/// started.
async fn watch_desired_state(
    client: async_nats::Client,
    device_id: Id,
    reconciler: Arc<Reconciler>,
) -> Result<()> {
    let js = async_nats::jetstream::new(client);
    let kv_config = async_nats::jetstream::kv::Config {
        bucket: BUCKET_DESIRED_STATE.to_string(),
        ..Default::default()
    };
    let bucket = js.create_key_value(kv_config).await?;

    let key_filter = desired_state_watch_filter(&device_id.to_string());
    tracing::info!(filter = %key_filter, "watching KV keys");
    let mut watch = bucket.watch(&key_filter).await?;

    loop {
        let entry = match watch.next().await {
            // Stream exhausted: nothing more will ever arrive.
            None => break,
            Some(Ok(entry)) => entry,
            Some(Err(e)) => {
                tracing::warn!(error = %e, "watch error");
                continue;
            }
        };
        tracing::debug!(key = %entry.key, "bucket watch new value {entry:?}");
        match entry.operation {
            async_nats::jetstream::kv::Operation::Put => {
                let applied = reconciler.apply(&entry.key, &entry.value).await;
                if let Err(e) = applied {
                    tracing::warn!(key = %entry.key, error = %e, "apply failed");
                }
            }
            async_nats::jetstream::kv::Operation::Delete
            | async_nats::jetstream::kv::Operation::Purge => {
                let removed = reconciler.remove(&entry.key).await;
                if let Err(e) = removed {
                    tracing::warn!(key = %entry.key, error = %e, "remove failed");
                }
            }
        }
    }
    Ok(())
}
/// Liveness-only loop. Every 30 seconds it writes a `HeartbeatPayload`
/// into the `device-heartbeat` bucket and then emits a presence pulse on
/// `device-state.<device_id>` for live (non-JetStream) observers.
///
/// Kept apart from per-deployment state writes so routine pings don't
/// churn the device-state bucket or its watch subscribers; the
/// direct-subject pulse is ordinary core NATS pub/sub and accumulates no
/// state anywhere. Never returns.
async fn publish_heartbeat_loop(fleet: Arc<FleetPublisher>) {
    const HEARTBEAT_PERIOD: Duration = Duration::from_secs(30);

    let mut ticker = tokio::time::interval(HEARTBEAT_PERIOD);
    // After a stall, resume the regular cadence instead of bursting to
    // catch up on missed ticks.
    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
    loop {
        ticker.tick().await;
        fleet.publish_heartbeat().await;
        fleet.publish_state_pulse().await;
    }
}
/// Build a one-shot snapshot of this host's static facts at agent startup.
///
/// The snapshot is published once, as part of the startup `DeviceInfo`; the
/// periodic heartbeat carries only a device id and a timestamp, so nothing
/// here is re-sent on every beat. If inventory ever needs refreshing, the
/// intended shape is to re-check it periodically and re-publish only when
/// something changed.
///
/// Facts that cannot be read degrade to neutral values instead of failing:
/// an empty kernel string, `0` cores, `0` MB of memory.
fn local_inventory(inventory: &Inventory) -> InventorySnapshot {
    let kernel = std::fs::read_to_string("/proc/sys/kernel/osrelease")
        .map(|s| s.trim().to_string())
        .unwrap_or_default();
    // Checked conversion instead of `as`: saturate rather than silently
    // truncate if the count ever exceeded `u32::MAX`.
    let cpu_cores = std::thread::available_parallelism()
        .map(|n| u32::try_from(n.get()).unwrap_or(u32::MAX))
        .unwrap_or(0);

    InventorySnapshot {
        hostname: inventory.location.name.clone(),
        arch: std::env::consts::ARCH.to_string(),
        os: std::env::consts::OS.to_string(),
        kernel,
        cpu_cores,
        memory_mb: sys_memory_total_mb().unwrap_or(0),
        agent_version: env!("CARGO_PKG_VERSION").to_string(),
    }
}
/// Total system RAM as reported by the `MemTotal:` line of
/// `/proc/meminfo`, divided by 1024 (the file reports the figure in kB).
///
/// Returns `None` when the file cannot be read (non-Linux hosts, `/proc`
/// not mounted), when no `MemTotal:` line exists, or when the value on
/// that line is not a valid integer. Hand-rolled to avoid pulling in a
/// sys-info crate for a single field.
fn sys_memory_total_mb() -> Option<u64> {
    let meminfo = std::fs::read_to_string("/proc/meminfo").ok()?;
    // Only the first `MemTotal:` line is considered.
    let value_part = meminfo
        .lines()
        .find_map(|line| line.strip_prefix("MemTotal:"))?;
    let kib = value_part.split_whitespace().next()?.parse::<u64>().ok()?;
    Some(kib / 1024)
}
/// Agent entry point.
///
/// Startup order: logging → CLI/config → optional podman topology →
/// inventory → NATS connection → KV publish surface → one DeviceInfo
/// publish. It then runs the heartbeat loop, the command server and (only
/// when `runtime_enabled` is true) the desired-state watch and the periodic
/// reconcile concurrently, returning as soon as any of them finishes or a
/// SIGINT/SIGTERM arrives.
#[tokio::main]
async fn main() -> Result<()> {
// Default to `info` so the agent produces useful output without
// requiring `RUST_LOG` to be set anywhere — the systemd unit
// installed by `FleetDeviceSetupScore` does set it, but a
// hand-launched binary or a user who's overridden the unit
// shouldn't have to know that. `RUST_LOG` still overrides
// when set (e.g. `RUST_LOG=debug` for troubleshooting).
let filter = tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
tracing_subscriber::fmt().with_env_filter(filter).init();
let cli = Cli::parse();
let cfg = config::load_config(&cli.config)?;
tracing::info!(
device_id = %cfg.agent.device_id,
runtime_enabled = cfg.agent.runtime_enabled,
"fleet-agent-v0 starting",
);
let device_id = cfg.agent.device_id.clone();
// Podman is the agent's runtime backend for deploying workloads.
// When `runtime_enabled = false`, skip the socket entirely so the
// agent can run on hosts that don't ship podman (the in-cluster
// e2e harness deploys the agent as a Pod on containerd-only k3d
// nodes). The command server + heartbeat still run; only the
// reconciler depends on the topology.
let topology = if cfg.agent.runtime_enabled {
let t = Arc::new(
PodmanTopology::from_default_socket()
.map_err(|e| anyhow::anyhow!("failed to open podman socket: {e}"))?,
);
t.ensure_ready().await.context("podman socket not ready")?;
tracing::info!("podman socket ready");
Some(t)
} else {
// NOTE(review): with the runtime disabled the watch arm further down
// is a never-resolving future, so desired-state deliveries are not
// received at all rather than "logged and dropped" — the wording of
// this message looks stale; confirm and adjust.
tracing::warn!(
"runtime_enabled=false; skipping podman + reconciler. \
Desired-state KV deliveries will be logged and dropped."
);
None
};
let inventory = Arc::new(Inventory::from_localhost());
tracing::info!(hostname = %inventory.location.name, "inventory loaded");
let inventory_snapshot = local_inventory(&inventory);
let creds = credential_source_from_config(&cfg.credentials)
.context("building NATS credential source")?;
// A failed initial connection is logged and then aborts startup.
let client = connect_nats(&cfg, creds).await.map_err(|e| {
let msg = format!("Nats connection FAILED : {e}");
tracing::error!(msg);
Error::msg(msg)
})?;
// Publish surface. Opens the three KV buckets (idempotent
// creates). Must be live before the reconciler starts so
// writes on the first desired-state KV watch land on the wire.
let fleet = Arc::new(
FleetPublisher::connect(client.clone(), device_id.clone())
.await
.context("fleet publisher connect")?,
);
tracing::info!("fleet publisher ready");
// Publish DeviceInfo once at startup. Merge the config-declared
// labels with an always-on `device-id=<id>` default so every
// device is targetable by id even without explicit labels.
// Config labels win on key conflicts — operators can override
// `device-id` if they really want to (unusual but legal).
let mut startup_labels = cfg.labels.clone();
startup_labels
.entry("device-id".to_string())
.or_insert_with(|| device_id.to_string());
fleet
.publish_device_info(startup_labels, Some(inventory_snapshot.clone()))
.await;
// Reconciler exists only when a podman topology is available.
// Without it, the desired-state watch + periodic reconcile arms
// are replaced by pending-forever futures so `select!` only sees
// heartbeat + command server.
let reconciler: Option<Arc<Reconciler>> = topology.as_ref().map(|t| {
Arc::new(Reconciler::new(
device_id.clone(),
t.clone(),
inventory.clone(),
Some(fleet.clone()),
))
});
let command_server = Arc::new(CommandServer::new(device_id.clone(), client.clone()));
// Shutdown signals. A failure to install the SIGINT handler is ignored
// (`.ok()`); a failure to install the SIGTERM handler is returned as an
// error through the `sigterm` arm.
let ctrlc = async {
tokio::signal::ctrl_c().await.ok();
tracing::info!("received SIGINT, shutting down");
};
let sigterm = async {
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())?
.recv()
.await;
tracing::info!("received SIGTERM, shutting down");
Ok::<(), anyhow::Error>(())
};
// `let _ = …` does not move the value; this line only marks the binding
// as deliberately unused from here on.
let _ = inventory_snapshot; // consumed by the DeviceInfo publish above
// Both reconciler-dependent arms are boxed so the "real" and the
// "pending forever" variants share one type per arm.
let watch: std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send>> =
match reconciler.as_ref() {
Some(r) => Box::pin(watch_desired_state(
client.clone(),
device_id.clone(),
r.clone(),
)),
None => Box::pin(async {
std::future::pending::<()>().await;
Ok(())
}),
};
let reconcile: std::pin::Pin<Box<dyn std::future::Future<Output = ()> + Send>> =
match reconciler.as_ref() {
Some(r) => Box::pin(r.clone().run_periodic(RECONCILE_INTERVAL)),
None => Box::pin(std::future::pending::<()>()),
};
let heartbeat = publish_heartbeat_loop(fleet);
let commands = command_server.run();
// The first arm to complete wins and the remaining futures are dropped.
// Arms that yield a `Result` propagate their error out of `main` via `?`.
tokio::select! {
// Waiting on ctrlc in a select will automatically terminate other branches when
// ctrlc happens.
_ = ctrlc => {},
r = sigterm => { r?; }
r = watch => { r?; }
_ = reconcile => {}
_ = heartbeat => {}
r = commands => { r?; }
}
Ok(())
}

View File

@@ -0,0 +1,349 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Result;
use chrono::Utc;
use harmony_reconciler_contracts::{DeploymentName, DeploymentState, Id, Phase};
use tokio::sync::Mutex;
use harmony::inventory::Inventory;
use harmony::modules::podman::{PodmanTopology, PodmanV0Score, ReconcileScore};
use harmony::score::Score;
use crate::fleet_publisher::FleetPublisher;
/// Value stored in the reconciler's per-key cache: the last score that was
/// run successfully for one KV key. Populated by `apply` and consulted by
/// the 30-second periodic tick and the delete path.
struct CachedEntry {
/// Serialized score JSON. Used for string-compare idempotency per
/// ROADMAP §5.5 — cheaper and more deterministic than a hash.
serialized: String,
/// Parsed score. Cached so the periodic reconcile tick and delete
/// handlers don't have to re-parse the JSON.
score: PodmanV0Score,
}
pub struct Reconciler {
device_id: Id,
topology: Arc<PodmanTopology>,
inventory: Arc<Inventory>,
/// Keyed by NATS KV key (`<device>.<deployment>`). A single entry per
/// KV key — in v0 there is no fan-out from one key to many scores.
state: Mutex<HashMap<String, CachedEntry>>,
/// Current phase per deployment, used to decide whether a new
/// write to the `device-state` KV is needed.
///
/// NOTE : this feels dangerous, conflict on deployment name could be a problem
/// We must explore this and clarify it in the design and decide if it is a constraint
deployments: Mutex<HashMap<DeploymentName, Phase>>,
/// Publish surface. Optional so unit tests without a live NATS
/// client still work; always populated in the real agent runtime.
fleet: Option<Arc<FleetPublisher>>,
}
impl Reconciler {
pub fn new(
device_id: Id,
topology: Arc<PodmanTopology>,
inventory: Arc<Inventory>,
fleet: Option<Arc<FleetPublisher>>,
) -> Self {
Self {
device_id,
topology,
inventory,
state: Mutex::new(HashMap::new()),
deployments: Mutex::new(HashMap::new()),
fleet,
}
}
/// Record a new phase for a deployment and, if it changed, write
/// the updated [`DeploymentState`] to the KV. Same-phase
/// re-confirmations are no-ops so the periodic reconcile tick
/// doesn't churn the bucket.
async fn apply_phase(
&self,
deployment: &DeploymentName,
phase: Phase,
last_error: Option<String>,
) {
{
let mut phases = self.deployments.lock().await;
// performance nitpick : we don't need a write lock here, we could check before acquiring the write
// lock
if phases.get(deployment).copied() == Some(phase) {
return;
}
phases.insert(deployment.clone(), phase);
}
if let Some(publisher) = &self.fleet {
let state = DeploymentState {
device_id: self.device_id.clone(),
deployment: deployment.clone(),
phase,
last_event_at: Utc::now(),
last_error,
};
publisher.write_deployment_state(&state).await;
}
}
/// Clear the in-memory phase for a deployment and delete its KV
/// entry. Idempotent: a delete for a never-applied deployment is
/// a no-op in memory and a harmless tombstone write on the wire.
async fn drop_phase(&self, deployment: &DeploymentName) {
let was_known = {
let mut phases = self.deployments.lock().await;
phases.remove(deployment).is_some()
};
if !was_known {
return;
}
if let Some(publisher) = &self.fleet {
publisher.delete_deployment_state(deployment).await;
}
}
/// Handle a Put event (new or updated score on NATS KV). No-ops if the
/// serialized score is byte-identical to the last-seen value for this
/// key.
pub async fn apply(&self, key: &str, value: &[u8]) -> Result<()> {
let deployment = deployment_from_key(key);
let incoming = match serde_json::from_slice::<ReconcileScore>(value) {
Ok(ReconcileScore::PodmanV0(s)) => s,
Err(e) => {
tracing::warn!(key, error = %e, "failed to deserialize score");
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Failed, Some(format!("bad payload: {e}")))
.await;
}
return Ok(());
}
};
let serialized = String::from_utf8_lossy(value).into_owned();
{
let state = self.state.lock().await;
if let Some(existing) = state.get(key) {
if existing.serialized == serialized {
tracing::debug!(key, "score unchanged — noop");
return Ok(());
}
}
}
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Pending, None).await;
}
match self.run_score(key, &incoming).await {
Ok(()) => {
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Running, None).await;
}
}
Err(e) => {
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Failed, Some(short(&e.to_string())))
.await;
}
return Err(e);
}
}
let mut state = self.state.lock().await;
state.insert(
key.to_string(),
CachedEntry {
serialized,
score: incoming,
},
);
Ok(())
}
/// Handle a Delete/Purge event. Stops and removes every container
/// referenced by the last cached score for this key. Idempotent: if we
/// never saw a Put for this key (agent restart after delete), logs and
/// returns ok.
pub async fn remove(&self, key: &str) -> Result<()> {
let deployment = deployment_from_key(key);
let mut state = self.state.lock().await;
let Some(entry) = state.remove(key) else {
tracing::info!(key, "delete for unknown key — nothing to remove");
if let Some(name) = &deployment {
self.drop_phase(name).await;
}
return Ok(());
};
drop(state);
use harmony::topology::ContainerRuntime;
for service in &entry.score.services {
if let Err(e) = self.topology.remove_service(&service.name).await {
tracing::warn!(
key,
service = %service.name,
error = %e,
"failed to remove container"
);
} else {
tracing::info!(key, service = %service.name, "removed container");
}
}
if let Some(name) = &deployment {
self.drop_phase(name).await;
}
Ok(())
}
/// Periodic ground-truth reconcile. ROADMAP §5.6 — "polling instead of
/// event-driven PLEG. Agent polls podman every 30s as ground truth;
/// KV watch events are accelerators." Re-runs each cached score against
/// podman-api; the underlying `ensure_service_running` is idempotent
/// so a converged state produces no log noise.
pub async fn tick(&self) -> Result<()> {
let snapshot: Vec<(String, PodmanV0Score)> = {
let state = self.state.lock().await;
state
.iter()
.map(|(k, v)| (k.clone(), v.score.clone()))
.collect()
};
for (key, score) in snapshot {
let deployment = deployment_from_key(&key);
match self.run_score(&key, &score).await {
Ok(()) => {
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Running, None).await;
}
}
Err(e) => {
tracing::warn!(key, error = %e, "periodic reconcile failed");
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Failed, Some(short(&e.to_string())))
.await;
}
}
}
}
Ok(())
}
pub async fn run_periodic(self: Arc<Self>, interval: Duration) {
let mut ticker = tokio::time::interval(interval);
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
loop {
ticker.tick().await;
if let Err(e) = self.tick().await {
tracing::warn!(error = %e, "reconcile tick error");
}
}
}
async fn run_score(&self, key: &str, score: &PodmanV0Score) -> Result<()> {
let interpret = Score::<PodmanTopology>::create_interpret(score);
let outcome = interpret
.execute(&self.inventory, &self.topology)
.await
.map_err(|e| anyhow::anyhow!("PodmanV0Score interpret failed for {key}: {e}"))?;
tracing::info!(key, outcome = ?outcome, "reconciled");
Ok(())
}
}
/// Derive the deployment name from a NATS KV key shaped like
/// `<device>.<deployment>`.
///
/// Everything after the first `.` is treated as the deployment name.
/// Returns `None` when the key contains no `.` or when the remainder is
/// rejected by `DeploymentName::try_new`.
fn deployment_from_key(key: &str) -> Option<DeploymentName> {
    key.split_once('.')
        .and_then(|(_device, deployment)| DeploymentName::try_new(deployment).ok())
}
/// Truncate a long error message so the DeploymentState payload stays
/// comfortably below NATS JetStream's per-message limit.
///
/// Messages of at most 512 bytes are returned unchanged. Longer ones are
/// cut to at most 512 bytes and suffixed with `…`. The cut backs off to the
/// nearest UTF-8 character boundary, so a message containing multi-byte
/// characters can never cause a slicing panic.
fn short(s: &str) -> String {
    const MAX: usize = 512;
    if s.len() <= MAX {
        return s.to_string();
    }
    // Offset 0 is always a boundary, so this loop terminates.
    let mut end = MAX;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    let mut cut = String::with_capacity(end + '…'.len_utf8());
    cut.push_str(&s[..end]);
    cut.push('…');
    cut
}
#[cfg(test)]
mod tests {
//! Focused tests for transition detection. Drive `apply_phase` /
//! `drop_phase` directly with an inert topology (no real podman
//! socket) and a `None` FleetPublisher.
use super::*;
use harmony::inventory::Inventory;
use harmony::modules::podman::PodmanTopology;
use std::path::PathBuf;
// Reconciler wired to a socket path that does not exist and to no
// publisher, so only the in-memory phase map is exercised.
fn reconciler() -> Reconciler {
let topology = Arc::new(
PodmanTopology::from_unix_socket(PathBuf::from("/nonexistent/for-tests")).unwrap(),
);
let inventory = Arc::new(Inventory::empty());
Reconciler::new(
Id::from("test-device".to_string()),
topology,
inventory,
None,
)
}
// Shorthand for building a deployment name that is known to be valid.
fn dn(s: &str) -> DeploymentName {
DeploymentName::try_new(s).expect("valid test name")
}
// A first phase for an unseen deployment is stored.
#[tokio::test]
async fn apply_phase_records_new_phase() {
let r = reconciler();
r.apply_phase(&dn("hello"), Phase::Running, None).await;
let phases = r.deployments.lock().await;
assert_eq!(phases.get(&dn("hello")), Some(&Phase::Running));
}
// Re-applying the same phase leaves exactly one entry in the map.
#[tokio::test]
async fn apply_phase_idempotent_for_same_phase() {
let r = reconciler();
r.apply_phase(&dn("hello"), Phase::Running, None).await;
r.apply_phase(&dn("hello"), Phase::Running, None).await;
let phases = r.deployments.lock().await;
assert_eq!(phases.len(), 1);
}
// The map always holds the most recent phase after a series of changes.
#[tokio::test]
async fn apply_phase_transitions_update_phase() {
let r = reconciler();
r.apply_phase(&dn("hello"), Phase::Pending, None).await;
r.apply_phase(&dn("hello"), Phase::Running, None).await;
r.apply_phase(&dn("hello"), Phase::Failed, Some("oom".to_string()))
.await;
let phases = r.deployments.lock().await;
assert_eq!(phases.get(&dn("hello")), Some(&Phase::Failed));
}
// Dropping a known deployment removes its entry.
#[tokio::test]
async fn drop_phase_clears_known_deployment() {
let r = reconciler();
r.apply_phase(&dn("hello"), Phase::Running, None).await;
r.drop_phase(&dn("hello")).await;
let phases = r.deployments.lock().await;
assert!(!phases.contains_key(&dn("hello")));
}
// Dropping a deployment that was never recorded changes nothing.
#[tokio::test]
async fn drop_phase_on_unknown_deployment_is_noop() {
let r = reconciler();
r.drop_phase(&dn("never-existed")).await;
let phases = r.deployments.lock().await;
assert!(phases.is_empty());
}
}

View File

@@ -0,0 +1,26 @@
[package]
name = "harmony-fleet-auth"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
description = "Shared NATS credential plumbing for the fleet agent + operator (Zitadel JWT-bearer + dev-only username/password)"
[lib]
path = "src/lib.rs"
[dependencies]
harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
async-nats = { workspace = true }
anyhow = { workspace = true }
chrono = { workspace = true }
jsonwebtoken = "9"
reqwest = { workspace = true }
serde = { workspace = true, features = ["derive"] }
tokio = { workspace = true, features = ["sync"] }
toml = { workspace = true }
tracing = { workspace = true }
serde_json = { workspace = true }
[dev-dependencies]
tokio = { workspace = true, features = ["macros", "rt"] }

View File

@@ -0,0 +1,222 @@
//! Shared agent-config schema.
//!
//! `harmony-fleet-agent` reads this from `/etc/fleet-agent/config.toml`
//! at startup; `harmony-fleet-deploy` constructs the same shape when it
//! emits a `ConfigMap` mounted into the agent's container. Keeping the
//! schema in one place — typed — means the deploy crate cannot drift
//! away from what the agent can parse without a compile error.
use crate::CredentialsSection;
use harmony_reconciler_contracts::Id;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::path::Path;
/// Top-level agent configuration. Each field maps to one table of the
/// TOML document: `[agent]`, `[nats]`, `[credentials]`, and the optional
/// `[labels]` table.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AgentConfig {
/// `[agent]` table: device identity and the runtime toggle.
pub agent: AgentSection,
/// `[nats]` table: server URLs to connect to.
pub nats: NatsSection,
/// `[credentials]` table: how the process authenticates to NATS.
pub credentials: CredentialsSection,
/// Routing labels published verbatim in every DeviceInfo
/// heartbeat. The operator reflects them into
/// `Device.metadata.labels` so Deployment `spec.targetSelector`
/// resolves against them (K8s-Node-analogue flow). Empty by
/// default — a device with no labels is targetable only by its
/// auto-published `device-id` label.
// An empty map is omitted on serialize, so no `[labels]` header is emitted.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub labels: BTreeMap<String, String>,
}
/// `[agent]` table of the agent config: which device this is and
/// whether the container runtime arms are enabled.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AgentSection {
/// Cross-boundary device identity. TOML deserializes the field
/// as a bare string thanks to `#[serde(transparent)]` on `Id`.
pub device_id: Id,
/// When false, skip the podman socket + reconciler loop and run
/// only the heartbeat + command-server arms. Lets the agent run
/// on hosts without podman (e.g. the in-cluster e2e harness on
/// containerd-only k3d nodes). Default true so existing RPi
/// configs are unaffected.
// Omitted key => `default_runtime_enabled()` => `true`.
#[serde(default = "default_runtime_enabled")]
pub runtime_enabled: bool,
}
/// Serde default for `AgentSection::runtime_enabled`: the runtime is on
/// unless the config explicitly sets the flag.
fn default_runtime_enabled() -> bool {
true
}
/// `[nats]` table of the agent config.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct NatsSection {
/// NATS server URLs (e.g. `nats://nats:4222`).
pub urls: Vec<String>,
}
pub fn load_config(path: &Path) -> anyhow::Result<AgentConfig> {
let content = std::fs::read_to_string(path)?;
let config: AgentConfig = toml::from_str(&content)?;
Ok(config)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize `raw` as an [`AgentConfig`], panicking on invalid input.
    fn parse(raw: &str) -> AgentConfig {
        toml::from_str(raw).expect("valid config")
    }

    /// Build the dev-mode credentials variant used by the serialize tests.
    fn shared_creds(user: &str, pass: &str) -> CredentialsSection {
        CredentialsSection::TomlShared {
            nats_user: user.to_string(),
            nats_pass: pass.to_string(),
        }
    }

    #[test]
    fn parses_toml_shared_credentials() {
        let cfg = parse(
            r#"
[agent]
device_id = "pi-42"
runtime_enabled = true
[credentials]
type = "toml-shared"
nats_user = "u"
nats_pass = "p"
[nats]
urls = ["nats://nats:4222"]
[labels]
group = "site-a"
arch = "aarch64"
"#,
        );
        assert_eq!(
            cfg.labels.get("group").map(String::as_str),
            Some("site-a")
        );
        let CredentialsSection::TomlShared {
            nats_user,
            nats_pass,
        } = &cfg.credentials
        else {
            panic!("expected TomlShared");
        };
        assert_eq!(nats_user, "u");
        assert_eq!(nats_pass, "p");
    }

    #[test]
    fn labels_section_optional_defaults_empty() {
        let cfg = parse(
            r#"
[agent]
device_id = "pi-42"
[credentials]
type = "toml-shared"
nats_user = "u"
nats_pass = "p"
[nats]
urls = ["nats://nats:4222"]
"#,
        );
        assert!(cfg.labels.is_empty());
    }

    #[test]
    fn runtime_enabled_defaults_to_true_when_omitted() {
        // Configs written before the flag existed carry no
        // `runtime_enabled` key. They must keep podman + reconciler on;
        // any other default would silently downgrade a production agent.
        let cfg = parse(
            r#"
[agent]
device_id = "pi-42"
[credentials]
type = "toml-shared"
nats_user = "u"
nats_pass = "p"
[nats]
urls = ["nats://nats:4222"]
"#,
        );
        assert!(cfg.agent.runtime_enabled);
    }

    #[test]
    fn runtime_enabled_false_is_honored() {
        let cfg = parse(
            r#"
[agent]
device_id = "pi-42"
runtime_enabled = false
[credentials]
type = "toml-shared"
nats_user = "u"
nats_pass = "p"
[nats]
urls = ["nats://nats:4222"]
"#,
        );
        assert!(!cfg.agent.runtime_enabled);
    }

    #[test]
    fn round_trips_via_toml_serialize_with_labels() {
        // The deploy crate renders this schema into a `ConfigMap` with
        // `toml::to_string`, and the agent parses it back. The shared
        // type already rules out schema drift at compile time; this
        // test guards the serde attributes (skip-empty, tagged
        // credentials, ...) and string escaping that make the
        // round-trip lossless.
        let original = AgentConfig {
            agent: AgentSection {
                device_id: Id::from("vm-device-01"),
                runtime_enabled: false,
            },
            nats: NatsSection {
                urls: vec!["nats://fleet-nats.e2e-x.svc.cluster.local:4222".to_string()],
            },
            credentials: shared_creds("device\"with\"quotes", "p@ss\\with\\backslash"),
            labels: BTreeMap::from([
                ("group".to_string(), "site\"a".to_string()),
                ("arch".to_string(), "aarch64".to_string()),
            ]),
        };
        let rendered = toml::to_string(&original).expect("serialize");
        let parsed: AgentConfig = toml::from_str(&rendered).expect("deserialize");

        assert_eq!(parsed.agent.device_id, original.agent.device_id);
        assert_eq!(parsed.agent.runtime_enabled, original.agent.runtime_enabled);
        assert_eq!(parsed.nats.urls, original.nats.urls);

        let (
            CredentialsSection::TomlShared {
                nats_user: parsed_user,
                nats_pass: parsed_pass,
            },
            CredentialsSection::TomlShared {
                nats_user: original_user,
                nats_pass: original_pass,
            },
        ) = (&parsed.credentials, &original.credentials)
        else {
            panic!("expected TomlShared round-trip");
        };
        assert_eq!(parsed_user, original_user);
        assert_eq!(parsed_pass, original_pass);

        assert_eq!(parsed.labels, original.labels);
    }

    #[test]
    fn empty_labels_omit_section_on_serialize() {
        let cfg = AgentConfig {
            agent: AgentSection {
                device_id: Id::from("vm-device-01"),
                runtime_enabled: false,
            },
            nats: NatsSection {
                urls: vec!["nats://nats:4222".to_string()],
            },
            credentials: shared_creds("u", "p"),
            labels: BTreeMap::new(),
        };
        let rendered = toml::to_string(&cfg).expect("serialize");
        assert!(!rendered.contains("[labels]"), "got:\n{rendered}");
    }
}

View File

@@ -0,0 +1,186 @@
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// Internally-tagged credential definition (serde `tag = "type"`)
/// shared between the fleet agent and the fleet operator. The `type`
/// field selects the variant; each variant's other fields are flatly
/// mixed into the `[credentials]` TOML table for human-friendly editing.
///
/// **Why one struct for both processes**: the agent reads this from
/// `/etc/fleet-agent/config.toml`; the operator reads it from a single
/// env var (`FLEET_OPERATOR_CREDENTIALS_TOML`) whose value is a TOML
/// snippet shaped exactly like the `[credentials]` table. Identical
/// deserialization, identical downstream code path. The only thing
/// that differs is the byte source.
///
/// Adding a new mode is additive — emit `type = "<new>"` from the
/// installer side, decode here, instantiate the matching
/// `CredentialSource`.
// NOTE(review): the derived `Debug` prints `nats_pass` and `key_json`
// verbatim — avoid formatting this type with `{:?}` in logs.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "kebab-case")]
pub enum CredentialsSection {
/// Shared username + password baked into the agent config. Only
/// suitable for v0/development scenarios where every device shares
/// a single NATS account user. Not used in production.
///
/// Selected by `type = "toml-shared"`.
TomlShared {
nats_user: String,
nats_pass: String,
},
/// Zitadel machine-user JWT-bearer (RFC 7523) flow. The keyfile
/// (the JSON blob Zitadel emits for `KEY_TYPE_JSON`) is the only
/// durable secret on the process — the access token is short-lived
/// and re-minted before expiry by the auth callback registered on
/// each NATS (re)connect.
///
/// Selected by `type = "zitadel-jwt"`.
///
/// Two ways to point the loader at the keyfile contents — the
/// loader prefers `key_json` when present (operator on
/// OKD-restricted-v2 SCC, no volume mounts allowed), falls back to
/// `key_path` (agent on a VM, file on disk):
///
/// * `key_json` — the JSON keyfile content embedded inline. Lets
/// the operator pod consume the entire credentials block from a
/// single env-var-from-Secret without a Secret volume mount.
/// * `key_path` — filesystem path the loader reads. The agent's
/// `FleetDeviceSetupScore` drops the keyfile here at install
/// time. Default path is the agent convention.
///
/// Setting both is explicitly allowed — the loader picks
/// `key_json` and ignores `key_path`. Because `key_path` has a
/// default, omitting both means the loader tries the default path
/// and fails at factory time if no file exists there.
ZitadelJwt {
/// Filesystem path to the keyfile. Falls back to the agent
/// default when omitted (file expected to exist there).
#[serde(default = "default_zitadel_key_path")]
key_path: PathBuf,
/// Inline JSON keyfile content. When `Some`, takes precedence
/// over `key_path`. Used by the operator pod (env-var-from-
/// Secret deployment) where mounting Secret volumes conflicts
/// with OKD's restricted-v2 SCC.
#[serde(default, skip_serializing_if = "Option::is_none")]
key_json: Option<String>,
/// Externally-visible Zitadel issuer URL — must match Zitadel's
/// emitted `iss` claim exactly (including port if non-default).
oidc_issuer_url: String,
/// `aud` value for token-bearer requests. Typically the Zitadel
/// project ID (the auth callout side validates against this).
audience: String,
/// Whether the HTTP client accepts invalid TLS certs. Local-dev
/// escape hatch for self-signed staging Zitadels.
// `Not::not` on the flag: the key is skipped on serialize when it is `false`.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
danger_accept_invalid_certs: bool,
},
}
/// Serde default for `ZitadelJwt::key_path`: the conventional location of
/// the agent's Zitadel keyfile.
fn default_zitadel_key_path() -> PathBuf {
    const AGENT_KEYFILE: &str = "/etc/fleet-agent/zitadel-key.json";
    AGENT_KEYFILE.into()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize a bare `[credentials]`-shaped TOML snippet.
    fn parse(raw: &str) -> CredentialsSection {
        toml::from_str(raw).expect("valid credentials TOML")
    }

    #[test]
    fn parses_toml_shared() {
        let cs = parse(
            r#"
type = "toml-shared"
nats_user = "u"
nats_pass = "p"
"#,
        );
        let CredentialsSection::TomlShared {
            nats_user,
            nats_pass,
        } = cs
        else {
            panic!("expected TomlShared");
        };
        assert_eq!(nats_user, "u");
        assert_eq!(nats_pass, "p");
    }

    #[test]
    fn parses_zitadel_jwt_with_key_path() {
        let cs = parse(
            r#"
type = "zitadel-jwt"
key_path = "/var/lib/fleet-agent/zitadel-key.json"
oidc_issuer_url = "https://zitadel.staging.example.com"
audience = "366378028009259037"
danger_accept_invalid_certs = false
"#,
        );
        let CredentialsSection::ZitadelJwt {
            key_path,
            key_json,
            oidc_issuer_url,
            audience,
            danger_accept_invalid_certs,
        } = cs
        else {
            panic!("expected ZitadelJwt");
        };
        assert_eq!(
            key_path.to_str(),
            Some("/var/lib/fleet-agent/zitadel-key.json")
        );
        assert!(key_json.is_none());
        assert_eq!(oidc_issuer_url, "https://zitadel.staging.example.com");
        assert_eq!(audience, "366378028009259037");
        assert!(!danger_accept_invalid_certs);
    }

    #[test]
    fn parses_zitadel_jwt_with_inline_key_json() {
        // Operator-side shape: the credentials block carries the JSON
        // keyfile content inline as a TOML triple-quoted string, which is
        // what the operator's env-var-from-Secret deployment supplies.
        let cs = parse(
            r#"
type = "zitadel-jwt"
oidc_issuer_url = "https://sso-staging.cb1.nationtech.io"
audience = "371584906720968725"
key_json = """
{"type":"serviceaccount","keyId":"k1","key":"-----BEGIN RSA PRIVATE KEY-----\nABC\n-----END RSA PRIVATE KEY-----\n","userId":"u1"}
"""
"#,
        );
        let CredentialsSection::ZitadelJwt {
            key_json, audience, ..
        } = cs
        else {
            panic!("expected ZitadelJwt");
        };
        let inline = key_json.expect("key_json present");
        assert!(inline.contains("BEGIN RSA PRIVATE KEY"));
        assert!(inline.contains("\"keyId\":\"k1\""));
        assert_eq!(audience, "371584906720968725");
    }

    #[test]
    fn zitadel_jwt_key_path_defaults_when_omitted() {
        let cs = parse(
            r#"
type = "zitadel-jwt"
oidc_issuer_url = "https://zitadel.staging.example.com"
audience = "366378028009259037"
"#,
        );
        let CredentialsSection::ZitadelJwt {
            key_path, key_json, ..
        } = cs
        else {
            panic!("expected ZitadelJwt");
        };
        assert_eq!(key_path.to_str(), Some("/etc/fleet-agent/zitadel-key.json"));
        assert!(key_json.is_none());
    }
}

View File

@@ -0,0 +1,553 @@
//! NATS credential sources for fleet processes (agent + operator).
//!
//! `CredentialSource::next_credential()` is invoked from async-nats's
//! `with_auth_callback` on every (re)connect attempt — including the
//! first connect. The callback shape means an expired token is
//! automatically replaced when async-nats reconnects after a transient
//! NATS outage / pod restart / network blip: the caller doesn't need
//! a separate refresh task to "never lose connectivity."
//!
//! Two variants:
//!
//! - [`CredentialSource::TomlShared`] — username + password baked into
//! the config (v0/dev only).
//! - [`CredentialSource::ZitadelJwt`] — Zitadel machine-user JWT-bearer
//! flow (RFC 7523). The keyfile is the only durable secret on the
//! process; the bearer token is short-lived and re-minted
//! transparently when a cached token is within 5 minutes of expiry.
//!
//! Modeled as an enum (rather than a `dyn Trait`) because async-nats's
//! auth-callback bounds (`Future: Send + Sync`) are incompatible with
//! `Pin<Box<dyn Future + Send>>` returned by an object-safe trait. Two
//! variants is a small enough cardinality that enum dispatch is
//! cleaner than a Trait + factory.
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::time::Duration;
use anyhow::{Context, Result};
use jsonwebtoken::{Algorithm, EncodingKey, Header as JwtHeader};
use serde::Deserialize;
use crate::config::CredentialsSection;
/// Material the NATS connector needs to authenticate. Returned per
/// (re)connect attempt — the source decides whether to mint fresh.
///
/// `Debug` is implemented by hand rather than derived so that the
/// password and the bearer token never reach logs or panic messages;
/// both are rendered as `<redacted>`.
#[derive(Clone)]
pub enum NatsCredential {
    /// Username + password pair.
    UserPass { user: String, pass: String },
    /// Opaque bearer token presented as the NATS auth token.
    BearerToken(String),
}

impl std::fmt::Debug for NatsCredential {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // The username is not secret and is useful for diagnostics.
            Self::UserPass { user, .. } => f
                .debug_struct("UserPass")
                .field("user", user)
                .field("pass", &format_args!("<redacted>"))
                .finish(),
            Self::BearerToken(_) => f
                .debug_tuple("BearerToken")
                .field(&format_args!("<redacted>"))
                .finish(),
        }
    }
}
/// Runtime credential source. A plain Rust enum (it derives no serde
/// traits), constructed once at startup from the parsed `[credentials]`
/// section; cloned via Arc into the async-nats auth callback.
pub enum CredentialSource {
/// Static username + password; returned unchanged on every call.
TomlShared {
user: String,
pass: String,
},
/// Zitadel JWT-bearer flow with a cached access token.
ZitadelJwt {
/// Parsed machine key used to sign the JWT assertion.
key: MachineKeyFile,
/// Issuer URL: the assertion's `aud` claim and the base of the
/// token endpoint URL.
oidc_issuer_url: String,
/// Value interpolated into the audience scope of the token request.
audience: String,
/// HTTP client used to POST to the token endpoint.
http: reqwest::Client,
/// Most recently minted token, if any. A `std::sync::Mutex`: it is
/// locked only in short synchronous sections, never across `.await`.
cache: Mutex<Option<CachedToken>>,
},
}
impl CredentialSource {
    /// Produce credentials for one NATS (re)connect attempt.
    ///
    /// `TomlShared` hands back a copy of the configured username and
    /// password. `ZitadelJwt` returns a bearer token, reusing the cached
    /// one unless it is inside its refresh leeway.
    pub async fn next_credential(&self) -> Result<NatsCredential> {
        match self {
            Self::ZitadelJwt { .. } => self.zitadel_next().await,
            Self::TomlShared { user, pass } => {
                let (user, pass) = (user.clone(), pass.clone());
                Ok(NatsCredential::UserPass { user, pass })
            }
        }
    }

    /// Bearer-token path: serve from cache when possible, otherwise mint
    /// a new token and remember it.
    async fn zitadel_next(&self) -> Result<NatsCredential> {
        // The cache is consulted through a synchronous helper so no
        // MutexGuard is ever alive across an `.await`; a guard held over
        // an await point would make this future !Sync, which async-nats's
        // `with_auth_callback` rejects at compile time.
        let token = match self.cached_if_fresh() {
            Some(still_valid) => still_valid,
            None => {
                // Minting happens outside any lock. Two overlapping
                // (re)connect attempts may both mint; the later writer
                // simply overwrites the earlier value. That costs one
                // extra HTTP round-trip in a rare race and is not a
                // correctness problem.
                let minted = self.zitadel_mint().await?;
                let bearer = minted.access_token.clone();
                if let Self::ZitadelJwt {
                    cache, audience, ..
                } = self
                    && let Ok(mut slot) = cache.lock()
                {
                    *slot = Some(minted);
                    tracing::info!(audience = %audience, "minted fresh Zitadel access token");
                }
                bearer
            }
        };
        Ok(NatsCredential::BearerToken(token))
    }

    /// Return a copy of the cached token when it is still more than
    /// `TOKEN_REFRESH_LEEWAY_SECS` away from expiry. `None` for a
    /// non-Zitadel source, an empty cache, a poisoned lock, or a token
    /// inside the leeway window.
    fn cached_if_fresh(&self) -> Option<String> {
        match self {
            Self::TomlShared { .. } => None,
            Self::ZitadelJwt { cache, .. } => {
                let now = chrono::Utc::now().timestamp();
                let slot = cache.lock().ok()?;
                slot.as_ref()
                    .filter(|tok| tok.expires_at_unix - TOKEN_REFRESH_LEEWAY_SECS > now)
                    .map(|tok| tok.access_token.clone())
            }
        }
    }

    /// Exchange a freshly signed JWT assertion for an access token at the
    /// issuer's token endpoint.
    ///
    /// Fails when called on a non-Zitadel source, when the assertion
    /// cannot be built, when the request fails, when the endpoint answers
    /// with a non-success status, or when the response body cannot be
    /// parsed.
    async fn zitadel_mint(&self) -> Result<CachedToken> {
        /// The fields of the token response that this function reads.
        #[derive(Deserialize)]
        struct TokenResponse {
            access_token: String,
            #[serde(default)]
            expires_in: Option<i64>,
        }

        let (key, issuer, audience, http) = match self {
            Self::ZitadelJwt {
                key,
                oidc_issuer_url,
                audience,
                http,
                ..
            } => (key, oidc_issuer_url, audience, http),
            Self::TomlShared { .. } => {
                anyhow::bail!("zitadel_mint called on non-ZitadelJwt variant")
            }
        };

        // Timestamp taken before the request: the stored expiry therefore
        // errs on the early side by the duration of the round-trip.
        let issued_at = chrono::Utc::now().timestamp();
        let assertion = build_assertion(key, issuer, issued_at)?;
        let form = [
            (
                "grant_type",
                "urn:ietf:params:oauth:grant-type:jwt-bearer".to_string(),
            ),
            ("assertion", assertion),
            ("scope", build_scope(audience)),
        ];
        let endpoint = build_token_url(issuer);

        let response = http
            .post(&endpoint)
            .form(&form)
            .send()
            .await
            .with_context(|| format!("POST {endpoint}"))?;

        let status = response.status();
        if !status.is_success() {
            let body = response.text().await.unwrap_or_default();
            anyhow::bail!("Zitadel token endpoint returned {status}: {body}");
        }

        let parsed: TokenResponse = response.json().await.context("parsing token response")?;
        // A response without `expires_in` is treated as a one-hour token.
        let lifetime_secs = parsed.expires_in.unwrap_or(3600);
        Ok(CachedToken {
            access_token: parsed.access_token,
            expires_at_unix: issued_at + lifetime_secs,
        })
    }
}
/// Sign the JWT-bearer assertion for `key` at time `now` (Unix seconds).
///
/// Kept separate from the network path, and itself composed of the pure
/// claim and header builders, so the claim and header shape can be
/// unit-tested without an HTTP server or an RSA key fixture.
///
/// Fails when `key.key` is not a parseable RSA private key in PEM form
/// or when signing fails.
pub(crate) fn build_assertion(
    key: &MachineKeyFile,
    oidc_issuer_url: &str,
    now: i64,
) -> Result<String> {
    let signing_key = EncodingKey::from_rsa_pem(key.key.as_bytes())
        .context("parsing RSA private key from machine key file")?;
    let header = build_assertion_header(key);
    let claims = build_assertion_claims(key, oidc_issuer_url, now);
    jsonwebtoken::encode(&header, &claims, &signing_key).context("signing JWT assertion")
}
/// Pure claim payload for the JWT-bearer assertion. `iss == sub == userId`
/// is a Zitadel requirement; `aud` is Zitadel itself (the token endpoint
/// is reached via `oidc_issuer_url`); `exp - iat` MUST be ≤ 60 s or
/// Zitadel rejects.
pub(crate) fn build_assertion_claims(
key: &MachineKeyFile,
oidc_issuer_url: &str,
now: i64,
) -> serde_json::Value {
serde_json::json!({
"iss": key.user_id,
"sub": key.user_id,
"aud": oidc_issuer_url,
"exp": now + ASSERTION_LIFETIME_SECS,
"iat": now,
})
}
/// RS256 header for the assertion, with `kid` set to the machine key's
/// id so the verifier can pick the matching registered key.
pub(crate) fn build_assertion_header(key: &MachineKeyFile) -> JwtHeader {
    let key_id = key.key_id.clone();
    let mut hdr = JwtHeader::new(Algorithm::RS256);
    hdr.kid = Some(key_id);
    hdr
}
/// Assemble the space-separated OAuth `scope` value for the token
/// request.
///
/// The result always holds three scopes, in this order:
///
/// 1. `openid` — the base OIDC scope.
/// 2. `urn:zitadel:iam:org:projects:roles` (PLURAL "projects") — asks
///    for the role-claim block in the access token. Without it the
///    callout reports "no authorized role in token" even when the user
///    holds a project role grant.
/// 3. `urn:zitadel:iam:org:project:id:<audience>:aud` (SINGULAR
///    "project") — adds `<audience>` to the access token's `aud` claim
///    so the callout's audience validation accepts the project ID.
///
/// The plural/singular difference between the two URNs is a Zitadel
/// convention, not a typo; both are required.
pub(crate) fn build_scope(audience: &str) -> String {
    let audience_scope = format!("urn:zitadel:iam:org:project:id:{audience}:aud");
    [
        "openid",
        "urn:zitadel:iam:org:projects:roles",
        audience_scope.as_str(),
    ]
    .join(" ")
}
/// Derive the token endpoint URL from the issuer URL.
///
/// Every trailing `/` on `oidc_issuer_url` is removed before the path is
/// appended; otherwise an issuer configured as `https://sso.example.com/`
/// would yield `…//oauth/v2/token`, which 404s.
pub(crate) fn build_token_url(oidc_issuer_url: &str) -> String {
    const TOKEN_PATH: &str = "/oauth/v2/token";
    let base = oidc_issuer_url.trim_end_matches('/');
    let mut url = String::with_capacity(base.len() + TOKEN_PATH.len());
    url.push_str(base);
    url.push_str(TOKEN_PATH);
    url
}
// ---- helper types ----------------------------------------------------------
/// JSON keyfile content as Zitadel emits it for a `KEY_TYPE_JSON`
/// machine key. The `key` is a PEM-encoded RSA private key.
///
/// `Debug` is implemented by hand rather than derived so the private key
/// can never end up in logs or panic messages; it is rendered as
/// `<redacted>` while the non-secret identifiers stay visible.
#[derive(Clone, Deserialize)]
pub struct MachineKeyFile {
    /// Key kind as written by Zitadel (JSON field `type`).
    #[serde(rename = "type")]
    pub _type: String,
    /// Identifier of this key (JSON field `keyId`); sent as the JWT `kid`.
    #[serde(rename = "keyId")]
    pub key_id: String,
    /// PEM-encoded RSA private key. Secret.
    pub key: String,
    /// Machine user id (JSON field `userId`); used as `iss` and `sub`.
    #[serde(rename = "userId")]
    pub user_id: String,
}

impl std::fmt::Debug for MachineKeyFile {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("MachineKeyFile")
            .field("_type", &self._type)
            .field("key_id", &self.key_id)
            .field("key", &format_args!("<redacted>"))
            .field("user_id", &self.user_id)
            .finish()
    }
}
/// An access token together with its expiry, as kept in the
/// `CredentialSource::ZitadelJwt` cache.
///
/// `Debug` is implemented by hand rather than derived so the token value
/// never reaches logs; only the expiry is printed.
#[derive(Clone)]
pub struct CachedToken {
    pub(crate) access_token: String,
    /// Unix seconds at which the token expires. Computed from the OAuth
    /// response's `expires_in` and the local clock at mint time.
    /// `cached_if_fresh` stops reusing the token
    /// `TOKEN_REFRESH_LEEWAY_SECS` before this instant.
    pub(crate) expires_at_unix: i64,
}

impl std::fmt::Debug for CachedToken {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CachedToken")
            .field("access_token", &format_args!("<redacted>"))
            .field("expires_at_unix", &self.expires_at_unix)
            .finish()
    }
}
/// Refresh tokens this many seconds before their advertised expiry.
/// Five minutes leaves headroom for clock skew, slow networks, and
/// the round-trip cost of re-minting against Zitadel.
///
/// Consumed by `cached_if_fresh`, which reuses a cached token only while
/// `expires_at_unix - TOKEN_REFRESH_LEEWAY_SECS > now`.
pub const TOKEN_REFRESH_LEEWAY_SECS: i64 = 5 * 60;
/// Lifetime of the JWT *assertion* (the client-side bearer JWT we sign
/// to authenticate to Zitadel's token endpoint). Zitadel rejects
/// assertions with `exp - iat > 60s`; one minute is the safe ceiling.
///
/// Unit: seconds. Added to `now` to form the `exp` claim in
/// `build_assertion_claims`.
pub const ASSERTION_LIFETIME_SECS: i64 = 60;
// ---- factory ---------------------------------------------------------------
/// Build the appropriate `CredentialSource` from the parsed config.
///
/// * [`CredentialsSection::TomlShared`] — copies the username and
///   password.
/// * [`CredentialsSection::ZitadelJwt`] — resolves the machine key and
///   builds the HTTP client (10-second timeout) used for the token
///   endpoint. The key comes from inline `key_json` when that field is
///   present and non-blank after trimming; otherwise it is read from the
///   file at `key_path`.
///
/// # Errors
///
/// Fails when the machine key cannot be read or parsed, or when the HTTP
/// client cannot be constructed.
pub fn credential_source_from_config(creds: &CredentialsSection) -> Result<Arc<CredentialSource>> {
    let source = match creds {
        CredentialsSection::TomlShared {
            nats_user,
            nats_pass,
        } => CredentialSource::TomlShared {
            user: nats_user.clone(),
            pass: nats_pass.clone(),
        },
        CredentialsSection::ZitadelJwt {
            key_path,
            key_json,
            oidc_issuer_url,
            audience,
            danger_accept_invalid_certs,
        } => {
            // Inline `key_json` wins over the `key_path` file. The
            // operator pod uses inline because OKD's restricted-v2 SCC +
            // env-var-from-Secret deployment shape can't reliably mount
            // Secret volumes; the agent uses the file because it lives on
            // a VM, where a real file is the more natural rotation
            // target. A blank inline value counts as absent.
            let inline = key_json
                .as_deref()
                .map(str::trim)
                .filter(|json| !json.is_empty());
            let key = match inline {
                Some(json) => parse_machine_key(json)?,
                None => load_machine_key(key_path)?,
            };
            let http = reqwest::Client::builder()
                .danger_accept_invalid_certs(*danger_accept_invalid_certs)
                .timeout(Duration::from_secs(10))
                .build()
                .context("building HTTP client for Zitadel token endpoint")?;
            CredentialSource::ZitadelJwt {
                key,
                oidc_issuer_url: oidc_issuer_url.clone(),
                audience: audience.clone(),
                http,
                cache: Mutex::new(None),
            }
        }
    };
    Ok(Arc::new(source))
}
fn load_machine_key(key_path: &Path) -> Result<MachineKeyFile> {
let raw = std::fs::read_to_string(key_path)
.with_context(|| format!("reading machine key file at {}", key_path.display()))?;
parse_machine_key(&raw)
.with_context(|| format!("parsing machine key file at {}", key_path.display()))
}
/// Deserialize a [`MachineKeyFile`] from JSON text. Failures are labelled
/// "parsing inline machine key JSON".
fn parse_machine_key(raw: &str) -> Result<MachineKeyFile> {
    let key: MachineKeyFile =
        serde_json::from_str(raw).context("parsing inline machine key JSON")?;
    Ok(key)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Machine key fixture with a placeholder in place of a real PEM.
    fn fake_key() -> MachineKeyFile {
        MachineKeyFile {
            _type: "serviceaccount".to_string(),
            key_id: "kid-371358469099356247".to_string(),
            // Real PEM not required for the pure-builder tests; the
            // signing path that needs a parseable key is exercised
            // end-to-end in the e2e harness.
            key: "PEM-PLACEHOLDER".to_string(),
            user_id: "uid-371358469065801815".to_string(),
        }
    }

    /// ZitadelJwt source with an empty cache; never touches the network
    /// in these tests.
    fn zjwt_source() -> CredentialSource {
        CredentialSource::ZitadelJwt {
            key: fake_key(),
            oidc_issuer_url: "http://sso.fleet.local:8080".to_string(),
            audience: "366378028009259037".to_string(),
            http: reqwest::Client::new(),
            cache: Mutex::new(None),
        }
    }

    /// Put a token expiring at `expires_at_unix` into `src`'s cache.
    fn seed_cache(src: &CredentialSource, access_token: &str, expires_at_unix: i64) {
        if let CredentialSource::ZitadelJwt { cache, .. } = src {
            *cache.lock().unwrap() = Some(CachedToken {
                access_token: access_token.to_string(),
                expires_at_unix,
            });
        }
    }

    // ---- next_credential / cache state -------------------------------------

    #[tokio::test]
    async fn toml_shared_returns_userpass_each_call() {
        let s = CredentialSource::TomlShared {
            user: "u".to_string(),
            pass: "p".to_string(),
        };
        let c = s.next_credential().await.unwrap();
        match c {
            NatsCredential::UserPass { user, pass } => {
                assert_eq!(user, "u");
                assert_eq!(pass, "p");
            }
            other => panic!("expected UserPass, got {other:?}"),
        }
    }

    #[test]
    fn cached_token_within_leeway_is_treated_as_expired() {
        // Exercise `cached_if_fresh` itself, so a refactor that inverts
        // or drops the leeway comparison actually fails this test.
        let now = chrono::Utc::now().timestamp();

        // One second inside the leeway window.
        let src = zjwt_source();
        seed_cache(&src, "stale", now + TOKEN_REFRESH_LEEWAY_SECS - 1);
        assert_eq!(
            src.cached_if_fresh(),
            None,
            "tokens within the leeway window must be considered expired"
        );

        // Exactly on the boundary: the comparison is strict, so this is
        // still a miss.
        let src = zjwt_source();
        seed_cache(&src, "boundary", now + TOKEN_REFRESH_LEEWAY_SECS);
        assert_eq!(
            src.cached_if_fresh(),
            None,
            "a token expiring exactly at the leeway boundary must be considered expired"
        );

        // Already past its expiry.
        let src = zjwt_source();
        seed_cache(&src, "expired", now - 1);
        assert_eq!(src.cached_if_fresh(), None);
    }

    #[test]
    fn cached_if_fresh_returns_some_when_outside_leeway() {
        let src = zjwt_source();
        let now = chrono::Utc::now().timestamp();
        seed_cache(&src, "fresh", now + TOKEN_REFRESH_LEEWAY_SECS + 60);
        assert_eq!(src.cached_if_fresh(), Some("fresh".to_string()));
    }

    #[test]
    fn cached_if_fresh_returns_none_when_no_cache() {
        // Brand-new ZitadelJwt source — no token has been minted yet.
        // Forces the slow path on first connect.
        let src = zjwt_source();
        assert_eq!(src.cached_if_fresh(), None);
    }

    #[test]
    fn cached_if_fresh_returns_none_for_toml_shared() {
        // Defensive: cached_if_fresh is only meaningful for ZitadelJwt;
        // TomlShared has no cache. A nonsensical call must return None,
        // not panic, so the cold-path can degrade gracefully.
        let src = CredentialSource::TomlShared {
            user: "u".into(),
            pass: "p".into(),
        };
        assert_eq!(src.cached_if_fresh(), None);
    }

    // ---- assertion claims / header (pure builders) ------------------------

    #[test]
    fn assertion_claims_carry_iss_sub_aud_exp_iat() {
        let now = 1_700_000_000;
        let claims = build_assertion_claims(&fake_key(), "http://sso.fleet.local:8080", now);
        assert_eq!(claims["iss"], "uid-371358469065801815");
        assert_eq!(claims["sub"], "uid-371358469065801815");
        assert_eq!(claims["aud"], "http://sso.fleet.local:8080");
        assert_eq!(claims["iat"].as_i64(), Some(now));
        assert_eq!(claims["exp"].as_i64(), Some(now + ASSERTION_LIFETIME_SECS));
    }

    #[test]
    fn assertion_lifetime_locked_at_60_seconds() {
        // Zitadel rejects assertions where exp - iat > 60s. If anyone
        // bumps ASSERTION_LIFETIME_SECS thinking "more is safer", the
        // mints will silently start failing in prod with no helpful
        // error. Lock the constant.
        assert_eq!(ASSERTION_LIFETIME_SECS, 60);
    }

    #[test]
    fn assertion_header_carries_kid_and_rs256() {
        let header = build_assertion_header(&fake_key());
        assert_eq!(header.alg, jsonwebtoken::Algorithm::RS256);
        assert_eq!(header.kid.as_deref(), Some("kid-371358469099356247"));
    }

    // ---- scope string ------------------------------------------------------

    #[test]
    fn scope_includes_plural_projects_roles() {
        // The plural-projects URN is what tells Zitadel to emit the
        // role claim. Day-one bug; lock it.
        let s = build_scope("366378028009259037");
        assert!(
            s.contains("urn:zitadel:iam:org:projects:roles"),
            "scope must include the PLURAL projects-roles URN; got {s:?}"
        );
    }

    #[test]
    fn scope_audience_uses_singular_project_id_urn() {
        // The singular-project URN tells Zitadel to put <id> into the
        // access token's aud claim. Different URN entirely from the
        // plural one above; both required.
        let s = build_scope("366378028009259037");
        assert!(
            s.contains("urn:zitadel:iam:org:project:id:366378028009259037:aud"),
            "scope must include the SINGULAR project:id:<aud>:aud URN; got {s:?}"
        );
    }

    #[test]
    fn scope_includes_openid_base() {
        let s = build_scope("any");
        assert!(
            s.split_whitespace().any(|tok| tok == "openid"),
            "scope must include `openid` as a standalone token; got {s:?}"
        );
    }

    // ---- token URL ---------------------------------------------------------

    #[test]
    fn token_url_appends_oauth_endpoint() {
        assert_eq!(
            build_token_url("http://sso.fleet.local:8080"),
            "http://sso.fleet.local:8080/oauth/v2/token"
        );
    }

    #[test]
    fn token_url_strips_single_trailing_slash() {
        // A trailing slash would yield `…//oauth/v2/token`, which 404s.
        // Common configuration drift; the trim guards against it.
        assert_eq!(
            build_token_url("http://sso.fleet.local:8080/"),
            "http://sso.fleet.local:8080/oauth/v2/token"
        );
    }

    #[test]
    fn token_url_strips_multiple_trailing_slashes() {
        // Defensive — `trim_end_matches('/')` peels all of them, not
        // just the first. Locks that semantics.
        assert_eq!(
            build_token_url("http://sso.fleet.local:8080///"),
            "http://sso.fleet.local:8080/oauth/v2/token"
        );
    }

    // ---- MachineKeyFile JSON parsing --------------------------------------

    #[test]
    fn machine_key_file_parses_zitadel_json_shape() {
        // The serde renames (`type`, `keyId`, `userId`) are easy to
        // break. This is the literal JSON shape Zitadel's
        // /management/v1/users/.../keys endpoint emits.
        let raw = r#"{
"type": "serviceaccount",
"keyId": "371358469099356247",
"key": "-----BEGIN RSA PRIVATE KEY-----\nABC\n-----END RSA PRIVATE KEY-----\n",
"userId": "371358469065801815"
}"#;
        let parsed: MachineKeyFile = serde_json::from_str(raw).expect("valid keyfile");
        assert_eq!(parsed._type, "serviceaccount");
        assert_eq!(parsed.key_id, "371358469099356247");
        assert_eq!(parsed.user_id, "371358469065801815");
        assert!(parsed.key.contains("BEGIN RSA PRIVATE KEY"));
    }
}

View File

@@ -0,0 +1,65 @@
//! Shared NATS auth plumbing for fleet processes.
//!
//! Two consumers today:
//!
//! - **`harmony-fleet-agent`** — reads `[credentials]` from
//! `/etc/fleet-agent/config.toml`. Per-device Zitadel machine user
//! with the `device` role.
//! - **`harmony-fleet-operator`** — reads the same TOML shape from a
//! single env var (the env var's value is the TOML snippet for the
//! `[credentials]` table). Singleton machine user with the
//! `fleet-admin` role.
//!
//! Both deserialize into the **same** [`CredentialsSection`], factory
//! into the **same** [`CredentialSource`], and use the **same**
//! [`connect_options_with_credentials`] helper to build a NATS client.
//! The only thing that differs between processes is where the bytes of
//! the TOML config come from and which Zitadel user signs the
//! JWT-bearer assertion.
//!
//! Adding a new mode (e.g. user JWT from a CLI session) is one new
//! variant on `CredentialsSection` + `CredentialSource`; everything
//! else flows through unchanged.
mod agent_config;
mod config;
mod credentials;
pub use agent_config::{AgentConfig, AgentSection, NatsSection, load_config};
pub use config::CredentialsSection;
pub use credentials::{
ASSERTION_LIFETIME_SECS, CachedToken, CredentialSource, MachineKeyFile, NatsCredential,
TOKEN_REFRESH_LEEWAY_SECS, credential_source_from_config,
};
use std::sync::Arc;
/// Build `async_nats::ConnectOptions` wired with the auth callback
/// that pulls fresh credentials from `creds` on every (re)connect.
///
/// Caller chains additional options (`ping_interval`, `event_callback`,
/// …) before invoking `.connect(urls)`.
pub fn connect_options_with_credentials(
creds: Arc<CredentialSource>,
) -> async_nats::ConnectOptions {
async_nats::ConnectOptions::with_auth_callback(move |_nonce| {
let cs = creds.clone();
async move {
let cred = cs
.next_credential()
.await
.map_err(|e| async_nats::AuthError::new(format!("credential source: {e}")))?;
let mut auth = async_nats::Auth::new();
match cred {
NatsCredential::UserPass { user, pass } => {
auth.username = Some(user);
auth.password = Some(pass);
}
NatsCredential::BearerToken(token) => {
auth.token = Some(token);
}
}
Ok(auth)
}
})
}

Some files were not shown because too many files have changed in this diff Show More