feat/iot-helm #275

Merged
johnride merged 52 commits from feat/iot-helm into feat/iot-walking-skeleton 2026-04-25 13:52:24 +00:00
70 changed files with 6698 additions and 956 deletions

Cargo.lock generated
View File

@@ -3166,7 +3166,36 @@ dependencies = [
]
[[package]]
name = "example_iot_vm_setup"
name = "example_fleet_load_test"
version = "0.1.0"
dependencies = [
"anyhow",
"async-nats",
"chrono",
"clap",
"harmony-fleet-operator",
"harmony-reconciler-contracts",
"k8s-openapi",
"kube",
"rand 0.9.2",
"serde_json",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "example_fleet_nats_install"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"harmony",
"tokio",
]
[[package]]
name = "example_fleet_vm_setup"
version = "0.1.0"
dependencies = [
"anyhow",
@@ -3178,6 +3207,20 @@ dependencies = [
"tokio",
]
[[package]]
name = "example_harmony_apply_deployment"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"harmony",
"harmony-fleet-operator",
"k8s-openapi",
"kube",
"serde_json",
"tokio",
]
[[package]]
name = "example_linux_vm"
version = "0.1.0"
@@ -3690,6 +3733,47 @@ dependencies = [
"walkdir",
]
[[package]]
name = "harmony-fleet-agent"
version = "0.1.0"
dependencies = [
"anyhow",
"async-nats",
"chrono",
"clap",
"futures-util",
"harmony",
"harmony-reconciler-contracts",
"serde",
"serde_json",
"tokio",
"toml",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "harmony-fleet-operator"
version = "0.1.0"
dependencies = [
"anyhow",
"async-nats",
"chrono",
"clap",
"futures-util",
"harmony",
"harmony-reconciler-contracts",
"k8s-openapi",
"kube",
"schemars 0.8.22",
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "harmony-k8s"
version = "0.1.0"
@@ -3732,8 +3816,10 @@ version = "0.1.0"
dependencies = [
"chrono",
"harmony_types",
"schemars 0.8.22",
"serde",
"serde_json",
"thiserror 2.0.18",
]
[[package]]
@@ -4710,48 +4796,6 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "iot-agent-v0"
version = "0.1.0"
dependencies = [
"anyhow",
"async-nats",
"chrono",
"clap",
"futures-util",
"harmony",
"harmony-reconciler-contracts",
"serde",
"serde_json",
"tokio",
"toml",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "iot-operator-v0"
version = "0.1.0"
dependencies = [
"anyhow",
"async-nats",
"async-trait",
"clap",
"futures-util",
"harmony",
"harmony-k8s",
"harmony-reconciler-contracts",
"k8s-openapi",
"kube",
"schemars 0.8.22",
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "ipnet"
version = "2.12.0"
@@ -4910,6 +4954,7 @@ checksum = "aa60a41b57ae1a0a071af77dbcf89fc9819cfe66edaf2beeb204c34459dcf0b2"
dependencies = [
"base64 0.22.1",
"chrono",
"schemars 0.8.22",
"serde",
"serde_json",
]

View File

@@ -28,8 +28,8 @@ members = [
"harmony_node_readiness",
"harmony-k8s",
"harmony_assets", "opnsense-codegen", "opnsense-api",
"iot/iot-operator-v0",
"iot/iot-agent-v0",
"fleet/harmony-fleet-operator",
"fleet/harmony-fleet-agent",
"harmony-reconciler-contracts",
]
@@ -66,7 +66,7 @@ kube = { version = "1.1.0", features = [
"ws",
"jsonpatch",
] }
k8s-openapi = { version = "0.25", features = ["v1_30"] }
k8s-openapi = { version = "0.25", features = ["v1_30", "schemars"] }
# TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated https://github.com/sebastienrousseau/serde_yml
serde_yaml = "0.9"
serde-value = "0.7"

View File

@@ -99,7 +99,7 @@ Replace `kubectl exec bao ...` shell commands in `openbao/setup.rs` with typed `
`K8sAnywhereTopology` and `HAClusterTopology` have accumulated opinions — cert-manager install, tenant manager setup, helm probes, TLS passthrough, SSO wiring — that make them unfit for narrow, ad-hoc Score execution. Calling `ensure_ready()` on `K8sAnywhereTopology` to apply a single CRD installs a full product stack as a side effect; that's the opposite of what "make me ready" should mean.
Concrete example: `iot/iot-operator-v0/src/install.rs` needed a topology that satisfies `K8sclient` for a single `K8sResourceScore::<CustomResourceDefinition>` apply. `K8sAnywhereTopology` was wrong (too heavy); `HAClusterTopology` was wrong (bare-metal). Work-around: a 30-line inline `InstallTopology` that wraps a pre-built `K8sClient` and has a noop `ensure_ready`. That file flags the architectural smell in its doc comment and points back to this entry.
Concrete example: `fleet/harmony-fleet-operator/src/install.rs` needed a topology that satisfies `K8sclient` for a single `K8sResourceScore::<CustomResourceDefinition>` apply. `K8sAnywhereTopology` was wrong (too heavy); `HAClusterTopology` was wrong (bare-metal). Work-around: a 30-line inline `InstallTopology` that wraps a pre-built `K8sClient` and has a noop `ensure_ready`. That file flags the architectural smell in its doc comment and points back to this entry.
If every narrow Score ends up vendoring its own ad-hoc topology, we get exactly the proliferation this entry is meant to prevent.
@@ -113,4 +113,4 @@ If every narrow Score ends up vendoring its own ad-hoc topology, we get exactly
- Adding a new ad-hoc Score against k8s doesn't require inventing a new topology.
- `K8sAnywhereTopology` stops being the default reach and starts being a deliberate product choice.
- Test: can we delete the inline `InstallTopology` in `iot/iot-operator-v0/src/install.rs` by replacing it with a one-liner `K8sBareTopology::from_env()`? That's the smoke test for "we fixed the proliferation."
- Test: can we delete the inline `InstallTopology` in `fleet/harmony-fleet-operator/src/install.rs` by replacing it with a one-liner `K8sBareTopology::from_env()`? That's the smoke test for "we fixed the proliferation."

View File

@@ -15,7 +15,7 @@ for CI) so:
- the VM runs the same Ubuntu 24.04 arm64 cloud image customers will
eventually flash onto a Pi;
- the iot-agent shipped to it is a real aarch64 binary produced by
- the fleet-agent shipped to it is a real aarch64 binary produced by
our existing cross-compile toolchain;
- apt/systemd/podman on the VM are the actual arm64 packages; and
- smoke-a3 exercises all of it end-to-end.
@@ -126,11 +126,11 @@ In `modules/iot/preflight.rs`, when the caller asks for arm64 VMs
### 6. Cross-compiled agent
smoke-a3.sh phase 2 currently does native `cargo build --release
-p iot-agent-v0`. When arch=aarch64:
-p fleet-agent-v0`. When arch=aarch64:
- `cargo build --release --target aarch64-unknown-linux-gnu
-p iot-agent-v0`
-p fleet-agent-v0`
- AGENT_BINARY points at `target/aarch64-unknown-linux-gnu/release/
iot-agent-v0`
fleet-agent-v0`
Opt-in via `--arch aarch64` CLI flag on both
`example_iot_vm_setup` and `smoke-a3.sh`. Default stays x86_64.
@@ -152,9 +152,9 @@ arch=aarch64. Smoke-a3's phase 5 reboot gate also lengthens.
| `harmony/src/modules/kvm/topology.rs` | Copy per-VM NVRAM template on ensure_vm; thread arch through to XML. |
| `harmony/src/modules/iot/assets.rs` | `ensure_ubuntu_2404_cloud_image_for_arch(arch)`; pin arm64 URL+sha256. |
| `harmony/src/modules/iot/preflight.rs` | Arch-aware preflight; qemu-system-aarch64 + firmware + qemu-version. |
| `examples/iot_vm_setup/src/main.rs` | `--arch x86_64|aarch64` CLI flag; resolve matching cloud image. |
| `iot/scripts/smoke-a3.sh` | Arch flag plumbing; cross-compile; extended timeouts; preflight. |
| `iot/scripts/smoke-a3-arm.sh` (new) | Dedicated arm smoke as the CI hook — `ARCH=aarch64 ./smoke-a3.sh`. |
| `examples/fleet_vm_setup/src/main.rs` | `--arch x86_64|aarch64` CLI flag; resolve matching cloud image. |
| `fleet/scripts/smoke-a3.sh` | Arch flag plumbing; cross-compile; extended timeouts; preflight. |
| `fleet/scripts/smoke-a3-arm.sh` (new) | Dedicated arm smoke as the CI hook — `ARCH=aarch64 ./smoke-a3.sh`. |
## Out of scope

View File

@@ -0,0 +1,453 @@
# Chapter 4 — Aggregation architecture at IoT scale
> **Status: SUPERSEDED (2026-04-23) — historical archaeology only.**
>
> This document proposed an event-stream CQRS architecture
> (`StateChangeEvent` on a JetStream stream, per-key `Revision`
> tracking, `LifecycleTransition::{Applied, Removed}` diff events,
> cold-start re-walk, durable consumer folding events into counters).
> The design was implemented, then entirely removed in favor of a
> simpler shape: the operator watches `device-state` KV directly
> via `bucket.watch_with_history(">")`, selector evaluation runs
> against a cluster-scoped `Device` CRD cache, and `desired-state`
> entries are diffed from the selector → matched-devices set on
> watch events. No event stream, no revisions, no transition
> enum.
>
> **What's still accurate in this doc:**
>
> - The per-concern KV split (`device-info`, `device-state`,
> `device-heartbeat`) and their cadences.
> - The operator's responsibilities: counter aggregation, dirty-set
> debouncing, 1 Hz CR patch cadence.
> - The scale target (10 000 devices × 1 000 deployments at
> 10 000 state writes/s — load-tested and green).
> - The `.status.aggregate` fields (succeeded / failed / pending /
> lastError, plus the new `matchedDeviceCount`).
>
> **What's no longer true:**
>
> - No `events.state.>` JetStream stream, no durable event consumer.
> - No per-key `Revision(agent_epoch, sequence)` — KV ordering is
> sufficient.
> - No `LifecycleTransition` diff enum on the wire — phase
> transitions are derived from cached vs. current state inside
> the operator.
> - No `events.log.>` stream, no `logs.<device>.query` request-
> reply protocol. Logs are deferred until a real consumer lands.
> - No cold-start event re-walk — KV watch with history replays
> current state, which covers restart-correctness for the
> device-state cache.
>
> **Where to look now:**
>
> - Shipped design: `v0_1_plan.md` Chapter 2 (marked SHIPPED 2026-04-23).
> - Source of truth: `fleet/harmony-fleet-operator/src/fleet_aggregator.rs`,
> `fleet/harmony-fleet-operator/src/device_reconciler.rs`,
> `harmony-reconciler-contracts/src/{fleet,kv,status}.rs`.
>
> Everything below is preserved verbatim as the decision trail of a
> path not taken. Useful as context for why the current design is
> shaped the way it is; not a spec for future work.
>
> ---
>
> (Original design draft begins here.)
## 1. Why now
We have no real deployment in the field yet. That's a liability when
shipping (no user, no revenue) but a gift when designing: we can move
the data model before customers depend on it. After a partner fleet
lands, changing the aggregation substrate is a multi-quarter
migration. Doing it now is days of work.
Chapter 2's aggregator was the right "make it work" design for a
walking-skeleton proof. It's the wrong "make it scale" design for a
partner deployment of even a few hundred devices, let alone the
fleet sizes the product thesis targets. This chapter replaces it.
## 2. What's wrong today
**Per-tick cost, current design.** Every 5 seconds, for each
Deployment CR, resolve the selector against the full device snapshot
and fold into an aggregate:
```
O(deployments × devices) per tick
+ 1 kube patch per CR per tick
```
At 10k deployments × 1M devices, that's 10^10 selector evaluations
and 10k apiserver patches every 5 s. Nothing about that is viable.
**What else goes wrong at scale.**
- The operator holds the full fleet snapshot in memory. 1M `AgentStatus`
payloads × a few kB each = GB of heap, dominated by `recent_events`
rings.
- Agent heartbeats publish the whole `AgentStatus` every 30 s — a lot
of bytes on the wire whose only incremental content is usually a
timestamp update.
- `agent-status` is a KV bucket. KV is designed for "latest value per
key," not "stream of state changes." We've been using it for both
roles and paying the worst of each.
- Logs are nowhere yet (good — this is the moment to put them in the
right place before we're committed).
## 3. Design overview
Shift to a **CQRS-style architecture** where devices write their
authoritative state, and the operator maintains incrementally-updated
aggregates driven by state-change events.
```
device (N× agents) operator
────────────────── ────────
current state keys ───reads─▶ on cold-start:
(authoritative) walk keys → rebuild counters
then: stream consumer
state-change events ═ JS stream═▶ ± counters per event
(delta stream) ± update reverse index
on tick (1 Hz):
device_info keys ───reads─▶ patch .status for dirty deployments
(labels, inventory)
logs ───at-least-once NATS subj────▶ not stored centrally
(streamed on query)
```
Three substrates, each chosen for its fit:
- **JetStream KV, per-device keys** — device-authoritative state.
Cheap to read when needed, never scanned globally at scale.
- **JetStream stream, per-device events** — ordered delta feed.
Operator consumers replay on restart, consume incrementally during
steady state.
- **Plain NATS subjects, logs** — at-least-once pub/sub, device-side
buffering (~10k lines), streamed on query.
## 4. Data model
### 4.1 NATS KV buckets
**`device-info`** — static-ish facts per device, infrequent updates.
| Key | Value | Written by | Read by |
|-----|-------|------------|---------|
| `info.<device_id>` | `DeviceInfo` (labels, inventory, agent_version) | agent on startup + label change | operator (selector resolution, inventory display) |
**`device-state`** — current phase per deployment per device.
Authoritative source of truth for "what's running where."
| Key | Value | Written by | Read by |
|-----|-------|------------|---------|
| `state.<device_id>.<deployment_name>` | `DeploymentState` (phase, last_event_at, last_error) | agent on reconcile transition | operator on cold-start only |
One key per (device, deployment) pair. Natural TTL via JetStream KV
per-key history — lets us cap the keyspace.
**`device-heartbeat`** — liveness only. Tiny payload, frequent
updates.
| Key | Value | Written by | Read by |
|-----|-------|------------|---------|
| `heartbeat.<device_id>` | `{ timestamp }` (32 bytes) | agent every 30s | operator (stale detection) |
Separate from `device-state` so routine heartbeats don't churn the
state keys or emit spurious state-change events.
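For orientation, a sketch of the bucket constants and key helpers this layout implies; the shipped definitions live in `harmony-reconciler-contracts`, and the exact signatures here are assumed:
```rust
// Bucket names and key builders for the three per-concern KV buckets.
// Illustrative only; see `harmony-reconciler-contracts` for the real ones.
pub const BUCKET_DEVICE_INFO: &str = "device-info";
pub const BUCKET_DEVICE_STATE: &str = "device-state";
pub const BUCKET_DEVICE_HEARTBEAT: &str = "device-heartbeat";

/// `info.<device_id>`
pub fn device_info_key(device_id: &str) -> String {
    format!("info.{device_id}")
}

/// `state.<device_id>.<deployment_name>`
pub fn device_state_key(device_id: &str, deployment: &str) -> String {
    format!("state.{device_id}.{deployment}")
}

/// `heartbeat.<device_id>`
pub fn device_heartbeat_key(device_id: &str) -> String {
    format!("heartbeat.{device_id}")
}
```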
### 4.2 NATS JetStream stream
**`device-events`** — ordered delta feed for operator aggregation.
- Subject: `events.state.<device_id>.<deployment_name>`
- Payload: `StateChangeEvent { from: Phase, to: Phase, at, last_error }`
- Retention: time-based (e.g. 24h) — consumers that fall further
behind than retention rebuild from `device-state` KV on recovery.
- Agents emit one event per phase transition, **not** per heartbeat.
Separate stream for **event log** (user-facing reconcile log events):
- Subject: `events.log.<device_id>`
- Payload: `LogEvent { at, severity, message, deployment? }`
- Retention: time-based (1h, enough for "show me what happened the
last few minutes" queries; the device's in-memory ring holds the
rest).
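A sketch of the state-change payload as described above, reusing the contracts-crate `Phase`; the exact derives and field types are assumed, not the shipped definition:
```rust
use chrono::{DateTime, Utc};
use harmony_reconciler_contracts::Phase;
use serde::{Deserialize, Serialize};

/// Payload published to `events.state.<device_id>.<deployment_name>`,
/// one per phase transition (never per heartbeat).
#[derive(Debug, Serialize, Deserialize)]
pub struct StateChangeEvent {
    /// `None` on a device's first-ever event for a deployment (see 5.3).
    pub from: Option<Phase>,
    pub to: Phase,
    pub at: DateTime<Utc>,
    pub last_error: Option<String>,
}
```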
### 4.3 Log transport (NOT JetStream)
- Subject: `logs.<device_id>` — plain pub/sub, at-least-once
- Not persisted by NATS
- Device buffers last ~10k lines in a ring buffer
- Query protocol: request-reply on `logs.<device_id>.query`
- Device responds with buffer contents, then streams live tail
until the query closes
This is a dedicated transport because structured logs at fleet scale
(1M devices × 1k lines/h = 1B messages/h) would crush JetStream's
per-subject storage without adding operator-visible value. Operators
only look at logs on-demand, per-device; device-side buffering
matches the access pattern.
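A minimal query-client sketch against this protocol, assuming `async-nats`; it fetches the buffered lines and omits the live-tail phase:
```rust
use anyhow::Result;

/// Ask a device for its in-memory log ring over request-reply.
async fn fetch_device_logs(client: &async_nats::Client, device_id: &str) -> Result<String> {
    let reply = client
        .request(format!("logs.{device_id}.query"), "".into())
        .await?;
    Ok(String::from_utf8(reply.payload.to_vec())?)
}
```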
### 4.4 CRD fields
Minimal change from Chapter 2:
- `.status.aggregate.succeeded | failed | pending` — now sourced
from counters, not per-tick fold.
- `.status.aggregate.last_error` — updated on `to: Failed` events.
- `.status.aggregate.last_heartbeat_at` — from the per-deployment
latest event.
- `.status.aggregate.recent_events` — bounded per-deployment ring,
updated on event arrival.
- **Drop** `.status.aggregate.unreported` (no meaningful definition
under selector-based targeting — already removed in the pre-chapter
cleanup).
- **Add** `.status.aggregate.stale: u32` — count of devices matching
the selector whose last heartbeat is older than a threshold
(default 5 min). This is the replacement for "unreported" that
makes sense at scale. Computed on tick from the operator's
reverse-indexed view, not per-device query.
### 4.5 Operator in-memory state
- **Counters** — `HashMap<DeploymentKey, PhaseCounters>`, one entry
per CR, updated atomically on event arrival.
- **Reverse index** — `HashMap<DeviceId, HashSet<DeploymentKey>>`,
updated when a device's labels change or when a CR's selector
changes. Lets a state-change event find affected deployments in
O(deployments-matching-this-device) rather than O(all-deployments).
- **Last-error rollup** — per deployment, the most recent error
keyed by timestamp.
- **Recent-events ring** — per deployment, bounded by N (e.g. 10).
- **Dirty set** — deployments whose aggregate has changed since last
patch. Tick reads + clears this set; only dirty deployments get
patched.
Operator heap is bounded by fleet + deployment count, not their
product.
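An illustrative shape for that state; the names are sketches, not the shipped types:
```rust
use std::collections::{HashMap, HashSet};

type DeviceId = String;
type DeploymentKey = (String, String); // (namespace, name)

#[derive(Default)]
struct PhaseCounters {
    succeeded: u32,
    failed: u32,
    pending: u32,
}

#[derive(Default)]
struct AggregatorState {
    /// One counter set per Deployment CR, updated on event arrival.
    counters: HashMap<DeploymentKey, PhaseCounters>,
    /// Device to matched deployments; makes each event
    /// O(deployments-matching-this-device) rather than O(all).
    reverse_index: HashMap<DeviceId, HashSet<DeploymentKey>>,
    /// Deployments whose aggregate changed since the last patch;
    /// read + cleared by the 1 Hz tick (section 7).
    dirty: HashSet<DeploymentKey>,
}
```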
## 5. Counter invariants (the contract)
Correctness rests on two rules:
### 5.1 Device publishes exactly one transition per reconcile outcome
Every reconcile results in a state. If the state differs from the
last published state for `(device, deployment)`, the agent:
1. Writes the new state to `state.<device>.<deployment>` KV (CAS
against expected-revision for multi-writer safety — only one
agent process per device, so contention is theoretical).
2. Publishes a `StateChangeEvent` to
`events.state.<device>.<deployment>`.
These two writes must be atomic from the agent's perspective — if
(1) succeeds and (2) fails (or vice versa), the agent retries until
both reach NATS. Worst case: a duplicate event on the stream;
counter handles duplicates via `from → to` structure (see 5.2).
### 5.2 Counters are driven by transitions, not snapshots
Each event carries `from: Phase, to: Phase`. Counter update is a
single atomic action:
```rust
counters[(deployment, from)] -= 1;
counters[(deployment, to)] += 1;
```
Duplicates (same `from → to` replayed) are a no-op if `from` ==
current phase for that (device, deployment) — the operator
cross-checks the device's current state in the reverse index before
applying. A duplicate past event is detected and ignored; a duplicate
current event is idempotent anyway (counters converge).
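A sketch of that update with the duplicate check, plus the bootstrap case from 5.3 below; generic over the phase type since the shipped `Phase` trait bounds aren't pinned here:
```rust
use std::collections::HashMap;
use std::hash::Hash;

/// Fold one event into a deployment's counters. `current` is the
/// operator's cached phase for this (device, deployment) pair.
fn apply_event<P: Eq + Hash + Clone>(
    counters: &mut HashMap<P, i64>,
    current: &mut Option<P>,
    from: Option<P>,
    to: P,
) {
    if *current != from {
        // Replayed past event: the cached phase already moved on. Ignore.
        return;
    }
    if let Some(f) = &from {
        // Normal transition: move one device out of the old phase...
        *counters.entry(f.clone()).or_insert(0) -= 1;
    }
    // ...and into the new one. Bootstrap (`from: None`) only increments.
    *counters.entry(to.clone()).or_insert(0) += 1;
    *current = Some(to);
}
```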
### 5.3 The bootstrap transition
A device's first-ever event for a deployment has `from: None` (or a
sentinel `Unassigned` variant): counter update is just `to`
increment.
### 5.4 Device leaves fleet
When a device's heartbeat goes stale past threshold + grace, OR
when its labels no longer match the deployment's selector:
- Counters are decremented for every deployment the device was
previously contributing to (via the reverse index).
- The device's state keys aren't touched — they're the authoritative
record; a device re-joining resumes from them.
### 5.5 CR created / selector changed
The reverse index + counters are rebuilt for the affected CR by
walking `device-info` + `device-state` once (O(devices + states)
local NATS KV reads). Cheap for a single CR; happens at CR-apply
time, not on every tick.
## 6. Cold-start protocol
On operator process start:
1. **Load CRs** — list `Deployment` CRs via kube API. Build the
reverse index skeleton (deployment → selector).
2. **Load device labels** — iterate `device-info` KV keys once.
Resolve each device against every CR's selector, populate the
reverse index device-side entries. O(devices × CRs), one-time,
in-memory. For 1M devices × 10k CRs this is 10^10 ops, but purely
local lookups (BTreeMap matches on label maps); back-of-envelope
has it at a few seconds to a minute on a modern CPU.
3. **Rebuild counters** — iterate `device-state` KV keys once.
For each `state.<device>.<deployment>`, look up the matching
deployments from the reverse index and increment counters.
4. **Attach stream consumer** — durable consumer on
`events.state.>`, starting from the newest sequence at cold-start
moment. The KV walk was the "past"; the stream is the "future."
5. **Begin tick loop** — patch dirty CRs on a 1 Hz schedule.
Cold-start time dominated by step 2, not step 3. An ArgoCD-style
"pause all reconciles during leader election / startup" envelope
keeps the CR patches from competing with the cold-start scans.
**What if the operator falls behind the stream's retention window?**
Reset to step 3 (re-walk `device-state`). The KV is authoritative;
the stream is an accelerator.
## 7. CR status patch cadence
- Counter updates happen in memory, instantly.
- The **dirty set** captures which deployments' aggregates changed
since the last patch.
- A 1 Hz ticker reads + clears the dirty set, patches those CRs.
- Individual CR patches are debounced to at most once per second
— avoids hammering the apiserver when a deployment is mid-rollout
and devices are transitioning in a burst.
Steady-state operator → apiserver traffic is proportional to the
rate of *interesting* changes, not to fleet size.
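A sketch of that tick loop with the kube patch stubbed out; `patch_status` is assumed, not shipped API:
```rust
use std::collections::HashSet;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;

type DeploymentKey = (String, String); // (namespace, name)

async fn tick_loop(dirty: Arc<Mutex<HashSet<DeploymentKey>>>) {
    let mut ticker = tokio::time::interval(Duration::from_secs(1));
    loop {
        ticker.tick().await;
        // Read + clear the dirty set atomically; patch outside the lock.
        let batch: Vec<DeploymentKey> = dirty.lock().await.drain().collect();
        for key in batch {
            patch_status(&key).await; // at most one patch per CR per second
        }
    }
}

async fn patch_status(_key: &DeploymentKey) {
    // kube-rs `Api::patch_status` with the counter-backed aggregate goes here.
}
```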
## 8. Failure modes
| Scenario | Detection | Recovery |
|---|---|---|
| Operator crash | k8s restarts the pod | Cold-start protocol §6 |
| Stream consumer falls behind retention | Stream API returns out-of-range | Re-run §6 step 3 (re-walk KV) |
| Agent publishes event but KV write fails | Agent-side local retry; event is replayed | Counter is idempotent per §5.2 |
| Agent writes KV but event publish fails | Agent-side local retry | Operator never sees the transition until retry succeeds; stale threshold catches the device if agent is permanently broken |
| Device's label change lost | Heartbeat carries current labels; stale entry aged out | Periodic sync (e.g. 1/h) re-scans `device-info` to catch drift |
| Duplicate event (retry) | `from == current` in reverse index | No-op (§5.2) |
| Out-of-order event (retry ordering) | Sequence number on event | Consumer tracks per-(device, deployment) last-applied sequence; old events ignored |
## 9. Scale back-of-envelope
**Target:** 1M devices, 10k deployments, p50 reconcile rate 1 event
per device per hour.
- **Event volume.** 1M × (1/3600s) = 278 events/s.
- **Operator event-processing cost.** Each event touches a bounded
number of in-memory counters (via reverse index). At 278 eps, this
is ~1 µs-equivalent of CPU, ~0 network (JetStream local to operator).
- **Operator → apiserver patches.** Deployments change at a rate
far below event rate; debounced dirty-set drains limit patches to
a few per second even during bursty rollouts.
- **Operator memory.** Reverse index entries (device_id + set of
deployment keys) ≈ 200 bytes × 1M = 200 MB. Counters ≈ 10k × few
fields = negligible. Last-error + recent-events rings ≈ 10k × 10
entries × 512 bytes = 50 MB. Total ~250 MB — fine.
- **Cold-start time.** 1M KV reads × amortized 0.1 ms (JetStream KV
is fast for key iteration) = 100 s. Acceptable for a
several-minute-once-per-release recovery window. If it becomes a
problem, chunk the walk and resume-from-checkpoint.
- **Stale device sweep.** On each tick, O(dirty set × reverse index
lookups). Stale detection itself is O(devices-whose-heartbeat-is-old);
a second, slower ticker (e.g. 30 s) scans the heartbeat KV for
entries older than threshold and emits synthetic "device went
stale" events that drive the same counter-decrement path.
## 10. Schema migration
`Deployment` CRD is still `v1alpha1`, not deployed anywhere, so no
migration machinery is needed for the CRD itself — we just change
the aggregate subtree definition.
`harmony-reconciler-contracts::AgentStatus` is deprecated by this
chapter. Replaced by narrower wire types:
- `DeviceInfo` — what `info.<device_id>` stores
- `DeploymentState` — what `state.<device>.<dep>` stores
- `HeartbeatPayload` — what `heartbeat.<device_id>` stores
- `StateChangeEvent` — what events stream emits
- `LogEvent` — what event-log stream emits
The old `AgentStatus` type goes away when the old aggregator
goes away. Clean break, same CRD version.
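Plausible Rust shapes for the narrower wire types, reusing the contracts-crate `Phase`; the shipped definitions may differ:
```rust
use chrono::{DateTime, Utc};
use harmony_reconciler_contracts::Phase;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;

#[derive(Serialize, Deserialize)]
pub struct DeviceInfo {
    pub device_id: String,
    pub labels: BTreeMap<String, String>,
    pub inventory: serde_json::Value, // hardware/OS snapshot
    pub agent_version: String,
}

#[derive(Serialize, Deserialize)]
pub struct DeploymentState {
    pub phase: Phase,
    pub last_event_at: DateTime<Utc>,
    pub last_error: Option<String>,
}

#[derive(Serialize, Deserialize)]
pub struct HeartbeatPayload {
    pub timestamp: DateTime<Utc>, // the entire payload, ~32 bytes
}
```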
## 11. Implementation milestones
Landing order, each a reviewable increment:
1. **M1: new contracts crate shapes** — `DeviceInfo`,
`DeploymentState`, `HeartbeatPayload`, `StateChangeEvent`,
`LogEvent`. Round-trip serde tests. No runtime code changes yet.
2. **M2: agent-side rewrite** — agent writes the new KV shapes +
publishes state-change events + heartbeats. Old `AgentStatus`
publish path stays in parallel for the smoke to keep passing.
3. **M3: operator-side cold-start protocol** — new operator task
that walks the new KV buckets and builds in-memory counters.
Runs alongside the old aggregator; logs counter parity checks
against the legacy aggregator's output so we can verify
correctness before switching over.
4. **M4: operator-side event consumer** — attach the durable stream
consumer, drive counters incrementally. Parity checks still on.
5. **M5: flip CR patch source** — the new counter-backed aggregator
patches `.status.aggregate`, the legacy one goes read-only, then
deleted in the next commit.
6. **M6: logs subject + query protocol** — device-side ring buffer,
query API, a first CLI surface (`natiq logs device=X` or
equivalent) that drives it.
7. **M7: synthetic-scale test harness** — spin up 1k (then 10k) mock
agents in-process, drive a realistic event load through the
operator, measure + publish numbers.
8. **M8: delete legacy `AgentStatus`** — `harmony-reconciler-contracts`
cleanup, smoke-a4 updates.
M1-M5 can land on one branch; M6 is adjacent work; M7-M8 close out.
## 12. Open questions
- **Multi-operator HA.** The design assumes one operator at a time.
Adding HA means either (a) one active + one standby operator with
NATS-based leader election, or (b) shared counter state in KV
instead of in-memory. (a) is simpler; (b) scales better.
Defer until a specific availability target demands it.
- **Counter-KV snapshots.** Should we periodically snapshot the
in-memory counter state to a `counters` KV bucket so cold-start
can resume from a recent snapshot + a short stream tail, instead
of always re-walking `device-state`? Probably yes once cold-start
time becomes an operational concern, but not in the initial cut.
- **Stream retention tuning.** 24h for `events.state.>` is a guess.
Real number depends on observed operator downtime p99. Initial
setting, tune from operational data.
- **Compaction policy for `device-state` KV.** JetStream KV
per-key history can grow unbounded if phases churn. Set
`max_history_per_key = 1` (keep only latest value) unless there's
a reason to keep transition history (there isn't — that's what
the events stream is for).
- **Agent crash before publishing state-change event.** Transition
is durably captured in the agent's local podman state; on agent
restart the reconcile loop re-observes the phase and either
re-publishes (if it differs from `state.<device>.<dep>`) or stays
silent. Correctness preserved at the cost of event-stream ordering
ambiguity during the crash window — acceptable.
## 13. What this chapter deliberately does *not* change
- CRD `.spec.target_selector` semantics — stays exactly as shipped.
- Operator's kube-rs controller loop for CR reconcile — stays as is.
- Helm chart structure (Chapter 3) — orthogonal.
- Authentication (Chapter Auth) — orthogonal. When that chapter
lands, every subject + KV bucket above will be re-scoped under
device-specific NATS credentials; the topology above doesn't need
to change for that to slot in.

View File

@@ -183,7 +183,7 @@ Drawing these out as they're load-bearing for judgment calls:
8. **The partner relationship is strategic.** Tuesday demo conversation is half the Tuesday deliverable. Framing the v0.1/v0.2/v0.3 roadmap to them matters as much as the running code.
9. **End-customer debuggability is a UX constraint.** Mechanical/electrical/chemical engineers will touch these devices. `systemctl status iot-agent` must tell them what's happening. `journalctl -u iot-agent` must be parseable by humans. Error messages must be understandable without Kubernetes knowledge.
9. **End-customer debuggability is a UX constraint.** Mechanical/electrical/chemical engineers will touch these devices. `systemctl status fleet-agent` must tell them what's happening. `journalctl -u fleet-agent` must be parseable by humans. Error messages must be understandable without Kubernetes knowledge.
10. **NATS is the long-term architectural commitment.** Everything on NATS — not as a queue, as a coordination fabric. The "decentralized cluster management" future depends on this choice. Implementation decisions that weaken this (e.g., "let's just put a database in the middle") should be pushed back on.

View File

@@ -0,0 +1,381 @@
# IoT Platform v0.1 and beyond — forward plan
Authoritative forward plan for the NationTech decentralized-infra /
IoT platform, written after the v0 walking skeleton shipped
(see `v0_walking_skeleton.md` for the historical diary). Organized as
five chapters in execution order.
## State of the world (as of 2026-04-23)
**Green, end-to-end:**
- CRD → operator → NATS JetStream KV write path (`smoke-a1.sh`).
- Agent watches KV, reconciles podman containers (`smoke-a1.sh`).
- VM-as-device provisioning: cloud-init + fleet-agent install + NATS
smoke (`smoke-a3.sh`), x86_64 (native KVM) and aarch64 (TCG).
- Power-cycle / reboot resilience (`smoke-a3.sh` phase 5).
- aarch64 cross-compile of the agent (no Harmony modules need to
feature-gate aarch64).
- Operator installed via a harmony Score (typed Rust, no yaml).
- `harmony-reconciler-contracts` crate — cross-boundary types
(bucket names, key helpers, `DeviceInfo`, `DeploymentState`,
`HeartbeatPayload`, `DeploymentName`, `Id` re-export).
**Chapter 1 shipped** (2026-04-21): composed end-to-end demo
(`smoke-a4.sh`) — operator in k3d + in-cluster NATS + ARM VM +
typed-Rust CR applier + hand-off menu + `--auto` regression. Green
on x86_64 (native KVM) and aarch64 (TCG).
**Chapter 2 shipped** (2026-04-23): selector-based targeting +
Device CRD + `.status.aggregate` reflect-back. `Deployment.spec.
targetSelector: LabelSelector` resolves against cluster-scoped
`Device` CRs materialized from NATS `device-info`. Operator writes
`desired-state` KV per matched pair, patches
`.status.aggregate` (matchedDeviceCount / succeeded / failed /
pending / lastError) at 1 Hz. Load-tested to 10 000 devices ×
1 000 Deployments at 10 000 KV writes/s sustained, zero errors.
**Not yet wired (real v0.1 work still to go):**
- Helm packaging of the operator (Chapter 3).
- Zitadel + OpenBao auth (per-device credentials, SSO for
operator users). Placeholder `CredentialSource` trait on the
agent side (Chapter 4).
- Any frontend (Chapter 5).
- Small quality items (not blockers): agent config-driven labels,
`matchExpressions` in selectors, `Device.status.conditions`
populated from heartbeat staleness.
**Verified during planning** (so future implementation doesn't
have to re-litigate):
- **Upgrade already works.** `reconciler.rs::apply` byte-compares
serialized score payloads; drift triggers re-reconcile.
`PodmanTopology::ensure_service_running` removes then re-creates
containers on spec drift. No "stale + new" window.
- **The polymorphism stays.** `ReconcileScore` is an externally-tagged
enum; adding `OkdApplyV0` later is additive.
**Surprises since v0 started** (for context, none architectural):
- Arch `edk2-aarch64-202602-2` shipped empty firmware blobs;
`202508-1` ships unpadded edk2 that needs 64 MiB pflash padding.
Fixed via runtime discovery + padding in `modules/kvm/firmware.rs`.
- MTTCG isn't default for cross-arch TCG on QEMU 10.2; force via
`qemu:commandline` override. `pauth-impdef=on` likewise a
qemu:commandline opt-in.
- `ensure_vm` is idempotent on "domain exists" — re-apply of a
changed XML requires manual `undefine --nvram --remove-all-storage`.
Noted as a follow-up in the code comments.
---
## Chapter 1 — Hands-on end-to-end demo (imminent)
**Goal:** the user runs one command, watches operator + NATS + ARM
VM come up, then drives a CRD through the full loop by hand:
`kubectl apply` it (manually or via a typed Rust applier), watch the
operator log "acquired," check the NATS KV store with `natsbox`,
SSH/console into the VM, `curl` the running nginx container from
the workstation.
### User-facing requirements (explicit)
- **No yaml fixtures.** Sample `Deployment` CRs constructed in
typed Rust using `DeploymentSpec` + `PodmanV0Score`. Same
discipline as the `install` Score that replaced `gen-crd | kubectl
apply`.
- **ArgoCD deferred.** User's production clusters have it; bringing
it into the smoke harness adds setup overhead without validating
anything `helm install` doesn't. Chapter 3 produces the chart;
ArgoCD integration is a later operational concern.
- **Operator logs every CR it acquires** — `controller.rs` already
does `tracing::info!(%ns, %name, "reconcile")`; verify the output
reads well in the command-menu hand-off.
- **natsbox debugging is first-class.** Script prints exact
natsbox one-liners at hand-off so the user can inspect KV state.
- **In-cluster NATS.** Not a side-by-side podman container (as
smoke-a1 does today). Expose to the libvirt VM via k3d
loadbalancer port mapping.
### Design decisions
- **Rust CR applier.** New binary `examples/harmony_apply_deployment/`.
CLI flags `--name --namespace --target-device --image --port
--delete`. Constructs the `Deployment` CR via
`kube::Api<Deployment>` + typed `DeploymentSpec`; calls
`api.apply(...)`. Can also `--print` the CR JSON to stdout so
`kubectl apply -f -` still works from the terminal (a minimal applier sketch follows this list).
- **smoke-a4.sh orchestration stays bash for now.** User agreed
this is test-harness scope, not framework path; converting it
to Rust is "not as important right now."
- **Hand-off is the default mode**, not `--keep`. The whole point
of Chapter 1 is that the user drives the last stage interactively.
`smoke-a4.sh` brings everything up, applies *nothing*, prints
the command menu, waits on `INT/TERM` to tear down. `--auto`
runs the full apply/curl/upgrade/delete regression for CI.
- **In-cluster NATS path.** Preferred: use `harmony::modules::nats`
if it has a lightweight single-node / no-supercluster mode.
Fallback: typed `K8sResourceScore` applying a minimal Deployment
+ NodePort Service. 15-min research task before committing.
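A sketch of the applier's core under those decisions, assuming the operator crate exports the `Deployment`/`DeploymentSpec` CRD types; the flag parsing is omitted and the field-manager name is illustrative:
```rust
use anyhow::Result;
use harmony_fleet_operator::crd::{Deployment, DeploymentSpec};
use kube::api::{Api, Patch, PatchParams};
use kube::Client;

async fn apply_deployment(ns: &str, name: &str, spec: DeploymentSpec) -> Result<()> {
    let client = Client::try_default().await?;
    let api: Api<Deployment> = Api::namespaced(client, ns);
    let cr = Deployment::new(name, spec);
    // Server-side apply keeps re-runs idempotent (create or update).
    api.patch(name, &PatchParams::apply("example-applier"), &Patch::Apply(&cr))
        .await?;
    Ok(())
}
```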
### Composed smoke phases (`smoke-a4.sh`)
1. k3d cluster up with `-p "4222:4222@loadbalancer"` so the host
port 4222 forwards into the cluster. Reachable from the
libvirt VM via the gateway IP (typically `192.168.122.1:4222`).
2. NATS in-cluster via the chosen path (harmony module or direct
K8sResourceScore). Wait for readiness.
3. Install CRD via the operator's `install` subcommand (typed Rust).
4. Spawn operator as a host-side process (same pattern as
smoke-a1). Operator connects to `nats://localhost:4222`.
5. Provision ARM VM via `example_iot_vm_setup` (same entry point
smoke-a3 uses). Agent configured to connect to
`nats://<libvirt_gateway>:4222` — discover the gateway IP via
`virsh net-dumpxml default`, as smoke-a3 already does.
6. Sanity: `kubectl wait ... crd Established`, operator logged
"KV bucket ready", agent logged "watching KV keys",
`status.<device>` present in `agent-status` bucket.
7. Hand off. Print the command menu below. Exit 0 with a cleanup
trap on `INT/TERM`.
### Command menu at hand-off
- `kubectl get deployments.fleet.nationtech.io -A -w` — watch CR
reconcile reactively.
- `cargo run -q -p example_harmony_apply_deployment -- --image
nginx:latest --target-device $TARGET_DEVICE` — apply an nginx
deployment via typed Rust.
- `cargo run -q -p example_harmony_apply_deployment -- --print
--image nginx:latest --target-device $TARGET_DEVICE |
kubectl apply -f -` — same thing, through kubectl.
- `ssh -i $SSH_KEY fleet-admin@$VM_IP` — connect to the VM.
- `virsh console $VM_NAME --force` — serial console alternative.
- `podman --url unix://$VM_IP:... ps` or ssh + `podman ps`
— list containers on the VM from the workstation.
- `podman run --rm docker.io/natsio/nats-box nats --server
nats://localhost:4222 kv ls desired-state` — list desired
state keys (from the host).
- `podman run --rm ... nats kv get desired-state
'<device>.<deployment>' --raw` — dump a specific desired state.
- `podman run --rm ... nats kv get agent-status
'status.<device>' --raw` — dump the heartbeat.
- `curl http://$VM_IP:8080/` — hit the deployed nginx.
### `--auto` path (for regression)
1. Apply `nginx:latest`, wait for container on VM, `curl` 200.
2. Apply `nginx:1.26` (upgrade), wait for container *id* to change,
`curl` 200 against the new container.
3. Apply `--delete`, wait for container gone from VM.
### Files
- **NEW** `examples/harmony_apply_deployment/Cargo.toml` +
`src/main.rs` — typed applier.
- **NEW** `fleet/scripts/smoke-a4.sh`.
- **NO yaml fixtures.** Rust CLI flags cover the shape.
- Optional: factor shared smoke phases (NATS up, k3d up, operator
spawn, VM provision) into `fleet/scripts/lib/` if the duplication
across a1/a3/a4 becomes obvious. Don't force it.
### NATS exposure — implementation-time notes
- k3d `@loadbalancer` port mapping binds the host's `0.0.0.0:4222`
by default; libvirt VMs on `virbr0` can reach it via the gateway
IP. No special NAT config required.
- Fallback if environmental snag: keep the side-by-side podman
container on an opt-in `NATS_MODE=podman` flag. Don't default
to that — user explicitly asked for in-cluster.
### Verification
- Fresh host: `ARCH=aarch64 ./fleet/scripts/smoke-a4.sh` completes
in 8-15 min, prints the command menu.
- `ARCH=aarch64 ./fleet/scripts/smoke-a4.sh --auto` PASSes
end-to-end including upgrade id-change assertion.
- x86_64 (`ARCH=x86_64`) completes in 2-5 min.
### Explicitly out of scope
- `AgentStatus` / `DeploymentStatus` enrichment — Chapter 2.
- Helm chart, ArgoCD, auth, frontend — later chapters.
- Lifting the applier into a reusable `ApplyDeploymentScore` —
only if a second consumer appears.
---
## Chapter 2 — Status reflect-back + selector-based targeting **[SHIPPED 2026-04-23]**
**Goal:** CRD `.status` reflects fleet reality — per-deployment
success/failure/pending counts, last-error surface, freshness. The
Deployment CR targets devices by label selector, not by id list.
> The shipped design replaces the original `AgentStatus` + list-of-ids
> proposal wholesale. See `chapter_4_aggregation_scale.md` for the
> superseded design-doc archaeology. Commits:
> `refactor(iot): delete legacy AgentStatus path`,
> `refactor(iot): operator watches device-state KV directly; drop event stream`,
> `refactor(iot): Deployment.targetSelector + Device CRD (DaemonSet-like)`.
### What shipped
**Wire format** (in `harmony-reconciler-contracts`): four per-concern
payloads on dedicated NATS KV buckets. No monolithic per-device blob,
no separate event stream.
| Type | Bucket | Cadence |
|------|--------|---------|
| `DeviceInfo` | `device-info` | on startup + label/inventory change |
| `DeploymentState` | `device-state` | on reconcile phase transition |
| `HeartbeatPayload` | `device-heartbeat` | every 30 s |
**CRDs.** Two cluster resources:
- `Deployment` (namespaced) — `spec.targetSelector: LabelSelector`
(standard K8s `matchLabels` / `matchExpressions`). No device list
on spec. `.status.aggregate` carries `matchedDeviceCount`,
`succeeded`, `failed`, `pending`, `lastError`.
- `Device` (cluster-scoped, like `Node`) — `metadata.labels` carries
the device's routing labels; `spec.inventory` holds the hardware/OS
snapshot; `status.conditions` is reserved for liveness (populated
lazily by a future heartbeat-freshness reconciler, not every ping).
**Operator tasks** (three concurrent loops in one process):
1. `controller` — validates Deployment CR names, holds the finalizer
that cleans `desired-state.<device>.<deployment>` KV entries on
delete. No writes on apply (aggregator handles that).
2. `device_reconciler` — watches the `device-info` KV; server-side-
applies a `Device` CR per `DeviceInfo` payload, with label
sanitization. Agents remain kube-unaware.
3. `fleet_aggregator` — three caches driven by watches (Deployment
CRs, Device CRs, `device-state` KV). On any change, resolves
each selector against the Device cache, writes/deletes
`desired-state` KV entries for diffed matches, and patches
`.status.aggregate` at 1 Hz for the CRs whose counters moved.
**Agents** publish `device-id=<id>` as a default DeviceInfo label, so
targeting a single device with `matchLabels: {device-id: pi-42}` is
zero-config. User-defined labels layer on from agent config (scoped
out of this chapter; follow-up item).
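A sketch of the `matchLabels`-only evaluation this implies (the shipped evaluator lives in the aggregator and may differ):
```rust
use std::collections::BTreeMap;

/// A device matches when every selector pair is present in its labels.
/// An empty `match_labels` matches everything, as in standard Kubernetes.
fn selector_matches(
    match_labels: &BTreeMap<String, String>,
    device_labels: &BTreeMap<String, String>,
) -> bool {
    match_labels
        .iter()
        .all(|(k, v)| device_labels.get(k) == Some(v))
}
```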
### Scale proof
`fleet/scripts/load-test.sh` + `examples/fleet_load_test` simulate N
devices across M Deployments, driving `device-state` KV updates at a
configurable cadence while the full operator stack runs against a
local k3d apiserver. Verified:
- 100 devices / 10 groups / 1 Hz / 60 s — 100 writes/s sustained,
all 10 CR aggregates converge.
- 10 000 devices / 1 000 groups / 1 Hz / 120 s — ~10 000 writes/s
sustained, 0 errors, all 1 000 CR aggregates correct
(`matchedDeviceCount == expected`, `succeeded + failed + pending
== matched`). Same envelope before and after the selector rewrite.
### Out of scope in this chapter (follow-ups)
- Agent config-driven labels (`[labels]` in agent toml → DeviceInfo).
~30 lines; deferred until a concrete need lands.
- `matchExpressions` evaluator. Operator currently supports
`matchLabels` only and logs a warning for expression-bearing
selectors. ~50 lines; deferred.
- `Device.status.conditions` populated from heartbeat staleness
(Reachable / Stale transitions). Liveness is computable today by
reading `device-heartbeat` directly; CR-side reflection is a
convenience. ~100 lines; deferred.
- Full journald log streaming. The `.status.aggregate.lastError`
surface covers the user's reflect-back requirement for now.
- Multi-device regression smoke — defer until real hardware or a
second VM is around.
---
## Chapter 3 — Helm chart (ArgoCD deferred)
**Goal:** operator ships as a versioned helm chart with CRD
version-locked inside.
User clarified this session: ArgoCD exists in production; all it
does is apply resources from the chart. Standing up ArgoCD in the
smoke adds setup overhead with no incremental validation value.
Chapter 3 produces the chart + validates `helm install / helm
upgrade` lifecycles. ArgoCD consumption is a user operational
concern downstream.
### Sketch
- Chart location: `fleet/harmony-fleet-operator/chart/` (or sibling repo —
defer decision to implementation time).
- Templates: Namespace, SA, ClusterRole, ClusterRoleBinding,
Deployment (operator pod), CRD.
- **CRD yaml in the chart is generated at chart-publish time** from
the Rust `Deployment::crd()`. One-off release artifact, not
framework path — consistent with "no yaml in framework code."
- Values: operator image tag, NATS URL, log level.
- Smoke: `helm install` into k3d → CR apply → same assertions as
Chapter 1.
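A sketch of the chart-publish-time generation step, using kube-rs's `CustomResourceExt`; the binary wiring is assumed:
```rust
use kube::CustomResourceExt;

fn main() -> anyhow::Result<()> {
    // One-off release artifact: serialize the Rust-defined CRD to yaml
    // for the chart to vendor. Never runs in a framework code path.
    let crd = harmony_fleet_operator::crd::Deployment::crd();
    println!("{}", serde_yaml::to_string(&crd)?);
    Ok(())
}
```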
### Open questions
- Chart repo: subdir vs. separate git repo.
- CRD install mechanism: chart hook vs. templates directory.
Drives CRD upgrade story.
---
## Chapter 4 — Auth: Zitadel + OpenBao + per-device identity
**Goal:** per-device granular NATS credentials; SSO for operator
users; OpenBao policy per device; JWT bootstrap from Zitadel.
Zitadel + OpenBao are already ~99% integrated in harmony; this
chapter is wiring the IoT-specific flows.
### Sketch
- Agent's `CredentialSource` trait (already abstract in agent
`config.rs`) gets a Zitadel-JWT-backed implementation. Mints
short-lived NATS creds via OpenBao auth callout.
- Remove the shared-credentials `toml-shared` variant (v0 demo
leftover).
- Availability: auth-callout caches policies, tolerates OpenBao
outages.
- SSO for operator users (separate flow): Zitadel groups →
Kubernetes RBAC subjects on the `Deployment` CRD.
---
## Chapter 5 — Frontend (last)
**Goal:** operator-friendly UI for the decentralized platform.
Form factor undecided: Leptos web dashboard, CLI extension to
`harmony_cli`, or a TUI. Minimum viable product: read-only view of
fleet state (devices + deployments + aggregated status) powered by
the CRD `.status` from Chapter 2. Aspiration: write operations with
auth from Chapter 4.
---
## Principles — what we've learned and want to keep doing
- **No yaml in framework code paths.** Every kube-rs type is
typed; every Score apply goes through typed Rust. Yaml generation
happens only at chart-publish time, never at runtime.
- **Scores describe desired state; topologies expose capabilities.**
Prefer adding capability traits over thickening a single topology.
- **Minimal topologies for ad-hoc Score execution.** `K8sAnywhereTopology`
has too many opinions (cert-manager install, tenant-manager bootstrap,
helm probes) for narrow apply-a-CRD use cases. See ROADMAP
§12.6 — a lean shared `K8sBareTopology` is the durable fix.
- **Cross-boundary wire types in `harmony-reconciler-contracts`**,
everything else in its natural crate.
- **Never ship untested code.** Every commit that changes runtime
behavior is verified against a smoke script before landing.
Cargo check + unit tests aren't enough.
- **Prove claims about upstream before blaming upstream.** The
Arch edk2 investigation showed this matters; see
`memory/feedback_prove_before_blaming_upstream.md`.

View File

@@ -1,5 +1,23 @@
# IoT Platform v0 — Walking Skeleton
> **Status: SHIPPED (2026-04-21)**
>
> This document is the historical design diary for the v0 walking skeleton
> work — it captures the decision trail, hour-by-hour plan, and risk
> analysis as they were written before the skeleton was built. It is
> preserved unchanged as an archaeology reference.
>
> The walking skeleton shipped end-to-end: CRD → operator → NATS KV →
> on-device agent → podman reconcile; VM-as-device flow (x86_64 + aarch64
> via TCG); power-cycle resilience; operator installed as a Score rather
> than kubectl-apply-a-yaml. See smoke-a1, smoke-a3, smoke-a3-arm for the
> executable proof.
>
> **Forward plan lives in `ROADMAP/fleet_platform/v0_1_plan.md`** — five
> chapters covering hands-on demo, status reflect-back, helm chart, SSO/
> secrets, and frontend. When a chapter grows scope it may move into its
> own `chapter_N_*.md`.
**Approach:** Walking skeleton (Cockburn). Thin end-to-end thread through every architectural component. Naive first, architecture emerges from running code, hardening follows real-world feedback.
## 1. Strategic framing
@@ -116,11 +134,11 @@ iot-workload-hello/
`deployment.yaml`:
```yaml
apiVersion: iot.nationtech.io/v1alpha1
apiVersion: fleet.nationtech.io/v1alpha1
kind: Deployment
metadata:
name: hello-world
namespace: iot-demo
namespace: fleet-demo
spec:
targetDevices:
- pi-demo-01
@@ -138,10 +156,10 @@ spec:
### 5.2 Central cluster setup
Existing k8s cluster. Namespaces:
- `iot-system` — operator, NATS (single-node for v0)
- `iot-demo` — `Deployment` CRs
- `fleet-system` — operator, NATS (single-node for v0)
- `fleet-demo` — `Deployment` CRs
ArgoCD application pre-configured to sync `iot-workload-hello` repo into `iot-demo` namespace.
ArgoCD application pre-configured to sync `iot-workload-hello` repo into `fleet-demo` namespace.
### 5.3 Raspberry Pi 5 setup
@@ -151,9 +169,9 @@ Base OS: **Ubuntu Server 24.04 LTS ARM64** (ships Podman 4.9 in repos). Raspberr
Installed:
- `podman` (4.4+, ARM64) with `systemctl --user enable --now podman.socket` (required for `podman-api` crate)
- `iot-agent` binary (cross-compiled to aarch64 via existing Harmony aarch64 toolchain)
- `/etc/iot-agent/config.toml` with NATS URL + shared credential
- systemd unit `iot-agent.service`
- `fleet-agent` binary (cross-compiled to aarch64 via existing Harmony aarch64 toolchain)
- `/etc/fleet-agent/config.toml` with NATS URL + shared credential
- systemd unit `fleet-agent.service`
### 5.4 What the code does
@@ -227,7 +245,7 @@ trait CredentialSource: Send + Sync {
}
```
v0: `TomlFileCredentialSource` reading `/etc/iot-agent/config.toml`.
v0: `TomlFileCredentialSource` reading `/etc/fleet-agent/config.toml`.
v0.2: `ZitadelBootstrappedCredentialSource` — same trait, swapped via config.
30 minutes Friday. Saves 3 hours of refactor in v0.2.
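For illustration, a plausible shape for the v0 source; the trait body is elided in this diff, so the loading logic below is assumed:
```rust
use serde::Deserialize;

#[derive(Deserialize)]
struct Credentials {
    nats_user: String,
    nats_pass: String,
}

#[derive(Deserialize)]
struct AgentConfig {
    credentials: Credentials,
}

struct TomlFileCredentialSource {
    path: std::path::PathBuf, // e.g. /etc/fleet-agent/config.toml
}

impl TomlFileCredentialSource {
    fn load(&self) -> anyhow::Result<Credentials> {
        let raw = std::fs::read_to_string(&self.path)?;
        Ok(toml::from_str::<AgentConfig>(&raw)?.credentials)
    }
}
```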
@@ -258,7 +276,7 @@ device_id = "pi-demo-01"
[credentials]
type = "toml-shared"
nats_user = "iot-agent"
nats_user = "fleet-agent"
nats_pass = "dev-shared-password"
[nats]
@@ -306,9 +324,9 @@ Document findings in the Friday night log regardless of outcome. v0.1 work inclu
- Write 1-page `v0-demo.md`: demo script, success criteria, fallback plan.
- Decide Pi OS: Ubuntu 24.04 ARM64 (default) vs Raspberry Pi OS 64-bit. Don't agonize beyond 10 min.
*Dispatch agent A1 (operator):* "Create Rust crate `iot/iot-operator-v0/` using `kube-rs` implementing a Deployment CRD controller that writes to NATS KV. Exact spec in task card §9.A1. Self-verify: `kubectl apply` → `nats kv get` shows entry. Under 300 lines main.rs. No auth."
*Dispatch agent A1 (operator):* "Create Rust crate `fleet/harmony-fleet-operator/` using `kube-rs` implementing a Deployment CRD controller that writes to NATS KV. Exact spec in task card §9.A1. Self-verify: `kubectl apply` → `nats kv get` shows entry. Under 300 lines main.rs. No auth."
*Dispatch agent A2 (Pi provisioning, fallback-aware):* "Attempt Harmony-based Raspberry Pi 5 provisioning Score. Target: fresh Pi flashed via SD card, boots, static IP, Ubuntu 24.04 ARM64 with Podman 4.9, podman user socket enabled, user `iot-agent` with linger enabled, `/etc/iot-agent/` ready. If Harmony doesn't have Pi primitives, document the gap and produce a manual provisioning runbook instead (rpi-imager + cloud-init). Hard time limit: 90 min. Self-verify: `ssh iot-agent@<pi-ip> 'podman --version'` returns 4.4+."
*Dispatch agent A2 (Pi provisioning, fallback-aware):* "Attempt Harmony-based Raspberry Pi 5 provisioning Score. Target: fresh Pi flashed via SD card, boots, static IP, Ubuntu 24.04 ARM64 with Podman 4.9, podman user socket enabled, user `fleet-agent` with linger enabled, `/etc/fleet-agent/` ready. If Harmony doesn't have Pi primitives, document the gap and produce a manual provisioning runbook instead (rpi-imager + cloud-init). Hard time limit: 90 min. Self-verify: `ssh fleet-agent@<pi-ip> 'podman --version'` returns 4.4+."
**Hour 2 — your work: agent crate**
@@ -324,8 +342,8 @@ Crate in `harmony/src/modules/iot_agent/` or a new binary in the Harmony workspa
**Hour 3 — local integration**
- Review agent A1's operator. Deploy to central cluster `iot-system` namespace.
- Deploy NATS to `iot-system` if not already (single-node JetStream).
- Review agent A1's operator. Deploy to central cluster `fleet-system` namespace.
- Deploy NATS to `fleet-system` if not already (single-node JetStream).
- Review agent A2's Pi provisioning. If Harmony Score succeeded, note for demo; if manual runbook, accept and move on.
- Agent compiles on laptop. Connects to central NATS.
@@ -380,7 +398,7 @@ Named subsection: the most important class of failures for Pi-in-field deploymen
**Hour 3-4 — demo polish:**
- `./demo.sh` is one command, no manual steps.
- Output is clean: clear PASS/FAIL with per-phase timings.
- `kubectl get deployments.iot.nationtech.io` output is readable.
- `kubectl get deployments.fleet.nationtech.io` output is readable.
**Hour 5-6 — partner-facing polish:**
- README in workload repo: 4 lines. "Edit this, git push, done."
@@ -421,8 +439,8 @@ Each card is self-contained. Hand the entire card to an agent.
# Note: harmony is built with --no-default-features to exclude KVM (libvirt cannot cross-compile to aarch64).
# The 5 KVM examples (kvm_vm_examples, kvm_okd_ha_cluster, opnsense_vm_integration,
# opnsense_pair_integration, example_linux_vm) are x86_64-only by design.
cargo build --target x86_64-unknown-linux-gnu -p harmony -p harmony_agent -p iot-agent-v0 -p iot-operator-v0
cargo build --target aarch64-unknown-linux-gnu -p harmony --no-default-features -p harmony_agent -p iot-agent-v0 -p iot-operator-v0
cargo build --target x86_64-unknown-linux-gnu -p harmony -p harmony_agent -p fleet-agent-v0 -p harmony-fleet-operator
cargo build --target aarch64-unknown-linux-gnu -p harmony --no-default-features -p harmony_agent -p fleet-agent-v0 -p harmony-fleet-operator
```
All three must exit 0. Note: `cargo test --target aarch64-unknown-linux-gnu` cannot run on x86_64 (exec format error) — that's expected. Test execution is only for the host architecture via `./build/check.sh`. If any check fails, fix the issue before marking the task complete. Include the output in the PR description.
@@ -431,11 +449,11 @@ All three must exit 0. Note: `cargo test --target aarch64-unknown-linux-gnu` can
**Goal:** `kube-rs` operator that watches `Deployment` CRs and writes the Score to NATS KV.
**Deliverable:** Crate `iot/iot-operator-v0/`:
**Deliverable:** Crate `fleet/harmony-fleet-operator/`:
- `Cargo.toml`: `kube`, `k8s-openapi`, `async-nats`, `serde`, `serde_yaml`, `serde_json`, `tokio`, `tracing`, `tracing-subscriber`, `anyhow`.
- `src/main.rs` under 300 lines.
- `deploy/operator.yaml` — Deployment, ServiceAccount, ClusterRole, ClusterRoleBinding.
- `deploy/crd.yaml` — `Deployment` CRD for `iot.nationtech.io/v1alpha1`.
- `deploy/crd.yaml` — `Deployment` CRD for `fleet.nationtech.io/v1alpha1`.
**Behavior:**
1. Connect to NATS on startup (`NATS_URL` env, no auth).
@@ -462,7 +480,7 @@ status:
**Self-verification:**
```bash
cd iot/iot-operator-v0
cd fleet/harmony-fleet-operator
cargo build && cargo clippy -- -D warnings
# Test against k3d:
@@ -474,7 +492,7 @@ OP_PID=$!
sleep 3
kubectl apply -f - <<EOF
apiVersion: iot.nationtech.io/v1alpha1
apiVersion: fleet.nationtech.io/v1alpha1
kind: Deployment
metadata:
name: test-deploy
@@ -496,7 +514,7 @@ sleep 5
nats --server nats://localhost:4222 kv get desired-state test-device-01.test-deploy
# Must print the Score JSON with type="PodmanV0"
kubectl get deployment.iot.nationtech.io test-deploy -o jsonpath='{.status.observedScoreString}'
kubectl get deployment.fleet.nationtech.io test-deploy -o jsonpath='{.status.observedScoreString}'
# Must print the stored string
kill $OP_PID
@@ -505,7 +523,7 @@ docker stop nats
```
**Forbidden:**
- Code outside `iot/iot-operator-v0/`.
- Code outside `fleet/harmony-fleet-operator/`.
- Zitadel, OpenBao, auth callout dependencies.
- Parsing `score.data`.
- Rollout logic beyond KV writes.
@@ -524,19 +542,19 @@ docker stop nats
- Ubuntu Server 24.04 LTS ARM64 (or Raspberry Pi OS 64-bit if Ubuntu fails).
- Static IP on lab network.
- Packages: `podman`, `systemd-container`, `openssh-server`, `curl`, `jq`.
- `systemctl --user enable --now podman.socket` for user `iot-agent`.
- User `iot-agent` with linger enabled (`loginctl enable-linger iot-agent`).
- `/etc/iot-agent/` (owned by iot-agent, 0750).
- `/var/lib/iot-agent/`.
- `systemctl --user enable --now podman.socket` for user `fleet-agent`.
- User `fleet-agent` with linger enabled (`loginctl enable-linger fleet-agent`).
- `/etc/fleet-agent/` (owned by fleet-agent, 0750).
- `/var/lib/fleet-agent/`.
**Self-verification:**
```bash
ssh iot-agent@<pi-ip> 'podman --version'
ssh fleet-agent@<pi-ip> 'podman --version'
# Must be 4.4+ (target 4.9+)
ssh iot-agent@<pi-ip> 'systemctl --user is-active podman.socket'
ssh fleet-agent@<pi-ip> 'systemctl --user is-active podman.socket'
# Must print "active"
ssh iot-agent@<pi-ip> 'loginctl show-user iot-agent | grep Linger=yes'
ssh iot-agent@<pi-ip> 'uname -m'
ssh fleet-agent@<pi-ip> 'loginctl show-user fleet-agent | grep Linger=yes'
ssh fleet-agent@<pi-ip> 'uname -m'
# Must print aarch64
```
@@ -550,13 +568,13 @@ ssh iot-agent@<pi-ip> 'uname -m'
**Prerequisites:** Agent binary exists (Sylvain writes Friday).
**Deliverable:** `iot/iot-agent-v0/scripts/install.sh`:
**Deliverable:** `iot/fleet-agent-v0/scripts/install.sh`:
1. Args: `--host <ip>`, `--device-id <id>`, `--nats-url <url>`, `--nats-user <u>`, `--nats-pass <p>`.
2. Cross-builds for aarch64 using existing Harmony aarch64 toolchain.
3. `scp` binary to Pi, `sudo mv` to `/usr/local/bin/iot-agent`.
4. Templates `/etc/iot-agent/config.toml` from args.
5. Installs `/etc/systemd/system/iot-agent.service`.
6. `systemctl daemon-reload && systemctl enable --now iot-agent`.
3. `scp` binary to Pi, `sudo mv` to `/usr/local/bin/fleet-agent`.
4. Templates `/etc/fleet-agent/config.toml` from args.
5. Installs `/etc/systemd/system/fleet-agent.service`.
6. `systemctl daemon-reload && systemctl enable --now fleet-agent`.
7. Waits up to 15s for "connected to NATS" in journal.
**systemd unit:**
@@ -568,8 +586,8 @@ Wants=network-online.target
[Service]
Type=simple
User=iot-agent
ExecStart=/usr/local/bin/iot-agent
User=fleet-agent
ExecStart=/usr/local/bin/fleet-agent
Restart=on-failure
RestartSec=5
StandardOutput=journal
@@ -584,9 +602,9 @@ WantedBy=multi-user.target
```bash
./install.sh --host <pi-ip> --device-id pi-demo-01 \
--nats-url nats://central:4222 \
--nats-user iot-agent --nats-pass dev-shared-password
ssh iot-agent@<pi-ip> 'sudo systemctl status iot-agent' # active (running)
ssh iot-agent@<pi-ip> 'sudo journalctl -u iot-agent --since "2 minutes ago"' | grep "connected to NATS"
--nats-user fleet-agent --nats-pass dev-shared-password
ssh fleet-agent@<pi-ip> 'sudo systemctl status fleet-agent' # active (running)
ssh fleet-agent@<pi-ip> 'sudo journalctl -u fleet-agent --since "2 minutes ago"' | grep "connected to NATS"
```
**Time limit:** 2 hours agent time.
@@ -595,7 +613,7 @@ ssh iot-agent@<pi-ip> 'sudo journalctl -u iot-agent --since "2 minutes ago"' | g
**Goal:** One command runs full demo flow.
**Deliverable:** `iot/scripts/demo.sh`:
**Deliverable:** `fleet/scripts/demo.sh`:
1. Verifies Pi reachable + agent running.
2. Applies `scripts/demo-deployment.yaml`.
3. Waits up to 120s for container on Pi (ssh + `podman ps`).
@@ -606,7 +624,7 @@ ssh iot-agent@<pi-ip> 'sudo journalctl -u iot-agent --since "2 minutes ago"' | g
**Self-verification:**
```bash
./iot/scripts/demo.sh
./fleet/scripts/demo.sh
# Ends with "PASS", total < 5 min
```

View File

@@ -0,0 +1,24 @@
[package]
name = "example_fleet_load_test"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "fleet_load_test"
path = "src/main.rs"
[dependencies]
harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
harmony-fleet-operator = { path = "../../fleet/harmony-fleet-operator" }
async-nats = { workspace = true }
chrono = { workspace = true }
kube = { workspace = true, features = ["runtime", "derive"] }
k8s-openapi.workspace = true
serde_json = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true }
rand = { workspace = true }

View File

@@ -0,0 +1,552 @@
//! Load test for the IoT operator's `fleet_aggregator`.
//!
//! Simulates N devices across M Deployment CRs, each device pushing
//! a `DeploymentState` update to NATS every `--tick-ms`. Measures
//! throughput on both sides (devices → NATS and operator → kube
//! apiserver) and, at the end of the run, verifies each CR's
//! `.status.aggregate` counters sum to its expected group size (and
//! that `matched_device_count` equals that size — i.e. every
//! registered device got picked up by the CR's label selector).
//!
//! Assumes an already-running stack:
//! - NATS reachable at `--nats-url`
//! - k8s cluster with the operator's CRD installed (KUBECONFIG)
//! - the operator process running against the same NATS + cluster
//!
//! The `fleet/scripts/smoke-a4.sh` script brings all three up — pass
//! `--hold` to leave them running, then run this binary.
//!
//! Typical invocation:
//!
//! cargo run -q -p example_fleet_load_test -- \
//! --namespace fleet-load \
//! --groups 55,5,5,5,5,5,5,5,5,5 \
//! --tick-ms 1000 \
//! --duration-s 60
use anyhow::{Context, Result};
use async_nats::jetstream::{self, kv};
use chrono::Utc;
use clap::Parser;
use harmony_fleet_operator::crd::{
Deployment, DeploymentSpec, Rollout, RolloutStrategy, ScorePayload,
};
use harmony_reconciler_contracts::{
BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, DeploymentName,
DeploymentState, DeviceInfo, HeartbeatPayload, Id, Phase, device_heartbeat_key,
device_info_key, device_state_key,
};
use k8s_openapi::api::core::v1::Namespace;
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::Client;
use kube::api::{Api, DeleteParams, Patch, PatchParams, PostParams};
use rand::Rng;
use std::collections::BTreeMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant};
use tokio::task::JoinSet;
#[derive(Parser, Debug, Clone)]
#[command(
name = "fleet_load_test",
about = "Synthetic load for the IoT operator's fleet_aggregator"
)]
struct Cli {
/// NATS URL (same one the operator connects to).
#[arg(long, default_value = "nats://localhost:4222")]
nats_url: String,
/// k8s namespace for the load-test Deployment CRs. Created if
/// missing.
#[arg(long, default_value = "fleet-load")]
namespace: String,
/// Group shape — comma-separated device counts, one per CR.
/// Default: 100 devices over 10 groups (1 × 55 + 9 × 5).
#[arg(long, default_value = "55,5,5,5,5,5,5,5,5,5")]
groups: String,
/// Per-device tick in ms. Each tick publishes one DeploymentState.
#[arg(long, default_value_t = 1000)]
tick_ms: u64,
/// Heartbeat cadence in seconds (separate from the state tick).
#[arg(long, default_value_t = 30)]
heartbeat_s: u64,
/// Total run duration in seconds before tearing down.
#[arg(long, default_value_t = 60)]
duration_s: u64,
/// Report throughput every N seconds.
#[arg(long, default_value_t = 5)]
report_s: u64,
/// Keep the CRs + KV entries in place after the run instead of
/// deleting them. Useful with HOLD=1 to inspect the steady-state
/// aggregate after the load finishes.
#[arg(long)]
keep: bool,
}
/// Metrics collected across all device tasks.
#[derive(Default)]
struct Counters {
state_writes: AtomicU64,
heartbeat_writes: AtomicU64,
errors: AtomicU64,
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let cli = Cli::parse();
let group_sizes = parse_groups(&cli.groups)?;
let total: usize = group_sizes.iter().sum();
tracing::info!(
devices = total,
groups = group_sizes.len(),
shape = ?group_sizes,
tick_ms = cli.tick_ms,
duration_s = cli.duration_s,
"fleet_load_test starting"
);
// --- NATS setup ----------------------------------------------------------
let nc = async_nats::connect(&cli.nats_url)
.await
.with_context(|| format!("connecting to NATS at {}", cli.nats_url))?;
let js = jetstream::new(nc);
let info_bucket = open_bucket(&js, BUCKET_DEVICE_INFO).await?;
let state_bucket = open_bucket(&js, BUCKET_DEVICE_STATE).await?;
let heartbeat_bucket = open_bucket(&js, BUCKET_DEVICE_HEARTBEAT).await?;
// --- kube setup ----------------------------------------------------------
let client = Client::try_default().await.context("kube client")?;
ensure_namespace(&client, &cli.namespace).await?;
let deployments: Api<Deployment> = Api::namespaced(client.clone(), &cli.namespace);
// --- plan groups + device ids --------------------------------------------
let plan = build_plan(&group_sizes);
apply_crs(&deployments, &plan).await?;
publish_device_infos(&info_bucket, &plan).await?;
// --- spawn simulators ----------------------------------------------------
let counters = Arc::new(Counters::default());
let mut sims = JoinSet::new();
let tick = Duration::from_millis(cli.tick_ms);
let hb_tick = Duration::from_secs(cli.heartbeat_s);
for device in &plan.devices {
let device = Arc::new(device.clone());
sims.spawn(simulate_state_loop(
device.clone(),
state_bucket.clone(),
counters.clone(),
tick,
));
sims.spawn(simulate_heartbeat_loop(
device.clone(),
heartbeat_bucket.clone(),
counters.clone(),
hb_tick,
));
}
// --- metrics reporter ----------------------------------------------------
let report_tick = Duration::from_secs(cli.report_s);
let reporter_counters = counters.clone();
let reporter = tokio::spawn(async move {
let mut ticker = tokio::time::interval(report_tick);
ticker.tick().await; // skip immediate fire
let mut prev_state = 0u64;
let mut prev_hb = 0u64;
loop {
ticker.tick().await;
let s = reporter_counters.state_writes.load(Ordering::Relaxed);
let h = reporter_counters.heartbeat_writes.load(Ordering::Relaxed);
let e = reporter_counters.errors.load(Ordering::Relaxed);
let dt = report_tick.as_secs_f64();
let ss = (s - prev_state) as f64 / dt;
let hh = (h - prev_hb) as f64 / dt;
tracing::info!(
state_writes_total = s,
state_writes_per_s = format!("{ss:.1}"),
heartbeats_total = h,
heartbeats_per_s = format!("{hh:.1}"),
errors = e,
"load"
);
prev_state = s;
prev_hb = h;
}
});
// --- run for duration ----------------------------------------------------
let started = Instant::now();
tokio::time::sleep(Duration::from_secs(cli.duration_s)).await;
reporter.abort();
sims.shutdown().await;
let elapsed = started.elapsed();
let s = counters.state_writes.load(Ordering::Relaxed);
let h = counters.heartbeat_writes.load(Ordering::Relaxed);
let e = counters.errors.load(Ordering::Relaxed);
tracing::info!(
elapsed_s = format!("{:.1}", elapsed.as_secs_f64()),
state_writes_total = s,
state_writes_per_s = format!("{:.1}", s as f64 / elapsed.as_secs_f64()),
heartbeats_total = h,
errors = e,
"run complete"
);
// --- give the aggregator a second to drain --------------------------------
tokio::time::sleep(Duration::from_secs(2)).await;
// --- verify CR status aggregates -----------------------------------------
//
// With selector-based matching there's a second axis we want to check:
// `matched_device_count` must equal the expected group size (selector
// actually resolved every registered Device), AND the phase counters
// must sum to it.
let mut all_ok = true;
for group in &plan.groups {
let cr = deployments.get(&group.cr_name).await?;
let Some(status) = cr.status.as_ref().and_then(|s| s.aggregate.as_ref()) else {
tracing::warn!(cr = %group.cr_name, "aggregate missing on CR status");
all_ok = false;
continue;
};
let total_reported = status.succeeded + status.failed + status.pending;
let expected = group.devices.len() as u32;
let ok = status.matched_device_count == expected && total_reported == expected;
if !ok {
all_ok = false;
}
tracing::info!(
cr = %group.cr_name,
expected_devices = expected,
matched = status.matched_device_count,
succeeded = status.succeeded,
failed = status.failed,
pending = status.pending,
total = total_reported,
ok,
"cr status"
);
}
if !cli.keep {
tracing::info!("cleanup: deleting CRs + KV entries");
for group in &plan.groups {
let _ = deployments
.delete(&group.cr_name, &DeleteParams::default())
.await;
}
for device in &plan.devices {
let _ = state_bucket
.delete(&device_state_key(
&device.device_id,
&DeploymentName::try_new(&device.cr_name).unwrap(),
))
.await;
let _ = info_bucket
.delete(&device_info_key(&device.device_id))
.await;
let _ = heartbeat_bucket
.delete(&device_heartbeat_key(&device.device_id))
.await;
}
}
if all_ok {
tracing::info!("PASS — all CR aggregates match device counts");
Ok(())
} else {
anyhow::bail!("FAIL — at least one CR aggregate did not sum to its target device count")
}
}
fn parse_groups(s: &str) -> Result<Vec<usize>> {
let out: Vec<usize> = s
.split(',')
.map(|t| t.trim().parse::<usize>())
.collect::<Result<_, _>>()
.context("parsing --groups")?;
if out.is_empty() {
anyhow::bail!("--groups must have at least one size");
}
Ok(out)
}
/// A single simulated device and the CR it belongs to.
#[derive(Clone)]
struct DevicePlan {
device_id: String,
cr_name: String,
}
#[derive(Clone)]
struct GroupPlan {
cr_name: String,
devices: Vec<String>,
}
struct Plan {
devices: Vec<DevicePlan>,
groups: Vec<GroupPlan>,
}
fn build_plan(group_sizes: &[usize]) -> Plan {
// CR-name + device-id width scale with group count so large runs
// get zero-padded ids that sort sensibly in kubectl.
let cr_width = group_sizes.len().to_string().len().max(2);
let total: usize = group_sizes.iter().sum();
let dev_width = total.to_string().len().max(5);
let mut devices = Vec::new();
let mut groups = Vec::new();
let mut next_id = 1usize;
for (i, size) in group_sizes.iter().enumerate() {
let cr_name = format!("load-group-{i:0cr_width$}");
let mut ids = Vec::with_capacity(*size);
for _ in 0..*size {
let id = format!("load-dev-{next_id:0dev_width$}");
next_id += 1;
devices.push(DevicePlan {
device_id: id.clone(),
cr_name: cr_name.clone(),
});
ids.push(id);
}
groups.push(GroupPlan {
cr_name,
devices: ids,
});
}
Plan { devices, groups }
}
async fn open_bucket(js: &jetstream::Context, bucket: &'static str) -> Result<kv::Store> {
Ok(js
.create_key_value(kv::Config {
bucket: bucket.to_string(),
history: 1,
..Default::default()
})
.await?)
}
async fn ensure_namespace(client: &Client, name: &str) -> Result<()> {
let api: Api<Namespace> = Api::all(client.clone());
if api.get_opt(name).await?.is_some() {
return Ok(());
}
let ns = Namespace {
metadata: kube::api::ObjectMeta {
name: Some(name.to_string()),
..Default::default()
},
..Default::default()
};
match api.create(&PostParams::default(), &ns).await {
Ok(_) => Ok(()),
Err(kube::Error::Api(ae)) if ae.code == 409 => Ok(()),
Err(e) => Err(e.into()),
}
}
async fn apply_crs(api: &Api<Deployment>, plan: &Plan) -> Result<()> {
let params = PatchParams::apply("fleet-load-test").force();
let started = Instant::now();
// Cap concurrency so we don't overwhelm the apiserver on large
// fleets. 32 in-flight applies is well under typical apiserver
// QPS limits and keeps the startup latency predictable.
const CONCURRENCY: usize = 32;
let mut in_flight: JoinSet<Result<String>> = JoinSet::new();
let mut iter = plan.groups.iter();
for _ in 0..CONCURRENCY {
if let Some(group) = iter.next() {
in_flight.spawn(apply_one_cr(api.clone(), group.clone(), params.clone()));
}
}
while let Some(res) = in_flight.join_next().await {
res??;
if let Some(group) = iter.next() {
in_flight.spawn(apply_one_cr(api.clone(), group.clone(), params.clone()));
}
}
tracing::info!(
crs = plan.groups.len(),
elapsed_ms = started.elapsed().as_millis() as u64,
"applied Deployment CRs"
);
Ok(())
}
async fn apply_one_cr(
api: Api<Deployment>,
group: GroupPlan,
params: PatchParams,
) -> Result<String> {
// Selector-based targeting: every Device CR in this group carries
// a `group=<cr_name>` label (we publish that on DeviceInfo; the
// operator reflects it into Device.metadata.labels).
let mut match_labels = BTreeMap::new();
match_labels.insert("group".to_string(), group.cr_name.clone());
let cr = Deployment::new(
&group.cr_name,
DeploymentSpec {
target_selector: LabelSelector {
match_labels: Some(match_labels),
match_expressions: None,
},
// Score content doesn't matter — no real agents consume
// the desired-state here. The aggregator still writes KV
// for each matched device; that's wire noise we accept
// as part of the realism.
score: ScorePayload {
type_: "PodmanV0".to_string(),
data: serde_json::json!({
"services": [{
"name": group.cr_name,
"image": "docker.io/library/nginx:alpine",
"ports": ["8080:80"],
}],
}),
},
rollout: Rollout {
strategy: RolloutStrategy::Immediate,
},
},
);
api.patch(&group.cr_name, &params, &Patch::Apply(&cr))
.await
.with_context(|| format!("applying CR {}", group.cr_name))?;
Ok(group.cr_name)
}
async fn publish_device_infos(bucket: &kv::Store, plan: &Plan) -> Result<()> {
let started = Instant::now();
const CONCURRENCY: usize = 64;
let mut in_flight: JoinSet<Result<()>> = JoinSet::new();
let mut iter = plan.devices.iter();
for _ in 0..CONCURRENCY {
if let Some(device) = iter.next() {
in_flight.spawn(publish_one_info(bucket.clone(), device.clone()));
}
}
while let Some(res) = in_flight.join_next().await {
res??;
if let Some(device) = iter.next() {
in_flight.spawn(publish_one_info(bucket.clone(), device.clone()));
}
}
tracing::info!(
devices = plan.devices.len(),
elapsed_ms = started.elapsed().as_millis() as u64,
"seeded DeviceInfo"
);
Ok(())
}
async fn publish_one_info(bucket: kv::Store, device: DevicePlan) -> Result<()> {
let info = DeviceInfo {
device_id: Id::from(device.device_id.clone()),
labels: BTreeMap::from([("group".to_string(), device.cr_name.clone())]),
inventory: None,
updated_at: Utc::now(),
};
let key = device_info_key(&device.device_id);
let payload = serde_json::to_vec(&info)?;
bucket.put(&key, payload.into()).await?;
Ok(())
}
async fn simulate_state_loop(
device: Arc<DevicePlan>,
bucket: kv::Store,
counters: Arc<Counters>,
tick: Duration,
) {
let Ok(deployment) = DeploymentName::try_new(&device.cr_name) else {
return;
};
let state_key = device_state_key(&device.device_id, &deployment);
let mut ticker = tokio::time::interval(tick);
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
loop {
ticker.tick().await;
let phase = pick_phase();
let ds = DeploymentState {
device_id: Id::from(device.device_id.clone()),
deployment: deployment.clone(),
phase,
last_event_at: Utc::now(),
last_error: matches!(phase, Phase::Failed)
.then(|| format!("synthetic failure @{}", device.device_id)),
};
match serde_json::to_vec(&ds) {
Ok(payload) => match bucket.put(&state_key, payload.into()).await {
Ok(_) => {
counters.state_writes.fetch_add(1, Ordering::Relaxed);
}
Err(_) => {
counters.errors.fetch_add(1, Ordering::Relaxed);
}
},
Err(_) => {
counters.errors.fetch_add(1, Ordering::Relaxed);
}
}
}
}
async fn simulate_heartbeat_loop(
device: Arc<DevicePlan>,
bucket: kv::Store,
counters: Arc<Counters>,
tick: Duration,
) {
let hb_key = device_heartbeat_key(&device.device_id);
let mut ticker = tokio::time::interval(tick);
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
loop {
ticker.tick().await;
let hb = HeartbeatPayload {
device_id: Id::from(device.device_id.clone()),
at: Utc::now(),
};
if let Ok(payload) = serde_json::to_vec(&hb) {
if bucket.put(&hb_key, payload.into()).await.is_ok() {
counters.heartbeat_writes.fetch_add(1, Ordering::Relaxed);
} else {
counters.errors.fetch_add(1, Ordering::Relaxed);
}
}
}
}
/// Phase distribution mirroring a healthy-ish fleet: mostly Running,
/// a sprinkle of Failed + Pending to exercise the aggregator's
/// transition-handling + last_error logic.
fn pick_phase() -> Phase {
let n: u32 = rand::rng().random_range(0..100);
match n {
0..80 => Phase::Running,
80..90 => Phase::Failed,
_ => Phase::Pending,
}
}

View File

@@ -0,0 +1,15 @@
[package]
name = "example_fleet_nats_install"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "fleet_nats_install"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony", default-features = false }
tokio.workspace = true
anyhow.workspace = true
clap.workspace = true

View File

@@ -0,0 +1,91 @@
//! Install a single-node NATS server into the cluster `KUBECONFIG`
//! points at, using harmony's `NatsBasicScore` + `K8sBareTopology`.
//!
//! This binary is the glue between the smoke harness (`smoke-a4.sh`)
//! and the framework Score. Typical usage from a demo script:
//!
//! KUBECONFIG=$KUBECFG cargo run -q -p example_fleet_nats_install \
//! -- --namespace fleet-system --name fleet-nats --node-port 4222
//!
//! Behaviour:
//! - Ensures the target namespace exists
//! - Deploys a single-replica NATS server (JetStream on)
//! - Exposes it as a Service (NodePort by default so off-cluster
//! clients like a libvirt VM agent can reach it through the
//! k3d loadbalancer port mapping)
//!
//! For production / HA / TLS, graduate to `NatsK8sScore`.
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::k8s::K8sBareTopology;
use harmony::modules::nats::NatsBasicScore;
use harmony::score::Score;
#[derive(Parser, Debug)]
#[command(
name = "fleet_nats_install",
about = "Install single-node NATS (JetStream) via NatsBasicScore"
)]
struct Cli {
/// Target namespace. Created if missing.
#[arg(long, default_value = "fleet-system")]
namespace: String,
/// Resource name for the NATS Deployment + Service.
#[arg(long, default_value = "fleet-nats")]
name: String,
/// Service exposure mode. `load-balancer` pairs with k3d's
/// `-p PORT:PORT@loadbalancer` port mapping (direct service-
/// port routing). `node-port` demands a port in the apiserver's
/// nodeport range (default 30000-32767). `cluster-ip` keeps
/// NATS in-cluster only.
#[arg(long, value_enum, default_value_t = ExposeMode::LoadBalancer)]
expose: ExposeMode,
/// NodePort when `--expose=node-port`. Must be in the cluster's
/// nodeport range (default 30000-32767). Ignored otherwise.
#[arg(long, default_value_t = 30422)]
node_port: i32,
/// Override the NATS container image.
#[arg(long)]
image: Option<String>,
}
#[derive(Clone, Debug, clap::ValueEnum)]
enum ExposeMode {
ClusterIp,
NodePort,
LoadBalancer,
}
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
let topology = K8sBareTopology::from_kubeconfig("fleet-nats-install")
.await
.map_err(|e| anyhow::anyhow!(e))
.context("building K8sBareTopology from KUBECONFIG")?;
let mut score = NatsBasicScore::new(&cli.name, &cli.namespace);
match cli.expose {
ExposeMode::ClusterIp => {}
ExposeMode::NodePort => score = score.node_port(cli.node_port),
ExposeMode::LoadBalancer => score = score.load_balancer(),
}
if let Some(image) = cli.image {
score = score.image(image);
}
let interpret = Score::<K8sBareTopology>::create_interpret(&score);
let outcome = interpret
.execute(&Inventory::empty(), &topology)
.await
.map_err(|e| anyhow::anyhow!("execute NatsBasicScore: {e}"))?;
println!(
"NATS installed: namespace={}, name={}, expose={:?} outcome={outcome:?}",
cli.namespace, cli.name, cli.expose
);
Ok(())
}

View File

@@ -1,11 +1,11 @@
[package]
name = "example_iot_vm_setup"
name = "example_fleet_vm_setup"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "iot_vm_setup"
name = "fleet_vm_setup"
path = "src/main.rs"
[dependencies]

View File

@@ -6,8 +6,8 @@ Harmony Scores in sequence:
1. **`KvmVmScore`** — provision a libvirt VM from an Ubuntu 24.04 cloud
image with a cloud-init seed ISO that authorizes one SSH key. Returns
the booted VM's IP.
2. **`IotDeviceSetupScore`** — SSH into the VM (via the Ansible-backed
`HostConfigurationProvider`) and install podman + the `iot-agent`
2. **`FleetDeviceSetupScore`** — SSH into the VM (via the Ansible-backed
`HostConfigurationProvider`) and install podman + the `fleet-agent`
binary, drop the TOML config, bring up the systemd unit.
After a successful run, the VM is a fleet member reporting to NATS under
@@ -42,21 +42,21 @@ sudo virsh net-autostart default
## Run
```bash
cargo build -p iot-agent-v0
cargo build -p harmony-fleet-agent
cargo run -p example_fleet_vm_setup -- \
--base-image /var/tmp/harmony-iot-smoke/ubuntu-24.04-server-cloudimg-amd64.img \
--ssh-pubkey /var/tmp/harmony-iot-smoke/ssh/id_ed25519.pub \
--ssh-privkey /var/tmp/harmony-iot-smoke/ssh/id_ed25519 \
--work-dir /var/tmp/harmony-iot-smoke \
--agent-binary target/debug/iot-agent-v0 \
--agent-binary target/debug/fleet-agent-v0 \
--nats-url nats://192.168.122.1:4222
```
## Changing labels
Re-running with different `--labels` rewrites
`/etc/iot-agent/config.toml` on the VM and restarts the agent. The VM
`/etc/fleet-agent/config.toml` on the VM and restarts the agent. The VM
itself is untouched.
```bash
@@ -65,5 +65,5 @@ cargo run -p example_fleet_vm_setup -- ... --labels group=group-b
## Full end-to-end via smoke test
See `iot/scripts/smoke-a3.sh` — stands up NATS in a podman container,
See `fleet/scripts/smoke-a3.sh` — stands up NATS in a podman container,
runs this example, asserts the agent's status lands in NATS.

View File

@@ -5,15 +5,15 @@
//! capability. Here we satisfy it with `KvmVirtualMachineHost`
//! (libvirt). Swapping to VMware/Proxmox/cloud would be a
//! different topology injection with the same Score code.
//! 2. `IotDeviceSetupScore` — SSHes into the booted VM and installs
//! podman + iot-agent via the split Linux-host capabilities.
//! 2. `FleetDeviceSetupScore` — SSHes into the booted VM and installs
//! podman + fleet-agent via the split Linux-host capabilities.
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::iot::{
IotDeviceSetupConfig, IotDeviceSetupScore, ProvisionVmScore,
check_iot_smoke_preflight_for_arch, ensure_iot_ssh_keypair,
use harmony::modules::fleet::{
FleetDeviceSetupConfig, FleetDeviceSetupScore, ProvisionVmScore,
check_fleet_smoke_preflight_for_arch, ensure_fleet_ssh_keypair,
};
use harmony::modules::kvm::KvmVirtualMachineHost;
use harmony::modules::kvm::config::init_executor;
@@ -42,7 +42,7 @@ impl From<CliArch> for VmArchitecture {
#[derive(Parser, Debug)]
#[command(
name = "iot_vm_setup",
name = "fleet_vm_setup",
about = "Provision one VM + onboard it into the IoT fleet"
)]
struct Cli {
@@ -51,22 +51,34 @@ struct Cli {
#[arg(long, value_enum, default_value_t = CliArch::X86_64)]
arch: CliArch,
/// libvirt domain name for the VM.
#[arg(long, default_value = "iot-vm-01")]
#[arg(long, default_value = "fleet-vm-01")]
vm_name: String,
/// Device id the agent will announce to NATS. Defaults to a
/// fresh `Id` (hex timestamp + random suffix).
#[arg(long)]
device_id: Option<String>,
/// Fleet group label to write into the agent's TOML config.
#[arg(long, default_value = "group-a")]
group: String,
/// Routing labels to write into the agent's TOML config.
/// Comma-separated list of `key=value` pairs. Published in every
/// DeviceInfo heartbeat; the operator resolves Deployment
/// `spec.targetSelector` against this map. At least one label
/// is required so the device is targetable — the default
/// `group=group-a` satisfies that.
#[arg(long, default_value = "group=group-a")]
labels: String,
/// libvirt network name to attach the VM to.
#[arg(long, default_value = "default")]
network: String,
/// Admin username created on first boot.
#[arg(long, default_value = "iot-admin")]
#[arg(long, default_value = "fleet-admin")]
admin_user: String,
/// Path to the cross-compiled iot-agent binary.
/// Optional plaintext password for the admin user. Enables SSH
/// password auth on the guest — intended for interactive
/// debugging / reliability-testing sessions where the operator
/// wants to break things on purpose. Leave unset for key-only
/// auth (production default).
#[arg(long, env = "FLEET_VM_ADMIN_PASSWORD")]
admin_password: Option<String>,
/// Path to the cross-compiled fleet-agent binary.
/// Required unless `--bootstrap-only` is set.
#[arg(long)]
agent_binary: Option<PathBuf>,
@@ -84,6 +96,13 @@ struct Cli {
/// SSH key, libvirt pool) and exit.
#[arg(long)]
bootstrap_only: bool,
/// Virtual disk size in GiB. The stock Ubuntu cloud image has
/// only ~2 GiB of root — resized on first boot by
/// cloud-initramfs-growroot. Bump this to 16 GiB by default so
/// podman can sideload a couple of container images without
/// running out of space.
#[arg(long, default_value_t = 16)]
disk_size_gb: u32,
}
#[tokio::main]
@@ -92,7 +111,7 @@ async fn main() -> Result<()> {
let cli = Cli::parse();
let arch: VmArchitecture = cli.arch.into();
check_iot_smoke_preflight_for_arch(arch)
check_fleet_smoke_preflight_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("{e}"))?;
@@ -100,13 +119,13 @@ async fn main() -> Result<()> {
harmony::modules::linux::ensure_ansible_venv()
.await
.map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?;
harmony::modules::iot::ensure_ubuntu_2404_cloud_image_for_arch(arch)
harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("cloud image: {e}"))?;
ensure_iot_ssh_keypair()
ensure_fleet_ssh_keypair()
.await
.map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
harmony::modules::iot::ensure_harmony_iot_pool()
harmony::modules::fleet::ensure_harmony_fleet_pool()
.await
.map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
println!("bootstrap complete");
@@ -114,16 +133,16 @@ async fn main() -> Result<()> {
}
// --- Step 1: provision the VM ---
let base_image = harmony::modules::iot::ensure_ubuntu_2404_cloud_image_for_arch(arch)
let base_image = harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch)
.await
.map_err(|e| anyhow::anyhow!("cloud image: {e}"))?;
let pool = harmony::modules::iot::ensure_harmony_iot_pool()
let pool = harmony::modules::fleet::ensure_harmony_fleet_pool()
.await
.map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?;
let ssh = ensure_iot_ssh_keypair()
let ssh = ensure_fleet_ssh_keypair()
.await
.map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?;
let authorized_key = harmony::modules::iot::read_public_key(&ssh)
let authorized_key = harmony::modules::fleet::read_public_key(&ssh)
.await
.map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?;
@@ -142,12 +161,13 @@ async fn main() -> Result<()> {
architecture: arch,
cpus: 2,
memory_mib: 2048,
disk_size_gb: None,
disk_size_gb: Some(cli.disk_size_gb),
network: cli.network.clone(),
first_boot: Some(VmFirstBootConfig {
hostname: Some(cli.vm_name.clone()),
admin_user: Some(cli.admin_user.clone()),
authorized_keys: vec![authorized_key],
admin_password: cli.admin_password.clone(),
}),
},
};
@@ -162,7 +182,7 @@ async fn main() -> Result<()> {
let agent_binary = cli
.agent_binary
.clone()
.context("--agent-binary is required (e.g. target/release/iot-agent-v0)")?;
.context("--agent-binary is required (e.g. target/release/fleet-agent-v0)")?;
let device_id = cli
.device_id
.clone()
@@ -179,9 +199,16 @@ async fn main() -> Result<()> {
},
);
let setup_score = IotDeviceSetupScore::new(IotDeviceSetupConfig {
let labels = parse_labels(&cli.labels)?;
let labels_display = labels
.iter()
.map(|(k, v)| format!("{k}={v}"))
.collect::<Vec<_>>()
.join(",");
let setup_score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig {
device_id: device_id.clone(),
group: cli.group.clone(),
labels,
nats_urls: vec![cli.nats_url.clone()],
nats_user: cli.nats_user.clone(),
nats_pass: cli.nats_pass.clone(),
@@ -189,13 +216,33 @@ async fn main() -> Result<()> {
});
run_setup_score(&setup_score, &linux_topology).await?;
println!(
"device '{device_id}' (group '{}') onboarded via {vm_ip}",
cli.group
);
println!("device '{device_id}' ({labels_display}) onboarded via {vm_ip}");
Ok(())
}
/// Parse `key=value,key=value` into a BTreeMap. Errors on any
/// malformed chunk, empty keys/values, or an empty map overall —
/// a device with no labels is practically untargetable, so we'd
/// rather fail at the CLI than silently onboard a ghost.
fn parse_labels(raw: &str) -> anyhow::Result<std::collections::BTreeMap<String, String>> {
let mut out = std::collections::BTreeMap::new();
for piece in raw.split(',').map(str::trim).filter(|p| !p.is_empty()) {
let (k, v) = piece
.split_once('=')
.ok_or_else(|| anyhow::anyhow!("label chunk '{piece}' missing '='"))?;
let k = k.trim();
let v = v.trim();
if k.is_empty() || v.is_empty() {
anyhow::bail!("label chunk '{piece}' has empty key or value");
}
out.insert(k.to_string(), v.to_string());
}
if out.is_empty() {
anyhow::bail!("--labels must include at least one key=value pair");
}
Ok(out)
}
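// Quick sanity checks for the parser above (a test module added for
// illustration; the test names are arbitrary).
#[cfg(test)]
mod parse_labels_tests {
    use super::parse_labels;
    #[test]
    fn parses_multiple_pairs_and_trims() {
        let m = parse_labels("group=group-a, arch=aarch64").expect("valid labels");
        assert_eq!(m.get("group").map(String::as_str), Some("group-a"));
        assert_eq!(m.get("arch").map(String::as_str), Some("aarch64"));
    }
    #[test]
    fn rejects_missing_equals_and_empty_input() {
        assert!(parse_labels("just-a-key").is_err());
        assert!(parse_labels("").is_err());
    }
}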
async fn run_vm_score(
score: &ProvisionVmScore,
topology: &KvmVirtualMachineHost,
@@ -215,14 +262,17 @@ async fn run_vm_score(
anyhow::bail!("ProvisionVmScore finished without reporting an IP: {outcome:?}")
}
async fn run_setup_score(score: &IotDeviceSetupScore, topology: &LinuxHostTopology) -> Result<()> {
async fn run_setup_score(
score: &FleetDeviceSetupScore,
topology: &LinuxHostTopology,
) -> Result<()> {
use harmony::score::Score;
let inventory = Inventory::empty();
let interpret = Score::<LinuxHostTopology>::create_interpret(score);
let outcome = interpret
.execute(&inventory, topology)
.await
.map_err(|e| anyhow::anyhow!("IotDeviceSetupScore execute: {e}"))?;
.map_err(|e| anyhow::anyhow!("FleetDeviceSetupScore execute: {e}"))?;
println!("setup: {} ({:?})", outcome.message, outcome.details);
Ok(())
}

View File

@@ -0,0 +1,19 @@
[package]
name = "example_harmony_apply_deployment"
version.workspace = true
edition = "2024"
license.workspace = true
[[bin]]
name = "harmony_apply_deployment"
path = "src/main.rs"
[dependencies]
harmony = { path = "../../harmony", default-features = false, features = ["podman"] }
harmony-fleet-operator = { path = "../../fleet/harmony-fleet-operator" }
kube = { workspace = true, features = ["runtime", "derive"] }
k8s-openapi = { workspace = true }
serde_json.workspace = true
tokio.workspace = true
anyhow.workspace = true
clap.workspace = true

View File

@@ -0,0 +1,178 @@
//! Typed-Rust applier for the harmony fleet `Deployment` CR.
//!
//! Builds a `Deployment` CR via the typed `DeploymentSpec` +
//! `PodmanV0Score` + `kube::Api`, then either applies it directly
//! through the kube client or prints it to stdout so the user can
//! pipe into `kubectl apply -f -`.
//!
//! The CRD is domain-agnostic — it's "declarative reconcile intent
//! for a set of devices matched by label selector," which is the
//! same shape whether the fleet is Pi podman, OKD clusters, or
//! KVM VMs. The name `harmony_apply_deployment` reflects that
//! (not `iot_`-anything), in line with the review call to position
//! the operator as a generic fleet/reconcile tool.
//!
//! The CRD types live in `harmony_fleet_operator::crd`; the score types
//! live in `harmony::modules::podman` (PodmanV0 being the first
//! reconciler variant — future variants drop in alongside).
//!
//! Typical demo-driver usage:
//!
//! # apply an nginx deployment
//! cargo run -q -p example_harmony_apply_deployment -- \
//! --target-device fleet-smoke-vm-arm \
//! --image nginx:latest
//!
//! # print the CR JSON (lets the user kubectl-apply it manually)
//! cargo run -q -p example_harmony_apply_deployment -- \
//! --target-device fleet-smoke-vm-arm \
//! --image nginx:latest --print | kubectl apply -f -
//!
//! # upgrade the same deployment to a newer image
//! cargo run -q -p example_harmony_apply_deployment -- \
//! --target-device fleet-smoke-vm-arm \
//! --image nginx:1.26
//!
//! # delete the deployment
//! cargo run -q -p example_harmony_apply_deployment -- --delete
use anyhow::{Context, Result};
use clap::Parser;
use harmony::modules::podman::{PodmanService, PodmanV0Score};
use harmony_fleet_operator::crd::{
Deployment, DeploymentSpec, Rollout, RolloutStrategy, ScorePayload,
};
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::Client;
use kube::api::{Api, DeleteParams, Patch, PatchParams};
use std::collections::BTreeMap;
#[derive(Parser, Debug)]
#[command(
name = "harmony_apply_deployment",
about = "Build + apply a harmony fleet Deployment CR from typed Rust (no yaml)"
)]
struct Cli {
/// Kubernetes namespace for the Deployment CR.
#[arg(long, default_value = "fleet-demo")]
namespace: String,
/// Deployment CR name. Also used as the KV key suffix and
/// podman container name on the device.
#[arg(long, default_value = "hello-world")]
name: String,
/// Shortcut: if set, picks a single device by id. Shorthand for
/// `--selector device-id=<target_device>` — the agent publishes
/// a `device-id=<id>` label on its DeviceInfo by default so this
/// works without any cluster-side label pre-wiring.
#[arg(long, default_value = "fleet-smoke-vm")]
target_device: String,
/// Repeatable `key=value` label selector. Takes precedence over
/// `--target-device` when provided. All pairs AND together.
#[arg(long = "selector", value_name = "KEY=VALUE")]
selectors: Vec<String>,
/// Container image to run.
#[arg(long, default_value = "docker.io/library/nginx:latest")]
image: String,
/// `host:container` port mapping exposed on the device.
#[arg(long, default_value = "8080:80")]
port: String,
/// Delete the Deployment CR instead of applying it.
#[arg(long)]
delete: bool,
/// Print the CR as JSON to stdout instead of applying it.
/// Useful for piping into `kubectl apply -f -`.
#[arg(long)]
print: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
let cr = build_cr(&cli);
if cli.print {
println!("{}", serde_json::to_string_pretty(&cr)?);
return Ok(());
}
let client = Client::try_default()
.await
.context("building kube client (is KUBECONFIG set?)")?;
let api: Api<Deployment> = Api::namespaced(client, &cli.namespace);
if cli.delete {
match api.delete(&cli.name, &DeleteParams::default()).await {
Ok(_) => println!("deleted deployment '{}/{}'", cli.namespace, cli.name),
Err(kube::Error::Api(ae)) if ae.code == 404 => {
println!(
"deployment '{}/{}' not found (already gone)",
cli.namespace, cli.name
)
}
Err(e) => anyhow::bail!("delete failed: {e}"),
}
return Ok(());
}
// Server-side apply so repeated invocations (upgrades) patch
// the existing CR instead of erroring with "already exists."
let params = PatchParams::apply("harmony-apply-deployment").force();
let applied = api
.patch(&cli.name, &params, &Patch::Apply(&cr))
.await
.context("applying Deployment CR")?;
let meta = applied.metadata;
println!(
"applied deployment '{}/{}' (resourceVersion={}, image={})",
cli.namespace,
meta.name.as_deref().unwrap_or("?"),
meta.resource_version.as_deref().unwrap_or("?"),
cli.image,
);
Ok(())
}
fn build_cr(cli: &Cli) -> Deployment {
let score = PodmanV0Score {
services: vec![PodmanService {
name: cli.name.clone(),
image: cli.image.clone(),
ports: vec![cli.port.clone()],
}],
};
let payload = ScorePayload {
type_: "PodmanV0".to_string(),
// `ScorePayload::data` is `serde_json::Value` by design
// (opaque payload routed to the agent). Serialize the typed
// score through serde_json — the agent's `ReconcileScore` enum
// accepts exactly this shape via `#[serde(tag, content)]`.
data: serde_json::to_value(&score).expect("PodmanV0Score is JSON-clean"),
};
let mut match_labels = BTreeMap::new();
if cli.selectors.is_empty() {
match_labels.insert("device-id".to_string(), cli.target_device.clone());
} else {
for kv in &cli.selectors {
let (k, v) = kv
.split_once('=')
.unwrap_or_else(|| panic!("--selector expects KEY=VALUE, got '{kv}'"));
match_labels.insert(k.to_string(), v.to_string());
}
}
Deployment::new(
&cli.name,
DeploymentSpec {
target_selector: LabelSelector {
match_labels: Some(match_labels),
match_expressions: None,
},
score: payload,
rollout: Rollout {
strategy: RolloutStrategy::Immediate,
},
},
)
}
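// For reference, the score payload wire shape `build_cr` produces with
// the default CLI values, which the agent-side `ReconcileScore` enum
// deserializes. A sketch, not captured output; the roadmap's
// verification step confirms the same `type="PodmanV0"` key:
//
//   {
//     "type": "PodmanV0",
//     "data": {
//       "services": [
//         { "name": "hello-world",
//           "image": "docker.io/library/nginx:latest",
//           "ports": ["8080:80"] }
//       ]
//     }
//   }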

View File

@@ -1,5 +1,5 @@
[package]
name = "iot-agent-v0"
name = "harmony-fleet-agent"
version = "0.1.0"
edition = "2024"
rust-version = "1.85"

View File

@@ -1,5 +1,6 @@
use harmony_reconciler_contracts::Id;
use serde::Deserialize;
use std::collections::BTreeMap;
use std::path::Path;
#[derive(Debug, Clone, Deserialize)]
@@ -7,6 +8,14 @@ pub struct AgentConfig {
pub agent: AgentSection,
pub nats: NatsSection,
pub credentials: CredentialsSection,
/// Routing labels published verbatim in every DeviceInfo
/// heartbeat. The operator reflects them into
/// `Device.metadata.labels` so Deployment `spec.targetSelector`
/// resolves against them (K8s-Node-analogue flow). Empty by
/// default — a device with no labels is targetable only by its
/// auto-published `device-id` label.
#[serde(default)]
pub labels: BTreeMap<String, String>,
}
#[derive(Debug, Clone, Deserialize)]
@@ -69,3 +78,49 @@ pub fn load_config(path: &Path) -> anyhow::Result<AgentConfig> {
let config: AgentConfig = toml::from_str(&content)?;
Ok(config)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parses_config_with_labels_section() {
let raw = r#"
[agent]
device_id = "pi-42"
[credentials]
type = "toml-shared"
nats_user = "u"
nats_pass = "p"
[nats]
urls = ["nats://nats:4222"]
[labels]
group = "site-a"
arch = "aarch64"
"#;
let cfg: AgentConfig = toml::from_str(raw).expect("valid config");
assert_eq!(cfg.labels.get("group"), Some(&"site-a".to_string()));
assert_eq!(cfg.labels.get("arch"), Some(&"aarch64".to_string()));
}
#[test]
fn labels_section_optional_defaults_empty() {
let raw = r#"
[agent]
device_id = "pi-42"
[credentials]
type = "toml-shared"
nats_user = "u"
nats_pass = "p"
[nats]
urls = ["nats://nats:4222"]
"#;
let cfg: AgentConfig = toml::from_str(raw).expect("valid config");
assert!(cfg.labels.is_empty());
}
}

View File

@@ -0,0 +1,126 @@
//! Agent-side publish surface.
//!
//! Thin wrapper around three KV buckets: [`BUCKET_DEVICE_INFO`],
//! [`BUCKET_DEVICE_STATE`], [`BUCKET_DEVICE_HEARTBEAT`].
//!
//! Failure mode: log and swallow. The KV is the source of truth —
//! a dropped put gets corrected on the next reconcile transition
//! or operator watch reconnection.
use async_nats::jetstream::{self, kv};
use harmony_reconciler_contracts::{
BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, DeploymentName,
DeploymentState, DeviceInfo, HeartbeatPayload, Id, InventorySnapshot, device_heartbeat_key,
device_info_key, device_state_key,
};
use std::collections::BTreeMap;
pub struct FleetPublisher {
device_id: Id,
info_bucket: kv::Store,
state_bucket: kv::Store,
heartbeat_bucket: kv::Store,
}
impl FleetPublisher {
/// Open every bucket the agent needs, creating those that don't
/// exist yet. Idempotent with operator-side creation.
pub async fn connect(client: async_nats::Client, device_id: Id) -> anyhow::Result<Self> {
let jetstream = jetstream::new(client);
let info_bucket = jetstream
.create_key_value(kv::Config {
bucket: BUCKET_DEVICE_INFO.to_string(),
history: 1,
..Default::default()
})
.await?;
let state_bucket = jetstream
.create_key_value(kv::Config {
bucket: BUCKET_DEVICE_STATE.to_string(),
history: 1,
..Default::default()
})
.await?;
let heartbeat_bucket = jetstream
.create_key_value(kv::Config {
bucket: BUCKET_DEVICE_HEARTBEAT.to_string(),
history: 1,
..Default::default()
})
.await?;
Ok(Self {
device_id,
info_bucket,
state_bucket,
heartbeat_bucket,
})
}
/// Publish the agent's static-ish facts. Called at startup and
/// on label change.
pub async fn publish_device_info(
&self,
labels: BTreeMap<String, String>,
inventory: Option<InventorySnapshot>,
) {
let info = DeviceInfo {
device_id: self.device_id.clone(),
labels,
inventory,
updated_at: chrono::Utc::now(),
};
let key = device_info_key(&self.device_id.to_string());
match serde_json::to_vec(&info) {
Ok(payload) => {
if let Err(e) = self.info_bucket.put(&key, payload.into()).await {
tracing::warn!(%key, error = %e, "publish_device_info: kv put failed");
}
}
Err(e) => tracing::warn!(error = %e, "publish_device_info: serialize failed"),
}
}
/// Tiny liveness ping. Called every 30s.
pub async fn publish_heartbeat(&self) {
let hb = HeartbeatPayload {
device_id: self.device_id.clone(),
at: chrono::Utc::now(),
};
let key = device_heartbeat_key(&self.device_id.to_string());
match serde_json::to_vec(&hb) {
Ok(payload) => {
if let Err(e) = self.heartbeat_bucket.put(&key, payload.into()).await {
tracing::debug!(%key, error = %e, "publish_heartbeat: kv put failed");
}
}
Err(e) => tracing::warn!(error = %e, "publish_heartbeat: serialize failed"),
}
}
/// Persist the authoritative current phase for a `(device,
/// deployment)` pair. The operator's watch on the `device-state`
/// bucket picks up this put and updates CR status counters.
pub async fn write_deployment_state(&self, state: &DeploymentState) {
let key = device_state_key(&self.device_id.to_string(), &state.deployment);
match serde_json::to_vec(state) {
Ok(payload) => {
if let Err(e) = self.state_bucket.put(&key, payload.into()).await {
tracing::warn!(%key, error = %e, "write_deployment_state: kv put failed");
}
}
Err(e) => tracing::warn!(error = %e, "write_deployment_state: serialize failed"),
}
}
/// Delete the authoritative current-phase entry, e.g. when the
/// Deployment CR is removed and the agent has torn down the
/// container.
pub async fn delete_deployment_state(&self, deployment: &DeploymentName) {
let key = device_state_key(&self.device_id.to_string(), deployment);
if let Err(e) = self.state_bucket.delete(&key).await {
tracing::debug!(%key, error = %e, "delete_deployment_state: kv delete failed");
}
}
}

View File

@@ -1,4 +1,5 @@
mod config;
mod fleet_publisher;
mod reconciler;
use std::sync::Arc;
@@ -8,14 +9,13 @@ use anyhow::{Context, Result};
use clap::Parser;
use config::{AgentConfig, CredentialSource, TomlFileCredentialSource};
use futures_util::StreamExt;
use harmony_reconciler_contracts::{
AgentStatus, BUCKET_AGENT_STATUS, BUCKET_DESIRED_STATE, Id, status_key,
};
use harmony_reconciler_contracts::{BUCKET_DESIRED_STATE, Id, InventorySnapshot};
use harmony::inventory::Inventory;
use harmony::modules::podman::PodmanTopology;
use harmony::topology::Topology;
use crate::fleet_publisher::FleetPublisher;
use crate::reconciler::Reconciler;
/// ROADMAP §5.6 — agent polls podman every 30s as ground truth; KV watch
@@ -23,12 +23,12 @@ use crate::reconciler::Reconciler;
const RECONCILE_INTERVAL: Duration = Duration::from_secs(30);
#[derive(Parser)]
#[command(name = "iot-agent-v0", about = "IoT agent for Raspberry Pi devices")]
#[command(name = "fleet-agent-v0", about = "IoT agent for Raspberry Pi devices")]
struct Cli {
#[arg(
long,
env = "IOT_AGENT_CONFIG",
default_value = "/etc/iot-agent/config.toml"
env = "FLEET_AGENT_CONFIG",
default_value = "/etc/fleet-agent/config.toml"
)]
config: std::path::PathBuf,
}
@@ -85,31 +85,51 @@ async fn watch_desired_state(
Ok(())
}
async fn report_status(client: async_nats::Client, device_id: Id) -> Result<()> {
let jetstream = async_nats::jetstream::new(client);
let bucket = jetstream
.create_key_value(async_nats::jetstream::kv::Config {
bucket: BUCKET_AGENT_STATUS.to_string(),
..Default::default()
})
.await?;
let key = status_key(&device_id.to_string());
/// Tiny liveness-only loop: push a `HeartbeatPayload` into the
/// `device-heartbeat` bucket every N seconds. Stays separate from
/// per-deployment state writes so routine pings don't churn the
/// device-state bucket or its watch subscribers.
async fn publish_heartbeat_loop(fleet: Arc<FleetPublisher>) {
let mut interval = tokio::time::interval(Duration::from_secs(30));
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
loop {
interval.tick().await;
let status = AgentStatus {
device_id: device_id.clone(),
status: "running".to_string(),
timestamp: chrono::Utc::now(),
};
let payload = serde_json::to_vec(&status)?;
bucket.put(&key, payload.into()).await?;
tracing::debug!(key = %key, "reported status");
fleet.publish_heartbeat().await;
}
}
/// Build a one-shot inventory snapshot at agent startup. Cheap,
/// published alongside every heartbeat until the agent restarts.
fn local_inventory(inventory: &Inventory) -> InventorySnapshot {
InventorySnapshot {
hostname: inventory.location.name.clone(),
arch: std::env::consts::ARCH.to_string(),
os: std::env::consts::OS.to_string(),
kernel: std::fs::read_to_string("/proc/sys/kernel/osrelease")
.map(|s| s.trim().to_string())
.unwrap_or_default(),
cpu_cores: std::thread::available_parallelism()
.map(|n| n.get() as u32)
.unwrap_or(0),
memory_mb: sys_memory_total_mb().unwrap_or(0),
agent_version: env!("CARGO_PKG_VERSION").to_string(),
}
}
/// Read total RAM from /proc/meminfo. Returns None on non-Linux or
/// if /proc isn't mounted. Small, avoids a sys-info crate dep for a
/// single field.
fn sys_memory_total_mb() -> Option<u64> {
let s = std::fs::read_to_string("/proc/meminfo").ok()?;
for line in s.lines() {
if let Some(rest) = line.strip_prefix("MemTotal:") {
let kb: u64 = rest.split_whitespace().next()?.parse().ok()?;
return Some(kb / 1024);
}
}
None
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -118,7 +138,7 @@ async fn main() -> Result<()> {
let cli = Cli::parse();
let cfg = config::load_config(&cli.config)?;
tracing::info!(device_id = %cfg.agent.device_id, "iot-agent-v0 starting");
tracing::info!(device_id = %cfg.agent.device_id, "fleet-agent-v0 starting");
let device_id = cfg.agent.device_id.clone();
@@ -134,11 +154,40 @@ async fn main() -> Result<()> {
let inventory = Arc::new(Inventory::from_localhost());
tracing::info!(hostname = %inventory.location.name, "inventory loaded");
let reconciler = Arc::new(Reconciler::new(topology, inventory));
let inventory_snapshot = local_inventory(&inventory);
let client = connect_nats(&cfg).await?;
// Publish surface. Opens the three KV buckets (idempotent
// creates). Must be live before the reconciler starts so
// writes on the first desired-state KV watch land on the wire.
let fleet = Arc::new(
FleetPublisher::connect(client.clone(), device_id.clone())
.await
.context("fleet publisher connect")?,
);
tracing::info!("fleet publisher ready");
// Publish DeviceInfo once at startup. Merge the config-declared
// labels with an always-on `device-id=<id>` default so every
// device is targetable by id even without explicit labels.
// Config labels win on key conflicts — operators can override
// `device-id` if they really want to (unusual but legal).
let mut startup_labels = cfg.labels.clone();
startup_labels
.entry("device-id".to_string())
.or_insert_with(|| device_id.to_string());
fleet
.publish_device_info(startup_labels, Some(inventory_snapshot.clone()))
.await;
let reconciler = Arc::new(Reconciler::new(
device_id.clone(),
topology,
inventory,
Some(fleet.clone()),
));
let ctrlc = async {
tokio::signal::ctrl_c().await.ok();
tracing::info!("received SIGINT, shutting down");
@@ -151,16 +200,17 @@ async fn main() -> Result<()> {
Ok::<(), anyhow::Error>(())
};
let watch = watch_desired_state(client.clone(), device_id.clone(), reconciler.clone());
let status = report_status(client, device_id);
let _ = inventory_snapshot; // consumed by the DeviceInfo publish above
let watch = watch_desired_state(client, device_id, reconciler.clone());
let reconcile = reconciler.clone().run_periodic(RECONCILE_INTERVAL);
let heartbeat = publish_heartbeat_loop(fleet);
tokio::select! {
_ = ctrlc => {},
r = sigterm => { r?; }
r = watch => { r?; }
r = status => { r?; }
_ = reconcile => {}
_ = heartbeat => {}
}
Ok(())

View File

@@ -0,0 +1,344 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Result;
use chrono::Utc;
use harmony_reconciler_contracts::{DeploymentName, DeploymentState, Id, Phase};
use tokio::sync::Mutex;
use harmony::inventory::Inventory;
use harmony::modules::podman::{PodmanTopology, PodmanV0Score, ReconcileScore};
use harmony::score::Score;
use crate::fleet_publisher::FleetPublisher;
/// Cache key → last-seen state, populated by `apply` and consulted by the
/// 30-second periodic tick and the delete path.
struct CachedEntry {
/// Serialized score JSON. Used for string-compare idempotency per
/// ROADMAP §5.5 — cheaper and more deterministic than a hash.
serialized: String,
/// Parsed score. Cached so the periodic reconcile tick and delete
/// handlers don't have to re-parse the JSON.
score: PodmanV0Score,
}
pub struct Reconciler {
device_id: Id,
topology: Arc<PodmanTopology>,
inventory: Arc<Inventory>,
/// Keyed by NATS KV key (`<device>.<deployment>`). A single entry per
/// KV key — in v0 there is no fan-out from one key to many scores.
state: Mutex<HashMap<String, CachedEntry>>,
/// Current phase per deployment, used to decide whether a new
/// write to the `device-state` KV is needed.
phases: Mutex<HashMap<DeploymentName, Phase>>,
/// Publish surface. Optional so unit tests without a live NATS
/// client still work; always populated in the real agent runtime.
fleet: Option<Arc<FleetPublisher>>,
}
impl Reconciler {
pub fn new(
device_id: Id,
topology: Arc<PodmanTopology>,
inventory: Arc<Inventory>,
fleet: Option<Arc<FleetPublisher>>,
) -> Self {
Self {
device_id,
topology,
inventory,
state: Mutex::new(HashMap::new()),
phases: Mutex::new(HashMap::new()),
fleet,
}
}
/// Record a new phase for a deployment and, if it changed, write
/// the updated [`DeploymentState`] to the KV. Same-phase
/// re-confirmations are no-ops so the periodic reconcile tick
/// doesn't churn the bucket.
async fn apply_phase(
&self,
deployment: &DeploymentName,
phase: Phase,
last_error: Option<String>,
) {
{
let mut phases = self.phases.lock().await;
if phases.get(deployment).copied() == Some(phase) {
return;
}
phases.insert(deployment.clone(), phase);
}
if let Some(publisher) = &self.fleet {
let state = DeploymentState {
device_id: self.device_id.clone(),
deployment: deployment.clone(),
phase,
last_event_at: Utc::now(),
last_error,
};
publisher.write_deployment_state(&state).await;
}
}
/// Clear the in-memory phase for a deployment and delete its KV
/// entry. Idempotent: a delete for a never-applied deployment is
/// a no-op in memory and a harmless tombstone write on the wire.
async fn drop_phase(&self, deployment: &DeploymentName) {
let was_known = {
let mut phases = self.phases.lock().await;
phases.remove(deployment).is_some()
};
if !was_known {
return;
}
if let Some(publisher) = &self.fleet {
publisher.delete_deployment_state(deployment).await;
}
}
/// Handle a Put event (new or updated score on NATS KV). No-ops if the
/// serialized score is byte-identical to the last-seen value for this
/// key.
pub async fn apply(&self, key: &str, value: &[u8]) -> Result<()> {
let deployment = deployment_from_key(key);
let incoming = match serde_json::from_slice::<ReconcileScore>(value) {
Ok(ReconcileScore::PodmanV0(s)) => s,
Err(e) => {
tracing::warn!(key, error = %e, "failed to deserialize score");
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Failed, Some(format!("bad payload: {e}")))
.await;
}
return Ok(());
}
};
let serialized = String::from_utf8_lossy(value).into_owned();
{
let state = self.state.lock().await;
if let Some(existing) = state.get(key) {
if existing.serialized == serialized {
tracing::debug!(key, "score unchanged — noop");
return Ok(());
}
}
}
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Pending, None).await;
}
match self.run_score(key, &incoming).await {
Ok(()) => {
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Running, None).await;
}
}
Err(e) => {
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Failed, Some(short(&e.to_string())))
.await;
}
return Err(e);
}
}
let mut state = self.state.lock().await;
state.insert(
key.to_string(),
CachedEntry {
serialized,
score: incoming,
},
);
Ok(())
}
/// Handle a Delete/Purge event. Stops and removes every container
/// referenced by the last cached score for this key. Idempotent: if we
/// never saw a Put for this key (agent restart after delete), logs and
/// returns ok.
pub async fn remove(&self, key: &str) -> Result<()> {
let deployment = deployment_from_key(key);
let mut state = self.state.lock().await;
let Some(entry) = state.remove(key) else {
tracing::info!(key, "delete for unknown key — nothing to remove");
if let Some(name) = &deployment {
self.drop_phase(name).await;
}
return Ok(());
};
drop(state);
use harmony::topology::ContainerRuntime;
for service in &entry.score.services {
if let Err(e) = self.topology.remove_service(&service.name).await {
tracing::warn!(
key,
service = %service.name,
error = %e,
"failed to remove container"
);
} else {
tracing::info!(key, service = %service.name, "removed container");
}
}
if let Some(name) = &deployment {
self.drop_phase(name).await;
}
Ok(())
}
/// Periodic ground-truth reconcile. ROADMAP §5.6 — "polling instead of
/// event-driven PLEG. Agent polls podman every 30s as ground truth;
/// KV watch events are accelerators." Re-runs each cached score against
/// podman-api; the underlying `ensure_service_running` is idempotent
/// so a converged state produces no log noise.
pub async fn tick(&self) -> Result<()> {
let snapshot: Vec<(String, PodmanV0Score)> = {
let state = self.state.lock().await;
state
.iter()
.map(|(k, v)| (k.clone(), v.score.clone()))
.collect()
};
for (key, score) in snapshot {
let deployment = deployment_from_key(&key);
match self.run_score(&key, &score).await {
Ok(()) => {
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Running, None).await;
}
}
Err(e) => {
tracing::warn!(key, error = %e, "periodic reconcile failed");
if let Some(name) = &deployment {
self.apply_phase(name, Phase::Failed, Some(short(&e.to_string())))
.await;
}
}
}
}
Ok(())
}
pub async fn run_periodic(self: Arc<Self>, interval: Duration) {
let mut ticker = tokio::time::interval(interval);
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
loop {
ticker.tick().await;
if let Err(e) = self.tick().await {
tracing::warn!(error = %e, "reconcile tick error");
}
}
}
async fn run_score(&self, key: &str, score: &PodmanV0Score) -> Result<()> {
let interpret = Score::<PodmanTopology>::create_interpret(score);
let outcome = interpret
.execute(&self.inventory, &self.topology)
.await
.map_err(|e| anyhow::anyhow!("PodmanV0Score interpret failed for {key}: {e}"))?;
tracing::info!(key, outcome = ?outcome, "reconciled");
Ok(())
}
}
/// Extract the deployment name from a NATS KV key of the form
/// `<device>.<deployment>`.
fn deployment_from_key(key: &str) -> Option<DeploymentName> {
let (_, rest) = key.split_once('.')?;
DeploymentName::try_new(rest).ok()
}
/// Truncate a long error message so the DeploymentState payload stays
/// comfortably below NATS JetStream's per-message limit.
fn short(s: &str) -> String {
    const MAX: usize = 512;
    if s.len() <= MAX {
        return s.to_string();
    }
    // Back off to a char boundary so the slice can't panic on
    // multi-byte UTF-8 in the error text.
    let mut end = MAX;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    let mut cut = s[..end].to_string();
    cut.push('…');
    cut
}
#[cfg(test)]
mod tests {
//! Focused tests for transition detection. Drive `apply_phase` /
//! `drop_phase` directly with an inert topology (no real podman
//! socket) and a `None` FleetPublisher.
use super::*;
use harmony::inventory::Inventory;
use harmony::modules::podman::PodmanTopology;
use std::path::PathBuf;
fn reconciler() -> Reconciler {
let topology = Arc::new(
PodmanTopology::from_unix_socket(PathBuf::from("/nonexistent/for-tests")).unwrap(),
);
let inventory = Arc::new(Inventory::empty());
Reconciler::new(
Id::from("test-device".to_string()),
topology,
inventory,
None,
)
}
fn dn(s: &str) -> DeploymentName {
DeploymentName::try_new(s).expect("valid test name")
}
#[tokio::test]
async fn apply_phase_records_new_phase() {
let r = reconciler();
r.apply_phase(&dn("hello"), Phase::Running, None).await;
let phases = r.phases.lock().await;
assert_eq!(phases.get(&dn("hello")), Some(&Phase::Running));
}
#[tokio::test]
async fn apply_phase_idempotent_for_same_phase() {
let r = reconciler();
r.apply_phase(&dn("hello"), Phase::Running, None).await;
r.apply_phase(&dn("hello"), Phase::Running, None).await;
let phases = r.phases.lock().await;
assert_eq!(phases.len(), 1);
}
#[tokio::test]
async fn apply_phase_transitions_update_phase() {
let r = reconciler();
r.apply_phase(&dn("hello"), Phase::Pending, None).await;
r.apply_phase(&dn("hello"), Phase::Running, None).await;
r.apply_phase(&dn("hello"), Phase::Failed, Some("oom".to_string()))
.await;
let phases = r.phases.lock().await;
assert_eq!(phases.get(&dn("hello")), Some(&Phase::Failed));
}
#[tokio::test]
async fn drop_phase_clears_known_deployment() {
let r = reconciler();
r.apply_phase(&dn("hello"), Phase::Running, None).await;
r.drop_phase(&dn("hello")).await;
let phases = r.phases.lock().await;
assert!(!phases.contains_key(&dn("hello")));
}
#[tokio::test]
async fn drop_phase_on_unknown_deployment_is_noop() {
let r = reconciler();
r.drop_phase(&dn("never-existed")).await;
let phases = r.phases.lock().await;
assert!(phases.is_empty());
}
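    // Illustrative additions (not part of the original change set):
    // pin the free helpers above, key grammar and UTF-8-safe truncation.
    #[test]
    fn deployment_from_key_splits_device_prefix() {
        let name = deployment_from_key("pi-01.hello").expect("well-formed key");
        assert_eq!(name.as_str(), "hello");
        assert!(deployment_from_key("no-dot").is_none());
    }
    #[test]
    fn short_truncates_on_a_char_boundary() {
        let long = "é".repeat(600); // 1200 bytes, 2 per char
        let cut = short(&long);
        assert!(cut.ends_with('…'));
        assert!(cut.len() <= 512 + '…'.len_utf8());
    }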
}

View File

@@ -1,14 +1,13 @@
[package]
name = "iot-operator-v0"
name = "harmony-fleet-operator"
version = "0.1.0"
edition = "2024"
rust-version = "1.85"
[dependencies]
harmony = { path = "../../harmony" }
harmony-k8s = { path = "../../harmony-k8s" }
harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" }
async-trait.workspace = true
chrono = { workspace = true, features = ["serde"] }
kube = { workspace = true, features = ["runtime", "derive"] }
k8s-openapi.workspace = true
async-nats = { workspace = true }

View File

@@ -0,0 +1,26 @@
# Minimal runtime container for the IoT operator. Assumes
# `target/release/harmony-fleet-operator` has already been built on the
# host (the load-test harness does this). The base image is
# archlinux:base so the container's glibc matches the ABI of the
# host-built binary — debian:bookworm-slim and similar distros ship
# older glibcs and fail at startup with "version `GLIBC_2.x' not
# found".
#
# When the operator gets its own release pipeline, swap this for a
# two-stage build that produces the binary inside a pinned Rust
# toolchain image.
FROM docker.io/library/archlinux:base
COPY target/release/harmony-fleet-operator /usr/local/bin/harmony-fleet-operator
# Non-root runtime. Pairs with the Pod's
# `securityContext.runAsNonRoot: true` in the helm chart — k8s
# admission rejects pods with that flag unless either the image
# declares a non-root USER or the Pod pins runAsUser. We deliberately
# don't pin runAsUser (OpenShift's restricted-v2 SCC assigns a
# namespace-specific UID and rejects fixed UIDs); the image's USER is
# the portable mechanism. 65532 is the `nonroot` UID convention used
# by distroless and many security-hardened base images; it's
# arbitrary but safe — no overlap with typical system UIDs.
USER 65532:65532
ENTRYPOINT ["/usr/local/bin/harmony-fleet-operator"]
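#
# Illustrative build + load flow (command arguments assumed from the
# load-test harness defaults, not a pinned pipeline):
#   cargo build --release -p harmony-fleet-operator
#   podman build -t localhost/harmony-fleet-operator:latest \
#     -f fleet/harmony-fleet-operator/Dockerfile .
#   k3d image import localhost/harmony-fleet-operator:latest -c fleet-load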

View File

@@ -0,0 +1,342 @@
//! Generate the operator's helm chart from typed Rust.
//!
//! Produces a self-contained chart directory that `helm install`
//! accepts as a path. Resources are constructed as typed k8s_openapi
//! values and serialized at chart-build time, matching ADR 018
//! (Template Hydration) — no hand-authored yaml in the source tree.
//!
//! The chart has no Helm templating (`{{ .Values.foo }}`); the caller
//! re-runs the generator whenever config changes. For a publishable
//! chart with user-facing values, layer a templating pass on top of
//! this output.
//!
//! Parity with `install` subcommand: both install the same two CRDs
//! (`Deployment`, `Device`). `install` applies the CRDs only, for
//! the host-side-operator path; `chart` packages CRDs + RBAC + the
//! operator Deployment into a helm chart the cluster runs itself.
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use harmony::modules::application::helm::{HelmChart, HelmResourceKind};
use k8s_openapi::api::apps::v1::{
Deployment as K8sDeployment, DeploymentSpec as K8sDeploymentSpec,
};
use k8s_openapi::api::core::v1::{
Capabilities, Container, EnvVar, PodSpec, PodTemplateSpec, SeccompProfile, SecurityContext,
ServiceAccount,
};
use k8s_openapi::api::rbac::v1::{ClusterRole, ClusterRoleBinding, PolicyRule, RoleRef, Subject};
use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::CustomResourceExt;
use kube::api::ObjectMeta;
use crate::crd::{Deployment, Device};
/// Inputs for chart generation. Default values are aimed at a
/// local-dev k3d install; override via the `chart` subcommand flags.
pub struct ChartOptions {
/// Where to write the chart directory. The chart is created as a
/// subdirectory `harmony-fleet-operator` inside this path.
pub output_dir: PathBuf,
/// Container image tag the operator Deployment should pull. For
/// k3d with sideloaded images, `IfNotPresent` + a tag that's
/// already in the cluster store is enough.
pub image: String,
/// `Always` for registry-backed dev loops, `IfNotPresent` for
/// sideloaded k3d images, `Never` if the image must already be
/// present.
pub image_pull_policy: String,
/// Namespace the operator Deployment runs in. `helm install
/// --create-namespace` creates it if absent; the chart itself
/// doesn't include a Namespace resource so the chart stays
/// reusable across namespaces.
pub namespace: String,
/// NATS URL the operator connects to. For in-cluster NATS at
/// `fleet-nats.fleet-system` the default `nats://fleet-nats.fleet-system:4222`
/// works with no config.
pub nats_url: String,
/// `RUST_LOG` value for the operator process.
pub log_level: String,
}
impl Default for ChartOptions {
fn default() -> Self {
Self {
output_dir: PathBuf::from("/tmp/fleet-load-test/chart"),
image: "localhost/harmony-fleet-operator:latest".to_string(),
image_pull_policy: "IfNotPresent".to_string(),
namespace: "fleet-system".to_string(),
nats_url: "nats://fleet-nats.fleet-system:4222".to_string(),
log_level: "info,kube_runtime=warn".to_string(),
}
}
}
const RELEASE_NAME: &str = "harmony-fleet-operator";
const SERVICE_ACCOUNT: &str = "harmony-fleet-operator";
const CLUSTER_ROLE: &str = "harmony-fleet-operator";
const CLUSTER_ROLE_BINDING: &str = "harmony-fleet-operator";
/// Build + write the chart to `opts.output_dir`. Returns the full
/// path to the generated chart directory (which is what `helm
/// install <path>` wants).
pub fn build_chart(opts: &ChartOptions) -> Result<PathBuf> {
std::fs::create_dir_all(&opts.output_dir)
.with_context(|| format!("creating {:?}", opts.output_dir))?;
let mut chart = HelmChart::new(
RELEASE_NAME.to_string(),
env!("CARGO_PKG_VERSION").to_string(),
);
chart.description = "IoT operator — Deployment CRD → NATS KV".to_string();
chart.add_resource(HelmResourceKind::Crd(crd_with_keep_annotation(
Deployment::crd(),
)));
chart.add_resource(HelmResourceKind::Crd(crd_with_keep_annotation(
Device::crd(),
)));
chart.add_resource(HelmResourceKind::ServiceAccount(service_account(
&opts.namespace,
)));
chart.add_resource(HelmResourceKind::ClusterRole(cluster_role()));
chart.add_resource(HelmResourceKind::ClusterRoleBinding(cluster_role_binding(
&opts.namespace,
)));
chart.add_resource(HelmResourceKind::Deployment(operator_deployment(opts)));
let written = chart
.write_to(Path::new(&opts.output_dir))
.map_err(|e| anyhow::anyhow!("writing chart: {e}"))?;
Ok(written)
}
/// Annotate a CRD with `helm.sh/resource-policy: keep` so
/// `helm uninstall` **does not** cascade-delete the CRD and its
/// CRs. Without this, uninstall wipes every `Deployment` + `Device`
/// CR in the cluster via the GC → agents notice the desired-state
/// KV deletes → the whole fleet tears down its containers. One
/// typo on uninstall would be catastrophic. `keep` makes uninstall
/// idempotent and data-preserving; the user runs an explicit
/// `kubectl delete crd …` if they actually want to wipe.
fn crd_with_keep_annotation(mut crd: CustomResourceDefinition) -> CustomResourceDefinition {
let annotations = crd.metadata.annotations.get_or_insert_with(BTreeMap::new);
annotations.insert("helm.sh/resource-policy".to_string(), "keep".to_string());
crd
}
fn service_account(namespace: &str) -> ServiceAccount {
ServiceAccount {
metadata: ObjectMeta {
name: Some(SERVICE_ACCOUNT.to_string()),
namespace: Some(namespace.to_string()),
..Default::default()
},
..Default::default()
}
}
/// Verbs the operator actually uses — nothing aspirational. Tightening
/// later is a matter of deleting a line.
fn cluster_role() -> ClusterRole {
let group = "fleet.nationtech.io".to_string();
ClusterRole {
metadata: ObjectMeta {
name: Some(CLUSTER_ROLE.to_string()),
..Default::default()
},
rules: Some(vec![
// Deployments: controller lists + watches + patches
// (finalizer metadata); aggregator lists + watches +
// patches status.
PolicyRule {
api_groups: Some(vec![group.clone()]),
resources: Some(vec!["deployments".to_string()]),
verbs: vec!["get", "list", "watch", "patch", "update"]
.into_iter()
.map(String::from)
.collect(),
..Default::default()
},
PolicyRule {
api_groups: Some(vec![group.clone()]),
resources: Some(vec![
"deployments/status".to_string(),
"deployments/finalizers".to_string(),
]),
verbs: vec!["get", "update", "patch"]
.into_iter()
.map(String::from)
.collect(),
..Default::default()
},
// Devices: reconciler server-side-applies + deletes;
// aggregator lists + watches.
PolicyRule {
api_groups: Some(vec![group]),
resources: Some(vec!["devices".to_string()]),
verbs: vec![
"get", "list", "watch", "create", "update", "patch", "delete",
]
.into_iter()
.map(String::from)
.collect(),
..Default::default()
},
]),
..Default::default()
}
}
fn cluster_role_binding(namespace: &str) -> ClusterRoleBinding {
ClusterRoleBinding {
metadata: ObjectMeta {
name: Some(CLUSTER_ROLE_BINDING.to_string()),
..Default::default()
},
role_ref: RoleRef {
api_group: "rbac.authorization.k8s.io".to_string(),
kind: "ClusterRole".to_string(),
name: CLUSTER_ROLE.to_string(),
},
subjects: Some(vec![Subject {
kind: "ServiceAccount".to_string(),
name: SERVICE_ACCOUNT.to_string(),
namespace: Some(namespace.to_string()),
..Default::default()
}]),
}
}
fn operator_deployment(opts: &ChartOptions) -> K8sDeployment {
let mut match_labels = BTreeMap::new();
match_labels.insert(
"app.kubernetes.io/name".to_string(),
RELEASE_NAME.to_string(),
);
K8sDeployment {
metadata: ObjectMeta {
name: Some(RELEASE_NAME.to_string()),
namespace: Some(opts.namespace.clone()),
labels: Some(match_labels.clone()),
..Default::default()
},
spec: Some(K8sDeploymentSpec {
replicas: Some(1),
selector: LabelSelector {
match_labels: Some(match_labels.clone()),
match_expressions: None,
},
template: PodTemplateSpec {
metadata: Some(ObjectMeta {
labels: Some(match_labels),
..Default::default()
}),
spec: Some(PodSpec {
service_account_name: Some(SERVICE_ACCOUNT.to_string()),
containers: vec![Container {
name: "operator".to_string(),
image: Some(opts.image.clone()),
image_pull_policy: Some(opts.image_pull_policy.clone()),
env: Some(vec![
EnvVar {
name: "NATS_URL".to_string(),
value: Some(opts.nats_url.clone()),
..Default::default()
},
EnvVar {
name: "RUST_LOG".to_string(),
value: Some(opts.log_level.clone()),
..Default::default()
},
]),
security_context: Some(container_security_context()),
..Default::default()
}],
..Default::default()
}),
},
..Default::default()
}),
..Default::default()
}
}
/// Minimum-privilege container security context.
///
/// - `runAsNonRoot: true` — a compromised operator pod with
/// cluster-scoped write on Deployment + Device CRs is enough to
/// tear down the fleet; running as non-root limits blast radius.
/// - `readOnlyRootFilesystem: true` — the Rust operator logs to
/// stdout only; it never writes to `/`.
/// - `allowPrivilegeEscalation: false` — no setuid binaries, no
/// capability gain under any child exec.
/// - `capabilities: drop [ALL]` — no kernel capabilities retained.
/// - `seccompProfile: RuntimeDefault` — runtime's default syscall
/// filter (blocks the obscure/dangerous ones).
///
/// **Deliberately no `runAsUser`** — OpenShift's `restricted-v2`
/// SCC assigns namespace-specific UIDs and rejects pods that pin
/// a fixed UID outside its range. Relying on the image's USER
/// directive (see Dockerfile) lets vanilla k8s and OpenShift pick
/// a compatible UID without custom SCC bindings.
fn container_security_context() -> SecurityContext {
SecurityContext {
run_as_non_root: Some(true),
read_only_root_filesystem: Some(true),
allow_privilege_escalation: Some(false),
capabilities: Some(Capabilities {
add: None,
drop: Some(vec!["ALL".to_string()]),
}),
seccomp_profile: Some(SeccompProfile {
type_: "RuntimeDefault".to_string(),
localhost_profile: None,
}),
..Default::default()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn crds_carry_keep_annotation() {
let crd = crd_with_keep_annotation(Deployment::crd());
assert_eq!(
crd.metadata
.annotations
.as_ref()
.and_then(|a| a.get("helm.sh/resource-policy"))
.map(String::as_str),
Some("keep"),
"CRDs must carry the keep annotation so helm uninstall doesn't \
cascade-delete CRs and wipe the fleet"
);
}
#[test]
fn security_context_is_locked_down() {
let sc = container_security_context();
assert_eq!(sc.run_as_non_root, Some(true));
assert_eq!(sc.read_only_root_filesystem, Some(true));
assert_eq!(sc.allow_privilege_escalation, Some(false));
assert_eq!(
sc.capabilities.as_ref().and_then(|c| c.drop.as_ref()),
Some(&vec!["ALL".to_string()])
);
assert_eq!(
sc.seccomp_profile.as_ref().map(|s| s.type_.as_str()),
Some("RuntimeDefault")
);
// OpenShift SCC compatibility: no fixed runAsUser, let the
// image/SCC negotiate.
assert!(sc.run_as_user.is_none());
}
}

View File

@@ -0,0 +1,136 @@
//! Deployment controller.
//!
//! With the selector-based model, the controller's job shrank to:
//! - validate that the CR name is a valid `DeploymentName`
//! (apiserver already validates RFC 1123 — this is the
//! additional NATS-subject-safety check),
//! - hold a finalizer so delete is synchronous with desired-state
//! KV cleanup.
//!
//! The aggregator owns:
//! - resolving `spec.targetSelector` against Device CRs,
//! - writing `desired-state.<device>.<deployment>` KV entries,
//! - patching `.status.aggregate`.
//!
//! So on `apply` this function is a no-op past validation; the
//! aggregator notices the new CR via its own kube watch and
//! materializes KV entries for matched devices on the next tick.
//!
//! On `cleanup` we still need to remove every KV entry for this
//! deployment synchronously so agents stop reconciling before the
//! CR disappears. KV doesn't support prefix delete; we scan the
//! bucket and drop keys with the matching `.<deployment_name>`
//! suffix.
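//!
//! E.g. deleting deployment `hello` removes `pi-01.hello` and
//! `pi-02.hello` but not `pi-01.other-hello`: the match is on the
//! full `.hello` suffix, and a suffix can't straddle key segments
//! (assuming `DeploymentName` forbids `.`, per its
//! NATS-subject-safety rules).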
use std::sync::Arc;
use std::time::Duration;
use async_nats::jetstream::kv::Store;
use futures_util::StreamExt;
use harmony_reconciler_contracts::DeploymentName;
use kube::runtime::Controller;
use kube::runtime::controller::Action;
use kube::runtime::finalizer::{Event as FinalizerEvent, finalizer};
use kube::runtime::watcher::Config as WatcherConfig;
use kube::{Api, Client, ResourceExt};
use crate::crd::Deployment;
const FINALIZER: &str = "fleet.nationtech.io/finalizer";
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("kube api: {0}")]
Kube(#[from] kube::Error),
#[error("nats kv: {0}")]
Kv(String),
#[error("missing namespace on resource")]
MissingNamespace,
#[error("invalid deployment name '{0}': {1}")]
InvalidName(String, String),
}
pub struct Context {
pub client: Client,
pub kv: Store,
}
pub async fn run(client: Client, kv: Store) -> anyhow::Result<()> {
let api: Api<Deployment> = Api::all(client.clone());
let ctx = Arc::new(Context { client, kv });
tracing::info!("starting Deployment controller");
Controller::new(api, WatcherConfig::default())
.run(reconcile, error_policy, ctx)
.for_each(|res| async move {
match res {
Ok((obj, _)) => tracing::debug!(?obj, "reconciled"),
Err(e) => tracing::warn!(error = %e, "reconcile error"),
}
})
.await;
Ok(())
}
async fn reconcile(obj: Arc<Deployment>, ctx: Arc<Context>) -> Result<Action, Error> {
let ns = obj.namespace().ok_or(Error::MissingNamespace)?;
let name = obj.name_any();
// Validation pass: apiserver accepts any RFC 1123 name; we need
// the additional NATS-subject-safety properties before anything
// downstream tries to use it as a KV key fragment.
DeploymentName::try_new(&name).map_err(|e| Error::InvalidName(name.clone(), e.to_string()))?;
let api: Api<Deployment> = Api::namespaced(ctx.client.clone(), &ns);
finalizer(&api, FINALIZER, obj, |event| async {
match event {
// No work on apply — the aggregator picks up the CR via
// its own kube watch and writes KV entries for matching
// devices. Long requeue so we're not pointlessly polling.
FinalizerEvent::Apply(_) => Ok(Action::requeue(Duration::from_secs(300))),
FinalizerEvent::Cleanup(d) => cleanup(d, &ctx.kv).await,
}
})
.await
.map_err(|e| match e {
kube::runtime::finalizer::Error::ApplyFailed(e)
| kube::runtime::finalizer::Error::CleanupFailed(e) => e,
kube::runtime::finalizer::Error::AddFinalizer(e)
| kube::runtime::finalizer::Error::RemoveFinalizer(e) => Error::Kube(e),
kube::runtime::finalizer::Error::UnnamedObject => Error::Kv("unnamed object".into()),
kube::runtime::finalizer::Error::InvalidFinalizer => Error::Kv("invalid finalizer".into()),
})
}
async fn cleanup(obj: Arc<Deployment>, kv: &Store) -> Result<Action, Error> {
let name = obj.name_any();
let deployment_name =
DeploymentName::try_new(&name).map_err(|e| Error::InvalidName(name, e.to_string()))?;
let suffix = format!(".{}", deployment_name.as_str());
let mut removed = 0u64;
let mut keys = kv
.keys()
.await
.map_err(|e| Error::Kv(format!("listing keys: {e}")))?;
while let Some(key_res) = keys.next().await {
let key = key_res.map_err(|e| Error::Kv(format!("reading key: {e}")))?;
if key.ends_with(&suffix) {
kv.delete(&key)
.await
.map_err(|e| Error::Kv(format!("deleting {key}: {e}")))?;
removed += 1;
}
}
tracing::info!(
deployment = %deployment_name,
removed,
"cleanup: deleted desired-state entries"
);
Ok(Action::await_change())
}
fn error_policy(_obj: Arc<Deployment>, err: &Error, _ctx: Arc<Context>) -> Action {
tracing::warn!(error = %err, "requeueing after error");
Action::requeue(Duration::from_secs(30))
}

View File

@@ -1,3 +1,5 @@
use harmony_reconciler_contracts::InventorySnapshot;
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::CustomResource;
use schemars::JsonSchema;
use schemars::schema::{
@@ -5,19 +7,25 @@ use schemars::schema::{
};
use serde::{Deserialize, Serialize};
/// Deployment intent. Targets devices by label selector — identical
/// to the pattern K8s itself uses for DaemonSet nodeSelector, Service
/// pod selector, etc. The operator resolves the selector against
/// `Device` CRs at reconcile time; no list of device ids on spec.
#[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema)]
#[kube(
group = "iot.nationtech.io",
group = "fleet.nationtech.io",
version = "v1alpha1",
kind = "Deployment",
plural = "deployments",
shortname = "iotdep",
shortname = "fleetdep",
namespaced,
status = "DeploymentStatus"
)]
#[serde(rename_all = "camelCase")]
pub struct DeploymentSpec {
pub target_devices: Vec<String>,
    /// Which devices this deployment targets. Matched against
    /// `Device.metadata.labels`.
pub target_selector: LabelSelector,
#[schemars(schema_with = "score_payload_schema")]
pub score: ScorePayload,
pub rollout: Rollout,
@@ -35,13 +43,11 @@ pub struct ScorePayload {
///
/// 1. `x-kubernetes-preserve-unknown-fields: true` on `data` — the payload
/// is routed opaquely; its shape is enforced on-device by the agent's
/// typed `IotScore` deserialization, not by the apiserver.
/// typed `ReconcileScore` deserialization, not by the apiserver.
/// 2. An `x-kubernetes-validations` CEL rule on the enclosing `score` object
/// requiring `type` to be a valid Rust identifier, so typos (`"pdoman"`)
/// are rejected at `kubectl apply` time rather than silently reaching
/// the agent. This validates the *shape* of the discriminator without
/// listing the known variant catalog — the operator stays a generic
/// router (v0.3+ can add `OkdApplyV0` etc. without an operator release).
/// the agent.
fn score_payload_schema(_: &mut schemars::r#gen::SchemaGenerator) -> Schema {
let type_schema = Schema::Object(SchemaObject {
instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::String))),
@@ -100,6 +106,65 @@ pub enum RolloutStrategy {
#[derive(Serialize, Deserialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DeploymentStatus {
/// Per-deployment rollup. Present once the aggregator has
/// evaluated the selector at least once.
#[serde(skip_serializing_if = "Option::is_none")]
pub observed_score_string: Option<String>,
pub aggregate: Option<DeploymentAggregate>,
}
/// Rollup of per-device deployment phases for this Deployment CR.
#[derive(Serialize, Deserialize, Clone, Debug, Default, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DeploymentAggregate {
/// How many Device CRs currently match `spec.targetSelector`.
/// The three phase counters below sum to this; targeted-but-
/// unreported devices are folded into `pending`.
pub matched_device_count: u32,
pub succeeded: u32,
pub failed: u32,
pub pending: u32,
/// Device id of the most recent device reporting a failure, with
/// its short error message. Cleared when that device transitions
/// back to Running.
#[serde(skip_serializing_if = "Option::is_none")]
pub last_error: Option<AggregateLastError>,
}
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct AggregateLastError {
pub device_id: String,
pub message: String,
pub at: String,
}
/// A physical/virtual device registered with the fleet. Cluster-scoped
/// because devices aren't tenant-isolated by namespace — they're
/// infrastructure, the same way K8s Nodes are cluster-scoped.
///
/// Created by the operator from `DeviceInfo` entries in the NATS
/// `device-info` bucket. Agents never touch the kube apiserver
/// directly; they publish DeviceInfo to NATS and the operator
/// reflects it here.
///
/// `metadata.labels` carries the device's routing labels;
/// `spec.inventory` holds the hardware/OS snapshot. No status
/// subresource today — liveness is queried from the NATS
/// `device-heartbeat` bucket directly; when a CR-side reflection
/// (Reachable / Stale conditions) becomes useful, it'll land with
/// its own reconciler rather than sitting here as speculative
/// surface.
#[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema)]
#[kube(
group = "fleet.nationtech.io",
version = "v1alpha1",
kind = "Device",
plural = "devices",
shortname = "fleetdev"
)]
#[serde(rename_all = "camelCase")]
pub struct DeviceSpec {
/// Hardware + OS facts reported by the agent at registration.
/// Rarely changes after first publish.
#[serde(skip_serializing_if = "Option::is_none")]
pub inventory: Option<InventorySnapshot>,
}

View File

@@ -0,0 +1,165 @@
//! DeviceInfo (NATS `device-info` KV) → Device CR (kube).
//!
//! Agents publish a `DeviceInfo` payload to NATS on startup + on
//! label/inventory change. This reconciler watches that bucket and
//! materializes each entry as a cluster-scoped `Device` custom
//! resource, so label selectors and `kubectl get devices -l …`
//! work the way they do for K8s Nodes.
//!
//! Failure mode: idempotent server-side apply with a fixed field
//! manager, so repeated writes don't accumulate revisions and
//! concurrent edits from other sources stay merged safely.
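//!
//! E.g. a put of `info.pi-01` (assuming agents key entries as
//! `info.<device-id>`, which the delete path below relies on)
//! materializes a cluster-scoped `Device` named `pi-01`; deleting
//! the KV entry deletes the CR.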
use anyhow::Result;
use async_nats::jetstream::kv::{Operation, Store};
use futures_util::StreamExt;
use harmony_reconciler_contracts::{BUCKET_DEVICE_INFO, DeviceInfo};
use kube::Client;
use kube::api::{Api, DeleteParams, Patch, PatchParams};
use std::collections::BTreeMap;
use crate::crd::{Device, DeviceSpec};
const FIELD_MANAGER: &str = "harmony-fleet-operator-device-reconciler";
pub async fn run(client: Client, js: async_nats::jetstream::Context) -> Result<()> {
let bucket = js
.create_key_value(async_nats::jetstream::kv::Config {
bucket: BUCKET_DEVICE_INFO.to_string(),
..Default::default()
})
.await?;
run_loop(client, bucket).await
}
async fn run_loop(client: Client, bucket: Store) -> Result<()> {
let devices: Api<Device> = Api::all(client);
// `watch_with_history` replays every current entry then streams
// live updates. Matches the aggregator's pattern and means we
// don't need a separate cold-start KV scan here.
let mut watch = bucket.watch_with_history(">").await?;
tracing::info!("device-reconciler: watching device-info KV");
while let Some(entry_res) = watch.next().await {
let entry = match entry_res {
Ok(e) => e,
Err(e) => {
tracing::warn!(error = %e, "device-reconciler: watch delivery error");
continue;
}
};
match entry.operation {
Operation::Put => {
let info: DeviceInfo = match serde_json::from_slice(&entry.value) {
Ok(d) => d,
Err(e) => {
tracing::warn!(key = %entry.key, error = %e, "device-reconciler: bad DeviceInfo payload");
continue;
}
};
if let Err(e) = upsert_device(&devices, &info).await {
tracing::warn!(
device = %info.device_id,
error = %e,
"device-reconciler: upsert failed"
);
}
}
Operation::Delete | Operation::Purge => {
let Some(device_id) = entry.key.strip_prefix("info.") else {
continue;
};
if let Err(e) = delete_device(&devices, device_id).await {
tracing::warn!(%device_id, error = %e, "device-reconciler: delete failed");
}
}
}
}
Ok(())
}
async fn upsert_device(api: &Api<Device>, info: &DeviceInfo) -> Result<()> {
let name = info.device_id.to_string();
let mut device = Device::new(
&name,
DeviceSpec {
inventory: info.inventory.clone(),
},
);
device.metadata.labels = Some(clean_labels(&info.labels));
api.patch(
&name,
&PatchParams::apply(FIELD_MANAGER).force(),
&Patch::Apply(&device),
)
.await?;
tracing::debug!(%name, "device-reconciler: upserted");
Ok(())
}
async fn delete_device(api: &Api<Device>, name: &str) -> Result<()> {
match api.delete(name, &DeleteParams::default()).await {
Ok(_) => {
tracing::debug!(%name, "device-reconciler: deleted");
Ok(())
}
Err(kube::Error::Api(ae)) if ae.code == 404 => Ok(()),
Err(e) => Err(e.into()),
}
}
/// Drop labels whose keys or values violate k8s label-syntax rules.
/// Agents could in theory publish arbitrary strings; kube will reject
/// a whole apply if even one is malformed, which would take out that
/// device's registration. Skip-and-log beats block-everything.
fn clean_labels(raw: &BTreeMap<String, String>) -> BTreeMap<String, String> {
raw.iter()
.filter(|(k, v)| is_label_key(k) && is_label_value(v))
.map(|(k, v)| (k.clone(), v.clone()))
.collect()
}
fn is_label_key(s: &str) -> bool {
    // Simplified: optional DNS-subdomain-like prefix + name ≤ 63 chars
    // of [A-Za-z0-9-._]; k8s's alnum-first/last rule is not enforced.
if s.is_empty() || s.len() > 253 {
return false;
}
let name = s.rsplit_once('/').map(|(_, n)| n).unwrap_or(s);
!name.is_empty()
&& name.len() <= 63
&& name
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '.' || c == '_')
}
fn is_label_value(s: &str) -> bool {
if s.len() > 63 {
return false;
}
s.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '.' || c == '_')
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn label_cleaner_accepts_common_cases() {
assert!(is_label_key("group"));
assert!(is_label_key("arch"));
assert!(is_label_key("fleet.nationtech.io/region"));
assert!(is_label_value("aarch64"));
assert!(is_label_value("site-01"));
}
#[test]
fn label_cleaner_rejects_bad_cases() {
assert!(!is_label_key(""));
assert!(!is_label_key("has space"));
assert!(!is_label_value("has space"));
assert!(!is_label_value(&"x".repeat(64)));
}
}

View File

@@ -0,0 +1,831 @@
//! Operator-side aggregator + desired-state writer.
//!
//! Maintains three in-memory caches driven by watches:
//! - Deployment CRs (kube watch) → what we want to run
//! - Device CRs (kube watch) → where we could run it
//! - DeploymentState KV (NATS watch) → what's actually running
//!
//! Outputs:
//! - Writes `desired-state.<device>.<deployment>` KV entries when a
//! Deployment's selector matches a Device. Deletes them when the
//! match goes away.
//! - Patches `Deployment.status.aggregate` at 1 Hz for every CR
//! whose matched-device set or phase counts changed.
//!
//! No separate event stream, no per-key revision tracking: KV watches
//! are ordered and last-writer-wins, and the dirty set naturally
//! coalesces high-frequency state churn into one patch per tick.
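//!
//! Worked example: Deployment `hello` selects `group=edge-a`; Devices
//! `pi-01` and `pi-02` carry that label → two KV puts
//! (`desired-state.pi-01.hello`, `desired-state.pi-02.hello`). When
//! `pi-01` reports `Running` and `pi-02` reports `Failed`, the next
//! patch tick writes `.status.aggregate` as matched 2 / succeeded 1 /
//! failed 1 / pending 0.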
use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;
use async_nats::jetstream::kv::{Operation, Store};
use futures_util::{StreamExt, TryStreamExt};
use harmony_reconciler_contracts::{
BUCKET_DESIRED_STATE, BUCKET_DEVICE_STATE, DeploymentName, DeploymentState, Phase,
desired_state_key,
};
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::api::{Api, Patch, PatchParams};
use kube::runtime::watcher::{self, Config as WatcherConfig, Event};
use kube::{Client, ResourceExt};
use serde_json::json;
use tokio::sync::Mutex;
use crate::crd::{AggregateLastError, Deployment, DeploymentAggregate, Device};
const PATCH_TICK: Duration = Duration::from_secs(1);
// ---------------------------------------------------------------------------
// State
// ---------------------------------------------------------------------------
/// (namespace, name) identifying a Deployment CR.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DeploymentKey {
pub namespace: String,
pub name: String,
}
impl DeploymentKey {
pub fn from_cr(cr: &Deployment) -> Option<Self> {
Some(Self {
namespace: cr.namespace()?,
name: cr.name_any(),
})
}
}
/// One `(device, deployment)` pair.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct DevicePair {
pub device_id: String,
pub deployment: DeploymentName,
}
/// Thin projection of a Deployment CR — everything we need for
/// selector evaluation + desired-state writes + status aggregation,
/// without borrowing the full kube object.
#[derive(Debug, Clone)]
pub struct CachedDeployment {
key: DeploymentKey,
deployment_name: DeploymentName,
selector: LabelSelector,
/// JSON-serialized score payload ready to `put` into
/// desired-state. Cached because the same bytes are written to
/// every matched device's KV entry.
score_json: Vec<u8>,
}
#[derive(Debug, Default)]
pub struct FleetState {
/// Cached Deployment CRs, keyed by (namespace, name).
deployments: HashMap<DeploymentKey, CachedDeployment>,
/// Cached Device labels, keyed by `metadata.name`.
devices: HashMap<String, BTreeMap<String, String>>,
/// Latest DeploymentState per (device, deployment) pair.
states: HashMap<DevicePair, DeploymentState>,
/// Which devices have we pushed desired-state for, per deployment?
/// Diff against recomputed targets on any change. Keyed by
/// `DeploymentName` (not `DeploymentKey`) because the
/// `desired-state` KV key space doesn't carry namespace —
/// deployment names are globally unique at the NATS level. This
/// lets cold-start seeding from the KV populate the map
/// correctly without having to guess namespaces.
owned_targets: HashMap<DeploymentName, HashSet<String>>,
/// Per-deployment latest-failure surface for the CR status.
last_error: HashMap<DeploymentKey, AggregateLastError>,
/// CR keys whose status needs re-patching on the next tick.
dirty: HashSet<DeploymentKey>,
}
pub type SharedFleetState = Arc<Mutex<FleetState>>;
// ---------------------------------------------------------------------------
// Selector evaluation
// ---------------------------------------------------------------------------
/// Does `selector` match this label set? matchLabels only for the
/// MVP — a non-empty matchExpressions logs a warning on every
/// evaluation and is treated as "no match" until we need it.
pub fn selector_matches(selector: &LabelSelector, labels: &BTreeMap<String, String>) -> bool {
if let Some(match_labels) = &selector.match_labels {
for (k, v) in match_labels {
if labels.get(k) != Some(v) {
return false;
}
}
}
if selector
.match_expressions
.as_ref()
.is_some_and(|v| !v.is_empty())
{
        tracing::warn!(
            "LabelSelector.matchExpressions is not yet supported; treating the selector as matching nothing"
        );
return false;
}
true
}
/// Set of Device names currently matching `selector`.
fn matched_devices(
selector: &LabelSelector,
devices: &HashMap<String, BTreeMap<String, String>>,
) -> HashSet<String> {
devices
.iter()
.filter(|(_, labels)| selector_matches(selector, labels))
.map(|(name, _)| name.clone())
.collect()
}
// ---------------------------------------------------------------------------
// Top-level run
// ---------------------------------------------------------------------------
pub async fn run(client: Client, js: async_nats::jetstream::Context) -> anyhow::Result<()> {
let state_bucket = js
.create_key_value(async_nats::jetstream::kv::Config {
bucket: BUCKET_DEVICE_STATE.to_string(),
..Default::default()
})
.await?;
let desired_bucket = js
.create_key_value(async_nats::jetstream::kv::Config {
bucket: BUCKET_DESIRED_STATE.to_string(),
..Default::default()
})
.await?;
// Cold-start: initialize owned_targets from the current contents
// of the desired-state bucket so we don't orphan entries written
// by a previous operator run.
let state: SharedFleetState = Arc::new(Mutex::new(FleetState::default()));
seed_owned_targets(&desired_bucket, &state).await?;
let deployments_api: Api<Deployment> = Api::all(client.clone());
let devices_api: Api<Device> = Api::all(client.clone());
let patch_api: Api<Deployment> = Api::all(client);
tracing::info!(
owned = state
.lock()
.await
.owned_targets
.values()
.map(|s| s.len())
.sum::<usize>(),
"aggregator: startup complete"
);
let state_watcher_handle = {
let state = state.clone();
let bucket = state_bucket.clone();
tokio::spawn(async move {
if let Err(e) = run_state_kv_watcher(bucket, state).await {
tracing::warn!(error = %e, "aggregator: state watcher exited");
}
})
};
let deployment_watcher_handle = {
let state = state.clone();
let desired = desired_bucket.clone();
tokio::spawn(async move {
if let Err(e) = run_deployment_watcher(deployments_api.clone(), state, desired).await {
tracing::warn!(error = %e, "aggregator: deployment watcher exited");
}
})
};
let device_watcher_handle = {
let state = state.clone();
let desired = desired_bucket.clone();
tokio::spawn(async move {
if let Err(e) = run_device_watcher(devices_api, state, desired).await {
tracing::warn!(error = %e, "aggregator: device watcher exited");
}
})
};
let patch_state = state.clone();
let patch_loop = async move {
let mut ticker = tokio::time::interval(PATCH_TICK);
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
loop {
ticker.tick().await;
if let Err(e) = patch_tick(&patch_api, &patch_state).await {
tracing::warn!(error = %e, "aggregator: patch tick failed");
}
}
};
tokio::select! {
_ = patch_loop => Ok(()),
_ = state_watcher_handle => Ok(()),
_ = deployment_watcher_handle => Ok(()),
_ = device_watcher_handle => Ok(()),
}
}
// ---------------------------------------------------------------------------
// Device-state KV watcher (unchanged path)
// ---------------------------------------------------------------------------
fn parse_state_key(key: &str) -> Option<DevicePair> {
let rest = key.strip_prefix("state.")?;
let (device, deployment) = rest.split_once('.')?;
Some(DevicePair {
device_id: device.to_string(),
deployment: DeploymentName::try_new(deployment).ok()?,
})
}
async fn run_state_kv_watcher(bucket: Store, state: SharedFleetState) -> anyhow::Result<()> {
let mut watch = bucket.watch_with_history(">").await?;
while let Some(entry_res) = watch.next().await {
let entry = match entry_res {
Ok(e) => e,
Err(e) => {
tracing::warn!(error = %e, "aggregator: state watch delivery error");
continue;
}
};
let Some(pair) = parse_state_key(&entry.key) else {
continue;
};
match entry.operation {
Operation::Put => {
let ds: DeploymentState = match serde_json::from_slice(&entry.value) {
Ok(d) => d,
Err(e) => {
tracing::warn!(key = %entry.key, error = %e, "aggregator: bad device_state payload");
continue;
}
};
let mut guard = state.lock().await;
apply_state(&mut guard, pair, ds);
}
Operation::Delete | Operation::Purge => {
let mut guard = state.lock().await;
drop_state(&mut guard, &pair);
}
}
}
Ok(())
}
/// Record a device's latest state, dedup against older timestamps,
/// maintain last_error, mark the deployment dirty.
pub fn apply_state(state: &mut FleetState, pair: DevicePair, ds: DeploymentState) {
if let Some(prev) = state.states.get(&pair) {
if prev.last_event_at > ds.last_event_at {
return;
}
}
let phase = ds.phase;
let device_id = ds.device_id.to_string();
let last_error_msg = ds.last_error.clone();
let at = ds.last_event_at.to_rfc3339();
state.states.insert(pair.clone(), ds);
for key in matching_deployment_keys(state, &pair.deployment) {
match phase {
Phase::Failed => {
if let Some(msg) = last_error_msg.as_deref() {
state.last_error.insert(
key.clone(),
AggregateLastError {
device_id: device_id.clone(),
message: msg.to_string(),
at: at.clone(),
},
);
}
}
Phase::Running => {
if let Some(existing) = state.last_error.get(&key) {
if existing.device_id == device_id {
state.last_error.remove(&key);
}
}
}
Phase::Pending => {}
}
state.dirty.insert(key);
}
}
pub fn drop_state(state: &mut FleetState, pair: &DevicePair) {
let Some(removed) = state.states.remove(pair) else {
return;
};
let device_id = removed.device_id.to_string();
for key in matching_deployment_keys(state, &pair.deployment) {
if let Some(existing) = state.last_error.get(&key) {
if existing.device_id == device_id {
state.last_error.remove(&key);
}
}
state.dirty.insert(key);
}
}
/// CR keys that carry a given deployment name. Deployment names are
/// globally unique at the KV level, so typically 0 or 1 entry here;
/// returning a Vec keeps duplicates representable rather than
/// panicking if a misconfigured cluster reuses a name across
/// namespaces.
fn matching_deployment_keys(state: &FleetState, deployment: &DeploymentName) -> Vec<DeploymentKey> {
state
.deployments
.values()
.filter(|d| &d.deployment_name == deployment)
.map(|d| d.key.clone())
.collect()
}
// ---------------------------------------------------------------------------
// Deployment CR watcher
// ---------------------------------------------------------------------------
async fn run_deployment_watcher(
api: Api<Deployment>,
state: SharedFleetState,
desired: Store,
) -> anyhow::Result<()> {
let mut stream = watcher::watcher(api, WatcherConfig::default()).boxed();
while let Some(event) = stream.try_next().await? {
match event {
Event::Apply(cr) | Event::InitApply(cr) => {
on_deployment_upsert(&state, &desired, cr).await;
}
Event::Delete(cr) => {
on_deployment_delete(&state, &desired, cr).await;
}
Event::Init | Event::InitDone => {}
}
}
Ok(())
}
async fn on_deployment_upsert(state: &SharedFleetState, desired: &Store, cr: Deployment) {
let Some(key) = DeploymentKey::from_cr(&cr) else {
return;
};
let Ok(deployment_name) = DeploymentName::try_new(&key.name) else {
tracing::warn!(name = %key.name, "aggregator: CR name is not a valid DeploymentName, skipping");
return;
};
let selector = cr.spec.target_selector.clone();
let score_json = match serde_json::to_vec(&cr.spec.score) {
Ok(v) => v,
Err(e) => {
tracing::warn!(namespace = %key.namespace, name = %key.name, error = %e, "aggregator: score payload not serializable");
return;
}
};
let (new_targets, previous_targets) = {
let mut guard = state.lock().await;
let new_targets = matched_devices(&selector, &guard.devices);
guard.deployments.insert(
key.clone(),
CachedDeployment {
key: key.clone(),
deployment_name: deployment_name.clone(),
selector: selector.clone(),
score_json: score_json.clone(),
},
);
let previous = guard
.owned_targets
.remove(&deployment_name)
.unwrap_or_default();
guard
.owned_targets
.insert(deployment_name.clone(), new_targets.clone());
guard.dirty.insert(key.clone());
(new_targets, previous)
};
reconcile_kv(
desired,
&deployment_name,
&new_targets,
&previous_targets,
&score_json,
)
.await;
}
async fn on_deployment_delete(state: &SharedFleetState, desired: &Store, cr: Deployment) {
let Some(key) = DeploymentKey::from_cr(&cr) else {
return;
};
let Ok(deployment_name) = DeploymentName::try_new(&key.name) else {
return;
};
let previous = {
let mut guard = state.lock().await;
guard.deployments.remove(&key);
guard.last_error.remove(&key);
guard.dirty.remove(&key);
guard
.owned_targets
.remove(&deployment_name)
.unwrap_or_default()
};
    // Every previously-owned target becomes a KV delete. The
    // controller's finalizer does a belt-and-suspenders scan, but we
    // delete our own entries here too so agents react immediately.
for device in &previous {
let k = desired_state_key(device, &deployment_name);
if let Err(e) = desired.delete(&k).await {
tracing::debug!(key = %k, error = %e, "aggregator: desired-state delete on CR delete failed");
}
}
}
// ---------------------------------------------------------------------------
// Device CR watcher
// ---------------------------------------------------------------------------
async fn run_device_watcher(
api: Api<Device>,
state: SharedFleetState,
desired: Store,
) -> anyhow::Result<()> {
let mut stream = watcher::watcher(api, WatcherConfig::default()).boxed();
while let Some(event) = stream.try_next().await? {
match event {
Event::Apply(dev) | Event::InitApply(dev) => {
on_device_upsert(&state, &desired, dev).await;
}
Event::Delete(dev) => {
on_device_delete(&state, &desired, dev).await;
}
Event::Init | Event::InitDone => {}
}
}
Ok(())
}
async fn on_device_upsert(state: &SharedFleetState, desired: &Store, dev: Device) {
let name = dev.name_any();
let labels: BTreeMap<String, String> = dev.metadata.labels.clone().unwrap_or_default();
// For every deployment, compute whether this single device now
// matches vs. previously matched; diff against owned_targets;
// collect the KV writes/deletes to perform after the lock is
// released.
let per_deployment: Vec<(CachedDeployment, bool, bool)> = {
let mut guard = state.lock().await;
let snapshot: Vec<CachedDeployment> = guard.deployments.values().cloned().collect();
let previously_matched: HashMap<DeploymentName, bool> = snapshot
.iter()
.map(|d| {
let was = guard
.owned_targets
.get(&d.deployment_name)
.is_some_and(|set| set.contains(&name));
(d.deployment_name.clone(), was)
})
.collect();
guard.devices.insert(name.clone(), labels.clone());
let mut out = Vec::with_capacity(snapshot.len());
for d in snapshot {
let was = previously_matched
.get(&d.deployment_name)
.copied()
.unwrap_or(false);
let now = selector_matches(&d.selector, &labels);
if was != now {
let targets = guard
.owned_targets
.entry(d.deployment_name.clone())
.or_default();
if now {
targets.insert(name.clone());
} else {
targets.remove(&name);
}
guard.dirty.insert(d.key.clone());
}
out.push((d, was, now));
}
out
};
for (cached, was, now) in per_deployment {
match (was, now) {
(false, true) => {
let k = desired_state_key(&name, &cached.deployment_name);
if let Err(e) = desired.put(&k, cached.score_json.clone().into()).await {
tracing::debug!(key = %k, error = %e, "aggregator: desired-state put failed");
}
}
(true, false) => {
let k = desired_state_key(&name, &cached.deployment_name);
if let Err(e) = desired.delete(&k).await {
tracing::debug!(key = %k, error = %e, "aggregator: desired-state delete failed");
}
}
_ => {}
}
}
}
async fn on_device_delete(state: &SharedFleetState, desired: &Store, dev: Device) {
let name = dev.name_any();
let removed_from: Vec<DeploymentName> = {
let mut guard = state.lock().await;
guard.devices.remove(&name);
let mut out = Vec::new();
let deployments_snapshot: Vec<CachedDeployment> =
guard.deployments.values().cloned().collect();
for cached in deployments_snapshot {
if let Some(set) = guard.owned_targets.get_mut(&cached.deployment_name) {
if set.remove(&name) {
out.push(cached.deployment_name.clone());
guard.dirty.insert(cached.key.clone());
}
}
}
out
};
for deployment_name in removed_from {
let k = desired_state_key(&name, &deployment_name);
if let Err(e) = desired.delete(&k).await {
tracing::debug!(key = %k, error = %e, "aggregator: desired-state delete on device delete failed");
}
}
}
// ---------------------------------------------------------------------------
// Diff helper: write/delete desired-state entries for one deployment
// ---------------------------------------------------------------------------
async fn reconcile_kv(
desired: &Store,
deployment_name: &DeploymentName,
new_targets: &HashSet<String>,
previous_targets: &HashSet<String>,
score_json: &[u8],
) {
// Writes: new_targets, unconditionally — idempotent put; agents
// byte-compare and no-op on unchanged content.
for device in new_targets {
let k = desired_state_key(device, deployment_name);
if let Err(e) = desired.put(&k, score_json.to_vec().into()).await {
tracing::debug!(key = %k, error = %e, "aggregator: desired-state put failed");
}
}
// Deletes: anything we owned previously but no longer target.
for device in previous_targets.difference(new_targets) {
let k = desired_state_key(device, deployment_name);
if let Err(e) = desired.delete(&k).await {
tracing::debug!(key = %k, error = %e, "aggregator: desired-state delete failed");
}
}
}
/// Initialize `owned_targets` from the current contents of the
/// `desired-state` KV. After a restart, we need to know what was
/// previously written so we can diff correctly on the first
/// watch-driven reconcile (otherwise we'd leak orphans when a
/// selector change causes a deployment to stop targeting a device).
async fn seed_owned_targets(bucket: &Store, state: &SharedFleetState) -> anyhow::Result<()> {
let mut guard = state.lock().await;
let mut keys = bucket.keys().await?;
while let Some(key_res) = keys.next().await {
let key = key_res?;
// Keys are `<device>.<deployment>`. The KV key space carries
// no namespace — names are globally unique at this layer —
// which is exactly why `owned_targets` keys by DeploymentName.
let Some((device, deployment)) = key.split_once('.') else {
continue;
};
let Ok(deployment_name) = DeploymentName::try_new(deployment) else {
continue;
};
guard
.owned_targets
.entry(deployment_name)
.or_default()
.insert(device.to_string());
}
Ok(())
}
// ---------------------------------------------------------------------------
// Patch tick
// ---------------------------------------------------------------------------
async fn patch_tick(api: &Api<Deployment>, state: &SharedFleetState) -> anyhow::Result<()> {
let dirty: Vec<(DeploymentKey, DeploymentAggregate)> = {
let mut guard = state.lock().await;
let keys: Vec<DeploymentKey> = guard.dirty.drain().collect();
keys.iter()
.filter_map(|k| {
let cached = guard.deployments.get(k)?.clone();
let agg = compute_aggregate(&guard, &cached);
Some((k.clone(), agg))
})
.collect()
};
for (key, aggregate) in dirty {
let ns_api: Api<Deployment> = Api::namespaced(api.clone().into_client(), &key.namespace);
let status = json!({ "status": { "aggregate": aggregate } });
if let Err(e) = ns_api
.patch_status(&key.name, &PatchParams::default(), &Patch::Merge(&status))
.await
{
tracing::warn!(
namespace = %key.namespace,
name = %key.name,
error = %e,
"aggregator: status patch failed"
);
} else {
tracing::debug!(
namespace = %key.namespace,
name = %key.name,
matched = aggregate.matched_device_count,
succeeded = aggregate.succeeded,
failed = aggregate.failed,
pending = aggregate.pending,
"aggregator: status patched"
);
}
}
Ok(())
}
/// Compute the aggregate for one Deployment from current caches.
/// `owned_targets` is the authoritative "currently selector-matched"
/// set for the deployment, as maintained by the watchers.
pub fn compute_aggregate(state: &FleetState, cached: &CachedDeployment) -> DeploymentAggregate {
let empty = HashSet::new();
let targets = state
.owned_targets
.get(&cached.deployment_name)
.unwrap_or(&empty);
let mut agg = DeploymentAggregate {
matched_device_count: targets.len() as u32,
..Default::default()
};
for device_id in targets {
let pair = DevicePair {
device_id: device_id.clone(),
deployment: cached.deployment_name.clone(),
};
match state.states.get(&pair).map(|s| s.phase) {
Some(Phase::Running) => agg.succeeded += 1,
Some(Phase::Failed) => agg.failed += 1,
Some(Phase::Pending) | None => agg.pending += 1,
}
}
agg.last_error = state.last_error.get(&cached.key).cloned();
agg
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::{TimeZone, Utc};
use harmony_reconciler_contracts::Id;
fn dn(s: &str) -> DeploymentName {
DeploymentName::try_new(s).expect("valid test name")
}
fn state(device: &str, deployment: &str, phase: Phase, seconds: i64) -> DeploymentState {
DeploymentState {
device_id: Id::from(device.to_string()),
deployment: dn(deployment),
phase,
last_event_at: Utc.timestamp_opt(1_700_000_000 + seconds, 0).unwrap(),
last_error: None,
}
}
fn cached(namespace: &str, name: &str, match_key: &str, match_val: &str) -> CachedDeployment {
let mut ml = BTreeMap::new();
ml.insert(match_key.to_string(), match_val.to_string());
CachedDeployment {
key: DeploymentKey {
namespace: namespace.to_string(),
name: name.to_string(),
},
deployment_name: dn(name),
selector: LabelSelector {
match_labels: Some(ml),
match_expressions: None,
},
score_json: b"{}".to_vec(),
}
}
fn pair(device: &str, deployment: &str) -> DevicePair {
DevicePair {
device_id: device.to_string(),
deployment: dn(deployment),
}
}
#[test]
fn selector_match_labels_only() {
let mut ml = BTreeMap::new();
ml.insert("group".to_string(), "edge-a".to_string());
let sel = LabelSelector {
match_labels: Some(ml),
match_expressions: None,
};
let mut matching = BTreeMap::new();
matching.insert("group".to_string(), "edge-a".to_string());
matching.insert("arch".to_string(), "aarch64".to_string());
assert!(selector_matches(&sel, &matching));
let mut non_matching = BTreeMap::new();
non_matching.insert("group".to_string(), "edge-b".to_string());
assert!(!selector_matches(&sel, &non_matching));
let empty = BTreeMap::new();
assert!(!selector_matches(&sel, &empty));
}
#[test]
fn empty_selector_matches_everything() {
let sel = LabelSelector::default();
let mut labels = BTreeMap::new();
labels.insert("anything".to_string(), "goes".to_string());
assert!(selector_matches(&sel, &labels));
assert!(selector_matches(&sel, &BTreeMap::new()));
}
#[test]
fn compute_aggregate_counts_matched_devices() {
let cached = cached("fleet-demo", "hello", "group", "edge-a");
let key = cached.key.clone();
let mut s = FleetState::default();
s.deployments.insert(key, cached.clone());
// Three devices already in owned_targets (selector resolution
// is separate from the aggregate; aggregate reads owned_targets).
s.owned_targets.insert(
cached.deployment_name.clone(),
["pi-01", "pi-02", "pi-03"]
.iter()
.map(|s| s.to_string())
.collect(),
);
s.states.insert(
pair("pi-01", "hello"),
state("pi-01", "hello", Phase::Running, 0),
);
s.states.insert(
pair("pi-02", "hello"),
state("pi-02", "hello", Phase::Failed, 0),
);
// pi-03 has no state entry → pending
let agg = compute_aggregate(&s, &cached);
assert_eq!(agg.matched_device_count, 3);
assert_eq!(agg.succeeded, 1);
assert_eq!(agg.failed, 1);
assert_eq!(agg.pending, 1);
}
#[test]
fn matched_devices_picks_by_label() {
let mut ml = BTreeMap::new();
ml.insert("group".to_string(), "edge-a".to_string());
let sel = LabelSelector {
match_labels: Some(ml),
match_expressions: None,
};
let mut devices: HashMap<String, BTreeMap<String, String>> = HashMap::new();
let mut a = BTreeMap::new();
a.insert("group".to_string(), "edge-a".to_string());
devices.insert("pi-01".to_string(), a);
let mut b = BTreeMap::new();
b.insert("group".to_string(), "edge-b".to_string());
devices.insert("pi-02".to_string(), b);
let matched = matched_devices(&sel, &devices);
assert_eq!(matched.len(), 1);
assert!(matched.contains("pi-01"));
}
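    // Illustrative addition (not part of the original change set):
    // pin the state-KV key grammar parsed above.
    #[test]
    fn parse_state_key_grammar() {
        let p = parse_state_key("state.pi-01.hello").expect("well-formed key");
        assert_eq!(p.device_id, "pi-01");
        assert_eq!(p.deployment, dn("hello"));
        assert!(parse_state_key("pi-01.hello").is_none()); // missing `state.` prefix
        assert!(parse_state_key("state.orphan").is_none()); // no deployment segment
    }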
}

View File

@@ -0,0 +1,46 @@
//! Install the operator's CRD into a target Kubernetes cluster
//! via a harmony Score — no yaml generation, no kubectl shell-out.
//!
//! The Score is just [`K8sResourceScore`] over `Deployment::crd()`;
//! the topology is the shared `K8sBareTopology`, which exposes a
//! `K8sclient` backed by the caller's `KUBECONFIG` without dragging
//! in `K8sAnywhereTopology`'s product-level `ensure_ready`.
use anyhow::{Context, Result};
use harmony::inventory::Inventory;
use harmony::modules::k8s::K8sBareTopology;
use harmony::modules::k8s::resource::K8sResourceScore;
use harmony::score::Score;
use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
use kube::CustomResourceExt;
use crate::crd::{Deployment, Device};
/// Apply the operator's CRDs to whatever cluster `KUBECONFIG` points
/// at. Returns once the apply call completes — does **not** wait for
/// the apiserver to mark the CRD `Established`; the caller does that
/// (e.g. with `kubectl wait --for=condition=Established`) if it
/// cares.
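/// The concrete resources to wait on are
/// `crd/deployments.fleet.nationtech.io` and
/// `crd/devices.fleet.nationtech.io`; the names follow from the
/// plural + group declared in `crd.rs`.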
pub async fn install_crds() -> Result<()> {
let topology = K8sBareTopology::from_kubeconfig("harmony-fleet-operator-install")
.await
.map_err(|e| anyhow::anyhow!(e))
.context("building K8sBareTopology from KUBECONFIG")?;
let inventory = Inventory::empty();
let crds: Vec<CustomResourceDefinition> = vec![Deployment::crd(), Device::crd()];
let score = K8sResourceScore::<CustomResourceDefinition> {
resource: crds,
namespace: None,
};
let interpret = Score::<K8sBareTopology>::create_interpret(&score);
let outcome = interpret
.execute(&inventory, &topology)
.await
.map_err(|e| anyhow::anyhow!("install CRD: {e}"))
.context("executing K8sResourceScore for Deployment CRD")?;
tracing::info!(?outcome, "CRD installed");
Ok(())
}

View File

@@ -0,0 +1,11 @@
//! Library surface of the IoT operator crate.
//!
//! Most of the crate is a binary (reconcile loop, install subcommand).
//! The CRD type definitions are exposed here as a library so external
//! consumers — tooling that applies CRs, tests, documentation generators
//! — can import the typed `Deployment`, `DeploymentSpec`,
//! `ScorePayload`, etc. without duplicating them.
pub mod crd;
pub mod device_reconciler;
pub mod fleet_aggregator;

View File

@@ -0,0 +1,146 @@
mod chart;
mod controller;
mod install;
use harmony_fleet_operator::{crd, device_reconciler, fleet_aggregator};
use anyhow::Result;
use async_nats::jetstream;
use clap::{Parser, Subcommand};
use harmony_reconciler_contracts::BUCKET_DESIRED_STATE;
use kube::Client;
use std::path::PathBuf;
#[derive(Parser)]
#[command(
name = "harmony-fleet-operator",
about = "IoT operator — Deployment CRD → NATS KV"
)]
struct Cli {
#[command(subcommand)]
command: Option<Command>,
#[arg(
long,
env = "NATS_URL",
default_value = "nats://localhost:4222",
global = true
)]
nats_url: String,
#[arg(
long,
env = "KV_BUCKET",
default_value = BUCKET_DESIRED_STATE,
global = true
)]
kv_bucket: String,
}
#[derive(Subcommand)]
enum Command {
/// Run the controller (default when no subcommand is given).
Run,
/// Apply the operator's CRDs to the cluster `KUBECONFIG` points
/// at. Uses harmony's typed k8s client — no yaml, no kubectl.
Install,
/// Generate a helm chart directory that installs the operator
/// in-cluster (Deployment + RBAC + CRDs). Prints the written
/// chart path on success; `helm install <path>` takes it from
/// there. No registry publish — the chart lives on disk.
Chart {
#[arg(long, default_value = "/tmp/fleet-load-test/chart")]
output: PathBuf,
#[arg(long, default_value = "localhost/harmony-fleet-operator:latest")]
image: String,
#[arg(long, default_value = "IfNotPresent")]
image_pull_policy: String,
#[arg(long, default_value = "fleet-system")]
namespace: String,
#[arg(long, default_value = "nats://fleet-nats.fleet-system:4222")]
nats_url: String,
#[arg(long, default_value = "info,kube_runtime=warn")]
log_level: String,
},
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let cli = Cli::parse();
match cli.command.unwrap_or(Command::Run) {
Command::Install => install::install_crds().await,
Command::Run => run(&cli.nats_url, &cli.kv_bucket).await,
Command::Chart {
output,
image,
image_pull_policy,
namespace,
nats_url,
log_level,
} => {
let written = chart::build_chart(&chart::ChartOptions {
output_dir: output,
image,
image_pull_policy,
namespace,
nats_url,
log_level,
})?;
println!("{}", written.display());
Ok(())
}
}
}
async fn run(nats_url: &str, bucket: &str) -> Result<()> {
// Retry on the initial connect — startup races against the NATS
// server becoming fully ready.
let nats = connect_with_retry(nats_url).await?;
tracing::info!(url = %nats_url, "connected to NATS");
let js = jetstream::new(nats);
let desired_state_kv = js
.create_key_value(jetstream::kv::Config {
bucket: bucket.to_string(),
..Default::default()
})
.await?;
tracing::info!(bucket = %bucket, "KV bucket ready");
let client = Client::try_default().await?;
// Three concurrent tasks:
// controller — CR validation + finalizer-cleanup
// device_reconciler — NATS device-info → Device CR
// fleet_aggregator — watches Deployments + Devices + states,
// writes desired-state KV, patches CR status
    // Any one of them failing tears the process down; kube-rs Controller swallows
// its own transient reconcile errors.
let ctl_client = client.clone();
let dr_client = client.clone();
let dr_js = js.clone();
tokio::select! {
r = controller::run(ctl_client, desired_state_kv) => r,
r = device_reconciler::run(dr_client, dr_js) => r,
r = fleet_aggregator::run(client, js) => r,
}
}
async fn connect_with_retry(nats_url: &str) -> Result<async_nats::Client> {
use std::time::Duration;
let mut last_err: Option<anyhow::Error> = None;
for attempt in 0..15 {
match async_nats::connect(nats_url).await {
Ok(c) => return Ok(c),
Err(e) => {
tracing::warn!(attempt, error = %e, "NATS connect failed; retrying");
last_err = Some(e.into());
tokio::time::sleep(Duration::from_secs(2)).await;
}
}
}
Err(last_err.unwrap_or_else(|| anyhow::anyhow!("NATS connect failed after retries")))
}
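
The fixed 2 s sleep gives the connect loop a hard ~30 s ceiling. If cold NATS starts ever exceed that (TCG-emulated hosts being the obvious suspect), a capped exponential backoff is the conventional replacement; a sketch, not what the binary ships today:

```rust
use std::time::Duration;

// Same contract as connect_with_retry, but doubling the delay up to a
// 16 s cap instead of sleeping a flat 2 s between attempts.
async fn connect_with_backoff(nats_url: &str) -> anyhow::Result<async_nats::Client> {
    let mut delay = Duration::from_millis(500);
    let cap = Duration::from_secs(16);
    for attempt in 0..10 {
        match async_nats::connect(nats_url).await {
            Ok(c) => return Ok(c),
            Err(e) => {
                tracing::warn!(attempt, error = %e, ?delay, "NATS connect failed; backing off");
                tokio::time::sleep(delay).await;
                delay = (delay * 2).min(cap);
            }
        }
    }
    anyhow::bail!("NATS connect failed after backoff retries")
}
```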

301
fleet/scripts/load-test.sh Executable file
View File

@@ -0,0 +1,301 @@
#!/usr/bin/env bash
# Load-test harness for the Harmony fleet operator's fleet_aggregator.
#
# Brings up the minimum stack (k3d + in-cluster NATS + CRD + operator)
# with no VM or real agent, then runs the `fleet_load_test` binary
# which simulates N devices pushing DeploymentState to NATS.
#
# All stable paths under $WORK_DIR (default /tmp/fleet-load-test) so you
# can point kubectl / tail at them while the test is running.
#
# Quick usage:
# fleet/scripts/load-test.sh # 100-device default (55 + 9×5)
# HOLD=1 fleet/scripts/load-test.sh # leave stack running for exploration
# DEVICES=10000 GROUP_SIZES=5500,500,500,500,500,500,500,500,500,500 \
# DURATION=90 fleet/scripts/load-test.sh
#
# While it's running, in another terminal:
# export KUBECONFIG=/tmp/fleet-load-test/kubeconfig
# kubectl get deployments.fleet.nationtech.io -A -w
# kubectl get deployments.fleet.nationtech.io -A \
# -o custom-columns=NAME:.metadata.name,RUN:.status.aggregate.succeeded,FAIL:.status.aggregate.failed,PEND:.status.aggregate.pending
# tail -f /tmp/fleet-load-test/operator.log
#
# Set DEBUG=1 to bump RUST_LOG so the operator logs every status patch.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
OPERATOR_DIR="$REPO_ROOT/fleet/harmony-fleet-operator"
# ---- config -----------------------------------------------------------------
K3D_BIN="${K3D_BIN:-$HOME/.local/share/harmony/k3d/k3d}"
CLUSTER_NAME="${CLUSTER_NAME:-fleet-load}"
NATS_NAMESPACE="${NATS_NAMESPACE:-fleet-system}"
NATS_NAME="${NATS_NAME:-fleet-nats}"
NATS_NODE_PORT="${NATS_NODE_PORT:-4222}"
NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}"
DEVICES="${DEVICES:-100}"
GROUP_SIZES="${GROUP_SIZES:-55,5,5,5,5,5,5,5,5,5}"
TICK_MS="${TICK_MS:-1000}"
DURATION="${DURATION:-60}"
NAMESPACE="${NAMESPACE:-fleet-load}"
# Keep the stack alive after the test completes so the user can poke
# at CRs + NATS interactively. Ctrl-C to tear everything down.
HOLD="${HOLD:-0}"
# Stable working dir so kubectl + tail targets are predictable.
WORK_DIR="${WORK_DIR:-/tmp/fleet-load-test}"
mkdir -p "$WORK_DIR"
KUBECONFIG_FILE="$WORK_DIR/kubeconfig"
OPERATOR_LOG="$WORK_DIR/operator.log"
CHART_DIR="$WORK_DIR/chart"
OPERATOR_IMAGE="${OPERATOR_IMAGE:-localhost/harmony-fleet-operator:latest}"
OPERATOR_NAMESPACE="${OPERATOR_NAMESPACE:-fleet-system}"
OPERATOR_RELEASE="${OPERATOR_RELEASE:-harmony-fleet-operator}"
OPERATOR_PID="" # unused in the helm path; kept so older trap-cleanup logic doesn't choke.
log() { printf '\033[1;34m[load-test]\033[0m %s\n' "$*"; }
fail() { printf '\033[1;31m[load-test FAIL]\033[0m %s\n' "$*" >&2; exit 1; }
dump_operator_log() {
[[ -n "$KUBECONFIG" && -f "$KUBECONFIG" ]] || return 0
kubectl -n "$OPERATOR_NAMESPACE" logs "deployment/$OPERATOR_RELEASE" \
--tail=1000 >"$OPERATOR_LOG" 2>/dev/null || true
}
cleanup() {
local rc=$?
log "cleanup…"
# Capture the operator's in-cluster log before we kill the
# cluster, so the tail-on-failure hook has something to show.
dump_operator_log
"$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true
if [[ $rc -ne 0 && -s "$OPERATOR_LOG" ]]; then
log "operator log at $OPERATOR_LOG (kept for inspection)"
echo "----- operator log tail -----"
tail -n 60 "$OPERATOR_LOG" 2>/dev/null || true
elif [[ -s "$OPERATOR_LOG" ]]; then
log "operator log at $OPERATOR_LOG"
fi
exit $rc
}
trap cleanup EXIT INT TERM
require() { command -v "$1" >/dev/null 2>&1 || fail "missing required tool: $1"; }
require cargo
require kubectl
require podman
require docker
require helm
[[ -x "$K3D_BIN" ]] || fail "k3d binary not executable at $K3D_BIN"
# ---- phase 1: k3d cluster ---------------------------------------------------
log "phase 1: create k3d cluster '$CLUSTER_NAME' (host port $NATS_NODE_PORT → loadbalancer)"
"$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true
"$K3D_BIN" cluster create "$CLUSTER_NAME" \
--wait --timeout 90s \
-p "${NATS_NODE_PORT}:${NATS_NODE_PORT}@loadbalancer" \
>/dev/null
"$K3D_BIN" kubeconfig get "$CLUSTER_NAME" > "$KUBECONFIG_FILE"
export KUBECONFIG="$KUBECONFIG_FILE"
# ---- phase 2: NATS in-cluster ------------------------------------------------
log "phase 2a: sideload NATS image ($NATS_IMAGE)"
if ! docker image inspect "$NATS_IMAGE" >/dev/null 2>&1; then
if ! podman image inspect "$NATS_IMAGE" >/dev/null 2>&1; then
podman pull "$NATS_IMAGE" >/dev/null || fail "podman pull $NATS_IMAGE failed"
fi
tmptar="$(mktemp -t nats-image.XXXXXX.tar)"
podman save "$NATS_IMAGE" -o "$tmptar" >/dev/null
docker load -i "$tmptar" >/dev/null
rm -f "$tmptar"
fi
"$K3D_BIN" image import "$NATS_IMAGE" -c "$CLUSTER_NAME" >/dev/null
log "phase 2b: install NATS via NatsBasicScore"
(
cd "$REPO_ROOT"
cargo run -q --release -p example_fleet_nats_install -- \
--namespace "$NATS_NAMESPACE" \
--name "$NATS_NAME" \
--expose load-balancer
)
# The upstream nats/nats helm chart provisions a StatefulSet, not a
# Deployment. Waiting on the pod-label condition works across both
# shapes without hardcoding a workload kind.
kubectl -n "$NATS_NAMESPACE" wait --for=condition=Ready \
"pod" -l "app.kubernetes.io/name=nats" --timeout=180s >/dev/null
log "probing nats://localhost:$NATS_NODE_PORT end-to-end"
for _ in $(seq 1 60); do
(echo >"/dev/tcp/127.0.0.1/$NATS_NODE_PORT") 2>/dev/null && break
sleep 1
done
(echo >"/dev/tcp/127.0.0.1/$NATS_NODE_PORT") 2>/dev/null \
|| fail "TCP localhost:$NATS_NODE_PORT never came up"
# ---- phase 3: operator container image + helm install ---------------------
log "phase 3a: build operator release binary"
(
cd "$REPO_ROOT"
cargo build -q --release -p harmony-fleet-operator
)
log "phase 3b: build container image $OPERATOR_IMAGE"
# The workspace's top-level .dockerignore excludes target/, the
# right default for most container builds, but target/ holds exactly
# the binary we need here. Stage the release binary into a dedicated
# clean build context so the Dockerfile's COPY sees it.
IMAGE_CTX="$WORK_DIR/image-ctx"
rm -rf "$IMAGE_CTX"
mkdir -p "$IMAGE_CTX/target/release"
cp "$REPO_ROOT/target/release/harmony-fleet-operator" "$IMAGE_CTX/target/release/harmony-fleet-operator"
cp "$REPO_ROOT/fleet/harmony-fleet-operator/Dockerfile" "$IMAGE_CTX/Dockerfile"
podman build -q -t "$OPERATOR_IMAGE" "$IMAGE_CTX" >/dev/null
log "phase 3c: sideload operator image into k3d cluster"
tmptar="$(mktemp -t harmony-fleet-operator-image.XXXXXX.tar)"
podman save "$OPERATOR_IMAGE" -o "$tmptar" >/dev/null
docker load -i "$tmptar" >/dev/null
rm -f "$tmptar"
"$K3D_BIN" image import "$OPERATOR_IMAGE" -c "$CLUSTER_NAME" >/dev/null
log "phase 3d: generate helm chart + install operator in-cluster"
# DEBUG=1 bumps operator logging so `kubectl logs` prints every
# status patch + transition.
if [[ "${DEBUG:-0}" == "1" ]]; then
OPERATOR_RUST_LOG="debug,async_nats=warn,hyper=warn,rustls=warn,kube=info"
else
OPERATOR_RUST_LOG="info,kube_runtime=warn"
fi
rm -rf "$CHART_DIR"
mkdir -p "$CHART_DIR"
(
cd "$OPERATOR_DIR"
cargo run -q -- chart \
--output "$CHART_DIR" \
--image "$OPERATOR_IMAGE" \
--image-pull-policy IfNotPresent \
--namespace "$OPERATOR_NAMESPACE" \
--nats-url "nats://${NATS_NAME}.${NATS_NAMESPACE}:4222" \
--log-level "$OPERATOR_RUST_LOG"
) >/dev/null
helm upgrade --install "$OPERATOR_RELEASE" "$CHART_DIR/$OPERATOR_RELEASE" \
--namespace "$OPERATOR_NAMESPACE" \
--create-namespace \
--wait --timeout 120s >/dev/null
kubectl wait --for=condition=Established \
"crd/deployments.fleet.nationtech.io" --timeout=30s >/dev/null
kubectl wait --for=condition=Established \
"crd/devices.fleet.nationtech.io" --timeout=30s >/dev/null
kubectl -n "$OPERATOR_NAMESPACE" wait --for=condition=Available \
"deployment/$OPERATOR_RELEASE" --timeout=120s >/dev/null
# Seed the operator log file from the pod so HOLD=1 banner + final
# summary both have something to read. We re-dump on cleanup.
dump_operator_log
# ---- phase 4: explore banner (before the load run so the user can start watching) ----
print_banner() {
cat <<EOF
$(printf '\033[1;32m[load-test]\033[0m stack ready. In another terminal:')
$(printf '\033[1mPoint kubectl at the k3d cluster:\033[0m')
export KUBECONFIG=$KUBECONFIG_FILE
$(printf '\033[1mWatch CRs as they update:\033[0m')
kubectl -n $NAMESPACE get deployments.fleet.nationtech.io -w
$(printf '\033[1mSnapshot aggregate columns:\033[0m')
kubectl -n $NAMESPACE get deployments.fleet.nationtech.io \\
-o custom-columns=NAME:.metadata.name,MATCHED:.status.aggregate.matchedDeviceCount,OK:.status.aggregate.succeeded,FAIL:.status.aggregate.failed,PEND:.status.aggregate.pending,LAST_ERR:.status.aggregate.lastError.message
$(printf '\033[1mInspect a Deployment spec (no device list — selector only):\033[0m')
kubectl -n $NAMESPACE get deployments.fleet.nationtech.io/load-group-00 -o jsonpath='{.spec}' | jq
$(printf '\033[1mFull CR status JSON for one CR:\033[0m')
kubectl -n $NAMESPACE get deployments.fleet.nationtech.io/load-group-00 -o jsonpath='{.status.aggregate}' | jq
$(printf '\033[1mList Devices + filter by label:\033[0m')
kubectl get devices.fleet.nationtech.io | head -20
kubectl get devices.fleet.nationtech.io -l group=load-group-00 | head -10
kubectl get device.fleet.nationtech.io load-dev-00001 -o yaml
$(printf '\033[1mOperator log (in-cluster pod):\033[0m')
kubectl -n $OPERATOR_NAMESPACE logs -f deployment/$OPERATOR_RELEASE
# or the last snapshot dumped by the harness:
tail -F $OPERATOR_LOG
$(printf '\033[1mPeek at NATS KV directly (natsbox):\033[0m')
alias natsbox='podman run --rm docker.io/natsio/nats-box:latest nats --server nats://host.containers.internal:$NATS_NODE_PORT'
natsbox kv ls device-state
natsbox kv get device-state 'state.load-dev-00001.load-group-00' --raw
natsbox kv ls device-heartbeat
natsbox kv get device-heartbeat 'heartbeat.load-dev-00001' --raw
EOF
}
print_banner
# ---- phase 5: load test ------------------------------------------------------
log "phase 5: run fleet_load_test (devices=$DEVICES, tick=${TICK_MS}ms, duration=${DURATION}s)"
(
cd "$REPO_ROOT"
cargo build -q --release -p example_fleet_load_test
)
# `--keep` keeps the CRs + KV entries around after the run so
# you can inspect steady-state aggregate numbers after duration elapses.
LOAD_ARGS=(
--nats-url "nats://localhost:$NATS_NODE_PORT"
--namespace "$NAMESPACE"
--groups "$GROUP_SIZES"
--tick-ms "$TICK_MS"
--duration-s "$DURATION"
)
if [[ "$HOLD" == "1" ]]; then
LOAD_ARGS+=(--keep)
fi
RUST_LOG="info" "$REPO_ROOT/target/release/fleet_load_test" "${LOAD_ARGS[@]}"
# ---- phase 6: operator log stats --------------------------------------------
log "phase 6: operator log summary"
dump_operator_log
patches="$(grep -c "aggregator: status patched" "$OPERATOR_LOG" 2>/dev/null || echo 0)"
warnings="$(grep -c " WARN " "$OPERATOR_LOG" 2>/dev/null || echo 0)"
errors="$(grep -c " ERROR " "$OPERATOR_LOG" 2>/dev/null || echo 0)"
log " CR status patches logged (DEBUG-level; use DEBUG=1 to surface): $patches"
log " operator warnings: $warnings errors: $errors"
if [[ "$errors" -gt 0 ]]; then
echo "----- operator error lines -----"
grep " ERROR " "$OPERATOR_LOG" | tail -20
fi
# ---- hold open (optional) ---------------------------------------------------
if [[ "$HOLD" == "1" ]]; then
print_banner
log "HOLD=1 — stack is still running. Ctrl-C to tear down."
# Block until user interrupts; cleanup trap does the teardown.
while true; do sleep 60; done
fi
log "PASS"

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env bash
# End-to-end smoke test for the IoT walking skeleton (ROADMAP/iot_platform/
# End-to-end smoke test for the IoT walking skeleton (ROADMAP/fleet_platform/
# v0_walking_skeleton.md §9.A1 and §5.4 agent dispatch).
#
# Deployment CR ─apply─▶ operator ─KV put─▶ NATS ◀─watch─ agent ─podman─▶ nginx
@@ -22,25 +22,25 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
OPERATOR_DIR="$REPO_ROOT/iot/iot-operator-v0"
AGENT_DIR="$REPO_ROOT/iot/iot-agent-v0"
OPERATOR_DIR="$REPO_ROOT/fleet/harmony-fleet-operator"
AGENT_DIR="$REPO_ROOT/fleet/harmony-fleet-agent"
K3D_BIN="${K3D_BIN:-$HOME/.local/share/harmony/k3d/k3d}"
CLUSTER_NAME="${CLUSTER_NAME:-iot-smoke}"
NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats}"
NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net}"
CLUSTER_NAME="${CLUSTER_NAME:-fleet-smoke}"
NATS_CONTAINER="${NATS_CONTAINER:-fleet-smoke-nats}"
NATS_NET_NAME="${NATS_NET_NAME:-fleet-smoke-net}"
NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}"
NATSBOX_IMAGE="${NATSBOX_IMAGE:-docker.io/natsio/nats-box:latest}"
NATS_PORT="${NATS_PORT:-4222}"
TARGET_DEVICE="${TARGET_DEVICE:-pi-demo-01}"
DEPLOY_NAME="${DEPLOY_NAME:-hello-world}"
DEPLOY_NS="${DEPLOY_NS:-iot-demo}"
DEPLOY_NS="${DEPLOY_NS:-fleet-demo}"
HELLO_CONTAINER="${HELLO_CONTAINER:-hello}"
HELLO_PORT="${HELLO_PORT:-8080}"
OPERATOR_LOG="$(mktemp -t iot-operator.XXXXXX.log)"
OPERATOR_LOG="$(mktemp -t harmony-fleet-operator.XXXXXX.log)"
OPERATOR_PID=""
AGENT_LOG="$(mktemp -t iot-agent.XXXXXX.log)"
AGENT_LOG="$(mktemp -t fleet-agent.XXXXXX.log)"
AGENT_PID=""
AGENT_CONFIG_FILE=""
KUBECONFIG_FILE=""
@@ -126,13 +126,13 @@ log "phase 2: create k3d cluster '$CLUSTER_NAME'"
"$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true
"$K3D_BIN" cluster create "$CLUSTER_NAME" --wait --timeout 90s >/dev/null
KUBECONFIG_FILE="$(mktemp -t iot-smoke-kubeconfig.XXXXXX)"
KUBECONFIG_FILE="$(mktemp -t fleet-smoke-kubeconfig.XXXXXX)"
"$K3D_BIN" kubeconfig get "$CLUSTER_NAME" > "$KUBECONFIG_FILE"
export KUBECONFIG="$KUBECONFIG_FILE"
log "install CRD via operator's install subcommand (typed Rust — no yaml, no kubectl apply)"
( cd "$OPERATOR_DIR" && cargo run -q -- install ) >/dev/null
kubectl wait --for=condition=Established "crd/deployments.iot.nationtech.io" --timeout=30s >/dev/null
kubectl wait --for=condition=Established "crd/deployments.fleet.nationtech.io" --timeout=30s >/dev/null
kubectl get ns "$DEPLOY_NS" >/dev/null 2>&1 || kubectl create namespace "$DEPLOY_NS" >/dev/null
@@ -142,7 +142,7 @@ kubectl get ns "$DEPLOY_NS" >/dev/null 2>&1 || kubectl create namespace "$DEPLOY
###############################################################################
log "phase 2b: apiserver rejects invalid score.type"
BAD_CR=$(cat <<EOF
apiVersion: iot.nationtech.io/v1alpha1
apiVersion: fleet.nationtech.io/v1alpha1
kind: Deployment
metadata:
name: bad-discriminator
@@ -163,8 +163,8 @@ else
fail "expected CEL rejection for score.type='has spaces'; got: $BAD_OUT"
fi
# Belt-and-braces: make sure nothing was persisted
if kubectl -n "$DEPLOY_NS" get deployment.iot.nationtech.io bad-discriminator >/dev/null 2>&1; then
kubectl -n "$DEPLOY_NS" delete deployment.iot.nationtech.io bad-discriminator >/dev/null 2>&1 || true
if kubectl -n "$DEPLOY_NS" get deployment.fleet.nationtech.io bad-discriminator >/dev/null 2>&1; then
kubectl -n "$DEPLOY_NS" delete deployment.fleet.nationtech.io bad-discriminator >/dev/null 2>&1 || true
fail "apiserver should have rejected 'bad-discriminator' but it was persisted"
fi
@@ -179,7 +179,7 @@ log "phase 3: start operator"
NATS_URL="nats://127.0.0.1:$NATS_PORT" \
KV_BUCKET="desired-state" \
RUST_LOG="info,kube_runtime=warn" \
"$REPO_ROOT/target/debug/iot-operator-v0" \
"$REPO_ROOT/target/debug/harmony-fleet-operator" \
>"$OPERATOR_LOG" 2>&1 &
OPERATOR_PID=$!
log "operator pid=$OPERATOR_PID (log: $OPERATOR_LOG)"
@@ -207,7 +207,7 @@ log "phase 3b: build + start agent"
# doesn't occupy the host port before we even start.
podman rm -f "$HELLO_CONTAINER" >/dev/null 2>&1 || true
AGENT_CONFIG_FILE="$(mktemp -t iot-agent-config.XXXXXX.toml)"
AGENT_CONFIG_FILE="$(mktemp -t fleet-agent-config.XXXXXX.toml)"
cat >"$AGENT_CONFIG_FILE" <<EOF
[agent]
device_id = "$TARGET_DEVICE"
@@ -221,9 +221,9 @@ nats_pass = "smoke"
urls = ["nats://127.0.0.1:$NATS_PORT"]
EOF
IOT_AGENT_CONFIG="$AGENT_CONFIG_FILE" \
FLEET_AGENT_CONFIG="$AGENT_CONFIG_FILE" \
RUST_LOG="info,async_nats=warn" \
"$REPO_ROOT/target/debug/iot-agent-v0" \
"$REPO_ROOT/target/debug/harmony-fleet-agent" \
>"$AGENT_LOG" 2>&1 &
AGENT_PID=$!
log "agent pid=$AGENT_PID (log: $AGENT_LOG)"
@@ -241,7 +241,7 @@ grep -q "watching KV keys" "$AGENT_LOG" \
###############################################################################
log "phase 4: apply Deployment CR"
cat <<EOF | kubectl apply -f - >/dev/null
apiVersion: iot.nationtech.io/v1alpha1
apiVersion: fleet.nationtech.io/v1alpha1
kind: Deployment
metadata:
name: $DEPLOY_NAME
@@ -276,7 +276,7 @@ echo "$KV_VALUE" | grep -q '"image":"docker.io/library/nginx:alpine"' \
log "wait for .status.observedScoreString"
OBSERVED=""
for _ in $(seq 1 30); do
OBSERVED="$(kubectl -n "$DEPLOY_NS" get deployment.iot.nationtech.io "$DEPLOY_NAME" \
OBSERVED="$(kubectl -n "$DEPLOY_NS" get deployment.fleet.nationtech.io "$DEPLOY_NAME" \
-o jsonpath='{.status.observedScoreString}' 2>/dev/null || true)"
[[ -n "$OBSERVED" ]] && break
sleep 1
@@ -315,7 +315,7 @@ log "nginx responded"
# phase 5 — delete CR, expect cleanup via finalizer + agent
###############################################################################
log "phase 5: delete Deployment CR — finalizer + agent should remove KV and container"
kubectl -n "$DEPLOY_NS" delete deployment.iot.nationtech.io "$DEPLOY_NAME" --wait=true >/dev/null
kubectl -n "$DEPLOY_NS" delete deployment.fleet.nationtech.io "$DEPLOY_NAME" --wait=true >/dev/null
log "wait for KV key removal"
for _ in $(seq 1 30); do

View File

@@ -4,7 +4,7 @@
# native KVM when the host is already arm64).
#
# This is exactly equivalent to:
# ARCH=aarch64 VM_NAME=iot-smoke-vm-arm ./smoke-a3.sh
# ARCH=aarch64 VM_NAME=fleet-smoke-vm-arm ./smoke-a3.sh
# with the VM name defaulted so it can live alongside an x86-64
# smoke run on the same host without clobbering libvirt state.
@@ -13,9 +13,9 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export ARCH=aarch64
export VM_NAME="${VM_NAME:-iot-smoke-vm-arm}"
export VM_NAME="${VM_NAME:-fleet-smoke-vm-arm}"
export DEVICE_ID="${DEVICE_ID:-$VM_NAME}"
export NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats-a3-arm}"
export NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net-a3-arm}"
export NATS_CONTAINER="${NATS_CONTAINER:-fleet-smoke-nats-a3-arm}"
export NATS_NET_NAME="${NATS_NET_NAME:-fleet-smoke-net-a3-arm}"
exec "$SCRIPT_DIR/smoke-a3.sh" "$@"

View File

@@ -6,7 +6,7 @@
# ssh+Ansible ◀────┘
# │
# ▼
# IotDeviceSetupScore ──▶ podman + iot-agent on VM
# FleetDeviceSetupScore ──▶ podman + fleet-agent on VM
# │
# ▼
# existing operator ──NATS────────┘ (agent joins fleet, reconciles CR)
@@ -32,7 +32,7 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
VM_NAME="${VM_NAME:-iot-smoke-vm}"
VM_NAME="${VM_NAME:-fleet-smoke-vm}"
DEVICE_ID="${DEVICE_ID:-$VM_NAME}"
GROUP="${GROUP:-group-a}"
LIBVIRT_URI="${LIBVIRT_URI:-qemu:///system}"
@@ -43,8 +43,8 @@ LIBVIRT_URI="${LIBVIRT_URI:-qemu:///system}"
# target, phase 4 timeout.
ARCH="${ARCH:-x86-64}"
NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats-a3}"
NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net-a3}"
NATS_CONTAINER="${NATS_CONTAINER:-fleet-smoke-nats-a3}"
NATS_NET_NAME="${NATS_NET_NAME:-fleet-smoke-net-a3}"
NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}"
NATS_PORT="${NATS_PORT:-4222}"
@@ -99,20 +99,20 @@ NAT_GW="$(virsh --connect "$LIBVIRT_URI" net-dumpxml default \
log "libvirt network gateway = $NAT_GW (VM will dial NATS at nats://$NAT_GW:$NATS_PORT)"
# ---------------------------- phase 2: build ---------------------------
log "phase 2: build iot-agent-v0 for guest arch=$ARCH (release — debug binary fills cloud rootfs)"
log "phase 2: build harmony-fleet-agent for guest arch=$ARCH (release — debug binary fills cloud rootfs)"
(
cd "$REPO_ROOT"
if [[ -n "$AGENT_TARGET" ]]; then
rustup target add "$AGENT_TARGET" >/dev/null
cargo build -q --release --target "$AGENT_TARGET" -p iot-agent-v0
cargo build -q --release --target "$AGENT_TARGET" -p harmony-fleet-agent
else
cargo build -q --release -p iot-agent-v0
cargo build -q --release -p harmony-fleet-agent
fi
)
if [[ -n "$AGENT_TARGET" ]]; then
AGENT_BINARY="$REPO_ROOT/target/$AGENT_TARGET/release/iot-agent-v0"
AGENT_BINARY="$REPO_ROOT/target/$AGENT_TARGET/release/harmony-fleet-agent"
else
AGENT_BINARY="$REPO_ROOT/target/release/iot-agent-v0"
AGENT_BINARY="$REPO_ROOT/target/release/harmony-fleet-agent"
fi
[[ -f "$AGENT_BINARY" ]] || fail "agent binary missing after build: $AGENT_BINARY"
@@ -120,7 +120,7 @@ fi
log "phase 3: bootstrap assets + provision VM + onboard device (arch=$EXAMPLE_ARCH)"
(
cd "$REPO_ROOT"
cargo run -q --release -p example_iot_vm_setup -- \
cargo run -q --release -p example_fleet_vm_setup -- \
--arch "$EXAMPLE_ARCH" \
--vm-name "$VM_NAME" \
--device-id "$DEVICE_ID" \
@@ -136,34 +136,34 @@ case "$ARCH" in
aarch64|arm64) STATUS_TIMEOUT=300 ;;
*) STATUS_TIMEOUT=60 ;;
esac
log "phase 4: wait for agent to report status to NATS (timeout=${STATUS_TIMEOUT}s)"
log "phase 4: wait for agent to report heartbeat to NATS (timeout=${STATUS_TIMEOUT}s)"
wait_for_status() {
local timeout=$1
for _ in $(seq 1 "$timeout"); do
if podman run --rm --network "$NATS_NET_NAME" \
docker.io/natsio/nats-box:latest \
nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \
"status.$DEVICE_ID" --raw >/dev/null 2>&1; then
nats --server "nats://$NATS_CONTAINER:4222" kv get device-heartbeat \
"heartbeat.$DEVICE_ID" --raw >/dev/null 2>&1; then
return 0
fi
sleep 1
done
return 1
}
wait_for_status "$STATUS_TIMEOUT" || fail "agent-status never appeared for $DEVICE_ID"
log "agent status present on NATS"
wait_for_status "$STATUS_TIMEOUT" || fail "device-heartbeat never appeared for $DEVICE_ID"
log "agent heartbeat present on NATS"
# ---------------------------- phase 5: hard power-cycle, expect recovery ----------------------------
log "phase 5: power-cycle VM (virsh destroy + start) → agent must reconnect to NATS"
nats_status_timestamp() {
# Prints the "timestamp" field of the status.<device> entry, or "".
# Prints the "at" field of the heartbeat.<device> entry, or "".
# Never errors (for `set -e` safety).
podman run --rm --network "$NATS_NET_NAME" \
docker.io/natsio/nats-box:latest \
nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \
"status.$DEVICE_ID" --raw 2>/dev/null \
| grep -oE '"timestamp":"[^"]+"' \
nats --server "nats://$NATS_CONTAINER:4222" kv get device-heartbeat \
"heartbeat.$DEVICE_ID" --raw 2>/dev/null \
| grep -oE '"at":"[^"]+"' \
| head -1 | cut -d'"' -f4 || true
}
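
The grep extraction works because `chrono::DateTime<Utc>` serializes as RFC 3339 UTC, which stays lexicographically comparable; that is the property the reboot gate's shell `>` relies on. A property-style sketch of the invariant (it holds for strings of equal subsecond precision):

```rust
use chrono::{DateTime, Utc};

// For RFC 3339 UTC timestamps of equal precision, string order and
// time order agree, so a plain shell `>` on the raw field is sound.
fn string_order_matches_time_order(a: DateTime<Utc>, b: DateTime<Utc>) -> bool {
    (a.to_rfc3339() < b.to_rfc3339()) == (a < b)
}

#[test]
fn rfc3339_is_lex_comparable() {
    let t1: DateTime<Utc> = "2026-04-22T10:00:30Z".parse().unwrap();
    let t2: DateTime<Utc> = "2026-04-22T10:05:00Z".parse().unwrap();
    assert!(string_order_matches_time_order(t1, t2));
}
```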

529
fleet/scripts/smoke-a4.sh Executable file
View File

@@ -0,0 +1,529 @@
#!/usr/bin/env bash
# End-to-end hands-on demo: operator + in-cluster NATS + ARM VM agent.
#
# [k3d cluster]
# ├── NATS (single-node, NodePort 4222)
# └── CRD: fleet.nationtech.io/v1alpha1/Deployment
# ▲
# │ kubectl apply / harmony_apply_deployment
# │
# [host]
# ├── operator (cargo run) ──▶ NATS KV desired-state
# └── libvirt VM
# └── fleet-agent ──▶ NATS KV (watch) ──▶ podman container
#
# By default the script brings the whole stack up, applies no
# Deployment CR, prints a "command menu" of user-runnable one-liners,
# and blocks on Ctrl-C. With `--auto`, it also drives an apply +
# upgrade + delete cycle for regression coverage.
#
# Prereqs on the runner host (one-time, generic):
# 1. podman (rootless), cargo, kubectl, virsh, xorriso, python3,
# libvirt, qemu-system-x86_64/aarch64 + edk2 firmware for the
# chosen ARCH.
# 2. Be in the `libvirt` group.
# 3. `sudo virsh net-start default` (once per boot unless autostart).
# 4. Rootless podman user socket running:
# `systemctl --user start podman.socket`.
# 5. k3d binary at $K3D_BIN (defaults to Harmony's downloaded copy).
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
OPERATOR_DIR="$REPO_ROOT/fleet/harmony-fleet-operator"
# ---- config -----------------------------------------------------------------
K3D_BIN="${K3D_BIN:-$HOME/.local/share/harmony/k3d/k3d}"
CLUSTER_NAME="${CLUSTER_NAME:-fleet-demo}"
ARCH="${ARCH:-x86-64}"
VM_NAME="${VM_NAME:-fleet-demo-vm}"
DEVICE_ID="${DEVICE_ID:-$VM_NAME}"
GROUP="${GROUP:-group-a}"
LIBVIRT_URI="${LIBVIRT_URI:-qemu:///system}"
NATS_NAMESPACE="${NATS_NAMESPACE:-fleet-system}"
NATS_NAME="${NATS_NAME:-fleet-nats}"
NATS_NODE_PORT="${NATS_NODE_PORT:-4222}"
DEPLOY_NS="${DEPLOY_NS:-fleet-demo}"
DEPLOY_NAME="${DEPLOY_NAME:-hello-world}"
DEPLOY_PORT="${DEPLOY_PORT:-8080:80}"
# Source image we sideload into the VM's podman. Defaults to the
# `nginx:alpine` variant (~60 MB) which is almost always cached on
# dev boxes and keeps TCG-aarch64 boot budgets sane. The tarball
# transport + podman IfNotPresent semantics mean the agent never
# hits a public registry for this image.
SRC_IMAGE="${SRC_IMAGE:-docker.io/library/nginx:alpine}"
AUTO=0
[[ "${1:-}" == "--auto" ]] && AUTO=1
OPERATOR_LOG="$(mktemp -t harmony-fleet-operator.XXXXXX.log)"
OPERATOR_PID=""
KUBECONFIG_FILE=""
# ---- arch demux -------------------------------------------------------------
case "$ARCH" in
x86-64|x86_64)
EXAMPLE_ARCH=x86-64
AGENT_TARGET=
# Native-KVM x86: podman pull + layer unpack is seconds.
CONTAINER_WAIT_STEPS=90 # 180 s
;;
aarch64|arm64)
EXAMPLE_ARCH=aarch64
AGENT_TARGET=aarch64-unknown-linux-gnu
# TCG aarch64: network stack + userns layer unpack run
# ~3-5× slower than native. An `nginx:latest` pull (~250 MB)
# on a cold image takes 4-8 min observed here. Give it 15.
CONTAINER_WAIT_STEPS=450 # 900 s
;;
*) printf '[smoke-a4 FAIL] unsupported ARCH=%s (expected: x86-64 | aarch64)\n' "$ARCH" >&2; exit 1 ;;
esac
log() { printf '\033[1;34m[smoke-a4]\033[0m %s\n' "$*"; }
fail() { printf '\033[1;31m[smoke-a4 FAIL]\033[0m %s\n' "$*" >&2; exit 1; }
cleanup() {
local rc=$?
log "cleanup…"
if [[ -n "$OPERATOR_PID" ]] && kill -0 "$OPERATOR_PID" 2>/dev/null; then
kill "$OPERATOR_PID" 2>/dev/null || true
wait "$OPERATOR_PID" 2>/dev/null || true
fi
if [[ "${KEEP:-0}" != "1" ]]; then
virsh --connect "$LIBVIRT_URI" destroy "$VM_NAME" 2>/dev/null || true
virsh --connect "$LIBVIRT_URI" undefine --nvram \
--remove-all-storage "$VM_NAME" 2>/dev/null || true
"$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true
[[ -n "$KUBECONFIG_FILE" ]] && rm -f "$KUBECONFIG_FILE"
else
log "KEEP=1 — leaving cluster '$CLUSTER_NAME' and VM '$VM_NAME' running"
[[ -n "$KUBECONFIG_FILE" ]] && log "KUBECONFIG=$KUBECONFIG_FILE"
fi
if [[ $rc -ne 0 && -s "$OPERATOR_LOG" ]]; then
log "operator log at $OPERATOR_LOG"
echo "----- operator log tail -----"
tail -n 40 "$OPERATOR_LOG" 2>/dev/null || true
else
rm -f "$OPERATOR_LOG"
fi
exit $rc
}
trap cleanup EXIT INT TERM
require() { command -v "$1" >/dev/null 2>&1 || fail "missing required tool: $1"; }
require cargo
require kubectl
require virsh
require podman
require docker # cross-runtime image transfer for k3d sideload
[[ -x "$K3D_BIN" ]] || fail "k3d binary not executable at $K3D_BIN (set K3D_BIN=…)"
# ---- phase 1: k3d cluster with NATS port exposed ----------------------------
log "phase 1: create k3d cluster '$CLUSTER_NAME' (host port $NATS_NODE_PORT → loadbalancer)"
"$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true
"$K3D_BIN" cluster create "$CLUSTER_NAME" \
--wait --timeout 90s \
-p "${NATS_NODE_PORT}:${NATS_NODE_PORT}@loadbalancer" \
>/dev/null
KUBECONFIG_FILE="$(mktemp -t fleet-demo-kubeconfig.XXXXXX)"
"$K3D_BIN" kubeconfig get "$CLUSTER_NAME" > "$KUBECONFIG_FILE"
export KUBECONFIG="$KUBECONFIG_FILE"
# ---- phase 2: NATS in-cluster via NatsBasicScore ----------------------------
NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}"
# Sideload the NATS image into k3d so the install doesn't race the
# Docker Hub rate limiter. `docker inspect` + `podman save` + `docker
# load` is the cross-runtime bridge on hosts that have both (rootful
# docker for k3d, rootless podman for IoT smokes). Cheap when the
# image is already in podman's store; a one-time Hub pull when not.
log "phase 2a: sideload NATS image ($NATS_IMAGE) into k3d cluster"
if ! docker image inspect "$NATS_IMAGE" >/dev/null 2>&1; then
if ! podman image inspect "$NATS_IMAGE" >/dev/null 2>&1; then
log "NATS image not cached locally — pulling from Docker Hub"
podman pull "$NATS_IMAGE" >/dev/null || fail "podman pull $NATS_IMAGE failed"
fi
tmptar="$(mktemp -t nats-image.XXXXXX.tar)"
podman save "$NATS_IMAGE" -o "$tmptar" >/dev/null
docker load -i "$tmptar" >/dev/null
rm -f "$tmptar"
fi
"$K3D_BIN" image import "$NATS_IMAGE" -c "$CLUSTER_NAME" >/dev/null
log "phase 2b: install NATS in-cluster via NatsBasicScore (namespace=$NATS_NAMESPACE, expose=load-balancer)"
(
cd "$REPO_ROOT"
cargo run -q --release -p example_fleet_nats_install -- \
--namespace "$NATS_NAMESPACE" \
--name "$NATS_NAME" \
--expose load-balancer
)
log "waiting for NATS Deployment to be Available"
kubectl -n "$NATS_NAMESPACE" wait --for=condition=Available \
"deployment/$NATS_NAME" --timeout=120s >/dev/null
# kubectl "Available" reports on pod readiness — k3d's klipper-lb
# takes a further few seconds to wire the host loadbalancer port to
# the Service endpoints. Probe the actual TCP port from the host
# before declaring NATS routable, else the operator's connect will
# race and die with "expected INFO, got nothing."
log "probing nats://localhost:$NATS_NODE_PORT end-to-end"
for _ in $(seq 1 60); do
if (echo >"/dev/tcp/127.0.0.1/$NATS_NODE_PORT") 2>/dev/null; then
break
fi
sleep 1
done
(echo >"/dev/tcp/127.0.0.1/$NATS_NODE_PORT") 2>/dev/null \
|| fail "TCP localhost:$NATS_NODE_PORT never came up after Deployment Available"
# ---- phase 3: install Deployment CRD via operator's Score-based install -----
log "phase 3: install Deployment CRD via operator \`install\` subcommand"
(
cd "$OPERATOR_DIR"
cargo run -q -- install
)
kubectl wait --for=condition=Established \
"crd/deployments.fleet.nationtech.io" --timeout=30s >/dev/null
kubectl get ns "$DEPLOY_NS" >/dev/null 2>&1 || \
kubectl create namespace "$DEPLOY_NS" >/dev/null
# ---- phase 4: operator running host-side ------------------------------------
log "phase 4: start operator (host-side) connected to nats://localhost:$NATS_NODE_PORT"
(
cd "$OPERATOR_DIR"
cargo build -q --release
)
NATS_URL="nats://localhost:$NATS_NODE_PORT" \
KV_BUCKET="desired-state" \
RUST_LOG="info,kube_runtime=warn" \
"$REPO_ROOT/target/release/harmony-fleet-operator" \
>"$OPERATOR_LOG" 2>&1 &
OPERATOR_PID=$!
log "operator pid=$OPERATOR_PID (log: $OPERATOR_LOG)"
for _ in $(seq 1 30); do
if grep -q "starting Deployment controller" "$OPERATOR_LOG"; then break; fi
if ! kill -0 "$OPERATOR_PID" 2>/dev/null; then fail "operator exited early"; fi
sleep 0.5
done
grep -q "starting Deployment controller" "$OPERATOR_LOG" \
|| fail "operator never logged 'starting Deployment controller'"
grep -q "KV bucket ready" "$OPERATOR_LOG" \
|| fail "operator never confirmed KV bucket ready"
# ---- phase 4.5: export the workload image to a tarball ----------------------
# Instead of running a local OCI registry (which needs `registry:2` from
# Docker Hub — rate-limited!), sideload the image straight into the VM's
# podman via `podman save`/`scp`/`podman load`. Paired with harmony's
# `PodmanTopology::ensure_image_present` (IfNotPresent semantics: present
# = skip pull), the agent never touches a public registry for known
# images. This is the same compounding-framework-value move as the k3d
# NATS sideload in phase 2a.
NAT_GW="$(virsh --connect "$LIBVIRT_URI" net-dumpxml default \
| grep -oP "ip address='\K[^']+" | head -1)"
[[ -n "$NAT_GW" ]] || fail "couldn't determine libvirt 'default' gateway IP"
log "libvirt network gateway = $NAT_GW (VM agent will dial nats://$NAT_GW:$NATS_NODE_PORT)"
log "phase 4.5: export $SRC_IMAGE to a local tarball for VM sideload"
# Arch the VM expects.
case "$ARCH" in
x86-64|x86_64) EXPECTED_IMAGE_ARCH=amd64 ;;
aarch64|arm64) EXPECTED_IMAGE_ARCH=arm64 ;;
esac
if ! podman image inspect "$SRC_IMAGE" >/dev/null 2>&1; then
log "source image $SRC_IMAGE not cached — attempting pull (platform=$EXPECTED_IMAGE_ARCH)"
podman pull --platform="linux/$EXPECTED_IMAGE_ARCH" "$SRC_IMAGE" >/dev/null || \
fail "podman pull $SRC_IMAGE failed (Docker Hub rate limit?). \
Pre-pull it when the quota is available (\`podman pull --platform=linux/$EXPECTED_IMAGE_ARCH $SRC_IMAGE\`), then re-run."
fi
# Verify arch matches. A podman cache shared across ARCH= runs can
# end up with a tag pointing at the wrong arch (pulling
# `nginx:alpine` for arm64 overwrites the tag's arm64/amd64
# binding). Better to fail loudly here than ship the VM an image
# it can't exec.
IMAGE_ACTUAL_ARCH="$(podman inspect "$SRC_IMAGE" --format '{{.Architecture}}' 2>/dev/null || true)"
if [[ "$IMAGE_ACTUAL_ARCH" != "$EXPECTED_IMAGE_ARCH" ]]; then
fail "$SRC_IMAGE is arch '$IMAGE_ACTUAL_ARCH' but ARCH=$ARCH needs '$EXPECTED_IMAGE_ARCH'. \
Either pre-pull the right platform (\`podman pull --platform=linux/$EXPECTED_IMAGE_ARCH $SRC_IMAGE\`) \
or point SRC_IMAGE at a locally-tagged variant."
fi
# The smoke upgrade test asserts container id change on image-tag
# change, so we'll expose two distinct local tag names pointing at
# the same bits. Tagging happens on the VM side after `podman load`
# so we stay compatible with older podman versions that don't grok
# the multi-image archive format (`podman save -m`).
V1_IMAGE="localdev/nginx:v1"
V2_IMAGE="localdev/nginx:v2"
IMAGE_TARBALL="$(mktemp -t fleet-demo-images.XXXXXX.tar)"
podman save -o "$IMAGE_TARBALL" "$SRC_IMAGE" >/dev/null \
|| fail "podman save failed"
log "exported $SRC_IMAGE$IMAGE_TARBALL ($(du -h "$IMAGE_TARBALL" | cut -f1))"
# ---- phase 5: provision VM + install agent ----------------------------------
log "phase 5: build harmony-fleet-agent for arch=$ARCH + provision VM"
(
cd "$REPO_ROOT"
if [[ -n "$AGENT_TARGET" ]]; then
rustup target add "$AGENT_TARGET" >/dev/null
cargo build -q --release --target "$AGENT_TARGET" -p harmony-fleet-agent
else
cargo build -q --release -p harmony-fleet-agent
fi
)
if [[ -n "$AGENT_TARGET" ]]; then
AGENT_BINARY="$REPO_ROOT/target/$AGENT_TARGET/release/harmony-fleet-agent"
else
AGENT_BINARY="$REPO_ROOT/target/release/harmony-fleet-agent"
fi
[[ -f "$AGENT_BINARY" ]] || fail "agent binary missing: $AGENT_BINARY"
(
cd "$REPO_ROOT"
# Pass through FLEET_VM_ADMIN_PASSWORD if set so the VM admin user
# accepts SSH password auth. Useful for chaos / reliability
# testing sessions where the operator wants to log in and break
# things on purpose. Unset by default = key-only auth.
cargo run -q --release -p example_fleet_vm_setup -- \
--arch "$EXAMPLE_ARCH" \
--vm-name "$VM_NAME" \
--device-id "$DEVICE_ID" \
--group "$GROUP" \
--agent-binary "$AGENT_BINARY" \
--nats-url "nats://$NAT_GW:$NATS_NODE_PORT"
)
VM_IP="$(virsh --connect "$LIBVIRT_URI" domifaddr "$VM_NAME" \
| awk '/ipv4/ { print $4 }' | head -1 | cut -d/ -f1)"
[[ -n "$VM_IP" ]] || fail "couldn't resolve VM IP"
# ---- phase 5c: sideload workload images into fleet-agent's podman -------------
log "phase 5c: sideload $V1_IMAGE + $V2_IMAGE into fleet-agent's podman on VM"
# scp the tarball (ssh as the admin user, the only one with sshd
# access), then `podman load` inside a fleet-agent user session.
# Post-load the fleet-agent's podman has both tags locally, so
# `ensure_image_present` in harmony's PodmanTopology takes the
# "already present, skip pull" branch — no Docker Hub hit.
scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \
"$IMAGE_TARBALL" "fleet-admin@$VM_IP:/tmp/fleet-demo-images.tar" >/dev/null \
|| fail "scp image tarball to VM failed"
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \
"fleet-admin@$VM_IP" -- \
"sudo chown fleet-agent:fleet-agent /tmp/fleet-demo-images.tar && \
sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman load -i /tmp/fleet-demo-images.tar' && \
sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman tag $SRC_IMAGE $V1_IMAGE' && \
sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman tag $SRC_IMAGE $V2_IMAGE' && \
sudo rm -f /tmp/fleet-demo-images.tar" >/dev/null \
|| fail "podman load + tag on VM failed"
rm -f "$IMAGE_TARBALL"
log "sideload complete — fleet-agent's podman has $V1_IMAGE + $V2_IMAGE"
# ---- phase 6: sanity --------------------------------------------------------
log "phase 6: sanity — operator + agent + KV"
for _ in $(seq 1 60); do
if kubectl -n "$NATS_NAMESPACE" get pod -l app="$NATS_NAME" \
-o jsonpath='{.items[0].status.phase}' 2>/dev/null \
| grep -q Running; then
break
fi
sleep 1
done
# NATS box one-liner we'll reuse in the hand-off too. Uses the host
# loadbalancer port so no pod-network plumbing needed.
NATSBOX_HOST="podman run --rm docker.io/natsio/nats-box:latest \
nats --server nats://host.containers.internal:$NATS_NODE_PORT"
log "checking agent heartbeat in NATS KV (device-heartbeat bucket)"
for _ in $(seq 1 30); do
if $NATSBOX_HOST kv get device-heartbeat "heartbeat.$DEVICE_ID" --raw \
>/dev/null 2>&1; then
break
fi
sleep 2
done
$NATSBOX_HOST kv get device-heartbeat "heartbeat.$DEVICE_ID" --raw >/dev/null \
|| fail "agent never published heartbeat to NATS"
log "agent heartbeat present: heartbeat.$DEVICE_ID"
# ---- phase 7: either hand off to user, or drive regression ------------------
if [[ "$AUTO" == "1" ]]; then
log "phase 7 (--auto): apply nginx via typed CR, verify, upgrade, delete"
log "applying $V1_IMAGE deployment"
(
cd "$REPO_ROOT"
cargo run -q -p example_harmony_apply_deployment -- \
--namespace "$DEPLOY_NS" \
--name "$DEPLOY_NAME" \
--target-device "$DEVICE_ID" \
--image "$V1_IMAGE" \
--port "$DEPLOY_PORT"
)
log "waiting for container on VM (up to $((CONTAINER_WAIT_STEPS * 2))s)"
CONTAINER_ID_V1=""
for _ in $(seq 1 "$CONTAINER_WAIT_STEPS"); do
id="$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \
"fleet-admin@$VM_IP" -- \
"sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman ps -q --filter name=$DEPLOY_NAME'" \
2>/dev/null | head -1)" || true
if [[ -n "$id" ]]; then CONTAINER_ID_V1="$id"; break; fi
sleep 2
done
[[ -n "$CONTAINER_ID_V1" ]] || fail "nginx container never appeared on VM"
log "container id (v1): $CONTAINER_ID_V1"
log "curl http://$VM_IP:${DEPLOY_PORT%%:*}/"
for _ in $(seq 1 30); do
if curl -sf "http://$VM_IP:${DEPLOY_PORT%%:*}/" >/dev/null; then
log "nginx responded (v1)"; break
fi
sleep 2
done
log "waiting for operator to aggregate .status.aggregate.succeeded == 1"
for _ in $(seq 1 30); do
got="$(kubectl -n "$DEPLOY_NS" get deployment.fleet.nationtech.io "$DEPLOY_NAME" \
-o jsonpath='{.status.aggregate.succeeded}' 2>/dev/null || true)"
if [[ "$got" == "1" ]]; then
log ".status.aggregate.succeeded = 1 — aggregator reflected agent state"
break
fi
sleep 2
done
got="$(kubectl -n "$DEPLOY_NS" get deployment.fleet.nationtech.io "$DEPLOY_NAME" \
-o jsonpath='{.status.aggregate.succeeded}' 2>/dev/null || true)"
[[ "$got" == "1" ]] || fail ".status.aggregate.succeeded never reached 1 (got '$got')"
log "upgrading to $V2_IMAGE"
(
cd "$REPO_ROOT"
cargo run -q -p example_harmony_apply_deployment -- \
--namespace "$DEPLOY_NS" \
--name "$DEPLOY_NAME" \
--target-device "$DEVICE_ID" \
--image "$V2_IMAGE" \
--port "$DEPLOY_PORT"
)
log "waiting for container id to change (upgrade, up to $((CONTAINER_WAIT_STEPS * 2))s)"
CONTAINER_ID_V2=""
for _ in $(seq 1 "$CONTAINER_WAIT_STEPS"); do
id="$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \
"fleet-admin@$VM_IP" -- \
"sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman ps -q --filter name=$DEPLOY_NAME'" \
2>/dev/null | head -1)" || true
if [[ -n "$id" && "$id" != "$CONTAINER_ID_V1" ]]; then
CONTAINER_ID_V2="$id"; break
fi
sleep 2
done
[[ -n "$CONTAINER_ID_V2" ]] || fail "container id did not change after upgrade"
log "container id (v2): $CONTAINER_ID_V2 — upgrade confirmed"
log "deleting deployment"
(
cd "$REPO_ROOT"
cargo run -q -p example_harmony_apply_deployment -- \
--namespace "$DEPLOY_NS" \
--name "$DEPLOY_NAME" \
--target-device "$DEVICE_ID" \
--delete
)
for _ in $(seq 1 60); do
if ! ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \
"fleet-admin@$VM_IP" -- podman ps -q --filter "name=$DEPLOY_NAME" 2>/dev/null \
| grep -q .; then
log "container removed from VM"
break
fi
sleep 2
done
log "PASS (--auto)"
exit 0
fi
# ---- hand-off mode ----------------------------------------------------------
SSH_KEY="$HOME/.local/share/harmony/fleet/ssh/id_ed25519"
cat <<EOF
$(printf '\033[1;32m[smoke-a4]\033[0m full stack is up. Your playground:\n')
KUBECONFIG=$KUBECONFIG_FILE
VM IP: $VM_IP
device id: $DEVICE_ID
libvirt NAT gateway (VM's view of the host): $NAT_GW
NATS URL (from host): nats://localhost:$NATS_NODE_PORT
NATS URL (from the VM): nats://$NAT_GW:$NATS_NODE_PORT
$(printf '\033[1mWatch CRs reconcile:\033[0m\n')
kubectl get deployments.fleet.nationtech.io -A -w
$(printf '\033[1mApply an nginx deployment (typed Rust):\033[0m\n')
cargo run -q -p example_harmony_apply_deployment -- \\
--namespace $DEPLOY_NS \\
--name $DEPLOY_NAME \\
--target-device $DEVICE_ID \\
--image docker.io/library/nginx:latest
$(printf '\033[1mUpgrade it:\033[0m\n')
cargo run -q -p example_harmony_apply_deployment -- \\
--namespace $DEPLOY_NS --name $DEPLOY_NAME --target-device $DEVICE_ID \\
--image docker.io/library/nginx:1.26
$(printf '\033[1mPreview the CR as JSON (and apply via kubectl):\033[0m\n')
cargo run -q -p example_harmony_apply_deployment -- \\
--name $DEPLOY_NAME --target-device $DEVICE_ID \\
--image docker.io/library/nginx:latest --print | kubectl apply -f -
$(printf '\033[1mConnect to the device:\033[0m\n')
ssh -i $SSH_KEY fleet-admin@$VM_IP
virsh --connect $LIBVIRT_URI console $VM_NAME --force # alternative
# list containers (agent runs rootless as fleet-agent, not fleet-admin):
ssh -i $SSH_KEY fleet-admin@$VM_IP "sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman ps'"
$(printf '\033[1mInspect NATS KV (natsbox):\033[0m\n')
alias natsbox='podman run --rm docker.io/natsio/nats-box:latest nats --server nats://host.containers.internal:$NATS_NODE_PORT'
natsbox kv ls desired-state
natsbox kv get desired-state '$DEVICE_ID.$DEPLOY_NAME' --raw
natsbox kv ls device-state
natsbox kv ls device-heartbeat
natsbox kv get device-heartbeat 'heartbeat.$DEVICE_ID' --raw
$(printf '\033[1mHit the deployed nginx:\033[0m\n')
curl http://$VM_IP:${DEPLOY_PORT%%:*}/
$(printf '\033[1mOperator log:\033[0m\n')
tail -F $OPERATOR_LOG
$(printf '\033[1;33mCtrl-C to tear everything down.\033[0m\n')
EOF
# Block until user interrupts; cleanup trap handles teardown.
while true; do sleep 60; done
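
The natsbox one-liners above all have typed equivalents. Phase 6's heartbeat gate, for instance, expressed against the contracts crate (a sketch; it assumes the bucket already exists, which holds once any agent has published):

```rust
use harmony_reconciler_contracts::{
    BUCKET_DEVICE_HEARTBEAT, HeartbeatPayload, device_heartbeat_key,
};

async fn heartbeat_seen(
    js: &async_nats::jetstream::Context,
    device_id: &str,
) -> anyhow::Result<Option<chrono::DateTime<chrono::Utc>>> {
    let kv = js.get_key_value(BUCKET_DEVICE_HEARTBEAT).await?;
    match kv.entry(device_heartbeat_key(device_id)).await? {
        Some(entry) => {
            let hb: HeartbeatPayload = serde_json::from_slice(&entry.value)?;
            Ok(Some(hb.at))
        }
        None => Ok(None),
    }
}
```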

View File

@@ -16,5 +16,7 @@ license.workspace = true
[dependencies]
chrono = { workspace = true, features = ["serde"] }
harmony_types = { path = "../harmony_types" }
schemars = "0.8.22"
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }

View File

@@ -0,0 +1,272 @@
//! Fleet-scale wire-format types.
//!
//! Per-concern payloads on dedicated NATS KV buckets:
//!
//! | Type | Bucket | Cadence |
//! |------|--------|---------|
//! | [`DeviceInfo`] | KV `device-info` | on startup + label/inventory change |
//! | [`DeploymentState`] | KV `device-state` | on reconcile phase transition |
//! | [`HeartbeatPayload`] | KV `device-heartbeat` | every 30 s |
//!
//! The operator watches `device-state` directly — KV watch deliveries
//! are ordered and last-writer-wins, so there's no separate event
//! stream or per-write revision to track.
use std::collections::BTreeMap;
use std::fmt;
use chrono::{DateTime, Utc};
use harmony_types::id::Id;
use serde::{Deserialize, Deserializer, Serialize};
use crate::status::{InventorySnapshot, Phase};
/// Deployment CR `metadata.name`, validated for NATS-subject safety.
///
/// Scope: what identifies a Deployment to the agent. Appears in KV
/// keys (`state.<device>.<deployment>`) and every in-memory map
/// keyed by "which deployment." A raw `String` here would let an
/// invalid name (containing a `.`, splitting into extra subject
/// tokens) break routing at runtime.
///
/// Validation:
/// - Not empty.
/// - No `.` (would alias an extra subject token).
/// - No `*` / `>` (NATS wildcards).
/// - No ASCII whitespace.
/// - ≤ 253 bytes (RFC 1123 max, matches Kubernetes name limit).
///
/// The constructor is fallible; deserialization runs the same
/// validation so malformed payloads are rejected at the wire.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize)]
#[serde(transparent)]
pub struct DeploymentName(String);
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum InvalidDeploymentName {
#[error("deployment name must not be empty")]
Empty,
#[error("deployment name must not exceed 253 bytes")]
TooLong,
#[error("deployment name must not contain '.' (would alias an extra NATS subject token)")]
ContainsDot,
#[error("deployment name must not contain NATS wildcards '*' or '>'")]
ContainsWildcard,
#[error("deployment name must not contain whitespace")]
ContainsWhitespace,
}
impl DeploymentName {
pub fn try_new(s: impl Into<String>) -> Result<Self, InvalidDeploymentName> {
let s = s.into();
if s.is_empty() {
return Err(InvalidDeploymentName::Empty);
}
if s.len() > 253 {
return Err(InvalidDeploymentName::TooLong);
}
if s.contains('.') {
return Err(InvalidDeploymentName::ContainsDot);
}
if s.contains('*') || s.contains('>') {
return Err(InvalidDeploymentName::ContainsWildcard);
}
if s.chars().any(|c| c.is_ascii_whitespace()) {
return Err(InvalidDeploymentName::ContainsWhitespace);
}
Ok(Self(s))
}
pub fn as_str(&self) -> &str {
&self.0
}
}
impl fmt::Display for DeploymentName {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
impl<'de> Deserialize<'de> for DeploymentName {
fn deserialize<D: Deserializer<'de>>(de: D) -> Result<Self, D::Error> {
let s = String::deserialize(de)?;
Self::try_new(s).map_err(serde::de::Error::custom)
}
}
/// Static-ish per-device facts: routing labels, hardware, agent
/// version. Written to KV key `info.<device_id>` in
/// [`crate::BUCKET_DEVICE_INFO`]. Rewritten by the agent on startup
/// and whenever its labels change — **not** on every heartbeat.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DeviceInfo {
pub device_id: Id,
/// Routing labels. Operator resolves Deployment
/// `targetSelector.matchLabels` against this map.
#[serde(default)]
pub labels: BTreeMap<String, String>,
/// Hardware / OS snapshot. `None` until the first post-startup
/// publish.
#[serde(default)]
pub inventory: Option<InventorySnapshot>,
/// RFC 3339 UTC timestamp of this publish.
pub updated_at: DateTime<Utc>,
}
/// Authoritative current phase for one `(device, deployment)` pair.
/// Written to KV key `state.<device_id>.<deployment>` in
/// [`crate::BUCKET_DEVICE_STATE`]. Deleted when the deployment is
/// removed from the device.
///
/// The operator's KV watch sees every write + delete in order, so
/// this value alone — plus the operator's in-memory belief about
/// the last phase for the pair — is enough to drive the aggregate
/// counters. No separate event stream, no per-write revision.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DeploymentState {
pub device_id: Id,
pub deployment: DeploymentName,
pub phase: Phase,
pub last_event_at: DateTime<Utc>,
#[serde(default)]
pub last_error: Option<String>,
}
/// Tiny liveness ping. Written to KV key `heartbeat.<device_id>` in
/// [`crate::BUCKET_DEVICE_HEARTBEAT`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct HeartbeatPayload {
pub device_id: Id,
pub at: DateTime<Utc>,
}
#[cfg(test)]
mod tests {
use super::*;
fn ts(s: &str) -> DateTime<Utc> {
DateTime::parse_from_rfc3339(s).unwrap().with_timezone(&Utc)
}
fn dn(s: &str) -> DeploymentName {
DeploymentName::try_new(s).expect("valid")
}
#[test]
fn deployment_name_accepts_rfc1123() {
assert!(DeploymentName::try_new("hello-world").is_ok());
assert!(DeploymentName::try_new("a").is_ok());
assert!(DeploymentName::try_new("a-b-c-1-2-3").is_ok());
}
#[test]
fn deployment_name_rejects_dot() {
assert_eq!(
DeploymentName::try_new("hello.world"),
Err(InvalidDeploymentName::ContainsDot)
);
}
#[test]
fn deployment_name_rejects_nats_wildcards() {
assert_eq!(
DeploymentName::try_new("hello*"),
Err(InvalidDeploymentName::ContainsWildcard)
);
assert_eq!(
DeploymentName::try_new("hello>"),
Err(InvalidDeploymentName::ContainsWildcard)
);
}
#[test]
fn deployment_name_rejects_empty_and_too_long() {
assert_eq!(
DeploymentName::try_new(""),
Err(InvalidDeploymentName::Empty)
);
assert_eq!(
DeploymentName::try_new("x".repeat(254)),
Err(InvalidDeploymentName::TooLong)
);
}
#[test]
fn deployment_name_rejects_whitespace() {
assert_eq!(
DeploymentName::try_new("hello world"),
Err(InvalidDeploymentName::ContainsWhitespace)
);
assert_eq!(
DeploymentName::try_new("hello\tworld"),
Err(InvalidDeploymentName::ContainsWhitespace)
);
}
#[test]
fn deployment_name_deserialization_validates() {
let json = r#""bad.name""#;
let result: Result<DeploymentName, _> = serde_json::from_str(json);
assert!(result.is_err());
}
#[test]
fn deployment_name_roundtrip() {
let name = dn("hello-world");
let json = serde_json::to_string(&name).unwrap();
assert_eq!(json, r#""hello-world""#);
let back: DeploymentName = serde_json::from_str(&json).unwrap();
assert_eq!(name, back);
}
#[test]
fn deployment_state_roundtrip() {
let original = DeploymentState {
device_id: Id::from("pi-01".to_string()),
deployment: dn("hello-web"),
phase: Phase::Failed,
last_event_at: ts("2026-04-22T10:05:00Z"),
last_error: Some("image pull 429".to_string()),
};
let json = serde_json::to_string(&original).unwrap();
let back: DeploymentState = serde_json::from_str(&json).unwrap();
assert_eq!(original, back);
}
#[test]
fn heartbeat_is_tiny() {
let hb = HeartbeatPayload {
device_id: Id::from("pi-01".to_string()),
at: ts("2026-04-22T10:00:30Z"),
};
let bytes = serde_json::to_vec(&hb).unwrap();
assert!(
bytes.len() < 96,
"heartbeat payload grew to {} bytes: {}",
bytes.len(),
String::from_utf8_lossy(&bytes),
);
}
#[test]
fn device_info_roundtrip() {
let original = DeviceInfo {
device_id: Id::from("pi-01".to_string()),
labels: BTreeMap::from([("group".to_string(), "site-a".to_string())]),
inventory: Some(InventorySnapshot {
hostname: "pi-01".to_string(),
arch: "aarch64".to_string(),
os: "Ubuntu 24.04".to_string(),
kernel: "6.8.0".to_string(),
cpu_cores: 4,
memory_mb: 8192,
agent_version: "0.1.0".to_string(),
}),
updated_at: ts("2026-04-22T10:00:00Z"),
};
let json = serde_json::to_string(&original).unwrap();
let back: DeviceInfo = serde_json::from_str(&json).unwrap();
assert_eq!(original, back);
}
}
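
One concrete payoff of the '.' ban: a `state.<device>.<deployment>` key splits back into its parts with a single `split_once`, no quoting or escaping. A consumer-side sketch (the operator's actual parsing is internal and may differ):

```rust
use harmony_reconciler_contracts::DeploymentName;

/// Split a `state.<device>.<deployment>` KV key. Sound only because
/// `DeploymentName` bans '.', so the name can never contribute an
/// extra subject token.
fn parse_state_key(key: &str) -> Option<(String, DeploymentName)> {
    let rest = key.strip_prefix("state.")?;
    let (device, deployment) = rest.split_once('.')?;
    let name = DeploymentName::try_new(deployment).ok()?;
    Some((device.to_string(), name))
}

#[test]
fn parse_state_key_rejects_aliased_names() {
    let (dev, name) = parse_state_key("state.pi-01.hello-web").unwrap();
    assert_eq!(dev, "pi-01");
    assert_eq!(name.as_str(), "hello-web");
    assert!(parse_state_key("state.pi-01.bad.name").is_none());
}
```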

View File

@@ -7,47 +7,88 @@
//! here; agent + operator consume the constants directly, and smoke
//! scripts grep for the literal values locked in the tests below.
use crate::fleet::DeploymentName;
/// Operator-written bucket. One entry per `(device, deployment)` pair.
/// Values are the JSON-serialized Score envelope — today
/// `harmony::modules::podman::IotScore`, tomorrow any variant of
/// `harmony::modules::podman::ReconcileScore`, tomorrow any variant of
/// a polymorphic `Score` enum the framework ships.
pub const BUCKET_DESIRED_STATE: &str = "desired-state";
/// Agent-written bucket. One entry per device at `status.<device_id>`.
/// Values are JSON-serialized [`crate::AgentStatus`].
pub const BUCKET_AGENT_STATUS: &str = "agent-status";
/// Static-ish per-device facts: routing labels, inventory, agent
/// version. Agent rewrites the entry on startup and whenever its
/// labels change. Key format: `info.<device_id>`.
pub const BUCKET_DEVICE_INFO: &str = "device-info";
/// Current reconcile phase for each `(device, deployment)` pair.
/// Agent writes on phase transition; operator watches this bucket
/// to drive CR `.status.aggregate`. Authoritative source of truth
/// for "what's running where." Key format:
/// `state.<device_id>.<deployment>`.
pub const BUCKET_DEVICE_STATE: &str = "device-state";
/// Tiny liveness ping from each device every N seconds. Separate
/// from [`BUCKET_DEVICE_STATE`] so routine heartbeats don't churn
/// the state bucket. Key format: `heartbeat.<device_id>`.
pub const BUCKET_DEVICE_HEARTBEAT: &str = "device-heartbeat";
/// KV key for a `(device, deployment)` pair in [`BUCKET_DESIRED_STATE`].
/// Format: `<device>.<deployment>`.
pub fn desired_state_key(device_id: &str, deployment_name: &str) -> String {
format!("{device_id}.{deployment_name}")
pub fn desired_state_key(device_id: &str, deployment_name: &DeploymentName) -> String {
format!("{device_id}.{}", deployment_name.as_str())
}
/// KV key for a device's last-known status in [`BUCKET_AGENT_STATUS`].
/// Format: `status.<device_id>`.
pub fn status_key(device_id: &str) -> String {
format!("status.{device_id}")
/// KV key for a device's `DeviceInfo` entry in [`BUCKET_DEVICE_INFO`].
/// Format: `info.<device_id>`.
pub fn device_info_key(device_id: &str) -> String {
format!("info.{device_id}")
}
/// KV key for a `(device, deployment)` state entry in
/// [`BUCKET_DEVICE_STATE`]. Format: `state.<device_id>.<deployment>`.
pub fn device_state_key(device_id: &str, deployment_name: &DeploymentName) -> String {
format!("state.{device_id}.{}", deployment_name.as_str())
}
/// KV key for a device's liveness entry in
/// [`BUCKET_DEVICE_HEARTBEAT`]. Format: `heartbeat.<device_id>`.
pub fn device_heartbeat_key(device_id: &str) -> String {
format!("heartbeat.{device_id}")
}
#[cfg(test)]
mod tests {
use super::*;
fn dn(s: &str) -> crate::DeploymentName {
crate::DeploymentName::try_new(s).expect("valid")
}
#[test]
fn desired_state_key_format() {
assert_eq!(desired_state_key("pi-01", "hello-web"), "pi-01.hello-web");
assert_eq!(
desired_state_key("pi-01", &dn("hello-web")),
"pi-01.hello-web"
);
}
#[test]
fn status_key_format() {
assert_eq!(status_key("pi-01"), "status.pi-01");
}
#[test]
fn bucket_names_match_smoke_scripts() {
// These strings are also grepped by iot/scripts/smoke-*.sh —
// flipping them here must be paired with a script update.
fn bucket_names_stable() {
// Flipping these is a cross-component break — operator,
// agent, and smoke scripts all grep for the literal values.
assert_eq!(BUCKET_DESIRED_STATE, "desired-state");
assert_eq!(BUCKET_AGENT_STATUS, "agent-status");
assert_eq!(BUCKET_DEVICE_INFO, "device-info");
assert_eq!(BUCKET_DEVICE_STATE, "device-state");
assert_eq!(BUCKET_DEVICE_HEARTBEAT, "device-heartbeat");
}
#[test]
fn key_formats() {
assert_eq!(device_info_key("pi-01"), "info.pi-01");
assert_eq!(
device_state_key("pi-01", &dn("hello-web")),
"state.pi-01.hello-web"
);
assert_eq!(device_heartbeat_key("pi-01"), "heartbeat.pi-01");
}
}
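
"Operator watches this bucket" maps onto a plain async-nats KV watch over the `state.>` wildcard. A sketch with the in-memory phase bookkeeping elided:

```rust
use futures_util::StreamExt;
use harmony_reconciler_contracts::{BUCKET_DEVICE_STATE, DeploymentState};

async fn watch_device_state(js: async_nats::jetstream::Context) -> anyhow::Result<()> {
    let kv = js.get_key_value(BUCKET_DEVICE_STATE).await?;
    // `state.>` matches every `state.<device>.<deployment>` key;
    // deletes show up as entries with Operation::Delete / Purge.
    let mut entries = kv.watch("state.>").await?;
    while let Some(entry) = entries.next().await {
        let entry = entry?;
        if entry.operation == async_nats::jetstream::kv::Operation::Put {
            let state: DeploymentState = serde_json::from_slice(&entry.value)?;
            tracing::debug!(device = ?state.device_id, phase = ?state.phase, "state update");
        }
    }
    Ok(())
}
```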

View File

@@ -3,28 +3,31 @@
//! Harmony's "reconciler" pattern is: a central **operator** writes
//! desired state into NATS JetStream KV; a remote **agent** watches
//! the KV, deserializes each entry as a Score, and drives the host
//! toward that state. This split lets one operator orchestrate a
//! fleet of agents across network boundaries it can't reach
//! directly — IoT devices today, OKD cluster agents or edge-compute
//! reconcilers tomorrow.
//! toward that state. The agent writes back per-device info and
//! per-deployment state into separate KV buckets; the operator reads
//! those to aggregate `.status.aggregate` onto the CR.
//!
//! This crate holds the wire-format bits both sides must agree on:
//! NATS bucket names, KV key formats, and the `AgentStatus`
//! heartbeat payload. The Score types themselves (`PodmanV0Score`,
//! future variants) live in their respective harmony modules
//! consumers import them from there and serialize them over the
//! transport this crate describes.
//! NATS bucket names, KV key formats, and the typed payloads
//! (`DeviceInfo`, `DeploymentState`, `HeartbeatPayload`). The Score
//! types themselves live in their respective harmony modules.
//!
//! **Deliberately lean** — no tokio, no async-nats, no harmony.
//! The on-device agent build pulls it in alongside a minimal
//! async-nats client; the operator pulls it alongside kube-rs.
//! Neither should pay for the other's dependencies.
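//!
//! A minimal sketch of the shared vocabulary (NATS wiring elided;
//! the agent and operator crates own that side):
//!
//! ```ignore
//! use harmony_reconciler_contracts::{
//!     BUCKET_DESIRED_STATE, DeploymentName, desired_state_key,
//! };
//!
//! let name = DeploymentName::try_new("hello-web")?;
//! let key = desired_state_key("pi-01", &name);
//! assert_eq!(key, "pi-01.hello-web");
//! // operator: put the serialized Score at `key` in BUCKET_DESIRED_STATE;
//! // agent: watch the same key and reconcile toward what it reads.
//! ```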
pub mod fleet;
pub mod kv;
pub mod status;
pub use kv::{BUCKET_AGENT_STATUS, BUCKET_DESIRED_STATE, desired_state_key, status_key};
pub use status::AgentStatus;
pub use fleet::{
DeploymentName, DeploymentState, DeviceInfo, HeartbeatPayload, InvalidDeploymentName,
};
pub use kv::{
BUCKET_DESIRED_STATE, BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE,
desired_state_key, device_heartbeat_key, device_info_key, device_state_key,
};
pub use status::{InventorySnapshot, Phase};
// Re-exports so consumers (agent, operator) don't need a direct
// harmony_types dependency purely to name the cross-boundary types.

View File

@@ -1,74 +1,37 @@
//! Agent → NATS KV status payload.
//!
//! The agent publishes a heartbeat + rollup status to the
//! `agent-status` bucket every 30 s (see
//! [`crate::BUCKET_AGENT_STATUS`]). Today the payload is intentionally
//! minimal — a single `"running"` state + a timestamp — so the
//! operator can implement §12 v0.1 "Status aggregation in operator"
//! without waiting on richer per-workload reporting.
//!
//! When the agent grows richer status (per-container state, rollout
//! progress) this struct gains fields with `#[serde(default)]`; old
//! operators keep working against newer agents.
//! Shared status primitives reused across the fleet wire format.
use chrono::{DateTime, Utc};
use harmony_types::id::Id;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
/// A single heartbeat published by the agent at
/// `status.<device_id>` in the `agent-status` bucket.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentStatus {
/// Echoed from the agent's own config so the operator can
/// cross-check which device it came from if the KV key is ever
/// ambiguous. Serializes transparently as a plain string.
pub device_id: Id,
/// Coarse rollup state. v0 only ever writes `"running"`; richer
/// variants are a v0.1+ concern. A String (not an enum) so old
/// operators parsing this payload don't fail on a new variant.
pub status: String,
/// RFC 3339 UTC timestamp. Used by the smoke test's reboot-
/// detection gate — any timestamp strictly greater than the gate
/// is evidence of a post-reboot write. `chrono::DateTime<Utc>`
/// serde-serializes as RFC 3339, so the wire format stays
/// lex-comparable (the smoke's string `>` still works).
pub timestamp: DateTime<Utc>,
/// Coarse state of a single reconcile on one device.
///
/// Deliberately coarse — richer granularity (ImagePulling,
/// ContainerCreating, …) is agent-internal; the operator's
/// aggregation only needs success/failure/pending counts.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
pub enum Phase {
/// Agent has applied the Score and the container is up.
Running,
/// Reconcile hit an error. See paired `last_error` for the message.
Failed,
/// Reconcile is in flight or waiting on an external dependency
/// (image pull, network, etc.). Agents may also report this
/// between a CR apply and the first reconcile tick.
Pending,
}
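// A minimal aggregation sketch (hypothetical; the operator's real
// `.status.aggregate` shape lives in the operator crate): fold
// per-deployment phases into (running, failed, pending) counts.
#[allow(dead_code)]
fn phase_counts(phases: &[Phase]) -> (usize, usize, usize) {
    phases.iter().fold((0, 0, 0), |(r, f, p), phase| match phase {
        Phase::Running => (r + 1, f, p),
        Phase::Failed => (r, f + 1, p),
        Phase::Pending => (r, f, p + 1),
    })
}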
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn status_roundtrip() {
let s = AgentStatus {
device_id: Id::from("pi-01".to_string()),
status: "running".to_string(),
timestamp: DateTime::parse_from_rfc3339("2026-04-21T18:15:42Z")
.unwrap()
.with_timezone(&Utc),
};
let json = serde_json::to_string(&s).unwrap();
let back: AgentStatus = serde_json::from_str(&json).unwrap();
assert_eq!(s, back);
}
#[test]
fn status_has_expected_wire_keys() {
let s = AgentStatus {
device_id: Id::from("pi-01".to_string()),
status: "running".to_string(),
timestamp: DateTime::parse_from_rfc3339("2026-04-21T18:15:42Z")
.unwrap()
.with_timezone(&Utc),
};
let json = serde_json::to_string(&s).unwrap();
// device_id must serialize as a flat string (not {"value": …}).
// Relies on `#[serde(transparent)]` on `harmony_types::id::Id`.
assert!(json.contains("\"device_id\":\"pi-01\""), "got {json}");
assert!(json.contains("\"status\":\"running\""));
// RFC 3339 output — the smoke script greps a `"timestamp":"<rfc3339>"`
// literal and compares lexicographically against a gate.
assert!(json.contains("\"timestamp\":\"2026-04-21T18:15:42Z\""));
}
/// Static-ish facts about the device. Embedded in
/// [`crate::DeviceInfo`]; republished on change.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct InventorySnapshot {
pub hostname: String,
pub arch: String,
pub os: String,
pub kernel: String,
pub cpu_cores: u32,
pub memory_mb: u64,
/// Agent semver (e.g. `"0.1.0"`). Lets the operator flag
/// agents that are behind the current release.
pub agent_version: String,
}
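// Illustrative wire shape (camelCase per the serde rename above;
// values hypothetical):
//   {"hostname":"pi-01","arch":"aarch64","os":"Ubuntu 24.04",
//    "kernel":"6.8.0","cpuCores":4,"memoryMb":8192,"agentVersion":"0.1.0"}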

View File

@@ -39,7 +39,7 @@ pub enum InterpretName {
K8sIngress,
PodmanV0,
KvmVm,
IotDeviceSetup,
FleetDeviceSetup,
}
impl std::fmt::Display for InterpretName {
@@ -75,7 +75,7 @@ impl std::fmt::Display for InterpretName {
InterpretName::K8sIngress => f.write_str("K8sIngress"),
InterpretName::PodmanV0 => f.write_str("PodmanV0"),
InterpretName::KvmVm => f.write_str("KvmVm"),
InterpretName::IotDeviceSetup => f.write_str("IotDeviceSetup"),
InterpretName::FleetDeviceSetup => f.write_str("FleetDeviceSetup"),
}
}
}

View File

@@ -89,7 +89,7 @@ pub trait SystemdManager: Send + Sync {
) -> Result<ChangeReport, ExecutorError>;
/// Enable+start a user-scoped unit (e.g. `podman.socket` under
/// `iot-agent`). Assumes [`UnixUserManager::ensure_linger`] has
/// `fleet-agent`). Assumes [`UnixUserManager::ensure_linger`] has
/// already been called for the user.
async fn ensure_user_unit_active(
&self,

View File

@@ -119,6 +119,14 @@ pub struct VmFirstBootConfig {
/// Public SSH keys (OpenSSH single-line format) to authorize for
/// the admin user.
pub authorized_keys: Vec<String>,
/// Optional plaintext password for the admin user. When set,
/// the account is unlocked + SSH password auth is enabled on
/// the guest. Intended for interactive debugging / chaos
/// testing where the operator wants to log in and break things
/// manually. Leave `None` for production deployments — key-only
/// auth is the default.
#[serde(default)]
pub admin_password: Option<String>,
}
/// Observed runtime info for a VM.

View File

@@ -2,10 +2,12 @@
pub use k8s_openapi::api::{
apps::v1::{Deployment, DeploymentSpec},
core::v1::{
Container, ContainerPort, EnvVar, PodSpec, PodTemplateSpec, Service as K8sService,
ServicePort, ServiceSpec,
Container, ContainerPort, EnvVar, Namespace, PodSpec, PodTemplateSpec,
Service as K8sService, ServiceAccount, ServicePort, ServiceSpec,
},
rbac::v1::{ClusterRole, ClusterRoleBinding},
};
pub use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
use k8s_openapi::apimachinery::pkg::util::intstr::IntOrString;
use kube::core::ObjectMeta;
@@ -14,16 +16,36 @@ use crate::modules::application::config::{ApplicationNetworkPort, NetworkProtoco
use std::fs;
use std::path::{Path, PathBuf};
/// Enum representing all supported Kubernetes resource types for Helm charts.
/// Supports built-in typed resources and custom CRDs via YAML strings.
/// A rendered Kubernetes resource ready to drop into a helm chart's
/// `templates/` directory.
///
/// Each variant wraps a strongly-typed `k8s_openapi` struct — the chart
/// writer serializes via `serde_yaml` at package time, keeping the
/// `templates/` directory a pure data-transfer format (ADR 018
/// template hydration). The `CustomYaml` escape hatch is here for
/// resources we haven't typed yet; **prefer adding a typed variant
/// over using it**.
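///
/// A minimal sketch (metadata construction elided):
///
/// ```ignore
/// let res = HelmResourceKind::Namespace(ns);
/// assert_eq!(res.filename(), "namespace.yaml");
/// let yaml = res.serialize_to_yaml()?; // lands in templates/namespace.yaml
/// ```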
pub enum HelmResourceKind {
/// Built-in typed Service resource
/// `v1` Service (namespaced).
Service(K8sService),
/// Built-in typed Deployment resource
/// `apps/v1` Deployment (namespaced).
Deployment(Deployment),
/// Custom resource as pre-serialized YAML (e.g., CRDs, custom types)
/// `v1` Namespace (cluster-scoped).
Namespace(Namespace),
/// `v1` ServiceAccount (namespaced).
ServiceAccount(ServiceAccount),
/// `rbac.authorization.k8s.io/v1` ClusterRole (cluster-scoped).
ClusterRole(ClusterRole),
/// `rbac.authorization.k8s.io/v1` ClusterRoleBinding (cluster-scoped).
ClusterRoleBinding(ClusterRoleBinding),
/// `apiextensions.k8s.io/v1` CustomResourceDefinition
/// (cluster-scoped). Expected to be produced by
/// `kube::CustomResourceExt::crd()` on a derive-built type —
/// never hand-authored.
Crd(CustomResourceDefinition),
/// Escape hatch for resources without a typed variant yet.
/// Adding a typed variant above is always preferred.
CustomYaml { filename: String, content: String },
// Can add more typed variants as needed: ConfigMap, Secret, Ingress, etc.
}
impl HelmResourceKind {
@@ -31,6 +53,23 @@ impl HelmResourceKind {
match self {
HelmResourceKind::Service(_) => "service.yaml".to_string(),
HelmResourceKind::Deployment(_) => "deployment.yaml".to_string(),
HelmResourceKind::Namespace(_) => "namespace.yaml".to_string(),
HelmResourceKind::ServiceAccount(sa) => format!(
"serviceaccount-{}.yaml",
sa.metadata.name.as_deref().unwrap_or("unnamed")
),
HelmResourceKind::ClusterRole(cr) => format!(
"clusterrole-{}.yaml",
cr.metadata.name.as_deref().unwrap_or("unnamed")
),
HelmResourceKind::ClusterRoleBinding(crb) => format!(
"clusterrolebinding-{}.yaml",
crb.metadata.name.as_deref().unwrap_or("unnamed")
),
HelmResourceKind::Crd(c) => format!(
"crd-{}.yaml",
c.metadata.name.as_deref().unwrap_or("unnamed")
),
HelmResourceKind::CustomYaml { filename, .. } => filename.clone(),
}
}
@@ -39,6 +78,11 @@ impl HelmResourceKind {
match self {
HelmResourceKind::Service(s) => serde_yaml::to_string(s),
HelmResourceKind::Deployment(d) => serde_yaml::to_string(d),
HelmResourceKind::Namespace(n) => serde_yaml::to_string(n),
HelmResourceKind::ServiceAccount(sa) => serde_yaml::to_string(sa),
HelmResourceKind::ClusterRole(cr) => serde_yaml::to_string(cr),
HelmResourceKind::ClusterRoleBinding(crb) => serde_yaml::to_string(crb),
HelmResourceKind::Crd(c) => serde_yaml::to_string(c),
HelmResourceKind::CustomYaml { content, .. } => Ok(content.clone()),
}
}
@@ -65,7 +109,8 @@ impl HelmResourceKind {
}
}
/// Add a custom resource from any type that implements Serialize
/// Add a custom resource from any type that implements Serialize.
/// Prefer a typed variant constructor over this where one exists.
pub fn from_serializable<T: serde::Serialize>(
filename: impl Into<String>,
resource: &T,
@@ -444,3 +489,85 @@ pub fn create_service_from_ports(
..Default::default()
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn typed_variants_have_unique_filenames() {
let ns = Namespace {
metadata: ObjectMeta {
name: Some("fleet-system".to_string()),
..Default::default()
},
..Default::default()
};
let sa = ServiceAccount {
metadata: ObjectMeta {
name: Some("harmony-fleet-operator".to_string()),
namespace: Some("fleet-system".to_string()),
..Default::default()
},
..Default::default()
};
let cr = ClusterRole {
metadata: ObjectMeta {
name: Some("harmony-fleet-operator".to_string()),
..Default::default()
},
rules: None,
..Default::default()
};
let crb = ClusterRoleBinding {
metadata: ObjectMeta {
name: Some("harmony-fleet-operator".to_string()),
..Default::default()
},
role_ref: k8s_openapi::api::rbac::v1::RoleRef {
api_group: "rbac.authorization.k8s.io".to_string(),
kind: "ClusterRole".to_string(),
name: "harmony-fleet-operator".to_string(),
},
subjects: None,
};
let crd = CustomResourceDefinition {
metadata: ObjectMeta {
name: Some("widgets.example.io".to_string()),
..Default::default()
},
..Default::default()
};
let resources = [
HelmResourceKind::Namespace(ns),
HelmResourceKind::ServiceAccount(sa),
HelmResourceKind::ClusterRole(cr),
HelmResourceKind::ClusterRoleBinding(crb),
HelmResourceKind::Crd(crd),
];
let mut seen = std::collections::HashSet::new();
for r in &resources {
let f = r.filename();
assert!(seen.insert(f.clone()), "duplicate filename {f}");
// Make sure it serializes cleanly — catches any missing
// arm in `serialize_to_yaml`.
let yaml = r.serialize_to_yaml().expect("serialize");
assert!(!yaml.is_empty());
}
}
#[test]
fn crd_filename_carries_crd_name() {
let crd = CustomResourceDefinition {
metadata: ObjectMeta {
name: Some("deployments.fleet.nationtech.io".to_string()),
..Default::default()
},
..Default::default()
};
assert_eq!(
HelmResourceKind::Crd(crd).filename(),
"crd-deployments.fleet.nationtech.io.yaml"
);
}
}

View File

@@ -1,7 +1,7 @@
//! Bootstrapped assets shared across IoT workflows.
//!
//! Everything here follows the `ensure_*` pattern — idempotent, caches
//! results under [`HARMONY_DATA_DIR`]`/iot/…`, and runs at most once per
//! results under [`HARMONY_DATA_DIR`]`/fleet/…`, and runs at most once per
//! process (enforced by a `tokio::sync::OnceCell`). The goal is that an
//! operator can run the IoT smoke test against a freshly-installed host
//! with nothing but `libvirt + qemu + xorriso + python3 + cargo +
@@ -127,7 +127,7 @@ async fn ensure_cloud_image(
return Err(exec(format!(
"downloaded image sha256 mismatch: expected {expected_sha256}, got {actual}. \
Ubuntu may have rotated the 'current release' pointer; bump the pin in \
modules::iot::assets.rs."
modules::fleet::assets.rs."
)));
}
// World-readable so libvirt-qemu can open it without a chmod ritual.
@@ -195,7 +195,7 @@ async fn sha256_of_file(path: &Path) -> Result<String, ExecutorError> {
}
fn cloud_images_dir() -> PathBuf {
HARMONY_DATA_DIR.join("iot").join("cloud-images")
HARMONY_DATA_DIR.join("fleet").join("cloud-images")
}
// ---------------------------------------------------------------------
@@ -206,20 +206,20 @@ fn cloud_images_dir() -> PathBuf {
/// same key identifies every VM we provision for smoke/integration
/// testing — cheap to reuse, easy to discard (just `rm -rf` the dir).
#[derive(Debug, Clone)]
pub struct IotSshKeypair {
pub struct FleetSshKeypair {
pub private_key: PathBuf,
pub public_key: PathBuf,
}
/// Ensure `$HARMONY_DATA_DIR/iot/ssh/id_ed25519[.pub]` exists. Runs
/// Ensure `$HARMONY_DATA_DIR/fleet/ssh/id_ed25519[.pub]` exists. Runs
/// `ssh-keygen` once; subsequent calls return the existing paths.
pub async fn ensure_iot_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
static CELL: OnceCell<IotSshKeypair> = OnceCell::const_new();
pub async fn ensure_fleet_ssh_keypair() -> Result<FleetSshKeypair, ExecutorError> {
static CELL: OnceCell<FleetSshKeypair> = OnceCell::const_new();
CELL.get_or_try_init(provision_ssh_keypair).await.cloned()
}
async fn provision_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
let dir = HARMONY_DATA_DIR.join("iot").join("ssh");
async fn provision_ssh_keypair() -> Result<FleetSshKeypair, ExecutorError> {
let dir = HARMONY_DATA_DIR.join("fleet").join("ssh");
tokio::fs::create_dir_all(&dir)
.await
.map_err(|e| exec(format!("create ssh dir {dir:?}: {e}")))?;
@@ -231,7 +231,7 @@ async fn provision_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
let pub_path = dir.join("id_ed25519.pub");
if priv_path.exists() && pub_path.exists() {
info!("ssh keypair cache hit at {priv_path:?}");
return Ok(IotSshKeypair {
return Ok(FleetSshKeypair {
private_key: priv_path,
public_key: pub_path,
});
@@ -248,7 +248,7 @@ async fn provision_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
"-N",
"", // no passphrase
"-C",
"harmony-iot-smoke",
"harmony-fleet-smoke",
"-f",
])
.arg(&priv_path) // PathBuf — kept separate so we don't force &str conversion
@@ -263,7 +263,7 @@ async fn provision_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
String::from_utf8_lossy(&status.stderr).trim()
)));
}
Ok(IotSshKeypair {
Ok(FleetSshKeypair {
private_key: priv_path,
public_key: pub_path,
})
@@ -271,7 +271,7 @@ async fn provision_ssh_keypair() -> Result<IotSshKeypair, ExecutorError> {
/// Read the generated public key (one line, openssh format) into a string
/// suitable for cloud-init's `authorized_keys`.
pub async fn read_public_key(kp: &IotSshKeypair) -> Result<String, ExecutorError> {
pub async fn read_public_key(kp: &FleetSshKeypair) -> Result<String, ExecutorError> {
let content = tokio::fs::read_to_string(&kp.public_key)
.await
.map_err(|e| exec(format!("read {:?}: {e}", kp.public_key)))?;

View File

@@ -4,14 +4,14 @@
//! writable place to drop per-VM overlay disks + cloud-init seed ISOs.
//! Rather than ask the operator to set that up, we create a user-
//! owned dir-backed libvirt pool at
//! `$HARMONY_DATA_DIR/iot/kvm/pool/` and let libvirt handle:
//! `$HARMONY_DATA_DIR/fleet/kvm/pool/` and let libvirt handle:
//!
//! - **Perms**: dir contents get chowned to libvirt-qemu on VM start
//! via dynamic-ownership (default-on), and back to us on VM stop
//! (via remember_owner, also default-on). No `chmod 644` gymnastics.
//! - **Visibility**: `virsh vol-list harmony-iot` shows every
//! - **Visibility**: `virsh vol-list harmony-fleet` shows every
//! artifact we've created.
//! - **Cleanup**: `virsh vol-delete <name> harmony-iot` removes
//! - **Cleanup**: `virsh vol-delete <name> harmony-fleet` removes
//! managed volumes alongside `virsh undefine --remove-all-storage`.
//!
//! We *don't* rewrite the VM XML to use `<source pool="…" volume="…"/>`
@@ -30,11 +30,11 @@ use virt::storage_pool::StoragePool;
use crate::domain::config::HARMONY_DATA_DIR;
use crate::executors::ExecutorError;
pub const HARMONY_IOT_POOL_NAME: &str = "harmony-iot";
pub const HARMONY_FLEET_POOL_NAME: &str = "harmony-fleet";
/// Filesystem path + libvirt name of the managed pool.
#[derive(Debug, Clone)]
pub struct HarmonyIotPool {
pub struct HarmonyFleetPool {
pub name: String,
pub path: PathBuf,
}
@@ -46,13 +46,13 @@ pub struct HarmonyIotPool {
/// **Requires libvirt-group membership**. When the user isn't in the
/// group, libvirt rejects the `qemu:///system` connection — the
/// preflight check catches that upstream.
pub async fn ensure_harmony_iot_pool() -> Result<HarmonyIotPool, ExecutorError> {
static CELL: OnceCell<HarmonyIotPool> = OnceCell::const_new();
pub async fn ensure_harmony_fleet_pool() -> Result<HarmonyFleetPool, ExecutorError> {
static CELL: OnceCell<HarmonyFleetPool> = OnceCell::const_new();
CELL.get_or_try_init(provision_pool).await.cloned()
}
async fn provision_pool() -> Result<HarmonyIotPool, ExecutorError> {
let pool_dir = HARMONY_DATA_DIR.join("iot").join("kvm").join("pool");
async fn provision_pool() -> Result<HarmonyFleetPool, ExecutorError> {
let pool_dir = HARMONY_DATA_DIR.join("fleet").join("kvm").join("pool");
tokio::fs::create_dir_all(&pool_dir)
.await
.map_err(|e| exec(format!("create pool dir {pool_dir:?}: {e}")))?;
@@ -66,7 +66,7 @@ async fn provision_pool() -> Result<HarmonyIotPool, ExecutorError> {
.map_err(|e| exec(format!("chmod pool dir: {e}")))?;
let pool_path = pool_dir.clone();
let pool_name = HARMONY_IOT_POOL_NAME.to_string();
let pool_name = HARMONY_FLEET_POOL_NAME.to_string();
// virt-rs is blocking C bindings — bounce into spawn_blocking.
let pool_name_blocking = pool_name.clone();
@@ -106,7 +106,7 @@ async fn provision_pool() -> Result<HarmonyIotPool, ExecutorError> {
.await
.map_err(|e| exec(format!("spawn_blocking pool setup: {e}")))??;
Ok(HarmonyIotPool {
Ok(HarmonyFleetPool {
name: pool_name,
path: pool_path,
})

View File

@@ -0,0 +1,40 @@
//! Harmony-side Scores for fleet device onboarding.
//!
//! Today this module exposes [`FleetDeviceSetupScore`] — a customer
//! runs it against a freshly-booted device (Pi, VM, bare-metal node
//! later) to install podman, place the `fleet-agent` binary, drop
//! the TOML config, and bring up the agent under systemd. Re-running
//! with a changed config (different labels, new NATS URL, new
//! credentials) is how a device is moved between fleet partitions.
//!
//! The operator + agent crates live outside `harmony/` under
//! `fleet/harmony-fleet-operator/` and `fleet/harmony-fleet-agent/`.
//! What belongs here is the harmony-framework side: the Scores a
//! customer runs through `harmony_cli::run` to provision devices
//! before they ever talk to NATS.
//!
//! "Fleet" is deliberately domain-agnostic — IoT was the first
//! customer's use case but the reconciler pattern (operator → NATS
//! KV → agent → target) applies equally to Pi podman, OKD apply,
//! KVM VMs, etc.
pub mod assets;
#[cfg(feature = "kvm")]
pub mod libvirt_pool;
pub mod preflight;
mod setup_score;
#[cfg(feature = "kvm")]
mod vm_score;
pub use assets::{
FleetSshKeypair, UBUNTU_2404_CLOUDIMG_ARM64_FILENAME, UBUNTU_2404_CLOUDIMG_ARM64_SHA256,
UBUNTU_2404_CLOUDIMG_ARM64_URL, UBUNTU_2404_CLOUDIMG_FILENAME, UBUNTU_2404_CLOUDIMG_SHA256,
UBUNTU_2404_CLOUDIMG_URL, ensure_fleet_ssh_keypair, ensure_ubuntu_2404_cloud_image,
ensure_ubuntu_2404_cloud_image_for_arch, read_public_key,
};
#[cfg(feature = "kvm")]
pub use libvirt_pool::{HARMONY_FLEET_POOL_NAME, HarmonyFleetPool, ensure_harmony_fleet_pool};
pub use preflight::{check_fleet_smoke_preflight, check_fleet_smoke_preflight_for_arch};
pub use setup_score::{FleetDeviceSetupConfig, FleetDeviceSetupScore};
#[cfg(feature = "kvm")]
pub use vm_score::ProvisionVmScore;

View File

@@ -19,18 +19,20 @@ use crate::executors::ExecutorError;
use crate::modules::kvm::firmware::discover_aarch64_firmware;
/// Run every preflight check for an x86_64 smoke run — equivalent
/// to [`check_iot_smoke_preflight_for_arch`] with
/// to [`check_fleet_smoke_preflight_for_arch`] with
/// [`VmArchitecture::X86_64`]. Kept as a distinct function so
/// existing callers don't need to thread an arch through yet.
pub async fn check_iot_smoke_preflight() -> Result<(), ExecutorError> {
check_iot_smoke_preflight_for_arch(VmArchitecture::X86_64).await
pub async fn check_fleet_smoke_preflight() -> Result<(), ExecutorError> {
check_fleet_smoke_preflight_for_arch(VmArchitecture::X86_64).await
}
/// Arch-aware preflight. On top of the host-generic checks
/// (virsh, qemu-img, xorriso, python3, ssh-keygen, libvirt group,
/// default network), an aarch64 target requires
/// `qemu-system-aarch64` and a usable AAVMF firmware pair.
pub async fn check_iot_smoke_preflight_for_arch(arch: VmArchitecture) -> Result<(), ExecutorError> {
pub async fn check_fleet_smoke_preflight_for_arch(
arch: VmArchitecture,
) -> Result<(), ExecutorError> {
check_tool_on_path("virsh", "libvirt client").await?;
check_tool_on_path("qemu-img", "qemu-utils").await?;
check_tool_on_path("xorriso", "ISO image builder").await?;

View File

@@ -1,8 +1,9 @@
//! [`IotDeviceSetupScore`] — install podman + the iot-agent, wire the
//! [`FleetDeviceSetupScore`] — install podman + the fleet-agent, wire the
//! agent's TOML config, enable the systemd unit. Idempotent: re-running
//! with a changed config (e.g. a different `group`) updates only what
//! differs and restarts the agent once.
//! with a changed config (different labels, new NATS url, etc.) updates
//! only what differs and restarts the agent once.
use std::collections::BTreeMap;
use std::path::PathBuf;
use async_trait::async_trait;
@@ -25,43 +26,46 @@ use crate::score::Score;
/// User-visible configuration for the setup Score. Everything a customer
/// needs to tell us to bring a device into the fleet.
///
/// **On `group`.** For v0 the group is a *label*, written into the
/// agent's TOML config and reported back via the status bucket. It does
/// not yet drive deployment routing — `Deployment.spec.targetDevices`
/// still takes explicit device IDs. `targetGroups` is a v0.1+ item
/// (ROADMAP §6.5). Running this Score twice against the same device
/// with different `group` values is how a device is moved between
/// fleet partitions once group routing lands.
/// **On `labels`.** The label map is published verbatim in every
/// DeviceInfo heartbeat so the operator can resolve a Deployment's
/// `spec.targetSelector` against this device (K8s-Node-analogue flow).
/// `group` is the conventional primary label but any key/value pair
/// is legal. Re-running this Score with a changed label map is how a
/// device is moved between fleet partitions: the config file is
/// regenerated, byte-compare idempotency fires, the agent restarts,
/// new labels propagate.
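///
/// A minimal construction sketch (credentials + paths hypothetical):
///
/// ```ignore
/// let score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig {
///     device_id: Id::from("pi-01".to_string()),
///     labels: BTreeMap::from([("group".to_string(), "site-a".to_string())]),
///     nats_urls: vec!["nats://nats.example:4222".to_string()],
///     nats_user: "admin".to_string(),
///     nats_pass: "pw".to_string(),
///     agent_binary_path: PathBuf::from("./target/fleet-agent"),
/// });
/// ```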
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IotDeviceSetupConfig {
pub struct FleetDeviceSetupConfig {
/// Stable device identifier. Written into the agent's TOML and
/// used as the KV key prefix (`<device_id>.<deployment>`). Harmony
/// `Id` values are sortable-by-creation-time and collision-safe
/// at up to ~10k devices/sec, which matches the feel of a fleet
/// registry.
pub device_id: Id,
/// Fleet partition this device belongs to.
pub group: String,
/// Routing labels. Published in every DeviceInfo heartbeat; the
/// operator reflects them into `Device.metadata.labels` so
/// Deployment selectors can match. Typical keys: `group`,
/// `arch`, `role`, `region`.
pub labels: BTreeMap<String, String>,
/// NATS URLs the agent should connect to. Typically one entry.
pub nats_urls: Vec<String>,
/// Shared v0 credentials (Zitadel-issued per-device tokens in v0.2).
pub nats_user: String,
pub nats_pass: String,
/// Local filesystem path to the cross-compiled `iot-agent-v0`
/// Local filesystem path to the cross-compiled `fleet-agent-v0`
/// binary. The Score uploads it to the device and installs to
/// `/usr/local/bin/iot-agent`. Future v0.1: this becomes a
/// `/usr/local/bin/fleet-agent`. Future v0.1: this becomes a
/// `DownloadableAsset` pointing at CI-published artifacts.
pub agent_binary_path: PathBuf,
}
impl IotDeviceSetupConfig {
/// Render the agent's `/etc/iot-agent/config.toml` content.
impl FleetDeviceSetupConfig {
/// Render the agent's `/etc/fleet-agent/config.toml` content.
pub fn render_toml(&self) -> String {
// Raw-string template with format! — the TOML escape rules for
// double-quoted strings are just `\` and `"`, handled by
// [`toml_escape`].
let device_id = toml_escape(&self.device_id.to_string());
let group = toml_escape(&self.group);
let nats_user = toml_escape(&self.nats_user);
let nats_pass = toml_escape(&self.nats_pass);
let urls = self
@@ -70,10 +74,18 @@ impl IotDeviceSetupConfig {
.map(|u| format!("\"{}\"", toml_escape(u)))
.collect::<Vec<_>>()
.join(", ");
// BTreeMap iteration is ordered — same labels render to
// byte-identical TOML across runs, which is what the
// Score's byte-compare idempotency relies on.
let labels = self
.labels
.iter()
.map(|(k, v)| format!("{} = \"{}\"", toml_escape(k), toml_escape(v)))
.collect::<Vec<_>>()
.join("\n");
format!(
r#"[agent]
device_id = "{device_id}"
group = "{group}"
[credentials]
type = "toml-shared"
@@ -82,6 +94,9 @@ nats_pass = "{nats_pass}"
[nats]
urls = [{urls}]
[labels]
{labels}
"#
)
}
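// Illustrative (abridged) output for device "pi-42" with labels
// {arch = "aarch64", group = "site-a"}; values hypothetical:
//
//   [agent]
//   device_id = "pi-42"
//   ...
//   [nats]
//   urls = ["nats://nats:4222"]
//
//   [labels]
//   arch = "aarch64"
//   group = "site-a"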
@@ -95,10 +110,10 @@ Wants=network-online.target
[Service]
Type=simple
User=iot-agent
Environment=IOT_AGENT_CONFIG=/etc/iot-agent/config.toml
User=fleet-agent
Environment=FLEET_AGENT_CONFIG=/etc/fleet-agent/config.toml
Environment=RUST_LOG=info
ExecStart=/usr/local/bin/iot-agent
ExecStart=/usr/local/bin/fleet-agent
Restart=on-failure
RestartSec=5
StandardOutput=journal
@@ -115,23 +130,23 @@ fn toml_escape(s: &str) -> String {
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IotDeviceSetupScore {
pub config: IotDeviceSetupConfig,
pub struct FleetDeviceSetupScore {
pub config: FleetDeviceSetupConfig,
}
impl IotDeviceSetupScore {
pub fn new(config: IotDeviceSetupConfig) -> Self {
impl FleetDeviceSetupScore {
pub fn new(config: FleetDeviceSetupConfig) -> Self {
Self { config }
}
}
impl<T: Topology + LinuxHostConfiguration> Score<T> for IotDeviceSetupScore {
impl<T: Topology + LinuxHostConfiguration> Score<T> for FleetDeviceSetupScore {
fn name(&self) -> String {
format!("IotDeviceSetupScore({})", self.config.device_id)
format!("FleetDeviceSetupScore({})", self.config.device_id)
}
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(IotDeviceSetupInterpret {
Box::new(FleetDeviceSetupInterpret {
config: self.config.clone(),
version: Version::from("0.1.0").expect("static version"),
status: InterpretStatus::QUEUED,
@@ -140,16 +155,16 @@ impl<T: Topology + LinuxHostConfiguration> Score<T> for IotDeviceSetupScore {
}
#[derive(Debug)]
struct IotDeviceSetupInterpret {
config: IotDeviceSetupConfig,
struct FleetDeviceSetupInterpret {
config: FleetDeviceSetupConfig,
version: Version,
status: InterpretStatus,
}
#[async_trait]
impl<T: Topology + LinuxHostConfiguration> Interpret<T> for IotDeviceSetupInterpret {
impl<T: Topology + LinuxHostConfiguration> Interpret<T> for FleetDeviceSetupInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::IotDeviceSetup
InterpretName::FleetDeviceSetup
}
fn get_version(&self) -> Version {
self.version.clone()
@@ -179,33 +194,38 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for IotDeviceSetupInterp
log_change(&mut change_log, format!("package:{pkg}"), r);
}
// 2. iot-agent system user. Lingered so its user-systemd survives
// logout (needed for the user podman.socket we'll enable below).
// No explicit primary group — useradd on Debian-family systems
// defaults to `USERGROUPS_ENAB yes` which auto-creates a group
// matching the username. Setting `group:` here would require a
// separate `ensure_group` step to pre-create it.
// 2. fleet-agent user. Not `--system`: Ubuntu's useradd skips
// subuid/subgid auto-allocation for system users on the
// assumption that service accounts don't run user namespaces.
// Rootless podman needs those ranges in /etc/subuid +
// /etc/subgid before the container runtime ever starts. A
// regular useradd auto-allocates a non-overlapping range, so
// we get correct behavior for free and can coexist with any
// other user on the host that also runs rootless containers.
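// (Illustrative: a typical auto-allocated /etc/subuid entry reads
// `fleet-agent:165536:65536`, i.e. start uid plus range length.)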
//
// Lingered so the user-systemd instance survives logout —
// required for the user podman.socket we enable below.
let user_spec = UserSpec {
name: "iot-agent".to_string(),
name: "fleet-agent".to_string(),
group: None,
supplementary_groups: vec![],
shell: Some("/bin/bash".to_string()),
system: true,
system: false,
create_home: true,
};
let r = UnixUserManager::ensure_user(topology, &user_spec)
.await
.map_err(wrap)?;
log_change(&mut change_log, "user:iot-agent", r);
log_change(&mut change_log, "user:fleet-agent", r);
let r = UnixUserManager::ensure_linger(topology, "iot-agent")
let r = UnixUserManager::ensure_linger(topology, "fleet-agent")
.await
.map_err(wrap)?;
log_change(&mut change_log, "linger:iot-agent", r);
log_change(&mut change_log, "linger:fleet-agent", r);
// 3. User-scoped podman socket. Required by `PodmanTopology` on
// the agent so it reaches /run/user/<uid>/podman/podman.sock.
let r = SystemdManager::ensure_user_unit_active(topology, "iot-agent", "podman.socket")
let r = SystemdManager::ensure_user_unit_active(topology, "fleet-agent", "podman.socket")
.await
.map_err(wrap)?;
log_change(&mut change_log, "user-unit:podman.socket", r);
@@ -218,7 +238,7 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for IotDeviceSetupInterp
let binary_r = FileDelivery::ensure_file(
topology,
&FileSpec {
path: "/usr/local/bin/iot-agent".to_string(),
path: "/usr/local/bin/fleet-agent".to_string(),
source: FileSource::LocalPath(cfg.agent_binary_path.clone()),
owner: Some("root".to_string()),
group: Some("root".to_string()),
@@ -227,25 +247,25 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for IotDeviceSetupInterp
)
.await
.map_err(wrap)?;
log_change(&mut change_log, "file:/usr/local/bin/iot-agent", binary_r);
log_change(&mut change_log, "file:/usr/local/bin/fleet-agent", binary_r);
// 5. /etc/iot-agent/ + config.toml
// 5. /etc/fleet-agent/ + config.toml
let config_toml = cfg.render_toml();
let toml_spec = FileSpec {
path: "/etc/iot-agent/config.toml".to_string(),
path: "/etc/fleet-agent/config.toml".to_string(),
source: FileSource::Content(config_toml),
owner: Some("iot-agent".to_string()),
group: Some("iot-agent".to_string()),
owner: Some("fleet-agent".to_string()),
group: Some("fleet-agent".to_string()),
mode: Some(0o600),
};
let toml_r = FileDelivery::ensure_file(topology, &toml_spec)
.await
.map_err(wrap)?;
log_change(&mut change_log, "file:/etc/iot-agent/config.toml", toml_r);
log_change(&mut change_log, "file:/etc/fleet-agent/config.toml", toml_r);
// 6. systemd unit for the agent itself.
let unit = SystemdUnitSpec {
name: "iot-agent".to_string(),
name: "fleet-agent".to_string(),
unit_content: cfg.render_systemd_unit().to_string(),
scope: SystemdScope::System,
start_immediately: true,
@@ -253,18 +273,18 @@ impl<T: Topology + LinuxHostConfiguration> Interpret<T> for IotDeviceSetupInterp
let unit_r = SystemdManager::ensure_systemd_unit(topology, &unit)
.await
.map_err(wrap)?;
log_change(&mut change_log, "unit:iot-agent", unit_r);
log_change(&mut change_log, "unit:fleet-agent", unit_r);
// 7. Restart the agent iff anything that affects it changed.
let needs_restart = toml_r.changed || unit_r.changed || binary_r.changed;
if needs_restart {
SystemdManager::restart_service(topology, "iot-agent", SystemdScope::System)
SystemdManager::restart_service(topology, "fleet-agent", SystemdScope::System)
.await
.map_err(wrap)?;
change_log.push("restart:iot-agent".to_string());
info!("iot-agent restarted to pick up config/unit change");
change_log.push("restart:fleet-agent".to_string());
info!("fleet-agent restarted to pick up config/unit change");
} else {
debug!("iot-agent config + unit unchanged; no restart");
debug!("fleet-agent config + unit unchanged; no restart");
}
let outcome = if change_log.is_empty() {
@@ -292,3 +312,55 @@ fn log_change(change_log: &mut Vec<String>, what: impl Into<String>, r: ChangeRe
change_log.push(what.into());
}
}
#[cfg(test)]
mod tests {
use super::*;
fn base_config(labels: BTreeMap<String, String>) -> FleetDeviceSetupConfig {
FleetDeviceSetupConfig {
device_id: Id::from("pi-42".to_string()),
labels,
nats_urls: vec!["nats://nats:4222".to_string()],
nats_user: "admin".to_string(),
nats_pass: "pw".to_string(),
agent_binary_path: PathBuf::from("/dev/null"),
}
}
#[test]
fn render_toml_includes_labels_section() {
let mut labels = BTreeMap::new();
labels.insert("group".to_string(), "site-a".to_string());
labels.insert("arch".to_string(), "aarch64".to_string());
let toml = base_config(labels).render_toml();
assert!(toml.contains("[labels]"));
// BTreeMap sorts keys: `arch` before `group`.
let labels_block = toml.split("[labels]").nth(1).unwrap();
let arch_idx = labels_block.find("arch").unwrap();
let group_idx = labels_block.find("group").unwrap();
assert!(arch_idx < group_idx, "labels must render sorted");
assert!(labels_block.contains(r#"arch = "aarch64""#));
assert!(labels_block.contains(r#"group = "site-a""#));
}
#[test]
fn render_toml_same_labels_yields_identical_output() {
// Core idempotency invariant: two structurally-identical
// configs render byte-identical TOML. The Score's change
// detection relies on this.
let mut labels = BTreeMap::new();
labels.insert("group".to_string(), "site-a".to_string());
let a = base_config(labels.clone()).render_toml();
let b = base_config(labels).render_toml();
assert_eq!(a, b);
}
#[test]
fn render_toml_escapes_label_values() {
let mut labels = BTreeMap::new();
labels.insert("group".to_string(), r#"has"quote"#.to_string());
let toml = base_config(labels).render_toml();
assert!(toml.contains(r#"group = "has\"quote""#));
}
}

View File

@@ -1,33 +0,0 @@
//! IoT fleet primitives exposed to customers.
//!
//! Right now that's the single [`IotDeviceSetupScore`] — a customer runs
//! it against a freshly-booted device (Pi or VM) to install podman,
//! place the iot-agent binary, drop the TOML config, and bring up the
//! agent under systemd. Re-running with a different config (e.g.
//! different `group`) is what moves a device between fleet partitions.
//!
//! The operator + agent crates live outside of `harmony/` in `iot/`.
//! This module is where *Harmony Scores* that target IoT fleets live —
//! they run inside the Harmony framework proper, driven by the same
//! `harmony_cli::run` story every other Score uses.
pub mod assets;
#[cfg(feature = "kvm")]
pub mod libvirt_pool;
pub mod preflight;
mod setup_score;
#[cfg(feature = "kvm")]
mod vm_score;
pub use assets::{
IotSshKeypair, UBUNTU_2404_CLOUDIMG_ARM64_FILENAME, UBUNTU_2404_CLOUDIMG_ARM64_SHA256,
UBUNTU_2404_CLOUDIMG_ARM64_URL, UBUNTU_2404_CLOUDIMG_FILENAME, UBUNTU_2404_CLOUDIMG_SHA256,
UBUNTU_2404_CLOUDIMG_URL, ensure_iot_ssh_keypair, ensure_ubuntu_2404_cloud_image,
ensure_ubuntu_2404_cloud_image_for_arch, read_public_key,
};
#[cfg(feature = "kvm")]
pub use libvirt_pool::{HARMONY_IOT_POOL_NAME, HarmonyIotPool, ensure_harmony_iot_pool};
pub use preflight::{check_iot_smoke_preflight, check_iot_smoke_preflight_for_arch};
pub use setup_score::{IotDeviceSetupConfig, IotDeviceSetupScore};
#[cfg(feature = "kvm")]
pub use vm_score::ProvisionVmScore;

View File

@@ -0,0 +1,98 @@
//! Minimal Kubernetes topology for ad-hoc Score execution.
//!
//! Harmony's opinionated topologies (`K8sAnywhereTopology`,
//! `HAClusterTopology`) do a lot of product-level setup inside
//! `ensure_ready` — cert-manager install, tenant-manager bootstrap,
//! helm probes, TLS routing. That's appropriate when a caller is
//! standing up an entire NationTech-style product stack. It is
//! **not** appropriate when a caller just wants to apply a typed
//! resource (a CRD, a Deployment, a Secret, …) against an existing
//! Kubernetes cluster.
//!
//! `K8sBareTopology` is what that narrow use case needs: it carries
//! a single [`K8sClient`], implements [`K8sclient`] by handing it
//! out, and its `ensure_ready` is a noop. No helm, no certs, no
//! tenant-manager, no PLEG. Compose it with whichever
//! `K8sResourceScore<K>` / domain score needs a cluster client and
//! nothing more.
//!
//! History: this type is the promotion of a three-dozen-line
//! `InstallTopology` that lived inside `harmony-fleet-operator`'s
//! `install.rs`. When the NATS single-node install work added a
//! second consumer wanting the same shape, the extraction became
//! obvious (see ROADMAP/12-code-review-april-2026.md §12.6).
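//!
//! A minimal sketch (names illustrative):
//!
//! ```ignore
//! use harmony::modules::k8s::K8sBareTopology;
//! use harmony::topology::K8sclient;
//!
//! let topology = K8sBareTopology::from_kubeconfig("crd-apply").await?;
//! let client = topology.k8s_client().await?; // Arc<K8sClient>, nothing more
//! ```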
use std::process::Command;
use std::sync::Arc;
use async_trait::async_trait;
use harmony_k8s::K8sClient;
use crate::domain::topology::{HelmCommand, PreparationError, PreparationOutcome, Topology};
use crate::topology::K8sclient;
/// Minimal `Topology` that only knows how to hand out a pre-built
/// `K8sClient`. Use for Scores that need `K8sclient` but nothing
/// else from their topology.
///
/// Construct via [`K8sBareTopology::from_kubeconfig`] or
/// [`K8sBareTopology::from_client`].
#[derive(Clone)]
pub struct K8sBareTopology {
name: String,
client: Arc<K8sClient>,
}
impl K8sBareTopology {
/// Wrap a pre-built `K8sClient`. Caller is responsible for
/// having loaded it from the right place (KUBECONFIG, explicit
/// path, in-cluster service account, …).
pub fn from_client(name: impl Into<String>, client: Arc<K8sClient>) -> Self {
Self {
name: name.into(),
client,
}
}
/// Build a client from the standard kube client config
/// resolution (`KUBECONFIG` env var → `~/.kube/config` →
/// in-cluster service account, in that order).
pub async fn from_kubeconfig(name: impl Into<String>) -> Result<Self, String> {
let kube_client = kube::Client::try_default()
.await
.map_err(|e| format!("building kube client: {e}"))?;
Ok(Self::from_client(
name,
Arc::new(K8sClient::new(kube_client)),
))
}
}
#[async_trait]
impl Topology for K8sBareTopology {
fn name(&self) -> &str {
&self.name
}
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
Ok(PreparationOutcome::Noop)
}
}
#[async_trait]
impl K8sclient for K8sBareTopology {
async fn k8s_client(&self) -> Result<Arc<K8sClient>, String> {
Ok(self.client.clone())
}
}
/// Run the host's `helm` binary with whatever KUBECONFIG resolution
/// was used to build the `K8sBareTopology`. No extra context / ns
/// args — callers pass those on the command line. Lets NATS +
/// operator-install flows go through `HelmChartScore` against the
/// same cluster the bare topology already targets.
impl HelmCommand for K8sBareTopology {
fn get_helm_command(&self) -> Command {
Command::new("helm")
}
}

View File

@@ -1,7 +1,10 @@
pub mod apps;
pub mod bare_topology;
pub mod coredns;
pub mod deployment;
mod failover;
pub mod ingress;
pub mod namespace;
pub mod resource;
pub use bare_topology::K8sBareTopology;

View File

@@ -48,6 +48,13 @@ pub struct CloudInitSeedConfig<'a> {
pub authorized_key: &'a str,
/// Local username to create with passwordless sudo.
pub user: &'a str,
/// Optional plaintext password for the admin user. `None` keeps
/// the account SSH-key-only (the default). Setting a password
/// unlocks the account *and* enables `ssh_pwauth: true` on the
/// guest — intended for interactive debugging / chaos-testing
/// workflows where the operator wants console or SSH password
/// access to break things on purpose.
pub admin_password: Option<&'a str>,
/// Extra `runcmd` lines to append to the user-data. Mostly useful
/// for no-op debugging; keep empty in production paths.
pub extra_runcmd: Vec<String>,
@@ -144,6 +151,21 @@ fn render_user_data(cfg: &CloudInitSeedConfig<'_>) -> String {
}
s
};
// Password handling is split into user-level (lock_passwd +
// plain_text_passwd) and daemon-level (ssh_pwauth). When a
// password is provided, cloud-init hashes + sets the password and
// we allow SSH password auth. When it isn't, the account stays
// locked and sshd denies password logins — the production default.
let (lock_passwd, plain_text_passwd_line, ssh_pwauth) = match cfg.admin_password {
Some(pw) => (
"false",
format!(" plain_text_passwd: \"{}\"\n", yaml_escape(pw)),
"true",
),
None => ("true", String::new(), "false"),
};
format!(
r#"#cloud-config
hostname: {hostname}
@@ -153,10 +175,10 @@ users:
- name: {user}
sudo: ALL=(ALL) NOPASSWD:ALL
shell: /bin/bash
lock_passwd: true
ssh_authorized_keys:
lock_passwd: {lock_passwd}
{plain_text_passwd_line} ssh_authorized_keys:
- {authorized_key}
ssh_pwauth: false
ssh_pwauth: {ssh_pwauth}
disable_root: true
{runcmd}"#,
hostname = cfg.hostname,
@@ -165,6 +187,11 @@ disable_root: true
)
}
fn yaml_escape(s: &str) -> String {
// Double-quoted YAML: backslash and double-quote need escaping.
s.replace('\\', "\\\\").replace('"', "\\\"")
}
async fn write_file(path: &Path, content: &str) -> Result<(), KvmError> {
let mut f = tokio::fs::File::create(path).await.map_err(KvmError::Io)?;
f.write_all(content.as_bytes())
@@ -188,3 +215,60 @@ async fn which_xorriso() -> Option<PathBuf> {
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn no_password_locks_account_and_disables_ssh_pwauth() {
let cfg = CloudInitSeedConfig {
hostname: "pi-01",
authorized_key: "ssh-ed25519 AAAA test",
user: "fleet-admin",
admin_password: None,
extra_runcmd: vec![],
};
let out = render_user_data(&cfg);
assert!(out.contains("lock_passwd: true"), "got:\n{out}");
assert!(out.contains("ssh_pwauth: false"), "got:\n{out}");
assert!(
!out.contains("plain_text_passwd"),
"password leaked into cloud-init without admin_password set:\n{out}"
);
}
#[test]
fn with_password_unlocks_account_and_enables_ssh_pwauth() {
let cfg = CloudInitSeedConfig {
hostname: "pi-01",
authorized_key: "ssh-ed25519 AAAA test",
user: "fleet-admin",
admin_password: Some("break-things-123"),
extra_runcmd: vec![],
};
let out = render_user_data(&cfg);
assert!(out.contains("lock_passwd: false"), "got:\n{out}");
assert!(out.contains("ssh_pwauth: true"), "got:\n{out}");
assert!(
out.contains("plain_text_passwd: \"break-things-123\""),
"password not inlined in cloud-init:\n{out}"
);
}
#[test]
fn password_with_quotes_is_yaml_escaped() {
let cfg = CloudInitSeedConfig {
hostname: "pi-01",
authorized_key: "ssh-ed25519 AAAA",
user: "fleet-admin",
admin_password: Some("he said \"hi\""),
extra_runcmd: vec![],
};
let out = render_user_data(&cfg);
assert!(
out.contains(r#"plain_text_passwd: "he said \"hi\"""#),
"got:\n{out}"
);
}
}

View File

@@ -35,7 +35,7 @@ pub const DEFAULT_ADMIN_USER: &str = "harmony-admin";
///
/// Composes with a caller-chosen storage pool directory where per-VM
/// overlays + seed ISOs are placed. Harmony's IoT workflows use
/// [`crate::modules::iot::ensure_harmony_iot_pool`] to populate that
/// [`crate::modules::fleet::ensure_harmony_fleet_pool`] to populate that
/// dir; other callers can point at any user-owned libvirt pool root.
pub struct KvmVirtualMachineHost {
name: String,
@@ -120,7 +120,7 @@ impl VirtualMachineHost for KvmVirtualMachineHost {
.await
.map_err(|e| exec(format!("remove stale overlay: {e}")))?;
}
create_overlay(&self.base_image_path, &overlay_path).await?;
create_overlay(&self.base_image_path, &overlay_path, spec.disk_size_gb).await?;
info!(
"created overlay disk {overlay_path:?} backed by {:?}",
self.base_image_path
@@ -297,21 +297,36 @@ async fn ensure_vm_firmware(
async fn create_overlay(
base: &std::path::Path,
overlay: &std::path::Path,
size_gb: Option<u32>,
) -> Result<(), ExecutorError> {
let base_str = base
.to_str()
.ok_or_else(|| exec("base image path is not valid UTF-8"))?;
let overlay_str = overlay
.to_str()
.ok_or_else(|| exec("overlay path is not valid UTF-8"))?;
// qemu-img takes an optional trailing SIZE. Without it, the
// overlay inherits the backing image's virtual size (2-3 GiB
// for the stock Ubuntu cloud image) which is tight as soon as
// a couple of container images land. Ubuntu cloud-init ships
// `cloud-initramfs-growroot`, so a larger virtual size is
// resized on first boot without extra glue.
let size_arg = size_gb.filter(|g| *g > 0).map(|g| format!("{g}G"));
let mut args: Vec<&str> = vec![
"create",
"-f",
"qcow2",
"-F",
"qcow2",
"-b",
base_str,
overlay_str,
];
if let Some(s) = size_arg.as_deref() {
args.push(s);
}
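// Resulting invocation for size_gb = Some(20) (paths illustrative):
//   qemu-img create -f qcow2 -F qcow2 -b base.qcow2 overlay.qcow2 20G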
let output = Command::new("qemu-img")
.args([
"create",
"-f",
"qcow2",
"-F",
"qcow2",
"-b",
base.to_str()
.ok_or_else(|| exec("base image path is not valid UTF-8"))?,
overlay
.to_str()
.ok_or_else(|| exec("overlay path is not valid UTF-8"))?,
])
.args(&args)
.stdout(Stdio::null())
.stderr(Stdio::piped())
.output()
@@ -349,6 +364,7 @@ async fn build_cloud_init_seed(
hostname: &hostname,
authorized_key: &authorized_key,
user: &admin_user,
admin_password: first_boot.admin_password.as_deref(),
extra_runcmd: vec![],
},
pool_dir,

View File

@@ -57,7 +57,7 @@ impl AnsibleHostConfigurator {
// encapsulation we want. Callers say "install podman"; we
// pick apt/dnf/pacman/apk. Debian-family is the only dispatch
// currently wired because it's our first concrete target (IoT
// runs on Raspbian/Ubuntu per ROADMAP/iot_platform/
// runs on Raspbian/Ubuntu per ROADMAP/fleet_platform/
// v0_walking_skeleton.md §5.3). Extending to RHEL/Fedora/
// Alpine is a matter of detecting the family here and picking
// `ansible.builtin.dnf` / `community.general.pacman` /
@@ -112,7 +112,7 @@ impl AnsibleHostConfigurator {
spec: &FileSpec,
) -> Result<ChangeReport, ExecutorError> {
// Ansible's `copy` module doesn't auto-create parent dirs, so
// writes into fresh paths like `/etc/iot-agent/config.toml`
// writes into fresh paths like `/etc/fleet-agent/config.toml`
// fail with "Destination directory … does not exist". Create
// the parent first via the `file` module; state=directory is
// idempotent so this is a cheap noop on re-run.
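// Equivalent playbook shape (illustrative; not the literal generated
// tasks):
//   - ansible.builtin.file: { path: /etc/fleet-agent, state: directory }
//   - ansible.builtin.copy: { dest: /etc/fleet-agent/config.toml, ... }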

View File

@@ -5,10 +5,10 @@ pub mod cert_manager;
pub mod dhcp;
pub mod dns;
pub mod dummy;
pub mod fleet;
pub mod helm;
pub mod http;
pub mod inventory;
pub mod iot;
pub mod k3d;
pub mod k8s;
#[cfg(feature = "kvm")]

View File

@@ -0,0 +1,185 @@
//! Shared helm-chart primitive for every NATS deployment shape.
//!
//! The upstream `nats/nats` helm chart is the single source of truth
//! for how a NATS pod / STS is actually built: probes, resource
//! shapes, RBAC, stateful-set options, JetStream storage volumes,
//! clustering, TLS, gateways, leaf nodes. Every high-level NATS
//! Score — `NatsBasicScore` for single-node, `NatsK8sScore` for
//! supercluster — delegates here. Differences between shapes are
//! expressed as `values_yaml`, not as parallel resource constructors.
//!
//! Why this is the right primitive:
//!
//! - The NATS project's chart tracks upstream server features
//! automatically; we get new knobs (`websocket.enabled`,
//! `gateway.merge.advertise`, …) without shipping code.
//! - One helm release per NATS deployment means `helm upgrade` /
//! `helm uninstall` / `helm list` all work naturally.
//! - Chapter 4 of the harmony review learned this the hard way: a
//! parallel k8s_openapi-based NATS primitive diverged on probe
//! shape + pod-anti-affinity and was deleted.
use std::str::FromStr;
use async_trait::async_trait;
use harmony_macros::hurl;
use harmony_types::id::Id;
use non_blank_string_rs::NonBlankString;
use serde::Serialize;
use crate::data::Version;
use crate::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome};
use crate::inventory::Inventory;
use crate::modules::helm::chart::{HelmChartScore, HelmRepository};
use crate::score::Score;
use crate::topology::{HelmCommand, Topology};
/// The NATS-IO project's published helm chart. `hurl!` needs a
/// literal so the URL is inlined at the one call site below rather
/// than being a `const &str`.
const CHART_NAME: &str = "nats/nats";
const REPO_NAME: &str = "nats";
/// Thin preset over [`HelmChartScore`] that pins the NATS chart +
/// repository and leaves `values_yaml` as the one parameter.
///
/// Callers should almost never construct this directly — build a
/// high-level preset (`NatsBasicScore`, `NatsK8sScore`) instead.
/// The type is `pub` so those presets across different files can
/// share a single definition.
#[derive(Debug, Clone, Serialize)]
pub struct NatsHelmChartScore {
pub namespace: NonBlankString,
pub release_name: NonBlankString,
/// Helm values YAML specific to this shape. Build with the
/// preset's dedicated rendering function; `values_overrides`
/// style is intentionally not exposed — values_yaml is readable
/// + diffable, overrides are not.
pub values_yaml: String,
/// Whether helm should create the target namespace if missing.
pub create_namespace: bool,
/// `true` = `helm install` (fail on re-apply), `false` =
/// `helm upgrade --install` (idempotent). Presets default to
/// upgrade-install so re-running a Score is safe.
pub install_only: bool,
}
impl NatsHelmChartScore {
/// Build a score targeting the upstream NATS chart at the given
/// release name + namespace with the caller's values yaml.
pub fn new(
release_name: impl Into<String>,
namespace: impl Into<String>,
values_yaml: String,
) -> Self {
Self {
release_name: NonBlankString::from_str(&release_name.into())
.expect("non-blank release_name"),
namespace: NonBlankString::from_str(&namespace.into()).expect("non-blank namespace"),
values_yaml,
create_namespace: true,
install_only: false,
}
}
/// Convert into the underlying [`HelmChartScore`]. Exists for the
/// rare callers that need to hand the result to a non-NATS
/// pipeline (e.g. `ArgoCD`-backed deploy wrappers); presets
/// normally just use the `Score` impl.
pub fn into_helm_chart_score(self) -> HelmChartScore {
HelmChartScore {
namespace: Some(self.namespace),
release_name: self.release_name,
chart_name: NonBlankString::from_str(CHART_NAME).expect("chart name const is valid"),
chart_version: None,
values_overrides: None,
values_yaml: Some(self.values_yaml),
create_namespace: self.create_namespace,
install_only: self.install_only,
repository: Some(HelmRepository::new(
REPO_NAME.to_string(),
hurl!("https://nats-io.github.io/k8s/helm/charts/"),
true,
)),
}
}
}
impl<T: Topology + HelmCommand> Score<T> for NatsHelmChartScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(NatsHelmChartInterpret {
score: self.clone(),
})
}
fn name(&self) -> String {
format!("NatsHelmChartScore({})", self.release_name)
}
}
#[derive(Debug)]
pub struct NatsHelmChartInterpret {
score: NatsHelmChartScore,
}
#[async_trait]
impl<T: Topology + HelmCommand> Interpret<T> for NatsHelmChartInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
self.score
.clone()
.into_helm_chart_score()
.create_interpret()
.execute(inventory, topology)
.await
}
fn get_name(&self) -> InterpretName {
InterpretName::HelmChart
}
fn get_version(&self) -> Version {
Version::from("0.1.0").expect("static version literal")
}
fn get_status(&self) -> InterpretStatus {
InterpretStatus::QUEUED
}
fn get_children(&self) -> Vec<Id> {
vec![]
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn into_helm_chart_score_pins_chart_and_repo() {
let s = NatsHelmChartScore::new(
"fleet-nats",
"fleet-system",
"replicaCount: 1\n".to_string(),
);
let hc = s.into_helm_chart_score();
assert_eq!(hc.chart_name.to_string(), CHART_NAME);
let repo = hc.repository.expect("repo must be pinned");
// We're not inspecting the fields further — HelmRepository's
// fields are private — but pinning `repository = Some(..)`
// at all is what matters: without it `helm install` would
// try the release-name as a local path.
let _ = repo;
assert_eq!(hc.values_yaml.as_deref(), Some("replicaCount: 1\n"));
}
#[test]
fn defaults_are_upgrade_install_with_namespace_creation() {
let s = NatsHelmChartScore::new("n", "ns", "".to_string());
assert!(s.create_namespace, "presets expect namespace creation");
assert!(!s.install_only, "presets expect upgrade-install semantics");
}
}

View File

@@ -1,5 +1,10 @@
pub mod capability;
pub mod decentralized;
pub mod helm_chart;
pub mod pki;
pub mod score_nats_basic;
pub mod score_nats_k8s;
pub mod score_nats_supercluster;
pub use helm_chart::NatsHelmChartScore;
pub use score_nats_basic::{NatsBasicScore, NatsServiceType};

View File

@@ -0,0 +1,307 @@
//! Single-node NATS — high-level preset over [`NatsHelmChartScore`].
//!

The feature is correct but the implementation of the nats node primitive is wrong, see previous comment.

//! The shape this Score covers: one NATS server pod in a cluster,
//! JetStream on by default, exposed via ClusterIP / NodePort /
//! LoadBalancer. No TLS, no clustering, no auth. For any of those,
//! graduate to `NatsK8sScore` (supercluster + TLS + gateways).
//!
//! Everything concrete — probes, resource limits, statefulset
//! options — comes from the upstream `nats/nats` helm chart.
//! This Score just picks the chart values that select a minimal
//! single-node install.
//!
//! Typical usage:
//!
//! ```ignore
//! use harmony::modules::k8s::K8sBareTopology;
//! use harmony::modules::nats::NatsBasicScore;
//! use harmony::score::Score;
//! use harmony::inventory::Inventory;
//!
//! let topology = K8sBareTopology::from_kubeconfig("nats-install").await?;
//! let score = NatsBasicScore::new("fleet-nats", "fleet-system").load_balancer();
//! score.create_interpret().execute(&Inventory::empty(), &topology).await?;
//! ```
use async_trait::async_trait;
use harmony_types::id::Id;
use serde::Serialize;
use crate::data::Version;
use crate::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome};
use crate::inventory::Inventory;
use crate::modules::nats::helm_chart::NatsHelmChartScore;
use crate::score::Score;
use crate::topology::{HelmCommand, Topology};
/// How the NATS client port is exposed.
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
pub enum NatsServiceType {
/// In-cluster only. Caller reaches NATS via
/// `<release>.<namespace>.svc.cluster.local:4222`.
ClusterIp,
/// NodePort on the given host port — must fall in the cluster's
/// configured service-node-port range (default 30000-32767).
NodePort(i32),
/// LoadBalancer service. On k3d this uses the built-in
/// `klipper-lb`, which pairs naturally with
/// `k3d cluster create -p PORT:PORT@loadbalancer`.
LoadBalancer,
}
/// Declarative single-node NATS. Construct via [`new`], tune with
/// the builder-style methods, and execute it against a topology
/// that implements [`HelmCommand`].
#[derive(Debug, Clone, Serialize)]
pub struct NatsBasicScore {
release_name: String,
namespace: String,
jetstream: bool,
service_type: NatsServiceType,
/// Optional image override (`repository:tag` or full ref).
/// `None` = use the chart's default image.
image: Option<String>,
}
impl NatsBasicScore {
/// Build a single-node NATS score with JetStream on and
/// ClusterIP exposure. Use the builder methods to change the
/// exposure or image.
pub fn new(release_name: impl Into<String>, namespace: impl Into<String>) -> Self {
Self {
release_name: release_name.into(),
namespace: namespace.into(),
jetstream: true,
service_type: NatsServiceType::ClusterIp,
image: None,
}
}
pub fn jetstream(mut self, enabled: bool) -> Self {
self.jetstream = enabled;
self
}
pub fn node_port(mut self, port: i32) -> Self {
self.service_type = NatsServiceType::NodePort(port);
self
}
pub fn load_balancer(mut self) -> Self {
self.service_type = NatsServiceType::LoadBalancer;
self
}
pub fn image(mut self, image: impl Into<String>) -> Self {
self.image = Some(image.into());
self
}
/// Render the chart values for this preset. Public so tests +
/// downstream tools (e.g. `helm template` diffs) can inspect
/// exactly what the Score will install.
pub fn render_values(&self) -> String {
let mut y = String::new();
y.push_str(&format!("fullnameOverride: {}\n", self.release_name));
y.push_str("replicaCount: 1\n");
y.push_str("config:\n");
y.push_str(" cluster:\n");
y.push_str(" enabled: false\n");
y.push_str(" jetstream:\n");
y.push_str(&format!(" enabled: {}\n", self.jetstream));
if self.jetstream {
y.push_str(" fileStorage:\n");
y.push_str(" enabled: true\n");
y.push_str(" size: 10Gi\n");
}
match self.service_type {
NatsServiceType::ClusterIp => {
// Chart default. No overrides needed.
}
NatsServiceType::NodePort(port) => {
y.push_str("service:\n");
y.push_str(" merge:\n");
y.push_str(" spec:\n");
y.push_str(" type: NodePort\n");
y.push_str(" ports:\n");
y.push_str(" nats:\n");
y.push_str(" merge:\n");
y.push_str(&format!(" nodePort: {port}\n"));
}
NatsServiceType::LoadBalancer => {
y.push_str("service:\n");
y.push_str(" merge:\n");
y.push_str(" spec:\n");
y.push_str(" type: LoadBalancer\n");
}
}
if let Some(img) = &self.image {
let (repo, tag) = split_image_ref(img);
y.push_str("container:\n");
y.push_str(" image:\n");
y.push_str(&format!(" repository: {repo}\n"));
if let Some(tag) = tag {
y.push_str(&format!(" tag: {tag}\n"));
}
}
y
}
/// Name accessors — used by downstream presets + tests that
/// need to reference what this Score will name its resources.
pub fn release_name(&self) -> &str {
&self.release_name
}
pub fn namespace(&self) -> &str {
&self.namespace
}
}
fn split_image_ref(image: &str) -> (String, Option<String>) {
// Split on the *last* colon that isn't part of a registry port
// (`registry.io:5000/foo:v1`). Good enough for the shapes we
// see in practice (`nats:2.10-alpine`, `ghcr.io/nats-io/nats:v2`).
match image.rsplit_once(':') {
Some((r, t)) if !t.contains('/') => (r.to_string(), Some(t.to_string())),
_ => (image.to_string(), None),
}
}
impl<T: Topology + HelmCommand> Score<T> for NatsBasicScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(NatsBasicInterpret {
score: self.clone(),
})
}
fn name(&self) -> String {
"NatsBasicScore".to_string()
}
}
#[derive(Debug)]
pub struct NatsBasicInterpret {
score: NatsBasicScore,
}
#[async_trait]
impl<T: Topology + HelmCommand> Interpret<T> for NatsBasicInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let values_yaml = self.score.render_values();
NatsHelmChartScore::new(&self.score.release_name, &self.score.namespace, values_yaml)
.create_interpret()
.execute(inventory, topology)
.await
}
fn get_name(&self) -> InterpretName {
InterpretName::Custom("NatsBasicInterpret")
}
fn get_version(&self) -> Version {
Version::from("0.1.0").expect("static version literal")
}
fn get_status(&self) -> InterpretStatus {
InterpretStatus::QUEUED
}
fn get_children(&self) -> Vec<Id> {
vec![]
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn defaults_are_clusterip_jetstream_on() {
let s = NatsBasicScore::new("n", "ns");
assert_eq!(s.service_type, NatsServiceType::ClusterIp);
assert!(s.jetstream);
assert!(s.image.is_none());
}
#[test]
fn render_values_includes_fullname_and_replica() {
let y = NatsBasicScore::new("fleet-nats", "fleet-system").render_values();
assert!(y.contains("fullnameOverride: fleet-nats"));
assert!(y.contains("replicaCount: 1"));
// cluster.enabled stays false for a single-node shape.
assert!(y.contains("cluster:\n enabled: false"));
}
#[test]
fn render_values_enables_jetstream_with_storage_by_default() {
let y = NatsBasicScore::new("n", "ns").render_values();
assert!(y.contains("jetstream:\n enabled: true"));
assert!(y.contains("fileStorage:\n enabled: true"));
}
#[test]
fn render_values_omits_storage_when_jetstream_off() {
let y = NatsBasicScore::new("n", "ns")
.jetstream(false)
.render_values();
assert!(y.contains("jetstream:\n enabled: false"));
assert!(!y.contains("fileStorage"));
}
#[test]
fn render_values_node_port_patches_service_and_port() {
let y = NatsBasicScore::new("n", "ns")
.node_port(30222)
.render_values();
assert!(y.contains("type: NodePort"));
assert!(y.contains("nodePort: 30222"));
}
#[test]
fn render_values_load_balancer_sets_service_type() {
let y = NatsBasicScore::new("n", "ns")
.load_balancer()
.render_values();
assert!(y.contains("type: LoadBalancer"));
// LoadBalancer doesn't specify a nodePort — let kube assign.
assert!(!y.contains("nodePort:"));
}
#[test]
fn render_values_clusterip_has_no_service_block() {
let y = NatsBasicScore::new("n", "ns").render_values();
assert!(!y.contains("service:"));
}
#[test]
fn render_values_image_override_splits_repo_and_tag() {
let y = NatsBasicScore::new("n", "ns")
.image("registry.io/custom/nats:2.10-alpine")
.render_values();
assert!(y.contains("repository: registry.io/custom/nats"));
assert!(y.contains("tag: 2.10-alpine"));
}
#[test]
fn render_values_image_without_tag_omits_tag_line() {
let y = NatsBasicScore::new("n", "ns")
.image("my.internal/nats-no-tag")
.render_values();
assert!(y.contains("repository: my.internal/nats-no-tag"));
assert!(!y.contains("tag:"));
}
#[test]
fn setters_return_self_for_chaining() {
let s = NatsBasicScore::new("n", "ns")
.jetstream(true)
.load_balancer()
.image("nats:latest");
assert_eq!(s.release_name(), "n");
assert_eq!(s.namespace(), "ns");
}
}
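
Reviewer note: `render_values()` being public means the preset is inspectable without a cluster. A minimal sketch (import path assumed from the `pub use` re-exports in the nats mod above) that prints the values a NodePort install would hand to helm:

```rust
// Sketch only: prints the chart values NatsBasicScore would install,
// so they can be eyeballed or diffed against `helm template` output.
// The import path is assumed from the mod re-exports, not confirmed here.
use harmony::modules::nats::NatsBasicScore;

fn main() {
    // Single node, JetStream on (the default), client port pinned to a
    // NodePort inside the default 30000-32767 service-node-port range.
    let score = NatsBasicScore::new("fleet-nats", "fleet-system").node_port(30222);
    println!("{}", score.render_values());
}
```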

View File

@@ -1,14 +1,12 @@
use std::{collections::BTreeMap, str::FromStr};
use std::collections::BTreeMap;
use async_trait::async_trait;
use harmony_k8s::KubernetesDistribution;
use harmony_macros::hurl;
use harmony_secret::{Secret, SecretManager};
use harmony_types::id::Id;
use k8s_openapi::{ByteString, api::core::v1::Secret as K8sSecret};
use kube::api::ObjectMeta;
use log::{debug, info};
use non_blank_string_rs::NonBlankString;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
@@ -17,9 +15,11 @@ use crate::{
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
modules::{
helm::chart::{HelmChartScore, HelmRepository},
k8s::{ingress::K8sIngressScore, resource::K8sResourceScore},
nats::capability::{Nats, NatsCluster, NatsEndpoint},
nats::{
capability::{Nats, NatsCluster, NatsEndpoint},
helm_chart::NatsHelmChartScore,
},
okd::{
crd::route::{RoutePort, RouteSpec, RouteTargetReference, TLSConfig},
route::OKDRouteScore,
@@ -325,21 +325,8 @@ natsBox:
));
debug!("Prepared Helm Chart values : \n{values_yaml:#?}");
let nats = HelmChartScore {
namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
release_name: NonBlankString::from_str(&cluster.name).unwrap(),
chart_name: NonBlankString::from_str("nats/nats").unwrap(),
chart_version: None,
values_overrides: None,
values_yaml,
create_namespace: true,
install_only: false,
repository: Some(HelmRepository::new(
"nats".to_string(),
hurl!("https://nats-io.github.io/k8s/helm/charts/"),
true,
)),
};
let values_yaml = values_yaml.expect("supercluster always builds a values_yaml");
let nats = NatsHelmChartScore::new(cluster.name.clone(), namespace, values_yaml);
nats.interpret(inventory, topology).await
}
}
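
Reviewer note: this swap leans on `NatsHelmChartScore::new` carrying the exact flags the deleted literal set by hand. A sketch of a test pinning that invariant at the supercluster boundary (it mirrors the preset test near the top of this diff):

```rust
#[test]
fn supercluster_swap_keeps_helm_semantics() {
    // If the preset ever changed these defaults, supercluster installs
    // would silently change behaviour; the deleted literal hardcoded them.
    let s = NatsHelmChartScore::new("my-cluster", "nats-ns", String::new());
    assert!(s.create_namespace, "namespace creation must survive the swap");
    assert!(!s.install_only, "upgrade-install semantics must survive the swap");
}
```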

View File

@@ -3,5 +3,5 @@ mod score;
mod topology;
pub use interpret::PodmanV0Interpret;
pub use score::{IotScore, PodmanService, PodmanV0Score};
pub use score::{PodmanService, PodmanV0Score, ReconcileScore};
pub use topology::PodmanTopology;

View File

@@ -55,7 +55,7 @@ impl PodmanV0Score {
/// log-and-skip the unknown tag.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", content = "data")]
pub enum IotScore {
pub enum ReconcileScore {
PodmanV0(PodmanV0Score),
}
@@ -69,16 +69,16 @@ impl<T: Topology + ContainerRuntime> Score<T> for PodmanV0Score {
}
}
impl<T: Topology + ContainerRuntime> Score<T> for IotScore {
impl<T: Topology + ContainerRuntime> Score<T> for ReconcileScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
match self {
IotScore::PodmanV0(score) => score.create_interpret(),
ReconcileScore::PodmanV0(score) => score.create_interpret(),
}
}
fn name(&self) -> String {
match self {
IotScore::PodmanV0(_) => "PodmanV0Score".to_string(),
ReconcileScore::PodmanV0(_) => "PodmanV0Score".to_string(),
}
}
}
@@ -89,7 +89,7 @@ mod tests {
#[test]
fn podman_v0_score_serializes_with_adjacent_tag() {
let score = IotScore::PodmanV0(PodmanV0Score {
let score = ReconcileScore::PodmanV0(PodmanV0Score {
services: vec![PodmanService {
name: "web".to_string(),
image: "nginx:latest".to_string(),
@@ -103,7 +103,7 @@ mod tests {
#[test]
fn podman_v0_score_roundtrip() {
let score = IotScore::PodmanV0(PodmanV0Score {
let score = ReconcileScore::PodmanV0(PodmanV0Score {
services: vec![
PodmanService {
name: "web".to_string(),
@@ -118,7 +118,7 @@ mod tests {
],
});
let serialized = serde_json::to_string(&score).unwrap();
let deserialized: IotScore = serde_json::from_str(&serialized).unwrap();
let deserialized: ReconcileScore = serde_json::from_str(&serialized).unwrap();
assert_eq!(score, deserialized);
}

View File

@@ -62,8 +62,21 @@ impl PodmanTopology {
}
async fn ensure_image_present(&self, image: &str) -> Result<(), ExecutorError> {
// Fast path: image already in the local store → no network
// call, no rate-limit exposure. Matches the behaviour a
// Kubernetes `imagePullPolicy: IfNotPresent` would give, and
// it's the right default for a long-lived device agent —
// every podman `pull` against a public registry is rate-
// limited traffic we only want to spend when strictly
// necessary. Upgrades (different `image` string / tag) hit
// this function with a reference that's NOT locally
// present yet and still do the pull below.
let images = self.podman.images();
if images.get(image).exists().await.map_err(to_exec_error)? {
return Ok(());
}
let opts = PullOpts::builder().reference(image).build();
let mut stream = images.pull(&opts);
while let Some(event) = stream.next().await {
let event = event.map_err(to_exec_error)?;
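
Reviewer note: the promise in that comment is worth spelling out. A sketch of the resulting pull policy (image refs are illustrative, and the call is assumed to happen from inside the module since `ensure_image_present` is private):

```rust
// Illustrative only: shows the IfNotPresent-style behaviour the fast
// path above buys. Same tag twice costs one pull; a new tag is a new
// reference, misses the local store, and pulls again.
async fn pull_policy_example(topo: &PodmanTopology) -> Result<(), ExecutorError> {
    topo.ensure_image_present("docker.io/library/nginx:1.25").await?; // pulls
    topo.ensure_image_present("docker.io/library/nginx:1.25").await?; // local hit, no network
    topo.ensure_image_present("docker.io/library/nginx:1.27").await?; // new tag, pulls again
    Ok(())
}
```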

View File

@@ -1,145 +0,0 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Result;
use tokio::sync::Mutex;
use harmony::inventory::Inventory;
use harmony::modules::podman::{IotScore, PodmanTopology, PodmanV0Score};
use harmony::score::Score;
/// Cache key → last-seen state, populated by `apply` and consulted by the
/// 30-second periodic tick and the delete path.
struct CachedEntry {
/// Serialized score JSON. Used for string-compare idempotency per
/// ROADMAP §5.5 — cheaper and more deterministic than a hash.
serialized: String,
/// Parsed score. Cached so the periodic reconcile tick and delete
/// handlers don't have to re-parse the JSON.
score: PodmanV0Score,
}
pub struct Reconciler {
topology: Arc<PodmanTopology>,
inventory: Arc<Inventory>,
/// Keyed by NATS KV key (`<device>.<deployment>`). A single entry per
/// KV key — in v0 there is no fan-out from one key to many scores.
state: Mutex<HashMap<String, CachedEntry>>,
}
impl Reconciler {
pub fn new(topology: Arc<PodmanTopology>, inventory: Arc<Inventory>) -> Self {
Self {
topology,
inventory,
state: Mutex::new(HashMap::new()),
}
}
/// Handle a Put event (new or updated score on NATS KV). No-ops if the
/// serialized score is byte-identical to the last-seen value for this
/// key.
pub async fn apply(&self, key: &str, value: &[u8]) -> Result<()> {
let incoming = match serde_json::from_slice::<IotScore>(value) {
Ok(IotScore::PodmanV0(s)) => s,
Err(e) => {
tracing::warn!(key, error = %e, "failed to deserialize score");
return Ok(());
}
};
let serialized = String::from_utf8_lossy(value).into_owned();
{
let state = self.state.lock().await;
if let Some(existing) = state.get(key) {
if existing.serialized == serialized {
tracing::debug!(key, "score unchanged — noop");
return Ok(());
}
}
}
self.run_score(key, &incoming).await?;
let mut state = self.state.lock().await;
state.insert(
key.to_string(),
CachedEntry {
serialized,
score: incoming,
},
);
Ok(())
}
/// Handle a Delete/Purge event. Stops and removes every container
/// referenced by the last cached score for this key. Idempotent: if we
/// never saw a Put for this key (agent restart after delete), logs and
/// returns ok.
pub async fn remove(&self, key: &str) -> Result<()> {
let mut state = self.state.lock().await;
let Some(entry) = state.remove(key) else {
tracing::info!(key, "delete for unknown key — nothing to remove");
return Ok(());
};
drop(state);
use harmony::topology::ContainerRuntime;
for service in &entry.score.services {
if let Err(e) = self.topology.remove_service(&service.name).await {
tracing::warn!(
key,
service = %service.name,
error = %e,
"failed to remove container"
);
} else {
tracing::info!(key, service = %service.name, "removed container");
}
}
Ok(())
}
/// Periodic ground-truth reconcile. ROADMAP §5.6 — "polling instead of
/// event-driven PLEG. Agent polls podman every 30s as ground truth;
/// KV watch events are accelerators." Re-runs each cached score against
/// podman-api; the underlying `ensure_service_running` is idempotent
/// so a converged state produces no log noise.
pub async fn tick(&self) -> Result<()> {
let snapshot: Vec<(String, PodmanV0Score)> = {
let state = self.state.lock().await;
state
.iter()
.map(|(k, v)| (k.clone(), v.score.clone()))
.collect()
};
for (key, score) in snapshot {
if let Err(e) = self.run_score(&key, &score).await {
tracing::warn!(key, error = %e, "periodic reconcile failed");
}
}
Ok(())
}
pub async fn run_periodic(self: Arc<Self>, interval: Duration) {
let mut ticker = tokio::time::interval(interval);
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
loop {
ticker.tick().await;
if let Err(e) = self.tick().await {
tracing::warn!(error = %e, "reconcile tick error");
}
}
}
async fn run_score(&self, key: &str, score: &PodmanV0Score) -> Result<()> {
let interpret = Score::<PodmanTopology>::create_interpret(score);
let outcome = interpret
.execute(&self.inventory, &self.topology)
.await
.map_err(|e| anyhow::anyhow!("PodmanV0Score interpret failed for {key}: {e}"))?;
tracing::info!(key, outcome = ?outcome, "reconciled");
Ok(())
}
}
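
Reviewer note on this deleted file: the behaviour is superseded elsewhere in this PR, but the §5.5 idempotency rule it documents is easy to pin in isolation. A standalone sketch, assuming the serialized string is the canonical identity (names are hypothetical, not from the tree):

```rust
// Hypothetical distillation of the apply() check above: byte-identical
// serialized scores count as "already applied" and trigger no re-execution.
fn needs_apply(last_seen: Option<&str>, incoming: &str) -> bool {
    last_seen != Some(incoming)
}

#[test]
fn byte_identical_put_is_a_noop() {
    let v = r#"{"type":"PodmanV0","data":{"services":[]}}"#;
    assert!(!needs_apply(Some(v), v)); // unchanged: noop
    assert!(needs_apply(None, v)); // first sighting: apply
    // Any byte difference forces a re-apply, including key-order or
    // formatting churn; that is the cost of skipping a canonical hash.
    assert!(needs_apply(Some(v), r#"{"data":{"services":[]},"type":"PodmanV0"}"#));
}
```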

View File

@@ -1,137 +0,0 @@
use std::sync::Arc;
use std::time::Duration;
use async_nats::jetstream::kv::Store;
use futures_util::StreamExt;
use harmony_reconciler_contracts::desired_state_key;
use kube::api::{Patch, PatchParams};
use kube::runtime::Controller;
use kube::runtime::controller::Action;
use kube::runtime::finalizer::{Event as FinalizerEvent, finalizer};
use kube::runtime::watcher::Config as WatcherConfig;
use kube::{Api, Client, ResourceExt};
use serde_json::json;
use crate::crd::{Deployment, DeploymentStatus, ScorePayload};
const FINALIZER: &str = "iot.nationtech.io/finalizer";
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("kube api: {0}")]
Kube(#[from] kube::Error),
#[error("nats kv: {0}")]
Kv(String),
#[error("serde: {0}")]
Serde(#[from] serde_json::Error),
#[error("missing namespace on resource")]
MissingNamespace,
#[error("missing target devices")]
MissingTargets,
}
pub struct Context {
pub client: Client,
pub kv: Store,
}
pub async fn run(client: Client, kv: Store) -> anyhow::Result<()> {
let api: Api<Deployment> = Api::all(client.clone());
let ctx = Arc::new(Context { client, kv });
tracing::info!("starting Deployment controller");
Controller::new(api, WatcherConfig::default())
.run(reconcile, error_policy, ctx)
.for_each(|res| async move {
match res {
Ok((obj, _)) => tracing::debug!(?obj, "reconciled"),
Err(e) => tracing::warn!(error = %e, "reconcile error"),
}
})
.await;
Ok(())
}
async fn reconcile(obj: Arc<Deployment>, ctx: Arc<Context>) -> Result<Action, Error> {
let ns = obj.namespace().ok_or(Error::MissingNamespace)?;
let name = obj.name_any();
tracing::info!(%ns, %name, "reconcile");
let api: Api<Deployment> = Api::namespaced(ctx.client.clone(), &ns);
finalizer(&api, FINALIZER, obj, |event| async {
match event {
FinalizerEvent::Apply(d) => apply(d, &api, &ctx.kv).await,
FinalizerEvent::Cleanup(d) => cleanup(d, &ctx.kv).await,
}
})
.await
.map_err(|e| match e {
kube::runtime::finalizer::Error::ApplyFailed(e)
| kube::runtime::finalizer::Error::CleanupFailed(e) => e,
kube::runtime::finalizer::Error::AddFinalizer(e)
| kube::runtime::finalizer::Error::RemoveFinalizer(e) => Error::Kube(e),
kube::runtime::finalizer::Error::UnnamedObject => Error::Kv("unnamed object".into()),
kube::runtime::finalizer::Error::InvalidFinalizer => Error::Kv("invalid finalizer".into()),
})
}
async fn apply(obj: Arc<Deployment>, api: &Api<Deployment>, kv: &Store) -> Result<Action, Error> {
let name = obj.name_any();
if obj.spec.target_devices.is_empty() {
return Err(Error::MissingTargets);
}
let score_json = serialize_score(&obj.spec.score)?;
let already_observed = obj
.status
.as_ref()
.and_then(|s| s.observed_score_string.as_deref())
== Some(score_json.as_str());
if already_observed {
tracing::debug!(%name, "score unchanged; skipping KV write and status patch");
return Ok(Action::requeue(Duration::from_secs(300)));
}
for device_id in &obj.spec.target_devices {
let key = kv_key(device_id, &name);
kv.put(key.clone(), score_json.clone().into_bytes().into())
.await
.map_err(|e| Error::Kv(e.to_string()))?;
tracing::info!(%key, "wrote desired state");
}
let status = json!({
"status": DeploymentStatus {
observed_score_string: Some(score_json),
}
});
api.patch_status(&name, &PatchParams::default(), &Patch::Merge(&status))
.await?;
Ok(Action::requeue(Duration::from_secs(300)))
}
async fn cleanup(obj: Arc<Deployment>, kv: &Store) -> Result<Action, Error> {
let name = obj.name_any();
for device_id in &obj.spec.target_devices {
let key = kv_key(device_id, &name);
kv.delete(&key)
.await
.map_err(|e| Error::Kv(e.to_string()))?;
tracing::info!(%key, "deleted desired state");
}
Ok(Action::await_change())
}
fn serialize_score(score: &ScorePayload) -> Result<String, Error> {
Ok(serde_json::to_string(score)?)
}
fn kv_key(device_id: &str, deployment_name: &str) -> String {
desired_state_key(device_id, deployment_name)
}
fn error_policy(_obj: Arc<Deployment>, err: &Error, _ctx: Arc<Context>) -> Action {
tracing::warn!(error = %err, "requeueing after error");
Action::requeue(Duration::from_secs(30))
}
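
Reviewer note: the per-device fan-out in `apply`/`cleanup` above is the whole contract between operator and agent. A hypothetical helper (not in the tree) that names it:

```rust
// Hypothetical, for illustration: one Deployment CR targeting N devices
// maps to N KV entries carrying the same serialized score; cleanup walks
// the identical key list. The key layout itself is owned by
// harmony_reconciler_contracts::desired_state_key and is not restated here.
fn fan_out_keys(devices: &[&str], deployment: &str) -> Vec<String> {
    devices
        .iter()
        .map(|device| harmony_reconciler_contracts::desired_state_key(device, deployment))
        .collect()
}
```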

View File

@@ -1,96 +0,0 @@
//! Install the operator's CRD into a target Kubernetes cluster
//! via a harmony Score — no yaml generation, no kubectl shell-out.
//!
//! The Score side is just [`K8sResourceScore`] over
//! [`Deployment::crd()`]; what this module owns is a thin
//! [`InstallTopology`] that satisfies `K8sclient` by loading the
//! current `KUBECONFIG` directly. We don't use
//! [`K8sAnywhereTopology`] because its `ensure_ready` does a lot of
//! product-level setup (cert-manager, tenant manager, helm probes)
//! that isn't appropriate for a narrow "apply a CRD" action.
use std::sync::Arc;
use anyhow::{Context, Result};
use async_trait::async_trait;
use harmony::inventory::Inventory;
use harmony::modules::k8s::resource::K8sResourceScore;
use harmony::score::Score;
use harmony::topology::{K8sclient, PreparationOutcome, Topology};
use harmony_k8s::K8sClient;
use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
use kube::CustomResourceExt;
use crate::crd::Deployment;
/// Topology that only knows how to hand out a pre-built `K8sClient`.
/// Used by [`install_crds`] so the Score machinery has something
/// that satisfies `K8sclient` without dragging in the full
/// `K8sAnywhereTopology` bootstrap.
///
/// # Architectural smell — do not copy this pattern without reading the roadmap
///
/// Vendoring an ad-hoc `Topology` impl in a module that just wants to
/// apply a CRD is a symptom of a bigger problem: the existing
/// opinionated topologies (`K8sAnywhereTopology`, `HAClusterTopology`)
/// have accumulated product-level side effects in their `ensure_ready`
/// — cert-manager install, tenant manager setup, helm probes — that
/// make them unfit for narrow actions. The correct long-term fix is a
/// minimal reusable `K8sBareTopology` in harmony that carries a
/// `K8sClient` and exposes `K8sclient` with a noop `ensure_ready`, so
/// every narrow Score isn't tempted to vendor its own copy.
///
/// See `ROADMAP/12-code-review-april-2026.md` §12.6 "Topology
/// proliferation". The explicit smoke test for "that roadmap item is
/// done" is: this file can delete `InstallTopology` and replace
/// `topology` construction with a one-liner against the shared type.
struct InstallTopology {
client: Arc<K8sClient>,
}
#[async_trait]
impl Topology for InstallTopology {
fn name(&self) -> &str {
"iot-operator-install"
}
async fn ensure_ready(
&self,
) -> Result<PreparationOutcome, harmony::topology::PreparationError> {
Ok(PreparationOutcome::Noop)
}
}
#[async_trait]
impl K8sclient for InstallTopology {
async fn k8s_client(&self) -> Result<Arc<K8sClient>, String> {
Ok(self.client.clone())
}
}
/// Apply the operator's CRDs to whatever cluster `KUBECONFIG` points
/// at. Returns once the apply call completes — does **not** wait for
/// the apiserver to mark the CRD `Established`; the caller does that
/// (e.g. with `kubectl wait --for=condition=Established`) if it
/// cares.
pub async fn install_crds() -> Result<()> {
let kube_client = kube::Client::try_default()
.await
.context("building kube client from KUBECONFIG")?;
let topology = InstallTopology {
client: Arc::new(K8sClient::new(kube_client)),
};
let inventory = Inventory::empty();
let crd: CustomResourceDefinition = Deployment::crd();
let score = K8sResourceScore::<CustomResourceDefinition>::single(crd, None);
let interpret = Score::<InstallTopology>::create_interpret(&score);
let outcome = interpret
.execute(&inventory, &topology)
.await
.map_err(|e| anyhow::anyhow!("install CRD: {e}"))
.context("executing K8sResourceScore for Deployment CRD")?;
tracing::info!(?outcome, "CRD installed");
Ok(())
}
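
Reviewer note: since `install_crds` deliberately returns before the CRD is `Established`, callers that need the guarantee can wait in-process instead of shelling out to kubectl. A sketch using kube's runtime wait helpers; the CRD name is an assumption derived from the `iot.nationtech.io` group used by the controller's finalizer:

```rust
// Sketch of the caller-side wait the doc comment above defers. Uses
// kube's runtime wait helpers; the CRD name is assumed, not confirmed
// by this diff.
use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
use kube::runtime::wait::{await_condition, conditions};
use kube::{Api, Client};

async fn wait_until_established() -> anyhow::Result<()> {
    let client = Client::try_default().await?;
    let crds: Api<CustomResourceDefinition> = Api::all(client);
    await_condition(
        crds,
        "deployments.iot.nationtech.io", // assumed name: <plural>.<group>
        conditions::is_crd_established(),
    )
    .await?;
    Ok(())
}
```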

View File

@@ -1,73 +0,0 @@
mod controller;
mod crd;
mod install;
use anyhow::Result;
use async_nats::jetstream;
use clap::{Parser, Subcommand};
use harmony_reconciler_contracts::BUCKET_DESIRED_STATE;
use kube::Client;
#[derive(Parser)]
#[command(
name = "iot-operator-v0",
about = "IoT operator — Deployment CRD → NATS KV"
)]
struct Cli {
#[command(subcommand)]
command: Option<Command>,
#[arg(
long,
env = "NATS_URL",
default_value = "nats://localhost:4222",
global = true
)]
nats_url: String,
#[arg(
long,
env = "KV_BUCKET",
default_value = BUCKET_DESIRED_STATE,
global = true
)]
kv_bucket: String,
}
#[derive(Subcommand)]
enum Command {
/// Run the controller (default when no subcommand is given).
Run,
/// Apply the operator's CRD to the cluster `KUBECONFIG` points
/// at. Uses harmony's typed k8s client — no yaml, no kubectl.
Install,
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let cli = Cli::parse();
match cli.command.unwrap_or(Command::Run) {
Command::Install => install::install_crds().await,
Command::Run => run(&cli.nats_url, &cli.kv_bucket).await,
}
}
async fn run(nats_url: &str, bucket: &str) -> Result<()> {
let nats = async_nats::connect(nats_url).await?;
tracing::info!(url = %nats_url, "connected to NATS");
let js = jetstream::new(nats);
let kv = js
.create_key_value(jetstream::kv::Config {
bucket: bucket.to_string(),
..Default::default()
})
.await?;
tracing::info!(bucket = %bucket, "KV bucket ready");
let client = Client::try_default().await?;
controller::run(client, kv).await
}