diff --git a/Cargo.lock b/Cargo.lock index ef44076a..da364e76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3166,7 +3166,36 @@ dependencies = [ ] [[package]] -name = "example_iot_vm_setup" +name = "example_fleet_load_test" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-nats", + "chrono", + "clap", + "harmony-fleet-operator", + "harmony-reconciler-contracts", + "k8s-openapi", + "kube", + "rand 0.9.2", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "example_fleet_nats_install" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "harmony", + "tokio", +] + +[[package]] +name = "example_fleet_vm_setup" version = "0.1.0" dependencies = [ "anyhow", @@ -3178,6 +3207,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "example_harmony_apply_deployment" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "harmony", + "harmony-fleet-operator", + "k8s-openapi", + "kube", + "serde_json", + "tokio", +] + [[package]] name = "example_linux_vm" version = "0.1.0" @@ -3690,6 +3733,47 @@ dependencies = [ "walkdir", ] +[[package]] +name = "harmony-fleet-agent" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-nats", + "chrono", + "clap", + "futures-util", + "harmony", + "harmony-reconciler-contracts", + "serde", + "serde_json", + "tokio", + "toml", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "harmony-fleet-operator" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-nats", + "chrono", + "clap", + "futures-util", + "harmony", + "harmony-reconciler-contracts", + "k8s-openapi", + "kube", + "schemars 0.8.22", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "harmony-k8s" version = "0.1.0" @@ -3732,8 +3816,10 @@ version = "0.1.0" dependencies = [ "chrono", "harmony_types", + "schemars 0.8.22", "serde", "serde_json", + "thiserror 2.0.18", ] [[package]] @@ -4710,48 +4796,6 @@ dependencies = [ "thiserror 1.0.69", ] -[[package]] -name = "iot-agent-v0" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-nats", - "chrono", - "clap", - "futures-util", - "harmony", - "harmony-reconciler-contracts", - "serde", - "serde_json", - "tokio", - "toml", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "iot-operator-v0" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-nats", - "async-trait", - "clap", - "futures-util", - "harmony", - "harmony-k8s", - "harmony-reconciler-contracts", - "k8s-openapi", - "kube", - "schemars 0.8.22", - "serde", - "serde_json", - "thiserror 2.0.18", - "tokio", - "tracing", - "tracing-subscriber", -] - [[package]] name = "ipnet" version = "2.12.0" @@ -4910,6 +4954,7 @@ checksum = "aa60a41b57ae1a0a071af77dbcf89fc9819cfe66edaf2beeb204c34459dcf0b2" dependencies = [ "base64 0.22.1", "chrono", + "schemars 0.8.22", "serde", "serde_json", ] diff --git a/Cargo.toml b/Cargo.toml index 1e9eeaf8..92182b4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,8 +28,8 @@ members = [ "harmony_node_readiness", "harmony-k8s", "harmony_assets", "opnsense-codegen", "opnsense-api", - "iot/iot-operator-v0", - "iot/iot-agent-v0", + "fleet/harmony-fleet-operator", + "fleet/harmony-fleet-agent", "harmony-reconciler-contracts", ] @@ -66,7 +66,7 @@ kube = { version = "1.1.0", features = [ "ws", "jsonpatch", ] } -k8s-openapi = { version = "0.25", features = ["v1_30"] } +k8s-openapi = { version = "0.25", features = ["v1_30", "schemars"] } # TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated 
https://github.com/sebastienrousseau/serde_yml serde_yaml = "0.9" serde-value = "0.7" diff --git a/ROADMAP/12-code-review-april-2026.md b/ROADMAP/12-code-review-april-2026.md index 7986aa1e..cbaf7938 100644 --- a/ROADMAP/12-code-review-april-2026.md +++ b/ROADMAP/12-code-review-april-2026.md @@ -99,7 +99,7 @@ Replace `kubectl exec bao ...` shell commands in `openbao/setup.rs` with typed ` `K8sAnywhereTopology` and `HAClusterTopology` have accumulated opinions — cert-manager install, tenant manager setup, helm probes, TLS passthrough, SSO wiring — that make them unfit for narrow, ad-hoc Score execution. Calling `ensure_ready()` on `K8sAnywhereTopology` to apply a single CRD installs a full product stack as a side effect; that's the opposite of what "make me ready" should mean. -Concrete example: `iot/iot-operator-v0/src/install.rs` needed a topology that satisfies `K8sclient` for a single `K8sResourceScore::` apply. `K8sAnywhereTopology` was wrong (too heavy); `HAClusterTopology` was wrong (bare-metal). Work-around: a 30-line inline `InstallTopology` that wraps a pre-built `K8sClient` and has a noop `ensure_ready`. That file flags the architectural smell in its doc comment and points back to this entry. +Concrete example: `fleet/harmony-fleet-operator/src/install.rs` needed a topology that satisfies `K8sclient` for a single `K8sResourceScore::` apply. `K8sAnywhereTopology` was wrong (too heavy); `HAClusterTopology` was wrong (bare-metal). Work-around: a 30-line inline `InstallTopology` that wraps a pre-built `K8sClient` and has a noop `ensure_ready`. That file flags the architectural smell in its doc comment and points back to this entry. If every narrow Score ends up vendoring its own ad-hoc topology, we get exactly the proliferation this entry is meant to prevent. @@ -113,4 +113,4 @@ If every narrow Score ends up vendoring its own ad-hoc topology, we get exactly - Adding a new ad-hoc Score against k8s doesn't require inventing a new topology. - `K8sAnywhereTopology` stops being the default reach and starts being a deliberate product choice. -- Test: can we delete the inline `InstallTopology` in `iot/iot-operator-v0/src/install.rs` by replacing it with a one-liner `K8sBareTopology::from_env()`? That's the smoke test for "we fixed the proliferation." +- Test: can we delete the inline `InstallTopology` in `fleet/harmony-fleet-operator/src/install.rs` by replacing it with a one-liner `K8sBareTopology::from_env()`? That's the smoke test for "we fixed the proliferation." diff --git a/ROADMAP/iot_platform/arm_vm_plan.md b/ROADMAP/fleet_platform/arm_vm_plan.md similarity index 94% rename from ROADMAP/iot_platform/arm_vm_plan.md rename to ROADMAP/fleet_platform/arm_vm_plan.md index b4118cc8..653f2b67 100644 --- a/ROADMAP/iot_platform/arm_vm_plan.md +++ b/ROADMAP/fleet_platform/arm_vm_plan.md @@ -15,7 +15,7 @@ for CI) so: - the VM runs the same Ubuntu 24.04 arm64 cloud image customers will eventually flash onto a Pi; -- the iot-agent shipped to it is a real aarch64 binary produced by +- the fleet-agent shipped to it is a real aarch64 binary produced by our existing cross-compile toolchain; - apt/systemd/podman on the VM are the actual arm64 packages; and - smoke-a3 exercises all of it end-to-end. @@ -126,11 +126,11 @@ In `modules/iot/preflight.rs`, when the caller asks for arm64 VMs ### 6. Cross-compiled agent smoke-a3.sh phase 2 currently does native `cargo build --release --p iot-agent-v0`. When arch=aarch64: +-p fleet-agent-v0`. 
When arch=aarch64: - `cargo build --release --target aarch64-unknown-linux-gnu - -p iot-agent-v0` + -p fleet-agent-v0` - AGENT_BINARY points at `target/aarch64-unknown-linux-gnu/release/ - iot-agent-v0` + fleet-agent-v0` Opt-in via `--arch aarch64` CLI flag on both `example_iot_vm_setup` and `smoke-a3.sh`. Default stays x86_64. @@ -152,9 +152,9 @@ arch=aarch64. Smoke-a3's phase 5 reboot gate also lengthens. | `harmony/src/modules/kvm/topology.rs` | Copy per-VM NVRAM template on ensure_vm; thread arch through to XML. | | `harmony/src/modules/iot/assets.rs` | `ensure_ubuntu_2404_cloud_image_for_arch(arch)`; pin arm64 URL+sha256. | | `harmony/src/modules/iot/preflight.rs` | Arch-aware preflight; qemu-system-aarch64 + firmware + qemu-version. | -| `examples/iot_vm_setup/src/main.rs` | `--arch x86_64|aarch64` CLI flag; resolve matching cloud image. | -| `iot/scripts/smoke-a3.sh` | Arch flag plumbing; cross-compile; extended timeouts; preflight. | -| `iot/scripts/smoke-a3-arm.sh` (new) | Dedicated arm smoke as the CI hook — `ARCH=aarch64 ./smoke-a3.sh`. | +| `examples/fleet_vm_setup/src/main.rs` | `--arch x86_64|aarch64` CLI flag; resolve matching cloud image. | +| `fleet/scripts/smoke-a3.sh` | Arch flag plumbing; cross-compile; extended timeouts; preflight. | +| `fleet/scripts/smoke-a3-arm.sh` (new) | Dedicated arm smoke as the CI hook — `ARCH=aarch64 ./smoke-a3.sh`. | ## Out of scope diff --git a/ROADMAP/fleet_platform/chapter_4_aggregation_scale.md b/ROADMAP/fleet_platform/chapter_4_aggregation_scale.md new file mode 100644 index 00000000..6647c735 --- /dev/null +++ b/ROADMAP/fleet_platform/chapter_4_aggregation_scale.md @@ -0,0 +1,453 @@ +# Chapter 4 — Aggregation architecture at IoT scale + +> **Status: SUPERSEDED (2026-04-23) — historical archaeology only.** +> +> This document proposed an event-stream CQRS architecture +> (`StateChangeEvent` on a JetStream stream, per-key `Revision` +> tracking, `LifecycleTransition::{Applied, Removed}` diff events, +> cold-start re-walk, durable consumer folding events into counters). +> The design was implemented, then entirely removed in favor of a +> simpler shape: the operator watches `device-state` KV directly +> via `bucket.watch_with_history(">")`, selector evaluation runs +> against a cluster-scoped `Device` CRD cache, and `desired-state` +> entries are diffed from the selector → matched-devices set on +> watch events. No event stream, no revisions, no transition +> enum. +> +> **What's still accurate in this doc:** +> +> - The per-concern KV split (`device-info`, `device-state`, +> `device-heartbeat`) and their cadences. +> - The operator's responsibilities: counter aggregation, dirty-set +> debouncing, 1 Hz CR patch cadence. +> - The scale target (10 000 devices × 1 000 deployments at +> 10 000 state writes/s — load-tested and green). +> - The `.status.aggregate` fields (succeeded / failed / pending / +> lastError, plus the new `matchedDeviceCount`). +> +> **What's no longer true:** +> +> - No `events.state.>` JetStream stream, no durable event consumer. +> - No per-key `Revision(agent_epoch, sequence)` — KV ordering is +> sufficient. +> - No `LifecycleTransition` diff enum on the wire — phase +> transitions are derived from cached vs. current state inside +> the operator. +> - No `events.log.>` stream, no `logs..query` request- +> reply protocol. Logs are deferred until a real consumer lands. +> - No cold-start event re-walk — KV watch with history replays +> current state, which covers restart-correctness for the +> device-state cache. 
+> +> **Where to look now:** +> +> - Shipped design: `v0_1_plan.md` Chapter 2 (marked SHIPPED 2026-04-23). +> - Source of truth: `fleet/harmony-fleet-operator/src/fleet_aggregator.rs`, +> `fleet/harmony-fleet-operator/src/device_reconciler.rs`, +> `harmony-reconciler-contracts/src/{fleet,kv,status}.rs`. +> +> Everything below is preserved verbatim as the decision trail of a +> path not taken. Useful as context for why the current design is +> shaped the way it is; not a spec for future work. +> +> --- +> +> (Original design draft begins here.) + +## 1. Why now + +We have no real deployment in the field yet. That's a liability when +shipping (no user, no revenue) but a gift when designing: we can move +the data model before customers depend on it. After a partner fleet +lands, changing the aggregation substrate is a multi-quarter +migration. Doing it now is days of work. + +Chapter 2's aggregator was the right "make it work" design for a +walking-skeleton proof. It's the wrong "make it scale" design for a +partner deployment of even a few hundred devices, let alone the +fleet sizes the product thesis targets. This chapter replaces it. + +## 2. What's wrong today + +**Per-tick cost, current design.** Every 5 seconds, for each +Deployment CR, resolve the selector against the full device snapshot +and fold into an aggregate: + +``` +O(deployments × devices) per tick ++ 1 kube patch per CR per tick +``` + +At 10k deployments × 1M devices, that's 10^10 selector evaluations +and 10k apiserver patches every 5 s. Nothing resembles viable there. + +**What else goes wrong at scale.** + +- The operator holds the full fleet snapshot in memory. 1M `AgentStatus` + payloads × a few kB each = GB of heap, dominated by `recent_events` + rings. +- Agent heartbeats publish the whole `AgentStatus` every 30 s — a lot + of bytes on the wire whose only incremental content is usually a + timestamp update. +- `agent-status` is a KV bucket. KV is designed for "latest value per + key," not "stream of state changes." We've been using it for both + roles and paying the worst of each. +- Logs are nowhere yet (good — this is the moment to put them in the + right place before we're committed). + +## 3. Design overview + +Shift to a **CQRS-style architecture** where devices write their +authoritative state, and the operator maintains incrementally-updated +aggregates driven by state-change events. + +``` + device (N× agents) operator + ────────────────── ──────── + current state keys ───reads─▶ on cold-start: + (authoritative) walk keys → rebuild counters + then: stream consumer + state-change events ═ JS stream═▶ ± counters per event + (delta stream) ± update reverse index + on tick (1 Hz): + device_info keys ───reads─▶ patch .status for dirty deployments + (labels, inventory) + + logs ───at-least-once NATS subj────▶ not stored centrally + (streamed on query) +``` + +Three substrates, each chosen for its fit: + +- **JetStream KV, per-device keys** — device-authoritative state. + Cheap to read when needed, never scanned globally at scale. +- **JetStream stream, per-device events** — ordered delta feed. + Operator consumers replay on restart, consume incrementally during + steady state. +- **Plain NATS subjects, logs** — at-least-once pub/sub, device-side + buffering (~10k lines), streamed on query. + +## 4. Data model + +### 4.1 NATS KV buckets + +**`device-info`** — static-ish facts per device, infrequent updates. 
+ +| Key | Value | Written by | Read by | +|-----|-------|------------|---------| +| `info.` | `DeviceInfo` (labels, inventory, agent_version) | agent on startup + label change | operator (selector resolution, inventory display) | + +**`device-state`** — current phase per deployment per device. +Authoritative source of truth for "what's running where." + +| Key | Value | Written by | Read by | +|-----|-------|------------|---------| +| `state..` | `DeploymentState` (phase, last_event_at, last_error) | agent on reconcile transition | operator on cold-start only | + +One key per (device, deployment) pair. Natural TTL via JetStream KV +per-key history — lets us cap the keyspace. + +**`device-heartbeat`** — liveness only. Tiny payload, frequent +updates. + +| Key | Value | Written by | Read by | +|-----|-------|------------|---------| +| `heartbeat.` | `{ timestamp }` (32 bytes) | agent every 30s | operator (stale detection) | + +Separate from `device-state` so routine heartbeats don't churn the +state keys or emit spurious state-change events. + +### 4.2 NATS JetStream stream + +**`device-events`** — ordered delta feed for operator aggregation. + +- Subject: `events.state..` +- Payload: `StateChangeEvent { from: Phase, to: Phase, at, last_error }` +- Retention: time-based (e.g. 24h) — consumers that fall further + behind than retention rebuild from `device-state` KV on recovery. +- Agents emit one event per phase transition, **not** per heartbeat. + +Separate stream for **event log** (user-facing reconcile log events): + +- Subject: `events.log.` +- Payload: `LogEvent { at, severity, message, deployment? }` +- Retention: time-based (1h, enough for "show me what happened the + last few minutes" queries; the device's in-memory ring holds the + rest). + +### 4.3 Log transport (NOT JetStream) + +- Subject: `logs.` — plain pub/sub, at-least-once +- Not persisted by NATS +- Device buffers last ~10k lines in a ring buffer +- Query protocol: request-reply on `logs..query` + - Device responds with buffer contents, then streams live tail + until the query closes + +This is a dedicated transport because structured logs at fleet scale +(1M devices × 1k lines/h = 1B messages/h) would crush JetStream's +per-subject storage without adding operator-visible value. Operators +only look at logs on-demand, per-device; device-side buffering +matches the access pattern. + +### 4.4 CRD fields + +Minimal change from Chapter 2: + +- `.status.aggregate.succeeded | failed | pending` — now sourced + from counters, not per-tick fold. +- `.status.aggregate.last_error` — updated on `to: Failed` events. +- `.status.aggregate.last_heartbeat_at` — from the per-deployment + latest event. +- `.status.aggregate.recent_events` — bounded per-deployment ring, + updated on event arrival. +- **Drop** `.status.aggregate.unreported` (no meaningful definition + under selector-based targeting — already removed in the pre-chapter + cleanup). +- **Add** `.status.aggregate.stale: u32` — count of devices matching + the selector whose last heartbeat is older than a threshold + (default 5 min). This is the replacement for "unreported" that + makes sense at scale. Computed on tick from the operator's + reverse-indexed view, not per-device query. + +### 4.5 Operator in-memory state + +- **Counters** — `HashMap`, one entry + per CR, updated atomically on event arrival. +- **Reverse index** — `HashMap>`, + updated when a device's labels change or when a CR's selector + changes. 
Lets a state-change event find affected deployments in + O(deployments-matching-this-device) rather than O(all-deployments). +- **Last-error rollup** — per deployment, the most recent error + keyed by timestamp. +- **Recent-events ring** — per deployment, bounded by N (e.g. 10). +- **Dirty set** — deployments whose aggregate has changed since last + patch. Tick reads + clears this set; only dirty deployments get + patched. + +Operator heap is bounded by fleet + deployment count, not their +product. + +## 5. Counter invariants (the contract) + +Correctness rests on two rules: + +### 5.1 Device publishes exactly one transition per reconcile outcome + +Every reconcile results in a state. If the state differs from the +last published state for `(device, deployment)`, the agent: + +1. Writes the new state to `state..` KV (CAS + against expected-revision for multi-writer safety — only one + agent process per device, so contention is theoretical). +2. Publishes a `StateChangeEvent` to + `events.state..`. + +These two writes must be atomic from the agent's perspective — if +(1) succeeds and (2) fails (or vice versa), the agent retries until +both reach NATS. Worst case: a duplicate event on the stream; +counter handles duplicates via `from → to` structure (see 5.2). + +### 5.2 Counters are driven by transitions, not snapshots + +Each event carries `from: Phase, to: Phase`. Counter update is a +single atomic action: + +```rust +counters[(deployment, from)] -= 1; +counters[(deployment, to)] += 1; +``` + +Duplicates (same `from → to` replayed) are a no-op if `from` == +current phase for that (device, deployment) — the operator +cross-checks the device's current state in the reverse index before +applying. A duplicate past event is detected and ignored; a duplicate +current event is idempotent anyway (counters converge). + +### 5.3 The bootstrap transition + +A device's first-ever event for a deployment has `from: None` (or a +sentinel `Unassigned` variant): counter update is just `to` +increment. + +### 5.4 Device leaves fleet + +When a device's heartbeat goes stale past threshold + grace, OR +when its labels no longer match the deployment's selector: + +- Counters are decremented for every deployment the device was + previously contributing to (via the reverse index). +- The device's state keys aren't touched — they're the authoritative + record; a device re-joining resumes from them. + +### 5.5 CR created / selector changed + +The reverse index + counters are rebuilt for the affected CR by +walking `device-info` + `device-state` once (O(devices + states) +local NATS KV reads). Cheap for a single CR; happens at CR-apply +time, not on every tick. + +## 6. Cold-start protocol + +On operator process start: + +1. **Load CRs** — list `Deployment` CRs via kube API. Build the + reverse index skeleton (deployment → selector). +2. **Load device labels** — iterate `device-info` KV keys once. + Resolve each device against every CR's selector, populate the + reverse index device-side entries. O(devices × CRs), one-time, + in-memory. For 1M devices × 10k CRs this is 10^10 op but purely + local lookups (BTreeMap matches on label maps); back-of-envelope + has it at a few seconds to a minute on a modern CPU. +3. **Rebuild counters** — iterate `device-state` KV keys once. + For each `state..`, look up the matching + deployments from the reverse index and increment counters. +4. **Attach stream consumer** — durable consumer on + `events.state.>`, starting from the newest sequence at cold-start + moment. 
The KV walk was the "past"; the stream is the "future." +5. **Begin tick loop** — patch dirty CRs on a 1 Hz schedule. + +Cold-start time dominated by step 2, not step 3. An ArgoCD-style +"pause all reconciles during leader election / startup" envelope +keeps the CR patches from competing with the cold-start scans. + +**What if the operator falls behind the stream's retention window?** +Reset to step 3 (re-walk `device-state`). The KV is authoritative; +the stream is an accelerator. + +## 7. CR status patch cadence + +- Counter updates happen in memory, instantly. +- The **dirty set** captures which deployments' aggregates changed + since the last patch. +- A 1 Hz ticker reads + clears the dirty set, patches those CRs. +- Individual CR patches are debounced to at most once per second + — avoids hammering the apiserver when a deployment is mid-rollout + and devices are transitioning in a burst. + +Steady-state operator → apiserver traffic is proportional to the +rate of *interesting* changes, not to fleet size. + +## 8. Failure modes + +| Scenario | Detection | Recovery | +|---|---|---| +| Operator crash | k8s restarts the pod | Cold-start protocol §6 | +| Stream consumer falls behind retention | Stream API returns out-of-range | Re-run §6 step 3 (re-walk KV) | +| Agent publishes event but KV write fails | Agent-side local retry; event is replayed | Counter is idempotent per §5.2 | +| Agent writes KV but event publish fails | Agent-side local retry | Operator never sees the transition until retry succeeds; stale threshold catches the device if agent is permanently broken | +| Device's label change lost | Heartbeat carries current labels; stale entry aged out | Periodic sync (e.g. 1/h) re-scans `device-info` to catch drift | +| Duplicate event (retry) | `from == current` in reverse index | No-op (§5.2) | +| Out-of-order event (retry ordering) | Sequence number on event | Consumer tracks per-(device, deployment) last-applied sequence; old events ignored | + +## 9. Scale back-of-envelope + +**Target:** 1M devices, 10k deployments, p50 reconcile rate 1 event +per device per hour. + +- **Event volume.** 1M × (1/3600s) = 278 events/s. +- **Operator event-processing cost.** Each event touches a bounded + number of in-memory counters (via reverse index). At 278 eps, this + is ~1 µs-equivalent of CPU, ~0 network (JetStream local to operator). +- **Operator → apiserver patches.** Deployments change at a rate + far below event rate; debounced dirty-set drains limit patches to + a few per second even during bursty rollouts. +- **Operator memory.** Reverse index entries (device_id + set of + deployment keys) ≈ 200 bytes × 1M = 200 MB. Counters ≈ 10k × few + fields = negligible. Last-error + recent-events rings ≈ 10k × 10 + entries × 512 bytes = 50 MB. Total ~250 MB — fine. +- **Cold-start time.** 1M KV reads × amortized 0.1 ms (JetStream KV + is fast for key iteration) = 100 s. Acceptable for a + several-minute-once-per-release recovery window. If it becomes a + problem, chunk the walk and resume-from-checkpoint. +- **Stale device sweep.** On each tick, O(dirty set × reverse index + lookups). Stale detection itself is O(devices-whose-heartbeat-is-old); + a second, slower ticker (e.g. 30 s) scans the heartbeat KV for + entries older than threshold and emits synthetic "device went + stale" events that drive the same counter-decrement path. + +## 10. 
Schema migration + +`Deployment` CRD is still `v1alpha1`, not deployed anywhere, so no +migration machinery is needed for the CRD itself — we just change +the aggregate subtree definition. + +`harmony-reconciler-contracts::AgentStatus` is deprecated by this +chapter. Replaced by narrower wire types: + +- `DeviceInfo` — what `info.` stores +- `DeploymentState` — what `state..` stores +- `HeartbeatPayload` — what `heartbeat.` stores +- `StateChangeEvent` — what events stream emits +- `LogEvent` — what event-log stream emits + +The old `AgentStatus` type goes away when the old aggregator +goes away. Clean break, same CRD version. + +## 11. Implementation milestones + +Landing order, each a reviewable increment: + +1. **M1: new contracts crate shapes** — `DeviceInfo`, + `DeploymentState`, `HeartbeatPayload`, `StateChangeEvent`, + `LogEvent`. Round-trip serde tests. No runtime code changes yet. +2. **M2: agent-side rewrite** — agent writes the new KV shapes + + publishes state-change events + heartbeats. Old `AgentStatus` + publish path stays in parallel for the smoke to keep passing. +3. **M3: operator-side cold-start protocol** — new operator task + that walks the new KV buckets and builds in-memory counters. + Runs alongside the old aggregator; logs counter parity checks + against the legacy aggregator's output so we can verify + correctness before switching over. +4. **M4: operator-side event consumer** — attach the durable stream + consumer, drive counters incrementally. Parity checks still on. +5. **M5: flip CR patch source** — the new counter-backed aggregator + patches `.status.aggregate`, the legacy one goes read-only, then + deleted in the next commit. +6. **M6: logs subject + query protocol** — device-side ring buffer, + query API, a first CLI surface (`natiq logs device=X` or + equivalent) that drives it. +7. **M7: synthetic-scale test harness** — spin up 1k (then 10k) mock + agents in-process, drive a realistic event load through the + operator, measure + publish numbers. +8. **M8: delete legacy `AgentStatus`** — `harmony-reconciler-contracts` + cleanup, smoke-a4 updates. + +M1-M5 can land on one branch; M6 is adjacent work; M7-M8 close out. + +## 12. Open questions + +- **Multi-operator HA.** The design assumes one operator at a time. + Adding HA means either (a) one active + one standby operator with + NATS-based leader election, or (b) shared counter state in KV + instead of in-memory. (a) is simpler; (b) scales better. + Defer until a specific availability target demands it. +- **Counter-KV snapshots.** Should we periodically snapshot the + in-memory counter state to a `counters` KV bucket so cold-start + can resume from a recent snapshot + a short stream tail, instead + of always re-walking `device-state`? Probably yes once cold-start + time becomes an operational concern, but not in the initial cut. +- **Stream retention tuning.** 24h for `events.state.>` is a guess. + Real number depends on observed operator downtime p99. Initial + setting, tune from operational data. +- **Compaction policy for `device-state` KV.** JetStream KV + per-key history can grow unbounded if phases churn. Set + `max_history_per_key = 1` (keep only latest value) unless there's + a reason to keep transition history (there isn't — that's what + the events stream is for). 
+- **Agent crash before publishing state-change event.** Transition + is durably captured in the agent's local podman state; on agent + restart the reconcile loop re-observes the phase and either + re-publishes (if it differs from `state..`) or stays + silent. Correctness preserved at the cost of event-stream ordering + ambiguity during the crash window — acceptable. + +## 13. What this chapter deliberately does *not* change + +- CRD `.spec.target_selector` semantics — stays exactly as shipped. +- Operator's kube-rs controller loop for CR reconcile — stays as is. +- Helm chart structure (Chapter 3) — orthogonal. +- Authentication (Chapter Auth) — orthogonal. When that chapter + lands, every subject + KV bucket above will be re-scoped under + device-specific NATS credentials; the topology above doesn't need + to change for that to slot in. diff --git a/ROADMAP/iot_platform/context_conversation.md b/ROADMAP/fleet_platform/context_conversation.md similarity index 99% rename from ROADMAP/iot_platform/context_conversation.md rename to ROADMAP/fleet_platform/context_conversation.md index 8c8f588b..2a44d003 100644 --- a/ROADMAP/iot_platform/context_conversation.md +++ b/ROADMAP/fleet_platform/context_conversation.md @@ -183,7 +183,7 @@ Drawing these out as they're load-bearing for judgment calls: 8. **The partner relationship is strategic.** Tuesday demo conversation is half the Tuesday deliverable. Framing the v0.1/v0.2/v0.3 roadmap to them matters as much as the running code. -9. **End-customer debuggability is a UX constraint.** Mechanical/electrical/chemical engineers will touch these devices. `systemctl status iot-agent` must tell them what's happening. `journalctl -u iot-agent` must be parseable by humans. Error messages must be understandable without Kubernetes knowledge. +9. **End-customer debuggability is a UX constraint.** Mechanical/electrical/chemical engineers will touch these devices. `systemctl status fleet-agent` must tell them what's happening. `journalctl -u fleet-agent` must be parseable by humans. Error messages must be understandable without Kubernetes knowledge. 10. **NATS is the long-term architectural commitment.** Everything on NATS — not as a queue, as a coordination fabric. The "decentralized cluster management" future depends on this choice. Implementation decisions that weaken this (e.g., "let's just put a database in the middle") should be pushed back on. diff --git a/ROADMAP/fleet_platform/v0_1_plan.md b/ROADMAP/fleet_platform/v0_1_plan.md new file mode 100644 index 00000000..5bf663fc --- /dev/null +++ b/ROADMAP/fleet_platform/v0_1_plan.md @@ -0,0 +1,381 @@ +# IoT Platform v0.1 and beyond — forward plan + +Authoritative forward plan for the NationTech decentralized-infra / +IoT platform, written after the v0 walking skeleton shipped +(see `v0_walking_skeleton.md` for the historical diary). Organized as +five chapters in execution order. + +## State of the world (as of 2026-04-23) + +**Green, end-to-end:** + +- CRD → operator → NATS JetStream KV write path (`smoke-a1.sh`). +- Agent watches KV, reconciles podman containers (`smoke-a1.sh`). +- VM-as-device provisioning: cloud-init + fleet-agent install + NATS + smoke (`smoke-a3.sh`), x86_64 (native KVM) and aarch64 (TCG). +- Power-cycle / reboot resilience (`smoke-a3.sh` phase 5). +- aarch64 cross-compile of the agent (no Harmony modules need to + feature-gate aarch64). +- Operator installed via a harmony Score (typed Rust, no yaml). 
+- `harmony-reconciler-contracts` crate — cross-boundary types + (bucket names, key helpers, `DeviceInfo`, `DeploymentState`, + `HeartbeatPayload`, `DeploymentName`, `Id` re-export). + +**Chapter 1 shipped** (2026-04-21): composed end-to-end demo +(`smoke-a4.sh`) — operator in k3d + in-cluster NATS + ARM VM + +typed-Rust CR applier + hand-off menu + `--auto` regression. Green +on x86_64 (native KVM) and aarch64 (TCG). + +**Chapter 2 shipped** (2026-04-23): selector-based targeting + +Device CRD + `.status.aggregate` reflect-back. `Deployment.spec. +targetSelector: LabelSelector` resolves against cluster-scoped +`Device` CRs materialized from NATS `device-info`. Operator writes +`desired-state` KV per matched pair, patches +`.status.aggregate` (matchedDeviceCount / succeeded / failed / +pending / lastError) at 1 Hz. Load-tested to 10 000 devices × +1 000 Deployments at 10 000 KV writes/s sustained, zero errors. + +**Not yet wired (real v0.1 work still to go):** + +- Helm packaging of the operator (Chapter 3). +- Zitadel + OpenBao auth (per-device credentials, SSO for + operator users). Placeholder `CredentialSource` trait on the + agent side (Chapter 4). +- Any frontend (Chapter 5). +- Small quality items (not blockers): agent config-driven labels, + `matchExpressions` in selectors, `Device.status.conditions` + populated from heartbeat staleness. + +**Verified during planning** (so future implementation doesn't +have to re-litigate): + +- **Upgrade already works.** `reconciler.rs::apply` byte-compares + serialized score payloads; drift triggers re-reconcile. + `PodmanTopology::ensure_service_running` removes then re-creates + containers on spec drift. No "stale + new" window. +- **The polymorphism stays.** `ReconcileScore` is an externally-tagged + enum; adding `OkdApplyV0` later is additive. + +**Surprises since v0 started** (for context, none architectural): + +- Arch `edk2-aarch64-202602-2` shipped empty firmware blobs; + `202508-1` ships unpadded edk2 that needs 64 MiB pflash padding. + Fixed via runtime discovery + padding in `modules/kvm/firmware.rs`. +- MTTCG isn't default for cross-arch TCG on QEMU 10.2; force via + `qemu:commandline` override. `pauth-impdef=on` likewise a + qemu:commandline opt-in. +- `ensure_vm` is idempotent on "domain exists" — re-apply of a + changed XML requires manual `undefine --nvram --remove-all-storage`. + Noted as a follow-up in the code comments. + +--- + +## Chapter 1 — Hands-on end-to-end demo (imminent) + +**Goal:** the user runs one command, watches operator + NATS + ARM +VM come up, then drives a CRD through the full loop by hand: +`kubectl apply` it (manually or via a typed Rust applier), watch the +operator log "acquired," check the NATS KV store with `natsbox`, +SSH/console into the VM, `curl` the running nginx container from +the workstation. + +### User-facing requirements (explicit) + +- **No yaml fixtures.** Sample `Deployment` CRs constructed in + typed Rust using `DeploymentSpec` + `PodmanV0Score`. Same + discipline as the `install` Score that replaced `gen-crd | kubectl + apply`. +- **ArgoCD deferred.** User's production clusters have it; bringing + it into the smoke harness adds setup overhead without validating + anything `helm install` doesn't. Chapter 3 produces the chart; + ArgoCD integration is a later operational concern. +- **Operator logs every CR it acquires** — `controller.rs` already + does `tracing::info!(%ns, %name, "reconcile")`; verify the output + reads well in the command-menu hand-off. 
+- **natsbox debugging is first-class.** Script prints exact + natsbox one-liners at hand-off so the user can inspect KV state. +- **In-cluster NATS.** Not a side-by-side podman container (as + smoke-a1 does today). Expose to the libvirt VM via k3d + loadbalancer port mapping. + +### Design decisions + +- **Rust CR applier.** New binary `examples/harmony_apply_deployment/`. + CLI flags `--name --namespace --target-device --image --port + --delete`. Constructs the `Deployment` CR via + `kube::Api<Deployment>` + typed `DeploymentSpec`; calls + `api.apply(...)`. Can also `--print` the CR JSON to stdout so + `kubectl apply -f -` still works from the terminal. +- **smoke-a4.sh orchestration stays bash for now.** User agreed + this is test-harness scope, not framework path; converting it + to Rust is "not as important right now." +- **Hand-off is the default mode**, not `--keep`. The whole point + of Chapter 1 is that the user drives the last stage interactively. + `smoke-a4.sh` brings everything up, applies *nothing*, prints + the command menu, waits on `INT/TERM` to tear down. `--auto` + runs the full apply/curl/upgrade/delete regression for CI. +- **In-cluster NATS path.** Preferred: use `harmony::modules::nats` + if it has a lightweight single-node / no-supercluster mode. + Fallback: typed `K8sResourceScore` applying a minimal Deployment + + NodePort Service. 15-min research task before committing. + +### Composed smoke phases (`smoke-a4.sh`) + +1. k3d cluster up with `-p "4222:4222@loadbalancer"` so the host + port 4222 forwards into the cluster. Reachable from the + libvirt VM via the gateway IP (typically `192.168.122.1:4222`). +2. NATS in-cluster via the chosen path (harmony module or direct + K8sResourceScore). Wait for readiness. +3. Install CRD via the operator's `install` subcommand (typed Rust). +4. Spawn operator as a host-side process (same pattern as + smoke-a1). Operator connects to `nats://localhost:4222`. +5. Provision ARM VM via `example_fleet_vm_setup` (same entry point + smoke-a3 uses). Agent configured to connect to + `nats://<gateway-ip>:4222` — discover the gateway IP via + `virsh net-dumpxml default`, as smoke-a3 already does. +6. Sanity: `kubectl wait ... crd Established`, operator logged + "KV bucket ready", agent logged "watching KV keys", + `status.<device_id>` present in `agent-status` bucket. +7. Hand off. Print the command menu below. Exit 0 with a cleanup + trap on `INT/TERM`. + +### Command menu at hand-off + +- `kubectl get deployments.fleet.nationtech.io -A -w` — watch CR + reconcile reactively. +- `cargo run -q -p example_harmony_apply_deployment -- --image + nginx:latest --target-device $TARGET_DEVICE` — apply an nginx + deployment via typed Rust. +- `cargo run -q -p example_harmony_apply_deployment -- --print + --image nginx:latest --target-device $TARGET_DEVICE | + kubectl apply -f -` — same thing, through kubectl. +- `ssh -i $SSH_KEY fleet-admin@$VM_IP` — connect to the VM. +- `virsh console $VM_NAME --force` — serial console alternative. +- `podman --url unix://$VM_IP:... ps` or ssh + `podman ps` + — list containers on the VM from the workstation. +- `podman run --rm docker.io/natsio/nats-box nats --server + nats://localhost:4222 kv ls desired-state` — list desired + state keys (from the host). +- `podman run --rm ... nats kv get desired-state + '<device_id>.<deployment>' --raw` — dump a specific desired state. +- `podman run --rm ... nats kv get agent-status + 'status.<device_id>' --raw` — dump the heartbeat. +- `curl http://$VM_IP:8080/` — hit the deployed nginx. + +### `--auto` path (for regression) + +1. 
Apply `nginx:latest`, wait for container on VM, `curl` 200. +2. Apply `nginx:1.26` (upgrade), wait for container *id* to change, + `curl` 200 against the new container. +3. Apply `--delete`, wait for container gone from VM. + +### Files + +- **NEW** `examples/harmony_apply_deployment/Cargo.toml` + + `src/main.rs` — typed applier. +- **NEW** `fleet/scripts/smoke-a4.sh`. +- **NO yaml fixtures.** Rust CLI flags cover the shape. +- Optional: factor shared smoke phases (NATS up, k3d up, operator + spawn, VM provision) into `fleet/scripts/lib/` if the duplication + across a1/a3/a4 becomes obvious. Don't force it. + +### NATS exposure — implementation-time notes + +- k3d `@loadbalancer` port mapping binds the host's `0.0.0.0:4222` + by default; libvirt VMs on `virbr0` can reach it via the gateway + IP. No special NAT config required. +- Fallback if environmental snag: keep the side-by-side podman + container on an opt-in `NATS_MODE=podman` flag. Don't default + to that — user explicitly asked for in-cluster. + +### Verification + +- Fresh host: `ARCH=aarch64 ./fleet/scripts/smoke-a4.sh` completes + in 8-15 min, prints the command menu. +- `ARCH=aarch64 ./fleet/scripts/smoke-a4.sh --auto` PASSes + end-to-end including upgrade id-change assertion. +- x86_64 (`ARCH=x86-64`) completes in 2-5 min. + +### Explicitly out of scope + +- `AgentStatus` / `DeploymentStatus` enrichment — Chapter 2. +- Helm chart, ArgoCD, auth, frontend — later chapters. +- Lifting the applier into a reusable `ApplyDeploymentScore` — + only if a second consumer appears. + +--- + +## Chapter 2 — Status reflect-back + selector-based targeting **[SHIPPED 2026-04-23]** + +**Goal:** CRD `.status` reflects fleet reality — per-deployment +success/failure/pending counts, last-error surface, freshness. The +Deployment CR targets devices by label selector, not by id list. + +> The shipped design replaces the original `AgentStatus` + list-of-ids +> proposal wholesale. See `chapter_4_aggregation_scale.md` for the +> superseded design-doc archaeology. Commits: +> `refactor(iot): delete legacy AgentStatus path`, +> `refactor(iot): operator watches device-state KV directly; drop event stream`, +> `refactor(iot): Deployment.targetSelector + Device CRD (DaemonSet-like)`. + +### What shipped + +**Wire format** (in `harmony-reconciler-contracts`): four per-concern +payloads on dedicated NATS KV buckets. No monolithic per-device blob, +no separate event stream. + +| Type | Bucket | Cadence | +|------|--------|---------| +| `DeviceInfo` | `device-info` | on startup + label/inventory change | +| `DeploymentState` | `device-state` | on reconcile phase transition | +| `HeartbeatPayload` | `device-heartbeat` | every 30 s | + +**CRDs.** Two cluster resources: + +- `Deployment` (namespaced) — `spec.targetSelector: LabelSelector` + (standard K8s `matchLabels` / `matchExpressions`). No device list + on spec. `.status.aggregate` carries `matchedDeviceCount`, + `succeeded`, `failed`, `pending`, `lastError`. +- `Device` (cluster-scoped, like `Node`) — `metadata.labels` carries + the device's routing labels; `spec.inventory` holds the hardware/OS + snapshot; `status.conditions` is reserved for liveness (populated + lazily by a future heartbeat-freshness reconciler, not every ping). + +**Operator tasks** (three concurrent loops in one process): + +1. `controller` — validates Deployment CR names, holds the finalizer + that cleans `desired-state..` KV entries on + delete. No writes on apply (aggregator handles that). +2. 
`device_reconciler` — watches the `device-info` KV; server-side- + applies a `Device` CR per `DeviceInfo` payload, with label + sanitization. Agents remain kube-unaware. +3. `fleet_aggregator` — three caches driven by watches (Deployment + CRs, Device CRs, `device-state` KV). On any change, resolves + each selector against the Device cache, writes/deletes + `desired-state` KV entries for diffed matches, and patches + `.status.aggregate` at 1 Hz for the CRs whose counters moved. + +**Agents** publish `device-id=` as a default DeviceInfo label, so +targeting a single device with `matchLabels: {device-id: pi-42}` is +zero-config. User-defined labels layer on from agent config (scoped +out of this chapter; follow-up item). + +### Scale proof + +`fleet/scripts/load-test.sh` + `examples/fleet_load_test` simulate N +devices across M Deployments, driving `device-state` KV updates at a +configurable cadence while the full operator stack runs against a +local k3d apiserver. Verified: + +- 100 devices / 10 groups / 1 Hz / 60 s — 100 writes/s sustained, + all 10 CR aggregates converge. +- 10 000 devices / 1 000 groups / 1 Hz / 120 s — ~10 000 writes/s + sustained, 0 errors, all 1 000 CR aggregates correct + (`matchedDeviceCount == expected`, `succeeded + failed + pending + == matched`). Same envelope before and after the selector rewrite. + +### Out of scope in this chapter (follow-ups) + +- Agent config-driven labels (`[labels]` in agent toml → DeviceInfo). + ~30 lines; deferred until a concrete need lands. +- `matchExpressions` evaluator. Operator currently supports + `matchLabels` only and logs a warning for expression-bearing + selectors. ~50 lines; deferred. +- `Device.status.conditions` populated from heartbeat staleness + (Reachable / Stale transitions). Liveness is computable today by + reading `device-heartbeat` directly; CR-side reflection is a + convenience. ~100 lines; deferred. +- Full journald log streaming. The `.status.aggregate.lastError` + surface covers the user's reflect-back requirement for now. +- Multi-device regression smoke — defer until real hardware or a + second VM is around. + +--- + +## Chapter 3 — Helm chart (ArgoCD deferred) + +**Goal:** operator ships as a versioned helm chart with CRD +version-locked inside. + +User clarified this session: ArgoCD exists in production; all it +does is apply resources from the chart. Standing up ArgoCD in the +smoke adds setup overhead with no incremental validation value. + +Chapter 3 produces the chart + validates `helm install / helm +upgrade` lifecycles. ArgoCD consumption is a user operational +concern downstream. + +### Sketch + +- Chart location: `fleet/harmony-fleet-operator/chart/` (or sibling repo — + defer decision to implementation time). +- Templates: Namespace, SA, ClusterRole, ClusterRoleBinding, + Deployment (operator pod), CRD. +- **CRD yaml in the chart is generated at chart-publish time** from + the Rust `Deployment::crd()`. One-off release artifact, not + framework path — consistent with "no yaml in framework code." +- Values: operator image tag, NATS URL, log level. +- Smoke: `helm install` into k3d → CR apply → same assertions as + Chapter 1. + +### Open questions + +- Chart repo: subdir vs. separate git repo. +- CRD install mechanism: chart hook vs. templates directory. + Drives CRD upgrade story. + +--- + +## Chapter 4 — Auth: Zitadel + OpenBao + per-device identity + +**Goal:** per-device granular NATS credentials; SSO for operator +users; OpenBao policy per device; JWT bootstrap from Zitadel. 
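+
+A rough sketch of the agent-side flow this implies. Everything in it is
+illustrative: the struct, the method names, and the OpenBao exchange
+endpoint are assumptions, not the shipped `CredentialSource` API (the
+real trait lives in the agent's `config.rs`, per the Sketch below).
+
+```rust
+/// Hypothetical Zitadel-backed credential source for the agent.
+/// Exchanges a long-lived device identity JWT for short-lived NATS
+/// credentials scoped to this device's subjects and KV keys.
+struct ZitadelCredentialSource {
+    /// Device identity JWT minted by Zitadel at enrollment time.
+    device_jwt: String,
+    /// OpenBao endpoint that performs the JWT-to-NATS-creds exchange.
+    exchange_url: String,
+    /// Last credentials that worked; reused if OpenBao is unreachable.
+    cached: Option<NatsCredentials>,
+}
+
+#[derive(Clone)]
+struct NatsCredentials {
+    user: String,
+    password: String,
+}
+
+impl ZitadelCredentialSource {
+    /// Fetch fresh credentials, falling back to the cache on an outage
+    /// so the agent keeps reconciling (availability note in the Sketch).
+    async fn nats_credentials(&mut self) -> anyhow::Result<NatsCredentials> {
+        match self.exchange_jwt().await {
+            Ok(creds) => {
+                self.cached = Some(creds.clone());
+                Ok(creds)
+            }
+            Err(err) if self.cached.is_some() => {
+                tracing::warn!(%err, "OpenBao unreachable, reusing cached NATS creds");
+                Ok(self.cached.clone().expect("checked above"))
+            }
+            Err(err) => Err(err),
+        }
+    }
+
+    async fn exchange_jwt(&self) -> anyhow::Result<NatsCredentials> {
+        // POST `device_jwt` to `exchange_url`; OpenBao's auth callout
+        // validates it against Zitadel and returns time-boxed,
+        // per-device credentials. HTTP wiring is out of scope here.
+        anyhow::bail!("not wired yet: {}", self.exchange_url)
+    }
+}
+```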
+ +Zitadel + OpenBao are already ~99% integrated in harmony; this +chapter is wiring the IoT-specific flows. + +### Sketch + +- Agent's `CredentialSource` trait (already abstract in agent + `config.rs`) gets a Zitadel-JWT-backed implementation. Mints + short-lived NATS creds via OpenBao auth callout. +- Remove the shared-credentials `toml-shared` variant (v0 demo + leftover). +- Availability: auth-callout caches policies, tolerates OpenBao + outages. +- SSO for operator users (separate flow): Zitadel groups → + Kubernetes RBAC subjects on the `Deployment` CRD. + +--- + +## Chapter 5 — Frontend (last) + +**Goal:** operator-friendly UI for the decentralized platform. + +Form factor undecided: Leptos web dashboard, CLI extension to +`harmony_cli`, or a TUI. Minimum viable product: read-only view of +fleet state (devices + deployments + aggregated status) powered by +the CRD `.status` from Chapter 2. Aspiration: write operations with +auth from Chapter 4. + +--- + +## Principles — what we've learned and want to keep doing + +- **No yaml in framework code paths.** Every kube-rs type is + typed; every Score apply goes through typed Rust. Yaml generation + happens only at chart-publish time, never at runtime. +- **Scores describe desired state; topologies expose capabilities.** + Prefer adding capability traits over thickening a single topology. +- **Minimal topologies for ad-hoc Score execution.** `K8sAnywhereTopology` + has too many opinions (cert-manager install, tenant-manager bootstrap, + helm probes) for narrow apply-a-CRD use cases. See ROADMAP + §12.6 — a lean shared `K8sBareTopology` is the durable fix. +- **Cross-boundary wire types in `harmony-reconciler-contracts`**, + everything else in its natural crate. +- **Never ship untested code.** Every commit that changes runtime + behavior is verified against a smoke script before landing. + Cargo check + unit tests aren't enough. +- **Prove claims about upstream before blaming upstream.** The + Arch edk2 investigation showed this matters; see + `memory/feedback_prove_before_blaming_upstream.md`. diff --git a/ROADMAP/iot_platform/v0_walking_skeleton.md b/ROADMAP/fleet_platform/v0_walking_skeleton.md similarity index 89% rename from ROADMAP/iot_platform/v0_walking_skeleton.md rename to ROADMAP/fleet_platform/v0_walking_skeleton.md index 4990334f..1380f0c3 100644 --- a/ROADMAP/iot_platform/v0_walking_skeleton.md +++ b/ROADMAP/fleet_platform/v0_walking_skeleton.md @@ -1,5 +1,23 @@ # IoT Platform v0 — Walking Skeleton +> **Status: SHIPPED (2026-04-21)** +> +> This document is the historical design diary for the v0 walking skeleton +> work — it captures the decision trail, hour-by-hour plan, and risk +> analysis as they were written before the skeleton was built. It is +> preserved unchanged as an archaeology reference. +> +> The walking skeleton shipped end-to-end: CRD → operator → NATS KV → +> on-device agent → podman reconcile; VM-as-device flow (x86_64 + aarch64 +> via TCG); power-cycle resilience; operator installed as a Score rather +> than kubectl-apply-a-yaml. See smoke-a1, smoke-a3, smoke-a3-arm for the +> executable proof. +> +> **Forward plan lives in `ROADMAP/fleet_platform/v0_1_plan.md`** — five +> chapters covering hands-on demo, status reflect-back, helm chart, SSO/ +> secrets, and frontend. When a chapter grows scope it may move into its +> own `chapter_N_*.md`. + **Approach:** Walking skeleton (Cockburn). Thin end-to-end thread through every architectural component. 
Naive first, architecture emerges from running code, hardening follows real-world feedback. ## 1. Strategic framing @@ -116,11 +134,11 @@ iot-workload-hello/ `deployment.yaml`: ```yaml -apiVersion: iot.nationtech.io/v1alpha1 +apiVersion: fleet.nationtech.io/v1alpha1 kind: Deployment metadata: name: hello-world - namespace: iot-demo + namespace: fleet-demo spec: targetDevices: - pi-demo-01 @@ -138,10 +156,10 @@ spec: ### 5.2 Central cluster setup Existing k8s cluster. Namespaces: -- `iot-system` — operator, NATS (single-node for v0) -- `iot-demo` — `Deployment` CRs +- `fleet-system` — operator, NATS (single-node for v0) +- `fleet-demo` — `Deployment` CRs -ArgoCD application pre-configured to sync `iot-workload-hello` repo into `iot-demo` namespace. +ArgoCD application pre-configured to sync `iot-workload-hello` repo into `fleet-demo` namespace. ### 5.3 Raspberry Pi 5 setup @@ -151,9 +169,9 @@ Base OS: **Ubuntu Server 24.04 LTS ARM64** (ships Podman 4.9 in repos). Raspberr Installed: - `podman` (4.4+, ARM64) with `systemctl --user enable --now podman.socket` (required for `podman-api` crate) -- `iot-agent` binary (cross-compiled to aarch64 via existing Harmony aarch64 toolchain) -- `/etc/iot-agent/config.toml` with NATS URL + shared credential -- systemd unit `iot-agent.service` +- `fleet-agent` binary (cross-compiled to aarch64 via existing Harmony aarch64 toolchain) +- `/etc/fleet-agent/config.toml` with NATS URL + shared credential +- systemd unit `fleet-agent.service` ### 5.4 What the code does @@ -227,7 +245,7 @@ trait CredentialSource: Send + Sync { } ``` -v0: `TomlFileCredentialSource` reading `/etc/iot-agent/config.toml`. +v0: `TomlFileCredentialSource` reading `/etc/fleet-agent/config.toml`. v0.2: `ZitadelBootstrappedCredentialSource` — same trait, swapped via config. 30 minutes Friday. Saves 3 hours of refactor in v0.2. @@ -258,7 +276,7 @@ device_id = "pi-demo-01" [credentials] type = "toml-shared" -nats_user = "iot-agent" +nats_user = "fleet-agent" nats_pass = "dev-shared-password" [nats] @@ -306,9 +324,9 @@ Document findings in the Friday night log regardless of outcome. v0.1 work inclu - Write 1-page `v0-demo.md`: demo script, success criteria, fallback plan. - Decide Pi OS: Ubuntu 24.04 ARM64 (default) vs Raspberry Pi OS 64-bit. Don't agonize beyond 10 min. -*Dispatch agent A1 (operator):* "Create Rust crate `iot/iot-operator-v0/` using `kube-rs` implementing a Deployment CRD controller that writes to NATS KV. Exact spec in task card §9.A1. Self-verify: `kubectl apply` → `nats kv get` shows entry. Under 300 lines main.rs. No auth." +*Dispatch agent A1 (operator):* "Create Rust crate `fleet/harmony-fleet-operator/` using `kube-rs` implementing a Deployment CRD controller that writes to NATS KV. Exact spec in task card §9.A1. Self-verify: `kubectl apply` → `nats kv get` shows entry. Under 300 lines main.rs. No auth." -*Dispatch agent A2 (Pi provisioning, fallback-aware):* "Attempt Harmony-based Raspberry Pi 5 provisioning Score. Target: fresh Pi flashed via SD card, boots, static IP, Ubuntu 24.04 ARM64 with Podman 4.9, podman user socket enabled, user `iot-agent` with linger enabled, `/etc/iot-agent/` ready. If Harmony doesn't have Pi primitives, document the gap and produce a manual provisioning runbook instead (rpi-imager + cloud-init). Hard time limit: 90 min. Self-verify: `ssh iot-agent@ 'podman --version'` returns 4.4+." +*Dispatch agent A2 (Pi provisioning, fallback-aware):* "Attempt Harmony-based Raspberry Pi 5 provisioning Score. 
Target: fresh Pi flashed via SD card, boots, static IP, Ubuntu 24.04 ARM64 with Podman 4.9, podman user socket enabled, user `fleet-agent` with linger enabled, `/etc/fleet-agent/` ready. If Harmony doesn't have Pi primitives, document the gap and produce a manual provisioning runbook instead (rpi-imager + cloud-init). Hard time limit: 90 min. Self-verify: `ssh fleet-agent@ 'podman --version'` returns 4.4+." **Hour 2 — your work: agent crate** @@ -324,8 +342,8 @@ Crate in `harmony/src/modules/iot_agent/` or a new binary in the Harmony workspa **Hour 3 — local integration** -- Review agent A1's operator. Deploy to central cluster `iot-system` namespace. -- Deploy NATS to `iot-system` if not already (single-node JetStream). +- Review agent A1's operator. Deploy to central cluster `fleet-system` namespace. +- Deploy NATS to `fleet-system` if not already (single-node JetStream). - Review agent A2's Pi provisioning. If Harmony Score succeeded, note for demo; if manual runbook, accept and move on. - Agent compiles on laptop. Connects to central NATS. @@ -380,7 +398,7 @@ Named subsection: the most important class of failures for Pi-in-field deploymen **Hour 3-4 — demo polish:** - `./demo.sh` is one command, no manual steps. - Output is clean: clear PASS/FAIL with per-phase timings. -- `kubectl get deployments.iot.nationtech.io` output is readable. +- `kubectl get deployments.fleet.nationtech.io` output is readable. **Hour 5-6 — partner-facing polish:** - README in workload repo: 4 lines. "Edit this, git push, done." @@ -421,8 +439,8 @@ Each card is self-contained. Hand the entire card to an agent. # Note: harmony is built with --no-default-features to exclude KVM (libvirt cannot cross-compile to aarch64). # The 5 KVM examples (kvm_vm_examples, kvm_okd_ha_cluster, opnsense_vm_integration, # opnsense_pair_integration, example_linux_vm) are x86_64-only by design. -cargo build --target x86_64-unknown-linux-gnu -p harmony -p harmony_agent -p iot-agent-v0 -p iot-operator-v0 -cargo build --target aarch64-unknown-linux-gnu -p harmony --no-default-features -p harmony_agent -p iot-agent-v0 -p iot-operator-v0 +cargo build --target x86_64-unknown-linux-gnu -p harmony -p harmony_agent -p fleet-agent-v0 -p harmony-fleet-operator +cargo build --target aarch64-unknown-linux-gnu -p harmony --no-default-features -p harmony_agent -p fleet-agent-v0 -p harmony-fleet-operator ``` All three must exit 0. Note: `cargo test --target aarch64-unknown-linux-gnu` cannot run on x86_64 (exec format error) — that's expected. Test execution is only for the host architecture via `./build/check.sh`. If any check fails, fix the issue before marking the task complete. Include the output in the PR description. @@ -431,11 +449,11 @@ All three must exit 0. Note: `cargo test --target aarch64-unknown-linux-gnu` can **Goal:** `kube-rs` operator that watches `Deployment` CRs and writes the Score to NATS KV. -**Deliverable:** Crate `iot/iot-operator-v0/`: +**Deliverable:** Crate `fleet/harmony-fleet-operator/`: - `Cargo.toml`: `kube`, `k8s-openapi`, `async-nats`, `serde`, `serde_yaml`, `serde_json`, `tokio`, `tracing`, `tracing-subscriber`, `anyhow`. - `src/main.rs` under 300 lines. - `deploy/operator.yaml` — Deployment, ServiceAccount, ClusterRole, ClusterRoleBinding. -- `deploy/crd.yaml` — `Deployment` CRD for `iot.nationtech.io/v1alpha1`. +- `deploy/crd.yaml` — `Deployment` CRD for `fleet.nationtech.io/v1alpha1`. **Behavior:** 1. Connect to NATS on startup (`NATS_URL` env, no auth). 
@@ -462,7 +480,7 @@ status: **Self-verification:** ```bash -cd iot/iot-operator-v0 +cd fleet/harmony-fleet-operator cargo build && cargo clippy -- -D warnings # Test against k3d: @@ -474,7 +492,7 @@ OP_PID=$! sleep 3 kubectl apply -f - < 'podman --version' +ssh fleet-agent@ 'podman --version' # Must be 4.4+ (target 4.9+) -ssh iot-agent@ 'systemctl --user is-active podman.socket' +ssh fleet-agent@ 'systemctl --user is-active podman.socket' # Must print "active" -ssh iot-agent@ 'loginctl show-user iot-agent | grep Linger=yes' -ssh iot-agent@ 'uname -m' +ssh fleet-agent@ 'loginctl show-user fleet-agent | grep Linger=yes' +ssh fleet-agent@ 'uname -m' # Must print aarch64 ``` @@ -550,13 +568,13 @@ ssh iot-agent@ 'uname -m' **Prerequisites:** Agent binary exists (Sylvain writes Friday). -**Deliverable:** `iot/iot-agent-v0/scripts/install.sh`: +**Deliverable:** `iot/fleet-agent-v0/scripts/install.sh`: 1. Args: `--host `, `--device-id `, `--nats-url `, `--nats-user `, `--nats-pass
<pass>
`. 2. Cross-builds for aarch64 using existing Harmony aarch64 toolchain. -3. `scp` binary to Pi, `sudo mv` to `/usr/local/bin/iot-agent`. -4. Templates `/etc/iot-agent/config.toml` from args. -5. Installs `/etc/systemd/system/iot-agent.service`. -6. `systemctl daemon-reload && systemctl enable --now iot-agent`. +3. `scp` binary to Pi, `sudo mv` to `/usr/local/bin/fleet-agent`. +4. Templates `/etc/fleet-agent/config.toml` from args. +5. Installs `/etc/systemd/system/fleet-agent.service`. +6. `systemctl daemon-reload && systemctl enable --now fleet-agent`. 7. Waits up to 15s for "connected to NATS" in journal. **systemd unit:** @@ -568,8 +586,8 @@ Wants=network-online.target [Service] Type=simple -User=iot-agent -ExecStart=/usr/local/bin/iot-agent +User=fleet-agent +ExecStart=/usr/local/bin/fleet-agent Restart=on-failure RestartSec=5 StandardOutput=journal @@ -584,9 +602,9 @@ WantedBy=multi-user.target ```bash ./install.sh --host --device-id pi-demo-01 \ --nats-url nats://central:4222 \ - --nats-user iot-agent --nats-pass dev-shared-password -ssh iot-agent@ 'sudo systemctl status iot-agent' # active (running) -ssh iot-agent@ 'sudo journalctl -u iot-agent --since "2 minutes ago"' | grep "connected to NATS" + --nats-user fleet-agent --nats-pass dev-shared-password +ssh fleet-agent@ 'sudo systemctl status fleet-agent' # active (running) +ssh fleet-agent@ 'sudo journalctl -u fleet-agent --since "2 minutes ago"' | grep "connected to NATS" ``` **Time limit:** 2 hours agent time. @@ -595,7 +613,7 @@ ssh iot-agent@ 'sudo journalctl -u iot-agent --since "2 minutes ago"' | g **Goal:** One command runs full demo flow. -**Deliverable:** `iot/scripts/demo.sh`: +**Deliverable:** `fleet/scripts/demo.sh`: 1. Verifies Pi reachable + agent running. 2. Applies `scripts/demo-deployment.yaml`. 3. Waits up to 120s for container on Pi (ssh + `podman ps`). @@ -606,7 +624,7 @@ ssh iot-agent@ 'sudo journalctl -u iot-agent --since "2 minutes ago"' | g **Self-verification:** ```bash -./iot/scripts/demo.sh +./fleet/scripts/demo.sh # Ends with "PASS", total < 5 min ``` diff --git a/examples/fleet_load_test/Cargo.toml b/examples/fleet_load_test/Cargo.toml new file mode 100644 index 00000000..7456f570 --- /dev/null +++ b/examples/fleet_load_test/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "example_fleet_load_test" +version.workspace = true +edition = "2024" +license.workspace = true + +[[bin]] +name = "fleet_load_test" +path = "src/main.rs" + +[dependencies] +harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" } +harmony-fleet-operator = { path = "../../fleet/harmony-fleet-operator" } +async-nats = { workspace = true } +chrono = { workspace = true } +kube = { workspace = true, features = ["runtime", "derive"] } +k8s-openapi.workspace = true +serde_json = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } +clap = { workspace = true } +rand = { workspace = true } diff --git a/examples/fleet_load_test/src/main.rs b/examples/fleet_load_test/src/main.rs new file mode 100644 index 00000000..0761f3dd --- /dev/null +++ b/examples/fleet_load_test/src/main.rs @@ -0,0 +1,552 @@ +//! Load test for the IoT operator's `fleet_aggregator`. +//! +//! Simulates N devices across M Deployment CRs, each device pushing +//! a `DeploymentState` update to NATS every `--tick-ms`. Measures +//! throughput on both sides (devices → NATS and operator → kube +//! 
apiserver) and, at the end of the run, verifies each CR's +//! `.status.aggregate` counters sum to its expected group size (and +//! that `matched_device_count` equals that size — i.e. every +//! registered device got picked up by the CR's label selector). +//! +//! Assumes an already-running stack: +//! - NATS reachable at `--nats-url` +//! - k8s cluster with the operator's CRD installed (KUBECONFIG) +//! - the operator process running against the same NATS + cluster +//! +//! The `fleet/scripts/smoke-a4.sh` script brings all three up — pass +//! `--hold` to leave them running, then run this binary. +//! +//! Typical invocation: +//! +//! cargo run -q -p example_fleet_load_test -- \ +//! --namespace fleet-load \ +//! --groups 55,5,5,5,5,5,5,5,5,5 \ +//! --tick-ms 1000 \ +//! --duration-s 60 + +use anyhow::{Context, Result}; +use async_nats::jetstream::{self, kv}; +use chrono::Utc; +use clap::Parser; +use harmony_fleet_operator::crd::{ + Deployment, DeploymentSpec, Rollout, RolloutStrategy, ScorePayload, +}; +use harmony_reconciler_contracts::{ + BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, DeploymentName, + DeploymentState, DeviceInfo, HeartbeatPayload, Id, Phase, device_heartbeat_key, + device_info_key, device_state_key, +}; +use k8s_openapi::api::core::v1::Namespace; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; +use kube::Client; +use kube::api::{Api, DeleteParams, Patch, PatchParams, PostParams}; +use rand::Rng; +use std::collections::BTreeMap; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant}; +use tokio::task::JoinSet; + +#[derive(Parser, Debug, Clone)] +#[command( + name = "fleet_load_test", + about = "Synthetic load for the IoT operator's fleet_aggregator" +)] +struct Cli { + /// NATS URL (same one the operator connects to). + #[arg(long, default_value = "nats://localhost:4222")] + nats_url: String, + + /// k8s namespace for the load-test Deployment CRs. Created if + /// missing. + #[arg(long, default_value = "fleet-load")] + namespace: String, + + /// Group shape — comma-separated device counts, one per CR. + /// Default: 100 devices over 10 groups (1 × 55 + 9 × 5). + #[arg(long, default_value = "55,5,5,5,5,5,5,5,5,5")] + groups: String, + + /// Per-device tick in ms. Each tick publishes one DeploymentState. + #[arg(long, default_value_t = 1000)] + tick_ms: u64, + + /// Heartbeat cadence in seconds (separate from the state tick). + #[arg(long, default_value_t = 30)] + heartbeat_s: u64, + + /// Total run duration in seconds before tearing down. + #[arg(long, default_value_t = 60)] + duration_s: u64, + + /// Report throughput every N seconds. + #[arg(long, default_value_t = 5)] + report_s: u64, + + /// Keep the CRs + KV entries in place after the run instead of + /// deleting them. Useful with HOLD=1 to inspect the steady-state + /// aggregate after the load finishes. + #[arg(long)] + keep: bool, +} + +/// Metrics collected across all device tasks. 
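+/// Totals are monotonic; the reporter task derives per-interval rates by diffing +/// successive snapshots, so `Ordering::Relaxed` is sufficient here.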
+#[derive(Default)] +struct Counters { + state_writes: AtomicU64, + heartbeat_writes: AtomicU64, + errors: AtomicU64, +} + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .init(); + + let cli = Cli::parse(); + let group_sizes = parse_groups(&cli.groups)?; + let total: usize = group_sizes.iter().sum(); + + tracing::info!( + devices = total, + groups = group_sizes.len(), + shape = ?group_sizes, + tick_ms = cli.tick_ms, + duration_s = cli.duration_s, + "fleet_load_test starting" + ); + + // --- NATS setup ---------------------------------------------------------- + let nc = async_nats::connect(&cli.nats_url) + .await + .with_context(|| format!("connecting to NATS at {}", cli.nats_url))?; + let js = jetstream::new(nc); + let info_bucket = open_bucket(&js, BUCKET_DEVICE_INFO).await?; + let state_bucket = open_bucket(&js, BUCKET_DEVICE_STATE).await?; + let heartbeat_bucket = open_bucket(&js, BUCKET_DEVICE_HEARTBEAT).await?; + + // --- kube setup ---------------------------------------------------------- + let client = Client::try_default().await.context("kube client")?; + ensure_namespace(&client, &cli.namespace).await?; + let deployments: Api = Api::namespaced(client.clone(), &cli.namespace); + + // --- plan groups + device ids -------------------------------------------- + let plan = build_plan(&group_sizes); + apply_crs(&deployments, &plan).await?; + publish_device_infos(&info_bucket, &plan).await?; + + // --- spawn simulators ---------------------------------------------------- + let counters = Arc::new(Counters::default()); + let mut sims = JoinSet::new(); + + let tick = Duration::from_millis(cli.tick_ms); + let hb_tick = Duration::from_secs(cli.heartbeat_s); + for device in &plan.devices { + let device = Arc::new(device.clone()); + sims.spawn(simulate_state_loop( + device.clone(), + state_bucket.clone(), + counters.clone(), + tick, + )); + sims.spawn(simulate_heartbeat_loop( + device.clone(), + heartbeat_bucket.clone(), + counters.clone(), + hb_tick, + )); + } + + // --- metrics reporter ---------------------------------------------------- + let report_tick = Duration::from_secs(cli.report_s); + let reporter_counters = counters.clone(); + let reporter = tokio::spawn(async move { + let mut ticker = tokio::time::interval(report_tick); + ticker.tick().await; // skip immediate fire + let mut prev_state = 0u64; + let mut prev_hb = 0u64; + loop { + ticker.tick().await; + let s = reporter_counters.state_writes.load(Ordering::Relaxed); + let h = reporter_counters.heartbeat_writes.load(Ordering::Relaxed); + let e = reporter_counters.errors.load(Ordering::Relaxed); + let dt = report_tick.as_secs_f64(); + let ss = (s - prev_state) as f64 / dt; + let hh = (h - prev_hb) as f64 / dt; + tracing::info!( + state_writes_total = s, + state_writes_per_s = format!("{ss:.1}"), + heartbeats_total = h, + heartbeats_per_s = format!("{hh:.1}"), + errors = e, + "load" + ); + prev_state = s; + prev_hb = h; + } + }); + + // --- run for duration ---------------------------------------------------- + let started = Instant::now(); + tokio::time::sleep(Duration::from_secs(cli.duration_s)).await; + reporter.abort(); + sims.shutdown().await; + let elapsed = started.elapsed(); + + let s = counters.state_writes.load(Ordering::Relaxed); + let h = counters.heartbeat_writes.load(Ordering::Relaxed); + let e = counters.errors.load(Ordering::Relaxed); + tracing::info!( + elapsed_s = format!("{:.1}", elapsed.as_secs_f64()), + 
state_writes_total = s, + state_writes_per_s = format!("{:.1}", s as f64 / elapsed.as_secs_f64()), + heartbeats_total = h, + errors = e, + "run complete" + ); + + // --- give the aggregator a second to drain -------------------------------- + tokio::time::sleep(Duration::from_secs(2)).await; + + // --- verify CR status aggregates ----------------------------------------- + // + // With selector-based matching there's a second axis we want to check: + // `matched_device_count` must equal the expected group size (selector + // actually resolved every registered Device), AND the phase counters + // must sum to it. + let mut all_ok = true; + for group in &plan.groups { + let cr = deployments.get(&group.cr_name).await?; + let Some(status) = cr.status.as_ref().and_then(|s| s.aggregate.as_ref()) else { + tracing::warn!(cr = %group.cr_name, "aggregate missing on CR status"); + all_ok = false; + continue; + }; + let total_reported = status.succeeded + status.failed + status.pending; + let expected = group.devices.len() as u32; + let ok = status.matched_device_count == expected && total_reported == expected; + if !ok { + all_ok = false; + } + tracing::info!( + cr = %group.cr_name, + expected_devices = expected, + matched = status.matched_device_count, + succeeded = status.succeeded, + failed = status.failed, + pending = status.pending, + total = total_reported, + ok, + "cr status" + ); + } + + if !cli.keep { + tracing::info!("cleanup: deleting CRs + KV entries"); + for group in &plan.groups { + let _ = deployments + .delete(&group.cr_name, &DeleteParams::default()) + .await; + } + for device in &plan.devices { + let _ = state_bucket + .delete(&device_state_key( + &device.device_id, + &DeploymentName::try_new(&device.cr_name).unwrap(), + )) + .await; + let _ = info_bucket + .delete(&device_info_key(&device.device_id)) + .await; + let _ = heartbeat_bucket + .delete(&device_heartbeat_key(&device.device_id)) + .await; + } + } + + if all_ok { + tracing::info!("PASS — all CR aggregates match device counts"); + Ok(()) + } else { + anyhow::bail!("FAIL — at least one CR aggregate did not sum to its target device count") + } +} + +fn parse_groups(s: &str) -> Result> { + let out: Vec = s + .split(',') + .map(|t| t.trim().parse::()) + .collect::>() + .context("parsing --groups")?; + if out.is_empty() { + anyhow::bail!("--groups must have at least one size"); + } + Ok(out) +} + +/// A single simulated device and the CR it belongs to. +#[derive(Clone)] +struct DevicePlan { + device_id: String, + cr_name: String, +} + +#[derive(Clone)] +struct GroupPlan { + cr_name: String, + devices: Vec, +} + +struct Plan { + devices: Vec, + groups: Vec, +} + +fn build_plan(group_sizes: &[usize]) -> Plan { + // CR-name + device-id width scale with group count so large runs + // get zero-padded ids that sort sensibly in kubectl. 
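+    // e.g. 10 groups → cr_width 2 → "load-group-00".."load-group-09"; 100 devices → dev_width 5 → "load-dev-00001".."load-dev-00100".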
+ let cr_width = group_sizes.len().to_string().len().max(2); + let total: usize = group_sizes.iter().sum(); + let dev_width = total.to_string().len().max(5); + + let mut devices = Vec::new(); + let mut groups = Vec::new(); + let mut next_id = 1usize; + for (i, size) in group_sizes.iter().enumerate() { + let cr_name = format!("load-group-{i:0cr_width$}"); + let mut ids = Vec::with_capacity(*size); + for _ in 0..*size { + let id = format!("load-dev-{next_id:0dev_width$}"); + next_id += 1; + devices.push(DevicePlan { + device_id: id.clone(), + cr_name: cr_name.clone(), + }); + ids.push(id); + } + groups.push(GroupPlan { + cr_name, + devices: ids, + }); + } + Plan { devices, groups } +} + +async fn open_bucket(js: &jetstream::Context, bucket: &'static str) -> Result { + Ok(js + .create_key_value(kv::Config { + bucket: bucket.to_string(), + history: 1, + ..Default::default() + }) + .await?) +} + +async fn ensure_namespace(client: &Client, name: &str) -> Result<()> { + let api: Api = Api::all(client.clone()); + if api.get_opt(name).await?.is_some() { + return Ok(()); + } + let ns = Namespace { + metadata: kube::api::ObjectMeta { + name: Some(name.to_string()), + ..Default::default() + }, + ..Default::default() + }; + match api.create(&PostParams::default(), &ns).await { + Ok(_) => Ok(()), + Err(kube::Error::Api(ae)) if ae.code == 409 => Ok(()), + Err(e) => Err(e.into()), + } +} + +async fn apply_crs(api: &Api, plan: &Plan) -> Result<()> { + let params = PatchParams::apply("fleet-load-test").force(); + let started = Instant::now(); + + // Cap concurrency so we don't overwhelm the apiserver on large + // fleets. 32 in-flight applies is well under typical apiserver + // QPS limits and keeps the startup latency predictable. + const CONCURRENCY: usize = 32; + let mut in_flight: JoinSet> = JoinSet::new(); + let mut iter = plan.groups.iter(); + + for _ in 0..CONCURRENCY { + if let Some(group) = iter.next() { + in_flight.spawn(apply_one_cr(api.clone(), group.clone(), params.clone())); + } + } + while let Some(res) = in_flight.join_next().await { + res??; + if let Some(group) = iter.next() { + in_flight.spawn(apply_one_cr(api.clone(), group.clone(), params.clone())); + } + } + + tracing::info!( + crs = plan.groups.len(), + elapsed_ms = started.elapsed().as_millis() as u64, + "applied Deployment CRs" + ); + Ok(()) +} + +async fn apply_one_cr( + api: Api, + group: GroupPlan, + params: PatchParams, +) -> Result { + // Selector-based targeting: every Device CR in this group carries + // a `group=` label (we publish that on DeviceInfo; the + // operator reflects it into Device.metadata.labels). + let mut match_labels = BTreeMap::new(); + match_labels.insert("group".to_string(), group.cr_name.clone()); + + let cr = Deployment::new( + &group.cr_name, + DeploymentSpec { + target_selector: LabelSelector { + match_labels: Some(match_labels), + match_expressions: None, + }, + // Score content doesn't matter — no real agents consume + // the desired-state here. The aggregator still writes KV + // for each matched device; that's wire noise we accept + // as part of the realism. 
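+            // Same shape example_harmony_apply_deployment builds: a "PodmanV0" type tag plus an opaque JSON `data` payload; nothing in this test ever parses it back.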
+ score: ScorePayload { + type_: "PodmanV0".to_string(), + data: serde_json::json!({ + "services": [{ + "name": group.cr_name, + "image": "docker.io/library/nginx:alpine", + "ports": ["8080:80"], + }], + }), + }, + rollout: Rollout { + strategy: RolloutStrategy::Immediate, + }, + }, + ); + api.patch(&group.cr_name, ¶ms, &Patch::Apply(&cr)) + .await + .with_context(|| format!("applying CR {}", group.cr_name))?; + Ok(group.cr_name) +} + +async fn publish_device_infos(bucket: &kv::Store, plan: &Plan) -> Result<()> { + let started = Instant::now(); + const CONCURRENCY: usize = 64; + let mut in_flight: JoinSet> = JoinSet::new(); + let mut iter = plan.devices.iter(); + + for _ in 0..CONCURRENCY { + if let Some(device) = iter.next() { + in_flight.spawn(publish_one_info(bucket.clone(), device.clone())); + } + } + while let Some(res) = in_flight.join_next().await { + res??; + if let Some(device) = iter.next() { + in_flight.spawn(publish_one_info(bucket.clone(), device.clone())); + } + } + + tracing::info!( + devices = plan.devices.len(), + elapsed_ms = started.elapsed().as_millis() as u64, + "seeded DeviceInfo" + ); + Ok(()) +} + +async fn publish_one_info(bucket: kv::Store, device: DevicePlan) -> Result<()> { + let info = DeviceInfo { + device_id: Id::from(device.device_id.clone()), + labels: BTreeMap::from([("group".to_string(), device.cr_name.clone())]), + inventory: None, + updated_at: Utc::now(), + }; + let key = device_info_key(&device.device_id); + let payload = serde_json::to_vec(&info)?; + bucket.put(&key, payload.into()).await?; + Ok(()) +} + +async fn simulate_state_loop( + device: Arc, + bucket: kv::Store, + counters: Arc, + tick: Duration, +) { + let Ok(deployment) = DeploymentName::try_new(&device.cr_name) else { + return; + }; + let state_key = device_state_key(&device.device_id, &deployment); + let mut ticker = tokio::time::interval(tick); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + loop { + ticker.tick().await; + let phase = pick_phase(); + let ds = DeploymentState { + device_id: Id::from(device.device_id.clone()), + deployment: deployment.clone(), + phase, + last_event_at: Utc::now(), + last_error: matches!(phase, Phase::Failed) + .then(|| format!("synthetic failure @{}", device.device_id)), + }; + match serde_json::to_vec(&ds) { + Ok(payload) => match bucket.put(&state_key, payload.into()).await { + Ok(_) => { + counters.state_writes.fetch_add(1, Ordering::Relaxed); + } + Err(_) => { + counters.errors.fetch_add(1, Ordering::Relaxed); + } + }, + Err(_) => { + counters.errors.fetch_add(1, Ordering::Relaxed); + } + } + } +} + +async fn simulate_heartbeat_loop( + device: Arc, + bucket: kv::Store, + counters: Arc, + tick: Duration, +) { + let hb_key = device_heartbeat_key(&device.device_id); + let mut ticker = tokio::time::interval(tick); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + loop { + ticker.tick().await; + let hb = HeartbeatPayload { + device_id: Id::from(device.device_id.clone()), + at: Utc::now(), + }; + if let Ok(payload) = serde_json::to_vec(&hb) { + if bucket.put(&hb_key, payload.into()).await.is_ok() { + counters.heartbeat_writes.fetch_add(1, Ordering::Relaxed); + } else { + counters.errors.fetch_add(1, Ordering::Relaxed); + } + } + } +} + +/// Phase distribution mirroring a healthy-ish fleet: mostly Running, +/// a sprinkle of Failed + Pending to exercise the aggregator's +/// transition-handling + last_error logic. 
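+/// Concretely: about 80% Running, 10% Failed, 10% Pending per tick (the ranges below).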
+fn pick_phase() -> Phase { + let n: u32 = rand::rng().random_range(0..100); + match n { + 0..80 => Phase::Running, + 80..90 => Phase::Failed, + _ => Phase::Pending, + } +} diff --git a/examples/fleet_nats_install/Cargo.toml b/examples/fleet_nats_install/Cargo.toml new file mode 100644 index 00000000..8a5bfd4b --- /dev/null +++ b/examples/fleet_nats_install/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "example_fleet_nats_install" +version.workspace = true +edition = "2024" +license.workspace = true + +[[bin]] +name = "fleet_nats_install" +path = "src/main.rs" + +[dependencies] +harmony = { path = "../../harmony", default-features = false } +tokio.workspace = true +anyhow.workspace = true +clap.workspace = true diff --git a/examples/fleet_nats_install/src/main.rs b/examples/fleet_nats_install/src/main.rs new file mode 100644 index 00000000..8270abca --- /dev/null +++ b/examples/fleet_nats_install/src/main.rs @@ -0,0 +1,91 @@ +//! Install a single-node NATS server into the cluster `KUBECONFIG` +//! points at, using harmony's `NatsBasicScore` + `K8sBareTopology`. +//! +//! This binary is the glue between the smoke harness (`smoke-a4.sh`) +//! and the framework Score. Typical usage from a demo script: +//! +//! KUBECONFIG=$KUBECFG cargo run -q -p example_fleet_nats_install \ +//! -- --namespace fleet-system --name fleet-nats --node-port 4222 +//! +//! Behaviour: +//! - Ensures the target namespace exists +//! - Deploys a single-replica NATS server (JetStream on) +//! - Exposes it as a Service (NodePort by default so off-cluster +//! clients like a libvirt VM agent can reach it through the +//! k3d loadbalancer port mapping) +//! +//! For production / HA / TLS, graduate to `NatsK8sScore`. + +use anyhow::{Context, Result}; +use clap::Parser; +use harmony::inventory::Inventory; +use harmony::modules::k8s::K8sBareTopology; +use harmony::modules::nats::NatsBasicScore; +use harmony::score::Score; + +#[derive(Parser, Debug)] +#[command( + name = "fleet_nats_install", + about = "Install single-node NATS (JetStream) via NatsBasicScore" +)] +struct Cli { + /// Target namespace. Created if missing. + #[arg(long, default_value = "fleet-system")] + namespace: String, + /// Resource name for the NATS Deployment + Service. + #[arg(long, default_value = "fleet-nats")] + name: String, + /// Service exposure mode. `load-balancer` pairs with k3d's + /// `-p PORT:PORT@loadbalancer` port mapping (direct service- + /// port routing). `node-port` demands a port in the apiserver's + /// nodeport range (default 30000-32767). `cluster-ip` keeps + /// NATS in-cluster only. + #[arg(long, value_enum, default_value_t = ExposeMode::LoadBalancer)] + expose: ExposeMode, + /// NodePort when `--expose=node-port`. Must be in the cluster's + /// nodeport range (default 30000-32767). Ignored otherwise. + #[arg(long, default_value_t = 30422)] + node_port: i32, + /// Override the NATS container image. 
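+    /// When unset, whatever default image `NatsBasicScore` ships is used.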
+ #[arg(long)] + image: Option, +} + +#[derive(Clone, Debug, clap::ValueEnum)] +enum ExposeMode { + ClusterIp, + NodePort, + LoadBalancer, +} + +#[tokio::main] +async fn main() -> Result<()> { + let cli = Cli::parse(); + + let topology = K8sBareTopology::from_kubeconfig("fleet-nats-install") + .await + .map_err(|e| anyhow::anyhow!(e)) + .context("building K8sBareTopology from KUBECONFIG")?; + + let mut score = NatsBasicScore::new(&cli.name, &cli.namespace); + match cli.expose { + ExposeMode::ClusterIp => {} + ExposeMode::NodePort => score = score.node_port(cli.node_port), + ExposeMode::LoadBalancer => score = score.load_balancer(), + } + if let Some(image) = cli.image { + score = score.image(image); + } + + let interpret = Score::::create_interpret(&score); + let outcome = interpret + .execute(&Inventory::empty(), &topology) + .await + .map_err(|e| anyhow::anyhow!("execute NatsBasicScore: {e}"))?; + + println!( + "NATS installed: namespace={}, name={}, expose={:?} outcome={outcome:?}", + cli.namespace, cli.name, cli.expose + ); + Ok(()) +} diff --git a/examples/iot_vm_setup/Cargo.toml b/examples/fleet_vm_setup/Cargo.toml similarity index 86% rename from examples/iot_vm_setup/Cargo.toml rename to examples/fleet_vm_setup/Cargo.toml index 7bc93e10..1f495e17 100644 --- a/examples/iot_vm_setup/Cargo.toml +++ b/examples/fleet_vm_setup/Cargo.toml @@ -1,11 +1,11 @@ [package] -name = "example_iot_vm_setup" +name = "example_fleet_vm_setup" version.workspace = true edition = "2024" license.workspace = true [[bin]] -name = "iot_vm_setup" +name = "fleet_vm_setup" path = "src/main.rs" [dependencies] diff --git a/examples/iot_vm_setup/README.md b/examples/fleet_vm_setup/README.md similarity index 84% rename from examples/iot_vm_setup/README.md rename to examples/fleet_vm_setup/README.md index ab44915f..a5b57087 100644 --- a/examples/iot_vm_setup/README.md +++ b/examples/fleet_vm_setup/README.md @@ -6,8 +6,8 @@ Harmony Scores in sequence: 1. **`KvmVmScore`** — provision a libvirt VM from an Ubuntu 24.04 cloud image with a cloud-init seed ISO that authorizes one SSH key. Returns the booted VM's IP. -2. **`IotDeviceSetupScore`** — SSH into the VM (via the Ansible-backed - `HostConfigurationProvider`) and install podman + the `iot-agent` +2. **`FleetDeviceSetupScore`** — SSH into the VM (via the Ansible-backed + `HostConfigurationProvider`) and install podman + the `fleet-agent` binary, drop the TOML config, bring up the systemd unit. After a successful run, the VM is a fleet member reporting to NATS under @@ -42,21 +42,21 @@ sudo virsh net-autostart default ## Run ```bash -cargo build -p iot-agent-v0 +cargo build -p fleet-agent-v0 cargo run -p example_iot_vm_setup -- \ --base-image /var/tmp/harmony-iot-smoke/ubuntu-24.04-server-cloudimg-amd64.img \ --ssh-pubkey /var/tmp/harmony-iot-smoke/ssh/id_ed25519.pub \ --ssh-privkey /var/tmp/harmony-iot-smoke/ssh/id_ed25519 \ --work-dir /var/tmp/harmony-iot-smoke \ - --agent-binary target/debug/iot-agent-v0 \ + --agent-binary target/debug/fleet-agent-v0 \ --nats-url nats://192.168.122.1:4222 ``` ## Changing groups Re-running with a different `--group` rewrites -`/etc/iot-agent/config.toml` on the VM and restarts the agent. The VM +`/etc/fleet-agent/config.toml` on the VM and restarts the agent. The VM itself is untouched. ```bash @@ -65,5 +65,5 @@ cargo run -p example_iot_vm_setup -- ... 
--group group-b ## Full end-to-end via smoke test -See `iot/scripts/smoke-a3.sh` — stands up NATS in a podman container, +See `fleet/scripts/smoke-a3.sh` — stands up NATS in a podman container, runs this example, asserts the agent's status lands in NATS. diff --git a/examples/iot_vm_setup/src/main.rs b/examples/fleet_vm_setup/src/main.rs similarity index 62% rename from examples/iot_vm_setup/src/main.rs rename to examples/fleet_vm_setup/src/main.rs index 3bc25fc9..2610047f 100644 --- a/examples/iot_vm_setup/src/main.rs +++ b/examples/fleet_vm_setup/src/main.rs @@ -5,15 +5,15 @@ //! capability. Here we satisfy it with `KvmVirtualMachineHost` //! (libvirt). Swapping to VMware/Proxmox/cloud would be a //! different topology injection with the same Score code. -//! 2. `IotDeviceSetupScore` — SSHes into the booted VM and installs -//! podman + iot-agent via the split Linux-host capabilities. +//! 2. `FleetDeviceSetupScore` — SSHes into the booted VM and installs +//! podman + fleet-agent via the split Linux-host capabilities. use anyhow::{Context, Result}; use clap::Parser; use harmony::inventory::Inventory; -use harmony::modules::iot::{ - IotDeviceSetupConfig, IotDeviceSetupScore, ProvisionVmScore, - check_iot_smoke_preflight_for_arch, ensure_iot_ssh_keypair, +use harmony::modules::fleet::{ + FleetDeviceSetupConfig, FleetDeviceSetupScore, ProvisionVmScore, + check_fleet_smoke_preflight_for_arch, ensure_fleet_ssh_keypair, }; use harmony::modules::kvm::KvmVirtualMachineHost; use harmony::modules::kvm::config::init_executor; @@ -42,7 +42,7 @@ impl From for VmArchitecture { #[derive(Parser, Debug)] #[command( - name = "iot_vm_setup", + name = "fleet_vm_setup", about = "Provision one VM + onboard it into the IoT fleet" )] struct Cli { @@ -51,22 +51,34 @@ struct Cli { #[arg(long, value_enum, default_value_t = CliArch::X86_64)] arch: CliArch, /// libvirt domain name for the VM. - #[arg(long, default_value = "iot-vm-01")] + #[arg(long, default_value = "fleet-vm-01")] vm_name: String, /// Device id the agent will announce to NATS. Defaults to a /// fresh `Id` (hex timestamp + random suffix). #[arg(long)] device_id: Option, - /// Fleet group label to write into the agent's TOML config. - #[arg(long, default_value = "group-a")] - group: String, + /// Routing labels to write into the agent's TOML config. + /// Comma-separated list of `key=value` pairs. Published in every + /// DeviceInfo heartbeat; the operator resolves Deployment + /// `spec.targetSelector` against this map. At least one label + /// is required so the device is targetable — the default + /// `group=group-a` satisfies that. + #[arg(long, default_value = "group=group-a")] + labels: String, /// libvirt network name to attach the VM to. #[arg(long, default_value = "default")] network: String, /// Admin username created on first boot. - #[arg(long, default_value = "iot-admin")] + #[arg(long, default_value = "fleet-admin")] admin_user: String, - /// Path to the cross-compiled iot-agent binary. + /// Optional plaintext password for the admin user. Enables SSH + /// password auth on the guest — intended for interactive + /// debugging / reliability-testing sessions where the operator + /// wants to break things on purpose. Leave unset for key-only + /// auth (production default). + #[arg(long, env = "FLEET_VM_ADMIN_PASSWORD")] + admin_password: Option, + /// Path to the cross-compiled fleet-agent binary. /// Required unless `--bootstrap-only` is set. 
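+    /// Installed onto the VM by `FleetDeviceSetupScore` and started under its systemd unit.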
#[arg(long)] agent_binary: Option, @@ -84,6 +96,13 @@ struct Cli { /// SSH key, libvirt pool) and exit. #[arg(long)] bootstrap_only: bool, + /// Virtual disk size in GiB. The stock Ubuntu cloud image has + /// only ~2 GiB of root — resized on first boot by + /// cloud-initramfs-growroot. Bump this to 16 GiB by default so + /// podman can sideload a couple of container images without + /// running out of space. + #[arg(long, default_value_t = 16)] + disk_size_gb: u32, } #[tokio::main] @@ -92,7 +111,7 @@ async fn main() -> Result<()> { let cli = Cli::parse(); let arch: VmArchitecture = cli.arch.into(); - check_iot_smoke_preflight_for_arch(arch) + check_fleet_smoke_preflight_for_arch(arch) .await .map_err(|e| anyhow::anyhow!("{e}"))?; @@ -100,13 +119,13 @@ async fn main() -> Result<()> { harmony::modules::linux::ensure_ansible_venv() .await .map_err(|e| anyhow::anyhow!("ansible venv: {e}"))?; - harmony::modules::iot::ensure_ubuntu_2404_cloud_image_for_arch(arch) + harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch) .await .map_err(|e| anyhow::anyhow!("cloud image: {e}"))?; - ensure_iot_ssh_keypair() + ensure_fleet_ssh_keypair() .await .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?; - harmony::modules::iot::ensure_harmony_iot_pool() + harmony::modules::fleet::ensure_harmony_fleet_pool() .await .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?; println!("bootstrap complete"); @@ -114,16 +133,16 @@ async fn main() -> Result<()> { } // --- Step 1: provision the VM --- - let base_image = harmony::modules::iot::ensure_ubuntu_2404_cloud_image_for_arch(arch) + let base_image = harmony::modules::fleet::ensure_ubuntu_2404_cloud_image_for_arch(arch) .await .map_err(|e| anyhow::anyhow!("cloud image: {e}"))?; - let pool = harmony::modules::iot::ensure_harmony_iot_pool() + let pool = harmony::modules::fleet::ensure_harmony_fleet_pool() .await .map_err(|e| anyhow::anyhow!("libvirt pool: {e}"))?; - let ssh = ensure_iot_ssh_keypair() + let ssh = ensure_fleet_ssh_keypair() .await .map_err(|e| anyhow::anyhow!("ssh keypair: {e}"))?; - let authorized_key = harmony::modules::iot::read_public_key(&ssh) + let authorized_key = harmony::modules::fleet::read_public_key(&ssh) .await .map_err(|e| anyhow::anyhow!("read ssh pubkey: {e}"))?; @@ -142,12 +161,13 @@ async fn main() -> Result<()> { architecture: arch, cpus: 2, memory_mib: 2048, - disk_size_gb: None, + disk_size_gb: Some(cli.disk_size_gb), network: cli.network.clone(), first_boot: Some(VmFirstBootConfig { hostname: Some(cli.vm_name.clone()), admin_user: Some(cli.admin_user.clone()), authorized_keys: vec![authorized_key], + admin_password: cli.admin_password.clone(), }), }, }; @@ -162,7 +182,7 @@ async fn main() -> Result<()> { let agent_binary = cli .agent_binary .clone() - .context("--agent-binary is required (e.g. target/release/iot-agent-v0)")?; + .context("--agent-binary is required (e.g. 
target/release/fleet-agent-v0)")?; let device_id = cli .device_id .clone() @@ -179,9 +199,16 @@ async fn main() -> Result<()> { }, ); - let setup_score = IotDeviceSetupScore::new(IotDeviceSetupConfig { + let labels = parse_labels(&cli.labels)?; + let labels_display = labels + .iter() + .map(|(k, v)| format!("{k}={v}")) + .collect::>() + .join(","); + + let setup_score = FleetDeviceSetupScore::new(FleetDeviceSetupConfig { device_id: device_id.clone(), - group: cli.group.clone(), + labels, nats_urls: vec![cli.nats_url.clone()], nats_user: cli.nats_user.clone(), nats_pass: cli.nats_pass.clone(), @@ -189,13 +216,33 @@ async fn main() -> Result<()> { }); run_setup_score(&setup_score, &linux_topology).await?; - println!( - "device '{device_id}' (group '{}') onboarded via {vm_ip}", - cli.group - ); + println!("device '{device_id}' ({labels_display}) onboarded via {vm_ip}"); Ok(()) } +/// Parse `key=value,key=value` into a BTreeMap. Errors on any +/// malformed chunk, empty keys/values, or an empty map overall — +/// a device with no labels is practically untargetable, so we'd +/// rather fail at the CLI than silently onboard a ghost. +fn parse_labels(raw: &str) -> anyhow::Result> { + let mut out = std::collections::BTreeMap::new(); + for piece in raw.split(',').map(str::trim).filter(|p| !p.is_empty()) { + let (k, v) = piece + .split_once('=') + .ok_or_else(|| anyhow::anyhow!("label chunk '{piece}' missing '='"))?; + let k = k.trim(); + let v = v.trim(); + if k.is_empty() || v.is_empty() { + anyhow::bail!("label chunk '{piece}' has empty key or value"); + } + out.insert(k.to_string(), v.to_string()); + } + if out.is_empty() { + anyhow::bail!("--labels must include at least one key=value pair"); + } + Ok(out) +} + async fn run_vm_score( score: &ProvisionVmScore, topology: &KvmVirtualMachineHost, @@ -215,14 +262,17 @@ async fn run_vm_score( anyhow::bail!("ProvisionVmScore finished without reporting an IP: {outcome:?}") } -async fn run_setup_score(score: &IotDeviceSetupScore, topology: &LinuxHostTopology) -> Result<()> { +async fn run_setup_score( + score: &FleetDeviceSetupScore, + topology: &LinuxHostTopology, +) -> Result<()> { use harmony::score::Score; let inventory = Inventory::empty(); let interpret = Score::::create_interpret(score); let outcome = interpret .execute(&inventory, topology) .await - .map_err(|e| anyhow::anyhow!("IotDeviceSetupScore execute: {e}"))?; + .map_err(|e| anyhow::anyhow!("FleetDeviceSetupScore execute: {e}"))?; println!("setup: {} ({:?})", outcome.message, outcome.details); Ok(()) } diff --git a/examples/harmony_apply_deployment/Cargo.toml b/examples/harmony_apply_deployment/Cargo.toml new file mode 100644 index 00000000..d0736fe2 --- /dev/null +++ b/examples/harmony_apply_deployment/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "example_harmony_apply_deployment" +version.workspace = true +edition = "2024" +license.workspace = true + +[[bin]] +name = "harmony_apply_deployment" +path = "src/main.rs" + +[dependencies] +harmony = { path = "../../harmony", default-features = false, features = ["podman"] } +harmony-fleet-operator = { path = "../../fleet/harmony-fleet-operator" } +kube = { workspace = true, features = ["runtime", "derive"] } +k8s-openapi = { workspace = true } +serde_json.workspace = true +tokio.workspace = true +anyhow.workspace = true +clap.workspace = true diff --git a/examples/harmony_apply_deployment/src/main.rs b/examples/harmony_apply_deployment/src/main.rs new file mode 100644 index 00000000..904e74be --- /dev/null +++ 
b/examples/harmony_apply_deployment/src/main.rs @@ -0,0 +1,178 @@ +//! Typed-Rust applier for the harmony fleet `Deployment` CR. +//! +//! Builds a `Deployment` CR via the typed `DeploymentSpec` + +//! `PodmanV0Score` + `kube::Api`, then either applies it directly +//! through the kube client or prints it to stdout so the user can +//! pipe into `kubectl apply -f -`. +//! +//! The CRD is domain-agnostic — it's "declarative reconcile intent +//! for a set of devices matched by label selector," which is the +//! same shape whether the fleet is Pi podman, OKD clusters, or +//! KVM VMs. The name `harmony_apply_deployment` reflects that +//! (not `iot_`-anything), in line with the review call to position +//! the operator as a generic fleet/reconcile tool. +//! +//! The CRD types live in `harmony_fleet_operator::crd`; the score types +//! live in `harmony::modules::podman` (PodmanV0 being the first +//! reconciler variant — future variants drop in alongside). +//! +//! Typical demo-driver usage: +//! +//! # apply an nginx deployment +//! cargo run -q -p example_harmony_apply_deployment -- \ +//! --target-device fleet-smoke-vm-arm \ +//! --image nginx:latest +//! +//! # print the CR JSON (lets the user kubectl-apply it manually) +//! cargo run -q -p example_harmony_apply_deployment -- \ +//! --target-device fleet-smoke-vm-arm \ +//! --image nginx:latest --print | kubectl apply -f - +//! +//! # upgrade the same deployment to a newer image +//! cargo run -q -p example_harmony_apply_deployment -- \ +//! --target-device fleet-smoke-vm-arm \ +//! --image nginx:1.26 +//! +//! # delete the deployment +//! cargo run -q -p example_harmony_apply_deployment -- --delete + +use anyhow::{Context, Result}; +use clap::Parser; +use harmony::modules::podman::{PodmanService, PodmanV0Score}; +use harmony_fleet_operator::crd::{ + Deployment, DeploymentSpec, Rollout, RolloutStrategy, ScorePayload, +}; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; +use kube::Client; +use kube::api::{Api, DeleteParams, Patch, PatchParams}; +use std::collections::BTreeMap; + +#[derive(Parser, Debug)] +#[command( + name = "harmony_apply_deployment", + about = "Build + apply a harmony fleet Deployment CR from typed Rust (no yaml)" +)] +struct Cli { + /// Kubernetes namespace for the Deployment CR. + #[arg(long, default_value = "fleet-demo")] + namespace: String, + /// Deployment CR name. Also used as the KV key suffix and + /// podman container name on the device. + #[arg(long, default_value = "hello-world")] + name: String, + /// Shortcut: if set, picks a single device by id. Shorthand for + /// `--selector device-id=` — the agent publishes + /// a `device-id=` label on its DeviceInfo by default so this + /// works without any cluster-side label pre-wiring. + #[arg(long, default_value = "fleet-smoke-vm")] + target_device: String, + /// Repeatable `key=value` label selector. Takes precedence over + /// `--target-device` when provided. All pairs AND together. + #[arg(long = "selector", value_name = "KEY=VALUE")] + selectors: Vec, + /// Container image to run. + #[arg(long, default_value = "docker.io/library/nginx:latest")] + image: String, + /// `host:container` port mapping exposed on the device. + #[arg(long, default_value = "8080:80")] + port: String, + /// Delete the Deployment CR instead of applying it. + #[arg(long)] + delete: bool, + /// Print the CR as JSON to stdout instead of applying it. + /// Useful for piping into `kubectl apply -f -`. 
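+    /// e.g. `--print | kubectl apply -f -`, the same flow as the module-level examples above.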
+ #[arg(long)] + print: bool, +} + +#[tokio::main] +async fn main() -> Result<()> { + let cli = Cli::parse(); + let cr = build_cr(&cli); + + if cli.print { + println!("{}", serde_json::to_string_pretty(&cr)?); + return Ok(()); + } + + let client = Client::try_default() + .await + .context("building kube client (is KUBECONFIG set?)")?; + let api: Api = Api::namespaced(client, &cli.namespace); + + if cli.delete { + match api.delete(&cli.name, &DeleteParams::default()).await { + Ok(_) => println!("deleted deployment '{}/{}'", cli.namespace, cli.name), + Err(kube::Error::Api(ae)) if ae.code == 404 => { + println!( + "deployment '{}/{}' not found (already gone)", + cli.namespace, cli.name + ) + } + Err(e) => anyhow::bail!("delete failed: {e}"), + } + return Ok(()); + } + + // Server-side apply so repeated invocations (upgrades) patch + // the existing CR instead of erroring with "already exists." + let params = PatchParams::apply("harmony-apply-deployment").force(); + let applied = api + .patch(&cli.name, ¶ms, &Patch::Apply(&cr)) + .await + .context("applying Deployment CR")?; + let meta = applied.metadata; + println!( + "applied deployment '{}/{}' (resourceVersion={}, image={})", + cli.namespace, + meta.name.as_deref().unwrap_or("?"), + meta.resource_version.as_deref().unwrap_or("?"), + cli.image, + ); + Ok(()) +} + +fn build_cr(cli: &Cli) -> Deployment { + let score = PodmanV0Score { + services: vec![PodmanService { + name: cli.name.clone(), + image: cli.image.clone(), + ports: vec![cli.port.clone()], + }], + }; + + let payload = ScorePayload { + type_: "PodmanV0".to_string(), + // `ScorePayload::data` is `serde_json::Value` by design + // (opaque payload routed to the agent). Serialize the typed + // score through serde_json — the agent's `ReconcileScore` enum + // accepts exactly this shape via `#[serde(tag, content)]`. 
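+        // With the CLI defaults, `data` comes out roughly as +        // {"services": [{"name": "hello-world", "image": "docker.io/library/nginx:latest", "ports": ["8080:80"]}]} +        // (assuming `PodmanService` serializes its fields under their Rust names).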
+ data: serde_json::to_value(&score).expect("PodmanV0Score is JSON-clean"), + }; + + let mut match_labels = BTreeMap::new(); + if cli.selectors.is_empty() { + match_labels.insert("device-id".to_string(), cli.target_device.clone()); + } else { + for kv in &cli.selectors { + let (k, v) = kv + .split_once('=') + .unwrap_or_else(|| panic!("--selector expects KEY=VALUE, got '{kv}'")); + match_labels.insert(k.to_string(), v.to_string()); + } + } + + Deployment::new( + &cli.name, + DeploymentSpec { + target_selector: LabelSelector { + match_labels: Some(match_labels), + match_expressions: None, + }, + score: payload, + rollout: Rollout { + strategy: RolloutStrategy::Immediate, + }, + }, + ) +} diff --git a/iot/iot-agent-v0/Cargo.toml b/fleet/harmony-fleet-agent/Cargo.toml similarity index 91% rename from iot/iot-agent-v0/Cargo.toml rename to fleet/harmony-fleet-agent/Cargo.toml index f90e9e65..8cd98369 100644 --- a/iot/iot-agent-v0/Cargo.toml +++ b/fleet/harmony-fleet-agent/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "iot-agent-v0" +name = "harmony-fleet-agent" version = "0.1.0" edition = "2024" rust-version = "1.85" diff --git a/iot/iot-agent-v0/src/config.rs b/fleet/harmony-fleet-agent/src/config.rs similarity index 60% rename from iot/iot-agent-v0/src/config.rs rename to fleet/harmony-fleet-agent/src/config.rs index e0c8291f..19b2a99a 100644 --- a/iot/iot-agent-v0/src/config.rs +++ b/fleet/harmony-fleet-agent/src/config.rs @@ -1,5 +1,6 @@ use harmony_reconciler_contracts::Id; use serde::Deserialize; +use std::collections::BTreeMap; use std::path::Path; #[derive(Debug, Clone, Deserialize)] @@ -7,6 +8,14 @@ pub struct AgentConfig { pub agent: AgentSection, pub nats: NatsSection, pub credentials: CredentialsSection, + /// Routing labels published verbatim in every DeviceInfo + /// heartbeat. The operator reflects them into + /// `Device.metadata.labels` so Deployment `spec.targetSelector` + /// resolves against them (K8s-Node-analogue flow). Empty by + /// default — a device with no labels is targetable only by its + /// auto-published `device-id` label. + #[serde(default)] + pub labels: BTreeMap, } #[derive(Debug, Clone, Deserialize)] @@ -69,3 +78,49 @@ pub fn load_config(path: &Path) -> anyhow::Result { let config: AgentConfig = toml::from_str(&content)?; Ok(config) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_config_with_labels_section() { + let raw = r#" +[agent] +device_id = "pi-42" + +[credentials] +type = "toml-shared" +nats_user = "u" +nats_pass = "p" + +[nats] +urls = ["nats://nats:4222"] + +[labels] +group = "site-a" +arch = "aarch64" +"#; + let cfg: AgentConfig = toml::from_str(raw).expect("valid config"); + assert_eq!(cfg.labels.get("group"), Some(&"site-a".to_string())); + assert_eq!(cfg.labels.get("arch"), Some(&"aarch64".to_string())); + } + + #[test] + fn labels_section_optional_defaults_empty() { + let raw = r#" +[agent] +device_id = "pi-42" + +[credentials] +type = "toml-shared" +nats_user = "u" +nats_pass = "p" + +[nats] +urls = ["nats://nats:4222"] +"#; + let cfg: AgentConfig = toml::from_str(raw).expect("valid config"); + assert!(cfg.labels.is_empty()); + } +} diff --git a/fleet/harmony-fleet-agent/src/fleet_publisher.rs b/fleet/harmony-fleet-agent/src/fleet_publisher.rs new file mode 100644 index 00000000..0c334d6e --- /dev/null +++ b/fleet/harmony-fleet-agent/src/fleet_publisher.rs @@ -0,0 +1,126 @@ +//! Agent-side publish surface. +//! +//! Thin wrapper around three KV buckets: [`BUCKET_DEVICE_INFO`], +//! 
[`BUCKET_DEVICE_STATE`], [`BUCKET_DEVICE_HEARTBEAT`]. +//! +//! Failure mode: log and swallow. The KV is the source of truth — +//! a dropped put gets corrected on the next reconcile transition +//! or operator watch reconnection. + +use async_nats::jetstream::{self, kv}; +use harmony_reconciler_contracts::{ + BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, DeploymentName, + DeploymentState, DeviceInfo, HeartbeatPayload, Id, InventorySnapshot, device_heartbeat_key, + device_info_key, device_state_key, +}; +use std::collections::BTreeMap; + +pub struct FleetPublisher { + device_id: Id, + info_bucket: kv::Store, + state_bucket: kv::Store, + heartbeat_bucket: kv::Store, +} + +impl FleetPublisher { + /// Open every bucket the agent needs, creating those that don't + /// exist yet. Idempotent with operator-side creation. + pub async fn connect(client: async_nats::Client, device_id: Id) -> anyhow::Result { + let jetstream = jetstream::new(client); + + let info_bucket = jetstream + .create_key_value(kv::Config { + bucket: BUCKET_DEVICE_INFO.to_string(), + history: 1, + ..Default::default() + }) + .await?; + let state_bucket = jetstream + .create_key_value(kv::Config { + bucket: BUCKET_DEVICE_STATE.to_string(), + history: 1, + ..Default::default() + }) + .await?; + let heartbeat_bucket = jetstream + .create_key_value(kv::Config { + bucket: BUCKET_DEVICE_HEARTBEAT.to_string(), + history: 1, + ..Default::default() + }) + .await?; + + Ok(Self { + device_id, + info_bucket, + state_bucket, + heartbeat_bucket, + }) + } + + /// Publish the agent's static-ish facts. Called at startup and + /// on label change. + pub async fn publish_device_info( + &self, + labels: BTreeMap, + inventory: Option, + ) { + let info = DeviceInfo { + device_id: self.device_id.clone(), + labels, + inventory, + updated_at: chrono::Utc::now(), + }; + let key = device_info_key(&self.device_id.to_string()); + match serde_json::to_vec(&info) { + Ok(payload) => { + if let Err(e) = self.info_bucket.put(&key, payload.into()).await { + tracing::warn!(%key, error = %e, "publish_device_info: kv put failed"); + } + } + Err(e) => tracing::warn!(error = %e, "publish_device_info: serialize failed"), + } + } + + /// Tiny liveness ping. Called every 30s. + pub async fn publish_heartbeat(&self) { + let hb = HeartbeatPayload { + device_id: self.device_id.clone(), + at: chrono::Utc::now(), + }; + let key = device_heartbeat_key(&self.device_id.to_string()); + match serde_json::to_vec(&hb) { + Ok(payload) => { + if let Err(e) = self.heartbeat_bucket.put(&key, payload.into()).await { + tracing::debug!(%key, error = %e, "publish_heartbeat: kv put failed"); + } + } + Err(e) => tracing::warn!(error = %e, "publish_heartbeat: serialize failed"), + } + } + + /// Persist the authoritative current phase for a `(device, + /// deployment)` pair. The operator's watch on the `device-state` + /// bucket picks up this put and updates CR status counters. + pub async fn write_deployment_state(&self, state: &DeploymentState) { + let key = device_state_key(&self.device_id.to_string(), &state.deployment); + match serde_json::to_vec(state) { + Ok(payload) => { + if let Err(e) = self.state_bucket.put(&key, payload.into()).await { + tracing::warn!(%key, error = %e, "write_deployment_state: kv put failed"); + } + } + Err(e) => tracing::warn!(error = %e, "write_deployment_state: serialize failed"), + } + } + + /// Delete the authoritative current-phase entry, e.g. when the + /// Deployment CR is removed and the agent has torn down the + /// container. 
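+    /// Same failure policy as the put paths: log (at debug) and move on.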
+ pub async fn delete_deployment_state(&self, deployment: &DeploymentName) { + let key = device_state_key(&self.device_id.to_string(), deployment); + if let Err(e) = self.state_bucket.delete(&key).await { + tracing::debug!(%key, error = %e, "delete_deployment_state: kv delete failed"); + } + } +} diff --git a/iot/iot-agent-v0/src/main.rs b/fleet/harmony-fleet-agent/src/main.rs similarity index 53% rename from iot/iot-agent-v0/src/main.rs rename to fleet/harmony-fleet-agent/src/main.rs index 2d386aab..3b388349 100644 --- a/iot/iot-agent-v0/src/main.rs +++ b/fleet/harmony-fleet-agent/src/main.rs @@ -1,4 +1,5 @@ mod config; +mod fleet_publisher; mod reconciler; use std::sync::Arc; @@ -8,14 +9,13 @@ use anyhow::{Context, Result}; use clap::Parser; use config::{AgentConfig, CredentialSource, TomlFileCredentialSource}; use futures_util::StreamExt; -use harmony_reconciler_contracts::{ - AgentStatus, BUCKET_AGENT_STATUS, BUCKET_DESIRED_STATE, Id, status_key, -}; +use harmony_reconciler_contracts::{BUCKET_DESIRED_STATE, Id, InventorySnapshot}; use harmony::inventory::Inventory; use harmony::modules::podman::PodmanTopology; use harmony::topology::Topology; +use crate::fleet_publisher::FleetPublisher; use crate::reconciler::Reconciler; /// ROADMAP §5.6 — agent polls podman every 30s as ground truth; KV watch @@ -23,12 +23,12 @@ use crate::reconciler::Reconciler; const RECONCILE_INTERVAL: Duration = Duration::from_secs(30); #[derive(Parser)] -#[command(name = "iot-agent-v0", about = "IoT agent for Raspberry Pi devices")] +#[command(name = "fleet-agent-v0", about = "IoT agent for Raspberry Pi devices")] struct Cli { #[arg( long, - env = "IOT_AGENT_CONFIG", - default_value = "/etc/iot-agent/config.toml" + env = "FLEET_AGENT_CONFIG", + default_value = "/etc/fleet-agent/config.toml" )] config: std::path::PathBuf, } @@ -85,31 +85,51 @@ async fn watch_desired_state( Ok(()) } -async fn report_status(client: async_nats::Client, device_id: Id) -> Result<()> { - let jetstream = async_nats::jetstream::new(client); - let bucket = jetstream - .create_key_value(async_nats::jetstream::kv::Config { - bucket: BUCKET_AGENT_STATUS.to_string(), - ..Default::default() - }) - .await?; - - let key = status_key(&device_id.to_string()); +/// Tiny liveness-only loop: push a `HeartbeatPayload` into the +/// `device-heartbeat` bucket every N seconds. Stays separate from +/// per-deployment state writes so routine pings don't churn the +/// device-state bucket or its watch subscribers. +async fn publish_heartbeat_loop(fleet: Arc) { let mut interval = tokio::time::interval(Duration::from_secs(30)); - + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); loop { interval.tick().await; - let status = AgentStatus { - device_id: device_id.clone(), - status: "running".to_string(), - timestamp: chrono::Utc::now(), - }; - let payload = serde_json::to_vec(&status)?; - bucket.put(&key, payload.into()).await?; - tracing::debug!(key = %key, "reported status"); + fleet.publish_heartbeat().await; } } +/// Build a one-shot inventory snapshot at agent startup. Cheap, +/// published alongside every heartbeat until the agent restarts. 
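+/// Best-effort: kernel and memory come from /proc and fall back to an empty string / 0 +/// when unreadable (e.g. on non-Linux hosts).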
+fn local_inventory(inventory: &Inventory) -> InventorySnapshot { + InventorySnapshot { + hostname: inventory.location.name.clone(), + arch: std::env::consts::ARCH.to_string(), + os: std::env::consts::OS.to_string(), + kernel: std::fs::read_to_string("/proc/sys/kernel/osrelease") + .map(|s| s.trim().to_string()) + .unwrap_or_default(), + cpu_cores: std::thread::available_parallelism() + .map(|n| n.get() as u32) + .unwrap_or(0), + memory_mb: sys_memory_total_mb().unwrap_or(0), + agent_version: env!("CARGO_PKG_VERSION").to_string(), + } +} + +/// Read total RAM from /proc/meminfo. Returns None on non-Linux or +/// if /proc isn't mounted. Small, avoids a sys-info crate dep for a +/// single field. +fn sys_memory_total_mb() -> Option { + let s = std::fs::read_to_string("/proc/meminfo").ok()?; + for line in s.lines() { + if let Some(rest) = line.strip_prefix("MemTotal:") { + let kb: u64 = rest.split_whitespace().next()?.parse().ok()?; + return Some(kb / 1024); + } + } + None +} + #[tokio::main] async fn main() -> Result<()> { tracing_subscriber::fmt() @@ -118,7 +138,7 @@ async fn main() -> Result<()> { let cli = Cli::parse(); let cfg = config::load_config(&cli.config)?; - tracing::info!(device_id = %cfg.agent.device_id, "iot-agent-v0 starting"); + tracing::info!(device_id = %cfg.agent.device_id, "fleet-agent-v0 starting"); let device_id = cfg.agent.device_id.clone(); @@ -134,11 +154,40 @@ async fn main() -> Result<()> { let inventory = Arc::new(Inventory::from_localhost()); tracing::info!(hostname = %inventory.location.name, "inventory loaded"); - - let reconciler = Arc::new(Reconciler::new(topology, inventory)); + let inventory_snapshot = local_inventory(&inventory); let client = connect_nats(&cfg).await?; + // Publish surface. Opens the three KV buckets (idempotent + // creates). Must be live before the reconciler starts so + // writes on the first desired-state KV watch land on the wire. + let fleet = Arc::new( + FleetPublisher::connect(client.clone(), device_id.clone()) + .await + .context("fleet publisher connect")?, + ); + tracing::info!("fleet publisher ready"); + + // Publish DeviceInfo once at startup. Merge the config-declared + // labels with an always-on `device-id=` default so every + // device is targetable by id even without explicit labels. + // Config labels win on key conflicts — operators can override + // `device-id` if they really want to (unusual but legal). + let mut startup_labels = cfg.labels.clone(); + startup_labels + .entry("device-id".to_string()) + .or_insert_with(|| device_id.to_string()); + fleet + .publish_device_info(startup_labels, Some(inventory_snapshot.clone())) + .await; + + let reconciler = Arc::new(Reconciler::new( + device_id.clone(), + topology, + inventory, + Some(fleet.clone()), + )); + let ctrlc = async { tokio::signal::ctrl_c().await.ok(); tracing::info!("received SIGINT, shutting down"); @@ -151,16 +200,17 @@ async fn main() -> Result<()> { Ok::<(), anyhow::Error>(()) }; - let watch = watch_desired_state(client.clone(), device_id.clone(), reconciler.clone()); - let status = report_status(client, device_id); + let _ = inventory_snapshot; // consumed by the DeviceInfo publish above + let watch = watch_desired_state(client, device_id, reconciler.clone()); let reconcile = reconciler.clone().run_periodic(RECONCILE_INTERVAL); + let heartbeat = publish_heartbeat_loop(fleet); tokio::select! 
{ _ = ctrlc => {}, r = sigterm => { r?; } r = watch => { r?; } - r = status => { r?; } _ = reconcile => {} + _ = heartbeat => {} } Ok(()) diff --git a/fleet/harmony-fleet-agent/src/reconciler.rs b/fleet/harmony-fleet-agent/src/reconciler.rs new file mode 100644 index 00000000..619d9bf0 --- /dev/null +++ b/fleet/harmony-fleet-agent/src/reconciler.rs @@ -0,0 +1,344 @@ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; + +use anyhow::Result; +use chrono::Utc; +use harmony_reconciler_contracts::{DeploymentName, DeploymentState, Id, Phase}; +use tokio::sync::Mutex; + +use harmony::inventory::Inventory; +use harmony::modules::podman::{PodmanTopology, PodmanV0Score, ReconcileScore}; +use harmony::score::Score; + +use crate::fleet_publisher::FleetPublisher; + +/// Cache key → last-seen state, populated by `apply` and consulted by the +/// 30-second periodic tick and the delete path. +struct CachedEntry { + /// Serialized score JSON. Used for string-compare idempotency per + /// ROADMAP §5.5 — cheaper and more deterministic than a hash. + serialized: String, + /// Parsed score. Cached so the periodic reconcile tick and delete + /// handlers don't have to re-parse the JSON. + score: PodmanV0Score, +} + +pub struct Reconciler { + device_id: Id, + topology: Arc, + inventory: Arc, + /// Keyed by NATS KV key (`.`). A single entry per + /// KV key — in v0 there is no fan-out from one key to many scores. + state: Mutex>, + /// Current phase per deployment, used to decide whether a new + /// write to the `device-state` KV is needed. + phases: Mutex>, + /// Publish surface. Optional so unit tests without a live NATS + /// client still work; always populated in the real agent runtime. + fleet: Option>, +} + +impl Reconciler { + pub fn new( + device_id: Id, + topology: Arc, + inventory: Arc, + fleet: Option>, + ) -> Self { + Self { + device_id, + topology, + inventory, + state: Mutex::new(HashMap::new()), + phases: Mutex::new(HashMap::new()), + fleet, + } + } + + /// Record a new phase for a deployment and, if it changed, write + /// the updated [`DeploymentState`] to the KV. Same-phase + /// re-confirmations are no-ops so the periodic reconcile tick + /// doesn't churn the bucket. + async fn apply_phase( + &self, + deployment: &DeploymentName, + phase: Phase, + last_error: Option, + ) { + { + let mut phases = self.phases.lock().await; + if phases.get(deployment).copied() == Some(phase) { + return; + } + phases.insert(deployment.clone(), phase); + } + + if let Some(publisher) = &self.fleet { + let state = DeploymentState { + device_id: self.device_id.clone(), + deployment: deployment.clone(), + phase, + last_event_at: Utc::now(), + last_error, + }; + publisher.write_deployment_state(&state).await; + } + } + + /// Clear the in-memory phase for a deployment and delete its KV + /// entry. Idempotent: a delete for a never-applied deployment is + /// a no-op in memory and a harmless tombstone write on the wire. + async fn drop_phase(&self, deployment: &DeploymentName) { + let was_known = { + let mut phases = self.phases.lock().await; + phases.remove(deployment).is_some() + }; + if !was_known { + return; + } + if let Some(publisher) = &self.fleet { + publisher.delete_deployment_state(deployment).await; + } + } + + /// Handle a Put event (new or updated score on NATS KV). No-ops if the + /// serialized score is byte-identical to the last-seen value for this + /// key. 
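+    /// Phase flow: `Pending` is written before the score runs, then `Running` on success +    /// or `Failed` (with a truncated `last_error`) when the payload parse or the run fails.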
+ pub async fn apply(&self, key: &str, value: &[u8]) -> Result<()> { + let deployment = deployment_from_key(key); + let incoming = match serde_json::from_slice::(value) { + Ok(ReconcileScore::PodmanV0(s)) => s, + Err(e) => { + tracing::warn!(key, error = %e, "failed to deserialize score"); + if let Some(name) = &deployment { + self.apply_phase(name, Phase::Failed, Some(format!("bad payload: {e}"))) + .await; + } + return Ok(()); + } + }; + let serialized = String::from_utf8_lossy(value).into_owned(); + + { + let state = self.state.lock().await; + if let Some(existing) = state.get(key) { + if existing.serialized == serialized { + tracing::debug!(key, "score unchanged — noop"); + return Ok(()); + } + } + } + + if let Some(name) = &deployment { + self.apply_phase(name, Phase::Pending, None).await; + } + + match self.run_score(key, &incoming).await { + Ok(()) => { + if let Some(name) = &deployment { + self.apply_phase(name, Phase::Running, None).await; + } + } + Err(e) => { + if let Some(name) = &deployment { + self.apply_phase(name, Phase::Failed, Some(short(&e.to_string()))) + .await; + } + return Err(e); + } + } + + let mut state = self.state.lock().await; + state.insert( + key.to_string(), + CachedEntry { + serialized, + score: incoming, + }, + ); + Ok(()) + } + + /// Handle a Delete/Purge event. Stops and removes every container + /// referenced by the last cached score for this key. Idempotent: if we + /// never saw a Put for this key (agent restart after delete), logs and + /// returns ok. + pub async fn remove(&self, key: &str) -> Result<()> { + let deployment = deployment_from_key(key); + let mut state = self.state.lock().await; + let Some(entry) = state.remove(key) else { + tracing::info!(key, "delete for unknown key — nothing to remove"); + if let Some(name) = &deployment { + self.drop_phase(name).await; + } + return Ok(()); + }; + drop(state); + + use harmony::topology::ContainerRuntime; + for service in &entry.score.services { + if let Err(e) = self.topology.remove_service(&service.name).await { + tracing::warn!( + key, + service = %service.name, + error = %e, + "failed to remove container" + ); + } else { + tracing::info!(key, service = %service.name, "removed container"); + } + } + if let Some(name) = &deployment { + self.drop_phase(name).await; + } + Ok(()) + } + + /// Periodic ground-truth reconcile. ROADMAP §5.6 — "polling instead of + /// event-driven PLEG. Agent polls podman every 30s as ground truth; + /// KV watch events are accelerators." Re-runs each cached score against + /// podman-api; the underlying `ensure_service_running` is idempotent + /// so a converged state produces no log noise. 
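+    /// Phases get re-confirmed on every pass; `apply_phase` drops same-phase writes, so a +    /// converged fleet does not churn the device-state bucket either.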
+ pub async fn tick(&self) -> Result<()> { + let snapshot: Vec<(String, PodmanV0Score)> = { + let state = self.state.lock().await; + state + .iter() + .map(|(k, v)| (k.clone(), v.score.clone())) + .collect() + }; + for (key, score) in snapshot { + let deployment = deployment_from_key(&key); + match self.run_score(&key, &score).await { + Ok(()) => { + if let Some(name) = &deployment { + self.apply_phase(name, Phase::Running, None).await; + } + } + Err(e) => { + tracing::warn!(key, error = %e, "periodic reconcile failed"); + if let Some(name) = &deployment { + self.apply_phase(name, Phase::Failed, Some(short(&e.to_string()))) + .await; + } + } + } + } + Ok(()) + } + + pub async fn run_periodic(self: Arc, interval: Duration) { + let mut ticker = tokio::time::interval(interval); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + loop { + ticker.tick().await; + if let Err(e) = self.tick().await { + tracing::warn!(error = %e, "reconcile tick error"); + } + } + } + + async fn run_score(&self, key: &str, score: &PodmanV0Score) -> Result<()> { + let interpret = Score::::create_interpret(score); + let outcome = interpret + .execute(&self.inventory, &self.topology) + .await + .map_err(|e| anyhow::anyhow!("PodmanV0Score interpret failed for {key}: {e}"))?; + tracing::info!(key, outcome = ?outcome, "reconciled"); + Ok(()) + } +} + +/// Extract the deployment name from a NATS KV key of the form +/// `.`. +fn deployment_from_key(key: &str) -> Option { + let (_, rest) = key.split_once('.')?; + DeploymentName::try_new(rest).ok() +} + +/// Truncate a long error message so the DeploymentState payload stays +/// comfortably below NATS JetStream's per-message limit. +fn short(s: &str) -> String { + const MAX: usize = 512; + if s.len() <= MAX { + s.to_string() + } else { + let mut cut = s[..MAX].to_string(); + cut.push('…'); + cut + } +} + +#[cfg(test)] +mod tests { + //! Focused tests for transition detection. Drive `apply_phase` / + //! `drop_phase` directly with an inert topology (no real podman + //! socket) and a `None` FleetPublisher. 
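Two more cases could sit alongside the tests below; hypothetical additions, sketched here assuming `deployment_from_key` and `short` keep the behaviour shown above:

#[test]
fn deployment_from_key_splits_on_first_dot() {
    let parsed = deployment_from_key("pi-01.hello");
    assert_eq!(parsed.map(|d| d.as_str().to_string()), Some("hello".to_string()));
    assert!(deployment_from_key("no-dot-here").is_none());
}

#[test]
fn short_truncates_long_messages() {
    let long = "x".repeat(1000);
    assert!(short(&long).chars().count() <= 513); // 512 kept chars plus the ellipsis
    assert_eq!(short("tiny"), "tiny");
}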
+ use super::*; + use harmony::inventory::Inventory; + use harmony::modules::podman::PodmanTopology; + use std::path::PathBuf; + + fn reconciler() -> Reconciler { + let topology = Arc::new( + PodmanTopology::from_unix_socket(PathBuf::from("/nonexistent/for-tests")).unwrap(), + ); + let inventory = Arc::new(Inventory::empty()); + Reconciler::new( + Id::from("test-device".to_string()), + topology, + inventory, + None, + ) + } + + fn dn(s: &str) -> DeploymentName { + DeploymentName::try_new(s).expect("valid test name") + } + + #[tokio::test] + async fn apply_phase_records_new_phase() { + let r = reconciler(); + r.apply_phase(&dn("hello"), Phase::Running, None).await; + let phases = r.phases.lock().await; + assert_eq!(phases.get(&dn("hello")), Some(&Phase::Running)); + } + + #[tokio::test] + async fn apply_phase_idempotent_for_same_phase() { + let r = reconciler(); + r.apply_phase(&dn("hello"), Phase::Running, None).await; + r.apply_phase(&dn("hello"), Phase::Running, None).await; + let phases = r.phases.lock().await; + assert_eq!(phases.len(), 1); + } + + #[tokio::test] + async fn apply_phase_transitions_update_phase() { + let r = reconciler(); + r.apply_phase(&dn("hello"), Phase::Pending, None).await; + r.apply_phase(&dn("hello"), Phase::Running, None).await; + r.apply_phase(&dn("hello"), Phase::Failed, Some("oom".to_string())) + .await; + let phases = r.phases.lock().await; + assert_eq!(phases.get(&dn("hello")), Some(&Phase::Failed)); + } + + #[tokio::test] + async fn drop_phase_clears_known_deployment() { + let r = reconciler(); + r.apply_phase(&dn("hello"), Phase::Running, None).await; + r.drop_phase(&dn("hello")).await; + let phases = r.phases.lock().await; + assert!(!phases.contains_key(&dn("hello"))); + } + + #[tokio::test] + async fn drop_phase_on_unknown_deployment_is_noop() { + let r = reconciler(); + r.drop_phase(&dn("never-existed")).await; + let phases = r.phases.lock().await; + assert!(phases.is_empty()); + } +} diff --git a/iot/iot-operator-v0/Cargo.toml b/fleet/harmony-fleet-operator/Cargo.toml similarity index 82% rename from iot/iot-operator-v0/Cargo.toml rename to fleet/harmony-fleet-operator/Cargo.toml index bf140170..3fe5a2d4 100644 --- a/iot/iot-operator-v0/Cargo.toml +++ b/fleet/harmony-fleet-operator/Cargo.toml @@ -1,14 +1,13 @@ [package] -name = "iot-operator-v0" +name = "harmony-fleet-operator" version = "0.1.0" edition = "2024" rust-version = "1.85" [dependencies] harmony = { path = "../../harmony" } -harmony-k8s = { path = "../../harmony-k8s" } harmony-reconciler-contracts = { path = "../../harmony-reconciler-contracts" } -async-trait.workspace = true +chrono = { workspace = true, features = ["serde"] } kube = { workspace = true, features = ["runtime", "derive"] } k8s-openapi.workspace = true async-nats = { workspace = true } diff --git a/fleet/harmony-fleet-operator/Dockerfile b/fleet/harmony-fleet-operator/Dockerfile new file mode 100644 index 00000000..0eb0b632 --- /dev/null +++ b/fleet/harmony-fleet-operator/Dockerfile @@ -0,0 +1,26 @@ +# Minimal runtime container for the IoT operator. Assumes +# `target/release/harmony-fleet-operator` has already been built on the +# host (the load-test harness does this). Base image is +# archlinux:base to guarantee the host's glibc (ABI-matched) — +# debian:bookworm-slim and similar distros ship older glibcs and +# would error at startup with "version `GLIBC_2.x' not found". 
+# +# When the operator gets its own release pipeline, swap this for a +# two-stage build that produces the binary inside a pinned Rust +# toolchain image. +FROM docker.io/library/archlinux:base + +COPY target/release/harmony-fleet-operator /usr/local/bin/harmony-fleet-operator + +# Non-root runtime. Pairs with the Pod's `securityContext. +# runAsNonRoot: true` in the helm chart — k8s admission rejects +# pods with that flag unless either the image declares a non-root +# USER or the Pod pins runAsUser. We deliberately don't pin +# runAsUser (OpenShift's restricted-v2 SCC assigns a namespace- +# specific UID and rejects fixed UIDs); the image's USER is the +# portable mechanism. 65532 is the `nonroot` UID convention used +# by distroless + many security-hardened base images; it's +# arbitrary but safe — no overlap with typical system UIDs. +USER 65532:65532 + +ENTRYPOINT ["/usr/local/bin/harmony-fleet-operator"] diff --git a/fleet/harmony-fleet-operator/src/chart.rs b/fleet/harmony-fleet-operator/src/chart.rs new file mode 100644 index 00000000..a8e4138c --- /dev/null +++ b/fleet/harmony-fleet-operator/src/chart.rs @@ -0,0 +1,342 @@ +//! Generate the operator's helm chart from typed Rust. +//! +//! Produces a self-contained chart directory that `helm install` +//! accepts as a path. Resources are constructed as typed k8s_openapi +//! values and serialized at chart-build time, matching ADR 018 +//! (Template Hydration) — no hand-authored yaml in the source tree. +//! +//! The chart has no Helm templating (`{{ .Values.foo }}`); the caller +//! re-runs the generator whenever config changes. For a publishable +//! chart with user-facing values, layer a templating pass on top of +//! this output. +//! +//! Parity with `install` subcommand: both install the same two CRDs +//! (`Deployment`, `Device`). `install` applies the CRDs only, for +//! the host-side-operator path; `chart` packages CRDs + RBAC + the +//! operator Deployment into a helm chart the cluster runs itself. + +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; +use harmony::modules::application::helm::{HelmChart, HelmResourceKind}; +use k8s_openapi::api::apps::v1::{ + Deployment as K8sDeployment, DeploymentSpec as K8sDeploymentSpec, +}; +use k8s_openapi::api::core::v1::{ + Capabilities, Container, EnvVar, PodSpec, PodTemplateSpec, SeccompProfile, SecurityContext, + ServiceAccount, +}; +use k8s_openapi::api::rbac::v1::{ClusterRole, ClusterRoleBinding, PolicyRule, RoleRef, Subject}; +use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; +use kube::CustomResourceExt; +use kube::api::ObjectMeta; + +use crate::crd::{Deployment, Device}; + +/// Inputs for chart generation. Default values are aimed at a +/// local-dev k3d install; override via the `chart` subcommand flags. +pub struct ChartOptions { + /// Where to write the chart directory. The chart is created as a + /// subdirectory `harmony-fleet-operator` inside this path. + pub output_dir: PathBuf, + /// Container image tag the operator Deployment should pull. For + /// k3d with sideloaded images, `IfNotPresent` + a tag that's + /// already in the cluster store is enough. + pub image: String, + /// `Always` for registry-backed dev loops, `IfNotPresent` for + /// sideloaded k3d images, `Never` if the image must already be + /// present. + pub image_pull_policy: String, + /// Namespace the operator Deployment runs in. 
`helm install + /// --create-namespace` creates it if absent; the chart itself + /// doesn't include a Namespace resource so the chart stays + /// reusable across namespaces. + pub namespace: String, + /// NATS URL the operator connects to. For in-cluster NATS at + /// `fleet-nats.fleet-system` the default `nats://fleet-nats.fleet-system:4222` + /// works with no config. + pub nats_url: String, + /// `RUST_LOG` value for the operator process. + pub log_level: String, +} + +impl Default for ChartOptions { + fn default() -> Self { + Self { + output_dir: PathBuf::from("/tmp/fleet-load-test/chart"), + image: "localhost/harmony-fleet-operator:latest".to_string(), + image_pull_policy: "IfNotPresent".to_string(), + namespace: "fleet-system".to_string(), + nats_url: "nats://fleet-nats.fleet-system:4222".to_string(), + log_level: "info,kube_runtime=warn".to_string(), + } + } +} + +const RELEASE_NAME: &str = "harmony-fleet-operator"; +const SERVICE_ACCOUNT: &str = "harmony-fleet-operator"; +const CLUSTER_ROLE: &str = "harmony-fleet-operator"; +const CLUSTER_ROLE_BINDING: &str = "harmony-fleet-operator"; + +/// Build + write the chart to `opts.output_dir`. Returns the full +/// path to the generated chart directory (which is what `helm +/// install ` wants). +pub fn build_chart(opts: &ChartOptions) -> Result { + std::fs::create_dir_all(&opts.output_dir) + .with_context(|| format!("creating {:?}", opts.output_dir))?; + + let mut chart = HelmChart::new( + RELEASE_NAME.to_string(), + env!("CARGO_PKG_VERSION").to_string(), + ); + chart.description = "IoT operator — Deployment CRD → NATS KV".to_string(); + + chart.add_resource(HelmResourceKind::Crd(crd_with_keep_annotation( + Deployment::crd(), + ))); + chart.add_resource(HelmResourceKind::Crd(crd_with_keep_annotation( + Device::crd(), + ))); + + chart.add_resource(HelmResourceKind::ServiceAccount(service_account( + &opts.namespace, + ))); + chart.add_resource(HelmResourceKind::ClusterRole(cluster_role())); + chart.add_resource(HelmResourceKind::ClusterRoleBinding(cluster_role_binding( + &opts.namespace, + ))); + chart.add_resource(HelmResourceKind::Deployment(operator_deployment(opts))); + + let written = chart + .write_to(Path::new(&opts.output_dir)) + .map_err(|e| anyhow::anyhow!("writing chart: {e}"))?; + Ok(written) +} + +/// Annotate a CRD with `helm.sh/resource-policy: keep` so +/// `helm uninstall` **does not** cascade-delete the CRD and its +/// CRs. Without this, uninstall wipes every `Deployment` + `Device` +/// CR in the cluster via the GC → agents notice the desired-state +/// KV deletes → the whole fleet tears down its containers. One +/// typo on uninstall would be catastrophic. `keep` makes uninstall +/// idempotent and data-preserving; the user explicitly `kubectl +/// delete crd …` if they actually want to wipe. +fn crd_with_keep_annotation(mut crd: CustomResourceDefinition) -> CustomResourceDefinition { + let annotations = crd.metadata.annotations.get_or_insert_with(BTreeMap::new); + annotations.insert("helm.sh/resource-policy".to_string(), "keep".to_string()); + crd +} + +fn service_account(namespace: &str) -> ServiceAccount { + ServiceAccount { + metadata: ObjectMeta { + name: Some(SERVICE_ACCOUNT.to_string()), + namespace: Some(namespace.to_string()), + ..Default::default() + }, + ..Default::default() + } +} + +/// Verbs the operator actually uses — nothing aspirational. Tightening +/// later is a matter of deleting a line. 
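(The `cluster_role` those verbs belong to follows.) For orientation, this is roughly what the `chart` subcommand does with the pieces above; a sketch from inside the binary crate, since the chart module is not exported from the library, and with values beyond `output_dir` and `image` falling back to the defaults listed earlier:

use std::path::PathBuf;

// Generate the chart on disk and hand the resulting path to helm.
fn generate_chart() -> anyhow::Result<()> {
    let opts = crate::chart::ChartOptions {
        output_dir: PathBuf::from("/tmp/fleet-load-test/chart"),
        image: "localhost/harmony-fleet-operator:latest".to_string(),
        ..Default::default()
    };
    let chart_dir = crate::chart::build_chart(&opts)?;
    // `helm install harmony-fleet-operator <chart_dir> -n fleet-system --create-namespace`
    println!("{}", chart_dir.display());
    Ok(())
}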
+fn cluster_role() -> ClusterRole { + let group = "fleet.nationtech.io".to_string(); + ClusterRole { + metadata: ObjectMeta { + name: Some(CLUSTER_ROLE.to_string()), + ..Default::default() + }, + rules: Some(vec![ + // Deployments: controller lists + watches + patches + // (finalizer metadata); aggregator lists + watches + + // patches status. + PolicyRule { + api_groups: Some(vec![group.clone()]), + resources: Some(vec!["deployments".to_string()]), + verbs: vec!["get", "list", "watch", "patch", "update"] + .into_iter() + .map(String::from) + .collect(), + ..Default::default() + }, + PolicyRule { + api_groups: Some(vec![group.clone()]), + resources: Some(vec![ + "deployments/status".to_string(), + "deployments/finalizers".to_string(), + ]), + verbs: vec!["get", "update", "patch"] + .into_iter() + .map(String::from) + .collect(), + ..Default::default() + }, + // Devices: reconciler server-side-applies + deletes; + // aggregator lists + watches. + PolicyRule { + api_groups: Some(vec![group]), + resources: Some(vec!["devices".to_string()]), + verbs: vec![ + "get", "list", "watch", "create", "update", "patch", "delete", + ] + .into_iter() + .map(String::from) + .collect(), + ..Default::default() + }, + ]), + ..Default::default() + } +} + +fn cluster_role_binding(namespace: &str) -> ClusterRoleBinding { + ClusterRoleBinding { + metadata: ObjectMeta { + name: Some(CLUSTER_ROLE_BINDING.to_string()), + ..Default::default() + }, + role_ref: RoleRef { + api_group: "rbac.authorization.k8s.io".to_string(), + kind: "ClusterRole".to_string(), + name: CLUSTER_ROLE.to_string(), + }, + subjects: Some(vec![Subject { + kind: "ServiceAccount".to_string(), + name: SERVICE_ACCOUNT.to_string(), + namespace: Some(namespace.to_string()), + ..Default::default() + }]), + } +} + +fn operator_deployment(opts: &ChartOptions) -> K8sDeployment { + let mut match_labels = BTreeMap::new(); + match_labels.insert( + "app.kubernetes.io/name".to_string(), + RELEASE_NAME.to_string(), + ); + + K8sDeployment { + metadata: ObjectMeta { + name: Some(RELEASE_NAME.to_string()), + namespace: Some(opts.namespace.clone()), + labels: Some(match_labels.clone()), + ..Default::default() + }, + spec: Some(K8sDeploymentSpec { + replicas: Some(1), + selector: LabelSelector { + match_labels: Some(match_labels.clone()), + match_expressions: None, + }, + template: PodTemplateSpec { + metadata: Some(ObjectMeta { + labels: Some(match_labels), + ..Default::default() + }), + spec: Some(PodSpec { + service_account_name: Some(SERVICE_ACCOUNT.to_string()), + containers: vec![Container { + name: "operator".to_string(), + image: Some(opts.image.clone()), + image_pull_policy: Some(opts.image_pull_policy.clone()), + env: Some(vec![ + EnvVar { + name: "NATS_URL".to_string(), + value: Some(opts.nats_url.clone()), + ..Default::default() + }, + EnvVar { + name: "RUST_LOG".to_string(), + value: Some(opts.log_level.clone()), + ..Default::default() + }, + ]), + security_context: Some(container_security_context()), + ..Default::default() + }], + ..Default::default() + }), + }, + ..Default::default() + }), + ..Default::default() + } +} + +/// Minimum-privilege container security context. +/// +/// - `runAsNonRoot: true` — a compromised operator pod with +/// cluster-scoped write on Deployment + Device CRs is enough to +/// tear down the fleet; running as non-root limits blast radius. +/// - `readOnlyRootFilesystem: true` — the Rust operator logs to +/// stdout only; it never writes to `/`. 
+/// - `allowPrivilegeEscalation: false` — no setuid binaries, no +/// capability gain under any child exec. +/// - `capabilities: drop [ALL]` — no kernel capabilities retained. +/// - `seccompProfile: RuntimeDefault` — runtime's default syscall +/// filter (blocks the obscure/dangerous ones). +/// +/// **Deliberately no `runAsUser`** — OpenShift's `restricted-v2` +/// SCC assigns namespace-specific UIDs and rejects pods that pin +/// a fixed UID outside its range. Relying on the image's USER +/// directive (see Dockerfile) lets vanilla k8s and OpenShift pick +/// a compatible UID without custom SCC bindings. +fn container_security_context() -> SecurityContext { + SecurityContext { + run_as_non_root: Some(true), + read_only_root_filesystem: Some(true), + allow_privilege_escalation: Some(false), + capabilities: Some(Capabilities { + add: None, + drop: Some(vec!["ALL".to_string()]), + }), + seccomp_profile: Some(SeccompProfile { + type_: "RuntimeDefault".to_string(), + localhost_profile: None, + }), + ..Default::default() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn crds_carry_keep_annotation() { + let crd = crd_with_keep_annotation(Deployment::crd()); + assert_eq!( + crd.metadata + .annotations + .as_ref() + .and_then(|a| a.get("helm.sh/resource-policy")) + .map(String::as_str), + Some("keep"), + "CRDs must carry the keep annotation so helm uninstall doesn't \ + cascade-delete CRs and wipe the fleet" + ); + } + + #[test] + fn security_context_is_locked_down() { + let sc = container_security_context(); + assert_eq!(sc.run_as_non_root, Some(true)); + assert_eq!(sc.read_only_root_filesystem, Some(true)); + assert_eq!(sc.allow_privilege_escalation, Some(false)); + assert_eq!( + sc.capabilities.as_ref().and_then(|c| c.drop.as_ref()), + Some(&vec!["ALL".to_string()]) + ); + assert_eq!( + sc.seccomp_profile.as_ref().map(|s| s.type_.as_str()), + Some("RuntimeDefault") + ); + // OpenShift SCC compatibility: no fixed runAsUser, let the + // image/SCC negotiate. + assert!(sc.run_as_user.is_none()); + } +} diff --git a/fleet/harmony-fleet-operator/src/controller.rs b/fleet/harmony-fleet-operator/src/controller.rs new file mode 100644 index 00000000..340da116 --- /dev/null +++ b/fleet/harmony-fleet-operator/src/controller.rs @@ -0,0 +1,136 @@ +//! Deployment controller. +//! +//! With the selector-based model, the controller's job shrank to: +//! - validate that the CR name is a valid `DeploymentName` +//! (apiserver already validates RFC 1123 — this is the +//! additional NATS-subject-safety check), +//! - hold a finalizer so delete is synchronous with desired-state +//! KV cleanup. +//! +//! The aggregator owns: +//! - resolving `spec.targetSelector` against Device CRs, +//! - writing `desired-state..` KV entries, +//! - patching `.status.aggregate`. +//! +//! So on `apply` this function is a no-op past validation; the +//! aggregator notices the new CR via its own kube watch and +//! materializes KV entries for matched devices on the next tick. +//! +//! On `cleanup` we still need to remove every KV entry for this +//! deployment synchronously so agents stop reconciling before the +//! CR disappears. KV doesn't support prefix delete; we scan the +//! bucket and drop keys with the matching `.` +//! suffix. 
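Before the implementation, a tiny self-contained illustration of the suffix rule the module doc describes, assuming the `<device>.<deployment>` key shape used throughout this diff; `desired_state_key` in harmony-reconciler-contracts is the real constructor for these keys:

// Illustrative only: does this desired-state key belong to the deployment
// being cleaned up?
fn belongs_to(key: &str, deployment: &str) -> bool {
    key.ends_with(&format!(".{deployment}"))
}

fn main() {
    assert!(belongs_to("pi-01.hello", "hello"));
    assert!(belongs_to("pi-02.hello", "hello"));
    assert!(!belongs_to("pi-01.other", "hello"));
    // The leading dot keeps near-miss names apart: deployment "say-hello"
    // does not collide with "hello".
    assert!(!belongs_to("pi-01.say-hello", "hello"));
}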
+ +use std::sync::Arc; +use std::time::Duration; + +use async_nats::jetstream::kv::Store; +use futures_util::StreamExt; +use harmony_reconciler_contracts::DeploymentName; +use kube::runtime::Controller; +use kube::runtime::controller::Action; +use kube::runtime::finalizer::{Event as FinalizerEvent, finalizer}; +use kube::runtime::watcher::Config as WatcherConfig; +use kube::{Api, Client, ResourceExt}; + +use crate::crd::Deployment; + +const FINALIZER: &str = "fleet.nationtech.io/finalizer"; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("kube api: {0}")] + Kube(#[from] kube::Error), + #[error("nats kv: {0}")] + Kv(String), + #[error("missing namespace on resource")] + MissingNamespace, + #[error("invalid deployment name '{0}': {1}")] + InvalidName(String, String), +} + +pub struct Context { + pub client: Client, + pub kv: Store, +} + +pub async fn run(client: Client, kv: Store) -> anyhow::Result<()> { + let api: Api = Api::all(client.clone()); + let ctx = Arc::new(Context { client, kv }); + + tracing::info!("starting Deployment controller"); + Controller::new(api, WatcherConfig::default()) + .run(reconcile, error_policy, ctx) + .for_each(|res| async move { + match res { + Ok((obj, _)) => tracing::debug!(?obj, "reconciled"), + Err(e) => tracing::warn!(error = %e, "reconcile error"), + } + }) + .await; + Ok(()) +} + +async fn reconcile(obj: Arc, ctx: Arc) -> Result { + let ns = obj.namespace().ok_or(Error::MissingNamespace)?; + let name = obj.name_any(); + + // Validation pass: apiserver accepts any RFC 1123 name; we need + // the additional NATS-subject-safety properties before anything + // downstream tries to use it as a KV key fragment. + DeploymentName::try_new(&name).map_err(|e| Error::InvalidName(name.clone(), e.to_string()))?; + + let api: Api = Api::namespaced(ctx.client.clone(), &ns); + finalizer(&api, FINALIZER, obj, |event| async { + match event { + // No work on apply — the aggregator picks up the CR via + // its own kube watch and writes KV entries for matching + // devices. Long requeue so we're not pointlessly polling. 
+ FinalizerEvent::Apply(_) => Ok(Action::requeue(Duration::from_secs(300))), + FinalizerEvent::Cleanup(d) => cleanup(d, &ctx.kv).await, + } + }) + .await + .map_err(|e| match e { + kube::runtime::finalizer::Error::ApplyFailed(e) + | kube::runtime::finalizer::Error::CleanupFailed(e) => e, + kube::runtime::finalizer::Error::AddFinalizer(e) + | kube::runtime::finalizer::Error::RemoveFinalizer(e) => Error::Kube(e), + kube::runtime::finalizer::Error::UnnamedObject => Error::Kv("unnamed object".into()), + kube::runtime::finalizer::Error::InvalidFinalizer => Error::Kv("invalid finalizer".into()), + }) +} + +async fn cleanup(obj: Arc, kv: &Store) -> Result { + let name = obj.name_any(); + let deployment_name = + DeploymentName::try_new(&name).map_err(|e| Error::InvalidName(name, e.to_string()))?; + let suffix = format!(".{}", deployment_name.as_str()); + + let mut removed = 0u64; + let mut keys = kv + .keys() + .await + .map_err(|e| Error::Kv(format!("listing keys: {e}")))?; + while let Some(key_res) = keys.next().await { + let key = key_res.map_err(|e| Error::Kv(format!("reading key: {e}")))?; + if key.ends_with(&suffix) { + kv.delete(&key) + .await + .map_err(|e| Error::Kv(format!("deleting {key}: {e}")))?; + removed += 1; + } + } + tracing::info!( + deployment = %deployment_name, + removed, + "cleanup: deleted desired-state entries" + ); + Ok(Action::await_change()) +} + +fn error_policy(_obj: Arc, err: &Error, _ctx: Arc) -> Action { + tracing::warn!(error = %err, "requeueing after error"); + Action::requeue(Duration::from_secs(30)) +} diff --git a/iot/iot-operator-v0/src/crd.rs b/fleet/harmony-fleet-operator/src/crd.rs similarity index 50% rename from iot/iot-operator-v0/src/crd.rs rename to fleet/harmony-fleet-operator/src/crd.rs index f815ac7b..0399af82 100644 --- a/iot/iot-operator-v0/src/crd.rs +++ b/fleet/harmony-fleet-operator/src/crd.rs @@ -1,3 +1,5 @@ +use harmony_reconciler_contracts::InventorySnapshot; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; use kube::CustomResource; use schemars::JsonSchema; use schemars::schema::{ @@ -5,19 +7,25 @@ use schemars::schema::{ }; use serde::{Deserialize, Serialize}; +/// Deployment intent. Targets devices by label selector — identical +/// to the pattern K8s itself uses for DaemonSet nodeSelector, Service +/// pod selector, etc. The operator resolves the selector against +/// `Device` CRs at reconcile time; no list of device ids on spec. #[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema)] #[kube( - group = "iot.nationtech.io", + group = "fleet.nationtech.io", version = "v1alpha1", kind = "Deployment", plural = "deployments", - shortname = "iotdep", + shortname = "fleetdep", namespaced, status = "DeploymentStatus" )] #[serde(rename_all = "camelCase")] pub struct DeploymentSpec { - pub target_devices: Vec, + /// Which devices this deployment targets. matches against + /// `Device.metadata.labels`. + pub target_selector: LabelSelector, #[schemars(schema_with = "score_payload_schema")] pub score: ScorePayload, pub rollout: Rollout, @@ -35,13 +43,11 @@ pub struct ScorePayload { /// /// 1. `x-kubernetes-preserve-unknown-fields: true` on `data` — the payload /// is routed opaquely; its shape is enforced on-device by the agent's -/// typed `IotScore` deserialization, not by the apiserver. +/// typed `ReconcileScore` deserialization, not by the apiserver. /// 2. 
An `x-kubernetes-validations` CEL rule on the enclosing `score` object /// requiring `type` to be a valid Rust identifier, so typos (`"pdoman"`) /// are rejected at `kubectl apply` time rather than silently reaching -/// the agent. This validates the *shape* of the discriminator without -/// listing the known variant catalog — the operator stays a generic -/// router (v0.3+ can add `OkdApplyV0` etc. without an operator release). +/// the agent. fn score_payload_schema(_: &mut schemars::r#gen::SchemaGenerator) -> Schema { let type_schema = Schema::Object(SchemaObject { instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::String))), @@ -100,6 +106,65 @@ pub enum RolloutStrategy { #[derive(Serialize, Deserialize, Clone, Debug, Default, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct DeploymentStatus { + /// Per-deployment rollup. Present once the aggregator has + /// evaluated the selector at least once. #[serde(skip_serializing_if = "Option::is_none")] - pub observed_score_string: Option, + pub aggregate: Option, +} + +/// Rollup of per-device deployment phases for this Deployment CR. +#[derive(Serialize, Deserialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct DeploymentAggregate { + /// How many Device CRs currently match `spec.targetSelector`. + /// The three phase counters below sum to this; targeted-but- + /// unreported devices are folded into `pending`. + pub matched_device_count: u32, + pub succeeded: u32, + pub failed: u32, + pub pending: u32, + /// Device id of the most recent device reporting a failure, with + /// its short error message. Cleared when that device transitions + /// back to Running. + #[serde(skip_serializing_if = "Option::is_none")] + pub last_error: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct AggregateLastError { + pub device_id: String, + pub message: String, + pub at: String, +} + +/// A physical/virtual device registered with the fleet. Cluster-scoped +/// because devices aren't tenant-isolated by namespace — they're +/// infrastructure, the same way K8s Nodes are cluster-scoped. +/// +/// Created by the operator from `DeviceInfo` entries in the NATS +/// `device-info` bucket. Agents never touch the kube apiserver +/// directly; they publish DeviceInfo to NATS and the operator +/// reflects it here. +/// +/// `metadata.labels` carries the device's routing labels. `spec. +/// inventory` holds the hardware/OS snapshot. No status subresource +/// today — liveness is queried from the NATS `device-heartbeat` +/// bucket directly; when a CR-side reflection (Reachable / Stale +/// conditions) becomes useful, it'll land with its own reconciler +/// rather than sitting here as speculative surface. +#[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[kube( + group = "fleet.nationtech.io", + version = "v1alpha1", + kind = "Device", + plural = "devices", + shortname = "fleetdev" +)] +#[serde(rename_all = "camelCase")] +pub struct DeviceSpec { + /// Hardware + OS facts reported by the agent at registration. + /// Rarely changes after first publish. + #[serde(skip_serializing_if = "Option::is_none")] + pub inventory: Option, } diff --git a/fleet/harmony-fleet-operator/src/device_reconciler.rs b/fleet/harmony-fleet-operator/src/device_reconciler.rs new file mode 100644 index 00000000..6f2dba9b --- /dev/null +++ b/fleet/harmony-fleet-operator/src/device_reconciler.rs @@ -0,0 +1,165 @@ +//! 
DeviceInfo (NATS `device-info` KV) → Device CR (kube). +//! +//! Agents publish a `DeviceInfo` payload to NATS on startup + on +//! label/inventory change. This reconciler watches that bucket and +//! materializes each entry as a cluster-scoped `Device` custom +//! resource, so label selectors and `kubectl get devices -l …` +//! work the way they do for K8s Nodes. +//! +//! Failure mode: idempotent server-side apply with a fixed field +//! manager, so repeated writes don't accumulate revisions and +//! concurrent edits from other sources stay merged safely. + +use anyhow::Result; +use async_nats::jetstream::kv::{Operation, Store}; +use futures_util::StreamExt; +use harmony_reconciler_contracts::{BUCKET_DEVICE_INFO, DeviceInfo}; +use kube::Client; +use kube::api::{Api, DeleteParams, Patch, PatchParams}; +use std::collections::BTreeMap; + +use crate::crd::{Device, DeviceSpec}; + +const FIELD_MANAGER: &str = "harmony-fleet-operator-device-reconciler"; + +pub async fn run(client: Client, js: async_nats::jetstream::Context) -> Result<()> { + let bucket = js + .create_key_value(async_nats::jetstream::kv::Config { + bucket: BUCKET_DEVICE_INFO.to_string(), + ..Default::default() + }) + .await?; + + run_loop(client, bucket).await +} + +async fn run_loop(client: Client, bucket: Store) -> Result<()> { + let devices: Api = Api::all(client); + // `watch_with_history` replays every current entry then streams + // live updates. Matches the aggregator's pattern and means we + // don't need a separate cold-start KV scan here. + let mut watch = bucket.watch_with_history(">").await?; + tracing::info!("device-reconciler: watching device-info KV"); + + while let Some(entry_res) = watch.next().await { + let entry = match entry_res { + Ok(e) => e, + Err(e) => { + tracing::warn!(error = %e, "device-reconciler: watch delivery error"); + continue; + } + }; + match entry.operation { + Operation::Put => { + let info: DeviceInfo = match serde_json::from_slice(&entry.value) { + Ok(d) => d, + Err(e) => { + tracing::warn!(key = %entry.key, error = %e, "device-reconciler: bad DeviceInfo payload"); + continue; + } + }; + if let Err(e) = upsert_device(&devices, &info).await { + tracing::warn!( + device = %info.device_id, + error = %e, + "device-reconciler: upsert failed" + ); + } + } + Operation::Delete | Operation::Purge => { + let Some(device_id) = entry.key.strip_prefix("info.") else { + continue; + }; + if let Err(e) = delete_device(&devices, device_id).await { + tracing::warn!(%device_id, error = %e, "device-reconciler: delete failed"); + } + } + } + } + Ok(()) +} + +async fn upsert_device(api: &Api, info: &DeviceInfo) -> Result<()> { + let name = info.device_id.to_string(); + let mut device = Device::new( + &name, + DeviceSpec { + inventory: info.inventory.clone(), + }, + ); + device.metadata.labels = Some(clean_labels(&info.labels)); + + api.patch( + &name, + &PatchParams::apply(FIELD_MANAGER).force(), + &Patch::Apply(&device), + ) + .await?; + tracing::debug!(%name, "device-reconciler: upserted"); + Ok(()) +} + +async fn delete_device(api: &Api, name: &str) -> Result<()> { + match api.delete(name, &DeleteParams::default()).await { + Ok(_) => { + tracing::debug!(%name, "device-reconciler: deleted"); + Ok(()) + } + Err(kube::Error::Api(ae)) if ae.code == 404 => Ok(()), + Err(e) => Err(e.into()), + } +} + +/// Drop labels whose keys or values violate k8s label-syntax rules. 
+/// Agents could in theory publish arbitrary strings; kube will reject +/// a whole apply if even one is malformed, which would take out that +/// device's registration. Skip-and-log beats block-everything. +fn clean_labels(raw: &BTreeMap) -> BTreeMap { + raw.iter() + .filter(|(k, v)| is_label_key(k) && is_label_value(v)) + .map(|(k, v)| (k.clone(), v.clone())) + .collect() +} + +fn is_label_key(s: &str) -> bool { + // Simplified: DNS-subdomain-like prefix + name ≤ 63 chars alnum/-/./_. + if s.is_empty() || s.len() > 253 { + return false; + } + let name = s.rsplit_once('/').map(|(_, n)| n).unwrap_or(s); + !name.is_empty() + && name.len() <= 63 + && name + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '.' || c == '_') +} + +fn is_label_value(s: &str) -> bool { + if s.len() > 63 { + return false; + } + s.chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '.' || c == '_') +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn label_cleaner_accepts_common_cases() { + assert!(is_label_key("group")); + assert!(is_label_key("arch")); + assert!(is_label_key("fleet.nationtech.io/region")); + assert!(is_label_value("aarch64")); + assert!(is_label_value("site-01")); + } + + #[test] + fn label_cleaner_rejects_bad_cases() { + assert!(!is_label_key("")); + assert!(!is_label_key("has space")); + assert!(!is_label_value("has space")); + assert!(!is_label_value(&"x".repeat(64))); + } +} diff --git a/fleet/harmony-fleet-operator/src/fleet_aggregator.rs b/fleet/harmony-fleet-operator/src/fleet_aggregator.rs new file mode 100644 index 00000000..e333865a --- /dev/null +++ b/fleet/harmony-fleet-operator/src/fleet_aggregator.rs @@ -0,0 +1,831 @@ +//! Operator-side aggregator + desired-state writer. +//! +//! Maintains three in-memory caches driven by watches: +//! - Deployment CRs (kube watch) → what we want to run +//! - Device CRs (kube watch) → where we could run it +//! - DeploymentState KV (NATS watch) → what's actually running +//! +//! Outputs: +//! - Writes `desired-state..` KV entries when a +//! Deployment's selector matches a Device. Deletes them when the +//! match goes away. +//! - Patches `Deployment.status.aggregate` at 1 Hz for every CR +//! whose matched-device set or phase counts changed. +//! +//! No separate event stream, no per-key revision tracking: KV watches +//! are ordered and last-writer-wins, and the dirty set naturally +//! coalesces high-frequency state churn into one patch per tick. + +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::sync::Arc; +use std::time::Duration; + +use async_nats::jetstream::kv::{Operation, Store}; +use futures_util::{StreamExt, TryStreamExt}; +use harmony_reconciler_contracts::{ + BUCKET_DESIRED_STATE, BUCKET_DEVICE_STATE, DeploymentName, DeploymentState, Phase, + desired_state_key, +}; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; +use kube::api::{Api, Patch, PatchParams}; +use kube::runtime::watcher::{self, Config as WatcherConfig, Event}; +use kube::{Client, ResourceExt}; +use serde_json::json; +use tokio::sync::Mutex; + +use crate::crd::{AggregateLastError, Deployment, DeploymentAggregate, Device}; + +const PATCH_TICK: Duration = Duration::from_secs(1); + +// --------------------------------------------------------------------------- +// State +// --------------------------------------------------------------------------- + +/// (namespace, name) identifying a Deployment CR. 
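(The `DeploymentKey` that doc line introduces follows.) First, a small sketch of the dirty-set coalescing the module doc describes: any number of state events within a tick collapses to one status patch per CR. Names here are illustrative only:

use std::collections::HashSet;

fn main() {
    let mut dirty: HashSet<&str> = HashSet::new();

    // A burst of device-state events for the same CR inside one tick...
    for _ in 0..50 {
        dirty.insert("fleet-demo/hello");
    }
    dirty.insert("fleet-demo/world");

    // ...drains to exactly one patch per dirty CR on the next tick.
    let to_patch: Vec<&str> = dirty.drain().collect();
    assert_eq!(to_patch.len(), 2);
    assert!(dirty.is_empty());
}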
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct DeploymentKey { + pub namespace: String, + pub name: String, +} + +impl DeploymentKey { + pub fn from_cr(cr: &Deployment) -> Option { + Some(Self { + namespace: cr.namespace()?, + name: cr.name_any(), + }) + } +} + +/// One `(device, deployment)` pair. +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct DevicePair { + pub device_id: String, + pub deployment: DeploymentName, +} + +/// Thin projection of a Deployment CR — everything we need for +/// selector evaluation + desired-state writes + status aggregation, +/// without borrowing the full kube object. +#[derive(Debug, Clone)] +pub struct CachedDeployment { + key: DeploymentKey, + deployment_name: DeploymentName, + selector: LabelSelector, + /// JSON-serialized score payload ready to `put` into + /// desired-state. Cached because the same bytes are written to + /// every matched device's KV entry. + score_json: Vec, +} + +#[derive(Debug, Default)] +pub struct FleetState { + /// Cached Deployment CRs, keyed by (namespace, name). + deployments: HashMap, + /// Cached Device labels, keyed by `metadata.name`. + devices: HashMap>, + /// Latest DeploymentState per (device, deployment) pair. + states: HashMap, + /// Which devices have we pushed desired-state for, per deployment? + /// Diff against recomputed targets on any change. Keyed by + /// `DeploymentName` (not `DeploymentKey`) because the + /// `desired-state` KV key space doesn't carry namespace — + /// deployment names are globally unique at the NATS level. This + /// lets cold-start seeding from the KV populate the map + /// correctly without having to guess namespaces. + owned_targets: HashMap>, + /// Per-deployment latest-failure surface for the CR status. + last_error: HashMap, + /// CR keys whose status needs re-patching on the next tick. + dirty: HashSet, +} + +pub type SharedFleetState = Arc>; + +// --------------------------------------------------------------------------- +// Selector evaluation +// --------------------------------------------------------------------------- + +/// Does `selector` match this label set? matchLabels only for MVP — +/// matchExpressions logs a warning once and is treated as "no match" +/// until we need it. +pub fn selector_matches(selector: &LabelSelector, labels: &BTreeMap) -> bool { + if let Some(match_labels) = &selector.match_labels { + for (k, v) in match_labels { + if labels.get(k) != Some(v) { + return false; + } + } + } + if selector + .match_expressions + .as_ref() + .is_some_and(|v| !v.is_empty()) + { + tracing::warn!( + "LabelSelector.matchExpressions is not yet supported; treating CR as empty-selector (matches nothing)" + ); + return false; + } + true +} + +/// Set of Device names currently matching `selector`. 
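(`matched_devices` follows.) One caveat of the current evaluation worth illustrating: a selector carrying matchExpressions matches nothing, even when its matchLabels half would match. Sketch only, using the public export from lib.rs:

use std::collections::BTreeMap;

use harmony_fleet_operator::fleet_aggregator::selector_matches;
use k8s_openapi::apimachinery::pkg::apis::meta::v1::{LabelSelector, LabelSelectorRequirement};

fn main() {
    let mut labels = BTreeMap::new();
    labels.insert("group".to_string(), "edge-a".to_string());

    let selector = LabelSelector {
        match_labels: Some(labels.clone()),
        match_expressions: Some(vec![LabelSelectorRequirement {
            key: "arch".to_string(),
            operator: "In".to_string(),
            values: Some(vec!["aarch64".to_string()]),
        }]),
    };

    // False today; flips to true once matchExpressions support lands.
    assert!(!selector_matches(&selector, &labels));
}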
+fn matched_devices( + selector: &LabelSelector, + devices: &HashMap>, +) -> HashSet { + devices + .iter() + .filter(|(_, labels)| selector_matches(selector, labels)) + .map(|(name, _)| name.clone()) + .collect() +} + +// --------------------------------------------------------------------------- +// Top-level run +// --------------------------------------------------------------------------- + +pub async fn run(client: Client, js: async_nats::jetstream::Context) -> anyhow::Result<()> { + let state_bucket = js + .create_key_value(async_nats::jetstream::kv::Config { + bucket: BUCKET_DEVICE_STATE.to_string(), + ..Default::default() + }) + .await?; + let desired_bucket = js + .create_key_value(async_nats::jetstream::kv::Config { + bucket: BUCKET_DESIRED_STATE.to_string(), + ..Default::default() + }) + .await?; + + // Cold-start: initialize owned_targets from the current contents + // of the desired-state bucket so we don't orphan entries written + // by a previous operator run. + let state: SharedFleetState = Arc::new(Mutex::new(FleetState::default())); + seed_owned_targets(&desired_bucket, &state).await?; + + let deployments_api: Api = Api::all(client.clone()); + let devices_api: Api = Api::all(client.clone()); + let patch_api: Api = Api::all(client); + + tracing::info!( + owned = state + .lock() + .await + .owned_targets + .values() + .map(|s| s.len()) + .sum::(), + "aggregator: startup complete" + ); + + let state_watcher_handle = { + let state = state.clone(); + let bucket = state_bucket.clone(); + tokio::spawn(async move { + if let Err(e) = run_state_kv_watcher(bucket, state).await { + tracing::warn!(error = %e, "aggregator: state watcher exited"); + } + }) + }; + + let deployment_watcher_handle = { + let state = state.clone(); + let desired = desired_bucket.clone(); + tokio::spawn(async move { + if let Err(e) = run_deployment_watcher(deployments_api.clone(), state, desired).await { + tracing::warn!(error = %e, "aggregator: deployment watcher exited"); + } + }) + }; + + let device_watcher_handle = { + let state = state.clone(); + let desired = desired_bucket.clone(); + tokio::spawn(async move { + if let Err(e) = run_device_watcher(devices_api, state, desired).await { + tracing::warn!(error = %e, "aggregator: device watcher exited"); + } + }) + }; + + let patch_state = state.clone(); + let patch_loop = async move { + let mut ticker = tokio::time::interval(PATCH_TICK); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + loop { + ticker.tick().await; + if let Err(e) = patch_tick(&patch_api, &patch_state).await { + tracing::warn!(error = %e, "aggregator: patch tick failed"); + } + } + }; + + tokio::select! 
{ + _ = patch_loop => Ok(()), + _ = state_watcher_handle => Ok(()), + _ = deployment_watcher_handle => Ok(()), + _ = device_watcher_handle => Ok(()), + } +} + +// --------------------------------------------------------------------------- +// Device-state KV watcher (unchanged path) +// --------------------------------------------------------------------------- + +fn parse_state_key(key: &str) -> Option { + let rest = key.strip_prefix("state.")?; + let (device, deployment) = rest.split_once('.')?; + Some(DevicePair { + device_id: device.to_string(), + deployment: DeploymentName::try_new(deployment).ok()?, + }) +} + +async fn run_state_kv_watcher(bucket: Store, state: SharedFleetState) -> anyhow::Result<()> { + let mut watch = bucket.watch_with_history(">").await?; + while let Some(entry_res) = watch.next().await { + let entry = match entry_res { + Ok(e) => e, + Err(e) => { + tracing::warn!(error = %e, "aggregator: state watch delivery error"); + continue; + } + }; + let Some(pair) = parse_state_key(&entry.key) else { + continue; + }; + match entry.operation { + Operation::Put => { + let ds: DeploymentState = match serde_json::from_slice(&entry.value) { + Ok(d) => d, + Err(e) => { + tracing::warn!(key = %entry.key, error = %e, "aggregator: bad device_state payload"); + continue; + } + }; + let mut guard = state.lock().await; + apply_state(&mut guard, pair, ds); + } + Operation::Delete | Operation::Purge => { + let mut guard = state.lock().await; + drop_state(&mut guard, &pair); + } + } + } + Ok(()) +} + +/// Record a device's latest state, dedup against older timestamps, +/// maintain last_error, mark the deployment dirty. +pub fn apply_state(state: &mut FleetState, pair: DevicePair, ds: DeploymentState) { + if let Some(prev) = state.states.get(&pair) { + if prev.last_event_at > ds.last_event_at { + return; + } + } + let phase = ds.phase; + let device_id = ds.device_id.to_string(); + let last_error_msg = ds.last_error.clone(); + let at = ds.last_event_at.to_rfc3339(); + state.states.insert(pair.clone(), ds); + + for key in matching_deployment_keys(state, &pair.deployment) { + match phase { + Phase::Failed => { + if let Some(msg) = last_error_msg.as_deref() { + state.last_error.insert( + key.clone(), + AggregateLastError { + device_id: device_id.clone(), + message: msg.to_string(), + at: at.clone(), + }, + ); + } + } + Phase::Running => { + if let Some(existing) = state.last_error.get(&key) { + if existing.device_id == device_id { + state.last_error.remove(&key); + } + } + } + Phase::Pending => {} + } + state.dirty.insert(key); + } +} + +pub fn drop_state(state: &mut FleetState, pair: &DevicePair) { + let Some(removed) = state.states.remove(pair) else { + return; + }; + let device_id = removed.device_id.to_string(); + for key in matching_deployment_keys(state, &pair.deployment) { + if let Some(existing) = state.last_error.get(&key) { + if existing.device_id == device_id { + state.last_error.remove(&key); + } + } + state.dirty.insert(key); + } +} + +/// CR keys that carry a given deployment name. Deployment names are +/// globally unique at the KV level, so typically 0 or 1 entry here; +/// Vec lets us surface a warning rather than panic if a misconfigured +/// cluster has duplicates across namespaces. 
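(`matching_deployment_keys` follows.) One property of `apply_state` above that is easy to miss: the timestamp guard drops late redeliveries. A minimal illustration of the comparison it relies on, assuming nothing beyond chrono:

use chrono::{Duration, Utc};

fn main() {
    let stored_at = Utc::now();
    let late_redelivery_at = stored_at - Duration::seconds(30);

    // Mirrors `prev.last_event_at > ds.last_event_at`: the stale update is ignored.
    assert!(stored_at > late_redelivery_at);

    // Ties are not ignored; with `>` rather than `>=`, the last writer wins on
    // identical timestamps.
    assert!(!(stored_at > stored_at));
}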
+fn matching_deployment_keys(state: &FleetState, deployment: &DeploymentName) -> Vec { + state + .deployments + .values() + .filter(|d| &d.deployment_name == deployment) + .map(|d| d.key.clone()) + .collect() +} + +// --------------------------------------------------------------------------- +// Deployment CR watcher +// --------------------------------------------------------------------------- + +async fn run_deployment_watcher( + api: Api, + state: SharedFleetState, + desired: Store, +) -> anyhow::Result<()> { + let mut stream = watcher::watcher(api, WatcherConfig::default()).boxed(); + while let Some(event) = stream.try_next().await? { + match event { + Event::Apply(cr) | Event::InitApply(cr) => { + on_deployment_upsert(&state, &desired, cr).await; + } + Event::Delete(cr) => { + on_deployment_delete(&state, &desired, cr).await; + } + Event::Init | Event::InitDone => {} + } + } + Ok(()) +} + +async fn on_deployment_upsert(state: &SharedFleetState, desired: &Store, cr: Deployment) { + let Some(key) = DeploymentKey::from_cr(&cr) else { + return; + }; + let Ok(deployment_name) = DeploymentName::try_new(&key.name) else { + tracing::warn!(name = %key.name, "aggregator: CR name is not a valid DeploymentName, skipping"); + return; + }; + let selector = cr.spec.target_selector.clone(); + let score_json = match serde_json::to_vec(&cr.spec.score) { + Ok(v) => v, + Err(e) => { + tracing::warn!(namespace = %key.namespace, name = %key.name, error = %e, "aggregator: score payload not serializable"); + return; + } + }; + + let (new_targets, previous_targets) = { + let mut guard = state.lock().await; + let new_targets = matched_devices(&selector, &guard.devices); + guard.deployments.insert( + key.clone(), + CachedDeployment { + key: key.clone(), + deployment_name: deployment_name.clone(), + selector: selector.clone(), + score_json: score_json.clone(), + }, + ); + let previous = guard + .owned_targets + .remove(&deployment_name) + .unwrap_or_default(); + guard + .owned_targets + .insert(deployment_name.clone(), new_targets.clone()); + guard.dirty.insert(key.clone()); + (new_targets, previous) + }; + + reconcile_kv( + desired, + &deployment_name, + &new_targets, + &previous_targets, + &score_json, + ) + .await; +} + +async fn on_deployment_delete(state: &SharedFleetState, desired: &Store, cr: Deployment) { + let Some(key) = DeploymentKey::from_cr(&cr) else { + return; + }; + let Ok(deployment_name) = DeploymentName::try_new(&key.name) else { + return; + }; + + let previous = { + let mut guard = state.lock().await; + guard.deployments.remove(&key); + guard.last_error.remove(&key); + guard.dirty.remove(&key); + guard + .owned_targets + .remove(&deployment_name) + .unwrap_or_default() + }; + + // Every previously-owned target becomes a KV delete. Controller + // finalizer does a belt-and-suspenders scan, but we pull our own + // entries here too so agents react immediately. 
+ for device in &previous { + let k = desired_state_key(device, &deployment_name); + if let Err(e) = desired.delete(&k).await { + tracing::debug!(key = %k, error = %e, "aggregator: desired-state delete on CR delete failed"); + } + } +} + +// --------------------------------------------------------------------------- +// Device CR watcher +// --------------------------------------------------------------------------- + +async fn run_device_watcher( + api: Api, + state: SharedFleetState, + desired: Store, +) -> anyhow::Result<()> { + let mut stream = watcher::watcher(api, WatcherConfig::default()).boxed(); + while let Some(event) = stream.try_next().await? { + match event { + Event::Apply(dev) | Event::InitApply(dev) => { + on_device_upsert(&state, &desired, dev).await; + } + Event::Delete(dev) => { + on_device_delete(&state, &desired, dev).await; + } + Event::Init | Event::InitDone => {} + } + } + Ok(()) +} + +async fn on_device_upsert(state: &SharedFleetState, desired: &Store, dev: Device) { + let name = dev.name_any(); + let labels: BTreeMap = dev.metadata.labels.clone().unwrap_or_default(); + + // For every deployment, compute whether this single device now + // matches vs. previously matched; diff against owned_targets; + // collect the KV writes/deletes to perform after the lock is + // released. + let per_deployment: Vec<(CachedDeployment, bool, bool)> = { + let mut guard = state.lock().await; + let snapshot: Vec = guard.deployments.values().cloned().collect(); + let previously_matched: HashMap = snapshot + .iter() + .map(|d| { + let was = guard + .owned_targets + .get(&d.deployment_name) + .is_some_and(|set| set.contains(&name)); + (d.deployment_name.clone(), was) + }) + .collect(); + guard.devices.insert(name.clone(), labels.clone()); + + let mut out = Vec::with_capacity(snapshot.len()); + for d in snapshot { + let was = previously_matched + .get(&d.deployment_name) + .copied() + .unwrap_or(false); + let now = selector_matches(&d.selector, &labels); + if was != now { + let targets = guard + .owned_targets + .entry(d.deployment_name.clone()) + .or_default(); + if now { + targets.insert(name.clone()); + } else { + targets.remove(&name); + } + guard.dirty.insert(d.key.clone()); + } + out.push((d, was, now)); + } + out + }; + + for (cached, was, now) in per_deployment { + match (was, now) { + (false, true) => { + let k = desired_state_key(&name, &cached.deployment_name); + if let Err(e) = desired.put(&k, cached.score_json.clone().into()).await { + tracing::debug!(key = %k, error = %e, "aggregator: desired-state put failed"); + } + } + (true, false) => { + let k = desired_state_key(&name, &cached.deployment_name); + if let Err(e) = desired.delete(&k).await { + tracing::debug!(key = %k, error = %e, "aggregator: desired-state delete failed"); + } + } + _ => {} + } + } +} + +async fn on_device_delete(state: &SharedFleetState, desired: &Store, dev: Device) { + let name = dev.name_any(); + let removed_from: Vec = { + let mut guard = state.lock().await; + guard.devices.remove(&name); + let mut out = Vec::new(); + let deployments_snapshot: Vec = + guard.deployments.values().cloned().collect(); + for cached in deployments_snapshot { + if let Some(set) = guard.owned_targets.get_mut(&cached.deployment_name) { + if set.remove(&name) { + out.push(cached.deployment_name.clone()); + guard.dirty.insert(cached.key.clone()); + } + } + } + out + }; + for deployment_name in removed_from { + let k = desired_state_key(&name, &deployment_name); + if let Err(e) = desired.delete(&k).await { + 
tracing::debug!(key = %k, error = %e, "aggregator: desired-state delete on device delete failed"); + } + } +} + +// --------------------------------------------------------------------------- +// Diff helper: write/delete desired-state entries for one deployment +// --------------------------------------------------------------------------- + +async fn reconcile_kv( + desired: &Store, + deployment_name: &DeploymentName, + new_targets: &HashSet, + previous_targets: &HashSet, + score_json: &[u8], +) { + // Writes: new_targets, unconditionally — idempotent put; agents + // byte-compare and no-op on unchanged content. + for device in new_targets { + let k = desired_state_key(device, deployment_name); + if let Err(e) = desired.put(&k, score_json.to_vec().into()).await { + tracing::debug!(key = %k, error = %e, "aggregator: desired-state put failed"); + } + } + // Deletes: anything we owned previously but no longer target. + for device in previous_targets.difference(new_targets) { + let k = desired_state_key(device, deployment_name); + if let Err(e) = desired.delete(&k).await { + tracing::debug!(key = %k, error = %e, "aggregator: desired-state delete failed"); + } + } +} + +/// Initialize `owned_targets` from the current contents of the +/// `desired-state` KV. After a restart, we need to know what was +/// previously written so we can diff correctly on the first +/// watch-driven reconcile (otherwise we'd leak orphans when a +/// selector change causes a deployment to stop targeting a device). +async fn seed_owned_targets(bucket: &Store, state: &SharedFleetState) -> anyhow::Result<()> { + let mut guard = state.lock().await; + let mut keys = bucket.keys().await?; + while let Some(key_res) = keys.next().await { + let key = key_res?; + // Keys are `.`. The KV key space carries + // no namespace — names are globally unique at this layer — + // which is exactly why `owned_targets` keys by DeploymentName. 
+ let Some((device, deployment)) = key.split_once('.') else { + continue; + }; + let Ok(deployment_name) = DeploymentName::try_new(deployment) else { + continue; + }; + guard + .owned_targets + .entry(deployment_name) + .or_default() + .insert(device.to_string()); + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// Patch tick +// --------------------------------------------------------------------------- + +async fn patch_tick(api: &Api, state: &SharedFleetState) -> anyhow::Result<()> { + let dirty: Vec<(DeploymentKey, DeploymentAggregate)> = { + let mut guard = state.lock().await; + let keys: Vec = guard.dirty.drain().collect(); + keys.iter() + .filter_map(|k| { + let cached = guard.deployments.get(k)?.clone(); + let agg = compute_aggregate(&guard, &cached); + Some((k.clone(), agg)) + }) + .collect() + }; + + for (key, aggregate) in dirty { + let ns_api: Api = Api::namespaced(api.clone().into_client(), &key.namespace); + let status = json!({ "status": { "aggregate": aggregate } }); + if let Err(e) = ns_api + .patch_status(&key.name, &PatchParams::default(), &Patch::Merge(&status)) + .await + { + tracing::warn!( + namespace = %key.namespace, + name = %key.name, + error = %e, + "aggregator: status patch failed" + ); + } else { + tracing::debug!( + namespace = %key.namespace, + name = %key.name, + matched = aggregate.matched_device_count, + succeeded = aggregate.succeeded, + failed = aggregate.failed, + pending = aggregate.pending, + "aggregator: status patched" + ); + } + } + Ok(()) +} + +/// Compute the aggregate for one Deployment from current caches. +/// `owned_targets` is the authoritative "currently selector-matched" +/// set for the deployment, as maintained by the watchers. +pub fn compute_aggregate(state: &FleetState, cached: &CachedDeployment) -> DeploymentAggregate { + let empty = HashSet::new(); + let targets = state + .owned_targets + .get(&cached.deployment_name) + .unwrap_or(&empty); + + let mut agg = DeploymentAggregate { + matched_device_count: targets.len() as u32, + ..Default::default() + }; + + for device_id in targets { + let pair = DevicePair { + device_id: device_id.clone(), + deployment: cached.deployment_name.clone(), + }; + match state.states.get(&pair).map(|s| s.phase) { + Some(Phase::Running) => agg.succeeded += 1, + Some(Phase::Failed) => agg.failed += 1, + Some(Phase::Pending) | None => agg.pending += 1, + } + } + + agg.last_error = state.last_error.get(&cached.key).cloned(); + agg +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::{TimeZone, Utc}; + use harmony_reconciler_contracts::Id; + + fn dn(s: &str) -> DeploymentName { + DeploymentName::try_new(s).expect("valid test name") + } + + fn state(device: &str, deployment: &str, phase: Phase, seconds: i64) -> DeploymentState { + DeploymentState { + device_id: Id::from(device.to_string()), + deployment: dn(deployment), + phase, + last_event_at: Utc.timestamp_opt(1_700_000_000 + seconds, 0).unwrap(), + last_error: None, + } + } + + fn cached(namespace: &str, name: &str, match_key: &str, match_val: &str) -> CachedDeployment { + let mut ml = BTreeMap::new(); + ml.insert(match_key.to_string(), match_val.to_string()); + CachedDeployment { + key: DeploymentKey { + namespace: namespace.to_string(), + name: name.to_string(), + }, + deployment_name: dn(name), + selector: LabelSelector { + match_labels: Some(ml), + match_expressions: None, + }, + score_json: b"{}".to_vec(), + } + } + + fn pair(device: &str, deployment: &str) -> DevicePair { + DevicePair { + 
device_id: device.to_string(), + deployment: dn(deployment), + } + } + + #[test] + fn selector_match_labels_only() { + let mut ml = BTreeMap::new(); + ml.insert("group".to_string(), "edge-a".to_string()); + let sel = LabelSelector { + match_labels: Some(ml), + match_expressions: None, + }; + + let mut matching = BTreeMap::new(); + matching.insert("group".to_string(), "edge-a".to_string()); + matching.insert("arch".to_string(), "aarch64".to_string()); + assert!(selector_matches(&sel, &matching)); + + let mut non_matching = BTreeMap::new(); + non_matching.insert("group".to_string(), "edge-b".to_string()); + assert!(!selector_matches(&sel, &non_matching)); + + let empty = BTreeMap::new(); + assert!(!selector_matches(&sel, &empty)); + } + + #[test] + fn empty_selector_matches_everything() { + let sel = LabelSelector::default(); + let mut labels = BTreeMap::new(); + labels.insert("anything".to_string(), "goes".to_string()); + assert!(selector_matches(&sel, &labels)); + assert!(selector_matches(&sel, &BTreeMap::new())); + } + + #[test] + fn compute_aggregate_counts_matched_devices() { + let cached = cached("fleet-demo", "hello", "group", "edge-a"); + let key = cached.key.clone(); + + let mut s = FleetState::default(); + s.deployments.insert(key, cached.clone()); + // Three devices already in owned_targets (selector resolution + // is separate from the aggregate; aggregate reads owned_targets). + s.owned_targets.insert( + cached.deployment_name.clone(), + ["pi-01", "pi-02", "pi-03"] + .iter() + .map(|s| s.to_string()) + .collect(), + ); + s.states.insert( + pair("pi-01", "hello"), + state("pi-01", "hello", Phase::Running, 0), + ); + s.states.insert( + pair("pi-02", "hello"), + state("pi-02", "hello", Phase::Failed, 0), + ); + // pi-03 has no state entry → pending + + let agg = compute_aggregate(&s, &cached); + assert_eq!(agg.matched_device_count, 3); + assert_eq!(agg.succeeded, 1); + assert_eq!(agg.failed, 1); + assert_eq!(agg.pending, 1); + } + + #[test] + fn matched_devices_picks_by_label() { + let mut ml = BTreeMap::new(); + ml.insert("group".to_string(), "edge-a".to_string()); + let sel = LabelSelector { + match_labels: Some(ml), + match_expressions: None, + }; + + let mut devices: HashMap> = HashMap::new(); + let mut a = BTreeMap::new(); + a.insert("group".to_string(), "edge-a".to_string()); + devices.insert("pi-01".to_string(), a); + let mut b = BTreeMap::new(); + b.insert("group".to_string(), "edge-b".to_string()); + devices.insert("pi-02".to_string(), b); + + let matched = matched_devices(&sel, &devices); + assert_eq!(matched.len(), 1); + assert!(matched.contains("pi-01")); + } +} diff --git a/fleet/harmony-fleet-operator/src/install.rs b/fleet/harmony-fleet-operator/src/install.rs new file mode 100644 index 00000000..57b44e1e --- /dev/null +++ b/fleet/harmony-fleet-operator/src/install.rs @@ -0,0 +1,46 @@ +//! Install the operator's CRD into a target Kubernetes cluster +//! via a harmony Score — no yaml generation, no kubectl shell-out. +//! +//! The Score is just [`K8sResourceScore`] over `Deployment::crd()`; +//! the topology is the shared `K8sBareTopology`, which exposes a +//! `K8sclient` backed by the caller's `KUBECONFIG` without dragging +//! in `K8sAnywhereTopology`'s product-level `ensure_ready`. 
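(install.rs's imports and `install_crds` follow.) Since `install_crds` returns before the apiserver marks the CRDs Established, a caller that immediately creates CRs may prefer to wait in code rather than shell out to kubectl. A sketch with kube-rs, assuming the usual `<plural>.<group>` CRD names for the two kinds defined in crd.rs:

use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
use kube::runtime::wait::{await_condition, conditions};
use kube::{Api, Client};

// Wait until both CRDs report the Established condition before creating CRs.
async fn wait_for_crds() -> anyhow::Result<()> {
    let client = Client::try_default().await?;
    let crds: Api<CustomResourceDefinition> = Api::all(client);

    for name in ["deployments.fleet.nationtech.io", "devices.fleet.nationtech.io"] {
        await_condition(crds.clone(), name, conditions::is_crd_established()).await?;
    }
    Ok(())
}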
+
+use anyhow::{Context, Result};
+use harmony::inventory::Inventory;
+use harmony::modules::k8s::K8sBareTopology;
+use harmony::modules::k8s::resource::K8sResourceScore;
+use harmony::score::Score;
+use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
+use kube::CustomResourceExt;
+
+use crate::crd::{Deployment, Device};
+
+/// Apply the operator's CRDs to whatever cluster `KUBECONFIG` points
+/// at. Returns once the apply call completes — does **not** wait for
+/// the apiserver to mark the CRD `Established`; the caller does that
+/// (e.g. with `kubectl wait --for=condition=Established`) if it
+/// cares.
+pub async fn install_crds() -> Result<()> {
+    let topology = K8sBareTopology::from_kubeconfig("harmony-fleet-operator-install")
+        .await
+        .map_err(|e| anyhow::anyhow!(e))
+        .context("building K8sBareTopology from KUBECONFIG")?;
+    let inventory = Inventory::empty();
+
+    let crds: Vec<CustomResourceDefinition> = vec![Deployment::crd(), Device::crd()];
+    let score = K8sResourceScore::<CustomResourceDefinition> {
+        resource: crds,
+        namespace: None,
+    };
+
+    let interpret = Score::<K8sBareTopology>::create_interpret(&score);
+    let outcome = interpret
+        .execute(&inventory, &topology)
+        .await
+        .map_err(|e| anyhow::anyhow!("install CRD: {e}"))
+        .context("executing K8sResourceScore for Deployment CRD")?;
+
+    tracing::info!(?outcome, "CRD installed");
+    Ok(())
+}
diff --git a/fleet/harmony-fleet-operator/src/lib.rs b/fleet/harmony-fleet-operator/src/lib.rs
new file mode 100644
index 00000000..c97049c8
--- /dev/null
+++ b/fleet/harmony-fleet-operator/src/lib.rs
@@ -0,0 +1,11 @@
+//! Library surface of the IoT operator crate.
+//!
+//! Most of the crate is a binary (reconcile loop, install subcommand).
+//! The CRD type definitions are exposed here as a library so external
+//! consumers — tooling that applies CRs, tests, documentation generators
+//! — can import the typed `Deployment`, `DeploymentSpec`,
+//! `ScorePayload`, etc. without duplicating them.
+
+pub mod crd;
+pub mod device_reconciler;
+pub mod fleet_aggregator;
diff --git a/fleet/harmony-fleet-operator/src/main.rs b/fleet/harmony-fleet-operator/src/main.rs
new file mode 100644
index 00000000..0e0bd347
--- /dev/null
+++ b/fleet/harmony-fleet-operator/src/main.rs
@@ -0,0 +1,146 @@
+mod chart;
+mod controller;
+mod install;
+
+use harmony_fleet_operator::{crd, device_reconciler, fleet_aggregator};
+
+use anyhow::Result;
+use async_nats::jetstream;
+use clap::{Parser, Subcommand};
+use harmony_reconciler_contracts::BUCKET_DESIRED_STATE;
+use kube::Client;
+use std::path::PathBuf;
+
+#[derive(Parser)]
+#[command(
+    name = "harmony-fleet-operator",
+    about = "IoT operator — Deployment CRD → NATS KV"
+)]
+struct Cli {
+    #[command(subcommand)]
+    command: Option<Command>,
+
+    #[arg(
+        long,
+        env = "NATS_URL",
+        default_value = "nats://localhost:4222",
+        global = true
+    )]
+    nats_url: String,
+
+    #[arg(
+        long,
+        env = "KV_BUCKET",
+        default_value = BUCKET_DESIRED_STATE,
+        global = true
+    )]
+    kv_bucket: String,
+}
+
+#[derive(Subcommand)]
+enum Command {
+    /// Run the controller (default when no subcommand is given).
+    Run,
+    /// Apply the operator's CRDs to the cluster `KUBECONFIG` points
+    /// at. Uses harmony's typed k8s client — no yaml, no kubectl.
+    Install,
+    /// Generate a helm chart directory that installs the operator
+    /// in-cluster (Deployment + RBAC + CRDs). Prints the written
+    /// chart path on success; `helm install <path>` takes it from
+    /// there. No registry publish — the chart lives on disk.
+    Chart {
+        #[arg(long, default_value = "/tmp/fleet-load-test/chart")]
+        output: PathBuf,
+        #[arg(long, default_value = "localhost/harmony-fleet-operator:latest")]
+        image: String,
+        #[arg(long, default_value = "IfNotPresent")]
+        image_pull_policy: String,
+        #[arg(long, default_value = "fleet-system")]
+        namespace: String,
+        #[arg(long, default_value = "nats://fleet-nats.fleet-system:4222")]
+        nats_url: String,
+        #[arg(long, default_value = "info,kube_runtime=warn")]
+        log_level: String,
+    },
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+        .init();
+
+    let cli = Cli::parse();
+    match cli.command.unwrap_or(Command::Run) {
+        Command::Install => install::install_crds().await,
+        Command::Run => run(&cli.nats_url, &cli.kv_bucket).await,
+        Command::Chart {
+            output,
+            image,
+            image_pull_policy,
+            namespace,
+            nats_url,
+            log_level,
+        } => {
+            let written = chart::build_chart(&chart::ChartOptions {
+                output_dir: output,
+                image,
+                image_pull_policy,
+                namespace,
+                nats_url,
+                log_level,
+            })?;
+            println!("{}", written.display());
+            Ok(())
+        }
+    }
+}
+
+async fn run(nats_url: &str, bucket: &str) -> Result<()> {
+    // Retry on the initial connect — startup races against the NATS
+    // server becoming fully ready.
+    let nats = connect_with_retry(nats_url).await?;
+    tracing::info!(url = %nats_url, "connected to NATS");
+    let js = jetstream::new(nats);
+    let desired_state_kv = js
+        .create_key_value(jetstream::kv::Config {
+            bucket: bucket.to_string(),
+            ..Default::default()
+        })
+        .await?;
+    tracing::info!(bucket = %bucket, "KV bucket ready");
+
+    let client = Client::try_default().await?;
+
+    // Three concurrent tasks:
+    //   controller        — CR validation + finalizer-cleanup
+    //   device_reconciler — NATS device-info → Device CR
+    //   fleet_aggregator  — watches Deployments + Devices + states,
+    //                       writes desired-state KV, patches CR status
+    // Any of them failing tears the process down; kube-rs Controller
+    // swallows its own transient reconcile errors.
+    let ctl_client = client.clone();
+    let dr_client = client.clone();
+    let dr_js = js.clone();
+    tokio::select! {
+        r = controller::run(ctl_client, desired_state_kv) => r,
+        r = device_reconciler::run(dr_client, dr_js) => r,
+        r = fleet_aggregator::run(client, js) => r,
+    }
+}
+
+async fn connect_with_retry(nats_url: &str) -> Result<async_nats::Client> {
+    use std::time::Duration;
+    let mut last_err: Option<anyhow::Error> = None;
+    for attempt in 0..15 {
+        match async_nats::connect(nats_url).await {
+            Ok(c) => return Ok(c),
+            Err(e) => {
+                tracing::warn!(attempt, error = %e, "NATS connect failed; retrying");
+                last_err = Some(e.into());
+                tokio::time::sleep(Duration::from_secs(2)).await;
+            }
+        }
+    }
+    Err(last_err.unwrap_or_else(|| anyhow::anyhow!("NATS connect failed after retries")))
+}
diff --git a/fleet/scripts/load-test.sh b/fleet/scripts/load-test.sh
new file mode 100755
index 00000000..b5ceb9f9
--- /dev/null
+++ b/fleet/scripts/load-test.sh
@@ -0,0 +1,301 @@
+#!/usr/bin/env bash
+# Load-test harness for the Harmony fleet operator's fleet_aggregator.
+#
+# Brings up the minimum stack (k3d + in-cluster NATS + CRD + operator)
+# with no VM or real agent, then runs the `fleet_load_test` binary
+# which simulates N devices pushing DeploymentState to NATS.
+#
+# All paths are stable, under $WORK_DIR (default /tmp/fleet-load-test), so you
+# can point kubectl / tail at them while the test is running.
+# +# Quick usage: +# fleet/scripts/load-test.sh # 100-device default (55 + 9×5) +# HOLD=1 fleet/scripts/load-test.sh # leave stack running for exploration +# DEVICES=10000 GROUP_SIZES=5500,500,500,500,500,500,500,500,500,500 \ +# DURATION=90 fleet/scripts/load-test.sh +# +# While it's running, in another terminal: +# export KUBECONFIG=/tmp/fleet-load-test/kubeconfig +# kubectl get deployments.fleet.nationtech.io -A -w +# kubectl get deployments.fleet.nationtech.io -A \ +# -o custom-columns=NAME:.metadata.name,RUN:.status.aggregate.succeeded,FAIL:.status.aggregate.failed,PEND:.status.aggregate.pending +# tail -f /tmp/fleet-load-test/operator.log +# +# Set DEBUG=1 to bump RUST_LOG so the operator logs every status patch. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +OPERATOR_DIR="$REPO_ROOT/fleet/harmony-fleet-operator" + +# ---- config ----------------------------------------------------------------- + +K3D_BIN="${K3D_BIN:-$HOME/.local/share/harmony/k3d/k3d}" +CLUSTER_NAME="${CLUSTER_NAME:-fleet-load}" +NATS_NAMESPACE="${NATS_NAMESPACE:-fleet-system}" +NATS_NAME="${NATS_NAME:-fleet-nats}" +NATS_NODE_PORT="${NATS_NODE_PORT:-4222}" +NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}" + +DEVICES="${DEVICES:-100}" +GROUP_SIZES="${GROUP_SIZES:-55,5,5,5,5,5,5,5,5,5}" +TICK_MS="${TICK_MS:-1000}" +DURATION="${DURATION:-60}" +NAMESPACE="${NAMESPACE:-fleet-load}" + +# Keep the stack alive after the test completes so the user can poke +# at CRs + NATS interactively. Ctrl-C to tear everything down. +HOLD="${HOLD:-0}" + +# Stable working dir so kubectl + tail targets are predictable. +WORK_DIR="${WORK_DIR:-/tmp/fleet-load-test}" +mkdir -p "$WORK_DIR" + +KUBECONFIG_FILE="$WORK_DIR/kubeconfig" +OPERATOR_LOG="$WORK_DIR/operator.log" +CHART_DIR="$WORK_DIR/chart" +OPERATOR_IMAGE="${OPERATOR_IMAGE:-localhost/harmony-fleet-operator:latest}" +OPERATOR_NAMESPACE="${OPERATOR_NAMESPACE:-fleet-system}" +OPERATOR_RELEASE="${OPERATOR_RELEASE:-harmony-fleet-operator}" +OPERATOR_PID="" # unused in the helm path; kept so older trap-cleanup logic doesn't choke. + +log() { printf '\033[1;34m[load-test]\033[0m %s\n' "$*"; } +fail() { printf '\033[1;31m[load-test FAIL]\033[0m %s\n' "$*" >&2; exit 1; } + +dump_operator_log() { + [[ -n "$KUBECONFIG" && -f "$KUBECONFIG" ]] || return 0 + kubectl -n "$OPERATOR_NAMESPACE" logs "deployment/$OPERATOR_RELEASE" \ + --tail=1000 >"$OPERATOR_LOG" 2>/dev/null || true +} + +cleanup() { + local rc=$? + log "cleanup…" + # Capture the operator's in-cluster log before we kill the + # cluster, so the tail-on-failure hook has something to show. 
+ dump_operator_log + "$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true + if [[ $rc -ne 0 && -s "$OPERATOR_LOG" ]]; then + log "operator log at $OPERATOR_LOG (kept for inspection)" + echo "----- operator log tail -----" + tail -n 60 "$OPERATOR_LOG" 2>/dev/null || true + elif [[ -s "$OPERATOR_LOG" ]]; then + log "operator log at $OPERATOR_LOG" + fi + exit $rc +} +trap cleanup EXIT INT TERM + +require() { command -v "$1" >/dev/null 2>&1 || fail "missing required tool: $1"; } +require cargo +require kubectl +require podman +require docker +require helm +[[ -x "$K3D_BIN" ]] || fail "k3d binary not executable at $K3D_BIN" + +# ---- phase 1: k3d cluster --------------------------------------------------- + +log "phase 1: create k3d cluster '$CLUSTER_NAME' (host port $NATS_NODE_PORT → loadbalancer)" +"$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true +"$K3D_BIN" cluster create "$CLUSTER_NAME" \ + --wait --timeout 90s \ + -p "${NATS_NODE_PORT}:${NATS_NODE_PORT}@loadbalancer" \ + >/dev/null +"$K3D_BIN" kubeconfig get "$CLUSTER_NAME" > "$KUBECONFIG_FILE" +export KUBECONFIG="$KUBECONFIG_FILE" + +# ---- phase 2: NATS in-cluster ------------------------------------------------ + +log "phase 2a: sideload NATS image ($NATS_IMAGE)" +if ! docker image inspect "$NATS_IMAGE" >/dev/null 2>&1; then + if ! podman image inspect "$NATS_IMAGE" >/dev/null 2>&1; then + podman pull "$NATS_IMAGE" >/dev/null || fail "podman pull $NATS_IMAGE failed" + fi + tmptar="$(mktemp -t nats-image.XXXXXX.tar)" + podman save "$NATS_IMAGE" -o "$tmptar" >/dev/null + docker load -i "$tmptar" >/dev/null + rm -f "$tmptar" +fi +"$K3D_BIN" image import "$NATS_IMAGE" -c "$CLUSTER_NAME" >/dev/null + +log "phase 2b: install NATS via NatsBasicScore" +( + cd "$REPO_ROOT" + cargo run -q --release -p example_fleet_nats_install -- \ + --namespace "$NATS_NAMESPACE" \ + --name "$NATS_NAME" \ + --expose load-balancer +) +# The upstream nats/nats helm chart provisions a StatefulSet, not a +# Deployment. Waiting on the pod-label condition works across both +# shapes without hardcoding a workload kind. +kubectl -n "$NATS_NAMESPACE" wait --for=condition=Ready \ + "pod" -l "app.kubernetes.io/name=nats" --timeout=180s >/dev/null + +log "probing nats://localhost:$NATS_NODE_PORT end-to-end" +for _ in $(seq 1 60); do + (echo >"/dev/tcp/127.0.0.1/$NATS_NODE_PORT") 2>/dev/null && break + sleep 1 +done +(echo >"/dev/tcp/127.0.0.1/$NATS_NODE_PORT") 2>/dev/null \ + || fail "TCP localhost:$NATS_NODE_PORT never came up" + +# ---- phase 3: operator container image + helm install --------------------- + +log "phase 3a: build operator release binary" +( + cd "$REPO_ROOT" + cargo build -q --release -p harmony-fleet-operator +) + +log "phase 3b: build container image $OPERATOR_IMAGE" +# The workspace's top-level .dockerignore excludes target/, which is +# the right default for most container builds but exactly what we +# need here. Stage the release binary into a dedicated clean build +# context so the Dockerfile's COPY sees it. 
+IMAGE_CTX="$WORK_DIR/image-ctx" +rm -rf "$IMAGE_CTX" +mkdir -p "$IMAGE_CTX/target/release" +cp "$REPO_ROOT/target/release/harmony-fleet-operator" "$IMAGE_CTX/target/release/harmony-fleet-operator" +cp "$REPO_ROOT/fleet/harmony-fleet-operator/Dockerfile" "$IMAGE_CTX/Dockerfile" +podman build -q -t "$OPERATOR_IMAGE" "$IMAGE_CTX" >/dev/null + +log "phase 3c: sideload operator image into k3d cluster" +tmptar="$(mktemp -t harmony-fleet-operator-image.XXXXXX.tar)" +podman save "$OPERATOR_IMAGE" -o "$tmptar" >/dev/null +docker load -i "$tmptar" >/dev/null +rm -f "$tmptar" +"$K3D_BIN" image import "$OPERATOR_IMAGE" -c "$CLUSTER_NAME" >/dev/null + +log "phase 3d: generate helm chart + install operator in-cluster" +# DEBUG=1 bumps operator logging so `kubectl logs` prints every +# status patch + transition. +if [[ "${DEBUG:-0}" == "1" ]]; then + OPERATOR_RUST_LOG="debug,async_nats=warn,hyper=warn,rustls=warn,kube=info" +else + OPERATOR_RUST_LOG="info,kube_runtime=warn" +fi + +rm -rf "$CHART_DIR" +mkdir -p "$CHART_DIR" +( + cd "$OPERATOR_DIR" + cargo run -q -- chart \ + --output "$CHART_DIR" \ + --image "$OPERATOR_IMAGE" \ + --image-pull-policy IfNotPresent \ + --namespace "$OPERATOR_NAMESPACE" \ + --nats-url "nats://${NATS_NAME}.${NATS_NAMESPACE}:4222" \ + --log-level "$OPERATOR_RUST_LOG" +) >/dev/null + +helm upgrade --install "$OPERATOR_RELEASE" "$CHART_DIR/$OPERATOR_RELEASE" \ + --namespace "$OPERATOR_NAMESPACE" \ + --create-namespace \ + --wait --timeout 120s >/dev/null + +kubectl wait --for=condition=Established \ + "crd/deployments.fleet.nationtech.io" --timeout=30s >/dev/null +kubectl wait --for=condition=Established \ + "crd/devices.fleet.nationtech.io" --timeout=30s >/dev/null +kubectl -n "$OPERATOR_NAMESPACE" wait --for=condition=Available \ + "deployment/$OPERATOR_RELEASE" --timeout=120s >/dev/null + +# Seed the operator log file from the pod so HOLD=1 banner + final +# summary both have something to read. We re-dump on cleanup. +dump_operator_log + +# ---- explore banner (before the load run so the user can start watching) ---- + +print_banner() { + cat </dev/null || echo 0)" +warnings="$(grep -c " WARN " "$OPERATOR_LOG" 2>/dev/null || echo 0)" +errors="$(grep -c " ERROR " "$OPERATOR_LOG" 2>/dev/null || echo 0)" +log " CR status patches logged (DEBUG-level; use DEBUG=1 to surface): $patches" +log " operator warnings: $warnings errors: $errors" +if [[ "$errors" -gt 0 ]]; then + echo "----- operator error lines -----" + grep " ERROR " "$OPERATOR_LOG" | tail -20 +fi + +# ---- hold open (optional) --------------------------------------------------- + +if [[ "$HOLD" == "1" ]]; then + print_banner + log "HOLD=1 — stack is still running. Ctrl-C to tear down." + # Block until user interrupts; cleanup trap does the teardown. + while true; do sleep 60; done +fi + +log "PASS" diff --git a/iot/scripts/smoke-a1.sh b/fleet/scripts/smoke-a1.sh similarity index 89% rename from iot/scripts/smoke-a1.sh rename to fleet/scripts/smoke-a1.sh index 5b8d60f8..2b13befa 100755 --- a/iot/scripts/smoke-a1.sh +++ b/fleet/scripts/smoke-a1.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# End-to-end smoke test for the IoT walking skeleton (ROADMAP/iot_platform/ +# End-to-end smoke test for the IoT walking skeleton (ROADMAP/fleet_platform/ # v0_walking_skeleton.md §9.A1 and §5.4 agent dispatch). # # Deployment CR ─apply─▶ operator ─KV put─▶ NATS ◀─watch─ agent ─podman─▶ nginx @@ -22,25 +22,25 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" -OPERATOR_DIR="$REPO_ROOT/iot/iot-operator-v0" -AGENT_DIR="$REPO_ROOT/iot/iot-agent-v0" +OPERATOR_DIR="$REPO_ROOT/fleet/harmony-fleet-operator" +AGENT_DIR="$REPO_ROOT/fleet/harmony-fleet-agent" K3D_BIN="${K3D_BIN:-$HOME/.local/share/harmony/k3d/k3d}" -CLUSTER_NAME="${CLUSTER_NAME:-iot-smoke}" -NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats}" -NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net}" +CLUSTER_NAME="${CLUSTER_NAME:-fleet-smoke}" +NATS_CONTAINER="${NATS_CONTAINER:-fleet-smoke-nats}" +NATS_NET_NAME="${NATS_NET_NAME:-fleet-smoke-net}" NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}" NATSBOX_IMAGE="${NATSBOX_IMAGE:-docker.io/natsio/nats-box:latest}" NATS_PORT="${NATS_PORT:-4222}" TARGET_DEVICE="${TARGET_DEVICE:-pi-demo-01}" DEPLOY_NAME="${DEPLOY_NAME:-hello-world}" -DEPLOY_NS="${DEPLOY_NS:-iot-demo}" +DEPLOY_NS="${DEPLOY_NS:-fleet-demo}" HELLO_CONTAINER="${HELLO_CONTAINER:-hello}" HELLO_PORT="${HELLO_PORT:-8080}" -OPERATOR_LOG="$(mktemp -t iot-operator.XXXXXX.log)" +OPERATOR_LOG="$(mktemp -t harmony-fleet-operator.XXXXXX.log)" OPERATOR_PID="" -AGENT_LOG="$(mktemp -t iot-agent.XXXXXX.log)" +AGENT_LOG="$(mktemp -t fleet-agent.XXXXXX.log)" AGENT_PID="" AGENT_CONFIG_FILE="" KUBECONFIG_FILE="" @@ -126,13 +126,13 @@ log "phase 2: create k3d cluster '$CLUSTER_NAME'" "$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true "$K3D_BIN" cluster create "$CLUSTER_NAME" --wait --timeout 90s >/dev/null -KUBECONFIG_FILE="$(mktemp -t iot-smoke-kubeconfig.XXXXXX)" +KUBECONFIG_FILE="$(mktemp -t fleet-smoke-kubeconfig.XXXXXX)" "$K3D_BIN" kubeconfig get "$CLUSTER_NAME" > "$KUBECONFIG_FILE" export KUBECONFIG="$KUBECONFIG_FILE" log "install CRD via operator's install subcommand (typed Rust — no yaml, no kubectl apply)" ( cd "$OPERATOR_DIR" && cargo run -q -- install ) >/dev/null -kubectl wait --for=condition=Established "crd/deployments.iot.nationtech.io" --timeout=30s >/dev/null +kubectl wait --for=condition=Established "crd/deployments.fleet.nationtech.io" --timeout=30s >/dev/null kubectl get ns "$DEPLOY_NS" >/dev/null 2>&1 || kubectl create namespace "$DEPLOY_NS" >/dev/null @@ -142,7 +142,7 @@ kubectl get ns "$DEPLOY_NS" >/dev/null 2>&1 || kubectl create namespace "$DEPLOY ############################################################################### log "phase 2b: apiserver rejects invalid score.type" BAD_CR=$(cat </dev/null 2>&1; then - kubectl -n "$DEPLOY_NS" delete deployment.iot.nationtech.io bad-discriminator >/dev/null 2>&1 || true +if kubectl -n "$DEPLOY_NS" get deployment.fleet.nationtech.io bad-discriminator >/dev/null 2>&1; then + kubectl -n "$DEPLOY_NS" delete deployment.fleet.nationtech.io bad-discriminator >/dev/null 2>&1 || true fail "apiserver should have rejected 'bad-discriminator' but it was persisted" fi @@ -179,7 +179,7 @@ log "phase 3: start operator" NATS_URL="nats://127.0.0.1:$NATS_PORT" \ KV_BUCKET="desired-state" \ RUST_LOG="info,kube_runtime=warn" \ - "$REPO_ROOT/target/debug/iot-operator-v0" \ + "$REPO_ROOT/target/debug/harmony-fleet-operator" \ >"$OPERATOR_LOG" 2>&1 & OPERATOR_PID=$! log "operator pid=$OPERATOR_PID (log: $OPERATOR_LOG)" @@ -207,7 +207,7 @@ log "phase 3b: build + start agent" # doesn't occupy the host port before we even start. podman rm -f "$HELLO_CONTAINER" >/dev/null 2>&1 || true -AGENT_CONFIG_FILE="$(mktemp -t iot-agent-config.XXXXXX.toml)" +AGENT_CONFIG_FILE="$(mktemp -t fleet-agent-config.XXXXXX.toml)" cat >"$AGENT_CONFIG_FILE" <"$AGENT_LOG" 2>&1 & AGENT_PID=$! 
log "agent pid=$AGENT_PID (log: $AGENT_LOG)" @@ -241,7 +241,7 @@ grep -q "watching KV keys" "$AGENT_LOG" \ ############################################################################### log "phase 4: apply Deployment CR" cat </dev/null -apiVersion: iot.nationtech.io/v1alpha1 +apiVersion: fleet.nationtech.io/v1alpha1 kind: Deployment metadata: name: $DEPLOY_NAME @@ -276,7 +276,7 @@ echo "$KV_VALUE" | grep -q '"image":"docker.io/library/nginx:alpine"' \ log "wait for .status.observedScoreString" OBSERVED="" for _ in $(seq 1 30); do - OBSERVED="$(kubectl -n "$DEPLOY_NS" get deployment.iot.nationtech.io "$DEPLOY_NAME" \ + OBSERVED="$(kubectl -n "$DEPLOY_NS" get deployment.fleet.nationtech.io "$DEPLOY_NAME" \ -o jsonpath='{.status.observedScoreString}' 2>/dev/null || true)" [[ -n "$OBSERVED" ]] && break sleep 1 @@ -315,7 +315,7 @@ log "nginx responded" # phase 5 — delete CR, expect cleanup via finalizer + agent ############################################################################### log "phase 5: delete Deployment CR — finalizer + agent should remove KV and container" -kubectl -n "$DEPLOY_NS" delete deployment.iot.nationtech.io "$DEPLOY_NAME" --wait=true >/dev/null +kubectl -n "$DEPLOY_NS" delete deployment.fleet.nationtech.io "$DEPLOY_NAME" --wait=true >/dev/null log "wait for KV key removal" for _ in $(seq 1 30); do diff --git a/iot/scripts/smoke-a3-arm.sh b/fleet/scripts/smoke-a3-arm.sh similarity index 69% rename from iot/scripts/smoke-a3-arm.sh rename to fleet/scripts/smoke-a3-arm.sh index 49812d5a..8cbcc6b7 100755 --- a/iot/scripts/smoke-a3-arm.sh +++ b/fleet/scripts/smoke-a3-arm.sh @@ -4,7 +4,7 @@ # native KVM when the host is already arm64). # # This is exactly equivalent to: -# ARCH=aarch64 VM_NAME=iot-smoke-vm-arm ./smoke-a3.sh +# ARCH=aarch64 VM_NAME=fleet-smoke-vm-arm ./smoke-a3.sh # with the VM name defaulted so it can live alongside an x86-64 # smoke run on the same host without clobbering libvirt state. @@ -13,9 +13,9 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" export ARCH=aarch64 -export VM_NAME="${VM_NAME:-iot-smoke-vm-arm}" +export VM_NAME="${VM_NAME:-fleet-smoke-vm-arm}" export DEVICE_ID="${DEVICE_ID:-$VM_NAME}" -export NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats-a3-arm}" -export NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net-a3-arm}" +export NATS_CONTAINER="${NATS_CONTAINER:-fleet-smoke-nats-a3-arm}" +export NATS_NET_NAME="${NATS_NET_NAME:-fleet-smoke-net-a3-arm}" exec "$SCRIPT_DIR/smoke-a3.sh" "$@" diff --git a/iot/scripts/smoke-a3.sh b/fleet/scripts/smoke-a3.sh similarity index 85% rename from iot/scripts/smoke-a3.sh rename to fleet/scripts/smoke-a3.sh index 8bb8d5a5..b0533c87 100755 --- a/iot/scripts/smoke-a3.sh +++ b/fleet/scripts/smoke-a3.sh @@ -6,7 +6,7 @@ # ssh+Ansible ◀────┘ # │ # ▼ -# IotDeviceSetupScore ──▶ podman + iot-agent on VM +# FleetDeviceSetupScore ──▶ podman + fleet-agent on VM # │ # ▼ # existing operator ──NATS────────┘ (agent joins fleet, reconciles CR) @@ -32,7 +32,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" -VM_NAME="${VM_NAME:-iot-smoke-vm}" +VM_NAME="${VM_NAME:-fleet-smoke-vm}" DEVICE_ID="${DEVICE_ID:-$VM_NAME}" GROUP="${GROUP:-group-a}" LIBVIRT_URI="${LIBVIRT_URI:-qemu:///system}" @@ -43,8 +43,8 @@ LIBVIRT_URI="${LIBVIRT_URI:-qemu:///system}" # target, phase 4 timeout. 
ARCH="${ARCH:-x86-64}" -NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats-a3}" -NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net-a3}" +NATS_CONTAINER="${NATS_CONTAINER:-fleet-smoke-nats-a3}" +NATS_NET_NAME="${NATS_NET_NAME:-fleet-smoke-net-a3}" NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}" NATS_PORT="${NATS_PORT:-4222}" @@ -99,20 +99,20 @@ NAT_GW="$(virsh --connect "$LIBVIRT_URI" net-dumpxml default \ log "libvirt network gateway = $NAT_GW (VM will dial NATS at nats://$NAT_GW:$NATS_PORT)" # ---------------------------- phase 2: build --------------------------- -log "phase 2: build iot-agent-v0 for guest arch=$ARCH (release — debug binary fills cloud rootfs)" +log "phase 2: build harmony-fleet-agent for guest arch=$ARCH (release — debug binary fills cloud rootfs)" ( cd "$REPO_ROOT" if [[ -n "$AGENT_TARGET" ]]; then rustup target add "$AGENT_TARGET" >/dev/null - cargo build -q --release --target "$AGENT_TARGET" -p iot-agent-v0 + cargo build -q --release --target "$AGENT_TARGET" -p harmony-fleet-agent else - cargo build -q --release -p iot-agent-v0 + cargo build -q --release -p harmony-fleet-agent fi ) if [[ -n "$AGENT_TARGET" ]]; then - AGENT_BINARY="$REPO_ROOT/target/$AGENT_TARGET/release/iot-agent-v0" + AGENT_BINARY="$REPO_ROOT/target/$AGENT_TARGET/release/harmony-fleet-agent" else - AGENT_BINARY="$REPO_ROOT/target/release/iot-agent-v0" + AGENT_BINARY="$REPO_ROOT/target/release/harmony-fleet-agent" fi [[ -f "$AGENT_BINARY" ]] || fail "agent binary missing after build: $AGENT_BINARY" @@ -120,7 +120,7 @@ fi log "phase 3: bootstrap assets + provision VM + onboard device (arch=$EXAMPLE_ARCH)" ( cd "$REPO_ROOT" - cargo run -q --release -p example_iot_vm_setup -- \ + cargo run -q --release -p example_fleet_vm_setup -- \ --arch "$EXAMPLE_ARCH" \ --vm-name "$VM_NAME" \ --device-id "$DEVICE_ID" \ @@ -136,34 +136,34 @@ case "$ARCH" in aarch64|arm64) STATUS_TIMEOUT=300 ;; *) STATUS_TIMEOUT=60 ;; esac -log "phase 4: wait for agent to report status to NATS (timeout=${STATUS_TIMEOUT}s)" +log "phase 4: wait for agent to report heartbeat to NATS (timeout=${STATUS_TIMEOUT}s)" wait_for_status() { local timeout=$1 for _ in $(seq 1 "$timeout"); do if podman run --rm --network "$NATS_NET_NAME" \ docker.io/natsio/nats-box:latest \ - nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \ - "status.$DEVICE_ID" --raw >/dev/null 2>&1; then + nats --server "nats://$NATS_CONTAINER:4222" kv get device-heartbeat \ + "heartbeat.$DEVICE_ID" --raw >/dev/null 2>&1; then return 0 fi sleep 1 done return 1 } -wait_for_status "$STATUS_TIMEOUT" || fail "agent-status never appeared for $DEVICE_ID" -log "agent status present on NATS" +wait_for_status "$STATUS_TIMEOUT" || fail "device-heartbeat never appeared for $DEVICE_ID" +log "agent heartbeat present on NATS" # ---------------------------- phase 5: hard power-cycle, expect recovery ---------------------------- log "phase 5: power-cycle VM (virsh destroy + start) → agent must reconnect to NATS" nats_status_timestamp() { - # Prints the "timestamp" field of the status. entry, or "". + # Prints the "at" field of the heartbeat. entry, or "". # Never errors (for `set -e` safety). 
podman run --rm --network "$NATS_NET_NAME" \ docker.io/natsio/nats-box:latest \ - nats --server "nats://$NATS_CONTAINER:4222" kv get agent-status \ - "status.$DEVICE_ID" --raw 2>/dev/null \ - | grep -oE '"timestamp":"[^"]+"' \ + nats --server "nats://$NATS_CONTAINER:4222" kv get device-heartbeat \ + "heartbeat.$DEVICE_ID" --raw 2>/dev/null \ + | grep -oE '"at":"[^"]+"' \ | head -1 | cut -d'"' -f4 || true } diff --git a/fleet/scripts/smoke-a4.sh b/fleet/scripts/smoke-a4.sh new file mode 100755 index 00000000..b57d590b --- /dev/null +++ b/fleet/scripts/smoke-a4.sh @@ -0,0 +1,529 @@ +#!/usr/bin/env bash +# End-to-end hands-on demo: operator + in-cluster NATS + ARM VM agent. +# +# [k3d cluster] +# ├── NATS (single-node, NodePort 4222) +# └── CRD: fleet.nationtech.io/v1alpha1/Deployment +# ▲ +# │ kubectl apply / harmony_apply_deployment +# │ +# [host] +# ├── operator (cargo run) ──▶ NATS KV desired-state +# └── libvirt VM +# └── fleet-agent ──▶ NATS KV (watch) ──▶ podman container +# +# By default the script brings the whole stack up, applies no +# Deployment CR, prints a "command menu" of user-runnable one-liners, +# and blocks on Ctrl-C. With `--auto`, it also drives an apply + +# upgrade + delete cycle for regression coverage. +# +# Prereqs on the runner host (one-time, generic): +# 1. podman (rootless), cargo, kubectl, virsh, xorriso, python3, +# libvirt, qemu-system-x86_64/aarch64 + edk2 firmware for the +# chosen ARCH. +# 2. Be in the `libvirt` group. +# 3. `sudo virsh net-start default` (once per boot unless autostart). +# 4. Rootless podman user socket running: +# `systemctl --user start podman.socket`. +# 5. k3d binary at $K3D_BIN (defaults to Harmony's downloaded copy). + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +OPERATOR_DIR="$REPO_ROOT/fleet/harmony-fleet-operator" + +# ---- config ----------------------------------------------------------------- + +K3D_BIN="${K3D_BIN:-$HOME/.local/share/harmony/k3d/k3d}" +CLUSTER_NAME="${CLUSTER_NAME:-fleet-demo}" + +ARCH="${ARCH:-x86-64}" +VM_NAME="${VM_NAME:-fleet-demo-vm}" +DEVICE_ID="${DEVICE_ID:-$VM_NAME}" +GROUP="${GROUP:-group-a}" +LIBVIRT_URI="${LIBVIRT_URI:-qemu:///system}" + +NATS_NAMESPACE="${NATS_NAMESPACE:-fleet-system}" +NATS_NAME="${NATS_NAME:-fleet-nats}" +NATS_NODE_PORT="${NATS_NODE_PORT:-4222}" + +DEPLOY_NS="${DEPLOY_NS:-fleet-demo}" +DEPLOY_NAME="${DEPLOY_NAME:-hello-world}" +DEPLOY_PORT="${DEPLOY_PORT:-8080:80}" + +# Source image we sideload into the VM's podman. Defaults to the +# `nginx:alpine` variant (~60 MB) which is almost always cached on +# dev boxes and keeps TCG-aarch64 boot budgets sane. The tarball +# transport + podman IfNotPresent semantics mean the agent never +# hits a public registry for this image. +SRC_IMAGE="${SRC_IMAGE:-docker.io/library/nginx:alpine}" + +AUTO=0 +[[ "${1:-}" == "--auto" ]] && AUTO=1 + +OPERATOR_LOG="$(mktemp -t harmony-fleet-operator.XXXXXX.log)" +OPERATOR_PID="" +KUBECONFIG_FILE="" + +# ---- arch demux ------------------------------------------------------------- + +case "$ARCH" in + x86-64|x86_64) + EXAMPLE_ARCH=x86-64 + AGENT_TARGET= + # Native-KVM x86: podman pull + layer unpack is seconds. + CONTAINER_WAIT_STEPS=90 # 180 s + ;; + aarch64|arm64) + EXAMPLE_ARCH=aarch64 + AGENT_TARGET=aarch64-unknown-linux-gnu + # TCG aarch64: network stack + userns layer unpack run + # ~3-5× slower than native. An `nginx:latest` pull (~250 MB) + # on a cold image takes 4-8 min observed here. Give it 15. 
+ CONTAINER_WAIT_STEPS=450 # 900 s + ;; + *) printf '[smoke-a4 FAIL] unsupported ARCH=%s (expected: x86-64 | aarch64)\n' "$ARCH" >&2; exit 1 ;; +esac + +log() { printf '\033[1;34m[smoke-a4]\033[0m %s\n' "$*"; } +fail() { printf '\033[1;31m[smoke-a4 FAIL]\033[0m %s\n' "$*" >&2; exit 1; } + +cleanup() { + local rc=$? + log "cleanup…" + if [[ -n "$OPERATOR_PID" ]] && kill -0 "$OPERATOR_PID" 2>/dev/null; then + kill "$OPERATOR_PID" 2>/dev/null || true + wait "$OPERATOR_PID" 2>/dev/null || true + fi + if [[ "${KEEP:-0}" != "1" ]]; then + virsh --connect "$LIBVIRT_URI" destroy "$VM_NAME" 2>/dev/null || true + virsh --connect "$LIBVIRT_URI" undefine --nvram \ + --remove-all-storage "$VM_NAME" 2>/dev/null || true + "$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true + [[ -n "$KUBECONFIG_FILE" ]] && rm -f "$KUBECONFIG_FILE" + else + log "KEEP=1 — leaving cluster '$CLUSTER_NAME' and VM '$VM_NAME' running" + [[ -n "$KUBECONFIG_FILE" ]] && log "KUBECONFIG=$KUBECONFIG_FILE" + fi + if [[ $rc -ne 0 && -s "$OPERATOR_LOG" ]]; then + log "operator log at $OPERATOR_LOG" + echo "----- operator log tail -----" + tail -n 40 "$OPERATOR_LOG" 2>/dev/null || true + else + rm -f "$OPERATOR_LOG" + fi + exit $rc +} +trap cleanup EXIT INT TERM + +require() { command -v "$1" >/dev/null 2>&1 || fail "missing required tool: $1"; } +require cargo +require kubectl +require virsh +require podman +require docker # cross-runtime image transfer for k3d sideload +[[ -x "$K3D_BIN" ]] || fail "k3d binary not executable at $K3D_BIN (set K3D_BIN=…)" + +# ---- phase 1: k3d cluster with NATS port exposed ---------------------------- + +log "phase 1: create k3d cluster '$CLUSTER_NAME' (host port $NATS_NODE_PORT → loadbalancer)" +"$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true +"$K3D_BIN" cluster create "$CLUSTER_NAME" \ + --wait --timeout 90s \ + -p "${NATS_NODE_PORT}:${NATS_NODE_PORT}@loadbalancer" \ + >/dev/null +KUBECONFIG_FILE="$(mktemp -t fleet-demo-kubeconfig.XXXXXX)" +"$K3D_BIN" kubeconfig get "$CLUSTER_NAME" > "$KUBECONFIG_FILE" +export KUBECONFIG="$KUBECONFIG_FILE" + +# ---- phase 2: NATS in-cluster via NatsBasicScore ---------------------------- + +NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}" + +# Sideload the NATS image into k3d so the install doesn't race the +# Docker Hub rate limiter. `docker inspect` + `podman save` + `docker +# load` is the cross-runtime bridge on hosts that have both (rootful +# docker for k3d, rootless podman for IoT smokes). Cheap when the +# image is already in podman's store; a one-time Hub pull when not. +log "phase 2a: sideload NATS image ($NATS_IMAGE) into k3d cluster" +if ! docker image inspect "$NATS_IMAGE" >/dev/null 2>&1; then + if ! 
podman image inspect "$NATS_IMAGE" >/dev/null 2>&1; then + log "NATS image not cached locally — pulling from Docker Hub" + podman pull "$NATS_IMAGE" >/dev/null || fail "podman pull $NATS_IMAGE failed" + fi + tmptar="$(mktemp -t nats-image.XXXXXX.tar)" + podman save "$NATS_IMAGE" -o "$tmptar" >/dev/null + docker load -i "$tmptar" >/dev/null + rm -f "$tmptar" +fi +"$K3D_BIN" image import "$NATS_IMAGE" -c "$CLUSTER_NAME" >/dev/null + +log "phase 2b: install NATS in-cluster via NatsBasicScore (namespace=$NATS_NAMESPACE, expose=load-balancer)" +( + cd "$REPO_ROOT" + cargo run -q --release -p example_fleet_nats_install -- \ + --namespace "$NATS_NAMESPACE" \ + --name "$NATS_NAME" \ + --expose load-balancer +) +log "waiting for NATS Deployment to be Available" +kubectl -n "$NATS_NAMESPACE" wait --for=condition=Available \ + "deployment/$NATS_NAME" --timeout=120s >/dev/null + +# kubectl "Available" reports on pod readiness — k3d's klipper-lb +# takes a further few seconds to wire the host loadbalancer port to +# the Service endpoints. Probe the actual TCP port from the host +# before declaring NATS routable, else the operator's connect will +# race and die with "expected INFO, got nothing." +log "probing nats://localhost:$NATS_NODE_PORT end-to-end" +for _ in $(seq 1 60); do + if (echo >"/dev/tcp/127.0.0.1/$NATS_NODE_PORT") 2>/dev/null; then + break + fi + sleep 1 +done +(echo >"/dev/tcp/127.0.0.1/$NATS_NODE_PORT") 2>/dev/null \ + || fail "TCP localhost:$NATS_NODE_PORT never came up after Deployment Available" + +# ---- phase 3: install Deployment CRD via operator's Score-based install ----- + +log "phase 3: install Deployment CRD via operator \`install\` subcommand" +( + cd "$OPERATOR_DIR" + cargo run -q -- install +) +kubectl wait --for=condition=Established \ + "crd/deployments.fleet.nationtech.io" --timeout=30s >/dev/null + +kubectl get ns "$DEPLOY_NS" >/dev/null 2>&1 || \ + kubectl create namespace "$DEPLOY_NS" >/dev/null + +# ---- phase 4: operator running host-side ------------------------------------ + +log "phase 4: start operator (host-side) connected to nats://localhost:$NATS_NODE_PORT" +( + cd "$OPERATOR_DIR" + cargo build -q --release +) +NATS_URL="nats://localhost:$NATS_NODE_PORT" \ +KV_BUCKET="desired-state" \ +RUST_LOG="info,kube_runtime=warn" \ + "$REPO_ROOT/target/release/harmony-fleet-operator" \ + >"$OPERATOR_LOG" 2>&1 & +OPERATOR_PID=$! +log "operator pid=$OPERATOR_PID (log: $OPERATOR_LOG)" +for _ in $(seq 1 30); do + if grep -q "starting Deployment controller" "$OPERATOR_LOG"; then break; fi + if ! kill -0 "$OPERATOR_PID" 2>/dev/null; then fail "operator exited early"; fi + sleep 0.5 +done +grep -q "starting Deployment controller" "$OPERATOR_LOG" \ + || fail "operator never logged 'starting Deployment controller'" +grep -q "KV bucket ready" "$OPERATOR_LOG" \ + || fail "operator never confirmed KV bucket ready" + +# ---- phase 4.5: export the workload image to a tarball ---------------------- +# Instead of running a local OCI registry (which needs `registry:2` from +# Docker Hub — rate-limited!), sideload the image straight into the VM's +# podman via `podman save`/`scp`/`podman load`. Paired with harmony's +# `PodmanTopology::ensure_image_present` (IfNotPresent semantics: present +# = skip pull), the agent never touches a public registry for known +# images. This is the same compounding-framework-value move as the k3d +# NATS sideload in phase 2a. 
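The comment above leans on an IfNotPresent check: inspect the local store first and only pull when the image is missing. A rough sketch of that behaviour, shelling out to podman; the function name and error handling are illustrative and this is not harmony's actual `PodmanTopology::ensure_image_present` implementation:

    use std::process::Command;

    /// Illustrative "present = skip pull" semantics for a local podman store.
    fn ensure_image_present_sketch(image: &str) -> std::io::Result<()> {
        // `podman image inspect` exits 0 only when the image is already local.
        let present = Command::new("podman")
            .args(["image", "inspect", image])
            .output()?
            .status
            .success();
        if present {
            return Ok(()); // already in the store: never touch a registry
        }
        let pulled = Command::new("podman").args(["pull", image]).status()?;
        if pulled.success() {
            Ok(())
        } else {
            Err(std::io::Error::other(format!("podman pull {image} failed")))
        }
    }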
+ +NAT_GW="$(virsh --connect "$LIBVIRT_URI" net-dumpxml default \ + | grep -oP "ip address='\K[^']+" | head -1)" +[[ -n "$NAT_GW" ]] || fail "couldn't determine libvirt 'default' gateway IP" +log "libvirt network gateway = $NAT_GW (VM agent will dial nats://$NAT_GW:$NATS_NODE_PORT)" + +log "phase 4.5: export $SRC_IMAGE to a local tarball for VM sideload" +# Arch the VM expects. +case "$ARCH" in + x86-64|x86_64) EXPECTED_IMAGE_ARCH=amd64 ;; + aarch64|arm64) EXPECTED_IMAGE_ARCH=arm64 ;; +esac +if ! podman image inspect "$SRC_IMAGE" >/dev/null 2>&1; then + log "source image $SRC_IMAGE not cached — attempting pull (platform=$EXPECTED_IMAGE_ARCH)" + podman pull --platform="linux/$EXPECTED_IMAGE_ARCH" "$SRC_IMAGE" >/dev/null || \ + fail "podman pull $SRC_IMAGE failed (Docker Hub rate limit?). \ +Pre-pull it when the quota is available (\`podman pull --platform=linux/$EXPECTED_IMAGE_ARCH $SRC_IMAGE\`), then re-run." +fi +# Verify arch matches. A podman cache shared across ARCH= runs can +# end up with a tag pointing at the wrong arch (pulling +# \`nginx:alpine\` for arm64 overwrites the tag's arm64/amd64 +# binding). Better to fail loudly here than ship the VM an image +# it can't exec. +IMAGE_ACTUAL_ARCH="$(podman inspect "$SRC_IMAGE" --format '{{.Architecture}}' 2>/dev/null || true)" +if [[ "$IMAGE_ACTUAL_ARCH" != "$EXPECTED_IMAGE_ARCH" ]]; then + fail "$SRC_IMAGE is arch '$IMAGE_ACTUAL_ARCH' but ARCH=$ARCH needs '$EXPECTED_IMAGE_ARCH'. \ +Either pre-pull the right platform (\`podman pull --platform=linux/$EXPECTED_IMAGE_ARCH $SRC_IMAGE\`) \ +or point SRC_IMAGE at a locally-tagged variant." +fi + +# The smoke upgrade test asserts container id change on image-tag +# change, so we'll expose two distinct local tag names pointing at +# the same bits. Tagging happens on the VM side after `podman load` +# so we stay compatible with older podman versions that don't grok +# the multi-image archive format (`podman save -m`). +V1_IMAGE="localdev/nginx:v1" +V2_IMAGE="localdev/nginx:v2" + +IMAGE_TARBALL="$(mktemp -t fleet-demo-images.XXXXXX.tar)" +podman save -o "$IMAGE_TARBALL" "$SRC_IMAGE" >/dev/null \ + || fail "podman save failed" +log "exported $SRC_IMAGE → $IMAGE_TARBALL ($(du -h "$IMAGE_TARBALL" | cut -f1))" + +# ---- phase 5: provision VM + install agent ---------------------------------- + +log "phase 5: build harmony-fleet-agent for arch=$ARCH + provision VM" +( + cd "$REPO_ROOT" + if [[ -n "$AGENT_TARGET" ]]; then + rustup target add "$AGENT_TARGET" >/dev/null + cargo build -q --release --target "$AGENT_TARGET" -p harmony-fleet-agent + else + cargo build -q --release -p harmony-fleet-agent + fi +) +if [[ -n "$AGENT_TARGET" ]]; then + AGENT_BINARY="$REPO_ROOT/target/$AGENT_TARGET/release/harmony-fleet-agent" +else + AGENT_BINARY="$REPO_ROOT/target/release/harmony-fleet-agent" +fi +[[ -f "$AGENT_BINARY" ]] || fail "agent binary missing: $AGENT_BINARY" + +( + cd "$REPO_ROOT" + # Pass through FLEET_VM_ADMIN_PASSWORD if set so the VM admin user + # accepts SSH password auth. Useful for chaos / reliability + # testing sessions where the operator wants to log in and break + # things on purpose. Unset by default = key-only auth. 
+ cargo run -q --release -p example_fleet_vm_setup -- \ + --arch "$EXAMPLE_ARCH" \ + --vm-name "$VM_NAME" \ + --device-id "$DEVICE_ID" \ + --group "$GROUP" \ + --agent-binary "$AGENT_BINARY" \ + --nats-url "nats://$NAT_GW:$NATS_NODE_PORT" +) + +VM_IP="$(virsh --connect "$LIBVIRT_URI" domifaddr "$VM_NAME" \ + | awk '/ipv4/ { print $4 }' | head -1 | cut -d/ -f1)" +[[ -n "$VM_IP" ]] || fail "couldn't resolve VM IP" + +# ---- phase 5c: sideload workload images into fleet-agent's podman ------------- + +log "phase 5c: sideload $V1_IMAGE + $V2_IMAGE into fleet-agent's podman on VM" +# scp the tarball (ssh as the admin user, the only one with sshd +# access), then `podman load` inside an fleet-agent user session. +# Post-load the fleet-agent's podman has both tags locally, so +# `ensure_image_present` in harmony's PodmanTopology takes the +# "already present, skip pull" branch — no Docker Hub hit. +scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ + -i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \ + "$IMAGE_TARBALL" "fleet-admin@$VM_IP:/tmp/fleet-demo-images.tar" >/dev/null \ + || fail "scp image tarball to VM failed" +ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ + -i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \ + "fleet-admin@$VM_IP" -- \ + "sudo chown fleet-agent:fleet-agent /tmp/fleet-demo-images.tar && \ + sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman load -i /tmp/fleet-demo-images.tar' && \ + sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman tag $SRC_IMAGE $V1_IMAGE' && \ + sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman tag $SRC_IMAGE $V2_IMAGE' && \ + sudo rm -f /tmp/fleet-demo-images.tar" >/dev/null \ + || fail "podman load + tag on VM failed" +rm -f "$IMAGE_TARBALL" +log "sideload complete — fleet-agent's podman has $V1_IMAGE + $V2_IMAGE" + +# ---- phase 6: sanity -------------------------------------------------------- + +log "phase 6: sanity — operator + agent + KV" +for _ in $(seq 1 60); do + if kubectl -n "$NATS_NAMESPACE" get pod -l app="$NATS_NAME" \ + -o jsonpath='{.items[0].status.phase}' 2>/dev/null \ + | grep -q Running; then + break + fi + sleep 1 +done + +# NATS box one-liner we'll reuse in the hand-off too. Uses the host +# loadbalancer port so no pod-network plumbing needed. 
+NATSBOX_HOST="podman run --rm docker.io/natsio/nats-box:latest \ +nats --server nats://host.containers.internal:$NATS_NODE_PORT" + +log "checking agent heartbeat in NATS KV (device-heartbeat bucket)" +for _ in $(seq 1 30); do + if $NATSBOX_HOST kv get device-heartbeat "heartbeat.$DEVICE_ID" --raw \ + >/dev/null 2>&1; then + break + fi + sleep 2 +done +$NATSBOX_HOST kv get device-heartbeat "heartbeat.$DEVICE_ID" --raw >/dev/null \ + || fail "agent never published heartbeat to NATS" +log "agent heartbeat present: heartbeat.$DEVICE_ID" + +# ---- phase 7: either hand off to user, or drive regression ------------------ + +if [[ "$AUTO" == "1" ]]; then + log "phase 7 (--auto): apply nginx via typed CR, verify, upgrade, delete" + + log "applying $V1_IMAGE deployment" + ( + cd "$REPO_ROOT" + cargo run -q -p example_harmony_apply_deployment -- \ + --namespace "$DEPLOY_NS" \ + --name "$DEPLOY_NAME" \ + --target-device "$DEVICE_ID" \ + --image "$V1_IMAGE" \ + --port "$DEPLOY_PORT" + ) + + log "waiting for container on VM (up to $((CONTAINER_WAIT_STEPS * 2))s)" + CONTAINER_ID_V1="" + for _ in $(seq 1 "$CONTAINER_WAIT_STEPS"); do + id="$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ + -i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \ + "fleet-admin@$VM_IP" -- \ + "sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman ps -q --filter name=$DEPLOY_NAME'" \ + 2>/dev/null | head -1)" || true + if [[ -n "$id" ]]; then CONTAINER_ID_V1="$id"; break; fi + sleep 2 + done + [[ -n "$CONTAINER_ID_V1" ]] || fail "nginx container never appeared on VM" + log "container id (v1): $CONTAINER_ID_V1" + + log "curl http://$VM_IP:${DEPLOY_PORT%%:*}/" + for _ in $(seq 1 30); do + if curl -sf "http://$VM_IP:${DEPLOY_PORT%%:*}/" >/dev/null; then + log "nginx responded (v1)"; break + fi + sleep 2 + done + + log "waiting for operator to aggregate .status.aggregate.succeeded == 1" + for _ in $(seq 1 30); do + got="$(kubectl -n "$DEPLOY_NS" get deployment.fleet.nationtech.io "$DEPLOY_NAME" \ + -o jsonpath='{.status.aggregate.succeeded}' 2>/dev/null || true)" + if [[ "$got" == "1" ]]; then + log ".status.aggregate.succeeded = 1 — aggregator reflected agent state" + break + fi + sleep 2 + done + got="$(kubectl -n "$DEPLOY_NS" get deployment.fleet.nationtech.io "$DEPLOY_NAME" \ + -o jsonpath='{.status.aggregate.succeeded}' 2>/dev/null || true)" + [[ "$got" == "1" ]] || fail ".status.aggregate.succeeded never reached 1 (got '$got')" + + log "upgrading to $V2_IMAGE" + ( + cd "$REPO_ROOT" + cargo run -q -p example_harmony_apply_deployment -- \ + --namespace "$DEPLOY_NS" \ + --name "$DEPLOY_NAME" \ + --target-device "$DEVICE_ID" \ + --image "$V2_IMAGE" \ + --port "$DEPLOY_PORT" + ) + log "waiting for container id to change (upgrade, up to $((CONTAINER_WAIT_STEPS * 2))s)" + CONTAINER_ID_V2="" + for _ in $(seq 1 "$CONTAINER_WAIT_STEPS"); do + id="$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ + -i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \ + "fleet-admin@$VM_IP" -- \ + "sudo su - fleet-agent -c 'XDG_RUNTIME_DIR=/run/user/\$(id -u) podman ps -q --filter name=$DEPLOY_NAME'" \ + 2>/dev/null | head -1)" || true + if [[ -n "$id" && "$id" != "$CONTAINER_ID_V1" ]]; then + CONTAINER_ID_V2="$id"; break + fi + sleep 2 + done + [[ -n "$CONTAINER_ID_V2" ]] || fail "container id did not change after upgrade" + log "container id (v2): $CONTAINER_ID_V2 — upgrade confirmed" + + log "deleting deployment" + ( + cd "$REPO_ROOT" + cargo run -q -p example_harmony_apply_deployment -- \ + 
--namespace "$DEPLOY_NS" \ + --name "$DEPLOY_NAME" \ + --target-device "$DEVICE_ID" \ + --delete + ) + for _ in $(seq 1 60); do + if ! ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ + -i "$HOME/.local/share/harmony/fleet/ssh/id_ed25519" \ + "fleet-admin@$VM_IP" -- podman ps -q --filter "name=$DEPLOY_NAME" 2>/dev/null \ + | grep -q .; then + log "container removed from VM" + break + fi + sleep 2 + done + + log "PASS (--auto)" + exit 0 +fi + +# ---- hand-off mode ---------------------------------------------------------- + +SSH_KEY="$HOME/.local/share/harmony/fleet/ssh/id_ed25519" + +cat <.`) and every in-memory map +/// keyed by "which deployment." A raw `String` here would let an +/// invalid name (containing a `.`, splitting into extra subject +/// tokens) break routing at runtime. +/// +/// Validation: +/// - Not empty. +/// - No `.` (would alias an extra subject token). +/// - No `*` / `>` (NATS wildcards). +/// - No ASCII whitespace. +/// - ≤ 253 bytes (RFC 1123 max, matches Kubernetes name limit). +/// +/// The constructor is fallible; deserialization runs the same +/// validation so malformed payloads are rejected at the wire. +#[derive(Debug, Clone, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize)] +#[serde(transparent)] +pub struct DeploymentName(String); + +#[derive(Debug, thiserror::Error, PartialEq, Eq)] +pub enum InvalidDeploymentName { + #[error("deployment name must not be empty")] + Empty, + #[error("deployment name must not exceed 253 bytes")] + TooLong, + #[error("deployment name must not contain '.' (would alias an extra NATS subject token)")] + ContainsDot, + #[error("deployment name must not contain NATS wildcards '*' or '>'")] + ContainsWildcard, + #[error("deployment name must not contain whitespace")] + ContainsWhitespace, +} + +impl DeploymentName { + pub fn try_new(s: impl Into) -> Result { + let s = s.into(); + if s.is_empty() { + return Err(InvalidDeploymentName::Empty); + } + if s.len() > 253 { + return Err(InvalidDeploymentName::TooLong); + } + if s.contains('.') { + return Err(InvalidDeploymentName::ContainsDot); + } + if s.contains('*') || s.contains('>') { + return Err(InvalidDeploymentName::ContainsWildcard); + } + if s.chars().any(|c| c.is_ascii_whitespace()) { + return Err(InvalidDeploymentName::ContainsWhitespace); + } + Ok(Self(s)) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for DeploymentName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + +impl<'de> Deserialize<'de> for DeploymentName { + fn deserialize>(de: D) -> Result { + let s = String::deserialize(de)?; + Self::try_new(s).map_err(serde::de::Error::custom) + } +} + +/// Static-ish per-device facts: routing labels, hardware, agent +/// version. Written to KV key `info.` in +/// [`crate::BUCKET_DEVICE_INFO`]. Rewritten by the agent on startup +/// and whenever its labels change — **not** on every heartbeat. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct DeviceInfo { + pub device_id: Id, + /// Routing labels. Operator resolves Deployment + /// `targetSelector.matchLabels` against this map. + #[serde(default)] + pub labels: BTreeMap, + /// Hardware / OS snapshot. `None` until the first post-startup + /// publish. + #[serde(default)] + pub inventory: Option, + /// RFC 3339 UTC timestamp of this publish. + pub updated_at: DateTime, +} + +/// Authoritative current phase for one `(device, deployment)` pair. +/// Written to KV key `state..` in +/// [`crate::BUCKET_DEVICE_STATE`]. 
Deleted when the deployment is +/// removed from the device. +/// +/// The operator's KV watch sees every write + delete in order, so +/// this value alone — plus the operator's in-memory belief about +/// the last phase for the pair — is enough to drive the aggregate +/// counters. No separate event stream, no per-write revision. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct DeploymentState { + pub device_id: Id, + pub deployment: DeploymentName, + pub phase: Phase, + pub last_event_at: DateTime, + #[serde(default)] + pub last_error: Option, +} + +/// Tiny liveness ping. Written to KV key `heartbeat.` in +/// [`crate::BUCKET_DEVICE_HEARTBEAT`]. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct HeartbeatPayload { + pub device_id: Id, + pub at: DateTime, +} + +#[cfg(test)] +mod tests { + use super::*; + + fn ts(s: &str) -> DateTime { + DateTime::parse_from_rfc3339(s).unwrap().with_timezone(&Utc) + } + + fn dn(s: &str) -> DeploymentName { + DeploymentName::try_new(s).expect("valid") + } + + #[test] + fn deployment_name_accepts_rfc1123() { + assert!(DeploymentName::try_new("hello-world").is_ok()); + assert!(DeploymentName::try_new("a").is_ok()); + assert!(DeploymentName::try_new("a-b-c-1-2-3").is_ok()); + } + + #[test] + fn deployment_name_rejects_dot() { + assert_eq!( + DeploymentName::try_new("hello.world"), + Err(InvalidDeploymentName::ContainsDot) + ); + } + + #[test] + fn deployment_name_rejects_nats_wildcards() { + assert_eq!( + DeploymentName::try_new("hello*"), + Err(InvalidDeploymentName::ContainsWildcard) + ); + assert_eq!( + DeploymentName::try_new("hello>"), + Err(InvalidDeploymentName::ContainsWildcard) + ); + } + + #[test] + fn deployment_name_rejects_empty_and_too_long() { + assert_eq!( + DeploymentName::try_new(""), + Err(InvalidDeploymentName::Empty) + ); + assert_eq!( + DeploymentName::try_new("x".repeat(254)), + Err(InvalidDeploymentName::TooLong) + ); + } + + #[test] + fn deployment_name_rejects_whitespace() { + assert_eq!( + DeploymentName::try_new("hello world"), + Err(InvalidDeploymentName::ContainsWhitespace) + ); + assert_eq!( + DeploymentName::try_new("hello\tworld"), + Err(InvalidDeploymentName::ContainsWhitespace) + ); + } + + #[test] + fn deployment_name_deserialization_validates() { + let json = r#""bad.name""#; + let result: Result = serde_json::from_str(json); + assert!(result.is_err()); + } + + #[test] + fn deployment_name_roundtrip() { + let name = dn("hello-world"); + let json = serde_json::to_string(&name).unwrap(); + assert_eq!(json, r#""hello-world""#); + let back: DeploymentName = serde_json::from_str(&json).unwrap(); + assert_eq!(name, back); + } + + #[test] + fn deployment_state_roundtrip() { + let original = DeploymentState { + device_id: Id::from("pi-01".to_string()), + deployment: dn("hello-web"), + phase: Phase::Failed, + last_event_at: ts("2026-04-22T10:05:00Z"), + last_error: Some("image pull 429".to_string()), + }; + let json = serde_json::to_string(&original).unwrap(); + let back: DeploymentState = serde_json::from_str(&json).unwrap(); + assert_eq!(original, back); + } + + #[test] + fn heartbeat_is_tiny() { + let hb = HeartbeatPayload { + device_id: Id::from("pi-01".to_string()), + at: ts("2026-04-22T10:00:30Z"), + }; + let bytes = serde_json::to_vec(&hb).unwrap(); + assert!( + bytes.len() < 96, + "heartbeat payload grew to {} bytes: {}", + bytes.len(), + String::from_utf8_lossy(&bytes), + ); + } + + #[test] + fn device_info_roundtrip() { + let original = DeviceInfo { + device_id: 
Id::from("pi-01".to_string()), + labels: BTreeMap::from([("group".to_string(), "site-a".to_string())]), + inventory: Some(InventorySnapshot { + hostname: "pi-01".to_string(), + arch: "aarch64".to_string(), + os: "Ubuntu 24.04".to_string(), + kernel: "6.8.0".to_string(), + cpu_cores: 4, + memory_mb: 8192, + agent_version: "0.1.0".to_string(), + }), + updated_at: ts("2026-04-22T10:00:00Z"), + }; + let json = serde_json::to_string(&original).unwrap(); + let back: DeviceInfo = serde_json::from_str(&json).unwrap(); + assert_eq!(original, back); + } +} diff --git a/harmony-reconciler-contracts/src/kv.rs b/harmony-reconciler-contracts/src/kv.rs index c773eba4..0a0971ec 100644 --- a/harmony-reconciler-contracts/src/kv.rs +++ b/harmony-reconciler-contracts/src/kv.rs @@ -7,47 +7,88 @@ //! here; agent + operator consume the constants directly, and smoke //! scripts grep for the literal values locked in the tests below. +use crate::fleet::DeploymentName; + /// Operator-written bucket. One entry per `(device, deployment)` pair. /// Values are the JSON-serialized Score envelope — today -/// `harmony::modules::podman::IotScore`, tomorrow any variant of +/// `harmony::modules::podman::ReconcileScore`, tomorrow any variant of /// a polymorphic `Score` enum the framework ships. pub const BUCKET_DESIRED_STATE: &str = "desired-state"; -/// Agent-written bucket. One entry per device at `status.`. -/// Values are JSON-serialized [`crate::AgentStatus`]. -pub const BUCKET_AGENT_STATUS: &str = "agent-status"; +/// Static-ish per-device facts: routing labels, inventory, agent +/// version. Agent rewrites the entry on startup and whenever its +/// labels change. Key format: `info.`. +pub const BUCKET_DEVICE_INFO: &str = "device-info"; + +/// Current reconcile phase for each `(device, deployment)` pair. +/// Agent writes on phase transition; operator watches this bucket +/// to drive CR `.status.aggregate`. Authoritative source of truth +/// for "what's running where." Key format: +/// `state..`. +pub const BUCKET_DEVICE_STATE: &str = "device-state"; + +/// Tiny liveness ping from each device every N seconds. Separate +/// from [`BUCKET_DEVICE_STATE`] so routine heartbeats don't churn +/// the state bucket. Key format: `heartbeat.`. +pub const BUCKET_DEVICE_HEARTBEAT: &str = "device-heartbeat"; /// KV key for a `(device, deployment)` pair in [`BUCKET_DESIRED_STATE`]. /// Format: `.`. -pub fn desired_state_key(device_id: &str, deployment_name: &str) -> String { - format!("{device_id}.{deployment_name}") +pub fn desired_state_key(device_id: &str, deployment_name: &DeploymentName) -> String { + format!("{device_id}.{}", deployment_name.as_str()) } -/// KV key for a device's last-known status in [`BUCKET_AGENT_STATUS`]. -/// Format: `status.`. -pub fn status_key(device_id: &str) -> String { - format!("status.{device_id}") +/// KV key for a device's `DeviceInfo` entry in [`BUCKET_DEVICE_INFO`]. +/// Format: `info.`. +pub fn device_info_key(device_id: &str) -> String { + format!("info.{device_id}") +} + +/// KV key for a `(device, deployment)` state entry in +/// [`BUCKET_DEVICE_STATE`]. Format: `state..`. +pub fn device_state_key(device_id: &str, deployment_name: &DeploymentName) -> String { + format!("state.{device_id}.{}", deployment_name.as_str()) +} + +/// KV key for a device's liveness entry in +/// [`BUCKET_DEVICE_HEARTBEAT`]. Format: `heartbeat.`. 
+pub fn device_heartbeat_key(device_id: &str) -> String { + format!("heartbeat.{device_id}") } #[cfg(test)] mod tests { use super::*; + fn dn(s: &str) -> crate::DeploymentName { + crate::DeploymentName::try_new(s).expect("valid") + } + #[test] fn desired_state_key_format() { - assert_eq!(desired_state_key("pi-01", "hello-web"), "pi-01.hello-web"); + assert_eq!( + desired_state_key("pi-01", &dn("hello-web")), + "pi-01.hello-web" + ); } #[test] - fn status_key_format() { - assert_eq!(status_key("pi-01"), "status.pi-01"); - } - - #[test] - fn bucket_names_match_smoke_scripts() { - // These strings are also grepped by iot/scripts/smoke-*.sh — - // flipping them here must be paired with a script update. + fn bucket_names_stable() { + // Flipping these is a cross-component break — operator, + // agent, and smoke scripts all grep for the literal values. assert_eq!(BUCKET_DESIRED_STATE, "desired-state"); - assert_eq!(BUCKET_AGENT_STATUS, "agent-status"); + assert_eq!(BUCKET_DEVICE_INFO, "device-info"); + assert_eq!(BUCKET_DEVICE_STATE, "device-state"); + assert_eq!(BUCKET_DEVICE_HEARTBEAT, "device-heartbeat"); + } + + #[test] + fn key_formats() { + assert_eq!(device_info_key("pi-01"), "info.pi-01"); + assert_eq!( + device_state_key("pi-01", &dn("hello-web")), + "state.pi-01.hello-web" + ); + assert_eq!(device_heartbeat_key("pi-01"), "heartbeat.pi-01"); } } diff --git a/harmony-reconciler-contracts/src/lib.rs b/harmony-reconciler-contracts/src/lib.rs index 24aeeae4..5127d0a8 100644 --- a/harmony-reconciler-contracts/src/lib.rs +++ b/harmony-reconciler-contracts/src/lib.rs @@ -3,28 +3,31 @@ //! Harmony's "reconciler" pattern is: a central **operator** writes //! desired state into NATS JetStream KV; a remote **agent** watches //! the KV, deserializes each entry as a Score, and drives the host -//! toward that state. This split lets one operator orchestrate a -//! fleet of agents across network boundaries it can't reach -//! directly — IoT devices today, OKD cluster agents or edge-compute -//! reconcilers tomorrow. +//! toward that state. The agent writes back per-device info and +//! per-deployment state into separate KV buckets; the operator reads +//! those to aggregate `.status.aggregate` onto the CR. //! //! This crate holds the wire-format bits both sides must agree on: -//! NATS bucket names, KV key formats, and the `AgentStatus` -//! heartbeat payload. The Score types themselves (`PodmanV0Score`, -//! future variants) live in their respective harmony modules — -//! consumers import them from there and serialize them over the -//! transport this crate describes. +//! NATS bucket names, KV key formats, and the typed payloads +//! (`DeviceInfo`, `DeploymentState`, `HeartbeatPayload`). The Score +//! types themselves live in their respective harmony modules. //! //! **Deliberately lean** — no tokio, no async-nats, no harmony. //! The on-device agent build pulls it in alongside a minimal //! async-nats client; the operator pulls it alongside kube-rs. -//! Neither should pay for the other's dependencies. 
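A consumer-side sketch of how the re-exported contract pieces fit together, assuming the consumer also depends on chrono and serde_json; key formats and bucket names match the kv.rs helpers above, and the error plumbing is illustrative:

    use chrono::Utc;
    use harmony_reconciler_contracts::{
        device_state_key, DeploymentName, DeploymentState, Id, Phase, BUCKET_DEVICE_STATE,
    };

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // Reject-at-construction: "hello.web" would add a NATS subject token,
        // so try_new refuses it before anything touches the wire.
        let name = DeploymentName::try_new("hello-web")?;

        // Key the agent writes its phase under in the device-state bucket.
        let key = device_state_key("pi-01", &name);
        assert_eq!(key, "state.pi-01.hello-web");

        let state = DeploymentState {
            device_id: Id::from("pi-01".to_string()),
            deployment: name,
            phase: Phase::Running,
            last_event_at: Utc::now(),
            last_error: None,
        };
        let payload = serde_json::to_vec(&state)?;
        println!("{} bytes for {BUCKET_DEVICE_STATE}/{key}", payload.len());
        Ok(())
    }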
+pub mod fleet; pub mod kv; pub mod status; -pub use kv::{BUCKET_AGENT_STATUS, BUCKET_DESIRED_STATE, desired_state_key, status_key}; -pub use status::AgentStatus; +pub use fleet::{ + DeploymentName, DeploymentState, DeviceInfo, HeartbeatPayload, InvalidDeploymentName, +}; +pub use kv::{ + BUCKET_DESIRED_STATE, BUCKET_DEVICE_HEARTBEAT, BUCKET_DEVICE_INFO, BUCKET_DEVICE_STATE, + desired_state_key, device_heartbeat_key, device_info_key, device_state_key, +}; +pub use status::{InventorySnapshot, Phase}; // Re-exports so consumers (agent, operator) don't need a direct // harmony_types dependency purely to name the cross-boundary types. diff --git a/harmony-reconciler-contracts/src/status.rs b/harmony-reconciler-contracts/src/status.rs index e57a1e53..1a406e0b 100644 --- a/harmony-reconciler-contracts/src/status.rs +++ b/harmony-reconciler-contracts/src/status.rs @@ -1,74 +1,37 @@ -//! Agent → NATS KV status payload. -//! -//! The agent publishes a heartbeat + rollup status to the -//! `agent-status` bucket every 30 s (see -//! [`crate::BUCKET_AGENT_STATUS`]). Today the payload is intentionally -//! minimal — a single `"running"` state + a timestamp — so the -//! operator can implement §12 v0.1 "Status aggregation in operator" -//! without waiting on richer per-workload reporting. -//! -//! When the agent grows richer status (per-container state, rollout -//! progress) this struct gains fields with `#[serde(default)]`; old -//! operators keep working against newer agents. +//! Shared status primitives reused across the fleet wire format. -use chrono::{DateTime, Utc}; -use harmony_types::id::Id; +use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -/// A single heartbeat published by the agent at -/// `status.` in the `agent-status` bucket. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub struct AgentStatus { - /// Echoed from the agent's own config so the operator can - /// cross-check which device it came from if the KV key is ever - /// ambiguous. Serializes transparently as a plain string. - pub device_id: Id, - /// Coarse rollup state. v0 only ever writes `"running"`; richer - /// variants are a v0.1+ concern. A String (not an enum) so old - /// operators parsing this payload don't fail on a new variant. - pub status: String, - /// RFC 3339 UTC timestamp. Used by the smoke test's reboot- - /// detection gate — any timestamp strictly greater than the gate - /// is evidence of a post-reboot write. `chrono::DateTime` - /// serde-serializes as RFC 3339, so the wire format stays - /// lex-comparable (the smoke's string `>` still works). - pub timestamp: DateTime, +/// Coarse state of a single reconcile on one device. +/// +/// Deliberately coarse — richer granularity (ImagePulling, +/// ContainerCreating, …) is agent-internal; the operator's +/// aggregation only needs success/failure/pending counts. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] +pub enum Phase { + /// Agent has applied the Score and the container is up. + Running, + /// Reconcile hit an error. See paired `last_error` for the message. + Failed, + /// Reconcile is in flight or waiting on an external dependency + /// (image pull, network, etc.). Agents may also report this + /// between a CR apply and the first reconcile tick. 
+ Pending, } -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn status_roundtrip() { - let s = AgentStatus { - device_id: Id::from("pi-01".to_string()), - status: "running".to_string(), - timestamp: DateTime::parse_from_rfc3339("2026-04-21T18:15:42Z") - .unwrap() - .with_timezone(&Utc), - }; - let json = serde_json::to_string(&s).unwrap(); - let back: AgentStatus = serde_json::from_str(&json).unwrap(); - assert_eq!(s, back); - } - - #[test] - fn status_has_expected_wire_keys() { - let s = AgentStatus { - device_id: Id::from("pi-01".to_string()), - status: "running".to_string(), - timestamp: DateTime::parse_from_rfc3339("2026-04-21T18:15:42Z") - .unwrap() - .with_timezone(&Utc), - }; - let json = serde_json::to_string(&s).unwrap(); - // device_id must serialize as a flat string (not {"value": …}). - // Relies on `#[serde(transparent)]` on `harmony_types::id::Id`. - assert!(json.contains("\"device_id\":\"pi-01\""), "got {json}"); - assert!(json.contains("\"status\":\"running\"")); - // RFC 3339 output — the smoke script greps a `"timestamp":""` - // literal and compares lexicographically against a gate. - assert!(json.contains("\"timestamp\":\"2026-04-21T18:15:42Z\"")); - } +/// Static-ish facts about the device. Embedded in +/// [`crate::DeviceInfo`]; republished on change. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct InventorySnapshot { + pub hostname: String, + pub arch: String, + pub os: String, + pub kernel: String, + pub cpu_cores: u32, + pub memory_mb: u64, + /// Agent semver (e.g. `"0.1.0"`). Lets the operator flag + /// agents that are behind the current release. + pub agent_version: String, } diff --git a/harmony/src/domain/interpret/mod.rs b/harmony/src/domain/interpret/mod.rs index de9289e5..0f012aae 100644 --- a/harmony/src/domain/interpret/mod.rs +++ b/harmony/src/domain/interpret/mod.rs @@ -39,7 +39,7 @@ pub enum InterpretName { K8sIngress, PodmanV0, KvmVm, - IotDeviceSetup, + FleetDeviceSetup, } impl std::fmt::Display for InterpretName { @@ -75,7 +75,7 @@ impl std::fmt::Display for InterpretName { InterpretName::K8sIngress => f.write_str("K8sIngress"), InterpretName::PodmanV0 => f.write_str("PodmanV0"), InterpretName::KvmVm => f.write_str("KvmVm"), - InterpretName::IotDeviceSetup => f.write_str("IotDeviceSetup"), + InterpretName::FleetDeviceSetup => f.write_str("FleetDeviceSetup"), } } } diff --git a/harmony/src/domain/topology/host_configuration.rs b/harmony/src/domain/topology/host_configuration.rs index 0a8c6710..efbeb447 100644 --- a/harmony/src/domain/topology/host_configuration.rs +++ b/harmony/src/domain/topology/host_configuration.rs @@ -89,7 +89,7 @@ pub trait SystemdManager: Send + Sync { ) -> Result; /// Enable+start a user-scoped unit (e.g. `podman.socket` under - /// `iot-agent`). Assumes [`UnixUserManager::ensure_linger`] has + /// `fleet-agent`). Assumes [`UnixUserManager::ensure_linger`] has /// already been called for the user. async fn ensure_user_unit_active( &self, diff --git a/harmony/src/domain/topology/virtualization.rs b/harmony/src/domain/topology/virtualization.rs index c4b30ec2..11deecbf 100644 --- a/harmony/src/domain/topology/virtualization.rs +++ b/harmony/src/domain/topology/virtualization.rs @@ -119,6 +119,14 @@ pub struct VmFirstBootConfig { /// Public SSH keys (OpenSSH single-line format) to authorize for /// the admin user. pub authorized_keys: Vec, + /// Optional plaintext password for the admin user. 
When set, + /// the account is unlocked + SSH password auth is enabled on + /// the guest. Intended for interactive debugging / chaos + /// testing where the operator wants to log in and break things + /// manually. Leave `None` for production deployments — key-only + /// auth is the default. + #[serde(default)] + pub admin_password: Option, } /// Observed runtime info for a VM. diff --git a/harmony/src/modules/application/helm/mod.rs b/harmony/src/modules/application/helm/mod.rs index 6b73b087..6d2a9e07 100644 --- a/harmony/src/modules/application/helm/mod.rs +++ b/harmony/src/modules/application/helm/mod.rs @@ -2,10 +2,12 @@ pub use k8s_openapi::api::{ apps::v1::{Deployment, DeploymentSpec}, core::v1::{ - Container, ContainerPort, EnvVar, PodSpec, PodTemplateSpec, Service as K8sService, - ServicePort, ServiceSpec, + Container, ContainerPort, EnvVar, Namespace, PodSpec, PodTemplateSpec, + Service as K8sService, ServiceAccount, ServicePort, ServiceSpec, }, + rbac::v1::{ClusterRole, ClusterRoleBinding}, }; +pub use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition; use k8s_openapi::apimachinery::pkg::util::intstr::IntOrString; use kube::core::ObjectMeta; @@ -14,16 +16,36 @@ use crate::modules::application::config::{ApplicationNetworkPort, NetworkProtoco use std::fs; use std::path::{Path, PathBuf}; -/// Enum representing all supported Kubernetes resource types for Helm charts. -/// Supports built-in typed resources and custom CRDs via YAML strings. +/// A rendered Kubernetes resource ready to drop into a helm chart's +/// `templates/` directory. +/// +/// Each variant wraps a strongly-typed `k8s_openapi` struct — the chart +/// writer serializes via `serde_yaml` at package time, keeping the +/// `templates/` directory a pure data-transfer format (ADR 018 +/// template hydration). The `CustomYaml` escape hatch is here for +/// resources we haven't typed yet; **prefer adding a typed variant +/// over using it**. pub enum HelmResourceKind { - /// Built-in typed Service resource + /// `v1` Service (namespaced). Service(K8sService), - /// Built-in typed Deployment resource + /// `apps/v1` Deployment (namespaced). Deployment(Deployment), - /// Custom resource as pre-serialized YAML (e.g., CRDs, custom types) + /// `v1` Namespace (cluster-scoped). + Namespace(Namespace), + /// `v1` ServiceAccount (namespaced). + ServiceAccount(ServiceAccount), + /// `rbac.authorization.k8s.io/v1` ClusterRole (cluster-scoped). + ClusterRole(ClusterRole), + /// `rbac.authorization.k8s.io/v1` ClusterRoleBinding (cluster-scoped). + ClusterRoleBinding(ClusterRoleBinding), + /// `apiextensions.k8s.io/v1` CustomResourceDefinition + /// (cluster-scoped). Expected to be produced by + /// `kube::CustomResourceExt::crd()` on a derive-built type — + /// never hand-authored. + Crd(CustomResourceDefinition), + /// Escape hatch for resources without a typed variant yet. + /// Adding a typed variant above is always preferred. CustomYaml { filename: String, content: String }, - // Can add more typed variants as needed: ConfigMap, Secret, Ingress, etc. 
} impl HelmResourceKind { @@ -31,6 +53,23 @@ impl HelmResourceKind { match self { HelmResourceKind::Service(_) => "service.yaml".to_string(), HelmResourceKind::Deployment(_) => "deployment.yaml".to_string(), + HelmResourceKind::Namespace(_) => "namespace.yaml".to_string(), + HelmResourceKind::ServiceAccount(sa) => format!( + "serviceaccount-{}.yaml", + sa.metadata.name.as_deref().unwrap_or("unnamed") + ), + HelmResourceKind::ClusterRole(cr) => format!( + "clusterrole-{}.yaml", + cr.metadata.name.as_deref().unwrap_or("unnamed") + ), + HelmResourceKind::ClusterRoleBinding(crb) => format!( + "clusterrolebinding-{}.yaml", + crb.metadata.name.as_deref().unwrap_or("unnamed") + ), + HelmResourceKind::Crd(c) => format!( + "crd-{}.yaml", + c.metadata.name.as_deref().unwrap_or("unnamed") + ), HelmResourceKind::CustomYaml { filename, .. } => filename.clone(), } } @@ -39,6 +78,11 @@ impl HelmResourceKind { match self { HelmResourceKind::Service(s) => serde_yaml::to_string(s), HelmResourceKind::Deployment(d) => serde_yaml::to_string(d), + HelmResourceKind::Namespace(n) => serde_yaml::to_string(n), + HelmResourceKind::ServiceAccount(sa) => serde_yaml::to_string(sa), + HelmResourceKind::ClusterRole(cr) => serde_yaml::to_string(cr), + HelmResourceKind::ClusterRoleBinding(crb) => serde_yaml::to_string(crb), + HelmResourceKind::Crd(c) => serde_yaml::to_string(c), HelmResourceKind::CustomYaml { content, .. } => Ok(content.clone()), } } @@ -65,7 +109,8 @@ impl HelmResourceKind { } } - /// Add a custom resource from any type that implements Serialize + /// Add a custom resource from any type that implements Serialize. + /// Prefer a typed variant constructor over this where one exists. pub fn from_serializable( filename: impl Into, resource: &T, @@ -444,3 +489,85 @@ pub fn create_service_from_ports( ..Default::default() }) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn typed_variants_have_unique_filenames() { + let ns = Namespace { + metadata: ObjectMeta { + name: Some("fleet-system".to_string()), + ..Default::default() + }, + ..Default::default() + }; + let sa = ServiceAccount { + metadata: ObjectMeta { + name: Some("harmony-fleet-operator".to_string()), + namespace: Some("fleet-system".to_string()), + ..Default::default() + }, + ..Default::default() + }; + let cr = ClusterRole { + metadata: ObjectMeta { + name: Some("harmony-fleet-operator".to_string()), + ..Default::default() + }, + rules: None, + ..Default::default() + }; + let crb = ClusterRoleBinding { + metadata: ObjectMeta { + name: Some("harmony-fleet-operator".to_string()), + ..Default::default() + }, + role_ref: k8s_openapi::api::rbac::v1::RoleRef { + api_group: "rbac.authorization.k8s.io".to_string(), + kind: "ClusterRole".to_string(), + name: "harmony-fleet-operator".to_string(), + }, + subjects: None, + }; + let crd = CustomResourceDefinition { + metadata: ObjectMeta { + name: Some("widgets.example.io".to_string()), + ..Default::default() + }, + ..Default::default() + }; + let resources = [ + HelmResourceKind::Namespace(ns), + HelmResourceKind::ServiceAccount(sa), + HelmResourceKind::ClusterRole(cr), + HelmResourceKind::ClusterRoleBinding(crb), + HelmResourceKind::Crd(crd), + ]; + let mut seen = std::collections::HashSet::new(); + for r in &resources { + let f = r.filename(); + assert!(seen.insert(f.clone()), "duplicate filename {f}"); + // Make sure it serializes cleanly — catches any missing + // arm in `serialize_to_yaml`. 
+ let yaml = r.serialize_to_yaml().expect("serialize"); + assert!(!yaml.is_empty()); + } + } + + #[test] + fn crd_filename_carries_crd_name() { + let crd = CustomResourceDefinition { + metadata: ObjectMeta { + name: Some("deployments.fleet.nationtech.io".to_string()), + ..Default::default() + }, + ..Default::default() + }; + assert_eq!( + HelmResourceKind::Crd(crd).filename(), + "crd-deployments.fleet.nationtech.io.yaml" + ); + } +} diff --git a/harmony/src/modules/iot/assets.rs b/harmony/src/modules/fleet/assets.rs similarity index 93% rename from harmony/src/modules/iot/assets.rs rename to harmony/src/modules/fleet/assets.rs index dcbe1bf9..49900a10 100644 --- a/harmony/src/modules/iot/assets.rs +++ b/harmony/src/modules/fleet/assets.rs @@ -1,7 +1,7 @@ //! Bootstrapped assets shared across IoT workflows. //! //! Everything here follows the `ensure_*` pattern — idempotent, caches -//! results under [`HARMONY_DATA_DIR`]`/iot/…`, and runs at most once per +//! results under [`HARMONY_DATA_DIR`]`/fleet/…`, and runs at most once per //! process (enforced by a `tokio::sync::OnceCell`). The goal is that an //! operator can run the IoT smoke test against a freshly-installed host //! with nothing but `libvirt + qemu + xorriso + python3 + cargo + @@ -127,7 +127,7 @@ async fn ensure_cloud_image( return Err(exec(format!( "downloaded image sha256 mismatch: expected {expected_sha256}, got {actual}. \ Ubuntu may have rotated the 'current release' pointer — bump the pin in \ - modules::iot::assets.rs." + modules::fleet::assets.rs." ))); } // World-readable so libvirt-qemu can open it without a chmod ritual. @@ -195,7 +195,7 @@ async fn sha256_of_file(path: &Path) -> Result { } fn cloud_images_dir() -> PathBuf { - HARMONY_DATA_DIR.join("iot").join("cloud-images") + HARMONY_DATA_DIR.join("fleet").join("cloud-images") } // --------------------------------------------------------------------- @@ -206,20 +206,20 @@ fn cloud_images_dir() -> PathBuf { /// same key identifies every VM we provision for smoke/integration /// testing — cheap to reuse, easy to discard (just `rm -rf` the dir). #[derive(Debug, Clone)] -pub struct IotSshKeypair { +pub struct FleetSshKeypair { pub private_key: PathBuf, pub public_key: PathBuf, } -/// Ensure `$HARMONY_DATA_DIR/iot/ssh/id_ed25519[.pub]` exists. Runs +/// Ensure `$HARMONY_DATA_DIR/fleet/ssh/id_ed25519[.pub]` exists. Runs /// `ssh-keygen` once; subsequent calls return the existing paths. 
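A usage sketch for the keypair helper, assuming the `harmony::modules::fleet` re-exports below and an `ExecutorError` error type (implied by the `exec(...)` constructor used throughout, though the signatures here elide it):

use harmony::executors::ExecutorError;
use harmony::modules::fleet::{ensure_fleet_ssh_keypair, read_public_key};

async fn authorized_key_for_cloud_init() -> Result<String, ExecutorError> {
    // First call shells out to ssh-keygen under $HARMONY_DATA_DIR/fleet/ssh/;
    // later calls hit the OnceCell + on-disk cache and return the same paths.
    let keypair = ensure_fleet_ssh_keypair().await?;
    // One line in OpenSSH format, ready for cloud-init `authorized_keys`.
    read_public_key(&keypair).await
}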
-pub async fn ensure_iot_ssh_keypair() -> Result { - static CELL: OnceCell = OnceCell::const_new(); +pub async fn ensure_fleet_ssh_keypair() -> Result { + static CELL: OnceCell = OnceCell::const_new(); CELL.get_or_try_init(provision_ssh_keypair).await.cloned() } -async fn provision_ssh_keypair() -> Result { - let dir = HARMONY_DATA_DIR.join("iot").join("ssh"); +async fn provision_ssh_keypair() -> Result { + let dir = HARMONY_DATA_DIR.join("fleet").join("ssh"); tokio::fs::create_dir_all(&dir) .await .map_err(|e| exec(format!("create ssh dir {dir:?}: {e}")))?; @@ -231,7 +231,7 @@ async fn provision_ssh_keypair() -> Result { let pub_path = dir.join("id_ed25519.pub"); if priv_path.exists() && pub_path.exists() { info!("ssh keypair cache hit at {priv_path:?}"); - return Ok(IotSshKeypair { + return Ok(FleetSshKeypair { private_key: priv_path, public_key: pub_path, }); @@ -248,7 +248,7 @@ async fn provision_ssh_keypair() -> Result { "-N", "", // no passphrase "-C", - "harmony-iot-smoke", + "harmony-fleet-smoke", "-f", ]) .arg(&priv_path) // PathBuf — kept separate so we don't force &str conversion @@ -263,7 +263,7 @@ async fn provision_ssh_keypair() -> Result { String::from_utf8_lossy(&status.stderr).trim() ))); } - Ok(IotSshKeypair { + Ok(FleetSshKeypair { private_key: priv_path, public_key: pub_path, }) @@ -271,7 +271,7 @@ async fn provision_ssh_keypair() -> Result { /// Read the generated public key (one line, openssh format) into a string /// suitable for cloud-init's `authorized_keys`. -pub async fn read_public_key(kp: &IotSshKeypair) -> Result { +pub async fn read_public_key(kp: &FleetSshKeypair) -> Result { let content = tokio::fs::read_to_string(&kp.public_key) .await .map_err(|e| exec(format!("read {:?}: {e}", kp.public_key)))?; diff --git a/harmony/src/modules/iot/libvirt_pool.rs b/harmony/src/modules/fleet/libvirt_pool.rs similarity index 86% rename from harmony/src/modules/iot/libvirt_pool.rs rename to harmony/src/modules/fleet/libvirt_pool.rs index e893d6b0..9df29bd5 100644 --- a/harmony/src/modules/iot/libvirt_pool.rs +++ b/harmony/src/modules/fleet/libvirt_pool.rs @@ -4,14 +4,14 @@ //! writable place to drop per-VM overlay disks + cloud-init seed ISOs. //! Rather than ask the operator to set that up, we create a user- //! owned dir-backed libvirt pool at -//! `$HARMONY_DATA_DIR/iot/kvm/pool/` and let libvirt handle: +//! `$HARMONY_DATA_DIR/fleet/kvm/pool/` and let libvirt handle: //! //! - **Perms**: dir contents get chowned to libvirt-qemu on VM start //! via dynamic-ownership (default-on), and back to us on VM stop //! (via remember_owner, also default-on). No `chmod 644` gymnastics. -//! - **Visibility**: `virsh vol-list harmony-iot` shows every +//! - **Visibility**: `virsh vol-list harmony-fleet` shows every //! artifact we've created. -//! - **Cleanup**: `virsh vol-delete harmony-iot` removes +//! - **Cleanup**: `virsh vol-delete harmony-fleet` removes //! managed volumes alongside `virsh undefine --remove-all-storage`. //! //! We *don't* rewrite the VM XML to use `` @@ -30,11 +30,11 @@ use virt::storage_pool::StoragePool; use crate::domain::config::HARMONY_DATA_DIR; use crate::executors::ExecutorError; -pub const HARMONY_IOT_POOL_NAME: &str = "harmony-iot"; +pub const HARMONY_FLEET_POOL_NAME: &str = "harmony-fleet"; /// Filesystem path + libvirt name of the managed pool. 
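A sketch of the pool helper in use, assuming the `kvm` feature is enabled and that the helper returns `ExecutorError` on failure (again implied by `exec(...)` rather than spelled out):

use harmony::executors::ExecutorError;
use harmony::modules::fleet::{HARMONY_FLEET_POOL_NAME, ensure_harmony_fleet_pool};

async fn overlay_dir() -> Result<std::path::PathBuf, ExecutorError> {
    // Defines (or reuses) the dir-backed pool "harmony-fleet" under
    // $HARMONY_DATA_DIR/fleet/kvm/pool/ and hands back its name + path.
    let pool = ensure_harmony_fleet_pool().await?;
    assert_eq!(pool.name, HARMONY_FLEET_POOL_NAME);
    Ok(pool.path)
}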
#[derive(Debug, Clone)] -pub struct HarmonyIotPool { +pub struct HarmonyFleetPool { pub name: String, pub path: PathBuf, } @@ -46,13 +46,13 @@ pub struct HarmonyIotPool { /// **Requires libvirt-group membership**. When the user isn't in the /// group, libvirt rejects the `qemu:///system` connection — the /// preflight check catches that upstream. -pub async fn ensure_harmony_iot_pool() -> Result { - static CELL: OnceCell = OnceCell::const_new(); +pub async fn ensure_harmony_fleet_pool() -> Result { + static CELL: OnceCell = OnceCell::const_new(); CELL.get_or_try_init(provision_pool).await.cloned() } -async fn provision_pool() -> Result { - let pool_dir = HARMONY_DATA_DIR.join("iot").join("kvm").join("pool"); +async fn provision_pool() -> Result { + let pool_dir = HARMONY_DATA_DIR.join("fleet").join("kvm").join("pool"); tokio::fs::create_dir_all(&pool_dir) .await .map_err(|e| exec(format!("create pool dir {pool_dir:?}: {e}")))?; @@ -66,7 +66,7 @@ async fn provision_pool() -> Result { .map_err(|e| exec(format!("chmod pool dir: {e}")))?; let pool_path = pool_dir.clone(); - let pool_name = HARMONY_IOT_POOL_NAME.to_string(); + let pool_name = HARMONY_FLEET_POOL_NAME.to_string(); // virt-rs is blocking C bindings — bounce into spawn_blocking. let pool_name_blocking = pool_name.clone(); @@ -106,7 +106,7 @@ async fn provision_pool() -> Result { .await .map_err(|e| exec(format!("spawn_blocking pool setup: {e}")))??; - Ok(HarmonyIotPool { + Ok(HarmonyFleetPool { name: pool_name, path: pool_path, }) diff --git a/harmony/src/modules/fleet/mod.rs b/harmony/src/modules/fleet/mod.rs new file mode 100644 index 00000000..2e42849d --- /dev/null +++ b/harmony/src/modules/fleet/mod.rs @@ -0,0 +1,40 @@ +//! Harmony-side Scores for fleet device onboarding. +//! +//! Today this module exposes [`FleetDeviceSetupScore`] — a customer +//! runs it against a freshly-booted device (Pi, VM, bare-metal node +//! later) to install podman, place the `fleet-agent` binary, drop +//! the TOML config, and bring up the agent under systemd. Re-running +//! with a changed config (different labels, new NATS URL, new +//! credentials) is how a device is moved between fleet partitions. +//! +//! The operator + agent crates live outside `harmony/` under +//! `fleet/harmony-fleet-operator/` and `fleet/harmony-fleet-agent/`. +//! What belongs here is the harmony-framework side: the Scores a +//! customer runs through `harmony_cli::run` to provision devices +//! before they ever talk to NATS. +//! +//! "Fleet" is deliberately domain-agnostic — IoT was the first +//! customer's use case but the reconciler pattern (operator → NATS +//! KV → agent → target) applies equally to Pi podman, OKD apply, +//! KVM VMs, etc. 
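To make the onboarding flow concrete, a hedged sketch of building the setup Score this module exposes. Field values (device id, labels, NATS URL, credentials, binary path) are placeholders, and handing the Score to `harmony_cli::run` against an SSH-reachable topology is elided:

use std::collections::BTreeMap;
use std::path::PathBuf;

use harmony::modules::fleet::{FleetDeviceSetupConfig, FleetDeviceSetupScore};
use harmony_types::id::Id;

fn onboarding_score() -> FleetDeviceSetupScore {
    let config = FleetDeviceSetupConfig {
        device_id: Id::from("pi-01".to_string()),
        // Routing labels published in every DeviceInfo heartbeat;
        // `group` is the conventional primary label.
        labels: BTreeMap::from([
            ("group".to_string(), "site-a".to_string()),
            ("arch".to_string(), "aarch64".to_string()),
        ]),
        nats_urls: vec!["nats://nats.example.internal:4222".to_string()],
        nats_user: "fleet".to_string(),
        nats_pass: "change-me".to_string(),
        // Cross-compiled agent binary, uploaded to /usr/local/bin/fleet-agent.
        agent_binary_path: PathBuf::from(
            "target/aarch64-unknown-linux-gnu/release/fleet-agent",
        ),
    };
    FleetDeviceSetupScore::new(config)
}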
+ +pub mod assets; +#[cfg(feature = "kvm")] +pub mod libvirt_pool; +pub mod preflight; +mod setup_score; +#[cfg(feature = "kvm")] +mod vm_score; + +pub use assets::{ + FleetSshKeypair, UBUNTU_2404_CLOUDIMG_ARM64_FILENAME, UBUNTU_2404_CLOUDIMG_ARM64_SHA256, + UBUNTU_2404_CLOUDIMG_ARM64_URL, UBUNTU_2404_CLOUDIMG_FILENAME, UBUNTU_2404_CLOUDIMG_SHA256, + UBUNTU_2404_CLOUDIMG_URL, ensure_fleet_ssh_keypair, ensure_ubuntu_2404_cloud_image, + ensure_ubuntu_2404_cloud_image_for_arch, read_public_key, +}; +#[cfg(feature = "kvm")] +pub use libvirt_pool::{HARMONY_FLEET_POOL_NAME, HarmonyFleetPool, ensure_harmony_fleet_pool}; +pub use preflight::{check_fleet_smoke_preflight, check_fleet_smoke_preflight_for_arch}; +pub use setup_score::{FleetDeviceSetupConfig, FleetDeviceSetupScore}; +#[cfg(feature = "kvm")] +pub use vm_score::ProvisionVmScore; diff --git a/harmony/src/modules/iot/preflight.rs b/harmony/src/modules/fleet/preflight.rs similarity index 95% rename from harmony/src/modules/iot/preflight.rs rename to harmony/src/modules/fleet/preflight.rs index f15b4750..93b08f81 100644 --- a/harmony/src/modules/iot/preflight.rs +++ b/harmony/src/modules/fleet/preflight.rs @@ -19,18 +19,20 @@ use crate::executors::ExecutorError; use crate::modules::kvm::firmware::discover_aarch64_firmware; /// Run every preflight check for an x86_64 smoke run — equivalent -/// to [`check_iot_smoke_preflight_for_arch`] with +/// to [`check_fleet_smoke_preflight_for_arch`] with /// [`VmArchitecture::X86_64`]. Kept as a distinct function so /// existing callers don't need to thread an arch through yet. -pub async fn check_iot_smoke_preflight() -> Result<(), ExecutorError> { - check_iot_smoke_preflight_for_arch(VmArchitecture::X86_64).await +pub async fn check_fleet_smoke_preflight() -> Result<(), ExecutorError> { + check_fleet_smoke_preflight_for_arch(VmArchitecture::X86_64).await } /// Arch-aware preflight. On top of the host-generic checks /// (virsh, qemu-img, xorriso, python3, ssh-keygen, libvirt group, /// default network), an aarch64 target requires /// `qemu-system-aarch64` and a usable AAVMF firmware pair. -pub async fn check_iot_smoke_preflight_for_arch(arch: VmArchitecture) -> Result<(), ExecutorError> { +pub async fn check_fleet_smoke_preflight_for_arch( + arch: VmArchitecture, +) -> Result<(), ExecutorError> { check_tool_on_path("virsh", "libvirt client").await?; check_tool_on_path("qemu-img", "qemu-utils").await?; check_tool_on_path("xorriso", "ISO image builder").await?; diff --git a/harmony/src/modules/iot/setup_score.rs b/harmony/src/modules/fleet/setup_score.rs similarity index 53% rename from harmony/src/modules/iot/setup_score.rs rename to harmony/src/modules/fleet/setup_score.rs index 9f59cf70..35ee960a 100644 --- a/harmony/src/modules/iot/setup_score.rs +++ b/harmony/src/modules/fleet/setup_score.rs @@ -1,8 +1,9 @@ -//! [`IotDeviceSetupScore`] — install podman + the iot-agent, wire the +//! [`FleetDeviceSetupScore`] — install podman + the fleet-agent, wire the //! agent's TOML config, enable the systemd unit. Idempotent: re-running -//! with a changed config (e.g. a different `group`) updates only what -//! differs and restarts the agent once. +//! with a changed config (different labels, new NATS url, etc.) updates +//! only what differs and restarts the agent once. +use std::collections::BTreeMap; use std::path::PathBuf; use async_trait::async_trait; @@ -25,43 +26,46 @@ use crate::score::Score; /// User-visible configuration for the setup Score. 
Everything a customer /// needs to tell us to bring a device into the fleet. /// -/// **On `group`.** For v0 the group is a *label*, written into the -/// agent's TOML config and reported back via the status bucket. It does -/// not yet drive deployment routing — `Deployment.spec.targetDevices` -/// still takes explicit device IDs. `targetGroups` is a v0.1+ item -/// (ROADMAP §6.5). Running this Score twice against the same device -/// with different `group` values is how a device is moved between -/// fleet partitions once group routing lands. +/// **On `labels`.** The label map is published verbatim in every +/// DeviceInfo heartbeat so the operator can resolve a Deployment's +/// `spec.targetSelector` against this device (K8s-Node-analogue flow). +/// `group` is the conventional primary label but any key/value pair +/// is legal. Re-running this Score with a changed label map is how a +/// device is moved between fleet partitions: the config file is +/// regenerated, byte-compare idempotency fires, the agent restarts, +/// new labels propagate. #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct IotDeviceSetupConfig { +pub struct FleetDeviceSetupConfig { /// Stable device identifier. Written into the agent's TOML and /// used as the KV key prefix (`.`). Harmony /// `Id` values are sortable-by-creation-time and collision-safe /// at up to ~10k devices/sec, which matches the feel of a fleet /// registry. pub device_id: Id, - /// Fleet partition this device belongs to. - pub group: String, + /// Routing labels. Published in every DeviceInfo heartbeat; the + /// operator reflects them into `Device.metadata.labels` so + /// Deployment selectors can match. Typical keys: `group`, + /// `arch`, `role`, `region`. + pub labels: BTreeMap, /// NATS URLs the agent should connect to. Typically one entry. pub nats_urls: Vec, /// Shared v0 credentials (Zitadel-issued per-device tokens in v0.2). pub nats_user: String, pub nats_pass: String, - /// Local filesystem path to the cross-compiled `iot-agent-v0` + /// Local filesystem path to the cross-compiled `fleet-agent-v0` /// binary. The Score uploads it to the device and installs to - /// `/usr/local/bin/iot-agent`. Future v0.1: this becomes a + /// `/usr/local/bin/fleet-agent`. Future v0.1: this becomes a /// `DownloadableAsset` pointing at CI-published artifacts. pub agent_binary_path: PathBuf, } -impl IotDeviceSetupConfig { - /// Render the agent's `/etc/iot-agent/config.toml` content. +impl FleetDeviceSetupConfig { + /// Render the agent's `/etc/fleet-agent/config.toml` content. pub fn render_toml(&self) -> String { // Raw-string template with format! — the TOML escape rules for // double-quoted strings are just `\` and `"`, handled by // [`toml_escape`]. let device_id = toml_escape(&self.device_id.to_string()); - let group = toml_escape(&self.group); let nats_user = toml_escape(&self.nats_user); let nats_pass = toml_escape(&self.nats_pass); let urls = self @@ -70,10 +74,18 @@ impl IotDeviceSetupConfig { .map(|u| format!("\"{}\"", toml_escape(u))) .collect::>() .join(", "); + // BTreeMap iteration is ordered — same labels render to + // byte-identical TOML across runs, which is what the + // Score's byte-compare idempotency relies on. 
+ let labels = self + .labels + .iter() + .map(|(k, v)| format!("{} = \"{}\"", toml_escape(k), toml_escape(v))) + .collect::>() + .join("\n"); format!( r#"[agent] device_id = "{device_id}" -group = "{group}" [credentials] type = "toml-shared" @@ -82,6 +94,9 @@ nats_pass = "{nats_pass}" [nats] urls = [{urls}] + +[labels] +{labels} "# ) } @@ -95,10 +110,10 @@ Wants=network-online.target [Service] Type=simple -User=iot-agent -Environment=IOT_AGENT_CONFIG=/etc/iot-agent/config.toml +User=fleet-agent +Environment=FLEET_AGENT_CONFIG=/etc/fleet-agent/config.toml Environment=RUST_LOG=info -ExecStart=/usr/local/bin/iot-agent +ExecStart=/usr/local/bin/fleet-agent Restart=on-failure RestartSec=5 StandardOutput=journal @@ -115,23 +130,23 @@ fn toml_escape(s: &str) -> String { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct IotDeviceSetupScore { - pub config: IotDeviceSetupConfig, +pub struct FleetDeviceSetupScore { + pub config: FleetDeviceSetupConfig, } -impl IotDeviceSetupScore { - pub fn new(config: IotDeviceSetupConfig) -> Self { +impl FleetDeviceSetupScore { + pub fn new(config: FleetDeviceSetupConfig) -> Self { Self { config } } } -impl Score for IotDeviceSetupScore { +impl Score for FleetDeviceSetupScore { fn name(&self) -> String { - format!("IotDeviceSetupScore({})", self.config.device_id) + format!("FleetDeviceSetupScore({})", self.config.device_id) } fn create_interpret(&self) -> Box> { - Box::new(IotDeviceSetupInterpret { + Box::new(FleetDeviceSetupInterpret { config: self.config.clone(), version: Version::from("0.1.0").expect("static version"), status: InterpretStatus::QUEUED, @@ -140,16 +155,16 @@ impl Score for IotDeviceSetupScore { } #[derive(Debug)] -struct IotDeviceSetupInterpret { - config: IotDeviceSetupConfig, +struct FleetDeviceSetupInterpret { + config: FleetDeviceSetupConfig, version: Version, status: InterpretStatus, } #[async_trait] -impl Interpret for IotDeviceSetupInterpret { +impl Interpret for FleetDeviceSetupInterpret { fn get_name(&self) -> InterpretName { - InterpretName::IotDeviceSetup + InterpretName::FleetDeviceSetup } fn get_version(&self) -> Version { self.version.clone() @@ -179,33 +194,38 @@ impl Interpret for IotDeviceSetupInterp log_change(&mut change_log, format!("package:{pkg}"), r); } - // 2. iot-agent system user. Lingered so its user-systemd survives - // logout (needed for the user podman.socket we'll enable below). - // No explicit primary group — useradd on Debian-family systems - // defaults to `USERGROUPS_ENAB yes` which auto-creates a group - // matching the username. Setting `group:` here would require a - // separate `ensure_group` step to pre-create it. + // 2. fleet-agent user. Not `--system`: Ubuntu's useradd skips + // subuid/subgid auto-allocation for system users on the + // assumption that service accounts don't run user namespaces. + // Rootless podman needs those ranges in /etc/subuid + + // /etc/subgid before the container runtime ever starts. A + // regular useradd auto-allocates a non-overlapping range, so + // we get correct behavior for free and can coexist with any + // other user on the host that also runs rootless containers. + // + // Lingered so the user-systemd instance survives logout — + // required for the user podman.socket we enable below. 
let user_spec = UserSpec { - name: "iot-agent".to_string(), + name: "fleet-agent".to_string(), group: None, supplementary_groups: vec![], shell: Some("/bin/bash".to_string()), - system: true, + system: false, create_home: true, }; let r = UnixUserManager::ensure_user(topology, &user_spec) .await .map_err(wrap)?; - log_change(&mut change_log, "user:iot-agent", r); + log_change(&mut change_log, "user:fleet-agent", r); - let r = UnixUserManager::ensure_linger(topology, "iot-agent") + let r = UnixUserManager::ensure_linger(topology, "fleet-agent") .await .map_err(wrap)?; - log_change(&mut change_log, "linger:iot-agent", r); + log_change(&mut change_log, "linger:fleet-agent", r); // 3. User-scoped podman socket. Required by `PodmanTopology` on // the agent so it reaches /run/user//podman/podman.sock. - let r = SystemdManager::ensure_user_unit_active(topology, "iot-agent", "podman.socket") + let r = SystemdManager::ensure_user_unit_active(topology, "fleet-agent", "podman.socket") .await .map_err(wrap)?; log_change(&mut change_log, "user-unit:podman.socket", r); @@ -218,7 +238,7 @@ impl Interpret for IotDeviceSetupInterp let binary_r = FileDelivery::ensure_file( topology, &FileSpec { - path: "/usr/local/bin/iot-agent".to_string(), + path: "/usr/local/bin/fleet-agent".to_string(), source: FileSource::LocalPath(cfg.agent_binary_path.clone()), owner: Some("root".to_string()), group: Some("root".to_string()), @@ -227,25 +247,25 @@ impl Interpret for IotDeviceSetupInterp ) .await .map_err(wrap)?; - log_change(&mut change_log, "file:/usr/local/bin/iot-agent", binary_r); + log_change(&mut change_log, "file:/usr/local/bin/fleet-agent", binary_r); - // 5. /etc/iot-agent/ + config.toml + // 5. /etc/fleet-agent/ + config.toml let config_toml = cfg.render_toml(); let toml_spec = FileSpec { - path: "/etc/iot-agent/config.toml".to_string(), + path: "/etc/fleet-agent/config.toml".to_string(), source: FileSource::Content(config_toml), - owner: Some("iot-agent".to_string()), - group: Some("iot-agent".to_string()), + owner: Some("fleet-agent".to_string()), + group: Some("fleet-agent".to_string()), mode: Some(0o600), }; let toml_r = FileDelivery::ensure_file(topology, &toml_spec) .await .map_err(wrap)?; - log_change(&mut change_log, "file:/etc/iot-agent/config.toml", toml_r); + log_change(&mut change_log, "file:/etc/fleet-agent/config.toml", toml_r); // 6. systemd unit for the agent itself. let unit = SystemdUnitSpec { - name: "iot-agent".to_string(), + name: "fleet-agent".to_string(), unit_content: cfg.render_systemd_unit().to_string(), scope: SystemdScope::System, start_immediately: true, @@ -253,18 +273,18 @@ impl Interpret for IotDeviceSetupInterp let unit_r = SystemdManager::ensure_systemd_unit(topology, &unit) .await .map_err(wrap)?; - log_change(&mut change_log, "unit:iot-agent", unit_r); + log_change(&mut change_log, "unit:fleet-agent", unit_r); // 7. Restart the agent iff anything that affects it changed. 
let needs_restart = toml_r.changed || unit_r.changed || binary_r.changed; if needs_restart { - SystemdManager::restart_service(topology, "iot-agent", SystemdScope::System) + SystemdManager::restart_service(topology, "fleet-agent", SystemdScope::System) .await .map_err(wrap)?; - change_log.push("restart:iot-agent".to_string()); - info!("iot-agent restarted to pick up config/unit change"); + change_log.push("restart:fleet-agent".to_string()); + info!("fleet-agent restarted to pick up config/unit change"); } else { - debug!("iot-agent config + unit unchanged; no restart"); + debug!("fleet-agent config + unit unchanged; no restart"); } let outcome = if change_log.is_empty() { @@ -292,3 +312,55 @@ fn log_change(change_log: &mut Vec, what: impl Into, r: ChangeRe change_log.push(what.into()); } } + +#[cfg(test)] +mod tests { + use super::*; + + fn base_config(labels: BTreeMap) -> FleetDeviceSetupConfig { + FleetDeviceSetupConfig { + device_id: Id::from("pi-42".to_string()), + labels, + nats_urls: vec!["nats://nats:4222".to_string()], + nats_user: "admin".to_string(), + nats_pass: "pw".to_string(), + agent_binary_path: PathBuf::from("/dev/null"), + } + } + + #[test] + fn render_toml_includes_labels_section() { + let mut labels = BTreeMap::new(); + labels.insert("group".to_string(), "site-a".to_string()); + labels.insert("arch".to_string(), "aarch64".to_string()); + let toml = base_config(labels).render_toml(); + assert!(toml.contains("[labels]")); + // BTreeMap sorts keys: `arch` before `group`. + let labels_block = toml.split("[labels]").nth(1).unwrap(); + let arch_idx = labels_block.find("arch").unwrap(); + let group_idx = labels_block.find("group").unwrap(); + assert!(arch_idx < group_idx, "labels must render sorted"); + assert!(labels_block.contains(r#"arch = "aarch64""#)); + assert!(labels_block.contains(r#"group = "site-a""#)); + } + + #[test] + fn render_toml_same_labels_yields_identical_output() { + // Core idempotency invariant: two structurally-identical + // configs render byte-identical TOML. The Score's change + // detection relies on this. + let mut labels = BTreeMap::new(); + labels.insert("group".to_string(), "site-a".to_string()); + let a = base_config(labels.clone()).render_toml(); + let b = base_config(labels).render_toml(); + assert_eq!(a, b); + } + + #[test] + fn render_toml_escapes_label_values() { + let mut labels = BTreeMap::new(); + labels.insert("group".to_string(), r#"has"quote"#.to_string()); + let toml = base_config(labels).render_toml(); + assert!(toml.contains(r#"group = "has\"quote""#)); + } +} diff --git a/harmony/src/modules/iot/vm_score.rs b/harmony/src/modules/fleet/vm_score.rs similarity index 100% rename from harmony/src/modules/iot/vm_score.rs rename to harmony/src/modules/fleet/vm_score.rs diff --git a/harmony/src/modules/iot/mod.rs b/harmony/src/modules/iot/mod.rs deleted file mode 100644 index 23ec2987..00000000 --- a/harmony/src/modules/iot/mod.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! IoT fleet primitives exposed to customers. -//! -//! Right now that's the single [`IotDeviceSetupScore`] — a customer runs -//! it against a freshly-booted device (Pi or VM) to install podman, -//! place the iot-agent binary, drop the TOML config, and bring up the -//! agent under systemd. Re-running with a different config (e.g. -//! different `group`) is what moves a device between fleet partitions. -//! -//! The operator + agent crates live outside of `harmony/` in `iot/`. -//! This module is where *Harmony Scores* that target IoT fleets live — -//! 
they run inside the Harmony framework proper, driven by the same -//! `harmony_cli::run` story every other Score uses. - -pub mod assets; -#[cfg(feature = "kvm")] -pub mod libvirt_pool; -pub mod preflight; -mod setup_score; -#[cfg(feature = "kvm")] -mod vm_score; - -pub use assets::{ - IotSshKeypair, UBUNTU_2404_CLOUDIMG_ARM64_FILENAME, UBUNTU_2404_CLOUDIMG_ARM64_SHA256, - UBUNTU_2404_CLOUDIMG_ARM64_URL, UBUNTU_2404_CLOUDIMG_FILENAME, UBUNTU_2404_CLOUDIMG_SHA256, - UBUNTU_2404_CLOUDIMG_URL, ensure_iot_ssh_keypair, ensure_ubuntu_2404_cloud_image, - ensure_ubuntu_2404_cloud_image_for_arch, read_public_key, -}; -#[cfg(feature = "kvm")] -pub use libvirt_pool::{HARMONY_IOT_POOL_NAME, HarmonyIotPool, ensure_harmony_iot_pool}; -pub use preflight::{check_iot_smoke_preflight, check_iot_smoke_preflight_for_arch}; -pub use setup_score::{IotDeviceSetupConfig, IotDeviceSetupScore}; -#[cfg(feature = "kvm")] -pub use vm_score::ProvisionVmScore; diff --git a/harmony/src/modules/k8s/bare_topology.rs b/harmony/src/modules/k8s/bare_topology.rs new file mode 100644 index 00000000..e6e9c58d --- /dev/null +++ b/harmony/src/modules/k8s/bare_topology.rs @@ -0,0 +1,98 @@ +//! Minimal Kubernetes topology for ad-hoc Score execution. +//! +//! Harmony's opinionated topologies (`K8sAnywhereTopology`, +//! `HAClusterTopology`) do a lot of product-level setup inside +//! `ensure_ready` — cert-manager install, tenant-manager bootstrap, +//! helm probes, TLS routing. That's appropriate when a caller is +//! standing up an entire NationTech-style product stack. It is +//! **not** appropriate when a caller just wants to apply a typed +//! resource (a CRD, a Deployment, a Secret, …) against an existing +//! Kubernetes cluster. +//! +//! `K8sBareTopology` is what that narrow use case needs: it carries +//! a single [`K8sClient`], implements [`K8sclient`] by handing it +//! out, and its `ensure_ready` is a noop. No helm, no certs, no +//! tenant-manager, no PLEG. Compose it with whichever +//! `K8sResourceScore` / domain score needs a cluster client and +//! nothing more. +//! +//! History: this type is the promotion of a three-dozen-line +//! `InstallTopology` that lived inside `harmony-fleet-operator`'s +//! `install.rs`. When the NATS single-node install work added a +//! second consumer wanting the same shape, the extraction became +//! obvious (see ROADMAP/12-code-review-april-2026.md §12.6). + +use std::process::Command; +use std::sync::Arc; + +use async_trait::async_trait; +use harmony_k8s::K8sClient; + +use crate::domain::topology::{HelmCommand, PreparationError, PreparationOutcome, Topology}; +use crate::topology::K8sclient; + +/// Minimal `Topology` that only knows how to hand out a pre-built +/// `K8sClient`. Use for Scores that need `K8sclient` but nothing +/// else from their topology. +/// +/// Construct via [`K8sBareTopology::from_kubeconfig`] or +/// [`K8sBareTopology::from_client`]. +#[derive(Clone)] +pub struct K8sBareTopology { + name: String, + client: Arc, +} + +impl K8sBareTopology { + /// Wrap a pre-built `K8sClient`. Caller is responsible for + /// having loaded it from the right place (KUBECONFIG, explicit + /// path, in-cluster service account, …). + pub fn from_client(name: impl Into, client: Arc) -> Self { + Self { + name: name.into(), + client, + } + } + + /// Build a client from the standard kube client config + /// resolution (`KUBECONFIG` env var → `~/.kube/config` → + /// in-cluster service account, in that order). 
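A short sketch of the intended composition, assuming the `K8sclient` trait is reachable at `harmony::topology::K8sclient` as the `use crate::topology::K8sclient` above suggests; executing an actual Score against the topology is elided:

use harmony::modules::k8s::K8sBareTopology;
use harmony::topology::K8sclient;

async fn adhoc_client() -> Result<(), String> {
    // Standard kube config resolution; ensure_ready() is a noop, so
    // nothing product-level gets installed on the target cluster.
    let topology = K8sBareTopology::from_kubeconfig("adhoc-apply").await?;
    // Arc<K8sClient>, ready to hand to a K8sResourceScore or use directly.
    let _client = topology.k8s_client().await?;
    Ok(())
}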
+ pub async fn from_kubeconfig(name: impl Into) -> Result { + let kube_client = kube::Client::try_default() + .await + .map_err(|e| format!("building kube client: {e}"))?; + Ok(Self::from_client( + name, + Arc::new(K8sClient::new(kube_client)), + )) + } +} + +#[async_trait] +impl Topology for K8sBareTopology { + fn name(&self) -> &str { + &self.name + } + + async fn ensure_ready(&self) -> Result { + Ok(PreparationOutcome::Noop) + } +} + +#[async_trait] +impl K8sclient for K8sBareTopology { + async fn k8s_client(&self) -> Result, String> { + Ok(self.client.clone()) + } +} + +/// Run the host's `helm` binary with whatever KUBECONFIG resolution +/// was used to build the `K8sBareTopology`. No extra context / ns +/// args — callers pass those on the command line. Lets NATS + +/// operator-install flows go through `HelmChartScore` against the +/// same cluster the bare topology already targets. +impl HelmCommand for K8sBareTopology { + fn get_helm_command(&self) -> Command { + Command::new("helm") + } +} diff --git a/harmony/src/modules/k8s/mod.rs b/harmony/src/modules/k8s/mod.rs index a6aa47b0..03882d37 100644 --- a/harmony/src/modules/k8s/mod.rs +++ b/harmony/src/modules/k8s/mod.rs @@ -1,7 +1,10 @@ pub mod apps; +pub mod bare_topology; pub mod coredns; pub mod deployment; mod failover; pub mod ingress; pub mod namespace; pub mod resource; + +pub use bare_topology::K8sBareTopology; diff --git a/harmony/src/modules/kvm/cloudinit.rs b/harmony/src/modules/kvm/cloudinit.rs index 4b1031fe..496514ba 100644 --- a/harmony/src/modules/kvm/cloudinit.rs +++ b/harmony/src/modules/kvm/cloudinit.rs @@ -48,6 +48,13 @@ pub struct CloudInitSeedConfig<'a> { pub authorized_key: &'a str, /// Local username to create with passwordless sudo. pub user: &'a str, + /// Optional plaintext password for the admin user. `None` keeps + /// the account SSH-key-only (the default). Setting a password + /// unlocks the account *and* enables `ssh_pwauth: true` on the + /// guest — intended for interactive debugging / chaos-testing + /// workflows where the operator wants console or SSH password + /// access to break things on purpose. + pub admin_password: Option<&'a str>, /// Extra `runcmd` lines to append to the user-data. Mostly useful /// for no-op debugging; keep empty in production paths. pub extra_runcmd: Vec, @@ -144,6 +151,21 @@ fn render_user_data(cfg: &CloudInitSeedConfig<'_>) -> String { } s }; + + // Password handling is split into user-level (lock_passwd + + // plain_text_passwd) and daemon-level (ssh_pwauth). When a + // password is provided, cloud-init hashes + sets the password and + // we allow SSH password auth. When it isn't, the account stays + // locked and sshd denies password logins — the production default. + let (lock_passwd, plain_text_passwd_line, ssh_pwauth) = match cfg.admin_password { + Some(pw) => ( + "false", + format!(" plain_text_passwd: \"{}\"\n", yaml_escape(pw)), + "true", + ), + None => ("true", String::new(), "false"), + }; + format!( r#"#cloud-config hostname: {hostname} @@ -153,10 +175,10 @@ users: - name: {user} sudo: ALL=(ALL) NOPASSWD:ALL shell: /bin/bash - lock_passwd: true - ssh_authorized_keys: + lock_passwd: {lock_passwd} +{plain_text_passwd_line} ssh_authorized_keys: - {authorized_key} -ssh_pwauth: false +ssh_pwauth: {ssh_pwauth} disable_root: true {runcmd}"#, hostname = cfg.hostname, @@ -165,6 +187,11 @@ disable_root: true ) } +fn yaml_escape(s: &str) -> String { + // Double-quoted YAML: backslash and double-quote need escaping. 
+ s.replace('\\', "\\\\").replace('"', "\\\"") +} + async fn write_file(path: &Path, content: &str) -> Result<(), KvmError> { let mut f = tokio::fs::File::create(path).await.map_err(KvmError::Io)?; f.write_all(content.as_bytes()) @@ -188,3 +215,60 @@ async fn which_xorriso() -> Option { None } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn no_password_locks_account_and_disables_ssh_pwauth() { + let cfg = CloudInitSeedConfig { + hostname: "pi-01", + authorized_key: "ssh-ed25519 AAAA test", + user: "fleet-admin", + admin_password: None, + extra_runcmd: vec![], + }; + let out = render_user_data(&cfg); + assert!(out.contains("lock_passwd: true"), "got:\n{out}"); + assert!(out.contains("ssh_pwauth: false"), "got:\n{out}"); + assert!( + !out.contains("plain_text_passwd"), + "password leaked into cloud-init without admin_password set:\n{out}" + ); + } + + #[test] + fn with_password_unlocks_account_and_enables_ssh_pwauth() { + let cfg = CloudInitSeedConfig { + hostname: "pi-01", + authorized_key: "ssh-ed25519 AAAA test", + user: "fleet-admin", + admin_password: Some("break-things-123"), + extra_runcmd: vec![], + }; + let out = render_user_data(&cfg); + assert!(out.contains("lock_passwd: false"), "got:\n{out}"); + assert!(out.contains("ssh_pwauth: true"), "got:\n{out}"); + assert!( + out.contains("plain_text_passwd: \"break-things-123\""), + "password not inlined in cloud-init:\n{out}" + ); + } + + #[test] + fn password_with_quotes_is_yaml_escaped() { + let cfg = CloudInitSeedConfig { + hostname: "pi-01", + authorized_key: "ssh-ed25519 AAAA", + user: "fleet-admin", + admin_password: Some("he said \"hi\""), + extra_runcmd: vec![], + }; + let out = render_user_data(&cfg); + assert!( + out.contains(r#"plain_text_passwd: "he said \"hi\"""#), + "got:\n{out}" + ); + } +} diff --git a/harmony/src/modules/kvm/topology.rs b/harmony/src/modules/kvm/topology.rs index c0f30c67..4d780d58 100644 --- a/harmony/src/modules/kvm/topology.rs +++ b/harmony/src/modules/kvm/topology.rs @@ -35,7 +35,7 @@ pub const DEFAULT_ADMIN_USER: &str = "harmony-admin"; /// /// Composes with a caller-chosen storage pool directory where per-VM /// overlays + seed ISOs are placed. Harmony's IoT workflows use -/// [`crate::modules::iot::ensure_harmony_iot_pool`] to populate that +/// [`crate::modules::fleet::ensure_harmony_fleet_pool`] to populate that /// dir; other callers can point at any user-owned libvirt pool root. pub struct KvmVirtualMachineHost { name: String, @@ -120,7 +120,7 @@ impl VirtualMachineHost for KvmVirtualMachineHost { .await .map_err(|e| exec(format!("remove stale overlay: {e}")))?; } - create_overlay(&self.base_image_path, &overlay_path).await?; + create_overlay(&self.base_image_path, &overlay_path, spec.disk_size_gb).await?; info!( "created overlay disk {overlay_path:?} backed by {:?}", self.base_image_path @@ -297,21 +297,36 @@ async fn ensure_vm_firmware( async fn create_overlay( base: &std::path::Path, overlay: &std::path::Path, + size_gb: Option, ) -> Result<(), ExecutorError> { + let base_str = base + .to_str() + .ok_or_else(|| exec("base image path is not valid UTF-8"))?; + let overlay_str = overlay + .to_str() + .ok_or_else(|| exec("overlay path is not valid UTF-8"))?; + // qemu-img takes an optional trailing SIZE. Without it, the + // overlay inherits the backing image's virtual size (2-3 GiB + // for the stock Ubuntu cloud image) which is tight as soon as + // a couple of container images land. 
Ubuntu cloud-init ships + // `cloud-initramfs-growroot`, so a larger virtual size is + // resized on first boot without extra glue. + let size_arg = size_gb.filter(|g| *g > 0).map(|g| format!("{g}G")); + let mut args: Vec<&str> = vec![ + "create", + "-f", + "qcow2", + "-F", + "qcow2", + "-b", + base_str, + overlay_str, + ]; + if let Some(s) = size_arg.as_deref() { + args.push(s); + } let output = Command::new("qemu-img") - .args([ - "create", - "-f", - "qcow2", - "-F", - "qcow2", - "-b", - base.to_str() - .ok_or_else(|| exec("base image path is not valid UTF-8"))?, - overlay - .to_str() - .ok_or_else(|| exec("overlay path is not valid UTF-8"))?, - ]) + .args(&args) .stdout(Stdio::null()) .stderr(Stdio::piped()) .output() @@ -349,6 +364,7 @@ async fn build_cloud_init_seed( hostname: &hostname, authorized_key: &authorized_key, user: &admin_user, + admin_password: first_boot.admin_password.as_deref(), extra_runcmd: vec![], }, pool_dir, diff --git a/harmony/src/modules/linux/ansible_configurator.rs b/harmony/src/modules/linux/ansible_configurator.rs index 3ee9087c..af78bc03 100644 --- a/harmony/src/modules/linux/ansible_configurator.rs +++ b/harmony/src/modules/linux/ansible_configurator.rs @@ -57,7 +57,7 @@ impl AnsibleHostConfigurator { // encapsulation we want. Callers say "install podman"; we // pick apt/dnf/pacman/apk. Debian-family is the only dispatch // currently wired because it's our first concrete target (IoT - // runs on Raspbian/Ubuntu per ROADMAP/iot_platform/ + // runs on Raspbian/Ubuntu per ROADMAP/fleet_platform/ // v0_walking_skeleton.md §5.3). Extending to RHEL/Fedora/ // Alpine is a matter of detecting the family here and picking // `ansible.builtin.dnf` / `community.general.pacman` / @@ -112,7 +112,7 @@ impl AnsibleHostConfigurator { spec: &FileSpec, ) -> Result { // Ansible's `copy` module doesn't auto-create parent dirs, so - // writes into fresh paths like `/etc/iot-agent/config.toml` + // writes into fresh paths like `/etc/fleet-agent/config.toml` // fail with "Destination directory … does not exist". Create // the parent first via the `file` module; state=directory is // idempotent so this is a cheap noop on re-run. diff --git a/harmony/src/modules/mod.rs b/harmony/src/modules/mod.rs index db62415e..86e1e338 100644 --- a/harmony/src/modules/mod.rs +++ b/harmony/src/modules/mod.rs @@ -5,10 +5,10 @@ pub mod cert_manager; pub mod dhcp; pub mod dns; pub mod dummy; +pub mod fleet; pub mod helm; pub mod http; pub mod inventory; -pub mod iot; pub mod k3d; pub mod k8s; #[cfg(feature = "kvm")] diff --git a/harmony/src/modules/nats/helm_chart.rs b/harmony/src/modules/nats/helm_chart.rs new file mode 100644 index 00000000..5a1f17b5 --- /dev/null +++ b/harmony/src/modules/nats/helm_chart.rs @@ -0,0 +1,185 @@ +//! Shared helm-chart primitive for every NATS deployment shape. +//! +//! The upstream `nats/nats` helm chart is the single source of truth +//! for how a NATS pod / STS is actually built: probes, resource +//! shapes, RBAC, stateful-set options, JetStream storage volumes, +//! clustering, TLS, gateways, leaf nodes. Every high-level NATS +//! Score — `NatsBasicScore` for single-node, `NatsK8sScore` for +//! supercluster — delegates here. Differences between shapes are +//! expressed as `values_yaml`, not as parallel resource constructors. +//! +//! Why this is the right primitive: +//! +//! - The NATS project's chart tracks upstream server features +//! automatically; we get new knobs (`websocket.enabled`, +//! `gateway.merge.advertise`, …) without shipping code. +//! 
- One helm release per NATS deployment means `helm upgrade` / +//! `helm uninstall` / `helm list` all work naturally. +//! - Chapter 4 of the harmony review learned this the hard way: a +//! parallel k8s_openapi-based NATS primitive diverged on probe +//! shape + pod-anti-affinity and was deleted. + +use std::str::FromStr; + +use async_trait::async_trait; +use harmony_macros::hurl; +use harmony_types::id::Id; +use non_blank_string_rs::NonBlankString; +use serde::Serialize; + +use crate::data::Version; +use crate::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome}; +use crate::inventory::Inventory; +use crate::modules::helm::chart::{HelmChartScore, HelmRepository}; +use crate::score::Score; +use crate::topology::{HelmCommand, Topology}; + +/// The NATS-IO project's published helm chart. `hurl!` needs a +/// literal so the URL is inlined at the one call site below rather +/// than being a `const &str`. +const CHART_NAME: &str = "nats/nats"; +const REPO_NAME: &str = "nats"; + +/// Thin preset over [`HelmChartScore`] that pins the NATS chart + +/// repository and leaves `values_yaml` as the one parameter. +/// +/// Callers should almost never construct this directly — build a +/// high-level preset (`NatsBasicScore`, `NatsK8sScore`) instead. +/// The type is `pub` so those presets across different files can +/// share a single definition. +#[derive(Debug, Clone, Serialize)] +pub struct NatsHelmChartScore { + pub namespace: NonBlankString, + pub release_name: NonBlankString, + /// Helm values YAML specific to this shape. Build with the + /// preset's dedicated rendering function; `values_overrides` + /// style is intentionally not exposed — values_yaml is readable + /// + diffable, overrides are not. + pub values_yaml: String, + /// Whether helm should create the target namespace if missing. + pub create_namespace: bool, + /// `true` = `helm install` (fail on re-apply), `false` = + /// `helm upgrade --install` (idempotent). Presets default to + /// upgrade-install so re-running a Score is safe. + pub install_only: bool, +} + +impl NatsHelmChartScore { + /// Build a score targeting the upstream NATS chart at the given + /// release name + namespace with the caller's values yaml. + pub fn new( + release_name: impl Into, + namespace: impl Into, + values_yaml: String, + ) -> Self { + Self { + release_name: NonBlankString::from_str(&release_name.into()) + .expect("non-blank release_name"), + namespace: NonBlankString::from_str(&namespace.into()).expect("non-blank namespace"), + values_yaml, + create_namespace: true, + install_only: false, + } + } + + /// Convert into the underlying [`HelmChartScore`]. Exists for the + /// rare callers that need to hand the result to a non-NATS + /// pipeline (e.g. `ArgoCD`-backed deploy wrappers); presets + /// normally just use the `Score` impl. 
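A sketch of the preset-delegates-to-chart-score relationship described here, using the `NatsBasicScore` preset added further down in this change; it assumes `new` accepts any string-like release name and namespace:

use harmony::modules::nats::{NatsBasicScore, NatsHelmChartScore};

fn single_node_chart_score() -> NatsHelmChartScore {
    // The high-level preset renders the values yaml for its shape ...
    let preset = NatsBasicScore::new("fleet-nats", "fleet-system").load_balancer();
    // ... and this type pins the upstream chart + repository around it.
    NatsHelmChartScore::new(
        preset.release_name(),
        preset.namespace(),
        preset.render_values(),
    )
}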
+ pub fn into_helm_chart_score(self) -> HelmChartScore { + HelmChartScore { + namespace: Some(self.namespace), + release_name: self.release_name, + chart_name: NonBlankString::from_str(CHART_NAME).expect("chart name const is valid"), + chart_version: None, + values_overrides: None, + values_yaml: Some(self.values_yaml), + create_namespace: self.create_namespace, + install_only: self.install_only, + repository: Some(HelmRepository::new( + REPO_NAME.to_string(), + hurl!("https://nats-io.github.io/k8s/helm/charts/"), + true, + )), + } + } +} + +impl Score for NatsHelmChartScore { + fn create_interpret(&self) -> Box> { + Box::new(NatsHelmChartInterpret { + score: self.clone(), + }) + } + + fn name(&self) -> String { + format!("NatsHelmChartScore({})", self.release_name) + } +} + +#[derive(Debug)] +pub struct NatsHelmChartInterpret { + score: NatsHelmChartScore, +} + +#[async_trait] +impl Interpret for NatsHelmChartInterpret { + async fn execute( + &self, + inventory: &Inventory, + topology: &T, + ) -> Result { + self.score + .clone() + .into_helm_chart_score() + .create_interpret() + .execute(inventory, topology) + .await + } + + fn get_name(&self) -> InterpretName { + InterpretName::HelmChart + } + + fn get_version(&self) -> Version { + Version::from("0.1.0").expect("static version literal") + } + + fn get_status(&self) -> InterpretStatus { + InterpretStatus::QUEUED + } + + fn get_children(&self) -> Vec { + vec![] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn into_helm_chart_score_pins_chart_and_repo() { + let s = NatsHelmChartScore::new( + "fleet-nats", + "fleet-system", + "replicaCount: 1\n".to_string(), + ); + let hc = s.into_helm_chart_score(); + assert_eq!(hc.chart_name.to_string(), CHART_NAME); + let repo = hc.repository.expect("repo must be pinned"); + // We're not inspecting the fields further — HelmRepository's + // fields are private — but pinning `repository = Some(..)` + // at all is what matters: without it `helm install` would + // try the release-name as a local path. + let _ = repo; + assert_eq!(hc.values_yaml.as_deref(), Some("replicaCount: 1\n")); + } + + #[test] + fn defaults_are_upgrade_install_with_namespace_creation() { + let s = NatsHelmChartScore::new("n", "ns", "".to_string()); + assert!(s.create_namespace, "presets expect namespace creation"); + assert!(!s.install_only, "presets expect upgrade-install semantics"); + } +} diff --git a/harmony/src/modules/nats/mod.rs b/harmony/src/modules/nats/mod.rs index 6758c77b..04a42fd6 100644 --- a/harmony/src/modules/nats/mod.rs +++ b/harmony/src/modules/nats/mod.rs @@ -1,5 +1,10 @@ pub mod capability; pub mod decentralized; +pub mod helm_chart; pub mod pki; +pub mod score_nats_basic; pub mod score_nats_k8s; pub mod score_nats_supercluster; + +pub use helm_chart::NatsHelmChartScore; +pub use score_nats_basic::{NatsBasicScore, NatsServiceType}; diff --git a/harmony/src/modules/nats/score_nats_basic.rs b/harmony/src/modules/nats/score_nats_basic.rs new file mode 100644 index 00000000..368d02a5 --- /dev/null +++ b/harmony/src/modules/nats/score_nats_basic.rs @@ -0,0 +1,307 @@ +//! Single-node NATS — high-level preset over [`NatsHelmChartScore`]. +//! +//! The shape this Score covers: one NATS server pod in a cluster, +//! JetStream on by default, exposed via ClusterIP / NodePort / +//! LoadBalancer. No TLS, no clustering, no auth. For any of those, +//! graduate to `NatsK8sScore` (supercluster + TLS + gateways). +//! +//! Everything concrete — probes, resource limits, statefulset +//! 
+//! options — comes from the upstream `nats/nats` helm chart.
+//! This Score just picks the chart values that select a minimal
+//! single-node install.
+//!
+//! Typical usage:
+//!
+//! ```ignore
+//! use harmony::modules::k8s::K8sBareTopology;
+//! use harmony::modules::nats::NatsBasicScore;
+//! use harmony::score::Score;
+//! use harmony::inventory::Inventory;
+//!
+//! let topology = K8sBareTopology::from_kubeconfig("nats-install").await?;
+//! let score = NatsBasicScore::new("fleet-nats", "fleet-system").load_balancer();
+//! score.create_interpret().execute(&Inventory::empty(), &topology).await?;
+//! ```
+
+use async_trait::async_trait;
+use harmony_types::id::Id;
+use serde::Serialize;
+
+use crate::data::Version;
+use crate::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome};
+use crate::inventory::Inventory;
+use crate::modules::nats::helm_chart::NatsHelmChartScore;
+use crate::score::Score;
+use crate::topology::{HelmCommand, Topology};
+
+/// How the NATS client port is exposed.
+#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
+pub enum NatsServiceType {
+    /// In-cluster only. Caller reaches NATS via
+    /// `<release>.<namespace>.svc.cluster.local:4222`.
+    ClusterIp,
+    /// NodePort on the given host port — must fall in the cluster's
+    /// configured service-node-port range (default 30000-32767).
+    NodePort(i32),
+    /// LoadBalancer service. On k3d this uses the built-in
+    /// `klipper-lb`, which pairs naturally with
+    /// `k3d cluster create -p PORT:PORT@loadbalancer`.
+    LoadBalancer,
+}
+
+/// Declarative single-node NATS. Construct via [`new`], tune with
+/// the builder-style methods, hand to a topology that implements
+/// [`HelmCommand`].
+#[derive(Debug, Clone, Serialize)]
+pub struct NatsBasicScore {
+    release_name: String,
+    namespace: String,
+    jetstream: bool,
+    service_type: NatsServiceType,
+    /// Optional image override (`repository:tag` or full ref).
+    /// `None` = use the chart's default image.
+    image: Option<String>,
+}
+
+impl NatsBasicScore {
+    /// Build a single-node NATS score with JetStream on and
+    /// ClusterIP exposure. Use the builder methods to change the
+    /// exposure or image.
+    pub fn new(release_name: impl Into<String>, namespace: impl Into<String>) -> Self {
+        Self {
+            release_name: release_name.into(),
+            namespace: namespace.into(),
+            jetstream: true,
+            service_type: NatsServiceType::ClusterIp,
+            image: None,
+        }
+    }
+
+    pub fn jetstream(mut self, enabled: bool) -> Self {
+        self.jetstream = enabled;
+        self
+    }
+
+    pub fn node_port(mut self, port: i32) -> Self {
+        self.service_type = NatsServiceType::NodePort(port);
+        self
+    }
+
+    pub fn load_balancer(mut self) -> Self {
+        self.service_type = NatsServiceType::LoadBalancer;
+        self
+    }
+
+    pub fn image(mut self, image: impl Into<String>) -> Self {
+        self.image = Some(image.into());
+        self
+    }
+
+    /// Render the chart values for this preset. Public so tests +
+    /// downstream tools (e.g. `helm template` diffs) can inspect
+    /// exactly what the Score will install.
+    pub fn render_values(&self) -> String {
+        let mut y = String::new();
+        y.push_str(&format!("fullnameOverride: {}\n", self.release_name));
+        y.push_str("replicaCount: 1\n");
+        y.push_str("config:\n");
+        y.push_str("  cluster:\n");
+        y.push_str("    enabled: false\n");
+        y.push_str("  jetstream:\n");
+        y.push_str(&format!("    enabled: {}\n", self.jetstream));
+        if self.jetstream {
+            y.push_str("    fileStorage:\n");
+            y.push_str("      enabled: true\n");
+            y.push_str("      size: 10Gi\n");
+        }
+        match self.service_type {
+            NatsServiceType::ClusterIp => {
+                // Chart default. No overrides needed.
+            }
+            NatsServiceType::NodePort(port) => {
+                y.push_str("service:\n");
+                y.push_str("  merge:\n");
+                y.push_str("    spec:\n");
+                y.push_str("      type: NodePort\n");
+                y.push_str("  ports:\n");
+                y.push_str("    nats:\n");
+                y.push_str("      merge:\n");
+                y.push_str(&format!("        nodePort: {port}\n"));
+            }
+            NatsServiceType::LoadBalancer => {
+                y.push_str("service:\n");
+                y.push_str("  merge:\n");
+                y.push_str("    spec:\n");
+                y.push_str("      type: LoadBalancer\n");
+            }
+        }
+        if let Some(img) = &self.image {
+            let (repo, tag) = split_image_ref(img);
+            y.push_str("container:\n");
+            y.push_str("  image:\n");
+            y.push_str(&format!("    repository: {repo}\n"));
+            if let Some(tag) = tag {
+                y.push_str(&format!("    tag: {tag}\n"));
+            }
+        }
+        y
+    }
+
+    /// Name accessors — used by downstream presets + tests that
+    /// need to reference what this Score will name its resources.
+    pub fn release_name(&self) -> &str {
+        &self.release_name
+    }
+    pub fn namespace(&self) -> &str {
+        &self.namespace
+    }
+}
+
+fn split_image_ref(image: &str) -> (String, Option<String>) {
+    // Split on the *last* colon that isn't part of a registry port
+    // (`registry.io:5000/foo:v1`). Good enough for the shapes we
+    // see in practice (`nats:2.10-alpine`, `ghcr.io/nats-io/nats:v2`).
+    match image.rsplit_once(':') {
+        Some((r, t)) if !t.contains('/') => (r.to_string(), Some(t.to_string())),
+        _ => (image.to_string(), None),
+    }
+}
+
+impl<T: Topology + HelmCommand> Score<T> for NatsBasicScore {
+    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
+        Box::new(NatsBasicInterpret {
+            score: self.clone(),
+        })
+    }
+
+    fn name(&self) -> String {
+        "NatsBasicScore".to_string()
+    }
+}
+
+#[derive(Debug)]
+pub struct NatsBasicInterpret {
+    score: NatsBasicScore,
+}
+
+#[async_trait]
+impl<T: Topology + HelmCommand> Interpret<T> for NatsBasicInterpret {
+    async fn execute(
+        &self,
+        inventory: &Inventory,
+        topology: &T,
+    ) -> Result<Outcome, InterpretError> {
+        let values_yaml = self.score.render_values();
+        NatsHelmChartScore::new(&self.score.release_name, &self.score.namespace, values_yaml)
+            .create_interpret()
+            .execute(inventory, topology)
+            .await
+    }
+
+    fn get_name(&self) -> InterpretName {
+        InterpretName::Custom("NatsBasicInterpret")
+    }
+
+    fn get_version(&self) -> Version {
+        Version::from("0.1.0").expect("static version literal")
+    }
+
+    fn get_status(&self) -> InterpretStatus {
+        InterpretStatus::QUEUED
+    }
+
+    fn get_children(&self) -> Vec<Id> {
+        vec![]
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn defaults_are_clusterip_jetstream_on() {
+        let s = NatsBasicScore::new("n", "ns");
+        assert_eq!(s.service_type, NatsServiceType::ClusterIp);
+        assert!(s.jetstream);
+        assert!(s.image.is_none());
+    }
+
+    #[test]
+    fn render_values_includes_fullname_and_replica() {
+        let y = NatsBasicScore::new("fleet-nats", "fleet-system").render_values();
+        assert!(y.contains("fullnameOverride: fleet-nats"));
+        assert!(y.contains("replicaCount: 1"));
+        // cluster.enabled stays false for a single-node shape.
+        assert!(y.contains("cluster:\n    enabled: false"));
+    }
+
+    #[test]
+    fn render_values_enables_jetstream_with_storage_by_default() {
+        let y = NatsBasicScore::new("n", "ns").render_values();
+        assert!(y.contains("jetstream:\n    enabled: true"));
+        assert!(y.contains("fileStorage:\n      enabled: true"));
+    }
+
+    #[test]
+    fn render_values_omits_storage_when_jetstream_off() {
+        let y = NatsBasicScore::new("n", "ns")
+            .jetstream(false)
+            .render_values();
+        assert!(y.contains("jetstream:\n    enabled: false"));
+        assert!(!y.contains("fileStorage"));
+    }
+
+    #[test]
+    fn render_values_node_port_patches_service_and_port() {
+        let y = NatsBasicScore::new("n", "ns")
+            .node_port(30222)
+            .render_values();
+        assert!(y.contains("type: NodePort"));
+        assert!(y.contains("nodePort: 30222"));
+    }
+
+    #[test]
+    fn render_values_load_balancer_sets_service_type() {
+        let y = NatsBasicScore::new("n", "ns")
+            .load_balancer()
+            .render_values();
+        assert!(y.contains("type: LoadBalancer"));
+        // LoadBalancer doesn't specify a nodePort — let kube assign.
+        assert!(!y.contains("nodePort:"));
+    }
+
+    #[test]
+    fn render_values_clusterip_has_no_service_block() {
+        let y = NatsBasicScore::new("n", "ns").render_values();
+        assert!(!y.contains("service:"));
+    }
+
+    #[test]
+    fn render_values_image_override_splits_repo_and_tag() {
+        let y = NatsBasicScore::new("n", "ns")
+            .image("registry.io/custom/nats:2.10-alpine")
+            .render_values();
+        assert!(y.contains("repository: registry.io/custom/nats"));
+        assert!(y.contains("tag: 2.10-alpine"));
+    }
+
+    #[test]
+    fn render_values_image_without_tag_omits_tag_line() {
+        let y = NatsBasicScore::new("n", "ns")
+            .image("my.internal/nats-no-tag")
+            .render_values();
+        assert!(y.contains("repository: my.internal/nats-no-tag"));
+        assert!(!y.contains("tag:"));
+    }
+
+    #[test]
+    fn setters_return_self_for_chaining() {
+        let s = NatsBasicScore::new("n", "ns")
+            .jetstream(true)
+            .load_balancer()
+            .image("nats:latest");
+        assert_eq!(s.release_name(), "n");
+        assert_eq!(s.namespace(), "ns");
+    }
+}
diff --git a/harmony/src/modules/nats/score_nats_k8s.rs b/harmony/src/modules/nats/score_nats_k8s.rs
index 4aac85fc..880b9a5f 100644
--- a/harmony/src/modules/nats/score_nats_k8s.rs
+++ b/harmony/src/modules/nats/score_nats_k8s.rs
@@ -1,14 +1,12 @@
-use std::{collections::BTreeMap, str::FromStr};
+use std::collections::BTreeMap;
 
 use async_trait::async_trait;
 use harmony_k8s::KubernetesDistribution;
-use harmony_macros::hurl;
 use harmony_secret::{Secret, SecretManager};
 use harmony_types::id::Id;
 use k8s_openapi::{ByteString, api::core::v1::Secret as K8sSecret};
 use kube::api::ObjectMeta;
 use log::{debug, info};
-use non_blank_string_rs::NonBlankString;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 
@@ -17,9 +15,11 @@ use crate::{
     interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
     inventory::Inventory,
     modules::{
-        helm::chart::{HelmChartScore, HelmRepository},
         k8s::{ingress::K8sIngressScore, resource::K8sResourceScore},
-        nats::capability::{Nats, NatsCluster, NatsEndpoint},
+        nats::{
+            capability::{Nats, NatsCluster, NatsEndpoint},
+            helm_chart::NatsHelmChartScore,
+        },
         okd::{
             crd::route::{RoutePort, RouteSpec, RouteTargetReference, TLSConfig},
             route::OKDRouteScore,
@@ -325,21 +325,8 @@ natsBox:
         ));
 
        debug!("Prepared Helm Chart values : \n{values_yaml:#?}");
-        let nats = HelmChartScore {
-            namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
-            release_name: NonBlankString::from_str(&cluster.name).unwrap(),
-            chart_name: NonBlankString::from_str("nats/nats").unwrap(),
-            chart_version: None,
-            values_overrides: None,
-            values_yaml,
-            create_namespace: true,
-            install_only: false,
-            repository: Some(HelmRepository::new(
-                "nats".to_string(),
-                hurl!("https://nats-io.github.io/k8s/helm/charts/"),
-                true,
-            )),
-        };
+        let values_yaml = values_yaml.expect("supercluster always builds a values_yaml");
+        let nats = NatsHelmChartScore::new(cluster.name.clone(), namespace, values_yaml);
         nats.interpret(inventory, topology).await
     }
 }
diff --git a/harmony/src/modules/podman/mod.rs b/harmony/src/modules/podman/mod.rs
index b25ab85c..7d786ff8 100644
--- a/harmony/src/modules/podman/mod.rs
+++ b/harmony/src/modules/podman/mod.rs
@@ -3,5 +3,5 @@ mod score;
 mod topology;
 
 pub use interpret::PodmanV0Interpret;
-pub use score::{IotScore, PodmanService, PodmanV0Score};
+pub use score::{PodmanService, PodmanV0Score, ReconcileScore};
 pub use topology::PodmanTopology;
diff --git a/harmony/src/modules/podman/score.rs b/harmony/src/modules/podman/score.rs
index e795cf0c..c1ea95a1 100644
--- a/harmony/src/modules/podman/score.rs
+++ b/harmony/src/modules/podman/score.rs
@@ -55,7 +55,7 @@ impl PodmanV0Score {
 
 /// log-and-skip the unknown tag.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 #[serde(tag = "type", content = "data")]
-pub enum IotScore {
+pub enum ReconcileScore {
     PodmanV0(PodmanV0Score),
 }
@@ -69,16 +69,16 @@ impl<T: Topology + ContainerRuntime> Score<T> for PodmanV0Score {
     }
 }
 
-impl<T: Topology + ContainerRuntime> Score<T> for IotScore {
+impl<T: Topology + ContainerRuntime> Score<T> for ReconcileScore {
     fn create_interpret(&self) -> Box<dyn Interpret<T>> {
         match self {
-            IotScore::PodmanV0(score) => score.create_interpret(),
+            ReconcileScore::PodmanV0(score) => score.create_interpret(),
         }
     }
 
     fn name(&self) -> String {
         match self {
-            IotScore::PodmanV0(_) => "PodmanV0Score".to_string(),
+            ReconcileScore::PodmanV0(_) => "PodmanV0Score".to_string(),
         }
     }
 }
@@ -89,7 +89,7 @@ mod tests {
 
     #[test]
     fn podman_v0_score_serializes_with_adjacent_tag() {
-        let score = IotScore::PodmanV0(PodmanV0Score {
+        let score = ReconcileScore::PodmanV0(PodmanV0Score {
             services: vec![PodmanService {
                 name: "web".to_string(),
                 image: "nginx:latest".to_string(),
@@ -103,7 +103,7 @@ ...
 
     #[test]
     fn podman_v0_score_roundtrip() {
-        let score = IotScore::PodmanV0(PodmanV0Score {
+        let score = ReconcileScore::PodmanV0(PodmanV0Score {
             services: vec![
                 PodmanService {
                     name: "web".to_string(),
@@ -118,7 +118,7 @@ ...
             ],
         });
         let serialized = serde_json::to_string(&score).unwrap();
-        let deserialized: IotScore = serde_json::from_str(&serialized).unwrap();
+        let deserialized: ReconcileScore = serde_json::from_str(&serialized).unwrap();
         assert_eq!(score, deserialized);
     }
 
diff --git a/harmony/src/modules/podman/topology.rs b/harmony/src/modules/podman/topology.rs
index 9116bc37..10bee004 100644
--- a/harmony/src/modules/podman/topology.rs
+++ b/harmony/src/modules/podman/topology.rs
@@ -62,8 +62,21 @@ impl PodmanTopology {
     }
 
     async fn ensure_image_present(&self, image: &str) -> Result<(), ExecutorError> {
-        let opts = PullOpts::builder().reference(image).build();
+        // Fast path: image already in the local store → no network
+        // call, no rate-limit exposure. Matches the behaviour a
+        // Kubernetes `imagePullPolicy: IfNotPresent` would give, and
+        // it's the right default for a long-lived device agent —
+        // every podman `pull` against a public registry is rate-
+        // limited traffic we only want to spend when strictly
+        // necessary. Upgrades (different `image` string / tag) hit
+        // this function with a reference that's NOT locally
+        // present yet and still do the pull below.
         let images = self.podman.images();
+        if images.get(image).exists().await.map_err(to_exec_error)? {
+            return Ok(());
+        }
+
+        let opts = PullOpts::builder().reference(image).build();
         let mut stream = images.pull(&opts);
         while let Some(event) = stream.next().await {
             let event = event.map_err(to_exec_error)?;
diff --git a/iot/iot-agent-v0/src/reconciler.rs b/iot/iot-agent-v0/src/reconciler.rs
deleted file mode 100644
index 939e330a..00000000
--- a/iot/iot-agent-v0/src/reconciler.rs
+++ /dev/null
@@ -1,145 +0,0 @@
-use std::collections::HashMap;
-use std::sync::Arc;
-use std::time::Duration;
-
-use anyhow::Result;
-use tokio::sync::Mutex;
-
-use harmony::inventory::Inventory;
-use harmony::modules::podman::{IotScore, PodmanTopology, PodmanV0Score};
-use harmony::score::Score;
-
-/// Cache key → last-seen state, populated by `apply` and consulted by the
-/// 30-second periodic tick and the delete path.
-struct CachedEntry {
-    /// Serialized score JSON. Used for string-compare idempotency per
-    /// ROADMAP §5.5 — cheaper and more deterministic than a hash.
-    serialized: String,
-    /// Parsed score. Cached so the periodic reconcile tick and delete
-    /// handlers don't have to re-parse the JSON.
-    score: PodmanV0Score,
-}
-
-pub struct Reconciler {
-    topology: Arc<PodmanTopology>,
-    inventory: Arc<Inventory>,
-    /// Keyed by NATS KV key (`<device_id>.<deployment_name>`). A single entry per
-    /// KV key — in v0 there is no fan-out from one key to many scores.
-    state: Mutex<HashMap<String, CachedEntry>>,
-}
-
-impl Reconciler {
-    pub fn new(topology: Arc<PodmanTopology>, inventory: Arc<Inventory>) -> Self {
-        Self {
-            topology,
-            inventory,
-            state: Mutex::new(HashMap::new()),
-        }
-    }
-
-    /// Handle a Put event (new or updated score on NATS KV). No-ops if the
-    /// serialized score is byte-identical to the last-seen value for this
-    /// key.
-    pub async fn apply(&self, key: &str, value: &[u8]) -> Result<()> {
-        let incoming = match serde_json::from_slice::<IotScore>(value) {
-            Ok(IotScore::PodmanV0(s)) => s,
-            Err(e) => {
-                tracing::warn!(key, error = %e, "failed to deserialize score");
-                return Ok(());
-            }
-        };
-        let serialized = String::from_utf8_lossy(value).into_owned();
-
-        {
-            let state = self.state.lock().await;
-            if let Some(existing) = state.get(key) {
-                if existing.serialized == serialized {
-                    tracing::debug!(key, "score unchanged — noop");
-                    return Ok(());
-                }
-            }
-        }
-
-        self.run_score(key, &incoming).await?;
-
-        let mut state = self.state.lock().await;
-        state.insert(
-            key.to_string(),
-            CachedEntry {
-                serialized,
-                score: incoming,
-            },
-        );
-        Ok(())
-    }
-
-    /// Handle a Delete/Purge event. Stops and removes every container
-    /// referenced by the last cached score for this key. Idempotent: if we
-    /// never saw a Put for this key (agent restart after delete), logs and
-    /// returns ok.
-    pub async fn remove(&self, key: &str) -> Result<()> {
-        let mut state = self.state.lock().await;
-        let Some(entry) = state.remove(key) else {
-            tracing::info!(key, "delete for unknown key — nothing to remove");
-            return Ok(());
-        };
-        drop(state);
-
-        use harmony::topology::ContainerRuntime;
-        for service in &entry.score.services {
-            if let Err(e) = self.topology.remove_service(&service.name).await {
-                tracing::warn!(
-                    key,
-                    service = %service.name,
-                    error = %e,
-                    "failed to remove container"
-                );
-            } else {
-                tracing::info!(key, service = %service.name, "removed container");
-            }
-        }
-        Ok(())
-    }
-
-    /// Periodic ground-truth reconcile. ROADMAP §5.6 — "polling instead of
-    /// event-driven PLEG. Agent polls podman every 30s as ground truth;
-    /// KV watch events are accelerators." Re-runs each cached score against
-    /// podman-api; the underlying `ensure_service_running` is idempotent
-    /// so a converged state produces no log noise.
-    pub async fn tick(&self) -> Result<()> {
-        let snapshot: Vec<(String, PodmanV0Score)> = {
-            let state = self.state.lock().await;
-            state
-                .iter()
-                .map(|(k, v)| (k.clone(), v.score.clone()))
-                .collect()
-        };
-        for (key, score) in snapshot {
-            if let Err(e) = self.run_score(&key, &score).await {
-                tracing::warn!(key, error = %e, "periodic reconcile failed");
-            }
-        }
-        Ok(())
-    }
-
-    pub async fn run_periodic(self: Arc<Self>, interval: Duration) {
-        let mut ticker = tokio::time::interval(interval);
-        ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
-        loop {
-            ticker.tick().await;
-            if let Err(e) = self.tick().await {
-                tracing::warn!(error = %e, "reconcile tick error");
-            }
-        }
-    }
-
-    async fn run_score(&self, key: &str, score: &PodmanV0Score) -> Result<()> {
-        let interpret = Score::<PodmanTopology>::create_interpret(score);
-        let outcome = interpret
-            .execute(&self.inventory, &self.topology)
-            .await
-            .map_err(|e| anyhow::anyhow!("PodmanV0Score interpret failed for {key}: {e}"))?;
-        tracing::info!(key, outcome = ?outcome, "reconciled");
-        Ok(())
-    }
-}
diff --git a/iot/iot-operator-v0/src/controller.rs b/iot/iot-operator-v0/src/controller.rs
deleted file mode 100644
index 54cc37a2..00000000
--- a/iot/iot-operator-v0/src/controller.rs
+++ /dev/null
@@ -1,137 +0,0 @@
-use std::sync::Arc;
-use std::time::Duration;
-
-use async_nats::jetstream::kv::Store;
-use futures_util::StreamExt;
-use harmony_reconciler_contracts::desired_state_key;
-use kube::api::{Patch, PatchParams};
-use kube::runtime::Controller;
-use kube::runtime::controller::Action;
-use kube::runtime::finalizer::{Event as FinalizerEvent, finalizer};
-use kube::runtime::watcher::Config as WatcherConfig;
-use kube::{Api, Client, ResourceExt};
-use serde_json::json;
-
-use crate::crd::{Deployment, DeploymentStatus, ScorePayload};
-
-const FINALIZER: &str = "iot.nationtech.io/finalizer";
-
-#[derive(Debug, thiserror::Error)]
-pub enum Error {
-    #[error("kube api: {0}")]
-    Kube(#[from] kube::Error),
-    #[error("nats kv: {0}")]
-    Kv(String),
-    #[error("serde: {0}")]
-    Serde(#[from] serde_json::Error),
-    #[error("missing namespace on resource")]
-    MissingNamespace,
-    #[error("missing target devices")]
-    MissingTargets,
-}
-
-pub struct Context {
-    pub client: Client,
-    pub kv: Store,
-}
-
-pub async fn run(client: Client, kv: Store) -> anyhow::Result<()> {
-    let api: Api<Deployment> = Api::all(client.clone());
-    let ctx = Arc::new(Context { client, kv });
-
-    tracing::info!("starting Deployment controller");
-    Controller::new(api, WatcherConfig::default())
-        .run(reconcile, error_policy, ctx)
-        .for_each(|res| async move {
-            match res {
-                Ok((obj, _)) => tracing::debug!(?obj, "reconciled"),
-                Err(e) => tracing::warn!(error = %e, "reconcile error"),
-            }
-        })
-        .await;
-    Ok(())
-}
-
-async fn reconcile(obj: Arc<Deployment>, ctx: Arc<Context>) -> Result<Action, Error> {
-    let ns = obj.namespace().ok_or(Error::MissingNamespace)?;
-    let name = obj.name_any();
-    tracing::info!(%ns, %name, "reconcile");
-
-    let api: Api<Deployment> = Api::namespaced(ctx.client.clone(), &ns);
-    finalizer(&api, FINALIZER, obj, |event| async {
-        match event {
-            FinalizerEvent::Apply(d) => apply(d, &api, &ctx.kv).await,
-            FinalizerEvent::Cleanup(d) => cleanup(d, &ctx.kv).await,
-        }
-    })
-    .await
-    .map_err(|e| match e {
-        kube::runtime::finalizer::Error::ApplyFailed(e)
-        | kube::runtime::finalizer::Error::CleanupFailed(e) => e,
-        kube::runtime::finalizer::Error::AddFinalizer(e)
-        | kube::runtime::finalizer::Error::RemoveFinalizer(e) => Error::Kube(e),
-        kube::runtime::finalizer::Error::UnnamedObject => Error::Kv("unnamed object".into()),
-        kube::runtime::finalizer::Error::InvalidFinalizer => Error::Kv("invalid finalizer".into()),
-    })
-}
-
-async fn apply(obj: Arc<Deployment>, api: &Api<Deployment>, kv: &Store) -> Result<Action, Error> {
-    let name = obj.name_any();
-    if obj.spec.target_devices.is_empty() {
-        return Err(Error::MissingTargets);
-    }
-    let score_json = serialize_score(&obj.spec.score)?;
-
-    let already_observed = obj
-        .status
-        .as_ref()
-        .and_then(|s| s.observed_score_string.as_deref())
-        == Some(score_json.as_str());
-    if already_observed {
-        tracing::debug!(%name, "score unchanged; skipping KV write and status patch");
-        return Ok(Action::requeue(Duration::from_secs(300)));
-    }
-
-    for device_id in &obj.spec.target_devices {
-        let key = kv_key(device_id, &name);
-        kv.put(key.clone(), score_json.clone().into_bytes().into())
-            .await
-            .map_err(|e| Error::Kv(e.to_string()))?;
-        tracing::info!(%key, "wrote desired state");
-    }
-
-    let status = json!({
-        "status": DeploymentStatus {
-            observed_score_string: Some(score_json),
-        }
-    });
-    api.patch_status(&name, &PatchParams::default(), &Patch::Merge(&status))
-        .await?;
-
-    Ok(Action::requeue(Duration::from_secs(300)))
-}
-
-async fn cleanup(obj: Arc<Deployment>, kv: &Store) -> Result<Action, Error> {
-    let name = obj.name_any();
-    for device_id in &obj.spec.target_devices {
-        let key = kv_key(device_id, &name);
-        kv.delete(&key)
-            .await
-            .map_err(|e| Error::Kv(e.to_string()))?;
-        tracing::info!(%key, "deleted desired state");
-    }
-    Ok(Action::await_change())
-}
-
-fn serialize_score(score: &ScorePayload) -> Result<String, Error> {
-    Ok(serde_json::to_string(score)?)
-}
-
-fn kv_key(device_id: &str, deployment_name: &str) -> String {
-    desired_state_key(device_id, deployment_name)
-}
-
-fn error_policy(_obj: Arc<Deployment>, err: &Error, _ctx: Arc<Context>) -> Action {
-    tracing::warn!(error = %err, "requeueing after error");
-    Action::requeue(Duration::from_secs(30))
-}
diff --git a/iot/iot-operator-v0/src/install.rs b/iot/iot-operator-v0/src/install.rs
deleted file mode 100644
index 48bead63..00000000
--- a/iot/iot-operator-v0/src/install.rs
+++ /dev/null
@@ -1,96 +0,0 @@
-//! Install the operator's CRD into a target Kubernetes cluster
-//! via a harmony Score — no yaml generation, no kubectl shell-out.
-//!
-//! The Score side is just [`K8sResourceScore`] over
-//! [`Deployment::crd()`]; what this module owns is a thin
-//! [`InstallTopology`] that satisfies `K8sclient` by loading the
-//! current `KUBECONFIG` directly. We don't use
-//! [`K8sAnywhereTopology`] because its `ensure_ready` does a lot of
-//! product-level setup (cert-manager, tenant manager, helm probes)
-//! that isn't appropriate for a narrow "apply a CRD" action.
-
-use std::sync::Arc;
-
-use anyhow::{Context, Result};
-use async_trait::async_trait;
-use harmony::inventory::Inventory;
-use harmony::modules::k8s::resource::K8sResourceScore;
-use harmony::score::Score;
-use harmony::topology::{K8sclient, PreparationOutcome, Topology};
-use harmony_k8s::K8sClient;
-use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
-use kube::CustomResourceExt;
-
-use crate::crd::Deployment;
-
-/// Topology that only knows how to hand out a pre-built `K8sClient`.
-/// Used by [`install_crds`] so the Score machinery has something
-/// that satisfies `K8sclient` without dragging in the full
-/// `K8sAnywhereTopology` bootstrap.
-///
-/// # Architectural smell — do not copy this pattern without reading the roadmap
-///
-/// Vendoring an ad-hoc `Topology` impl in a module that just wants to
-/// apply a CRD is a symptom of a bigger problem: the existing
-/// opinionated topologies (`K8sAnywhereTopology`, `HAClusterTopology`)
-/// have accumulated product-level side effects in their `ensure_ready`
-/// — cert-manager install, tenant manager setup, helm probes — that
-/// make them unfit for narrow actions. The correct long-term fix is a
-/// minimal reusable `K8sBareTopology` in harmony that carries a
-/// `K8sClient` and exposes `K8sclient` with a noop `ensure_ready`, so
-/// every narrow Score isn't tempted to vendor its own copy.
-///
-/// See `ROADMAP/12-code-review-april-2026.md` §12.6 "Topology
-/// proliferation". The explicit smoke test for "that roadmap item is
-/// done" is: this file can delete `InstallTopology` and replace
-/// `topology` construction with a one-liner against the shared type.
-struct InstallTopology {
-    client: Arc<K8sClient>,
-}
-
-#[async_trait]
-impl Topology for InstallTopology {
-    fn name(&self) -> &str {
-        "iot-operator-install"
-    }
-    async fn ensure_ready(
-        &self,
-    ) -> Result {
-        Ok(PreparationOutcome::Noop)
-    }
-}
-
-#[async_trait]
-impl K8sclient for InstallTopology {
-    async fn k8s_client(&self) -> Result<Arc<K8sClient>, String> {
-        Ok(self.client.clone())
-    }
-}
-
-/// Apply the operator's CRDs to whatever cluster `KUBECONFIG` points
-/// at. Returns once the apply call completes — does **not** wait for
-/// the apiserver to mark the CRD `Established`; the caller does that
-/// (e.g. with `kubectl wait --for=condition=Established`) if it
-/// cares.
-pub async fn install_crds() -> Result<()> {
-    let kube_client = kube::Client::try_default()
-        .await
-        .context("building kube client from KUBECONFIG")?;
-    let topology = InstallTopology {
-        client: Arc::new(K8sClient::new(kube_client)),
-    };
-    let inventory = Inventory::empty();
-
-    let crd: CustomResourceDefinition = Deployment::crd();
-    let score = K8sResourceScore::<CustomResourceDefinition>::single(crd, None);
-
-    let interpret = Score::<InstallTopology>::create_interpret(&score);
-    let outcome = interpret
-        .execute(&inventory, &topology)
-        .await
-        .map_err(|e| anyhow::anyhow!("install CRD: {e}"))
-        .context("executing K8sResourceScore for Deployment CRD")?;
-
-    tracing::info!(?outcome, "CRD installed");
-    Ok(())
-}
diff --git a/iot/iot-operator-v0/src/main.rs b/iot/iot-operator-v0/src/main.rs
deleted file mode 100644
index f62983e5..00000000
--- a/iot/iot-operator-v0/src/main.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-mod controller;
-mod crd;
-mod install;
-
-use anyhow::Result;
-use async_nats::jetstream;
-use clap::{Parser, Subcommand};
-use harmony_reconciler_contracts::BUCKET_DESIRED_STATE;
-use kube::Client;
-
-#[derive(Parser)]
-#[command(
-    name = "iot-operator-v0",
-    about = "IoT operator — Deployment CRD → NATS KV"
-)]
-struct Cli {
-    #[command(subcommand)]
-    command: Option<Command>,
-
-    #[arg(
-        long,
-        env = "NATS_URL",
-        default_value = "nats://localhost:4222",
-        global = true
-    )]
-    nats_url: String,
-
-    #[arg(
-        long,
-        env = "KV_BUCKET",
-        default_value = BUCKET_DESIRED_STATE,
-        global = true
-    )]
-    kv_bucket: String,
-}
-
-#[derive(Subcommand)]
-enum Command {
-    /// Run the controller (default when no subcommand is given).
-    Run,
-    /// Apply the operator's CRD to the cluster `KUBECONFIG` points
-    /// at. Uses harmony's typed k8s client — no yaml, no kubectl.
-    Install,
-}
-
-#[tokio::main]
-async fn main() -> Result<()> {
-    tracing_subscriber::fmt()
-        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
-        .init();
-
-    let cli = Cli::parse();
-    match cli.command.unwrap_or(Command::Run) {
-        Command::Install => install::install_crds().await,
-        Command::Run => run(&cli.nats_url, &cli.kv_bucket).await,
-    }
-}
-
-async fn run(nats_url: &str, bucket: &str) -> Result<()> {
-    let nats = async_nats::connect(nats_url).await?;
-    tracing::info!(url = %nats_url, "connected to NATS");
-    let js = jetstream::new(nats);
-    let kv = js
-        .create_key_value(jetstream::kv::Config {
-            bucket: bucket.to_string(),
-            ..Default::default()
-        })
-        .await?;
-    tracing::info!(bucket = %bucket, "KV bucket ready");
-
-    let client = Client::try_default().await?;
-    controller::run(client, kv).await
-}
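
Reviewer note: the single-node preset added above can be sanity-checked without a cluster, since `render_values()` is public. A minimal sketch, assuming only the `harmony` crate from this workspace; the binary scaffolding itself is illustrative and not part of this change:

```rust
use harmony::modules::nats::NatsBasicScore;

fn main() {
    // Same values string NatsBasicInterpret hands to NatsHelmChartScore
    // (and, through it, to `helm upgrade --install`).
    let score = NatsBasicScore::new("fleet-nats", "fleet-system").node_port(30222);
    println!("{}", score.render_values());
}
```

Expected output is the single-node shape encoded in `score_nats_basic.rs`: `fullnameOverride`, `replicaCount: 1`, JetStream with file storage, and the NodePort service merge patch.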
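
The image-override split rule (tag after the last colon, unless that colon belongs to a registry port) is also easiest to confirm through `render_values()`. A small sketch under the same assumption; the registry host is hypothetical and used purely for illustration:

```rust
use harmony::modules::nats::NatsBasicScore;

fn main() {
    // Tag after the last colon: repository and tag are split.
    let tagged = NatsBasicScore::new("n", "ns")
        .image("registry.example.io:5000/fleet/nats:2.10-alpine")
        .render_values();
    assert!(tagged.contains("repository: registry.example.io:5000/fleet/nats"));
    assert!(tagged.contains("tag: 2.10-alpine"));

    // Only a registry-port colon: no tag line is emitted, so the
    // chart's default tag applies.
    let untagged = NatsBasicScore::new("n", "ns")
        .image("registry.example.io:5000/fleet/nats")
        .render_values();
    assert!(!untagged.contains("tag:"));
}
```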