chore/rename-release-to-publish #318

Closed
johnride wants to merge 15 commits from chore/rename-release-to-publish into master
25 changed files with 1113 additions and 459 deletions

View File

@@ -4,7 +4,7 @@ name: harmony-fleet-operator — release
# published chart is `harmony apply` # published chart is `harmony apply`
# (harmony-fleet-deploy --operator-chart-version), run manually today; a # (harmony-fleet-deploy --operator-chart-version), run manually today; a
# CD job lands once the cluster KUBECONFIG + NATS secrets are provisioned. # CD job lands once the cluster KUBECONFIG + NATS secrets are provisioned.
# Tag parsing lives in Rust (harmony-fleet-release), not in YAML. # Tag parsing lives in Rust (harmony-fleet-publish), not in YAML.
on: on:
push: push:
tags: tags:
@@ -48,4 +48,4 @@ jobs:
- name: Build + push image and chart - name: Build + push image and chart
env: env:
TAG: ${{ inputs.tag || github.ref_name }} TAG: ${{ inputs.tag || github.ref_name }}
run: cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-release -- --from-tag "$TAG" run: cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-publish -- --from-tag "$TAG"

23
Cargo.lock generated
View File

@@ -3213,12 +3213,15 @@ dependencies = [
name = "example-openbao" name = "example-openbao"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow",
"clap",
"harmony", "harmony",
"harmony_cli", "harmony_cli",
"harmony_macros", "harmony_config",
"harmony_types", "schemars 0.8.22",
"serde",
"tokio", "tokio",
"url", "tracing",
] ]
[[package]] [[package]]
@@ -3495,16 +3498,19 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
"env_logger",
"harmony", "harmony",
"harmony-fleet-deploy", "harmony-fleet-deploy",
"harmony-k8s", "harmony-k8s",
"harmony-nats-callout", "harmony-nats-callout",
"harmony_cli", "harmony_cli",
"log", "harmony_config",
"harmony_secret",
"nkeys", "nkeys",
"rand 0.9.4", "rand 0.9.4",
"schemars 0.8.22",
"serde",
"tokio", "tokio",
"tracing",
] ]
[[package]] [[package]]
@@ -4308,18 +4314,17 @@ name = "harmony_cli"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"assert_cmd", "assert_cmd",
"chrono", "async-trait",
"clap", "clap",
"console", "console",
"env_logger",
"harmony", "harmony",
"harmony_tui", "harmony_tui",
"indicatif", "indicatif",
"indicatif-log-bridge",
"inquire 0.7.5", "inquire 0.7.5",
"lazy_static", "lazy_static",
"log",
"tokio", "tokio",
"tracing",
"tracing-subscriber",
] ]
[[package]] [[package]]

281
ROADMAP/13-unified-cli.md Normal file
View File

@@ -0,0 +1,281 @@
# Phase 13: Unified CLI — Extensible, Composable, Subcommand-Driven
## Goal
Replace the current landscape of disconnected `harmony-*` binaries and
60+ example `main.rs` files with a single, extensible CLI where:
- The framework provides global concerns (config, SSO, topology selection,
Score runner, TUI) as shared infrastructure.
- Each module (fleet, tenant, okd, …) registers its own subcommands.
- Third-party `MyAppScore` authors get `harmony myapp deploy` with zero
framework boilerplate.
The CLI is the user-facing surface of Harmony. Every design decision here
shapes the developer experience for the entire ecosystem.
## Current State
- `harmony_cli::Args` — flat Score-runner flags (`--yes`, `--filter`,
`--list`, `--number`, `--interactive`). Drives the Maestro loop over
a `Vec<Score>`.
- `harmony_cli::run(Inventory, Topology, Vec<Score>, Option<Args>)` —
the single entry point consumed by 60+ example binaries.
- `harmony_tui::run()` — separate crate, separate `run()`, same inputs.
- `harmony-fleet-deploy` — deploy binary with `deploy`/`publish`
subcommands (just merged from two separate binaries).
- `harmony_composer` — infrastructure composition tool, separate binary.
- ADR-023 principle 8 describes the staged evolution (B → C) but defers
the plugin protocol.
## Design
### Top-level binary with subcommands
```
harmony [global flags] <module> <action> [action flags]
harmony --config-namespace fleet-staging fleet deploy --from-tag v0.1.0
harmony --config-namespace fleet-staging fleet publish --from-tag v0.1.0 --no-push
harmony --config-namespace okd-staging okd bootstrap
harmony --config-namespace tenant-c1 tenant create --name c1
harmony --config-namespace harmony myapp deploy --image foo:latest
```
Global flags (owned by the top-level binary):
- `--config-namespace` — maps to `ConfigClient::for_namespace()`
- `--kubeconfig` — topology selection
- `--topology` — explicit topology choice (k3d, okd, bare, …)
- `--yes` — skip confirmation prompts
- `--interactive` — delegate to TUI
Module subcommands (owned by each module):
- `fleet deploy`, `fleet publish`
- `tenant create`, `tenant list`, `tenant health`, `tenant install`
- `okd bootstrap`, `okd add-node`
- User-defined: `myapp deploy`, `myapp publish`, …
### Two kinds of subcommands
**Score-runner subcommands** — compose multiple Scores, need
`--filter`/`--list`/`--number`. Examples, ad-hoc orchestration, the
current `harmony_cli::run()` use case. The Maestro loop lives here.
**Action subcommands** — single-purpose (deploy a chart, publish an
image, create a tenant). No filter/list/number. Run one Score or a
fixed composition.
The distinction matters: forcing action subcommands through the
filter/list/number machinery is ceremony; forcing Score-runner
subcommands into a rigid single-action shape is constraining.
### Deploy crates become library-only
Per ADR-023 principle 5, deploy logic lives in `*-deploy` crates. The
unified CLI absorbs the **binaries** — deploy crates lose their
`[[bin]]` entries and become libraries consumed by the top-level
`harmony` binary. The crate boundary stays; the binary boundary goes
away.
```
harmony-fleet-deploy/
Cargo.toml # [lib] only, no [[bin]]
src/
lib.rs # FleetDeployConfig, FleetDeploySecrets, FleetOperatorScore
commands.rs # DeployCommand, PublishCommand (clap Subcommand structs)
```
The top-level `harmony` binary imports `harmony_fleet_deploy::commands`
and wires them into its own `Command` enum.
### Publish logic as Scores
Build/push logic (currently imperative `Command::new("docker")` in
`harmony-fleet-publish`) should be encapsulated in Scores, following
the `Application` trait + feature composition pattern
(`examples/try_rust_webapp` + `PackagingDeployment`). The publish
subcommand becomes a thin CLI wrapper over a Score composition, not
a shell-out script.
This is not `PackagingDeployment` specifically — the operator isn't a
`RustWebapp`. The pattern is the **`Application` trait + feature
composition** model: a typed application description with composable
features (build, push, deploy, monitor).
### Plugin discovery (stage C, deferred)
ADR-023 principle 8 envisions `harmony` discovering `harmony-*`
binaries on `$PATH` (kubectl-style). This is the third-party
extensibility story: a `MyAppScore` author ships a `harmony-myapp`
binary, and `harmony myapp deploy` works without rebuilding the
framework.
**Open question**: is the end state a monolithic binary with
composable subcommands (first-party modules compiled in), or
kubectl-style plugin discovery for everything? Likely both:
first-party modules are compiled-in subcommands (tighter integration,
shared types), third-party modules are discovered plugins (loose
coupling, separate release cycles). The protocol for plugin
communication (env vars, stdin JSON, exit codes) is a separate design
effort.
### TUI integration
`harmony_tui` is a separate crate with its own `run()`. The unified
CLI's `--interactive` global flag delegates to `harmony_tui::run()`
for Score-runner subcommands. Action subcommands may or may not have
TUI equivalents — that's per-subcommand, not global.
### `harmony_composer`
Stays separate for now. It's an infrastructure composition tool with
a different audience (platform engineers building topologies, not
operators deploying apps). May become `harmony compose` later if the
use cases converge.
## Tasks
### 13.1 Rewrite `harmony_cli` — subcommand-aware runner
Replace the flat `Args` struct with a subcommand-aware `Cli` struct.
Global flags move to the top level. The `run()` function accepts a
`Command` enum instead of `Option<Args>`.
```rust
#[derive(Parser)]
struct Cli {
#[arg(long, env = "HARMONY_CONFIG_NAMESPACE", global = true)]
config_namespace: String,
#[arg(long, global = true)]
kubeconfig: Option<String>,
#[arg(long, global = true)]
yes: bool,
#[command(subcommand)]
command: Command,
}
```
**Files**: `harmony_cli/src/lib.rs`, `harmony_cli/src/args.rs` (new)
**Blocked by**: Phase 02 (config migration — so the new CLI is born
on `harmony_config`, not retrofitted)
**Blocks**: 13.2, 13.3
### 13.2 Migrate one deploy binary to subcommand pattern
Proof of concept: `harmony-fleet-deploy` already has `deploy`/`publish`
subcommands. Migrate it to the new `harmony_cli` runner: deploy crate
becomes library-only, exports `Command` enum, top-level binary wires
it in.
**Files**: `fleet/harmony-fleet-deploy/`, new top-level `harmony` binary
**Blocked by**: 13.1
### 13.3 Migrate examples
Each of the 60+ examples currently calls `harmony_cli::run()` with
flat args. Migration: each example becomes a subcommand of the
top-level `harmony` binary, or stays as a standalone binary that
imports the new `harmony_cli` runner.
**Migration shape** (before/after):
```rust
// Before (standalone binary)
fn main() {
harmony_cli::run(Inventory::autoload(), topology, scores, None).await;
}
// After (subcommand of top-level binary)
// In the example's crate:
pub struct MyExampleCommand { /* clap args */ }
impl Subcommand for MyExampleCommand { ... }
// In the top-level binary:
enum Command {
MyExample(MyExampleCommand),
Fleet(FleetCommand),
...
}
```
**Files**: 60+ example crates
**Blocked by**: 13.2 (prove the pattern works on one)
### 13.4 Publish-as-Score
Extract build/push logic from `harmony-fleet-publish` into Scores
following the `Application` trait + feature composition pattern.
The `publish` subcommand becomes a thin wrapper.
**Files**: `harmony/src/modules/application/` (extend), `fleet/harmony-fleet-deploy/`
**Blocked by**: 13.2
### 13.5 Topology selection in the CLI
Global `--topology` flag or auto-detection. Requires Phase 12.6
(topology proliferation / `K8sBareTopology`) to land first — the
CLI's topology selection is simpler if the topology landscape is
clean.
**Blocked by**: Phase 12.6
### 13.6 Plugin discovery protocol (stage C)
Design the protocol for third-party `harmony-*` binaries to
communicate with the top-level `harmony` binary. Env vars for
global args? stdin JSON? Exit codes for outcomes?
**Status**: Research + ADR first. No implementation until the
protocol is locked.
**Blocked by**: 13.5 (first-party subcommands working end-to-end)
## Dependencies
```
Phase 02 (config migration) ──→ 13.1 (CLI rewrite)
Phase 12.6 (topology cleanup) ──→ 13.5 (topology selection)
13.1 ──→ 13.2 (fleet-deploy migration)
13.2 ──→ 13.3 (example migration)
13.2 ──→ 13.4 (publish-as-Score)
13.5 ──→ 13.6 (plugin discovery)
```
Phase 11 (named config instances) can land after the CLI rewrite —
the global `--config-namespace` flag maps directly to
`ConfigClient::for_namespace()`, and named instances
(`get_named::<T>("fw-primary")`) become a CLI concern too.
## ADR-023 Tensions
These need resolution during implementation:
1. **Principle 5 vs. absorbing binaries.** Deploy crates keep their
crate boundary (library + Scores) but lose their `[[bin]]`. The
unified binary is the sole entry point. This is a refinement of
principle 5, not a violation — the deploy logic still lives in
the deploy crate.
2. **Principle 8 monolith vs. plugin.** First-party modules are
compiled-in subcommands. Third-party modules are discovered
plugins. The boundary between "first-party" and "third-party"
needs a clear doctrine (likely: anything in the harmony repo is
first-party; everything else is a plugin).
3. **`harmony_composer` placement.** Stays separate for now. If the
use cases converge with the unified CLI, it becomes `harmony
compose`. Not a blocker.
## References
- ADR-023 principle 8 — CLI: hybrid, staged (B → C)
- ADR-023 principle 5 — deploy logic in `*-deploy` crates
- ADR draft 024 §Q5 — runtime tools in the dependency graph
- `examples/try_rust_webapp` — `Application` trait + feature composition
- `harmony/src/modules/application/features/packaging_deployment.rs` —
build/push as a Score feature
- Phase 02 — config migration (prerequisite)
- Phase 11 — named config instances (parallel)
- Phase 12.6 — topology proliferation (prerequisite for 13.5)

View File

@@ -0,0 +1,134 @@
# Fleet Platform v0.3 — Staging to production-ready
Written 2026-05-31. Picks up after OpenBao + Zitadel + NATS + callout + operator are deployed and functional on staging (the deployed versions are 2-3 weeks old).
## Current state
- [x] OpenBao running at `secrets-stg.cb1.nationtech.io`
- [x] Zitadel running at `sso-stg.cb1.nationtech.io`
- [x] NATS + auth callout deployed in `fleet-staging` namespace
- [x] Operator deployed (older version, 2-3 weeks old)
- [x] Config-driven OpenBao installer (`examples/openbao`)
- [x] `harmony-fleet-deploy` binary reads `FleetDeployConfig` + `FleetDeploySecrets` from OpenBao
## Immediate next steps
### 1. Provision operator credentials in OpenBao
- [ ] Fetch existing creds from the running cluster:
```bash
oc -n fleet-staging get secret harmony-fleet-operator-secrets -o jsonpath='{.data.credentials\.toml}' | base64 -d
```
- [ ] Seed into OpenBao at `secret/data/fleet-staging/FleetDeploySecrets`:
```bash
export VAULT_ADDR=https://secrets-stg.cb1.nationtech.io
export VAULT_TOKEN=<root token>
oc -n fleet-staging get secret harmony-fleet-operator-secrets -o jsonpath='{.data.credentials\.toml}' | base64 -d \
| jq -Rs '{value: ({operator_credentials_toml: .} | tojson)}' \
| bao kv put secret/fleet-staging/FleetDeploySecrets -
```
- [ ] Verify the secret is readable: `bao kv get secret/fleet-staging/FleetDeploySecrets`
### 2. Private repo deploy script
- [ ] Create `.envrc` with minimal env:
```bash
export OPENBAO_URL=https://secrets-stg.cb1.nationtech.io
export HARMONY_CONFIG_NAMESPACE=fleet-staging
# export OPENBAO_TOKEN=<root token for now; SSO later>
```
- [ ] Write deploy invocation (shell script or just `harmony-fleet-deploy` call):
```bash
harmony-fleet-deploy --from-tag harmony-fleet-operator-vX.Y.Z --yes
```
- [ ] Commit `.envrc` + script to private repo (shared with teammates)
### 3. Execute operator upgrade
- [ ] Run the deploy script from the private repo
- [ ] Verify operator pod starts and connects to NATS
- [ ] Verify operator reconciles existing CRs (check logs)
- [ ] Confirm no regression in existing fleet functionality
### 4. Operator UI ingress (trivial)
- [ ] Expose operator UI with TLS ingress on `fleet-stg.<base_domain>`
- [ ] Verify the UI loads and serves the SPA
- [ ] Confirm no auth gate yet (SSO is next)
### 5. SSO login flow
- [ ] Wire operator UI to Zitadel SSO at `sso-stg.<base_domain>`
- [ ] Test login/logout flow end-to-end
- [ ] Verify session persistence across page reloads
- [ ] Confirm RBAC: only authorized Zitadel users can access the UI
### 6. Real data in UI
- [ ] Replace mock device list with live `device-info` KV data
- [ ] Replace mock deployment list with live `Deployment` CR data
- [ ] Wire per-device drilldown to real `DeviceInfo` + last-heartbeat + agent version
- [ ] NATS tail panel: SSE stream of `device-info` and `device-state` updates (plain text)
- [ ] Verify data refreshes without manual reload
## Configuration model
### Environment (minimal, committed in private repo)
```bash
OPENBAO_URL=https://secrets-stg.cb1.nationtech.io
HARMONY_CONFIG_NAMESPACE=fleet-staging
# SSO auth or root token (SSO is the goal)
```
### OpenBao (read via ConfigClient)
- `FleetDeployConfig` (k8s namespaces, NATS URL, chart coords) at `secret/data/fleet-staging/FleetDeployConfig`
- `FleetDeploySecrets` (operator creds) at `secret/data/fleet-staging/FleetDeploySecrets`
## Missing features (post-UI)
### Auth & credentials
- [ ] Per-device OpenBao policies (templated policies, one role per device type)
- [ ] Device identity claim in JWT (Zitadel `client_id` with `device-` prefix)
- [ ] OpenBao JWT auth role granularity (extend `OpenbaoJwtAuth` to list of roles)
- [x] Move k8s namespaces + chart coords into `ConfigClient` config struct (env = only identifier + auth)
### Operator capabilities
- [ ] Agent upgrade path (ADR-022 exists; implementation pending)
- [ ] Device enrollment flow (operator-facing runbook)
- [ ] Revoke device / rotate key operations
- [ ] Fleet-wide rollout strategies (canary, %-based) on top of agent-upgrade primitive
### Observability
- [ ] Operator logs every CR it acquires (verify output reads well)
- [ ] NATS debugging one-liners in hand-off menu
- [ ] Journald log streaming (currently only `.status.aggregate.lastError`)
- [ ] Metrics dashboard (deferred until >100 devices)
### Quality & hardening
- [ ] Agent config-driven labels (`[labels]` in agent toml → DeviceInfo)
- [ ] `matchExpressions` in selectors (currently `matchLabels` only)
- [ ] `Device.status.conditions` populated from heartbeat staleness
- [ ] Operator graceful degradation on bad device_id (log + skip, don't restart-loop)
- [ ] Persist `nats_auth_pass` and issuer NKey via `harmony_secret` (regenerate-every-run footgun)
### Refactors (deferred, non-blocking)
- [ ] Decompose `FleetServerScore` into independent, ConfigClient-glued Scores
- [ ] Move `harmony/modules/fleet/` → `fleet/harmony-fleet/` (ADR-021 pending)
- [ ] Delete `examples/fleet_staging_deploy` (superseded by `fleet_staging_install`)
- [ ] Drop `K8sAnywhereTopology` for ad-hoc Score execution; introduce `K8sBareTopology`
## Principles (carried forward)
- No yaml in framework code paths
- Scores describe desired state; topologies expose capabilities
- Cross-boundary wire types in `harmony-reconciler-contracts`
- Never ship untested code
- Prove claims about upstream before blaming upstream
- Design the brick before moving the brick

View File

@@ -13,6 +13,8 @@ path = "src/main.rs"
[dependencies] [dependencies]
harmony = { path = "../../harmony" } harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" } harmony_cli = { path = "../../harmony_cli" }
harmony_config = { path = "../../harmony_config" }
harmony_secret = { path = "../../harmony_secret" }
harmony-k8s = { path = "../../harmony-k8s" } harmony-k8s = { path = "../../harmony-k8s" }
harmony-nats-callout = { path = "../../nats/callout" } harmony-nats-callout = { path = "../../nats/callout" }
harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" } harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" }
@@ -21,5 +23,6 @@ rand = "0.9"
anyhow.workspace = true anyhow.workspace = true
clap = { version = "4", features = ["derive", "env"] } clap = { version = "4", features = ["derive", "env"] }
tokio.workspace = true tokio.workspace = true
log.workspace = true tracing = { workspace = true }
env_logger.workspace = true serde = { workspace = true }
schemars = "0.8"

View File

@@ -1,32 +1,19 @@
//! Production-shape fleet install for OKD (or any cluster with the //! Production-shape fleet install for OKD (or any cluster with the same
//! same capabilities). Composes: //! capabilities): Zitadel SSO + NATS (auth-callout) + operator + OpenBao,
//! composed from Scores.
//! //!
//! 1. Zitadel + Postgres helm install in `--zitadel-namespace`, //! Tunables come from [`ConfigClient`] (`HARMONY_CONFIG_FleetStagingConfig`
//! edge-TLS Route at `sso-staging.<base>` via cert-manager. //! env JSON → OpenBao → interactive prompt), not a bespoke CLI. The only
//! 2. ZitadelSetupScore in the same call so we have the //! flags are `harmony_cli`'s: `--filter`/`--list`/`-y` select which workload
//! `fleet-operator` machine key BEFORE the operator pod starts. //! Scores to (re)deploy — e.g. `--filter FleetOperatorScore` bumps the
//! 3. Single-instance NATS (JetStream) in `--fleet-namespace` with //! operator without touching NATS or the callout.
//! the auth_callout block wired to the callout's issuer NKey
//! pubkey + WebSocket listener (no_tls — Route owns TLS).
//! 4. NATS WebSocket Route at `nats-fleet-staging.<base>`,
//! edge-TLS, cert-manager-managed cert.
//! 5. NatsAuthCalloutScore deployment (Secret-based env vars only,
//! no volume mounts — OKD restricted-v2 SCC compat).
//! 6. FleetOperatorScore with credentials TOML inlining the
//! `fleet-operator` JSON keyfile (env-var-from-Secret only).
//! //!
//! One required CLI flag — `--base-domain` — drives every public //! Zitadel + OpenBao are an idempotent bootstrap: ZitadelSetupScore mints the
//! hostname. Per-cluster overrides for the cluster issuer name and //! `project_id` + `fleet-operator` machine key that the callout and operator
//! image refs follow. //! Scores consume, so it must converge (and cache to disk) before they're
//! //! built. That data flow is why those two can't sit in the filterable batch.
//! Usage:
//! use std::sync::Arc;
//! ```text
//! KUBECONFIG=$ADMIN_KUBECONFIG cargo run -p example_fleet_staging_install -- \
//! --base-domain cb1.nationtech.io \
//! --operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev \
//! --callout-image hub.nationtech.io/harmony/harmony-nats-callout:dev
//! ```
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use clap::Parser; use clap::Parser;
@@ -34,390 +21,349 @@ use harmony::inventory::Inventory;
use harmony::modules::nats::capability::NatsCluster; use harmony::modules::nats::capability::NatsCluster;
use harmony::modules::nats::score_nats_k8s::{AuthCalloutCfg, NatsK8sScore, WebSocketRouteCfg}; use harmony::modules::nats::score_nats_k8s::{AuthCalloutCfg, NatsK8sScore, WebSocketRouteCfg};
use harmony::modules::nats_auth_callout::NatsAuthCalloutScore; use harmony::modules::nats_auth_callout::NatsAuthCalloutScore;
use harmony::modules::openbao::{
OpenbaoInstance, OpenbaoPolicy, OpenbaoScore, OpenbaoSetupScore, cached_root_token,
};
use harmony::modules::zitadel::{ use harmony::modules::zitadel::{
MachineKeyType, ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig, MachineKeyType, ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig,
ZitadelMachineUser, ZitadelRole, ZitadelScore, ZitadelSetupScore, ZitadelMachineUser, ZitadelRole, ZitadelScore, ZitadelSetupScore,
}; };
use harmony::score::Score; use harmony::score::Score;
use harmony::topology::{K8sAnywhereTopology, Topology}; use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology};
use harmony_fleet_deploy::{FleetOperatorScore, OperatorCredentials}; use harmony_config::{Config, ConfigClient, StoreSource};
use harmony_fleet_deploy::{FleetDeploySecrets, FleetOperatorScore, OperatorCredentials};
use harmony_k8s::KubernetesDistribution; use harmony_k8s::KubernetesDistribution;
use harmony_secret::OpenbaoSecretStore;
use nkeys::KeyPair; use nkeys::KeyPair;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tracing::info;
#[derive(Parser, Debug)] /// Non-secret install tunables. `base_domain` drives every public hostname;
#[command( /// the image refs and `*-stg.<base>` hosts have no safe default, so an empty
name = "fleet_staging_install", /// value is rejected at startup. Everything else defaults to the staging
about = "Install fleet staging stack (Zitadel + NATS + callout + operator) on OKD" /// conventions.
)] #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Config)]
struct Cli { #[serde(default)]
/// Cluster's public base domain. Hostnames are derived from it: struct FleetStagingConfig {
/// sso-staging.<base> ← Zitadel
/// nats-fleet-staging.<base> ← NATS WebSocket
///
/// To deploy on a different cluster, change this and re-run.
#[arg(long)]
base_domain: String, base_domain: String,
/// cert-manager `ClusterIssuer` name. Drives the
/// `cert-manager.io/cluster-issuer` annotation on the Zitadel
/// and NATS Routes. Override per cluster if your operator uses
/// a different issuer name.
#[arg(long, default_value = "letsencrypt-prod")]
cluster_issuer: String,
/// Namespace for NATS, callout, operator.
#[arg(long, default_value = "fleet-staging")]
fleet_namespace: String,
/// Namespace for Zitadel + Postgres.
#[arg(long, default_value = "zitadel-staging")]
zitadel_namespace: String,
/// Operator container image (`repository:tag`). Public on
/// hub.nationtech.io for the demo; ImagePullSecret for that
/// registry must already be present in `--fleet-namespace`.
#[arg(long)]
operator_image: String, operator_image: String,
/// Auth callout container image (`repository:tag`).
#[arg(long)]
callout_image: String, callout_image: String,
cluster_issuer: String,
/// NATS account name auth-callout-issued users land in. Must fleet_namespace: String,
/// match the NATS Helm `auth_callout.account` field. Default zitadel_namespace: String,
/// `FLEET` matches the rest of the staging conventions.
#[arg(long, default_value = "FLEET")]
nats_account: String, nats_account: String,
/// Zitadel chart version pin.
#[arg(long, default_value = "v4.12.1")]
zitadel_version: String, zitadel_version: String,
/// Project name created inside Zitadel for fleet auth.
#[arg(long, default_value = "fleet")]
project_name: String, project_name: String,
/// Role name granting full admin (operator + manual ops). The
/// callout maps this role to `pub/sub: [">"]`.
#[arg(long, default_value = "fleet-admin")]
admin_role: String, admin_role: String,
/// Role name granting per-device scoped permissions.
#[arg(long, default_value = "device")]
device_role: String, device_role: String,
/// Username of the operator's Zitadel machine user. Distinct
/// from `fleet-ops` (manual admin tooling) for audit trail.
#[arg(long, default_value = "fleet-operator")]
operator_username: String, operator_username: String,
/// Username of the manual-admin Zitadel machine user (the one
/// you mint tokens with from your laptop).
#[arg(long, default_value = "fleet-ops")]
admin_username: String, admin_username: String,
} }
impl Default for FleetStagingConfig {
fn default() -> Self {
Self {
base_domain: String::new(),
operator_image: String::new(),
callout_image: String::new(),
cluster_issuer: "letsencrypt-prod".to_string(),
fleet_namespace: "fleet-staging".to_string(),
zitadel_namespace: "zitadel-staging".to_string(),
nats_account: "FLEET".to_string(),
zitadel_version: "v4.12.1".to_string(),
project_name: "fleet".to_string(),
admin_role: "fleet-admin".to_string(),
device_role: "device".to_string(),
operator_username: "fleet-operator".to_string(),
admin_username: "fleet-ops".to_string(),
}
}
}
#[tokio::main] #[tokio::main]
async fn main() -> Result<()> { async fn main() -> Result<()> {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) harmony_cli::cli_logger::init();
.try_init() let args = harmony_cli::Args::parse();
.ok();
let cfg: FleetStagingConfig = ConfigClient::for_namespace("harmony")
.await
.get_or_prompt()
.await
.context("loading FleetStagingConfig")?;
anyhow::ensure!(
!cfg.base_domain.is_empty()
&& !cfg.operator_image.is_empty()
&& !cfg.callout_image.is_empty(),
"base_domain, operator_image and callout_image must be set"
);
let cli = Cli::parse();
let topology = K8sAnywhereTopology::from_env(); let topology = K8sAnywhereTopology::from_env();
topology.ensure_ready().await?; topology.ensure_ready().await?;
let zitadel_host = format!("sso-stg.{}", cli.base_domain); let zitadel_host = format!("sso-stg.{}", cfg.base_domain);
let nats_ws_host = format!("nats-fleet-stg.{}", cli.base_domain); let nats_ws_host = format!("nats-fleet-stg.{}", cfg.base_domain);
let secrets_host = format!("secrets-stg.{}", cfg.base_domain);
let nats_release = "fleet-nats";
let cli_app_name = "harmony-cli";
// ---- 1. Zitadel helm install ---------------------------------------- // ---- Bootstrap (idempotent): Zitadel mints the project + operator key ---
let zitadel = ZitadelScore { ZitadelScore {
host: zitadel_host.clone(), host: zitadel_host.clone(),
zitadel_version: cli.zitadel_version.clone(), zitadel_version: cfg.zitadel_version.clone(),
external_secure: true, external_secure: true,
external_port: None, external_port: None,
namespace: cli.zitadel_namespace.clone(), namespace: cfg.zitadel_namespace.clone(),
cluster_issuer: cli.cluster_issuer.clone(), cluster_issuer: cfg.cluster_issuer.clone(),
..Default::default() ..Default::default()
}; }
log::info!( .interpret(&Inventory::empty(), &topology)
"[1/6] Zitadel helm: ns={} host={}", .await
cli.zitadel_namespace, .context("Zitadel helm install")?;
zitadel_host
);
zitadel
.interpret(&Inventory::empty(), &topology)
.await
.context("Zitadel helm install")?;
// ---- 2. ZitadelSetupScore: project + roles + machine users ---------- ZitadelSetupScore {
// Run this BEFORE building the operator score so we have the
// `fleet-operator` machine key in hand when filling
// OperatorCredentials. The Score caches keys to
// ZitadelClientConfig on disk; we read them back here.
log::info!(
"[2/6] Zitadel setup: project={} admin={} operator={}",
cli.project_name,
cli.admin_username,
cli.operator_username
);
let api_app_name = "nats";
let cli_app_name = "harmony-cli";
let zitadel_setup = ZitadelSetupScore {
host: zitadel_host.clone(), host: zitadel_host.clone(),
scheme: Default::default(), scheme: Default::default(),
port: None, port: None,
skip_tls: false, skip_tls: false,
endpoint: None, endpoint: None,
admin_org_id: None, admin_org_id: None,
namespace: cli.zitadel_namespace.clone(), namespace: cfg.zitadel_namespace.clone(),
// Device-code OIDC app for human admin login from // Device-code OIDC app for human admin login from
// `fleet_device_enroll`'s SSO flow. Operators sign in here // `fleet_device_enroll`'s SSO flow. The numeric `client_id` Zitadel
// with their personal Zitadel account; their resulting // generates is read back below and printed for `--admin-oidc-client-id`.
// access token is what `mint_device_credentials` uses to
// create per-device users + keys. The numeric `client_id`
// generated by Zitadel for this app is what gets passed to
// `--admin-oidc-client-id`; we read it back from the
// ZitadelClientConfig cache below and print it in the
// success banner.
applications: vec![ZitadelApplication { applications: vec![ZitadelApplication {
project_name: cli.project_name.clone(), project_name: cfg.project_name.clone(),
app_name: cli_app_name.to_string(), app_name: cli_app_name.to_string(),
app_type: ZitadelAppType::DeviceCode, app_type: ZitadelAppType::DeviceCode,
}], }],
api_apps: vec![ZitadelApiApp { api_apps: vec![ZitadelApiApp {
project_name: cli.project_name.clone(), project_name: cfg.project_name.clone(),
app_name: api_app_name.to_string(), app_name: "nats".to_string(),
}], }],
roles: vec![ roles: vec![
ZitadelRole { ZitadelRole {
project_name: cli.project_name.clone(), project_name: cfg.project_name.clone(),
key: cli.admin_role.clone(), key: cfg.admin_role.clone(),
display_name: "Fleet Admin".to_string(), display_name: "Fleet Admin".to_string(),
group: None, group: None,
}, },
ZitadelRole { ZitadelRole {
project_name: cli.project_name.clone(), project_name: cfg.project_name.clone(),
key: cli.device_role.clone(), key: cfg.device_role.clone(),
display_name: "Device".to_string(), display_name: "Device".to_string(),
group: None, group: None,
}, },
], ],
machine_users: vec![ machine_users: vec![
ZitadelMachineUser { ZitadelMachineUser {
username: cli.admin_username.clone(), username: cfg.admin_username.clone(),
name: "Fleet Operations".to_string(), name: "Fleet Operations".to_string(),
create_pat: false, create_pat: false,
machine_key: Some(MachineKeyType::Json), machine_key: Some(MachineKeyType::Json),
project_name: Some(cli.project_name.clone()), project_name: Some(cfg.project_name.clone()),
grant_roles: vec![cli.admin_role.clone()], grant_roles: vec![cfg.admin_role.clone()],
}, },
ZitadelMachineUser { ZitadelMachineUser {
username: cli.operator_username.clone(), username: cfg.operator_username.clone(),
name: "Fleet Operator (in-cluster)".to_string(), name: "Fleet Operator (in-cluster)".to_string(),
create_pat: false, create_pat: false,
machine_key: Some(MachineKeyType::Json), machine_key: Some(MachineKeyType::Json),
project_name: Some(cli.project_name.clone()), project_name: Some(cfg.project_name.clone()),
grant_roles: vec![cli.admin_role.clone()], grant_roles: vec![cfg.admin_role.clone()],
}, },
], ],
}; }
zitadel_setup .interpret(&Inventory::empty(), &topology)
.interpret(&Inventory::empty(), &topology) .await
.await .context("Zitadel setup (project + roles + machine users)")?;
.context("Zitadel setup (project + roles + machine users)")?;
// Read back the project_id + operator key from cache. // Read back the project_id + operator key + device-code client_id.
let zcfg = ZitadelClientConfig::load() let zcfg = ZitadelClientConfig::load()
.context("ZitadelSetupScore did not produce a client config cache")?; .context("ZitadelSetupScore did not produce a client config cache")?;
let project_id = zcfg let project_id = zcfg
.project_id_by_name(&cli.project_name) .project_id_by_name(&cfg.project_name)
.or(zcfg.project_id.as_ref()) .or(zcfg.project_id.as_ref())
.context("project_id missing from cache after setup")? .context("project_id missing from cache after setup")?
.clone(); .clone();
let operator_machine_key = zcfg let operator_machine_key = zcfg
.machine_key(&cli.operator_username) .machine_key(&cfg.operator_username)
.with_context(|| { .with_context(|| {
format!( format!(
"machine key for {} missing from cache after setup", "machine key for {} missing from cache",
cli.operator_username cfg.operator_username
) )
})? })?
.clone(); .clone();
let cli_client_id = zcfg let cli_client_id = zcfg
.client_id(cli_app_name) .client_id(cli_app_name)
.with_context(|| { .with_context(|| format!("OIDC client_id for app '{cli_app_name}' missing from cache"))?
format!(
"OIDC client_id for app '{cli_app_name}' missing from cache — \
ZitadelSetupScore should have created the app and populated \
ZitadelClientConfig.apps"
)
})?
.clone(); .clone();
log::info!("[2/6] project_id resolved: {project_id}");
log::info!("[2/6] device-code client_id for '{cli_app_name}' resolved: {cli_client_id}");
// ---- 3. Issuer NKey + auth callout pieces --------------------------- // ---- OpenBao: deploy + policy, co-located in the fleet namespace --------
// The callout signs user JWTs with this account NKey. NATS server // The operator's credentials are seeded here so a later
// is configured with the matching pubkey via the auth_callout // `harmony-fleet-deploy --from-tag <tag>` upgrades the operator alone.
// block in the helm values rendered by NatsK8sScore. let openbao = OpenbaoInstance {
namespace: cfg.fleet_namespace.clone(),
release: "openbao".to_string(),
};
OpenbaoScore {
instance: openbao.clone(),
host: secrets_host.clone(),
openshift: true,
tls_issuer: Some(cfg.cluster_issuer.clone()),
}
.interpret(&Inventory::empty(), &topology)
.await
.context("OpenBao deploy")?;
OpenbaoSetupScore {
instance: openbao.clone(),
policies: vec![OpenbaoPolicy {
name: "fleet-deployer".to_string(),
hcl: r#"path "secret/data/harmony/*" { capabilities = ["read"] }
path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"#
.to_string(),
}],
..Default::default()
}
.interpret(&Inventory::empty(), &topology)
.await
.context("OpenBao setup")?;
// ---- Workload Scores: filterable via `harmony_cli::Args` ----------------
// The callout signs user JWTs with this account NKey; NATS is configured
// with the matching pubkey via the auth_callout block in its helm values.
let issuer_kp = KeyPair::new_account(); let issuer_kp = KeyPair::new_account();
let issuer_seed = issuer_kp let issuer_seed = issuer_kp
.seed() .seed()
.map_err(|e| anyhow::anyhow!("issuer NKey seed: {e}"))?; .map_err(|e| anyhow::anyhow!("issuer NKey seed: {e}"))?;
let issuer_pubkey = issuer_kp.public_key();
let nats_auth_user = "auth"; let nats_auth_user = "auth";
let nats_auth_pass = generate_alphanum(24); let nats_auth_pass = generate_alphanum(24);
let nats_url = format!(
// ---- 4. NATS install ------------------------------------------------ "nats://{nats_release}.{}.svc.cluster.local:4222",
let nats_release = "fleet-nats"; cfg.fleet_namespace
log::info!(
"[3/6] NATS install: ns={} release={} ws={}",
cli.fleet_namespace,
nats_release,
nats_ws_host
); );
let nats_cluster = NatsCluster {
namespace: cli.fleet_namespace.clone(),
// `domain` is unused in single-instance mode (gateway off).
// Kept here for the legacy supercluster code path which the
// staging install doesn't take.
domain: cli.base_domain.clone(),
replicas: 1,
name: nats_release.to_string(),
gateway_advertise: String::new(),
dns_name: nats_ws_host.clone(),
// Static-string fields the NatsCluster shape requires; only
// referenced when `gateway` is Some, which it isn't here.
supercluster_ca_secret_name: "fleet-nats-supercluster-ca",
tls_cert_name: "fleet-nats-tls",
jetstream_enabled: "true",
};
let nats = NatsK8sScore { let nats = NatsK8sScore {
distribution: KubernetesDistribution::OpenshiftFamily, distribution: KubernetesDistribution::OpenshiftFamily,
cluster: nats_cluster, cluster: NatsCluster {
namespace: cfg.fleet_namespace.clone(),
// `domain` and the static-string fields below are only read in the
// supercluster path (gateway Some), which staging doesn't take.
domain: cfg.base_domain.clone(),
replicas: 1,
name: nats_release.to_string(),
gateway_advertise: String::new(),
dns_name: nats_ws_host.clone(),
supercluster_ca_secret_name: "fleet-nats-supercluster-ca",
tls_cert_name: "fleet-nats-tls",
jetstream_enabled: "true",
},
peers: None, peers: None,
ca_bundle: None, ca_bundle: None,
gateway: None, // single-instance — drop the gateway block gateway: None,
auth_callout: Some(AuthCalloutCfg { auth_callout: Some(AuthCalloutCfg {
issuer_pubkey: issuer_pubkey.clone(), issuer_pubkey: issuer_kp.public_key(),
auth_user: nats_auth_user.to_string(), auth_user: nats_auth_user.to_string(),
auth_pass: nats_auth_pass.clone(), auth_pass: nats_auth_pass.clone(),
account: cli.nats_account.clone(), account: cfg.nats_account.clone(),
}), }),
websocket: Some(WebSocketRouteCfg { websocket: Some(WebSocketRouteCfg {
host: nats_ws_host.clone(), host: nats_ws_host.clone(),
cluster_issuer: cli.cluster_issuer.clone(), cluster_issuer: cfg.cluster_issuer.clone(),
}), }),
}; };
nats.interpret(&Inventory::empty(), &topology)
.await
.context("NATS install (single-instance + auth_callout + WS Route)")?;
// ---- 5. Auth callout deployment -------------------------------------
log::info!(
"[4/6] Auth callout: image={} project_id={}",
cli.callout_image,
project_id
);
let mut callout = NatsAuthCalloutScore::new( let mut callout = NatsAuthCalloutScore::new(
"fleet-callout", "fleet-callout",
&cli.fleet_namespace, &cfg.fleet_namespace,
format!( nats_url.clone(),
"nats://{nats_release}.{}.svc.cluster.local:4222",
cli.fleet_namespace
),
format!("https://{zitadel_host}"), format!("https://{zitadel_host}"),
project_id.clone(), project_id.clone(),
nats_auth_user, nats_auth_user,
&nats_auth_pass, &nats_auth_pass,
&issuer_seed, &issuer_seed,
) )
.image(&cli.callout_image) .image(&cfg.callout_image)
.target_account(&cli.nats_account) .target_account(&cfg.nats_account)
.admin_role(&cli.admin_role) .admin_role(&cfg.admin_role)
.device_role(&cli.device_role) .device_role(&cfg.device_role)
.danger_accept_invalid_certs(false); .danger_accept_invalid_certs(false);
callout.device_id_claim = "client_id".to_string(); callout.device_id_claim = "client_id".to_string();
callout.device_id_prefix_strip = "device-".to_string(); callout.device_id_prefix_strip = "device-".to_string();
callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles"); callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles");
callout
.interpret(&Inventory::empty(), &topology)
.await
.context("auth callout deploy")?;
// ---- 6. Operator deployment with credentials ------------------------ let credentials = OperatorCredentials::zitadel_jwt(
log::info!("[5/6] Operator: image={}", cli.operator_image); &format!("https://{zitadel_host}"),
// `key_json` MUST use TOML literal multi-line strings (`'''...'''`), &project_id,
// not basic multi-line (`"""..."""`). Basic strings interpret &operator_machine_key,
// backslash escapes, which corrupts the JSON keyfile: every `\n`
// inside the embedded RSA private key gets expanded to a literal
// newline (0x0A) before JSON parsing sees it, and JSON disallows
// raw control chars inside strings ("control character found while
// parsing a string"). Literal strings preserve `\n` as-is so the
// downstream JSON parser interprets it as an escape and decodes
// the multi-line PEM correctly.
let credentials_toml = format!(
r#"type = "zitadel-jwt"
oidc_issuer_url = "https://{zitadel_host}"
audience = "{project_id}"
key_json = '''{operator_key}'''
"#,
zitadel_host = zitadel_host,
project_id = project_id,
operator_key = operator_machine_key,
); );
let mut operator = FleetOperatorScore::new() let mut operator = FleetOperatorScore::new()
.namespace(&cli.fleet_namespace) .namespace(&cfg.fleet_namespace)
.release_name("harmony-fleet-operator") .release_name("harmony-fleet-operator")
.image(&cli.operator_image) .image(&cfg.operator_image)
.image_pull_policy("Always") .image_pull_policy("Always")
.nats_url(format!( .nats_url(nats_url.clone())
"nats://{nats_release}.{}.svc.cluster.local:4222",
cli.fleet_namespace
))
.log_level("info,kube_runtime=warn"); .log_level("info,kube_runtime=warn");
operator.credentials = Some(OperatorCredentials { credentials_toml }); operator.credentials = Some(credentials.clone());
operator
.interpret(&Inventory::empty(), &topology)
.await
.context("operator deploy")?;
log::info!("[6/6] Stack installed."); let scores: Vec<Box<dyn Score<K8sAnywhereTopology>>> =
println!("\n=== fleet-staging install complete ==="); vec![Box::new(nats), Box::new(callout), Box::new(operator)];
println!("Zitadel: https://{zitadel_host}/"); harmony_cli::run(Inventory::empty(), topology.clone(), scores, Some(args))
println!("NATS WS public: wss://{nats_ws_host}/"); .await
println!( .map_err(|e| anyhow::anyhow!("{e}"))?;
"NATS in-cluster: nats://{nats_release}.{}.svc.cluster.local:4222",
cli.fleet_namespace // ---- Seed operator credentials as FleetDeploySecrets --------------------
// Reached via port-forward with the cached root token, so it doesn't wait
// on the public route/cert. No kubeconfig — CD callers use their own context.
let k8s = topology
.k8s_client()
.await
.map_err(|e| anyhow::anyhow!(e))?;
let pf = k8s
.port_forward(&openbao.pod(), &openbao.namespace, 8200, 8200)
.await
.context("port-forward to OpenBao")?;
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
let store = OpenbaoSecretStore::new(
format!("http://127.0.0.1:{}", pf.port()),
"secret".to_string(),
"token".to_string(),
true,
Some(cached_root_token(&openbao).map_err(|e| anyhow::anyhow!(e))?),
None,
None,
None,
None,
None,
None,
)
.await
.context("OpenBao client")?;
ConfigClient::new(vec![
Arc::new(StoreSource::new("harmony".to_string(), store))
as Arc<dyn harmony_config::ConfigSource>,
])
.set(&FleetDeploySecrets {
operator_credentials_toml: credentials.credentials_toml.clone(),
kubeconfig: None,
})
.await
.context("seed FleetDeploySecrets")?;
info!("=== fleet-staging install complete ===");
info!(
"Zitadel: https://{zitadel_host}/ (admin user {})",
cfg.admin_username
); );
println!( info!("NATS WS public: wss://{nats_ws_host}/");
"Operator: oc -n {} get deploy/harmony-fleet-operator", info!("OpenBao: https://{secrets_host}/");
cli.fleet_namespace info!("Project id: {project_id}");
); info!("SSO client_id: {cli_client_id} (app '{cli_app_name}', device-code grant)");
println!( info!(
"Auth callout: oc -n {} get deploy/fleet-callout", "Enroll a device: fleet_device_enroll --target ssh://<user>@<device> \
cli.fleet_namespace --issuer-url https://{zitadel_host} --audience {project_id} \
); --nats-url wss://{nats_ws_host} --admin-oidc-client-id {cli_client_id} \
println!("Project id: {project_id}");
println!(
"Admin user: {} (machine key in ~/.local/share/harmony/zitadel/client-config.json)",
cli.admin_username
);
println!(
"Operator user: {} (machine key embedded in operator's Secret)",
cli.operator_username
);
println!("SSO client_id: {cli_client_id} (app '{cli_app_name}', device-code grant)");
println!();
println!("To enroll a device, pass the SSO client_id explicitly:");
println!(
" fleet_device_enroll \\\n \
--target ssh://<user>@<device> \\\n \
--issuer-url https://{zitadel_host} \\\n \
--audience {project_id} \\\n \
--nats-url wss://{nats_ws_host} \\\n \
--admin-oidc-client-id {cli_client_id} \\\n \
--agent-binary <path>" --agent-binary <path>"
); );

View File

@@ -8,7 +8,10 @@ license.workspace = true
[dependencies] [dependencies]
harmony = { path = "../../harmony" } harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" } harmony_cli = { path = "../../harmony_cli" }
harmony_macros = { path = "../../harmony_macros" } harmony_config = { path = "../../harmony_config" }
harmony_types = { path = "../../harmony_types" }
tokio.workspace = true tokio.workspace = true
url.workspace = true anyhow.workspace = true
clap = { version = "4", features = ["derive"] }
serde = { workspace = true }
schemars = "0.8"
tracing = { workspace = true }

View File

@@ -1,7 +1,36 @@
To install an openbao instance with harmony simply `cargo run -p example-openbao` . # example-openbao
Depending on your environement configuration, it will either install a k3d cluster locally and deploy on it, or install to a remote cluster. Installs a standalone OpenBao instance and makes it immediately usable as a
`harmony_config` store: deploy → init → unseal → KV v2. Depending on your
environment it either spins up a local k3d cluster or targets the remote
cluster `KUBECONFIG` points at.
Then follow the openbao documentation to initialize and unseal, this will make openbao usable. Configuration comes from `ConfigClient` (`HARMONY_CONFIG_OpenbaoInstallConfig`
env JSON → OpenBao → interactive prompt). The only required field is `host`.
https://openbao.org/docs/platform/k8s/helm/run/ ```bash
# Non-interactive: provide the config as JSON.
export HARMONY_CONFIG_OpenbaoInstallConfig='{
"host": "secrets-stg.cb1.nationtech.io",
"namespace": "openbao",
"release": "openbao",
"openshift": true,
"tls_issuer": "letsencrypt-prod"
}'
cargo run -p example-openbao -- --yes
```
`cargo run -p example-openbao -- --list` lists the scores without touching the
cluster. Run without `HARMONY_CONFIG_*` to be prompted for each field.
Optional features compose from config presence:
| Config field(s) | Effect |
|---------------------------------|------------------------------------------------------------|
| `tls_issuer` | cert-manager edge TLS on the ingress (omit for plain HTTP) |
| `oidc_issuer` + `oidc_audience` | JWT auth + a `harmony` role scoped to `secret/harmony/*` |
After it runs, point `harmony_config` at it with `OPENBAO_URL=https://<host>`
and `OPENBAO_TOKEN=<cached root token>` (the root token is at
`~/.local/share/harmony/openbao/unseal-keys.json`). Once `oidc_*` is set, SSO
callers can authenticate via `HARMONY_SSO_*` instead of the root token.

View File

@@ -1,22 +1,140 @@
use harmony::{ //! Standalone OpenBao installer, configured entirely from
inventory::Inventory, modules::openbao::OpenbaoScore, topology::K8sAnywhereTopology, //! [`ConfigClient`] (`HARMONY_CONFIG_OpenbaoInstallConfig` env JSON →
//! OpenBao → interactive prompt). Deploys the chart, then initializes,
//! unseals, and enables KV v2 — so the result is immediately usable as a
//! `harmony_config` store (point `OPENBAO_URL` at the ingress host and
//! `OPENBAO_TOKEN` at the cached root token).
//!
//! Optional features compose purely from config presence — nothing is wired
//! unless its inputs are set:
//! - `tls_issuer` → cert-manager edge TLS on the ingress.
//! - `oidc_issuer` + `oidc_audience` → JWT auth method plus a `harmony`
//! role scoped to the `harmony-config` policy, letting SSO callers
//! (e.g. `harmony_config` via `HARMONY_SSO_*`) read/write
//! `secret/harmony/*` without the root token.
//!
//! This Score knows nothing about Zitadel: the OIDC issuer/audience are plain
//! config strings, so any OIDC provider works.
use anyhow::{Context, Result};
use clap::Parser;
use harmony::inventory::Inventory;
use harmony::modules::openbao::{
OpenbaoInstance, OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoScore, OpenbaoSetupScore,
}; };
use harmony::score::Score;
use harmony::topology::K8sAnywhereTopology;
use harmony_config::{Config, ConfigClient};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tracing::info;
/// Policy granting read/write to the `harmony_config` store path. Bound to the
/// JWT `harmony` role when OIDC is configured.
const HARMONY_CONFIG_POLICY: &str = "harmony-config";
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Config)]
#[serde(default)]
struct OpenbaoInstallConfig {
/// Public ingress host (e.g. `secrets-stg.<base-domain>`). Required.
host: String,
/// Helm release name + namespace the StatefulSet lives in.
namespace: String,
release: String,
/// OKD/OpenShift target — flips the chart's SCC-aware values.
openshift: bool,
/// cert-manager `ClusterIssuer` for edge TLS. Empty → plain HTTP ingress.
tls_issuer: String,
/// OIDC issuer URL for JWT auth (e.g. `https://sso-stg.<base>`). Empty,
/// or `oidc_audience` empty, disables JWT auth entirely.
oidc_issuer: String,
/// Expected `aud` for JWT auth (the OIDC project/client id).
oidc_audience: String,
}
impl Default for OpenbaoInstallConfig {
fn default() -> Self {
Self {
host: String::new(),
namespace: "openbao".to_string(),
release: "openbao".to_string(),
openshift: false,
tls_issuer: String::new(),
oidc_issuer: String::new(),
oidc_audience: String::new(),
}
}
}
#[tokio::main] #[tokio::main]
async fn main() { async fn main() -> Result<()> {
let openbao = OpenbaoScore { harmony_cli::cli_logger::init();
instance: Default::default(), let args = harmony_cli::Args::parse();
host: "openbao.sebastien.sto1.nationtech.io".to_string(), let config = ConfigClient::for_namespace("harmony").await;
openshift: false,
tls_issuer: None, let cfg: OpenbaoInstallConfig = config
.get_or_prompt()
.await
.context("loading OpenbaoInstallConfig")?;
info!("Got full config {cfg:?}");
anyhow::ensure!(
!cfg.host.is_empty(),
"host must be set (e.g. secrets-stg.<base>)"
);
let instance = OpenbaoInstance {
namespace: cfg.namespace.clone(),
release: cfg.release.clone(),
}; };
let deploy = OpenbaoScore {
instance: instance.clone(),
host: cfg.host.clone(),
openshift: cfg.openshift,
tls_issuer: (!cfg.tls_issuer.is_empty()).then(|| cfg.tls_issuer.clone()),
};
// JWT auth composes in only when both issuer and audience are set; it
// pulls in the harmony-config policy so the role has something to grant.
let jwt_auth =
(!cfg.oidc_issuer.is_empty() && !cfg.oidc_audience.is_empty()).then(|| OpenbaoJwtAuth {
oidc_discovery_url: cfg.oidc_issuer.clone(),
bound_issuer: cfg.oidc_issuer.clone(),
role_name: "harmony".to_string(),
bound_audiences: cfg.oidc_audience.clone(),
user_claim: "sub".to_string(),
policies: vec![HARMONY_CONFIG_POLICY.to_string()],
ttl: "1h".to_string(),
max_ttl: "8h".to_string(),
});
let policies = if jwt_auth.is_some() {
vec![OpenbaoPolicy {
name: HARMONY_CONFIG_POLICY.to_string(),
hcl: r#"path "secret/data/harmony/*" { capabilities = ["create","read","update","delete"] }
path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"#
.to_string(),
}]
} else {
vec![]
};
let setup = OpenbaoSetupScore {
instance,
kv_mount: "secret".to_string(),
policies,
users: vec![],
jwt_auth,
};
let scores: Vec<Box<dyn Score<K8sAnywhereTopology>>> = vec![Box::new(deploy), Box::new(setup)];
harmony_cli::run( harmony_cli::run(
Inventory::autoload(), Inventory::empty(),
K8sAnywhereTopology::from_env(), K8sAnywhereTopology::from_env(),
vec![Box::new(openbao)], scores,
None, Some(args),
) )
.await .await
.unwrap(); .map_err(|e| anyhow::anyhow!("{e}"))
} }

View File

@@ -20,11 +20,11 @@ Laptop fallback (does exactly what the workflow's job does):
```sh ```sh
# docker + helm must be logged in to hub.nationtech.io first. # docker + helm must be logged in to hub.nationtech.io first.
cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-release -- \ cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-publish -- \
--from-tag harmony-fleet-operator-v0.0.2 --from-tag harmony-fleet-operator-v0.0.2
# build + package only, no push (local k3d smoke-test): # build + package only, no push (local k3d smoke-test):
cargo run -p harmony-fleet-deploy --bin harmony-fleet-release -- \ cargo run -p harmony-fleet-deploy --bin harmony-fleet-publish -- \
--from-tag harmony-fleet-operator-v0.0.2 --no-push --from-tag harmony-fleet-operator-v0.0.2 --no-push
``` ```
@@ -42,7 +42,7 @@ harmony-fleet-deploy --filter FleetOperatorScore \
``` ```
It installs the published It installs the published
`oci://hub.nationtech.io/harmony/harmony-fleet-operator:<version>` chart; `oci://hub.nationtech.io/harmony/harmony-fleet-operator-chart:<version>` chart;
the version is parsed from the tag in Rust (the tag is the only source the version is parsed from the tag in Rust (the tag is the only source
of truth). Same command bootstraps and upgrades; re-running the same tag of truth). Same command bootstraps and upgrades; re-running the same tag
is a no-op. Auth is Zitadel-SSO-only: the operator gets its zitadel-jwt is a no-op. Auth is Zitadel-SSO-only: the operator gets its zitadel-jwt

View File

@@ -15,11 +15,11 @@ path = "src/lib.rs"
name = "harmony-fleet-deploy" name = "harmony-fleet-deploy"
path = "src/main.rs" path = "src/main.rs"
# `harmony-fleet-release --from-tag <tag>` builds + publishes the # `harmony-fleet-publish --from-tag <tag>` builds + publishes the
# operator's image + chart for a release. # operator's image + chart for a release.
[[bin]] [[bin]]
name = "harmony-fleet-release" name = "harmony-fleet-publish"
path = "src/bin/harmony-fleet-release.rs" path = "src/bin/harmony-fleet-publish.rs"
[dependencies] [dependencies]
harmony = { path = "../../harmony", features = ["podman"] } harmony = { path = "../../harmony", features = ["podman"] }

View File

@@ -1,4 +1,4 @@
//! `harmony-fleet-release` — build + publish the operator image + chart //! `harmony-fleet-publish` — build + publish the operator image + chart
//! for a tagged release. `docker` / `helm` must be on PATH and logged in //! for a tagged release. `docker` / `helm` must be on PATH and logged in
//! to the registry (CI's login actions; dev's manual login). //! to the registry (CI's login actions; dev's manual login).
@@ -8,7 +8,7 @@ use harmony_fleet_deploy::release::{release_operator, version_from_tag};
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command( #[command(
name = "harmony-fleet-release", name = "harmony-fleet-publish",
about = "Build + publish the operator image + chart for a tagged release" about = "Build + publish the operator image + chart for a tagged release"
)] )]
struct Cli { struct Cli {

View File

@@ -40,5 +40,5 @@ pub use companion::AgentObservation;
pub use nats::{FleetNatsScore, UserPassCredentials}; pub use nats::{FleetNatsScore, UserPassCredentials};
pub use operator::{FleetOperatorScore, OperatorCredentials, PublishedChart}; pub use operator::{FleetOperatorScore, OperatorCredentials, PublishedChart};
pub use release::{release_operator, version_from_tag}; pub use release::{release_operator, version_from_tag};
pub use secrets::FleetDeploySecrets; pub use secrets::{FleetDeployConfig, FleetDeploySecrets};
pub use server::FleetServerScore; pub use server::FleetServerScore;

View File

@@ -15,7 +15,10 @@ use harmony::inventory::Inventory;
use harmony::topology::K8sAnywhereTopology; use harmony::topology::K8sAnywhereTopology;
use harmony_cli::Args as HarmonyCliArgs; use harmony_cli::Args as HarmonyCliArgs;
use harmony_config::ConfigClient; use harmony_config::ConfigClient;
use harmony_fleet_deploy::{FleetDeploySecrets, FleetOperatorScore, version_from_tag}; use harmony_fleet_deploy::{
FleetDeployConfig, FleetDeploySecrets, FleetOperatorScore, version_from_tag,
};
use tracing::info;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command( #[command(
@@ -23,12 +26,9 @@ use harmony_fleet_deploy::{FleetDeploySecrets, FleetOperatorScore, version_from_
about = "Deploy the published harmony fleet operator chart" about = "Deploy the published harmony fleet operator chart"
)] )]
struct CliConfig { struct CliConfig {
#[arg( /// Override the k8s namespace from config (e.g. `fleet-staging`).
long, #[arg(long, env = "HARMONY_FLEET_NAMESPACE")]
env = "HARMONY_FLEET_NAMESPACE", namespace: Option<String>,
default_value = "harmony-fleet-system"
)]
namespace: String,
/// Release tag to deploy (e.g. `harmony-fleet-operator-v0.0.2`); the /// Release tag to deploy (e.g. `harmony-fleet-operator-v0.0.2`); the
/// version is parsed from it in Rust so the workflow passes a tag and /// version is parsed from it in Rust so the workflow passes a tag and
@@ -40,22 +40,20 @@ struct CliConfig {
#[arg(long, env = "HARMONY_FLEET_OPERATOR_CHART_VERSION")] #[arg(long, env = "HARMONY_FLEET_OPERATOR_CHART_VERSION")]
operator_chart_version: Option<String>, operator_chart_version: Option<String>,
/// Override the OCI chart registry from config.
#[arg(long, env = "HARMONY_FLEET_OPERATOR_CHART_REGISTRY")]
operator_chart_registry: Option<String>,
/// Override the OCI chart project from config.
#[arg(long, env = "HARMONY_FLEET_OPERATOR_CHART_PROJECT")]
operator_chart_project: Option<String>,
/// Config namespace `FleetDeploySecrets` and `FleetDeployConfig` resolve under (Env → OpenBao).
#[arg( #[arg(
long, long,
env = "HARMONY_FLEET_OPERATOR_CHART_REGISTRY", env = "HARMONY_CONFIG_NAMESPACE",
default_value = "hub.nationtech.io" default_value = "fleet-staging"
)] )]
operator_chart_registry: String,
#[arg(
long,
env = "HARMONY_FLEET_OPERATOR_CHART_PROJECT",
default_value = "harmony"
)]
operator_chart_project: String,
/// Config namespace `FleetDeploySecrets` resolves under (Env → OpenBao).
#[arg(long, env = "HARMONY_SECRET_NAMESPACE", default_value = "harmony")]
config_namespace: String, config_namespace: String,
#[command(flatten)] #[command(flatten)]
@@ -74,15 +72,22 @@ impl CliConfig {
#[tokio::main] #[tokio::main]
async fn main() -> Result<()> { async fn main() -> Result<()> {
harmony_cli::cli_logger::init();
let cli = CliConfig::parse(); let cli = CliConfig::parse();
let version = cli.chart_version()?; let version = cli.chart_version()?;
let secrets: FleetDeploySecrets = ConfigClient::for_namespace(&cli.config_namespace) let config_client = ConfigClient::for_namespace(&cli.config_namespace).await;
.await
let secrets: FleetDeploySecrets = config_client
.get() .get()
.await .await
.context("loading FleetDeploySecrets (set HARMONY_CONFIG_FleetDeploySecrets or OpenBao)")?; .context("loading FleetDeploySecrets (set HARMONY_CONFIG_FleetDeploySecrets or OpenBao)")?;
let config: FleetDeployConfig = config_client
.get_or_prompt()
.await
.context("loading FleetDeployConfig")?;
// Point KUBECONFIG at the scoped deployer credential before the // Point KUBECONFIG at the scoped deployer credential before the
// topology reads it, so the runner pod needs no standing permissions. // topology reads it, so the runner pod needs no standing permissions.
// Held to end of scope so the tempfile outlives the deploy. // Held to end of scope so the tempfile outlives the deploy.
@@ -98,14 +103,19 @@ async fn main() -> Result<()> {
None => None, None => None,
}; };
let namespace = cli.namespace.unwrap_or(config.namespace);
let registry = cli
.operator_chart_registry
.unwrap_or(config.operator_chart_registry);
let project = cli
.operator_chart_project
.unwrap_or(config.operator_chart_project);
let operator = FleetOperatorScore::new() let operator = FleetOperatorScore::new()
.namespace(cli.namespace) .namespace(namespace)
.nats_url(config.nats_url)
.credentials(secrets.operator_credentials_toml) .credentials(secrets.operator_credentials_toml)
.published_chart( .published_chart(registry, project, version);
cli.operator_chart_registry,
cli.operator_chart_project,
version,
);
harmony_cli::run( harmony_cli::run(
Inventory::autoload(), Inventory::autoload(),

View File

@@ -66,7 +66,7 @@ pub struct ChartOptions {
/// to the deploy crate's `CARGO_PKG_VERSION` — fine for in-process /// to the deploy crate's `CARGO_PKG_VERSION` — fine for in-process
/// uses (e2e harness, runtime operator Score). The release binary /// uses (e2e harness, runtime operator Score). The release binary
/// sets this to the released tag so the OCI chart artifact lands /// sets this to the released tag so the OCI chart artifact lands
/// at `…/harmony-fleet-operator:<tag>` matching the image tag. /// at `…/harmony-fleet-operator-chart:<tag>` matching the image tag.
pub chart_version: Option<String>, pub chart_version: Option<String>,
} }
@@ -102,6 +102,22 @@ pub struct OperatorCredentials {
pub credentials_toml: String, pub credentials_toml: String,
} }
impl OperatorCredentials {
/// Build the auth-callout credentials from a Zitadel machine key.
/// `key_json` goes in a TOML *literal* string — see the field docs
/// for why basic strings corrupt the embedded PEM.
pub fn zitadel_jwt(oidc_issuer_url: &str, audience: &str, key_json: &str) -> Self {
Self {
credentials_toml: format!(
"type = \"zitadel-jwt\"\n\
oidc_issuer_url = \"{oidc_issuer_url}\"\n\
audience = \"{audience}\"\n\
key_json = '''{key_json}'''\n"
),
}
}
}
impl Default for ChartOptions { impl Default for ChartOptions {
fn default() -> Self { fn default() -> Self {
Self { Self {
@@ -120,6 +136,7 @@ impl Default for ChartOptions {
} }
} }
pub const CHART_NAME: &str = "harmony-fleet-operator-chart";
pub const RELEASE_NAME: &str = "harmony-fleet-operator"; pub const RELEASE_NAME: &str = "harmony-fleet-operator";
pub const SERVICE_ACCOUNT: &str = "harmony-fleet-operator"; pub const SERVICE_ACCOUNT: &str = "harmony-fleet-operator";
pub const CLUSTER_ROLE: &str = "harmony-fleet-operator"; pub const CLUSTER_ROLE: &str = "harmony-fleet-operator";
@@ -145,7 +162,7 @@ pub fn build_chart(opts: &ChartOptions) -> Result<PathBuf> {
.chart_version .chart_version
.clone() .clone()
.unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string()); .unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string());
let mut chart = HelmChart::new(RELEASE_NAME.to_string(), chart_version.clone()); let mut chart = HelmChart::new(CHART_NAME.to_string(), chart_version.clone());
chart.version = chart_version; chart.version = chart_version;
chart.description = "IoT operator — Deployment CRD → NATS KV".to_string(); chart.description = "IoT operator — Deployment CRD → NATS KV".to_string();

View File

@@ -43,7 +43,7 @@ use crate::operator::chart::{ChartOptions, OperatorCredentials, build_chart, ope
/// The already-published OCI chart to install (the CD `harmony apply` /// The already-published OCI chart to install (the CD `harmony apply`
/// path). When set, the operator installs /// path). When set, the operator installs
/// `oci://{registry}/{project}/harmony-fleet-operator:{version}` and the /// `oci://{registry}/{project}/harmony-fleet-operator-chart:{version}` and the
/// score's `image` is ignored (the image is baked into the chart). /// score's `image` is ignored (the image is baked into the chart).
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
pub struct PublishedChart { pub struct PublishedChart {
@@ -205,7 +205,7 @@ impl<T: Topology + HelmCommand + K8sclient> Interpret<T> for FleetOperatorInterp
// branch runs its own install so the local tempdir stays alive // branch runs its own install so the local tempdir stays alive
// across it. // across it.
let helm_outcome = if let Some(p) = &self.score.published_chart { let helm_outcome = if let Some(p) = &self.score.published_chart {
let chart_ref = format!("oci://{}/{}/{}", p.registry, p.project, chart::RELEASE_NAME); let chart_ref = format!("oci://{}/{}/{}", p.registry, p.project, chart::CHART_NAME);
info!( info!(
"Installing helm release '{}' from published chart {chart_ref} version {}", "Installing helm release '{}' from published chart {chart_ref} version {}",
self.score.release_name, p.version self.score.release_name, p.version

View File

@@ -11,7 +11,7 @@ use std::process::Command;
use anyhow::{Context, Result, bail}; use anyhow::{Context, Result, bail};
use crate::operator::chart::{ChartOptions, build_chart}; use crate::operator::chart::{CHART_NAME, ChartOptions, build_chart};
const TAG_PREFIX: &str = "harmony-fleet-operator-"; const TAG_PREFIX: &str = "harmony-fleet-operator-";
const IMAGE_NAME: &str = "harmony-fleet-operator"; const IMAGE_NAME: &str = "harmony-fleet-operator";
@@ -59,7 +59,7 @@ pub fn release_operator(version: &str, registry: &str, project: &str, push: bool
run("helm", &["push", path_str(&tgz)?, &oci_repo])?; run("helm", &["push", path_str(&tgz)?, &oci_repo])?;
} }
log::info!("released image={image} chart={oci_repo}/{IMAGE_NAME}:{version} pushed={push}"); log::info!("released image={image} chart={oci_repo}/{CHART_NAME}:{version} pushed={push}");
Ok(()) Ok(())
} }

View File

@@ -1,4 +1,4 @@
//! Secrets for the published-chart (CD) operator deploy, via //! Secrets and config for the published-chart (CD) operator deploy, via
//! [`harmony_config::ConfigClient`]. SSO-only by construction: no //! [`harmony_config::ConfigClient`]. SSO-only by construction: no
//! user/pass field exists, so dev-only user/pass auth can't reach a prod //! user/pass field exists, so dev-only user/pass auth can't reach a prod
//! deploy. Resolved EnvSource → OpenBao, so the in-cluster runner pulls //! deploy. Resolved EnvSource → OpenBao, so the in-cluster runner pulls
@@ -24,6 +24,42 @@ pub struct FleetDeploySecrets {
pub kubeconfig: Option<String>, pub kubeconfig: Option<String>,
} }
/// Non-secret deploy config: k8s namespaces + chart coords. Loaded via
/// `ConfigClient::for_namespace("fleet-staging")` alongside `FleetDeploySecrets`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Config)]
pub struct FleetDeployConfig {
/// K8s namespace where the operator, NATS, and callout live.
pub namespace: String,
/// Full NATS URL the operator connects to (e.g. `nats://fleet-nats.fleet-staging:4222`).
pub nats_url: String,
/// K8s namespace where Zitadel lives (for operator UI SSO).
pub zitadel_namespace: String,
/// K8s namespace where OpenBao lives (for operator secret fetching).
pub openbao_namespace: String,
/// OCI chart registry (e.g. `hub.nationtech.io`).
pub operator_chart_registry: String,
/// OCI chart project (e.g. `harmony`).
pub operator_chart_project: String,
}
impl Default for FleetDeployConfig {
fn default() -> Self {
Self {
namespace: "fleet-staging".to_string(),
nats_url: "nats://fleet-nats.fleet-staging:4222".to_string(),
zitadel_namespace: "zitadel-staging".to_string(),
openbao_namespace: "openbao-staging".to_string(),
operator_chart_registry: "hub.nationtech.io".to_string(),
operator_chart_project: "harmony".to_string(),
}
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -37,4 +73,17 @@ mod tests {
// Secrets must never land in cleartext SQLite. // Secrets must never land in cleartext SQLite.
assert_eq!(FleetDeploySecrets::CLASS, ConfigClass::Secret); assert_eq!(FleetDeploySecrets::CLASS, ConfigClass::Secret);
} }
#[test]
fn config_class_is_not_secret() {
assert_eq!(FleetDeployConfig::CLASS, ConfigClass::Standard);
}
#[test]
fn config_defaults() {
let c = FleetDeployConfig::default();
assert_eq!(c.namespace, "fleet-staging");
assert_eq!(c.zitadel_namespace, "zitadel-staging");
assert_eq!(c.openbao_namespace, "openbao-staging");
}
} }

View File

@@ -43,7 +43,7 @@ impl<T: Topology> Maestro<T> {
/// Ensures the associated Topology is ready for operations. /// Ensures the associated Topology is ready for operations.
/// Delegates the readiness check and potential setup actions to the Topology. /// Delegates the readiness check and potential setup actions to the Topology.
async fn prepare_topology(&mut self) -> Result<PreparationOutcome, PreparationError> { pub async fn prepare_topology(&mut self) -> Result<PreparationOutcome, PreparationError> {
self.topology_state.prepare(); self.topology_state.prepare();
let result = self.topology.ensure_ready().await; let result = self.topology.ensure_ready().await;

View File

@@ -13,7 +13,7 @@ use crate::{
topology::{HelmCommand, K8sclient, Topology}, topology::{HelmCommand, K8sclient, Topology},
}; };
pub use setup::{OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoSetupScore, OpenbaoUser}; pub use setup::{OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoSetupScore, OpenbaoUser, cached_root_token};
const DEFAULT_NAMESPACE: &str = "openbao"; const DEFAULT_NAMESPACE: &str = "openbao";
const DEFAULT_RELEASE: &str = "openbao"; const DEFAULT_RELEASE: &str = "openbao";

View File

@@ -143,8 +143,26 @@ fn keys_dir() -> PathBuf {
.unwrap_or_else(|| PathBuf::from("/tmp/harmony-openbao")) .unwrap_or_else(|| PathBuf::from("/tmp/harmony-openbao"))
} }
fn keys_file() -> PathBuf { /// Per-instance keys file. Keyed by namespace+release so multiple OpenBao
keys_dir().join("unseal-keys.json") /// instances don't clobber each other's unseal keys in one shared file —
/// losing them means the instance can never be unsealed again.
fn keys_file(instance: &OpenbaoInstance) -> PathBuf {
keys_dir().join(format!(
"unseal-keys-{}-{}.json",
instance.namespace, instance.release
))
}
/// The root token from the cached unseal-keys file written at init.
/// Dev/staging convenience for callers that need to seed OpenBao right
/// after [`OpenbaoSetupScore`] runs; production uses auto-unseal and
/// wouldn't persist this.
pub fn cached_root_token(instance: &OpenbaoInstance) -> Result<String, String> {
let path = keys_file(instance);
let content = std::fs::read_to_string(&path).map_err(|e| format!("read {path:?}: {e}"))?;
let init: InitOutput =
serde_json::from_str(&content).map_err(|e| format!("parse {path:?}: {e}"))?;
Ok(init.root_token)
} }
impl OpenbaoSetupInterpret { impl OpenbaoSetupInterpret {
@@ -188,7 +206,7 @@ impl OpenbaoSetupInterpret {
InterpretError::new(format!("Failed to create keys directory {:?}: {}", dir, e)) InterpretError::new(format!("Failed to create keys directory {:?}: {}", dir, e))
})?; })?;
let path = keys_file(); let path = keys_file(&self.score.instance);
// Source of truth for "is this vault initialized?" is OpenBao itself, // Source of truth for "is this vault initialized?" is OpenBao itself,
// not a `bao status` pre-check parsed from stderr — that probe is // not a `bao status` pre-check parsed from stderr — that probe is
@@ -306,7 +324,7 @@ impl OpenbaoSetupInterpret {
} }
info!("[OpenbaoSetup] Unsealing..."); info!("[OpenbaoSetup] Unsealing...");
let path = keys_file(); let path = keys_file(&self.score.instance);
let content = std::fs::read_to_string(&path) let content = std::fs::read_to_string(&path)
.map_err(|e| InterpretError::new(format!("Failed to read keys: {e}")))?; .map_err(|e| InterpretError::new(format!("Failed to read keys: {e}")))?;
let init: InitOutput = serde_json::from_str(&content) let init: InitOutput = serde_json::from_str(&content)

View File

@@ -16,13 +16,12 @@ harmony = { path = "../harmony" }
harmony_tui = { path = "../harmony_tui", optional = true } harmony_tui = { path = "../harmony_tui", optional = true }
inquire.workspace = true inquire.workspace = true
tokio.workspace = true tokio.workspace = true
env_logger.workspace = true
console = "0.16.0" console = "0.16.0"
indicatif = "0.18.0" indicatif = "0.18.0"
lazy_static = "1.5.0" lazy_static = "1.5.0"
log.workspace = true tracing.workspace = true
indicatif-log-bridge = "0.2.3" tracing-subscriber.workspace = true
chrono.workspace = true
[dev-dependencies] [dev-dependencies]
harmony = { path = "../harmony", features = ["testing"] } harmony = { path = "../harmony", features = ["testing"] }
async-trait = "0.1"

View File

@@ -1,13 +1,11 @@
use chrono::Local;
use console::style;
use harmony::{ use harmony::{
instrumentation::{self, HarmonyEvent}, instrumentation::{self, HarmonyEvent},
modules::application::ApplicationFeatureStatus, modules::application::ApplicationFeatureStatus,
topology::TopologyStatus, topology::TopologyStatus,
}; };
use log::{error, info, log_enabled};
use std::io::Write;
use std::sync::{Mutex, OnceLock}; use std::sync::{Mutex, OnceLock};
use tracing::{error, info};
use tracing_subscriber::EnvFilter;
pub fn init() { pub fn init() {
static INITIALIZED: OnceLock<()> = OnceLock::new(); static INITIALIZED: OnceLock<()> = OnceLock::new();
@@ -17,68 +15,36 @@ pub fn init() {
}); });
} }
// The framework still emits via the `log` crate; tracing-subscriber's default
// `tracing-log` bridge captures those records, so this subscriber covers both.
// Normal runs stay terse (level + message, ANSI-coloured); debug/trace adds the
// timestamp + target needed to actually debug — matching the old env_logger UX.
fn configure_logger() { fn configure_logger() {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
.format(|buf, record| { let verbose = std::env::var("RUST_LOG")
let debug_mode = log_enabled!(log::Level::Debug); .map(|v| v.to_lowercase().contains("debug") || v.to_lowercase().contains("trace"))
let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S"); .unwrap_or(false);
let builder = tracing_subscriber::fmt().with_env_filter(filter);
let _ = if verbose {
builder.with_target(true).try_init()
} else {
builder.without_time().with_target(false).try_init()
};
}
let level = match record.level() { // Plain emojis — no `console::style` colour codes. The emoji already conveys
log::Level::Error => style("ERROR").red(), // status, the level is coloured by the fmt layer, and embedding ANSI here
log::Level::Warn => style("WARN").yellow(), // leaks escape codes when console's TTY detection disagrees with the writer.
log::Level::Info => style("INFO").green(), fn ok() -> String {
log::Level::Debug => style("DEBUG").blue(), crate::theme::EMOJI_SUCCESS.to_string()
log::Level::Trace => style("TRACE").magenta(), }
};
if let Some(status) = record.key_values().get(log::kv::Key::from("status")) { fn skipped() -> String {
let status = status.to_borrowed_str().unwrap(); crate::theme::EMOJI_SKIP.to_string()
let emoji = match status { }
"finished" => style(crate::theme::EMOJI_SUCCESS.to_string()).green(),
"skipped" => style(crate::theme::EMOJI_SKIP.to_string()).yellow(), fn failed() -> String {
"failed" => style(crate::theme::EMOJI_ERROR.to_string()).red(), crate::theme::EMOJI_ERROR.to_string()
_ => style("".into()),
};
if debug_mode {
writeln!(
buf,
"[{} {:<5} {}] {} {}",
timestamp,
level,
record.target(),
emoji,
record.args()
)
} else {
writeln!(buf, "[{:<5}] {} {}", level, emoji, record.args())
}
} else if let Some(emoji) = record.key_values().get(log::kv::Key::from("emoji")) {
if debug_mode {
writeln!(
buf,
"[{} {:<5} {}] {} {}",
timestamp,
level,
record.target(),
emoji,
record.args()
)
} else {
writeln!(buf, "[{:<5}] {} {}", level, emoji, record.args())
}
} else if debug_mode {
writeln!(
buf,
"[{} {:<5} {}] {}",
timestamp,
level,
record.target(),
record.args()
)
} else {
writeln!(buf, "[{:<5}] {}", level, record.args())
}
})
.init();
} }
fn handle_events() { fn handle_events() {
@@ -93,8 +59,7 @@ fn handle_events() {
match event { match event {
HarmonyEvent::HarmonyStarted => {} HarmonyEvent::HarmonyStarted => {}
HarmonyEvent::HarmonyFinished => { HarmonyEvent::HarmonyFinished => {
let emoji = crate::theme::EMOJI_HARMONY.to_string(); info!("{} Harmony completed", crate::theme::EMOJI_HARMONY);
info!(emoji = emoji.as_str(); "Harmony completed");
} }
HarmonyEvent::TopologyStateChanged { HarmonyEvent::TopologyStateChanged {
topology, topology,
@@ -103,29 +68,28 @@ fn handle_events() {
} => match status { } => match status {
TopologyStatus::Queued => {} TopologyStatus::Queued => {}
TopologyStatus::Preparing => { TopologyStatus::Preparing => {
let emoji = format!( info!(
"{}", "{} Preparing environment: {topology}...",
style(crate::theme::EMOJI_TOPOLOGY.to_string()).yellow() crate::theme::EMOJI_TOPOLOGY
); );
info!(emoji = emoji.as_str(); "Preparing environment: {topology}...");
(*preparing_topology) = true; (*preparing_topology) = true;
} }
TopologyStatus::Success => { TopologyStatus::Success => {
(*preparing_topology) = false; (*preparing_topology) = false;
if let Some(message) = message { if let Some(message) = message {
info!(status = "finished"; "{message}"); info!("{} {message}", ok());
} }
} }
TopologyStatus::Noop => { TopologyStatus::Noop => {
(*preparing_topology) = false; (*preparing_topology) = false;
if let Some(message) = message { if let Some(message) = message {
info!(status = "skipped"; "{message}"); info!("{} {message}", skipped());
} }
} }
TopologyStatus::Error => { TopologyStatus::Error => {
(*preparing_topology) = false; (*preparing_topology) = false;
if let Some(message) = message { if let Some(message) = message {
error!(status = "failed"; "{message}"); error!("{} {message}", failed());
} }
} }
}, },
@@ -140,8 +104,10 @@ fn handle_events() {
info!("{message}"); info!("{message}");
} else { } else {
(*current_score) = Some(score.clone()); (*current_score) = Some(score.clone());
let emoji = format!("{}", style(crate::theme::EMOJI_SCORE).blue()); info!(
info!(emoji = emoji.as_str(); "Interpreting score: {score}..."); "{} Interpreting score: {score}...",
crate::theme::EMOJI_SCORE
);
} }
} }
HarmonyEvent::InterpretExecutionFinished { HarmonyEvent::InterpretExecutionFinished {
@@ -158,17 +124,17 @@ fn handle_events() {
match outcome { match outcome {
Ok(outcome) => match outcome.status { Ok(outcome) => match outcome.status {
harmony::interpret::InterpretStatus::SUCCESS => { harmony::interpret::InterpretStatus::SUCCESS => {
info!(status = "finished"; "{}", outcome.message); info!("{} {}", ok(), outcome.message);
} }
harmony::interpret::InterpretStatus::NOOP => { harmony::interpret::InterpretStatus::NOOP => {
info!(status = "skipped"; "{}", outcome.message); info!("{} {}", skipped(), outcome.message);
} }
_ => { _ => {
error!(status = "failed"; "{}", outcome.message); error!("{} {}", failed(), outcome.message);
} }
}, },
Err(err) => { Err(err) => {
error!(status = "failed"; "{err}"); error!("{} {err}", failed());
} }
} }
} }
@@ -182,10 +148,13 @@ fn handle_events() {
info!("Installing feature '{feature}' for '{application}'..."); info!("Installing feature '{feature}' for '{application}'...");
} }
ApplicationFeatureStatus::Installed { details: _ } => { ApplicationFeatureStatus::Installed { details: _ } => {
info!(status = "finished"; "Feature '{feature}' installed"); info!("{} Feature '{feature}' installed", ok());
} }
ApplicationFeatureStatus::Failed { message: details } => { ApplicationFeatureStatus::Failed { message: details } => {
error!(status = "failed"; "Feature '{feature}' installation failed: {details}"); error!(
"{} Feature '{feature}' installation failed: {details}",
failed()
);
} }
}, },
} }

View File

@@ -5,7 +5,7 @@ use harmony::inventory::Inventory;
use harmony::maestro::Maestro; use harmony::maestro::Maestro;
use harmony::{score::Score, topology::Topology}; use harmony::{score::Score, topology::Topology};
use inquire::Confirm; use inquire::Confirm;
use log::debug; use tracing::debug;
pub mod cli_logger; // FIXME: Don't make me pub pub mod cli_logger; // FIXME: Don't make me pub
mod cli_reporter; mod cli_reporter;
@@ -119,7 +119,9 @@ pub async fn run_cli<T: Topology + Send + Sync + 'static>(
cli_logger::init(); cli_logger::init();
cli_reporter::init(); cli_reporter::init();
let mut maestro = Maestro::initialize(inventory, topology).await.unwrap(); // Build the maestro WITHOUT preparing the topology — listing scores or
// declining the run must not touch the cluster. Prep is deferred to `init`.
let mut maestro = Maestro::new_without_initialization(inventory, topology);
maestro.register_all(scores); maestro.register_all(scores);
let result = init(maestro, args).await; let result = init(maestro, args).await;
@@ -129,11 +131,9 @@ pub async fn run_cli<T: Topology + Send + Sync + 'static>(
} }
async fn init<T: Topology + Send + Sync + 'static>( async fn init<T: Topology + Send + Sync + 'static>(
maestro: harmony::maestro::Maestro<T>, mut maestro: harmony::maestro::Maestro<T>,
args: Args, args: Args,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
let _ = env_logger::builder().try_init();
let scores_vec = maestro_scores_filter(&maestro, args.all, args.filter, args.number); let scores_vec = maestro_scores_filter(&maestro, args.all, args.filter, args.number);
if scores_vec.is_empty() { if scores_vec.is_empty() {
@@ -166,6 +166,13 @@ async fn init<T: Topology + Send + Sync + 'static>(
} }
} }
// We're committed to running — only now prepare the topology (the
// expensive, cluster-touching step) so list/decline paths stay no-ops.
maestro
.prepare_topology()
.await
.map_err(|e| format!("topology preparation failed: {e}"))?;
// Run filtered scores // Run filtered scores
for s in scores_vec { for s in scores_vec {
debug!("Running: {}", s.name()); debug!("Running: {}", s.name());
@@ -182,8 +189,72 @@ mod tests {
maestro::Maestro, maestro::Maestro,
modules::dummy::{ErrorScore, PanicScore, SuccessScore}, modules::dummy::{ErrorScore, PanicScore, SuccessScore},
topology::HAClusterTopology, topology::HAClusterTopology,
topology::{PreparationError, PreparationOutcome, Topology},
}; };
/// Topology whose readiness check always fails. Lets a test assert that
/// `--list` never reaches `prepare_topology` (it would error if it did),
/// while the run path does.
struct ExplodingTopology;
#[async_trait::async_trait]
impl Topology for ExplodingTopology {
fn name(&self) -> &str {
"ExplodingTopology"
}
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
Err(PreparationError::new(
"ensure_ready must not run on the list path".to_string(),
))
}
}
fn exploding_maestro() -> Maestro<ExplodingTopology> {
let mut maestro =
Maestro::new_without_initialization(Inventory::autoload(), ExplodingTopology);
maestro.register_all(vec![Box::new(SuccessScore {})]);
maestro
}
#[tokio::test]
async fn list_does_not_prepare_topology() {
// Topology prep fails; listing must still succeed because it never
// touches the topology.
let res = crate::init(
exploding_maestro(),
crate::Args {
yes: true,
filter: None,
interactive: false,
all: true,
number: 0,
list: true,
},
)
.await;
assert!(res.is_ok(), "--list should not prepare the topology");
}
#[tokio::test]
async fn run_prepares_topology_before_interpreting() {
// Same topology, but actually running: prep runs and its failure
// aborts before any score is interpreted.
let res = crate::init(
exploding_maestro(),
crate::Args {
yes: true,
filter: None,
interactive: false,
all: true,
number: 0,
list: false,
},
)
.await;
assert!(res.is_err(), "run path must prepare the topology first");
}
fn init_test_maestro() -> Maestro<HAClusterTopology> { fn init_test_maestro() -> Maestro<HAClusterTopology> {
let inventory = Inventory::autoload(); let inventory = Inventory::autoload();
let topology = HAClusterTopology::autoload(); let topology = HAClusterTopology::autoload();

View File

@@ -1,5 +1,6 @@
use crate::{ConfigClass, ConfigError, ConfigSource}; use crate::{ConfigClass, ConfigError, ConfigSource};
use async_trait::async_trait; use async_trait::async_trait;
use log::{debug, info};
pub struct EnvSource; pub struct EnvSource;
@@ -16,6 +17,7 @@ impl ConfigSource for EnvSource {
) -> Result<Option<serde_json::Value>, ConfigError> { ) -> Result<Option<serde_json::Value>, ConfigError> {
let env_key = env_key_for(key); let env_key = env_key_for(key);
debug!("Loading config from env var {env_key}");
match std::env::var(&env_key) { match std::env::var(&env_key) {
Ok(value) => serde_json::from_str(&value).map(Some).map_err(|e| { Ok(value) => serde_json::from_str(&value).map(Some).map_err(|e| {
ConfigError::EnvError(format!( ConfigError::EnvError(format!(