From 0400e9d45415ff3da7cf5b4f8e309a9c06a308af Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Fri, 29 May 2026 20:25:22 -0400 Subject: [PATCH 01/14] feat(fleet-staging): add OpenBao + seed FleetDeploySecrets; route operator creds through the deploy crate fleet_staging_install now deploys OpenBao (co-located in fleet-staging, cert-manager TLS at secrets-stg.), configures it (fleet-deployer read policy), and seeds the operator's FleetDeploySecrets so the operator can be upgraded alone via 'harmony-fleet-deploy --from-tag'. Behavior of the existing bring-up is unchanged. Credential-TOML construction moved out of the example into OperatorCredentials::zitadel_jwt (deploy crate) so all callers share it. New openbao::cached_root_token() lets the seed reuse the root token setup already cached. Seeding mirrors the harmony_sso port-forward pattern. --- Cargo.lock | 2 + examples/fleet_staging_install/Cargo.toml | 2 + examples/fleet_staging_install/src/main.rs | 117 ++++++++++++++---- .../src/operator/chart.rs | 16 +++ harmony/src/modules/openbao/mod.rs | 2 +- harmony/src/modules/openbao/setup.rs | 12 ++ 6 files changed, 129 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5ab989e6..d2123b3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3501,6 +3501,8 @@ dependencies = [ "harmony-k8s", "harmony-nats-callout", "harmony_cli", + "harmony_config", + "harmony_secret", "log", "nkeys", "rand 0.9.4", diff --git a/examples/fleet_staging_install/Cargo.toml b/examples/fleet_staging_install/Cargo.toml index 738008d8..b56657a2 100644 --- a/examples/fleet_staging_install/Cargo.toml +++ b/examples/fleet_staging_install/Cargo.toml @@ -13,6 +13,8 @@ path = "src/main.rs" [dependencies] harmony = { path = "../../harmony" } harmony_cli = { path = "../../harmony_cli" } +harmony_config = { path = "../../harmony_config" } +harmony_secret = { path = "../../harmony_secret" } harmony-k8s = { path = "../../harmony-k8s" } harmony-nats-callout = { path = 
"../../nats/callout" } harmony-fleet-deploy = { path = "../../fleet/harmony-fleet-deploy" } diff --git a/examples/fleet_staging_install/src/main.rs b/examples/fleet_staging_install/src/main.rs index 837c018e..885f53c8 100644 --- a/examples/fleet_staging_install/src/main.rs +++ b/examples/fleet_staging_install/src/main.rs @@ -28,20 +28,27 @@ //! --callout-image hub.nationtech.io/harmony/harmony-nats-callout:dev //! ``` +use std::sync::Arc; + use anyhow::{Context, Result}; use clap::Parser; use harmony::inventory::Inventory; use harmony::modules::nats::capability::NatsCluster; use harmony::modules::nats::score_nats_k8s::{AuthCalloutCfg, NatsK8sScore, WebSocketRouteCfg}; use harmony::modules::nats_auth_callout::NatsAuthCalloutScore; +use harmony::modules::openbao::{ + OpenbaoInstance, OpenbaoPolicy, OpenbaoScore, OpenbaoSetupScore, cached_root_token, +}; use harmony::modules::zitadel::{ MachineKeyType, ZitadelApiApp, ZitadelAppType, ZitadelApplication, ZitadelClientConfig, ZitadelMachineUser, ZitadelRole, ZitadelScore, ZitadelSetupScore, }; use harmony::score::Score; -use harmony::topology::{K8sAnywhereTopology, Topology}; -use harmony_fleet_deploy::{FleetOperatorScore, OperatorCredentials}; +use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology}; +use harmony_config::{ConfigClient, StoreSource}; +use harmony_fleet_deploy::{FleetDeploySecrets, FleetOperatorScore, OperatorCredentials}; use harmony_k8s::KubernetesDistribution; +use harmony_secret::OpenbaoSecretStore; use nkeys::KeyPair; #[derive(Parser, Debug)] @@ -348,24 +355,10 @@ async fn main() -> Result<()> { // ---- 6. Operator deployment with credentials ------------------------ log::info!("[5/6] Operator: image={}", cli.operator_image); - // `key_json` MUST use TOML literal multi-line strings (`'''...'''`), - // not basic multi-line (`"""..."""`). 
Basic strings interpret - // backslash escapes, which corrupts the JSON keyfile: every `\n` - // inside the embedded RSA private key gets expanded to a literal - // newline (0x0A) before JSON parsing sees it, and JSON disallows - // raw control chars inside strings ("control character found while - // parsing a string"). Literal strings preserve `\n` as-is so the - // downstream JSON parser interprets it as an escape and decodes - // the multi-line PEM correctly. - let credentials_toml = format!( - r#"type = "zitadel-jwt" -oidc_issuer_url = "https://{zitadel_host}" -audience = "{project_id}" -key_json = '''{operator_key}''' -"#, - zitadel_host = zitadel_host, - project_id = project_id, - operator_key = operator_machine_key, + let credentials = OperatorCredentials::zitadel_jwt( + &format!("https://{zitadel_host}"), + &project_id, + &operator_machine_key, ); let mut operator = FleetOperatorScore::new() .namespace(&cli.fleet_namespace) @@ -377,12 +370,85 @@ key_json = '''{operator_key}''' cli.fleet_namespace )) .log_level("info,kube_runtime=warn"); - operator.credentials = Some(OperatorCredentials { credentials_toml }); + operator.credentials = Some(credentials.clone()); operator .interpret(&Inventory::empty(), &topology) .await .context("operator deploy")?; + // ---- 7. OpenBao: deploy + setup + seed FleetDeploySecrets ----------- + // Co-located in the fleet namespace. The operator's credentials live + // here so a later `harmony-fleet-deploy --from-tag ` upgrades the + // operator alone, pulling them back through harmony_config. 
+ let openbao = OpenbaoInstance { + namespace: cli.fleet_namespace.clone(), + release: "openbao".to_string(), + }; + let secrets_host = format!("secrets-stg.{}", cli.base_domain); + log::info!("OpenBao: ns={} host={}", openbao.namespace, secrets_host); + OpenbaoScore { + instance: openbao.clone(), + host: secrets_host.clone(), + openshift: true, + tls_issuer: Some(cli.cluster_issuer.clone()), + } + .interpret(&Inventory::empty(), &topology) + .await + .context("OpenBao deploy")?; + + OpenbaoSetupScore { + instance: openbao.clone(), + policies: vec![OpenbaoPolicy { + name: "fleet-deployer".to_string(), + hcl: r#"path "secret/data/harmony/*" { capabilities = ["read"] } +path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"# + .to_string(), + }], + ..Default::default() + } + .interpret(&Inventory::empty(), &topology) + .await + .context("OpenBao setup")?; + + // Seed the operator credentials as FleetDeploySecrets (no kubeconfig — + // callers use their own context). Reached via port-forward with the + // root token setup cached, so it doesn't wait on the public route/cert. 
+ log::info!("Seeding FleetDeploySecrets into OpenBao"); + let k8s = topology + .k8s_client() + .await + .map_err(|e| anyhow::anyhow!(e))?; + let _pf = k8s + .port_forward(&openbao.pod(), &openbao.namespace, 8200, 8200) + .await + .context("port-forward to OpenBao")?; + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + let store = OpenbaoSecretStore::new( + format!("http://127.0.0.1:{}", _pf.port()), + "secret".to_string(), + "token".to_string(), + true, + Some(cached_root_token().map_err(|e| anyhow::anyhow!(e))?), + None, + None, + None, + None, + None, + None, + ) + .await + .context("OpenBao client")?; + ConfigClient::new(vec![ + Arc::new(StoreSource::new("harmony".to_string(), store)) + as Arc, + ]) + .set(&FleetDeploySecrets { + operator_credentials_toml: credentials.credentials_toml.clone(), + kubeconfig: None, + }) + .await + .context("seed FleetDeploySecrets")?; + log::info!("[6/6] Stack installed."); println!("\n=== fleet-staging install complete ==="); println!("Zitadel: https://{zitadel_host}/"); @@ -399,6 +465,7 @@ key_json = '''{operator_key}''' "Auth callout: oc -n {} get deploy/fleet-callout", cli.fleet_namespace ); + println!("OpenBao: https://{secrets_host}/"); println!("Project id: {project_id}"); println!( "Admin user: {} (machine key in ~/.local/share/harmony/zitadel/client-config.json)", @@ -420,6 +487,14 @@ key_json = '''{operator_key}''' --admin-oidc-client-id {cli_client_id} \\\n \ --agent-binary " ); + println!(); + println!("Upgrade just the operator when a new build is published:"); + println!( + " OPENBAO_URL=https://{secrets_host} OPENBAO_TOKEN= \\\n \ + harmony-fleet-deploy --filter FleetOperatorScore \\\n \ + --from-tag harmony-fleet-operator-vX.Y.Z --namespace {} --yes", + cli.fleet_namespace + ); Ok(()) } diff --git a/fleet/harmony-fleet-deploy/src/operator/chart.rs b/fleet/harmony-fleet-deploy/src/operator/chart.rs index 779c246e..c022cef2 100644 --- a/fleet/harmony-fleet-deploy/src/operator/chart.rs +++ 
b/fleet/harmony-fleet-deploy/src/operator/chart.rs @@ -102,6 +102,22 @@ pub struct OperatorCredentials { pub credentials_toml: String, } +impl OperatorCredentials { + /// Build the auth-callout credentials from a Zitadel machine key. + /// `key_json` goes in a TOML *literal* string — see the field docs + /// for why basic strings corrupt the embedded PEM. + pub fn zitadel_jwt(oidc_issuer_url: &str, audience: &str, key_json: &str) -> Self { + Self { + credentials_toml: format!( + "type = \"zitadel-jwt\"\n\ + oidc_issuer_url = \"{oidc_issuer_url}\"\n\ + audience = \"{audience}\"\n\ + key_json = '''{key_json}'''\n" + ), + } + } +} + impl Default for ChartOptions { fn default() -> Self { Self { diff --git a/harmony/src/modules/openbao/mod.rs b/harmony/src/modules/openbao/mod.rs index 6ab454b4..8b612d9f 100644 --- a/harmony/src/modules/openbao/mod.rs +++ b/harmony/src/modules/openbao/mod.rs @@ -13,7 +13,7 @@ use crate::{ topology::{HelmCommand, K8sclient, Topology}, }; -pub use setup::{OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoSetupScore, OpenbaoUser}; +pub use setup::{OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoSetupScore, OpenbaoUser, cached_root_token}; const DEFAULT_NAMESPACE: &str = "openbao"; const DEFAULT_RELEASE: &str = "openbao"; diff --git a/harmony/src/modules/openbao/setup.rs b/harmony/src/modules/openbao/setup.rs index 52f355b3..e1360f4a 100644 --- a/harmony/src/modules/openbao/setup.rs +++ b/harmony/src/modules/openbao/setup.rs @@ -147,6 +147,18 @@ fn keys_file() -> PathBuf { keys_dir().join("unseal-keys.json") } +/// The root token from the cached unseal-keys file written at init. +/// Dev/staging convenience for callers that need to seed OpenBao right +/// after [`OpenbaoSetupScore`] runs; production uses auto-unseal and +/// wouldn't persist this. 
+pub fn cached_root_token() -> Result { + let path = keys_file(); + let content = std::fs::read_to_string(&path).map_err(|e| format!("read {path:?}: {e}"))?; + let init: InitOutput = + serde_json::from_str(&content).map_err(|e| format!("parse {path:?}: {e}"))?; + Ok(init.root_token) +} + impl OpenbaoSetupInterpret { async fn exec( &self, -- 2.39.5 From fac83d853d4d8439275fe404b3b4e248c6af9d3c Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Fri, 29 May 2026 22:39:39 -0400 Subject: [PATCH 02/14] refactor(fleet-staging): use tracing instead of println for output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Swap env_logger for tracing_subscriber (its fmt bridges the framework's log:: deploy-progress output) and route the install banner + step logs through tracing::info! — no raw println. --- Cargo.lock | 4 +- examples/fleet_staging_install/Cargo.toml | 4 +- examples/fleet_staging_install/src/main.rs | 61 +++++++++++----------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2123b3a..65f1a8cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3495,7 +3495,6 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "env_logger", "harmony", "harmony-fleet-deploy", "harmony-k8s", @@ -3503,10 +3502,11 @@ dependencies = [ "harmony_cli", "harmony_config", "harmony_secret", - "log", "nkeys", "rand 0.9.4", "tokio", + "tracing", + "tracing-subscriber", ] [[package]] diff --git a/examples/fleet_staging_install/Cargo.toml b/examples/fleet_staging_install/Cargo.toml index b56657a2..a9bb0252 100644 --- a/examples/fleet_staging_install/Cargo.toml +++ b/examples/fleet_staging_install/Cargo.toml @@ -23,5 +23,5 @@ rand = "0.9" anyhow.workspace = true clap = { version = "4", features = ["derive", "env"] } tokio.workspace = true -log.workspace = true -env_logger.workspace = true +tracing = { workspace = true } +tracing-subscriber = { workspace = true } diff --git 
a/examples/fleet_staging_install/src/main.rs b/examples/fleet_staging_install/src/main.rs index 885f53c8..9052cf47 100644 --- a/examples/fleet_staging_install/src/main.rs +++ b/examples/fleet_staging_install/src/main.rs @@ -126,9 +126,12 @@ struct Cli { #[tokio::main] async fn main() -> Result<()> { - env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) - .try_init() - .ok(); + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); let cli = Cli::parse(); let topology = K8sAnywhereTopology::from_env(); @@ -147,7 +150,7 @@ async fn main() -> Result<()> { cluster_issuer: cli.cluster_issuer.clone(), ..Default::default() }; - log::info!( + tracing::info!( "[1/6] Zitadel helm: ns={} host={}", cli.zitadel_namespace, zitadel_host @@ -162,7 +165,7 @@ async fn main() -> Result<()> { // `fleet-operator` machine key in hand when filling // OperatorCredentials. The Score caches keys to // ZitadelClientConfig on disk; we read them back here. - log::info!( + tracing::info!( "[2/6] Zitadel setup: project={} admin={} operator={}", cli.project_name, cli.admin_username, @@ -261,8 +264,8 @@ async fn main() -> Result<()> { ) })? .clone(); - log::info!("[2/6] project_id resolved: {project_id}"); - log::info!("[2/6] device-code client_id for '{cli_app_name}' resolved: {cli_client_id}"); + tracing::info!("[2/6] project_id resolved: {project_id}"); + tracing::info!("[2/6] device-code client_id for '{cli_app_name}' resolved: {cli_client_id}"); // ---- 3. Issuer NKey + auth callout pieces --------------------------- // The callout signs user JWTs with this account NKey. NATS server @@ -278,7 +281,7 @@ async fn main() -> Result<()> { // ---- 4. 
NATS install ------------------------------------------------ let nats_release = "fleet-nats"; - log::info!( + tracing::info!( "[3/6] NATS install: ns={} release={} ws={}", cli.fleet_namespace, nats_release, @@ -322,7 +325,7 @@ async fn main() -> Result<()> { .context("NATS install (single-instance + auth_callout + WS Route)")?; // ---- 5. Auth callout deployment ------------------------------------- - log::info!( + tracing::info!( "[4/6] Auth callout: image={} project_id={}", cli.callout_image, project_id @@ -354,7 +357,7 @@ async fn main() -> Result<()> { .context("auth callout deploy")?; // ---- 6. Operator deployment with credentials ------------------------ - log::info!("[5/6] Operator: image={}", cli.operator_image); + tracing::info!("[5/6] Operator: image={}", cli.operator_image); let credentials = OperatorCredentials::zitadel_jwt( &format!("https://{zitadel_host}"), &project_id, @@ -385,7 +388,7 @@ async fn main() -> Result<()> { release: "openbao".to_string(), }; let secrets_host = format!("secrets-stg.{}", cli.base_domain); - log::info!("OpenBao: ns={} host={}", openbao.namespace, secrets_host); + tracing::info!("OpenBao: ns={} host={}", openbao.namespace, secrets_host); OpenbaoScore { instance: openbao.clone(), host: secrets_host.clone(), @@ -413,7 +416,7 @@ path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"# // Seed the operator credentials as FleetDeploySecrets (no kubeconfig — // callers use their own context). Reached via port-forward with the // root token setup cached, so it doesn't wait on the public route/cert. 
- log::info!("Seeding FleetDeploySecrets into OpenBao"); + tracing::info!("Seeding FleetDeploySecrets into OpenBao"); let k8s = topology .k8s_client() .await @@ -449,36 +452,35 @@ path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"# .await .context("seed FleetDeploySecrets")?; - log::info!("[6/6] Stack installed."); - println!("\n=== fleet-staging install complete ==="); - println!("Zitadel: https://{zitadel_host}/"); - println!("NATS WS public: wss://{nats_ws_host}/"); - println!( + tracing::info!("[6/6] Stack installed."); + tracing::info!("\n=== fleet-staging install complete ==="); + tracing::info!("Zitadel: https://{zitadel_host}/"); + tracing::info!("NATS WS public: wss://{nats_ws_host}/"); + tracing::info!( "NATS in-cluster: nats://{nats_release}.{}.svc.cluster.local:4222", cli.fleet_namespace ); - println!( + tracing::info!( "Operator: oc -n {} get deploy/harmony-fleet-operator", cli.fleet_namespace ); - println!( + tracing::info!( "Auth callout: oc -n {} get deploy/fleet-callout", cli.fleet_namespace ); - println!("OpenBao: https://{secrets_host}/"); - println!("Project id: {project_id}"); - println!( + tracing::info!("OpenBao: https://{secrets_host}/"); + tracing::info!("Project id: {project_id}"); + tracing::info!( "Admin user: {} (machine key in ~/.local/share/harmony/zitadel/client-config.json)", cli.admin_username ); - println!( + tracing::info!( "Operator user: {} (machine key embedded in operator's Secret)", cli.operator_username ); - println!("SSO client_id: {cli_client_id} (app '{cli_app_name}', device-code grant)"); - println!(); - println!("To enroll a device, pass the SSO client_id explicitly:"); - println!( + tracing::info!("SSO client_id: {cli_client_id} (app '{cli_app_name}', device-code grant)"); + tracing::info!("To enroll a device, pass the SSO client_id explicitly:"); + tracing::info!( " fleet_device_enroll \\\n \ --target ssh://@ \\\n \ --issuer-url https://{zitadel_host} \\\n \ @@ -487,9 +489,8 @@ path 
"secret/metadata/harmony/*" { capabilities = ["list","read"] }"# --admin-oidc-client-id {cli_client_id} \\\n \ --agent-binary " ); - println!(); - println!("Upgrade just the operator when a new build is published:"); - println!( + tracing::info!("Upgrade just the operator when a new build is published:"); + tracing::info!( " OPENBAO_URL=https://{secrets_host} OPENBAO_TOKEN= \\\n \ harmony-fleet-deploy --filter FleetOperatorScore \\\n \ --from-tag harmony-fleet-operator-vX.Y.Z --namespace {} --yes", -- 2.39.5 From 199e285e5299e3bde1022f5033411594bb20eede Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sat, 30 May 2026 05:04:34 -0400 Subject: [PATCH 03/14] feat: Use tracing instead of logger in harmony_cli and work on fleet_staging_install refactor to use harmony_cli properly, still some more work to do --- Cargo.lock | 9 +- examples/fleet_staging_install/Cargo.toml | 3 +- examples/fleet_staging_install/src/main.rs | 542 ++++++++------------- harmony_cli/Cargo.toml | 6 +- harmony_cli/src/cli_logger.rs | 125 ++--- harmony_cli/src/lib.rs | 4 +- 6 files changed, 259 insertions(+), 430 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65f1a8cf..7fd5ef84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3504,9 +3504,10 @@ dependencies = [ "harmony_secret", "nkeys", "rand 0.9.4", + "schemars 0.8.22", + "serde", "tokio", "tracing", - "tracing-subscriber", ] [[package]] @@ -4310,18 +4311,16 @@ name = "harmony_cli" version = "0.1.0" dependencies = [ "assert_cmd", - "chrono", "clap", "console", - "env_logger", "harmony", "harmony_tui", "indicatif", - "indicatif-log-bridge", "inquire 0.7.5", "lazy_static", - "log", "tokio", + "tracing", + "tracing-subscriber", ] [[package]] diff --git a/examples/fleet_staging_install/Cargo.toml b/examples/fleet_staging_install/Cargo.toml index a9bb0252..24bdaa4b 100644 --- a/examples/fleet_staging_install/Cargo.toml +++ b/examples/fleet_staging_install/Cargo.toml @@ -24,4 +24,5 @@ anyhow.workspace = true clap = { version = "4", 
features = ["derive", "env"] } tokio.workspace = true tracing = { workspace = true } -tracing-subscriber = { workspace = true } +serde = { workspace = true } +schemars = "0.8" diff --git a/examples/fleet_staging_install/src/main.rs b/examples/fleet_staging_install/src/main.rs index 9052cf47..7357a4da 100644 --- a/examples/fleet_staging_install/src/main.rs +++ b/examples/fleet_staging_install/src/main.rs @@ -1,32 +1,17 @@ -//! Production-shape fleet install for OKD (or any cluster with the -//! same capabilities). Composes: +//! Production-shape fleet install for OKD (or any cluster with the same +//! capabilities): Zitadel SSO + NATS (auth-callout) + operator + OpenBao, +//! composed from Scores. //! -//! 1. Zitadel + Postgres helm install in `--zitadel-namespace`, -//! edge-TLS Route at `sso-staging.` via cert-manager. -//! 2. ZitadelSetupScore in the same call so we have the -//! `fleet-operator` machine key BEFORE the operator pod starts. -//! 3. Single-instance NATS (JetStream) in `--fleet-namespace` with -//! the auth_callout block wired to the callout's issuer NKey -//! pubkey + WebSocket listener (no_tls — Route owns TLS). -//! 4. NATS WebSocket Route at `nats-fleet-staging.`, -//! edge-TLS, cert-manager-managed cert. -//! 5. NatsAuthCalloutScore deployment (Secret-based env vars only, -//! no volume mounts — OKD restricted-v2 SCC compat). -//! 6. FleetOperatorScore with credentials TOML inlining the -//! `fleet-operator` JSON keyfile (env-var-from-Secret only). +//! Tunables come from [`ConfigClient`] (`HARMONY_CONFIG_FleetStagingConfig` +//! env JSON → OpenBao → interactive prompt), not a bespoke CLI. The only +//! flags are `harmony_cli`'s: `--filter`/`--list`/`-y` select which workload +//! Scores to (re)deploy — e.g. `--filter FleetOperatorScore` bumps the +//! operator without touching NATS or the callout. //! -//! One required CLI flag — `--base-domain` — drives every public -//! hostname. Per-cluster overrides for the cluster issuer name and -//! 
image refs follow. -//! -//! Usage: -//! -//! ```text -//! KUBECONFIG=$ADMIN_KUBECONFIG cargo run -p example_fleet_staging_install -- \ -//! --base-domain cb1.nationtech.io \ -//! --operator-image hub.nationtech.io/harmony/harmony-fleet-operator:dev \ -//! --callout-image hub.nationtech.io/harmony/harmony-nats-callout:dev -//! ``` +//! Zitadel + OpenBao are an idempotent bootstrap: ZitadelSetupScore mints the +//! `project_id` + `fleet-operator` machine key that the callout and operator +//! Scores consume, so it must converge (and cache to disk) before they're +//! built. That data flow is why those two can't sit in the filterable batch. use std::sync::Arc; @@ -45,360 +30,192 @@ use harmony::modules::zitadel::{ }; use harmony::score::Score; use harmony::topology::{K8sAnywhereTopology, K8sclient, Topology}; -use harmony_config::{ConfigClient, StoreSource}; +use harmony_config::{Config, ConfigClient, StoreSource}; use harmony_fleet_deploy::{FleetDeploySecrets, FleetOperatorScore, OperatorCredentials}; use harmony_k8s::KubernetesDistribution; use harmony_secret::OpenbaoSecretStore; use nkeys::KeyPair; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use tracing::info; -#[derive(Parser, Debug)] -#[command( - name = "fleet_staging_install", - about = "Install fleet staging stack (Zitadel + NATS + callout + operator) on OKD" -)] -struct Cli { - /// Cluster's public base domain. Hostnames are derived from it: - /// sso-staging. ← Zitadel - /// nats-fleet-staging. ← NATS WebSocket - /// - /// To deploy on a different cluster, change this and re-run. - #[arg(long)] +/// Non-secret install tunables. `base_domain` drives every public hostname; +/// the image refs and `*-stg.` hosts have no safe default, so an empty +/// value is rejected at startup. Everything else defaults to the staging +/// conventions. 
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Config)] +#[serde(default)] +struct FleetStagingConfig { base_domain: String, - - /// cert-manager `ClusterIssuer` name. Drives the - /// `cert-manager.io/cluster-issuer` annotation on the Zitadel - /// and NATS Routes. Override per cluster if your operator uses - /// a different issuer name. - #[arg(long, default_value = "letsencrypt-prod")] - cluster_issuer: String, - - /// Namespace for NATS, callout, operator. - #[arg(long, default_value = "fleet-staging")] - fleet_namespace: String, - - /// Namespace for Zitadel + Postgres. - #[arg(long, default_value = "zitadel-staging")] - zitadel_namespace: String, - - /// Operator container image (`repository:tag`). Public on - /// hub.nationtech.io for the demo; ImagePullSecret for that - /// registry must already be present in `--fleet-namespace`. - #[arg(long)] operator_image: String, - - /// Auth callout container image (`repository:tag`). - #[arg(long)] callout_image: String, - - /// NATS account name auth-callout-issued users land in. Must - /// match the NATS Helm `auth_callout.account` field. Default - /// `FLEET` matches the rest of the staging conventions. - #[arg(long, default_value = "FLEET")] + cluster_issuer: String, + fleet_namespace: String, + zitadel_namespace: String, nats_account: String, - - /// Zitadel chart version pin. - #[arg(long, default_value = "v4.12.1")] zitadel_version: String, - - /// Project name created inside Zitadel for fleet auth. - #[arg(long, default_value = "fleet")] project_name: String, - - /// Role name granting full admin (operator + manual ops). The - /// callout maps this role to `pub/sub: [">"]`. - #[arg(long, default_value = "fleet-admin")] admin_role: String, - - /// Role name granting per-device scoped permissions. - #[arg(long, default_value = "device")] device_role: String, - - /// Username of the operator's Zitadel machine user. Distinct - /// from `fleet-ops` (manual admin tooling) for audit trail. 
- #[arg(long, default_value = "fleet-operator")] operator_username: String, - - /// Username of the manual-admin Zitadel machine user (the one - /// you mint tokens with from your laptop). - #[arg(long, default_value = "fleet-ops")] admin_username: String, } +impl Default for FleetStagingConfig { + fn default() -> Self { + Self { + base_domain: String::new(), + operator_image: String::new(), + callout_image: String::new(), + cluster_issuer: "letsencrypt-prod".to_string(), + fleet_namespace: "fleet-staging".to_string(), + zitadel_namespace: "zitadel-staging".to_string(), + nats_account: "FLEET".to_string(), + zitadel_version: "v4.12.1".to_string(), + project_name: "fleet".to_string(), + admin_role: "fleet-admin".to_string(), + device_role: "device".to_string(), + operator_username: "fleet-operator".to_string(), + admin_username: "fleet-ops".to_string(), + } + } +} + #[tokio::main] async fn main() -> Result<()> { - tracing_subscriber::fmt() - .with_env_filter( - tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), - ) - .init(); + harmony_cli::cli_logger::init(); + let args = harmony_cli::Args::parse(); + + let cfg: FleetStagingConfig = ConfigClient::for_namespace("harmony") + .await + .get_or_prompt() + .await + .context("loading FleetStagingConfig")?; + anyhow::ensure!( + !cfg.base_domain.is_empty() + && !cfg.operator_image.is_empty() + && !cfg.callout_image.is_empty(), + "base_domain, operator_image and callout_image must be set" + ); - let cli = Cli::parse(); let topology = K8sAnywhereTopology::from_env(); topology.ensure_ready().await?; - let zitadel_host = format!("sso-stg.{}", cli.base_domain); - let nats_ws_host = format!("nats-fleet-stg.{}", cli.base_domain); + let zitadel_host = format!("sso-stg.{}", cfg.base_domain); + let nats_ws_host = format!("nats-fleet-stg.{}", cfg.base_domain); + let secrets_host = format!("secrets-stg.{}", cfg.base_domain); + let nats_release = "fleet-nats"; + let 
cli_app_name = "harmony-cli"; - // ---- 1. Zitadel helm install ---------------------------------------- - let zitadel = ZitadelScore { + // ---- Bootstrap (idempotent): Zitadel mints the project + operator key --- + ZitadelScore { host: zitadel_host.clone(), - zitadel_version: cli.zitadel_version.clone(), + zitadel_version: cfg.zitadel_version.clone(), external_secure: true, external_port: None, - namespace: cli.zitadel_namespace.clone(), - cluster_issuer: cli.cluster_issuer.clone(), + namespace: cfg.zitadel_namespace.clone(), + cluster_issuer: cfg.cluster_issuer.clone(), ..Default::default() - }; - tracing::info!( - "[1/6] Zitadel helm: ns={} host={}", - cli.zitadel_namespace, - zitadel_host - ); - zitadel - .interpret(&Inventory::empty(), &topology) - .await - .context("Zitadel helm install")?; + } + .interpret(&Inventory::empty(), &topology) + .await + .context("Zitadel helm install")?; - // ---- 2. ZitadelSetupScore: project + roles + machine users ---------- - // Run this BEFORE building the operator score so we have the - // `fleet-operator` machine key in hand when filling - // OperatorCredentials. The Score caches keys to - // ZitadelClientConfig on disk; we read them back here. - tracing::info!( - "[2/6] Zitadel setup: project={} admin={} operator={}", - cli.project_name, - cli.admin_username, - cli.operator_username - ); - let api_app_name = "nats"; - let cli_app_name = "harmony-cli"; - let zitadel_setup = ZitadelSetupScore { + ZitadelSetupScore { host: zitadel_host.clone(), scheme: Default::default(), port: None, skip_tls: false, endpoint: None, admin_org_id: None, - namespace: cli.zitadel_namespace.clone(), + namespace: cfg.zitadel_namespace.clone(), // Device-code OIDC app for human admin login from - // `fleet_device_enroll`'s SSO flow. Operators sign in here - // with their personal Zitadel account; their resulting - // access token is what `mint_device_credentials` uses to - // create per-device users + keys. 
The numeric `client_id` - // generated by Zitadel for this app is what gets passed to - // `--admin-oidc-client-id`; we read it back from the - // ZitadelClientConfig cache below and print it in the - // success banner. + // `fleet_device_enroll`'s SSO flow. The numeric `client_id` Zitadel + // generates is read back below and printed for `--admin-oidc-client-id`. applications: vec![ZitadelApplication { - project_name: cli.project_name.clone(), + project_name: cfg.project_name.clone(), app_name: cli_app_name.to_string(), app_type: ZitadelAppType::DeviceCode, }], api_apps: vec![ZitadelApiApp { - project_name: cli.project_name.clone(), - app_name: api_app_name.to_string(), + project_name: cfg.project_name.clone(), + app_name: "nats".to_string(), }], roles: vec![ ZitadelRole { - project_name: cli.project_name.clone(), - key: cli.admin_role.clone(), + project_name: cfg.project_name.clone(), + key: cfg.admin_role.clone(), display_name: "Fleet Admin".to_string(), group: None, }, ZitadelRole { - project_name: cli.project_name.clone(), - key: cli.device_role.clone(), + project_name: cfg.project_name.clone(), + key: cfg.device_role.clone(), display_name: "Device".to_string(), group: None, }, ], machine_users: vec![ ZitadelMachineUser { - username: cli.admin_username.clone(), + username: cfg.admin_username.clone(), name: "Fleet Operations".to_string(), create_pat: false, machine_key: Some(MachineKeyType::Json), - project_name: Some(cli.project_name.clone()), - grant_roles: vec![cli.admin_role.clone()], + project_name: Some(cfg.project_name.clone()), + grant_roles: vec![cfg.admin_role.clone()], }, ZitadelMachineUser { - username: cli.operator_username.clone(), + username: cfg.operator_username.clone(), name: "Fleet Operator (in-cluster)".to_string(), create_pat: false, machine_key: Some(MachineKeyType::Json), - project_name: Some(cli.project_name.clone()), - grant_roles: vec![cli.admin_role.clone()], + project_name: Some(cfg.project_name.clone()), + grant_roles: 
vec![cfg.admin_role.clone()], }, ], - }; - zitadel_setup - .interpret(&Inventory::empty(), &topology) - .await - .context("Zitadel setup (project + roles + machine users)")?; + } + .interpret(&Inventory::empty(), &topology) + .await + .context("Zitadel setup (project + roles + machine users)")?; - // Read back the project_id + operator key from cache. + // Read back the project_id + operator key + device-code client_id. let zcfg = ZitadelClientConfig::load() .context("ZitadelSetupScore did not produce a client config cache")?; let project_id = zcfg - .project_id_by_name(&cli.project_name) + .project_id_by_name(&cfg.project_name) .or(zcfg.project_id.as_ref()) .context("project_id missing from cache after setup")? .clone(); let operator_machine_key = zcfg - .machine_key(&cli.operator_username) + .machine_key(&cfg.operator_username) .with_context(|| { format!( - "machine key for {} missing from cache after setup", - cli.operator_username + "machine key for {} missing from cache", + cfg.operator_username ) })? .clone(); let cli_client_id = zcfg .client_id(cli_app_name) - .with_context(|| { - format!( - "OIDC client_id for app '{cli_app_name}' missing from cache — \ - ZitadelSetupScore should have created the app and populated \ - ZitadelClientConfig.apps" - ) - })? + .with_context(|| format!("OIDC client_id for app '{cli_app_name}' missing from cache"))? .clone(); - tracing::info!("[2/6] project_id resolved: {project_id}"); - tracing::info!("[2/6] device-code client_id for '{cli_app_name}' resolved: {cli_client_id}"); - // ---- 3. Issuer NKey + auth callout pieces --------------------------- - // The callout signs user JWTs with this account NKey. NATS server - // is configured with the matching pubkey via the auth_callout - // block in the helm values rendered by NatsK8sScore. 
- let issuer_kp = KeyPair::new_account(); - let issuer_seed = issuer_kp - .seed() - .map_err(|e| anyhow::anyhow!("issuer NKey seed: {e}"))?; - let issuer_pubkey = issuer_kp.public_key(); - let nats_auth_user = "auth"; - let nats_auth_pass = generate_alphanum(24); - - // ---- 4. NATS install ------------------------------------------------ - let nats_release = "fleet-nats"; - tracing::info!( - "[3/6] NATS install: ns={} release={} ws={}", - cli.fleet_namespace, - nats_release, - nats_ws_host - ); - let nats_cluster = NatsCluster { - namespace: cli.fleet_namespace.clone(), - // `domain` is unused in single-instance mode (gateway off). - // Kept here for the legacy supercluster code path which the - // staging install doesn't take. - domain: cli.base_domain.clone(), - replicas: 1, - name: nats_release.to_string(), - gateway_advertise: String::new(), - dns_name: nats_ws_host.clone(), - // Static-string fields the NatsCluster shape requires; only - // referenced when `gateway` is Some, which it isn't here. - supercluster_ca_secret_name: "fleet-nats-supercluster-ca", - tls_cert_name: "fleet-nats-tls", - jetstream_enabled: "true", - }; - let nats = NatsK8sScore { - distribution: KubernetesDistribution::OpenshiftFamily, - cluster: nats_cluster, - peers: None, - ca_bundle: None, - gateway: None, // single-instance — drop the gateway block - auth_callout: Some(AuthCalloutCfg { - issuer_pubkey: issuer_pubkey.clone(), - auth_user: nats_auth_user.to_string(), - auth_pass: nats_auth_pass.clone(), - account: cli.nats_account.clone(), - }), - websocket: Some(WebSocketRouteCfg { - host: nats_ws_host.clone(), - cluster_issuer: cli.cluster_issuer.clone(), - }), - }; - nats.interpret(&Inventory::empty(), &topology) - .await - .context("NATS install (single-instance + auth_callout + WS Route)")?; - - // ---- 5. 
Auth callout deployment ------------------------------------- - tracing::info!( - "[4/6] Auth callout: image={} project_id={}", - cli.callout_image, - project_id - ); - let mut callout = NatsAuthCalloutScore::new( - "fleet-callout", - &cli.fleet_namespace, - format!( - "nats://{nats_release}.{}.svc.cluster.local:4222", - cli.fleet_namespace - ), - format!("https://{zitadel_host}"), - project_id.clone(), - nats_auth_user, - &nats_auth_pass, - &issuer_seed, - ) - .image(&cli.callout_image) - .target_account(&cli.nats_account) - .admin_role(&cli.admin_role) - .device_role(&cli.device_role) - .danger_accept_invalid_certs(false); - callout.device_id_claim = "client_id".to_string(); - callout.device_id_prefix_strip = "device-".to_string(); - callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles"); - callout - .interpret(&Inventory::empty(), &topology) - .await - .context("auth callout deploy")?; - - // ---- 6. Operator deployment with credentials ------------------------ - tracing::info!("[5/6] Operator: image={}", cli.operator_image); - let credentials = OperatorCredentials::zitadel_jwt( - &format!("https://{zitadel_host}"), - &project_id, - &operator_machine_key, - ); - let mut operator = FleetOperatorScore::new() - .namespace(&cli.fleet_namespace) - .release_name("harmony-fleet-operator") - .image(&cli.operator_image) - .image_pull_policy("Always") - .nats_url(format!( - "nats://{nats_release}.{}.svc.cluster.local:4222", - cli.fleet_namespace - )) - .log_level("info,kube_runtime=warn"); - operator.credentials = Some(credentials.clone()); - operator - .interpret(&Inventory::empty(), &topology) - .await - .context("operator deploy")?; - - // ---- 7. OpenBao: deploy + setup + seed FleetDeploySecrets ----------- - // Co-located in the fleet namespace. The operator's credentials live - // here so a later `harmony-fleet-deploy --from-tag ` upgrades the - // operator alone, pulling them back through harmony_config. 
+ // ---- OpenBao: deploy + policy, co-located in the fleet namespace -------- + // The operator's credentials are seeded here so a later + // `harmony-fleet-deploy --from-tag ` upgrades the operator alone. let openbao = OpenbaoInstance { - namespace: cli.fleet_namespace.clone(), + namespace: cfg.fleet_namespace.clone(), release: "openbao".to_string(), }; - let secrets_host = format!("secrets-stg.{}", cli.base_domain); - tracing::info!("OpenBao: ns={} host={}", openbao.namespace, secrets_host); OpenbaoScore { instance: openbao.clone(), host: secrets_host.clone(), openshift: true, - tls_issuer: Some(cli.cluster_issuer.clone()), + tls_issuer: Some(cfg.cluster_issuer.clone()), } .interpret(&Inventory::empty(), &topology) .await .context("OpenBao deploy")?; - OpenbaoSetupScore { instance: openbao.clone(), policies: vec![OpenbaoPolicy { @@ -413,21 +230,103 @@ path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"# .await .context("OpenBao setup")?; - // Seed the operator credentials as FleetDeploySecrets (no kubeconfig — - // callers use their own context). Reached via port-forward with the - // root token setup cached, so it doesn't wait on the public route/cert. - tracing::info!("Seeding FleetDeploySecrets into OpenBao"); + // ---- Workload Scores: filterable via `harmony_cli::Args` ---------------- + // The callout signs user JWTs with this account NKey; NATS is configured + // with the matching pubkey via the auth_callout block in its helm values. 
+ let issuer_kp = KeyPair::new_account(); + let issuer_seed = issuer_kp + .seed() + .map_err(|e| anyhow::anyhow!("issuer NKey seed: {e}"))?; + let nats_auth_user = "auth"; + let nats_auth_pass = generate_alphanum(24); + let nats_url = format!( + "nats://{nats_release}.{}.svc.cluster.local:4222", + cfg.fleet_namespace + ); + + let nats = NatsK8sScore { + distribution: KubernetesDistribution::OpenshiftFamily, + cluster: NatsCluster { + namespace: cfg.fleet_namespace.clone(), + // `domain` and the static-string fields below are only read in the + // supercluster path (gateway Some), which staging doesn't take. + domain: cfg.base_domain.clone(), + replicas: 1, + name: nats_release.to_string(), + gateway_advertise: String::new(), + dns_name: nats_ws_host.clone(), + supercluster_ca_secret_name: "fleet-nats-supercluster-ca", + tls_cert_name: "fleet-nats-tls", + jetstream_enabled: "true", + }, + peers: None, + ca_bundle: None, + gateway: None, + auth_callout: Some(AuthCalloutCfg { + issuer_pubkey: issuer_kp.public_key(), + auth_user: nats_auth_user.to_string(), + auth_pass: nats_auth_pass.clone(), + account: cfg.nats_account.clone(), + }), + websocket: Some(WebSocketRouteCfg { + host: nats_ws_host.clone(), + cluster_issuer: cfg.cluster_issuer.clone(), + }), + }; + + let mut callout = NatsAuthCalloutScore::new( + "fleet-callout", + &cfg.fleet_namespace, + nats_url.clone(), + format!("https://{zitadel_host}"), + project_id.clone(), + nats_auth_user, + &nats_auth_pass, + &issuer_seed, + ) + .image(&cfg.callout_image) + .target_account(&cfg.nats_account) + .admin_role(&cfg.admin_role) + .device_role(&cfg.device_role) + .danger_accept_invalid_certs(false); + callout.device_id_claim = "client_id".to_string(); + callout.device_id_prefix_strip = "device-".to_string(); + callout.roles_claim = format!("urn:zitadel:iam:org:project:{project_id}:roles"); + + let credentials = OperatorCredentials::zitadel_jwt( + &format!("https://{zitadel_host}"), + &project_id, + &operator_machine_key, 
+ ); + let mut operator = FleetOperatorScore::new() + .namespace(&cfg.fleet_namespace) + .release_name("harmony-fleet-operator") + .image(&cfg.operator_image) + .image_pull_policy("Always") + .nats_url(nats_url.clone()) + .log_level("info,kube_runtime=warn"); + operator.credentials = Some(credentials.clone()); + + let scores: Vec>> = + vec![Box::new(nats), Box::new(callout), Box::new(operator)]; + harmony_cli::run(Inventory::empty(), topology.clone(), scores, Some(args)) + .await + .map_err(|e| anyhow::anyhow!("{e}"))?; + + // ---- Seed operator credentials as FleetDeploySecrets -------------------- + // Reached via port-forward with the cached root token, so it doesn't wait + // on the public route/cert. No kubeconfig — CD callers use their own context. let k8s = topology .k8s_client() .await .map_err(|e| anyhow::anyhow!(e))?; - let _pf = k8s + let pf = k8s .port_forward(&openbao.pod(), &openbao.namespace, 8200, 8200) .await .context("port-forward to OpenBao")?; tokio::time::sleep(std::time::Duration::from_secs(1)).await; let store = OpenbaoSecretStore::new( - format!("http://127.0.0.1:{}", _pf.port()), + format!("http://127.0.0.1:{}", pf.port()), "secret".to_string(), "token".to_string(), true, @@ -452,50 +351,21 @@ path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"# .await .context("seed FleetDeploySecrets")?; - tracing::info!("[6/6] Stack installed."); - tracing::info!("\n=== fleet-staging install complete ==="); - tracing::info!("Zitadel: https://{zitadel_host}/"); - tracing::info!("NATS WS public: wss://{nats_ws_host}/"); - tracing::info!( - "NATS in-cluster: nats://{nats_release}.{}.svc.cluster.local:4222", - cli.fleet_namespace + info!("=== fleet-staging install complete ==="); + info!( + "Zitadel: https://{zitadel_host}/ (admin user {})", + cfg.admin_username ); - tracing::info!( - "Operator: oc -n {} get deploy/harmony-fleet-operator", - cli.fleet_namespace - ); - tracing::info!( - "Auth callout: oc -n {} get deploy/fleet-callout", - 
cli.fleet_namespace - ); - tracing::info!("OpenBao: https://{secrets_host}/"); - tracing::info!("Project id: {project_id}"); - tracing::info!( - "Admin user: {} (machine key in ~/.local/share/harmony/zitadel/client-config.json)", - cli.admin_username - ); - tracing::info!( - "Operator user: {} (machine key embedded in operator's Secret)", - cli.operator_username - ); - tracing::info!("SSO client_id: {cli_client_id} (app '{cli_app_name}', device-code grant)"); - tracing::info!("To enroll a device, pass the SSO client_id explicitly:"); - tracing::info!( - " fleet_device_enroll \\\n \ - --target ssh://@ \\\n \ - --issuer-url https://{zitadel_host} \\\n \ - --audience {project_id} \\\n \ - --nats-url wss://{nats_ws_host} \\\n \ - --admin-oidc-client-id {cli_client_id} \\\n \ + info!("NATS WS public: wss://{nats_ws_host}/"); + info!("OpenBao: https://{secrets_host}/"); + info!("Project id: {project_id}"); + info!("SSO client_id: {cli_client_id} (app '{cli_app_name}', device-code grant)"); + info!( + "Enroll a device: fleet_device_enroll --target ssh://@ \ + --issuer-url https://{zitadel_host} --audience {project_id} \ + --nats-url wss://{nats_ws_host} --admin-oidc-client-id {cli_client_id} \ --agent-binary " ); - tracing::info!("Upgrade just the operator when a new build is published:"); - tracing::info!( - " OPENBAO_URL=https://{secrets_host} OPENBAO_TOKEN= \\\n \ - harmony-fleet-deploy --filter FleetOperatorScore \\\n \ - --from-tag harmony-fleet-operator-vX.Y.Z --namespace {} --yes", - cli.fleet_namespace - ); Ok(()) } diff --git a/harmony_cli/Cargo.toml b/harmony_cli/Cargo.toml index 248b702c..a8774d2f 100644 --- a/harmony_cli/Cargo.toml +++ b/harmony_cli/Cargo.toml @@ -16,13 +16,11 @@ harmony = { path = "../harmony" } harmony_tui = { path = "../harmony_tui", optional = true } inquire.workspace = true tokio.workspace = true -env_logger.workspace = true console = "0.16.0" indicatif = "0.18.0" lazy_static = "1.5.0" -log.workspace = true -indicatif-log-bridge = "0.2.3" 
-chrono.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true [dev-dependencies] harmony = { path = "../harmony", features = ["testing"] } diff --git a/harmony_cli/src/cli_logger.rs b/harmony_cli/src/cli_logger.rs index 03501ef8..c3e56436 100644 --- a/harmony_cli/src/cli_logger.rs +++ b/harmony_cli/src/cli_logger.rs @@ -1,13 +1,12 @@ -use chrono::Local; use console::style; use harmony::{ instrumentation::{self, HarmonyEvent}, modules::application::ApplicationFeatureStatus, topology::TopologyStatus, }; -use log::{error, info, log_enabled}; -use std::io::Write; use std::sync::{Mutex, OnceLock}; +use tracing::{error, info}; +use tracing_subscriber::EnvFilter; pub fn init() { static INITIALIZED: OnceLock<()> = OnceLock::new(); @@ -17,68 +16,33 @@ pub fn init() { }); } +// The framework still emits via the `log` crate; tracing-subscriber's default +// `tracing-log` bridge captures those records, so this subscriber covers both. +// Normal runs stay terse (level + message, ANSI-coloured); debug/trace adds the +// timestamp + target needed to actually debug — matching the old env_logger UX. 
fn configure_logger() { - env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) - .format(|buf, record| { - let debug_mode = log_enabled!(log::Level::Debug); - let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S"); + let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); + let verbose = std::env::var("RUST_LOG") + .map(|v| v.to_lowercase().contains("debug") || v.to_lowercase().contains("trace")) + .unwrap_or(false); + let builder = tracing_subscriber::fmt().with_env_filter(filter); + let _ = if verbose { + builder.with_target(true).try_init() + } else { + builder.without_time().with_target(false).try_init() + }; +} - let level = match record.level() { - log::Level::Error => style("ERROR").red(), - log::Level::Warn => style("WARN").yellow(), - log::Level::Info => style("INFO").green(), - log::Level::Debug => style("DEBUG").blue(), - log::Level::Trace => style("TRACE").magenta(), - }; - if let Some(status) = record.key_values().get(log::kv::Key::from("status")) { - let status = status.to_borrowed_str().unwrap(); - let emoji = match status { - "finished" => style(crate::theme::EMOJI_SUCCESS.to_string()).green(), - "skipped" => style(crate::theme::EMOJI_SKIP.to_string()).yellow(), - "failed" => style(crate::theme::EMOJI_ERROR.to_string()).red(), - _ => style("".into()), - }; - if debug_mode { - writeln!( - buf, - "[{} {:<5} {}] {} {}", - timestamp, - level, - record.target(), - emoji, - record.args() - ) - } else { - writeln!(buf, "[{:<5}] {} {}", level, emoji, record.args()) - } - } else if let Some(emoji) = record.key_values().get(log::kv::Key::from("emoji")) { - if debug_mode { - writeln!( - buf, - "[{} {:<5} {}] {} {}", - timestamp, - level, - record.target(), - emoji, - record.args() - ) - } else { - writeln!(buf, "[{:<5}] {} {}", level, emoji, record.args()) - } - } else if debug_mode { - writeln!( - buf, - "[{} {:<5} {}] {}", - timestamp, - level, - record.target(), - record.args() - ) - } else 
{ - writeln!(buf, "[{:<5}] {}", level, record.args()) - } - }) - .init(); +fn ok() -> String { + format!("{}", style(crate::theme::EMOJI_SUCCESS.to_string()).green()) +} + +fn skipped() -> String { + format!("{}", style(crate::theme::EMOJI_SKIP.to_string()).yellow()) +} + +fn failed() -> String { + format!("{}", style(crate::theme::EMOJI_ERROR.to_string()).red()) } fn handle_events() { @@ -93,8 +57,7 @@ fn handle_events() { match event { HarmonyEvent::HarmonyStarted => {} HarmonyEvent::HarmonyFinished => { - let emoji = crate::theme::EMOJI_HARMONY.to_string(); - info!(emoji = emoji.as_str(); "Harmony completed"); + info!("{} Harmony completed", crate::theme::EMOJI_HARMONY); } HarmonyEvent::TopologyStateChanged { topology, @@ -103,29 +66,26 @@ fn handle_events() { } => match status { TopologyStatus::Queued => {} TopologyStatus::Preparing => { - let emoji = format!( - "{}", - style(crate::theme::EMOJI_TOPOLOGY.to_string()).yellow() - ); - info!(emoji = emoji.as_str(); "Preparing environment: {topology}..."); + let emoji = style(crate::theme::EMOJI_TOPOLOGY.to_string()).yellow(); + info!("{emoji} Preparing environment: {topology}..."); (*preparing_topology) = true; } TopologyStatus::Success => { (*preparing_topology) = false; if let Some(message) = message { - info!(status = "finished"; "{message}"); + info!("{} {message}", ok()); } } TopologyStatus::Noop => { (*preparing_topology) = false; if let Some(message) = message { - info!(status = "skipped"; "{message}"); + info!("{} {message}", skipped()); } } TopologyStatus::Error => { (*preparing_topology) = false; if let Some(message) = message { - error!(status = "failed"; "{message}"); + error!("{} {message}", failed()); } } }, @@ -140,8 +100,8 @@ fn handle_events() { info!("{message}"); } else { (*current_score) = Some(score.clone()); - let emoji = format!("{}", style(crate::theme::EMOJI_SCORE).blue()); - info!(emoji = emoji.as_str(); "Interpreting score: {score}..."); + let emoji = 
style(crate::theme::EMOJI_SCORE).blue(); + info!("{emoji} Interpreting score: {score}..."); } } HarmonyEvent::InterpretExecutionFinished { @@ -158,17 +118,17 @@ fn handle_events() { match outcome { Ok(outcome) => match outcome.status { harmony::interpret::InterpretStatus::SUCCESS => { - info!(status = "finished"; "{}", outcome.message); + info!("{} {}", ok(), outcome.message); } harmony::interpret::InterpretStatus::NOOP => { - info!(status = "skipped"; "{}", outcome.message); + info!("{} {}", skipped(), outcome.message); } _ => { - error!(status = "failed"; "{}", outcome.message); + error!("{} {}", failed(), outcome.message); } }, Err(err) => { - error!(status = "failed"; "{err}"); + error!("{} {err}", failed()); } } } @@ -182,10 +142,13 @@ fn handle_events() { info!("Installing feature '{feature}' for '{application}'..."); } ApplicationFeatureStatus::Installed { details: _ } => { - info!(status = "finished"; "Feature '{feature}' installed"); + info!("{} Feature '{feature}' installed", ok()); } ApplicationFeatureStatus::Failed { message: details } => { - error!(status = "failed"; "Feature '{feature}' installation failed: {details}"); + error!( + "{} Feature '{feature}' installation failed: {details}", + failed() + ); } }, } diff --git a/harmony_cli/src/lib.rs b/harmony_cli/src/lib.rs index 4a0dbe7e..1feee9a6 100644 --- a/harmony_cli/src/lib.rs +++ b/harmony_cli/src/lib.rs @@ -5,7 +5,7 @@ use harmony::inventory::Inventory; use harmony::maestro::Maestro; use harmony::{score::Score, topology::Topology}; use inquire::Confirm; -use log::debug; +use tracing::debug; pub mod cli_logger; // FIXME: Don't make me pub mod cli_reporter; @@ -132,8 +132,6 @@ async fn init( maestro: harmony::maestro::Maestro, args: Args, ) -> Result<(), Box> { - let _ = env_logger::builder().try_init(); - let scores_vec = maestro_scores_filter(&maestro, args.all, args.filter, args.number); if scores_vec.is_empty() { -- 2.39.5 From af3205d353a99fcd444b05b514c91eccb8e6ed70 Mon Sep 17 00:00:00 2001 
From: Jean-Gabriel Gill-Couture Date: Sat, 30 May 2026 05:55:49 -0400 Subject: [PATCH 04/14] refactor(harmony_cli): defer topology prep so --list/declined runs are no-ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Maestro::initialize` (hence `topology.ensure_ready()`) ran before `init`'s `--list` / confirmation short-circuits, so merely listing a binary's scores — or declining to run them — still prepared the topology (cert-manager install, etc.). Build the maestro unprepared and call `prepare_topology()` only once we commit to interpreting. Expose `Maestro::prepare_topology`; add tests proving `--list` skips prep while the run path triggers it. --- Cargo.lock | 1 + harmony/src/domain/maestro/mod.rs | 2 +- harmony_cli/Cargo.toml | 1 + harmony_cli/src/lib.rs | 77 ++++++++++++++++++++++++++++++- 4 files changed, 78 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7fd5ef84..3a5ea116 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4311,6 +4311,7 @@ name = "harmony_cli" version = "0.1.0" dependencies = [ "assert_cmd", + "async-trait", "clap", "console", "harmony", diff --git a/harmony/src/domain/maestro/mod.rs b/harmony/src/domain/maestro/mod.rs index 25210bef..7dfb10d4 100644 --- a/harmony/src/domain/maestro/mod.rs +++ b/harmony/src/domain/maestro/mod.rs @@ -43,7 +43,7 @@ impl Maestro { /// Ensures the associated Topology is ready for operations. /// Delegates the readiness check and potential setup actions to the Topology. 
- async fn prepare_topology(&mut self) -> Result { + pub async fn prepare_topology(&mut self) -> Result { self.topology_state.prepare(); let result = self.topology.ensure_ready().await; diff --git a/harmony_cli/Cargo.toml b/harmony_cli/Cargo.toml index a8774d2f..1493df19 100644 --- a/harmony_cli/Cargo.toml +++ b/harmony_cli/Cargo.toml @@ -24,3 +24,4 @@ tracing-subscriber.workspace = true [dev-dependencies] harmony = { path = "../harmony", features = ["testing"] } +async-trait = "0.1" diff --git a/harmony_cli/src/lib.rs b/harmony_cli/src/lib.rs index 1feee9a6..d1f6de33 100644 --- a/harmony_cli/src/lib.rs +++ b/harmony_cli/src/lib.rs @@ -119,7 +119,9 @@ pub async fn run_cli( cli_logger::init(); cli_reporter::init(); - let mut maestro = Maestro::initialize(inventory, topology).await.unwrap(); + // Build the maestro WITHOUT preparing the topology — listing scores or + // declining the run must not touch the cluster. Prep is deferred to `init`. + let mut maestro = Maestro::new_without_initialization(inventory, topology); maestro.register_all(scores); let result = init(maestro, args).await; @@ -129,7 +131,7 @@ pub async fn run_cli( } async fn init( - maestro: harmony::maestro::Maestro, + mut maestro: harmony::maestro::Maestro, args: Args, ) -> Result<(), Box> { let scores_vec = maestro_scores_filter(&maestro, args.all, args.filter, args.number); @@ -164,6 +166,13 @@ async fn init( } } + // We're committed to running — only now prepare the topology (the + // expensive, cluster-touching step) so list/decline paths stay no-ops. + maestro + .prepare_topology() + .await + .map_err(|e| format!("topology preparation failed: {e}"))?; + // Run filtered scores for s in scores_vec { debug!("Running: {}", s.name()); @@ -180,8 +189,72 @@ mod tests { maestro::Maestro, modules::dummy::{ErrorScore, PanicScore, SuccessScore}, topology::HAClusterTopology, + topology::{PreparationError, PreparationOutcome, Topology}, }; + /// Topology whose readiness check always fails. 
Lets a test assert that + /// `--list` never reaches `prepare_topology` (it would error if it did), + /// while the run path does. + struct ExplodingTopology; + + #[async_trait::async_trait] + impl Topology for ExplodingTopology { + fn name(&self) -> &str { + "ExplodingTopology" + } + + async fn ensure_ready(&self) -> Result { + Err(PreparationError::new( + "ensure_ready must not run on the list path".to_string(), + )) + } + } + + fn exploding_maestro() -> Maestro { + let mut maestro = + Maestro::new_without_initialization(Inventory::autoload(), ExplodingTopology); + maestro.register_all(vec![Box::new(SuccessScore {})]); + maestro + } + + #[tokio::test] + async fn list_does_not_prepare_topology() { + // Topology prep fails; listing must still succeed because it never + // touches the topology. + let res = crate::init( + exploding_maestro(), + crate::Args { + yes: true, + filter: None, + interactive: false, + all: true, + number: 0, + list: true, + }, + ) + .await; + assert!(res.is_ok(), "--list should not prepare the topology"); + } + + #[tokio::test] + async fn run_prepares_topology_before_interpreting() { + // Same topology, but actually running: prep runs and its failure + // aborts before any score is interpreted. 
+ let res = crate::init( + exploding_maestro(), + crate::Args { + yes: true, + filter: None, + interactive: false, + all: true, + number: 0, + list: false, + }, + ) + .await; + assert!(res.is_err(), "run path must prepare the topology first"); + } + fn init_test_maestro() -> Maestro { let inventory = Inventory::autoload(); let topology = HAClusterTopology::autoload(); -- 2.39.5 From 4fef957edb3739d7aac3ec516f7e131919c0a2a3 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sat, 30 May 2026 08:40:54 -0400 Subject: [PATCH 05/14] feat: Example openbao now can do openbao setup and better readme --- Cargo.lock | 9 +- examples/openbao/Cargo.toml | 9 +- examples/openbao/README.md | 37 +++++++- examples/openbao/src/main.rs | 143 ++++++++++++++++++++++++++++--- harmony_config/src/source/env.rs | 2 + 5 files changed, 178 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a5ea116..52da3fe0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3213,12 +3213,15 @@ dependencies = [ name = "example-openbao" version = "0.1.0" dependencies = [ + "anyhow", + "clap", "harmony", "harmony_cli", - "harmony_macros", - "harmony_types", + "harmony_config", + "schemars 0.8.22", + "serde", "tokio", - "url", + "tracing", ] [[package]] diff --git a/examples/openbao/Cargo.toml b/examples/openbao/Cargo.toml index ae0a7937..7df8d926 100644 --- a/examples/openbao/Cargo.toml +++ b/examples/openbao/Cargo.toml @@ -8,7 +8,10 @@ license.workspace = true [dependencies] harmony = { path = "../../harmony" } harmony_cli = { path = "../../harmony_cli" } -harmony_macros = { path = "../../harmony_macros" } -harmony_types = { path = "../../harmony_types" } +harmony_config = { path = "../../harmony_config" } tokio.workspace = true -url.workspace = true +anyhow.workspace = true +clap = { version = "4", features = ["derive"] } +serde = { workspace = true } +schemars = "0.8" +tracing = { workspace = true } diff --git a/examples/openbao/README.md b/examples/openbao/README.md index 
d78556cf..9d860daa 100644 --- a/examples/openbao/README.md +++ b/examples/openbao/README.md @@ -1,7 +1,36 @@ -To install an openbao instance with harmony simply `cargo run -p example-openbao` . +# example-openbao -Depending on your environement configuration, it will either install a k3d cluster locally and deploy on it, or install to a remote cluster. +Installs a standalone OpenBao instance and makes it immediately usable as a +`harmony_config` store: deploy → init → unseal → KV v2. Depending on your +environment it either spins up a local k3d cluster or targets the remote +cluster `KUBECONFIG` points at. -Then follow the openbao documentation to initialize and unseal, this will make openbao usable. +Configuration comes from `ConfigClient` (`HARMONY_CONFIG_OpenbaoInstallConfig` +env JSON → OpenBao → interactive prompt). The only required field is `host`. -https://openbao.org/docs/platform/k8s/helm/run/ +```bash +# Non-interactive: provide the config as JSON. +export HARMONY_CONFIG_OpenbaoInstallConfig='{ + "host": "secrets-stg.cb1.nationtech.io", + "namespace": "openbao", + "release": "openbao", + "openshift": true, + "tls_issuer": "letsencrypt-prod" +}' +cargo run -p example-openbao -- --yes +``` + +`cargo run -p example-openbao -- --list` lists the scores without touching the +cluster. Run without `HARMONY_CONFIG_*` to be prompted for each field. + +Optional features compose from config presence: + +| Config field(s) | Effect | +|---------------------------------|------------------------------------------------------------| +| `tls_issuer` | cert-manager edge TLS on the ingress (omit for plain HTTP) | +| `oidc_issuer` + `oidc_audience` | JWT auth + a `harmony` role scoped to `secret/harmony/*` | + +After it runs, point `harmony_config` at it with `OPENBAO_URL=https://` +and `OPENBAO_TOKEN=` (the root token is at +`~/.local/share/harmony/openbao/unseal-keys.json`). Once `oidc_*` is set, SSO +callers can authenticate via `HARMONY_SSO_*` instead of the root token. 
diff --git a/examples/openbao/src/main.rs b/examples/openbao/src/main.rs index eee92944..c539e2f8 100644 --- a/examples/openbao/src/main.rs +++ b/examples/openbao/src/main.rs @@ -1,22 +1,141 @@ -use harmony::{ - inventory::Inventory, modules::openbao::OpenbaoScore, topology::K8sAnywhereTopology, +//! Standalone OpenBao installer, configured entirely from +//! [`ConfigClient`] (`HARMONY_CONFIG_OpenbaoInstallConfig` env JSON → +//! OpenBao → interactive prompt). Deploys the chart, then initializes, +//! unseals, and enables KV v2 — so the result is immediately usable as a +//! `harmony_config` store (point `OPENBAO_URL` at the ingress host and +//! `OPENBAO_TOKEN` at the cached root token). +//! +//! Optional features compose purely from config presence — nothing is wired +//! unless its inputs are set: +//! - `tls_issuer` → cert-manager edge TLS on the ingress. +//! - `oidc_issuer` + `oidc_audience` → JWT auth method plus a `harmony` +//! role scoped to the `harmony-config` policy, letting SSO callers +//! (e.g. `harmony_config` via `HARMONY_SSO_*`) read/write +//! `secret/harmony/*` without the root token. +//! +//! This Score knows nothing about Zitadel: the OIDC issuer/audience are plain +//! config strings, so any OIDC provider works. + +use anyhow::{Context, Result}; +use clap::Parser; +use harmony::inventory::Inventory; +use harmony::modules::openbao::{ + OpenbaoInstance, OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoScore, OpenbaoSetupScore, }; +use harmony::score::Score; +use harmony::topology::K8sAnywhereTopology; +use harmony_config::{Config, ConfigClient}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use tracing::info; + +/// Policy granting read/write to the `harmony_config` store path. Bound to the +/// JWT `harmony` role when OIDC is configured. 
+const HARMONY_CONFIG_POLICY: &str = "harmony-config"; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Config)] +#[serde(default)] +struct OpenbaoInstallConfig { + /// Public ingress host (e.g. `secrets-stg.`). Required. + host: String, + /// Helm release name + namespace the StatefulSet lives in. + namespace: String, + release: String, + /// OKD/OpenShift target — flips the chart's SCC-aware values. + openshift: bool, + /// cert-manager `ClusterIssuer` for edge TLS. Empty → plain HTTP ingress. + tls_issuer: String, + /// OIDC issuer URL for JWT auth (e.g. `https://sso-stg.`). Empty, + /// or `oidc_audience` empty, disables JWT auth entirely. + oidc_issuer: String, + /// Expected `aud` for JWT auth (the OIDC project/client id). + oidc_audience: String, +} + +impl Default for OpenbaoInstallConfig { + fn default() -> Self { + Self { + host: String::new(), + namespace: "openbao".to_string(), + release: "openbao".to_string(), + openshift: false, + tls_issuer: String::new(), + oidc_issuer: String::new(), + oidc_audience: String::new(), + } + } +} #[tokio::main] -async fn main() { - let openbao = OpenbaoScore { - instance: Default::default(), - host: "openbao.sebastien.sto1.nationtech.io".to_string(), - openshift: false, - tls_issuer: None, +async fn main() -> Result<()> { + harmony_cli::cli_logger::init(); + let args = harmony_cli::Args::parse(); + let config = ConfigClient::for_namespace("harmony").await; + + let cfg: OpenbaoInstallConfig = config + .get_or_prompt() + .await + .context("loading OpenbaoInstallConfig")?; + + info!("Got full config {cfg:?}"); + + anyhow::ensure!( + !cfg.host.is_empty(), + "host must be set (e.g. 
secrets-stg.)" + ); + + + let instance = OpenbaoInstance { + namespace: cfg.namespace.clone(), + release: cfg.release.clone(), }; + let deploy = OpenbaoScore { + instance: instance.clone(), + host: cfg.host.clone(), + openshift: cfg.openshift, + tls_issuer: (!cfg.tls_issuer.is_empty()).then(|| cfg.tls_issuer.clone()), + }; + + // JWT auth composes in only when both issuer and audience are set; it + // pulls in the harmony-config policy so the role has something to grant. + let jwt_auth = + (!cfg.oidc_issuer.is_empty() && !cfg.oidc_audience.is_empty()).then(|| OpenbaoJwtAuth { + oidc_discovery_url: cfg.oidc_issuer.clone(), + bound_issuer: cfg.oidc_issuer.clone(), + role_name: "harmony".to_string(), + bound_audiences: cfg.oidc_audience.clone(), + user_claim: "sub".to_string(), + policies: vec![HARMONY_CONFIG_POLICY.to_string()], + ttl: "1h".to_string(), + max_ttl: "8h".to_string(), + }); + let policies = if jwt_auth.is_some() { + vec![OpenbaoPolicy { + name: HARMONY_CONFIG_POLICY.to_string(), + hcl: r#"path "secret/data/harmony/*" { capabilities = ["create","read","update","delete"] } +path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"# + .to_string(), + }] + } else { + vec![] + }; + + let setup = OpenbaoSetupScore { + instance, + kv_mount: "secret".to_string(), + policies, + users: vec![], + jwt_auth, + }; + + let scores: Vec>> = vec![Box::new(deploy), Box::new(setup)]; harmony_cli::run( - Inventory::autoload(), + Inventory::empty(), K8sAnywhereTopology::from_env(), - vec![Box::new(openbao)], - None, + scores, + Some(args), ) .await - .unwrap(); + .map_err(|e| anyhow::anyhow!("{e}")) } diff --git a/harmony_config/src/source/env.rs b/harmony_config/src/source/env.rs index e976bbf3..259711e3 100644 --- a/harmony_config/src/source/env.rs +++ b/harmony_config/src/source/env.rs @@ -1,5 +1,6 @@ use crate::{ConfigClass, ConfigError, ConfigSource}; use async_trait::async_trait; +use log::{debug, info}; pub struct EnvSource; @@ -16,6 +17,7 @@ impl 
ConfigSource for EnvSource { ) -> Result, ConfigError> { let env_key = env_key_for(key); + debug!("Loading config from env var {env_key}"); match std::env::var(&env_key) { Ok(value) => serde_json::from_str(&value).map(Some).map_err(|e| { ConfigError::EnvError(format!( -- 2.39.5 From 44aa83199aa7356ab4f8c70ec4811987b4868df3 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sat, 30 May 2026 11:05:30 -0400 Subject: [PATCH 06/14] fix(harmony_cli): drop ANSI colour codes around log emojis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `console::style(emoji).green()/.yellow()/.red()/.blue()` embedded raw ANSI escapes in the message string. `console` force-emits them off its own TTY detection, which disagrees with the tracing writer, so they leaked as literal `\x1b[..m` garbage around the emoji. Emit plain emojis — the glyph already conveys status and the tracing fmt layer still colours the level. --- harmony_cli/src/cli_logger.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/harmony_cli/src/cli_logger.rs b/harmony_cli/src/cli_logger.rs index c3e56436..db53008f 100644 --- a/harmony_cli/src/cli_logger.rs +++ b/harmony_cli/src/cli_logger.rs @@ -1,4 +1,3 @@ -use console::style; use harmony::{ instrumentation::{self, HarmonyEvent}, modules::application::ApplicationFeatureStatus, @@ -33,16 +32,19 @@ fn configure_logger() { }; } +// Plain emojis — no `console::style` colour codes. The emoji already conveys +// status, the level is coloured by the fmt layer, and embedding ANSI here +// leaks escape codes when console's TTY detection disagrees with the writer. 
fn ok() -> String { - format!("{}", style(crate::theme::EMOJI_SUCCESS.to_string()).green()) + crate::theme::EMOJI_SUCCESS.to_string() } fn skipped() -> String { - format!("{}", style(crate::theme::EMOJI_SKIP.to_string()).yellow()) + crate::theme::EMOJI_SKIP.to_string() } fn failed() -> String { - format!("{}", style(crate::theme::EMOJI_ERROR.to_string()).red()) + crate::theme::EMOJI_ERROR.to_string() } fn handle_events() { @@ -66,8 +68,10 @@ fn handle_events() { } => match status { TopologyStatus::Queued => {} TopologyStatus::Preparing => { - let emoji = style(crate::theme::EMOJI_TOPOLOGY.to_string()).yellow(); - info!("{emoji} Preparing environment: {topology}..."); + info!( + "{} Preparing environment: {topology}...", + crate::theme::EMOJI_TOPOLOGY + ); (*preparing_topology) = true; } TopologyStatus::Success => { @@ -100,8 +104,10 @@ fn handle_events() { info!("{message}"); } else { (*current_score) = Some(score.clone()); - let emoji = style(crate::theme::EMOJI_SCORE).blue(); - info!("{emoji} Interpreting score: {score}..."); + info!( + "{} Interpreting score: {score}...", + crate::theme::EMOJI_SCORE + ); } } HarmonyEvent::InterpretExecutionFinished { -- 2.39.5 From 57d056fced29fdb0476aabe4f0449527def1aa2b Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sat, 30 May 2026 11:07:03 -0400 Subject: [PATCH 07/14] fix(openbao): scope unseal-keys cache file per instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The root token + unseal keys were written to a single fixed `~/.local/share/harmony/openbao/unseal-keys.json`, so deploying a second OpenBao instance (different namespace/release) overwrote the first's keys — after which the first could never be unsealed. Key the file by namespace+release (`unseal-keys-<namespace>-<release>.json`); `cached_root_token` now takes the `OpenbaoInstance` to read the right one.
--- examples/fleet_staging_install/src/main.rs | 2 +- harmony/src/modules/openbao/setup.rs | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/examples/fleet_staging_install/src/main.rs b/examples/fleet_staging_install/src/main.rs index 7357a4da..a3dfa9d8 100644 --- a/examples/fleet_staging_install/src/main.rs +++ b/examples/fleet_staging_install/src/main.rs @@ -330,7 +330,7 @@ path "secret/metadata/harmony/*" { capabilities = ["list","read"] }"# "secret".to_string(), "token".to_string(), true, - Some(cached_root_token().map_err(|e| anyhow::anyhow!(e))?), + Some(cached_root_token(&openbao).map_err(|e| anyhow::anyhow!(e))?), None, None, None, diff --git a/harmony/src/modules/openbao/setup.rs b/harmony/src/modules/openbao/setup.rs index e1360f4a..5c6c62f4 100644 --- a/harmony/src/modules/openbao/setup.rs +++ b/harmony/src/modules/openbao/setup.rs @@ -143,16 +143,22 @@ fn keys_dir() -> PathBuf { .unwrap_or_else(|| PathBuf::from("/tmp/harmony-openbao")) } -fn keys_file() -> PathBuf { - keys_dir().join("unseal-keys.json") +/// Per-instance keys file. Keyed by namespace+release so multiple OpenBao +/// instances don't clobber each other's unseal keys in one shared file — +/// losing them means the instance can never be unsealed again. +fn keys_file(instance: &OpenbaoInstance) -> PathBuf { + keys_dir().join(format!( + "unseal-keys-{}-{}.json", + instance.namespace, instance.release + )) } /// The root token from the cached unseal-keys file written at init. /// Dev/staging convenience for callers that need to seed OpenBao right /// after [`OpenbaoSetupScore`] runs; production uses auto-unseal and /// wouldn't persist this. 
-pub fn cached_root_token() -> Result { - let path = keys_file(); +pub fn cached_root_token(instance: &OpenbaoInstance) -> Result { + let path = keys_file(instance); let content = std::fs::read_to_string(&path).map_err(|e| format!("read {path:?}: {e}"))?; let init: InitOutput = serde_json::from_str(&content).map_err(|e| format!("parse {path:?}: {e}"))?; @@ -200,7 +206,7 @@ impl OpenbaoSetupInterpret { InterpretError::new(format!("Failed to create keys directory {:?}: {}", dir, e)) })?; - let path = keys_file(); + let path = keys_file(&self.score.instance); // Source of truth for "is this vault initialized?" is OpenBao itself, // not a `bao status` pre-check parsed from stderr — that probe is @@ -318,7 +324,7 @@ impl OpenbaoSetupInterpret { } info!("[OpenbaoSetup] Unsealing..."); - let path = keys_file(); + let path = keys_file(&self.score.instance); let content = std::fs::read_to_string(&path) .map_err(|e| InterpretError::new(format!("Failed to read keys: {e}")))?; let init: InitOutput = serde_json::from_str(&content) -- 2.39.5 From d39aa151526a26a0a080022de19423015aa8111b Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sun, 31 May 2026 09:06:20 -0400 Subject: [PATCH 08/14] feat: fleet deploy uses configuration from configclient for all settings, update the 0_3 plan --- ROADMAP/fleet_platform/v0_3_plan.md | 136 ++++++++++++++++++++++ fleet/harmony-fleet-deploy/src/lib.rs | 2 +- fleet/harmony-fleet-deploy/src/main.rs | 56 +++++---- fleet/harmony-fleet-deploy/src/secrets.rs | 51 +++++++- 4 files changed, 214 insertions(+), 31 deletions(-) create mode 100644 ROADMAP/fleet_platform/v0_3_plan.md diff --git a/ROADMAP/fleet_platform/v0_3_plan.md b/ROADMAP/fleet_platform/v0_3_plan.md new file mode 100644 index 00000000..d241ef40 --- /dev/null +++ b/ROADMAP/fleet_platform/v0_3_plan.md @@ -0,0 +1,136 @@ +# Fleet Platform v0.3 — Staging to production-ready + +Written 2026-05-31. 
Picks up after OpenBao + Zitadel + NATS + callout + operator are deployed and functional on staging (2-3 weeks old versions). + +## Current state + +- [x] OpenBao running at `secrets-stg.cb1.nationtech.io` +- [x] Zitadel running at `sso-stg.cb1.nationtech.io` +- [x] NATS + auth callout deployed in `fleet-staging` namespace +- [x] Operator deployed (older version, 2-3 weeks old) +- [x] Config-driven OpenBao installer (`examples/openbao`) +- [x] `harmony-fleet-deploy` binary reads `FleetDeploySecrets` from OpenBao + +## Immediate next steps + +### 1. Provision operator credentials in OpenBao + +- [ ] Fetch existing creds from the running cluster: + ```bash + oc -n fleet-staging get secret harmony-fleet-operator-secrets -o jsonpath='{.data.credentials\.toml}' | base64 -d + ``` +- [ ] Seed into OpenBao at `secret/data/fleet-staging/FleetDeploySecrets`: + ```bash + export VAULT_ADDR=https://secrets-stg.cb1.nationtech.io + export VAULT_TOKEN= + oc -n fleet-staging get secret harmony-fleet-operator-secrets -o jsonpath='{.data.credentials\.toml}' | base64 -d \ + | jq -Rs '{value: ({operator_credentials_toml: .} | tojson)}' \ + | bao kv put secret/fleet-staging/FleetDeploySecrets - + ``` +- [ ] Verify the secret is readable: `bao kv get secret/fleet-staging/FleetDeploySecrets` + +### 2. Private repo deploy script + +- [ ] Create `.envrc` with minimal env: + ```bash + export OPENBAO_URL=https://secrets-stg.cb1.nationtech.io + export HARMONY_SECRET_NAMESPACE=fleet-staging + export HARMONY_FLEET_NAMESPACE=fleet-staging + # export OPENBAO_TOKEN= + ``` +- [ ] Write deploy invocation (shell script or just `harmony-fleet-deploy` call): + ```bash + harmony-fleet-deploy --from-tag harmony-fleet-operator-vX.Y.Z --yes + ``` +- [ ] Commit `.envrc` + script to private repo (shared with teammates) + +### 3. 
Execute operator upgrade + +- [ ] Run the deploy script from the private repo +- [ ] Verify operator pod starts and connects to NATS +- [ ] Verify operator reconciles existing CRs (check logs) +- [ ] Confirm no regression in existing fleet functionality + +### 4. Operator UI ingress (trivial) + +- [ ] Expose operator UI with TLS ingress on `fleet-stg.<domain>` +- [ ] Verify the UI loads and serves the SPA +- [ ] Confirm no auth gate yet (SSO is next) + +### 5. SSO login flow + +- [ ] Wire operator UI to Zitadel SSO at `sso-stg.cb1.nationtech.io` +- [ ] Test login/logout flow end-to-end +- [ ] Verify session persistence across page reloads +- [ ] Confirm RBAC: only authorized Zitadel users can access the UI + +### 6. Real data in UI + +- [ ] Replace mock device list with live `device-info` KV data +- [ ] Replace mock deployment list with live `Deployment` CR data +- [ ] Wire per-device drilldown to real `DeviceInfo` + last-heartbeat + agent version +- [ ] NATS tail panel: SSE stream of `device-info` and `device-state` updates (plain text) +- [ ] Verify data refreshes without manual reload + +## Configuration model + +### Environment (minimal, committed in private repo) + +```bash +OPENBAO_URL=https://secrets-stg.cb1.nationtech.io +HARMONY_SECRET_NAMESPACE=fleet-staging +# SSO auth or root token (SSO is the goal) +``` + +### OpenBao (read via ConfigClient) + +- `FleetDeploySecrets` (operator creds) at `secret/data/fleet-staging/FleetDeploySecrets` +- k8s namespaces (`fleet-staging`, `zitadel-staging`, `openbao-staging`) as config values +- Chart registry/project coords as config values + +## Missing features (post-UI) + +### Auth & credentials + +- [ ] Per-device OpenBao policies (templated policies, one role per device type) +- [ ] Device identity claim in JWT (Zitadel `client_id` with `device-` prefix) +- [ ] OpenBao JWT auth role granularity (extend `OpenbaoJwtAuth` to list of roles) +- [ ] Move k8s namespaces + chart coords into `ConfigClient` config struct (env = only identifier + auth)
+ +### Operator capabilities + +- [ ] Agent upgrade path (ADR-022 exists; implementation pending) +- [ ] Device enrollment flow (operator-facing runbook) +- [ ] Revoke device / rotate key operations +- [ ] Fleet-wide rollout strategies (canary, %-based) on top of agent-upgrade primitive + +### Observability + +- [ ] Operator logs every CR it acquires (verify output reads well) +- [ ] NATS debugging one-liners in hand-off menu +- [ ] Journald log streaming (currently only `.status.aggregate.lastError`) +- [ ] Metrics dashboard (deferred until >100 devices) + +### Quality & hardening + +- [ ] Agent config-driven labels (`[labels]` in agent toml → DeviceInfo) +- [ ] `matchExpressions` in selectors (currently `matchLabels` only) +- [ ] `Device.status.conditions` populated from heartbeat staleness +- [ ] Operator graceful degradation on bad device_id (log + skip, don't restart-loop) +- [ ] Persist `nats_auth_pass` and issuer NKey via `harmony_secret` (regenerate-every-run footgun) + +### Refactors (deferred, non-blocking) + +- [ ] Decompose `FleetServerScore` into independent, ConfigClient-glued Scores +- [ ] Move `harmony/modules/fleet/` → `fleet/harmony-fleet/` (ADR-021 pending) +- [ ] Delete `examples/fleet_staging_deploy` (superseded by `fleet_staging_install`) +- [ ] Drop `K8sAnywhereTopology` for ad-hoc Score execution; introduce `K8sBareTopology` + +## Principles (carried forward) + +- No yaml in framework code paths +- Scores describe desired state; topologies expose capabilities +- Cross-boundary wire types in `harmony-reconciler-contracts` +- Never ship untested code +- Prove claims about upstream before blaming upstream +- Design the brick before moving the brick diff --git a/fleet/harmony-fleet-deploy/src/lib.rs b/fleet/harmony-fleet-deploy/src/lib.rs index 0cef22c3..445fb3e4 100644 --- a/fleet/harmony-fleet-deploy/src/lib.rs +++ b/fleet/harmony-fleet-deploy/src/lib.rs @@ -40,5 +40,5 @@ pub use companion::AgentObservation; pub use nats::{FleetNatsScore, 
UserPassCredentials}; pub use operator::{FleetOperatorScore, OperatorCredentials, PublishedChart}; pub use release::{release_operator, version_from_tag}; -pub use secrets::FleetDeploySecrets; +pub use secrets::{FleetDeployConfig, FleetDeploySecrets}; pub use server::FleetServerScore; diff --git a/fleet/harmony-fleet-deploy/src/main.rs b/fleet/harmony-fleet-deploy/src/main.rs index 8d449dd0..86994420 100644 --- a/fleet/harmony-fleet-deploy/src/main.rs +++ b/fleet/harmony-fleet-deploy/src/main.rs @@ -15,7 +15,7 @@ use harmony::inventory::Inventory; use harmony::topology::K8sAnywhereTopology; use harmony_cli::Args as HarmonyCliArgs; use harmony_config::ConfigClient; -use harmony_fleet_deploy::{FleetDeploySecrets, FleetOperatorScore, version_from_tag}; +use harmony_fleet_deploy::{FleetDeployConfig, FleetDeploySecrets, FleetOperatorScore, version_from_tag}; #[derive(Parser, Debug)] #[command( @@ -23,12 +23,9 @@ use harmony_fleet_deploy::{FleetDeploySecrets, FleetOperatorScore, version_from_ about = "Deploy the published harmony fleet operator chart" )] struct CliConfig { - #[arg( - long, - env = "HARMONY_FLEET_NAMESPACE", - default_value = "harmony-fleet-system" - )] - namespace: String, + /// Override the k8s namespace from config (e.g. `fleet-staging`). + #[arg(long, env = "HARMONY_FLEET_NAMESPACE")] + namespace: Option, /// Release tag to deploy (e.g. `harmony-fleet-operator-v0.0.2`); the /// version is parsed from it in Rust so the workflow passes a tag and @@ -40,22 +37,16 @@ struct CliConfig { #[arg(long, env = "HARMONY_FLEET_OPERATOR_CHART_VERSION")] operator_chart_version: Option, - #[arg( - long, - env = "HARMONY_FLEET_OPERATOR_CHART_REGISTRY", - default_value = "hub.nationtech.io" - )] - operator_chart_registry: String, + /// Override the OCI chart registry from config. 
+ #[arg(long, env = "HARMONY_FLEET_OPERATOR_CHART_REGISTRY")] + operator_chart_registry: Option, - #[arg( - long, - env = "HARMONY_FLEET_OPERATOR_CHART_PROJECT", - default_value = "harmony" - )] - operator_chart_project: String, + /// Override the OCI chart project from config. + #[arg(long, env = "HARMONY_FLEET_OPERATOR_CHART_PROJECT")] + operator_chart_project: Option, - /// Config namespace `FleetDeploySecrets` resolves under (Env → OpenBao). - #[arg(long, env = "HARMONY_SECRET_NAMESPACE", default_value = "harmony")] + /// Config namespace `FleetDeploySecrets` and `FleetDeployConfig` resolve under (Env → OpenBao). + #[arg(long, env = "HARMONY_SECRET_NAMESPACE", default_value = "fleet-staging")] config_namespace: String, #[command(flatten)] @@ -77,12 +68,18 @@ async fn main() -> Result<()> { let cli = CliConfig::parse(); let version = cli.chart_version()?; - let secrets: FleetDeploySecrets = ConfigClient::for_namespace(&cli.config_namespace) - .await + let config_client = ConfigClient::for_namespace(&cli.config_namespace).await; + + let secrets: FleetDeploySecrets = config_client .get() .await .context("loading FleetDeploySecrets (set HARMONY_CONFIG_FleetDeploySecrets or OpenBao)")?; + let config: FleetDeployConfig = config_client + .get_or_prompt() + .await + .context("loading FleetDeployConfig")?; + // Point KUBECONFIG at the scoped deployer credential before the // topology reads it, so the runner pod needs no standing permissions. // Held to end of scope so the tempfile outlives the deploy. 
@@ -98,14 +95,15 @@ async fn main() -> Result<()> { None => None, }; + let namespace = cli.namespace.unwrap_or(config.namespace); + let registry = cli.operator_chart_registry.unwrap_or(config.operator_chart_registry); + let project = cli.operator_chart_project.unwrap_or(config.operator_chart_project); + let operator = FleetOperatorScore::new() - .namespace(cli.namespace) + .namespace(namespace) + .nats_url(config.nats_url) .credentials(secrets.operator_credentials_toml) - .published_chart( - cli.operator_chart_registry, - cli.operator_chart_project, - version, - ); + .published_chart(registry, project, version); harmony_cli::run( Inventory::autoload(), diff --git a/fleet/harmony-fleet-deploy/src/secrets.rs b/fleet/harmony-fleet-deploy/src/secrets.rs index 54126774..ec67147e 100644 --- a/fleet/harmony-fleet-deploy/src/secrets.rs +++ b/fleet/harmony-fleet-deploy/src/secrets.rs @@ -1,4 +1,4 @@ -//! Secrets for the published-chart (CD) operator deploy, via +//! Secrets and config for the published-chart (CD) operator deploy, via //! [`harmony_config::ConfigClient`]. SSO-only by construction: no //! user/pass field exists, so dev-only user/pass auth can't reach a prod //! deploy. Resolved EnvSource → OpenBao, so the in-cluster runner pulls @@ -24,6 +24,42 @@ pub struct FleetDeploySecrets { pub kubeconfig: Option, } +/// Non-secret deploy config: k8s namespaces + chart coords. Loaded via +/// `ConfigClient::for_namespace("fleet-staging")` alongside `FleetDeploySecrets`. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Config)] +pub struct FleetDeployConfig { + /// K8s namespace where the operator, NATS, and callout live. + pub namespace: String, + + /// Full NATS URL the operator connects to (e.g. `nats://fleet-nats.fleet-staging:4222`). + pub nats_url: String, + + /// K8s namespace where Zitadel lives (for operator UI SSO). + pub zitadel_namespace: String, + + /// K8s namespace where OpenBao lives (for operator secret fetching). 
+ pub openbao_namespace: String, + + /// OCI chart registry (e.g. `hub.nationtech.io`). + pub operator_chart_registry: String, + + /// OCI chart project (e.g. `harmony`). + pub operator_chart_project: String, +} + +impl Default for FleetDeployConfig { + fn default() -> Self { + Self { + namespace: "fleet-staging".to_string(), + nats_url: "nats://fleet-nats.fleet-staging:4222".to_string(), + zitadel_namespace: "zitadel-staging".to_string(), + openbao_namespace: "openbao-staging".to_string(), + operator_chart_registry: "hub.nationtech.io".to_string(), + operator_chart_project: "harmony".to_string(), + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -37,4 +73,17 @@ mod tests { // Secrets must never land in cleartext SQLite. assert_eq!(FleetDeploySecrets::CLASS, ConfigClass::Secret); } + + #[test] + fn config_class_is_not_secret() { + assert_eq!(FleetDeployConfig::CLASS, ConfigClass::Standard); + } + + #[test] + fn config_defaults() { + let c = FleetDeployConfig::default(); + assert_eq!(c.namespace, "fleet-staging"); + assert_eq!(c.zitadel_namespace, "zitadel-staging"); + assert_eq!(c.openbao_namespace, "openbao-staging"); + } } -- 2.39.5 From f7299ebe2bff2a267e7632c290a97424a8f5137d Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sun, 31 May 2026 09:13:39 -0400 Subject: [PATCH 09/14] refactor(fleet-deploy): rename HARMONY_SECRET_NAMESPACE to HARMONY_CONFIG_NAMESPACE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The env var name was a misnomer — ConfigClient resolves both config and secrets, not just secrets. The struct field was already config_namespace. Legacy SecretManager keeps the old var; this forces migration to ConfigClient for new code. 
--- ROADMAP/fleet_platform/v0_3_plan.md | 12 +++++------- fleet/harmony-fleet-deploy/src/main.rs | 18 ++++++++++++++---- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/ROADMAP/fleet_platform/v0_3_plan.md b/ROADMAP/fleet_platform/v0_3_plan.md index d241ef40..5aa5917e 100644 --- a/ROADMAP/fleet_platform/v0_3_plan.md +++ b/ROADMAP/fleet_platform/v0_3_plan.md @@ -9,7 +9,7 @@ Written 2026-05-31. Picks up after OpenBao + Zitadel + NATS + callout + operator - [x] NATS + auth callout deployed in `fleet-staging` namespace - [x] Operator deployed (older version, 2-3 weeks old) - [x] Config-driven OpenBao installer (`examples/openbao`) -- [x] `harmony-fleet-deploy` binary reads `FleetDeploySecrets` from OpenBao +- [x] `harmony-fleet-deploy` binary reads `FleetDeployConfig` + `FleetDeploySecrets` from OpenBao ## Immediate next steps @@ -34,8 +34,7 @@ Written 2026-05-31. Picks up after OpenBao + Zitadel + NATS + callout + operator - [ ] Create `.envrc` with minimal env: ```bash export OPENBAO_URL=https://secrets-stg.cb1.nationtech.io - export HARMONY_SECRET_NAMESPACE=fleet-staging - export HARMONY_FLEET_NAMESPACE=fleet-staging + export HARMONY_CONFIG_NAMESPACE=fleet-staging # export OPENBAO_TOKEN= ``` - [ ] Write deploy invocation (shell script or just `harmony-fleet-deploy` call): @@ -78,15 +77,14 @@ Written 2026-05-31. 
Picks up after OpenBao + Zitadel + NATS + callout + operator ```bash OPENBAO_URL=https://secrets-stg.cb1.nationtech.io -HARMONY_SECRET_NAMESPACE=fleet-staging +HARMONY_CONFIG_NAMESPACE=fleet-staging # SSO auth or root token (SSO is the goal) ``` ### OpenBao (read via ConfigClient) +- `FleetDeployConfig` (k8s namespaces, NATS URL, chart coords) at `secret/data/fleet-staging/FleetDeployConfig` - `FleetDeploySecrets` (operator creds) at `secret/data/fleet-staging/FleetDeploySecrets` -- k8s namespaces (`fleet-staging`, `zitadel-staging`, `openbao-staging`) as config values -- Chart registry/project coords as config values ## Missing features (post-UI) @@ -95,7 +93,7 @@ HARMONY_SECRET_NAMESPACE=fleet-staging - [ ] Per-device OpenBao policies (templated policies, one role per device type) - [ ] Device identity claim in JWT (Zitadel `client_id` with `device-` prefix) - [ ] OpenBao JWT auth role granularity (extend `OpenbaoJwtAuth` to list of roles) -- [ ] Move k8s namespaces + chart coords into `ConfigClient` config struct (env = only identifier + auth) +- [x] Move k8s namespaces + chart coords into `ConfigClient` config struct (env = only identifier + auth) ### Operator capabilities diff --git a/fleet/harmony-fleet-deploy/src/main.rs b/fleet/harmony-fleet-deploy/src/main.rs index 86994420..f0afc9a7 100644 --- a/fleet/harmony-fleet-deploy/src/main.rs +++ b/fleet/harmony-fleet-deploy/src/main.rs @@ -15,7 +15,9 @@ use harmony::inventory::Inventory; use harmony::topology::K8sAnywhereTopology; use harmony_cli::Args as HarmonyCliArgs; use harmony_config::ConfigClient; -use harmony_fleet_deploy::{FleetDeployConfig, FleetDeploySecrets, FleetOperatorScore, version_from_tag}; +use harmony_fleet_deploy::{ + FleetDeployConfig, FleetDeploySecrets, FleetOperatorScore, version_from_tag, +}; #[derive(Parser, Debug)] #[command( @@ -46,7 +48,11 @@ struct CliConfig { operator_chart_project: Option, /// Config namespace `FleetDeploySecrets` and `FleetDeployConfig` resolve under (Env → 
OpenBao). - #[arg(long, env = "HARMONY_SECRET_NAMESPACE", default_value = "fleet-staging")] + #[arg( + long, + env = "HARMONY_CONFIG_NAMESPACE", + default_value = "fleet-staging" + )] config_namespace: String, #[command(flatten)] @@ -96,8 +102,12 @@ async fn main() -> Result<()> { }; let namespace = cli.namespace.unwrap_or(config.namespace); - let registry = cli.operator_chart_registry.unwrap_or(config.operator_chart_registry); - let project = cli.operator_chart_project.unwrap_or(config.operator_chart_project); + let registry = cli + .operator_chart_registry + .unwrap_or(config.operator_chart_registry); + let project = cli + .operator_chart_project + .unwrap_or(config.operator_chart_project); let operator = FleetOperatorScore::new() .namespace(namespace) -- 2.39.5 From 2e9052b21702fb177c100bbd11cae1191b597d60 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sun, 31 May 2026 10:12:54 -0400 Subject: [PATCH 10/14] fix(openbao): remove extra blank line in example Pre-existing formatting issue caught by cargo fmt --check. --- examples/openbao/src/main.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/openbao/src/main.rs b/examples/openbao/src/main.rs index c539e2f8..f5824b3e 100644 --- a/examples/openbao/src/main.rs +++ b/examples/openbao/src/main.rs @@ -84,7 +84,6 @@ async fn main() -> Result<()> { "host must be set (e.g. secrets-stg.)" ); - let instance = OpenbaoInstance { namespace: cfg.namespace.clone(), release: cfg.release.clone(), -- 2.39.5 From f2ecccb4ab62dfc4cdf49c066c7b4be1a76b1662 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sun, 31 May 2026 12:32:19 -0400 Subject: [PATCH 11/14] refactor(fleet-deploy): rename harmony-fleet-release to harmony-fleet-publish Deploy/publish wording is more intuitive than deploy/release. 
--- .gitea/workflows/harmony-fleet-operator.yaml | 4 ++-- fleet/deployment-process.md | 4 ++-- fleet/harmony-fleet-deploy/Cargo.toml | 6 +++--- .../{harmony-fleet-release.rs => harmony-fleet-publish.rs} | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) rename fleet/harmony-fleet-deploy/src/bin/{harmony-fleet-release.rs => harmony-fleet-publish.rs} (91%) diff --git a/.gitea/workflows/harmony-fleet-operator.yaml b/.gitea/workflows/harmony-fleet-operator.yaml index 674b2542..c900d095 100644 --- a/.gitea/workflows/harmony-fleet-operator.yaml +++ b/.gitea/workflows/harmony-fleet-operator.yaml @@ -4,7 +4,7 @@ name: harmony-fleet-operator — release # published chart is `harmony apply` # (harmony-fleet-deploy --operator-chart-version), run manually today; a # CD job lands once the cluster KUBECONFIG + NATS secrets are provisioned. -# Tag parsing lives in Rust (harmony-fleet-release), not in YAML. +# Tag parsing lives in Rust (harmony-fleet-publish), not in YAML. on: push: tags: @@ -48,4 +48,4 @@ jobs: - name: Build + push image and chart env: TAG: ${{ inputs.tag || github.ref_name }} - run: cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-release -- --from-tag "$TAG" + run: cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-publish -- --from-tag "$TAG" diff --git a/fleet/deployment-process.md b/fleet/deployment-process.md index 70e6e115..2b125e63 100644 --- a/fleet/deployment-process.md +++ b/fleet/deployment-process.md @@ -20,11 +20,11 @@ Laptop fallback (does exactly what the workflow's job does): ```sh # docker + helm must be logged in to hub.nationtech.io first. 
-cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-release -- \ +cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-publish -- \ --from-tag harmony-fleet-operator-v0.0.2 # build + package only, no push (local k3d smoke-test): -cargo run -p harmony-fleet-deploy --bin harmony-fleet-release -- \ +cargo run -p harmony-fleet-deploy --bin harmony-fleet-publish -- \ --from-tag harmony-fleet-operator-v0.0.2 --no-push ``` diff --git a/fleet/harmony-fleet-deploy/Cargo.toml b/fleet/harmony-fleet-deploy/Cargo.toml index fb855a03..1c740e33 100644 --- a/fleet/harmony-fleet-deploy/Cargo.toml +++ b/fleet/harmony-fleet-deploy/Cargo.toml @@ -15,11 +15,11 @@ path = "src/lib.rs" name = "harmony-fleet-deploy" path = "src/main.rs" -# `harmony-fleet-release --from-tag ` builds + publishes the +# `harmony-fleet-publish --from-tag ` builds + publishes the # operator's image + chart for a release. [[bin]] -name = "harmony-fleet-release" -path = "src/bin/harmony-fleet-release.rs" +name = "harmony-fleet-publish" +path = "src/bin/harmony-fleet-publish.rs" [dependencies] harmony = { path = "../../harmony", features = ["podman"] } diff --git a/fleet/harmony-fleet-deploy/src/bin/harmony-fleet-release.rs b/fleet/harmony-fleet-deploy/src/bin/harmony-fleet-publish.rs similarity index 91% rename from fleet/harmony-fleet-deploy/src/bin/harmony-fleet-release.rs rename to fleet/harmony-fleet-deploy/src/bin/harmony-fleet-publish.rs index a49b7a7c..00298cec 100644 --- a/fleet/harmony-fleet-deploy/src/bin/harmony-fleet-release.rs +++ b/fleet/harmony-fleet-deploy/src/bin/harmony-fleet-publish.rs @@ -1,4 +1,4 @@ -//! `harmony-fleet-release` — build + publish the operator image + chart +//! `harmony-fleet-publish` — build + publish the operator image + chart //! for a tagged release. `docker` / `helm` must be on PATH and logged in //! to the registry (CI's login actions; dev's manual login). 
@@ -8,7 +8,7 @@ use harmony_fleet_deploy::release::{release_operator, version_from_tag}; #[derive(Parser, Debug)] #[command( - name = "harmony-fleet-release", + name = "harmony-fleet-publish", about = "Build + publish the operator image + chart for a tagged release" )] struct Cli { -- 2.39.5 From edb62668b6f5d0a6aa1d109daac62433a93a0bd3 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Sun, 31 May 2026 12:56:36 -0400 Subject: [PATCH 12/14] doc: Roadmap entry for cli design and implementation --- ROADMAP/13-unified-cli.md | 281 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 ROADMAP/13-unified-cli.md diff --git a/ROADMAP/13-unified-cli.md b/ROADMAP/13-unified-cli.md new file mode 100644 index 00000000..2241430d --- /dev/null +++ b/ROADMAP/13-unified-cli.md @@ -0,0 +1,281 @@ +# Phase 13: Unified CLI — Extensible, Composable, Subcommand-Driven + +## Goal + +Replace the current landscape of disconnected `harmony-*` binaries and +60+ example `main.rs` files with a single, extensible CLI where: + +- The framework provides global concerns (config, SSO, topology selection, + Score runner, TUI) as shared infrastructure. +- Each module (fleet, tenant, okd, …) registers its own subcommands. +- Third-party `MyAppScore` authors get `harmony myapp deploy` with zero + framework boilerplate. + +The CLI is the user-facing surface of Harmony. Every design decision here +shapes the developer experience for the entire ecosystem. + +## Current State + +- `harmony_cli::Args` — flat Score-runner flags (`--yes`, `--filter`, + `--list`, `--number`, `--interactive`). Drives the Maestro loop over + a `Vec<Box<dyn Score<T>>>`. +- `harmony_cli::run(Inventory, Topology, Vec<Box<dyn Score<T>>>, Option<Args>)` — + the single entry point consumed by 60+ example binaries. +- `harmony_tui::run()` — separate crate, separate `run()`, same inputs. +- `harmony-fleet-deploy` — deploy binary with `deploy`/`publish` + subcommands (just merged from two separate binaries).
+- `harmony_composer` — infrastructure composition tool, separate binary. +- ADR-023 principle 8 describes the staged evolution (B → C) but defers + the plugin protocol. + +## Design + +### Top-level binary with subcommands + +``` +harmony [global flags] <module> <action> [action flags] + +harmony --config-namespace fleet-staging fleet deploy --from-tag v0.1.0 +harmony --config-namespace fleet-staging fleet publish --from-tag v0.1.0 --no-push +harmony --config-namespace okd-staging okd bootstrap +harmony --config-namespace tenant-c1 tenant create --name c1 +harmony --config-namespace harmony myapp deploy --image foo:latest +``` + +Global flags (owned by the top-level binary): +- `--config-namespace` — maps to `ConfigClient::for_namespace()` +- `--kubeconfig` — topology selection +- `--topology` — explicit topology choice (k3d, okd, bare, …) +- `--yes` — skip confirmation prompts +- `--interactive` — delegate to TUI + +Module subcommands (owned by each module): +- `fleet deploy`, `fleet publish` +- `tenant create`, `tenant list`, `tenant health`, `tenant install` +- `okd bootstrap`, `okd add-node` +- User-defined: `myapp deploy`, `myapp publish`, … + +### Two kinds of subcommands + +**Score-runner subcommands** — compose multiple Scores, need +`--filter`/`--list`/`--number`. Examples, ad-hoc orchestration, the +current `harmony_cli::run()` use case. The Maestro loop lives here. + +**Action subcommands** — single-purpose (deploy a chart, publish an +image, create a tenant). No filter/list/number. Run one Score or a +fixed composition. + +The distinction matters: forcing action subcommands through the +filter/list/number machinery is ceremony; forcing Score-runner +subcommands into a rigid single-action shape is constraining. + +### Deploy crates become library-only + +Per ADR-023 principle 5, deploy logic lives in `*-deploy` crates. The +unified CLI absorbs the **binaries** — deploy crates lose their +`[[bin]]` entries and become libraries consumed by the top-level +`harmony` binary.
The crate boundary stays; the binary boundary goes +away. + +``` +harmony-fleet-deploy/ + Cargo.toml # [lib] only, no [[bin]] + src/ + lib.rs # FleetDeployConfig, FleetDeploySecrets, FleetOperatorScore + commands.rs # DeployCommand, PublishCommand (clap Subcommand structs) +``` + +The top-level `harmony` binary imports `harmony_fleet_deploy::commands` +and wires them into its own `Command` enum. + +### Publish logic as Scores + +Build/push logic (currently imperative `Command::new("docker")` in +`harmony-fleet-publish`) should be encapsulated in Scores, following +the `Application` trait + feature composition pattern +(`examples/try_rust_webapp` + `PackagingDeployment`). The publish +subcommand becomes a thin CLI wrapper over a Score composition, not +a shell-out script. + +This is not `PackagingDeployment` specifically — the operator isn't a +`RustWebapp`. The pattern is the **`Application` trait + feature +composition** model: a typed application description with composable +features (build, push, deploy, monitor). + +### Plugin discovery (stage C, deferred) + +ADR-023 principle 8 envisions `harmony` discovering `harmony-*` +binaries on `$PATH` (kubectl-style). This is the third-party +extensibility story: a `MyAppScore` author ships a `harmony-myapp` +binary, and `harmony myapp deploy` works without rebuilding the +framework. + +**Open question**: is the end state a monolithic binary with +composable subcommands (first-party modules compiled in), or +kubectl-style plugin discovery for everything? Likely both: +first-party modules are compiled-in subcommands (tighter integration, +shared types), third-party modules are discovered plugins (loose +coupling, separate release cycles). The protocol for plugin +communication (env vars, stdin JSON, exit codes) is a separate design +effort. + +### TUI integration + +`harmony_tui` is a separate crate with its own `run()`. 
The unified +CLI's `--interactive` global flag delegates to `harmony_tui::run()` +for Score-runner subcommands. Action subcommands may or may not have +TUI equivalents — that's per-subcommand, not global. + +### `harmony_composer` + +Stays separate for now. It's an infrastructure composition tool with +a different audience (platform engineers building topologies, not +operators deploying apps). May become `harmony compose` later if the +use cases converge. + +## Tasks + +### 13.1 Rewrite `harmony_cli` — subcommand-aware runner + +Replace the flat `Args` struct with a subcommand-aware `Cli` struct. +Global flags move to the top level. The `run()` function accepts a +`Command` enum instead of `Option<Args>`. + +```rust +#[derive(Parser)] +struct Cli { + #[arg(long, env = "HARMONY_CONFIG_NAMESPACE", global = true)] + config_namespace: String, + + #[arg(long, global = true)] + kubeconfig: Option<PathBuf>, + + #[arg(long, global = true)] + yes: bool, + + #[command(subcommand)] + command: Command, +} +``` + +**Files**: `harmony_cli/src/lib.rs`, `harmony_cli/src/args.rs` (new) +**Blocked by**: Phase 02 (config migration — so the new CLI is born +on `harmony_config`, not retrofitted) +**Blocks**: 13.2, 13.3 + +### 13.2 Migrate one deploy binary to subcommand pattern + +Proof of concept: `harmony-fleet-deploy` already has `deploy`/`publish` +subcommands. Migrate it to the new `harmony_cli` runner: deploy crate +becomes library-only, exports `Command` enum, top-level binary wires +it in. + +**Files**: `fleet/harmony-fleet-deploy/`, new top-level `harmony` binary +**Blocked by**: 13.1 + +### 13.3 Migrate examples + +Each of the 60+ examples currently calls `harmony_cli::run()` with +flat args. Migration: each example becomes a subcommand of the +top-level `harmony` binary, or stays as a standalone binary that +imports the new `harmony_cli` runner.
+ +**Migration shape** (before/after): + +```rust +// Before (standalone binary) +async fn main() { + harmony_cli::run(Inventory::autoload(), topology, scores, None).await; +} + +// After (subcommand of top-level binary) +// In the example's crate: +pub struct MyExampleCommand { /* clap args */ } +impl Subcommand for MyExampleCommand { ... } + +// In the top-level binary: +enum Command { + MyExample(MyExampleCommand), + Fleet(FleetCommand), + ... +} +``` + +**Files**: 60+ example crates +**Blocked by**: 13.2 (prove the pattern works on one) + +### 13.4 Publish-as-Score + +Extract build/push logic from `harmony-fleet-publish` into Scores +following the `Application` trait + feature composition pattern. +The `publish` subcommand becomes a thin wrapper. + +**Files**: `harmony/src/modules/application/` (extend), `fleet/harmony-fleet-deploy/` +**Blocked by**: 13.2 + +### 13.5 Topology selection in the CLI + +Global `--topology` flag or auto-detection. Requires Phase 12.6 +(topology proliferation / `K8sBareTopology`) to land first — the +CLI's topology selection is simpler if the topology landscape is +clean. + +**Blocked by**: Phase 12.6 + +### 13.6 Plugin discovery protocol (stage C) + +Design the protocol for third-party `harmony-*` binaries to +communicate with the top-level `harmony` binary. Env vars for +global args? stdin JSON? Exit codes for outcomes? + +**Status**: Research + ADR first. No implementation until the +protocol is locked.
+**Blocked by**: 13.5 (first-party subcommands working end-to-end) + +## Dependencies + +``` +Phase 02 (config migration) ──→ 13.1 (CLI rewrite) +Phase 12.6 (topology cleanup) ──→ 13.5 (topology selection) +13.1 ──→ 13.2 (fleet-deploy migration) +13.2 ──→ 13.3 (example migration) +13.2 ──→ 13.4 (publish-as-Score) +13.5 ──→ 13.6 (plugin discovery) +``` + +Phase 11 (named config instances) can land after the CLI rewrite — +the global `--config-namespace` flag maps directly to +`ConfigClient::for_namespace()`, and named instances +(`get_named::<T>("fw-primary")`) become a CLI concern too. + +## ADR-023 Tensions + +These need resolution during implementation: + +1. **Principle 5 vs. absorbing binaries.** Deploy crates keep their + crate boundary (library + Scores) but lose their `[[bin]]`. The + unified binary is the sole entry point. This is a refinement of + principle 5, not a violation — the deploy logic still lives in + the deploy crate. + +2. **Principle 8 monolith vs. plugin.** First-party modules are + compiled-in subcommands. Third-party modules are discovered + plugins. The boundary between "first-party" and "third-party" + needs a clear doctrine (likely: anything in the harmony repo is + first-party; everything else is a plugin). + +3. **`harmony_composer` placement.** Stays separate for now. If the + use cases converge with the unified CLI, it becomes `harmony + compose`. Not a blocker.
+ +## References + +- ADR-023 principle 8 — CLI: hybrid, staged (B → C) +- ADR-023 principle 5 — deploy logic in `*-deploy` crates +- ADR draft 024 §Q5 — runtime tools in the dependency graph +- `examples/try_rust_webapp` — `Application` trait + feature composition +- `harmony/src/modules/application/features/packaging_deployment.rs` — + build/push as a Score feature +- Phase 02 — config migration (prerequisite) +- Phase 11 — named config instances (parallel) +- Phase 12.6 — topology proliferation (prerequisite for 13.5) -- 2.39.5 From 12c8d9cfa0f78542b22d3630190354f35d22136d Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Mon, 1 Jun 2026 11:12:23 -0400 Subject: [PATCH 13/14] feat: Init cli logger in fleet deploy --- fleet/harmony-fleet-deploy/src/main.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fleet/harmony-fleet-deploy/src/main.rs b/fleet/harmony-fleet-deploy/src/main.rs index f0afc9a7..ed69d401 100644 --- a/fleet/harmony-fleet-deploy/src/main.rs +++ b/fleet/harmony-fleet-deploy/src/main.rs @@ -18,6 +18,7 @@ use harmony_config::ConfigClient; use harmony_fleet_deploy::{ FleetDeployConfig, FleetDeploySecrets, FleetOperatorScore, version_from_tag, }; +use tracing::info; #[derive(Parser, Debug)] #[command( @@ -71,6 +72,7 @@ impl CliConfig { #[tokio::main] async fn main() -> Result<()> { + harmony_cli::cli_logger::init(); let cli = CliConfig::parse(); let version = cli.chart_version()?; -- 2.39.5 From e7148aa85f6b7c8c463a5967fcab753f07144af4 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Mon, 1 Jun 2026 11:35:15 -0400 Subject: [PATCH 14/14] fix: fleet operator chart name was conflicting with the container name. 
Append -chart to the chart name --- fleet/deployment-process.md | 2 +- fleet/harmony-fleet-deploy/src/operator/chart.rs | 5 +++-- fleet/harmony-fleet-deploy/src/operator/score.rs | 4 ++-- fleet/harmony-fleet-deploy/src/release.rs | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fleet/deployment-process.md b/fleet/deployment-process.md index 2b125e63..767a7fac 100644 --- a/fleet/deployment-process.md +++ b/fleet/deployment-process.md @@ -42,7 +42,7 @@ harmony-fleet-deploy --filter FleetOperatorScore \ ``` It installs the published -`oci://hub.nationtech.io/harmony/harmony-fleet-operator:` chart; +`oci://hub.nationtech.io/harmony/harmony-fleet-operator-chart:` chart; the version is parsed from the tag in Rust (the tag is the only source of truth). Same command bootstraps and upgrades; re-running the same tag is a no-op. Auth is Zitadel-SSO-only: the operator gets its zitadel-jwt diff --git a/fleet/harmony-fleet-deploy/src/operator/chart.rs b/fleet/harmony-fleet-deploy/src/operator/chart.rs index c022cef2..1befead0 100644 --- a/fleet/harmony-fleet-deploy/src/operator/chart.rs +++ b/fleet/harmony-fleet-deploy/src/operator/chart.rs @@ -66,7 +66,7 @@ pub struct ChartOptions { /// to the deploy crate's `CARGO_PKG_VERSION` — fine for in-process /// uses (e2e harness, runtime operator Score). The release binary /// sets this to the released tag so the OCI chart artifact lands - /// at `…/harmony-fleet-operator:` matching the image tag. + /// at `…/harmony-fleet-operator-chart:` matching the image tag. 
pub chart_version: Option, } @@ -136,6 +136,7 @@ impl Default for ChartOptions { } } +pub const CHART_NAME: &str = "harmony-fleet-operator-chart"; pub const RELEASE_NAME: &str = "harmony-fleet-operator"; pub const SERVICE_ACCOUNT: &str = "harmony-fleet-operator"; pub const CLUSTER_ROLE: &str = "harmony-fleet-operator"; @@ -161,7 +162,7 @@ pub fn build_chart(opts: &ChartOptions) -> Result { .chart_version .clone() .unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string()); - let mut chart = HelmChart::new(RELEASE_NAME.to_string(), chart_version.clone()); + let mut chart = HelmChart::new(CHART_NAME.to_string(), chart_version.clone()); chart.version = chart_version; chart.description = "IoT operator — Deployment CRD → NATS KV".to_string(); diff --git a/fleet/harmony-fleet-deploy/src/operator/score.rs b/fleet/harmony-fleet-deploy/src/operator/score.rs index cc60dd61..bef47eef 100644 --- a/fleet/harmony-fleet-deploy/src/operator/score.rs +++ b/fleet/harmony-fleet-deploy/src/operator/score.rs @@ -43,7 +43,7 @@ use crate::operator::chart::{ChartOptions, OperatorCredentials, build_chart, ope /// The already-published OCI chart to install (the CD `harmony apply` /// path). When set, the operator installs -/// `oci://{registry}/{project}/harmony-fleet-operator:{version}` and the +/// `oci://{registry}/{project}/harmony-fleet-operator-chart:{version}` and the /// score's `image` is ignored (the image is baked into the chart). #[derive(Debug, Clone, Serialize)] pub struct PublishedChart { @@ -205,7 +205,7 @@ impl Interpret for FleetOperatorInterp // branch runs its own install so the local tempdir stays alive // across it. 
let helm_outcome = if let Some(p) = &self.score.published_chart { - let chart_ref = format!("oci://{}/{}/{}", p.registry, p.project, chart::RELEASE_NAME); + let chart_ref = format!("oci://{}/{}/{}", p.registry, p.project, chart::CHART_NAME); info!( "Installing helm release '{}' from published chart {chart_ref} version {}", self.score.release_name, p.version diff --git a/fleet/harmony-fleet-deploy/src/release.rs b/fleet/harmony-fleet-deploy/src/release.rs index a7fabcaa..5cf766dd 100644 --- a/fleet/harmony-fleet-deploy/src/release.rs +++ b/fleet/harmony-fleet-deploy/src/release.rs @@ -11,7 +11,7 @@ use std::process::Command; use anyhow::{Context, Result, bail}; -use crate::operator::chart::{ChartOptions, build_chart}; +use crate::operator::chart::{CHART_NAME, ChartOptions, build_chart}; const TAG_PREFIX: &str = "harmony-fleet-operator-"; const IMAGE_NAME: &str = "harmony-fleet-operator"; @@ -59,7 +59,7 @@ pub fn release_operator(version: &str, registry: &str, project: &str, push: bool run("helm", &["push", path_str(&tgz)?, &oci_repo])?; } - log::info!("released image={image} chart={oci_repo}/{IMAGE_NAME}:{version} pushed={push}"); + log::info!("released image={image} chart={oci_repo}/{CHART_NAME}:{version} pushed={push}"); Ok(()) } -- 2.39.5