From d687b29f3558517c7ee30fab2d3920db9d4fab1b Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Fri, 29 May 2026 16:07:01 -0400 Subject: [PATCH 1/3] docs(fleet): manual staging deploy is interim; drop premature CD workflow Full in-cluster CD is blocked on headless OpenBao auth (Zitadel machine identity), so the clickable deploy-staging workflow + its runner would be dead config. Drop it; document the manual operator deploy (same secure OpenBao-config path) until the auth flow lands. --- .../harmony-fleet-operator-deploy.yaml | 45 ----------- fleet/deployment-process.md | 81 +++++++------------ 2 files changed, 27 insertions(+), 99 deletions(-) delete mode 100644 .gitea/workflows/harmony-fleet-operator-deploy.yaml diff --git a/.gitea/workflows/harmony-fleet-operator-deploy.yaml b/.gitea/workflows/harmony-fleet-operator-deploy.yaml deleted file mode 100644 index fe132247..00000000 --- a/.gitea/workflows/harmony-fleet-operator-deploy.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: harmony-fleet-operator — deploy (staging) -# Manual "deploy" click for a published chart version (the release -# workflow publishes on tag). Runs on the in-cluster, permissionless -# staging runner, which pulls its kube + operator credentials from -# OpenBao via harmony_config. Runner/OpenBao bootstrap: -# fleet/deployment-process.md. -on: - workflow_dispatch: - inputs: - tag: - description: 'Release tag to deploy, e.g. harmony-fleet-operator-v0.0.2' - required: true - type: string - -jobs: - deploy-staging: - container: - image: hub.nationtech.io/harmony/harmony_composer:latest - runs-on: fleet-staging - steps: - - name: Checkout code - uses: actions/checkout@v4 - - # TODO: bake helm into harmony_composer so this step disappears. - - name: Install helm - run: curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash - - - name: Log in to hub.nationtech.io (helm OCI) - run: | - echo "${{ secrets.HUB_BOT_PASSWORD }}" \ - | helm registry login hub.nationtech.io \ - --username "${{ secrets.HUB_BOT_USER }}" --password-stdin - - - name: Deploy published operator chart - env: - # OpenBao auth comes from the runner env (machine-identity PR); - # FleetDeploySecrets is then resolved from OpenBao. - OPENBAO_URL: ${{ secrets.OPENBAO_URL }} - HARMONY_SECRET_NAMESPACE: harmony - run: | - cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-deploy -- \ - --filter FleetOperatorScore \ - --from-tag "${{ inputs.tag }}" \ - --namespace fleet-system \ - --yes diff --git a/fleet/deployment-process.md b/fleet/deployment-process.md index 79dfd7c4..70e6e115 100644 --- a/fleet/deployment-process.md +++ b/fleet/deployment-process.md @@ -28,73 +28,46 @@ cargo run -p harmony-fleet-deploy --bin harmony-fleet-release -- \ --from-tag harmony-fleet-operator-v0.0.2 --no-push ``` -## 2. Deploy a published version to staging (a click) +## 2. Deploy a published version to staging (manual, for now) -The deploy is a manual `workflow_dispatch` — -`.gitea/workflows/harmony-fleet-operator-deploy.yaml` — run after the -release workflow finishes. Enter the **release tag** (e.g. -`harmony-fleet-operator-v0.0.2`); the version is parsed from it in Rust, -so the tag is the single source of truth and YAML never parses it. It -executes on the **in-cluster staging runner** and runs -`harmony-fleet-deploy --filter FleetOperatorScore --from-tag `, -which installs the published -`oci://hub.nationtech.io/harmony/harmony-fleet-operator:` chart -instead of rendering from source. Same command bootstraps and upgrades; -re-running the same tag is a no-op. - -**Auth is Zitadel-SSO-only on this path.** The job carries no plaintext -secrets and the runner pod carries no Kubernetes permissions. It pulls a -`FleetDeploySecrets` config from OpenBao via `harmony_config`: - -- `operator_credentials_toml` — the operator's zitadel-jwt NATS - credentials (applied as the operator Secret before `helm install`), -- `kubeconfig` — the scoped `fleet-deployer` ServiceAccount, so the - permissionless runner authenticates to the API only for this job. - -There is no user/pass on the published-chart path — it's structurally -unreachable. (`--nats-*` flags exist only for the dev/e2e rendered-chart -path in step 1's `--no-push` smoke-test.) - -Laptop fallback (override the config via env instead of OpenBao): +Push to staging is manual until headless OpenBao auth (Zitadel machine +identity) lands; secrets still come from shared OpenBao config. Point at +your staging kube context and OpenBao, then run the operator deploy: ```sh -export HARMONY_CONFIG_FleetDeploySecrets='{"operator_credentials_toml":"…","kubeconfig":"…"}' +export OPENBAO_URL= +export OPENBAO_TOKEN=/*> harmony-fleet-deploy --filter FleetOperatorScore \ - --operator-chart-version 0.0.2 --namespace fleet-system --yes + --from-tag --namespace fleet-staging --yes ``` +It installs the published +`oci://hub.nationtech.io/harmony/harmony-fleet-operator:` chart; +the version is parsed from the tag in Rust (the tag is the only source +of truth). Same command bootstraps and upgrades; re-running the same tag +is a no-op. Auth is Zitadel-SSO-only: the operator gets its zitadel-jwt +`operator_credentials_toml` from `FleetDeploySecrets` in OpenBao (no +user/pass on the published-chart path). For manual deploy, store that +config **without** a `kubeconfig` field so your own kube context is used. + ## 3. Roll forward -Re-run the deploy workflow with a newer (or previous-good) version. -`helm upgrade --install` applies it and fails loudly if convergence -fails — no automatic rollback. Fix the spec, bump, re-run. +Re-run with a newer (or previous-good) tag. `helm upgrade --install` +applies it and fails loudly if convergence fails — no automatic +rollback. Fix the spec, bump, re-run. ## Automated vs. manual | Step | Where | |---|---| | Build + push image + chart on tag | CI (`release` job, on tag) | -| Deploy a published version + roll forward | CI (`deploy-staging`, manual `workflow_dispatch` click) | +| Push to staging + roll forward | Manual (operator runs the deploy) | -## One-time bootstrap (in-cluster staging runner) +## Future: in-cluster CD (blocked on headless OpenBao auth) -`deploy-staging` runs `runs-on: fleet-staging` — a Gitea `act_runner` -provisioned once inside an isolated tenant namespace in the cluster. -This is what makes the API reachable without exposing it publicly or -over the VPN: the runner is already inside. Provision (reproducibly, via -Harmony Scores — not handrolled manifests): - -- **Runner pod** with `automountServiceAccountToken: false` — zero - standing Kubernetes permissions. -- **Egress NetworkPolicy** allowing only the LB IP on :443 (OpenBao / - Zitadel / hub via the DNS alias) plus the API endpoint; deny the rest. -- **`fleet-deployer` ServiceAccount + RBAC** scoped to exactly what the - operator chart installs (its CRDs, ClusterRole, ClusterRoleBinding + - the `fleet-system` namespaced resources). -- **Store the deployer kubeconfig + operator credentials in OpenBao** - under `FleetDeploySecrets` so the job resolves them at runtime. - -Until the Zitadel→OpenBao machine-identity auth lands, the runner's -OpenBao auth env is supplied directly; afterward the runner authenticates -as a Zitadel machine identity. Production-gated promotion is a follow-up -(ADR-012-2). +Once `harmony_config` can authenticate to OpenBao headlessly (Zitadel +machine identity), these exports become a `deploy-staging` workflow on an +in-cluster, permissionless Gitea runner that pulls a `fleet-deployer` +`kubeconfig` + operator credentials from OpenBao at job time (provisioned +via a `TenantScore` with one extra egress CIDR to the OpenBao/Zitadel +ingress). Production-gated promotion is a later step (ADR-012-2). -- 2.39.5 From 1f525cd5d1ac5263217ad46c32286ef6e0f440f2 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Fri, 29 May 2026 17:02:23 -0400 Subject: [PATCH 2/3] feat(openbao): optional cert-manager ingress TLS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenbaoScore.tls_issuer: Some(issuer) adds the cert-manager cluster-issuer annotation + tls block (edge TLS, listener stays plain); None keeps plain HTTP. Option not bool — cert-manager needs the issuer name. Rendering extracted to values() and covered by tests. --- examples/harmony_sso/src/main.rs | 1 + examples/openbao/src/main.rs | 1 + harmony/src/modules/openbao/mod.rs | 77 +++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/examples/harmony_sso/src/main.rs b/examples/harmony_sso/src/main.rs index 1fe895b3..4c5c2944 100644 --- a/examples/harmony_sso/src/main.rs +++ b/examples/harmony_sso/src/main.rs @@ -247,6 +247,7 @@ async fn main() -> anyhow::Result<()> { OpenbaoScore { host: OPENBAO_HOST.to_string(), openshift: false, + tls_issuer: None, } .interpret(&Inventory::autoload(), &topology) .await diff --git a/examples/openbao/src/main.rs b/examples/openbao/src/main.rs index adcb5f45..1329c87f 100644 --- a/examples/openbao/src/main.rs +++ b/examples/openbao/src/main.rs @@ -7,6 +7,7 @@ async fn main() { let openbao = OpenbaoScore { host: "openbao.sebastien.sto1.nationtech.io".to_string(), openshift: false, + tls_issuer: None, }; harmony_cli::run( diff --git a/harmony/src/modules/openbao/mod.rs b/harmony/src/modules/openbao/mod.rs index e7e11bc5..80d8eabb 100644 --- a/harmony/src/modules/openbao/mod.rs +++ b/harmony/src/modules/openbao/mod.rs @@ -22,19 +22,31 @@ pub struct OpenbaoScore { /// Set to true when deploying to OpenShift. Defaults to false for k3d/Kubernetes. #[serde(default)] pub openshift: bool, + /// cert-manager `ClusterIssuer` for ingress TLS. `None` serves plain + /// HTTP (TLS terminated elsewhere); `Some` adds the annotation + a + /// `tls` block so cert-manager issues and renews the cert. Carries the + /// issuer name because a bare bool can't address an issuer. + #[serde(default)] + pub tls_issuer: Option, } -impl Score for OpenbaoScore { - fn name(&self) -> String { - "OpenbaoScore".to_string() - } +impl OpenbaoScore { + fn values(&self) -> String { + let Self { + host, + openshift, + tls_issuer, + } = self; + // Edge TLS: the listener stays plain HTTP behind the ingress, which + // terminates with the cert-manager-issued cert. + let ingress_tls = match tls_issuer { + Some(issuer) => format!( + "\n annotations:\n cert-manager.io/cluster-issuer: {issuer}\n tls:\n - hosts: [{host}]\n secretName: openbao-tls" + ), + None => String::new(), + }; - #[doc(hidden)] - fn create_interpret(&self) -> Box> { - let host = &self.host; - let openshift = self.openshift; - - let values_yaml = Some(format!( + format!( r#"global: openshift: {openshift} server: @@ -65,7 +77,7 @@ server: ingress: enabled: true hosts: - - host: {host} + - host: {host}{ingress_tls} dataStorage: enabled: true size: 10Gi @@ -79,15 +91,24 @@ server: accessMode: ReadWriteOnce ui: enabled: true"# - )); + ) + } +} +impl Score for OpenbaoScore { + fn name(&self) -> String { + "OpenbaoScore".to_string() + } + + #[doc(hidden)] + fn create_interpret(&self) -> Box> { HelmChartScore { namespace: Some(NonBlankString::from_str("openbao").unwrap()), release_name: NonBlankString::from_str("openbao").unwrap(), chart_name: NonBlankString::from_str("openbao/openbao").unwrap(), chart_version: None, values_overrides: None, - values_yaml, + values_yaml: Some(self.values()), create_namespace: true, install_only: false, repository: Some(HelmRepository::new( @@ -99,3 +120,33 @@ ui: .create_interpret() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn no_issuer_renders_plain_ingress() { + let v = OpenbaoScore { + host: "bao.example".into(), + openshift: false, + tls_issuer: None, + } + .values(); + assert!(!v.contains("cert-manager.io/cluster-issuer")); + assert!(!v.contains("secretName")); + } + + #[test] + fn issuer_renders_certmanager_tls_for_host() { + let v = OpenbaoScore { + host: "bao.example".into(), + openshift: false, + tls_issuer: Some("letsencrypt".into()), + } + .values(); + assert!(v.contains("cert-manager.io/cluster-issuer: letsencrypt")); + assert!(v.contains("- hosts: [bao.example]")); + assert!(v.contains("secretName: openbao-tls")); + } +} -- 2.39.5 From 7638611b9f2eca4bf2824b321d4d9f4b2ca94536 Mon Sep 17 00:00:00 2001 From: Jean-Gabriel Gill-Couture Date: Fri, 29 May 2026 17:32:26 -0400 Subject: [PATCH 3/3] refactor(openbao): share OpenbaoInstance across deploy + setup scores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit namespace/release/pod were duplicated as independent literals across OpenbaoScore (hardcoded) and OpenbaoSetupScore (defaults) — pod was a derived fact (`{release}-0`) stored as a literal that rots if release changes, and namespace agreement was by coincidence. Introduce OpenbaoInstance { namespace, release } with a derived pod(); both scores take it. Only the shared identity moves; per-score knobs (host, tls, kv_mount, policies, …) stay on their owner. --- examples/harmony_sso/src/main.rs | 1 + examples/openbao/src/main.rs | 1 + harmony/src/modules/openbao/mod.rs | 40 ++++++++++++++++++- harmony/src/modules/openbao/setup.rs | 59 +++++++++++++--------------- 4 files changed, 68 insertions(+), 33 deletions(-) diff --git a/examples/harmony_sso/src/main.rs b/examples/harmony_sso/src/main.rs index 4c5c2944..96d70766 100644 --- a/examples/harmony_sso/src/main.rs +++ b/examples/harmony_sso/src/main.rs @@ -245,6 +245,7 @@ async fn main() -> anyhow::Result<()> { // Deploy + configure OpenBao (no JWT auth yet -- Zitadel isn't up) cleanup_openbao_webhook(&k8s).await?; OpenbaoScore { + instance: Default::default(), host: OPENBAO_HOST.to_string(), openshift: false, tls_issuer: None, diff --git a/examples/openbao/src/main.rs b/examples/openbao/src/main.rs index 1329c87f..eee92944 100644 --- a/examples/openbao/src/main.rs +++ b/examples/openbao/src/main.rs @@ -5,6 +5,7 @@ use harmony::{ #[tokio::main] async fn main() { let openbao = OpenbaoScore { + instance: Default::default(), host: "openbao.sebastien.sto1.nationtech.io".to_string(), openshift: false, tls_issuer: None, diff --git a/harmony/src/modules/openbao/mod.rs b/harmony/src/modules/openbao/mod.rs index 80d8eabb..6ab454b4 100644 --- a/harmony/src/modules/openbao/mod.rs +++ b/harmony/src/modules/openbao/mod.rs @@ -15,8 +15,41 @@ use crate::{ pub use setup::{OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoSetupScore, OpenbaoUser}; +const DEFAULT_NAMESPACE: &str = "openbao"; +const DEFAULT_RELEASE: &str = "openbao"; + +/// Where one OpenBao instance lives — the single authority both the +/// deploy ([`OpenbaoScore`]) and the setup ([`OpenbaoSetupScore`]) take, +/// so namespace and release can't drift apart. +#[derive(Debug, Clone, Serialize)] +pub struct OpenbaoInstance { + pub namespace: String, + /// Helm release name; the chart names its StatefulSet after it. + pub release: String, +} + +impl OpenbaoInstance { + /// `{release}-0` — the chart deploys a StatefulSet, so a stored pod + /// literal would rot the moment `release` changes. + pub fn pod(&self) -> String { + format!("{}-0", self.release) + } +} + +impl Default for OpenbaoInstance { + fn default() -> Self { + Self { + namespace: DEFAULT_NAMESPACE.to_string(), + release: DEFAULT_RELEASE.to_string(), + } + } +} + #[derive(Debug, Serialize, Clone)] pub struct OpenbaoScore { + /// Where this OpenBao is deployed (namespace + helm release). + #[serde(default)] + pub instance: OpenbaoInstance, /// Host used for external access (ingress) pub host: String, /// Set to true when deploying to OpenShift. Defaults to false for k3d/Kubernetes. @@ -36,6 +69,7 @@ impl OpenbaoScore { host, openshift, tls_issuer, + instance: _, } = self; // Edge TLS: the listener stays plain HTTP behind the ingress, which // terminates with the cert-manager-issued cert. @@ -103,8 +137,8 @@ impl Score for OpenbaoScore { #[doc(hidden)] fn create_interpret(&self) -> Box> { HelmChartScore { - namespace: Some(NonBlankString::from_str("openbao").unwrap()), - release_name: NonBlankString::from_str("openbao").unwrap(), + namespace: Some(NonBlankString::from_str(&self.instance.namespace).unwrap()), + release_name: NonBlankString::from_str(&self.instance.release).unwrap(), chart_name: NonBlankString::from_str("openbao/openbao").unwrap(), chart_version: None, values_overrides: None, @@ -128,6 +162,7 @@ mod tests { #[test] fn no_issuer_renders_plain_ingress() { let v = OpenbaoScore { + instance: Default::default(), host: "bao.example".into(), openshift: false, tls_issuer: None, @@ -140,6 +175,7 @@ mod tests { #[test] fn issuer_renders_certmanager_tls_for_host() { let v = OpenbaoScore { + instance: Default::default(), host: "bao.example".into(), openshift: false, tls_issuer: Some("letsencrypt".into()), diff --git a/harmony/src/modules/openbao/setup.rs b/harmony/src/modules/openbao/setup.rs index 1971ed4b..52f355b3 100644 --- a/harmony/src/modules/openbao/setup.rs +++ b/harmony/src/modules/openbao/setup.rs @@ -13,8 +13,8 @@ use crate::{ }; use harmony_types::id::Id; -const DEFAULT_NAMESPACE: &str = "openbao"; -const DEFAULT_POD: &str = "openbao-0"; +use super::OpenbaoInstance; + const DEFAULT_KV_MOUNT: &str = "secret"; /// A policy to create in OpenBao. @@ -72,13 +72,9 @@ pub struct OpenbaoJwtAuth { /// deployments should use auto-unseal (Transit, cloud KMS, etc.). #[derive(Debug, Clone, Serialize)] pub struct OpenbaoSetupScore { - /// Kubernetes namespace where OpenBao is deployed. - #[serde(default = "default_namespace")] - pub namespace: String, - - /// StatefulSet pod name to exec into. - #[serde(default = "default_pod")] - pub pod: String, + /// Where the target OpenBao is deployed (namespace + release). + #[serde(default)] + pub instance: OpenbaoInstance, /// KV v2 mount path to enable. #[serde(default = "default_kv_mount")] @@ -97,12 +93,6 @@ pub struct OpenbaoSetupScore { pub jwt_auth: Option, } -fn default_namespace() -> String { - DEFAULT_NAMESPACE.to_string() -} -fn default_pod() -> String { - DEFAULT_POD.to_string() -} fn default_kv_mount() -> String { DEFAULT_KV_MOUNT.to_string() } @@ -110,8 +100,7 @@ fn default_kv_mount() -> String { impl Default for OpenbaoSetupScore { fn default() -> Self { Self { - namespace: default_namespace(), - pod: default_pod(), + instance: OpenbaoInstance::default(), kv_mount: default_kv_mount(), policies: Vec::new(), users: Vec::new(), @@ -164,8 +153,12 @@ impl OpenbaoSetupInterpret { k8s: &harmony_k8s::K8sClient, command: Vec<&str>, ) -> Result { - k8s.exec_pod_capture_output(&self.score.pod, Some(&self.score.namespace), command) - .await + k8s.exec_pod_capture_output( + &self.score.instance.pod(), + Some(&self.score.instance.namespace), + command, + ) + .await } async fn bao_command( @@ -279,8 +272,8 @@ impl OpenbaoSetupInterpret { // status and parse the `sealed` field authoritatively. let sealed = match k8s .exec_pod_capture( - &self.score.pod, - Some(&self.score.namespace), + &self.score.instance.pod(), + Some(&self.score.instance.namespace), vec!["bao", "status", "-format=json"], ) .await @@ -514,14 +507,18 @@ impl Interpret for OpenbaoSetupInterpret { .map_err(|e| InterpretError::new(format!("Failed to get K8s client: {e}")))?; // Wait for the pod to be running before attempting any operations. - k8s.wait_for_pod_ready(&self.score.pod, Some(&self.score.namespace)) - .await - .map_err(|e| { - InterpretError::new(format!( - "Pod {}/{} not ready: {e}", - self.score.namespace, self.score.pod - )) - })?; + k8s.wait_for_pod_ready( + &self.score.instance.pod(), + Some(&self.score.instance.namespace), + ) + .await + .map_err(|e| { + InterpretError::new(format!( + "Pod {}/{} not ready: {e}", + self.score.instance.namespace, + self.score.instance.pod() + )) + })?; let root_token = self.init(&k8s).await?; self.unseal(&k8s).await?; @@ -574,8 +571,8 @@ mod tests { #[test] fn default_score_carries_expected_mounts() { let s = OpenbaoSetupScore::default(); - assert_eq!(s.namespace, "openbao"); - assert_eq!(s.pod, "openbao-0"); + assert_eq!(s.instance.namespace, "openbao"); + assert_eq!(s.instance.pod(), "openbao-0"); assert_eq!(s.kv_mount, "secret"); } } -- 2.39.5