2026-05-29 21:50:47 +00:00
6 changed files with 161 additions and 145 deletions
--- a/.gitea/workflows/harmony-fleet-operator-deploy.yaml
+++ b/.gitea/workflows/harmony-fleet-operator-deploy.yaml
@@ -1,45 +0,0 @@
-name: harmony-fleet-operator — deploy (staging)
-# Manual "deploy" click for a published chart version (the release
-# workflow publishes on tag). Runs on the in-cluster, permissionless
-# staging runner, which pulls its kube + operator credentials from
-# OpenBao via harmony_config. Runner/OpenBao bootstrap:
-# fleet/deployment-process.md.
-on:
-  workflow_dispatch:
-    inputs:
-      tag:
-        description: 'Release tag to deploy, e.g. harmony-fleet-operator-v0.0.2'
-        required: true
-        type: string
-
-jobs:
-  deploy-staging:
-    container:
-      image: hub.nationtech.io/harmony/harmony_composer:latest
-    runs-on: fleet-staging
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      # TODO: bake helm into harmony_composer so this step disappears.
-      - name: Install helm
-        run: curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
-
-      - name: Log in to hub.nationtech.io (helm OCI)
-        run: |
-          echo "${{ secrets.HUB_BOT_PASSWORD }}" \
-            | helm registry login hub.nationtech.io \
-                --username "${{ secrets.HUB_BOT_USER }}" --password-stdin
-
-      - name: Deploy published operator chart
-        env:
-          # OpenBao auth comes from the runner env (machine-identity PR);
-          # FleetDeploySecrets is then resolved from OpenBao.
-          OPENBAO_URL: ${{ secrets.OPENBAO_URL }}
-          HARMONY_SECRET_NAMESPACE: harmony
-        run: |
-          cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-deploy -- \
-            --filter FleetOperatorScore \
-            --from-tag "${{ inputs.tag }}" \
-            --namespace fleet-system \
-            --yes
--- a/examples/harmony_sso/src/main.rs
+++ b/examples/harmony_sso/src/main.rs
@@ -245,8 +245,10 @@ async fn main() -> anyhow::Result<()> {
    // Deploy + configure OpenBao (no JWT auth yet -- Zitadel isn't up)
    cleanup_openbao_webhook(&k8s).await?;
    OpenbaoScore {
+        instance: Default::default(),
        host: OPENBAO_HOST.to_string(),
        openshift: false,
+        tls_issuer: None,
    }
    .interpret(&Inventory::autoload(), &topology)
    .await
--- a/examples/openbao/src/main.rs
+++ b/examples/openbao/src/main.rs
@@ -5,8 +5,10 @@ use harmony::{
 #[tokio::main]
 async fn main() {
    let openbao = OpenbaoScore {
+        instance: Default::default(),
        host: "openbao.sebastien.sto1.nationtech.io".to_string(),
        openshift: false,
+        tls_issuer: None,
    };

    harmony_cli::run(
--- a/fleet/deployment-process.md
+++ b/fleet/deployment-process.md
@@ -28,73 +28,46 @@ cargo run -p harmony-fleet-deploy --bin harmony-fleet-release -- \
  --from-tag harmony-fleet-operator-v0.0.2 --no-push
 ```

-## 2. Deploy a published version to staging (a click)
+## 2. Deploy a published version to staging (manual, for now)

-The deploy is a manual `workflow_dispatch` —
-`.gitea/workflows/harmony-fleet-operator-deploy.yaml` — run after the
-release workflow finishes. Enter the **release tag** (e.g.
-`harmony-fleet-operator-v0.0.2`); the version is parsed from it in Rust,
-so the tag is the single source of truth and YAML never parses it. It
-executes on the **in-cluster staging runner** and runs
-`harmony-fleet-deploy --filter FleetOperatorScore --from-tag <tag>`,
-which installs the published
-`oci://hub.nationtech.io/harmony/harmony-fleet-operator:<version>` chart
-instead of rendering from source. Same command bootstraps and upgrades;
-re-running the same tag is a no-op.
-
-**Auth is Zitadel-SSO-only on this path.** The job carries no plaintext
-secrets and the runner pod carries no Kubernetes permissions. It pulls a
-`FleetDeploySecrets` config from OpenBao via `harmony_config`:
-
- `operator_credentials_toml` — the operator's zitadel-jwt NATS
-  credentials (applied as the operator Secret before `helm install`),
- `kubeconfig` — the scoped `fleet-deployer` ServiceAccount, so the
-  permissionless runner authenticates to the API only for this job.
-
-There is no user/pass on the published-chart path — it's structurally
-unreachable. (`--nats-*` flags exist only for the dev/e2e rendered-chart
-path in step 1's `--no-push` smoke-test.)
-
-Laptop fallback (override the config via env instead of OpenBao):
+Pushing to staging is manual until headless OpenBao auth (Zitadel machine
+identity) lands; secrets still come from the shared OpenBao config. Point
+your shell at the staging kube context and OpenBao, then deploy the operator:

 ```sh
-export HARMONY_CONFIG_FleetDeploySecrets='{"operator_credentials_toml":"…","kubeconfig":"…"}'
+export OPENBAO_URL=<your OpenBao URL>
+export OPENBAO_TOKEN=<scoped read token for secret/<ns>/*>
 harmony-fleet-deploy --filter FleetOperatorScore \
-  --operator-chart-version 0.0.2 --namespace fleet-system --yes
+  --from-tag <release-tag> --namespace fleet-staging --yes
 ```

+It installs the published
+`oci://hub.nationtech.io/harmony/harmony-fleet-operator:<version>` chart;
+the version is parsed from the tag in Rust (the tag is the only source
+of truth). Same command bootstraps and upgrades; re-running the same tag
+is a no-op. Auth is Zitadel-SSO-only: the operator gets its zitadel-jwt
+`operator_credentials_toml` from `FleetDeploySecrets` in OpenBao (no
+user/pass on the published-chart path). For manual deploy, store that
+config **without** a `kubeconfig` field so your own kube context is used.
+
 ## 3. Roll forward

-Re-run the deploy workflow with a newer (or previous-good) version.
-`helm upgrade --install` applies it and fails loudly if convergence
-fails — no automatic rollback. Fix the spec, bump, re-run.
+Re-run with a newer (or previous-good) tag. `helm upgrade --install`
+applies it and fails loudly if convergence fails — no automatic
+rollback. Fix the spec, bump, re-run.

 ## Automated vs. manual

 | Step | Where |
 |---|---|
 | Build + push image + chart on tag | CI (`release` job, on tag) |
-| Deploy a published version + roll forward | CI (`deploy-staging`, manual `workflow_dispatch` click) |
+| Push to staging + roll forward | Manual (a person runs `harmony-fleet-deploy`) |

-## One-time bootstrap (in-cluster staging runner)
+## Future: in-cluster CD (blocked on headless OpenBao auth)

-`deploy-staging` runs `runs-on: fleet-staging` — a Gitea `act_runner`
-provisioned once inside an isolated tenant namespace in the cluster.
-This is what makes the API reachable without exposing it publicly or
-over the VPN: the runner is already inside. Provision (reproducibly, via
-Harmony Scores — not handrolled manifests):
-
- **Runner pod** with `automountServiceAccountToken: false` — zero
-  standing Kubernetes permissions.
- **Egress NetworkPolicy** allowing only the LB IP on :443 (OpenBao /
-  Zitadel / hub via the DNS alias) plus the API endpoint; deny the rest.
- **`fleet-deployer` ServiceAccount + RBAC** scoped to exactly what the
-  operator chart installs (its CRDs, ClusterRole, ClusterRoleBinding +
-  the `fleet-system` namespaced resources).
- **Store the deployer kubeconfig + operator credentials in OpenBao**
-  under `FleetDeploySecrets` so the job resolves them at runtime.
-
-Until the Zitadel→OpenBao machine-identity auth lands, the runner's
-OpenBao auth env is supplied directly; afterward the runner authenticates
-as a Zitadel machine identity. Production-gated promotion is a follow-up
-(ADR-012-2).
+Once `harmony_config` can authenticate to OpenBao headlessly (Zitadel
+machine identity), the manual steps in section 2 become a `deploy-staging`
+workflow on an in-cluster, permissionless Gitea runner, provisioned via a
+`TenantScore` with one extra egress CIDR to the OpenBao/Zitadel ingress. The
+runner pulls a `fleet-deployer` `kubeconfig` + operator credentials from
+OpenBao at job time. Production-gated promotion is a later step (ADR-012-2).
--- a/harmony/src/modules/openbao/mod.rs
+++ b/harmony/src/modules/openbao/mod.rs
@@ -15,26 +15,72 @@ use crate::{

 pub use setup::{OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoSetupScore, OpenbaoUser};

+const DEFAULT_NAMESPACE: &str = "openbao";
+const DEFAULT_RELEASE: &str = "openbao";
+
+/// Where one OpenBao instance lives — the single authority both the
+/// deploy ([`OpenbaoScore`]) and the setup ([`OpenbaoSetupScore`]) take,
+/// so namespace and release can't drift apart.
+#[derive(Debug, Clone, Serialize)]
+pub struct OpenbaoInstance {
+    pub namespace: String,
+    /// Helm release name; the chart names its StatefulSet after it.
+    pub release: String,
+}
+
+impl OpenbaoInstance {
+    /// `{release}-0` — the chart deploys a StatefulSet, so a stored pod
+    /// literal would rot the moment `release` changes.
+    pub fn pod(&self) -> String {
+        format!("{}-0", self.release)
+    }
+}
+
+impl Default for OpenbaoInstance {
+    fn default() -> Self {
+        Self {
+            namespace: DEFAULT_NAMESPACE.to_string(),
+            release: DEFAULT_RELEASE.to_string(),
+        }
+    }
+}
+
 #[derive(Debug, Serialize, Clone)]
 pub struct OpenbaoScore {
+    /// Where this OpenBao is deployed (namespace + helm release).
+    #[serde(default)]
+    pub instance: OpenbaoInstance,
    /// Host used for external access (ingress)
    pub host: String,
    /// Set to true when deploying to OpenShift. Defaults to false for k3d/Kubernetes.
    #[serde(default)]
    pub openshift: bool,
+    /// cert-manager `ClusterIssuer` for ingress TLS. `None` serves plain
+    /// HTTP (TLS terminated elsewhere); `Some` adds the annotation + a
+    /// `tls` block so cert-manager issues and renews the cert. Carries the
+    /// issuer name because a bare bool can't address an issuer.
+    #[serde(default)]
+    pub tls_issuer: Option<String>,
 }

-impl<T: Topology + K8sclient + HelmCommand> Score<T> for OpenbaoScore {
-    fn name(&self) -> String {
-        "OpenbaoScore".to_string()
-    }
+impl OpenbaoScore {
+    fn values(&self) -> String {
+        let Self {
+            host,
+            openshift,
+            tls_issuer,
+            instance: _,
+        } = self;
+        // Edge TLS: the listener stays plain HTTP behind the ingress, which
+        // terminates with the cert-manager-issued cert.
+        let ingress_tls = match tls_issuer {
+            Some(issuer) => format!(
+                "\n    annotations:\n      cert-manager.io/cluster-issuer: {issuer}\n    tls:\n      - hosts: [{host}]\n        secretName: openbao-tls"
+            ),
+            None => String::new(),
+        };

-    #[doc(hidden)]
-    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
-        let host = &self.host;
-        let openshift = self.openshift;
-
-        let values_yaml = Some(format!(
+        format!(
            r#"global:
  openshift: {openshift}
 server:
@@ -65,7 +111,7 @@ server:
  ingress:
    enabled: true
    hosts:
-      - host: {host}
+      - host: {host}{ingress_tls}
  dataStorage:
    enabled: true
    size: 10Gi
@@ -79,15 +125,24 @@ server:
    accessMode: ReadWriteOnce
 ui:
  enabled: true"#
-        ));
+        )
+    }
+}

+impl<T: Topology + K8sclient + HelmCommand> Score<T> for OpenbaoScore {
+    fn name(&self) -> String {
+        "OpenbaoScore".to_string()
+    }
+
+    #[doc(hidden)]
+    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        HelmChartScore {
-            namespace: Some(NonBlankString::from_str("openbao").unwrap()),
-            release_name: NonBlankString::from_str("openbao").unwrap(),
+            namespace: Some(NonBlankString::from_str(&self.instance.namespace).unwrap()),
+            release_name: NonBlankString::from_str(&self.instance.release).unwrap(),
            chart_name: NonBlankString::from_str("openbao/openbao").unwrap(),
            chart_version: None,
            values_overrides: None,
-            values_yaml,
+            values_yaml: Some(self.values()),
            create_namespace: true,
            install_only: false,
            repository: Some(HelmRepository::new(
@@ -99,3 +154,35 @@ ui:
        .create_interpret()
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn no_issuer_renders_plain_ingress() {
+        let v = OpenbaoScore {
+            instance: Default::default(),
+            host: "bao.example".into(),
+            openshift: false,
+            tls_issuer: None,
+        }
+        .values();
+        assert!(!v.contains("cert-manager.io/cluster-issuer"));
+        assert!(!v.contains("secretName"));
+    }
+
+    #[test]
+    fn issuer_renders_certmanager_tls_for_host() {
+        let v = OpenbaoScore {
+            instance: Default::default(),
+            host: "bao.example".into(),
+            openshift: false,
+            tls_issuer: Some("letsencrypt".into()),
+        }
+        .values();
+        assert!(v.contains("cert-manager.io/cluster-issuer: letsencrypt"));
+        assert!(v.contains("- hosts: [bao.example]"));
+        assert!(v.contains("secretName: openbao-tls"));
+    }
+}
--- a/harmony/src/modules/openbao/setup.rs
+++ b/harmony/src/modules/openbao/setup.rs
@@ -13,8 +13,8 @@ use crate::{
 };
 use harmony_types::id::Id;

-const DEFAULT_NAMESPACE: &str = "openbao";
-const DEFAULT_POD: &str = "openbao-0";
+use super::OpenbaoInstance;
+
 const DEFAULT_KV_MOUNT: &str = "secret";

 /// A policy to create in OpenBao.
@@ -72,13 +72,9 @@ pub struct OpenbaoJwtAuth {
 /// deployments should use auto-unseal (Transit, cloud KMS, etc.).
 #[derive(Debug, Clone, Serialize)]
 pub struct OpenbaoSetupScore {
-    /// Kubernetes namespace where OpenBao is deployed.
-    #[serde(default = "default_namespace")]
-    pub namespace: String,
-
-    /// StatefulSet pod name to exec into.
-    #[serde(default = "default_pod")]
-    pub pod: String,
+    /// Where the target OpenBao is deployed (namespace + release).
+    #[serde(default)]
+    pub instance: OpenbaoInstance,

    /// KV v2 mount path to enable.
    #[serde(default = "default_kv_mount")]
@@ -97,12 +93,6 @@ pub struct OpenbaoSetupScore {
    pub jwt_auth: Option<OpenbaoJwtAuth>,
 }

-fn default_namespace() -> String {
-    DEFAULT_NAMESPACE.to_string()
-}
-fn default_pod() -> String {
-    DEFAULT_POD.to_string()
-}
 fn default_kv_mount() -> String {
    DEFAULT_KV_MOUNT.to_string()
 }
@@ -110,8 +100,7 @@ fn default_kv_mount() -> String {
 impl Default for OpenbaoSetupScore {
    fn default() -> Self {
        Self {
-            namespace: default_namespace(),
-            pod: default_pod(),
+            instance: OpenbaoInstance::default(),
            kv_mount: default_kv_mount(),
            policies: Vec::new(),
            users: Vec::new(),
@@ -164,7 +153,11 @@ impl OpenbaoSetupInterpret {
        k8s: &harmony_k8s::K8sClient,
        command: Vec<&str>,
    ) -> Result<String, String> {
-        k8s.exec_pod_capture_output(&self.score.pod, Some(&self.score.namespace), command)
+        k8s.exec_pod_capture_output(
+            &self.score.instance.pod(),
+            Some(&self.score.instance.namespace),
+            command,
+        )
        .await
    }

@@ -279,8 +272,8 @@ impl OpenbaoSetupInterpret {
        // status and parse the `sealed` field authoritatively.
        let sealed = match k8s
            .exec_pod_capture(
-                &self.score.pod,
-                Some(&self.score.namespace),
+                &self.score.instance.pod(),
+                Some(&self.score.instance.namespace),
                vec!["bao", "status", "-format=json"],
            )
            .await
@@ -514,12 +507,16 @@ impl<T: Topology + K8sclient> Interpret<T> for OpenbaoSetupInterpret {
            .map_err(|e| InterpretError::new(format!("Failed to get K8s client: {e}")))?;

        // Wait for the pod to be running before attempting any operations.
-        k8s.wait_for_pod_ready(&self.score.pod, Some(&self.score.namespace))
+        k8s.wait_for_pod_ready(
+            &self.score.instance.pod(),
+            Some(&self.score.instance.namespace),
+        )
        .await
        .map_err(|e| {
            InterpretError::new(format!(
                "Pod {}/{} not ready: {e}",
-                    self.score.namespace, self.score.pod
+                self.score.instance.namespace,
+                self.score.instance.pod()
            ))
        })?;

@@ -574,8 +571,8 @@ mod tests {
    #[test]
    fn default_score_carries_expected_mounts() {
        let s = OpenbaoSetupScore::default();
-        assert_eq!(s.namespace, "openbao");
-        assert_eq!(s.pod, "openbao-0");
+        assert_eq!(s.instance.namespace, "openbao");
+        assert_eq!(s.instance.pod(), "openbao-0");
        assert_eq!(s.kv_mount, "secret");
    }
 }