refactor/openbao-instance #312

Merged
johnride merged 3 commits from refactor/openbao-instance into feat/fleet-cd-staging-deploy 2026-05-29 21:50:47 +00:00
6 changed files with 161 additions and 145 deletions

View File

@@ -1,45 +0,0 @@
name: harmony-fleet-operator — deploy (staging)
# Manual "deploy" click for a published chart version (the release
# workflow publishes on tag). Runs on the in-cluster, permissionless
# staging runner, which pulls its kube + operator credentials from
# OpenBao via harmony_config. Runner/OpenBao bootstrap:
# fleet/deployment-process.md.
on:
workflow_dispatch:
inputs:
tag:
description: 'Release tag to deploy, e.g. harmony-fleet-operator-v0.0.2'
required: true
type: string
jobs:
deploy-staging:
container:
image: hub.nationtech.io/harmony/harmony_composer:latest
runs-on: fleet-staging
steps:
- name: Checkout code
uses: actions/checkout@v4
# TODO: bake helm into harmony_composer so this step disappears.
- name: Install helm
run: curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
- name: Log in to hub.nationtech.io (helm OCI)
run: |
echo "${{ secrets.HUB_BOT_PASSWORD }}" \
| helm registry login hub.nationtech.io \
--username "${{ secrets.HUB_BOT_USER }}" --password-stdin
- name: Deploy published operator chart
env:
# OpenBao auth comes from the runner env (machine-identity PR);
# FleetDeploySecrets is then resolved from OpenBao.
OPENBAO_URL: ${{ secrets.OPENBAO_URL }}
HARMONY_SECRET_NAMESPACE: harmony
run: |
cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-deploy -- \
--filter FleetOperatorScore \
--from-tag "${{ inputs.tag }}" \
--namespace fleet-system \
--yes

View File

@@ -245,8 +245,10 @@ async fn main() -> anyhow::Result<()> {
// Deploy + configure OpenBao (no JWT auth yet -- Zitadel isn't up)
cleanup_openbao_webhook(&k8s).await?;
OpenbaoScore {
instance: Default::default(),
host: OPENBAO_HOST.to_string(),
openshift: false,
tls_issuer: None,
}
.interpret(&Inventory::autoload(), &topology)
.await

View File

@@ -5,8 +5,10 @@ use harmony::{
#[tokio::main]
async fn main() {
let openbao = OpenbaoScore {
instance: Default::default(),
host: "openbao.sebastien.sto1.nationtech.io".to_string(),
openshift: false,
tls_issuer: None,
};
harmony_cli::run(

View File

@@ -28,73 +28,46 @@ cargo run -p harmony-fleet-deploy --bin harmony-fleet-release -- \
--from-tag harmony-fleet-operator-v0.0.2 --no-push
```
## 2. Deploy a published version to staging (a click)
## 2. Deploy a published version to staging (manual, for now)
The deploy is a manual `workflow_dispatch`
`.gitea/workflows/harmony-fleet-operator-deploy.yaml` — run after the
release workflow finishes. Enter the **release tag** (e.g.
`harmony-fleet-operator-v0.0.2`); the version is parsed from it in Rust,
so the tag is the single source of truth and YAML never parses it. It
executes on the **in-cluster staging runner** and runs
`harmony-fleet-deploy --filter FleetOperatorScore --from-tag <tag>`,
which installs the published
`oci://hub.nationtech.io/harmony/harmony-fleet-operator:<version>` chart
instead of rendering from source. Same command bootstraps and upgrades;
re-running the same tag is a no-op.
**Auth is Zitadel-SSO-only on this path.** The job carries no plaintext
secrets and the runner pod carries no Kubernetes permissions. It pulls a
`FleetDeploySecrets` config from OpenBao via `harmony_config`:
- `operator_credentials_toml` — the operator's zitadel-jwt NATS
credentials (applied as the operator Secret before `helm install`),
- `kubeconfig` — the scoped `fleet-deployer` ServiceAccount, so the
permissionless runner authenticates to the API only for this job.
There is no user/pass on the published-chart path — it's structurally
unreachable. (`--nats-*` flags exist only for the dev/e2e rendered-chart
path in step 1's `--no-push` smoke-test.)
Laptop fallback (override the config via env instead of OpenBao):
Push to staging is manual until headless OpenBao auth (Zitadel machine
identity) lands; secrets still come from shared OpenBao config. Point at
your staging kube context and OpenBao, then run the operator deploy:
```sh
export HARMONY_CONFIG_FleetDeploySecrets='{"operator_credentials_toml":"…","kubeconfig":"…"}'
export OPENBAO_URL=<your OpenBao URL>
export OPENBAO_TOKEN=<scoped read token for secret/<ns>/*>
harmony-fleet-deploy --filter FleetOperatorScore \
--operator-chart-version 0.0.2 --namespace fleet-system --yes
--from-tag <release-tag> --namespace fleet-staging --yes
```
It installs the published
`oci://hub.nationtech.io/harmony/harmony-fleet-operator:<version>` chart;
the version is parsed from the tag in Rust (the tag is the only source
of truth). Same command bootstraps and upgrades; re-running the same tag
is a no-op. Auth is Zitadel-SSO-only: the operator gets its zitadel-jwt
`operator_credentials_toml` from `FleetDeploySecrets` in OpenBao (no
user/pass on the published-chart path). For manual deploy, store that
config **without** a `kubeconfig` field so your own kube context is used.
## 3. Roll forward
Re-run the deploy workflow with a newer (or previous-good) version.
`helm upgrade --install` applies it and fails loudly if convergence
fails — no automatic rollback. Fix the spec, bump, re-run.
Re-run with a newer (or previous-good) tag. `helm upgrade --install`
applies it and fails loudly if convergence fails — no automatic
rollback. Fix the spec, bump, re-run.
## Automated vs. manual
| Step | Where |
|---|---|
| Build + push image + chart on tag | CI (`release` job, on tag) |
| Deploy a published version + roll forward | CI (`deploy-staging`, manual `workflow_dispatch` click) |
| Push to staging + roll forward | Manual (operator runs the deploy) |
## One-time bootstrap (in-cluster staging runner)
## Future: in-cluster CD (blocked on headless OpenBao auth)
`deploy-staging` runs `runs-on: fleet-staging` — a Gitea `act_runner`
provisioned once inside an isolated tenant namespace in the cluster.
This is what makes the API reachable without exposing it publicly or
over the VPN: the runner is already inside. Provision (reproducibly, via
Harmony Scores — not handrolled manifests):
- **Runner pod** with `automountServiceAccountToken: false` — zero
standing Kubernetes permissions.
- **Egress NetworkPolicy** allowing only the LB IP on :443 (OpenBao /
Zitadel / hub via the DNS alias) plus the API endpoint; deny the rest.
- **`fleet-deployer` ServiceAccount + RBAC** scoped to exactly what the
operator chart installs (its CRDs, ClusterRole, ClusterRoleBinding +
the `fleet-system` namespaced resources).
- **Store the deployer kubeconfig + operator credentials in OpenBao**
under `FleetDeploySecrets` so the job resolves them at runtime.
Until the Zitadel→OpenBao machine-identity auth lands, the runner's
OpenBao auth env is supplied directly; afterward the runner authenticates
as a Zitadel machine identity. Production-gated promotion is a follow-up
(ADR-012-2).
Once `harmony_config` can authenticate to OpenBao headlessly (Zitadel
machine identity), these exports become a `deploy-staging` workflow that
runs on an in-cluster, permissionless Gitea runner. The runner is
provisioned via a `TenantScore` with one extra egress CIDR to the
OpenBao/Zitadel ingress, and at job time it pulls a `fleet-deployer`
`kubeconfig` + operator credentials from OpenBao. Production-gated
promotion is a later step (ADR-012-2).

View File

@@ -15,26 +15,72 @@ use crate::{
pub use setup::{OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoSetupScore, OpenbaoUser};
/// Namespace that [`OpenbaoInstance::default`] targets.
const DEFAULT_NAMESPACE: &str = "openbao";
/// Helm release name that [`OpenbaoInstance::default`] targets.
const DEFAULT_RELEASE: &str = "openbao";
/// Where one OpenBao instance lives — the single authority both the
/// deploy ([`OpenbaoScore`]) and the setup ([`OpenbaoSetupScore`]) take,
/// so namespace and release can't drift apart.
#[derive(Debug, Clone, Serialize)]
pub struct OpenbaoInstance {
/// Kubernetes namespace the Helm release is installed into; the setup
/// score execs into the instance's pod in this same namespace.
pub namespace: String,
/// Helm release name; the chart names its StatefulSet after it.
pub release: String,
}
impl OpenbaoInstance {
    /// Name of the Helm chart the instance is installed from
    /// (`openbao/openbao`); it takes part in how the chart derives the
    /// StatefulSet name.
    const CHART_NAME: &'static str = "openbao";

    /// Name of the first StatefulSet pod of this instance.
    ///
    /// The pod name is derived rather than stored, so it cannot rot when
    /// `release` changes. It follows the chart's `fullname` helper:
    ///
    /// - release contains the chart name (`openbao`, `openbao-staging`):
    ///   the StatefulSet is named after the release, so the pod is
    ///   `{release}-0`;
    /// - otherwise (`vault`, `bao`): the chart appends its own name, so the
    ///   pod is `{release}-openbao-0`.
    ///
    /// NOTE(review): assumes the rendered values set neither `nameOverride`
    /// nor `fullnameOverride`, and that the release name respects Helm's
    /// 53-character limit so the helper's 63-character truncation never
    /// applies — confirm against the chart version in use.
    pub fn pod(&self) -> String {
        if self.release.contains(Self::CHART_NAME) {
            format!("{}-0", self.release)
        } else {
            format!("{}-{}-0", self.release, Self::CHART_NAME)
        }
    }
}
impl Default for OpenbaoInstance {
    /// Builds the conventional instance: namespace `DEFAULT_NAMESPACE`,
    /// Helm release `DEFAULT_RELEASE`.
    fn default() -> Self {
        let namespace = String::from(DEFAULT_NAMESPACE);
        let release = String::from(DEFAULT_RELEASE);
        Self { namespace, release }
    }
}
/// Score that installs the `openbao/openbao` Helm chart into the namespace
/// and release named by `instance`, exposing it through an ingress on `host`.
// NOTE(review): only `Serialize` is derived here, so the `#[serde(default)]`
// attributes below look like they have no effect until `Deserialize` is
// derived as well — confirm whether that is intended.
#[derive(Debug, Serialize, Clone)]
pub struct OpenbaoScore {
/// Where this OpenBao is deployed (namespace + helm release).
#[serde(default)]
pub instance: OpenbaoInstance,
/// Host used for external access (ingress)
pub host: String,
/// Set to true when deploying to OpenShift. Defaults to false for k3d/Kubernetes.
#[serde(default)]
pub openshift: bool,
/// cert-manager `ClusterIssuer` for ingress TLS. `None` serves plain
/// HTTP (TLS terminated elsewhere); `Some` adds the annotation + a
/// `tls` block so cert-manager issues and renews the cert. Carries the
/// issuer name because a bare bool can't address an issuer.
#[serde(default)]
pub tls_issuer: Option<String>,
}
impl<T: Topology + K8sclient + HelmCommand> Score<T> for OpenbaoScore {
fn name(&self) -> String {
"OpenbaoScore".to_string()
}
impl OpenbaoScore {
fn values(&self) -> String {
let Self {
host,
openshift,
tls_issuer,
instance: _,
} = self;
// Edge TLS: the listener stays plain HTTP behind the ingress, which
// terminates with the cert-manager-issued cert.
let ingress_tls = match tls_issuer {
Some(issuer) => format!(
"\n annotations:\n cert-manager.io/cluster-issuer: {issuer}\n tls:\n - hosts: [{host}]\n secretName: openbao-tls"
),
None => String::new(),
};
#[doc(hidden)]
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
let host = &self.host;
let openshift = self.openshift;
let values_yaml = Some(format!(
format!(
r#"global:
openshift: {openshift}
server:
@@ -65,7 +111,7 @@ server:
ingress:
enabled: true
hosts:
- host: {host}
- host: {host}{ingress_tls}
dataStorage:
enabled: true
size: 10Gi
@@ -79,15 +125,24 @@ server:
accessMode: ReadWriteOnce
ui:
enabled: true"#
));
)
}
}
impl<T: Topology + K8sclient + HelmCommand> Score<T> for OpenbaoScore {
fn name(&self) -> String {
"OpenbaoScore".to_string()
}
#[doc(hidden)]
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
HelmChartScore {
namespace: Some(NonBlankString::from_str("openbao").unwrap()),
release_name: NonBlankString::from_str("openbao").unwrap(),
namespace: Some(NonBlankString::from_str(&self.instance.namespace).unwrap()),
release_name: NonBlankString::from_str(&self.instance.release).unwrap(),
chart_name: NonBlankString::from_str("openbao/openbao").unwrap(),
chart_version: None,
values_overrides: None,
values_yaml,
values_yaml: Some(self.values()),
create_namespace: true,
install_only: false,
repository: Some(HelmRepository::new(
@@ -99,3 +154,35 @@ ui:
.create_interpret()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders the Helm values for a fixed host on a default instance,
    /// varying only the optional cert-manager issuer.
    fn rendered_values(tls_issuer: Option<&str>) -> String {
        let score = OpenbaoScore {
            instance: OpenbaoInstance::default(),
            host: String::from("bao.example"),
            openshift: false,
            tls_issuer: tls_issuer.map(String::from),
        };
        score.values()
    }

    /// Without an issuer the ingress carries no cert-manager annotation
    /// and no TLS secret.
    #[test]
    fn no_issuer_renders_plain_ingress() {
        let yaml = rendered_values(None);
        for absent in ["cert-manager.io/cluster-issuer", "secretName"] {
            assert!(!yaml.contains(absent));
        }
    }

    /// With an issuer the ingress gets the annotation plus a `tls` block
    /// covering the configured host.
    #[test]
    fn issuer_renders_certmanager_tls_for_host() {
        let yaml = rendered_values(Some("letsencrypt"));
        for expected in [
            "cert-manager.io/cluster-issuer: letsencrypt",
            "- hosts: [bao.example]",
            "secretName: openbao-tls",
        ] {
            assert!(yaml.contains(expected));
        }
    }
}

View File

@@ -13,8 +13,8 @@ use crate::{
};
use harmony_types::id::Id;
const DEFAULT_NAMESPACE: &str = "openbao";
const DEFAULT_POD: &str = "openbao-0";
use super::OpenbaoInstance;
const DEFAULT_KV_MOUNT: &str = "secret";
/// A policy to create in OpenBao.
@@ -72,13 +72,9 @@ pub struct OpenbaoJwtAuth {
/// deployments should use auto-unseal (Transit, cloud KMS, etc.).
#[derive(Debug, Clone, Serialize)]
pub struct OpenbaoSetupScore {
/// Kubernetes namespace where OpenBao is deployed.
#[serde(default = "default_namespace")]
pub namespace: String,
/// StatefulSet pod name to exec into.
#[serde(default = "default_pod")]
pub pod: String,
/// Where the target OpenBao is deployed (namespace + release).
#[serde(default)]
pub instance: OpenbaoInstance,
/// KV v2 mount path to enable.
#[serde(default = "default_kv_mount")]
@@ -97,12 +93,6 @@ pub struct OpenbaoSetupScore {
pub jwt_auth: Option<OpenbaoJwtAuth>,
}
fn default_namespace() -> String {
DEFAULT_NAMESPACE.to_string()
}
fn default_pod() -> String {
DEFAULT_POD.to_string()
}
/// Default for `OpenbaoSetupScore::kv_mount`: the `DEFAULT_KV_MOUNT` path
/// as an owned `String`.
fn default_kv_mount() -> String {
    String::from(DEFAULT_KV_MOUNT)
}
@@ -110,8 +100,7 @@ fn default_kv_mount() -> String {
impl Default for OpenbaoSetupScore {
fn default() -> Self {
Self {
namespace: default_namespace(),
pod: default_pod(),
instance: OpenbaoInstance::default(),
kv_mount: default_kv_mount(),
policies: Vec::new(),
users: Vec::new(),
@@ -164,7 +153,11 @@ impl OpenbaoSetupInterpret {
k8s: &harmony_k8s::K8sClient,
command: Vec<&str>,
) -> Result<String, String> {
k8s.exec_pod_capture_output(&self.score.pod, Some(&self.score.namespace), command)
k8s.exec_pod_capture_output(
&self.score.instance.pod(),
Some(&self.score.instance.namespace),
command,
)
.await
}
@@ -279,8 +272,8 @@ impl OpenbaoSetupInterpret {
// status and parse the `sealed` field authoritatively.
let sealed = match k8s
.exec_pod_capture(
&self.score.pod,
Some(&self.score.namespace),
&self.score.instance.pod(),
Some(&self.score.instance.namespace),
vec!["bao", "status", "-format=json"],
)
.await
@@ -514,12 +507,16 @@ impl<T: Topology + K8sclient> Interpret<T> for OpenbaoSetupInterpret {
.map_err(|e| InterpretError::new(format!("Failed to get K8s client: {e}")))?;
// Wait for the pod to be running before attempting any operations.
k8s.wait_for_pod_ready(&self.score.pod, Some(&self.score.namespace))
k8s.wait_for_pod_ready(
&self.score.instance.pod(),
Some(&self.score.instance.namespace),
)
.await
.map_err(|e| {
InterpretError::new(format!(
"Pod {}/{} not ready: {e}",
self.score.namespace, self.score.pod
self.score.instance.namespace,
self.score.instance.pod()
))
})?;
@@ -574,8 +571,8 @@ mod tests {
#[test]
fn default_score_carries_expected_mounts() {
let s = OpenbaoSetupScore::default();
assert_eq!(s.namespace, "openbao");
assert_eq!(s.pod, "openbao-0");
assert_eq!(s.instance.namespace, "openbao");
assert_eq!(s.instance.pod(), "openbao-0");
assert_eq!(s.kv_mount, "secret");
}
}