refactor/openbao-instance #312
@@ -1,45 +0,0 @@
|
||||
name: harmony-fleet-operator — deploy (staging)
# Manual "deploy" click for a published chart version (the release
# workflow publishes on tag). Runs on the in-cluster, permissionless
# staging runner, which pulls its kube + operator credentials from
# OpenBao via harmony_config. Runner/OpenBao bootstrap:
# fleet/deployment-process.md.
on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Release tag to deploy, e.g. harmony-fleet-operator-v0.0.2'
        required: true
        type: string

jobs:
  deploy-staging:
    container:
      image: hub.nationtech.io/harmony/harmony_composer:latest
    runs-on: fleet-staging
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # TODO: bake helm into harmony_composer so this step disappears.
      - name: Install helm
        run: curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash

      - name: Log in to hub.nationtech.io (helm OCI)
        env:
          # Passed through the environment rather than interpolated into
          # the script text, so quotes or `$` in the values cannot break
          # (or inject into) the shell command.
          HUB_BOT_USER: ${{ secrets.HUB_BOT_USER }}
          HUB_BOT_PASSWORD: ${{ secrets.HUB_BOT_PASSWORD }}
        run: |
          printf '%s\n' "$HUB_BOT_PASSWORD" \
            | helm registry login hub.nationtech.io \
                --username "$HUB_BOT_USER" --password-stdin

      - name: Deploy published operator chart
        env:
          # OpenBao auth comes from the runner env (machine-identity PR);
          # FleetDeploySecrets is then resolved from OpenBao.
          OPENBAO_URL: ${{ secrets.OPENBAO_URL }}
          HARMONY_SECRET_NAMESPACE: harmony
          # The tag is free-form user input from the dispatch form: hand it
          # to the shell as a variable, never as expanded script text.
          RELEASE_TAG: ${{ inputs.tag }}
        run: |
          cargo run --release -p harmony-fleet-deploy --bin harmony-fleet-deploy -- \
            --filter FleetOperatorScore \
            --from-tag "$RELEASE_TAG" \
            --namespace fleet-system \
            --yes
|
||||
@@ -245,8 +245,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
// Deploy + configure OpenBao (no JWT auth yet -- Zitadel isn't up)
|
||||
cleanup_openbao_webhook(&k8s).await?;
|
||||
OpenbaoScore {
|
||||
instance: Default::default(),
|
||||
host: OPENBAO_HOST.to_string(),
|
||||
openshift: false,
|
||||
tls_issuer: None,
|
||||
}
|
||||
.interpret(&Inventory::autoload(), &topology)
|
||||
.await
|
||||
|
||||
@@ -5,8 +5,10 @@ use harmony::{
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let openbao = OpenbaoScore {
|
||||
instance: Default::default(),
|
||||
host: "openbao.sebastien.sto1.nationtech.io".to_string(),
|
||||
openshift: false,
|
||||
tls_issuer: None,
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
|
||||
@@ -28,73 +28,46 @@ cargo run -p harmony-fleet-deploy --bin harmony-fleet-release -- \
|
||||
--from-tag harmony-fleet-operator-v0.0.2 --no-push
|
||||
```
|
||||
|
||||
## 2. Deploy a published version to staging (a click)
|
||||
## 2. Deploy a published version to staging (manual, for now)
|
||||
|
||||
The deploy is a manual `workflow_dispatch` —
|
||||
`.gitea/workflows/harmony-fleet-operator-deploy.yaml` — run after the
|
||||
release workflow finishes. Enter the **release tag** (e.g.
|
||||
`harmony-fleet-operator-v0.0.2`); the version is parsed from it in Rust,
|
||||
so the tag is the single source of truth and YAML never parses it. It
|
||||
executes on the **in-cluster staging runner** and runs
|
||||
`harmony-fleet-deploy --filter FleetOperatorScore --from-tag <tag>`,
|
||||
which installs the published
|
||||
`oci://hub.nationtech.io/harmony/harmony-fleet-operator:<version>` chart
|
||||
instead of rendering from source. Same command bootstraps and upgrades;
|
||||
re-running the same tag is a no-op.
|
||||
|
||||
**Auth is Zitadel-SSO-only on this path.** The job carries no plaintext
|
||||
secrets and the runner pod carries no Kubernetes permissions. It pulls a
|
||||
`FleetDeploySecrets` config from OpenBao via `harmony_config`:
|
||||
|
||||
- `operator_credentials_toml` — the operator's zitadel-jwt NATS
|
||||
credentials (applied as the operator Secret before `helm install`),
|
||||
- `kubeconfig` — the scoped `fleet-deployer` ServiceAccount, so the
|
||||
permissionless runner authenticates to the API only for this job.
|
||||
|
||||
There is no user/pass on the published-chart path — it's structurally
|
||||
unreachable. (`--nats-*` flags exist only for the dev/e2e rendered-chart
|
||||
path in step 1's `--no-push` smoke-test.)
|
||||
|
||||
Laptop fallback (override the config via env instead of OpenBao):
|
||||
Push to staging is manual until headless OpenBao auth (Zitadel machine
|
||||
identity) lands; secrets still come from shared OpenBao config. Point at
|
||||
your staging kube context and OpenBao, then run the operator deploy:
|
||||
|
||||
```sh
|
||||
export HARMONY_CONFIG_FleetDeploySecrets='{"operator_credentials_toml":"…","kubeconfig":"…"}'
|
||||
export OPENBAO_URL=<your OpenBao URL>
|
||||
export OPENBAO_TOKEN=<scoped read token for secret/<ns>/*>
|
||||
harmony-fleet-deploy --filter FleetOperatorScore \
|
||||
--operator-chart-version 0.0.2 --namespace fleet-system --yes
|
||||
--from-tag <release-tag> --namespace fleet-staging --yes
|
||||
```
|
||||
|
||||
It installs the published
|
||||
`oci://hub.nationtech.io/harmony/harmony-fleet-operator:<version>` chart;
|
||||
the version is parsed from the tag in Rust (the tag is the only source
|
||||
of truth). Same command bootstraps and upgrades; re-running the same tag
|
||||
is a no-op. Auth is Zitadel-SSO-only: the operator gets its zitadel-jwt
|
||||
`operator_credentials_toml` from `FleetDeploySecrets` in OpenBao (no
|
||||
user/pass on the published-chart path). For manual deploy, store that
|
||||
config **without** a `kubeconfig` field so your own kube context is used.
|
||||
|
||||
## 3. Roll forward
|
||||
|
||||
Re-run the deploy workflow with a newer (or previous-good) version.
|
||||
`helm upgrade --install` applies it and fails loudly if convergence
|
||||
fails — no automatic rollback. Fix the spec, bump, re-run.
|
||||
Re-run with a newer (or previous-good) tag. `helm upgrade --install`
|
||||
applies it and fails loudly if convergence fails — no automatic
|
||||
rollback. Fix the spec, bump, re-run.
|
||||
|
||||
## Automated vs. manual
|
||||
|
||||
| Step | Where |
|
||||
|---|---|
|
||||
| Build + push image + chart on tag | CI (`release` job, on tag) |
|
||||
| Deploy a published version + roll forward | CI (`deploy-staging`, manual `workflow_dispatch` click) |
|
||||
| Push to staging + roll forward | Manual (operator runs the deploy) |
|
||||
|
||||
## One-time bootstrap (in-cluster staging runner)
|
||||
## Future: in-cluster CD (blocked on headless OpenBao auth)
|
||||
|
||||
`deploy-staging` runs `runs-on: fleet-staging` — a Gitea `act_runner`
|
||||
provisioned once inside an isolated tenant namespace in the cluster.
|
||||
This is what makes the API reachable without exposing it publicly or
|
||||
over the VPN: the runner is already inside. Provision (reproducibly, via
|
||||
Harmony Scores — not hand-rolled manifests):
|
||||
|
||||
- **Runner pod** with `automountServiceAccountToken: false` — zero
|
||||
standing Kubernetes permissions.
|
||||
- **Egress NetworkPolicy** allowing only the LB IP on :443 (OpenBao /
|
||||
Zitadel / hub via the DNS alias) plus the API endpoint; deny the rest.
|
||||
- **`fleet-deployer` ServiceAccount + RBAC** scoped to exactly what the
|
||||
operator chart installs (its CRDs, ClusterRole, ClusterRoleBinding +
|
||||
the `fleet-system` namespaced resources).
|
||||
- **Store the deployer kubeconfig + operator credentials in OpenBao**
|
||||
under `FleetDeploySecrets` so the job resolves them at runtime.
|
||||
|
||||
Until the Zitadel→OpenBao machine-identity auth lands, the runner's
|
||||
OpenBao auth env is supplied directly; afterward the runner authenticates
|
||||
as a Zitadel machine identity. Production-gated promotion is a follow-up
|
||||
(ADR-012-2).
|
||||
Once `harmony_config` can authenticate to OpenBao headlessly (Zitadel
|
||||
machine identity), these exports become a `deploy-staging` workflow on an
|
||||
in-cluster, permissionless Gitea runner that pulls a `fleet-deployer`
|
||||
`kubeconfig` + operator credentials from OpenBao at job time (provisioned
|
||||
via a `TenantScore` with one extra egress CIDR to the OpenBao/Zitadel
|
||||
ingress). Production-gated promotion is a later step (ADR-012-2).
|
||||
|
||||
@@ -15,26 +15,72 @@ use crate::{
|
||||
|
||||
pub use setup::{OpenbaoJwtAuth, OpenbaoPolicy, OpenbaoSetupScore, OpenbaoUser};
|
||||
|
||||
const DEFAULT_NAMESPACE: &str = "openbao";
|
||||
const DEFAULT_RELEASE: &str = "openbao";
|
||||
|
||||
/// Where one OpenBao instance lives — the single authority both the
|
||||
/// deploy ([`OpenbaoScore`]) and the setup ([`OpenbaoSetupScore`]) take,
|
||||
/// so namespace and release can't drift apart.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct OpenbaoInstance {
|
||||
pub namespace: String,
|
||||
/// Helm release name; the chart names its StatefulSet after it.
|
||||
pub release: String,
|
||||
}
|
||||
|
||||
impl OpenbaoInstance {
|
||||
/// `{release}-0` — the chart deploys a StatefulSet, so a stored pod
|
||||
/// literal would rot the moment `release` changes.
|
||||
pub fn pod(&self) -> String {
|
||||
format!("{}-0", self.release)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for OpenbaoInstance {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
namespace: DEFAULT_NAMESPACE.to_string(),
|
||||
release: DEFAULT_RELEASE.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct OpenbaoScore {
|
||||
/// Where this OpenBao is deployed (namespace + helm release).
|
||||
#[serde(default)]
|
||||
pub instance: OpenbaoInstance,
|
||||
/// Host used for external access (ingress)
|
||||
pub host: String,
|
||||
/// Set to true when deploying to OpenShift. Defaults to false for k3d/Kubernetes.
|
||||
#[serde(default)]
|
||||
pub openshift: bool,
|
||||
/// cert-manager `ClusterIssuer` for ingress TLS. `None` serves plain
|
||||
/// HTTP (TLS terminated elsewhere); `Some` adds the annotation + a
|
||||
/// `tls` block so cert-manager issues and renews the cert. Carries the
|
||||
/// issuer name because a bare bool can't address an issuer.
|
||||
#[serde(default)]
|
||||
pub tls_issuer: Option<String>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + HelmCommand> Score<T> for OpenbaoScore {
|
||||
fn name(&self) -> String {
|
||||
"OpenbaoScore".to_string()
|
||||
}
|
||||
impl OpenbaoScore {
|
||||
fn values(&self) -> String {
|
||||
let Self {
|
||||
host,
|
||||
openshift,
|
||||
tls_issuer,
|
||||
instance: _,
|
||||
} = self;
|
||||
// Edge TLS: the listener stays plain HTTP behind the ingress, which
|
||||
// terminates with the cert-manager-issued cert.
|
||||
let ingress_tls = match tls_issuer {
|
||||
Some(issuer) => format!(
|
||||
"\n annotations:\n cert-manager.io/cluster-issuer: {issuer}\n tls:\n - hosts: [{host}]\n secretName: openbao-tls"
|
||||
),
|
||||
None => String::new(),
|
||||
};
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
let host = &self.host;
|
||||
let openshift = self.openshift;
|
||||
|
||||
let values_yaml = Some(format!(
|
||||
format!(
|
||||
r#"global:
|
||||
openshift: {openshift}
|
||||
server:
|
||||
@@ -65,7 +111,7 @@ server:
|
||||
ingress:
|
||||
enabled: true
|
||||
hosts:
|
||||
- host: {host}
|
||||
- host: {host}{ingress_tls}
|
||||
dataStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
@@ -79,15 +125,24 @@ server:
|
||||
accessMode: ReadWriteOnce
|
||||
ui:
|
||||
enabled: true"#
|
||||
));
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + HelmCommand> Score<T> for OpenbaoScore {
|
||||
fn name(&self) -> String {
|
||||
"OpenbaoScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str("openbao").unwrap()),
|
||||
release_name: NonBlankString::from_str("openbao").unwrap(),
|
||||
namespace: Some(NonBlankString::from_str(&self.instance.namespace).unwrap()),
|
||||
release_name: NonBlankString::from_str(&self.instance.release).unwrap(),
|
||||
chart_name: NonBlankString::from_str("openbao/openbao").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml,
|
||||
values_yaml: Some(self.values()),
|
||||
create_namespace: true,
|
||||
install_only: false,
|
||||
repository: Some(HelmRepository::new(
|
||||
@@ -99,3 +154,35 @@ ui:
|
||||
.create_interpret()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders the chart values for a fixed host (`bao.example`, default
    /// instance, non-OpenShift) with the given TLS issuer.
    fn render(tls_issuer: Option<&str>) -> String {
        let score = OpenbaoScore {
            instance: OpenbaoInstance::default(),
            host: String::from("bao.example"),
            openshift: false,
            tls_issuer: tls_issuer.map(String::from),
        };
        score.values()
    }

    /// Without an issuer, no cert-manager annotation or TLS secret shows up.
    #[test]
    fn no_issuer_renders_plain_ingress() {
        let rendered = render(None);
        for absent in ["cert-manager.io/cluster-issuer", "secretName"] {
            assert!(!rendered.contains(absent));
        }
    }

    /// With an issuer, the annotation, the host list and the TLS secret
    /// are all rendered.
    #[test]
    fn issuer_renders_certmanager_tls_for_host() {
        let rendered = render(Some("letsencrypt"));
        for present in [
            "cert-manager.io/cluster-issuer: letsencrypt",
            "- hosts: [bao.example]",
            "secretName: openbao-tls",
        ] {
            assert!(rendered.contains(present));
        }
    }
}
|
||||
|
||||
@@ -13,8 +13,8 @@ use crate::{
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
const DEFAULT_NAMESPACE: &str = "openbao";
|
||||
const DEFAULT_POD: &str = "openbao-0";
|
||||
use super::OpenbaoInstance;
|
||||
|
||||
const DEFAULT_KV_MOUNT: &str = "secret";
|
||||
|
||||
/// A policy to create in OpenBao.
|
||||
@@ -72,13 +72,9 @@ pub struct OpenbaoJwtAuth {
|
||||
/// deployments should use auto-unseal (Transit, cloud KMS, etc.).
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct OpenbaoSetupScore {
|
||||
/// Kubernetes namespace where OpenBao is deployed.
|
||||
#[serde(default = "default_namespace")]
|
||||
pub namespace: String,
|
||||
|
||||
/// StatefulSet pod name to exec into.
|
||||
#[serde(default = "default_pod")]
|
||||
pub pod: String,
|
||||
/// Where the target OpenBao is deployed (namespace + release).
|
||||
#[serde(default)]
|
||||
pub instance: OpenbaoInstance,
|
||||
|
||||
/// KV v2 mount path to enable.
|
||||
#[serde(default = "default_kv_mount")]
|
||||
@@ -97,12 +93,6 @@ pub struct OpenbaoSetupScore {
|
||||
pub jwt_auth: Option<OpenbaoJwtAuth>,
|
||||
}
|
||||
|
||||
fn default_namespace() -> String {
|
||||
DEFAULT_NAMESPACE.to_string()
|
||||
}
|
||||
fn default_pod() -> String {
|
||||
DEFAULT_POD.to_string()
|
||||
}
|
||||
fn default_kv_mount() -> String {
|
||||
DEFAULT_KV_MOUNT.to_string()
|
||||
}
|
||||
@@ -110,8 +100,7 @@ fn default_kv_mount() -> String {
|
||||
impl Default for OpenbaoSetupScore {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
namespace: default_namespace(),
|
||||
pod: default_pod(),
|
||||
instance: OpenbaoInstance::default(),
|
||||
kv_mount: default_kv_mount(),
|
||||
policies: Vec::new(),
|
||||
users: Vec::new(),
|
||||
@@ -164,8 +153,12 @@ impl OpenbaoSetupInterpret {
|
||||
k8s: &harmony_k8s::K8sClient,
|
||||
command: Vec<&str>,
|
||||
) -> Result<String, String> {
|
||||
k8s.exec_pod_capture_output(&self.score.pod, Some(&self.score.namespace), command)
|
||||
.await
|
||||
k8s.exec_pod_capture_output(
|
||||
&self.score.instance.pod(),
|
||||
Some(&self.score.instance.namespace),
|
||||
command,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn bao_command(
|
||||
@@ -279,8 +272,8 @@ impl OpenbaoSetupInterpret {
|
||||
// status and parse the `sealed` field authoritatively.
|
||||
let sealed = match k8s
|
||||
.exec_pod_capture(
|
||||
&self.score.pod,
|
||||
Some(&self.score.namespace),
|
||||
&self.score.instance.pod(),
|
||||
Some(&self.score.instance.namespace),
|
||||
vec!["bao", "status", "-format=json"],
|
||||
)
|
||||
.await
|
||||
@@ -514,14 +507,18 @@ impl<T: Topology + K8sclient> Interpret<T> for OpenbaoSetupInterpret {
|
||||
.map_err(|e| InterpretError::new(format!("Failed to get K8s client: {e}")))?;
|
||||
|
||||
// Wait for the pod to be running before attempting any operations.
|
||||
k8s.wait_for_pod_ready(&self.score.pod, Some(&self.score.namespace))
|
||||
.await
|
||||
.map_err(|e| {
|
||||
InterpretError::new(format!(
|
||||
"Pod {}/{} not ready: {e}",
|
||||
self.score.namespace, self.score.pod
|
||||
))
|
||||
})?;
|
||||
k8s.wait_for_pod_ready(
|
||||
&self.score.instance.pod(),
|
||||
Some(&self.score.instance.namespace),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
InterpretError::new(format!(
|
||||
"Pod {}/{} not ready: {e}",
|
||||
self.score.instance.namespace,
|
||||
self.score.instance.pod()
|
||||
))
|
||||
})?;
|
||||
|
||||
let root_token = self.init(&k8s).await?;
|
||||
self.unseal(&k8s).await?;
|
||||
@@ -574,8 +571,8 @@ mod tests {
|
||||
#[test]
|
||||
fn default_score_carries_expected_mounts() {
|
||||
let s = OpenbaoSetupScore::default();
|
||||
assert_eq!(s.namespace, "openbao");
|
||||
assert_eq!(s.pod, "openbao-0");
|
||||
assert_eq!(s.instance.namespace, "openbao");
|
||||
assert_eq!(s.instance.pod(), "openbao-0");
|
||||
assert_eq!(s.kv_mount, "secret");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user