Compare commits
1 Commits
feat/chang
...
feat/zitad
| Author | SHA1 | Date | |
|---|---|---|---|
| a0c0905c3b |
18
Cargo.lock
generated
18
Cargo.lock
generated
@@ -2293,6 +2293,18 @@ dependencies = [
|
|||||||
"url",
|
"url",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "example-zitadel"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"harmony",
|
||||||
|
"harmony_cli",
|
||||||
|
"harmony_macros",
|
||||||
|
"harmony_types",
|
||||||
|
"tokio",
|
||||||
|
"url",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "example_validate_ceph_cluster_health"
|
name = "example_validate_ceph_cluster_health"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@@ -2779,7 +2791,6 @@ name = "harmony-node-readiness-endpoint"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
"chrono",
|
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"k8s-openapi",
|
"k8s-openapi",
|
||||||
"kube",
|
"kube",
|
||||||
@@ -2788,7 +2799,6 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tower",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -6917,9 +6927,9 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tower"
|
name = "tower"
|
||||||
version = "0.5.3"
|
version = "0.5.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
|
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
resolver = "2"
|
resolver = "2"
|
||||||
members = [
|
members = [
|
||||||
"private_repos/*",
|
"private_repos/*",
|
||||||
|
"examples/*",
|
||||||
"harmony",
|
"harmony",
|
||||||
"harmony_types",
|
"harmony_types",
|
||||||
"harmony_macros",
|
"harmony_macros",
|
||||||
@@ -19,7 +20,6 @@ members = [
|
|||||||
"brocade",
|
"brocade",
|
||||||
"harmony_agent",
|
"harmony_agent",
|
||||||
"harmony_agent/deploy", "harmony_node_readiness",
|
"harmony_agent/deploy", "harmony_node_readiness",
|
||||||
"examples/*",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
|
|||||||
@@ -103,6 +103,12 @@ pub struct DrainOptions {
|
|||||||
pub timeout: Duration,
|
pub timeout: Duration,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub enum WriteMode {
|
||||||
|
CreateOrUpdate,
|
||||||
|
Create,
|
||||||
|
Update,
|
||||||
|
}
|
||||||
|
|
||||||
impl Default for DrainOptions {
|
impl Default for DrainOptions {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
@@ -834,6 +840,16 @@ impl K8sClient {
|
|||||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
|
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
|
||||||
<K as kube::Resource>::DynamicType: Default,
|
<K as kube::Resource>::DynamicType: Default,
|
||||||
{
|
{
|
||||||
|
self.apply_with_strategy(resource, namespace, WriteMode::CreateOrUpdate).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn apply_with_strategy<K>(&self, resource: &K, namespace: Option<&str>, apply_strategy: WriteMode) -> Result<K, Error>
|
||||||
|
where
|
||||||
|
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
|
||||||
|
<K as kube::Resource>::DynamicType: Default,
|
||||||
|
{
|
||||||
|
todo!("Refactoring in progress: Handle the apply_strategy parameter and add utility functions like apply that set it for ease of use (create, update)");
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"Applying resource {:?} with ns {:?}",
|
"Applying resource {:?} with ns {:?}",
|
||||||
resource.meta().name,
|
resource.meta().name,
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
interpret::Outcome,
|
|
||||||
inventory::Inventory,
|
inventory::Inventory,
|
||||||
modules::postgresql::{
|
modules::postgresql::{
|
||||||
K8sPostgreSQLScore,
|
K8sPostgreSQLScore,
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use k8s_openapi::{NamespaceResourceScope, ResourceScope};
|
use k8s_openapi::ResourceScope;
|
||||||
use kube::Resource;
|
use kube::Resource;
|
||||||
use log::info;
|
use log::info;
|
||||||
use serde::{Serialize, de::DeserializeOwned};
|
use serde::{Serialize, de::DeserializeOwned};
|
||||||
@@ -109,7 +109,7 @@ where
|
|||||||
topology
|
topology
|
||||||
.k8s_client()
|
.k8s_client()
|
||||||
.await
|
.await
|
||||||
.expect("Environment should provide enough information to instanciate a client")
|
.map_err(|e| InterpretError::new(format!("Failed to get k8s client : {e}")))
|
||||||
.apply_many(&self.score.resource, self.score.namespace.as_deref())
|
.apply_many(&self.score.resource, self.score.namespace.as_deref())
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
use kube::{CustomResource, api::ObjectMeta};
|
use kube::{CustomResource, api::ObjectMeta};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
@@ -16,6 +18,10 @@ pub struct ClusterSpec {
|
|||||||
pub image_name: Option<String>,
|
pub image_name: Option<String>,
|
||||||
pub storage: Storage,
|
pub storage: Storage,
|
||||||
pub bootstrap: Bootstrap,
|
pub bootstrap: Bootstrap,
|
||||||
|
/// This must be set to None if you want cnpg to generate a superuser secret
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub superuser_secret: Option<BTreeMap<String, String>>,
|
||||||
|
pub enable_superuser_access: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Cluster {
|
impl Default for Cluster {
|
||||||
@@ -34,6 +40,8 @@ impl Default for ClusterSpec {
|
|||||||
image_name: None,
|
image_name: None,
|
||||||
storage: Storage::default(),
|
storage: Storage::default(),
|
||||||
bootstrap: Bootstrap::default(),
|
bootstrap: Bootstrap::default(),
|
||||||
|
superuser_secret: None,
|
||||||
|
enable_superuser_access: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,8 +52,8 @@ pub struct CloudNativePgOperatorScore {
|
|||||||
pub source_namespace: String,
|
pub source_namespace: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for CloudNativePgOperatorScore {
|
impl CloudNativePgOperatorScore {
|
||||||
fn default() -> Self {
|
fn default_openshift() -> Self {
|
||||||
Self {
|
Self {
|
||||||
namespace: "openshift-operators".to_string(),
|
namespace: "openshift-operators".to_string(),
|
||||||
channel: "stable-v1".to_string(),
|
channel: "stable-v1".to_string(),
|
||||||
@@ -68,7 +68,7 @@ impl CloudNativePgOperatorScore {
|
|||||||
pub fn new(namespace: &str) -> Self {
|
pub fn new(namespace: &str) -> Self {
|
||||||
Self {
|
Self {
|
||||||
namespace: namespace.to_string(),
|
namespace: namespace.to_string(),
|
||||||
..Default::default()
|
..Self::default_openshift()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::interpret::Interpret;
|
use crate::interpret::Interpret;
|
||||||
@@ -66,6 +68,11 @@ impl<T: Topology + K8sclient> Score<T> for K8sPostgreSQLScore {
|
|||||||
owner: "app".to_string(),
|
owner: "app".to_string(),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
// superuser_secret: Some(BTreeMap::from([(
|
||||||
|
// "name".to_string(),
|
||||||
|
// format!("{}-superuser", self.config.cluster_name.clone()),
|
||||||
|
// )])),
|
||||||
|
enable_superuser_access: true,
|
||||||
..ClusterSpec::default()
|
..ClusterSpec::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1,43 +1,387 @@
|
|||||||
|
use base64::{Engine, prelude::BASE64_STANDARD};
|
||||||
|
use rand::{thread_rng, Rng};
|
||||||
|
use rand::distributions::Alphanumeric;
|
||||||
|
use k8s_openapi::api::core::v1::Namespace;
|
||||||
|
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||||
|
use k8s_openapi::{ByteString, api::core::v1::Secret};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
use harmony_macros::hurl;
|
use harmony_macros::hurl;
|
||||||
|
use harmony_types::id::Id;
|
||||||
|
use harmony_types::storage::StorageSize;
|
||||||
|
use log::{debug, error, info, trace, warn};
|
||||||
use non_blank_string_rs::NonBlankString;
|
use non_blank_string_rs::NonBlankString;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
interpret::Interpret,
|
data::Version,
|
||||||
|
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||||
|
inventory::Inventory,
|
||||||
modules::helm::chart::{HelmChartScore, HelmRepository},
|
modules::helm::chart::{HelmChartScore, HelmRepository},
|
||||||
|
modules::k8s::resource::K8sResourceScore,
|
||||||
|
modules::postgresql::capability::{PostgreSQL, PostgreSQLClusterRole, PostgreSQLConfig},
|
||||||
score::Score,
|
score::Score,
|
||||||
topology::{HelmCommand, K8sclient, Topology},
|
topology::{HelmCommand, K8sclient, Topology},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const NAMESPACE: &str = "zitadel";
|
||||||
|
const PG_CLUSTER_NAME: &str = "zitadel-pg";
|
||||||
|
const MASTERKEY_SECRET_NAME: &str = "zitadel-masterkey";
|
||||||
|
|
||||||
|
/// Opinionated Zitadel deployment score.
|
||||||
|
///
|
||||||
|
/// Deploys a PostgreSQL cluster (via the [`PostgreSQL`] trait) and the Zitadel
|
||||||
|
/// Helm chart into the same namespace. Intended as a central multi-tenant IdP
|
||||||
|
/// with SSO for OKD/OpenShift, OpenBao, Harbor, Grafana, Nextcloud, Ente
|
||||||
|
/// Photos, and others.
|
||||||
|
///
|
||||||
|
/// # Ingress annotations
|
||||||
|
/// No controller-specific ingress annotations are set. The Zitadel service
|
||||||
|
/// already carries the Traefik h2c annotation for k3s/k3d by default.
|
||||||
|
/// Add annotations via `values_overrides` depending on your distribution:
|
||||||
|
/// - NGINX: `nginx.ingress.kubernetes.io/backend-protocol: GRPC`
|
||||||
|
/// - OpenShift HAProxy: `haproxy.router.openshift.io/*` or use OpenShift Routes
|
||||||
|
/// - AWS ALB: set `ingress.controller: aws`
|
||||||
|
///
|
||||||
|
/// # Database credentials
|
||||||
|
/// CNPG creates a `<cluster>-superuser` secret with key `password`. Because
|
||||||
|
/// `envVarsSecret` injects secret keys verbatim as env var names and the CNPG
|
||||||
|
/// key (`password`) does not match ZITADEL's expected name
|
||||||
|
/// (`ZITADEL_DATABASE_POSTGRES_USER_PASSWORD`), individual `env` entries with
|
||||||
|
/// `valueFrom.secretKeyRef` are used instead. For environments with an
|
||||||
|
/// External Secrets Operator or similar, create a dedicated secret with the
|
||||||
|
/// correct ZITADEL env var names and switch to `envVarsSecret`.
|
||||||
#[derive(Debug, Serialize, Clone)]
|
#[derive(Debug, Serialize, Clone)]
|
||||||
pub struct ZitadelScore {
|
pub struct ZitadelScore {
|
||||||
/// Host used for external access (ingress)
|
/// External domain (e.g. `"auth.example.com"`).
|
||||||
pub host: String,
|
pub host: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Topology + K8sclient + HelmCommand> Score<T> for ZitadelScore {
|
impl<T: Topology + K8sclient + HelmCommand + PostgreSQL> Score<T> for ZitadelScore {
|
||||||
fn name(&self) -> String {
|
fn name(&self) -> String {
|
||||||
"ZitadelScore".to_string()
|
"ZitadelScore".to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||||
// TODO exec pod commands to initialize secret store if not already done
|
Box::new(ZitadelInterpret {
|
||||||
|
host: self.host.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct ZitadelInterpret {
|
||||||
|
host: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<T: Topology + K8sclient + HelmCommand + PostgreSQL> Interpret<T> for ZitadelInterpret {
|
||||||
|
async fn execute(
|
||||||
|
&self,
|
||||||
|
inventory: &Inventory,
|
||||||
|
topology: &T,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
info!(
|
||||||
|
"[Zitadel] Starting full deployment — namespace: '{NAMESPACE}', host: '{}'",
|
||||||
|
self.host
|
||||||
|
);
|
||||||
|
|
||||||
|
info!("Creating namespace {NAMESPACE} if it does not exist");
|
||||||
|
K8sResourceScore::single(
|
||||||
|
Namespace {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(NAMESPACE.to_string()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.interpret(inventory, topology)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// --- Step 1: PostgreSQL -------------------------------------------
|
||||||
|
|
||||||
|
let pg_config = PostgreSQLConfig {
|
||||||
|
cluster_name: PG_CLUSTER_NAME.to_string(),
|
||||||
|
instances: 2,
|
||||||
|
storage_size: StorageSize::gi(10),
|
||||||
|
role: PostgreSQLClusterRole::Primary,
|
||||||
|
namespace: NAMESPACE.to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"[Zitadel] Deploying PostgreSQL cluster '{}' — instances: {}, storage: 10Gi, namespace: '{}'",
|
||||||
|
pg_config.cluster_name, pg_config.instances, pg_config.namespace
|
||||||
|
);
|
||||||
|
|
||||||
|
topology.deploy(&pg_config).await.map_err(|e| {
|
||||||
|
let msg = format!(
|
||||||
|
"[Zitadel] PostgreSQL deployment failed for '{}': {e}",
|
||||||
|
pg_config.cluster_name
|
||||||
|
);
|
||||||
|
error!("{msg}");
|
||||||
|
InterpretError::new(msg)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"[Zitadel] PostgreSQL cluster '{}' deployed",
|
||||||
|
pg_config.cluster_name
|
||||||
|
);
|
||||||
|
|
||||||
|
// --- Step 2: Resolve internal DB endpoint -------------------------
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"[Zitadel] Resolving internal endpoint for cluster '{}'",
|
||||||
|
pg_config.cluster_name
|
||||||
|
);
|
||||||
|
|
||||||
|
let endpoint = topology.get_endpoint(&pg_config).await.map_err(|e| {
|
||||||
|
let msg = format!(
|
||||||
|
"[Zitadel] Failed to resolve endpoint for cluster '{}': {e}",
|
||||||
|
pg_config.cluster_name
|
||||||
|
);
|
||||||
|
error!("{msg}");
|
||||||
|
InterpretError::new(msg)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"[Zitadel] DB endpoint resolved — host: '{}', port: {}",
|
||||||
|
endpoint.host, endpoint.port
|
||||||
|
);
|
||||||
|
|
||||||
|
// The CNPG-managed superuser secret contains 'password', 'username',
|
||||||
|
// 'host', 'port', 'dbname', 'uri'. We reference 'password' directly
|
||||||
|
// via env.valueFrom.secretKeyRef because CNPG's key names do not
|
||||||
|
// match ZITADEL's required env var names.
|
||||||
|
let pg_user_secret = format!("{PG_CLUSTER_NAME}-app");
|
||||||
|
let pg_superuser_secret = format!("{PG_CLUSTER_NAME}-superuser");
|
||||||
|
let db_host = &endpoint.host;
|
||||||
|
let db_port = endpoint.port;
|
||||||
let host = &self.host;
|
let host = &self.host;
|
||||||
|
|
||||||
let values_yaml = Some(format!(r#""#));
|
debug!(
|
||||||
|
"[Zitadel] DB credentials source — secret: '{pg_user_secret}', key: 'password'"
|
||||||
|
);
|
||||||
|
debug!(
|
||||||
|
"[Zitadel] DB credentials source — superuser secret: '{pg_superuser_secret}', key: 'password'"
|
||||||
|
);
|
||||||
|
|
||||||
todo!("This is not complete yet");
|
// --- Step 3: Create masterkey secret ------------------------------------
|
||||||
|
|
||||||
HelmChartScore {
|
debug!(
|
||||||
namespace: Some(NonBlankString::from_str("zitadel").unwrap()),
|
"[Zitadel] Creating masterkey secret '{}' in namespace '{}'",
|
||||||
|
MASTERKEY_SECRET_NAME, NAMESPACE
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
// Masterkey for symmetric encryption — must be exactly 32 ASCII bytes.
|
||||||
|
let masterkey: String = thread_rng()
|
||||||
|
.sample_iter(&Alphanumeric)
|
||||||
|
.take(32)
|
||||||
|
.map(char::from)
|
||||||
|
.collect();
|
||||||
|
let masterkey_bytes = BASE64_STANDARD.encode(&masterkey);
|
||||||
|
|
||||||
|
let mut masterkey_data: BTreeMap<String, ByteString> = BTreeMap::new();
|
||||||
|
masterkey_data.insert("masterkey".to_string(), ByteString(masterkey_bytes.into()));
|
||||||
|
|
||||||
|
let masterkey_secret = Secret {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(MASTERKEY_SECRET_NAME.to_string()),
|
||||||
|
namespace: Some(NAMESPACE.to_string()),
|
||||||
|
..ObjectMeta::default()
|
||||||
|
},
|
||||||
|
data: Some(masterkey_data),
|
||||||
|
..Secret::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
topology
|
||||||
|
.k8s_client()
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(format!("Failed to get k8s client : {e}")))
|
||||||
|
.create(masterkey_secret)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
K8sResourceScore::single(masterkey_secret, Some(NAMESPACE.to_string()))
|
||||||
|
.interpret(inventory, topology)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
let msg = format!("[Zitadel] Failed to create masterkey secret: {e}");
|
||||||
|
error!("{msg}");
|
||||||
|
InterpretError::new(msg)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"[Zitadel] Masterkey secret '{}' created",
|
||||||
|
MASTERKEY_SECRET_NAME
|
||||||
|
);
|
||||||
|
|
||||||
|
// --- Step 4: Build Helm values ------------------------------------
|
||||||
|
|
||||||
|
warn!(
|
||||||
|
"[Zitadel] No ingress controller annotations are set. \
|
||||||
|
Add controller-specific annotations for your distribution: \
|
||||||
|
NGINX → 'nginx.ingress.kubernetes.io/backend-protocol: GRPC'; \
|
||||||
|
OpenShift HAProxy → 'haproxy.router.openshift.io/*' or use Routes; \
|
||||||
|
AWS ALB → set ingress.controller=aws."
|
||||||
|
);
|
||||||
|
|
||||||
|
let values_yaml = format!(
|
||||||
|
r#"zitadel:
|
||||||
|
masterkeySecretName: "{MASTERKEY_SECRET_NAME}"
|
||||||
|
configmapConfig:
|
||||||
|
ExternalDomain: "{host}"
|
||||||
|
ExternalSecure: true
|
||||||
|
TLS:
|
||||||
|
Enabled: false
|
||||||
|
Database:
|
||||||
|
Postgres:
|
||||||
|
Host: "{db_host}"
|
||||||
|
Port: {db_port}
|
||||||
|
Database: zitadel
|
||||||
|
MaxOpenConns: 20
|
||||||
|
MaxIdleConns: 10
|
||||||
|
User:
|
||||||
|
Username: postgres
|
||||||
|
SSL:
|
||||||
|
Mode: require
|
||||||
|
Admin:
|
||||||
|
Username: postgres
|
||||||
|
SSL:
|
||||||
|
Mode: require
|
||||||
|
# Directly import credentials from the postgres secret
|
||||||
|
# TODO : use a less privileged postgres user
|
||||||
|
env:
|
||||||
|
- name: ZITADEL_DATABASE_POSTGRES_USER_USERNAME
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: "{pg_superuser_secret}"
|
||||||
|
key: user
|
||||||
|
- name: ZITADEL_DATABASE_POSTGRES_USER_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: "{pg_superuser_secret}"
|
||||||
|
key: password
|
||||||
|
- name: ZITADEL_DATABASE_POSTGRES_ADMIN_USERNAME
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: "{pg_superuser_secret}"
|
||||||
|
key: user
|
||||||
|
- name: ZITADEL_DATABASE_POSTGRES_ADMIN_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: "{pg_superuser_secret}"
|
||||||
|
key: password
|
||||||
|
# Security context for OpenShift restricted PSA compliance
|
||||||
|
podSecurityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: null
|
||||||
|
fsGroup: null
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: null
|
||||||
|
fsGroup: null
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
# Init job security context (runs before main deployment)
|
||||||
|
initJob:
|
||||||
|
podSecurityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: null
|
||||||
|
fsGroup: null
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: null
|
||||||
|
fsGroup: null
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
# Setup job security context
|
||||||
|
setupJob:
|
||||||
|
podSecurityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: null
|
||||||
|
fsGroup: null
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: null
|
||||||
|
fsGroup: null
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
ingress:
|
||||||
|
enabled: true
|
||||||
|
annotations: {{}}
|
||||||
|
hosts:
|
||||||
|
- host: "{host}"
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
login:
|
||||||
|
enabled: true
|
||||||
|
podSecurityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: null
|
||||||
|
fsGroup: null
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: null
|
||||||
|
fsGroup: null
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
ingress:
|
||||||
|
enabled: true
|
||||||
|
annotations: {{}}
|
||||||
|
hosts:
|
||||||
|
- host: "{host}"
|
||||||
|
paths:
|
||||||
|
- path: /ui/v2/login
|
||||||
|
pathType: Prefix"#
|
||||||
|
);
|
||||||
|
|
||||||
|
trace!("[Zitadel] Helm values YAML:\n{values_yaml}");
|
||||||
|
|
||||||
|
// --- Step 5: Deploy Helm chart ------------------------------------
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"[Zitadel] Deploying Helm chart 'zitadel/zitadel' as release 'zitadel' in namespace '{NAMESPACE}'"
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = HelmChartScore {
|
||||||
|
namespace: Some(NonBlankString::from_str(NAMESPACE).unwrap()),
|
||||||
release_name: NonBlankString::from_str("zitadel").unwrap(),
|
release_name: NonBlankString::from_str("zitadel").unwrap(),
|
||||||
chart_name: NonBlankString::from_str("zitadel/zitadel").unwrap(),
|
chart_name: NonBlankString::from_str("zitadel/zitadel").unwrap(),
|
||||||
chart_version: None,
|
chart_version: None,
|
||||||
values_overrides: None,
|
values_overrides: None,
|
||||||
values_yaml,
|
values_yaml: Some(values_yaml),
|
||||||
create_namespace: true,
|
create_namespace: true,
|
||||||
install_only: false,
|
install_only: false,
|
||||||
repository: Some(HelmRepository::new(
|
repository: Some(HelmRepository::new(
|
||||||
@@ -46,6 +390,30 @@ impl<T: Topology + K8sclient + HelmCommand> Score<T> for ZitadelScore {
|
|||||||
true,
|
true,
|
||||||
)),
|
)),
|
||||||
}
|
}
|
||||||
.create_interpret()
|
.interpret(inventory, topology)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match &result {
|
||||||
|
Ok(_) => info!("[Zitadel] Helm chart deployed successfully"),
|
||||||
|
Err(e) => error!("[Zitadel] Helm chart deployment failed: {e}"),
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_name(&self) -> InterpretName {
|
||||||
|
InterpretName::Custom("Zitadel")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_version(&self) -> Version {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_status(&self) -> InterpretStatus {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_children(&self) -> Vec<Id> {
|
||||||
|
vec![]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,5 +13,3 @@ env_logger.workspace = true
|
|||||||
log.workspace = true
|
log.workspace = true
|
||||||
tokio.workspace = true
|
tokio.workspace = true
|
||||||
reqwest.workspace = true
|
reqwest.workspace = true
|
||||||
chrono.workspace = true
|
|
||||||
tower = "0.5.3"
|
|
||||||
|
|||||||
@@ -4,11 +4,10 @@
|
|||||||
|
|
||||||
Designed for **bare-metal Kubernetes clusters** with external load balancers (HAProxy, OPNsense, F5, etc.).
|
Designed for **bare-metal Kubernetes clusters** with external load balancers (HAProxy, OPNsense, F5, etc.).
|
||||||
|
|
||||||
Exposes a simple HTTP endpoint (`/health`) on each node:
|
It exposes a simple, reliable HTTP endpoint (`/health`) on each node that returns:
|
||||||
|
|
||||||
- **200 OK** — node is healthy and ready to receive traffic
|
- **200 OK** — node is healthy and ready to receive traffic
|
||||||
- **503 Service Unavailable** — node should be removed from the load balancer pool
|
- **503 Service Unavailable** — node should be removed from the load balancer pool
|
||||||
- **500 Internal Server Error** — misconfiguration (e.g. `NODE_NAME` not set)
|
|
||||||
|
|
||||||
This project is **not dependent on Harmony**, but is commonly used as part of Harmony bare-metal Kubernetes deployments.
|
This project is **not dependent on Harmony**, but is commonly used as part of Harmony bare-metal Kubernetes deployments.
|
||||||
|
|
||||||
@@ -17,181 +16,199 @@ This project is **not dependent on Harmony**, but is commonly used as part of Ha
|
|||||||
In bare-metal environments, external load balancers often rely on pod-level or router-level checks that can lag behind the authoritative Kubernetes `Node.status.conditions[Ready]`.
|
In bare-metal environments, external load balancers often rely on pod-level or router-level checks that can lag behind the authoritative Kubernetes `Node.status.conditions[Ready]`.
|
||||||
This service provides the true source-of-truth with fast reaction time.
|
This service provides the true source-of-truth with fast reaction time.
|
||||||
|
|
||||||
## Available checks
|
## Features & Roadmap
|
||||||
|
|
||||||
| Check name | Description | Status |
|
| Check | Description | Status | Check Name |
|
||||||
|--------------------|-------------------------------------------------------------|-------------------|
|
|------------------------------------|--------------------------------------------------|---------------------|--------------------|
|
||||||
| `node_ready` | Queries `Node.status.conditions[Ready]` via Kubernetes API | Implemented |
|
| **Node readiness (API)** | Queries `Node.status.conditions[Ready]` via Kubernetes API | **Implemented** | `node_ready` |
|
||||||
| `okd_router_1936` | Probes OpenShift router `/healthz/ready` on port 1936 | Implemented |
|
| **OKD Router health** | Probes OpenShift router healthz on port 1936 | **Implemented** | `okd_router_1936` |
|
||||||
| `filesystem_ro` | Detects read-only mounts via `/proc/mounts` | To be implemented |
|
| Filesystem readonly | Detects read-only mounts via `/proc/mounts` | To be implemented | `filesystem_ro` |
|
||||||
| `kubelet` | Local probe to kubelet `/healthz` (port 10248) | To be implemented |
|
| Kubelet running | Local probe to kubelet `/healthz` (port 10248) | To be implemented | `kubelet` |
|
||||||
| `container_runtime`| Socket check + runtime status | To be implemented |
|
| CRI-O / container runtime health | Socket check + runtime status | To be implemented | `container_runtime`|
|
||||||
| `disk_pressure` | Threshold checks on key filesystems | To be implemented |
|
| Disk / inode pressure | Threshold checks on key filesystems | To be implemented | `disk_pressure` |
|
||||||
| `network` | DNS resolution + gateway connectivity | To be implemented |
|
| Network reachability | DNS resolution + gateway connectivity | To be implemented | `network` |
|
||||||
| `custom_conditions`| Reacts to extra conditions (NPD, etc.) | To be implemented |
|
| Custom NodeConditions | Reacts to extra conditions (NPD, etc.) | To be implemented | `custom_conditions`|
|
||||||
|
|
||||||
All checks are combined with logical **AND** — any single failure results in 503.
|
All checks are combined with logical **AND** — any failure results in 503.
|
||||||
|
|
||||||
## Behavior
|
|
||||||
|
|
||||||
### `node_ready` check — fail-open design
|
|
||||||
|
|
||||||
The `node_ready` check queries the Kubernetes API server to read `Node.status.conditions[Ready]`.
|
|
||||||
Because this service runs on the node it is checking, there are scenarios where the API server is temporarily
|
|
||||||
unreachable (e.g. during a control-plane restart). To avoid incorrectly draining a healthy node in such cases,
|
|
||||||
the check is **fail-open**: it passes (reports ready) whenever the Kubernetes API is unavailable.
|
|
||||||
|
|
||||||
| Situation | Result | HTTP status |
|
|
||||||
|------------------------------------------------------|-------------------|-------------|
|
|
||||||
| `Node.conditions[Ready] == True` | Pass | 200 |
|
|
||||||
| `Node.conditions[Ready] == False` | Fail | 503 |
|
|
||||||
| `Ready` condition absent | Fail | 503 |
|
|
||||||
| API server unreachable or timed out (1 s timeout) | Pass (assumes ready) | 200 |
|
|
||||||
| Kubernetes client initialization failed | Pass (assumes ready) | 200 |
|
|
||||||
| `NODE_NAME` env var not set | Hard error | 500 |
|
|
||||||
|
|
||||||
A warning is logged whenever the API is unavailable and the check falls back to assuming ready.
|
|
||||||
|
|
||||||
### `okd_router_1936` check
|
|
||||||
|
|
||||||
Sends `GET http://127.0.0.1:1936/healthz/ready` with a 5-second timeout.
|
|
||||||
Returns pass on any 2xx response, fail otherwise.
|
|
||||||
|
|
||||||
### Unknown check names
|
|
||||||
|
|
||||||
Requesting an unknown check name (e.g. `check=bogus`) results in that check returning `passed: false`
|
|
||||||
with reason `"Unknown check: bogus"`, and the overall response is 503.
|
|
||||||
|
|
||||||
## How it works
|
## How it works
|
||||||
|
|
||||||
### Node name discovery
|
### Node Name Discovery
|
||||||
|
The service automatically discovers its own node name using the **Kubernetes Downward API**:
|
||||||
The service reads the `NODE_NAME` environment variable, which must be injected via the Kubernetes Downward API:
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
env:
|
env:
|
||||||
- name: NODE_NAME
|
- name: NODE_NAME
|
||||||
valueFrom:
|
valueFrom:
|
||||||
fieldRef:
|
fieldRef:
|
||||||
fieldPath: spec.nodeName
|
fieldPath: metadata.name
|
||||||
```
|
```
|
||||||
|
|
||||||
### Kubernetes API authentication
|
### Kubernetes API Authentication
|
||||||
|
|
||||||
- Uses standard **in-cluster configuration** — no external credentials needed.
|
- Uses standard **in-cluster configuration** (no external credentials needed).
|
||||||
- The ServiceAccount token and CA certificate are automatically mounted at `/var/run/secrets/kubernetes.io/serviceaccount/`.
|
- The ServiceAccount token and CA certificate are automatically mounted by Kubernetes at `/var/run/secrets/kubernetes.io/serviceaccount/`.
|
||||||
- Requires only minimal RBAC: `get` and `list` on the `nodes` resource (see `deploy/resources.yaml`).
|
- The application (via `kube-rs` or your Harmony higher-level client) calls the equivalent of `Config::incluster_config()`.
|
||||||
- Connect and write timeouts are set to **1 second** to keep checks fast.
|
- Requires only minimal RBAC: `get` permission on the `nodes` resource (see `deploy/rbac.yaml`).
|
||||||
|
|
||||||
## Deploy
|
## Quick Start
|
||||||
|
|
||||||
All Kubernetes resources (Namespace, ServiceAccount, ClusterRole, ClusterRoleBinding, and an OpenShift SCC RoleBinding for `hostnetwork`) are in a single file.
|
|
||||||
|
|
||||||
|
### 1. Build and push
|
||||||
```bash
|
```bash
|
||||||
kubectl apply -f deploy/resources.yaml
|
cargo build --release --bin harmony-node-readiness-endpoint
|
||||||
|
|
||||||
|
docker build -t your-registry/harmony-node-readiness-endpoint:v1.0.0 .
|
||||||
|
docker push your-registry/harmony-node-readiness-endpoint:v1.0.0
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Deploy
|
||||||
|
```bash
|
||||||
|
kubectl apply -f deploy/namespace.yaml
|
||||||
|
kubectl apply -f deploy/rbac.yaml
|
||||||
kubectl apply -f deploy/daemonset.yaml
|
kubectl apply -f deploy/daemonset.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
The DaemonSet uses `hostNetwork: true` and `hostPort: 25001`, so the endpoint is reachable directly on the node's IP at port 25001.
|
(The DaemonSet uses `hostPort: 25001` by default so the endpoint is reachable directly on the node's IP.)
|
||||||
It tolerates all taints, ensuring it runs even on nodes marked unschedulable.
|
|
||||||
|
|
||||||
### Configure your external load balancer
|
### 3. Configure your external load balancer
|
||||||
|
|
||||||
**Example for HAProxy / OPNsense:**
|
**Example for HAProxy / OPNsense:**
|
||||||
- Check type: **HTTP**
|
- Check type: **HTTP**
|
||||||
- URI: `/health`
|
- URI: `/health`
|
||||||
- Port: `25001` (configurable via `LISTEN_PORT` env var)
|
- Port: `25001` (configurable via `LISTEN_PORT`)
|
||||||
- Interval: 5–10 s
|
- Interval: 5–10 s
|
||||||
- Rise: 2
|
- Rise: 2
|
||||||
- Fall: 3
|
- Fall: 3
|
||||||
- Expect: `2xx`
|
- Expect: `2xx`
|
||||||
|
|
||||||
## Endpoint usage
|
## Health Endpoint Examples
|
||||||
|
|
||||||
### Query parameter
|
### Query Parameter
|
||||||
|
|
||||||
Use the `check` query parameter to select which checks to run (comma-separated).
|
Use the `check` query parameter to specify which checks to run. Multiple checks can be comma-separated.
|
||||||
When omitted, only `node_ready` runs.
|
|
||||||
|
|
||||||
| Request | Checks run |
|
| Request | Behavior |
|
||||||
|------------------------------------------------|-----------------------------------|
|
|--------------------------------------|---------------------------------------------|
|
||||||
| `GET /health` | `node_ready` |
|
| `GET /health` | Runs `node_ready` (default) |
|
||||||
| `GET /health?check=okd_router_1936` | `okd_router_1936` only |
|
| `GET /health?check=okd_router_1936` | Runs only OKD router check |
|
||||||
| `GET /health?check=node_ready,okd_router_1936` | `node_ready` and `okd_router_1936`|
|
| `GET /health?check=node_ready,okd_router_1936` | Runs both checks |
|
||||||
|
|
||||||
> **Note:** specifying `check=` replaces the default. Include `node_ready` explicitly if you need it alongside other checks.
|
**Note:** When the `check` parameter is provided, only the specified checks run. You must explicitly include `node_ready` if you want it along with other checks.
|
||||||
|
|
||||||
### Response format
|
### Response Format
|
||||||
|
|
||||||
```json
|
Each check result includes:
|
||||||
{
|
- `name`: The check identifier
|
||||||
"status": "ready" | "not-ready",
|
- `passed`: Boolean indicating success or failure
|
||||||
"checks": [
|
- `reason`: (Optional) Failure reason if the check failed
|
||||||
{
|
- `duration_ms`: Time taken to execute the check in milliseconds
|
||||||
"name": "<check-name>",
|
|
||||||
"passed": true | false,
|
|
||||||
"reason": "<failure reason, omitted on success>",
|
|
||||||
"duration_ms": 42
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"total_duration_ms": 42
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Healthy node (default)**
|
**Healthy node (default check)**
|
||||||
```http
|
```http
|
||||||
HTTP/1.1 200 OK
|
HTTP/1.1 200 OK
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
"checks": [{ "name": "node_ready", "passed": true, "duration_ms": 42 }],
|
**Healthy node (multiple checks)**
|
||||||
"total_duration_ms": 42
|
```http
|
||||||
|
GET /health?check=node_ready,okd_router_1936
|
||||||
|
|
||||||
|
HTTP/1.1 200 OK
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**Unhealthy node (one check failed)**
|
**Unhealthy node (one check failed)**
|
||||||
**Unhealthy node**
|
```http
|
||||||
GET /health?check=node_ready,okd_router_1936
|
GET /health?check=node_ready,okd_router_1936
|
||||||
|
|
||||||
|
HTTP/1.1 503 Service Unavailable
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
**Unhealthy node (default check)**
|
||||||
|
```http
|
||||||
|
HTTP/1.1 503 Service Unavailable
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration (via DaemonSet env vars)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
env:
|
||||||
|
- name: NODE_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: LISTEN_PORT
|
||||||
|
value: "25001"
|
||||||
|
```
|
||||||
|
|
||||||
|
Checks are selected via the `check` query parameter on the `/health` endpoint. See the usage examples above.
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Run locally (set NODE_NAME env var)
|
# Run locally (set NODE_NAME env var)
|
||||||
NODE_NAME=my-test-node cargo run
|
NODE_NAME=my-test-node cargo run
|
||||||
{ "name": "node_ready", "passed": false, "reason": "KubeletNotReady", "duration_ms": 35 }
|
```
|
||||||
],
|
|
||||||
"total_duration_ms": 35
|
---
|
||||||
|
|
||||||
|
*Minimal, auditable, and built for production bare-metal Kubernetes environments.*
|
||||||
|
|
||||||
|
"name": "okd_router_1936",
|
||||||
|
"passed": false,
|
||||||
|
"reason": "Failed to connect to OKD router: connection refused",
|
||||||
|
"duration_ms": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
**API server unreachable (fail-open)**
|
**Unhealthy node (default check)**
|
||||||
```http
|
```http
|
||||||
HTTP/1.1 200 OK
|
HTTP/1.1 503 Service Unavailable
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
{
|
{
|
||||||
"status": "ready",
|
"status": "not-ready",
|
||||||
"checks": [{ "name": "node_ready", "passed": true, "duration_ms": 1001 }],
|
"checks": [
|
||||||
"total_duration_ms": 1001
|
{
|
||||||
|
"name": "node_ready",
|
||||||
|
"passed": false,
|
||||||
|
"reason": "KubeletNotReady",
|
||||||
|
"duration_ms": 35
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
*(A warning is logged: `Kubernetes API appears to be down … Assuming node is ready.`)*
|
|
||||||
|
|
||||||
## Configuration
|
## Configuration (via DaemonSet env vars)
|
||||||
|
|
||||||
| Env var | Default | Description |
|
```yaml
|
||||||
|---------------|----------|--------------------------------------|
|
env:
|
||||||
| `NODE_NAME` | required | Node name, injected via Downward API |
|
- name: NODE_NAME
|
||||||
| `LISTEN_PORT` | `25001` | TCP port the HTTP server binds to |
|
valueFrom:
|
||||||
| `RUST_LOG` | — | Log level (e.g. `info`, `debug`) |
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: LISTEN_PORT
|
||||||
|
value: "25001"
|
||||||
|
```
|
||||||
|
|
||||||
|
Checks are selected via the `check` query parameter on the `/health` endpoint. See the usage examples above.
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Run locally
|
# Run locally (set NODE_NAME env var)
|
||||||
NODE_NAME=my-test-node cargo run
|
NODE_NAME=my-test-node cargo run
|
||||||
|
|
||||||
# Run tests
|
|
||||||
cargo test
|
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
*Minimal, auditable, and built for production bare-metal Kubernetes environments.*
|
*Minimal, auditable, and built for production bare-metal Kubernetes environments.*
|
||||||
|
|
||||||
|
|||||||
0
harmony_node_readiness/build-docker.sh
Executable file → Normal file
0
harmony_node_readiness/build-docker.sh
Executable file → Normal file
@@ -27,8 +27,8 @@ spec:
|
|||||||
fieldRef:
|
fieldRef:
|
||||||
fieldPath: spec.nodeName
|
fieldPath: spec.nodeName
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 25001
|
- containerPort: 8080
|
||||||
hostPort: 25001
|
hostPort: 8080
|
||||||
name: health-port
|
name: health-port
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
|
|||||||
@@ -1,16 +1,13 @@
|
|||||||
use actix_web::{App, HttpResponse, HttpServer, Responder, get, web};
|
use actix_web::{App, HttpResponse, HttpServer, Responder, get, web};
|
||||||
use k8s_openapi::api::core::v1::Node;
|
use k8s_openapi::api::core::v1::Node;
|
||||||
use kube::{Api, Client, Config};
|
use kube::{Api, Client};
|
||||||
|
|
||||||
use log::{debug, error, info, warn};
|
use log::{debug, error, info, warn};
|
||||||
use reqwest;
|
use reqwest;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::Instant;
|
||||||
use tokio::task::JoinSet;
|
use tokio::task::JoinSet;
|
||||||
|
|
||||||
const K8S_CLIENT_TIMEOUT: Duration = Duration::from_secs(1);
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
struct HealthStatus {
|
struct HealthStatus {
|
||||||
status: String,
|
status: String,
|
||||||
@@ -43,16 +40,10 @@ struct HealthQuery {
|
|||||||
async fn check_node_ready(client: Client, node_name: &str) -> Result<(), String> {
|
async fn check_node_ready(client: Client, node_name: &str) -> Result<(), String> {
|
||||||
let nodes: Api<Node> = Api::all(client);
|
let nodes: Api<Node> = Api::all(client);
|
||||||
|
|
||||||
let node = match nodes.get(node_name).await {
|
let node = nodes
|
||||||
Ok(n) => n,
|
.get(node_name)
|
||||||
Err(e) => {
|
.await
|
||||||
warn!(
|
.map_err(|e| format!("Failed to get node '{}': {}", node_name, e))?;
|
||||||
"Kubernetes API appears to be down, unreachable, or timed out for node '{}': {}. Assuming node is ready.",
|
|
||||||
node_name, e
|
|
||||||
);
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let conditions = node.status.and_then(|s| s.conditions).unwrap_or_default();
|
let conditions = node.status.and_then(|s| s.conditions).unwrap_or_default();
|
||||||
|
|
||||||
@@ -113,13 +104,7 @@ async fn run_check(check_name: &str, client: Option<Client>, node_name: &str) ->
|
|||||||
let result = match check_name {
|
let result = match check_name {
|
||||||
"node_ready" => match client {
|
"node_ready" => match client {
|
||||||
Some(c) => check_node_ready(c, node_name).await,
|
Some(c) => check_node_ready(c, node_name).await,
|
||||||
None => {
|
None => Err("Kubernetes client not available".to_string()),
|
||||||
warn!(
|
|
||||||
"Kubernetes client not available for node '{}'. Assuming node is ready.",
|
|
||||||
node_name
|
|
||||||
);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"okd_router_1936" => check_okd_router_1936().await,
|
"okd_router_1936" => check_okd_router_1936().await,
|
||||||
_ => Err(format!("Unknown check: {}", check_name)),
|
_ => Err(format!("Unknown check: {}", check_name)),
|
||||||
@@ -164,30 +149,16 @@ async fn health(query: web::Query<HealthQuery>) -> impl Responder {
|
|||||||
|
|
||||||
// Initialize Kubernetes client only if needed
|
// Initialize Kubernetes client only if needed
|
||||||
let k8s_client = if needs_k8s_client {
|
let k8s_client = if needs_k8s_client {
|
||||||
match Config::infer().await {
|
match Client::try_default().await {
|
||||||
Ok(mut config) => {
|
Ok(c) => Some(c),
|
||||||
config.write_timeout = Some(K8S_CLIENT_TIMEOUT);
|
|
||||||
config.connect_timeout = Some(K8S_CLIENT_TIMEOUT);
|
|
||||||
Some(Client::try_from(config).map_err(|e| e.to_string()))
|
|
||||||
}
|
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(
|
error!("Failed to create Kubernetes client: {}", e);
|
||||||
"Failed to infer Kubernetes config for node '{}': {}. Assuming node_ready is healthy.",
|
return HttpResponse::InternalServerError().json(HealthError {
|
||||||
node_name, e
|
status: "error".to_string(),
|
||||||
);
|
error: format!("Failed to create Kubernetes client: {}", e),
|
||||||
None
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
.and_then(|result| match result {
|
|
||||||
Ok(client) => Some(client),
|
|
||||||
Err(e) => {
|
|
||||||
warn!(
|
|
||||||
"Failed to create Kubernetes client for node '{}': {}. Assuming node_ready is healthy.",
|
|
||||||
node_name, e
|
|
||||||
);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
@@ -255,28 +226,7 @@ async fn main() -> std::io::Result<()> {
|
|||||||
info!("Starting harmony-node-readiness-endpoint on {}", bind_addr);
|
info!("Starting harmony-node-readiness-endpoint on {}", bind_addr);
|
||||||
|
|
||||||
HttpServer::new(|| App::new().service(health))
|
HttpServer::new(|| App::new().service(health))
|
||||||
.workers(3)
|
|
||||||
.bind(&bind_addr)?
|
.bind(&bind_addr)?
|
||||||
.run()
|
.run()
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
use kube::error::ErrorResponse;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_checks_defaults_to_node_ready() {
|
|
||||||
assert_eq!(parse_checks(None), vec!["node_ready"]);
|
|
||||||
assert_eq!(parse_checks(Some("")), vec!["node_ready"]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_checks_splits_and_trims_values() {
|
|
||||||
assert_eq!(
|
|
||||||
parse_checks(Some("node_ready, okd_router_1936 ")),
|
|
||||||
vec!["node_ready", "okd_router_1936"]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user