Compare commits

...

7 Commits

7 changed files with 176 additions and 139 deletions

6
Cargo.lock generated
View File

@@ -2779,6 +2779,7 @@ name = "harmony-node-readiness-endpoint"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"chrono",
"env_logger", "env_logger",
"k8s-openapi", "k8s-openapi",
"kube", "kube",
@@ -2787,6 +2788,7 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
"tower",
] ]
[[package]] [[package]]
@@ -6915,9 +6917,9 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]] [[package]]
name = "tower" name = "tower"
version = "0.5.2" version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
dependencies = [ dependencies = [
"futures-core", "futures-core",
"futures-util", "futures-util",

View File

@@ -2,7 +2,6 @@
resolver = "2" resolver = "2"
members = [ members = [
"private_repos/*", "private_repos/*",
"examples/*",
"harmony", "harmony",
"harmony_types", "harmony_types",
"harmony_macros", "harmony_macros",
@@ -20,6 +19,7 @@ members = [
"brocade", "brocade",
"harmony_agent", "harmony_agent",
"harmony_agent/deploy", "harmony_node_readiness", "harmony_agent/deploy", "harmony_node_readiness",
"examples/*",
] ]
[workspace.package] [workspace.package]

View File

@@ -13,3 +13,5 @@ env_logger.workspace = true
log.workspace = true log.workspace = true
tokio.workspace = true tokio.workspace = true
reqwest.workspace = true reqwest.workspace = true
chrono.workspace = true
tower = "0.5.3"

View File

@@ -4,10 +4,11 @@
Designed for **bare-metal Kubernetes clusters** with external load balancers (HAProxy, OPNsense, F5, etc.). Designed for **bare-metal Kubernetes clusters** with external load balancers (HAProxy, OPNsense, F5, etc.).
It exposes a simple, reliable HTTP endpoint (`/health`) on each node that returns: Exposes a simple HTTP endpoint (`/health`) on each node:
- **200 OK** — node is healthy and ready to receive traffic - **200 OK** — node is healthy and ready to receive traffic
- **503 Service Unavailable** — node should be removed from the load balancer pool - **503 Service Unavailable** — node should be removed from the load balancer pool
- **500 Internal Server Error** — misconfiguration (e.g. `NODE_NAME` not set)
This project is **not dependent on Harmony**, but is commonly used as part of Harmony bare-metal Kubernetes deployments. This project is **not dependent on Harmony**, but is commonly used as part of Harmony bare-metal Kubernetes deployments.
@@ -16,199 +17,181 @@ This project is **not dependent on Harmony**, but is commonly used as part of Ha
In bare-metal environments, external load balancers often rely on pod-level or router-level checks that can lag behind the authoritative Kubernetes `Node.status.conditions[Ready]`. In bare-metal environments, external load balancers often rely on pod-level or router-level checks that can lag behind the authoritative Kubernetes `Node.status.conditions[Ready]`.
This service provides the true source-of-truth with fast reaction time. This service provides the true source-of-truth with fast reaction time.
## Features & Roadmap ## Available checks
| Check | Description | Status | Check Name | | Check name | Description | Status |
|------------------------------------|--------------------------------------------------|---------------------|--------------------| |--------------------|-------------------------------------------------------------|-------------------|
| **Node readiness (API)** | Queries `Node.status.conditions[Ready]` via Kubernetes API | **Implemented** | `node_ready` | | `node_ready` | Queries `Node.status.conditions[Ready]` via Kubernetes API | Implemented |
| **OKD Router health** | Probes OpenShift router healthz on port 1936 | **Implemented** | `okd_router_1936` | | `okd_router_1936` | Probes OpenShift router `/healthz/ready` on port 1936 | Implemented |
| Filesystem readonly | Detects read-only mounts via `/proc/mounts` | To be implemented | `filesystem_ro` | | `filesystem_ro` | Detects read-only mounts via `/proc/mounts` | To be implemented |
| Kubelet running | Local probe to kubelet `/healthz` (port 10248) | To be implemented | `kubelet` | | `kubelet` | Local probe to kubelet `/healthz` (port 10248) | To be implemented |
| CRI-O / container runtime health | Socket check + runtime status | To be implemented | `container_runtime`| | `container_runtime`| Socket check + runtime status | To be implemented |
| Disk / inode pressure | Threshold checks on key filesystems | To be implemented | `disk_pressure` | | `disk_pressure` | Threshold checks on key filesystems | To be implemented |
| Network reachability | DNS resolution + gateway connectivity | To be implemented | `network` | | `network` | DNS resolution + gateway connectivity | To be implemented |
| Custom NodeConditions | Reacts to extra conditions (NPD, etc.) | To be implemented | `custom_conditions`| | `custom_conditions`| Reacts to extra conditions (NPD, etc.) | To be implemented |
All checks are combined with logical **AND** — any failure results in 503. All checks are combined with logical **AND** — any single failure results in 503.
## Behavior
### `node_ready` check — fail-open design
The `node_ready` check queries the Kubernetes API server to read `Node.status.conditions[Ready]`.
Because this service runs on the node it is checking, there are scenarios where the API server is temporarily
unreachable (e.g. during a control-plane restart). To avoid incorrectly draining a healthy node in such cases,
the check is **fail-open**: it passes (reports ready) whenever the Kubernetes API is unavailable.
| Situation | Result | HTTP status |
|------------------------------------------------------|-------------------|-------------|
| `Node.conditions[Ready] == True` | Pass | 200 |
| `Node.conditions[Ready] == False` | Fail | 503 |
| `Ready` condition absent | Fail | 503 |
| API server unreachable or timed out (1 s timeout) | Pass (assumes ready) | 200 |
| Kubernetes client initialization failed | Pass (assumes ready) | 200 |
| `NODE_NAME` env var not set | Hard error | 500 |
A warning is logged whenever the API is unavailable and the check falls back to assuming ready.
### `okd_router_1936` check
Sends `GET http://127.0.0.1:1936/healthz/ready` with a 5-second timeout.
Returns pass on any 2xx response, fail otherwise.
### Unknown check names
Requesting an unknown check name (e.g. `check=bogus`) results in that check returning `passed: false`
with reason `"Unknown check: bogus"`, and the overall response is 503.
## How it works ## How it works
### Node Name Discovery ### Node name discovery
The service automatically discovers its own node name using the **Kubernetes Downward API**:
The service reads the `NODE_NAME` environment variable, which must be injected via the Kubernetes Downward API:
```yaml ```yaml
env: env:
- name: NODE_NAME - name: NODE_NAME
valueFrom: valueFrom:
fieldRef: fieldRef:
fieldPath: metadata.name fieldPath: spec.nodeName
``` ```
### Kubernetes API Authentication ### Kubernetes API authentication
- Uses standard **in-cluster configuration** (no external credentials needed). - Uses standard **in-cluster configuration** — no external credentials needed.
- The ServiceAccount token and CA certificate are automatically mounted by Kubernetes at `/var/run/secrets/kubernetes.io/serviceaccount/`. - The ServiceAccount token and CA certificate are automatically mounted at `/var/run/secrets/kubernetes.io/serviceaccount/`.
- The application (via `kube-rs` or your Harmony higher-level client) calls the equivalent of `Config::incluster_config()`. - Requires only minimal RBAC: `get` and `list` on the `nodes` resource (see `deploy/resources.yaml`).
- Requires only minimal RBAC: `get` permission on the `nodes` resource (see `deploy/rbac.yaml`). - Connect and write timeouts are set to **1 second** to keep checks fast.
## Quick Start ## Deploy
All Kubernetes resources (Namespace, ServiceAccount, ClusterRole, ClusterRoleBinding, and an OpenShift SCC RoleBinding for `hostnetwork`) are in a single file.
### 1. Build and push
```bash ```bash
cargo build --release --bin harmony-node-readiness-endpoint kubectl apply -f deploy/resources.yaml
docker build -t your-registry/harmony-node-readiness-endpoint:v1.0.0 .
docker push your-registry/harmony-node-readiness-endpoint:v1.0.0
```
### 2. Deploy
```bash
kubectl apply -f deploy/namespace.yaml
kubectl apply -f deploy/rbac.yaml
kubectl apply -f deploy/daemonset.yaml kubectl apply -f deploy/daemonset.yaml
``` ```
(The DaemonSet uses `hostPort: 25001` by default so the endpoint is reachable directly on the node's IP.) The DaemonSet uses `hostNetwork: true` and `hostPort: 25001`, so the endpoint is reachable directly on the node's IP at port 25001.
It tolerates all taints, ensuring it runs even on nodes marked unschedulable.
### 3. Configure your external load balancer ### Configure your external load balancer
**Example for HAProxy / OPNsense:** **Example for HAProxy / OPNsense:**
- Check type: **HTTP** - Check type: **HTTP**
- URI: `/health` - URI: `/health`
- Port: `25001` (configurable via `LISTEN_PORT`) - Port: `25001` (configurable via `LISTEN_PORT` env var)
- Interval: 5–10 s - Interval: 5–10 s
- Rise: 2 - Rise: 2
- Fall: 3 - Fall: 3
- Expect: `2xx` - Expect: `2xx`
## Health Endpoint Examples ## Endpoint usage
### Query Parameter ### Query parameter
Use the `check` query parameter to specify which checks to run. Multiple checks can be comma-separated. Use the `check` query parameter to select which checks to run (comma-separated).
When omitted, only `node_ready` runs.
| Request | Behavior | | Request | Checks run |
|--------------------------------------|---------------------------------------------| |------------------------------------------------|-----------------------------------|
| `GET /health` | Runs `node_ready` (default) | | `GET /health` | `node_ready` |
| `GET /health?check=okd_router_1936` | Runs only OKD router check | | `GET /health?check=okd_router_1936` | `okd_router_1936` only |
| `GET /health?check=node_ready,okd_router_1936` | Runs both checks | | `GET /health?check=node_ready,okd_router_1936` | `node_ready` and `okd_router_1936`|
**Note:** When the `check` parameter is provided, only the specified checks run. You must explicitly include `node_ready` if you want it along with other checks. > **Note:** specifying `check=` replaces the default. Include `node_ready` explicitly if you need it alongside other checks.
### Response Format ### Response format
Each check result includes:
- `name`: The check identifier
- `passed`: Boolean indicating success or failure
- `reason`: (Optional) Failure reason if the check failed
- `duration_ms`: Time taken to execute the check in milliseconds
**Healthy node (default check)**
```http
HTTP/1.1 200 OK
Content-Type: application/json
```json
{ {
"status": "ready" | "not-ready",
"checks": [ "checks": [
{ {
GET /health?check=node_ready,okd_router_1936 "name": "<check-name>",
"passed": true | false,
"reason": "<failure reason, omitted on success>",
"duration_ms": 42 "duration_ms": 42
} }
],
"total_duration_ms": 42
} }
``` ```
```http **Healthy node (default)**
```http ```http
HTTP/1.1 503 Service Unavailable
HTTP/1.1 200 OK HTTP/1.1 200 OK
{ {
"status": "ready", "status": "ready",
```http "checks": [{ "name": "node_ready", "passed": true, "duration_ms": 42 }],
HTTP/1.1 503 Service Unavailable "total_duration_ms": 42
Content-Type: application/json
```
## Configuration (via DaemonSet env vars)
```yaml
env:
- name: NODE_NAME
valueFrom:
} }
``` ```
value: "25001" **Unhealthy node**
```http ```http
Checks are selected via the `check` query parameter on the `/health` endpoint. See the usage examples above.
HTTP/1.1 503 Service Unavailable HTTP/1.1 503 Service Unavailable
## Development
{ {
"status": "not-ready", "status": "not-ready",
"checks": [ "checks": [
``` { "name": "node_ready", "passed": false, "reason": "KubeletNotReady", "duration_ms": 35 }
],
--- "total_duration_ms": 35
*Minimal, auditable, and built for production bare-metal Kubernetes environments.*
"name": "okd_router_1936",
"passed": false,
"reason": "Failed to connect to OKD router: connection refused",
"duration_ms": 5
}
]
} }
``` ```
**Unhealthy node (default check)** **API server unreachable (fail-open)**
```http ```http
HTTP/1.1 503 Service Unavailable HTTP/1.1 200 OK
Content-Type: application/json
{ {
"status": "not-ready", "status": "ready",
"checks": [ "checks": [{ "name": "node_ready", "passed": true, "duration_ms": 1001 }],
{ "total_duration_ms": 1001
"name": "node_ready",
"passed": false,
"reason": "KubeletNotReady",
"duration_ms": 35
}
]
} }
``` ```
*(A warning is logged: `Kubernetes API appears to be down … Assuming node is ready.`)*
## Configuration (via DaemonSet env vars) ## Configuration
```yaml | Env var | Default | Description |
env: |---------------|----------|--------------------------------------|
- name: NODE_NAME | `NODE_NAME` | required | Node name, injected via Downward API |
valueFrom: | `LISTEN_PORT` | `25001` | TCP port the HTTP server binds to |
fieldRef: | `RUST_LOG` | — | Log level (e.g. `info`, `debug`) |
fieldPath: metadata.name
- name: LISTEN_PORT
value: "25001"
```
Checks are selected via the `check` query parameter on the `/health` endpoint. See the usage examples above.
## Development ## Development
```bash ```bash
# Run locally (set NODE_NAME env var) # Run locally
NODE_NAME=my-test-node cargo run NODE_NAME=my-test-node cargo run
# Run tests
cargo test
``` ```
--- ---
*Minimal, auditable, and built for production bare-metal Kubernetes environments.* *Minimal, auditable, and built for production bare-metal Kubernetes environments.*

0
harmony_node_readiness/build-docker.sh Normal file → Executable file
View File

View File

@@ -27,8 +27,8 @@ spec:
fieldRef: fieldRef:
fieldPath: spec.nodeName fieldPath: spec.nodeName
ports: ports:
- containerPort: 8080 - containerPort: 25001
hostPort: 8080 hostPort: 25001
name: health-port name: health-port
resources: resources:
requests: requests:

View File

@@ -1,13 +1,16 @@
use actix_web::{App, HttpResponse, HttpServer, Responder, get, web}; use actix_web::{App, HttpResponse, HttpServer, Responder, get, web};
use k8s_openapi::api::core::v1::Node; use k8s_openapi::api::core::v1::Node;
use kube::{Api, Client}; use kube::{Api, Client, Config};
use log::{debug, error, info, warn}; use log::{debug, error, info, warn};
use reqwest; use reqwest;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::env; use std::env;
use std::time::Instant; use std::time::{Duration, Instant};
use tokio::task::JoinSet; use tokio::task::JoinSet;
const K8S_CLIENT_TIMEOUT: Duration = Duration::from_secs(1);
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
struct HealthStatus { struct HealthStatus {
status: String, status: String,
@@ -40,10 +43,16 @@ struct HealthQuery {
async fn check_node_ready(client: Client, node_name: &str) -> Result<(), String> { async fn check_node_ready(client: Client, node_name: &str) -> Result<(), String> {
let nodes: Api<Node> = Api::all(client); let nodes: Api<Node> = Api::all(client);
let node = nodes let node = match nodes.get(node_name).await {
.get(node_name) Ok(n) => n,
.await Err(e) => {
.map_err(|e| format!("Failed to get node '{}': {}", node_name, e))?; warn!(
"Kubernetes API appears to be down, unreachable, or timed out for node '{}': {}. Assuming node is ready.",
node_name, e
);
return Ok(());
}
};
let conditions = node.status.and_then(|s| s.conditions).unwrap_or_default(); let conditions = node.status.and_then(|s| s.conditions).unwrap_or_default();
@@ -104,7 +113,13 @@ async fn run_check(check_name: &str, client: Option<Client>, node_name: &str) ->
let result = match check_name { let result = match check_name {
"node_ready" => match client { "node_ready" => match client {
Some(c) => check_node_ready(c, node_name).await, Some(c) => check_node_ready(c, node_name).await,
None => Err("Kubernetes client not available".to_string()), None => {
warn!(
"Kubernetes client not available for node '{}'. Assuming node is ready.",
node_name
);
Ok(())
}
}, },
"okd_router_1936" => check_okd_router_1936().await, "okd_router_1936" => check_okd_router_1936().await,
_ => Err(format!("Unknown check: {}", check_name)), _ => Err(format!("Unknown check: {}", check_name)),
@@ -149,16 +164,30 @@ async fn health(query: web::Query<HealthQuery>) -> impl Responder {
// Initialize Kubernetes client only if needed // Initialize Kubernetes client only if needed
let k8s_client = if needs_k8s_client { let k8s_client = if needs_k8s_client {
match Client::try_default().await { match Config::infer().await {
Ok(c) => Some(c), Ok(mut config) => {
config.write_timeout = Some(K8S_CLIENT_TIMEOUT);
config.connect_timeout = Some(K8S_CLIENT_TIMEOUT);
Some(Client::try_from(config).map_err(|e| e.to_string()))
}
Err(e) => { Err(e) => {
error!("Failed to create Kubernetes client: {}", e); warn!(
return HttpResponse::InternalServerError().json(HealthError { "Failed to infer Kubernetes config for node '{}': {}. Assuming node_ready is healthy.",
status: "error".to_string(), node_name, e
error: format!("Failed to create Kubernetes client: {}", e), );
}); None
} }
} }
.and_then(|result| match result {
Ok(client) => Some(client),
Err(e) => {
warn!(
"Failed to create Kubernetes client for node '{}': {}. Assuming node_ready is healthy.",
node_name, e
);
None
}
})
} else { } else {
None None
}; };
@@ -226,7 +255,28 @@ async fn main() -> std::io::Result<()> {
info!("Starting harmony-node-readiness-endpoint on {}", bind_addr); info!("Starting harmony-node-readiness-endpoint on {}", bind_addr);
HttpServer::new(|| App::new().service(health)) HttpServer::new(|| App::new().service(health))
.workers(3)
.bind(&bind_addr)? .bind(&bind_addr)?
.run() .run()
.await .await
} }
#[cfg(test)]
mod tests {
    // Unit tests for parsing the `check` query parameter of `/health`.
    // Only `parse_checks` is exercised here, so nothing beyond the parent
    // module's items needs to be imported.
    use super::*;

    /// A missing or empty `check` parameter falls back to the single
    /// default check, `node_ready`.
    #[test]
    fn parse_checks_defaults_to_node_ready() {
        assert_eq!(parse_checks(None), vec!["node_ready"]);
        assert_eq!(parse_checks(Some("")), vec!["node_ready"]);
    }

    /// A comma-separated list is split into individual check names and the
    /// whitespace around each name is trimmed.
    #[test]
    fn parse_checks_splits_and_trims_values() {
        assert_eq!(
            parse_checks(Some("node_ready, okd_router_1936 ")),
            vec!["node_ready", "okd_router_1936"]
        );
    }
}