Compare commits
66 Commits
adr-nats-c
...
f463cd1e94
| Author | SHA1 | Date | |
|---|---|---|---|
| f463cd1e94 | |||
| 0e9b23a320 | |||
| f532ba2b40 | |||
| fafca31798 | |||
| 5412c34957 | |||
| 787cc8feab | |||
| ce041f495b | |||
| 55de206523 | |||
| 64893a84f5 | |||
| f941672662 | |||
| a98113dd40 | |||
| 5db1a31d33 | |||
| f5aac67af8 | |||
| d7e5bf11d5 | |||
| 2e1f1b8447 | |||
| 2b157ad7fd | |||
| a0c0905c3b | |||
| fe52f69473 | |||
| d8338ad12c | |||
| ac9fedf853 | |||
| fd3705e382 | |||
| 4840c7fdc2 | |||
| 20172a7801 | |||
| 6bb33c5845 | |||
| d9357adad3 | |||
| a25ca86bdf | |||
| 646c5e723e | |||
| 69c382e8c6 | |||
| dca764395d | |||
| 2738985edb | |||
| d9a21bf94b | |||
| 8f8bd34168 | |||
| b5e971b3b6 | |||
| a1c0e0e246 | |||
| d084cee8d5 | |||
| 63ef1c0ea7 | |||
| ff7d2fb89e | |||
| 9bb38b930a | |||
| c677487a5e | |||
| c1d46612ac | |||
| 4fba01338d | |||
| 913ed17453 | |||
| 9e185cbbd5 | |||
| 752526f831 | |||
| f9bd6ad260 | |||
| 111181c300 | |||
| 3257cd9569 | |||
| 4b1915c594 | |||
| cf3050ce87 | |||
| c3e27c60be | |||
| 2d26790c82 | |||
| 2e89308b82 | |||
| d8936a8307 | |||
| e2fa12508f | |||
| bea2a75882 | |||
| a1528665d0 | |||
| 613225a00b | |||
| dd1c088f0d | |||
| b4ef009804 | |||
| 191e92048b | |||
| f4a70d8978 | |||
| 2ddc9c0579 | |||
| fececc2efd | |||
| 8afcacbd24 | |||
| b885c35706 | |||
|
|
bb6b4b7f88 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -26,3 +26,6 @@ Cargo.lock
|
||||
*.pdb
|
||||
|
||||
.harmony_generated
|
||||
|
||||
# Useful to create ignore folders for temp files and notes
|
||||
ignore
|
||||
|
||||
2590
Cargo.lock
generated
2590
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
10
Cargo.toml
10
Cargo.toml
@@ -2,7 +2,6 @@
|
||||
resolver = "2"
|
||||
members = [
|
||||
"private_repos/*",
|
||||
"examples/*",
|
||||
"harmony",
|
||||
"harmony_types",
|
||||
"harmony_macros",
|
||||
@@ -17,9 +16,9 @@ members = [
|
||||
"harmony_secret_derive",
|
||||
"harmony_secret",
|
||||
"adr/agent_discovery/mdns",
|
||||
"brocade",
|
||||
"harmony_agent",
|
||||
"harmony_agent/deploy",
|
||||
"brocade",
|
||||
"harmony_agent",
|
||||
"harmony_agent/deploy", "harmony_node_readiness", "harmony-k8s",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
@@ -38,6 +37,8 @@ tokio = { version = "1.40", features = [
|
||||
"macros",
|
||||
"rt-multi-thread",
|
||||
] }
|
||||
tokio-retry = "0.3.0"
|
||||
tokio-util = "0.7.15"
|
||||
cidr = { features = ["serde"], version = "0.2" }
|
||||
russh = "0.45"
|
||||
russh-keys = "0.45"
|
||||
@@ -52,6 +53,7 @@ kube = { version = "1.1.0", features = [
|
||||
"jsonpatch",
|
||||
] }
|
||||
k8s-openapi = { version = "0.25", features = ["v1_30"] }
|
||||
# TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated https://github.com/sebastienrousseau/serde_yml
|
||||
serde_yaml = "0.9"
|
||||
serde-value = "0.7"
|
||||
http = "1.2"
|
||||
|
||||
87
README.md
87
README.md
@@ -1,4 +1,6 @@
|
||||
# Harmony : Open-source infrastructure orchestration that treats your platform like first-class code
|
||||
# Harmony
|
||||
|
||||
Open-source infrastructure orchestration that treats your platform like first-class code.
|
||||
|
||||
In other words, Harmony is a **next-generation platform engineering framework**.
|
||||
|
||||
@@ -20,9 +22,7 @@ All in **one strongly-typed Rust codebase**.
|
||||
|
||||
From a **developer laptop** to a **global production cluster**, a single **source of truth** drives the **full software lifecycle.**
|
||||
|
||||
---
|
||||
|
||||
## 1 · The Harmony Philosophy
|
||||
## The Harmony Philosophy
|
||||
|
||||
Infrastructure is essential, but it shouldn’t be your core business. Harmony is built on three guiding principles that make modern platforms reliable, repeatable, and easy to reason about.
|
||||
|
||||
@@ -34,9 +34,18 @@ Infrastructure is essential, but it shouldn’t be your core business. Harmony i
|
||||
|
||||
These principles surface as simple, ergonomic Rust APIs that let teams focus on their product while trusting the platform underneath.
|
||||
|
||||
---
|
||||
## Where to Start
|
||||
|
||||
## 2 · Quick Start
|
||||
We have a comprehensive set of documentation right here in the repository.
|
||||
|
||||
| I want to... | Start Here |
|
||||
| ----------------- | ------------------------------------------------------------------ |
|
||||
| Get Started | [Getting Started Guide](./docs/guides/getting-started.md) |
|
||||
| See an Example | [Use Case: Deploy a Rust Web App](./docs/use-cases/rust-webapp.md) |
|
||||
| Explore | [Documentation Hub](./docs/README.md) |
|
||||
| See Core Concepts | [Core Concepts Explained](./docs/concepts.md) |
|
||||
|
||||
## Quick Look: Deploy a Rust Webapp
|
||||
|
||||
The snippet below spins up a complete **production-grade Rust + Leptos Webapp** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.
|
||||
|
||||
@@ -94,63 +103,33 @@ async fn main() {
|
||||
}
|
||||
```
|
||||
|
||||
Run it:
|
||||
To run this:
|
||||
|
||||
```bash
|
||||
cargo run
|
||||
```
|
||||
- Clone the repository: `git clone https://git.nationtech.io/nationtech/harmony`
|
||||
- Install dependencies: `cargo build --release`
|
||||
- Run the example: `cargo run --example try_rust_webapp`
|
||||
|
||||
Harmony analyses the code, shows an execution plan in a TUI, and applies it once you confirm. Same code, same binary—every environment.
|
||||
## Documentation
|
||||
|
||||
---
|
||||
All documentation is in the `/docs` directory.
|
||||
|
||||
## 3 · Core Concepts
|
||||
- [Documentation Hub](./docs/README.md): The main entry point for all documentation.
|
||||
- [Core Concepts](./docs/concepts.md): A detailed look at Score, Topology, Capability, Inventory, and Interpret.
|
||||
- [Component Catalogs](./docs/catalogs/README.md): Discover all available Scores, Topologies, and Capabilities.
|
||||
- [Developer Guide](./docs/guides/developer-guide.md): Learn how to write your own Scores and Topologies.
|
||||
|
||||
| Term | One-liner |
|
||||
| ---------------- | ---------------------------------------------------------------------------------------------------- |
|
||||
| **Score<T>** | Declarative description of the desired state (e.g., `LAMPScore`). |
|
||||
| **Interpret<T>** | Imperative logic that realises a `Score` on a specific environment. |
|
||||
| **Topology** | An environment (local k3d, AWS, bare-metal) exposing verified _Capabilities_ (Kubernetes, DNS, …). |
|
||||
| **Maestro** | Orchestrator that compiles Scores + Topology, ensuring all capabilities line up **at compile-time**. |
|
||||
| **Inventory** | Optional catalogue of physical assets for bare-metal and edge deployments. |
|
||||
## Architectural Decision Records
|
||||
|
||||
A visual overview is in the diagram below.
|
||||
- [ADR-001 · Why Rust](adr/001-rust.md)
|
||||
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
|
||||
- [ADR-006 · Secret Management](adr/006-secret-management.md)
|
||||
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
|
||||
|
||||
[Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)
|
||||
## Contribute
|
||||
|
||||
---
|
||||
Discussions and roadmap live in [Issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
|
||||
|
||||
## 4 · Install
|
||||
|
||||
Prerequisites:
|
||||
|
||||
- Rust
|
||||
- Docker (if you deploy locally)
|
||||
- `kubectl` / `helm` for Kubernetes-based topologies
|
||||
|
||||
```bash
|
||||
git clone https://git.nationtech.io/nationtech/harmony
|
||||
cd harmony
|
||||
cargo build --release # builds the CLI, TUI and libraries
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5 · Learning More
|
||||
|
||||
- **Architectural Decision Records** – dive into the rationale
|
||||
- [ADR-001 · Why Rust](adr/001-rust.md)
|
||||
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
|
||||
- [ADR-006 · Secret Management](adr/006-secret-management.md)
|
||||
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
|
||||
|
||||
- **Extending Harmony** – write new Scores / Interprets, add hardware like OPNsense firewalls, or embed Harmony in your own tooling (`/docs`).
|
||||
|
||||
- **Community** – discussions and roadmap live in [GitLab issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
|
||||
|
||||
---
|
||||
|
||||
## 6 · License
|
||||
## License
|
||||
|
||||
Harmony is released under the **GNU AGPL v3**.
|
||||
|
||||
|
||||
65
adr/019-Network-bond-setup.md
Normal file
65
adr/019-Network-bond-setup.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# Architecture Decision Record: Network Bonding Configuration via External Automation
|
||||
|
||||
Initial Author: Jean-Gabriel Gill-Couture & Sylvain Tremblay
|
||||
|
||||
Initial Date: 2026-02-13
|
||||
|
||||
Last Updated Date: 2026-02-13
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
We need to configure LACP bonds on 10GbE interfaces across all worker nodes in the OpenShift cluster. A significant challenge is that interface names (e.g., `enp1s0f0` vs `ens1f0`) vary across different hardware nodes.
|
||||
|
||||
The standard OpenShift mechanism (MachineConfig) applies identical configurations to all nodes in a MachineConfigPool. Since the interface names differ, a single static MachineConfig cannot target specific physical devices across the entire cluster without complex workarounds.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use the existing "Harmony" automation tool to generate and apply host-specific NetworkManager configuration files directly to the nodes.
|
||||
|
||||
1. Harmony will generate the specific `.nmconnection` files for the bond and slaves based on its inventory of interface names.
|
||||
2. Files will be pushed to `/etc/NetworkManager/system-connections/` on each node.
|
||||
3. Configuration will be applied via `nmcli` reload or a node reboot.
|
||||
|
||||
## Rationale
|
||||
|
||||
* **Inventory Awareness:** Harmony already possesses the specific interface mapping data for each host.
|
||||
* **Persistence:** Fedora CoreOS/SCOS allows writing to `/etc`, and these files persist across reboots and OS upgrades (rpm-ostree updates).
|
||||
* **Avoids Complexity:** This approach avoids the operational overhead of creating unique MachineConfigPools for every single host or hardware variant.
|
||||
* **Safety:** Unlike wildcard matching, this ensures explicit interface selection, preventing accidental bonding of reserved interfaces (e.g., future separation of Ceph storage traffic).
|
||||
|
||||
## Consequences
|
||||
|
||||
**Pros:**
|
||||
* Precise, per-host configuration without polluting the Kubernetes API with hundreds of MachineConfigs.
|
||||
* Standard Linux networking behavior; easy to debug locally.
|
||||
* Prevents accidental interface capture (unlike wildcards).
|
||||
|
||||
**Cons:**
|
||||
* **Loss of Declarative K8s State:** The network config is not managed by the Machine Config Operator (MCO).
|
||||
* **Node Replacement Friction:** Newly provisioned nodes (replacements) will boot with default config. Harmony must be run against new nodes manually or via a hook before they can fully join the cluster workload.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
1. **Wildcard Matching in NetworkManager (e.g., `interface-name=enp*`):**
|
||||
* *Pros:* Single MachineConfig for the whole cluster.
|
||||
* *Cons:* Rejected because it is too broad. It risks capturing interfaces intended for other purposes (e.g., splitting storage and cluster networks later).
|
||||
|
||||
2. **"Kitchen Sink" Configuration:**
|
||||
* *Pros:* Single file listing every possible interface name as a slave.
|
||||
* *Cons:* "Dirty" configuration; results in many inactive connections on every host; brittle if new naming schemes appear.
|
||||
|
||||
3. **Per-Host MachineConfig:**
|
||||
* *Pros:* Fully declarative within OpenShift.
|
||||
* *Cons:* Requires a unique `MachineConfigPool` per host, which is an anti-pattern and unmaintainable at scale.
|
||||
|
||||
4. **On-boot Generation Script:**
|
||||
* *Pros:* Dynamic detection.
|
||||
* *Cons:* Increases boot complexity; harder to debug if the script fails during startup.
|
||||
|
||||
## Additional Notes
|
||||
|
||||
While `/etc` is writable and persistent on CoreOS, this configuration falls outside the "Day 1" Ignition process. Operational runbooks must be updated to ensure Harmony runs on any node replacement events.
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::net::{IpAddr, Ipv4Addr};
|
||||
|
||||
use brocade::{BrocadeOptions, ssh};
|
||||
use harmony_secret::Secret;
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use harmony_types::switch::PortLocation;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -21,17 +21,15 @@ async fn main() {
|
||||
// let ip = IpAddr::V4(Ipv4Addr::new(192, 168, 4, 11)); // brocade @ st
|
||||
let switch_addresses = vec![ip];
|
||||
|
||||
// let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
|
||||
// .await
|
||||
// .unwrap();
|
||||
let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let brocade = brocade::init(
|
||||
&switch_addresses,
|
||||
// &config.username,
|
||||
// &config.password,
|
||||
"admin",
|
||||
"password",
|
||||
BrocadeOptions {
|
||||
&config.username,
|
||||
&config.password,
|
||||
&BrocadeOptions {
|
||||
dry_run: true,
|
||||
ssh: ssh::SshOptions {
|
||||
port: 2222,
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use super::BrocadeClient;
|
||||
use crate::{
|
||||
BrocadeInfo, Error, ExecutionMode, InterSwitchLink, InterfaceInfo, MacAddressEntry,
|
||||
PortChannelId, PortOperatingMode, SecurityLevel, parse_brocade_mac_address,
|
||||
shell::BrocadeShell,
|
||||
PortChannelId, PortOperatingMode, parse_brocade_mac_address, shell::BrocadeShell,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
@@ -144,7 +144,7 @@ pub async fn init(
|
||||
ip_addresses: &[IpAddr],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
options: &BrocadeOptions,
|
||||
) -> Result<Box<dyn BrocadeClient + Send + Sync>, Error> {
|
||||
let shell = BrocadeShell::init(ip_addresses, username, password, options).await?;
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ use regex::Regex;
|
||||
use crate::{
|
||||
BrocadeClient, BrocadeInfo, Error, ExecutionMode, InterSwitchLink, InterfaceInfo,
|
||||
InterfaceStatus, InterfaceType, MacAddressEntry, PortChannelId, PortOperatingMode,
|
||||
SecurityLevel, parse_brocade_mac_address, shell::BrocadeShell,
|
||||
parse_brocade_mac_address, shell::BrocadeShell,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -28,7 +28,7 @@ impl BrocadeShell {
|
||||
ip_addresses: &[IpAddr],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
options: &BrocadeOptions,
|
||||
) -> Result<Self, Error> {
|
||||
let ip = ip_addresses
|
||||
.first()
|
||||
|
||||
@@ -70,7 +70,7 @@ pub async fn try_init_client(
|
||||
username: &str,
|
||||
password: &str,
|
||||
ip: &std::net::IpAddr,
|
||||
base_options: BrocadeOptions,
|
||||
base_options: &BrocadeOptions,
|
||||
) -> Result<BrocadeOptions, Error> {
|
||||
let mut default = SshOptions::default();
|
||||
default.port = base_options.ssh.port;
|
||||
|
||||
@@ -1 +1,33 @@
|
||||
Not much here yet, see the `adr` folder for now. More to come in time!
|
||||
# Harmony Documentation Hub
|
||||
|
||||
Welcome to the Harmony documentation. This is the main entry point for learning everything from core concepts to building your own Score, Topologies, and Capabilities.
|
||||
|
||||
## 1. Getting Started
|
||||
|
||||
If you're new to Harmony, start here:
|
||||
|
||||
- [**Getting Started Guide**](./guides/getting-started.md): A step-by-step tutorial that takes you from an empty project to deploying your first application.
|
||||
- [**Core Concepts**](./concepts.md): A high-level overview of the key concepts in Harmony: `Score`, `Topology`, `Capability`, `Inventory`, `Interpret`, ...
|
||||
|
||||
## 2. Use Cases & Examples
|
||||
|
||||
See how to use Harmony to solve real-world problems.
|
||||
|
||||
- [**OKD on Bare Metal**](./use-cases/okd-on-bare-metal.md): A detailed walkthrough of bootstrapping a high-availability OKD cluster from physical hardware.
|
||||
- [**Deploy a Rust Web App**](./use-cases/deploy-rust-webapp.md): A quick guide to deploying a monitored, containerized web application to a Kubernetes cluster.
|
||||
|
||||
## 3. Component Catalogs
|
||||
|
||||
Discover existing, reusable components you can use in your Harmony projects.
|
||||
|
||||
- [**Scores Catalog**](./catalogs/scores.md): A categorized list of all available `Scores` (the "what").
|
||||
- [**Topologies Catalog**](./catalogs/topologies.md): A list of all available `Topologies` (the "where").
|
||||
- [**Capabilities Catalog**](./catalogs/capabilities.md): A list of all available `Capabilities` (the "how").
|
||||
|
||||
## 4. Developer Guides
|
||||
|
||||
Ready to build your own components? These guides show you how.
|
||||
|
||||
- [**Writing a Score**](./guides/writing-a-score.md): Learn how to create your own `Score` and `Interpret` logic to define a new desired state.
|
||||
- [**Writing a Topology**](./guides/writing-a-topology.md): Learn how to model a new environment (like AWS, GCP, or custom hardware) as a `Topology`.
|
||||
- [**Adding Capabilities**](./guides/adding-capabilities.md): See how to add a `Capability` to your custom `Topology`.
|
||||
|
||||
7
docs/catalogs/README.md
Normal file
7
docs/catalogs/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Component Catalogs
|
||||
|
||||
This section is the "dictionary" for Harmony. It lists all the reusable components available out-of-the-box.
|
||||
|
||||
- [**Scores Catalog**](./scores.md): Discover all available `Scores` (the "what").
|
||||
- [**Topologies Catalog**](./topologies.md): A list of all available `Topologies` (the "where").
|
||||
- [**Capabilities Catalog**](./capabilities.md): A list of all available `Capabilities` (the "how").
|
||||
40
docs/catalogs/capabilities.md
Normal file
40
docs/catalogs/capabilities.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Capabilities Catalog
|
||||
|
||||
A `Capability` is a specific feature or API that a `Topology` offers. `Interpret` logic uses these capabilities to execute a `Score`.
|
||||
|
||||
This list is primarily for developers **writing new Topologies or Scores**. As a user, you just need to know that the `Topology` you pick (like `K8sAnywhereTopology`) provides the capabilities your `Scores` (like `ApplicationScore`) need.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Capabilities Catalog](#capabilities-catalog)
|
||||
- [Kubernetes & Application](#kubernetes-application)
|
||||
- [Monitoring & Observability](#monitoring-observability)
|
||||
- [Networking (Core Services)](#networking-core-services)
|
||||
- [Networking (Hardware & Host)](#networking-hardware-host)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
## Kubernetes & Application
|
||||
|
||||
- **K8sClient**: Provides an authenticated client to interact with a Kubernetes API (create/read/update/delete resources).
|
||||
- **HelmCommand**: Provides the ability to execute Helm commands (install, upgrade, template).
|
||||
- **TenantManager**: Provides methods for managing tenants in a multi-tenant cluster.
|
||||
- **Ingress**: Provides an interface for managing ingress controllers and resources.
|
||||
|
||||
## Monitoring & Observability
|
||||
|
||||
- **Grafana**: Provides an API for configuring Grafana (datasources, dashboards).
|
||||
- **Monitoring**: A general capability for configuring monitoring (e.g., creating Prometheus rules).
|
||||
|
||||
## Networking (Core Services)
|
||||
|
||||
- **DnsServer**: Provides an interface for creating and managing DNS records.
|
||||
- **LoadBalancer**: Provides an interface for configuring a load balancer (e.g., OPNsense, MetalLB).
|
||||
- **DhcpServer**: Provides an interface for managing DHCP leases and host bindings.
|
||||
- **TftpServer**: Provides an interface for managing files on a TFTP server (e.g., iPXE boot files).
|
||||
|
||||
## Networking (Hardware & Host)
|
||||
|
||||
- **Router**: Provides an interface for configuring routing rules, typically on a firewall like OPNsense.
|
||||
- **Switch**: Provides an interface for configuring a physical network switch (e.g., managing VLANs and port channels).
|
||||
- **NetworkManager**: Provides an interface for configuring host-level networking (e.g., creating bonds and bridges on a node).
|
||||
102
docs/catalogs/scores.md
Normal file
102
docs/catalogs/scores.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# Scores Catalog
|
||||
|
||||
A `Score` is a declarative description of a desired state. Find the Score you need and add it to your `harmony!` block's `scores` array.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Scores Catalog](#scores-catalog)
|
||||
- [Application Deployment](#application-deployment)
|
||||
- [OKD / Kubernetes Cluster Setup](#okd-kubernetes-cluster-setup)
|
||||
- [Cluster Services & Management](#cluster-services-management)
|
||||
- [Monitoring & Alerting](#monitoring-alerting)
|
||||
- [Infrastructure & Networking (Bare Metal)](#infrastructure-networking-bare-metal)
|
||||
- [Infrastructure & Networking (Cluster)](#infrastructure-networking-cluster)
|
||||
- [Tenant Management](#tenant-management)
|
||||
- [Utility](#utility)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
## Application Deployment
|
||||
|
||||
Scores for deploying and managing end-user applications.
|
||||
|
||||
- **ApplicationScore**: The primary score for deploying a web application. Describes the application, its framework, and the features it requires (e.g., monitoring, CI/CD).
|
||||
- **HelmChartScore**: Deploys a generic Helm chart to a Kubernetes cluster.
|
||||
- **ArgoHelmScore**: Deploys an application using an ArgoCD Helm chart.
|
||||
- **LAMPScore**: A specialized score for deploying a classic LAMP (Linux, Apache, MySQL, PHP) stack.
|
||||
|
||||
## OKD / Kubernetes Cluster Setup
|
||||
|
||||
This collection of Scores is used to provision an entire OKD cluster from bare metal. They are typically used in order.
|
||||
|
||||
- **OKDSetup01InventoryScore**: Discovers and catalogs the physical hardware.
|
||||
- **OKDSetup02BootstrapScore**: Configures the bootstrap node, renders iPXE files, and kicks off the SCOS installation.
|
||||
- **OKDSetup03ControlPlaneScore**: Renders iPXE configurations for the control plane nodes.
|
||||
- **OKDSetupPersistNetworkBondScore**: Configures network bonds on the nodes and port channels on the switches.
|
||||
- **OKDSetup04WorkersScore**: Renders iPXE configurations for the worker nodes.
|
||||
- **OKDSetup06InstallationReportScore**: Runs post-installation checks and generates a report.
|
||||
- **OKDUpgradeScore**: Manages the upgrade process for an existing OKD cluster.
|
||||
|
||||
## Cluster Services & Management
|
||||
|
||||
Scores for installing and managing services _inside_ a Kubernetes cluster.
|
||||
|
||||
- **K3DInstallationScore**: Installs and configes a local K3D (k3s-in-docker) cluster. Used by `K8sAnywhereTopology`.
|
||||
- **CertManagerHelmScore**: Deploys the `cert-manager` Helm chart.
|
||||
- **ClusterIssuerScore**: Configures a `ClusterIssuer` for `cert-manager`, (e.g., for Let's Encrypt).
|
||||
- **K8sNamespaceScore**: Ensures a Kubernetes namespace exists.
|
||||
- **K8sDeploymentScore**: Deploys a generic `Deployment` resource to Kubernetes.
|
||||
- **K8sIngressScore**: Configures an `Ingress` resource for a service.
|
||||
|
||||
## Monitoring & Alerting
|
||||
|
||||
Scores for configuring observability, dashboards, and alerts.
|
||||
|
||||
- **ApplicationMonitoringScore**: A generic score to set up monitoring for an application.
|
||||
- **ApplicationRHOBMonitoringScore**: A specialized score for setting up monitoring via the Red Hat Observability stack.
|
||||
- **HelmPrometheusAlertingScore**: Configures Prometheus alerts via a Helm chart.
|
||||
- **K8sPrometheusCRDAlertingScore**: Configures Prometheus alerts using the `PrometheusRule` CRD.
|
||||
- **PrometheusAlertScore**: A generic score for creating a Prometheus alert.
|
||||
- **RHOBAlertingScore**: Configures alerts specifically for the Red Hat Observability stack.
|
||||
- **NtfyScore**: Configures alerts to be sent to a `ntfy.sh` server.
|
||||
|
||||
## Infrastructure & Networking (Bare Metal)
|
||||
|
||||
Low-level scores for managing physical hardware and network services.
|
||||
|
||||
- **DhcpScore**: Configures a DHCP server.
|
||||
- **OKDDhcpScore**: A specialized DHCP configuration for the OKD bootstrap process.
|
||||
- **OKDBootstrapDhcpScore**: Configures DHCP specifically for the bootstrap node.
|
||||
- **DhcpHostBindingScore**: Creates a specific MAC-to-IP binding in the DHCP server.
|
||||
- **DnsScore**: Configures a DNS server.
|
||||
- **OKDDnsScore**: A specialized DNS configuration for the OKD cluster (e.g., `api.*`, `*.apps.*`).
|
||||
- **StaticFilesHttpScore**: Serves a directory of static files (e.g., a documentation site) over HTTP.
|
||||
- **TftpScore**: Configures a TFTP server, typically for serving iPXE boot files.
|
||||
- **IPxeMacBootFileScore**: Assigns a specific iPXE boot file to a MAC address in the TFTP server.
|
||||
- **OKDIpxeScore**: A specialized score for generating the iPXE boot scripts for OKD.
|
||||
- **OPNsenseShellCommandScore**: Executes a shell command on an OPNsense firewall.
|
||||
|
||||
## Infrastructure & Networking (Cluster)
|
||||
|
||||
Network services that run inside the cluster or as part of the topology.
|
||||
|
||||
- **LoadBalancerScore**: Configures a general-purpose load balancer.
|
||||
- **OKDLoadBalancerScore**: Configures the high-availability load balancers for the OKD API and ingress.
|
||||
- **OKDBootstrapLoadBalancerScore**: Configures the load balancer specifically for the bootstrap-time API endpoint.
|
||||
- **K8sIngressScore**: Configures an Ingress controller or resource.
|
||||
- [HighAvailabilityHostNetworkScore](../../harmony/src/modules/okd/host_network.rs): Configures network bonds on a host and the corresponding port-channels on the switch stack for high-availability.
|
||||
|
||||
## Tenant Management
|
||||
|
||||
Scores for managing multi-tenancy within a cluster.
|
||||
|
||||
- **TenantScore**: Creates a new tenant (e.g., a namespace, quotas, network policies).
|
||||
- **TenantCredentialScore**: Generates and provisions credentials for a new tenant.
|
||||
|
||||
## Utility
|
||||
|
||||
Helper scores for discovery and inspection.
|
||||
|
||||
- **LaunchDiscoverInventoryAgentScore**: Launches the agent responsible for the `OKDSetup01InventoryScore`.
|
||||
- **DiscoverHostForRoleScore**: A utility score to find a host matching a specific role in the inventory.
|
||||
- **InspectInventoryScore**: Dumps the discovered inventory for inspection.
|
||||
59
docs/catalogs/topologies.md
Normal file
59
docs/catalogs/topologies.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Topologies Catalog
|
||||
|
||||
A `Topology` is the logical representation of your infrastructure and its `Capabilities`. You select a `Topology` in your Harmony project to define _where_ your `Scores` will be applied.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Topologies Catalog](#topologies-catalog)
|
||||
- [HAClusterTopology](#haclustertopology)
|
||||
- [K8sAnywhereTopology](#k8sanywheretopology)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
### HAClusterTopology
|
||||
|
||||
- **`HAClusterTopology::autoload()`**
|
||||
|
||||
This `Topology` represents a high-availability, bare-metal cluster. It is designed for production-grade deployments like OKD.
|
||||
|
||||
It models an environment consisting of:
|
||||
|
||||
- At least 3 cluster nodes (for control plane/workers)
|
||||
- 2 redundant firewalls (e.g., OPNsense)
|
||||
- 2 redundant network switches
|
||||
|
||||
**Provided Capabilities:**
|
||||
This topology provides a rich set of capabilities required for bare-metal provisioning and cluster management, including:
|
||||
|
||||
- `K8sClient` (once the cluster is bootstrapped)
|
||||
- `DnsServer`
|
||||
- `LoadBalancer`
|
||||
- `DhcpServer`
|
||||
- `TftpServer`
|
||||
- `Router` (via the firewalls)
|
||||
- `Switch`
|
||||
- `NetworkManager` (for host-level network config)
|
||||
|
||||
---
|
||||
|
||||
### K8sAnywhereTopology
|
||||
|
||||
- **`K8sAnywhereTopology::from_env()`**
|
||||
|
||||
This `Topology` is designed for development and application deployment. It provides a simple, abstract way to deploy to _any_ Kubernetes cluster.
|
||||
|
||||
**How it works:**
|
||||
|
||||
1. By default (`from_env()` with no env vars), it automatically provisions a **local K3D (k3s-in-docker) cluster** on your machine. This is perfect for local development and testing.
|
||||
2. If you provide a `KUBECONFIG` environment variable, it will instead connect to that **existing Kubernetes cluster** (e.g., your staging or production OKD cluster).
|
||||
|
||||
This allows you to use the _exact same code_ to deploy your application locally as you do to deploy it to production.
|
||||
|
||||
**Provided Capabilities:**
|
||||
|
||||
- `K8sClient`
|
||||
- `HelmCommand`
|
||||
- `TenantManager`
|
||||
- `Ingress`
|
||||
- `Monitoring`
|
||||
- ...and more.
|
||||
40
docs/concepts.md
Normal file
40
docs/concepts.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Core Concepts
|
||||
|
||||
Harmony's design is based on a few key concepts. Understanding them is the key to unlocking the framework's power.
|
||||
|
||||
### 1. Score
|
||||
|
||||
- **What it is:** A **Score** is a declarative description of a desired state. It's a "resource" that defines _what_ you want to achieve, not _how_ to do it.
|
||||
- **Example:** `ApplicationScore` declares "I want this web application to be running and monitored."
|
||||
|
||||
### 2. Topology
|
||||
|
||||
- **What it is:** A **Topology** is the logical representation of your infrastructure and its abilities. It's the "where" your Scores will be applied.
|
||||
- **Key Job:** A Topology's most important job is to expose which `Capabilities` it supports.
|
||||
- **Example:** `HAClusterTopology` represents a bare-metal cluster and exposes `Capabilities` like `NetworkManager` and `Switch`. `K8sAnywhereTopology` represents a Kubernetes cluster and exposes the `K8sClient` `Capability`.
|
||||
|
||||
### 3. Capability
|
||||
|
||||
- **What it is:** A **Capability** is a specific feature or API that a `Topology` offers. It's the "how" a `Topology` can fulfill a `Score`'s request.
|
||||
- **Example:** The `K8sClient` capability offers a way to interact with a Kubernetes API. The `Switch` capability offers a way to configure a physical network switch.
|
||||
|
||||
### 4. Interpret
|
||||
|
||||
- **What it is:** An **Interpret** is the execution logic that makes a `Score` a reality. It's the "glue" that connects the _desired state_ (`Score`) to the _environment's abilities_ (`Topology`'s `Capabilities`).
|
||||
- **How it works:** When you apply a `Score`, Harmony finds the matching `Interpret` for your `Topology`. This `Interpret` then uses the `Capabilities` provided by the `Topology` to execute the necessary steps.
|
||||
|
||||
### 5. Inventory
|
||||
|
||||
- **What it is:** An **Inventory** is the physical material (the "what") used in a cluster. This is most relevant for bare-metal or on-premise topologies.
|
||||
- **Example:** A list of nodes with their roles (control plane, worker), CPU, RAM, and network interfaces. For the `K8sAnywhereTopology`, the inventory might be empty or autoloaded, as the infrastructure is more abstract.
|
||||
|
||||
---
|
||||
|
||||
### How They Work Together (The Compile-Time Check)
|
||||
|
||||
1. You **write a `Score`** (e.g., `ApplicationScore`).
|
||||
2. Your `Score`'s `Interpret` logic requires certain **`Capabilities`** (e.g., `K8sClient` and `Ingress`).
|
||||
3. You choose a **`Topology`** to run it on (e.g., `HAClusterTopology`).
|
||||
4. **At compile-time**, Harmony checks: "Does `HAClusterTopology` provide the `K8sClient` and `Ingress` capabilities that `ApplicationScore` needs?"
|
||||
- **If Yes:** Your code compiles. You can be confident it will run.
|
||||
- **If No:** The compiler gives you an error. You've just prevented a "config-is-valid-but-platform-is-wrong" runtime error before you even deployed.
|
||||
42
docs/guides/getting-started.md
Normal file
42
docs/guides/getting-started.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Getting Started Guide
|
||||
|
||||
Welcome to Harmony! This guide will walk you through installing the Harmony framework, setting up a new project, and deploying your first application.
|
||||
|
||||
We will build and deploy the "Rust Web App" example, which automatically:
|
||||
|
||||
1. Provisions a local K3D (Kubernetes in Docker) cluster.
|
||||
2. Deploys a sample Rust web application.
|
||||
3. Sets up monitoring for the application.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you begin, you'll need a few tools installed on your system:
|
||||
|
||||
- **Rust & Cargo:** [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
- **Docker:** [Install Docker](https://docs.docker.com/get-docker/) (Required for the K3D local cluster)
|
||||
- **kubectl:** [Install kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) (For inspecting the cluster)
|
||||
|
||||
## 1. Install Harmony
|
||||
|
||||
First, clone the Harmony repository and build the project. This gives you the `harmony` CLI and all the core libraries.
|
||||
|
||||
```bash
|
||||
# Clone the main repository
|
||||
git clone https://git.nationtech.io/nationtech/harmony
|
||||
cd harmony
|
||||
|
||||
# Build the project (this may take a few minutes)
|
||||
cargo build --release
|
||||
```
|
||||
|
||||
...
|
||||
|
||||
## Next Steps
|
||||
|
||||
Congratulations, you've just deployed an application using true infrastructure-as-code!
|
||||
|
||||
From here, you can:
|
||||
|
||||
- [Explore the Catalogs](../catalogs/README.md): See what other [Scores](../catalogs/scores.md) and [Topologies](../catalogs/topologies.md) are available.
|
||||
- [Read the Use Cases](../use-cases/README.md): Check out the [OKD on Bare Metal](./use-cases/okd-on-bare-metal.md) guide for a more advanced scenario.
|
||||
- [Write your own Score](../guides/writing-a-score.md): Dive into the [Developer Guide](./guides/developer-guide.md) to start building your own components.
|
||||
@@ -1,22 +1,28 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use brocade::{BrocadeOptions, PortOperatingMode};
|
||||
use harmony::{
|
||||
data::Version,
|
||||
infra::brocade::BrocadeSwitchClient,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
infra::brocade::BrocadeSwitchConfig,
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HostNetworkConfig, PortConfig, PreparationError, PreparationOutcome, Switch, SwitchClient,
|
||||
SwitchError, Topology,
|
||||
},
|
||||
modules::brocade::{BrocadeSwitchAuth, BrocadeSwitchScore, SwitchTopology},
|
||||
};
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{id::Id, net::MacAddress, switch::PortLocation};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
use harmony_types::{id::Id, switch::PortLocation};
|
||||
|
||||
fn get_switch_config() -> BrocadeSwitchConfig {
|
||||
let mut options = BrocadeOptions::default();
|
||||
options.ssh.port = 2222;
|
||||
let auth = BrocadeSwitchAuth {
|
||||
username: "admin".to_string(),
|
||||
password: "password".to_string(),
|
||||
};
|
||||
|
||||
BrocadeSwitchConfig {
|
||||
ips: vec![ip!("127.0.0.1")],
|
||||
auth,
|
||||
options,
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
@@ -32,126 +38,13 @@ async fn main() {
|
||||
(PortLocation(1, 0, 18), PortOperatingMode::Trunk),
|
||||
],
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
SwitchTopology::new().await,
|
||||
SwitchTopology::new(get_switch_config()).await,
|
||||
vec![Box::new(switch_score)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
struct BrocadeSwitchScore {
|
||||
port_channels_to_clear: Vec<Id>,
|
||||
ports_to_configure: Vec<PortConfig>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Switch> Score<T> for BrocadeSwitchScore {
|
||||
fn name(&self) -> String {
|
||||
"BrocadeSwitchScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(BrocadeSwitchInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct BrocadeSwitchInterpret {
|
||||
score: BrocadeSwitchScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + Switch> Interpret<T> for BrocadeSwitchInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Applying switch configuration {:?}", self.score);
|
||||
debug!(
|
||||
"Clearing port channel {:?}",
|
||||
self.score.port_channels_to_clear
|
||||
);
|
||||
topology
|
||||
.clear_port_channel(&self.score.port_channels_to_clear)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
debug!("Configuring interfaces {:?}", self.score.ports_to_configure);
|
||||
topology
|
||||
.configure_interface(&self.score.ports_to_configure)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success("switch configured".to_string()))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("BrocadeSwitchInterpret")
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
struct SwitchTopology {
|
||||
client: Box<dyn SwitchClient>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Topology for SwitchTopology {
|
||||
fn name(&self) -> &str {
|
||||
"SwitchTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
impl SwitchTopology {
|
||||
async fn new() -> Self {
|
||||
let mut options = BrocadeOptions::default();
|
||||
options.ssh.port = 2222;
|
||||
let client =
|
||||
BrocadeSwitchClient::init(&vec![ip!("127.0.0.1")], &"admin", &"password", options)
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
let client = Box::new(client);
|
||||
Self { client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Switch for SwitchTopology {
|
||||
async fn setup_switch(&self) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_port_for_mac_address(
|
||||
&self,
|
||||
_mac_address: &MacAddress,
|
||||
) -> Result<Option<PortLocation>, SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn configure_port_channel(&self, _config: &HostNetworkConfig) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
self.client.clear_port_channel(ids).await
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
self.client.configure_interface(ports).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::cert_manager::{
|
||||
capability::CertificateManagementConfig, score_cert_management::CertificateManagementScore,
|
||||
score_certificate::CertificateScore, score_issuer::CertificateIssuerScore,
|
||||
capability::CertificateManagementConfig, score_certificate::CertificateScore,
|
||||
score_issuer::CertificateIssuerScore,
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
};
|
||||
|
||||
21
examples/k8s_drain_node/Cargo.toml
Normal file
21
examples/k8s_drain_node/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[package]
|
||||
name = "example-k8s-drain-node"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
harmony-k8s = { path = "../../harmony-k8s" }
|
||||
cidr.workspace = true
|
||||
tokio.workspace = true
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
url.workspace = true
|
||||
assert_cmd = "2.0.16"
|
||||
inquire.workspace = true
|
||||
61
examples/k8s_drain_node/src/main.rs
Normal file
61
examples/k8s_drain_node/src/main.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use harmony_k8s::{DrainOptions, K8sClient};
|
||||
use log::{info, trace};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
env_logger::init();
|
||||
let k8s = K8sClient::try_default().await.unwrap();
|
||||
let nodes = k8s.get_nodes(None).await.unwrap();
|
||||
trace!("Got nodes : {nodes:#?}");
|
||||
let node_names = nodes
|
||||
.iter()
|
||||
.map(|n| n.metadata.name.as_ref().unwrap())
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
info!("Got nodes : {:?}", node_names);
|
||||
|
||||
let node_name = inquire::Select::new("What node do you want to operate on?", node_names)
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
let drain = inquire::Confirm::new("Do you wish to drain the node now ?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if drain {
|
||||
let mut options = DrainOptions::default_ignore_daemonset_delete_emptydir_data();
|
||||
options.timeout = Duration::from_secs(1);
|
||||
k8s.drain_node(&node_name, &options).await.unwrap();
|
||||
|
||||
info!("Node {node_name} successfully drained");
|
||||
}
|
||||
|
||||
let uncordon =
|
||||
inquire::Confirm::new("Do you wish to uncordon node to resume scheduling workloads now?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if uncordon {
|
||||
info!("Uncordoning node {node_name}");
|
||||
k8s.uncordon_node(node_name).await.unwrap();
|
||||
info!("Node {node_name} uncordoned");
|
||||
}
|
||||
|
||||
let reboot = inquire::Confirm::new("Do you wish to reboot node now?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if reboot {
|
||||
k8s.reboot_node(
|
||||
&node_name,
|
||||
&DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
|
||||
Duration::from_secs(3600),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
info!("All done playing with nodes, happy harmonizing!");
|
||||
}
|
||||
21
examples/k8s_write_file_on_node/Cargo.toml
Normal file
21
examples/k8s_write_file_on_node/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[package]
|
||||
name = "example-k8s-write-file-on-node"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
harmony-k8s = { path = "../../harmony-k8s" }
|
||||
cidr.workspace = true
|
||||
tokio.workspace = true
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
url.workspace = true
|
||||
assert_cmd = "2.0.16"
|
||||
inquire.workspace = true
|
||||
45
examples/k8s_write_file_on_node/src/main.rs
Normal file
45
examples/k8s_write_file_on_node/src/main.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use harmony_k8s::{K8sClient, NodeFile};
|
||||
use log::{info, trace};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
env_logger::init();
|
||||
let k8s = K8sClient::try_default().await.unwrap();
|
||||
let nodes = k8s.get_nodes(None).await.unwrap();
|
||||
trace!("Got nodes : {nodes:#?}");
|
||||
let node_names = nodes
|
||||
.iter()
|
||||
.map(|n| n.metadata.name.as_ref().unwrap())
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
info!("Got nodes : {:?}", node_names);
|
||||
|
||||
let node = inquire::Select::new("What node do you want to write file to?", node_names)
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
let path = inquire::Text::new("File path on node").prompt().unwrap();
|
||||
let content = inquire::Text::new("File content").prompt().unwrap();
|
||||
|
||||
let node_file = NodeFile {
|
||||
path: path,
|
||||
content: content,
|
||||
mode: 0o600,
|
||||
};
|
||||
|
||||
k8s.write_files_to_node(&node, &vec![node_file.clone()])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let cmd = inquire::Text::new("Command to run on node")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
k8s.run_privileged_command_on_node(&node, &cmd)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
info!(
|
||||
"File {} mode {} written in node {node}",
|
||||
node_file.path, node_file.mode
|
||||
);
|
||||
}
|
||||
@@ -215,7 +215,7 @@ fn site(
|
||||
dns_name: format!("{cluster_name}-gw.{domain}"),
|
||||
supercluster_ca_secret_name: "nats-supercluster-ca-bundle",
|
||||
tls_cert_name: "nats-gateway",
|
||||
jetstream_enabled: "false",
|
||||
jetstream_enabled: "true",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
16
examples/node_health/Cargo.toml
Normal file
16
examples/node_health/Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "example-node-health"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
tokio = { workspace = true }
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
log = { workspace = true }
|
||||
env_logger = { workspace = true }
|
||||
17
examples/node_health/src/main.rs
Normal file
17
examples/node_health/src/main.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
use harmony::{
|
||||
inventory::Inventory, modules::node_health::NodeHealthScore, topology::K8sAnywhereTopology,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let node_health = NodeHealthScore {};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
vec![Box::new(node_health)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
@@ -2,8 +2,12 @@ use brocade::BrocadeOptions;
|
||||
use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
hardware::{Location, SwitchGroup},
|
||||
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
|
||||
infra::{
|
||||
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
opnsense::OPNSenseManagementInterface,
|
||||
},
|
||||
inventory::Inventory,
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4};
|
||||
@@ -36,12 +40,11 @@ pub async fn get_topology() -> HAClusterTopology {
|
||||
dry_run: *harmony::config::DRY_RUN,
|
||||
..Default::default()
|
||||
};
|
||||
let switch_client = BrocadeSwitchClient::init(
|
||||
&switches,
|
||||
&switch_auth.username,
|
||||
&switch_auth.password,
|
||||
brocade_options,
|
||||
)
|
||||
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
|
||||
ips: switches,
|
||||
auth: switch_auth,
|
||||
options: brocade_options,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
@@ -103,9 +106,3 @@ pub fn get_inventory() -> Inventory {
|
||||
control_plane_host: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
@@ -3,14 +3,16 @@ use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
config::secret::OPNSenseFirewallCredentials,
|
||||
hardware::{Location, SwitchGroup},
|
||||
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
|
||||
infra::{
|
||||
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
opnsense::OPNSenseManagementInterface,
|
||||
},
|
||||
inventory::Inventory,
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4};
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use harmony_secret::SecretManager;
|
||||
use std::{
|
||||
net::IpAddr,
|
||||
sync::{Arc, OnceLock},
|
||||
@@ -31,12 +33,11 @@ pub async fn get_topology() -> HAClusterTopology {
|
||||
dry_run: *harmony::config::DRY_RUN,
|
||||
..Default::default()
|
||||
};
|
||||
let switch_client = BrocadeSwitchClient::init(
|
||||
&switches,
|
||||
&switch_auth.username,
|
||||
&switch_auth.password,
|
||||
brocade_options,
|
||||
)
|
||||
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
|
||||
ips: switches,
|
||||
auth: switch_auth,
|
||||
options: brocade_options,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
@@ -98,9 +99,3 @@ pub fn get_inventory() -> Inventory {
|
||||
control_plane_host: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
@@ -1,63 +1,13 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::helm::chart::{HelmChartScore, HelmRepository, NonBlankString},
|
||||
topology::K8sAnywhereTopology,
|
||||
inventory::Inventory, modules::openbao::OpenbaoScore, topology::K8sAnywhereTopology,
|
||||
};
|
||||
use harmony_macros::hurl;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let values_yaml = Some(
|
||||
r#"server:
|
||||
standalone:
|
||||
enabled: true
|
||||
config: |
|
||||
listener "tcp" {
|
||||
tls_disable = true
|
||||
address = "[::]:8200"
|
||||
cluster_address = "[::]:8201"
|
||||
}
|
||||
|
||||
storage "file" {
|
||||
path = "/openbao/data"
|
||||
}
|
||||
|
||||
service:
|
||||
enabled: true
|
||||
|
||||
dataStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
storageClass: null
|
||||
accessMode: ReadWriteOnce
|
||||
|
||||
auditStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
storageClass: null
|
||||
accessMode: ReadWriteOnce"#
|
||||
.to_string(),
|
||||
);
|
||||
let openbao = HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str("openbao").unwrap()),
|
||||
release_name: NonBlankString::from_str("openbao").unwrap(),
|
||||
chart_name: NonBlankString::from_str("openbao/openbao").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml,
|
||||
create_namespace: true,
|
||||
install_only: true,
|
||||
repository: Some(HelmRepository::new(
|
||||
"openbao".to_string(),
|
||||
hurl!("https://openbao.github.io/openbao-helm"),
|
||||
true,
|
||||
)),
|
||||
let openbao = OpenbaoScore {
|
||||
host: "openbao.sebastien.sto1.nationtech.io".to_string(),
|
||||
};
|
||||
|
||||
// TODO exec pod commands to initialize secret store if not already done
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::{k8s::apps::OperatorHubCatalogSourceScore, postgresql::CloudNativePgOperatorScore},
|
||||
@@ -9,7 +7,7 @@ use harmony::{
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let operatorhub_catalog = OperatorHubCatalogSourceScore::default();
|
||||
let cnpg_operator = CloudNativePgOperatorScore::default();
|
||||
let cnpg_operator = CloudNativePgOperatorScore::default_openshift();
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
|
||||
@@ -1,22 +1,13 @@
|
||||
use std::{
|
||||
net::{IpAddr, Ipv4Addr},
|
||||
sync::Arc,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
executors::ExecutorError,
|
||||
hardware::{HostCategory, Location, PhysicalHost, SwitchGroup},
|
||||
infra::opnsense::OPNSenseManagementInterface,
|
||||
inventory::Inventory,
|
||||
modules::opnsense::node_exporter::NodeExporterScore,
|
||||
topology::{
|
||||
HAClusterTopology, LogicalHost, PreparationError, PreparationOutcome, Topology,
|
||||
UnmanagedRouter, node_exporter::NodeExporter,
|
||||
},
|
||||
topology::{PreparationError, PreparationOutcome, Topology, node_exporter::NodeExporter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4, mac_address};
|
||||
use harmony_macros::ip;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct OpnSenseTopology {
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::postgresql::{
|
||||
K8sPostgreSQLScore, PostgreSQLConnectionScore, PublicPostgreSQLScore,
|
||||
capability::PostgreSQLConfig,
|
||||
PostgreSQLConnectionScore, PublicPostgreSQLScore, capability::PostgreSQLConfig,
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
};
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::{collections::HashMap, path::PathBuf, sync::Arc};
|
||||
use std::{path::PathBuf, sync::Arc};
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::{collections::HashMap, path::PathBuf, sync::Arc};
|
||||
use std::{path::PathBuf, sync::Arc};
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
|
||||
@@ -5,6 +5,10 @@ version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[[example]]
|
||||
name = "try_rust_webapp"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
|
||||
14
examples/zitadel/Cargo.toml
Normal file
14
examples/zitadel/Cargo.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "example-zitadel"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
tokio.workspace = true
|
||||
url.workspace = true
|
||||
20
examples/zitadel/src/main.rs
Normal file
20
examples/zitadel/src/main.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
use harmony::{
|
||||
inventory::Inventory, modules::zitadel::ZitadelScore, topology::K8sAnywhereTopology,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let zitadel = ZitadelScore {
|
||||
host: "sso.sto1.nationtech.io".to_string(),
|
||||
zitadel_version: "v4.12.1".to_string(),
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
vec![Box::new(zitadel)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
BIN
examples/zitadel/zitadel-9.24.0.tgz
Normal file
BIN
examples/zitadel/zitadel-9.24.0.tgz
Normal file
Binary file not shown.
23
harmony-k8s/Cargo.toml
Normal file
23
harmony-k8s/Cargo.toml
Normal file
@@ -0,0 +1,23 @@
|
||||
[package]
|
||||
name = "harmony-k8s"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
kube.workspace = true
|
||||
k8s-openapi.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-retry.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_yaml.workspace = true
|
||||
log.workspace = true
|
||||
similar.workspace = true
|
||||
reqwest.workspace = true
|
||||
url.workspace = true
|
||||
inquire.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions.workspace = true
|
||||
593
harmony-k8s/src/apply.rs
Normal file
593
harmony-k8s/src/apply.rs
Normal file
@@ -0,0 +1,593 @@
|
||||
use kube::{
|
||||
Client, Error, Resource,
|
||||
api::{
|
||||
Api, ApiResource, DynamicObject, GroupVersionKind, Patch, PatchParams, PostParams,
|
||||
ResourceExt,
|
||||
},
|
||||
core::ErrorResponse,
|
||||
discovery::Scope,
|
||||
error::DiscoveryError,
|
||||
};
|
||||
use log::{debug, error, trace, warn};
|
||||
use serde::{Serialize, de::DeserializeOwned};
|
||||
use serde_json::Value;
|
||||
use similar::TextDiff;
|
||||
use url::Url;
|
||||
|
||||
use crate::client::K8sClient;
|
||||
use crate::helper;
|
||||
use crate::types::WriteMode;
|
||||
|
||||
/// The field-manager token sent with every server-side apply request.
|
||||
pub const FIELD_MANAGER: &str = "harmony-k8s";
|
||||
|
||||
// ── Private helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
/// Serialise any `Serialize` payload to a [`DynamicObject`] via JSON.
|
||||
fn to_dynamic<T: Serialize>(payload: &T) -> Result<DynamicObject, Error> {
|
||||
serde_json::from_value(serde_json::to_value(payload).map_err(Error::SerdeError)?)
|
||||
.map_err(Error::SerdeError)
|
||||
}
|
||||
|
||||
/// Fetch the current resource, display a unified diff against `payload`, and
|
||||
/// return `()`. All output goes to stdout (same behaviour as before).
|
||||
///
|
||||
/// A 404 is treated as "resource would be created" — not an error.
|
||||
async fn show_dry_run<T: Serialize>(
|
||||
api: &Api<DynamicObject>,
|
||||
name: &str,
|
||||
payload: &T,
|
||||
) -> Result<(), Error> {
|
||||
let new_yaml = serde_yaml::to_string(payload)
|
||||
.unwrap_or_else(|_| "Failed to serialize new resource".to_string());
|
||||
|
||||
match api.get(name).await {
|
||||
Ok(current) => {
|
||||
println!("\nDry-run for resource: '{name}'");
|
||||
let mut current_val = serde_yaml::to_value(¤t).unwrap_or(serde_yaml::Value::Null);
|
||||
if let Some(map) = current_val.as_mapping_mut() {
|
||||
map.remove(&serde_yaml::Value::String("status".to_string()));
|
||||
}
|
||||
let current_yaml = serde_yaml::to_string(¤t_val)
|
||||
.unwrap_or_else(|_| "Failed to serialize current resource".to_string());
|
||||
|
||||
if current_yaml == new_yaml {
|
||||
println!("No changes detected.");
|
||||
} else {
|
||||
println!("Changes detected:");
|
||||
let diff = TextDiff::from_lines(¤t_yaml, &new_yaml);
|
||||
for change in diff.iter_all_changes() {
|
||||
let sign = match change.tag() {
|
||||
similar::ChangeTag::Delete => "-",
|
||||
similar::ChangeTag::Insert => "+",
|
||||
similar::ChangeTag::Equal => " ",
|
||||
};
|
||||
print!("{sign}{change}");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Err(Error::Api(ErrorResponse { code: 404, .. })) => {
|
||||
println!("\nDry-run for new resource: '{name}'");
|
||||
println!("Resource does not exist. Would be created:");
|
||||
for line in new_yaml.lines() {
|
||||
println!("+{line}");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to fetch resource '{name}' for dry-run: {e}");
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the real (non-dry-run) apply, respecting [`WriteMode`].
|
||||
async fn do_apply<T: Serialize + std::fmt::Debug>(
|
||||
api: &Api<DynamicObject>,
|
||||
name: &str,
|
||||
payload: &T,
|
||||
patch_params: &PatchParams,
|
||||
write_mode: &WriteMode,
|
||||
) -> Result<DynamicObject, Error> {
|
||||
match write_mode {
|
||||
WriteMode::CreateOrUpdate => {
|
||||
// TODO refactor this arm to perform self.update and if fail with 404 self.create
|
||||
// This will avoid the repetition of the api.patch and api.create calls within this
|
||||
// function body. This makes the code more maintainable
|
||||
match api.patch(name, patch_params, &Patch::Apply(payload)).await {
|
||||
Ok(obj) => Ok(obj),
|
||||
Err(Error::Api(ErrorResponse { code: 404, .. })) => {
|
||||
debug!("Resource '{name}' not found via SSA, falling back to POST");
|
||||
let dyn_obj = to_dynamic(payload)?;
|
||||
api.create(&PostParams::default(), &dyn_obj)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error!("Failed to create '{name}': {e}");
|
||||
e
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to apply '{name}': {e}");
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
WriteMode::Create => {
|
||||
let dyn_obj = to_dynamic(payload)?;
|
||||
api.create(&PostParams::default(), &dyn_obj)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error!("Failed to create '{name}': {e}");
|
||||
e
|
||||
})
|
||||
}
|
||||
WriteMode::Update => match api.patch(name, patch_params, &Patch::Apply(payload)).await {
|
||||
Ok(obj) => Ok(obj),
|
||||
Err(Error::Api(ErrorResponse { code: 404, .. })) => Err(Error::Api(ErrorResponse {
|
||||
code: 404,
|
||||
message: format!("Resource '{name}' not found and WriteMode is UpdateOnly"),
|
||||
reason: "NotFound".to_string(),
|
||||
status: "Failure".to_string(),
|
||||
})),
|
||||
Err(e) => {
|
||||
error!("Failed to update '{name}': {e}");
|
||||
Err(e)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ── Public API ───────────────────────────────────────────────────────────────
|
||||
|
||||
impl K8sClient {
|
||||
/// Server-side apply: create if absent, update if present.
|
||||
/// Equivalent to `kubectl apply`.
|
||||
pub async fn apply<K>(&self, resource: &K, namespace: Option<&str>) -> Result<K, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + Serialize,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
self.apply_with_strategy(resource, namespace, WriteMode::CreateOrUpdate)
|
||||
.await
|
||||
}
|
||||
|
||||
/// POST only — returns an error if the resource already exists.
|
||||
pub async fn create<K>(&self, resource: &K, namespace: Option<&str>) -> Result<K, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + Serialize,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
self.apply_with_strategy(resource, namespace, WriteMode::Create)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Server-side apply only — returns an error if the resource does not exist.
|
||||
pub async fn update<K>(&self, resource: &K, namespace: Option<&str>) -> Result<K, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + Serialize,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
self.apply_with_strategy(resource, namespace, WriteMode::Update)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn apply_with_strategy<K>(
|
||||
&self,
|
||||
resource: &K,
|
||||
namespace: Option<&str>,
|
||||
write_mode: WriteMode,
|
||||
) -> Result<K, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + Serialize,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
debug!(
|
||||
"apply_with_strategy: {:?} ns={:?}",
|
||||
resource.meta().name,
|
||||
namespace
|
||||
);
|
||||
trace!("{:#}", serde_json::to_value(resource).unwrap_or_default());
|
||||
|
||||
let dyntype = K::DynamicType::default();
|
||||
let gvk = GroupVersionKind {
|
||||
group: K::group(&dyntype).to_string(),
|
||||
version: K::version(&dyntype).to_string(),
|
||||
kind: K::kind(&dyntype).to_string(),
|
||||
};
|
||||
|
||||
let discovery = self.discovery().await?;
|
||||
let (ar, caps) = discovery.resolve_gvk(&gvk).ok_or_else(|| {
|
||||
Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Cannot resolve GVK: {gvk:?}"
|
||||
)))
|
||||
})?;
|
||||
|
||||
let effective_ns = if caps.scope == Scope::Cluster {
|
||||
None
|
||||
} else {
|
||||
namespace.or_else(|| resource.meta().namespace.as_deref())
|
||||
};
|
||||
|
||||
let api: Api<DynamicObject> =
|
||||
get_dynamic_api(ar, caps, self.client.clone(), effective_ns, false);
|
||||
|
||||
let name = resource
|
||||
.meta()
|
||||
.name
|
||||
.as_deref()
|
||||
.expect("Kubernetes resource must have a name");
|
||||
|
||||
if self.dry_run {
|
||||
show_dry_run(&api, name, resource).await?;
|
||||
return Ok(resource.clone());
|
||||
}
|
||||
|
||||
let patch_params = PatchParams::apply(FIELD_MANAGER);
|
||||
do_apply(&api, name, resource, &patch_params, &write_mode)
|
||||
.await
|
||||
.and_then(helper::dyn_to_typed)
|
||||
}
|
||||
|
||||
/// Applies resources in order, one at a time
|
||||
pub async fn apply_many<K>(&self, resources: &[K], ns: Option<&str>) -> Result<Vec<K>, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + Serialize,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
let mut result = Vec::new();
|
||||
for r in resources.iter() {
|
||||
let res = self.apply(r, ns).await;
|
||||
if res.is_err() {
|
||||
// NOTE: this may log sensitive data; downgrade to debug if needed.
|
||||
warn!(
|
||||
"Failed to apply k8s resource: {}",
|
||||
serde_json::to_string_pretty(r).map_err(Error::SerdeError)?
|
||||
);
|
||||
}
|
||||
result.push(res?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Apply a [`DynamicObject`] resource using server-side apply.
|
||||
pub async fn apply_dynamic(
|
||||
&self,
|
||||
resource: &DynamicObject,
|
||||
namespace: Option<&str>,
|
||||
force_conflicts: bool,
|
||||
) -> Result<DynamicObject, Error> {
|
||||
trace!("apply_dynamic {resource:#?} ns={namespace:?} force={force_conflicts}");
|
||||
|
||||
let discovery = self.discovery().await?;
|
||||
let type_meta = resource.types.as_ref().ok_or_else(|| {
|
||||
Error::BuildRequest(kube::core::request::Error::Validation(
|
||||
"DynamicObject must have types (apiVersion and kind)".to_string(),
|
||||
))
|
||||
})?;
|
||||
|
||||
let gvk = GroupVersionKind::try_from(type_meta).map_err(|_| {
|
||||
Error::BuildRequest(kube::core::request::Error::Validation(format!(
|
||||
"Invalid GVK in DynamicObject: {type_meta:?}"
|
||||
)))
|
||||
})?;
|
||||
|
||||
let (ar, caps) = discovery.resolve_gvk(&gvk).ok_or_else(|| {
|
||||
Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Cannot resolve GVK: {gvk:?}"
|
||||
)))
|
||||
})?;
|
||||
|
||||
let effective_ns = if caps.scope == Scope::Cluster {
|
||||
None
|
||||
} else {
|
||||
namespace.or_else(|| resource.metadata.namespace.as_deref())
|
||||
};
|
||||
|
||||
let api = get_dynamic_api(ar, caps, self.client.clone(), effective_ns, false);
|
||||
let name = resource.metadata.name.as_deref().ok_or_else(|| {
|
||||
Error::BuildRequest(kube::core::request::Error::Validation(
|
||||
"DynamicObject must have metadata.name".to_string(),
|
||||
))
|
||||
})?;
|
||||
|
||||
debug!(
|
||||
"apply_dynamic kind={:?} name='{name}' ns={effective_ns:?}",
|
||||
resource.types.as_ref().map(|t| &t.kind),
|
||||
);
|
||||
|
||||
// NOTE would be nice to improve cohesion between the dynamic and typed apis and avoid copy
|
||||
// pasting the dry_run and some more logic
|
||||
if self.dry_run {
|
||||
show_dry_run(&api, name, resource).await?;
|
||||
return Ok(resource.clone());
|
||||
}
|
||||
|
||||
let mut patch_params = PatchParams::apply(FIELD_MANAGER);
|
||||
patch_params.force = force_conflicts;
|
||||
|
||||
do_apply(
|
||||
&api,
|
||||
name,
|
||||
resource,
|
||||
&patch_params,
|
||||
&WriteMode::CreateOrUpdate,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn apply_dynamic_many(
|
||||
&self,
|
||||
resources: &[DynamicObject],
|
||||
namespace: Option<&str>,
|
||||
force_conflicts: bool,
|
||||
) -> Result<Vec<DynamicObject>, Error> {
|
||||
let mut result = Vec::new();
|
||||
for r in resources.iter() {
|
||||
result.push(self.apply_dynamic(r, namespace, force_conflicts).await?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn apply_yaml_many(
|
||||
&self,
|
||||
#[allow(clippy::ptr_arg)] yaml: &Vec<serde_yaml::Value>,
|
||||
ns: Option<&str>,
|
||||
) -> Result<(), Error> {
|
||||
for y in yaml.iter() {
|
||||
self.apply_yaml(y, ns).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn apply_yaml(
|
||||
&self,
|
||||
yaml: &serde_yaml::Value,
|
||||
ns: Option<&str>,
|
||||
) -> Result<(), Error> {
|
||||
// NOTE wouldn't it be possible to parse this into a DynamicObject and simply call
|
||||
// apply_dynamic instead of reimplementing api interactions?
|
||||
let obj: DynamicObject =
|
||||
serde_yaml::from_value(yaml.clone()).expect("YAML must deserialise to DynamicObject");
|
||||
let name = obj.metadata.name.as_ref().expect("YAML must have a name");
|
||||
|
||||
let api_version = yaml["apiVersion"].as_str().expect("missing apiVersion");
|
||||
let kind = yaml["kind"].as_str().expect("missing kind");
|
||||
|
||||
let mut it = api_version.splitn(2, '/');
|
||||
let first = it.next().unwrap();
|
||||
let (g, v) = match it.next() {
|
||||
Some(second) => (first, second),
|
||||
None => ("", first),
|
||||
};
|
||||
|
||||
let api_resource = ApiResource::from_gvk(&GroupVersionKind::gvk(g, v, kind));
|
||||
let namespace = ns.unwrap_or_else(|| {
|
||||
obj.metadata
|
||||
.namespace
|
||||
.as_deref()
|
||||
.expect("YAML must have a namespace when ns is not provided")
|
||||
});
|
||||
|
||||
let api: Api<DynamicObject> =
|
||||
Api::namespaced_with(self.client.clone(), namespace, &api_resource);
|
||||
|
||||
println!("Applying '{name}' in namespace '{namespace}'...");
|
||||
let patch_params = PatchParams::apply(FIELD_MANAGER);
|
||||
let result = api.patch(name, &patch_params, &Patch::Apply(&obj)).await?;
|
||||
println!("Successfully applied '{}'.", result.name_any());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Equivalent to `kubectl apply -f <url>`.
|
||||
pub async fn apply_url(&self, url: Url, ns: Option<&str>) -> Result<(), Error> {
|
||||
let patch_params = PatchParams::apply(FIELD_MANAGER);
|
||||
let discovery = self.discovery().await?;
|
||||
|
||||
let yaml = reqwest::get(url)
|
||||
.await
|
||||
.expect("Could not fetch URL")
|
||||
.text()
|
||||
.await
|
||||
.expect("Could not read response body");
|
||||
|
||||
for doc in multidoc_deserialize(&yaml).expect("Failed to parse YAML from URL") {
|
||||
let obj: DynamicObject =
|
||||
serde_yaml::from_value(doc).expect("YAML document is not a valid object");
|
||||
let namespace = obj.metadata.namespace.as_deref().or(ns);
|
||||
let type_meta = obj.types.as_ref().expect("Object is missing TypeMeta");
|
||||
let gvk =
|
||||
GroupVersionKind::try_from(type_meta).expect("Object has invalid GroupVersionKind");
|
||||
let name = obj.name_any();
|
||||
|
||||
if let Some((ar, caps)) = discovery.resolve_gvk(&gvk) {
|
||||
let api = get_dynamic_api(ar, caps, self.client.clone(), namespace, false);
|
||||
trace!(
|
||||
"Applying {}:\n{}",
|
||||
gvk.kind,
|
||||
serde_yaml::to_string(&obj).unwrap_or_default()
|
||||
);
|
||||
let data: Value = serde_json::to_value(&obj).expect("serialisation failed");
|
||||
let _r = api.patch(&name, &patch_params, &Patch::Apply(data)).await?;
|
||||
debug!("Applied {} '{name}'", gvk.kind);
|
||||
} else {
|
||||
warn!("Skipping document with unknown GVK: {gvk:?}");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Build a dynamic API client from a [`DynamicObject`]'s type metadata.
|
||||
pub(crate) fn get_api_for_dynamic_object(
|
||||
&self,
|
||||
object: &DynamicObject,
|
||||
ns: Option<&str>,
|
||||
) -> Result<Api<DynamicObject>, Error> {
|
||||
let ar = object
|
||||
.types
|
||||
.as_ref()
|
||||
.and_then(|t| {
|
||||
let parts: Vec<&str> = t.api_version.split('/').collect();
|
||||
match parts.as_slice() {
|
||||
[version] => Some(ApiResource::from_gvk(&GroupVersionKind::gvk(
|
||||
"", version, &t.kind,
|
||||
))),
|
||||
[group, version] => Some(ApiResource::from_gvk(&GroupVersionKind::gvk(
|
||||
group, version, &t.kind,
|
||||
))),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
.ok_or_else(|| {
|
||||
Error::BuildRequest(kube::core::request::Error::Validation(format!(
|
||||
"Invalid apiVersion in DynamicObject: {object:#?}"
|
||||
)))
|
||||
})?;
|
||||
|
||||
Ok(match ns {
|
||||
Some(ns) => Api::namespaced_with(self.client.clone(), ns, &ar),
|
||||
None => Api::default_namespaced_with(self.client.clone(), &ar),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ── Free functions ───────────────────────────────────────────────────────────
|
||||
|
||||
pub(crate) fn get_dynamic_api(
|
||||
resource: kube::api::ApiResource,
|
||||
capabilities: kube::discovery::ApiCapabilities,
|
||||
client: Client,
|
||||
ns: Option<&str>,
|
||||
all: bool,
|
||||
) -> Api<DynamicObject> {
|
||||
if capabilities.scope == Scope::Cluster || all {
|
||||
Api::all_with(client, &resource)
|
||||
} else if let Some(namespace) = ns {
|
||||
Api::namespaced_with(client, namespace, &resource)
|
||||
} else {
|
||||
Api::default_namespaced_with(client, &resource)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn multidoc_deserialize(
|
||||
data: &str,
|
||||
) -> Result<Vec<serde_yaml::Value>, serde_yaml::Error> {
|
||||
use serde::Deserialize;
|
||||
let mut docs = vec![];
|
||||
for de in serde_yaml::Deserializer::from_str(data) {
|
||||
docs.push(serde_yaml::Value::deserialize(de)?);
|
||||
}
|
||||
Ok(docs)
|
||||
}
|
||||
|
||||
// ── Tests ────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod apply_tests {
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use k8s_openapi::api::core::v1::ConfigMap;
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||
use kube::api::{DeleteParams, TypeMeta};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore = "requires kubernetes cluster"]
|
||||
async fn apply_creates_new_configmap() {
|
||||
let client = K8sClient::try_default().await.unwrap();
|
||||
let ns = "default";
|
||||
let name = format!(
|
||||
"test-cm-{}",
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis()
|
||||
);
|
||||
|
||||
let cm = ConfigMap {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.clone()),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
data: Some(BTreeMap::from([("key1".to_string(), "value1".to_string())])),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
assert!(client.apply(&cm, Some(ns)).await.is_ok());
|
||||
|
||||
let api: Api<ConfigMap> = Api::namespaced(client.client.clone(), ns);
|
||||
let _ = api.delete(&name, &DeleteParams::default()).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore = "requires kubernetes cluster"]
|
||||
async fn apply_is_idempotent() {
|
||||
let client = K8sClient::try_default().await.unwrap();
|
||||
let ns = "default";
|
||||
let name = format!(
|
||||
"test-idem-{}",
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis()
|
||||
);
|
||||
|
||||
let cm = ConfigMap {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.clone()),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
data: Some(BTreeMap::from([("key".to_string(), "value".to_string())])),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
assert!(
|
||||
client.apply(&cm, Some(ns)).await.is_ok(),
|
||||
"first apply failed"
|
||||
);
|
||||
assert!(
|
||||
client.apply(&cm, Some(ns)).await.is_ok(),
|
||||
"second apply failed (not idempotent)"
|
||||
);
|
||||
|
||||
let api: Api<ConfigMap> = Api::namespaced(client.client.clone(), ns);
|
||||
let _ = api.delete(&name, &DeleteParams::default()).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore = "requires kubernetes cluster"]
|
||||
async fn apply_dynamic_creates_new_resource() {
|
||||
let client = K8sClient::try_default().await.unwrap();
|
||||
let ns = "default";
|
||||
let name = format!(
|
||||
"test-dyn-{}",
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis()
|
||||
);
|
||||
|
||||
let obj = DynamicObject {
|
||||
types: Some(TypeMeta {
|
||||
api_version: "v1".to_string(),
|
||||
kind: "ConfigMap".to_string(),
|
||||
}),
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.clone()),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
data: serde_json::json!({}),
|
||||
};
|
||||
|
||||
let result = client.apply_dynamic(&obj, Some(ns), false).await;
|
||||
assert!(result.is_ok(), "apply_dynamic failed: {:?}", result.err());
|
||||
|
||||
let api: Api<ConfigMap> = Api::namespaced(client.client.clone(), ns);
|
||||
let _ = api.delete(&name, &DeleteParams::default()).await;
|
||||
}
|
||||
}
|
||||
133
harmony-k8s/src/bundle.rs
Normal file
133
harmony-k8s/src/bundle.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Resource Bundle Pattern Implementation
|
||||
//!
|
||||
//! This module implements the Resource Bundle pattern for managing groups of
|
||||
//! Kubernetes resources that form a logical unit of work.
|
||||
//!
|
||||
//! ## Purpose
|
||||
//!
|
||||
//! The ResourceBundle pattern addresses the need to manage ephemeral privileged
|
||||
//! pods along with their platform-specific security requirements (e.g., OpenShift
|
||||
//! Security Context Constraints).
|
||||
//!
|
||||
//! ## Use Cases
|
||||
//!
|
||||
//! - Writing files to node filesystems (e.g., NetworkManager configurations for
|
||||
//! network bonding as described in ADR-019)
|
||||
//! - Running privileged commands on nodes (e.g., reboots, system configuration)
|
||||
//!
|
||||
//! ## Benefits
|
||||
//!
|
||||
//! - **Separation of Concerns**: Client code doesn't need to know about
|
||||
//! platform-specific RBAC requirements
|
||||
//! - **Atomic Operations**: Resources are applied and deleted as a unit
|
||||
//! - **Clean Abstractions**: Privileged operations are encapsulated in bundles
|
||||
//! rather than scattered throughout client methods
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```
|
||||
//! use harmony_k8s::{K8sClient, helper};
|
||||
//! use harmony_k8s::KubernetesDistribution;
|
||||
//!
|
||||
//! async fn write_network_config(client: &K8sClient, node: &str) {
|
||||
//! // Create a bundle with platform-specific RBAC
|
||||
//! let bundle = helper::build_privileged_bundle(
|
||||
//! helper::PrivilegedPodConfig {
|
||||
//! name: "network-config".to_string(),
|
||||
//! namespace: "default".to_string(),
|
||||
//! node_name: node.to_string(),
|
||||
//! // ... other config
|
||||
//! ..Default::default()
|
||||
//! },
|
||||
//! &KubernetesDistribution::OpenshiftFamily,
|
||||
//! );
|
||||
//!
|
||||
//! // Apply all resources (RBAC + Pod) atomically
|
||||
//! bundle.apply(client).await.unwrap();
|
||||
//!
|
||||
//! // ... wait for completion ...
|
||||
//!
|
||||
//! // Cleanup all resources
|
||||
//! bundle.delete(client).await.unwrap();
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use kube::{Error, Resource, ResourceExt, api::DynamicObject};
|
||||
use serde::Serialize;
|
||||
use serde_json;
|
||||
|
||||
use crate::K8sClient;
|
||||
|
||||
/// A ResourceBundle represents a logical unit of work consisting of multiple
|
||||
/// Kubernetes resources that should be applied or deleted together.
|
||||
///
|
||||
/// This pattern is useful for managing ephemeral privileged pods along with
|
||||
/// their required RBAC bindings (e.g., OpenShift SCC bindings).
|
||||
#[derive(Debug)]
|
||||
pub struct ResourceBundle {
|
||||
pub resources: Vec<DynamicObject>,
|
||||
}
|
||||
|
||||
impl ResourceBundle {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
resources: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a Kubernetes resource to this bundle.
|
||||
/// The resource is converted to a DynamicObject for generic handling.
|
||||
pub fn add<K>(&mut self, resource: K)
|
||||
where
|
||||
K: Resource + Serialize,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
// Convert the typed resource to JSON, then to DynamicObject
|
||||
let json = serde_json::to_value(&resource).expect("Failed to serialize resource");
|
||||
let mut obj: DynamicObject =
|
||||
serde_json::from_value(json).expect("Failed to convert to DynamicObject");
|
||||
|
||||
// Ensure type metadata is set
|
||||
if obj.types.is_none() {
|
||||
let api_version = Default::default();
|
||||
let kind = Default::default();
|
||||
let gvk = K::api_version(&api_version);
|
||||
let kind = K::kind(&kind);
|
||||
obj.types = Some(kube::api::TypeMeta {
|
||||
api_version: gvk.to_string(),
|
||||
kind: kind.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
self.resources.push(obj);
|
||||
}
|
||||
|
||||
/// Apply all resources in this bundle to the cluster.
|
||||
/// Resources are applied in the order they were added.
|
||||
pub async fn apply(&self, client: &K8sClient) -> Result<(), Error> {
|
||||
for res in &self.resources {
|
||||
let namespace = res.namespace();
|
||||
client
|
||||
.apply_dynamic(res, namespace.as_deref(), true)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete all resources in this bundle from the cluster.
|
||||
/// Resources are deleted in reverse order to respect dependencies.
|
||||
pub async fn delete(&self, client: &K8sClient) -> Result<(), Error> {
|
||||
// FIXME delete all in parallel and retry using kube::client::retry::RetryPolicy
|
||||
for res in self.resources.iter().rev() {
|
||||
let api = client.get_api_for_dynamic_object(res, res.namespace().as_deref())?;
|
||||
let name = res.name_any();
|
||||
// FIXME this swallows all errors. Swallowing a 404 is ok but other errors must be
|
||||
// handled properly (such as retrying). A normal error case is when we delete a
|
||||
// resource bundle with dependencies between various resources. Such as a pod with a
|
||||
// dependency on a ClusterRoleBinding. Trying to delete the ClusterRoleBinding first
|
||||
// is expected to fail
|
||||
let _ = api.delete(&name, &kube::api::DeleteParams::default()).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
99
harmony-k8s/src/client.rs
Normal file
99
harmony-k8s/src/client.rs
Normal file
@@ -0,0 +1,99 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use kube::config::{KubeConfigOptions, Kubeconfig};
|
||||
use kube::{Client, Config, Discovery, Error};
|
||||
use log::error;
|
||||
use serde::Serialize;
|
||||
use tokio::sync::OnceCell;
|
||||
|
||||
use crate::types::KubernetesDistribution;
|
||||
|
||||
// TODO not cool, should use a proper configuration mechanism
|
||||
// cli arg, env var, config file
|
||||
fn read_dry_run_from_env() -> bool {
|
||||
std::env::var("DRY_RUN")
|
||||
.map(|v| v == "true" || v == "1")
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct K8sClient {
|
||||
pub(crate) client: Client,
|
||||
/// When `true` no mutation is sent to the API server; diffs are printed
|
||||
/// to stdout instead. Initialised from the `DRY_RUN` environment variable.
|
||||
pub(crate) dry_run: bool,
|
||||
pub(crate) k8s_distribution: Arc<OnceCell<KubernetesDistribution>>,
|
||||
pub(crate) discovery: Arc<OnceCell<Discovery>>,
|
||||
}
|
||||
|
||||
impl Serialize for K8sClient {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!("K8sClient serialization is not meaningful; remove this impl if unused")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for K8sClient {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_fmt(format_args!(
|
||||
"K8sClient {{ namespace: {}, dry_run: {} }}",
|
||||
self.client.default_namespace(),
|
||||
self.dry_run,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl K8sClient {
|
||||
/// Create a client, reading `DRY_RUN` from the environment.
|
||||
pub fn new(client: Client) -> Self {
|
||||
Self {
|
||||
dry_run: read_dry_run_from_env(),
|
||||
client,
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
discovery: Arc::new(OnceCell::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a client that always operates in dry-run mode, regardless of
|
||||
/// the environment variable.
|
||||
pub fn new_dry_run(client: Client) -> Self {
|
||||
Self {
|
||||
dry_run: true,
|
||||
..Self::new(client)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this client is operating in dry-run mode.
|
||||
pub fn is_dry_run(&self) -> bool {
|
||||
self.dry_run
|
||||
}
|
||||
|
||||
pub async fn try_default() -> Result<Self, Error> {
|
||||
Ok(Self::new(Client::try_default().await?))
|
||||
}
|
||||
|
||||
pub async fn from_kubeconfig(path: &str) -> Option<Self> {
|
||||
Self::from_kubeconfig_with_opts(path, &KubeConfigOptions::default()).await
|
||||
}
|
||||
|
||||
pub async fn from_kubeconfig_with_context(path: &str, context: Option<String>) -> Option<Self> {
|
||||
let mut opts = KubeConfigOptions::default();
|
||||
opts.context = context;
|
||||
Self::from_kubeconfig_with_opts(path, &opts).await
|
||||
}
|
||||
|
||||
pub async fn from_kubeconfig_with_opts(path: &str, opts: &KubeConfigOptions) -> Option<Self> {
|
||||
let k = match Kubeconfig::read_from(path) {
|
||||
Ok(k) => k,
|
||||
Err(e) => {
|
||||
error!("Failed to load kubeconfig from {path}: {e}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
Some(Self::new(
|
||||
Client::try_from(Config::from_custom_kubeconfig(k, opts).await.unwrap()).unwrap(),
|
||||
))
|
||||
}
|
||||
}
|
||||
1
harmony-k8s/src/config.rs
Normal file
1
harmony-k8s/src/config.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub const PRIVILEGED_POD_IMAGE: &str = "hub.nationtech.io/redhat/ubi10:latest";
|
||||
83
harmony-k8s/src/discovery.rs
Normal file
83
harmony-k8s/src/discovery.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use kube::{Discovery, Error};
|
||||
use log::{debug, error, info, trace, warn};
|
||||
use tokio::sync::Mutex;
|
||||
use tokio_retry::{Retry, strategy::ExponentialBackoff};
|
||||
|
||||
use crate::client::K8sClient;
|
||||
use crate::types::KubernetesDistribution;
|
||||
|
||||
impl K8sClient {
|
||||
pub async fn get_apiserver_version(
|
||||
&self,
|
||||
) -> Result<k8s_openapi::apimachinery::pkg::version::Info, Error> {
|
||||
self.client.clone().apiserver_version().await
|
||||
}
|
||||
|
||||
/// Runs (and caches) Kubernetes API discovery with exponential-backoff retries.
|
||||
pub async fn discovery(&self) -> Result<&Discovery, Error> {
|
||||
let retry_strategy = ExponentialBackoff::from_millis(1000)
|
||||
.max_delay(Duration::from_secs(32))
|
||||
.take(6);
|
||||
|
||||
let attempt = Mutex::new(0u32);
|
||||
Retry::spawn(retry_strategy, || async {
|
||||
let mut n = attempt.lock().await;
|
||||
*n += 1;
|
||||
match self
|
||||
.discovery
|
||||
.get_or_try_init(async || {
|
||||
debug!("Running Kubernetes API discovery (attempt {})", *n);
|
||||
let d = Discovery::new(self.client.clone()).run().await?;
|
||||
debug!("Kubernetes API discovery completed");
|
||||
Ok(d)
|
||||
})
|
||||
.await
|
||||
{
|
||||
Ok(d) => Ok(d),
|
||||
Err(e) => {
|
||||
warn!("Kubernetes API discovery failed (attempt {}): {}", *n, e);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error!("Kubernetes API discovery failed after all retries: {}", e);
|
||||
e
|
||||
})
|
||||
}
|
||||
|
||||
/// Detect which Kubernetes distribution is running. Result is cached for
|
||||
/// the lifetime of the client.
|
||||
pub async fn get_k8s_distribution(&self) -> Result<KubernetesDistribution, Error> {
|
||||
self.k8s_distribution
|
||||
.get_or_try_init(async || {
|
||||
debug!("Detecting Kubernetes distribution");
|
||||
let api_groups = self.client.list_api_groups().await?;
|
||||
trace!("list_api_groups: {:?}", api_groups);
|
||||
|
||||
let version = self.get_apiserver_version().await?;
|
||||
|
||||
if api_groups
|
||||
.groups
|
||||
.iter()
|
||||
.any(|g| g.name == "project.openshift.io")
|
||||
{
|
||||
info!("Detected distribution: OpenshiftFamily");
|
||||
return Ok(KubernetesDistribution::OpenshiftFamily);
|
||||
}
|
||||
|
||||
if version.git_version.contains("k3s") {
|
||||
info!("Detected distribution: K3sFamily");
|
||||
return Ok(KubernetesDistribution::K3sFamily);
|
||||
}
|
||||
|
||||
info!("Distribution not identified, using Default");
|
||||
Ok(KubernetesDistribution::Default)
|
||||
})
|
||||
.await
|
||||
.cloned()
|
||||
}
|
||||
}
|
||||
613
harmony-k8s/src/helper.rs
Normal file
613
harmony-k8s/src/helper.rs
Normal file
@@ -0,0 +1,613 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::KubernetesDistribution;
|
||||
|
||||
use super::bundle::ResourceBundle;
|
||||
use super::config::PRIVILEGED_POD_IMAGE;
|
||||
use k8s_openapi::api::core::v1::{
|
||||
Container, HostPathVolumeSource, Pod, PodSpec, SecurityContext, Volume, VolumeMount,
|
||||
};
|
||||
use k8s_openapi::api::rbac::v1::{ClusterRoleBinding, RoleRef, Subject};
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||
use kube::api::DynamicObject;
|
||||
use kube::error::DiscoveryError;
|
||||
use log::{debug, error, info, warn};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PrivilegedPodConfig {
|
||||
pub name: String,
|
||||
pub namespace: String,
|
||||
pub node_name: String,
|
||||
pub container_name: String,
|
||||
pub command: Vec<String>,
|
||||
pub volumes: Vec<Volume>,
|
||||
pub volume_mounts: Vec<VolumeMount>,
|
||||
pub host_pid: bool,
|
||||
pub host_network: bool,
|
||||
}
|
||||
|
||||
impl Default for PrivilegedPodConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: "privileged-pod".to_string(),
|
||||
namespace: "harmony".to_string(),
|
||||
node_name: "".to_string(),
|
||||
container_name: "privileged-container".to_string(),
|
||||
command: vec![],
|
||||
volumes: vec![],
|
||||
volume_mounts: vec![],
|
||||
host_pid: false,
|
||||
host_network: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_privileged_pod(
|
||||
config: PrivilegedPodConfig,
|
||||
k8s_distribution: &KubernetesDistribution,
|
||||
) -> Pod {
|
||||
let annotations = match k8s_distribution {
|
||||
KubernetesDistribution::OpenshiftFamily => Some(BTreeMap::from([
|
||||
("openshift.io/scc".to_string(), "privileged".to_string()),
|
||||
(
|
||||
"openshift.io/required-scc".to_string(),
|
||||
"privileged".to_string(),
|
||||
),
|
||||
])),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
Pod {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(config.name),
|
||||
namespace: Some(config.namespace),
|
||||
annotations,
|
||||
..Default::default()
|
||||
},
|
||||
spec: Some(PodSpec {
|
||||
node_name: Some(config.node_name),
|
||||
restart_policy: Some("Never".to_string()),
|
||||
host_pid: Some(config.host_pid),
|
||||
host_network: Some(config.host_network),
|
||||
containers: vec![Container {
|
||||
name: config.container_name,
|
||||
image: Some(PRIVILEGED_POD_IMAGE.to_string()),
|
||||
command: Some(config.command),
|
||||
security_context: Some(SecurityContext {
|
||||
privileged: Some(true),
|
||||
..Default::default()
|
||||
}),
|
||||
volume_mounts: Some(config.volume_mounts),
|
||||
..Default::default()
|
||||
}],
|
||||
volumes: Some(config.volumes),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn host_root_volume() -> (Volume, VolumeMount) {
|
||||
(
|
||||
Volume {
|
||||
name: "host".to_string(),
|
||||
host_path: Some(HostPathVolumeSource {
|
||||
path: "/".to_string(),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
VolumeMount {
|
||||
name: "host".to_string(),
|
||||
mount_path: "/host".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Build a ResourceBundle containing a privileged pod and any required RBAC.
|
||||
///
|
||||
/// This function implements the Resource Bundle pattern to encapsulate platform-specific
|
||||
/// security requirements for running privileged operations on nodes.
|
||||
///
|
||||
/// # Platform-Specific Behavior
|
||||
///
|
||||
/// - **OpenShift**: Creates a ClusterRoleBinding to grant the default ServiceAccount
|
||||
/// access to the `system:openshift:scc:privileged` ClusterRole, which allows the pod
|
||||
/// to use the privileged Security Context Constraint (SCC).
|
||||
/// - **Standard Kubernetes/K3s**: Only creates the Pod resource, as these distributions
|
||||
/// use standard PodSecurityPolicy or don't enforce additional security constraints.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `config` - Configuration for the privileged pod (name, namespace, command, etc.)
|
||||
/// * `k8s_distribution` - The detected Kubernetes distribution to determine RBAC requirements
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A `ResourceBundle` containing 1-2 resources:
|
||||
/// - ClusterRoleBinding (OpenShift only)
|
||||
/// - Pod (all distributions)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use harmony_k8s::helper::{build_privileged_bundle, PrivilegedPodConfig};
|
||||
/// use harmony_k8s::KubernetesDistribution;
|
||||
/// let bundle = build_privileged_bundle(
|
||||
/// PrivilegedPodConfig {
|
||||
/// name: "network-setup".to_string(),
|
||||
/// namespace: "default".to_string(),
|
||||
/// node_name: "worker-01".to_string(),
|
||||
/// container_name: "setup".to_string(),
|
||||
/// command: vec!["nmcli".to_string(), "connection".to_string(), "reload".to_string()],
|
||||
/// ..Default::default()
|
||||
/// },
|
||||
/// &KubernetesDistribution::OpenshiftFamily,
|
||||
/// );
|
||||
/// // Bundle now contains ClusterRoleBinding + Pod
|
||||
/// ```
|
||||
pub fn build_privileged_bundle(
|
||||
config: PrivilegedPodConfig,
|
||||
k8s_distribution: &KubernetesDistribution,
|
||||
) -> ResourceBundle {
|
||||
debug!(
|
||||
"Building privileged bundle for config {config:#?} on distribution {k8s_distribution:?}"
|
||||
);
|
||||
let mut bundle = ResourceBundle::new();
|
||||
let pod_name = config.name.clone();
|
||||
let namespace = config.namespace.clone();
|
||||
|
||||
// 1. On OpenShift, create RBAC binding to privileged SCC
|
||||
if let KubernetesDistribution::OpenshiftFamily = k8s_distribution {
|
||||
// The default ServiceAccount needs to be bound to the privileged SCC
|
||||
// via the system:openshift:scc:privileged ClusterRole
|
||||
let crb = ClusterRoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("{}-scc-binding", pod_name)),
|
||||
..Default::default()
|
||||
},
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".to_string(),
|
||||
kind: "ClusterRole".to_string(),
|
||||
name: "system:openshift:scc:privileged".to_string(),
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".to_string(),
|
||||
name: "default".to_string(),
|
||||
namespace: Some(namespace.clone()),
|
||||
api_group: None,
|
||||
..Default::default()
|
||||
}]),
|
||||
};
|
||||
bundle.add(crb);
|
||||
}
|
||||
|
||||
// 2. Build the privileged pod
|
||||
let pod = build_privileged_pod(config, k8s_distribution);
|
||||
bundle.add(pod);
|
||||
|
||||
bundle
|
||||
}
|
||||
|
||||
/// Action to take when a drain operation times out.
|
||||
pub enum DrainTimeoutAction {
|
||||
/// Accept the partial drain and continue
|
||||
Accept,
|
||||
/// Retry the drain for another timeout period
|
||||
Retry,
|
||||
/// Abort the drain operation
|
||||
Abort,
|
||||
}
|
||||
|
||||
/// Prompts the user to confirm acceptance of a partial drain.
|
||||
///
|
||||
/// Returns `Ok(true)` if the user confirms acceptance, `Ok(false)` if the user
|
||||
/// chooses to retry or abort, and `Err` if the prompt system fails entirely.
|
||||
pub fn prompt_drain_timeout_action(
|
||||
node_name: &str,
|
||||
pending_count: usize,
|
||||
timeout_duration: Duration,
|
||||
) -> Result<DrainTimeoutAction, kube::Error> {
|
||||
let prompt_msg = format!(
|
||||
"Drain operation timed out on node '{}' with {} pod(s) remaining. What would you like to do?",
|
||||
node_name, pending_count
|
||||
);
|
||||
|
||||
loop {
|
||||
let choices = vec![
|
||||
"Accept drain failure (requires confirmation)".to_string(),
|
||||
format!("Retry drain for another {:?}", timeout_duration),
|
||||
"Abort operation".to_string(),
|
||||
];
|
||||
|
||||
let selection = inquire::Select::new(&prompt_msg, choices)
|
||||
.with_help_message("Use arrow keys to navigate, Enter to select")
|
||||
.prompt()
|
||||
.map_err(|e| {
|
||||
kube::Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Prompt failed: {}",
|
||||
e
|
||||
)))
|
||||
})?;
|
||||
|
||||
if selection.starts_with("Accept") {
|
||||
// Require typed confirmation - retry until correct or user cancels
|
||||
let required_confirmation = format!("yes-accept-drain:{}={}", node_name, pending_count);
|
||||
|
||||
let confirmation_prompt = format!(
|
||||
"To accept this partial drain, type exactly: {}",
|
||||
required_confirmation
|
||||
);
|
||||
|
||||
match inquire::Text::new(&confirmation_prompt)
|
||||
.with_help_message(&format!(
|
||||
"This action acknowledges {} pods will remain on the node",
|
||||
pending_count
|
||||
))
|
||||
.prompt()
|
||||
{
|
||||
Ok(input) if input == required_confirmation => {
|
||||
warn!(
|
||||
"User accepted partial drain of node '{}' with {} pods remaining (confirmation: {})",
|
||||
node_name, pending_count, required_confirmation
|
||||
);
|
||||
return Ok(DrainTimeoutAction::Accept);
|
||||
}
|
||||
Ok(input) => {
|
||||
warn!(
|
||||
"Confirmation failed. Expected '{}', got '{}'. Please try again.",
|
||||
required_confirmation, input
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
// User cancelled (Ctrl+C) or prompt system failed
|
||||
error!("Confirmation prompt cancelled or failed: {}", e);
|
||||
return Ok(DrainTimeoutAction::Abort);
|
||||
}
|
||||
}
|
||||
} else if selection.starts_with("Retry") {
|
||||
info!(
|
||||
"User chose to retry drain operation for another {:?}",
|
||||
timeout_duration
|
||||
);
|
||||
return Ok(DrainTimeoutAction::Retry);
|
||||
} else {
|
||||
error!("Drain operation aborted by user");
|
||||
return Ok(DrainTimeoutAction::Abort);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// JSON round-trip: DynamicObject → K
|
||||
///
|
||||
/// Safe because the DynamicObject was produced by the apiserver from a
|
||||
/// payload that was originally serialized from K, so the schema is identical.
|
||||
pub(crate) fn dyn_to_typed<K: DeserializeOwned>(obj: DynamicObject) -> Result<K, kube::Error> {
|
||||
serde_json::to_value(obj)
|
||||
.and_then(serde_json::from_value)
|
||||
.map_err(kube::Error::SerdeError)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn test_host_root_volume() {
|
||||
let (volume, mount) = host_root_volume();
|
||||
|
||||
assert_eq!(volume.name, "host");
|
||||
assert_eq!(volume.host_path.as_ref().unwrap().path, "/");
|
||||
|
||||
assert_eq!(mount.name, "host");
|
||||
assert_eq!(mount.mount_path, "/host");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_pod_minimal() {
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "minimal-pod".to_string(),
|
||||
namespace: "kube-system".to_string(),
|
||||
node_name: "node-123".to_string(),
|
||||
container_name: "debug-container".to_string(),
|
||||
command: vec!["sleep".to_string(), "3600".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
assert_eq!(pod.metadata.name, Some("minimal-pod".to_string()));
|
||||
assert_eq!(pod.metadata.namespace, Some("kube-system".to_string()));
|
||||
|
||||
let spec = pod.spec.as_ref().expect("Pod spec should be present");
|
||||
assert_eq!(spec.node_name, Some("node-123".to_string()));
|
||||
assert_eq!(spec.restart_policy, Some("Never".to_string()));
|
||||
assert_eq!(spec.host_pid, Some(false));
|
||||
assert_eq!(spec.host_network, Some(false));
|
||||
|
||||
assert_eq!(spec.containers.len(), 1);
|
||||
let container = &spec.containers[0];
|
||||
assert_eq!(container.name, "debug-container");
|
||||
assert_eq!(container.image, Some(PRIVILEGED_POD_IMAGE.to_string()));
|
||||
assert_eq!(
|
||||
container.command,
|
||||
Some(vec!["sleep".to_string(), "3600".to_string()])
|
||||
);
|
||||
|
||||
// Security context check
|
||||
let sec_ctx = container
|
||||
.security_context
|
||||
.as_ref()
|
||||
.expect("Security context missing");
|
||||
assert_eq!(sec_ctx.privileged, Some(true));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_pod_with_volumes_and_host_access() {
|
||||
let (host_vol, host_mount) = host_root_volume();
|
||||
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "full-pod".to_string(),
|
||||
namespace: "default".to_string(),
|
||||
node_name: "node-1".to_string(),
|
||||
container_name: "runner".to_string(),
|
||||
command: vec!["/bin/sh".to_string()],
|
||||
volumes: vec![host_vol.clone()],
|
||||
volume_mounts: vec![host_mount.clone()],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
let spec = pod.spec.as_ref().expect("Pod spec should be present");
|
||||
assert_eq!(spec.host_pid, Some(true));
|
||||
assert_eq!(spec.host_network, Some(true));
|
||||
|
||||
// Check volumes in Spec
|
||||
let volumes = spec.volumes.as_ref().expect("Volumes should be present");
|
||||
assert_eq!(volumes.len(), 1);
|
||||
assert_eq!(volumes[0].name, "host");
|
||||
|
||||
// Check mounts in Container
|
||||
let container = &spec.containers[0];
|
||||
let mounts = container
|
||||
.volume_mounts
|
||||
.as_ref()
|
||||
.expect("Mounts should be present");
|
||||
assert_eq!(mounts.len(), 1);
|
||||
assert_eq!(mounts[0].name, "host");
|
||||
assert_eq!(mounts[0].mount_path, "/host");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_pod_structure_correctness() {
|
||||
// This test validates that the construction logic puts things in the right places
|
||||
// effectively validating the "template".
|
||||
|
||||
let custom_vol = Volume {
|
||||
name: "custom-vol".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
let custom_mount = VolumeMount {
|
||||
name: "custom-vol".to_string(),
|
||||
mount_path: "/custom".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "structure-test".to_string(),
|
||||
namespace: "test-ns".to_string(),
|
||||
node_name: "test-node".to_string(),
|
||||
container_name: "test-container".to_string(),
|
||||
command: vec!["cmd".to_string()],
|
||||
volumes: vec![custom_vol],
|
||||
volume_mounts: vec![custom_mount],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
// Validate structure depth
|
||||
let spec = pod.spec.as_ref().unwrap();
|
||||
|
||||
// 1. Spec level fields
|
||||
assert!(spec.node_name.is_some());
|
||||
assert!(spec.volumes.is_some());
|
||||
|
||||
// 2. Container level fields
|
||||
let container = &spec.containers[0];
|
||||
assert!(container.security_context.is_some());
|
||||
assert!(container.volume_mounts.is_some());
|
||||
|
||||
// 3. Nested fields
|
||||
assert!(
|
||||
container
|
||||
.security_context
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.privileged
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(spec.volumes.as_ref().unwrap()[0].name, "custom-vol");
|
||||
assert_eq!(
|
||||
container.volume_mounts.as_ref().unwrap()[0].mount_path,
|
||||
"/custom"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_bundle_default_distribution() {
|
||||
let bundle = build_privileged_bundle(
|
||||
PrivilegedPodConfig {
|
||||
name: "test-bundle".to_string(),
|
||||
namespace: "test-ns".to_string(),
|
||||
node_name: "node-1".to_string(),
|
||||
container_name: "test-container".to_string(),
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
// For Default distribution, only the Pod should be in the bundle
|
||||
assert_eq!(bundle.resources.len(), 1);
|
||||
|
||||
let pod_obj = &bundle.resources[0];
|
||||
assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle"));
|
||||
assert_eq!(pod_obj.metadata.namespace.as_deref(), Some("test-ns"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_bundle_openshift_distribution() {
|
||||
let bundle = build_privileged_bundle(
|
||||
PrivilegedPodConfig {
|
||||
name: "test-bundle-ocp".to_string(),
|
||||
namespace: "test-ns".to_string(),
|
||||
node_name: "node-1".to_string(),
|
||||
container_name: "test-container".to_string(),
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::OpenshiftFamily,
|
||||
);
|
||||
|
||||
// For OpenShift, both ClusterRoleBinding and Pod should be in the bundle
|
||||
assert_eq!(bundle.resources.len(), 2);
|
||||
|
||||
// First resource should be the ClusterRoleBinding
|
||||
let crb_obj = &bundle.resources[0];
|
||||
assert_eq!(
|
||||
crb_obj.metadata.name.as_deref(),
|
||||
Some("test-bundle-ocp-scc-binding")
|
||||
);
|
||||
|
||||
// Verify it's targeting the privileged SCC
|
||||
if let Some(role_ref) = crb_obj.data.get("roleRef") {
|
||||
assert_eq!(
|
||||
role_ref.get("name").and_then(|v| v.as_str()),
|
||||
Some("system:openshift:scc:privileged")
|
||||
);
|
||||
}
|
||||
|
||||
// Second resource should be the Pod
|
||||
let pod_obj = &bundle.resources[1];
|
||||
assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle-ocp"));
|
||||
assert_eq!(pod_obj.metadata.namespace.as_deref(), Some("test-ns"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_bundle_k3s_distribution() {
|
||||
let bundle = build_privileged_bundle(
|
||||
PrivilegedPodConfig {
|
||||
name: "test-bundle-k3s".to_string(),
|
||||
namespace: "test-ns".to_string(),
|
||||
node_name: "node-1".to_string(),
|
||||
container_name: "test-container".to_string(),
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::K3sFamily,
|
||||
);
|
||||
|
||||
// For K3s, only the Pod should be in the bundle (no special SCC)
|
||||
assert_eq!(bundle.resources.len(), 1);
|
||||
|
||||
let pod_obj = &bundle.resources[0];
|
||||
assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle-k3s"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pod_yaml_rendering_expected() {
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "pod_name".to_string(),
|
||||
namespace: "pod_namespace".to_string(),
|
||||
node_name: "node name".to_string(),
|
||||
container_name: "container name".to_string(),
|
||||
command: vec!["command".to_string(), "argument".to_string()],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&serde_yaml::to_string(&pod).unwrap(),
|
||||
"apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: pod_name
|
||||
namespace: pod_namespace
|
||||
spec:
|
||||
containers:
|
||||
- command:
|
||||
- command
|
||||
- argument
|
||||
image: hub.nationtech.io/redhat/ubi10:latest
|
||||
name: container name
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts: []
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeName: node name
|
||||
restartPolicy: Never
|
||||
volumes: []
|
||||
"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pod_yaml_rendering_openshift() {
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "pod_name".to_string(),
|
||||
namespace: "pod_namespace".to_string(),
|
||||
node_name: "node name".to_string(),
|
||||
container_name: "container name".to_string(),
|
||||
command: vec!["command".to_string(), "argument".to_string()],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::OpenshiftFamily,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&serde_yaml::to_string(&pod).unwrap(),
|
||||
"apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
annotations:
|
||||
openshift.io/required-scc: privileged
|
||||
openshift.io/scc: privileged
|
||||
name: pod_name
|
||||
namespace: pod_namespace
|
||||
spec:
|
||||
containers:
|
||||
- command:
|
||||
- command
|
||||
- argument
|
||||
image: hub.nationtech.io/redhat/ubi10:latest
|
||||
name: container name
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts: []
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeName: node name
|
||||
restartPolicy: Never
|
||||
volumes: []
|
||||
"
|
||||
);
|
||||
}
|
||||
}
|
||||
13
harmony-k8s/src/lib.rs
Normal file
13
harmony-k8s/src/lib.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
pub mod apply;
|
||||
pub mod bundle;
|
||||
pub mod client;
|
||||
pub mod config;
|
||||
pub mod discovery;
|
||||
pub mod helper;
|
||||
pub mod node;
|
||||
pub mod pod;
|
||||
pub mod resources;
|
||||
pub mod types;
|
||||
|
||||
pub use client::K8sClient;
|
||||
pub use types::{DrainOptions, KubernetesDistribution, NodeFile, ScopeResolver, WriteMode};
|
||||
3
harmony-k8s/src/main.rs
Normal file
3
harmony-k8s/src/main.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
}
|
||||
722
harmony-k8s/src/node.rs
Normal file
722
harmony-k8s/src/node.rs
Normal file
@@ -0,0 +1,722 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use k8s_openapi::api::core::v1::{
|
||||
ConfigMap, ConfigMapVolumeSource, Node, Pod, Volume, VolumeMount,
|
||||
};
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||
use kube::{
|
||||
Error,
|
||||
api::{Api, DeleteParams, EvictParams, ListParams, PostParams},
|
||||
core::ErrorResponse,
|
||||
error::DiscoveryError,
|
||||
};
|
||||
use log::{debug, error, info, warn};
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::client::K8sClient;
|
||||
use crate::helper::{self, PrivilegedPodConfig};
|
||||
use crate::types::{DrainOptions, NodeFile};
|
||||
|
||||
impl K8sClient {
|
||||
pub async fn cordon_node(&self, node_name: &str) -> Result<(), Error> {
|
||||
Api::<Node>::all(self.client.clone())
|
||||
.cordon(node_name)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn uncordon_node(&self, node_name: &str) -> Result<(), Error> {
|
||||
Api::<Node>::all(self.client.clone())
|
||||
.uncordon(node_name)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn wait_for_node_ready(&self, node_name: &str) -> Result<(), Error> {
|
||||
self.wait_for_node_ready_with_timeout(node_name, Duration::from_secs(600))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn wait_for_node_ready_with_timeout(
|
||||
&self,
|
||||
node_name: &str,
|
||||
timeout: Duration,
|
||||
) -> Result<(), Error> {
|
||||
let api: Api<Node> = Api::all(self.client.clone());
|
||||
let start = tokio::time::Instant::now();
|
||||
let poll = Duration::from_secs(5);
|
||||
loop {
|
||||
if start.elapsed() > timeout {
|
||||
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Node '{node_name}' did not become Ready within {timeout:?}"
|
||||
))));
|
||||
}
|
||||
match api.get(node_name).await {
|
||||
Ok(node) => {
|
||||
if node
|
||||
.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.conditions.as_ref())
|
||||
.map(|conds| {
|
||||
conds
|
||||
.iter()
|
||||
.any(|c| c.type_ == "Ready" && c.status == "True")
|
||||
})
|
||||
.unwrap_or(false)
|
||||
{
|
||||
debug!("Node '{node_name}' is Ready");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => debug!("Error polling node '{node_name}': {e}"),
|
||||
}
|
||||
sleep(poll).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_node_not_ready(
|
||||
&self,
|
||||
node_name: &str,
|
||||
timeout: Duration,
|
||||
) -> Result<(), Error> {
|
||||
let api: Api<Node> = Api::all(self.client.clone());
|
||||
let start = tokio::time::Instant::now();
|
||||
let poll = Duration::from_secs(5);
|
||||
loop {
|
||||
if start.elapsed() > timeout {
|
||||
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Node '{node_name}' did not become NotReady within {timeout:?}"
|
||||
))));
|
||||
}
|
||||
match api.get(node_name).await {
|
||||
Ok(node) => {
|
||||
let is_ready = node
|
||||
.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.conditions.as_ref())
|
||||
.map(|conds| {
|
||||
conds
|
||||
.iter()
|
||||
.any(|c| c.type_ == "Ready" && c.status == "True")
|
||||
})
|
||||
.unwrap_or(false);
|
||||
if !is_ready {
|
||||
debug!("Node '{node_name}' is NotReady");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => debug!("Error polling node '{node_name}': {e}"),
|
||||
}
|
||||
sleep(poll).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_pods_on_node(&self, node_name: &str) -> Result<Vec<Pod>, Error> {
|
||||
let api: Api<Pod> = Api::all(self.client.clone());
|
||||
Ok(api
|
||||
.list(&ListParams::default().fields(&format!("spec.nodeName={node_name}")))
|
||||
.await?
|
||||
.items)
|
||||
}
|
||||
|
||||
fn is_mirror_pod(pod: &Pod) -> bool {
|
||||
pod.metadata
|
||||
.annotations
|
||||
.as_ref()
|
||||
.map(|a| a.contains_key("kubernetes.io/config.mirror"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn is_daemonset_pod(pod: &Pod) -> bool {
|
||||
pod.metadata
|
||||
.owner_references
|
||||
.as_ref()
|
||||
.map(|refs| refs.iter().any(|r| r.kind == "DaemonSet"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn has_emptydir_volume(pod: &Pod) -> bool {
|
||||
pod.spec
|
||||
.as_ref()
|
||||
.and_then(|s| s.volumes.as_ref())
|
||||
.map(|vols| vols.iter().any(|v| v.empty_dir.is_some()))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn is_completed_pod(pod: &Pod) -> bool {
|
||||
pod.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.phase.as_deref())
|
||||
.map(|phase| phase == "Succeeded" || phase == "Failed")
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn classify_pods_for_drain(
|
||||
pods: &[Pod],
|
||||
options: &DrainOptions,
|
||||
) -> Result<(Vec<Pod>, Vec<String>), String> {
|
||||
let mut evictable = Vec::new();
|
||||
let mut skipped = Vec::new();
|
||||
let mut blocking = Vec::new();
|
||||
|
||||
for pod in pods {
|
||||
let name = pod.metadata.name.as_deref().unwrap_or("<unknown>");
|
||||
let ns = pod.metadata.namespace.as_deref().unwrap_or("<unknown>");
|
||||
let qualified = format!("{ns}/{name}");
|
||||
|
||||
if Self::is_mirror_pod(pod) {
|
||||
skipped.push(format!("{qualified} (mirror pod)"));
|
||||
continue;
|
||||
}
|
||||
if Self::is_completed_pod(pod) {
|
||||
skipped.push(format!("{qualified} (completed)"));
|
||||
continue;
|
||||
}
|
||||
if Self::is_daemonset_pod(pod) {
|
||||
if options.ignore_daemonsets {
|
||||
skipped.push(format!("{qualified} (DaemonSet-managed)"));
|
||||
} else {
|
||||
blocking.push(format!(
|
||||
"{qualified} is managed by a DaemonSet (set ignore_daemonsets to skip)"
|
||||
));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if Self::has_emptydir_volume(pod) && !options.delete_emptydir_data {
|
||||
blocking.push(format!(
|
||||
"{qualified} uses emptyDir volumes (set delete_emptydir_data to allow eviction)"
|
||||
));
|
||||
continue;
|
||||
}
|
||||
evictable.push(pod.clone());
|
||||
}
|
||||
|
||||
if !blocking.is_empty() {
|
||||
return Err(format!(
|
||||
"Cannot drain node — the following pods block eviction:\n - {}",
|
||||
blocking.join("\n - ")
|
||||
));
|
||||
}
|
||||
Ok((evictable, skipped))
|
||||
}
|
||||
|
||||
async fn evict_pod(&self, pod: &Pod) -> Result<(), Error> {
|
||||
let name = pod.metadata.name.as_deref().unwrap_or_default();
|
||||
let ns = pod.metadata.namespace.as_deref().unwrap_or_default();
|
||||
debug!("Evicting pod {ns}/{name}");
|
||||
Api::<Pod>::namespaced(self.client.clone(), ns)
|
||||
.evict(name, &EvictParams::default())
|
||||
.await
|
||||
.map(|_| ())
|
||||
}
|
||||
|
||||
/// Drains a node: cordon → classify → evict & wait.
|
||||
pub async fn drain_node(&self, node_name: &str, options: &DrainOptions) -> Result<(), Error> {
|
||||
debug!("Cordoning '{node_name}'");
|
||||
self.cordon_node(node_name).await?;
|
||||
|
||||
let pods = self.list_pods_on_node(node_name).await?;
|
||||
debug!("Found {} pod(s) on '{node_name}'", pods.len());
|
||||
|
||||
let (evictable, skipped) =
|
||||
Self::classify_pods_for_drain(&pods, options).map_err(|msg| {
|
||||
error!("{msg}");
|
||||
Error::Discovery(DiscoveryError::MissingResource(msg))
|
||||
})?;
|
||||
|
||||
for s in &skipped {
|
||||
info!("Skipping pod: {s}");
|
||||
}
|
||||
if evictable.is_empty() {
|
||||
info!("No pods to evict on '{node_name}'");
|
||||
return Ok(());
|
||||
}
|
||||
info!("Evicting {} pod(s) from '{node_name}'", evictable.len());
|
||||
|
||||
let mut start = tokio::time::Instant::now();
|
||||
let poll = Duration::from_secs(5);
|
||||
let mut pending = evictable;
|
||||
|
||||
loop {
|
||||
for pod in &pending {
|
||||
match self.evict_pod(pod).await {
|
||||
Ok(()) => {}
|
||||
Err(Error::Api(ErrorResponse { code: 404, .. })) => {}
|
||||
Err(Error::Api(ErrorResponse { code: 429, .. })) => {
|
||||
warn!(
|
||||
"PDB blocked eviction of {}/{}; will retry",
|
||||
pod.metadata.namespace.as_deref().unwrap_or(""),
|
||||
pod.metadata.name.as_deref().unwrap_or("")
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to evict {}/{}: {e}",
|
||||
pod.metadata.namespace.as_deref().unwrap_or(""),
|
||||
pod.metadata.name.as_deref().unwrap_or("")
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sleep(poll).await;
|
||||
|
||||
let mut still_present = Vec::new();
|
||||
for pod in pending {
|
||||
let ns = pod.metadata.namespace.as_deref().unwrap_or_default();
|
||||
let name = pod.metadata.name.as_deref().unwrap_or_default();
|
||||
match self.get_pod(name, Some(ns)).await? {
|
||||
Some(_) => still_present.push(pod),
|
||||
None => debug!("Pod {ns}/{name} evicted"),
|
||||
}
|
||||
}
|
||||
pending = still_present;
|
||||
|
||||
if pending.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
if start.elapsed() > options.timeout {
|
||||
match helper::prompt_drain_timeout_action(
|
||||
node_name,
|
||||
pending.len(),
|
||||
options.timeout,
|
||||
)? {
|
||||
helper::DrainTimeoutAction::Accept => break,
|
||||
helper::DrainTimeoutAction::Retry => {
|
||||
start = tokio::time::Instant::now();
|
||||
continue;
|
||||
}
|
||||
helper::DrainTimeoutAction::Abort => {
|
||||
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Drain aborted. {} pod(s) remaining on '{node_name}'",
|
||||
pending.len()
|
||||
))));
|
||||
}
|
||||
}
|
||||
}
|
||||
debug!("Waiting for {} pod(s) on '{node_name}'", pending.len());
|
||||
}
|
||||
|
||||
debug!("'{node_name}' drained successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Safely reboots a node: drain → reboot → wait for Ready → uncordon.
|
||||
pub async fn reboot_node(
|
||||
&self,
|
||||
node_name: &str,
|
||||
drain_options: &DrainOptions,
|
||||
timeout: Duration,
|
||||
) -> Result<(), Error> {
|
||||
info!("Starting reboot for '{node_name}'");
|
||||
let node_api: Api<Node> = Api::all(self.client.clone());
|
||||
|
||||
let boot_id_before = node_api
|
||||
.get(node_name)
|
||||
.await?
|
||||
.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.node_info.as_ref())
|
||||
.map(|ni| ni.boot_id.clone())
|
||||
.ok_or_else(|| {
|
||||
Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Node '{node_name}' has no boot_id in status"
|
||||
)))
|
||||
})?;
|
||||
|
||||
info!("Draining '{node_name}'");
|
||||
self.drain_node(node_name, drain_options).await?;
|
||||
|
||||
let start = tokio::time::Instant::now();
|
||||
|
||||
info!("Scheduling reboot for '{node_name}'");
|
||||
let reboot_cmd =
|
||||
"echo rebooting ; nohup bash -c 'sleep 5 && nsenter -t 1 -m -- systemctl reboot'";
|
||||
match self
|
||||
.run_privileged_command_on_node(node_name, reboot_cmd)
|
||||
.await
|
||||
{
|
||||
Ok(_) => debug!("Reboot command dispatched"),
|
||||
Err(e) => debug!("Reboot command error (expected if node began shutdown): {e}"),
|
||||
}
|
||||
|
||||
info!("Waiting for '{node_name}' to begin shutdown");
|
||||
self.wait_for_node_not_ready(node_name, timeout.saturating_sub(start.elapsed()))
|
||||
.await?;
|
||||
|
||||
if start.elapsed() > timeout {
|
||||
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Timeout during reboot of '{node_name}' (shutdown phase)"
|
||||
))));
|
||||
}
|
||||
|
||||
info!("Waiting for '{node_name}' to come back online");
|
||||
self.wait_for_node_ready_with_timeout(node_name, timeout.saturating_sub(start.elapsed()))
|
||||
.await?;
|
||||
|
||||
if start.elapsed() > timeout {
|
||||
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Timeout during reboot of '{node_name}' (ready phase)"
|
||||
))));
|
||||
}
|
||||
|
||||
let boot_id_after = node_api
|
||||
.get(node_name)
|
||||
.await?
|
||||
.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.node_info.as_ref())
|
||||
.map(|ni| ni.boot_id.clone())
|
||||
.ok_or_else(|| {
|
||||
Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Node '{node_name}' has no boot_id after reboot"
|
||||
)))
|
||||
})?;
|
||||
|
||||
if boot_id_before == boot_id_after {
|
||||
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Node '{node_name}' did not actually reboot (boot_id unchanged: {boot_id_before})"
|
||||
))));
|
||||
}
|
||||
|
||||
info!("'{node_name}' rebooted ({boot_id_before} → {boot_id_after})");
|
||||
self.uncordon_node(node_name).await?;
|
||||
info!("'{node_name}' reboot complete ({:?})", start.elapsed());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write a set of files to a node's filesystem via a privileged ephemeral pod.
|
||||
pub async fn write_files_to_node(
|
||||
&self,
|
||||
node_name: &str,
|
||||
files: &[NodeFile],
|
||||
) -> Result<String, Error> {
|
||||
let ns = self.client.default_namespace();
|
||||
let suffix = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis();
|
||||
let name = format!("harmony-k8s-writer-{suffix}");
|
||||
|
||||
debug!("Writing {} file(s) to '{node_name}'", files.len());
|
||||
|
||||
let mut data = BTreeMap::new();
|
||||
let mut script = String::from("set -e\n");
|
||||
for (i, file) in files.iter().enumerate() {
|
||||
let key = format!("f{i}");
|
||||
data.insert(key.clone(), file.content.clone());
|
||||
script.push_str(&format!("mkdir -p \"$(dirname \"/host{}\")\"\n", file.path));
|
||||
script.push_str(&format!("cp \"/payload/{key}\" \"/host{}\"\n", file.path));
|
||||
script.push_str(&format!("chmod {:o} \"/host{}\"\n", file.mode, file.path));
|
||||
}
|
||||
|
||||
let cm = ConfigMap {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.clone()),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
data: Some(data),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let cm_api: Api<ConfigMap> = Api::namespaced(self.client.clone(), ns);
|
||||
cm_api.create(&PostParams::default(), &cm).await?;
|
||||
debug!("Created ConfigMap '{name}'");
|
||||
|
||||
let (host_vol, host_mount) = helper::host_root_volume();
|
||||
let payload_vol = Volume {
|
||||
name: "payload".to_string(),
|
||||
config_map: Some(ConfigMapVolumeSource {
|
||||
name: name.clone(),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
let payload_mount = VolumeMount {
|
||||
name: "payload".to_string(),
|
||||
mount_path: "/payload".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let bundle = helper::build_privileged_bundle(
|
||||
PrivilegedPodConfig {
|
||||
name: name.clone(),
|
||||
namespace: ns.to_string(),
|
||||
node_name: node_name.to_string(),
|
||||
container_name: "writer".to_string(),
|
||||
command: vec!["/bin/bash".to_string(), "-c".to_string(), script],
|
||||
volumes: vec![payload_vol, host_vol],
|
||||
volume_mounts: vec![payload_mount, host_mount],
|
||||
host_pid: false,
|
||||
host_network: false,
|
||||
},
|
||||
&self.get_k8s_distribution().await?,
|
||||
);
|
||||
|
||||
bundle.apply(self).await?;
|
||||
debug!("Created privileged pod bundle '{name}'");
|
||||
|
||||
let result = self.wait_for_pod_completion(&name, ns).await;
|
||||
|
||||
debug!("Cleaning up '{name}'");
|
||||
let _ = bundle.delete(self).await;
|
||||
let _ = cm_api.delete(&name, &DeleteParams::default()).await;
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Run a privileged command on a node via an ephemeral pod.
|
||||
pub async fn run_privileged_command_on_node(
|
||||
&self,
|
||||
node_name: &str,
|
||||
command: &str,
|
||||
) -> Result<String, Error> {
|
||||
let namespace = self.client.default_namespace();
|
||||
let suffix = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis();
|
||||
let name = format!("harmony-k8s-cmd-{suffix}");
|
||||
|
||||
debug!("Running privileged command on '{node_name}': {command}");
|
||||
|
||||
let (host_vol, host_mount) = helper::host_root_volume();
|
||||
let bundle = helper::build_privileged_bundle(
|
||||
PrivilegedPodConfig {
|
||||
name: name.clone(),
|
||||
namespace: namespace.to_string(),
|
||||
node_name: node_name.to_string(),
|
||||
container_name: "runner".to_string(),
|
||||
command: vec![
|
||||
"/bin/bash".to_string(),
|
||||
"-c".to_string(),
|
||||
command.to_string(),
|
||||
],
|
||||
volumes: vec![host_vol],
|
||||
volume_mounts: vec![host_mount],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
},
|
||||
&self.get_k8s_distribution().await?,
|
||||
);
|
||||
|
||||
bundle.apply(self).await?;
|
||||
debug!("Privileged pod '{name}' created");
|
||||
|
||||
let result = self.wait_for_pod_completion(&name, namespace).await;
|
||||
|
||||
debug!("Cleaning up '{name}'");
|
||||
let _ = bundle.delete(self).await;
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
// ── Tests ────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use k8s_openapi::api::core::v1::{EmptyDirVolumeSource, PodSpec, PodStatus, Volume};
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::{ObjectMeta, OwnerReference};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn base_pod(name: &str, ns: &str) -> Pod {
|
||||
Pod {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.to_string()),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: Some(PodSpec::default()),
|
||||
status: Some(PodStatus {
|
||||
phase: Some("Running".to_string()),
|
||||
..Default::default()
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn mirror_pod(name: &str, ns: &str) -> Pod {
|
||||
let mut pod = base_pod(name, ns);
|
||||
pod.metadata.annotations = Some(std::collections::BTreeMap::from([(
|
||||
"kubernetes.io/config.mirror".to_string(),
|
||||
"abc123".to_string(),
|
||||
)]));
|
||||
pod
|
||||
}
|
||||
|
||||
fn daemonset_pod(name: &str, ns: &str) -> Pod {
|
||||
let mut pod = base_pod(name, ns);
|
||||
pod.metadata.owner_references = Some(vec![OwnerReference {
|
||||
api_version: "apps/v1".to_string(),
|
||||
kind: "DaemonSet".to_string(),
|
||||
name: "some-ds".to_string(),
|
||||
uid: "uid-ds".to_string(),
|
||||
..Default::default()
|
||||
}]);
|
||||
pod
|
||||
}
|
||||
|
||||
fn emptydir_pod(name: &str, ns: &str) -> Pod {
|
||||
let mut pod = base_pod(name, ns);
|
||||
pod.spec = Some(PodSpec {
|
||||
volumes: Some(vec![Volume {
|
||||
name: "scratch".to_string(),
|
||||
empty_dir: Some(EmptyDirVolumeSource::default()),
|
||||
..Default::default()
|
||||
}]),
|
||||
..Default::default()
|
||||
});
|
||||
pod
|
||||
}
|
||||
|
||||
fn completed_pod(name: &str, ns: &str, phase: &str) -> Pod {
|
||||
let mut pod = base_pod(name, ns);
|
||||
pod.status = Some(PodStatus {
|
||||
phase: Some(phase.to_string()),
|
||||
..Default::default()
|
||||
});
|
||||
pod
|
||||
}
|
||||
|
||||
fn default_opts() -> DrainOptions {
|
||||
DrainOptions::default()
|
||||
}
|
||||
|
||||
// All test bodies are identical to the original — only the module path changed.
|
||||
|
||||
#[test]
|
||||
fn empty_pod_list_returns_empty_vecs() {
|
||||
let (e, s) = K8sClient::classify_pods_for_drain(&[], &default_opts()).unwrap();
|
||||
assert!(e.is_empty());
|
||||
assert!(s.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normal_pod_is_evictable() {
|
||||
let pods = vec![base_pod("web", "default")];
|
||||
let (e, s) = K8sClient::classify_pods_for_drain(&pods, &default_opts()).unwrap();
|
||||
assert_eq!(e.len(), 1);
|
||||
assert!(s.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mirror_pod_is_skipped() {
|
||||
let pods = vec![mirror_pod("kube-apiserver", "kube-system")];
|
||||
let (e, s) = K8sClient::classify_pods_for_drain(&pods, &default_opts()).unwrap();
|
||||
assert!(e.is_empty());
|
||||
assert!(s[0].contains("mirror pod"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn completed_pods_are_skipped() {
|
||||
for phase in ["Succeeded", "Failed"] {
|
||||
let pods = vec![completed_pod("job", "batch", phase)];
|
||||
let (e, s) = K8sClient::classify_pods_for_drain(&pods, &default_opts()).unwrap();
|
||||
assert!(e.is_empty());
|
||||
assert!(s[0].contains("completed"));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn daemonset_skipped_when_ignored() {
|
||||
let pods = vec![daemonset_pod("fluentd", "logging")];
|
||||
let opts = DrainOptions {
|
||||
ignore_daemonsets: true,
|
||||
..default_opts()
|
||||
};
|
||||
let (e, s) = K8sClient::classify_pods_for_drain(&pods, &opts).unwrap();
|
||||
assert!(e.is_empty());
|
||||
assert!(s[0].contains("DaemonSet-managed"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn daemonset_blocks_when_not_ignored() {
|
||||
let pods = vec![daemonset_pod("fluentd", "logging")];
|
||||
let opts = DrainOptions {
|
||||
ignore_daemonsets: false,
|
||||
..default_opts()
|
||||
};
|
||||
let err = K8sClient::classify_pods_for_drain(&pods, &opts).unwrap_err();
|
||||
assert!(err.contains("DaemonSet") && err.contains("logging/fluentd"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn emptydir_blocks_without_flag() {
|
||||
let pods = vec![emptydir_pod("cache", "default")];
|
||||
let opts = DrainOptions {
|
||||
delete_emptydir_data: false,
|
||||
..default_opts()
|
||||
};
|
||||
let err = K8sClient::classify_pods_for_drain(&pods, &opts).unwrap_err();
|
||||
assert!(err.contains("emptyDir") && err.contains("default/cache"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn emptydir_evictable_with_flag() {
|
||||
let pods = vec![emptydir_pod("cache", "default")];
|
||||
let opts = DrainOptions {
|
||||
delete_emptydir_data: true,
|
||||
..default_opts()
|
||||
};
|
||||
let (e, s) = K8sClient::classify_pods_for_drain(&pods, &opts).unwrap();
|
||||
assert_eq!(e.len(), 1);
|
||||
assert!(s.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_blocking_all_reported() {
|
||||
let pods = vec![daemonset_pod("ds", "ns1"), emptydir_pod("ed", "ns2")];
|
||||
let opts = DrainOptions {
|
||||
ignore_daemonsets: false,
|
||||
delete_emptydir_data: false,
|
||||
..default_opts()
|
||||
};
|
||||
let err = K8sClient::classify_pods_for_drain(&pods, &opts).unwrap_err();
|
||||
assert!(err.contains("ns1/ds") && err.contains("ns2/ed"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mixed_pods_classified_correctly() {
|
||||
let pods = vec![
|
||||
base_pod("web", "default"),
|
||||
mirror_pod("kube-apiserver", "kube-system"),
|
||||
daemonset_pod("fluentd", "logging"),
|
||||
completed_pod("job", "batch", "Succeeded"),
|
||||
base_pod("api", "default"),
|
||||
];
|
||||
let (e, s) = K8sClient::classify_pods_for_drain(&pods, &default_opts()).unwrap();
|
||||
let names: Vec<&str> = e
|
||||
.iter()
|
||||
.map(|p| p.metadata.name.as_deref().unwrap())
|
||||
.collect();
|
||||
assert_eq!(names, vec!["web", "api"]);
|
||||
assert_eq!(s.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mirror_checked_before_completed() {
|
||||
let mut pod = mirror_pod("static-etcd", "kube-system");
|
||||
pod.status = Some(PodStatus {
|
||||
phase: Some("Succeeded".to_string()),
|
||||
..Default::default()
|
||||
});
|
||||
let (_, s) = K8sClient::classify_pods_for_drain(&[pod], &default_opts()).unwrap();
|
||||
assert!(s[0].contains("mirror pod"), "got: {}", s[0]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn completed_checked_before_daemonset() {
|
||||
let mut pod = daemonset_pod("collector", "monitoring");
|
||||
pod.status = Some(PodStatus {
|
||||
phase: Some("Failed".to_string()),
|
||||
..Default::default()
|
||||
});
|
||||
let (_, s) = K8sClient::classify_pods_for_drain(&[pod], &default_opts()).unwrap();
|
||||
assert!(s[0].contains("completed"), "got: {}", s[0]);
|
||||
}
|
||||
}
|
||||
193
harmony-k8s/src/pod.rs
Normal file
193
harmony-k8s/src/pod.rs
Normal file
@@ -0,0 +1,193 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use k8s_openapi::api::core::v1::Pod;
|
||||
use kube::{
|
||||
Error,
|
||||
api::{Api, AttachParams, ListParams},
|
||||
error::DiscoveryError,
|
||||
runtime::reflector::Lookup,
|
||||
};
|
||||
use log::debug;
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::client::K8sClient;
|
||||
|
||||
impl K8sClient {
|
||||
pub async fn get_pod(&self, name: &str, namespace: Option<&str>) -> Result<Option<Pod>, Error> {
|
||||
let api: Api<Pod> = match namespace {
|
||||
Some(ns) => Api::namespaced(self.client.clone(), ns),
|
||||
None => Api::default_namespaced(self.client.clone()),
|
||||
};
|
||||
api.get_opt(name).await
|
||||
}
|
||||
|
||||
pub async fn wait_for_pod_ready(
|
||||
&self,
|
||||
pod_name: &str,
|
||||
namespace: Option<&str>,
|
||||
) -> Result<(), Error> {
|
||||
let mut elapsed = 0u64;
|
||||
let interval = 5u64;
|
||||
let timeout_secs = 120u64;
|
||||
loop {
|
||||
if let Some(p) = self.get_pod(pod_name, namespace).await? {
|
||||
if let Some(phase) = p.status.and_then(|s| s.phase) {
|
||||
if phase.to_lowercase() == "running" {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
if elapsed >= timeout_secs {
|
||||
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Pod '{}' in '{}' did not become ready within {timeout_secs}s",
|
||||
pod_name,
|
||||
namespace.unwrap_or("<default>"),
|
||||
))));
|
||||
}
|
||||
sleep(Duration::from_secs(interval)).await;
|
||||
elapsed += interval;
|
||||
}
|
||||
}
|
||||
|
||||
/// Polls a pod until it reaches `Succeeded` or `Failed`, then returns its
|
||||
/// logs. Used internally by node operations.
|
||||
pub(crate) async fn wait_for_pod_completion(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: &str,
|
||||
) -> Result<String, Error> {
|
||||
let api: Api<Pod> = Api::namespaced(self.client.clone(), namespace);
|
||||
let poll_interval = Duration::from_secs(2);
|
||||
for _ in 0..60 {
|
||||
sleep(poll_interval).await;
|
||||
let p = api.get(name).await?;
|
||||
match p.status.and_then(|s| s.phase).as_deref() {
|
||||
Some("Succeeded") => {
|
||||
let logs = api
|
||||
.logs(name, &Default::default())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
debug!("Pod {namespace}/{name} succeeded. Logs: {logs}");
|
||||
return Ok(logs);
|
||||
}
|
||||
Some("Failed") => {
|
||||
let logs = api
|
||||
.logs(name, &Default::default())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
debug!("Pod {namespace}/{name} failed. Logs: {logs}");
|
||||
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Pod '{name}' failed.\n{logs}"
|
||||
))));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Err(Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Timed out waiting for pod '{name}'"
|
||||
))))
|
||||
}
|
||||
|
||||
/// Execute a command in the first pod matching `{label}={name}`.
|
||||
pub async fn exec_app_capture_output(
|
||||
&self,
|
||||
name: String,
|
||||
label: String,
|
||||
namespace: Option<&str>,
|
||||
command: Vec<&str>,
|
||||
) -> Result<String, String> {
|
||||
let api: Api<Pod> = match namespace {
|
||||
Some(ns) => Api::namespaced(self.client.clone(), ns),
|
||||
None => Api::default_namespaced(self.client.clone()),
|
||||
};
|
||||
let pod_list = api
|
||||
.list(&ListParams::default().labels(&format!("{label}={name}")))
|
||||
.await
|
||||
.expect("Failed to list pods");
|
||||
|
||||
let pod_name = pod_list
|
||||
.items
|
||||
.first()
|
||||
.expect("No matching pod")
|
||||
.name()
|
||||
.expect("Pod has no name")
|
||||
.into_owned();
|
||||
|
||||
match api
|
||||
.exec(
|
||||
&pod_name,
|
||||
command,
|
||||
&AttachParams::default().stdout(true).stderr(true),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Err(e) => Err(e.to_string()),
|
||||
Ok(mut process) => {
|
||||
let status = process
|
||||
.take_status()
|
||||
.expect("No status handle")
|
||||
.await
|
||||
.expect("Status channel closed");
|
||||
|
||||
if let Some(s) = status.status {
|
||||
let mut buf = String::new();
|
||||
if let Some(mut stdout) = process.stdout() {
|
||||
stdout
|
||||
.read_to_string(&mut buf)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to read stdout: {e}"))?;
|
||||
}
|
||||
debug!("exec status: {} - {:?}", s, status.details);
|
||||
if s == "Success" { Ok(buf) } else { Err(s) }
|
||||
} else {
|
||||
Err("No inner status from pod exec".to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a command in the first pod matching
|
||||
/// `app.kubernetes.io/name={name}`.
|
||||
pub async fn exec_app(
|
||||
&self,
|
||||
name: String,
|
||||
namespace: Option<&str>,
|
||||
command: Vec<&str>,
|
||||
) -> Result<(), String> {
|
||||
let api: Api<Pod> = match namespace {
|
||||
Some(ns) => Api::namespaced(self.client.clone(), ns),
|
||||
None => Api::default_namespaced(self.client.clone()),
|
||||
};
|
||||
let pod_list = api
|
||||
.list(&ListParams::default().labels(&format!("app.kubernetes.io/name={name}")))
|
||||
.await
|
||||
.expect("Failed to list pods");
|
||||
|
||||
let pod_name = pod_list
|
||||
.items
|
||||
.first()
|
||||
.expect("No matching pod")
|
||||
.name()
|
||||
.expect("Pod has no name")
|
||||
.into_owned();
|
||||
|
||||
match api.exec(&pod_name, command, &AttachParams::default()).await {
|
||||
Err(e) => Err(e.to_string()),
|
||||
Ok(mut process) => {
|
||||
let status = process
|
||||
.take_status()
|
||||
.expect("No status handle")
|
||||
.await
|
||||
.expect("Status channel closed");
|
||||
|
||||
if let Some(s) = status.status {
|
||||
debug!("exec status: {} - {:?}", s, status.details);
|
||||
if s == "Success" { Ok(()) } else { Err(s) }
|
||||
} else {
|
||||
Err("No inner status from pod exec".to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
316
harmony-k8s/src/resources.rs
Normal file
316
harmony-k8s/src/resources.rs
Normal file
@@ -0,0 +1,316 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use k8s_openapi::api::{
|
||||
apps::v1::Deployment,
|
||||
core::v1::{Node, ServiceAccount},
|
||||
};
|
||||
use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
|
||||
use kube::api::ApiResource;
|
||||
use kube::{
|
||||
Error, Resource,
|
||||
api::{Api, DynamicObject, GroupVersionKind, ListParams, ObjectList},
|
||||
runtime::conditions,
|
||||
runtime::wait::await_condition,
|
||||
};
|
||||
use log::debug;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::client::K8sClient;
|
||||
use crate::types::ScopeResolver;
|
||||
|
||||
impl K8sClient {
|
||||
pub async fn has_healthy_deployment_with_label(
|
||||
&self,
|
||||
namespace: &str,
|
||||
label_selector: &str,
|
||||
) -> Result<bool, Error> {
|
||||
let api: Api<Deployment> = Api::namespaced(self.client.clone(), namespace);
|
||||
let list = api
|
||||
.list(&ListParams::default().labels(label_selector))
|
||||
.await?;
|
||||
for d in list.items {
|
||||
let available = d
|
||||
.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.available_replicas)
|
||||
.unwrap_or(0);
|
||||
if available > 0 {
|
||||
return Ok(true);
|
||||
}
|
||||
if let Some(conds) = d.status.as_ref().and_then(|s| s.conditions.as_ref()) {
|
||||
if conds
|
||||
.iter()
|
||||
.any(|c| c.type_ == "Available" && c.status == "True")
|
||||
{
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub async fn list_namespaces_with_healthy_deployments(
|
||||
&self,
|
||||
label_selector: &str,
|
||||
) -> Result<Vec<String>, Error> {
|
||||
let api: Api<Deployment> = Api::all(self.client.clone());
|
||||
let list = api
|
||||
.list(&ListParams::default().labels(label_selector))
|
||||
.await?;
|
||||
|
||||
let mut healthy_ns: HashMap<String, bool> = HashMap::new();
|
||||
for d in list.items {
|
||||
let ns = match d.metadata.namespace.clone() {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
let available = d
|
||||
.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.available_replicas)
|
||||
.unwrap_or(0);
|
||||
let is_healthy = if available > 0 {
|
||||
true
|
||||
} else {
|
||||
d.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.conditions.as_ref())
|
||||
.map(|c| {
|
||||
c.iter()
|
||||
.any(|c| c.type_ == "Available" && c.status == "True")
|
||||
})
|
||||
.unwrap_or(false)
|
||||
};
|
||||
if is_healthy {
|
||||
healthy_ns.insert(ns, true);
|
||||
}
|
||||
}
|
||||
Ok(healthy_ns.into_keys().collect())
|
||||
}
|
||||
|
||||
pub async fn get_controller_service_account_name(
|
||||
&self,
|
||||
ns: &str,
|
||||
) -> Result<Option<String>, Error> {
|
||||
let api: Api<Deployment> = Api::namespaced(self.client.clone(), ns);
|
||||
let list = api
|
||||
.list(&ListParams::default().labels("app.kubernetes.io/component=controller"))
|
||||
.await?;
|
||||
if let Some(dep) = list.items.first() {
|
||||
if let Some(sa) = dep
|
||||
.spec
|
||||
.as_ref()
|
||||
.and_then(|s| s.template.spec.as_ref())
|
||||
.and_then(|s| s.service_account_name.clone())
|
||||
{
|
||||
return Ok(Some(sa));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub async fn list_clusterrolebindings_json(&self) -> Result<Vec<Value>, Error> {
|
||||
let gvk = GroupVersionKind::gvk("rbac.authorization.k8s.io", "v1", "ClusterRoleBinding");
|
||||
let ar = ApiResource::from_gvk(&gvk);
|
||||
let api: Api<DynamicObject> = Api::all_with(self.client.clone(), &ar);
|
||||
let list = api.list(&ListParams::default()).await?;
|
||||
Ok(list
|
||||
.items
|
||||
.into_iter()
|
||||
.map(|o| serde_json::to_value(&o).unwrap_or(Value::Null))
|
||||
.collect())
|
||||
}
|
||||
|
||||
pub async fn is_service_account_cluster_wide(&self, sa: &str, ns: &str) -> Result<bool, Error> {
|
||||
let sa_user = format!("system:serviceaccount:{ns}:{sa}");
|
||||
for crb in self.list_clusterrolebindings_json().await? {
|
||||
if let Some(subjects) = crb.get("subjects").and_then(|s| s.as_array()) {
|
||||
for subj in subjects {
|
||||
let kind = subj.get("kind").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let name = subj.get("name").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let subj_ns = subj.get("namespace").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if (kind == "ServiceAccount" && name == sa && subj_ns == ns)
|
||||
|| (kind == "User" && name == sa_user)
|
||||
{
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub async fn has_crd(&self, name: &str) -> Result<bool, Error> {
|
||||
let api: Api<CustomResourceDefinition> = Api::all(self.client.clone());
|
||||
let crds = api
|
||||
.list(&ListParams::default().fields(&format!("metadata.name={name}")))
|
||||
.await?;
|
||||
Ok(!crds.items.is_empty())
|
||||
}
|
||||
|
||||
pub async fn service_account_api(&self, namespace: &str) -> Api<ServiceAccount> {
|
||||
Api::namespaced(self.client.clone(), namespace)
|
||||
}
|
||||
|
||||
pub async fn get_resource_json_value(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
gvk: &GroupVersionKind,
|
||||
) -> Result<DynamicObject, Error> {
|
||||
let ar = ApiResource::from_gvk(gvk);
|
||||
let api: Api<DynamicObject> = match namespace {
|
||||
Some(ns) => Api::namespaced_with(self.client.clone(), ns, &ar),
|
||||
None => Api::default_namespaced_with(self.client.clone(), &ar),
|
||||
};
|
||||
api.get(name).await
|
||||
}
|
||||
|
||||
pub async fn get_secret_json_value(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
) -> Result<DynamicObject, Error> {
|
||||
self.get_resource_json_value(
|
||||
name,
|
||||
namespace,
|
||||
&GroupVersionKind {
|
||||
group: String::new(),
|
||||
version: "v1".to_string(),
|
||||
kind: "Secret".to_string(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn get_deployment(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
) -> Result<Option<Deployment>, Error> {
|
||||
let api: Api<Deployment> = match namespace {
|
||||
Some(ns) => {
|
||||
debug!("Getting namespaced deployment '{name}' in '{ns}'");
|
||||
Api::namespaced(self.client.clone(), ns)
|
||||
}
|
||||
None => {
|
||||
debug!("Getting deployment '{name}' in default namespace");
|
||||
Api::default_namespaced(self.client.clone())
|
||||
}
|
||||
};
|
||||
api.get_opt(name).await
|
||||
}
|
||||
|
||||
pub async fn scale_deployment(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
replicas: u32,
|
||||
) -> Result<(), Error> {
|
||||
let api: Api<Deployment> = match namespace {
|
||||
Some(ns) => Api::namespaced(self.client.clone(), ns),
|
||||
None => Api::default_namespaced(self.client.clone()),
|
||||
};
|
||||
use kube::api::{Patch, PatchParams};
|
||||
use serde_json::json;
|
||||
let patch = json!({ "spec": { "replicas": replicas } });
|
||||
api.patch_scale(name, &PatchParams::default(), &Patch::Merge(&patch))
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn delete_deployment(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
) -> Result<(), Error> {
|
||||
let api: Api<Deployment> = match namespace {
|
||||
Some(ns) => Api::namespaced(self.client.clone(), ns),
|
||||
None => Api::default_namespaced(self.client.clone()),
|
||||
};
|
||||
api.delete(name, &kube::api::DeleteParams::default())
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn wait_until_deployment_ready(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<(), String> {
|
||||
let api: Api<Deployment> = match namespace {
|
||||
Some(ns) => Api::namespaced(self.client.clone(), ns),
|
||||
None => Api::default_namespaced(self.client.clone()),
|
||||
};
|
||||
let timeout = timeout.unwrap_or(Duration::from_secs(120));
|
||||
let establish = await_condition(api, name, conditions::is_deployment_completed());
|
||||
tokio::time::timeout(timeout, establish)
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(|_| "Timed out waiting for deployment".to_string())
|
||||
}
|
||||
|
||||
/// Gets a single named resource, using the correct API scope for `K`.
|
||||
pub async fn get_resource<K>(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
) -> Result<Option<K>, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned,
|
||||
<K as Resource>::Scope: ScopeResolver<K>,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
let api: Api<K> =
|
||||
<<K as Resource>::Scope as ScopeResolver<K>>::get_api(&self.client, namespace);
|
||||
api.get_opt(name).await
|
||||
}
|
||||
|
||||
pub async fn list_resources<K>(
|
||||
&self,
|
||||
namespace: Option<&str>,
|
||||
list_params: Option<ListParams>,
|
||||
) -> Result<ObjectList<K>, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned,
|
||||
<K as Resource>::Scope: ScopeResolver<K>,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
let api: Api<K> =
|
||||
<<K as Resource>::Scope as ScopeResolver<K>>::get_api(&self.client, namespace);
|
||||
api.list(&list_params.unwrap_or_default()).await
|
||||
}
|
||||
|
||||
pub async fn list_all_resources_with_labels<K>(&self, labels: &str) -> Result<Vec<K>, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
Api::<K>::all(self.client.clone())
|
||||
.list(&ListParams::default().labels(labels))
|
||||
.await
|
||||
.map(|l| l.items)
|
||||
}
|
||||
|
||||
pub async fn get_all_resource_in_all_namespace<K>(&self) -> Result<Vec<K>, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned,
|
||||
<K as Resource>::Scope: ScopeResolver<K>,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
Api::<K>::all(self.client.clone())
|
||||
.list(&Default::default())
|
||||
.await
|
||||
.map(|l| l.items)
|
||||
}
|
||||
|
||||
pub async fn get_nodes(
|
||||
&self,
|
||||
list_params: Option<ListParams>,
|
||||
) -> Result<ObjectList<Node>, Error> {
|
||||
self.list_resources(None, list_params).await
|
||||
}
|
||||
}
|
||||
100
harmony-k8s/src/types.rs
Normal file
100
harmony-k8s/src/types.rs
Normal file
@@ -0,0 +1,100 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use k8s_openapi::{ClusterResourceScope, NamespaceResourceScope};
|
||||
use kube::{Api, Client, Resource};
|
||||
use serde::Serialize;
|
||||
|
||||
/// Which Kubernetes distribution is running. Detected once at runtime via
|
||||
/// [`crate::discovery::K8sClient::get_k8s_distribution`].
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
pub enum KubernetesDistribution {
|
||||
Default,
|
||||
OpenshiftFamily,
|
||||
K3sFamily,
|
||||
}
|
||||
|
||||
/// A file to be written to a node's filesystem.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NodeFile {
|
||||
/// Absolute path on the host where the file should be written.
|
||||
pub path: String,
|
||||
/// Content of the file.
|
||||
pub content: String,
|
||||
/// UNIX permissions (e.g. `0o600`).
|
||||
pub mode: u32,
|
||||
}
|
||||
|
||||
/// Options controlling the behaviour of a [`crate::K8sClient::drain_node`] operation.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DrainOptions {
|
||||
/// Evict pods that use `emptyDir` volumes (ephemeral data is lost).
|
||||
/// Equivalent to `kubectl drain --delete-emptydir-data`.
|
||||
pub delete_emptydir_data: bool,
|
||||
/// Silently skip DaemonSet-managed pods instead of blocking the drain.
|
||||
/// Equivalent to `kubectl drain --ignore-daemonsets`.
|
||||
pub ignore_daemonsets: bool,
|
||||
/// Maximum wall-clock time to wait for all evictions to complete.
|
||||
pub timeout: Duration,
|
||||
}
|
||||
|
||||
impl Default for DrainOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
delete_emptydir_data: false,
|
||||
ignore_daemonsets: true,
|
||||
timeout: Duration::from_secs(1),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DrainOptions {
|
||||
pub fn default_ignore_daemonset_delete_emptydir_data() -> Self {
|
||||
Self {
|
||||
delete_emptydir_data: true,
|
||||
ignore_daemonsets: true,
|
||||
..Self::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Controls how [`crate::K8sClient::apply_with_strategy`] behaves when the
|
||||
/// resource already exists (or does not).
|
||||
pub enum WriteMode {
|
||||
/// Server-side apply; create if absent, update if present (default).
|
||||
CreateOrUpdate,
|
||||
/// POST only; return an error if the resource already exists.
|
||||
Create,
|
||||
/// Server-side apply only; return an error if the resource does not exist.
|
||||
Update,
|
||||
}
|
||||
|
||||
// ── Scope resolution trait ───────────────────────────────────────────────────
|
||||
|
||||
/// Resolves the correct [`kube::Api`] for a resource type based on its scope
|
||||
/// (cluster-wide vs. namespace-scoped).
|
||||
pub trait ScopeResolver<K: Resource> {
|
||||
fn get_api(client: &Client, ns: Option<&str>) -> Api<K>;
|
||||
}
|
||||
|
||||
impl<K> ScopeResolver<K> for ClusterResourceScope
|
||||
where
|
||||
K: Resource<Scope = ClusterResourceScope>,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
fn get_api(client: &Client, _ns: Option<&str>) -> Api<K> {
|
||||
Api::all(client.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl<K> ScopeResolver<K> for NamespaceResourceScope
|
||||
where
|
||||
K: Resource<Scope = NamespaceResourceScope>,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
fn get_api(client: &Client, ns: Option<&str>) -> Api<K> {
|
||||
match ns {
|
||||
Some(ns) => Api::namespaced(client.clone(), ns),
|
||||
None => Api::default_namespaced(client.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -21,6 +21,8 @@ semver = "1.0.23"
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-retry.workspace = true
|
||||
tokio-util.workspace = true
|
||||
derive-new.workspace = true
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
@@ -31,6 +33,7 @@ opnsense-config-xml = { path = "../opnsense-config-xml" }
|
||||
harmony_macros = { path = "../harmony_macros" }
|
||||
harmony_types = { path = "../harmony_types" }
|
||||
harmony_execution = { path = "../harmony_execution" }
|
||||
harmony-k8s = { path = "../harmony-k8s" }
|
||||
uuid.workspace = true
|
||||
url.workspace = true
|
||||
kube = { workspace = true, features = ["derive"] }
|
||||
@@ -60,7 +63,6 @@ temp-dir = "0.1.14"
|
||||
dyn-clone = "1.0.19"
|
||||
similar.workspace = true
|
||||
futures-util = "0.3.31"
|
||||
tokio-util = "0.7.15"
|
||||
strum = { version = "0.27.1", features = ["derive"] }
|
||||
tempfile.workspace = true
|
||||
serde_with = "3.14.0"
|
||||
@@ -80,7 +82,7 @@ sqlx.workspace = true
|
||||
inquire.workspace = true
|
||||
brocade = { path = "../brocade" }
|
||||
option-ext = "0.2.0"
|
||||
tokio-retry = "0.3.0"
|
||||
rand.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions.workspace = true
|
||||
|
||||
@@ -108,11 +108,18 @@ impl PhysicalHost {
|
||||
};
|
||||
|
||||
let storage_summary = if drive_count > 1 {
|
||||
let drive_sizes = self
|
||||
.storage
|
||||
.iter()
|
||||
.map(|d| format_storage(d.size_bytes))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
format!(
|
||||
"{} Storage ({}x {})",
|
||||
"{} Storage ({} Disks [{}])",
|
||||
format_storage(total_storage_bytes),
|
||||
drive_count,
|
||||
first_drive_model
|
||||
drive_sizes
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
|
||||
@@ -4,8 +4,6 @@ use std::error::Error;
|
||||
use async_trait::async_trait;
|
||||
use derive_new::new;
|
||||
|
||||
use crate::inventory::HostRole;
|
||||
|
||||
use super::{
|
||||
data::Version, executors::ExecutorError, inventory::Inventory, topology::PreparationError,
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use async_trait::async_trait;
|
||||
use brocade::PortOperatingMode;
|
||||
use harmony_k8s::K8sClient;
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{
|
||||
id::Id,
|
||||
@@ -9,17 +9,20 @@ use harmony_types::{
|
||||
use log::debug;
|
||||
use log::info;
|
||||
|
||||
use crate::topology::{HelmCommand, PxeOptions};
|
||||
use crate::{data::FileContent, executors::ExecutorError, topology::node_exporter::NodeExporter};
|
||||
use crate::{infra::network_manager::OpenShiftNmStateNetworkManager, topology::PortConfig};
|
||||
use crate::{modules::inventory::HarmonyDiscoveryStrategy, topology::PxeOptions};
|
||||
|
||||
use super::{
|
||||
DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig,
|
||||
HttpServer, IpAddress, K8sclient, LoadBalancer, LoadBalancerService, LogicalHost, NetworkError,
|
||||
NetworkManager, PreparationError, PreparationOutcome, Router, Switch, SwitchClient,
|
||||
SwitchError, TftpServer, Topology, k8s::K8sClient,
|
||||
SwitchError, TftpServer, Topology,
|
||||
};
|
||||
use std::{
|
||||
process::Command,
|
||||
sync::{Arc, OnceLock},
|
||||
};
|
||||
use std::sync::{Arc, OnceLock};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct HAClusterTopology {
|
||||
@@ -53,6 +56,30 @@ impl Topology for HAClusterTopology {
|
||||
}
|
||||
}
|
||||
|
||||
impl HelmCommand for HAClusterTopology {
|
||||
fn get_helm_command(&self) -> Command {
|
||||
let mut cmd = Command::new("helm");
|
||||
if let Some(k) = &self.kubeconfig {
|
||||
cmd.args(["--kubeconfig", k]);
|
||||
}
|
||||
|
||||
// FIXME we should support context anywhere there is a k8sclient
|
||||
// This likely belongs in the k8sclient itself and should be extracted to a separate
|
||||
// crate
|
||||
//
|
||||
// I feel like helm could very well be a feature of this external k8s client.
|
||||
//
|
||||
// Same for kustomize
|
||||
//
|
||||
// if let Some(c) = &self.k8s_context {
|
||||
// cmd.args(["--kube-context", c]);
|
||||
// }
|
||||
|
||||
info!("Using helm command {cmd:?}");
|
||||
cmd
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl K8sclient for HAClusterTopology {
|
||||
async fn k8s_client(&self) -> Result<Arc<K8sClient>, String> {
|
||||
@@ -301,10 +328,10 @@ impl Switch for HAClusterTopology {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
async fn clear_port_channel(&self, _ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
async fn configure_interface(&self, _ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -322,7 +349,15 @@ impl NetworkManager for HAClusterTopology {
|
||||
self.network_manager().await.configure_bond(config).await
|
||||
}
|
||||
|
||||
//TODO add snmp here
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
self.network_manager()
|
||||
.await
|
||||
.configure_bond_on_primary_interface(config)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -562,10 +597,10 @@ impl SwitchClient for DummyInfra {
|
||||
) -> Result<u8, SwitchError> {
|
||||
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
async fn clear_port_channel(&self, _ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
async fn configure_interface(&self, _ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,18 +2,13 @@ use std::{collections::BTreeMap, process::Command, sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine, engine::general_purpose};
|
||||
use harmony_k8s::{K8sClient, KubernetesDistribution};
|
||||
use harmony_types::rfc1123::Rfc1123Name;
|
||||
use k8s_openapi::{
|
||||
ByteString,
|
||||
api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
},
|
||||
};
|
||||
use kube::{
|
||||
api::{DynamicObject, GroupVersionKind, ObjectMeta},
|
||||
runtime::conditions,
|
||||
use k8s_openapi::api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use kube::api::{DynamicObject, GroupVersionKind, ObjectMeta};
|
||||
use log::{debug, info, trace, warn};
|
||||
use serde::Serialize;
|
||||
use tokio::sync::OnceCell;
|
||||
@@ -34,10 +29,7 @@ use crate::{
|
||||
score_cert_management::CertificateManagementScore,
|
||||
},
|
||||
k3d::K3DInstallationScore,
|
||||
k8s::{
|
||||
ingress::{K8sIngressScore, PathType},
|
||||
resource::K8sResourceScore,
|
||||
},
|
||||
k8s::ingress::{K8sIngressScore, PathType},
|
||||
monitoring::{
|
||||
grafana::{grafana::Grafana, helm::helm_grafana::grafana_helm_chart_score},
|
||||
kube_prometheus::crd::{
|
||||
@@ -54,7 +46,6 @@ use crate::{
|
||||
service_monitor::ServiceMonitor,
|
||||
},
|
||||
},
|
||||
nats::capability::NatsCluster,
|
||||
okd::{crd::ingresses_config::Ingress as IngressResource, route::OKDTlsPassthroughScore},
|
||||
prometheus::{
|
||||
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
||||
@@ -68,7 +59,6 @@ use crate::{
|
||||
use super::super::{
|
||||
DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, PreparationError,
|
||||
PreparationOutcome, Topology,
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::AlertReceiver,
|
||||
tenant::{
|
||||
TenantConfig, TenantManager,
|
||||
@@ -86,13 +76,6 @@ struct K8sState {
|
||||
message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub enum KubernetesDistribution {
|
||||
OpenshiftFamily,
|
||||
K3sFamily,
|
||||
Default,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
enum K8sSource {
|
||||
LocalK3d,
|
||||
@@ -103,7 +86,6 @@ enum K8sSource {
|
||||
pub struct K8sAnywhereTopology {
|
||||
k8s_state: Arc<OnceCell<Option<K8sState>>>,
|
||||
tenant_manager: Arc<OnceCell<K8sTenantManager>>,
|
||||
k8s_distribution: Arc<OnceCell<KubernetesDistribution>>,
|
||||
config: Arc<K8sAnywhereConfig>,
|
||||
}
|
||||
|
||||
@@ -554,7 +536,6 @@ impl K8sAnywhereTopology {
|
||||
Self {
|
||||
k8s_state: Arc::new(OnceCell::new()),
|
||||
tenant_manager: Arc::new(OnceCell::new()),
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
config: Arc::new(K8sAnywhereConfig::from_env()),
|
||||
}
|
||||
}
|
||||
@@ -563,7 +544,6 @@ impl K8sAnywhereTopology {
|
||||
Self {
|
||||
k8s_state: Arc::new(OnceCell::new()),
|
||||
tenant_manager: Arc::new(OnceCell::new()),
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
config: Arc::new(config),
|
||||
}
|
||||
}
|
||||
@@ -600,41 +580,6 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_k8s_distribution(&self) -> Result<&KubernetesDistribution, PreparationError> {
|
||||
self.k8s_distribution
|
||||
.get_or_try_init(async || {
|
||||
debug!("Trying to detect k8s distribution");
|
||||
let client = self.k8s_client().await.unwrap();
|
||||
|
||||
let discovery = client.discovery().await.map_err(|e| {
|
||||
PreparationError::new(format!("Could not discover API groups: {}", e))
|
||||
})?;
|
||||
|
||||
let version = client.get_apiserver_version().await.map_err(|e| {
|
||||
PreparationError::new(format!("Could not get server version: {}", e))
|
||||
})?;
|
||||
|
||||
// OpenShift / OKD
|
||||
if discovery
|
||||
.groups()
|
||||
.any(|g| g.name() == "project.openshift.io")
|
||||
{
|
||||
info!("Found KubernetesDistribution OpenshiftFamily");
|
||||
return Ok(KubernetesDistribution::OpenshiftFamily);
|
||||
}
|
||||
|
||||
// K3d / K3s
|
||||
if version.git_version.contains("k3s") {
|
||||
info!("Found KubernetesDistribution K3sFamily");
|
||||
return Ok(KubernetesDistribution::K3sFamily);
|
||||
}
|
||||
|
||||
info!("Could not identify KubernetesDistribution, using Default");
|
||||
return Ok(KubernetesDistribution::Default);
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option<String> {
|
||||
let token_b64 = secret
|
||||
.data
|
||||
@@ -652,6 +597,16 @@ impl K8sAnywhereTopology {
|
||||
Some(cleaned)
|
||||
}
|
||||
|
||||
pub async fn get_k8s_distribution(&self) -> Result<KubernetesDistribution, PreparationError> {
|
||||
self.k8s_client()
|
||||
.await?
|
||||
.get_k8s_distribution()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
PreparationError::new(format!("Failed to get k8s distribution from client : {e}"))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build_cluster_rolebinding(
|
||||
&self,
|
||||
service_account_name: &str,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{
|
||||
interpret::Outcome,
|
||||
inventory::Inventory,
|
||||
modules::postgresql::{
|
||||
K8sPostgreSQLScore,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::{net::SocketAddr, str::FromStr};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
|
||||
use super::LogicalHost;
|
||||
|
||||
@@ -16,7 +16,6 @@ pub mod tenant;
|
||||
use derive_new::new;
|
||||
pub use k8s_anywhere::*;
|
||||
pub use localhost::*;
|
||||
pub mod k8s;
|
||||
mod load_balancer;
|
||||
pub mod router;
|
||||
mod tftp;
|
||||
|
||||
@@ -9,6 +9,7 @@ use std::{
|
||||
use async_trait::async_trait;
|
||||
use brocade::PortOperatingMode;
|
||||
use derive_new::new;
|
||||
use harmony_k8s::K8sClient;
|
||||
use harmony_types::{
|
||||
id::Id,
|
||||
net::{IpAddress, MacAddress},
|
||||
@@ -18,7 +19,7 @@ use serde::Serialize;
|
||||
|
||||
use crate::executors::ExecutorError;
|
||||
|
||||
use super::{LogicalHost, k8s::K8sClient};
|
||||
use super::LogicalHost;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DHCPStaticEntry {
|
||||
@@ -188,6 +189,10 @@ impl FromStr for DnsRecordType {
|
||||
pub trait NetworkManager: Debug + Send + Sync {
|
||||
async fn ensure_network_manager_installed(&self) -> Result<(), NetworkError>;
|
||||
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), NetworkError>;
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, new)]
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{
|
||||
executors::ExecutorError,
|
||||
topology::k8s::{ApplyStrategy, K8sClient},
|
||||
};
|
||||
use crate::executors::ExecutorError;
|
||||
use async_trait::async_trait;
|
||||
use harmony_k8s::K8sClient;
|
||||
use k8s_openapi::{
|
||||
api::{
|
||||
core::v1::{LimitRange, Namespace, ResourceQuota},
|
||||
@@ -14,7 +12,7 @@ use k8s_openapi::{
|
||||
},
|
||||
apimachinery::pkg::util::intstr::IntOrString,
|
||||
};
|
||||
use kube::{Resource, api::DynamicObject};
|
||||
use kube::Resource;
|
||||
use log::debug;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::json;
|
||||
@@ -59,7 +57,6 @@ impl K8sTenantManager {
|
||||
) -> Result<K, ExecutorError>
|
||||
where
|
||||
<K as kube::Resource>::DynamicType: Default,
|
||||
<K as kube::Resource>::Scope: ApplyStrategy<K>,
|
||||
{
|
||||
self.apply_labels(&mut resource, config);
|
||||
self.k8s_client
|
||||
|
||||
@@ -5,9 +5,20 @@ use harmony_types::{
|
||||
net::{IpAddress, MacAddress},
|
||||
switch::{PortDeclaration, PortLocation},
|
||||
};
|
||||
use log::info;
|
||||
use option_ext::OptionExt;
|
||||
|
||||
use crate::topology::{PortConfig, SwitchClient, SwitchError};
|
||||
use crate::{
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{PortConfig, SwitchClient, SwitchError},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BrocadeSwitchConfig {
|
||||
pub ips: Vec<IpAddress>,
|
||||
pub auth: BrocadeSwitchAuth,
|
||||
pub options: BrocadeOptions,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BrocadeSwitchClient {
|
||||
@@ -15,13 +26,11 @@ pub struct BrocadeSwitchClient {
|
||||
}
|
||||
|
||||
impl BrocadeSwitchClient {
|
||||
pub async fn init(
|
||||
ip_addresses: &[IpAddress],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
) -> Result<Self, brocade::Error> {
|
||||
let brocade = brocade::init(ip_addresses, username, password, options).await?;
|
||||
pub async fn init(config: BrocadeSwitchConfig) -> Result<Self, brocade::Error> {
|
||||
let auth = &config.auth;
|
||||
let options = &config.options;
|
||||
|
||||
let brocade = brocade::init(&config.ips, &auth.username, &auth.password, options).await?;
|
||||
Ok(Self { brocade })
|
||||
}
|
||||
}
|
||||
@@ -52,13 +61,18 @@ impl SwitchClient for BrocadeSwitchClient {
|
||||
|| link.remote_port.contains(&interface.port_location)
|
||||
})
|
||||
})
|
||||
.map(|interface| (interface.name.clone(), PortOperatingMode::Access))
|
||||
.map(|interface| (interface.name.clone(), PortOperatingMode::Trunk))
|
||||
.collect();
|
||||
|
||||
if interfaces.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!("About to configure interfaces {interfaces:?}");
|
||||
// inquire::Confirm::new("Do you wish to configures interfaces now?")
|
||||
// .prompt()
|
||||
// .map_err(|e| SwitchError::new(e.to_string()))?;
|
||||
|
||||
self.brocade
|
||||
.configure_interfaces(&interfaces)
|
||||
.await
|
||||
@@ -208,8 +222,8 @@ mod tests {
|
||||
//TODO not sure about this
|
||||
let configured_interfaces = brocade.configured_interfaces.lock().unwrap();
|
||||
assert_that!(*configured_interfaces).contains_exactly(vec![
|
||||
(first_interface.name.clone(), PortOperatingMode::Access),
|
||||
(second_interface.name.clone(), PortOperatingMode::Access),
|
||||
(first_interface.name.clone(), PortOperatingMode::Trunk),
|
||||
(second_interface.name.clone(), PortOperatingMode::Trunk),
|
||||
]);
|
||||
}
|
||||
|
||||
|
||||
@@ -3,20 +3,77 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use askama::Template;
|
||||
use async_trait::async_trait;
|
||||
use harmony_k8s::{DrainOptions, K8sClient, NodeFile};
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::core::v1::Node;
|
||||
use kube::{
|
||||
ResourceExt,
|
||||
api::{ObjectList, ObjectMeta},
|
||||
};
|
||||
use log::{debug, info};
|
||||
use log::{debug, info, warn};
|
||||
|
||||
use crate::{
|
||||
modules::okd::crd::nmstate,
|
||||
topology::{HostNetworkConfig, NetworkError, NetworkManager, k8s::K8sClient},
|
||||
topology::{HostNetworkConfig, NetworkError, NetworkManager},
|
||||
};
|
||||
|
||||
/// NetworkManager bond configuration template
|
||||
#[derive(Template)]
|
||||
#[template(
|
||||
source = r#"[connection]
|
||||
id={{ bond_name }}
|
||||
uuid={{ bond_uuid }}
|
||||
type=bond
|
||||
autoconnect-slaves=1
|
||||
interface-name={{ bond_name }}
|
||||
|
||||
[bond]
|
||||
lacp_rate=fast
|
||||
mode=802.3ad
|
||||
xmit_hash_policy=layer2
|
||||
|
||||
[ipv4]
|
||||
method=auto
|
||||
|
||||
[ipv6]
|
||||
addr-gen-mode=default
|
||||
method=auto
|
||||
|
||||
[proxy]
|
||||
"#,
|
||||
ext = "txt"
|
||||
)]
|
||||
struct BondConfigTemplate {
|
||||
bond_name: String,
|
||||
bond_uuid: String,
|
||||
}
|
||||
|
||||
/// NetworkManager bond slave configuration template
|
||||
#[derive(Template)]
|
||||
#[template(
|
||||
source = r#"[connection]
|
||||
id={{ slave_id }}
|
||||
uuid={{ slave_uuid }}
|
||||
type=ethernet
|
||||
interface-name={{ interface_name }}
|
||||
master={{ bond_name }}
|
||||
slave-type=bond
|
||||
|
||||
[ethernet]
|
||||
|
||||
[bond-port]
|
||||
"#,
|
||||
ext = "txt"
|
||||
)]
|
||||
struct BondSlaveConfigTemplate {
|
||||
slave_id: String,
|
||||
slave_uuid: String,
|
||||
interface_name: String,
|
||||
bond_name: String,
|
||||
}
|
||||
|
||||
/// TODO document properly the non-intuitive behavior or "roll forward only" of nmstate in general
|
||||
/// It is documented in nmstate official doc, but worth mentionning here :
|
||||
///
|
||||
@@ -87,6 +144,117 @@ impl NetworkManager for OpenShiftNmStateNetworkManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Configures bonding on the primary network interface of a node.
|
||||
///
|
||||
/// Changing the *primary* network interface (making it a bond
|
||||
/// slave) will disrupt node connectivity mid-change, so the
|
||||
/// procedure is:
|
||||
///
|
||||
/// 1. Generate NetworkManager .nmconnection files
|
||||
/// 2. Drain the node (includes cordon)
|
||||
/// 3. Write configuration files to `/etc/NetworkManager/system-connections/`
|
||||
/// 4. Attempt to reload NetworkManager (optional, best-effort)
|
||||
/// 5. Reboot the node with full verification (drain, boot_id check, uncordon)
|
||||
///
|
||||
/// The reboot procedure includes:
|
||||
/// - Recording boot_id before reboot
|
||||
/// - Fire-and-forget reboot command
|
||||
/// - Waiting for NotReady status
|
||||
/// - Waiting for Ready status
|
||||
/// - Verifying boot_id changed
|
||||
/// - Uncordoning the node
|
||||
///
|
||||
/// See ADR-019 for context and rationale.
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
use std::time::Duration;
|
||||
|
||||
let node_name = self.get_node_name_for_id(&config.host_id).await?;
|
||||
let hostname = self.get_hostname(&config.host_id).await?;
|
||||
|
||||
info!(
|
||||
"Configuring bond on primary interface for host '{}' (node '{}')",
|
||||
config.host_id, node_name
|
||||
);
|
||||
|
||||
// 1. Generate .nmconnection files
|
||||
let files = self.generate_nmconnection_files(&hostname, config)?;
|
||||
debug!(
|
||||
"Generated {} NetworkManager configuration files",
|
||||
files.len()
|
||||
);
|
||||
|
||||
// 2. Write configuration files to the node (before draining)
|
||||
// We do this while the node is still running for faster operation
|
||||
info!(
|
||||
"Writing NetworkManager configuration files to node '{}'...",
|
||||
node_name
|
||||
);
|
||||
self.k8s_client
|
||||
.write_files_to_node(&node_name, &files)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
"Failed to write configuration files to node '{}': {}",
|
||||
node_name, e
|
||||
))
|
||||
})?;
|
||||
|
||||
// 3. Reload NetworkManager configuration (best-effort)
|
||||
// This won't activate the bond yet since the primary interface would lose connectivity,
|
||||
// but it validates the configuration files are correct
|
||||
info!(
|
||||
"Reloading NetworkManager configuration on node '{}'...",
|
||||
node_name
|
||||
);
|
||||
match self
|
||||
.k8s_client
|
||||
.run_privileged_command_on_node(&node_name, "chroot /host nmcli connection reload")
|
||||
.await
|
||||
{
|
||||
Ok(output) => {
|
||||
debug!("NetworkManager reload output: {}", output.trim());
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to reload NetworkManager configuration: {}. Proceeding with reboot.",
|
||||
e
|
||||
);
|
||||
// Don't fail here - reboot will pick up the config anyway
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Reboot the node with full verification
|
||||
// The reboot_node function handles: drain, boot_id capture, reboot, NotReady wait,
|
||||
// Ready wait, boot_id verification, and uncordon
|
||||
// 60 minutes timeout for bare-metal environments (drain can take 20-30 mins)
|
||||
let reboot_timeout = Duration::from_secs(3600);
|
||||
info!(
|
||||
"Rebooting node '{}' to apply network configuration (timeout: {:?})...",
|
||||
node_name, reboot_timeout
|
||||
);
|
||||
|
||||
self.k8s_client
|
||||
.reboot_node(
|
||||
&node_name,
|
||||
&DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
|
||||
reboot_timeout,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
NetworkError::new(format!("Failed to reboot node '{}': {}", node_name, e))
|
||||
})?;
|
||||
|
||||
info!(
|
||||
"Successfully configured bond on primary interface for host '{}' (node '{}')",
|
||||
config.host_id, node_name
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), NetworkError> {
|
||||
let hostname = self.get_hostname(&config.host_id).await.map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
@@ -208,14 +376,14 @@ impl OpenShiftNmStateNetworkManager {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
|
||||
async fn get_node_for_id(&self, host_id: &Id) -> Result<Node, String> {
|
||||
let nodes: ObjectList<Node> = self
|
||||
.k8s_client
|
||||
.list_resources(None, None)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to list nodes: {e}"))?;
|
||||
|
||||
let Some(node) = nodes.iter().find(|n| {
|
||||
let Some(node) = nodes.into_iter().find(|n| {
|
||||
n.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.node_info.as_ref())
|
||||
@@ -225,6 +393,20 @@ impl OpenShiftNmStateNetworkManager {
|
||||
return Err(format!("No node found for host '{host_id}'"));
|
||||
};
|
||||
|
||||
Ok(node)
|
||||
}
|
||||
|
||||
async fn get_node_name_for_id(&self, host_id: &Id) -> Result<String, String> {
|
||||
let node = self.get_node_for_id(host_id).await?;
|
||||
|
||||
node.metadata.name.ok_or(format!(
|
||||
"A node should always have a name, node for host_id {host_id} has no name"
|
||||
))
|
||||
}
|
||||
|
||||
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
|
||||
let node = self.get_node_for_id(host_id).await?;
|
||||
|
||||
node.labels()
|
||||
.get("kubernetes.io/hostname")
|
||||
.ok_or(format!(
|
||||
@@ -261,4 +443,82 @@ impl OpenShiftNmStateNetworkManager {
|
||||
let next_id = (0..).find(|id| !used_ids.contains(id)).unwrap();
|
||||
Ok(format!("bond{next_id}"))
|
||||
}
|
||||
|
||||
/// Generates NetworkManager .nmconnection files for bonding configuration.
|
||||
///
|
||||
/// Creates:
|
||||
/// - One bond master configuration file (bond0.nmconnection)
|
||||
/// - One slave configuration file per interface (bond0-<iface>.nmconnection)
|
||||
///
|
||||
/// All files are placed in `/etc/NetworkManager/system-connections/` with
|
||||
/// mode 0o600 (required by NetworkManager).
|
||||
fn generate_nmconnection_files(
|
||||
&self,
|
||||
hostname: &str,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<Vec<NodeFile>, NetworkError> {
|
||||
let mut files = Vec::new();
|
||||
let bond_name = "bond0";
|
||||
let bond_uuid = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
// Generate bond master configuration
|
||||
let bond_template = BondConfigTemplate {
|
||||
bond_name: bond_name.to_string(),
|
||||
bond_uuid: bond_uuid.clone(),
|
||||
};
|
||||
|
||||
let bond_content = bond_template.render().map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
"Failed to render bond configuration template: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
files.push(NodeFile {
|
||||
path: format!(
|
||||
"/etc/NetworkManager/system-connections/{}.nmconnection",
|
||||
bond_name
|
||||
),
|
||||
content: bond_content,
|
||||
mode: 0o600,
|
||||
});
|
||||
|
||||
// Generate slave configurations for each interface
|
||||
for switch_port in &config.switch_ports {
|
||||
let interface_name = &switch_port.interface.name;
|
||||
let slave_id = format!("{}-{}", bond_name, interface_name);
|
||||
let slave_uuid = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
let slave_template = BondSlaveConfigTemplate {
|
||||
slave_id: slave_id.clone(),
|
||||
slave_uuid,
|
||||
interface_name: interface_name.clone(),
|
||||
bond_name: bond_name.to_string(),
|
||||
};
|
||||
|
||||
let slave_content = slave_template.render().map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
"Failed to render slave configuration template for interface '{}': {}",
|
||||
interface_name, e
|
||||
))
|
||||
})?;
|
||||
|
||||
files.push(NodeFile {
|
||||
path: format!(
|
||||
"/etc/NetworkManager/system-connections/{}.nmconnection",
|
||||
slave_id
|
||||
),
|
||||
content: slave_content,
|
||||
mode: 0o600,
|
||||
});
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Generated {} NetworkManager configuration files for host '{}'",
|
||||
files.len(),
|
||||
hostname
|
||||
);
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use async_trait::async_trait;
|
||||
use log::{debug, info, trace};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
use std::path::PathBuf;
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use async_trait::async_trait;
|
||||
use harmony_k8s::K8sClient;
|
||||
use harmony_macros::hurl;
|
||||
use log::{debug, info, trace, warn};
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
@@ -14,7 +15,7 @@ use crate::{
|
||||
helm::chart::{HelmChartScore, HelmRepository},
|
||||
},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology, ingress::Ingress, k8s::K8sClient},
|
||||
topology::{HelmCommand, K8sclient, Topology, ingress::Ingress},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use harmony_k8s::K8sClient;
|
||||
use log::{debug, info};
|
||||
|
||||
use crate::{interpret::InterpretError, topology::k8s::K8sClient};
|
||||
use crate::interpret::InterpretError;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum ArgoScope {
|
||||
|
||||
138
harmony/src/modules/brocade/brocade.rs
Normal file
138
harmony/src/modules/brocade/brocade.rs
Normal file
@@ -0,0 +1,138 @@
|
||||
use async_trait::async_trait;
|
||||
use brocade::{BrocadeOptions, PortOperatingMode};
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
infra::brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HostNetworkConfig, PortConfig, PreparationError, PreparationOutcome, Switch, SwitchClient,
|
||||
SwitchError, Topology,
|
||||
},
|
||||
};
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{id::Id, net::MacAddress, switch::PortLocation};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct BrocadeSwitchScore {
|
||||
pub port_channels_to_clear: Vec<Id>,
|
||||
pub ports_to_configure: Vec<PortConfig>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Switch> Score<T> for BrocadeSwitchScore {
|
||||
fn name(&self) -> String {
|
||||
"BrocadeSwitchScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(BrocadeSwitchInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BrocadeSwitchInterpret {
|
||||
score: BrocadeSwitchScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + Switch> Interpret<T> for BrocadeSwitchInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Applying switch configuration {:?}", self.score);
|
||||
debug!(
|
||||
"Clearing port channel {:?}",
|
||||
self.score.port_channels_to_clear
|
||||
);
|
||||
topology
|
||||
.clear_port_channel(&self.score.port_channels_to_clear)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
debug!("Configuring interfaces {:?}", self.score.ports_to_configure);
|
||||
topology
|
||||
.configure_interface(&self.score.ports_to_configure)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success("switch configured".to_string()))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("BrocadeSwitchInterpret")
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
pub struct BrocadeSwitchConfig {
|
||||
pub ips: Vec<harmony_types::net::IpAddress>,
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
pub options: BrocadeOptions,
|
||||
}
|
||||
*/
|
||||
|
||||
pub struct SwitchTopology {
|
||||
client: Box<dyn SwitchClient>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Topology for SwitchTopology {
|
||||
fn name(&self) -> &str {
|
||||
"SwitchTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
impl SwitchTopology {
|
||||
pub async fn new(config: BrocadeSwitchConfig) -> Self {
|
||||
let client = BrocadeSwitchClient::init(config)
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
let client = Box::new(client);
|
||||
Self { client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Switch for SwitchTopology {
|
||||
async fn setup_switch(&self) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_port_for_mac_address(
|
||||
&self,
|
||||
_mac_address: &MacAddress,
|
||||
) -> Result<Option<PortLocation>, SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn configure_port_channel(&self, _config: &HostNetworkConfig) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
self.client.clear_port_channel(ids).await
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
self.client.configure_interface(ports).await
|
||||
}
|
||||
}
|
||||
@@ -39,16 +39,22 @@ pub struct BrocadeEnableSnmpInterpret {
|
||||
}
|
||||
|
||||
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
|
||||
struct BrocadeSwitchAuth {
|
||||
username: String,
|
||||
password: String,
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
impl BrocadeSwitchAuth {
|
||||
pub fn user_pass(username: String, password: String) -> Self {
|
||||
Self { username, password }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
|
||||
struct BrocadeSnmpAuth {
|
||||
username: String,
|
||||
auth_password: String,
|
||||
des_password: String,
|
||||
pub struct BrocadeSnmpAuth {
|
||||
pub username: String,
|
||||
pub auth_password: String,
|
||||
pub des_password: String,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -72,7 +78,7 @@ impl<T: Topology> Interpret<T> for BrocadeEnableSnmpInterpret {
|
||||
&switch_addresses,
|
||||
&config.username,
|
||||
&config.password,
|
||||
BrocadeOptions {
|
||||
&BrocadeOptions {
|
||||
dry_run: self.score.dry_run,
|
||||
..Default::default()
|
||||
},
|
||||
5
harmony/src/modules/brocade/mod.rs
Normal file
5
harmony/src/modules/brocade/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
pub mod brocade;
|
||||
pub use brocade::*;
|
||||
|
||||
pub mod brocade_snmp;
|
||||
pub use brocade_snmp::*;
|
||||
@@ -1,3 +1,4 @@
|
||||
use harmony_k8s::K8sClient;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -11,7 +12,7 @@ use crate::{
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, k8s::K8sClient},
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
|
||||
@@ -82,17 +82,40 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
|
||||
self.score.role,
|
||||
choice.summary()
|
||||
);
|
||||
let disk_names: Vec<String> =
|
||||
choice.storage.iter().map(|s| s.name.clone()).collect();
|
||||
let mut disk_choices: Vec<(String, String)> = vec![];
|
||||
|
||||
for s in choice.storage.iter() {
|
||||
let size_gb: f64 = s.size_bytes as f64 / 1_000_000_000.0;
|
||||
let (size, unit) = if size_gb >= 1000.0 {
|
||||
(size_gb / 1000.0, "TB")
|
||||
} else {
|
||||
(size_gb, "GB")
|
||||
};
|
||||
let drive_type = if s.rotational { "rotational" } else { "SSD" };
|
||||
let smart_str = s.smart_status.as_deref().unwrap_or("N/A");
|
||||
let display = format!(
|
||||
"{} : [{}] - {:.0} {} ({}) - {} - Smart: {}",
|
||||
s.name, s.model, size, unit, drive_type, s.interface_type, smart_str
|
||||
);
|
||||
disk_choices.push((display, s.name.clone()));
|
||||
}
|
||||
|
||||
let display_refs: Vec<&str> =
|
||||
disk_choices.iter().map(|(d, _)| d.as_str()).collect();
|
||||
|
||||
let disk_choice = inquire::Select::new(
|
||||
&format!("Select the disk to use on host {}:", choice.summary()),
|
||||
disk_names,
|
||||
display_refs,
|
||||
)
|
||||
.prompt();
|
||||
|
||||
match disk_choice {
|
||||
Ok(disk_name) => {
|
||||
Ok(selected_display) => {
|
||||
let disk_name = disk_choices
|
||||
.iter()
|
||||
.find(|(d, _)| d.as_str() == selected_display)
|
||||
.map(|(_, name)| name.clone())
|
||||
.unwrap();
|
||||
info!("Selected disk {} for node {}", disk_name, choice.summary());
|
||||
host_repo
|
||||
.save_role_mapping(&self.score.role, &choice, &disk_name)
|
||||
|
||||
@@ -54,6 +54,12 @@ pub enum HarmonyDiscoveryStrategy {
|
||||
SUBNET { cidr: cidr::Ipv4Cidr, port: u16 },
|
||||
}
|
||||
|
||||
impl Default for HarmonyDiscoveryStrategy {
|
||||
fn default() -> Self {
|
||||
HarmonyDiscoveryStrategy::MDNS
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology> Interpret<T> for DiscoverInventoryAgentInterpret {
|
||||
async fn execute(
|
||||
|
||||
@@ -3,7 +3,8 @@ use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use log::warn;
|
||||
|
||||
use crate::topology::{FailoverTopology, K8sclient, k8s::K8sClient};
|
||||
use crate::topology::{FailoverTopology, K8sclient};
|
||||
use harmony_k8s::K8sClient;
|
||||
|
||||
#[async_trait]
|
||||
impl<T: K8sclient> K8sclient for FailoverTopology<T> {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use async_trait::async_trait;
|
||||
use k8s_openapi::NamespaceResourceScope;
|
||||
use k8s_openapi::ResourceScope;
|
||||
use kube::Resource;
|
||||
use log::info;
|
||||
use serde::{Serialize, de::DeserializeOwned};
|
||||
@@ -29,7 +29,7 @@ impl<K: Resource + std::fmt::Debug> K8sResourceScore<K> {
|
||||
}
|
||||
|
||||
impl<
|
||||
K: Resource<Scope = NamespaceResourceScope>
|
||||
K: Resource<Scope: ResourceScope>
|
||||
+ std::fmt::Debug
|
||||
+ Sync
|
||||
+ DeserializeOwned
|
||||
@@ -61,7 +61,7 @@ pub struct K8sResourceInterpret<K: Resource + std::fmt::Debug + Sync + Send> {
|
||||
|
||||
#[async_trait]
|
||||
impl<
|
||||
K: Resource<Scope = NamespaceResourceScope>
|
||||
K: Resource<Scope: ResourceScope>
|
||||
+ Clone
|
||||
+ std::fmt::Debug
|
||||
+ DeserializeOwned
|
||||
@@ -109,7 +109,7 @@ where
|
||||
topology
|
||||
.k8s_client()
|
||||
.await
|
||||
.expect("Environment should provide enough information to instanciate a client")
|
||||
.map_err(|e| InterpretError::new(format!("Failed to get k8s client : {e}")))?
|
||||
.apply_many(&self.score.resource, self.score.namespace.as_deref())
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -15,10 +15,13 @@ pub mod load_balancer;
|
||||
pub mod monitoring;
|
||||
pub mod nats;
|
||||
pub mod network;
|
||||
pub mod node_health;
|
||||
pub mod okd;
|
||||
pub mod openbao;
|
||||
pub mod opnsense;
|
||||
pub mod postgresql;
|
||||
pub mod prometheus;
|
||||
pub mod storage;
|
||||
pub mod tenant;
|
||||
pub mod tftp;
|
||||
pub mod zitadel;
|
||||
|
||||
@@ -6,7 +6,7 @@ use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
interpret::InterpretError,
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
monitoring::{
|
||||
@@ -17,10 +17,10 @@ use crate::{
|
||||
topology::{
|
||||
K8sclient, Topology,
|
||||
installable::Installable,
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::{AlertReceiver, AlertSender, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
use harmony_k8s::K8sClient;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
|
||||
@@ -4,10 +4,8 @@ use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::topology::{
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::{AlertReceiver, AlertSender},
|
||||
};
|
||||
use crate::topology::oberservability::monitoring::{AlertReceiver, AlertSender};
|
||||
use harmony_k8s::K8sClient;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
|
||||
@@ -11,8 +11,9 @@ use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::ntfy::helm::ntfy_helm_chart::ntfy_helm_chart_score,
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, MultiTargetTopology, Topology, k8s::K8sClient},
|
||||
topology::{HelmCommand, K8sclient, MultiTargetTopology, Topology},
|
||||
};
|
||||
use harmony_k8s::K8sClient;
|
||||
use harmony_types::id::Id;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
topology::k8s::K8sClient,
|
||||
};
|
||||
use crate::interpret::{InterpretError, Outcome};
|
||||
use harmony_k8s::K8sClient;
|
||||
use k8s_openapi::api::core::v1::ConfigMap;
|
||||
use kube::api::ObjectMeta;
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::{collections::BTreeMap, str::FromStr};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_k8s::KubernetesDistribution;
|
||||
use harmony_macros::hurl;
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use harmony_types::id::Id;
|
||||
@@ -25,7 +26,7 @@ use crate::{
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, KubernetesDistribution, TlsRouter, Topology},
|
||||
topology::{HelmCommand, K8sclient, TlsRouter, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
|
||||
260
harmony/src/modules/node_health/mod.rs
Normal file
260
harmony/src/modules/node_health/mod.rs
Normal file
@@ -0,0 +1,260 @@
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::{
|
||||
apps::v1::{DaemonSet, DaemonSetSpec},
|
||||
core::v1::{
|
||||
Container, ContainerPort, EnvVar, EnvVarSource, Namespace, ObjectFieldSelector, PodSpec,
|
||||
PodTemplateSpec, ResourceRequirements, ServiceAccount, Toleration,
|
||||
},
|
||||
rbac::v1::{ClusterRole, ClusterRoleBinding, PolicyRule, Role, RoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::k8s::resource::K8sResourceScore,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct NodeHealthScore {}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for NodeHealthScore {
|
||||
fn name(&self) -> String {
|
||||
format!("NodeHealthScore")
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(NodeHealthInterpret {})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NodeHealthInterpret {}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for NodeHealthInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let namespace_name = "harmony-node-healthcheck".to_string();
|
||||
|
||||
// Namespace
|
||||
let mut labels = BTreeMap::new();
|
||||
labels.insert("name".to_string(), namespace_name.clone());
|
||||
|
||||
let namespace = Namespace {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(namespace_name.clone()),
|
||||
labels: Some(labels),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
..Namespace::default()
|
||||
};
|
||||
|
||||
// ServiceAccount
|
||||
let service_account_name = "node-healthcheck-sa".to_string();
|
||||
let service_account = ServiceAccount {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(service_account_name.clone()),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
..ServiceAccount::default()
|
||||
};
|
||||
|
||||
// ClusterRole
|
||||
let cluster_role = ClusterRole {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("node-healthcheck-role".to_string()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
rules: Some(vec![PolicyRule {
|
||||
api_groups: Some(vec!["".to_string()]),
|
||||
resources: Some(vec!["nodes".to_string()]),
|
||||
verbs: vec!["get".to_string(), "list".to_string()],
|
||||
..PolicyRule::default()
|
||||
}]),
|
||||
..ClusterRole::default()
|
||||
};
|
||||
|
||||
// Role
|
||||
let role = Role {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("allow-hostnetwork-scc".to_string()),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
rules: Some(vec![PolicyRule {
|
||||
api_groups: Some(vec!["security.openshift.io".to_string()]),
|
||||
resources: Some(vec!["securitycontextconstraints".to_string()]),
|
||||
resource_names: Some(vec!["hostnetwork".to_string()]),
|
||||
verbs: vec!["use".to_string()],
|
||||
..PolicyRule::default()
|
||||
}]),
|
||||
..Role::default()
|
||||
};
|
||||
|
||||
// RoleBinding
|
||||
let role_binding = RoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("node-status-querier-scc-binding".to_string()),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".to_string(),
|
||||
name: service_account_name.clone(),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..Subject::default()
|
||||
}]),
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".to_string(),
|
||||
kind: "Role".to_string(),
|
||||
name: "allow-hostnetwork-scc".to_string(),
|
||||
},
|
||||
};
|
||||
|
||||
// ClusterRoleBinding
|
||||
let cluster_role_binding = ClusterRoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("read-nodes-binding".to_string()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".to_string(),
|
||||
name: service_account_name.clone(),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..Subject::default()
|
||||
}]),
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".to_string(),
|
||||
kind: "ClusterRole".to_string(),
|
||||
name: "node-healthcheck-role".to_string(),
|
||||
},
|
||||
};
|
||||
|
||||
// DaemonSet
|
||||
let mut daemonset_labels = BTreeMap::new();
|
||||
daemonset_labels.insert("app".to_string(), "node-healthcheck".to_string());
|
||||
|
||||
let daemon_set = DaemonSet {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("node-healthcheck".to_string()),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
labels: Some(daemonset_labels.clone()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
spec: Some(DaemonSetSpec {
|
||||
selector: LabelSelector {
|
||||
match_labels: Some(daemonset_labels.clone()),
|
||||
..LabelSelector::default()
|
||||
},
|
||||
template: PodTemplateSpec {
|
||||
metadata: Some(ObjectMeta {
|
||||
labels: Some(daemonset_labels),
|
||||
..ObjectMeta::default()
|
||||
}),
|
||||
spec: Some(PodSpec {
|
||||
service_account_name: Some(service_account_name.clone()),
|
||||
host_network: Some(true),
|
||||
tolerations: Some(vec![Toleration {
|
||||
operator: Some("Exists".to_string()),
|
||||
..Toleration::default()
|
||||
}]),
|
||||
containers: vec![Container {
|
||||
name: "checker".to_string(),
|
||||
image: Some(
|
||||
"hub.nationtech.io/harmony/harmony-node-readiness-endpoint:latest"
|
||||
.to_string(),
|
||||
),
|
||||
env: Some(vec![EnvVar {
|
||||
name: "NODE_NAME".to_string(),
|
||||
value_from: Some(EnvVarSource {
|
||||
field_ref: Some(ObjectFieldSelector {
|
||||
field_path: "spec.nodeName".to_string(),
|
||||
..ObjectFieldSelector::default()
|
||||
}),
|
||||
..EnvVarSource::default()
|
||||
}),
|
||||
..EnvVar::default()
|
||||
}]),
|
||||
ports: Some(vec![ContainerPort {
|
||||
container_port: 25001,
|
||||
host_port: Some(25001),
|
||||
name: Some("health-port".to_string()),
|
||||
..ContainerPort::default()
|
||||
}]),
|
||||
resources: Some(ResourceRequirements {
|
||||
requests: Some({
|
||||
let mut requests = BTreeMap::new();
|
||||
requests.insert("cpu".to_string(), Quantity("10m".to_string()));
|
||||
requests
|
||||
.insert("memory".to_string(), Quantity("50Mi".to_string()));
|
||||
requests
|
||||
}),
|
||||
..ResourceRequirements::default()
|
||||
}),
|
||||
..Container::default()
|
||||
}],
|
||||
..PodSpec::default()
|
||||
}),
|
||||
},
|
||||
..DaemonSetSpec::default()
|
||||
}),
|
||||
..DaemonSet::default()
|
||||
};
|
||||
|
||||
K8sResourceScore::single(namespace, None)
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(service_account, Some(namespace_name.clone()))
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(cluster_role, None)
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(role, Some(namespace_name.clone()))
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(role_binding, Some(namespace_name.clone()))
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(cluster_role_binding, None)
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(daemon_set, Some(namespace_name.clone()))
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(
|
||||
"Harmony node health successfully deployed".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("NodeHealth")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,74 @@ use crate::{
|
||||
topology::{HostNetworkConfig, NetworkInterface, NetworkManager, Switch, SwitchPort, Topology},
|
||||
};
|
||||
|
||||
/// Configures high-availability networking for a set of physical hosts.
|
||||
///
|
||||
/// This is an opinionated Score that creates a resilient network configuration.
|
||||
/// It assumes hosts have at least two network interfaces connected
|
||||
/// to redundant switches for high availability.
|
||||
///
|
||||
/// The Score's `Interpret` logic will:
|
||||
/// 1. Setup the switch with sane defaults (e.g. mark interfaces as switchports for discoverability).
|
||||
/// 2. Discover which switch ports each host's interfaces are connected to (via MAC address).
|
||||
/// 3. Create a network bond (e.g. LACP) on the host itself using these interfaces.
|
||||
/// 4. Configure a corresponding port-channel on the switch(es) for those ports.
|
||||
///
|
||||
/// This ensures that both the host and the switch are configured to treat the
|
||||
/// multiple links as a single, aggregated, and redundant connection.
|
||||
///
|
||||
/// Hosts with 0 or 1 detected interfaces will be skipped, as bonding is not
|
||||
/// applicable.
|
||||
///
|
||||
/// <div class="warning">
|
||||
/// The implementation is currently _not_ idempotent, even though it should be.
|
||||
/// Running it more than once on the same host might result in duplicated bond configurations.
|
||||
/// </div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
/// This Score is not named well. A better name would be
|
||||
/// `HighAvailabilityHostNetworkScore`, or something similar to better express the intent.
|
||||
/// </div>
|
||||
///
|
||||
/// # Requirements
|
||||
///
|
||||
/// This Score can only be applied to a [Topology] that implements both the
|
||||
/// [NetworkManager] (to configure the host-side bond) and [Switch]
|
||||
/// (to configure the switch-side port-channel) capabilities.
|
||||
///
|
||||
/// # Current limitations
|
||||
///
|
||||
/// ## 1. No rollback logic & limited idempotency
|
||||
///
|
||||
/// If any of the steps described above fails, the Score will not attempt to revert any changes
|
||||
/// already applied. Which could render the host or switch in an inconsistent state.
|
||||
///
|
||||
/// ## 2. Propagation delays on the switch
|
||||
///
|
||||
/// It might take some time for the sane defaults in step 1) to be applied. In some cases,
|
||||
/// it was observed that the switch takes up to 5min to actually apply the config.
|
||||
///
|
||||
/// But this Score's Interpret doesn't wait and directly proceeds to step 2) to discover
|
||||
/// the MAC addresses. Which could result interfaces being skipped because their corresponding port
|
||||
/// on the switch couldn't be found.
|
||||
///
|
||||
/// TODO: Validate that the switch is in the expected state before continuing.
|
||||
///
|
||||
/// ## 3. Bond configuration
|
||||
///
|
||||
/// To find the next available bond id, the current
|
||||
/// [NetworkManager](crate::infra::network_manager::OpenShiftNmStateNetworkManager) implementation
|
||||
/// simply checks for existing bonds named `bond[n]` and take the next available `n` number.
|
||||
///
|
||||
/// It doesn't check that there are already a bond for the interfaces that should be bonded. Which
|
||||
/// might result in a duplicate bond being created.
|
||||
///
|
||||
/// TODO: Make sure the interfaces to aggregate are not already bonded.
|
||||
///
|
||||
/// # Future improvements
|
||||
///
|
||||
/// Along with the `TODO` items above, splitting this Score into multiple smaller ones would be
|
||||
/// beneficial. It has a lot of moving parts and some of them could be used on their own to make
|
||||
/// operations on a cluster easier.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct HostNetworkConfigurationScore {
|
||||
pub hosts: Vec<PhysicalHost>,
|
||||
@@ -74,9 +142,13 @@ impl HostNetworkConfigurationInterpret {
|
||||
);
|
||||
|
||||
info!("[Host {current_host}/{total_hosts}] Configuring host network...");
|
||||
topology.configure_bond(&config).await.map_err(|e| {
|
||||
InterpretError::new(format!("Failed to configure host network: {e}"))
|
||||
})?;
|
||||
topology
|
||||
.configure_bond_on_primary_interface(&config)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
InterpretError::new(format!("Failed to configure host network: {e}"))
|
||||
})?;
|
||||
|
||||
topology
|
||||
.configure_port_channel(&config)
|
||||
.await
|
||||
@@ -663,6 +735,16 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
let mut configured_bonds = self.configured_bonds.lock().unwrap();
|
||||
configured_bonds.push((config.host_id.clone(), config.clone()));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
88
harmony/src/modules/openbao/mod.rs
Normal file
88
harmony/src/modules/openbao/mod.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use harmony_macros::hurl;
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::helm::chart::{HelmChartScore, HelmRepository},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct OpenbaoScore {
|
||||
/// Host used for external access (ingress)
|
||||
pub host: String,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + HelmCommand> Score<T> for OpenbaoScore {
|
||||
fn name(&self) -> String {
|
||||
"OpenbaoScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
// TODO exec pod commands to initialize secret store if not already done
|
||||
let host = &self.host;
|
||||
|
||||
let values_yaml = Some(format!(
|
||||
r#"global:
|
||||
openshift: true
|
||||
server:
|
||||
standalone:
|
||||
enabled: true
|
||||
config: |
|
||||
ui = true
|
||||
|
||||
listener "tcp" {{
|
||||
tls_disable = true
|
||||
address = "[::]:8200"
|
||||
cluster_address = "[::]:8201"
|
||||
}}
|
||||
|
||||
storage "file" {{
|
||||
path = "/openbao/data"
|
||||
}}
|
||||
|
||||
service:
|
||||
enabled: true
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
hosts:
|
||||
- host: {host}
|
||||
dataStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
storageClass: null
|
||||
accessMode: ReadWriteOnce
|
||||
|
||||
auditStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
storageClass: null
|
||||
accessMode: ReadWriteOnce
|
||||
ui:
|
||||
enabled: true"#
|
||||
));
|
||||
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str("openbao").unwrap()),
|
||||
release_name: NonBlankString::from_str("openbao").unwrap(),
|
||||
chart_name: NonBlankString::from_str("openbao/openbao").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml,
|
||||
create_namespace: true,
|
||||
install_only: false,
|
||||
repository: Some(HelmRepository::new(
|
||||
"openbao".to_string(),
|
||||
hurl!("https://openbao.github.io/openbao-helm"),
|
||||
true,
|
||||
)),
|
||||
}
|
||||
.create_interpret()
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::{CustomResource, api::ObjectMeta};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -13,9 +15,14 @@ use serde::{Deserialize, Serialize};
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ClusterSpec {
|
||||
pub instances: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub image_name: Option<String>,
|
||||
pub storage: Storage,
|
||||
pub bootstrap: Bootstrap,
|
||||
/// This must be set to None if you want cnpg to generate a superuser secret
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub superuser_secret: Option<BTreeMap<String, String>>,
|
||||
pub enable_superuser_access: bool,
|
||||
}
|
||||
|
||||
impl Default for Cluster {
|
||||
@@ -34,6 +41,8 @@ impl Default for ClusterSpec {
|
||||
image_name: None,
|
||||
storage: Storage::default(),
|
||||
bootstrap: Bootstrap::default(),
|
||||
superuser_secret: None,
|
||||
enable_superuser_access: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ use crate::topology::{K8sclient, Topology};
|
||||
/// # Usage
|
||||
/// ```
|
||||
/// use harmony::modules::postgresql::CloudNativePgOperatorScore;
|
||||
/// let score = CloudNativePgOperatorScore::default();
|
||||
/// let score = CloudNativePgOperatorScore::default_openshift();
|
||||
/// ```
|
||||
///
|
||||
/// Or, you can take control of most relevant fiedls this way :
|
||||
@@ -52,8 +52,8 @@ pub struct CloudNativePgOperatorScore {
|
||||
pub source_namespace: String,
|
||||
}
|
||||
|
||||
impl Default for CloudNativePgOperatorScore {
|
||||
fn default() -> Self {
|
||||
impl CloudNativePgOperatorScore {
|
||||
pub fn default_openshift() -> Self {
|
||||
Self {
|
||||
namespace: "openshift-operators".to_string(),
|
||||
channel: "stable-v1".to_string(),
|
||||
@@ -68,7 +68,7 @@ impl CloudNativePgOperatorScore {
|
||||
pub fn new(namespace: &str) -> Self {
|
||||
Self {
|
||||
namespace: namespace.to_string(),
|
||||
..Default::default()
|
||||
..Self::default_openshift()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::interpret::Interpret;
|
||||
@@ -66,6 +68,11 @@ impl<T: Topology + K8sclient> Score<T> for K8sPostgreSQLScore {
|
||||
owner: "app".to_string(),
|
||||
},
|
||||
},
|
||||
// superuser_secret: Some(BTreeMap::from([(
|
||||
// "name".to_string(),
|
||||
// format!("{}-superuser", self.config.cluster_name.clone()),
|
||||
// )])),
|
||||
enable_superuser_access: true,
|
||||
..ClusterSpec::default()
|
||||
};
|
||||
|
||||
|
||||
@@ -12,8 +12,7 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::C
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_grafana::{
|
||||
Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig,
|
||||
GrafanaDatasourceJsonData, GrafanaDatasourceSpec, GrafanaSecretKeyRef, GrafanaSpec,
|
||||
GrafanaValueFrom, GrafanaValueSource,
|
||||
GrafanaDatasourceJsonData, GrafanaDatasourceSpec, GrafanaSpec,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{
|
||||
PrometheusRule, PrometheusRuleSpec, RuleGroup,
|
||||
@@ -23,7 +22,7 @@ use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
|
||||
ServiceMonitor, ServiceMonitorSpec,
|
||||
};
|
||||
use crate::topology::oberservability::monitoring::AlertReceiver;
|
||||
use crate::topology::{K8sclient, Topology, k8s::K8sClient};
|
||||
use crate::topology::{K8sclient, Topology};
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
@@ -38,6 +37,7 @@ use crate::{
|
||||
},
|
||||
score::Score,
|
||||
};
|
||||
use harmony_k8s::K8sClient;
|
||||
use harmony_types::id::Id;
|
||||
|
||||
use super::prometheus::PrometheusMonitoring;
|
||||
|
||||
@@ -30,12 +30,13 @@ use crate::modules::monitoring::kube_prometheus::crd::rhob_service_monitor::{
|
||||
use crate::score::Score;
|
||||
use crate::topology::ingress::Ingress;
|
||||
use crate::topology::oberservability::monitoring::AlertReceiver;
|
||||
use crate::topology::{K8sclient, Topology, k8s::K8sClient};
|
||||
use crate::topology::{K8sclient, Topology};
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
};
|
||||
use harmony_k8s::K8sClient;
|
||||
use harmony_types::id::Id;
|
||||
|
||||
use super::prometheus::PrometheusMonitoring;
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::{
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_k8s::K8sClient;
|
||||
use log::{debug, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::time::sleep;
|
||||
@@ -13,7 +14,7 @@ use crate::{
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, k8s::K8sClient},
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
|
||||
@@ -9,8 +9,9 @@ use crate::{
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, k8s::K8sClient},
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
use harmony_k8s::K8sClient;
|
||||
use harmony_types::id::Id;
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
|
||||
518
harmony/src/modules/zitadel/mod.rs
Normal file
518
harmony/src/modules/zitadel/mod.rs
Normal file
@@ -0,0 +1,518 @@
|
||||
use k8s_openapi::api::core::v1::Namespace;
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||
use k8s_openapi::{ByteString, api::core::v1::Secret};
|
||||
use kube::{Error as KubeError, core::ErrorResponse};
|
||||
use rand::distr::Distribution;
|
||||
use rand::{Rng, rng, seq::SliceRandom};
|
||||
use std::collections::BTreeMap;
|
||||
use std::str::FromStr;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_macros::hurl;
|
||||
use harmony_types::id::Id;
|
||||
use harmony_types::storage::StorageSize;
|
||||
use log::{debug, error, info, trace, warn};
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::helm::chart::{HelmChartScore, HelmRepository},
|
||||
modules::k8s::resource::K8sResourceScore,
|
||||
modules::postgresql::capability::{PostgreSQL, PostgreSQLClusterRole, PostgreSQLConfig},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology},
|
||||
};
|
||||
|
||||
const NAMESPACE: &str = "zitadel";
|
||||
const PG_CLUSTER_NAME: &str = "zitadel-pg";
|
||||
const MASTERKEY_SECRET_NAME: &str = "zitadel-masterkey";
|
||||
|
||||
/// Opinionated Zitadel deployment score.
|
||||
///
|
||||
/// Deploys a PostgreSQL cluster (via the [`PostgreSQL`] trait) and the Zitadel
|
||||
/// Helm chart into the same namespace. Intended as a central multi-tenant IdP
|
||||
/// with SSO for OKD/OpenShift, OpenBao, Harbor, Grafana, Nextcloud, Ente
|
||||
/// Photos, and others.
|
||||
///
|
||||
/// # Ingress annotations
|
||||
/// No controller-specific ingress annotations are set by default. On
|
||||
/// OKD/OpenShift, the ingress should request TLS so the generated Route is
|
||||
/// edge-terminated instead of HTTP-only. Optional cert-manager annotations are
|
||||
/// included for clusters that have cert-manager installed; clusters without
|
||||
/// cert-manager will ignore them.
|
||||
/// Add or adjust annotations via `values_overrides` depending on your
|
||||
/// distribution:
|
||||
/// - NGINX: `nginx.ingress.kubernetes.io/backend-protocol: GRPC`
|
||||
/// - OpenShift HAProxy: `route.openshift.io/termination: edge`
|
||||
/// - AWS ALB: set `ingress.controller: aws`
|
||||
|
||||
///
|
||||
/// # Database credentials
|
||||
/// CNPG creates a `<cluster>-superuser` secret with key `password`. Because
|
||||
/// `envVarsSecret` injects secret keys verbatim as env var names and the CNPG
|
||||
/// key (`password`) does not match ZITADEL's expected name
|
||||
/// (`ZITADEL_DATABASE_POSTGRES_USER_PASSWORD`), individual `env` entries with
|
||||
/// `valueFrom.secretKeyRef` are used instead. For environments with an
|
||||
/// External Secrets Operator or similar, create a dedicated secret with the
|
||||
/// correct ZITADEL env var names and switch to `envVarsSecret`.
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct ZitadelScore {
|
||||
/// External domain (e.g. `"auth.example.com"`).
|
||||
pub host: String,
|
||||
pub zitadel_version: String,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + HelmCommand + PostgreSQL> Score<T> for ZitadelScore {
|
||||
fn name(&self) -> String {
|
||||
"ZitadelScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(ZitadelInterpret {
|
||||
host: self.host.clone(),
|
||||
zitadel_version: self.zitadel_version.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ZitadelInterpret {
|
||||
host: String,
|
||||
zitadel_version: String,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient + HelmCommand + PostgreSQL> Interpret<T> for ZitadelInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!(
|
||||
"[Zitadel] Starting full deployment — namespace: '{NAMESPACE}', host: '{}'",
|
||||
self.host
|
||||
);
|
||||
|
||||
info!("Creating namespace {NAMESPACE} if it does not exist");
|
||||
K8sResourceScore::single(
|
||||
Namespace {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(NAMESPACE.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
)
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
|
||||
// --- Step 1: PostgreSQL -------------------------------------------
|
||||
|
||||
let pg_config = PostgreSQLConfig {
|
||||
cluster_name: PG_CLUSTER_NAME.to_string(),
|
||||
instances: 2,
|
||||
storage_size: StorageSize::gi(10),
|
||||
role: PostgreSQLClusterRole::Primary,
|
||||
namespace: NAMESPACE.to_string(),
|
||||
};
|
||||
|
||||
debug!(
|
||||
"[Zitadel] Deploying PostgreSQL cluster '{}' — instances: {}, storage: 10Gi, namespace: '{}'",
|
||||
pg_config.cluster_name, pg_config.instances, pg_config.namespace
|
||||
);
|
||||
|
||||
topology.deploy(&pg_config).await.map_err(|e| {
|
||||
let msg = format!(
|
||||
"[Zitadel] PostgreSQL deployment failed for '{}': {e}",
|
||||
pg_config.cluster_name
|
||||
);
|
||||
error!("{msg}");
|
||||
InterpretError::new(msg)
|
||||
})?;
|
||||
|
||||
info!(
|
||||
"[Zitadel] PostgreSQL cluster '{}' deployed",
|
||||
pg_config.cluster_name
|
||||
);
|
||||
|
||||
// --- Step 2: Resolve internal DB endpoint -------------------------
|
||||
|
||||
debug!(
|
||||
"[Zitadel] Resolving internal endpoint for cluster '{}'",
|
||||
pg_config.cluster_name
|
||||
);
|
||||
|
||||
let endpoint = topology.get_endpoint(&pg_config).await.map_err(|e| {
|
||||
let msg = format!(
|
||||
"[Zitadel] Failed to resolve endpoint for cluster '{}': {e}",
|
||||
pg_config.cluster_name
|
||||
);
|
||||
error!("{msg}");
|
||||
InterpretError::new(msg)
|
||||
})?;
|
||||
|
||||
info!(
|
||||
"[Zitadel] DB endpoint resolved — host: '{}', port: {}",
|
||||
endpoint.host, endpoint.port
|
||||
);
|
||||
|
||||
// The CNPG-managed superuser secret contains 'password', 'username',
|
||||
// 'host', 'port', 'dbname', 'uri'. We reference 'password' directly
|
||||
// via env.valueFrom.secretKeyRef because CNPG's key names do not
|
||||
// match ZITADEL's required env var names.
|
||||
let pg_user_secret = format!("{PG_CLUSTER_NAME}-app");
|
||||
let pg_superuser_secret = format!("{PG_CLUSTER_NAME}-superuser");
|
||||
let db_host = &endpoint.host;
|
||||
let db_port = endpoint.port;
|
||||
let host = &self.host;
|
||||
|
||||
debug!("[Zitadel] DB credentials source — secret: '{pg_user_secret}', key: 'password'");
|
||||
debug!(
|
||||
"[Zitadel] DB credentials source — superuser secret: '{pg_superuser_secret}', key: 'password'"
|
||||
);
|
||||
|
||||
// Zitadel requires one symbol, one number and more. So let's force it.
|
||||
fn generate_secure_password(length: usize) -> String {
|
||||
const ALPHA_UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
const ALPHA_LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
|
||||
const DIGITS: &[u8] = b"0123456789";
|
||||
const SYMBOLS: &[u8] = b"!@#$%^&*()_+-=[]{}|;:',.<>?/";
|
||||
|
||||
let mut rng = rand::rng();
|
||||
let uniform_alpha_upper = rand::distr::Uniform::new(0, ALPHA_UPPER.len())
|
||||
.expect("Failed to create distribution");
|
||||
let uniform_alpha_lower = rand::distr::Uniform::new(0, ALPHA_LOWER.len())
|
||||
.expect("Failed to create distribution");
|
||||
let uniform_digits =
|
||||
rand::distr::Uniform::new(0, DIGITS.len()).expect("Failed to create distribution");
|
||||
let uniform_symbols =
|
||||
rand::distr::Uniform::new(0, SYMBOLS.len()).expect("Failed to create distribution");
|
||||
|
||||
let mut chars: Vec<char> = Vec::with_capacity(length);
|
||||
|
||||
// Ensure at least one of each: upper, lower, digit, symbol
|
||||
chars.push(ALPHA_UPPER[uniform_alpha_upper.sample(&mut rng)] as char);
|
||||
chars.push(ALPHA_LOWER[uniform_alpha_lower.sample(&mut rng)] as char);
|
||||
chars.push(DIGITS[uniform_digits.sample(&mut rng)] as char);
|
||||
chars.push(SYMBOLS[uniform_symbols.sample(&mut rng)] as char);
|
||||
|
||||
// Fill remaining with random from all categories
|
||||
let all_chars: Vec<u8> = [ALPHA_UPPER, ALPHA_LOWER, DIGITS, SYMBOLS].concat();
|
||||
|
||||
let uniform_all = rand::distr::Uniform::new(0, all_chars.len())
|
||||
.expect("Failed to create distribution");
|
||||
|
||||
for _ in 0..(length - 4) {
|
||||
chars.push(all_chars[uniform_all.sample(&mut rng)] as char);
|
||||
}
|
||||
|
||||
// Shuffle
|
||||
let mut shuffled = chars;
|
||||
shuffled.shuffle(&mut rng);
|
||||
|
||||
return shuffled.iter().collect();
|
||||
}
|
||||
|
||||
let admin_password = generate_secure_password(16);
|
||||
|
||||
// --- Step 3: Create masterkey secret ------------------------------------
|
||||
|
||||
debug!(
|
||||
"[Zitadel] Creating masterkey secret '{}' in namespace '{}'",
|
||||
MASTERKEY_SECRET_NAME, NAMESPACE
|
||||
);
|
||||
|
||||
// Masterkey for symmetric encryption — must be exactly 32 ASCII bytes (alphanumeric only).
|
||||
let masterkey = rng()
|
||||
.sample_iter(&rand::distr::Alphanumeric)
|
||||
.take(32)
|
||||
.map(char::from)
|
||||
.collect::<String>();
|
||||
|
||||
debug!(
|
||||
"[Zitadel] Created masterkey secret '{}' in namespace '{}'",
|
||||
MASTERKEY_SECRET_NAME, NAMESPACE
|
||||
);
|
||||
|
||||
let mut masterkey_data: BTreeMap<String, ByteString> = BTreeMap::new();
|
||||
masterkey_data.insert("masterkey".to_string(), ByteString(masterkey.into()));
|
||||
|
||||
let masterkey_secret = Secret {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(MASTERKEY_SECRET_NAME.to_string()),
|
||||
namespace: Some(NAMESPACE.to_string()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
data: Some(masterkey_data),
|
||||
..Secret::default()
|
||||
};
|
||||
|
||||
match topology
|
||||
.k8s_client()
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(format!("Failed to get k8s client : {e}")))?
|
||||
.create(&masterkey_secret, Some(NAMESPACE))
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
info!(
|
||||
"[Zitadel] Masterkey secret '{}' created",
|
||||
MASTERKEY_SECRET_NAME
|
||||
);
|
||||
}
|
||||
Err(KubeError::Api(ErrorResponse { code: 409, .. })) => {
|
||||
info!(
|
||||
"[Zitadel] Masterkey secret '{}' already exists, leaving it untouched",
|
||||
MASTERKEY_SECRET_NAME
|
||||
);
|
||||
}
|
||||
Err(other) => {
|
||||
let msg = format!(
|
||||
"[Zitadel] Failed to create masterkey secret '{}': {other}",
|
||||
MASTERKEY_SECRET_NAME
|
||||
);
|
||||
error!("{msg}");
|
||||
return Err(InterpretError::new(msg));
|
||||
}
|
||||
};
|
||||
|
||||
debug!(
|
||||
"[Zitadel] Masterkey secret '{}' created successfully",
|
||||
MASTERKEY_SECRET_NAME
|
||||
);
|
||||
|
||||
// --- Step 4: Build Helm values ------------------------------------
|
||||
|
||||
warn!(
|
||||
"[Zitadel] Applying TLS-enabled ingress defaults for OKD/OpenShift. \
|
||||
cert-manager annotations are included as optional hints and are \
|
||||
ignored on clusters without cert-manager."
|
||||
);
|
||||
|
||||
let values_yaml = format!(
|
||||
r#"image:
|
||||
tag: {zitadel_version}
|
||||
zitadel:
|
||||
masterkeySecretName: "{MASTERKEY_SECRET_NAME}"
|
||||
configmapConfig:
|
||||
ExternalDomain: "{host}"
|
||||
ExternalSecure: true
|
||||
FirstInstance:
|
||||
Org:
|
||||
Human:
|
||||
UserName: "admin"
|
||||
Password: "{admin_password}"
|
||||
FirstName: "Zitadel"
|
||||
LastName: "Admin"
|
||||
Email: "admin@zitadel.example.com"
|
||||
PasswordChangeRequired: true
|
||||
TLS:
|
||||
Enabled: false
|
||||
Database:
|
||||
Postgres:
|
||||
Host: "{db_host}"
|
||||
Port: {db_port}
|
||||
Database: zitadel
|
||||
MaxOpenConns: 20
|
||||
MaxIdleConns: 10
|
||||
User:
|
||||
Username: postgres
|
||||
SSL:
|
||||
Mode: require
|
||||
Admin:
|
||||
Username: postgres
|
||||
SSL:
|
||||
Mode: require
|
||||
# Directly import credentials from the postgres secret
|
||||
# TODO : use a less privileged postgres user
|
||||
env:
|
||||
- name: ZITADEL_DATABASE_POSTGRES_USER_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: "{pg_superuser_secret}"
|
||||
key: user
|
||||
- name: ZITADEL_DATABASE_POSTGRES_USER_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: "{pg_superuser_secret}"
|
||||
key: password
|
||||
- name: ZITADEL_DATABASE_POSTGRES_ADMIN_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: "{pg_superuser_secret}"
|
||||
key: user
|
||||
- name: ZITADEL_DATABASE_POSTGRES_ADMIN_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: "{pg_superuser_secret}"
|
||||
key: password
|
||||
# Security context for OpenShift restricted PSA compliance
|
||||
podSecurityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: null
|
||||
fsGroup: null
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
runAsNonRoot: true
|
||||
runAsUser: null
|
||||
fsGroup: null
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
# Init job security context (runs before main deployment)
|
||||
initJob:
|
||||
podSecurityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: null
|
||||
fsGroup: null
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
runAsNonRoot: true
|
||||
runAsUser: null
|
||||
fsGroup: null
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
# Setup job security context
|
||||
setupJob:
|
||||
podSecurityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: null
|
||||
fsGroup: null
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
runAsNonRoot: true
|
||||
runAsUser: null
|
||||
fsGroup: null
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
route.openshift.io/termination: edge
|
||||
hosts:
|
||||
- host: "{host}"
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- "{host}"
|
||||
secretName: "{host}-tls"
|
||||
|
||||
login:
|
||||
enabled: true
|
||||
podSecurityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: null
|
||||
fsGroup: null
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
runAsNonRoot: true
|
||||
runAsUser: null
|
||||
fsGroup: null
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
route.openshift.io/termination: edge
|
||||
hosts:
|
||||
- host: "{host}"
|
||||
paths:
|
||||
- path: /ui/v2/login
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- "{host}"
|
||||
secretName: "{host}-tls""#,
|
||||
zitadel_version = self.zitadel_version
|
||||
);
|
||||
|
||||
trace!("[Zitadel] Helm values YAML:\n{values_yaml}");
|
||||
|
||||
// --- Step 5: Deploy Helm chart ------------------------------------
|
||||
|
||||
info!(
|
||||
"[Zitadel] Deploying Helm chart 'zitadel/zitadel' as release 'zitadel' in namespace '{NAMESPACE}'"
|
||||
);
|
||||
|
||||
let result = HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(NAMESPACE).unwrap()),
|
||||
release_name: NonBlankString::from_str("zitadel").unwrap(),
|
||||
chart_name: NonBlankString::from_str("zitadel/zitadel").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml: Some(values_yaml),
|
||||
create_namespace: true,
|
||||
install_only: false,
|
||||
repository: Some(HelmRepository::new(
|
||||
"zitadel".to_string(),
|
||||
hurl!("https://charts.zitadel.com"),
|
||||
true,
|
||||
)),
|
||||
}
|
||||
.interpret(inventory, topology)
|
||||
.await;
|
||||
|
||||
match &result {
|
||||
Ok(_) => info!(
|
||||
"[Zitadel] Helm chart deployed successfully\n\n\
|
||||
===== ZITADEL DEPLOYMENT COMPLETE =====\n\
|
||||
Login URL: https://{host}\n\
|
||||
Username: admin@zitadel.{host}\n\
|
||||
Password: {admin_password}\n\n\
|
||||
IMPORTANT: The password is saved in ConfigMap 'zitadel-config-yaml'\n\
|
||||
and must be changed on first login. Save the credentials in a\n\
|
||||
secure location after changing them.\n\
|
||||
========================================="
|
||||
),
|
||||
Err(e) => error!("[Zitadel] Helm chart deployment failed: {e}"),
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("Zitadel")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use async_nats::ConnectOptions;
|
||||
|
||||
use crate::{
|
||||
agent::AgentRole,
|
||||
store::{ChaosKvStore, InMemoryKvStore, NatsKvStore},
|
||||
@@ -65,7 +67,15 @@ fn get_chaos_store(
|
||||
}
|
||||
|
||||
async fn get_local_nats_store() -> Arc<NatsKvStore> {
|
||||
let client = async_nats::connect("localhost").await.unwrap();
|
||||
let mut client = async_nats::ConnectOptions::new()
|
||||
// .require_tls(true)
|
||||
.user_and_password("admin".into(), "admin2".into())
|
||||
.ping_interval(std::time::Duration::from_secs(10))
|
||||
.connect("localhost")
|
||||
.await
|
||||
.expect("Connection to nats failed");
|
||||
|
||||
// let client = async_nats::connect("localhost").await.unwrap();
|
||||
let jetstream = async_nats::jetstream::new(client);
|
||||
let kv = jetstream
|
||||
.create_key_value(async_nats::jetstream::kv::Config {
|
||||
|
||||
17
harmony_node_readiness/Cargo.toml
Normal file
17
harmony_node_readiness/Cargo.toml
Normal file
@@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "harmony-node-readiness-endpoint"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
actix-web = "4"
|
||||
kube.workspace = true
|
||||
k8s-openapi.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
env_logger.workspace = true
|
||||
log.workspace = true
|
||||
tokio.workspace = true
|
||||
reqwest.workspace = true
|
||||
chrono.workspace = true
|
||||
tower = "0.5.3"
|
||||
13
harmony_node_readiness/Dockerfile
Normal file
13
harmony_node_readiness/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
||||
FROM debian:13-slim
|
||||
|
||||
# RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
# ca-certificates \
|
||||
# && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY harmony-node-readiness-endpoint /usr/local/bin/harmony-node-readiness-endpoint
|
||||
|
||||
ENV RUST_LOG=info
|
||||
|
||||
EXPOSE 25001
|
||||
|
||||
CMD ["harmony-node-readiness-endpoint"]
|
||||
197
harmony_node_readiness/README.md
Normal file
197
harmony_node_readiness/README.md
Normal file
@@ -0,0 +1,197 @@
|
||||
# harmony-node-readiness-endpoint
|
||||
|
||||
**A lightweight, standalone Rust service for Kubernetes node health checking.**
|
||||
|
||||
Designed for **bare-metal Kubernetes clusters** with external load balancers (HAProxy, OPNsense, F5, etc.).
|
||||
|
||||
Exposes a simple HTTP endpoint (`/health`) on each node:
|
||||
|
||||
- **200 OK** — node is healthy and ready to receive traffic
|
||||
- **503 Service Unavailable** — node should be removed from the load balancer pool
|
||||
- **500 Internal Server Error** — misconfiguration (e.g. `NODE_NAME` not set)
|
||||
|
||||
This project is **not dependent on Harmony**, but is commonly used as part of Harmony bare-metal Kubernetes deployments.
|
||||
|
||||
## Why this project exists
|
||||
|
||||
In bare-metal environments, external load balancers often rely on pod-level or router-level checks that can lag behind the authoritative Kubernetes `Node.status.conditions[Ready]`.
|
||||
This service provides the true source-of-truth with fast reaction time.
|
||||
|
||||
## Available checks
|
||||
|
||||
| Check name | Description | Status |
|
||||
|--------------------|-------------------------------------------------------------|-------------------|
|
||||
| `node_ready` | Queries `Node.status.conditions[Ready]` via Kubernetes API | Implemented |
|
||||
| `okd_router_1936` | Probes OpenShift router `/healthz/ready` on port 1936 | Implemented |
|
||||
| `filesystem_ro` | Detects read-only mounts via `/proc/mounts` | To be implemented |
|
||||
| `kubelet` | Local probe to kubelet `/healthz` (port 10248) | To be implemented |
|
||||
| `container_runtime`| Socket check + runtime status | To be implemented |
|
||||
| `disk_pressure` | Threshold checks on key filesystems | To be implemented |
|
||||
| `network` | DNS resolution + gateway connectivity | To be implemented |
|
||||
| `custom_conditions`| Reacts to extra conditions (NPD, etc.) | To be implemented |
|
||||
|
||||
All checks are combined with logical **AND** — any single failure results in 503.
|
||||
|
||||
## Behavior
|
||||
|
||||
### `node_ready` check — fail-open design
|
||||
|
||||
The `node_ready` check queries the Kubernetes API server to read `Node.status.conditions[Ready]`.
|
||||
Because this service runs on the node it is checking, there are scenarios where the API server is temporarily
|
||||
unreachable (e.g. during a control-plane restart). To avoid incorrectly draining a healthy node in such cases,
|
||||
the check is **fail-open**: it passes (reports ready) whenever the Kubernetes API is unavailable.
|
||||
|
||||
| Situation | Result | HTTP status |
|
||||
|------------------------------------------------------|-------------------|-------------|
|
||||
| `Node.conditions[Ready] == True` | Pass | 200 |
|
||||
| `Node.conditions[Ready] == False` | Fail | 503 |
|
||||
| `Ready` condition absent | Fail | 503 |
|
||||
| API server unreachable or timed out (1 s timeout) | Pass (assumes ready) | 200 |
|
||||
| Kubernetes client initialization failed | Pass (assumes ready) | 200 |
|
||||
| `NODE_NAME` env var not set | Hard error | 500 |
|
||||
|
||||
A warning is logged whenever the API is unavailable and the check falls back to assuming ready.
|
||||
|
||||
### `okd_router_1936` check
|
||||
|
||||
Sends `GET http://127.0.0.1:1936/healthz/ready` with a 5-second timeout.
|
||||
Returns pass on any 2xx response, fail otherwise.
|
||||
|
||||
### Unknown check names
|
||||
|
||||
Requesting an unknown check name (e.g. `check=bogus`) results in that check returning `passed: false`
|
||||
with reason `"Unknown check: bogus"`, and the overall response is 503.
|
||||
|
||||
## How it works
|
||||
|
||||
### Node name discovery
|
||||
|
||||
The service reads the `NODE_NAME` environment variable, which must be injected via the Kubernetes Downward API:
|
||||
|
||||
```yaml
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
```
|
||||
|
||||
### Kubernetes API authentication
|
||||
|
||||
- Uses standard **in-cluster configuration** — no external credentials needed.
|
||||
- The ServiceAccount token and CA certificate are automatically mounted at `/var/run/secrets/kubernetes.io/serviceaccount/`.
|
||||
- Requires only minimal RBAC: `get` and `list` on the `nodes` resource (see `deploy/resources.yaml`).
|
||||
- Connect and write timeouts are set to **1 second** to keep checks fast.
|
||||
|
||||
## Deploy
|
||||
|
||||
All Kubernetes resources (Namespace, ServiceAccount, ClusterRole, ClusterRoleBinding, and an OpenShift SCC RoleBinding for `hostnetwork`) are in a single file.
|
||||
|
||||
```bash
|
||||
kubectl apply -f deploy/resources.yaml
|
||||
kubectl apply -f deploy/daemonset.yaml
|
||||
```
|
||||
|
||||
The DaemonSet uses `hostNetwork: true` and `hostPort: 25001`, so the endpoint is reachable directly on the node's IP at port 25001.
|
||||
It tolerates all taints, ensuring it runs even on nodes marked unschedulable.
|
||||
|
||||
### Configure your external load balancer
|
||||
|
||||
**Example for HAProxy / OPNsense:**
|
||||
- Check type: **HTTP**
|
||||
- URI: `/health`
|
||||
- Port: `25001` (configurable via `LISTEN_PORT` env var)
|
||||
- Interval: 5–10 s
|
||||
- Rise: 2
|
||||
- Fall: 3
|
||||
- Expect: `2xx`
|
||||
|
||||
## Endpoint usage
|
||||
|
||||
### Query parameter
|
||||
|
||||
Use the `check` query parameter to select which checks to run (comma-separated).
|
||||
When omitted, only `node_ready` runs.
|
||||
|
||||
| Request | Checks run |
|
||||
|------------------------------------------------|-----------------------------------|
|
||||
| `GET /health` | `node_ready` |
|
||||
| `GET /health?check=okd_router_1936` | `okd_router_1936` only |
|
||||
| `GET /health?check=node_ready,okd_router_1936` | `node_ready` and `okd_router_1936`|
|
||||
|
||||
> **Note:** specifying `check=` replaces the default. Include `node_ready` explicitly if you need it alongside other checks.
|
||||
|
||||
### Response format
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ready" | "not-ready",
|
||||
"checks": [
|
||||
{
|
||||
"name": "<check-name>",
|
||||
"passed": true | false,
|
||||
"reason": "<failure reason, omitted on success>",
|
||||
"duration_ms": 42
|
||||
}
|
||||
],
|
||||
"total_duration_ms": 42
|
||||
}
|
||||
```
|
||||
|
||||
**Healthy node (default)**
|
||||
```http
|
||||
HTTP/1.1 200 OK
|
||||
|
||||
{
|
||||
"status": "ready",
|
||||
"checks": [{ "name": "node_ready", "passed": true, "duration_ms": 42 }],
|
||||
"total_duration_ms": 42
|
||||
}
|
||||
```
|
||||
|
||||
**Unhealthy node**
|
||||
```http
|
||||
HTTP/1.1 503 Service Unavailable
|
||||
|
||||
{
|
||||
"status": "not-ready",
|
||||
"checks": [
|
||||
{ "name": "node_ready", "passed": false, "reason": "KubeletNotReady", "duration_ms": 35 }
|
||||
],
|
||||
"total_duration_ms": 35
|
||||
}
|
||||
```
|
||||
|
||||
**API server unreachable (fail-open)**
|
||||
```http
|
||||
HTTP/1.1 200 OK
|
||||
|
||||
{
|
||||
"status": "ready",
|
||||
"checks": [{ "name": "node_ready", "passed": true, "duration_ms": 1001 }],
|
||||
"total_duration_ms": 1001
|
||||
}
|
||||
```
|
||||
*(A warning is logged: `Kubernetes API appears to be down … Assuming node is ready.`)*
|
||||
|
||||
## Configuration
|
||||
|
||||
| Env var | Default | Description |
|
||||
|---------------|----------|--------------------------------------|
|
||||
| `NODE_NAME` | required | Node name, injected via Downward API |
|
||||
| `LISTEN_PORT` | `25001` | TCP port the HTTP server binds to |
|
||||
| `RUST_LOG` | — | Log level (e.g. `info`, `debug`) |
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
# Run locally
|
||||
NODE_NAME=my-test-node cargo run
|
||||
|
||||
# Run tests
|
||||
cargo test
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
*Minimal, auditable, and built for production bare-metal Kubernetes environments.*
|
||||
13
harmony_node_readiness/build-docker.sh
Executable file
13
harmony_node_readiness/build-docker.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
# TODO
|
||||
# This is meant to be run on a machine with harmony development tools installed (cargo, etc)
|
||||
|
||||
DOCKER_TAG="${DOCKER_TAG:-dev}"
|
||||
|
||||
cargo build --release
|
||||
|
||||
cp ../target/release/harmony-node-readiness-endpoint .
|
||||
|
||||
docker build . -t hub.nationtech.io/harmony/harmony-node-readiness-endpoint:${DOCKER_TAG}
|
||||
|
||||
36
harmony_node_readiness/deploy/daemonset.yaml
Normal file
36
harmony_node_readiness/deploy/daemonset.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: node-healthcheck
|
||||
namespace: harmony-node-healthcheck
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: node-healthcheck
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: node-healthcheck
|
||||
spec:
|
||||
serviceAccountName: node-healthcheck-sa
|
||||
hostNetwork: true
|
||||
# This ensures the pod runs even if the node is already "unschedulable"
|
||||
# so it can report the status correctly.
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
containers:
|
||||
- name: checker
|
||||
image: hub.nationtech.io/harmony/harmony-node-readiness-endpoint:latest
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
ports:
|
||||
- containerPort: 25001
|
||||
hostPort: 25001
|
||||
name: health-port
|
||||
resources:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 50Mi
|
||||
64
harmony_node_readiness/deploy/resources.yaml
Normal file
64
harmony_node_readiness/deploy/resources.yaml
Normal file
@@ -0,0 +1,64 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: harmony-node-healthcheck
|
||||
labels:
|
||||
name: harmony-node-healthcheck
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: node-healthcheck-sa
|
||||
namespace: harmony-node-healthcheck
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: node-healthcheck-role
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
verbs: ["get", "list"]
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: allow-hostnetwork-scc
|
||||
namespace: harmony-node-healthcheck
|
||||
rules:
|
||||
- apiGroups: ["security.openshift.io"]
|
||||
resources: ["securitycontextconstraints"]
|
||||
resourceNames: ["hostnetwork"]
|
||||
verbs: ["use"]
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: node-status-querier-scc-binding
|
||||
namespace: harmony-node-healthcheck
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: node-healthcheck-sa
|
||||
namespace: harmony-node-healthcheck
|
||||
roleRef:
|
||||
kind: Role
|
||||
name: allow-hostnetwork-scc
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: read-nodes-binding
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: node-healthcheck-sa
|
||||
namespace: harmony-node-healthcheck
|
||||
roleRef:
|
||||
kind: ClusterRole
|
||||
name: node-healthcheck-role
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user