Compare commits
56 Commits
feat/nats-
...
feat/chang
| Author | SHA1 | Date | |
|---|---|---|---|
| f532ba2b40 | |||
| fafca31798 | |||
| 64893a84f5 | |||
| f941672662 | |||
| 5db1a31d33 | |||
| d7e5bf11d5 | |||
| 2b157ad7fd | |||
| fe52f69473 | |||
| d8338ad12c | |||
| ac9fedf853 | |||
| fd3705e382 | |||
| 4840c7fdc2 | |||
| 20172a7801 | |||
| 6bb33c5845 | |||
| d9357adad3 | |||
| a25ca86bdf | |||
| 646c5e723e | |||
| 69c382e8c6 | |||
| dca764395d | |||
| 2738985edb | |||
| d9a21bf94b | |||
| 8f8bd34168 | |||
| b5e971b3b6 | |||
| a1c0e0e246 | |||
| d084cee8d5 | |||
| 63ef1c0ea7 | |||
| ff7d2fb89e | |||
| 9bb38b930a | |||
| c677487a5e | |||
| c1d46612ac | |||
| 4fba01338d | |||
| 913ed17453 | |||
| 9e185cbbd5 | |||
| 752526f831 | |||
| f9bd6ad260 | |||
| 111181c300 | |||
| 3257cd9569 | |||
| 4b1915c594 | |||
| cf3050ce87 | |||
| c3e27c60be | |||
| 2d26790c82 | |||
| 2e89308b82 | |||
| d8936a8307 | |||
| e2fa12508f | |||
| bea2a75882 | |||
| a1528665d0 | |||
| 613225a00b | |||
| dd1c088f0d | |||
| b4ef009804 | |||
| 191e92048b | |||
| f4a70d8978 | |||
| 2ddc9c0579 | |||
| fececc2efd | |||
| 8afcacbd24 | |||
| b885c35706 | |||
|
|
bb6b4b7f88 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -26,3 +26,6 @@ Cargo.lock
|
||||
*.pdb
|
||||
|
||||
.harmony_generated
|
||||
|
||||
# Useful to create ignore folders for temp files and notes
|
||||
ignore
|
||||
|
||||
231
Cargo.lock
generated
231
Cargo.lock
generated
@@ -1008,7 +1008,7 @@ dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
"strsim 0.11.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1375,14 +1375,38 @@ dependencies = [
|
||||
"syn 2.0.106",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.14.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850"
|
||||
dependencies = [
|
||||
"darling_core 0.14.4",
|
||||
"darling_macro 0.14.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.20.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"darling_macro",
|
||||
"darling_core 0.20.11",
|
||||
"darling_macro 0.20.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_core"
|
||||
version = "0.14.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"ident_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim 0.10.0",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1395,17 +1419,28 @@ dependencies = [
|
||||
"ident_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim",
|
||||
"strsim 0.11.1",
|
||||
"syn 2.0.106",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_macro"
|
||||
version = "0.14.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e"
|
||||
dependencies = [
|
||||
"darling_core 0.14.4",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_macro"
|
||||
version = "0.20.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"darling_core 0.20.11",
|
||||
"quote",
|
||||
"syn 2.0.106",
|
||||
]
|
||||
@@ -1448,6 +1483,37 @@ dependencies = [
|
||||
"syn 2.0.106",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_builder"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8"
|
||||
dependencies = [
|
||||
"derive_builder_macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_builder_core"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f"
|
||||
dependencies = [
|
||||
"darling 0.14.4",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_builder_macro"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e"
|
||||
dependencies = [
|
||||
"derive_builder_core",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "2.0.1"
|
||||
@@ -1828,6 +1894,40 @@ dependencies = [
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-k8s-drain-node"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert_cmd",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_types",
|
||||
"inquire 0.7.5",
|
||||
"log",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-k8s-write-file-on-node"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert_cmd",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_types",
|
||||
"inquire 0.7.5",
|
||||
"log",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-kube-rs"
|
||||
version = "0.1.0"
|
||||
@@ -1947,6 +2047,19 @@ dependencies = [
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-node-health"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_types",
|
||||
"log",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-ntfy"
|
||||
version = "0.1.0"
|
||||
@@ -2661,6 +2774,23 @@ dependencies = [
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "harmony-node-readiness-endpoint"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"chrono",
|
||||
"env_logger",
|
||||
"k8s-openapi",
|
||||
"kube",
|
||||
"log",
|
||||
"reqwest 0.12.23",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tower",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "harmony_agent"
|
||||
version = "0.1.0"
|
||||
@@ -2813,6 +2943,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"thiserror 2.0.16",
|
||||
"tokio",
|
||||
"vaultrs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3511,7 +3642,7 @@ version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "435d80800b936787d62688c927b6490e887c7ef5ff9ce922c6c6050fca75eb9a"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"darling 0.20.11",
|
||||
"indoc",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -3638,26 +3769,6 @@ dependencies = [
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json-prompt"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"brocade",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_secret",
|
||||
"harmony_secret_derive",
|
||||
"harmony_types",
|
||||
"log",
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonpath-rust"
|
||||
version = "0.7.5"
|
||||
@@ -3803,7 +3914,7 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "079fc8c1c397538628309cfdee20696ebdcc26745f9fb17f89b78782205bd995"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"darling 0.20.11",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde",
|
||||
@@ -5318,6 +5429,40 @@ dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustify"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "759a090a17ce545d1adcffcc48207d5136c8984d8153bd8247b1ad4a71e49f5f"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"http 1.3.1",
|
||||
"reqwest 0.12.23",
|
||||
"rustify_derive",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"thiserror 1.0.69",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustify_derive"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f07d43b2dbdbd99aaed648192098f0f413b762f0f352667153934ef3955f1793"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"serde_urlencoded",
|
||||
"syn 1.0.109",
|
||||
"synstructure 0.12.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.44"
|
||||
@@ -5796,7 +5941,7 @@ version = "3.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"darling 0.20.11",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.106",
|
||||
@@ -6271,6 +6416,12 @@ dependencies = [
|
||||
"unicode-properties",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
@@ -6766,9 +6917,9 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.5.2"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
|
||||
checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
@@ -7084,6 +7235,26 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
|
||||
|
||||
[[package]]
|
||||
name = "vaultrs"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f81eb4d9221ca29bad43d4b6871b6d2e7656e1af2cfca624a87e5d17880d831d"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"derive_builder",
|
||||
"http 1.3.1",
|
||||
"reqwest 0.12.23",
|
||||
"rustify",
|
||||
"rustify_derive",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 1.0.69",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
resolver = "2"
|
||||
members = [
|
||||
"private_repos/*",
|
||||
"examples/*",
|
||||
"harmony",
|
||||
"harmony_types",
|
||||
"harmony_macros",
|
||||
@@ -19,7 +18,8 @@ members = [
|
||||
"adr/agent_discovery/mdns",
|
||||
"brocade",
|
||||
"harmony_agent",
|
||||
"harmony_agent/deploy",
|
||||
"harmony_agent/deploy", "harmony_node_readiness",
|
||||
"examples/*",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
@@ -52,6 +52,7 @@ kube = { version = "1.1.0", features = [
|
||||
"jsonpatch",
|
||||
] }
|
||||
k8s-openapi = { version = "0.25", features = ["v1_30"] }
|
||||
# TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated https://github.com/sebastienrousseau/serde_yml
|
||||
serde_yaml = "0.9"
|
||||
serde-value = "0.7"
|
||||
http = "1.2"
|
||||
|
||||
87
README.md
87
README.md
@@ -1,4 +1,6 @@
|
||||
# Harmony : Open-source infrastructure orchestration that treats your platform like first-class code
|
||||
# Harmony
|
||||
|
||||
Open-source infrastructure orchestration that treats your platform like first-class code.
|
||||
|
||||
In other words, Harmony is a **next-generation platform engineering framework**.
|
||||
|
||||
@@ -20,9 +22,7 @@ All in **one strongly-typed Rust codebase**.
|
||||
|
||||
From a **developer laptop** to a **global production cluster**, a single **source of truth** drives the **full software lifecycle.**
|
||||
|
||||
---
|
||||
|
||||
## 1 · The Harmony Philosophy
|
||||
## The Harmony Philosophy
|
||||
|
||||
Infrastructure is essential, but it shouldn’t be your core business. Harmony is built on three guiding principles that make modern platforms reliable, repeatable, and easy to reason about.
|
||||
|
||||
@@ -34,9 +34,18 @@ Infrastructure is essential, but it shouldn’t be your core business. Harmony i
|
||||
|
||||
These principles surface as simple, ergonomic Rust APIs that let teams focus on their product while trusting the platform underneath.
|
||||
|
||||
---
|
||||
## Where to Start
|
||||
|
||||
## 2 · Quick Start
|
||||
We have a comprehensive set of documentation right here in the repository.
|
||||
|
||||
| I want to... | Start Here |
|
||||
| ----------------- | ------------------------------------------------------------------ |
|
||||
| Get Started | [Getting Started Guide](./docs/guides/getting-started.md) |
|
||||
| See an Example | [Use Case: Deploy a Rust Web App](./docs/use-cases/rust-webapp.md) |
|
||||
| Explore | [Documentation Hub](./docs/README.md) |
|
||||
| See Core Concepts | [Core Concepts Explained](./docs/concepts.md) |
|
||||
|
||||
## Quick Look: Deploy a Rust Webapp
|
||||
|
||||
The snippet below spins up a complete **production-grade Rust + Leptos Webapp** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.
|
||||
|
||||
@@ -94,63 +103,33 @@ async fn main() {
|
||||
}
|
||||
```
|
||||
|
||||
Run it:
|
||||
To run this:
|
||||
|
||||
```bash
|
||||
cargo run
|
||||
```
|
||||
- Clone the repository: `git clone https://git.nationtech.io/nationtech/harmony`
|
||||
- Build the project and its dependencies: `cargo build --release`
|
||||
- Run the example: `cargo run --example try_rust_webapp`
|
||||
|
||||
Harmony analyses the code, shows an execution plan in a TUI, and applies it once you confirm. Same code, same binary—every environment.
|
||||
## Documentation
|
||||
|
||||
---
|
||||
All documentation is in the `/docs` directory.
|
||||
|
||||
## 3 · Core Concepts
|
||||
- [Documentation Hub](./docs/README.md): The main entry point for all documentation.
|
||||
- [Core Concepts](./docs/concepts.md): A detailed look at Score, Topology, Capability, Inventory, and Interpret.
|
||||
- [Component Catalogs](./docs/catalogs/README.md): Discover all available Scores, Topologies, and Capabilities.
|
||||
- [Developer Guide](./docs/guides/developer-guide.md): Learn how to write your own Scores and Topologies.
|
||||
|
||||
| Term | One-liner |
|
||||
| ---------------- | ---------------------------------------------------------------------------------------------------- |
|
||||
| **Score<T>** | Declarative description of the desired state (e.g., `LAMPScore`). |
|
||||
| **Interpret<T>** | Imperative logic that realises a `Score` on a specific environment. |
|
||||
| **Topology** | An environment (local k3d, AWS, bare-metal) exposing verified _Capabilities_ (Kubernetes, DNS, …). |
|
||||
| **Maestro** | Orchestrator that compiles Scores + Topology, ensuring all capabilities line up **at compile-time**. |
|
||||
| **Inventory** | Optional catalogue of physical assets for bare-metal and edge deployments. |
|
||||
## Architectural Decision Records
|
||||
|
||||
A visual overview is in the diagram below.
|
||||
- [ADR-001 · Why Rust](adr/001-rust.md)
|
||||
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
|
||||
- [ADR-006 · Secret Management](adr/006-secret-management.md)
|
||||
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
|
||||
|
||||
[Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)
|
||||
## Contribute
|
||||
|
||||
---
|
||||
Discussions and roadmap live in [Issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
|
||||
|
||||
## 4 · Install
|
||||
|
||||
Prerequisites:
|
||||
|
||||
- Rust
|
||||
- Docker (if you deploy locally)
|
||||
- `kubectl` / `helm` for Kubernetes-based topologies
|
||||
|
||||
```bash
|
||||
git clone https://git.nationtech.io/nationtech/harmony
|
||||
cd harmony
|
||||
cargo build --release # builds the CLI, TUI and libraries
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5 · Learning More
|
||||
|
||||
- **Architectural Decision Records** – dive into the rationale
|
||||
- [ADR-001 · Why Rust](adr/001-rust.md)
|
||||
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
|
||||
- [ADR-006 · Secret Management](adr/006-secret-management.md)
|
||||
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
|
||||
|
||||
- **Extending Harmony** – write new Scores / Interprets, add hardware like OPNsense firewalls, or embed Harmony in your own tooling (`/docs`).
|
||||
|
||||
- **Community** – discussions and roadmap live in [GitLab issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
|
||||
|
||||
---
|
||||
|
||||
## 6 · License
|
||||
## License
|
||||
|
||||
Harmony is released under the **GNU AGPL v3**.
|
||||
|
||||
|
||||
65
adr/019-Network-bond-setup.md
Normal file
65
adr/019-Network-bond-setup.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# Architecture Decision Record: Network Bonding Configuration via External Automation
|
||||
|
||||
Initial Authors: Jean-Gabriel Gill-Couture & Sylvain Tremblay
|
||||
|
||||
Initial Date: 2026-02-13
|
||||
|
||||
Last Updated Date: 2026-02-13
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
We need to configure LACP bonds on 10GbE interfaces across all worker nodes in the OpenShift cluster. A significant challenge is that interface names (e.g., `enp1s0f0` vs `ens1f0`) vary across different hardware nodes.
|
||||
|
||||
The standard OpenShift mechanism (MachineConfig) applies identical configurations to all nodes in a MachineConfigPool. Since the interface names differ, a single static MachineConfig cannot target specific physical devices across the entire cluster without complex workarounds.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use the existing "Harmony" automation tool to generate and apply host-specific NetworkManager configuration files directly to the nodes.
|
||||
|
||||
1. Harmony will generate the specific `.nmconnection` files for the bond and slaves based on its inventory of interface names.
|
||||
2. Files will be pushed to `/etc/NetworkManager/system-connections/` on each node.
|
||||
3. Configuration will be applied via `nmcli` reload or a node reboot.
|
||||
|
||||
## Rationale
|
||||
|
||||
* **Inventory Awareness:** Harmony already possesses the specific interface mapping data for each host.
|
||||
* **Persistence:** Fedora CoreOS/SCOS allows writing to `/etc`, and these files persist across reboots and OS upgrades (rpm-ostree updates).
|
||||
* **Avoids Complexity:** This approach avoids the operational overhead of creating unique MachineConfigPools for every single host or hardware variant.
|
||||
* **Safety:** Unlike wildcard matching, this ensures explicit interface selection, preventing accidental bonding of reserved interfaces (e.g., future separation of Ceph storage traffic).
|
||||
|
||||
## Consequences
|
||||
|
||||
**Pros:**
|
||||
* Precise, per-host configuration without polluting the Kubernetes API with hundreds of MachineConfigs.
|
||||
* Standard Linux networking behavior; easy to debug locally.
|
||||
* Prevents accidental interface capture (unlike wildcards).
|
||||
|
||||
**Cons:**
|
||||
* **Loss of Declarative K8s State:** The network config is not managed by the Machine Config Operator (MCO).
|
||||
* **Node Replacement Friction:** Newly provisioned nodes (replacements) will boot with default config. Harmony must be run against new nodes manually or via a hook before they can fully join the cluster workload.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
1. **Wildcard Matching in NetworkManager (e.g., `interface-name=enp*`):**
|
||||
* *Pros:* Single MachineConfig for the whole cluster.
|
||||
* *Cons:* Rejected because it is too broad. It risks capturing interfaces intended for other purposes (e.g., splitting storage and cluster networks later).
|
||||
|
||||
2. **"Kitchen Sink" Configuration:**
|
||||
* *Pros:* Single file listing every possible interface name as a slave.
|
||||
* *Cons:* "Dirty" configuration; results in many inactive connections on every host; brittle if new naming schemes appear.
|
||||
|
||||
3. **Per-Host MachineConfig:**
|
||||
* *Pros:* Fully declarative within OpenShift.
|
||||
* *Cons:* Requires a unique `MachineConfigPool` per host, which is an anti-pattern and unmaintainable at scale.
|
||||
|
||||
4. **On-boot Generation Script:**
|
||||
* *Pros:* Dynamic detection.
|
||||
* *Cons:* Increases boot complexity; harder to debug if the script fails during startup.
|
||||
|
||||
## Additional Notes
|
||||
|
||||
While `/etc` is writable and persistent on CoreOS, this configuration falls outside the "Day 1" Ignition process. Operational runbooks must be updated to ensure Harmony is run against the new node whenever a node is replaced.
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::net::{IpAddr, Ipv4Addr};
|
||||
|
||||
use brocade::{BrocadeOptions, ssh};
|
||||
use harmony_secret::Secret;
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use harmony_types::switch::PortLocation;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -21,17 +21,15 @@ async fn main() {
|
||||
// let ip = IpAddr::V4(Ipv4Addr::new(192, 168, 4, 11)); // brocade @ st
|
||||
let switch_addresses = vec![ip];
|
||||
|
||||
// let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
|
||||
// .await
|
||||
// .unwrap();
|
||||
let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let brocade = brocade::init(
|
||||
&switch_addresses,
|
||||
// &config.username,
|
||||
// &config.password,
|
||||
"admin",
|
||||
"password",
|
||||
BrocadeOptions {
|
||||
&config.username,
|
||||
&config.password,
|
||||
&BrocadeOptions {
|
||||
dry_run: true,
|
||||
ssh: ssh::SshOptions {
|
||||
port: 2222,
|
||||
|
||||
@@ -144,7 +144,7 @@ pub async fn init(
|
||||
ip_addresses: &[IpAddr],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
options: &BrocadeOptions,
|
||||
) -> Result<Box<dyn BrocadeClient + Send + Sync>, Error> {
|
||||
let shell = BrocadeShell::init(ip_addresses, username, password, options).await?;
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ impl BrocadeShell {
|
||||
ip_addresses: &[IpAddr],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
options: &BrocadeOptions,
|
||||
) -> Result<Self, Error> {
|
||||
let ip = ip_addresses
|
||||
.first()
|
||||
|
||||
@@ -70,7 +70,7 @@ pub async fn try_init_client(
|
||||
username: &str,
|
||||
password: &str,
|
||||
ip: &std::net::IpAddr,
|
||||
base_options: BrocadeOptions,
|
||||
base_options: &BrocadeOptions,
|
||||
) -> Result<BrocadeOptions, Error> {
|
||||
let mut default = SshOptions::default();
|
||||
default.port = base_options.ssh.port;
|
||||
|
||||
@@ -1 +1,33 @@
|
||||
Not much here yet, see the `adr` folder for now. More to come in time!
|
||||
# Harmony Documentation Hub
|
||||
|
||||
Welcome to the Harmony documentation. This is the main entry point for learning everything from core concepts to building your own Scores, Topologies, and Capabilities.
|
||||
|
||||
## 1. Getting Started
|
||||
|
||||
If you're new to Harmony, start here:
|
||||
|
||||
- [**Getting Started Guide**](./guides/getting-started.md): A step-by-step tutorial that takes you from an empty project to deploying your first application.
|
||||
- [**Core Concepts**](./concepts.md): A high-level overview of the key concepts in Harmony: `Score`, `Topology`, `Capability`, `Inventory`, `Interpret`, ...
|
||||
|
||||
## 2. Use Cases & Examples
|
||||
|
||||
See how to use Harmony to solve real-world problems.
|
||||
|
||||
- [**OKD on Bare Metal**](./use-cases/okd-on-bare-metal.md): A detailed walkthrough of bootstrapping a high-availability OKD cluster from physical hardware.
|
||||
- [**Deploy a Rust Web App**](./use-cases/deploy-rust-webapp.md): A quick guide to deploying a monitored, containerized web application to a Kubernetes cluster.
|
||||
|
||||
## 3. Component Catalogs
|
||||
|
||||
Discover existing, reusable components you can use in your Harmony projects.
|
||||
|
||||
- [**Scores Catalog**](./catalogs/scores.md): A categorized list of all available `Scores` (the "what").
|
||||
- [**Topologies Catalog**](./catalogs/topologies.md): A list of all available `Topologies` (the "where").
|
||||
- [**Capabilities Catalog**](./catalogs/capabilities.md): A list of all available `Capabilities` (the "how").
|
||||
|
||||
## 4. Developer Guides
|
||||
|
||||
Ready to build your own components? These guides show you how.
|
||||
|
||||
- [**Writing a Score**](./guides/writing-a-score.md): Learn how to create your own `Score` and `Interpret` logic to define a new desired state.
|
||||
- [**Writing a Topology**](./guides/writing-a-topology.md): Learn how to model a new environment (like AWS, GCP, or custom hardware) as a `Topology`.
|
||||
- [**Adding Capabilities**](./guides/adding-capabilities.md): See how to add a `Capability` to your custom `Topology`.
|
||||
|
||||
7
docs/catalogs/README.md
Normal file
7
docs/catalogs/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Component Catalogs
|
||||
|
||||
This section is the "dictionary" for Harmony. It lists all the reusable components available out-of-the-box.
|
||||
|
||||
- [**Scores Catalog**](./scores.md): Discover all available `Scores` (the "what").
|
||||
- [**Topologies Catalog**](./topologies.md): A list of all available `Topologies` (the "where").
|
||||
- [**Capabilities Catalog**](./capabilities.md): A list of all available `Capabilities` (the "how").
|
||||
40
docs/catalogs/capabilities.md
Normal file
40
docs/catalogs/capabilities.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Capabilities Catalog
|
||||
|
||||
A `Capability` is a specific feature or API that a `Topology` offers. `Interpret` logic uses these capabilities to execute a `Score`.
|
||||
|
||||
This list is primarily for developers **writing new Topologies or Scores**. As a user, you just need to know that the `Topology` you pick (like `K8sAnywhereTopology`) provides the capabilities your `Scores` (like `ApplicationScore`) need.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Capabilities Catalog](#capabilities-catalog)
|
||||
- [Kubernetes & Application](#kubernetes-application)
|
||||
- [Monitoring & Observability](#monitoring-observability)
|
||||
- [Networking (Core Services)](#networking-core-services)
|
||||
- [Networking (Hardware & Host)](#networking-hardware-host)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
## Kubernetes & Application
|
||||
|
||||
- **K8sClient**: Provides an authenticated client to interact with a Kubernetes API (create/read/update/delete resources).
|
||||
- **HelmCommand**: Provides the ability to execute Helm commands (install, upgrade, template).
|
||||
- **TenantManager**: Provides methods for managing tenants in a multi-tenant cluster.
|
||||
- **Ingress**: Provides an interface for managing ingress controllers and resources.
|
||||
|
||||
## Monitoring & Observability
|
||||
|
||||
- **Grafana**: Provides an API for configuring Grafana (datasources, dashboards).
|
||||
- **Monitoring**: A general capability for configuring monitoring (e.g., creating Prometheus rules).
|
||||
|
||||
## Networking (Core Services)
|
||||
|
||||
- **DnsServer**: Provides an interface for creating and managing DNS records.
|
||||
- **LoadBalancer**: Provides an interface for configuring a load balancer (e.g., OPNsense, MetalLB).
|
||||
- **DhcpServer**: Provides an interface for managing DHCP leases and host bindings.
|
||||
- **TftpServer**: Provides an interface for managing files on a TFTP server (e.g., iPXE boot files).
|
||||
|
||||
## Networking (Hardware & Host)
|
||||
|
||||
- **Router**: Provides an interface for configuring routing rules, typically on a firewall like OPNsense.
|
||||
- **Switch**: Provides an interface for configuring a physical network switch (e.g., managing VLANs and port channels).
|
||||
- **NetworkManager**: Provides an interface for configuring host-level networking (e.g., creating bonds and bridges on a node).
|
||||
102
docs/catalogs/scores.md
Normal file
102
docs/catalogs/scores.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# Scores Catalog
|
||||
|
||||
A `Score` is a declarative description of a desired state. Find the Score you need and add it to your `harmony!` block's `scores` array.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Scores Catalog](#scores-catalog)
|
||||
- [Application Deployment](#application-deployment)
|
||||
- [OKD / Kubernetes Cluster Setup](#okd-kubernetes-cluster-setup)
|
||||
- [Cluster Services & Management](#cluster-services-management)
|
||||
- [Monitoring & Alerting](#monitoring-alerting)
|
||||
- [Infrastructure & Networking (Bare Metal)](#infrastructure-networking-bare-metal)
|
||||
- [Infrastructure & Networking (Cluster)](#infrastructure-networking-cluster)
|
||||
- [Tenant Management](#tenant-management)
|
||||
- [Utility](#utility)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
## Application Deployment
|
||||
|
||||
Scores for deploying and managing end-user applications.
|
||||
|
||||
- **ApplicationScore**: The primary score for deploying a web application. Describes the application, its framework, and the features it requires (e.g., monitoring, CI/CD).
|
||||
- **HelmChartScore**: Deploys a generic Helm chart to a Kubernetes cluster.
|
||||
- **ArgoHelmScore**: Deploys an application using an ArgoCD Helm chart.
|
||||
- **LAMPScore**: A specialized score for deploying a classic LAMP (Linux, Apache, MySQL, PHP) stack.
|
||||
|
||||
## OKD / Kubernetes Cluster Setup
|
||||
|
||||
This collection of Scores is used to provision an entire OKD cluster from bare metal. They are typically used in order.
|
||||
|
||||
- **OKDSetup01InventoryScore**: Discovers and catalogs the physical hardware.
|
||||
- **OKDSetup02BootstrapScore**: Configures the bootstrap node, renders iPXE files, and kicks off the SCOS installation.
|
||||
- **OKDSetup03ControlPlaneScore**: Renders iPXE configurations for the control plane nodes.
|
||||
- **OKDSetupPersistNetworkBondScore**: Configures network bonds on the nodes and port channels on the switches.
|
||||
- **OKDSetup04WorkersScore**: Renders iPXE configurations for the worker nodes.
|
||||
- **OKDSetup06InstallationReportScore**: Runs post-installation checks and generates a report.
|
||||
- **OKDUpgradeScore**: Manages the upgrade process for an existing OKD cluster.
|
||||
|
||||
## Cluster Services & Management
|
||||
|
||||
Scores for installing and managing services _inside_ a Kubernetes cluster.
|
||||
|
||||
- **K3DInstallationScore**: Installs and configures a local K3D (k3s-in-docker) cluster. Used by `K8sAnywhereTopology`.
|
||||
- **CertManagerHelmScore**: Deploys the `cert-manager` Helm chart.
|
||||
- **ClusterIssuerScore**: Configures a `ClusterIssuer` for `cert-manager` (e.g., for Let's Encrypt).
|
||||
- **K8sNamespaceScore**: Ensures a Kubernetes namespace exists.
|
||||
- **K8sDeploymentScore**: Deploys a generic `Deployment` resource to Kubernetes.
|
||||
- **K8sIngressScore**: Configures an `Ingress` resource for a service.
|
||||
|
||||
## Monitoring & Alerting
|
||||
|
||||
Scores for configuring observability, dashboards, and alerts.
|
||||
|
||||
- **ApplicationMonitoringScore**: A generic score to set up monitoring for an application.
|
||||
- **ApplicationRHOBMonitoringScore**: A specialized score for setting up monitoring via the Red Hat Observability stack.
|
||||
- **HelmPrometheusAlertingScore**: Configures Prometheus alerts via a Helm chart.
|
||||
- **K8sPrometheusCRDAlertingScore**: Configures Prometheus alerts using the `PrometheusRule` CRD.
|
||||
- **PrometheusAlertScore**: A generic score for creating a Prometheus alert.
|
||||
- **RHOBAlertingScore**: Configures alerts specifically for the Red Hat Observability stack.
|
||||
- **NtfyScore**: Configures alerts to be sent to a `ntfy.sh` server.
|
||||
|
||||
## Infrastructure & Networking (Bare Metal)
|
||||
|
||||
Low-level scores for managing physical hardware and network services.
|
||||
|
||||
- **DhcpScore**: Configures a DHCP server.
|
||||
- **OKDDhcpScore**: A specialized DHCP configuration for the OKD bootstrap process.
|
||||
- **OKDBootstrapDhcpScore**: Configures DHCP specifically for the bootstrap node.
|
||||
- **DhcpHostBindingScore**: Creates a specific MAC-to-IP binding in the DHCP server.
|
||||
- **DnsScore**: Configures a DNS server.
|
||||
- **OKDDnsScore**: A specialized DNS configuration for the OKD cluster (e.g., `api.*`, `*.apps.*`).
|
||||
- **StaticFilesHttpScore**: Serves a directory of static files (e.g., a documentation site) over HTTP.
|
||||
- **TftpScore**: Configures a TFTP server, typically for serving iPXE boot files.
|
||||
- **IPxeMacBootFileScore**: Assigns a specific iPXE boot file to a MAC address in the TFTP server.
|
||||
- **OKDIpxeScore**: A specialized score for generating the iPXE boot scripts for OKD.
|
||||
- **OPNsenseShellCommandScore**: Executes a shell command on an OPNsense firewall.
|
||||
|
||||
## Infrastructure & Networking (Cluster)
|
||||
|
||||
Network services that run inside the cluster or as part of the topology.
|
||||
|
||||
- **LoadBalancerScore**: Configures a general-purpose load balancer.
|
||||
- **OKDLoadBalancerScore**: Configures the high-availability load balancers for the OKD API and ingress.
|
||||
- **OKDBootstrapLoadBalancerScore**: Configures the load balancer specifically for the bootstrap-time API endpoint.
|
||||
- **K8sIngressScore**: Configures an Ingress controller or resource.
|
||||
- [HighAvailabilityHostNetworkScore](../../harmony/src/modules/okd/host_network.rs): Configures network bonds on a host and the corresponding port-channels on the switch stack for high-availability.
|
||||
|
||||
## Tenant Management
|
||||
|
||||
Scores for managing multi-tenancy within a cluster.
|
||||
|
||||
- **TenantScore**: Creates a new tenant (e.g., a namespace, quotas, network policies).
|
||||
- **TenantCredentialScore**: Generates and provisions credentials for a new tenant.
|
||||
|
||||
## Utility
|
||||
|
||||
Helper scores for discovery and inspection.
|
||||
|
||||
- **LaunchDiscoverInventoryAgentScore**: Launches the agent responsible for the `OKDSetup01InventoryScore`.
|
||||
- **DiscoverHostForRoleScore**: A utility score to find a host matching a specific role in the inventory.
|
||||
- **InspectInventoryScore**: Dumps the discovered inventory for inspection.
|
||||
59
docs/catalogs/topologies.md
Normal file
59
docs/catalogs/topologies.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Topologies Catalog
|
||||
|
||||
A `Topology` is the logical representation of your infrastructure and its `Capabilities`. You select a `Topology` in your Harmony project to define _where_ your `Scores` will be applied.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Topologies Catalog](#topologies-catalog)
|
||||
- [HAClusterTopology](#haclustertopology)
|
||||
- [K8sAnywhereTopology](#k8sanywheretopology)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
### HAClusterTopology
|
||||
|
||||
- **`HAClusterTopology::autoload()`**
|
||||
|
||||
This `Topology` represents a high-availability, bare-metal cluster. It is designed for production-grade deployments like OKD.
|
||||
|
||||
It models an environment consisting of:
|
||||
|
||||
- At least 3 cluster nodes (for control plane/workers)
|
||||
- 2 redundant firewalls (e.g., OPNsense)
|
||||
- 2 redundant network switches
|
||||
|
||||
**Provided Capabilities:**
|
||||
This topology provides a rich set of capabilities required for bare-metal provisioning and cluster management, including:
|
||||
|
||||
- `K8sClient` (once the cluster is bootstrapped)
|
||||
- `DnsServer`
|
||||
- `LoadBalancer`
|
||||
- `DhcpServer`
|
||||
- `TftpServer`
|
||||
- `Router` (via the firewalls)
|
||||
- `Switch`
|
||||
- `NetworkManager` (for host-level network config)
|
||||
|
||||
---
|
||||
|
||||
### K8sAnywhereTopology
|
||||
|
||||
- **`K8sAnywhereTopology::from_env()`**
|
||||
|
||||
This `Topology` is designed for development and application deployment. It provides a simple, abstract way to deploy to _any_ Kubernetes cluster.
|
||||
|
||||
**How it works:**
|
||||
|
||||
1. By default (`from_env()` with no env vars), it automatically provisions a **local K3D (k3s-in-docker) cluster** on your machine. This is perfect for local development and testing.
|
||||
2. If you provide a `KUBECONFIG` environment variable, it will instead connect to that **existing Kubernetes cluster** (e.g., your staging or production OKD cluster).
|
||||
|
||||
This allows you to use the _exact same code_ to deploy your application locally as you do to deploy it to production.
|
||||
|
||||
**Provided Capabilities:**
|
||||
|
||||
- `K8sClient`
|
||||
- `HelmCommand`
|
||||
- `TenantManager`
|
||||
- `Ingress`
|
||||
- `Monitoring`
|
||||
- ...and more.
|
||||
40
docs/concepts.md
Normal file
40
docs/concepts.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Core Concepts
|
||||
|
||||
Harmony's design is based on a few key concepts. Understanding them is the key to unlocking the framework's power.
|
||||
|
||||
### 1. Score
|
||||
|
||||
- **What it is:** A **Score** is a declarative description of a desired state. It's a "resource" that defines _what_ you want to achieve, not _how_ to do it.
|
||||
- **Example:** `ApplicationScore` declares "I want this web application to be running and monitored."
|
||||
|
||||
### 2. Topology
|
||||
|
||||
- **What it is:** A **Topology** is the logical representation of your infrastructure and its abilities. It's the "where" your Scores will be applied.
|
||||
- **Key Job:** A Topology's most important job is to expose which `Capabilities` it supports.
|
||||
- **Example:** `HAClusterTopology` represents a bare-metal cluster and exposes `Capabilities` like `NetworkManager` and `Switch`. `K8sAnywhereTopology` represents a Kubernetes cluster and exposes the `K8sClient` `Capability`.
|
||||
|
||||
### 3. Capability
|
||||
|
||||
- **What it is:** A **Capability** is a specific feature or API that a `Topology` offers. It's the "how" a `Topology` can fulfill a `Score`'s request.
|
||||
- **Example:** The `K8sClient` capability offers a way to interact with a Kubernetes API. The `Switch` capability offers a way to configure a physical network switch.
|
||||
|
||||
### 4. Interpret
|
||||
|
||||
- **What it is:** An **Interpret** is the execution logic that makes a `Score` a reality. It's the "glue" that connects the _desired state_ (`Score`) to the _environment's abilities_ (`Topology`'s `Capabilities`).
|
||||
- **How it works:** When you apply a `Score`, Harmony finds the matching `Interpret` for your `Topology`. This `Interpret` then uses the `Capabilities` provided by the `Topology` to execute the necessary steps.
|
||||
|
||||
### 5. Inventory
|
||||
|
||||
- **What it is:** An **Inventory** is the physical material (the "what") used in a cluster. This is most relevant for bare-metal or on-premise topologies.
|
||||
- **Example:** A list of nodes with their roles (control plane, worker), CPU, RAM, and network interfaces. For the `K8sAnywhereTopology`, the inventory might be empty or autoloaded, as the infrastructure is more abstract.
|
||||
|
||||
---
|
||||
|
||||
### How They Work Together (The Compile-Time Check)
|
||||
|
||||
1. You **write a `Score`** (e.g., `ApplicationScore`).
|
||||
2. Your `Score`'s `Interpret` logic requires certain **`Capabilities`** (e.g., `K8sClient` and `Ingress`).
|
||||
3. You choose a **`Topology`** to run it on (e.g., `HAClusterTopology`).
|
||||
4. **At compile-time**, Harmony checks: "Does `HAClusterTopology` provide the `K8sClient` and `Ingress` capabilities that `ApplicationScore` needs?"
|
||||
- **If Yes:** Your code compiles. You can be confident it will run.
|
||||
- **If No:** The compiler gives you an error. You've just prevented a "config-is-valid-but-platform-is-wrong" runtime error before you even deployed.
|
||||
42
docs/guides/getting-started.md
Normal file
42
docs/guides/getting-started.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Getting Started Guide
|
||||
|
||||
Welcome to Harmony! This guide will walk you through installing the Harmony framework, setting up a new project, and deploying your first application.
|
||||
|
||||
We will build and deploy the "Rust Web App" example, which automatically:
|
||||
|
||||
1. Provisions a local K3D (Kubernetes in Docker) cluster.
|
||||
2. Deploys a sample Rust web application.
|
||||
3. Sets up monitoring for the application.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you begin, you'll need a few tools installed on your system:
|
||||
|
||||
- **Rust & Cargo:** [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
- **Docker:** [Install Docker](https://docs.docker.com/get-docker/) (Required for the K3D local cluster)
|
||||
- **kubectl:** [Install kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) (For inspecting the cluster)
|
||||
|
||||
## 1. Install Harmony
|
||||
|
||||
First, clone the Harmony repository and build the project. This gives you the `harmony` CLI and all the core libraries.
|
||||
|
||||
```bash
|
||||
# Clone the main repository
|
||||
git clone https://git.nationtech.io/nationtech/harmony
|
||||
cd harmony
|
||||
|
||||
# Build the project (this may take a few minutes)
|
||||
cargo build --release
|
||||
```
|
||||
|
||||
...
|
||||
|
||||
## Next Steps
|
||||
|
||||
Congratulations, you've just deployed an application using true infrastructure-as-code!
|
||||
|
||||
From here, you can:
|
||||
|
||||
- [Explore the Catalogs](../catalogs/README.md): See what other [Scores](../catalogs/scores.md) and [Topologies](../catalogs/topologies.md) are available.
|
||||
- [Read the Use Cases](../use-cases/README.md): Check out the [OKD on Bare Metal](../use-cases/okd-on-bare-metal.md) guide for a more advanced scenario.
|
||||
- [Write your own Score](../guides/writing-a-score.md): Dive into the [Developer Guide](../guides/developer-guide.md) to start building your own components.
|
||||
@@ -1,22 +1,28 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use brocade::{BrocadeOptions, PortOperatingMode};
|
||||
use harmony::{
|
||||
data::Version,
|
||||
infra::brocade::BrocadeSwitchClient,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
infra::brocade::BrocadeSwitchConfig,
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HostNetworkConfig, PortConfig, PreparationError, PreparationOutcome, Switch, SwitchClient,
|
||||
SwitchError, Topology,
|
||||
},
|
||||
modules::brocade::{BrocadeSwitchAuth, BrocadeSwitchScore, SwitchTopology},
|
||||
};
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{id::Id, net::MacAddress, switch::PortLocation};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
use harmony_types::{id::Id, switch::PortLocation};
|
||||
|
||||
fn get_switch_config() -> BrocadeSwitchConfig {
|
||||
let mut options = BrocadeOptions::default();
|
||||
options.ssh.port = 2222;
|
||||
let auth = BrocadeSwitchAuth {
|
||||
username: "admin".to_string(),
|
||||
password: "password".to_string(),
|
||||
};
|
||||
|
||||
BrocadeSwitchConfig {
|
||||
ips: vec![ip!("127.0.0.1")],
|
||||
auth,
|
||||
options,
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
@@ -32,126 +38,13 @@ async fn main() {
|
||||
(PortLocation(1, 0, 18), PortOperatingMode::Trunk),
|
||||
],
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
SwitchTopology::new().await,
|
||||
SwitchTopology::new(get_switch_config()).await,
|
||||
vec![Box::new(switch_score)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
struct BrocadeSwitchScore {
|
||||
port_channels_to_clear: Vec<Id>,
|
||||
ports_to_configure: Vec<PortConfig>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Switch> Score<T> for BrocadeSwitchScore {
|
||||
fn name(&self) -> String {
|
||||
"BrocadeSwitchScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(BrocadeSwitchInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct BrocadeSwitchInterpret {
|
||||
score: BrocadeSwitchScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + Switch> Interpret<T> for BrocadeSwitchInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Applying switch configuration {:?}", self.score);
|
||||
debug!(
|
||||
"Clearing port channel {:?}",
|
||||
self.score.port_channels_to_clear
|
||||
);
|
||||
topology
|
||||
.clear_port_channel(&self.score.port_channels_to_clear)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
debug!("Configuring interfaces {:?}", self.score.ports_to_configure);
|
||||
topology
|
||||
.configure_interface(&self.score.ports_to_configure)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success("switch configured".to_string()))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("BrocadeSwitchInterpret")
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
struct SwitchTopology {
|
||||
client: Box<dyn SwitchClient>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Topology for SwitchTopology {
|
||||
fn name(&self) -> &str {
|
||||
"SwitchTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
impl SwitchTopology {
|
||||
async fn new() -> Self {
|
||||
let mut options = BrocadeOptions::default();
|
||||
options.ssh.port = 2222;
|
||||
let client =
|
||||
BrocadeSwitchClient::init(&vec![ip!("127.0.0.1")], &"admin", &"password", options)
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
let client = Box::new(client);
|
||||
Self { client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Switch for SwitchTopology {
|
||||
async fn setup_switch(&self) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_port_for_mac_address(
|
||||
&self,
|
||||
_mac_address: &MacAddress,
|
||||
) -> Result<Option<PortLocation>, SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn configure_port_channel(&self, _config: &HostNetworkConfig) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
self.client.clear_port_channel(ids).await
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
self.client.configure_interface(ports).await
|
||||
}
|
||||
}
|
||||
|
||||
20
examples/k8s_drain_node/Cargo.toml
Normal file
20
examples/k8s_drain_node/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "example-k8s-drain-node"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
cidr.workspace = true
|
||||
tokio.workspace = true
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
url.workspace = true
|
||||
assert_cmd = "2.0.16"
|
||||
inquire.workspace = true
|
||||
61
examples/k8s_drain_node/src/main.rs
Normal file
61
examples/k8s_drain_node/src/main.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use harmony::topology::k8s::{DrainOptions, K8sClient};
|
||||
use log::{info, trace};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
env_logger::init();
|
||||
let k8s = K8sClient::try_default().await.unwrap();
|
||||
let nodes = k8s.get_nodes(None).await.unwrap();
|
||||
trace!("Got nodes : {nodes:#?}");
|
||||
let node_names = nodes
|
||||
.iter()
|
||||
.map(|n| n.metadata.name.as_ref().unwrap())
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
info!("Got nodes : {:?}", node_names);
|
||||
|
||||
let node_name = inquire::Select::new("What node do you want to operate on?", node_names)
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
let drain = inquire::Confirm::new("Do you wish to drain the node now ?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if drain {
|
||||
let mut options = DrainOptions::default_ignore_daemonset_delete_emptydir_data();
|
||||
options.timeout = Duration::from_secs(1);
|
||||
k8s.drain_node(&node_name, &options).await.unwrap();
|
||||
|
||||
info!("Node {node_name} successfully drained");
|
||||
}
|
||||
|
||||
let uncordon =
|
||||
inquire::Confirm::new("Do you wish to uncordon node to resume scheduling workloads now?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if uncordon {
|
||||
info!("Uncordoning node {node_name}");
|
||||
k8s.uncordon_node(node_name).await.unwrap();
|
||||
info!("Node {node_name} uncordoned");
|
||||
}
|
||||
|
||||
let reboot = inquire::Confirm::new("Do you wish to reboot node now?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if reboot {
|
||||
k8s.reboot_node(
|
||||
&node_name,
|
||||
&DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
|
||||
Duration::from_secs(3600),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
info!("All done playing with nodes, happy harmonizing!");
|
||||
}
|
||||
20
examples/k8s_write_file_on_node/Cargo.toml
Normal file
20
examples/k8s_write_file_on_node/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "example-k8s-write-file-on-node"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
cidr.workspace = true
|
||||
tokio.workspace = true
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
url.workspace = true
|
||||
assert_cmd = "2.0.16"
|
||||
inquire.workspace = true
|
||||
45
examples/k8s_write_file_on_node/src/main.rs
Normal file
45
examples/k8s_write_file_on_node/src/main.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use harmony::topology::k8s::{DrainOptions, K8sClient, NodeFile};
|
||||
use log::{info, trace};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
env_logger::init();
|
||||
let k8s = K8sClient::try_default().await.unwrap();
|
||||
let nodes = k8s.get_nodes(None).await.unwrap();
|
||||
trace!("Got nodes : {nodes:#?}");
|
||||
let node_names = nodes
|
||||
.iter()
|
||||
.map(|n| n.metadata.name.as_ref().unwrap())
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
info!("Got nodes : {:?}", node_names);
|
||||
|
||||
let node = inquire::Select::new("What node do you want to write file to?", node_names)
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
let path = inquire::Text::new("File path on node").prompt().unwrap();
|
||||
let content = inquire::Text::new("File content").prompt().unwrap();
|
||||
|
||||
let node_file = NodeFile {
|
||||
path: path,
|
||||
content: content,
|
||||
mode: 0o600,
|
||||
};
|
||||
|
||||
k8s.write_files_to_node(&node, &vec![node_file.clone()])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let cmd = inquire::Text::new("Command to run on node")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
k8s.run_privileged_command_on_node(&node, &cmd)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
info!(
|
||||
"File {} mode {} written in node {node}",
|
||||
node_file.path, node_file.mode
|
||||
);
|
||||
}
|
||||
@@ -215,7 +215,7 @@ fn site(
|
||||
dns_name: format!("{cluster_name}-gw.{domain}"),
|
||||
supercluster_ca_secret_name: "nats-supercluster-ca-bundle",
|
||||
tls_cert_name: "nats-gateway",
|
||||
jetstream_enabled: "false",
|
||||
jetstream_enabled: "true",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
16
examples/node_health/Cargo.toml
Normal file
16
examples/node_health/Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "example-node-health"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
tokio = { workspace = true }
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
log = { workspace = true }
|
||||
env_logger = { workspace = true }
|
||||
17
examples/node_health/src/main.rs
Normal file
17
examples/node_health/src/main.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
use harmony::{
|
||||
inventory::Inventory, modules::node_health::NodeHealthScore, topology::K8sAnywhereTopology,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let node_health = NodeHealthScore {};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
vec![Box::new(node_health)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
@@ -2,8 +2,12 @@ use brocade::BrocadeOptions;
|
||||
use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
hardware::{Location, SwitchGroup},
|
||||
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
|
||||
infra::{
|
||||
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
opnsense::OPNSenseManagementInterface,
|
||||
},
|
||||
inventory::Inventory,
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4};
|
||||
@@ -36,12 +40,11 @@ pub async fn get_topology() -> HAClusterTopology {
|
||||
dry_run: *harmony::config::DRY_RUN,
|
||||
..Default::default()
|
||||
};
|
||||
let switch_client = BrocadeSwitchClient::init(
|
||||
&switches,
|
||||
&switch_auth.username,
|
||||
&switch_auth.password,
|
||||
brocade_options,
|
||||
)
|
||||
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
|
||||
ips: switches,
|
||||
auth: switch_auth,
|
||||
options: brocade_options,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
@@ -103,9 +106,3 @@ pub fn get_inventory() -> Inventory {
|
||||
control_plane_host: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
@@ -3,14 +3,16 @@ use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
config::secret::OPNSenseFirewallCredentials,
|
||||
hardware::{Location, SwitchGroup},
|
||||
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
|
||||
infra::{
|
||||
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
opnsense::OPNSenseManagementInterface,
|
||||
},
|
||||
inventory::Inventory,
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4};
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use harmony_secret::SecretManager;
|
||||
use std::{
|
||||
net::IpAddr,
|
||||
sync::{Arc, OnceLock},
|
||||
@@ -31,12 +33,11 @@ pub async fn get_topology() -> HAClusterTopology {
|
||||
dry_run: *harmony::config::DRY_RUN,
|
||||
..Default::default()
|
||||
};
|
||||
let switch_client = BrocadeSwitchClient::init(
|
||||
&switches,
|
||||
&switch_auth.username,
|
||||
&switch_auth.password,
|
||||
brocade_options,
|
||||
)
|
||||
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
|
||||
ips: switches,
|
||||
auth: switch_auth,
|
||||
options: brocade_options,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
@@ -98,9 +99,3 @@ pub fn get_inventory() -> Inventory {
|
||||
control_plane_host: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
@@ -1,63 +1,13 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::helm::chart::{HelmChartScore, HelmRepository, NonBlankString},
|
||||
topology::K8sAnywhereTopology,
|
||||
inventory::Inventory, modules::openbao::OpenbaoScore, topology::K8sAnywhereTopology,
|
||||
};
|
||||
use harmony_macros::hurl;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let values_yaml = Some(
|
||||
r#"server:
|
||||
standalone:
|
||||
enabled: true
|
||||
config: |
|
||||
listener "tcp" {
|
||||
tls_disable = true
|
||||
address = "[::]:8200"
|
||||
cluster_address = "[::]:8201"
|
||||
}
|
||||
|
||||
storage "file" {
|
||||
path = "/openbao/data"
|
||||
}
|
||||
|
||||
service:
|
||||
enabled: true
|
||||
|
||||
dataStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
storageClass: null
|
||||
accessMode: ReadWriteOnce
|
||||
|
||||
auditStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
storageClass: null
|
||||
accessMode: ReadWriteOnce"#
|
||||
.to_string(),
|
||||
);
|
||||
let openbao = HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str("openbao").unwrap()),
|
||||
release_name: NonBlankString::from_str("openbao").unwrap(),
|
||||
chart_name: NonBlankString::from_str("openbao/openbao").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml,
|
||||
create_namespace: true,
|
||||
install_only: true,
|
||||
repository: Some(HelmRepository::new(
|
||||
"openbao".to_string(),
|
||||
hurl!("https://openbao.github.io/openbao-helm"),
|
||||
true,
|
||||
)),
|
||||
let openbao = OpenbaoScore {
|
||||
host: String::new(),
|
||||
};
|
||||
|
||||
// TODO exec pod commands to initialize secret store if not already done
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
|
||||
@@ -5,6 +5,10 @@ version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[[example]]
|
||||
name = "try_rust_webapp"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
|
||||
@@ -108,11 +108,18 @@ impl PhysicalHost {
|
||||
};
|
||||
|
||||
let storage_summary = if drive_count > 1 {
|
||||
let drive_sizes = self
|
||||
.storage
|
||||
.iter()
|
||||
.map(|d| format_storage(d.size_bytes))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
format!(
|
||||
"{} Storage ({}x {})",
|
||||
"{} Storage ({} Disks [{}])",
|
||||
format_storage(total_storage_bytes),
|
||||
drive_count,
|
||||
first_drive_model
|
||||
drive_sizes
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use async_trait::async_trait;
|
||||
use brocade::PortOperatingMode;
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{
|
||||
id::Id,
|
||||
@@ -9,9 +8,9 @@ use harmony_types::{
|
||||
use log::debug;
|
||||
use log::info;
|
||||
|
||||
use crate::topology::{HelmCommand, PxeOptions};
|
||||
use crate::{data::FileContent, executors::ExecutorError, topology::node_exporter::NodeExporter};
|
||||
use crate::{infra::network_manager::OpenShiftNmStateNetworkManager, topology::PortConfig};
|
||||
use crate::{modules::inventory::HarmonyDiscoveryStrategy, topology::PxeOptions};
|
||||
|
||||
use super::{
|
||||
DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig,
|
||||
@@ -19,7 +18,10 @@ use super::{
|
||||
NetworkManager, PreparationError, PreparationOutcome, Router, Switch, SwitchClient,
|
||||
SwitchError, TftpServer, Topology, k8s::K8sClient,
|
||||
};
|
||||
use std::sync::{Arc, OnceLock};
|
||||
use std::{
|
||||
process::Command,
|
||||
sync::{Arc, OnceLock},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct HAClusterTopology {
|
||||
@@ -53,6 +55,30 @@ impl Topology for HAClusterTopology {
|
||||
}
|
||||
}
|
||||
|
||||
impl HelmCommand for HAClusterTopology {
|
||||
fn get_helm_command(&self) -> Command {
|
||||
let mut cmd = Command::new("helm");
|
||||
if let Some(k) = &self.kubeconfig {
|
||||
cmd.args(["--kubeconfig", k]);
|
||||
}
|
||||
|
||||
// FIXME we should support context anywhere there is a k8sclient
|
||||
// This likely belongs in the k8sclient itself and should be extracted to a separate
|
||||
// crate
|
||||
//
|
||||
// I feel like helm could very well be a feature of this external k8s client.
|
||||
//
|
||||
// Same for kustomize
|
||||
//
|
||||
// if let Some(c) = &self.k8s_context {
|
||||
// cmd.args(["--kube-context", c]);
|
||||
// }
|
||||
|
||||
info!("Using helm command {cmd:?}");
|
||||
cmd
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl K8sclient for HAClusterTopology {
|
||||
async fn k8s_client(&self) -> Result<Arc<K8sClient>, String> {
|
||||
@@ -301,10 +327,10 @@ impl Switch for HAClusterTopology {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
async fn clear_port_channel(&self, _ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
async fn configure_interface(&self, _ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -322,7 +348,15 @@ impl NetworkManager for HAClusterTopology {
|
||||
self.network_manager().await.configure_bond(config).await
|
||||
}
|
||||
|
||||
//TODO add snmp here
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
self.network_manager()
|
||||
.await
|
||||
.configure_bond_on_primary_interface(config)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -562,10 +596,10 @@ impl SwitchClient for DummyInfra {
|
||||
) -> Result<u8, SwitchError> {
|
||||
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
async fn clear_port_channel(&self, _ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
async fn configure_interface(&self, _ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
133
harmony/src/domain/topology/k8s/bundle.rs
Normal file
133
harmony/src/domain/topology/k8s/bundle.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Resource Bundle Pattern Implementation
|
||||
//!
|
||||
//! This module implements the Resource Bundle pattern for managing groups of
|
||||
//! Kubernetes resources that form a logical unit of work.
|
||||
//!
|
||||
//! ## Purpose
|
||||
//!
|
||||
//! The ResourceBundle pattern addresses the need to manage ephemeral privileged
|
||||
//! pods along with their platform-specific security requirements (e.g., OpenShift
|
||||
//! Security Context Constraints).
|
||||
//!
|
||||
//! ## Use Cases
|
||||
//!
|
||||
//! - Writing files to node filesystems (e.g., NetworkManager configurations for
|
||||
//! network bonding as described in ADR-019)
|
||||
//! - Running privileged commands on nodes (e.g., reboots, system configuration)
|
||||
//!
|
||||
//! ## Benefits
|
||||
//!
|
||||
//! - **Separation of Concerns**: Client code doesn't need to know about
|
||||
//! platform-specific RBAC requirements
|
||||
//! - **Atomic Operations**: Resources are applied and deleted as a unit
|
||||
//! - **Clean Abstractions**: Privileged operations are encapsulated in bundles
|
||||
//! rather than scattered throughout client methods
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```rust,no_run
|
||||
//! use harmony::topology::k8s::{K8sClient, helper};
|
||||
//! use harmony::topology::KubernetesDistribution;
|
||||
//!
|
||||
//! async fn write_network_config(client: &K8sClient, node: &str) {
|
||||
//! // Create a bundle with platform-specific RBAC
|
||||
//! let bundle = helper::build_privileged_bundle(
|
||||
//! helper::PrivilegedPodConfig {
|
||||
//! name: "network-config".to_string(),
|
||||
//! namespace: "default".to_string(),
|
||||
//! node_name: node.to_string(),
|
||||
//! // ... other config
|
||||
//! ..Default::default()
|
||||
//! },
|
||||
//! &KubernetesDistribution::OpenshiftFamily,
|
||||
//! );
|
||||
//!
|
||||
//! // Apply all resources (RBAC + Pod) atomically
|
||||
//! bundle.apply(client).await.unwrap();
|
||||
//!
|
||||
//! // ... wait for completion ...
|
||||
//!
|
||||
//! // Cleanup all resources
|
||||
//! bundle.delete(client).await.unwrap();
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use kube::{Error, Resource, ResourceExt, api::DynamicObject};
|
||||
use serde::Serialize;
|
||||
use serde_json;
|
||||
|
||||
use crate::domain::topology::k8s::K8sClient;
|
||||
|
||||
/// A ResourceBundle represents a logical unit of work consisting of multiple
|
||||
/// Kubernetes resources that should be applied or deleted together.
|
||||
///
|
||||
/// This pattern is useful for managing ephemeral privileged pods along with
|
||||
/// their required RBAC bindings (e.g., OpenShift SCC bindings).
|
||||
#[derive(Debug)]
|
||||
pub struct ResourceBundle {
|
||||
pub resources: Vec<DynamicObject>,
|
||||
}
|
||||
|
||||
impl ResourceBundle {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
resources: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a Kubernetes resource to this bundle.
|
||||
/// The resource is converted to a DynamicObject for generic handling.
|
||||
pub fn add<K>(&mut self, resource: K)
|
||||
where
|
||||
K: Resource + Serialize,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
// Convert the typed resource to JSON, then to DynamicObject
|
||||
let json = serde_json::to_value(&resource).expect("Failed to serialize resource");
|
||||
let mut obj: DynamicObject =
|
||||
serde_json::from_value(json).expect("Failed to convert to DynamicObject");
|
||||
|
||||
// Ensure type metadata is set
|
||||
if obj.types.is_none() {
|
||||
let api_version = Default::default();
|
||||
let kind = Default::default();
|
||||
let gvk = K::api_version(&api_version);
|
||||
let kind = K::kind(&kind);
|
||||
obj.types = Some(kube::api::TypeMeta {
|
||||
api_version: gvk.to_string(),
|
||||
kind: kind.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
self.resources.push(obj);
|
||||
}
|
||||
|
||||
/// Apply all resources in this bundle to the cluster.
|
||||
/// Resources are applied in the order they were added.
|
||||
pub async fn apply(&self, client: &K8sClient) -> Result<(), Error> {
|
||||
for res in &self.resources {
|
||||
let namespace = res.namespace();
|
||||
client
|
||||
.apply_dynamic(res, namespace.as_deref(), true)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete all resources in this bundle from the cluster.
|
||||
/// Resources are deleted in reverse order to respect dependencies.
|
||||
pub async fn delete(&self, client: &K8sClient) -> Result<(), Error> {
|
||||
// FIXME delete all in parallel and retry using kube::client::retry::RetryPolicy
|
||||
for res in self.resources.iter().rev() {
|
||||
let api = client.get_api_for_dynamic_object(res, res.namespace().as_deref())?;
|
||||
let name = res.name_any();
|
||||
// FIXME this swallows all errors. Swallowing a 404 is ok but other errors must be
|
||||
// handled properly (such as retrying). A normal error case is when we delete a
|
||||
// resource bundle with dependencies between various resources. Such as a pod with a
|
||||
// dependency on a ClusterRoleBinding. Trying to delete the ClusterRoleBinding first
|
||||
// is expected to fail
|
||||
let _ = api.delete(&name, &kube::api::DeleteParams::default()).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
1
harmony/src/domain/topology/k8s/config.rs
Normal file
1
harmony/src/domain/topology/k8s/config.rs
Normal file
@@ -0,0 +1 @@
|
||||
/// Container image reference used for the container of privileged helper pods.
// NOTE(review): the reference uses the floating `latest` tag, so the image content is
// not pinned — confirm this is intended.
pub const PRIVILEGED_POD_IMAGE: &str = "hub.nationtech.io/redhat/ubi10:latest";
|
||||
613
harmony/src/domain/topology/k8s/helper.rs
Normal file
613
harmony/src/domain/topology/k8s/helper.rs
Normal file
@@ -0,0 +1,613 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::topology::KubernetesDistribution;
|
||||
|
||||
use super::bundle::ResourceBundle;
|
||||
use super::config::PRIVILEGED_POD_IMAGE;
|
||||
use k8s_openapi::api::core::v1::{
|
||||
Container, HostPathVolumeSource, Pod, PodSpec, SecurityContext, Volume, VolumeMount,
|
||||
};
|
||||
use k8s_openapi::api::rbac::v1::{ClusterRoleBinding, RoleRef, Subject};
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||
use kube::api::DynamicObject;
|
||||
use kube::error::DiscoveryError;
|
||||
use log::{debug, error, info, warn};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PrivilegedPodConfig {
|
||||
pub name: String,
|
||||
pub namespace: String,
|
||||
pub node_name: String,
|
||||
pub container_name: String,
|
||||
pub command: Vec<String>,
|
||||
pub volumes: Vec<Volume>,
|
||||
pub volume_mounts: Vec<VolumeMount>,
|
||||
pub host_pid: bool,
|
||||
pub host_network: bool,
|
||||
}
|
||||
|
||||
impl Default for PrivilegedPodConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: "privileged-pod".to_string(),
|
||||
namespace: "harmony".to_string(),
|
||||
node_name: "".to_string(),
|
||||
container_name: "privileged-container".to_string(),
|
||||
command: vec![],
|
||||
volumes: vec![],
|
||||
volume_mounts: vec![],
|
||||
host_pid: false,
|
||||
host_network: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_privileged_pod(
|
||||
config: PrivilegedPodConfig,
|
||||
k8s_distribution: &KubernetesDistribution,
|
||||
) -> Pod {
|
||||
let annotations = match k8s_distribution {
|
||||
KubernetesDistribution::OpenshiftFamily => Some(BTreeMap::from([
|
||||
("openshift.io/scc".to_string(), "privileged".to_string()),
|
||||
(
|
||||
"openshift.io/required-scc".to_string(),
|
||||
"privileged".to_string(),
|
||||
),
|
||||
])),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
Pod {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(config.name),
|
||||
namespace: Some(config.namespace),
|
||||
annotations,
|
||||
..Default::default()
|
||||
},
|
||||
spec: Some(PodSpec {
|
||||
node_name: Some(config.node_name),
|
||||
restart_policy: Some("Never".to_string()),
|
||||
host_pid: Some(config.host_pid),
|
||||
host_network: Some(config.host_network),
|
||||
containers: vec![Container {
|
||||
name: config.container_name,
|
||||
image: Some(PRIVILEGED_POD_IMAGE.to_string()),
|
||||
command: Some(config.command),
|
||||
security_context: Some(SecurityContext {
|
||||
privileged: Some(true),
|
||||
..Default::default()
|
||||
}),
|
||||
volume_mounts: Some(config.volume_mounts),
|
||||
..Default::default()
|
||||
}],
|
||||
volumes: Some(config.volumes),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn host_root_volume() -> (Volume, VolumeMount) {
|
||||
(
|
||||
Volume {
|
||||
name: "host".to_string(),
|
||||
host_path: Some(HostPathVolumeSource {
|
||||
path: "/".to_string(),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
VolumeMount {
|
||||
name: "host".to_string(),
|
||||
mount_path: "/host".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Build a ResourceBundle containing a privileged pod and any required RBAC.
|
||||
///
|
||||
/// This function implements the Resource Bundle pattern to encapsulate platform-specific
|
||||
/// security requirements for running privileged operations on nodes.
|
||||
///
|
||||
/// # Platform-Specific Behavior
|
||||
///
|
||||
/// - **OpenShift**: Creates a ClusterRoleBinding to grant the default ServiceAccount
|
||||
/// access to the `system:openshift:scc:privileged` ClusterRole, which allows the pod
|
||||
/// to use the privileged Security Context Constraint (SCC).
|
||||
/// - **Standard Kubernetes/K3s**: Only creates the Pod resource, as these distributions
|
||||
/// use standard PodSecurityPolicy or don't enforce additional security constraints.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `config` - Configuration for the privileged pod (name, namespace, command, etc.)
|
||||
/// * `k8s_distribution` - The detected Kubernetes distribution to determine RBAC requirements
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A `ResourceBundle` containing 1-2 resources:
|
||||
/// - ClusterRoleBinding (OpenShift only)
|
||||
/// - Pod (all distributions)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust,no_run
|
||||
/// # use harmony::topology::k8s::helper::{build_privileged_bundle, PrivilegedPodConfig};
|
||||
/// # use harmony::topology::KubernetesDistribution;
|
||||
/// let bundle = build_privileged_bundle(
|
||||
/// PrivilegedPodConfig {
|
||||
/// name: "network-setup".to_string(),
|
||||
/// namespace: "default".to_string(),
|
||||
/// node_name: "worker-01".to_string(),
|
||||
/// container_name: "setup".to_string(),
|
||||
/// command: vec!["nmcli".to_string(), "connection".to_string(), "reload".to_string()],
|
||||
/// ..Default::default()
|
||||
/// },
|
||||
/// &KubernetesDistribution::OpenshiftFamily,
|
||||
/// );
|
||||
/// // Bundle now contains ClusterRoleBinding + Pod
|
||||
/// ```
|
||||
pub fn build_privileged_bundle(
|
||||
config: PrivilegedPodConfig,
|
||||
k8s_distribution: &KubernetesDistribution,
|
||||
) -> ResourceBundle {
|
||||
debug!(
|
||||
"Building privileged bundle for config {config:#?} on distribution {k8s_distribution:?}"
|
||||
);
|
||||
let mut bundle = ResourceBundle::new();
|
||||
let pod_name = config.name.clone();
|
||||
let namespace = config.namespace.clone();
|
||||
|
||||
// 1. On OpenShift, create RBAC binding to privileged SCC
|
||||
if let KubernetesDistribution::OpenshiftFamily = k8s_distribution {
|
||||
// The default ServiceAccount needs to be bound to the privileged SCC
|
||||
// via the system:openshift:scc:privileged ClusterRole
|
||||
let crb = ClusterRoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("{}-scc-binding", pod_name)),
|
||||
..Default::default()
|
||||
},
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".to_string(),
|
||||
kind: "ClusterRole".to_string(),
|
||||
name: "system:openshift:scc:privileged".to_string(),
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".to_string(),
|
||||
name: "default".to_string(),
|
||||
namespace: Some(namespace.clone()),
|
||||
api_group: None,
|
||||
..Default::default()
|
||||
}]),
|
||||
};
|
||||
bundle.add(crb);
|
||||
}
|
||||
|
||||
// 2. Build the privileged pod
|
||||
let pod = build_privileged_pod(config, k8s_distribution);
|
||||
bundle.add(pod);
|
||||
|
||||
bundle
|
||||
}
|
||||
|
||||
/// Action to take when a drain operation times out.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DrainTimeoutAction {
    /// Accept the partial drain and continue
    Accept,
    /// Retry the drain for another timeout period
    Retry,
    /// Abort the drain operation
    Abort,
}
|
||||
|
||||
/// Prompts the user to confirm acceptance of a partial drain.
|
||||
///
|
||||
/// Returns `Ok(true)` if the user confirms acceptance, `Ok(false)` if the user
|
||||
/// chooses to retry or abort, and `Err` if the prompt system fails entirely.
|
||||
pub fn prompt_drain_timeout_action(
|
||||
node_name: &str,
|
||||
pending_count: usize,
|
||||
timeout_duration: Duration,
|
||||
) -> Result<DrainTimeoutAction, kube::Error> {
|
||||
let prompt_msg = format!(
|
||||
"Drain operation timed out on node '{}' with {} pod(s) remaining. What would you like to do?",
|
||||
node_name, pending_count
|
||||
);
|
||||
|
||||
loop {
|
||||
let choices = vec![
|
||||
"Accept drain failure (requires confirmation)".to_string(),
|
||||
format!("Retry drain for another {:?}", timeout_duration),
|
||||
"Abort operation".to_string(),
|
||||
];
|
||||
|
||||
let selection = inquire::Select::new(&prompt_msg, choices)
|
||||
.with_help_message("Use arrow keys to navigate, Enter to select")
|
||||
.prompt()
|
||||
.map_err(|e| {
|
||||
kube::Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Prompt failed: {}",
|
||||
e
|
||||
)))
|
||||
})?;
|
||||
|
||||
if selection.starts_with("Accept") {
|
||||
// Require typed confirmation - retry until correct or user cancels
|
||||
let required_confirmation = format!("yes-accept-drain:{}={}", node_name, pending_count);
|
||||
|
||||
let confirmation_prompt = format!(
|
||||
"To accept this partial drain, type exactly: {}",
|
||||
required_confirmation
|
||||
);
|
||||
|
||||
match inquire::Text::new(&confirmation_prompt)
|
||||
.with_help_message(&format!(
|
||||
"This action acknowledges {} pods will remain on the node",
|
||||
pending_count
|
||||
))
|
||||
.prompt()
|
||||
{
|
||||
Ok(input) if input == required_confirmation => {
|
||||
warn!(
|
||||
"User accepted partial drain of node '{}' with {} pods remaining (confirmation: {})",
|
||||
node_name, pending_count, required_confirmation
|
||||
);
|
||||
return Ok(DrainTimeoutAction::Accept);
|
||||
}
|
||||
Ok(input) => {
|
||||
warn!(
|
||||
"Confirmation failed. Expected '{}', got '{}'. Please try again.",
|
||||
required_confirmation, input
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
// User cancelled (Ctrl+C) or prompt system failed
|
||||
error!("Confirmation prompt cancelled or failed: {}", e);
|
||||
return Ok(DrainTimeoutAction::Abort);
|
||||
}
|
||||
}
|
||||
} else if selection.starts_with("Retry") {
|
||||
info!(
|
||||
"User chose to retry drain operation for another {:?}",
|
||||
timeout_duration
|
||||
);
|
||||
return Ok(DrainTimeoutAction::Retry);
|
||||
} else {
|
||||
error!("Drain operation aborted by user");
|
||||
return Ok(DrainTimeoutAction::Abort);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// JSON round-trip: DynamicObject → K
|
||||
///
|
||||
/// Safe because the DynamicObject was produced by the apiserver from a
|
||||
/// payload that was originally serialized from K, so the schema is identical.
|
||||
pub(crate) fn dyn_to_typed<K: DeserializeOwned>(obj: DynamicObject) -> Result<K, kube::Error> {
|
||||
serde_json::to_value(obj)
|
||||
.and_then(serde_json::from_value)
|
||||
.map_err(kube::Error::SerdeError)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
/// The host root helper must pair a `/` hostPath volume with a `/host` mount, both
/// named `host`.
#[test]
fn test_host_root_volume() {
    let (root_volume, root_mount) = host_root_volume();

    assert_eq!(root_volume.name, "host");
    let host_path = root_volume.host_path.as_ref().unwrap();
    assert_eq!(host_path.path, "/");

    assert_eq!(root_mount.name, "host");
    assert_eq!(root_mount.mount_path, "/host");
}
|
||||
|
||||
/// A config that only sets identity and command yields a pinned, non-restarting,
/// privileged pod without host PID / network sharing.
#[test]
fn test_build_privileged_pod_minimal() {
    let config = PrivilegedPodConfig {
        name: "minimal-pod".to_string(),
        namespace: "kube-system".to_string(),
        node_name: "node-123".to_string(),
        container_name: "debug-container".to_string(),
        command: vec!["sleep".to_string(), "3600".to_string()],
        ..Default::default()
    };
    let pod = build_privileged_pod(config, &KubernetesDistribution::Default);

    assert_eq!(pod.metadata.name.as_deref(), Some("minimal-pod"));
    assert_eq!(pod.metadata.namespace.as_deref(), Some("kube-system"));

    let spec = pod.spec.as_ref().expect("Pod spec should be present");
    assert_eq!(spec.node_name.as_deref(), Some("node-123"));
    assert_eq!(spec.restart_policy.as_deref(), Some("Never"));
    assert_eq!(spec.host_pid, Some(false));
    assert_eq!(spec.host_network, Some(false));

    assert_eq!(spec.containers.len(), 1);
    let container = &spec.containers[0];
    assert_eq!(container.name, "debug-container");
    assert_eq!(container.image.as_deref(), Some(PRIVILEGED_POD_IMAGE));

    let expected_command = vec!["sleep".to_string(), "3600".to_string()];
    assert_eq!(container.command.as_ref(), Some(&expected_command));

    // Security context check
    let sec_ctx = container
        .security_context
        .as_ref()
        .expect("Security context missing");
    assert_eq!(sec_ctx.privileged, Some(true));
}
|
||||
|
||||
/// Volumes, mounts and the host PID / network flags from the config must all land
/// in the generated pod.
#[test]
fn test_build_privileged_pod_with_volumes_and_host_access() {
    let (host_vol, host_mount) = host_root_volume();

    let config = PrivilegedPodConfig {
        name: "full-pod".to_string(),
        namespace: "default".to_string(),
        node_name: "node-1".to_string(),
        container_name: "runner".to_string(),
        command: vec!["/bin/sh".to_string()],
        volumes: vec![host_vol],
        volume_mounts: vec![host_mount],
        host_pid: true,
        host_network: true,
    };
    let pod = build_privileged_pod(config, &KubernetesDistribution::Default);

    let spec = pod.spec.as_ref().expect("Pod spec should be present");
    assert_eq!(spec.host_pid, Some(true));
    assert_eq!(spec.host_network, Some(true));

    // Volumes live on the pod spec
    let pod_volumes = spec.volumes.as_ref().expect("Volumes should be present");
    assert_eq!(pod_volumes.len(), 1);
    assert_eq!(pod_volumes[0].name, "host");

    // Mounts live on the container
    let first_container = &spec.containers[0];
    let container_mounts = first_container
        .volume_mounts
        .as_ref()
        .expect("Mounts should be present");
    assert_eq!(container_mounts.len(), 1);
    assert_eq!(container_mounts[0].name, "host");
    assert_eq!(container_mounts[0].mount_path, "/host");
}
|
||||
|
||||
/// Validates the "template": each piece of the config ends up at the expected depth
/// of the generated pod (spec level, container level, nested fields).
#[test]
fn test_build_privileged_pod_structure_correctness() {
    let custom_vol = Volume {
        name: "custom-vol".to_string(),
        ..Default::default()
    };
    let custom_mount = VolumeMount {
        name: "custom-vol".to_string(),
        mount_path: "/custom".to_string(),
        ..Default::default()
    };

    let config = PrivilegedPodConfig {
        name: "structure-test".to_string(),
        namespace: "test-ns".to_string(),
        node_name: "test-node".to_string(),
        container_name: "test-container".to_string(),
        command: vec!["cmd".to_string()],
        volumes: vec![custom_vol],
        volume_mounts: vec![custom_mount],
        ..Default::default()
    };
    let pod = build_privileged_pod(config, &KubernetesDistribution::Default);

    let spec = pod.spec.as_ref().unwrap();

    // 1. Spec level fields
    assert!(spec.node_name.is_some());
    assert!(spec.volumes.is_some());

    // 2. Container level fields
    let container = &spec.containers[0];
    assert!(container.security_context.is_some());
    assert!(container.volume_mounts.is_some());

    // 3. Nested fields
    let security_context = container.security_context.as_ref().unwrap();
    assert!(security_context.privileged.unwrap());

    let pod_volumes = spec.volumes.as_ref().unwrap();
    assert_eq!(pod_volumes[0].name, "custom-vol");

    let container_mounts = container.volume_mounts.as_ref().unwrap();
    assert_eq!(container_mounts[0].mount_path, "/custom");
}
|
||||
|
||||
/// On the Default distribution the bundle holds the Pod and nothing else.
#[test]
fn test_build_privileged_bundle_default_distribution() {
    let config = PrivilegedPodConfig {
        name: "test-bundle".to_string(),
        namespace: "test-ns".to_string(),
        node_name: "node-1".to_string(),
        container_name: "test-container".to_string(),
        command: vec!["echo".to_string(), "hello".to_string()],
        ..Default::default()
    };
    let bundle = build_privileged_bundle(config, &KubernetesDistribution::Default);

    assert_eq!(bundle.resources.len(), 1);

    let pod_meta = &bundle.resources[0].metadata;
    assert_eq!(pod_meta.name.as_deref(), Some("test-bundle"));
    assert_eq!(pod_meta.namespace.as_deref(), Some("test-ns"));
}
|
||||
|
||||
/// On the OpenShift family the bundle holds the SCC ClusterRoleBinding first and the
/// Pod second.
#[test]
fn test_build_privileged_bundle_openshift_distribution() {
    let bundle = build_privileged_bundle(
        PrivilegedPodConfig {
            name: "test-bundle-ocp".to_string(),
            namespace: "test-ns".to_string(),
            node_name: "node-1".to_string(),
            container_name: "test-container".to_string(),
            command: vec!["echo".to_string(), "hello".to_string()],
            ..Default::default()
        },
        &KubernetesDistribution::OpenshiftFamily,
    );

    // For OpenShift, both ClusterRoleBinding and Pod should be in the bundle
    assert_eq!(bundle.resources.len(), 2);

    // First resource should be the ClusterRoleBinding
    let crb_obj = &bundle.resources[0];
    assert_eq!(
        crb_obj.metadata.name.as_deref(),
        Some("test-bundle-ocp-scc-binding")
    );

    // Verify it's targeting the privileged SCC. A missing roleRef must fail the test
    // rather than silently skip the check.
    let role_ref = crb_obj
        .data
        .get("roleRef")
        .expect("ClusterRoleBinding should carry a roleRef");
    assert_eq!(
        role_ref.get("name").and_then(|v| v.as_str()),
        Some("system:openshift:scc:privileged")
    );

    // Second resource should be the Pod
    let pod_obj = &bundle.resources[1];
    assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle-ocp"));
    assert_eq!(pod_obj.metadata.namespace.as_deref(), Some("test-ns"));
}
|
||||
|
||||
/// On the K3s family the bundle holds only the Pod (no SCC binding).
#[test]
fn test_build_privileged_bundle_k3s_distribution() {
    let config = PrivilegedPodConfig {
        name: "test-bundle-k3s".to_string(),
        namespace: "test-ns".to_string(),
        node_name: "node-1".to_string(),
        container_name: "test-container".to_string(),
        command: vec!["echo".to_string(), "hello".to_string()],
        ..Default::default()
    };
    let bundle = build_privileged_bundle(config, &KubernetesDistribution::K3sFamily);

    assert_eq!(bundle.resources.len(), 1);

    let pod_meta = &bundle.resources[0].metadata;
    assert_eq!(pod_meta.name.as_deref(), Some("test-bundle-k3s"));
}
|
||||
|
||||
// Builds a privileged pod for the Default distribution (host PID and host network
// enabled, no volumes) and compares its `serde_yaml` rendering with a fixed expected
// document.
// NOTE(review): the expected literal below appears without YAML indentation in this
// rendering — confirm against the repository file before relying on it.
#[test]
|
||||
fn test_pod_yaml_rendering_expected() {
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "pod_name".to_string(),
|
||||
namespace: "pod_namespace".to_string(),
|
||||
node_name: "node name".to_string(),
|
||||
container_name: "container name".to_string(),
|
||||
command: vec!["command".to_string(), "argument".to_string()],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&serde_yaml::to_string(&pod).unwrap(),
|
||||
"apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: pod_name
|
||||
namespace: pod_namespace
|
||||
spec:
|
||||
containers:
|
||||
- command:
|
||||
- command
|
||||
- argument
|
||||
image: hub.nationtech.io/redhat/ubi10:latest
|
||||
name: container name
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts: []
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeName: node name
|
||||
restartPolicy: Never
|
||||
volumes: []
|
||||
"
|
||||
);
|
||||
}
|
||||
|
||||
// Same as the Default-distribution rendering test, but for the OpenShift family: the
// expected document additionally carries the two `openshift.io/*scc` annotations.
// NOTE(review): the expected literal below appears without YAML indentation in this
// rendering — confirm against the repository file before relying on it.
#[test]
|
||||
fn test_pod_yaml_rendering_openshift() {
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "pod_name".to_string(),
|
||||
namespace: "pod_namespace".to_string(),
|
||||
node_name: "node name".to_string(),
|
||||
container_name: "container name".to_string(),
|
||||
command: vec!["command".to_string(), "argument".to_string()],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::OpenshiftFamily,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&serde_yaml::to_string(&pod).unwrap(),
|
||||
"apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
annotations:
|
||||
openshift.io/required-scc: privileged
|
||||
openshift.io/scc: privileged
|
||||
name: pod_name
|
||||
namespace: pod_namespace
|
||||
spec:
|
||||
containers:
|
||||
- command:
|
||||
- command
|
||||
- argument
|
||||
image: hub.nationtech.io/redhat/ubi10:latest
|
||||
name: container name
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts: []
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeName: node name
|
||||
restartPolicy: Never
|
||||
volumes: []
|
||||
"
|
||||
);
|
||||
}
|
||||
}
|
||||
2615
harmony/src/domain/topology/k8s/mod.rs
Normal file
2615
harmony/src/domain/topology/k8s/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,17 +3,11 @@ use std::{collections::BTreeMap, process::Command, sync::Arc, time::Duration};
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine, engine::general_purpose};
|
||||
use harmony_types::rfc1123::Rfc1123Name;
|
||||
use k8s_openapi::{
|
||||
ByteString,
|
||||
api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
},
|
||||
};
|
||||
use kube::{
|
||||
api::{DynamicObject, GroupVersionKind, ObjectMeta},
|
||||
runtime::conditions,
|
||||
use k8s_openapi::api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use kube::api::{DynamicObject, GroupVersionKind, ObjectMeta};
|
||||
use log::{debug, info, trace, warn};
|
||||
use serde::Serialize;
|
||||
use tokio::sync::OnceCell;
|
||||
@@ -34,10 +28,7 @@ use crate::{
|
||||
score_cert_management::CertificateManagementScore,
|
||||
},
|
||||
k3d::K3DInstallationScore,
|
||||
k8s::{
|
||||
ingress::{K8sIngressScore, PathType},
|
||||
resource::K8sResourceScore,
|
||||
},
|
||||
k8s::ingress::{K8sIngressScore, PathType},
|
||||
monitoring::{
|
||||
grafana::{grafana::Grafana, helm::helm_grafana::grafana_helm_chart_score},
|
||||
kube_prometheus::crd::{
|
||||
@@ -54,7 +45,6 @@ use crate::{
|
||||
service_monitor::ServiceMonitor,
|
||||
},
|
||||
},
|
||||
nats::capability::NatsCluster,
|
||||
okd::{crd::ingresses_config::Ingress as IngressResource, route::OKDTlsPassthroughScore},
|
||||
prometheus::{
|
||||
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
||||
@@ -103,7 +93,6 @@ enum K8sSource {
|
||||
pub struct K8sAnywhereTopology {
|
||||
k8s_state: Arc<OnceCell<Option<K8sState>>>,
|
||||
tenant_manager: Arc<OnceCell<K8sTenantManager>>,
|
||||
k8s_distribution: Arc<OnceCell<KubernetesDistribution>>,
|
||||
config: Arc<K8sAnywhereConfig>,
|
||||
}
|
||||
|
||||
@@ -554,7 +543,6 @@ impl K8sAnywhereTopology {
|
||||
Self {
|
||||
k8s_state: Arc::new(OnceCell::new()),
|
||||
tenant_manager: Arc::new(OnceCell::new()),
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
config: Arc::new(K8sAnywhereConfig::from_env()),
|
||||
}
|
||||
}
|
||||
@@ -563,7 +551,6 @@ impl K8sAnywhereTopology {
|
||||
Self {
|
||||
k8s_state: Arc::new(OnceCell::new()),
|
||||
tenant_manager: Arc::new(OnceCell::new()),
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
config: Arc::new(config),
|
||||
}
|
||||
}
|
||||
@@ -600,41 +587,6 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_k8s_distribution(&self) -> Result<&KubernetesDistribution, PreparationError> {
|
||||
self.k8s_distribution
|
||||
.get_or_try_init(async || {
|
||||
debug!("Trying to detect k8s distribution");
|
||||
let client = self.k8s_client().await.unwrap();
|
||||
|
||||
let discovery = client.discovery().await.map_err(|e| {
|
||||
PreparationError::new(format!("Could not discover API groups: {}", e))
|
||||
})?;
|
||||
|
||||
let version = client.get_apiserver_version().await.map_err(|e| {
|
||||
PreparationError::new(format!("Could not get server version: {}", e))
|
||||
})?;
|
||||
|
||||
// OpenShift / OKD
|
||||
if discovery
|
||||
.groups()
|
||||
.any(|g| g.name() == "project.openshift.io")
|
||||
{
|
||||
info!("Found KubernetesDistribution OpenshiftFamily");
|
||||
return Ok(KubernetesDistribution::OpenshiftFamily);
|
||||
}
|
||||
|
||||
// K3d / K3s
|
||||
if version.git_version.contains("k3s") {
|
||||
info!("Found KubernetesDistribution K3sFamily");
|
||||
return Ok(KubernetesDistribution::K3sFamily);
|
||||
}
|
||||
|
||||
info!("Could not identify KubernetesDistribution, using Default");
|
||||
return Ok(KubernetesDistribution::Default);
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option<String> {
|
||||
let token_b64 = secret
|
||||
.data
|
||||
@@ -652,6 +604,16 @@ impl K8sAnywhereTopology {
|
||||
Some(cleaned)
|
||||
}
|
||||
|
||||
pub async fn get_k8s_distribution(&self) -> Result<KubernetesDistribution, PreparationError> {
|
||||
self.k8s_client()
|
||||
.await?
|
||||
.get_k8s_distribution()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
PreparationError::new(format!("Failed to get k8s distribution from client : {e}"))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build_cluster_rolebinding(
|
||||
&self,
|
||||
service_account_name: &str,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::{net::SocketAddr, str::FromStr};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
|
||||
use super::LogicalHost;
|
||||
|
||||
@@ -188,6 +188,10 @@ impl FromStr for DnsRecordType {
|
||||
pub trait NetworkManager: Debug + Send + Sync {
|
||||
async fn ensure_network_manager_installed(&self) -> Result<(), NetworkError>;
|
||||
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), NetworkError>;
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, new)]
|
||||
|
||||
@@ -5,9 +5,20 @@ use harmony_types::{
|
||||
net::{IpAddress, MacAddress},
|
||||
switch::{PortDeclaration, PortLocation},
|
||||
};
|
||||
use log::info;
|
||||
use option_ext::OptionExt;
|
||||
|
||||
use crate::topology::{PortConfig, SwitchClient, SwitchError};
|
||||
use crate::{
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{PortConfig, SwitchClient, SwitchError},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BrocadeSwitchConfig {
|
||||
pub ips: Vec<IpAddress>,
|
||||
pub auth: BrocadeSwitchAuth,
|
||||
pub options: BrocadeOptions,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BrocadeSwitchClient {
|
||||
@@ -15,13 +26,11 @@ pub struct BrocadeSwitchClient {
|
||||
}
|
||||
|
||||
impl BrocadeSwitchClient {
|
||||
pub async fn init(
|
||||
ip_addresses: &[IpAddress],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
) -> Result<Self, brocade::Error> {
|
||||
let brocade = brocade::init(ip_addresses, username, password, options).await?;
|
||||
pub async fn init(config: BrocadeSwitchConfig) -> Result<Self, brocade::Error> {
|
||||
let auth = &config.auth;
|
||||
let options = &config.options;
|
||||
|
||||
let brocade = brocade::init(&config.ips, &auth.username, &auth.password, options).await?;
|
||||
Ok(Self { brocade })
|
||||
}
|
||||
}
|
||||
@@ -52,13 +61,18 @@ impl SwitchClient for BrocadeSwitchClient {
|
||||
|| link.remote_port.contains(&interface.port_location)
|
||||
})
|
||||
})
|
||||
.map(|interface| (interface.name.clone(), PortOperatingMode::Access))
|
||||
.map(|interface| (interface.name.clone(), PortOperatingMode::Trunk))
|
||||
.collect();
|
||||
|
||||
if interfaces.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!("About to configure interfaces {interfaces:?}");
|
||||
// inquire::Confirm::new("Do you wish to configure interfaces now?")
|
||||
// .prompt()
|
||||
// .map_err(|e| SwitchError::new(e.to_string()))?;
|
||||
|
||||
self.brocade
|
||||
.configure_interfaces(&interfaces)
|
||||
.await
|
||||
@@ -208,8 +222,8 @@ mod tests {
|
||||
//TODO not sure about this
|
||||
let configured_interfaces = brocade.configured_interfaces.lock().unwrap();
|
||||
assert_that!(*configured_interfaces).contains_exactly(vec![
|
||||
(first_interface.name.clone(), PortOperatingMode::Access),
|
||||
(second_interface.name.clone(), PortOperatingMode::Access),
|
||||
(first_interface.name.clone(), PortOperatingMode::Trunk),
|
||||
(second_interface.name.clone(), PortOperatingMode::Trunk),
|
||||
]);
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use askama::Template;
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::core::v1::Node;
|
||||
@@ -10,13 +11,71 @@ use kube::{
|
||||
ResourceExt,
|
||||
api::{ObjectList, ObjectMeta},
|
||||
};
|
||||
use log::{debug, info};
|
||||
use log::{debug, info, warn};
|
||||
|
||||
use crate::{
|
||||
modules::okd::crd::nmstate,
|
||||
topology::{HostNetworkConfig, NetworkError, NetworkManager, k8s::K8sClient},
|
||||
topology::{
|
||||
HostNetworkConfig, NetworkError, NetworkManager,
|
||||
k8s::{DrainOptions, K8sClient, NodeFile},
|
||||
},
|
||||
};
|
||||
|
||||
/// NetworkManager bond configuration template
|
||||
#[derive(Template)]
|
||||
#[template(
|
||||
source = r#"[connection]
|
||||
id={{ bond_name }}
|
||||
uuid={{ bond_uuid }}
|
||||
type=bond
|
||||
autoconnect-slaves=1
|
||||
interface-name={{ bond_name }}
|
||||
|
||||
[bond]
|
||||
lacp_rate=fast
|
||||
mode=802.3ad
|
||||
xmit_hash_policy=layer2
|
||||
|
||||
[ipv4]
|
||||
method=auto
|
||||
|
||||
[ipv6]
|
||||
addr-gen-mode=default
|
||||
method=auto
|
||||
|
||||
[proxy]
|
||||
"#,
|
||||
ext = "txt"
|
||||
)]
|
||||
struct BondConfigTemplate {
|
||||
bond_name: String,
|
||||
bond_uuid: String,
|
||||
}
|
||||
|
||||
/// NetworkManager bond slave configuration template
|
||||
#[derive(Template)]
|
||||
#[template(
|
||||
source = r#"[connection]
|
||||
id={{ slave_id }}
|
||||
uuid={{ slave_uuid }}
|
||||
type=ethernet
|
||||
interface-name={{ interface_name }}
|
||||
master={{ bond_name }}
|
||||
slave-type=bond
|
||||
|
||||
[ethernet]
|
||||
|
||||
[bond-port]
|
||||
"#,
|
||||
ext = "txt"
|
||||
)]
|
||||
struct BondSlaveConfigTemplate {
|
||||
slave_id: String,
|
||||
slave_uuid: String,
|
||||
interface_name: String,
|
||||
bond_name: String,
|
||||
}
|
||||
|
||||
/// TODO: properly document the non-intuitive "roll forward only" behavior of nmstate in general
|
||||
/// It is documented in the official nmstate documentation, but worth mentioning here:
|
||||
///
|
||||
@@ -87,6 +146,117 @@ impl NetworkManager for OpenShiftNmStateNetworkManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Configures bonding on the primary network interface of a node.
|
||||
///
|
||||
/// Changing the *primary* network interface (making it a bond
|
||||
/// slave) will disrupt node connectivity mid-change, so the
|
||||
/// procedure is:
|
||||
///
|
||||
/// 1. Generate NetworkManager .nmconnection files
|
||||
/// 2. Write configuration files to `/etc/NetworkManager/system-connections/` (before draining, while the node is still running)
|
||||
/// 3. Attempt to reload NetworkManager (optional, best-effort)
|
||||
/// 4. Reboot the node with full verification (drain including cordon, boot_id check, uncordon)
|
||||
///
|
||||
/// The reboot procedure includes:
|
||||
/// - Recording boot_id before reboot
|
||||
/// - Fire-and-forget reboot command
|
||||
/// - Waiting for NotReady status
|
||||
/// - Waiting for Ready status
|
||||
/// - Verifying boot_id changed
|
||||
/// - Uncordoning the node
|
||||
///
|
||||
/// See ADR-019 for context and rationale.
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
use std::time::Duration;
|
||||
|
||||
let node_name = self.get_node_name_for_id(&config.host_id).await?;
|
||||
let hostname = self.get_hostname(&config.host_id).await?;
|
||||
|
||||
info!(
|
||||
"Configuring bond on primary interface for host '{}' (node '{}')",
|
||||
config.host_id, node_name
|
||||
);
|
||||
|
||||
// 1. Generate .nmconnection files
|
||||
let files = self.generate_nmconnection_files(&hostname, config)?;
|
||||
debug!(
|
||||
"Generated {} NetworkManager configuration files",
|
||||
files.len()
|
||||
);
|
||||
|
||||
// 2. Write configuration files to the node (before draining)
|
||||
// We do this while the node is still running for faster operation
|
||||
info!(
|
||||
"Writing NetworkManager configuration files to node '{}'...",
|
||||
node_name
|
||||
);
|
||||
self.k8s_client
|
||||
.write_files_to_node(&node_name, &files)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
"Failed to write configuration files to node '{}': {}",
|
||||
node_name, e
|
||||
))
|
||||
})?;
|
||||
|
||||
// 3. Reload NetworkManager configuration (best-effort)
|
||||
// This won't activate the bond yet since the primary interface would lose connectivity,
|
||||
// but it validates the configuration files are correct
|
||||
info!(
|
||||
"Reloading NetworkManager configuration on node '{}'...",
|
||||
node_name
|
||||
);
|
||||
match self
|
||||
.k8s_client
|
||||
.run_privileged_command_on_node(&node_name, "chroot /host nmcli connection reload")
|
||||
.await
|
||||
{
|
||||
Ok(output) => {
|
||||
debug!("NetworkManager reload output: {}", output.trim());
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to reload NetworkManager configuration: {}. Proceeding with reboot.",
|
||||
e
|
||||
);
|
||||
// Don't fail here - reboot will pick up the config anyway
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Reboot the node with full verification
|
||||
// The reboot_node function handles: drain, boot_id capture, reboot, NotReady wait,
|
||||
// Ready wait, boot_id verification, and uncordon
|
||||
// 60 minutes timeout for bare-metal environments (drain can take 20-30 mins)
|
||||
let reboot_timeout = Duration::from_secs(3600);
|
||||
info!(
|
||||
"Rebooting node '{}' to apply network configuration (timeout: {:?})...",
|
||||
node_name, reboot_timeout
|
||||
);
|
||||
|
||||
self.k8s_client
|
||||
.reboot_node(
|
||||
&node_name,
|
||||
&DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
|
||||
reboot_timeout,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
NetworkError::new(format!("Failed to reboot node '{}': {}", node_name, e))
|
||||
})?;
|
||||
|
||||
info!(
|
||||
"Successfully configured bond on primary interface for host '{}' (node '{}')",
|
||||
config.host_id, node_name
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), NetworkError> {
|
||||
let hostname = self.get_hostname(&config.host_id).await.map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
@@ -208,14 +378,14 @@ impl OpenShiftNmStateNetworkManager {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
|
||||
async fn get_node_for_id(&self, host_id: &Id) -> Result<Node, String> {
|
||||
let nodes: ObjectList<Node> = self
|
||||
.k8s_client
|
||||
.list_resources(None, None)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to list nodes: {e}"))?;
|
||||
|
||||
let Some(node) = nodes.iter().find(|n| {
|
||||
let Some(node) = nodes.into_iter().find(|n| {
|
||||
n.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.node_info.as_ref())
|
||||
@@ -225,6 +395,20 @@ impl OpenShiftNmStateNetworkManager {
|
||||
return Err(format!("No node found for host '{host_id}'"));
|
||||
};
|
||||
|
||||
Ok(node)
|
||||
}
|
||||
|
||||
async fn get_node_name_for_id(&self, host_id: &Id) -> Result<String, String> {
|
||||
let node = self.get_node_for_id(host_id).await?;
|
||||
|
||||
node.metadata.name.ok_or(format!(
|
||||
"A node should always have a name, node for host_id {host_id} has no name"
|
||||
))
|
||||
}
|
||||
|
||||
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
|
||||
let node = self.get_node_for_id(host_id).await?;
|
||||
|
||||
node.labels()
|
||||
.get("kubernetes.io/hostname")
|
||||
.ok_or(format!(
|
||||
@@ -261,4 +445,82 @@ impl OpenShiftNmStateNetworkManager {
|
||||
let next_id = (0..).find(|id| !used_ids.contains(id)).unwrap();
|
||||
Ok(format!("bond{next_id}"))
|
||||
}
|
||||
|
||||
/// Generates NetworkManager .nmconnection files for bonding configuration.
|
||||
///
|
||||
/// Creates:
|
||||
/// - One bond master configuration file (bond0.nmconnection)
|
||||
/// - One slave configuration file per interface (bond0-<iface>.nmconnection)
|
||||
///
|
||||
/// All files are placed in `/etc/NetworkManager/system-connections/` with
|
||||
/// mode 0o600 (required by NetworkManager).
|
||||
fn generate_nmconnection_files(
|
||||
&self,
|
||||
hostname: &str,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<Vec<NodeFile>, NetworkError> {
|
||||
let mut files = Vec::new();
|
||||
let bond_name = "bond0";
|
||||
let bond_uuid = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
// Generate bond master configuration
|
||||
let bond_template = BondConfigTemplate {
|
||||
bond_name: bond_name.to_string(),
|
||||
bond_uuid: bond_uuid.clone(),
|
||||
};
|
||||
|
||||
let bond_content = bond_template.render().map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
"Failed to render bond configuration template: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
files.push(NodeFile {
|
||||
path: format!(
|
||||
"/etc/NetworkManager/system-connections/{}.nmconnection",
|
||||
bond_name
|
||||
),
|
||||
content: bond_content,
|
||||
mode: 0o600,
|
||||
});
|
||||
|
||||
// Generate slave configurations for each interface
|
||||
for switch_port in &config.switch_ports {
|
||||
let interface_name = &switch_port.interface.name;
|
||||
let slave_id = format!("{}-{}", bond_name, interface_name);
|
||||
let slave_uuid = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
let slave_template = BondSlaveConfigTemplate {
|
||||
slave_id: slave_id.clone(),
|
||||
slave_uuid,
|
||||
interface_name: interface_name.clone(),
|
||||
bond_name: bond_name.to_string(),
|
||||
};
|
||||
|
||||
let slave_content = slave_template.render().map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
"Failed to render slave configuration template for interface '{}': {}",
|
||||
interface_name, e
|
||||
))
|
||||
})?;
|
||||
|
||||
files.push(NodeFile {
|
||||
path: format!(
|
||||
"/etc/NetworkManager/system-connections/{}.nmconnection",
|
||||
slave_id
|
||||
),
|
||||
content: slave_content,
|
||||
mode: 0o600,
|
||||
});
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Generated {} NetworkManager configuration files for host '{}'",
|
||||
files.len(),
|
||||
hostname
|
||||
);
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
}
|
||||
|
||||
138
harmony/src/modules/brocade/brocade.rs
Normal file
138
harmony/src/modules/brocade/brocade.rs
Normal file
@@ -0,0 +1,138 @@
|
||||
use async_trait::async_trait;
|
||||
use brocade::{BrocadeOptions, PortOperatingMode};
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
infra::brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HostNetworkConfig, PortConfig, PreparationError, PreparationOutcome, Switch, SwitchClient,
|
||||
SwitchError, Topology,
|
||||
},
|
||||
};
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{id::Id, net::MacAddress, switch::PortLocation};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct BrocadeSwitchScore {
|
||||
pub port_channels_to_clear: Vec<Id>,
|
||||
pub ports_to_configure: Vec<PortConfig>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Switch> Score<T> for BrocadeSwitchScore {
|
||||
fn name(&self) -> String {
|
||||
"BrocadeSwitchScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(BrocadeSwitchInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BrocadeSwitchInterpret {
|
||||
score: BrocadeSwitchScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + Switch> Interpret<T> for BrocadeSwitchInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Applying switch configuration {:?}", self.score);
|
||||
debug!(
|
||||
"Clearing port channel {:?}",
|
||||
self.score.port_channels_to_clear
|
||||
);
|
||||
topology
|
||||
.clear_port_channel(&self.score.port_channels_to_clear)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
debug!("Configuring interfaces {:?}", self.score.ports_to_configure);
|
||||
topology
|
||||
.configure_interface(&self.score.ports_to_configure)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success("switch configured".to_string()))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("BrocadeSwitchInterpret")
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
pub struct BrocadeSwitchConfig {
|
||||
pub ips: Vec<harmony_types::net::IpAddress>,
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
pub options: BrocadeOptions,
|
||||
}
|
||||
*/
|
||||
|
||||
pub struct SwitchTopology {
|
||||
client: Box<dyn SwitchClient>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Topology for SwitchTopology {
|
||||
fn name(&self) -> &str {
|
||||
"SwitchTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
impl SwitchTopology {
|
||||
pub async fn new(config: BrocadeSwitchConfig) -> Self {
|
||||
let client = BrocadeSwitchClient::init(config)
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
let client = Box::new(client);
|
||||
Self { client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Switch for SwitchTopology {
|
||||
async fn setup_switch(&self) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_port_for_mac_address(
|
||||
&self,
|
||||
_mac_address: &MacAddress,
|
||||
) -> Result<Option<PortLocation>, SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn configure_port_channel(&self, _config: &HostNetworkConfig) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
self.client.clear_port_channel(ids).await
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
self.client.configure_interface(ports).await
|
||||
}
|
||||
}
|
||||
@@ -39,16 +39,22 @@ pub struct BrocadeEnableSnmpInterpret {
|
||||
}
|
||||
|
||||
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
|
||||
struct BrocadeSwitchAuth {
|
||||
username: String,
|
||||
password: String,
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
impl BrocadeSwitchAuth {
|
||||
pub fn user_pass(username: String, password: String) -> Self {
|
||||
Self { username, password }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
|
||||
struct BrocadeSnmpAuth {
|
||||
username: String,
|
||||
auth_password: String,
|
||||
des_password: String,
|
||||
pub struct BrocadeSnmpAuth {
|
||||
pub username: String,
|
||||
pub auth_password: String,
|
||||
pub des_password: String,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -72,7 +78,7 @@ impl<T: Topology> Interpret<T> for BrocadeEnableSnmpInterpret {
|
||||
&switch_addresses,
|
||||
&config.username,
|
||||
&config.password,
|
||||
BrocadeOptions {
|
||||
&BrocadeOptions {
|
||||
dry_run: self.score.dry_run,
|
||||
..Default::default()
|
||||
},
|
||||
5
harmony/src/modules/brocade/mod.rs
Normal file
5
harmony/src/modules/brocade/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
pub mod brocade;
|
||||
pub use brocade::*;
|
||||
|
||||
pub mod brocade_snmp;
|
||||
pub use brocade_snmp::*;
|
||||
@@ -82,17 +82,40 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
|
||||
self.score.role,
|
||||
choice.summary()
|
||||
);
|
||||
let disk_names: Vec<String> =
|
||||
choice.storage.iter().map(|s| s.name.clone()).collect();
|
||||
let mut disk_choices: Vec<(String, String)> = vec![];
|
||||
|
||||
for s in choice.storage.iter() {
|
||||
let size_gb: f64 = s.size_bytes as f64 / 1_000_000_000.0;
|
||||
let (size, unit) = if size_gb >= 1000.0 {
|
||||
(size_gb / 1000.0, "TB")
|
||||
} else {
|
||||
(size_gb, "GB")
|
||||
};
|
||||
let drive_type = if s.rotational { "rotational" } else { "SSD" };
|
||||
let smart_str = s.smart_status.as_deref().unwrap_or("N/A");
|
||||
let display = format!(
|
||||
"{} : [{}] - {:.0} {} ({}) - {} - Smart: {}",
|
||||
s.name, s.model, size, unit, drive_type, s.interface_type, smart_str
|
||||
);
|
||||
disk_choices.push((display, s.name.clone()));
|
||||
}
|
||||
|
||||
let display_refs: Vec<&str> =
|
||||
disk_choices.iter().map(|(d, _)| d.as_str()).collect();
|
||||
|
||||
let disk_choice = inquire::Select::new(
|
||||
&format!("Select the disk to use on host {}:", choice.summary()),
|
||||
disk_names,
|
||||
display_refs,
|
||||
)
|
||||
.prompt();
|
||||
|
||||
match disk_choice {
|
||||
Ok(disk_name) => {
|
||||
Ok(selected_display) => {
|
||||
let disk_name = disk_choices
|
||||
.iter()
|
||||
.find(|(d, _)| d.as_str() == selected_display)
|
||||
.map(|(_, name)| name.clone())
|
||||
.unwrap();
|
||||
info!("Selected disk {} for node {}", disk_name, choice.summary());
|
||||
host_repo
|
||||
.save_role_mapping(&self.score.role, &choice, &disk_name)
|
||||
|
||||
@@ -54,6 +54,12 @@ pub enum HarmonyDiscoveryStrategy {
|
||||
SUBNET { cidr: cidr::Ipv4Cidr, port: u16 },
|
||||
}
|
||||
|
||||
impl Default for HarmonyDiscoveryStrategy {
|
||||
fn default() -> Self {
|
||||
HarmonyDiscoveryStrategy::MDNS
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology> Interpret<T> for DiscoverInventoryAgentInterpret {
|
||||
async fn execute(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use async_trait::async_trait;
|
||||
use k8s_openapi::NamespaceResourceScope;
|
||||
use k8s_openapi::{NamespaceResourceScope, ResourceScope};
|
||||
use kube::Resource;
|
||||
use log::info;
|
||||
use serde::{Serialize, de::DeserializeOwned};
|
||||
@@ -29,7 +29,7 @@ impl<K: Resource + std::fmt::Debug> K8sResourceScore<K> {
|
||||
}
|
||||
|
||||
impl<
|
||||
K: Resource<Scope = NamespaceResourceScope>
|
||||
K: Resource<Scope: ResourceScope>
|
||||
+ std::fmt::Debug
|
||||
+ Sync
|
||||
+ DeserializeOwned
|
||||
@@ -61,7 +61,7 @@ pub struct K8sResourceInterpret<K: Resource + std::fmt::Debug + Sync + Send> {
|
||||
|
||||
#[async_trait]
|
||||
impl<
|
||||
K: Resource<Scope = NamespaceResourceScope>
|
||||
K: Resource<Scope: ResourceScope>
|
||||
+ Clone
|
||||
+ std::fmt::Debug
|
||||
+ DeserializeOwned
|
||||
|
||||
@@ -15,10 +15,13 @@ pub mod load_balancer;
|
||||
pub mod monitoring;
|
||||
pub mod nats;
|
||||
pub mod network;
|
||||
pub mod node_health;
|
||||
pub mod okd;
|
||||
pub mod openbao;
|
||||
pub mod opnsense;
|
||||
pub mod postgresql;
|
||||
pub mod prometheus;
|
||||
pub mod storage;
|
||||
pub mod tenant;
|
||||
pub mod tftp;
|
||||
pub mod zitadel;
|
||||
|
||||
260
harmony/src/modules/node_health/mod.rs
Normal file
260
harmony/src/modules/node_health/mod.rs
Normal file
@@ -0,0 +1,260 @@
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::{
|
||||
apps::v1::{DaemonSet, DaemonSetSpec},
|
||||
core::v1::{
|
||||
Container, ContainerPort, EnvVar, EnvVarSource, Namespace, ObjectFieldSelector, PodSpec,
|
||||
PodTemplateSpec, ResourceRequirements, ServiceAccount, Toleration,
|
||||
},
|
||||
rbac::v1::{ClusterRole, ClusterRoleBinding, PolicyRule, Role, RoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::k8s::resource::K8sResourceScore,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct NodeHealthScore {}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for NodeHealthScore {
|
||||
fn name(&self) -> String {
|
||||
format!("NodeHealthScore")
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(NodeHealthInterpret {})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NodeHealthInterpret {}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for NodeHealthInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let namespace_name = "harmony-node-healthcheck".to_string();
|
||||
|
||||
// Namespace
|
||||
let mut labels = BTreeMap::new();
|
||||
labels.insert("name".to_string(), namespace_name.clone());
|
||||
|
||||
let namespace = Namespace {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(namespace_name.clone()),
|
||||
labels: Some(labels),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
..Namespace::default()
|
||||
};
|
||||
|
||||
// ServiceAccount
|
||||
let service_account_name = "node-healthcheck-sa".to_string();
|
||||
let service_account = ServiceAccount {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(service_account_name.clone()),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
..ServiceAccount::default()
|
||||
};
|
||||
|
||||
// ClusterRole
|
||||
let cluster_role = ClusterRole {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("node-healthcheck-role".to_string()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
rules: Some(vec![PolicyRule {
|
||||
api_groups: Some(vec!["".to_string()]),
|
||||
resources: Some(vec!["nodes".to_string()]),
|
||||
verbs: vec!["get".to_string(), "list".to_string()],
|
||||
..PolicyRule::default()
|
||||
}]),
|
||||
..ClusterRole::default()
|
||||
};
|
||||
|
||||
// Role
|
||||
let role = Role {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("allow-hostnetwork-scc".to_string()),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
rules: Some(vec![PolicyRule {
|
||||
api_groups: Some(vec!["security.openshift.io".to_string()]),
|
||||
resources: Some(vec!["securitycontextconstraints".to_string()]),
|
||||
resource_names: Some(vec!["hostnetwork".to_string()]),
|
||||
verbs: vec!["use".to_string()],
|
||||
..PolicyRule::default()
|
||||
}]),
|
||||
..Role::default()
|
||||
};
|
||||
|
||||
// RoleBinding
|
||||
let role_binding = RoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("node-status-querier-scc-binding".to_string()),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".to_string(),
|
||||
name: service_account_name.clone(),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..Subject::default()
|
||||
}]),
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".to_string(),
|
||||
kind: "Role".to_string(),
|
||||
name: "allow-hostnetwork-scc".to_string(),
|
||||
},
|
||||
};
|
||||
|
||||
// ClusterRoleBinding
|
||||
let cluster_role_binding = ClusterRoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("read-nodes-binding".to_string()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".to_string(),
|
||||
name: service_account_name.clone(),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
..Subject::default()
|
||||
}]),
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".to_string(),
|
||||
kind: "ClusterRole".to_string(),
|
||||
name: "node-healthcheck-role".to_string(),
|
||||
},
|
||||
};
|
||||
|
||||
// DaemonSet
|
||||
let mut daemonset_labels = BTreeMap::new();
|
||||
daemonset_labels.insert("app".to_string(), "node-healthcheck".to_string());
|
||||
|
||||
let daemon_set = DaemonSet {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("node-healthcheck".to_string()),
|
||||
namespace: Some(namespace_name.clone()),
|
||||
labels: Some(daemonset_labels.clone()),
|
||||
..ObjectMeta::default()
|
||||
},
|
||||
spec: Some(DaemonSetSpec {
|
||||
selector: LabelSelector {
|
||||
match_labels: Some(daemonset_labels.clone()),
|
||||
..LabelSelector::default()
|
||||
},
|
||||
template: PodTemplateSpec {
|
||||
metadata: Some(ObjectMeta {
|
||||
labels: Some(daemonset_labels),
|
||||
..ObjectMeta::default()
|
||||
}),
|
||||
spec: Some(PodSpec {
|
||||
service_account_name: Some(service_account_name.clone()),
|
||||
host_network: Some(true),
|
||||
tolerations: Some(vec![Toleration {
|
||||
operator: Some("Exists".to_string()),
|
||||
..Toleration::default()
|
||||
}]),
|
||||
containers: vec![Container {
|
||||
name: "checker".to_string(),
|
||||
image: Some(
|
||||
"hub.nationtech.io/harmony/harmony-node-readiness-endpoint:latest"
|
||||
.to_string(),
|
||||
),
|
||||
env: Some(vec![EnvVar {
|
||||
name: "NODE_NAME".to_string(),
|
||||
value_from: Some(EnvVarSource {
|
||||
field_ref: Some(ObjectFieldSelector {
|
||||
field_path: "spec.nodeName".to_string(),
|
||||
..ObjectFieldSelector::default()
|
||||
}),
|
||||
..EnvVarSource::default()
|
||||
}),
|
||||
..EnvVar::default()
|
||||
}]),
|
||||
ports: Some(vec![ContainerPort {
|
||||
container_port: 25001,
|
||||
host_port: Some(25001),
|
||||
name: Some("health-port".to_string()),
|
||||
..ContainerPort::default()
|
||||
}]),
|
||||
resources: Some(ResourceRequirements {
|
||||
requests: Some({
|
||||
let mut requests = BTreeMap::new();
|
||||
requests.insert("cpu".to_string(), Quantity("10m".to_string()));
|
||||
requests
|
||||
.insert("memory".to_string(), Quantity("50Mi".to_string()));
|
||||
requests
|
||||
}),
|
||||
..ResourceRequirements::default()
|
||||
}),
|
||||
..Container::default()
|
||||
}],
|
||||
..PodSpec::default()
|
||||
}),
|
||||
},
|
||||
..DaemonSetSpec::default()
|
||||
}),
|
||||
..DaemonSet::default()
|
||||
};
|
||||
|
||||
K8sResourceScore::single(namespace, None)
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(service_account, Some(namespace_name.clone()))
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(cluster_role, None)
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(role, Some(namespace_name.clone()))
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(role_binding, Some(namespace_name.clone()))
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(cluster_role_binding, None)
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
K8sResourceScore::single(daemon_set, Some(namespace_name.clone()))
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(
|
||||
"Harmony node health successfully deployed".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("NodeHealth")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,74 @@ use crate::{
|
||||
topology::{HostNetworkConfig, NetworkInterface, NetworkManager, Switch, SwitchPort, Topology},
|
||||
};
|
||||
|
||||
/// Configures high-availability networking for a set of physical hosts.
|
||||
///
|
||||
/// This is an opinionated Score that creates a resilient network configuration.
|
||||
/// It assumes hosts have at least two network interfaces connected
|
||||
/// to redundant switches for high availability.
|
||||
///
|
||||
/// The Score's `Interpret` logic will:
|
||||
/// 1. Setup the switch with sane defaults (e.g. mark interfaces as switchports for discoverability).
|
||||
/// 2. Discover which switch ports each host's interfaces are connected to (via MAC address).
|
||||
/// 3. Create a network bond (e.g. LACP) on the host itself using these interfaces.
|
||||
/// 4. Configure a corresponding port-channel on the switch(es) for those ports.
|
||||
///
|
||||
/// This ensures that both the host and the switch are configured to treat the
|
||||
/// multiple links as a single, aggregated, and redundant connection.
|
||||
///
|
||||
/// Hosts with 0 or 1 detected interfaces will be skipped, as bonding is not
|
||||
/// applicable.
|
||||
///
|
||||
/// <div class="warning">
|
||||
/// The implementation is currently _not_ idempotent, even though it should be.
|
||||
/// Running it more than once on the same host might result in duplicated bond configurations.
|
||||
/// </div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
/// This Score is not named well. A better name would be
|
||||
/// `HighAvailabilityHostNetworkScore`, or something similar to better express the intent.
|
||||
/// </div>
|
||||
///
|
||||
/// # Requirements
|
||||
///
|
||||
/// This Score can only be applied to a [Topology] that implements both the
|
||||
/// [NetworkManager] (to configure the host-side bond) and [Switch]
|
||||
/// (to configure the switch-side port-channel) capabilities.
|
||||
///
|
||||
/// # Current limitations
|
||||
///
|
||||
/// ## 1. No rollback logic & limited idempotency
|
||||
///
|
||||
/// If any of the steps described above fails, the Score will not attempt to revert any changes
|
||||
/// already applied, which could leave the host or switch in an inconsistent state.
|
||||
///
|
||||
/// ## 2. Propagation delays on the switch
|
||||
///
|
||||
/// It might take some time for the sane defaults in step 1) to be applied. In some cases,
|
||||
/// it was observed that the switch takes up to 5min to actually apply the config.
|
||||
///
|
||||
/// But this Score's Interpret doesn't wait and directly proceeds to step 2) to discover
|
||||
/// the MAC addresses, which could result in interfaces being skipped because their corresponding port
|
||||
/// on the switch couldn't be found.
|
||||
///
|
||||
/// TODO: Validate that the switch is in the expected state before continuing.
|
||||
///
|
||||
/// ## 3. Bond configuration
|
||||
///
|
||||
/// To find the next available bond id, the current
|
||||
/// [NetworkManager](crate::infra::network_manager::OpenShiftNmStateNetworkManager) implementation
|
||||
/// simply checks for existing bonds named `bond[n]` and takes the next available `n` number.
|
||||
///
|
||||
/// It doesn't check whether there is already a bond for the interfaces that should be bonded, which
|
||||
/// might result in a duplicate bond being created.
|
||||
///
|
||||
/// TODO: Make sure the interfaces to aggregate are not already bonded.
|
||||
///
|
||||
/// # Future improvements
|
||||
///
|
||||
/// Along with the `TODO` items above, splitting this Score into multiple smaller ones would be
|
||||
/// beneficial. It has a lot of moving parts and some of them could be used on their own to make
|
||||
/// operations on a cluster easier.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct HostNetworkConfigurationScore {
|
||||
pub hosts: Vec<PhysicalHost>,
|
||||
@@ -74,9 +142,13 @@ impl HostNetworkConfigurationInterpret {
|
||||
);
|
||||
|
||||
info!("[Host {current_host}/{total_hosts}] Configuring host network...");
|
||||
topology.configure_bond(&config).await.map_err(|e| {
|
||||
InterpretError::new(format!("Failed to configure host network: {e}"))
|
||||
})?;
|
||||
topology
|
||||
.configure_bond_on_primary_interface(&config)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
InterpretError::new(format!("Failed to configure host network: {e}"))
|
||||
})?;
|
||||
|
||||
topology
|
||||
.configure_port_channel(&config)
|
||||
.await
|
||||
@@ -663,6 +735,16 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
let mut configured_bonds = self.configured_bonds.lock().unwrap();
|
||||
configured_bonds.push((config.host_id.clone(), config.clone()));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
88
harmony/src/modules/openbao/mod.rs
Normal file
88
harmony/src/modules/openbao/mod.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use harmony_macros::hurl;
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::helm::chart::{HelmChartScore, HelmRepository},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct OpenbaoScore {
|
||||
/// Host used for external access (ingress)
|
||||
pub host: String,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + HelmCommand> Score<T> for OpenbaoScore {
|
||||
fn name(&self) -> String {
|
||||
"OpenbaoScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
// TODO exec pod commands to initialize secret store if not already done
|
||||
let host = &self.host;
|
||||
|
||||
let values_yaml = Some(format!(
|
||||
r#"global:
|
||||
openshift: true
|
||||
server:
|
||||
standalone:
|
||||
enabled: true
|
||||
config: |
|
||||
ui = true
|
||||
|
||||
listener "tcp" {{
|
||||
tls_disable = true
|
||||
address = "[::]:8200"
|
||||
cluster_address = "[::]:8201"
|
||||
}}
|
||||
|
||||
storage "file" {{
|
||||
path = "/openbao/data"
|
||||
}}
|
||||
|
||||
service:
|
||||
enabled: true
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
hosts:
|
||||
- host: {host}
|
||||
dataStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
storageClass: null
|
||||
accessMode: ReadWriteOnce
|
||||
|
||||
auditStorage:
|
||||
enabled: true
|
||||
size: 10Gi
|
||||
storageClass: null
|
||||
accessMode: ReadWriteOnce
|
||||
ui:
|
||||
enabled: true"#
|
||||
));
|
||||
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str("openbao").unwrap()),
|
||||
release_name: NonBlankString::from_str("openbao").unwrap(),
|
||||
chart_name: NonBlankString::from_str("openbao/openbao").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml,
|
||||
create_namespace: true,
|
||||
install_only: false,
|
||||
repository: Some(HelmRepository::new(
|
||||
"openbao".to_string(),
|
||||
hurl!("https://openbao.github.io/openbao-helm"),
|
||||
true,
|
||||
)),
|
||||
}
|
||||
.create_interpret()
|
||||
}
|
||||
}
|
||||
51
harmony/src/modules/zitadel/mod.rs
Normal file
51
harmony/src/modules/zitadel/mod.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use harmony_macros::hurl;
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::helm::chart::{HelmChartScore, HelmRepository},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct ZitadelScore {
|
||||
/// Host used for external access (ingress)
|
||||
pub host: String,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + HelmCommand> Score<T> for ZitadelScore {
|
||||
fn name(&self) -> String {
|
||||
"ZitadelScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
// TODO exec pod commands to initialize secret store if not already done
|
||||
let host = &self.host;
|
||||
|
||||
let values_yaml = Some(format!(r#""#));
|
||||
|
||||
todo!("This is not complete yet");
|
||||
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str("zitadel").unwrap()),
|
||||
release_name: NonBlankString::from_str("zitadel").unwrap(),
|
||||
chart_name: NonBlankString::from_str("zitadel/zitadel").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml,
|
||||
create_namespace: true,
|
||||
install_only: false,
|
||||
repository: Some(HelmRepository::new(
|
||||
"zitadel".to_string(),
|
||||
hurl!("https://charts.zitadel.com"),
|
||||
true,
|
||||
)),
|
||||
}
|
||||
.create_interpret()
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use async_nats::ConnectOptions;
|
||||
|
||||
use crate::{
|
||||
agent::AgentRole,
|
||||
store::{ChaosKvStore, InMemoryKvStore, NatsKvStore},
|
||||
@@ -65,7 +67,15 @@ fn get_chaos_store(
|
||||
}
|
||||
|
||||
async fn get_local_nats_store() -> Arc<NatsKvStore> {
|
||||
let client = async_nats::connect("localhost").await.unwrap();
|
||||
let mut client = async_nats::ConnectOptions::new()
|
||||
// .require_tls(true)
|
||||
.user_and_password("admin".into(), "admin2".into())
|
||||
.ping_interval(std::time::Duration::from_secs(10))
|
||||
.connect("localhost")
|
||||
.await
|
||||
.expect("Connection to nats failed");
|
||||
|
||||
// let client = async_nats::connect("localhost").await.unwrap();
|
||||
let jetstream = async_nats::jetstream::new(client);
|
||||
let kv = jetstream
|
||||
.create_key_value(async_nats::jetstream::kv::Config {
|
||||
|
||||
17
harmony_node_readiness/Cargo.toml
Normal file
17
harmony_node_readiness/Cargo.toml
Normal file
@@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "harmony-node-readiness-endpoint"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
actix-web = "4"
|
||||
kube.workspace = true
|
||||
k8s-openapi.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
env_logger.workspace = true
|
||||
log.workspace = true
|
||||
tokio.workspace = true
|
||||
reqwest.workspace = true
|
||||
chrono.workspace = true
|
||||
tower = "0.5.3"
|
||||
13
harmony_node_readiness/Dockerfile
Normal file
13
harmony_node_readiness/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
||||
FROM debian:13-slim
|
||||
|
||||
# RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
# ca-certificates \
|
||||
# && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY harmony-node-readiness-endpoint /usr/local/bin/harmony-node-readiness-endpoint
|
||||
|
||||
ENV RUST_LOG=info
|
||||
|
||||
EXPOSE 25001
|
||||
|
||||
CMD ["harmony-node-readiness-endpoint"]
|
||||
197
harmony_node_readiness/README.md
Normal file
197
harmony_node_readiness/README.md
Normal file
@@ -0,0 +1,197 @@
|
||||
# harmony-node-readiness-endpoint
|
||||
|
||||
**A lightweight, standalone Rust service for Kubernetes node health checking.**
|
||||
|
||||
Designed for **bare-metal Kubernetes clusters** with external load balancers (HAProxy, OPNsense, F5, etc.).
|
||||
|
||||
Exposes a simple HTTP endpoint (`/health`) on each node:
|
||||
|
||||
- **200 OK** — node is healthy and ready to receive traffic
|
||||
- **503 Service Unavailable** — node should be removed from the load balancer pool
|
||||
- **500 Internal Server Error** — misconfiguration (e.g. `NODE_NAME` not set)
|
||||
|
||||
This project is **not dependent on Harmony**, but is commonly used as part of Harmony bare-metal Kubernetes deployments.
|
||||
|
||||
## Why this project exists
|
||||
|
||||
In bare-metal environments, external load balancers often rely on pod-level or router-level checks that can lag behind the authoritative Kubernetes `Node.status.conditions[Ready]`.
|
||||
This service provides the true source-of-truth with fast reaction time.
|
||||
|
||||
## Available checks
|
||||
|
||||
| Check name | Description | Status |
|
||||
|--------------------|-------------------------------------------------------------|-------------------|
|
||||
| `node_ready` | Queries `Node.status.conditions[Ready]` via Kubernetes API | Implemented |
|
||||
| `okd_router_1936` | Probes OpenShift router `/healthz/ready` on port 1936 | Implemented |
|
||||
| `filesystem_ro` | Detects read-only mounts via `/proc/mounts` | To be implemented |
|
||||
| `kubelet` | Local probe to kubelet `/healthz` (port 10248) | To be implemented |
|
||||
| `container_runtime`| Socket check + runtime status | To be implemented |
|
||||
| `disk_pressure` | Threshold checks on key filesystems | To be implemented |
|
||||
| `network` | DNS resolution + gateway connectivity | To be implemented |
|
||||
| `custom_conditions`| Reacts to extra conditions (NPD, etc.) | To be implemented |
|
||||
|
||||
All checks are combined with logical **AND** — any single failure results in 503.
|
||||
|
||||
## Behavior
|
||||
|
||||
### `node_ready` check — fail-open design
|
||||
|
||||
The `node_ready` check queries the Kubernetes API server to read `Node.status.conditions[Ready]`.
|
||||
Because this service runs on the node it is checking, there are scenarios where the API server is temporarily
|
||||
unreachable (e.g. during a control-plane restart). To avoid incorrectly draining a healthy node in such cases,
|
||||
the check is **fail-open**: it passes (reports ready) whenever the Kubernetes API is unavailable.
|
||||
|
||||
| Situation | Result | HTTP status |
|
||||
|------------------------------------------------------|-------------------|-------------|
|
||||
| `Node.conditions[Ready] == True` | Pass | 200 |
|
||||
| `Node.conditions[Ready] == False` | Fail | 503 |
|
||||
| `Ready` condition absent | Fail | 503 |
|
||||
| API server unreachable or timed out (1 s timeout) | Pass (assumes ready) | 200 |
|
||||
| Kubernetes client initialization failed | Pass (assumes ready) | 200 |
|
||||
| `NODE_NAME` env var not set | Hard error | 500 |
|
||||
|
||||
A warning is logged whenever the API is unavailable and the check falls back to assuming ready.
|
||||
|
||||
### `okd_router_1936` check
|
||||
|
||||
Sends `GET http://127.0.0.1:1936/healthz/ready` with a 5-second timeout.
|
||||
Returns pass on any 2xx response, fail otherwise.
|
||||
|
||||
### Unknown check names
|
||||
|
||||
Requesting an unknown check name (e.g. `check=bogus`) results in that check returning `passed: false`
|
||||
with reason `"Unknown check: bogus"`, and the overall response is 503.
|
||||
|
||||
## How it works
|
||||
|
||||
### Node name discovery
|
||||
|
||||
The service reads the `NODE_NAME` environment variable, which must be injected via the Kubernetes Downward API:
|
||||
|
||||
```yaml
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
```
|
||||
|
||||
### Kubernetes API authentication
|
||||
|
||||
- Uses standard **in-cluster configuration** — no external credentials needed.
|
||||
- The ServiceAccount token and CA certificate are automatically mounted at `/var/run/secrets/kubernetes.io/serviceaccount/`.
|
||||
- Requires only minimal RBAC: `get` and `list` on the `nodes` resource (see `deploy/resources.yaml`).
|
||||
- Connect and write timeouts are set to **1 second** to keep checks fast.
|
||||
|
||||
## Deploy
|
||||
|
||||
The supporting Kubernetes resources (Namespace, ServiceAccount, ClusterRole, ClusterRoleBinding, and an OpenShift SCC Role and RoleBinding for `hostnetwork`) are in `deploy/resources.yaml`; the DaemonSet itself is in `deploy/daemonset.yaml`.
|
||||
|
||||
```bash
|
||||
kubectl apply -f deploy/resources.yaml
|
||||
kubectl apply -f deploy/daemonset.yaml
|
||||
```
|
||||
|
||||
The DaemonSet uses `hostNetwork: true` and `hostPort: 25001`, so the endpoint is reachable directly on the node's IP at port 25001.
|
||||
It tolerates all taints, ensuring it runs even on nodes marked unschedulable.
|
||||
|
||||
### Configure your external load balancer
|
||||
|
||||
**Example for HAProxy / OPNsense:**
|
||||
- Check type: **HTTP**
|
||||
- URI: `/health`
|
||||
- Port: `25001` (configurable via `LISTEN_PORT` env var)
|
||||
- Interval: 5–10 s
|
||||
- Rise: 2
|
||||
- Fall: 3
|
||||
- Expect: `2xx`
|
||||
|
||||
## Endpoint usage
|
||||
|
||||
### Query parameter
|
||||
|
||||
Use the `check` query parameter to select which checks to run (comma-separated).
|
||||
When omitted, only `node_ready` runs.
|
||||
|
||||
| Request | Checks run |
|
||||
|------------------------------------------------|-----------------------------------|
|
||||
| `GET /health` | `node_ready` |
|
||||
| `GET /health?check=okd_router_1936` | `okd_router_1936` only |
|
||||
| `GET /health?check=node_ready,okd_router_1936` | `node_ready` and `okd_router_1936`|
|
||||
|
||||
> **Note:** specifying `check=` replaces the default. Include `node_ready` explicitly if you need it alongside other checks.
|
||||
|
||||
### Response format
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ready" | "not-ready",
|
||||
"checks": [
|
||||
{
|
||||
"name": "<check-name>",
|
||||
"passed": true | false,
|
||||
"reason": "<failure reason, omitted on success>",
|
||||
"duration_ms": 42
|
||||
}
|
||||
],
|
||||
"total_duration_ms": 42
|
||||
}
|
||||
```
|
||||
|
||||
**Healthy node (default)**
|
||||
```http
|
||||
HTTP/1.1 200 OK
|
||||
|
||||
{
|
||||
"status": "ready",
|
||||
"checks": [{ "name": "node_ready", "passed": true, "duration_ms": 42 }],
|
||||
"total_duration_ms": 42
|
||||
}
|
||||
```
|
||||
|
||||
**Unhealthy node**
|
||||
```http
|
||||
HTTP/1.1 503 Service Unavailable
|
||||
|
||||
{
|
||||
"status": "not-ready",
|
||||
"checks": [
|
||||
{ "name": "node_ready", "passed": false, "reason": "KubeletNotReady", "duration_ms": 35 }
|
||||
],
|
||||
"total_duration_ms": 35
|
||||
}
|
||||
```
|
||||
|
||||
**API server unreachable (fail-open)**
|
||||
```http
|
||||
HTTP/1.1 200 OK
|
||||
|
||||
{
|
||||
"status": "ready",
|
||||
"checks": [{ "name": "node_ready", "passed": true, "duration_ms": 1001 }],
|
||||
"total_duration_ms": 1001
|
||||
}
|
||||
```
|
||||
*(A warning is logged: `Kubernetes API appears to be down … Assuming node is ready.`)*
|
||||
|
||||
## Configuration
|
||||
|
||||
| Env var | Default | Description |
|
||||
|---------------|----------|--------------------------------------|
|
||||
| `NODE_NAME` | required | Node name, injected via Downward API |
|
||||
| `LISTEN_PORT` | `25001` | TCP port the HTTP server binds to |
|
||||
| `RUST_LOG` | — | Log level (e.g. `info`, `debug`) |
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
# Run locally
|
||||
NODE_NAME=my-test-node cargo run
|
||||
|
||||
# Run tests
|
||||
cargo test
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
*Minimal, auditable, and built for production bare-metal Kubernetes environments.*
|
||||
13
harmony_node_readiness/build-docker.sh
Executable file
13
harmony_node_readiness/build-docker.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
#
# Builds the harmony-node-readiness-endpoint release binary, copies it next to
# the Dockerfile and builds the Docker image tagged with ${DOCKER_TAG}
# (default: dev).
#
# TODO
# This is meant to be run on a machine with harmony development tools installed (cargo, etc)

# Stop at the first failing command so a broken cargo build can never be
# followed by packaging a stale binary into the image.
set -euo pipefail

# The relative paths below (../target, the Docker build context) assume the
# script's own directory is the working directory.
cd "$(dirname "$0")"

DOCKER_TAG="${DOCKER_TAG:-dev}"

cargo build --release

cp ../target/release/harmony-node-readiness-endpoint .

docker build . -t "hub.nationtech.io/harmony/harmony-node-readiness-endpoint:${DOCKER_TAG}"
|
||||
|
||||
36
harmony_node_readiness/deploy/daemonset.yaml
Normal file
36
harmony_node_readiness/deploy/daemonset.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: node-healthcheck
|
||||
namespace: harmony-node-healthcheck
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: node-healthcheck
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: node-healthcheck
|
||||
spec:
|
||||
serviceAccountName: node-healthcheck-sa
|
||||
hostNetwork: true
|
||||
# This ensures the pod runs even if the node is already "unschedulable"
|
||||
# so it can report the status correctly.
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
containers:
|
||||
- name: checker
|
||||
image: hub.nationtech.io/harmony/harmony-node-readiness-endpoint:latest
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
ports:
|
||||
- containerPort: 25001
|
||||
hostPort: 25001
|
||||
name: health-port
|
||||
resources:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 50Mi
|
||||
64
harmony_node_readiness/deploy/resources.yaml
Normal file
64
harmony_node_readiness/deploy/resources.yaml
Normal file
@@ -0,0 +1,64 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: harmony-node-healthcheck
|
||||
labels:
|
||||
name: harmony-node-healthcheck
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: node-healthcheck-sa
|
||||
namespace: harmony-node-healthcheck
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: node-healthcheck-role
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
verbs: ["get", "list"]
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: allow-hostnetwork-scc
|
||||
namespace: harmony-node-healthcheck
|
||||
rules:
|
||||
- apiGroups: ["security.openshift.io"]
|
||||
resources: ["securitycontextconstraints"]
|
||||
resourceNames: ["hostnetwork"]
|
||||
verbs: ["use"]
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: node-status-querier-scc-binding
|
||||
namespace: harmony-node-healthcheck
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: node-healthcheck-sa
|
||||
namespace: harmony-node-healthcheck
|
||||
roleRef:
|
||||
kind: Role
|
||||
name: allow-hostnetwork-scc
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: read-nodes-binding
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: node-healthcheck-sa
|
||||
namespace: harmony-node-healthcheck
|
||||
roleRef:
|
||||
kind: ClusterRole
|
||||
name: node-healthcheck-role
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
282
harmony_node_readiness/src/main.rs
Normal file
282
harmony_node_readiness/src/main.rs
Normal file
@@ -0,0 +1,282 @@
|
||||
use actix_web::{App, HttpResponse, HttpServer, Responder, get, web};
|
||||
use k8s_openapi::api::core::v1::Node;
|
||||
use kube::{Api, Client, Config};
|
||||
|
||||
use log::{debug, error, info, warn};
|
||||
use reqwest;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::env;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::task::JoinSet;
|
||||
|
||||
const K8S_CLIENT_TIMEOUT: Duration = Duration::from_secs(1);
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct HealthStatus {
|
||||
status: String,
|
||||
checks: Vec<CheckResult>,
|
||||
total_duration_ms: u128,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct CheckResult {
|
||||
name: String,
|
||||
passed: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
reason: Option<String>,
|
||||
duration_ms: u128,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct HealthError {
|
||||
status: String,
|
||||
error: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct HealthQuery {
|
||||
#[serde(rename = "check")]
|
||||
checks: Option<String>,
|
||||
}
|
||||
|
||||
/// Check if the node's Ready condition is true via Kubernetes API
|
||||
async fn check_node_ready(client: Client, node_name: &str) -> Result<(), String> {
|
||||
let nodes: Api<Node> = Api::all(client);
|
||||
|
||||
let node = match nodes.get(node_name).await {
|
||||
Ok(n) => n,
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Kubernetes API appears to be down, unreachable, or timed out for node '{}': {}. Assuming node is ready.",
|
||||
node_name, e
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let conditions = node.status.and_then(|s| s.conditions).unwrap_or_default();
|
||||
|
||||
for condition in conditions {
|
||||
if condition.type_ == "Ready" {
|
||||
let is_ready = condition.status == "True";
|
||||
let reason = condition
|
||||
.reason
|
||||
.clone()
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
|
||||
if !is_ready {
|
||||
return Err(reason);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
Err("Ready condition not found".to_string())
|
||||
}
|
||||
|
||||
/// Check OKD router health endpoint on port 1936
|
||||
async fn check_okd_router_1936() -> Result<(), String> {
|
||||
debug!("Checking okd router 1936");
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(5))
|
||||
.build()
|
||||
.map_err(|e| format!("Failed to build HTTP client: {}", e))?;
|
||||
|
||||
let response = client
|
||||
.get("http://127.0.0.1:1936/healthz/ready")
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to connect to OKD router: {}", e))?;
|
||||
|
||||
debug!("okd router 1936 response status {}", response.status());
|
||||
|
||||
if response.status().is_success() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(format!("OKD router returned status: {}", response.status()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the comma-separated check names from the `check` query parameter.
///
/// Each name is trimmed, and empty entries (as produced by a trailing or doubled comma,
/// or by a whitespace-only value) are ignored so they are not reported as an unknown
/// check named `""`.
///
/// Returns `["node_ready"]` when the parameter is absent or contains no names at all;
/// otherwise returns the names in the order they were given.
fn parse_checks(checks_param: Option<&str>) -> Vec<String> {
    let requested: Vec<String> = checks_param
        .unwrap_or_default()
        .split(',')
        .map(str::trim)
        .filter(|name| !name.is_empty())
        .map(str::to_string)
        .collect();

    if requested.is_empty() {
        // Nothing usable was requested: fall back to the default check.
        vec!["node_ready".to_string()]
    } else {
        requested
    }
}
|
||||
|
||||
/// Run a single health check by name and return the result
|
||||
async fn run_check(check_name: &str, client: Option<Client>, node_name: &str) -> CheckResult {
|
||||
let start = Instant::now();
|
||||
|
||||
let result = match check_name {
|
||||
"node_ready" => match client {
|
||||
Some(c) => check_node_ready(c, node_name).await,
|
||||
None => {
|
||||
warn!(
|
||||
"Kubernetes client not available for node '{}'. Assuming node is ready.",
|
||||
node_name
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
"okd_router_1936" => check_okd_router_1936().await,
|
||||
_ => Err(format!("Unknown check: {}", check_name)),
|
||||
};
|
||||
|
||||
let duration_ms = start.elapsed().as_millis();
|
||||
|
||||
match result {
|
||||
Ok(()) => CheckResult {
|
||||
name: check_name.to_string(),
|
||||
passed: true,
|
||||
reason: None,
|
||||
duration_ms,
|
||||
},
|
||||
Err(reason) => CheckResult {
|
||||
name: check_name.to_string(),
|
||||
passed: false,
|
||||
reason: Some(reason),
|
||||
duration_ms,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[get("/health")]
|
||||
async fn health(query: web::Query<HealthQuery>) -> impl Responder {
|
||||
let node_name = match env::var("NODE_NAME") {
|
||||
Ok(name) => name,
|
||||
Err(_) => {
|
||||
error!("NODE_NAME environment variable not set");
|
||||
return HttpResponse::InternalServerError().json(HealthError {
|
||||
status: "error".to_string(),
|
||||
error: "NODE_NAME environment variable not set".to_string(),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Parse requested checks from query parameter
|
||||
let requested_checks = parse_checks(query.checks.as_deref());
|
||||
|
||||
// Check if node_ready check requires Kubernetes client
|
||||
let needs_k8s_client = requested_checks.contains(&"node_ready".to_string());
|
||||
|
||||
// Initialize Kubernetes client only if needed
|
||||
let k8s_client = if needs_k8s_client {
|
||||
match Config::infer().await {
|
||||
Ok(mut config) => {
|
||||
config.write_timeout = Some(K8S_CLIENT_TIMEOUT);
|
||||
config.connect_timeout = Some(K8S_CLIENT_TIMEOUT);
|
||||
Some(Client::try_from(config).map_err(|e| e.to_string()))
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to infer Kubernetes config for node '{}': {}. Assuming node_ready is healthy.",
|
||||
node_name, e
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
.and_then(|result| match result {
|
||||
Ok(client) => Some(client),
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to create Kubernetes client for node '{}': {}. Assuming node_ready is healthy.",
|
||||
node_name, e
|
||||
);
|
||||
None
|
||||
}
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Run all requested checks in parallel
|
||||
let start = Instant::now();
|
||||
let mut join_set = JoinSet::new();
|
||||
debug!("Running checks {requested_checks:?}");
|
||||
|
||||
for check_name in requested_checks {
|
||||
let client = k8s_client.clone();
|
||||
let node_name = node_name.clone();
|
||||
join_set.spawn(async move { run_check(&check_name, client, &node_name).await });
|
||||
}
|
||||
let mut check_results = Vec::new();
|
||||
while let Some(result) = join_set.join_next().await {
|
||||
match result {
|
||||
Ok(check) => check_results.push(check),
|
||||
Err(e) => error!("Check task failed: {}", e),
|
||||
}
|
||||
}
|
||||
let total_duration_ms = start.elapsed().as_millis();
|
||||
|
||||
// Determine overall status
|
||||
let all_passed = check_results.iter().all(|c| c.passed);
|
||||
|
||||
if all_passed {
|
||||
info!(
|
||||
"All health checks passed for node '{}' in {}ms",
|
||||
node_name, total_duration_ms
|
||||
);
|
||||
HttpResponse::Ok().json(HealthStatus {
|
||||
status: "ready".to_string(),
|
||||
checks: check_results,
|
||||
total_duration_ms,
|
||||
})
|
||||
} else {
|
||||
let failed_checks: Vec<&str> = check_results
|
||||
.iter()
|
||||
.filter(|c| !c.passed)
|
||||
.map(|c| c.name.as_str())
|
||||
.collect();
|
||||
warn!(
|
||||
"Health checks failed for node '{}' in {}ms: {:?}",
|
||||
node_name, total_duration_ms, failed_checks
|
||||
);
|
||||
HttpResponse::ServiceUnavailable().json(HealthStatus {
|
||||
status: "not-ready".to_string(),
|
||||
checks: check_results,
|
||||
total_duration_ms,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> std::io::Result<()> {
|
||||
env_logger::init();
|
||||
|
||||
let port = env::var("LISTEN_PORT").unwrap_or_else(|_| "25001".to_string());
|
||||
let port = port
|
||||
.parse::<u16>()
|
||||
.unwrap_or_else(|_| panic!("Invalid port number: {}", port));
|
||||
let bind_addr = format!("0.0.0.0:{}", port);
|
||||
|
||||
info!("Starting harmony-node-readiness-endpoint on {}", bind_addr);
|
||||
|
||||
HttpServer::new(|| App::new().service(health))
|
||||
.workers(3)
|
||||
.bind(&bind_addr)?
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A missing or empty `check` parameter selects the default `node_ready` check.
    #[test]
    fn parse_checks_defaults_to_node_ready() {
        assert_eq!(parse_checks(None), vec!["node_ready"]);
        assert_eq!(parse_checks(Some("")), vec!["node_ready"]);
    }

    /// Names are split on commas and surrounding whitespace is removed.
    #[test]
    fn parse_checks_splits_and_trims_values() {
        assert_eq!(
            parse_checks(Some("node_ready, okd_router_1936 ")),
            vec!["node_ready", "okd_router_1936"]
        );
    }
}
|
||||
@@ -21,6 +21,7 @@ http.workspace = true
|
||||
inquire.workspace = true
|
||||
interactive-parse = "0.1.5"
|
||||
schemars = "0.8"
|
||||
vaultrs = "0.7.4"
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions.workspace = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use lazy_static::lazy_static;
|
||||
use std::env;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref SECRET_NAMESPACE: String =
|
||||
@@ -16,3 +17,16 @@ lazy_static! {
|
||||
pub static ref INFISICAL_CLIENT_SECRET: Option<String> =
|
||||
std::env::var("HARMONY_SECRET_INFISICAL_CLIENT_SECRET").ok();
|
||||
}
|
||||
|
||||
lazy_static! {
    // Openbao/Vault configuration

    /// Server address, read from `OPENBAO_URL` and, only when that is unset,
    /// from `VAULT_ADDR`. `None` when neither variable is available.
    pub static ref OPENBAO_URL: Option<String> = env::var("OPENBAO_URL")
        // `or_else` keeps the fallback lookup lazy: VAULT_ADDR is only read when needed.
        .or_else(|_| env::var("VAULT_ADDR"))
        .ok();

    /// Pre-issued token from `OPENBAO_TOKEN`, if set.
    pub static ref OPENBAO_TOKEN: Option<String> = env::var("OPENBAO_TOKEN").ok();

    /// Username from `OPENBAO_USERNAME`, if set.
    pub static ref OPENBAO_USERNAME: Option<String> = env::var("OPENBAO_USERNAME").ok();

    /// Password from `OPENBAO_PASSWORD`, if set.
    pub static ref OPENBAO_PASSWORD: Option<String> = env::var("OPENBAO_PASSWORD").ok();

    /// `true` only when `OPENBAO_SKIP_TLS` is exactly the string `"true"`;
    /// any other value, or an unset variable, yields `false`.
    pub static ref OPENBAO_SKIP_TLS: bool =
        env::var("OPENBAO_SKIP_TLS").map(|v| v == "true").unwrap_or(false);

    /// KV mount name from `OPENBAO_KV_MOUNT`, defaulting to `"secret"`.
    pub static ref OPENBAO_KV_MOUNT: String =
        env::var("OPENBAO_KV_MOUNT").unwrap_or_else(|_| "secret".to_string());
}
|
||||
|
||||
@@ -8,6 +8,12 @@ use config::INFISICAL_CLIENT_SECRET;
|
||||
use config::INFISICAL_ENVIRONMENT;
|
||||
use config::INFISICAL_PROJECT_ID;
|
||||
use config::INFISICAL_URL;
|
||||
use config::OPENBAO_KV_MOUNT;
|
||||
use config::OPENBAO_PASSWORD;
|
||||
use config::OPENBAO_SKIP_TLS;
|
||||
use config::OPENBAO_TOKEN;
|
||||
use config::OPENBAO_URL;
|
||||
use config::OPENBAO_USERNAME;
|
||||
use config::SECRET_STORE;
|
||||
use interactive_parse::InteractiveParseObj;
|
||||
use log::debug;
|
||||
@@ -17,6 +23,7 @@ use serde::{Serialize, de::DeserializeOwned};
|
||||
use std::fmt;
|
||||
use store::InfisicalSecretStore;
|
||||
use store::LocalFileSecretStore;
|
||||
use store::OpenbaoSecretStore;
|
||||
use thiserror::Error;
|
||||
use tokio::sync::OnceCell;
|
||||
|
||||
@@ -69,11 +76,24 @@ async fn get_secret_manager() -> &'static SecretManager {
|
||||
|
||||
/// The async initialization function for the SecretManager.
|
||||
async fn init_secret_manager() -> SecretManager {
|
||||
let default_secret_score = "infisical".to_string();
|
||||
let store_type = SECRET_STORE.as_ref().unwrap_or(&default_secret_score);
|
||||
let default_secret_store = "infisical".to_string();
|
||||
let store_type = SECRET_STORE.as_ref().unwrap_or(&default_secret_store);
|
||||
|
||||
let store: Box<dyn SecretStore> = match store_type.as_str() {
|
||||
"file" => Box::new(LocalFileSecretStore::default()),
|
||||
"openbao" | "vault" => {
|
||||
let store = OpenbaoSecretStore::new(
|
||||
OPENBAO_URL.clone().expect("Openbao/Vault URL must be set, see harmony_secret config for ways to provide it. You can try with OPENBAO_URL or VAULT_ADDR"),
|
||||
OPENBAO_KV_MOUNT.clone(),
|
||||
*OPENBAO_SKIP_TLS,
|
||||
OPENBAO_TOKEN.clone(),
|
||||
OPENBAO_USERNAME.clone(),
|
||||
OPENBAO_PASSWORD.clone(),
|
||||
)
|
||||
.await
|
||||
.expect("Failed to initialize Openbao/Vault secret store");
|
||||
Box::new(store)
|
||||
}
|
||||
"infisical" | _ => {
|
||||
let store = InfisicalSecretStore::new(
|
||||
INFISICAL_URL.clone().expect("Infisical url must be set, see harmony_secret config for ways to provide it. You can try with HARMONY_SECRET_INFISICAL_URL"),
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
mod infisical;
|
||||
mod local_file;
|
||||
mod openbao;
|
||||
|
||||
pub use infisical::InfisicalSecretStore;
|
||||
pub use infisical::*;
|
||||
pub use local_file::LocalFileSecretStore;
|
||||
pub use local_file::*;
|
||||
pub use openbao::OpenbaoSecretStore;
|
||||
|
||||
317
harmony_secret/src/store/openbao.rs
Normal file
317
harmony_secret/src/store/openbao.rs
Normal file
@@ -0,0 +1,317 @@
|
||||
use crate::{SecretStore, SecretStoreError};
|
||||
use async_trait::async_trait;
|
||||
use log::{debug, info, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Debug;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use vaultrs::auth;
|
||||
use vaultrs::client::{Client, VaultClient, VaultClientSettingsBuilder};
|
||||
use vaultrs::kv2;
|
||||
|
||||
/// Token response from Vault/Openbao auth endpoints.
///
/// Wraps the [`AuthInfo`] payload under an `auth` key when deserialized.
/// NOTE(review): no use of this type is visible in this module — confirm it is still needed.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TokenResponse {
|
||||
/// Authentication details carried by the response.
auth: AuthInfo,
|
||||
}
|
||||
|
||||
/// Token data kept by this store: it is built from the `vaultrs` login
/// response, serialized to the on-disk token cache and read back from it.
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct AuthInfo {
|
||||
/// The token string handed to the Vault client for authenticated requests.
client_token: String,
|
||||
// `default` lets a cached file without this key deserialize to `None`.
#[serde(default)]
|
||||
/// Lease duration reported at login (presumably seconds — TODO confirm).
lease_duration: Option<u64>,
|
||||
/// Token type as reported by the server at login.
token_type: String,
|
||||
}
|
||||
|
||||
/// Converts the login response returned by `vaultrs` into the local, cacheable [`AuthInfo`].
impl From<vaultrs::api::AuthInfo> for AuthInfo {
    fn from(value: vaultrs::api::AuthInfo) -> Self {
        // The upstream lease duration is always present; it is wrapped in
        // `Some` because the local field is optional.
        let lease_duration = Some(value.lease_duration);
        let client_token = value.client_token;
        let token_type = value.token_type;

        Self {
            client_token,
            lease_duration,
            token_type,
        }
    }
}
|
||||
|
||||
/// Secret store backed by an Openbao/Vault server, accessed through `vaultrs`.
pub struct OpenbaoSecretStore {
|
||||
/// Authenticated client used for every KV read and write.
client: VaultClient,
|
||||
/// Name of the KV mount passed to `kv2::read` / `kv2::set`.
kv_mount: String,
|
||||
}
|
||||
|
||||
impl Debug for OpenbaoSecretStore {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("OpenbaoSecretStore")
|
||||
.field("client", &self.client.settings)
|
||||
.field("kv_mount", &self.kv_mount)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl OpenbaoSecretStore {
|
||||
/// Creates a new Openbao/Vault secret store with authentication
|
||||
pub async fn new(
|
||||
base_url: String,
|
||||
kv_mount: String,
|
||||
skip_tls: bool,
|
||||
token: Option<String>,
|
||||
username: Option<String>,
|
||||
password: Option<String>,
|
||||
) -> Result<Self, SecretStoreError> {
|
||||
info!("OPENBAO_STORE: Initializing client for URL: {base_url}");
|
||||
|
||||
// 1. If token is provided via env var, use it directly
|
||||
if let Some(t) = token {
|
||||
debug!("OPENBAO_STORE: Using token from environment variable");
|
||||
return Self::with_token(&base_url, skip_tls, &t, &kv_mount);
|
||||
}
|
||||
|
||||
// 2. Try to load cached token
|
||||
let cache_path = Self::get_token_cache_path(&base_url);
|
||||
if let Ok(cached_token) = Self::load_cached_token(&cache_path) {
|
||||
debug!("OPENBAO_STORE: Found cached token, validating...");
|
||||
if Self::validate_token(&base_url, skip_tls, &cached_token.client_token).await {
|
||||
info!("OPENBAO_STORE: Cached token is valid");
|
||||
return Self::with_token(
|
||||
&base_url,
|
||||
skip_tls,
|
||||
&cached_token.client_token,
|
||||
&kv_mount,
|
||||
);
|
||||
}
|
||||
warn!("OPENBAO_STORE: Cached token is invalid or expired");
|
||||
}
|
||||
|
||||
// 3. Authenticate with username/password
|
||||
let (user, pass) = match (username, password) {
|
||||
(Some(u), Some(p)) => (u, p),
|
||||
_ => {
|
||||
return Err(SecretStoreError::Store(
|
||||
"No valid token found and username/password not provided. \
|
||||
Set OPENBAO_TOKEN or OPENBAO_USERNAME/OPENBAO_PASSWORD environment variables."
|
||||
.into(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let token =
|
||||
Self::authenticate_userpass(&base_url, &kv_mount, skip_tls, &user, &pass).await?;
|
||||
|
||||
// Cache the token
|
||||
if let Err(e) = Self::cache_token(&cache_path, &token) {
|
||||
warn!("OPENBAO_STORE: Failed to cache token: {e}");
|
||||
}
|
||||
|
||||
Self::with_token(&base_url, skip_tls, &token.client_token, &kv_mount)
|
||||
}
|
||||
|
||||
/// Create a client with an existing token
|
||||
fn with_token(
|
||||
base_url: &str,
|
||||
skip_tls: bool,
|
||||
token: &str,
|
||||
kv_mount: &str,
|
||||
) -> Result<Self, SecretStoreError> {
|
||||
let mut settings = VaultClientSettingsBuilder::default();
|
||||
settings.address(base_url).token(token);
|
||||
|
||||
if skip_tls {
|
||||
warn!("OPENBAO_STORE: Skipping TLS verification - not recommended for production!");
|
||||
settings.verify(false);
|
||||
}
|
||||
|
||||
let client = VaultClient::new(
|
||||
settings
|
||||
.build()
|
||||
.map_err(|e| SecretStoreError::Store(Box::new(e)))?,
|
||||
)
|
||||
.map_err(|e| SecretStoreError::Store(Box::new(e)))?;
|
||||
|
||||
Ok(Self {
|
||||
client,
|
||||
kv_mount: kv_mount.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the cache file path for a given base URL
|
||||
fn get_token_cache_path(base_url: &str) -> PathBuf {
|
||||
let hash = Self::hash_url(base_url);
|
||||
directories::BaseDirs::new()
|
||||
.map(|dirs| {
|
||||
dirs.data_dir()
|
||||
.join("harmony")
|
||||
.join("secrets")
|
||||
.join(format!("openbao_token_{hash}"))
|
||||
})
|
||||
.unwrap_or_else(|| PathBuf::from(format!("/tmp/openbao_token_{hash}")))
|
||||
}
|
||||
|
||||
/// Derives a 16-character lowercase hex digest from `url`, used to give each
/// server its own cache file name.
///
/// The digest comes from the standard library's `DefaultHasher`; it is a
/// naming aid, not a cryptographic hash.
fn hash_url(url: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(url, &mut state);
    let digest: u64 = state.finish();
    // Zero-pad so the result is always exactly 16 hex digits.
    format!("{:016x}", digest)
}
|
||||
|
||||
/// Load cached token from file
|
||||
fn load_cached_token(path: &PathBuf) -> Result<AuthInfo, String> {
|
||||
serde_json::from_str(
|
||||
&fs::read_to_string(path)
|
||||
.map_err(|e| format!("Could not load token from file {path:?} : {e}"))?,
|
||||
)
|
||||
.map_err(|e| format!("Could not deserialize token from file {path:?} : {e}"))
|
||||
}
|
||||
|
||||
/// Cache token to file
|
||||
fn cache_token(path: &PathBuf, token: &AuthInfo) -> Result<(), std::io::Error> {
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
// Set file permissions to 0600 (owner read/write only)
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::OpenOptionsExt;
|
||||
let mut file = fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.mode(0o600)
|
||||
.open(path)?;
|
||||
use std::io::Write;
|
||||
file.write_all(serde_json::to_string(token)?.as_bytes())?;
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
fs::write(path, token)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate if a token is still valid using vaultrs
|
||||
async fn validate_token(base_url: &str, skip_tls: bool, token: &str) -> bool {
|
||||
let mut settings = VaultClientSettingsBuilder::default();
|
||||
settings.address(base_url).token(token);
|
||||
if skip_tls {
|
||||
settings.verify(false);
|
||||
}
|
||||
|
||||
if let Some(settings) = settings.build().ok() {
|
||||
let client = match VaultClient::new(settings) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return false,
|
||||
};
|
||||
return vaultrs::token::lookup(&client, token).await.is_ok();
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Authenticate using username/password (userpass auth method)
|
||||
async fn authenticate_userpass(
|
||||
base_url: &str,
|
||||
kv_mount: &str,
|
||||
skip_tls: bool,
|
||||
username: &str,
|
||||
password: &str,
|
||||
) -> Result<AuthInfo, SecretStoreError> {
|
||||
info!("OPENBAO_STORE: Authenticating with username/password");
|
||||
|
||||
// Create a client without a token for authentication
|
||||
let mut settings = VaultClientSettingsBuilder::default();
|
||||
settings.address(base_url);
|
||||
if skip_tls {
|
||||
settings.verify(false);
|
||||
}
|
||||
|
||||
let client = VaultClient::new(
|
||||
settings
|
||||
.build()
|
||||
.map_err(|e| SecretStoreError::Store(Box::new(e)))?,
|
||||
)
|
||||
.map_err(|e| SecretStoreError::Store(Box::new(e)))?;
|
||||
|
||||
// Authenticate using userpass method
|
||||
let token = auth::userpass::login(&client, kv_mount, username, password)
|
||||
.await
|
||||
.map_err(|e| SecretStoreError::Store(Box::new(e)))?;
|
||||
|
||||
Ok(token.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SecretStore for OpenbaoSecretStore {
|
||||
async fn get_raw(&self, namespace: &str, key: &str) -> Result<Vec<u8>, SecretStoreError> {
|
||||
let path = format!("{}/{}", namespace, key);
|
||||
info!("OPENBAO_STORE: Getting key '{key}' from namespace '{namespace}'");
|
||||
debug!("OPENBAO_STORE: Request path: {path}");
|
||||
|
||||
let data: serde_json::Value = kv2::read(&self.client, &self.kv_mount, &path)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
// Check for not found error
|
||||
if e.to_string().contains("does not exist") || e.to_string().contains("404") {
|
||||
SecretStoreError::NotFound {
|
||||
namespace: namespace.to_string(),
|
||||
key: key.to_string(),
|
||||
}
|
||||
} else {
|
||||
SecretStoreError::Store(Box::new(e))
|
||||
}
|
||||
})?;
|
||||
|
||||
// Extract the actual secret value stored under the "value" key
|
||||
let value = data.get("value").and_then(|v| v.as_str()).ok_or_else(|| {
|
||||
SecretStoreError::Store("Secret does not contain expected 'value' field".into())
|
||||
})?;
|
||||
|
||||
Ok(value.as_bytes().to_vec())
|
||||
}
|
||||
|
||||
async fn set_raw(
|
||||
&self,
|
||||
namespace: &str,
|
||||
key: &str,
|
||||
val: &[u8],
|
||||
) -> Result<(), SecretStoreError> {
|
||||
let path = format!("{}/{}", namespace, key);
|
||||
info!("OPENBAO_STORE: Setting key '{key}' in namespace '{namespace}'");
|
||||
debug!("OPENBAO_STORE: Request path: {path}");
|
||||
|
||||
let value_str =
|
||||
String::from_utf8(val.to_vec()).map_err(|e| SecretStoreError::Store(Box::new(e)))?;
|
||||
|
||||
// Create the data structure expected by our format
|
||||
let data = serde_json::json!({
|
||||
"value": value_str
|
||||
});
|
||||
|
||||
kv2::set(&self.client, &self.kv_mount, &path, &data)
|
||||
.await
|
||||
.map_err(|e| SecretStoreError::Store(Box::new(e)))?;
|
||||
|
||||
info!("OPENBAO_STORE: Successfully stored secret '{key}'");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same URL twice yields the same 16-character digest.
    #[test]
    fn test_hash_url_consistency() {
        let url = "https://vault.example.com:8200";
        let first = OpenbaoSecretStore::hash_url(url);
        let second = OpenbaoSecretStore::hash_url(url);

        assert_eq!(first, second);
        assert_eq!(first.len(), 16);
    }

    /// Two different URLs produce different digests.
    #[test]
    fn test_hash_url_uniqueness() {
        let digests: Vec<String> = ["https://vault1.example.com", "https://vault2.example.com"]
            .iter()
            .map(|url| OpenbaoSecretStore::hash_url(url))
            .collect();

        assert_ne!(digests[0], digests[1]);
    }
}
|
||||
@@ -1408,6 +1408,7 @@ pub struct Account {
|
||||
pub hostnames: String,
|
||||
pub wildcard: i32,
|
||||
pub zone: MaybeString,
|
||||
pub dynipv6host: Option<MaybeString>,
|
||||
pub checkip: String,
|
||||
#[yaserde(rename = "checkip_timeout")]
|
||||
pub checkip_timeout: i32,
|
||||
|
||||
@@ -68,7 +68,7 @@ impl<'a> DhcpConfigDnsMasq<'a> {
|
||||
///
|
||||
/// This function implements specific logic to handle existing entries:
|
||||
/// - If no host exists for the given IP or hostname, a new entry is created.
|
||||
/// - If exactly one host exists for the IP and/or hostname, the new MAC is appended to it.
|
||||
/// - If exactly one host exists for the IP and/or hostname, the new MAC is set. Old MAC addresses are dropped.
|
||||
/// - It will error if the IP and hostname exist but point to two different host entries,
|
||||
/// as this represents an unresolvable conflict.
|
||||
/// - It will also error if multiple entries are found for the IP or hostname, indicating an
|
||||
@@ -146,40 +146,24 @@ impl<'a> DhcpConfigDnsMasq<'a> {
|
||||
let host_to_modify_ip = host_to_modify.ip.content_string();
|
||||
if host_to_modify_ip != ip_str {
|
||||
warn!(
|
||||
"Hostname '{}' already exists with a different IP ({}). Setting new IP {ip_str}. Appending MAC {}.",
|
||||
hostname, host_to_modify_ip, mac_list
|
||||
"Hostname '{}' already exists with a different IP ({}). Setting new IP {ip_str}.",
|
||||
hostname, host_to_modify_ip,
|
||||
);
|
||||
host_to_modify.ip.content = Some(ip_str);
|
||||
} else if host_to_modify.host != hostname {
|
||||
warn!(
|
||||
"IP {} already exists with a different hostname ('{}'). Setting hostname to {hostname}. Appending MAC {}.",
|
||||
ipaddr, host_to_modify.host, mac_list
|
||||
"IP {} already exists with a different hostname ('{}'). Setting hostname to {hostname}",
|
||||
ipaddr, host_to_modify.host
|
||||
);
|
||||
host_to_modify.host = hostname.to_string();
|
||||
}
|
||||
|
||||
for single_mac in mac.iter() {
|
||||
if !host_to_modify
|
||||
.hwaddr
|
||||
.content_string()
|
||||
.split(',')
|
||||
.any(|m| m.eq_ignore_ascii_case(single_mac))
|
||||
{
|
||||
info!(
|
||||
"Appending MAC {} to existing static host for {} ({})",
|
||||
single_mac, host_to_modify.host, host_to_modify_ip
|
||||
);
|
||||
let mut updated_macs = host_to_modify.hwaddr.content_string().to_string();
|
||||
updated_macs.push(',');
|
||||
updated_macs.push_str(single_mac);
|
||||
host_to_modify.hwaddr.content = updated_macs.into();
|
||||
} else {
|
||||
debug!(
|
||||
"MAC {} already present in static host entry for {} ({}). No changes made.",
|
||||
single_mac, host_to_modify.host, host_to_modify_ip
|
||||
);
|
||||
}
|
||||
}
|
||||
info!(
|
||||
"Replacing previous mac adresses {:?} with new {}",
|
||||
host_to_modify.hwaddr, mac_list
|
||||
);
|
||||
|
||||
host_to_modify.hwaddr.content = Some(mac_list);
|
||||
}
|
||||
_ => {
|
||||
return Err(DhcpError::Configuration(format!(
|
||||
@@ -397,7 +381,7 @@ mod test {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_add_mac_to_existing_host_by_ip_and_hostname() {
|
||||
fn test_replace_mac_on_existing_host_by_ip_and_hostname() {
|
||||
let initial_host = create_host(
|
||||
"uuid-1",
|
||||
"existing-host",
|
||||
@@ -416,14 +400,11 @@ mod test {
|
||||
let hosts = &dhcp_config.opnsense.dnsmasq.as_ref().unwrap().hosts;
|
||||
assert_eq!(hosts.len(), 1);
|
||||
let host = &hosts[0];
|
||||
assert_eq!(
|
||||
host.hwaddr.content_string(),
|
||||
"AA:BB:CC:DD:EE:FF,00:11:22:33:44:55"
|
||||
);
|
||||
assert_eq!(host.hwaddr.content_string(), "00:11:22:33:44:55");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_add_mac_to_existing_host_by_ip_only() {
|
||||
fn test_replace_mac_on_existing_host_by_ip_only() {
|
||||
let initial_host = create_host(
|
||||
"uuid-1",
|
||||
"existing-host",
|
||||
@@ -443,10 +424,7 @@ mod test {
|
||||
let hosts = &dhcp_config.opnsense.dnsmasq.as_ref().unwrap().hosts;
|
||||
assert_eq!(hosts.len(), 1);
|
||||
let host = &hosts[0];
|
||||
assert_eq!(
|
||||
host.hwaddr.content_string(),
|
||||
"AA:BB:CC:DD:EE:FF,00:11:22:33:44:55"
|
||||
);
|
||||
assert_eq!(host.hwaddr.content_string(), "00:11:22:33:44:55");
|
||||
assert_eq!(host.host, new_hostname); // hostname should be updated
|
||||
}
|
||||
|
||||
@@ -474,10 +452,7 @@ mod test {
|
||||
let hosts = &dhcp_config.opnsense.dnsmasq.as_ref().unwrap().hosts;
|
||||
assert_eq!(hosts.len(), 1);
|
||||
let host = &hosts[0];
|
||||
assert_eq!(
|
||||
host.hwaddr.content_string(),
|
||||
"AA:BB:CC:DD:EE:FF,00:11:22:33:44:55"
|
||||
);
|
||||
assert_eq!(host.hwaddr.content_string(), "00:11:22:33:44:55");
|
||||
assert_eq!(host.ip.content_string(), "192.168.1.99"); // Original IP should be preserved.
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user