Compare commits
31 Commits
adr-nats-c
...
feat/impro
| Author | SHA1 | Date | |
|---|---|---|---|
| 063a4d4f5c | |||
| ff7d2fb89e | |||
| 9bb38b930a | |||
| c677487a5e | |||
| c1d46612ac | |||
| 4fba01338d | |||
| 913ed17453 | |||
| 9e185cbbd5 | |||
| 752526f831 | |||
| f9bd6ad260 | |||
| 111181c300 | |||
| 3257cd9569 | |||
| 4b1915c594 | |||
| cf3050ce87 | |||
| c3e27c60be | |||
| 2d26790c82 | |||
| 2e89308b82 | |||
| d8936a8307 | |||
| e2fa12508f | |||
| bea2a75882 | |||
| a1528665d0 | |||
| 613225a00b | |||
| dd1c088f0d | |||
| b4ef009804 | |||
| 191e92048b | |||
| f4a70d8978 | |||
| 2ddc9c0579 | |||
| fececc2efd | |||
| 8afcacbd24 | |||
| b885c35706 | |||
|
|
bb6b4b7f88 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -26,3 +26,6 @@ Cargo.lock
|
||||
*.pdb
|
||||
|
||||
.harmony_generated
|
||||
|
||||
# Useful to create ignore folders for temp files and notes
|
||||
ignore
|
||||
|
||||
54
Cargo.lock
generated
54
Cargo.lock
generated
@@ -1828,6 +1828,40 @@ dependencies = [
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-k8s-drain-node"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert_cmd",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_types",
|
||||
"inquire 0.7.5",
|
||||
"log",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-k8s-write-file-on-node"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert_cmd",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_types",
|
||||
"inquire 0.7.5",
|
||||
"log",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-kube-rs"
|
||||
version = "0.1.0"
|
||||
@@ -3638,26 +3672,6 @@ dependencies = [
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json-prompt"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"brocade",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_secret",
|
||||
"harmony_secret_derive",
|
||||
"harmony_types",
|
||||
"log",
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonpath-rust"
|
||||
version = "0.7.5"
|
||||
|
||||
@@ -52,6 +52,7 @@ kube = { version = "1.1.0", features = [
|
||||
"jsonpatch",
|
||||
] }
|
||||
k8s-openapi = { version = "0.25", features = ["v1_30"] }
|
||||
# TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated https://github.com/sebastienrousseau/serde_yml
|
||||
serde_yaml = "0.9"
|
||||
serde-value = "0.7"
|
||||
http = "1.2"
|
||||
|
||||
87
README.md
87
README.md
@@ -1,4 +1,6 @@
|
||||
# Harmony : Open-source infrastructure orchestration that treats your platform like first-class code
|
||||
# Harmony
|
||||
|
||||
Open-source infrastructure orchestration that treats your platform like first-class code.
|
||||
|
||||
In other words, Harmony is a **next-generation platform engineering framework**.
|
||||
|
||||
@@ -20,9 +22,7 @@ All in **one strongly-typed Rust codebase**.
|
||||
|
||||
From a **developer laptop** to a **global production cluster**, a single **source of truth** drives the **full software lifecycle.**
|
||||
|
||||
---
|
||||
|
||||
## 1 · The Harmony Philosophy
|
||||
## The Harmony Philosophy
|
||||
|
||||
Infrastructure is essential, but it shouldn’t be your core business. Harmony is built on three guiding principles that make modern platforms reliable, repeatable, and easy to reason about.
|
||||
|
||||
@@ -34,9 +34,18 @@ Infrastructure is essential, but it shouldn’t be your core business. Harmony i
|
||||
|
||||
These principles surface as simple, ergonomic Rust APIs that let teams focus on their product while trusting the platform underneath.
|
||||
|
||||
---
|
||||
## Where to Start
|
||||
|
||||
## 2 · Quick Start
|
||||
We have a comprehensive set of documentation right here in the repository.
|
||||
|
||||
| I want to... | Start Here |
|
||||
| ----------------- | ------------------------------------------------------------------ |
|
||||
| Get Started | [Getting Started Guide](./docs/guides/getting-started.md) |
|
||||
| See an Example | [Use Case: Deploy a Rust Web App](./docs/use-cases/rust-webapp.md) |
|
||||
| Explore | [Documentation Hub](./docs/README.md) |
|
||||
| See Core Concepts | [Core Concepts Explained](./docs/concepts.md) |
|
||||
|
||||
## Quick Look: Deploy a Rust Webapp
|
||||
|
||||
The snippet below spins up a complete **production-grade Rust + Leptos Webapp** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.
|
||||
|
||||
@@ -94,63 +103,33 @@ async fn main() {
|
||||
}
|
||||
```
|
||||
|
||||
Run it:
|
||||
To run this:
|
||||
|
||||
```bash
|
||||
cargo run
|
||||
```
|
||||
- Clone the repository: `git clone https://git.nationtech.io/nationtech/harmony`
|
||||
- Install dependencies: `cargo build --release`
|
||||
- Run the example: `cargo run --example try_rust_webapp`
|
||||
|
||||
Harmony analyses the code, shows an execution plan in a TUI, and applies it once you confirm. Same code, same binary—every environment.
|
||||
## Documentation
|
||||
|
||||
---
|
||||
All documentation is in the `/docs` directory.
|
||||
|
||||
## 3 · Core Concepts
|
||||
- [Documentation Hub](./docs/README.md): The main entry point for all documentation.
|
||||
- [Core Concepts](./docs/concepts.md): A detailed look at Score, Topology, Capability, Inventory, and Interpret.
|
||||
- [Component Catalogs](./docs/catalogs/README.md): Discover all available Scores, Topologies, and Capabilities.
|
||||
- [Developer Guide](./docs/guides/developer-guide.md): Learn how to write your own Scores and Topologies.
|
||||
|
||||
| Term | One-liner |
|
||||
| ---------------- | ---------------------------------------------------------------------------------------------------- |
|
||||
| **Score<T>** | Declarative description of the desired state (e.g., `LAMPScore`). |
|
||||
| **Interpret<T>** | Imperative logic that realises a `Score` on a specific environment. |
|
||||
| **Topology** | An environment (local k3d, AWS, bare-metal) exposing verified _Capabilities_ (Kubernetes, DNS, …). |
|
||||
| **Maestro** | Orchestrator that compiles Scores + Topology, ensuring all capabilities line up **at compile-time**. |
|
||||
| **Inventory** | Optional catalogue of physical assets for bare-metal and edge deployments. |
|
||||
## Architectural Decision Records
|
||||
|
||||
A visual overview is in the diagram below.
|
||||
- [ADR-001 · Why Rust](adr/001-rust.md)
|
||||
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
|
||||
- [ADR-006 · Secret Management](adr/006-secret-management.md)
|
||||
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
|
||||
|
||||
[Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)
|
||||
## Contribute
|
||||
|
||||
---
|
||||
Discussions and roadmap live in [Issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
|
||||
|
||||
## 4 · Install
|
||||
|
||||
Prerequisites:
|
||||
|
||||
- Rust
|
||||
- Docker (if you deploy locally)
|
||||
- `kubectl` / `helm` for Kubernetes-based topologies
|
||||
|
||||
```bash
|
||||
git clone https://git.nationtech.io/nationtech/harmony
|
||||
cd harmony
|
||||
cargo build --release # builds the CLI, TUI and libraries
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5 · Learning More
|
||||
|
||||
- **Architectural Decision Records** – dive into the rationale
|
||||
- [ADR-001 · Why Rust](adr/001-rust.md)
|
||||
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
|
||||
- [ADR-006 · Secret Management](adr/006-secret-management.md)
|
||||
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
|
||||
|
||||
- **Extending Harmony** – write new Scores / Interprets, add hardware like OPNsense firewalls, or embed Harmony in your own tooling (`/docs`).
|
||||
|
||||
- **Community** – discussions and roadmap live in [GitLab issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
|
||||
|
||||
---
|
||||
|
||||
## 6 · License
|
||||
## License
|
||||
|
||||
Harmony is released under the **GNU AGPL v3**.
|
||||
|
||||
|
||||
65
adr/019-Network-bond-setup.md
Normal file
65
adr/019-Network-bond-setup.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# Architecture Decision Record: Network Bonding Configuration via External Automation
|
||||
|
||||
Initial Author: Jean-Gabriel Gill-Couture & Sylvain Tremblay
|
||||
|
||||
Initial Date: 2026-02-13
|
||||
|
||||
Last Updated Date: 2026-02-13
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
We need to configure LACP bonds on 10GbE interfaces across all worker nodes in the OpenShift cluster. A significant challenge is that interface names (e.g., `enp1s0f0` vs `ens1f0`) vary across different hardware nodes.
|
||||
|
||||
The standard OpenShift mechanism (MachineConfig) applies identical configurations to all nodes in a MachineConfigPool. Since the interface names differ, a single static MachineConfig cannot target specific physical devices across the entire cluster without complex workarounds.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use the existing "Harmony" automation tool to generate and apply host-specific NetworkManager configuration files directly to the nodes.
|
||||
|
||||
1. Harmony will generate the specific `.nmconnection` files for the bond and slaves based on its inventory of interface names.
|
||||
2. Files will be pushed to `/etc/NetworkManager/system-connections/` on each node.
|
||||
3. Configuration will be applied via `nmcli` reload or a node reboot.
|
||||
|
||||
## Rationale
|
||||
|
||||
* **Inventory Awareness:** Harmony already possesses the specific interface mapping data for each host.
|
||||
* **Persistence:** Fedora CoreOS/SCOS allows writing to `/etc`, and these files persist across reboots and OS upgrades (rpm-ostree updates).
|
||||
* **Avoids Complexity:** This approach avoids the operational overhead of creating unique MachineConfigPools for every single host or hardware variant.
|
||||
* **Safety:** Unlike wildcard matching, this ensures explicit interface selection, preventing accidental bonding of reserved interfaces (e.g., future separation of Ceph storage traffic).
|
||||
|
||||
## Consequences
|
||||
|
||||
**Pros:**
|
||||
* Precise, per-host configuration without polluting the Kubernetes API with hundreds of MachineConfigs.
|
||||
* Standard Linux networking behavior; easy to debug locally.
|
||||
* Prevents accidental interface capture (unlike wildcards).
|
||||
|
||||
**Cons:**
|
||||
* **Loss of Declarative K8s State:** The network config is not managed by the Machine Config Operator (MCO).
|
||||
* **Node Replacement Friction:** Newly provisioned nodes (replacements) will boot with default config. Harmony must be run against new nodes manually or via a hook before they can fully join the cluster workload.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
1. **Wildcard Matching in NetworkManager (e.g., `interface-name=enp*`):**
|
||||
* *Pros:* Single MachineConfig for the whole cluster.
|
||||
* *Cons:* Rejected because it is too broad. It risks capturing interfaces intended for other purposes (e.g., splitting storage and cluster networks later).
|
||||
|
||||
2. **"Kitchen Sink" Configuration:**
|
||||
* *Pros:* Single file listing every possible interface name as a slave.
|
||||
* *Cons:* "Dirty" configuration; results in many inactive connections on every host; brittle if new naming schemes appear.
|
||||
|
||||
3. **Per-Host MachineConfig:**
|
||||
* *Pros:* Fully declarative within OpenShift.
|
||||
* *Cons:* Requires a unique `MachineConfigPool` per host, which is an anti-pattern and unmaintainable at scale.
|
||||
|
||||
4. **On-boot Generation Script:**
|
||||
* *Pros:* Dynamic detection.
|
||||
* *Cons:* Increases boot complexity; harder to debug if the script fails during startup.
|
||||
|
||||
## Additional Notes
|
||||
|
||||
While `/etc` is writable and persistent on CoreOS, this configuration falls outside the "Day 1" Ignition process. Operational runbooks must be updated to ensure Harmony runs on any node replacement events.
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::net::{IpAddr, Ipv4Addr};
|
||||
|
||||
use brocade::{BrocadeOptions, ssh};
|
||||
use harmony_secret::Secret;
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use harmony_types::switch::PortLocation;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -21,17 +21,15 @@ async fn main() {
|
||||
// let ip = IpAddr::V4(Ipv4Addr::new(192, 168, 4, 11)); // brocade @ st
|
||||
let switch_addresses = vec![ip];
|
||||
|
||||
// let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
|
||||
// .await
|
||||
// .unwrap();
|
||||
let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let brocade = brocade::init(
|
||||
&switch_addresses,
|
||||
// &config.username,
|
||||
// &config.password,
|
||||
"admin",
|
||||
"password",
|
||||
BrocadeOptions {
|
||||
&config.username,
|
||||
&config.password,
|
||||
&BrocadeOptions {
|
||||
dry_run: true,
|
||||
ssh: ssh::SshOptions {
|
||||
port: 2222,
|
||||
|
||||
@@ -144,7 +144,7 @@ pub async fn init(
|
||||
ip_addresses: &[IpAddr],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
options: &BrocadeOptions,
|
||||
) -> Result<Box<dyn BrocadeClient + Send + Sync>, Error> {
|
||||
let shell = BrocadeShell::init(ip_addresses, username, password, options).await?;
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ impl BrocadeShell {
|
||||
ip_addresses: &[IpAddr],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
options: &BrocadeOptions,
|
||||
) -> Result<Self, Error> {
|
||||
let ip = ip_addresses
|
||||
.first()
|
||||
|
||||
@@ -70,7 +70,7 @@ pub async fn try_init_client(
|
||||
username: &str,
|
||||
password: &str,
|
||||
ip: &std::net::IpAddr,
|
||||
base_options: BrocadeOptions,
|
||||
base_options: &BrocadeOptions,
|
||||
) -> Result<BrocadeOptions, Error> {
|
||||
let mut default = SshOptions::default();
|
||||
default.port = base_options.ssh.port;
|
||||
|
||||
@@ -1 +1,33 @@
|
||||
Not much here yet, see the `adr` folder for now. More to come in time!
|
||||
# Harmony Documentation Hub
|
||||
|
||||
Welcome to the Harmony documentation. This is the main entry point for learning everything from core concepts to building your own Score, Topologies, and Capabilities.
|
||||
|
||||
## 1. Getting Started
|
||||
|
||||
If you're new to Harmony, start here:
|
||||
|
||||
- [**Getting Started Guide**](./guides/getting-started.md): A step-by-step tutorial that takes you from an empty project to deploying your first application.
|
||||
- [**Core Concepts**](./concepts.md): A high-level overview of the key concepts in Harmony: `Score`, `Topology`, `Capability`, `Inventory`, `Interpret`, ...
|
||||
|
||||
## 2. Use Cases & Examples
|
||||
|
||||
See how to use Harmony to solve real-world problems.
|
||||
|
||||
- [**OKD on Bare Metal**](./use-cases/okd-on-bare-metal.md): A detailed walkthrough of bootstrapping a high-availability OKD cluster from physical hardware.
|
||||
- [**Deploy a Rust Web App**](./use-cases/deploy-rust-webapp.md): A quick guide to deploying a monitored, containerized web application to a Kubernetes cluster.
|
||||
|
||||
## 3. Component Catalogs
|
||||
|
||||
Discover existing, reusable components you can use in your Harmony projects.
|
||||
|
||||
- [**Scores Catalog**](./catalogs/scores.md): A categorized list of all available `Scores` (the "what").
|
||||
- [**Topologies Catalog**](./catalogs/topologies.md): A list of all available `Topologies` (the "where").
|
||||
- [**Capabilities Catalog**](./catalogs/capabilities.md): A list of all available `Capabilities` (the "how").
|
||||
|
||||
## 4. Developer Guides
|
||||
|
||||
Ready to build your own components? These guides show you how.
|
||||
|
||||
- [**Writing a Score**](./guides/writing-a-score.md): Learn how to create your own `Score` and `Interpret` logic to define a new desired state.
|
||||
- [**Writing a Topology**](./guides/writing-a-topology.md): Learn how to model a new environment (like AWS, GCP, or custom hardware) as a `Topology`.
|
||||
- [**Adding Capabilities**](./guides/adding-capabilities.md): See how to add a `Capability` to your custom `Topology`.
|
||||
|
||||
7
docs/catalogs/README.md
Normal file
7
docs/catalogs/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Component Catalogs
|
||||
|
||||
This section is the "dictionary" for Harmony. It lists all the reusable components available out-of-the-box.
|
||||
|
||||
- [**Scores Catalog**](./scores.md): Discover all available `Scores` (the "what").
|
||||
- [**Topologies Catalog**](./topologies.md): A list of all available `Topologies` (the "where").
|
||||
- [**Capabilities Catalog**](./capabilities.md): A list of all available `Capabilities` (the "how").
|
||||
40
docs/catalogs/capabilities.md
Normal file
40
docs/catalogs/capabilities.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Capabilities Catalog
|
||||
|
||||
A `Capability` is a specific feature or API that a `Topology` offers. `Interpret` logic uses these capabilities to execute a `Score`.
|
||||
|
||||
This list is primarily for developers **writing new Topologies or Scores**. As a user, you just need to know that the `Topology` you pick (like `K8sAnywhereTopology`) provides the capabilities your `Scores` (like `ApplicationScore`) need.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Capabilities Catalog](#capabilities-catalog)
|
||||
- [Kubernetes & Application](#kubernetes-application)
|
||||
- [Monitoring & Observability](#monitoring-observability)
|
||||
- [Networking (Core Services)](#networking-core-services)
|
||||
- [Networking (Hardware & Host)](#networking-hardware-host)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
## Kubernetes & Application
|
||||
|
||||
- **K8sClient**: Provides an authenticated client to interact with a Kubernetes API (create/read/update/delete resources).
|
||||
- **HelmCommand**: Provides the ability to execute Helm commands (install, upgrade, template).
|
||||
- **TenantManager**: Provides methods for managing tenants in a multi-tenant cluster.
|
||||
- **Ingress**: Provides an interface for managing ingress controllers and resources.
|
||||
|
||||
## Monitoring & Observability
|
||||
|
||||
- **Grafana**: Provides an API for configuring Grafana (datasources, dashboards).
|
||||
- **Monitoring**: A general capability for configuring monitoring (e.g., creating Prometheus rules).
|
||||
|
||||
## Networking (Core Services)
|
||||
|
||||
- **DnsServer**: Provides an interface for creating and managing DNS records.
|
||||
- **LoadBalancer**: Provides an interface for configuring a load balancer (e.g., OPNsense, MetalLB).
|
||||
- **DhcpServer**: Provides an interface for managing DHCP leases and host bindings.
|
||||
- **TftpServer**: Provides an interface for managing files on a TFTP server (e.g., iPXE boot files).
|
||||
|
||||
## Networking (Hardware & Host)
|
||||
|
||||
- **Router**: Provides an interface for configuring routing rules, typically on a firewall like OPNsense.
|
||||
- **Switch**: Provides an interface for configuring a physical network switch (e.g., managing VLANs and port channels).
|
||||
- **NetworkManager**: Provides an interface for configuring host-level networking (e.g., creating bonds and bridges on a node).
|
||||
102
docs/catalogs/scores.md
Normal file
102
docs/catalogs/scores.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# Scores Catalog
|
||||
|
||||
A `Score` is a declarative description of a desired state. Find the Score you need and add it to your `harmony!` block's `scores` array.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Scores Catalog](#scores-catalog)
|
||||
- [Application Deployment](#application-deployment)
|
||||
- [OKD / Kubernetes Cluster Setup](#okd-kubernetes-cluster-setup)
|
||||
- [Cluster Services & Management](#cluster-services-management)
|
||||
- [Monitoring & Alerting](#monitoring-alerting)
|
||||
- [Infrastructure & Networking (Bare Metal)](#infrastructure-networking-bare-metal)
|
||||
- [Infrastructure & Networking (Cluster)](#infrastructure-networking-cluster)
|
||||
- [Tenant Management](#tenant-management)
|
||||
- [Utility](#utility)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
## Application Deployment
|
||||
|
||||
Scores for deploying and managing end-user applications.
|
||||
|
||||
- **ApplicationScore**: The primary score for deploying a web application. Describes the application, its framework, and the features it requires (e.g., monitoring, CI/CD).
|
||||
- **HelmChartScore**: Deploys a generic Helm chart to a Kubernetes cluster.
|
||||
- **ArgoHelmScore**: Deploys an application using an ArgoCD Helm chart.
|
||||
- **LAMPScore**: A specialized score for deploying a classic LAMP (Linux, Apache, MySQL, PHP) stack.
|
||||
|
||||
## OKD / Kubernetes Cluster Setup
|
||||
|
||||
This collection of Scores is used to provision an entire OKD cluster from bare metal. They are typically used in order.
|
||||
|
||||
- **OKDSetup01InventoryScore**: Discovers and catalogs the physical hardware.
|
||||
- **OKDSetup02BootstrapScore**: Configures the bootstrap node, renders iPXE files, and kicks off the SCOS installation.
|
||||
- **OKDSetup03ControlPlaneScore**: Renders iPXE configurations for the control plane nodes.
|
||||
- **OKDSetupPersistNetworkBondScore**: Configures network bonds on the nodes and port channels on the switches.
|
||||
- **OKDSetup04WorkersScore**: Renders iPXE configurations for the worker nodes.
|
||||
- **OKDSetup06InstallationReportScore**: Runs post-installation checks and generates a report.
|
||||
- **OKDUpgradeScore**: Manages the upgrade process for an existing OKD cluster.
|
||||
|
||||
## Cluster Services & Management
|
||||
|
||||
Scores for installing and managing services _inside_ a Kubernetes cluster.
|
||||
|
||||
- **K3DInstallationScore**: Installs and configures a local K3D (k3s-in-docker) cluster. Used by `K8sAnywhereTopology`.
|
||||
- **CertManagerHelmScore**: Deploys the `cert-manager` Helm chart.
|
||||
- **ClusterIssuerScore**: Configures a `ClusterIssuer` for `cert-manager` (e.g., for Let's Encrypt).
|
||||
- **K8sNamespaceScore**: Ensures a Kubernetes namespace exists.
|
||||
- **K8sDeploymentScore**: Deploys a generic `Deployment` resource to Kubernetes.
|
||||
- **K8sIngressScore**: Configures an `Ingress` resource for a service.
|
||||
|
||||
## Monitoring & Alerting
|
||||
|
||||
Scores for configuring observability, dashboards, and alerts.
|
||||
|
||||
- **ApplicationMonitoringScore**: A generic score to set up monitoring for an application.
|
||||
- **ApplicationRHOBMonitoringScore**: A specialized score for setting up monitoring via the Red Hat Observability stack.
|
||||
- **HelmPrometheusAlertingScore**: Configures Prometheus alerts via a Helm chart.
|
||||
- **K8sPrometheusCRDAlertingScore**: Configures Prometheus alerts using the `PrometheusRule` CRD.
|
||||
- **PrometheusAlertScore**: A generic score for creating a Prometheus alert.
|
||||
- **RHOBAlertingScore**: Configures alerts specifically for the Red Hat Observability stack.
|
||||
- **NtfyScore**: Configures alerts to be sent to a `ntfy.sh` server.
|
||||
|
||||
## Infrastructure & Networking (Bare Metal)
|
||||
|
||||
Low-level scores for managing physical hardware and network services.
|
||||
|
||||
- **DhcpScore**: Configures a DHCP server.
|
||||
- **OKDDhcpScore**: A specialized DHCP configuration for the OKD bootstrap process.
|
||||
- **OKDBootstrapDhcpScore**: Configures DHCP specifically for the bootstrap node.
|
||||
- **DhcpHostBindingScore**: Creates a specific MAC-to-IP binding in the DHCP server.
|
||||
- **DnsScore**: Configures a DNS server.
|
||||
- **OKDDnsScore**: A specialized DNS configuration for the OKD cluster (e.g., `api.*`, `*.apps.*`).
|
||||
- **StaticFilesHttpScore**: Serves a directory of static files (e.g., a documentation site) over HTTP.
|
||||
- **TftpScore**: Configures a TFTP server, typically for serving iPXE boot files.
|
||||
- **IPxeMacBootFileScore**: Assigns a specific iPXE boot file to a MAC address in the TFTP server.
|
||||
- **OKDIpxeScore**: A specialized score for generating the iPXE boot scripts for OKD.
|
||||
- **OPNsenseShellCommandScore**: Executes a shell command on an OPNsense firewall.
|
||||
|
||||
## Infrastructure & Networking (Cluster)
|
||||
|
||||
Network services that run inside the cluster or as part of the topology.
|
||||
|
||||
- **LoadBalancerScore**: Configures a general-purpose load balancer.
|
||||
- **OKDLoadBalancerScore**: Configures the high-availability load balancers for the OKD API and ingress.
|
||||
- **OKDBootstrapLoadBalancerScore**: Configures the load balancer specifically for the bootstrap-time API endpoint.
|
||||
- **K8sIngressScore**: Configures an Ingress controller or resource.
|
||||
- [HighAvailabilityHostNetworkScore](../../harmony/src/modules/okd/host_network.rs): Configures network bonds on a host and the corresponding port-channels on the switch stack for high-availability.
|
||||
|
||||
## Tenant Management
|
||||
|
||||
Scores for managing multi-tenancy within a cluster.
|
||||
|
||||
- **TenantScore**: Creates a new tenant (e.g., a namespace, quotas, network policies).
|
||||
- **TenantCredentialScore**: Generates and provisions credentials for a new tenant.
|
||||
|
||||
## Utility
|
||||
|
||||
Helper scores for discovery and inspection.
|
||||
|
||||
- **LaunchDiscoverInventoryAgentScore**: Launches the agent responsible for the `OKDSetup01InventoryScore`.
|
||||
- **DiscoverHostForRoleScore**: A utility score to find a host matching a specific role in the inventory.
|
||||
- **InspectInventoryScore**: Dumps the discovered inventory for inspection.
|
||||
59
docs/catalogs/topologies.md
Normal file
59
docs/catalogs/topologies.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Topologies Catalog
|
||||
|
||||
A `Topology` is the logical representation of your infrastructure and its `Capabilities`. You select a `Topology` in your Harmony project to define _where_ your `Scores` will be applied.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Topologies Catalog](#topologies-catalog)
|
||||
- [HAClusterTopology](#haclustertopology)
|
||||
- [K8sAnywhereTopology](#k8sanywheretopology)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
### HAClusterTopology
|
||||
|
||||
- **`HAClusterTopology::autoload()`**
|
||||
|
||||
This `Topology` represents a high-availability, bare-metal cluster. It is designed for production-grade deployments like OKD.
|
||||
|
||||
It models an environment consisting of:
|
||||
|
||||
- At least 3 cluster nodes (for control plane/workers)
|
||||
- 2 redundant firewalls (e.g., OPNsense)
|
||||
- 2 redundant network switches
|
||||
|
||||
**Provided Capabilities:**
|
||||
This topology provides a rich set of capabilities required for bare-metal provisioning and cluster management, including:
|
||||
|
||||
- `K8sClient` (once the cluster is bootstrapped)
|
||||
- `DnsServer`
|
||||
- `LoadBalancer`
|
||||
- `DhcpServer`
|
||||
- `TftpServer`
|
||||
- `Router` (via the firewalls)
|
||||
- `Switch`
|
||||
- `NetworkManager` (for host-level network config)
|
||||
|
||||
---
|
||||
|
||||
### K8sAnywhereTopology
|
||||
|
||||
- **`K8sAnywhereTopology::from_env()`**
|
||||
|
||||
This `Topology` is designed for development and application deployment. It provides a simple, abstract way to deploy to _any_ Kubernetes cluster.
|
||||
|
||||
**How it works:**
|
||||
|
||||
1. By default (`from_env()` with no env vars), it automatically provisions a **local K3D (k3s-in-docker) cluster** on your machine. This is perfect for local development and testing.
|
||||
2. If you provide a `KUBECONFIG` environment variable, it will instead connect to that **existing Kubernetes cluster** (e.g., your staging or production OKD cluster).
|
||||
|
||||
This allows you to use the _exact same code_ to deploy your application locally as you do to deploy it to production.
|
||||
|
||||
**Provided Capabilities:**
|
||||
|
||||
- `K8sClient`
|
||||
- `HelmCommand`
|
||||
- `TenantManager`
|
||||
- `Ingress`
|
||||
- `Monitoring`
|
||||
- ...and more.
|
||||
40
docs/concepts.md
Normal file
40
docs/concepts.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Core Concepts
|
||||
|
||||
Harmony's design is based on a few key concepts. Understanding them is the key to unlocking the framework's power.
|
||||
|
||||
### 1. Score
|
||||
|
||||
- **What it is:** A **Score** is a declarative description of a desired state. It's a "resource" that defines _what_ you want to achieve, not _how_ to do it.
|
||||
- **Example:** `ApplicationScore` declares "I want this web application to be running and monitored."
|
||||
|
||||
### 2. Topology
|
||||
|
||||
- **What it is:** A **Topology** is the logical representation of your infrastructure and its abilities. It's the "where" your Scores will be applied.
|
||||
- **Key Job:** A Topology's most important job is to expose which `Capabilities` it supports.
|
||||
- **Example:** `HAClusterTopology` represents a bare-metal cluster and exposes `Capabilities` like `NetworkManager` and `Switch`. `K8sAnywhereTopology` represents a Kubernetes cluster and exposes the `K8sClient` `Capability`.
|
||||
|
||||
### 3. Capability
|
||||
|
||||
- **What it is:** A **Capability** is a specific feature or API that a `Topology` offers. It's the "how" a `Topology` can fulfill a `Score`'s request.
|
||||
- **Example:** The `K8sClient` capability offers a way to interact with a Kubernetes API. The `Switch` capability offers a way to configure a physical network switch.
|
||||
|
||||
### 4. Interpret
|
||||
|
||||
- **What it is:** An **Interpret** is the execution logic that makes a `Score` a reality. It's the "glue" that connects the _desired state_ (`Score`) to the _environment's abilities_ (`Topology`'s `Capabilities`).
|
||||
- **How it works:** When you apply a `Score`, Harmony finds the matching `Interpret` for your `Topology`. This `Interpret` then uses the `Capabilities` provided by the `Topology` to execute the necessary steps.
|
||||
|
||||
### 5. Inventory
|
||||
|
||||
- **What it is:** An **Inventory** is the physical material (the "what") used in a cluster. This is most relevant for bare-metal or on-premise topologies.
|
||||
- **Example:** A list of nodes with their roles (control plane, worker), CPU, RAM, and network interfaces. For the `K8sAnywhereTopology`, the inventory might be empty or autoloaded, as the infrastructure is more abstract.
|
||||
|
||||
---
|
||||
|
||||
### How They Work Together (The Compile-Time Check)
|
||||
|
||||
1. You **write a `Score`** (e.g., `ApplicationScore`).
|
||||
2. Your `Score`'s `Interpret` logic requires certain **`Capabilities`** (e.g., `K8sClient` and `Ingress`).
|
||||
3. You choose a **`Topology`** to run it on (e.g., `HAClusterTopology`).
|
||||
4. **At compile-time**, Harmony checks: "Does `HAClusterTopology` provide the `K8sClient` and `Ingress` capabilities that `ApplicationScore` needs?"
|
||||
- **If Yes:** Your code compiles. You can be confident it will run.
|
||||
- **If No:** The compiler gives you an error. You've just prevented a "config-is-valid-but-platform-is-wrong" runtime error before you even deployed.
|
||||
42
docs/guides/getting-started.md
Normal file
42
docs/guides/getting-started.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Getting Started Guide
|
||||
|
||||
Welcome to Harmony! This guide will walk you through installing the Harmony framework, setting up a new project, and deploying your first application.
|
||||
|
||||
We will build and deploy the "Rust Web App" example, which automatically:
|
||||
|
||||
1. Provisions a local K3D (Kubernetes in Docker) cluster.
|
||||
2. Deploys a sample Rust web application.
|
||||
3. Sets up monitoring for the application.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you begin, you'll need a few tools installed on your system:
|
||||
|
||||
- **Rust & Cargo:** [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
- **Docker:** [Install Docker](https://docs.docker.com/get-docker/) (Required for the K3D local cluster)
|
||||
- **kubectl:** [Install kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) (For inspecting the cluster)
|
||||
|
||||
## 1. Install Harmony
|
||||
|
||||
First, clone the Harmony repository and build the project. This gives you the `harmony` CLI and all the core libraries.
|
||||
|
||||
```bash
|
||||
# Clone the main repository
|
||||
git clone https://git.nationtech.io/nationtech/harmony
|
||||
cd harmony
|
||||
|
||||
# Build the project (this may take a few minutes)
|
||||
cargo build --release
|
||||
```
|
||||
|
||||
...
|
||||
|
||||
## Next Steps
|
||||
|
||||
Congratulations, you've just deployed an application using true infrastructure-as-code!
|
||||
|
||||
From here, you can:
|
||||
|
||||
- [Explore the Catalogs](../catalogs/README.md): See what other [Scores](../catalogs/scores.md) and [Topologies](../catalogs/topologies.md) are available.
|
||||
- [Read the Use Cases](../use-cases/README.md): Check out the [OKD on Bare Metal](../use-cases/okd-on-bare-metal.md) guide for a more advanced scenario.
|
||||
- [Write your own Score](./writing-a-score.md): Dive into the [Developer Guide](./developer-guide.md) to start building your own components.
|
||||
@@ -1,22 +1,28 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use brocade::{BrocadeOptions, PortOperatingMode};
|
||||
use harmony::{
|
||||
data::Version,
|
||||
infra::brocade::BrocadeSwitchClient,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
infra::brocade::BrocadeSwitchConfig,
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HostNetworkConfig, PortConfig, PreparationError, PreparationOutcome, Switch, SwitchClient,
|
||||
SwitchError, Topology,
|
||||
},
|
||||
modules::brocade::{BrocadeSwitchAuth, BrocadeSwitchScore, SwitchTopology},
|
||||
};
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{id::Id, net::MacAddress, switch::PortLocation};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
use harmony_types::{id::Id, switch::PortLocation};
|
||||
|
||||
fn get_switch_config() -> BrocadeSwitchConfig {
|
||||
let mut options = BrocadeOptions::default();
|
||||
options.ssh.port = 2222;
|
||||
let auth = BrocadeSwitchAuth {
|
||||
username: "admin".to_string(),
|
||||
password: "password".to_string(),
|
||||
};
|
||||
|
||||
BrocadeSwitchConfig {
|
||||
ips: vec![ip!("127.0.0.1")],
|
||||
auth,
|
||||
options,
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
@@ -32,126 +38,13 @@ async fn main() {
|
||||
(PortLocation(1, 0, 18), PortOperatingMode::Trunk),
|
||||
],
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
SwitchTopology::new().await,
|
||||
SwitchTopology::new(get_switch_config()).await,
|
||||
vec![Box::new(switch_score)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
struct BrocadeSwitchScore {
|
||||
port_channels_to_clear: Vec<Id>,
|
||||
ports_to_configure: Vec<PortConfig>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Switch> Score<T> for BrocadeSwitchScore {
|
||||
fn name(&self) -> String {
|
||||
"BrocadeSwitchScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(BrocadeSwitchInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct BrocadeSwitchInterpret {
|
||||
score: BrocadeSwitchScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + Switch> Interpret<T> for BrocadeSwitchInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Applying switch configuration {:?}", self.score);
|
||||
debug!(
|
||||
"Clearing port channel {:?}",
|
||||
self.score.port_channels_to_clear
|
||||
);
|
||||
topology
|
||||
.clear_port_channel(&self.score.port_channels_to_clear)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
debug!("Configuring interfaces {:?}", self.score.ports_to_configure);
|
||||
topology
|
||||
.configure_interface(&self.score.ports_to_configure)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success("switch configured".to_string()))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("BrocadeSwitchInterpret")
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
struct SwitchTopology {
|
||||
client: Box<dyn SwitchClient>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Topology for SwitchTopology {
|
||||
fn name(&self) -> &str {
|
||||
"SwitchTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
impl SwitchTopology {
|
||||
async fn new() -> Self {
|
||||
let mut options = BrocadeOptions::default();
|
||||
options.ssh.port = 2222;
|
||||
let client =
|
||||
BrocadeSwitchClient::init(&vec![ip!("127.0.0.1")], &"admin", &"password", options)
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
let client = Box::new(client);
|
||||
Self { client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Switch for SwitchTopology {
|
||||
async fn setup_switch(&self) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_port_for_mac_address(
|
||||
&self,
|
||||
_mac_address: &MacAddress,
|
||||
) -> Result<Option<PortLocation>, SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn configure_port_channel(&self, _config: &HostNetworkConfig) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
self.client.clear_port_channel(ids).await
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
self.client.configure_interface(ports).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ async fn main() {
|
||||
role: HostRole::Worker,
|
||||
number_desired_hosts: 3,
|
||||
discovery_strategy: HarmonyDiscoveryStrategy::SUBNET {
|
||||
cidr: cidrv4!("192.168.0.1/25"),
|
||||
cidr: cidrv4!("192.168.2.0/24"),
|
||||
port: 25000,
|
||||
},
|
||||
};
|
||||
@@ -20,7 +20,7 @@ async fn main() {
|
||||
role: HostRole::ControlPlane,
|
||||
number_desired_hosts: 3,
|
||||
discovery_strategy: HarmonyDiscoveryStrategy::SUBNET {
|
||||
cidr: cidrv4!("192.168.0.1/25"),
|
||||
cidr: cidrv4!("192.168.2.0/24"),
|
||||
port: 25000,
|
||||
},
|
||||
};
|
||||
@@ -28,7 +28,8 @@ async fn main() {
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
LocalhostTopology::new(),
|
||||
vec![Box::new(discover_worker), Box::new(discover_control_plane)],
|
||||
vec![Box::new(discover_worker)],
|
||||
//vec![Box::new(discover_worker), Box::new(discover_control_plane)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
|
||||
20
examples/k8s_drain_node/Cargo.toml
Normal file
20
examples/k8s_drain_node/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "example-k8s-drain-node"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
cidr.workspace = true
|
||||
tokio.workspace = true
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
url.workspace = true
|
||||
assert_cmd = "2.0.16"
|
||||
inquire.workspace = true
|
||||
61
examples/k8s_drain_node/src/main.rs
Normal file
61
examples/k8s_drain_node/src/main.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use harmony::topology::k8s::{DrainOptions, K8sClient};
|
||||
use log::{info, trace};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
env_logger::init();
|
||||
let k8s = K8sClient::try_default().await.unwrap();
|
||||
let nodes = k8s.get_nodes(None).await.unwrap();
|
||||
trace!("Got nodes : {nodes:#?}");
|
||||
let node_names = nodes
|
||||
.iter()
|
||||
.map(|n| n.metadata.name.as_ref().unwrap())
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
info!("Got nodes : {:?}", node_names);
|
||||
|
||||
let node_name = inquire::Select::new("What node do you want to operate on?", node_names)
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
let drain = inquire::Confirm::new("Do you wish to drain the node now ?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if drain {
|
||||
let mut options = DrainOptions::default_ignore_daemonset_delete_emptydir_data();
|
||||
options.timeout = Duration::from_secs(1);
|
||||
k8s.drain_node(&node_name, &options).await.unwrap();
|
||||
|
||||
info!("Node {node_name} successfully drained");
|
||||
}
|
||||
|
||||
let uncordon =
|
||||
inquire::Confirm::new("Do you wish to uncordon node to resume scheduling workloads now?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if uncordon {
|
||||
info!("Uncordoning node {node_name}");
|
||||
k8s.uncordon_node(node_name).await.unwrap();
|
||||
info!("Node {node_name} uncordoned");
|
||||
}
|
||||
|
||||
let reboot = inquire::Confirm::new("Do you wish to reboot node now?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if reboot {
|
||||
k8s.reboot_node(
|
||||
&node_name,
|
||||
&DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
|
||||
Duration::from_secs(3600),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
info!("All done playing with nodes, happy harmonizing!");
|
||||
}
|
||||
20
examples/k8s_write_file_on_node/Cargo.toml
Normal file
20
examples/k8s_write_file_on_node/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "example-k8s-write-file-on-node"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
cidr.workspace = true
|
||||
tokio.workspace = true
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
url.workspace = true
|
||||
assert_cmd = "2.0.16"
|
||||
inquire.workspace = true
|
||||
45
examples/k8s_write_file_on_node/src/main.rs
Normal file
45
examples/k8s_write_file_on_node/src/main.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use harmony::topology::k8s::{DrainOptions, K8sClient, NodeFile};
|
||||
use log::{info, trace};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
env_logger::init();
|
||||
let k8s = K8sClient::try_default().await.unwrap();
|
||||
let nodes = k8s.get_nodes(None).await.unwrap();
|
||||
trace!("Got nodes : {nodes:#?}");
|
||||
let node_names = nodes
|
||||
.iter()
|
||||
.map(|n| n.metadata.name.as_ref().unwrap())
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
info!("Got nodes : {:?}", node_names);
|
||||
|
||||
let node = inquire::Select::new("What node do you want to write file to?", node_names)
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
let path = inquire::Text::new("File path on node").prompt().unwrap();
|
||||
let content = inquire::Text::new("File content").prompt().unwrap();
|
||||
|
||||
let node_file = NodeFile {
|
||||
path: path,
|
||||
content: content,
|
||||
mode: 0o600,
|
||||
};
|
||||
|
||||
k8s.write_files_to_node(&node, &vec![node_file.clone()])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let cmd = inquire::Text::new("Command to run on node")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
k8s.run_privileged_command_on_node(&node, &cmd)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
info!(
|
||||
"File {} mode {} written in node {node}",
|
||||
node_file.path, node_file.mode
|
||||
);
|
||||
}
|
||||
@@ -215,7 +215,7 @@ fn site(
|
||||
dns_name: format!("{cluster_name}-gw.{domain}"),
|
||||
supercluster_ca_secret_name: "nats-supercluster-ca-bundle",
|
||||
tls_cert_name: "nats-gateway",
|
||||
jetstream_enabled: "false",
|
||||
jetstream_enabled: "true",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,12 @@ use brocade::BrocadeOptions;
|
||||
use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
hardware::{Location, SwitchGroup},
|
||||
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
|
||||
infra::{
|
||||
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
opnsense::OPNSenseManagementInterface,
|
||||
},
|
||||
inventory::Inventory,
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4};
|
||||
@@ -36,12 +40,11 @@ pub async fn get_topology() -> HAClusterTopology {
|
||||
dry_run: *harmony::config::DRY_RUN,
|
||||
..Default::default()
|
||||
};
|
||||
let switch_client = BrocadeSwitchClient::init(
|
||||
&switches,
|
||||
&switch_auth.username,
|
||||
&switch_auth.password,
|
||||
brocade_options,
|
||||
)
|
||||
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
|
||||
ips: switches,
|
||||
auth: switch_auth,
|
||||
options: brocade_options,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
@@ -103,9 +106,3 @@ pub fn get_inventory() -> Inventory {
|
||||
control_plane_host: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
@@ -3,14 +3,16 @@ use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
config::secret::OPNSenseFirewallCredentials,
|
||||
hardware::{Location, SwitchGroup},
|
||||
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
|
||||
infra::{
|
||||
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
opnsense::OPNSenseManagementInterface,
|
||||
},
|
||||
inventory::Inventory,
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4};
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use harmony_secret::SecretManager;
|
||||
use std::{
|
||||
net::IpAddr,
|
||||
sync::{Arc, OnceLock},
|
||||
@@ -31,12 +33,11 @@ pub async fn get_topology() -> HAClusterTopology {
|
||||
dry_run: *harmony::config::DRY_RUN,
|
||||
..Default::default()
|
||||
};
|
||||
let switch_client = BrocadeSwitchClient::init(
|
||||
&switches,
|
||||
&switch_auth.username,
|
||||
&switch_auth.password,
|
||||
brocade_options,
|
||||
)
|
||||
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
|
||||
ips: switches,
|
||||
auth: switch_auth,
|
||||
options: brocade_options,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
@@ -98,9 +99,3 @@ pub fn get_inventory() -> Inventory {
|
||||
control_plane_host: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
@@ -5,6 +5,10 @@ version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[[example]]
|
||||
name = "try_rust_webapp"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
|
||||
@@ -108,11 +108,18 @@ impl PhysicalHost {
|
||||
};
|
||||
|
||||
let storage_summary = if drive_count > 1 {
|
||||
let drive_sizes = self
|
||||
.storage
|
||||
.iter()
|
||||
.map(|d| format_storage(d.size_bytes))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
format!(
|
||||
"{} Storage ({}x {})",
|
||||
"{} Storage ({} Disks [{}])",
|
||||
format_storage(total_storage_bytes),
|
||||
drive_count,
|
||||
first_drive_model
|
||||
drive_sizes
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use async_trait::async_trait;
|
||||
use brocade::PortOperatingMode;
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{
|
||||
id::Id,
|
||||
@@ -9,9 +8,9 @@ use harmony_types::{
|
||||
use log::debug;
|
||||
use log::info;
|
||||
|
||||
use crate::topology::PxeOptions;
|
||||
use crate::{data::FileContent, executors::ExecutorError, topology::node_exporter::NodeExporter};
|
||||
use crate::{infra::network_manager::OpenShiftNmStateNetworkManager, topology::PortConfig};
|
||||
use crate::{modules::inventory::HarmonyDiscoveryStrategy, topology::PxeOptions};
|
||||
|
||||
use super::{
|
||||
DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig,
|
||||
@@ -301,10 +300,10 @@ impl Switch for HAClusterTopology {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
async fn clear_port_channel(&self, _ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
async fn configure_interface(&self, _ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -322,7 +321,15 @@ impl NetworkManager for HAClusterTopology {
|
||||
self.network_manager().await.configure_bond(config).await
|
||||
}
|
||||
|
||||
//TODO add snmp here
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
self.network_manager()
|
||||
.await
|
||||
.configure_bond_on_primary_interface(config)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -562,10 +569,10 @@ impl SwitchClient for DummyInfra {
|
||||
) -> Result<u8, SwitchError> {
|
||||
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
async fn clear_port_channel(&self, _ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
async fn configure_interface(&self, _ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
133
harmony/src/domain/topology/k8s/bundle.rs
Normal file
133
harmony/src/domain/topology/k8s/bundle.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Resource Bundle Pattern Implementation
|
||||
//!
|
||||
//! This module implements the Resource Bundle pattern for managing groups of
|
||||
//! Kubernetes resources that form a logical unit of work.
|
||||
//!
|
||||
//! ## Purpose
|
||||
//!
|
||||
//! The ResourceBundle pattern addresses the need to manage ephemeral privileged
|
||||
//! pods along with their platform-specific security requirements (e.g., OpenShift
|
||||
//! Security Context Constraints).
|
||||
//!
|
||||
//! ## Use Cases
|
||||
//!
|
||||
//! - Writing files to node filesystems (e.g., NetworkManager configurations for
|
||||
//! network bonding as described in ADR-019)
|
||||
//! - Running privileged commands on nodes (e.g., reboots, system configuration)
|
||||
//!
|
||||
//! ## Benefits
|
||||
//!
|
||||
//! - **Separation of Concerns**: Client code doesn't need to know about
|
||||
//! platform-specific RBAC requirements
|
||||
//! - **Atomic Operations**: Resources are applied and deleted as a unit
|
||||
//! - **Clean Abstractions**: Privileged operations are encapsulated in bundles
|
||||
//! rather than scattered throughout client methods
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```rust,no_run
|
||||
//! use harmony::topology::k8s::{K8sClient, helper};
|
||||
//! use harmony::topology::KubernetesDistribution;
|
||||
//!
|
||||
//! async fn write_network_config(client: &K8sClient, node: &str) {
|
||||
//! // Create a bundle with platform-specific RBAC
|
||||
//! let bundle = helper::build_privileged_bundle(
|
||||
//! helper::PrivilegedPodConfig {
|
||||
//! name: "network-config".to_string(),
|
||||
//! namespace: "default".to_string(),
|
||||
//! node_name: node.to_string(),
|
||||
//! // ... other config
|
||||
//! ..Default::default()
|
||||
//! },
|
||||
//! &KubernetesDistribution::OpenshiftFamily,
|
||||
//! );
|
||||
//!
|
||||
//! // Apply all resources (RBAC + Pod) atomically
|
||||
//! bundle.apply(client).await.unwrap();
|
||||
//!
|
||||
//! // ... wait for completion ...
|
||||
//!
|
||||
//! // Cleanup all resources
|
||||
//! bundle.delete(client).await.unwrap();
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use kube::{Error, Resource, ResourceExt, api::DynamicObject};
|
||||
use serde::Serialize;
|
||||
use serde_json;
|
||||
|
||||
use crate::domain::topology::k8s::K8sClient;
|
||||
|
||||
/// A ResourceBundle represents a logical unit of work consisting of multiple
|
||||
/// Kubernetes resources that should be applied or deleted together.
|
||||
///
|
||||
/// This pattern is useful for managing ephemeral privileged pods along with
|
||||
/// their required RBAC bindings (e.g., OpenShift SCC bindings).
|
||||
#[derive(Debug)]
|
||||
pub struct ResourceBundle {
|
||||
pub resources: Vec<DynamicObject>,
|
||||
}
|
||||
|
||||
impl ResourceBundle {
    /// Create an empty bundle with no resources.
    pub fn new() -> Self {
        Self {
            resources: Vec::new(),
        }
    }

    /// Add a Kubernetes resource to this bundle.
    /// The resource is converted to a DynamicObject for generic handling.
    ///
    /// # Panics
    /// Panics if the resource cannot be serialized to JSON or deserialized
    /// back into a `DynamicObject` (both `expect` calls below).
    pub fn add<K>(&mut self, resource: K)
    where
        K: Resource + Serialize,
        <K as Resource>::DynamicType: Default,
    {
        // Convert the typed resource to JSON, then to DynamicObject
        let json = serde_json::to_value(&resource).expect("Failed to serialize resource");
        let mut obj: DynamicObject =
            serde_json::from_value(json).expect("Failed to convert to DynamicObject");

        // Ensure type metadata is set
        if obj.types.is_none() {
            // `K::api_version`/`K::kind` take a `&DynamicType` argument; for
            // statically-typed resources the `Default::default()` value is the
            // conventional placeholder (hence the `DynamicType: Default` bound).
            let api_version = Default::default();
            let kind = Default::default();
            let gvk = K::api_version(&api_version);
            // Shadowing: `kind` is rebound from the placeholder to the actual kind string.
            let kind = K::kind(&kind);
            obj.types = Some(kube::api::TypeMeta {
                api_version: gvk.to_string(),
                kind: kind.to_string(),
            });
        }

        self.resources.push(obj);
    }

    /// Apply all resources in this bundle to the cluster.
    /// Resources are applied in the order they were added.
    ///
    /// # Errors
    /// Returns the first `kube::Error` encountered; later resources are not applied.
    pub async fn apply(&self, client: &K8sClient) -> Result<(), Error> {
        for res in &self.resources {
            let namespace = res.namespace();
            client
                .apply_dynamic(res, namespace.as_deref(), true)
                .await?;
        }
        Ok(())
    }

    /// Delete all resources in this bundle from the cluster.
    /// Resources are deleted in reverse order to respect dependencies.
    ///
    /// # Errors
    /// Only API-discovery failures are returned; individual delete failures
    /// are currently ignored (see FIXME below).
    pub async fn delete(&self, client: &K8sClient) -> Result<(), Error> {
        // FIXME delete all in parallel and retry using kube::client::retry::RetryPolicy
        for res in self.resources.iter().rev() {
            let api = client.get_api_for_dynamic_object(res, res.namespace().as_deref())?;
            let name = res.name_any();
            // FIXME this swallows all errors. Swallowing a 404 is ok but other errors must be
            // handled properly (such as retrying). A normal error case is when we delete a
            // resource bundle with dependencies between various resources. Such as a pod with a
            // dependency on a ClusterRoleBinding. Trying to delete the ClusterRoleBinding first
            // is expected to fail
            let _ = api.delete(&name, &kube::api::DeleteParams::default()).await;
        }
        Ok(())
    }
}
|
||||
1
harmony/src/domain/topology/k8s/config.rs
Normal file
1
harmony/src/domain/topology/k8s/config.rs
Normal file
@@ -0,0 +1 @@
|
||||
/// Container image used for the ephemeral privileged pods built by the
/// `helper` module (e.g. writing files to nodes, running privileged commands).
pub const PRIVILEGED_POD_IMAGE: &str = "hub.nationtech.io/redhat/ubi10:latest";
|
||||
601
harmony/src/domain/topology/k8s/helper.rs
Normal file
601
harmony/src/domain/topology/k8s/helper.rs
Normal file
@@ -0,0 +1,601 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::topology::KubernetesDistribution;
|
||||
|
||||
use super::bundle::ResourceBundle;
|
||||
use super::config::PRIVILEGED_POD_IMAGE;
|
||||
use k8s_openapi::api::core::v1::{
|
||||
Container, HostPathVolumeSource, Pod, PodSpec, SecurityContext, Volume, VolumeMount,
|
||||
};
|
||||
use k8s_openapi::api::rbac::v1::{ClusterRoleBinding, RoleRef, Subject};
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||
use kube::error::DiscoveryError;
|
||||
use log::{debug, error, info, warn};
|
||||
|
||||
/// Configuration for an ephemeral privileged pod pinned to a specific node.
#[derive(Debug)]
pub struct PrivilegedPodConfig {
    /// Pod name; also used to derive related resource names (e.g. the SCC binding).
    pub name: String,
    /// Namespace the pod is created in.
    pub namespace: String,
    /// Node the pod is scheduled on via `PodSpec.node_name`.
    pub node_name: String,
    /// Name of the pod's single container.
    pub container_name: String,
    /// Container entrypoint command.
    pub command: Vec<String>,
    /// Volumes attached to the pod spec.
    pub volumes: Vec<Volume>,
    /// Mounts applied to the container.
    pub volume_mounts: Vec<VolumeMount>,
    /// Whether the pod shares the host PID namespace.
    pub host_pid: bool,
    /// Whether the pod shares the host network namespace.
    pub host_network: bool,
}
|
||||
|
||||
impl Default for PrivilegedPodConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: "privileged-pod".to_string(),
|
||||
namespace: "harmony".to_string(),
|
||||
node_name: "".to_string(),
|
||||
container_name: "privileged-container".to_string(),
|
||||
command: vec![],
|
||||
volumes: vec![],
|
||||
volume_mounts: vec![],
|
||||
host_pid: false,
|
||||
host_network: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a `Pod` manifest for an ephemeral privileged container pinned to a node.
///
/// On OpenShift-family distributions the pod is annotated to request the
/// `privileged` Security Context Constraint; other distributions get no
/// annotations. The container always runs with `privileged: true` and the
/// pod never restarts (`restart_policy: Never`).
pub fn build_privileged_pod(
    config: PrivilegedPodConfig,
    k8s_distribution: &KubernetesDistribution,
) -> Pod {
    // OpenShift admission consults these annotations to pin the pod to the
    // `privileged` SCC; they are meaningless on other distributions.
    let annotations = match k8s_distribution {
        KubernetesDistribution::OpenshiftFamily => Some(BTreeMap::from([
            ("openshift.io/scc".to_string(), "privileged".to_string()),
            (
                "openshift.io/required-scc".to_string(),
                "privileged".to_string(),
            ),
        ])),
        _ => None,
    };

    Pod {
        metadata: ObjectMeta {
            name: Some(config.name),
            namespace: Some(config.namespace),
            annotations,
            ..Default::default()
        },
        spec: Some(PodSpec {
            // Pin to the target node; the scheduler is bypassed for placement.
            node_name: Some(config.node_name),
            restart_policy: Some("Never".to_string()),
            host_pid: Some(config.host_pid),
            host_network: Some(config.host_network),
            containers: vec![Container {
                name: config.container_name,
                image: Some(PRIVILEGED_POD_IMAGE.to_string()),
                command: Some(config.command),
                security_context: Some(SecurityContext {
                    privileged: Some(true),
                    ..Default::default()
                }),
                volume_mounts: Some(config.volume_mounts),
                ..Default::default()
            }],
            volumes: Some(config.volumes),
            ..Default::default()
        }),
        ..Default::default()
    }
}
|
||||
|
||||
pub fn host_root_volume() -> (Volume, VolumeMount) {
|
||||
(
|
||||
Volume {
|
||||
name: "host".to_string(),
|
||||
host_path: Some(HostPathVolumeSource {
|
||||
path: "/".to_string(),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
VolumeMount {
|
||||
name: "host".to_string(),
|
||||
mount_path: "/host".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Build a ResourceBundle containing a privileged pod and any required RBAC.
|
||||
///
|
||||
/// This function implements the Resource Bundle pattern to encapsulate platform-specific
|
||||
/// security requirements for running privileged operations on nodes.
|
||||
///
|
||||
/// # Platform-Specific Behavior
|
||||
///
|
||||
/// - **OpenShift**: Creates a ClusterRoleBinding to grant the default ServiceAccount
|
||||
/// access to the `system:openshift:scc:privileged` ClusterRole, which allows the pod
|
||||
/// to use the privileged Security Context Constraint (SCC).
|
||||
/// - **Standard Kubernetes/K3s**: Only creates the Pod resource, as these distributions
|
||||
/// use standard PodSecurityPolicy or don't enforce additional security constraints.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `config` - Configuration for the privileged pod (name, namespace, command, etc.)
|
||||
/// * `k8s_distribution` - The detected Kubernetes distribution to determine RBAC requirements
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A `ResourceBundle` containing 1-2 resources:
|
||||
/// - ClusterRoleBinding (OpenShift only)
|
||||
/// - Pod (all distributions)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust,no_run
|
||||
/// # use harmony::topology::k8s::helper::{build_privileged_bundle, PrivilegedPodConfig};
|
||||
/// # use harmony::topology::KubernetesDistribution;
|
||||
/// let bundle = build_privileged_bundle(
|
||||
/// PrivilegedPodConfig {
|
||||
/// name: "network-setup".to_string(),
|
||||
/// namespace: "default".to_string(),
|
||||
/// node_name: "worker-01".to_string(),
|
||||
/// container_name: "setup".to_string(),
|
||||
/// command: vec!["nmcli".to_string(), "connection".to_string(), "reload".to_string()],
|
||||
/// ..Default::default()
|
||||
/// },
|
||||
/// &KubernetesDistribution::OpenshiftFamily,
|
||||
/// );
|
||||
/// // Bundle now contains ClusterRoleBinding + Pod
|
||||
/// ```
|
||||
pub fn build_privileged_bundle(
|
||||
config: PrivilegedPodConfig,
|
||||
k8s_distribution: &KubernetesDistribution,
|
||||
) -> ResourceBundle {
|
||||
debug!(
|
||||
"Building privileged bundle for config {config:#?} on distribution {k8s_distribution:?}"
|
||||
);
|
||||
let mut bundle = ResourceBundle::new();
|
||||
let pod_name = config.name.clone();
|
||||
let namespace = config.namespace.clone();
|
||||
|
||||
// 1. On OpenShift, create RBAC binding to privileged SCC
|
||||
if let KubernetesDistribution::OpenshiftFamily = k8s_distribution {
|
||||
// The default ServiceAccount needs to be bound to the privileged SCC
|
||||
// via the system:openshift:scc:privileged ClusterRole
|
||||
let crb = ClusterRoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("{}-scc-binding", pod_name)),
|
||||
..Default::default()
|
||||
},
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".to_string(),
|
||||
kind: "ClusterRole".to_string(),
|
||||
name: "system:openshift:scc:privileged".to_string(),
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".to_string(),
|
||||
name: "default".to_string(),
|
||||
namespace: Some(namespace.clone()),
|
||||
api_group: None,
|
||||
..Default::default()
|
||||
}]),
|
||||
};
|
||||
bundle.add(crb);
|
||||
}
|
||||
|
||||
// 2. Build the privileged pod
|
||||
let pod = build_privileged_pod(config, k8s_distribution);
|
||||
bundle.add(pod);
|
||||
|
||||
bundle
|
||||
}
|
||||
|
||||
/// Action to take when a drain operation times out.
///
/// Returned by [`prompt_drain_timeout_action`] to tell the caller how to
/// proceed after a timed-out node drain.
pub enum DrainTimeoutAction {
    /// Accept the partial drain and continue (some pods remain on the node)
    Accept,
    /// Retry the drain for another timeout period
    Retry,
    /// Abort the drain operation
    Abort,
}
|
||||
|
||||
/// Prompts the user to decide how to handle a timed-out drain operation.
///
/// Presents three choices: accept the partial drain (guarded by a typed
/// confirmation string embedding the node name and pending pod count), retry
/// the drain for another `timeout_duration`, or abort.
///
/// Returns the chosen [`DrainTimeoutAction`]. A cancelled or failed
/// confirmation prompt is treated as [`DrainTimeoutAction::Abort`]; `Err` is
/// returned only when the selection prompt itself fails.
pub fn prompt_drain_timeout_action(
    node_name: &str,
    pending_count: usize,
    timeout_duration: Duration,
) -> Result<DrainTimeoutAction, kube::Error> {
    let prompt_msg = format!(
        "Drain operation timed out on node '{}' with {} pod(s) remaining. What would you like to do?",
        node_name, pending_count
    );

    loop {
        let choices = vec![
            "Accept drain failure (requires confirmation)".to_string(),
            format!("Retry drain for another {:?}", timeout_duration),
            "Abort operation".to_string(),
        ];

        let selection = inquire::Select::new(&prompt_msg, choices)
            .with_help_message("Use arrow keys to navigate, Enter to select")
            .prompt()
            .map_err(|e| {
                // There is no dedicated prompt-error type here, so a kube
                // discovery error is reused to carry the message.
                kube::Error::Discovery(DiscoveryError::MissingResource(format!(
                    "Prompt failed: {}",
                    e
                )))
            })?;

        if selection.starts_with("Accept") {
            // Require typed confirmation. On a mismatch we fall through to the
            // outer loop, which re-displays the *main* selection prompt (not
            // just the confirmation), so the user can also change their mind.
            let required_confirmation = format!("yes-accept-drain:{}={}", node_name, pending_count);

            let confirmation_prompt = format!(
                "To accept this partial drain, type exactly: {}",
                required_confirmation
            );

            match inquire::Text::new(&confirmation_prompt)
                .with_help_message(&format!(
                    "This action acknowledges {} pods will remain on the node",
                    pending_count
                ))
                .prompt()
            {
                Ok(input) if input == required_confirmation => {
                    warn!(
                        "User accepted partial drain of node '{}' with {} pods remaining (confirmation: {})",
                        node_name, pending_count, required_confirmation
                    );
                    return Ok(DrainTimeoutAction::Accept);
                }
                Ok(input) => {
                    warn!(
                        "Confirmation failed. Expected '{}', got '{}'. Please try again.",
                        required_confirmation, input
                    );
                }
                Err(e) => {
                    // User cancelled (Ctrl+C) or prompt system failed; treat as
                    // an abort rather than an error so the caller can clean up.
                    error!("Confirmation prompt cancelled or failed: {}", e);
                    return Ok(DrainTimeoutAction::Abort);
                }
            }
        } else if selection.starts_with("Retry") {
            info!(
                "User chose to retry drain operation for another {:?}",
                timeout_duration
            );
            return Ok(DrainTimeoutAction::Retry);
        } else {
            error!("Drain operation aborted by user");
            return Ok(DrainTimeoutAction::Abort);
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    // Sanity check of the host-root volume/mount pair helper.
    #[test]
    fn test_host_root_volume() {
        let (volume, mount) = host_root_volume();

        assert_eq!(volume.name, "host");
        assert_eq!(volume.host_path.as_ref().unwrap().path, "/");

        assert_eq!(mount.name, "host");
        assert_eq!(mount.mount_path, "/host");
    }

    // A minimal config should yield a single privileged container with no
    // host PID/network access and restartPolicy Never.
    #[test]
    fn test_build_privileged_pod_minimal() {
        let pod = build_privileged_pod(
            PrivilegedPodConfig {
                name: "minimal-pod".to_string(),
                namespace: "kube-system".to_string(),
                node_name: "node-123".to_string(),
                container_name: "debug-container".to_string(),
                command: vec!["sleep".to_string(), "3600".to_string()],
                ..Default::default()
            },
            &KubernetesDistribution::Default,
        );

        assert_eq!(pod.metadata.name, Some("minimal-pod".to_string()));
        assert_eq!(pod.metadata.namespace, Some("kube-system".to_string()));

        let spec = pod.spec.as_ref().expect("Pod spec should be present");
        assert_eq!(spec.node_name, Some("node-123".to_string()));
        assert_eq!(spec.restart_policy, Some("Never".to_string()));
        assert_eq!(spec.host_pid, Some(false));
        assert_eq!(spec.host_network, Some(false));

        assert_eq!(spec.containers.len(), 1);
        let container = &spec.containers[0];
        assert_eq!(container.name, "debug-container");
        assert_eq!(container.image, Some(PRIVILEGED_POD_IMAGE.to_string()));
        assert_eq!(
            container.command,
            Some(vec!["sleep".to_string(), "3600".to_string()])
        );

        // Security context check
        let sec_ctx = container
            .security_context
            .as_ref()
            .expect("Security context missing");
        assert_eq!(sec_ctx.privileged, Some(true));
    }

    // Volumes/mounts and host PID/network flags must be propagated from the
    // config into the generated pod spec.
    #[test]
    fn test_build_privileged_pod_with_volumes_and_host_access() {
        let (host_vol, host_mount) = host_root_volume();

        let pod = build_privileged_pod(
            PrivilegedPodConfig {
                name: "full-pod".to_string(),
                namespace: "default".to_string(),
                node_name: "node-1".to_string(),
                container_name: "runner".to_string(),
                command: vec!["/bin/sh".to_string()],
                volumes: vec![host_vol.clone()],
                volume_mounts: vec![host_mount.clone()],
                host_pid: true,
                host_network: true,
            },
            &KubernetesDistribution::Default,
        );

        let spec = pod.spec.as_ref().expect("Pod spec should be present");
        assert_eq!(spec.host_pid, Some(true));
        assert_eq!(spec.host_network, Some(true));

        // Check volumes in Spec
        let volumes = spec.volumes.as_ref().expect("Volumes should be present");
        assert_eq!(volumes.len(), 1);
        assert_eq!(volumes[0].name, "host");

        // Check mounts in Container
        let container = &spec.containers[0];
        let mounts = container
            .volume_mounts
            .as_ref()
            .expect("Mounts should be present");
        assert_eq!(mounts.len(), 1);
        assert_eq!(mounts[0].name, "host");
        assert_eq!(mounts[0].mount_path, "/host");
    }

    #[test]
    fn test_build_privileged_pod_structure_correctness() {
        // This test validates that the construction logic puts things in the right places
        // effectively validating the "template".

        let custom_vol = Volume {
            name: "custom-vol".to_string(),
            ..Default::default()
        };
        let custom_mount = VolumeMount {
            name: "custom-vol".to_string(),
            mount_path: "/custom".to_string(),
            ..Default::default()
        };

        let pod = build_privileged_pod(
            PrivilegedPodConfig {
                name: "structure-test".to_string(),
                namespace: "test-ns".to_string(),
                node_name: "test-node".to_string(),
                container_name: "test-container".to_string(),
                command: vec!["cmd".to_string()],
                volumes: vec![custom_vol],
                volume_mounts: vec![custom_mount],
                ..Default::default()
            },
            &KubernetesDistribution::Default,
        );

        // Validate structure depth
        let spec = pod.spec.as_ref().unwrap();

        // 1. Spec level fields
        assert!(spec.node_name.is_some());
        assert!(spec.volumes.is_some());

        // 2. Container level fields
        let container = &spec.containers[0];
        assert!(container.security_context.is_some());
        assert!(container.volume_mounts.is_some());

        // 3. Nested fields
        assert!(
            container
                .security_context
                .as_ref()
                .unwrap()
                .privileged
                .unwrap()
        );
        assert_eq!(spec.volumes.as_ref().unwrap()[0].name, "custom-vol");
        assert_eq!(
            container.volume_mounts.as_ref().unwrap()[0].mount_path,
            "/custom"
        );
    }

    #[test]
    fn test_build_privileged_bundle_default_distribution() {
        let bundle = build_privileged_bundle(
            PrivilegedPodConfig {
                name: "test-bundle".to_string(),
                namespace: "test-ns".to_string(),
                node_name: "node-1".to_string(),
                container_name: "test-container".to_string(),
                command: vec!["echo".to_string(), "hello".to_string()],
                ..Default::default()
            },
            &KubernetesDistribution::Default,
        );

        // For Default distribution, only the Pod should be in the bundle
        assert_eq!(bundle.resources.len(), 1);

        let pod_obj = &bundle.resources[0];
        assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle"));
        assert_eq!(pod_obj.metadata.namespace.as_deref(), Some("test-ns"));
    }

    #[test]
    fn test_build_privileged_bundle_openshift_distribution() {
        let bundle = build_privileged_bundle(
            PrivilegedPodConfig {
                name: "test-bundle-ocp".to_string(),
                namespace: "test-ns".to_string(),
                node_name: "node-1".to_string(),
                container_name: "test-container".to_string(),
                command: vec!["echo".to_string(), "hello".to_string()],
                ..Default::default()
            },
            &KubernetesDistribution::OpenshiftFamily,
        );

        // For OpenShift, both ClusterRoleBinding and Pod should be in the bundle
        assert_eq!(bundle.resources.len(), 2);

        // First resource should be the ClusterRoleBinding
        let crb_obj = &bundle.resources[0];
        assert_eq!(
            crb_obj.metadata.name.as_deref(),
            Some("test-bundle-ocp-scc-binding")
        );

        // Verify it's targeting the privileged SCC
        if let Some(role_ref) = crb_obj.data.get("roleRef") {
            assert_eq!(
                role_ref.get("name").and_then(|v| v.as_str()),
                Some("system:openshift:scc:privileged")
            );
        }

        // Second resource should be the Pod
        let pod_obj = &bundle.resources[1];
        assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle-ocp"));
        assert_eq!(pod_obj.metadata.namespace.as_deref(), Some("test-ns"));
    }

    #[test]
    fn test_build_privileged_bundle_k3s_distribution() {
        let bundle = build_privileged_bundle(
            PrivilegedPodConfig {
                name: "test-bundle-k3s".to_string(),
                namespace: "test-ns".to_string(),
                node_name: "node-1".to_string(),
                container_name: "test-container".to_string(),
                command: vec!["echo".to_string(), "hello".to_string()],
                ..Default::default()
            },
            &KubernetesDistribution::K3sFamily,
        );

        // For K3s, only the Pod should be in the bundle (no special SCC)
        assert_eq!(bundle.resources.len(), 1);

        let pod_obj = &bundle.resources[0];
        assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle-k3s"));
    }

    // Golden test: exact YAML rendering for the default distribution.
    // NOTE(review): leading indentation inside the expected-YAML literal below
    // appears to have been lost in transit; confirm it against actual
    // `serde_yaml::to_string` output before trusting this assertion verbatim.
    #[test]
    fn test_pod_yaml_rendering_expected() {
        let pod = build_privileged_pod(
            PrivilegedPodConfig {
                name: "pod_name".to_string(),
                namespace: "pod_namespace".to_string(),
                node_name: "node name".to_string(),
                container_name: "container name".to_string(),
                command: vec!["command".to_string(), "argument".to_string()],
                host_pid: true,
                host_network: true,
                ..Default::default()
            },
            &KubernetesDistribution::Default,
        );

        assert_eq!(
            &serde_yaml::to_string(&pod).unwrap(),
            "apiVersion: v1
kind: Pod
metadata:
name: pod_name
namespace: pod_namespace
spec:
containers:
- command:
- command
- argument
image: hub.nationtech.io/redhat/ubi10:latest
name: container name
securityContext:
privileged: true
volumeMounts: []
hostNetwork: true
hostPID: true
nodeName: node name
restartPolicy: Never
volumes: []
"
        );
    }

    // Golden test: OpenShift rendering additionally carries SCC annotations.
    // NOTE(review): same caveat as above — the expected-YAML literal's leading
    // indentation may have been lost in transit; verify before relying on it.
    #[test]
    fn test_pod_yaml_rendering_openshift() {
        let pod = build_privileged_pod(
            PrivilegedPodConfig {
                name: "pod_name".to_string(),
                namespace: "pod_namespace".to_string(),
                node_name: "node name".to_string(),
                container_name: "container name".to_string(),
                command: vec!["command".to_string(), "argument".to_string()],
                host_pid: true,
                host_network: true,
                ..Default::default()
            },
            &KubernetesDistribution::OpenshiftFamily,
        );

        assert_eq!(
            &serde_yaml::to_string(&pod).unwrap(),
            "apiVersion: v1
kind: Pod
metadata:
annotations:
openshift.io/required-scc: privileged
openshift.io/scc: privileged
name: pod_name
namespace: pod_namespace
spec:
containers:
- command:
- command
- argument
image: hub.nationtech.io/redhat/ubi10:latest
name: container name
securityContext:
privileged: true
volumeMounts: []
hostNetwork: true
hostPID: true
nodeName: node name
restartPolicy: Never
volumes: []
"
        );
    }
}
|
||||
2586
harmony/src/domain/topology/k8s/mod.rs
Normal file
2586
harmony/src/domain/topology/k8s/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,17 +3,11 @@ use std::{collections::BTreeMap, process::Command, sync::Arc, time::Duration};
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine, engine::general_purpose};
|
||||
use harmony_types::rfc1123::Rfc1123Name;
|
||||
use k8s_openapi::{
|
||||
ByteString,
|
||||
api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
},
|
||||
};
|
||||
use kube::{
|
||||
api::{DynamicObject, GroupVersionKind, ObjectMeta},
|
||||
runtime::conditions,
|
||||
use k8s_openapi::api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use kube::api::{DynamicObject, GroupVersionKind, ObjectMeta};
|
||||
use log::{debug, info, trace, warn};
|
||||
use serde::Serialize;
|
||||
use tokio::sync::OnceCell;
|
||||
@@ -34,10 +28,7 @@ use crate::{
|
||||
score_cert_management::CertificateManagementScore,
|
||||
},
|
||||
k3d::K3DInstallationScore,
|
||||
k8s::{
|
||||
ingress::{K8sIngressScore, PathType},
|
||||
resource::K8sResourceScore,
|
||||
},
|
||||
k8s::ingress::{K8sIngressScore, PathType},
|
||||
monitoring::{
|
||||
grafana::{grafana::Grafana, helm::helm_grafana::grafana_helm_chart_score},
|
||||
kube_prometheus::crd::{
|
||||
@@ -54,7 +45,6 @@ use crate::{
|
||||
service_monitor::ServiceMonitor,
|
||||
},
|
||||
},
|
||||
nats::capability::NatsCluster,
|
||||
okd::{crd::ingresses_config::Ingress as IngressResource, route::OKDTlsPassthroughScore},
|
||||
prometheus::{
|
||||
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
||||
@@ -103,7 +93,6 @@ enum K8sSource {
|
||||
pub struct K8sAnywhereTopology {
|
||||
k8s_state: Arc<OnceCell<Option<K8sState>>>,
|
||||
tenant_manager: Arc<OnceCell<K8sTenantManager>>,
|
||||
k8s_distribution: Arc<OnceCell<KubernetesDistribution>>,
|
||||
config: Arc<K8sAnywhereConfig>,
|
||||
}
|
||||
|
||||
@@ -554,7 +543,6 @@ impl K8sAnywhereTopology {
|
||||
Self {
|
||||
k8s_state: Arc::new(OnceCell::new()),
|
||||
tenant_manager: Arc::new(OnceCell::new()),
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
config: Arc::new(K8sAnywhereConfig::from_env()),
|
||||
}
|
||||
}
|
||||
@@ -563,7 +551,6 @@ impl K8sAnywhereTopology {
|
||||
Self {
|
||||
k8s_state: Arc::new(OnceCell::new()),
|
||||
tenant_manager: Arc::new(OnceCell::new()),
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
config: Arc::new(config),
|
||||
}
|
||||
}
|
||||
@@ -600,41 +587,6 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_k8s_distribution(&self) -> Result<&KubernetesDistribution, PreparationError> {
|
||||
self.k8s_distribution
|
||||
.get_or_try_init(async || {
|
||||
debug!("Trying to detect k8s distribution");
|
||||
let client = self.k8s_client().await.unwrap();
|
||||
|
||||
let discovery = client.discovery().await.map_err(|e| {
|
||||
PreparationError::new(format!("Could not discover API groups: {}", e))
|
||||
})?;
|
||||
|
||||
let version = client.get_apiserver_version().await.map_err(|e| {
|
||||
PreparationError::new(format!("Could not get server version: {}", e))
|
||||
})?;
|
||||
|
||||
// OpenShift / OKD
|
||||
if discovery
|
||||
.groups()
|
||||
.any(|g| g.name() == "project.openshift.io")
|
||||
{
|
||||
info!("Found KubernetesDistribution OpenshiftFamily");
|
||||
return Ok(KubernetesDistribution::OpenshiftFamily);
|
||||
}
|
||||
|
||||
// K3d / K3s
|
||||
if version.git_version.contains("k3s") {
|
||||
info!("Found KubernetesDistribution K3sFamily");
|
||||
return Ok(KubernetesDistribution::K3sFamily);
|
||||
}
|
||||
|
||||
info!("Could not identify KubernetesDistribution, using Default");
|
||||
return Ok(KubernetesDistribution::Default);
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option<String> {
|
||||
let token_b64 = secret
|
||||
.data
|
||||
@@ -652,6 +604,16 @@ impl K8sAnywhereTopology {
|
||||
Some(cleaned)
|
||||
}
|
||||
|
||||
pub async fn get_k8s_distribution(&self) -> Result<KubernetesDistribution, PreparationError> {
|
||||
self.k8s_client()
|
||||
.await?
|
||||
.get_k8s_distribution()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
PreparationError::new(format!("Failed to get k8s distribution from client : {e}"))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build_cluster_rolebinding(
|
||||
&self,
|
||||
service_account_name: &str,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::{net::SocketAddr, str::FromStr};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
|
||||
use super::LogicalHost;
|
||||
|
||||
@@ -188,6 +188,10 @@ impl FromStr for DnsRecordType {
|
||||
pub trait NetworkManager: Debug + Send + Sync {
|
||||
async fn ensure_network_manager_installed(&self) -> Result<(), NetworkError>;
|
||||
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), NetworkError>;
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, new)]
|
||||
|
||||
@@ -5,9 +5,20 @@ use harmony_types::{
|
||||
net::{IpAddress, MacAddress},
|
||||
switch::{PortDeclaration, PortLocation},
|
||||
};
|
||||
use log::info;
|
||||
use option_ext::OptionExt;
|
||||
|
||||
use crate::topology::{PortConfig, SwitchClient, SwitchError};
|
||||
use crate::{
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{PortConfig, SwitchClient, SwitchError},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BrocadeSwitchConfig {
|
||||
pub ips: Vec<IpAddress>,
|
||||
pub auth: BrocadeSwitchAuth,
|
||||
pub options: BrocadeOptions,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BrocadeSwitchClient {
|
||||
@@ -15,13 +26,11 @@ pub struct BrocadeSwitchClient {
|
||||
}
|
||||
|
||||
impl BrocadeSwitchClient {
|
||||
pub async fn init(
|
||||
ip_addresses: &[IpAddress],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
) -> Result<Self, brocade::Error> {
|
||||
let brocade = brocade::init(ip_addresses, username, password, options).await?;
|
||||
pub async fn init(config: BrocadeSwitchConfig) -> Result<Self, brocade::Error> {
|
||||
let auth = &config.auth;
|
||||
let options = &config.options;
|
||||
|
||||
let brocade = brocade::init(&config.ips, &auth.username, &auth.password, options).await?;
|
||||
Ok(Self { brocade })
|
||||
}
|
||||
}
|
||||
@@ -52,13 +61,18 @@ impl SwitchClient for BrocadeSwitchClient {
|
||||
|| link.remote_port.contains(&interface.port_location)
|
||||
})
|
||||
})
|
||||
.map(|interface| (interface.name.clone(), PortOperatingMode::Access))
|
||||
.map(|interface| (interface.name.clone(), PortOperatingMode::Trunk))
|
||||
.collect();
|
||||
|
||||
if interfaces.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!("About to configure interfaces {interfaces:?}");
|
||||
// inquire::Confirm::new("Do you wish to configures interfaces now?")
|
||||
// .prompt()
|
||||
// .map_err(|e| SwitchError::new(e.to_string()))?;
|
||||
|
||||
self.brocade
|
||||
.configure_interfaces(&interfaces)
|
||||
.await
|
||||
@@ -208,8 +222,8 @@ mod tests {
|
||||
//TODO not sure about this
|
||||
let configured_interfaces = brocade.configured_interfaces.lock().unwrap();
|
||||
assert_that!(*configured_interfaces).contains_exactly(vec![
|
||||
(first_interface.name.clone(), PortOperatingMode::Access),
|
||||
(second_interface.name.clone(), PortOperatingMode::Access),
|
||||
(first_interface.name.clone(), PortOperatingMode::Trunk),
|
||||
(second_interface.name.clone(), PortOperatingMode::Trunk),
|
||||
]);
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use askama::Template;
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::core::v1::Node;
|
||||
@@ -10,13 +11,71 @@ use kube::{
|
||||
ResourceExt,
|
||||
api::{ObjectList, ObjectMeta},
|
||||
};
|
||||
use log::{debug, info};
|
||||
use log::{debug, info, warn};
|
||||
|
||||
use crate::{
|
||||
modules::okd::crd::nmstate,
|
||||
topology::{HostNetworkConfig, NetworkError, NetworkManager, k8s::K8sClient},
|
||||
topology::{
|
||||
HostNetworkConfig, NetworkError, NetworkManager,
|
||||
k8s::{DrainOptions, K8sClient, NodeFile},
|
||||
},
|
||||
};
|
||||
|
||||
/// NetworkManager bond configuration template
|
||||
#[derive(Template)]
|
||||
#[template(
|
||||
source = r#"[connection]
|
||||
id={{ bond_name }}
|
||||
uuid={{ bond_uuid }}
|
||||
type=bond
|
||||
autoconnect-slaves=1
|
||||
interface-name={{ bond_name }}
|
||||
|
||||
[bond]
|
||||
lacp_rate=fast
|
||||
mode=802.3ad
|
||||
xmit_hash_policy=layer2
|
||||
|
||||
[ipv4]
|
||||
method=auto
|
||||
|
||||
[ipv6]
|
||||
addr-gen-mode=default
|
||||
method=auto
|
||||
|
||||
[proxy]
|
||||
"#,
|
||||
ext = "txt"
|
||||
)]
|
||||
struct BondConfigTemplate {
|
||||
bond_name: String,
|
||||
bond_uuid: String,
|
||||
}
|
||||
|
||||
/// NetworkManager bond slave configuration template
|
||||
#[derive(Template)]
|
||||
#[template(
|
||||
source = r#"[connection]
|
||||
id={{ slave_id }}
|
||||
uuid={{ slave_uuid }}
|
||||
type=ethernet
|
||||
interface-name={{ interface_name }}
|
||||
master={{ bond_name }}
|
||||
slave-type=bond
|
||||
|
||||
[ethernet]
|
||||
|
||||
[bond-port]
|
||||
"#,
|
||||
ext = "txt"
|
||||
)]
|
||||
struct BondSlaveConfigTemplate {
|
||||
slave_id: String,
|
||||
slave_uuid: String,
|
||||
interface_name: String,
|
||||
bond_name: String,
|
||||
}
|
||||
|
||||
/// TODO document properly the non-intuitive behavior or "roll forward only" of nmstate in general
|
||||
/// It is documented in nmstate official doc, but worth mentionning here :
|
||||
///
|
||||
@@ -87,6 +146,117 @@ impl NetworkManager for OpenShiftNmStateNetworkManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Configures bonding on the primary network interface of a node.
    ///
    /// Changing the *primary* network interface (making it a bond
    /// slave) will disrupt node connectivity mid-change, so the
    /// procedure is:
    ///
    /// 1. Generate NetworkManager .nmconnection files
    /// 2. Write configuration files to `/etc/NetworkManager/system-connections/`
    ///    while the node is still schedulable (draining happens later, inside
    ///    the reboot step)
    /// 3. Attempt to reload NetworkManager (optional, best-effort)
    /// 4. Reboot the node with full verification (drain, boot_id check, uncordon)
    ///
    /// The reboot procedure includes:
    /// - Recording boot_id before reboot
    /// - Fire-and-forget reboot command
    /// - Waiting for NotReady status
    /// - Waiting for Ready status
    /// - Verifying boot_id changed
    /// - Uncordoning the node
    ///
    /// See ADR-019 for context and rationale.
    async fn configure_bond_on_primary_interface(
        &self,
        config: &HostNetworkConfig,
    ) -> Result<(), NetworkError> {
        use std::time::Duration;

        let node_name = self.get_node_name_for_id(&config.host_id).await?;
        let hostname = self.get_hostname(&config.host_id).await?;

        info!(
            "Configuring bond on primary interface for host '{}' (node '{}')",
            config.host_id, node_name
        );

        // 1. Generate .nmconnection files
        let files = self.generate_nmconnection_files(&hostname, config)?;
        debug!(
            "Generated {} NetworkManager configuration files",
            files.len()
        );

        // 2. Write configuration files to the node (before draining)
        // We do this while the node is still running for faster operation
        info!(
            "Writing NetworkManager configuration files to node '{}'...",
            node_name
        );
        self.k8s_client
            .write_files_to_node(&node_name, &files)
            .await
            .map_err(|e| {
                NetworkError::new(format!(
                    "Failed to write configuration files to node '{}': {}",
                    node_name, e
                ))
            })?;

        // 3. Reload NetworkManager configuration (best-effort)
        // This won't activate the bond yet since the primary interface would lose connectivity,
        // but it validates the configuration files are correct
        info!(
            "Reloading NetworkManager configuration on node '{}'...",
            node_name
        );
        match self
            .k8s_client
            .run_privileged_command_on_node(&node_name, "chroot /host nmcli connection reload")
            .await
        {
            Ok(output) => {
                debug!("NetworkManager reload output: {}", output.trim());
            }
            Err(e) => {
                warn!(
                    "Failed to reload NetworkManager configuration: {}. Proceeding with reboot.",
                    e
                );
                // Don't fail here - reboot will pick up the config anyway
            }
        }

        // 4. Reboot the node with full verification
        // The reboot_node function handles: drain, boot_id capture, reboot, NotReady wait,
        // Ready wait, boot_id verification, and uncordon
        // 60 minutes timeout for bare-metal environments (drain can take 20-30 mins)
        let reboot_timeout = Duration::from_secs(3600);
        info!(
            "Rebooting node '{}' to apply network configuration (timeout: {:?})...",
            node_name, reboot_timeout
        );

        self.k8s_client
            .reboot_node(
                &node_name,
                &DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
                reboot_timeout,
            )
            .await
            .map_err(|e| {
                NetworkError::new(format!("Failed to reboot node '{}': {}", node_name, e))
            })?;

        info!(
            "Successfully configured bond on primary interface for host '{}' (node '{}')",
            config.host_id, node_name
        );

        Ok(())
    }
|
||||
|
||||
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), NetworkError> {
|
||||
let hostname = self.get_hostname(&config.host_id).await.map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
@@ -208,14 +378,14 @@ impl OpenShiftNmStateNetworkManager {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
|
||||
async fn get_node_for_id(&self, host_id: &Id) -> Result<Node, String> {
|
||||
let nodes: ObjectList<Node> = self
|
||||
.k8s_client
|
||||
.list_resources(None, None)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to list nodes: {e}"))?;
|
||||
|
||||
let Some(node) = nodes.iter().find(|n| {
|
||||
let Some(node) = nodes.into_iter().find(|n| {
|
||||
n.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.node_info.as_ref())
|
||||
@@ -225,6 +395,20 @@ impl OpenShiftNmStateNetworkManager {
|
||||
return Err(format!("No node found for host '{host_id}'"));
|
||||
};
|
||||
|
||||
Ok(node)
|
||||
}
|
||||
|
||||
async fn get_node_name_for_id(&self, host_id: &Id) -> Result<String, String> {
|
||||
let node = self.get_node_for_id(host_id).await?;
|
||||
|
||||
node.metadata.name.ok_or(format!(
|
||||
"A node should always have a name, node for host_id {host_id} has no name"
|
||||
))
|
||||
}
|
||||
|
||||
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
|
||||
let node = self.get_node_for_id(host_id).await?;
|
||||
|
||||
node.labels()
|
||||
.get("kubernetes.io/hostname")
|
||||
.ok_or(format!(
|
||||
@@ -261,4 +445,82 @@ impl OpenShiftNmStateNetworkManager {
|
||||
let next_id = (0..).find(|id| !used_ids.contains(id)).unwrap();
|
||||
Ok(format!("bond{next_id}"))
|
||||
}
|
||||
|
||||
    /// Generates NetworkManager .nmconnection files for bonding configuration.
    ///
    /// Creates:
    /// - One bond master configuration file (bond0.nmconnection)
    /// - One slave configuration file per interface (bond0-<iface>.nmconnection)
    ///
    /// All files are placed in `/etc/NetworkManager/system-connections/` with
    /// mode 0o600 (required by NetworkManager).
    ///
    /// NOTE(review): the bond name is hard-coded to "bond0" here even though a
    /// sibling helper derives the next free `bond{N}` name — confirm a fixed
    /// name is intended for hosts that may already have a bond configured.
    fn generate_nmconnection_files(
        &self,
        hostname: &str,
        // `hostname` is only used in the final debug log, not in file content.
        config: &HostNetworkConfig,
    ) -> Result<Vec<NodeFile>, NetworkError> {
        let mut files = Vec::new();
        let bond_name = "bond0";
        // Fresh UUID per invocation: NetworkManager requires a unique
        // connection UUID for each profile.
        let bond_uuid = uuid::Uuid::new_v4().to_string();

        // Generate bond master configuration
        let bond_template = BondConfigTemplate {
            bond_name: bond_name.to_string(),
            bond_uuid: bond_uuid.clone(),
        };

        let bond_content = bond_template.render().map_err(|e| {
            NetworkError::new(format!(
                "Failed to render bond configuration template: {}",
                e
            ))
        })?;

        files.push(NodeFile {
            path: format!(
                "/etc/NetworkManager/system-connections/{}.nmconnection",
                bond_name
            ),
            content: bond_content,
            // NetworkManager refuses connection files that are world-readable.
            mode: 0o600,
        });

        // Generate slave configurations for each interface
        for switch_port in &config.switch_ports {
            let interface_name = &switch_port.interface.name;
            let slave_id = format!("{}-{}", bond_name, interface_name);
            let slave_uuid = uuid::Uuid::new_v4().to_string();

            let slave_template = BondSlaveConfigTemplate {
                slave_id: slave_id.clone(),
                slave_uuid,
                interface_name: interface_name.clone(),
                bond_name: bond_name.to_string(),
            };

            let slave_content = slave_template.render().map_err(|e| {
                NetworkError::new(format!(
                    "Failed to render slave configuration template for interface '{}': {}",
                    interface_name, e
                ))
            })?;

            files.push(NodeFile {
                path: format!(
                    "/etc/NetworkManager/system-connections/{}.nmconnection",
                    slave_id
                ),
                content: slave_content,
                mode: 0o600,
            });
        }

        debug!(
            "Generated {} NetworkManager configuration files for host '{}'",
            files.len(),
            hostname
        );

        Ok(files)
    }
|
||||
}
|
||||
|
||||
138
harmony/src/modules/brocade/brocade.rs
Normal file
138
harmony/src/modules/brocade/brocade.rs
Normal file
@@ -0,0 +1,138 @@
|
||||
use async_trait::async_trait;
|
||||
use brocade::{BrocadeOptions, PortOperatingMode};
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
infra::brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HostNetworkConfig, PortConfig, PreparationError, PreparationOutcome, Switch, SwitchClient,
|
||||
SwitchError, Topology,
|
||||
},
|
||||
};
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{id::Id, net::MacAddress, switch::PortLocation};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct BrocadeSwitchScore {
|
||||
pub port_channels_to_clear: Vec<Id>,
|
||||
pub ports_to_configure: Vec<PortConfig>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Switch> Score<T> for BrocadeSwitchScore {
|
||||
fn name(&self) -> String {
|
||||
"BrocadeSwitchScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(BrocadeSwitchInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BrocadeSwitchInterpret {
|
||||
score: BrocadeSwitchScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + Switch> Interpret<T> for BrocadeSwitchInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Applying switch configuration {:?}", self.score);
|
||||
debug!(
|
||||
"Clearing port channel {:?}",
|
||||
self.score.port_channels_to_clear
|
||||
);
|
||||
topology
|
||||
.clear_port_channel(&self.score.port_channels_to_clear)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
debug!("Configuring interfaces {:?}", self.score.ports_to_configure);
|
||||
topology
|
||||
.configure_interface(&self.score.ports_to_configure)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success("switch configured".to_string()))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("BrocadeSwitchInterpret")
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
pub struct BrocadeSwitchConfig {
|
||||
pub ips: Vec<harmony_types::net::IpAddress>,
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
pub options: BrocadeOptions,
|
||||
}
|
||||
*/
|
||||
|
||||
pub struct SwitchTopology {
|
||||
client: Box<dyn SwitchClient>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Topology for SwitchTopology {
|
||||
fn name(&self) -> &str {
|
||||
"SwitchTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
impl SwitchTopology {
|
||||
pub async fn new(config: BrocadeSwitchConfig) -> Self {
|
||||
let client = BrocadeSwitchClient::init(config)
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
let client = Box::new(client);
|
||||
Self { client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Switch for SwitchTopology {
|
||||
async fn setup_switch(&self) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_port_for_mac_address(
|
||||
&self,
|
||||
_mac_address: &MacAddress,
|
||||
) -> Result<Option<PortLocation>, SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn configure_port_channel(&self, _config: &HostNetworkConfig) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
self.client.clear_port_channel(ids).await
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
self.client.configure_interface(ports).await
|
||||
}
|
||||
}
|
||||
@@ -39,16 +39,16 @@ pub struct BrocadeEnableSnmpInterpret {
|
||||
}
|
||||
|
||||
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
|
||||
struct BrocadeSwitchAuth {
|
||||
username: String,
|
||||
password: String,
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
|
||||
struct BrocadeSnmpAuth {
|
||||
username: String,
|
||||
auth_password: String,
|
||||
des_password: String,
|
||||
pub struct BrocadeSnmpAuth {
|
||||
pub username: String,
|
||||
pub auth_password: String,
|
||||
pub des_password: String,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -72,7 +72,7 @@ impl<T: Topology> Interpret<T> for BrocadeEnableSnmpInterpret {
|
||||
&switch_addresses,
|
||||
&config.username,
|
||||
&config.password,
|
||||
BrocadeOptions {
|
||||
&BrocadeOptions {
|
||||
dry_run: self.score.dry_run,
|
||||
..Default::default()
|
||||
},
|
||||
5
harmony/src/modules/brocade/mod.rs
Normal file
5
harmony/src/modules/brocade/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
pub mod brocade;
|
||||
pub use brocade::*;
|
||||
|
||||
pub mod brocade_snmp;
|
||||
pub use brocade_snmp::*;
|
||||
@@ -45,7 +45,7 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!(
|
||||
"Launching discovery agent, make sure that your nodes are successfully PXE booted and running inventory agent. They should answer on `http://<node_ip>:8080/inventory`"
|
||||
"Launching discovery agent, make sure that your nodes are successfully PXE booted and running inventory agent. They should answer on `http://<node_ip>:25000/inventory`"
|
||||
);
|
||||
LaunchDiscoverInventoryAgentScore {
|
||||
discovery_timeout: None,
|
||||
@@ -58,6 +58,8 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
|
||||
let host_repo = InventoryRepositoryFactory::build().await?;
|
||||
|
||||
let mut assigned_hosts = 0;
|
||||
// let hosts_for_role = host_repo.get_hosts_for_role(&self.score.role);
|
||||
|
||||
loop {
|
||||
let all_hosts = host_repo.get_all_hosts().await?;
|
||||
|
||||
@@ -82,17 +84,40 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
|
||||
self.score.role,
|
||||
choice.summary()
|
||||
);
|
||||
let disk_names: Vec<String> =
|
||||
choice.storage.iter().map(|s| s.name.clone()).collect();
|
||||
let mut disk_choices: Vec<(String, String)> = vec![];
|
||||
|
||||
for s in choice.storage.iter() {
|
||||
let size_gb: f64 = s.size_bytes as f64 / 1_000_000_000.0;
|
||||
let (size, unit) = if size_gb >= 1000.0 {
|
||||
(size_gb / 1000.0, "TB")
|
||||
} else {
|
||||
(size_gb, "GB")
|
||||
};
|
||||
let drive_type = if s.rotational { "rotational" } else { "SSD" };
|
||||
let smart_str = s.smart_status.as_deref().unwrap_or("N/A");
|
||||
let display = format!(
|
||||
"{} : [{}] - {:.0} {} ({}) - {} - Smart: {}",
|
||||
s.name, s.model, size, unit, drive_type, s.interface_type, smart_str
|
||||
);
|
||||
disk_choices.push((display, s.name.clone()));
|
||||
}
|
||||
|
||||
let display_refs: Vec<&str> =
|
||||
disk_choices.iter().map(|(d, _)| d.as_str()).collect();
|
||||
|
||||
let disk_choice = inquire::Select::new(
|
||||
&format!("Select the disk to use on host {}:", choice.summary()),
|
||||
disk_names,
|
||||
display_refs,
|
||||
)
|
||||
.prompt();
|
||||
|
||||
match disk_choice {
|
||||
Ok(disk_name) => {
|
||||
Ok(selected_display) => {
|
||||
let disk_name = disk_choices
|
||||
.iter()
|
||||
.find(|(d, _)| d.as_str() == selected_display)
|
||||
.map(|(_, name)| name.clone())
|
||||
.unwrap();
|
||||
info!("Selected disk {} for node {}", disk_name, choice.summary());
|
||||
host_repo
|
||||
.save_role_mapping(&self.score.role, &choice, &disk_name)
|
||||
|
||||
@@ -12,6 +12,74 @@ use crate::{
|
||||
topology::{HostNetworkConfig, NetworkInterface, NetworkManager, Switch, SwitchPort, Topology},
|
||||
};
|
||||
|
||||
/// Configures high-availability networking for a set of physical hosts.
|
||||
///
|
||||
/// This is an opinionated Score that creates a resilient network configuration.
|
||||
/// It assumes hosts have at least two network interfaces connected
|
||||
/// to redundant switches for high availability.
|
||||
///
|
||||
/// The Score's `Interpret` logic will:
|
||||
/// 1. Setup the switch with sane defaults (e.g. mark interfaces as switchports for discoverability).
|
||||
/// 2. Discover which switch ports each host's interfaces are connected to (via MAC address).
|
||||
/// 3. Create a network bond (e.g. LACP) on the host itself using these interfaces.
|
||||
/// 4. Configure a corresponding port-channel on the switch(es) for those ports.
|
||||
///
|
||||
/// This ensures that both the host and the switch are configured to treat the
|
||||
/// multiple links as a single, aggregated, and redundant connection.
|
||||
///
|
||||
/// Hosts with 0 or 1 detected interfaces will be skipped, as bonding is not
|
||||
/// applicable.
|
||||
///
|
||||
/// <div class="warning">
|
||||
/// The implementation is currently _not_ idempotent, even though it should be.
|
||||
/// Running it more than once on the same host might result in duplicated bond configurations.
|
||||
/// </div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
/// This Score is not named well. A better name would be
|
||||
/// `HighAvailabilityHostNetworkScore`, or something similar to better express the intent.
|
||||
/// </div>
|
||||
///
|
||||
/// # Requirements
|
||||
///
|
||||
/// This Score can only be applied to a [Topology] that implements both the
|
||||
/// [NetworkManager] (to configure the host-side bond) and [Switch]
|
||||
/// (to configure the switch-side port-channel) capabilities.
|
||||
///
|
||||
/// # Current limitations
|
||||
///
|
||||
/// ## 1. No rollback logic & limited idempotency
|
||||
///
|
||||
/// If any of the steps described above fails, the Score will not attempt to revert any changes
|
||||
/// already applied. Which could render the host or switch in an inconsistent state.
|
||||
///
|
||||
/// ## 2. Propagation delays on the switch
|
||||
///
|
||||
/// It might take some time for the sane defaults in step 1) to be applied. In some cases,
|
||||
/// it was observed that the switch takes up to 5min to actually apply the config.
|
||||
///
|
||||
/// But this Score's Interpret doesn't wait and directly proceeds to step 2) to discover
|
||||
/// the MAC addresses. Which could result interfaces being skipped because their corresponding port
|
||||
/// on the switch couldn't be found.
|
||||
///
|
||||
/// TODO: Validate that the switch is in the expected state before continuing.
|
||||
///
|
||||
/// ## 3. Bond configuration
|
||||
///
|
||||
/// To find the next available bond id, the current
|
||||
/// [NetworkManager](crate::infra::network_manager::OpenShiftNmStateNetworkManager) implementation
|
||||
/// simply checks for existing bonds named `bond[n]` and take the next available `n` number.
|
||||
///
|
||||
/// It doesn't check that there are already a bond for the interfaces that should be bonded. Which
|
||||
/// might result in a duplicate bond being created.
|
||||
///
|
||||
/// TODO: Make sure the interfaces to aggregate are not already bonded.
|
||||
///
|
||||
/// # Future improvements
|
||||
///
|
||||
/// Along with the `TODO` items above, splitting this Score into multiple smaller ones would be
|
||||
/// beneficial. It has a lot of moving parts and some of them could be used on their own to make
|
||||
/// operations on a cluster easier.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct HostNetworkConfigurationScore {
|
||||
pub hosts: Vec<PhysicalHost>,
|
||||
@@ -74,9 +142,13 @@ impl HostNetworkConfigurationInterpret {
|
||||
);
|
||||
|
||||
info!("[Host {current_host}/{total_hosts}] Configuring host network...");
|
||||
topology.configure_bond(&config).await.map_err(|e| {
|
||||
InterpretError::new(format!("Failed to configure host network: {e}"))
|
||||
})?;
|
||||
topology
|
||||
.configure_bond_on_primary_interface(&config)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
InterpretError::new(format!("Failed to configure host network: {e}"))
|
||||
})?;
|
||||
|
||||
topology
|
||||
.configure_port_channel(&config)
|
||||
.await
|
||||
@@ -663,6 +735,16 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
let mut configured_bonds = self.configured_bonds.lock().unwrap();
|
||||
configured_bonds.push((config.host_id.clone(), config.clone()));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use async_nats::ConnectOptions;
|
||||
|
||||
use crate::{
|
||||
agent::AgentRole,
|
||||
store::{ChaosKvStore, InMemoryKvStore, NatsKvStore},
|
||||
@@ -65,7 +67,15 @@ fn get_chaos_store(
|
||||
}
|
||||
|
||||
async fn get_local_nats_store() -> Arc<NatsKvStore> {
|
||||
let client = async_nats::connect("localhost").await.unwrap();
|
||||
let mut client = async_nats::ConnectOptions::new()
|
||||
// .require_tls(true)
|
||||
.user_and_password("admin".into(), "admin2".into())
|
||||
.ping_interval(std::time::Duration::from_secs(10))
|
||||
.connect("localhost")
|
||||
.await
|
||||
.expect("Connection to nats failed");
|
||||
|
||||
// let client = async_nats::connect("localhost").await.unwrap();
|
||||
let jetstream = async_nats::jetstream::new(client);
|
||||
let kv = jetstream
|
||||
.create_key_value(async_nats::jetstream::kv::Config {
|
||||
|
||||
@@ -68,7 +68,7 @@ impl<'a> DhcpConfigDnsMasq<'a> {
|
||||
///
|
||||
/// This function implements specific logic to handle existing entries:
|
||||
/// - If no host exists for the given IP or hostname, a new entry is created.
|
||||
/// - If exactly one host exists for the IP and/or hostname, the new MAC is appended to it.
|
||||
/// - If exactly one host exists for the IP and/or hostname, the new MAC is set. Old MAC addresses are dropped.
|
||||
/// - It will error if the IP and hostname exist but point to two different host entries,
|
||||
/// as this represents an unresolvable conflict.
|
||||
/// - It will also error if multiple entries are found for the IP or hostname, indicating an
|
||||
@@ -146,40 +146,24 @@ impl<'a> DhcpConfigDnsMasq<'a> {
|
||||
let host_to_modify_ip = host_to_modify.ip.content_string();
|
||||
if host_to_modify_ip != ip_str {
|
||||
warn!(
|
||||
"Hostname '{}' already exists with a different IP ({}). Setting new IP {ip_str}. Appending MAC {}.",
|
||||
hostname, host_to_modify_ip, mac_list
|
||||
"Hostname '{}' already exists with a different IP ({}). Setting new IP {ip_str}.",
|
||||
hostname, host_to_modify_ip,
|
||||
);
|
||||
host_to_modify.ip.content = Some(ip_str);
|
||||
} else if host_to_modify.host != hostname {
|
||||
warn!(
|
||||
"IP {} already exists with a different hostname ('{}'). Setting hostname to {hostname}. Appending MAC {}.",
|
||||
ipaddr, host_to_modify.host, mac_list
|
||||
"IP {} already exists with a different hostname ('{}'). Setting hostname to {hostname}",
|
||||
ipaddr, host_to_modify.host
|
||||
);
|
||||
host_to_modify.host = hostname.to_string();
|
||||
}
|
||||
|
||||
for single_mac in mac.iter() {
|
||||
if !host_to_modify
|
||||
.hwaddr
|
||||
.content_string()
|
||||
.split(',')
|
||||
.any(|m| m.eq_ignore_ascii_case(single_mac))
|
||||
{
|
||||
info!(
|
||||
"Appending MAC {} to existing static host for {} ({})",
|
||||
single_mac, host_to_modify.host, host_to_modify_ip
|
||||
);
|
||||
let mut updated_macs = host_to_modify.hwaddr.content_string().to_string();
|
||||
updated_macs.push(',');
|
||||
updated_macs.push_str(single_mac);
|
||||
host_to_modify.hwaddr.content = updated_macs.into();
|
||||
} else {
|
||||
debug!(
|
||||
"MAC {} already present in static host entry for {} ({}). No changes made.",
|
||||
single_mac, host_to_modify.host, host_to_modify_ip
|
||||
);
|
||||
}
|
||||
}
|
||||
info!(
|
||||
"Replacing previous mac adresses {:?} with new {}",
|
||||
host_to_modify.hwaddr, mac_list
|
||||
);
|
||||
|
||||
host_to_modify.hwaddr.content = Some(mac_list);
|
||||
}
|
||||
_ => {
|
||||
return Err(DhcpError::Configuration(format!(
|
||||
@@ -397,7 +381,7 @@ mod test {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_add_mac_to_existing_host_by_ip_and_hostname() {
|
||||
fn test_replace_mac_on_existing_host_by_ip_and_hostname() {
|
||||
let initial_host = create_host(
|
||||
"uuid-1",
|
||||
"existing-host",
|
||||
@@ -416,14 +400,11 @@ mod test {
|
||||
let hosts = &dhcp_config.opnsense.dnsmasq.as_ref().unwrap().hosts;
|
||||
assert_eq!(hosts.len(), 1);
|
||||
let host = &hosts[0];
|
||||
assert_eq!(
|
||||
host.hwaddr.content_string(),
|
||||
"AA:BB:CC:DD:EE:FF,00:11:22:33:44:55"
|
||||
);
|
||||
assert_eq!(host.hwaddr.content_string(), "00:11:22:33:44:55");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_add_mac_to_existing_host_by_ip_only() {
|
||||
fn test_replace_mac_on_existing_host_by_ip_only() {
|
||||
let initial_host = create_host(
|
||||
"uuid-1",
|
||||
"existing-host",
|
||||
@@ -443,10 +424,7 @@ mod test {
|
||||
let hosts = &dhcp_config.opnsense.dnsmasq.as_ref().unwrap().hosts;
|
||||
assert_eq!(hosts.len(), 1);
|
||||
let host = &hosts[0];
|
||||
assert_eq!(
|
||||
host.hwaddr.content_string(),
|
||||
"AA:BB:CC:DD:EE:FF,00:11:22:33:44:55"
|
||||
);
|
||||
assert_eq!(host.hwaddr.content_string(), "00:11:22:33:44:55");
|
||||
assert_eq!(host.host, new_hostname); // hostname should be updated
|
||||
}
|
||||
|
||||
@@ -474,10 +452,7 @@ mod test {
|
||||
let hosts = &dhcp_config.opnsense.dnsmasq.as_ref().unwrap().hosts;
|
||||
assert_eq!(hosts.len(), 1);
|
||||
let host = &hosts[0];
|
||||
assert_eq!(
|
||||
host.hwaddr.content_string(),
|
||||
"AA:BB:CC:DD:EE:FF,00:11:22:33:44:55"
|
||||
);
|
||||
assert_eq!(host.hwaddr.content_string(), "00:11:22:33:44:55");
|
||||
assert_eq!(host.ip.content_string(), "192.168.1.99"); // Original IP should be preserved.
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user