Compare commits

..

1 Commits

Author SHA1 Message Date
ac7fd53d5e wip: rook-ceph install score
Some checks failed
Run Check Script / check (pull_request) Failing after 20s
2025-08-25 15:25:10 -04:00
668 changed files with 6473 additions and 65585 deletions

View File

@@ -1,6 +1,2 @@
target/
Dockerfile
.git
data
target
demos
Dockerfile

2
.gitattributes vendored
View File

@@ -2,5 +2,3 @@ bootx64.efi filter=lfs diff=lfs merge=lfs -text
grubx64.efi filter=lfs diff=lfs merge=lfs -text
initrd filter=lfs diff=lfs merge=lfs -text
linux filter=lfs diff=lfs merge=lfs -text
data/okd/bin/* filter=lfs diff=lfs merge=lfs -text
data/okd/installer_image/* filter=lfs diff=lfs merge=lfs -text

View File

@@ -15,4 +15,4 @@ jobs:
uses: actions/checkout@v4
- name: Run check script
run: bash build/check.sh
run: bash check.sh

9
.gitignore vendored
View File

@@ -3,7 +3,6 @@ private_repos/
### Harmony ###
harmony.log
data/okd/installation_files*
### Helm ###
# Chart dependencies
@@ -24,11 +23,3 @@ Cargo.lock
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
.harmony_generated
# Useful to create ignore folders for temp files and notes
ignore
# Generated book
book

15
.gitmodules vendored
View File

@@ -1,15 +0,0 @@
[submodule "examples/try_rust_webapp/tryrust.org"]
path = examples/try_rust_webapp/tryrust.org
url = https://github.com/rust-dd/tryrust.org.git
[submodule "/home/jeangab/work/nationtech/harmony2/opnsense-codegen/vendor/core"]
path = /home/jeangab/work/nationtech/harmony2/opnsense-codegen/vendor/core
url = https://github.com/opnsense/core.git
[submodule "/home/jeangab/work/nationtech/harmony2/opnsense-codegen/vendor/plugins"]
path = /home/jeangab/work/nationtech/harmony2/opnsense-codegen/vendor/plugins
url = https://github.com/opnsense/plugins.git
[submodule "opnsense-codegen/vendor/core"]
path = opnsense-codegen/vendor/core
url = https://github.com/opnsense/core.git
[submodule "opnsense-codegen/vendor/plugins"]
path = opnsense-codegen/vendor/plugins
url = https://github.com/opnsense/plugins.git

View File

@@ -1,26 +0,0 @@
{
"db_name": "SQLite",
"query": "SELECT host_id, installation_device FROM host_role_mapping WHERE role = ?",
"describe": {
"columns": [
{
"name": "host_id",
"ordinal": 0,
"type_info": "Text"
},
{
"name": "installation_device",
"ordinal": 1,
"type_info": "Text"
}
],
"parameters": {
"Right": 1
},
"nullable": [
false,
true
]
},
"hash": "24f719d57144ecf4daa55f0aa5836c165872d70164401c0388e8d625f1b72d7b"
}

View File

@@ -1,12 +0,0 @@
{
"db_name": "SQLite",
"query": "\n INSERT INTO host_role_mapping (host_id, role, installation_device)\n VALUES (?, ?, ?)\n ",
"describe": {
"columns": [],
"parameters": {
"Right": 3
},
"nullable": []
},
"hash": "6fcc29cfdbdf3b2cee94a4844e227f09b245dd8f079832a9a7b774151cb03af6"
}

View File

@@ -1,32 +0,0 @@
{
"db_name": "SQLite",
"query": "\n SELECT\n p1.id,\n p1.version_id,\n p1.data as \"data: Json<PhysicalHost>\"\n FROM\n physical_hosts p1\n INNER JOIN (\n SELECT\n id,\n MAX(version_id) AS max_version\n FROM\n physical_hosts\n GROUP BY\n id\n ) p2 ON p1.id = p2.id AND p1.version_id = p2.max_version\n ",
"describe": {
"columns": [
{
"name": "id",
"ordinal": 0,
"type_info": "Text"
},
{
"name": "version_id",
"ordinal": 1,
"type_info": "Text"
},
{
"name": "data: Json<PhysicalHost>",
"ordinal": 2,
"type_info": "Blob"
}
],
"parameters": {
"Right": 0
},
"nullable": [
false,
false,
false
]
},
"hash": "8d247918eca10a88b784ee353db090c94a222115c543231f2140cba27bd0f067"
}

View File

@@ -1,32 +0,0 @@
{
"db_name": "SQLite",
"query": "SELECT id, version_id, data as \"data: Json<PhysicalHost>\" FROM physical_hosts WHERE id = ? ORDER BY version_id DESC LIMIT 1",
"describe": {
"columns": [
{
"name": "id",
"ordinal": 0,
"type_info": "Text"
},
{
"name": "version_id",
"ordinal": 1,
"type_info": "Text"
},
{
"name": "data: Json<PhysicalHost>",
"ordinal": 2,
"type_info": "Null"
}
],
"parameters": {
"Right": 1
},
"nullable": [
false,
false,
false
]
},
"hash": "934035c7ca6e064815393e4e049a7934b0a7fac04a4fe4b2a354f0443d630990"
}

View File

@@ -1,12 +0,0 @@
{
"db_name": "SQLite",
"query": "INSERT INTO physical_hosts (id, version_id, data) VALUES (?, ?, ?)",
"describe": {
"columns": [],
"parameters": {
"Right": 3
},
"nullable": []
},
"hash": "f10f615ee42129ffa293e46f2f893d65a237d31d24b74a29c6a8d8420d255ab8"
}

4611
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +1,12 @@
[workspace]
resolver = "2"
members = [
"examples/*",
"private_repos/*",
"examples/*",
"harmony",
"harmony_types",
"harmony_macros",
"harmony_tui",
"harmony_execution",
"opnsense-config",
"opnsense-config-xml",
"harmony_cli",
@@ -16,16 +15,6 @@ members = [
"harmony_inventory_agent",
"harmony_secret_derive",
"harmony_secret",
"examples/kvm_okd_ha_cluster",
"examples/example_linux_vm",
"harmony_config_derive",
"harmony_config",
"brocade",
"harmony_agent",
"harmony_agent/deploy",
"harmony_node_readiness",
"harmony-k8s",
"harmony_assets", "opnsense-codegen", "opnsense-api",
]
[workspace.package]
@@ -40,17 +29,14 @@ derive-new = "0.7"
async-trait = "0.1"
tokio = { version = "1.40", features = [
"io-std",
"io-util",
"fs",
"macros",
"rt-multi-thread",
] }
tokio-retry = "0.3.0"
tokio-util = "0.7.15"
cidr = { features = ["serde"], version = "0.2" }
russh = "0.45"
russh-keys = "0.45"
rand = "0.9"
rand = "0.8"
url = "2.5"
kube = { version = "1.1.0", features = [
"config",
@@ -61,7 +47,6 @@ kube = { version = "1.1.0", features = [
"jsonpatch",
] }
k8s-openapi = { version = "0.25", features = ["v1_30"] }
# TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated https://github.com/sebastienrousseau/serde_yml
serde_yaml = "0.9"
serde-value = "0.7"
http = "1.2"
@@ -77,20 +62,6 @@ base64 = "0.22.1"
tar = "0.4.44"
lazy_static = "1.5.0"
directories = "6.0.0"
futures-util = "0.3"
thiserror = "2.0.14"
serde = { version = "1.0.209", features = ["derive", "rc"] }
serde_json = "1.0.127"
askama = "0.14"
sqlx = { version = "0.8", features = ["runtime-tokio", "sqlite"] }
reqwest = { version = "0.12", features = [
"blocking",
"stream",
"rustls-tls",
"http2",
"json",
], default-features = false }
assertor = "0.0.4"
tokio-test = "0.4"
anyhow = "1.0"
clap = { version = "4", features = ["derive"] }

290
README.md
View File

@@ -1,250 +1,150 @@
# Harmony
**Infrastructure orchestration that treats your platform like first-class code.**
Harmony is an open-source framework that brings the rigor of software engineering to infrastructure management. Write Rust code to define what you want, and Harmony handles the rest — from local development to production clusters.
# Harmony : Open-source infrastructure orchestration that treats your platform like first-class code
_By [NationTech](https://nationtech.io)_
[![Build](https://git.nationtech.io/NationTech/harmony/actions/workflows/check.yml/badge.svg)](https://git.nationtech.io/NationTech/harmony)
[![Build](https://git.nationtech.io/NationTech/harmony/actions/workflows/check.yml/badge.svg)](https://git.nationtech.io/nationtech/harmony)
[![License](https://img.shields.io/badge/license-AGPLv3-blue?style=flat-square)](LICENSE)
---
### Unify
## The Problem Harmony Solves
- **Project Scaffolding**
- **Infrastructure Provisioning**
- **Application Deployment**
- **Day-2 operations**
Modern infrastructure is messy. Your Kubernetes cluster needs monitoring. Your bare-metal servers need provisioning. Your applications need deployments. Each comes with its own tooling, its own configuration format, and its own failure modes.
All in **one strongly-typed Rust codebase**.
**What if you could describe your entire platform in one consistent language?**
### Deploy anywhere
That's Harmony. It unifies project scaffolding, infrastructure provisioning, application deployment, and day-2 operations into a single strongly-typed Rust codebase.
From a **developer laptop** to a **global production cluster**, a single **source of truth** drives the **full software lifecycle.**
---
## Three Principles That Make the Difference
## 1 · The Harmony Philosophy
| Principle | What It Means |
|-----------|---------------|
| **Infrastructure as Resilient Code** | Stop fighting with YAML and bash. Write type-safe Rust that you can test, version, and refactor like any other code. |
| **Prove It Works Before You Deploy** | Harmony verifies at _compile time_ that your application can actually run on your target infrastructure. No more "the config looks right but it doesn't work" surprises. |
| **One Unified Model** | Software and infrastructure are one system. Deploy from laptop to production cluster without switching contexts or tools. |
Infrastructure is essential, but it shouldn't be your core business. Harmony is built on three guiding principles that make modern platforms reliable, repeatable, and easy to reason about.
| Principle | What it means for you |
| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **Infrastructure as Resilient Code** | Replace sprawling YAML and bash scripts with type-safe Rust. Test, refactor, and version your platform just like application code. |
| **Prove It Works — Before You Deploy** | Harmony uses the compiler to verify that your application's needs match the target environment's capabilities at **compile-time**, eliminating an entire class of runtime outages. |
| **One Unified Model** | Software and infrastructure are a single system. Harmony models them together, enabling deep automation—from bare-metal servers to Kubernetes workloads—with zero context switching. |
These principles surface as simple, ergonomic Rust APIs that let teams focus on their product while trusting the platform underneath.
---
## How It Works: The Core Concepts
## 2 · Quick Start
Harmony is built around three concepts that work together:
### Score — "What You Want"
A `Score` is a declarative description of desired state. Think of it as a "recipe" that says _what_ you want without specifying _how_ to get there.
```rust
// "I want a PostgreSQL cluster running with default settings"
let postgres = PostgreSQLScore {
config: PostgreSQLConfig {
cluster_name: "harmony-postgres-example".to_string(),
namespace: "harmony-postgres-example".to_string(),
..Default::default()
},
};
```
### Topology — "Where It Goes"
A `Topology` represents your infrastructure environment and its capabilities. It answers the question: "What can this environment actually do?"
```rust
// Deploy to a local K3D cluster, or any Kubernetes cluster via environment variables
K8sAnywhereTopology::from_env()
```
### Interpret — "How It Happens"
An `Interpret` is the execution logic that connects your `Score` to your `Topology`. It translates "what you want" into "what the infrastructure does."
**The Compile-Time Check:** Before your code ever runs, Harmony verifies that your `Score` is compatible with your `Topology`. If your application needs a feature your infrastructure doesn't provide, you get a compile error — not a runtime failure.
---
## What You Can Deploy
Harmony ships with ready-made Scores for:
**Data Services**
- PostgreSQL clusters (via CloudNativePG operator)
- Multi-site PostgreSQL with failover
**Kubernetes**
- Namespaces, Deployments, Ingress
- Helm charts
- cert-manager for TLS
- Monitoring (Prometheus, alerting, ntfy)
**Bare Metal / Infrastructure**
- OKD clusters from scratch
- OPNsense firewalls
- Network services (DNS, DHCP, TFTP)
- Brocade switch configuration
**And more:** application deployment, tenant management, and load balancing.
---
## Quick Start: Deploy a PostgreSQL Cluster
This example provisions a local Kubernetes cluster (K3D) and deploys a PostgreSQL cluster on it — no external infrastructure required.
The snippet below spins up a complete **production-grade LAMP stack** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.
```rust
use harmony::{
data::Version,
inventory::Inventory,
modules::postgresql::{PostgreSQLScore, capability::PostgreSQLConfig},
topology::K8sAnywhereTopology,
maestro::Maestro,
modules::{
lamp::{LAMPConfig, LAMPScore},
monitoring::monitoring_alerting::MonitoringAlertingStackScore,
},
topology::{K8sAnywhereTopology, Url},
};
#[tokio::main]
async fn main() {
let postgres = PostgreSQLScore {
config: PostgreSQLConfig {
cluster_name: "harmony-postgres-example".to_string(),
namespace: "harmony-postgres-example".to_string(),
// 1. Describe what you want
let lamp_stack = LAMPScore {
name: "harmony-lamp-demo".into(),
domain: Url::Url(url::Url::parse("https://lampdemo.example.com").unwrap()),
php_version: Version::from("8.3.0").unwrap(),
config: LAMPConfig {
project_root: "./php".into(),
database_size: "4Gi".into(),
..Default::default()
},
};
// 2. Enhance with extra scores (monitoring, CI/CD, …)
let mut monitoring = MonitoringAlertingStackScore::new();
monitoring.namespace = Some(lamp_stack.config.namespace.clone());
// 3. Run your scores on the desired topology & inventory
harmony_cli::run(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
vec![Box::new(postgres)],
None,
)
.await
.unwrap();
Inventory::autoload(), // auto-detect hardware / kube-config
K8sAnywhereTopology::from_env(), // local k3d, CI, staging, prod…
vec![
Box::new(lamp_stack),
Box::new(monitoring)
],
None
).await.unwrap();
}
```
### What this actually does
When you compile and run this program:
1. **Compiles** the Harmony Score into an executable
2. **Connects** to `K8sAnywhereTopology` — which auto-provisions a local K3D cluster if none exists
3. **Installs** the CloudNativePG operator into the cluster (one-time setup)
4. **Creates** a PostgreSQL cluster with 1 instance and 1 GiB of storage
5. **Exposes** the PostgreSQL instance as a Kubernetes Service
### Prerequisites
- [Rust](https://rust-lang.org/tools/install) (edition 2024)
- [Docker](https://docs.docker.com/get-docker/) (for the local K3D cluster)
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) (optional, for inspecting the cluster)
### Run it
Run it:
```bash
cargo run
```
Harmony analyses the code, shows an execution plan in a TUI, and applies it once you confirm. Same code, same binary—every environment.
---
## 3 · Core Concepts
| Term | One-liner |
| ---------------- | ---------------------------------------------------------------------------------------------------- |
| **Score<T>** | Declarative description of the desired state (e.g., `LAMPScore`). |
| **Interpret<T>** | Imperative logic that realises a `Score` on a specific environment. |
| **Topology** | An environment (local k3d, AWS, bare-metal) exposing verified _Capabilities_ (Kubernetes, DNS, …). |
| **Maestro** | Orchestrator that compiles Scores + Topology, ensuring all capabilities line up **at compile-time**. |
| **Inventory** | Optional catalogue of physical assets for bare-metal and edge deployments. |
A visual overview is in the diagram below.
![Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)
---
## 4 · Install
Prerequisites:
- Rust
- Docker (if you deploy locally)
- `kubectl` / `helm` for Kubernetes-based topologies
```bash
# Clone the repository
git clone https://git.nationtech.io/nationtech/harmony
cd harmony
# Build the project
cargo build --release
# Run the example
cargo run -p example-postgresql
```
Harmony will print its progress as it sets up the cluster and deploys PostgreSQL. When complete, you can inspect the deployment:
```bash
kubectl get pods -n harmony-postgres-example
kubectl get secret -n harmony-postgres-example harmony-postgres-example-db-user -o jsonpath='{.data.password}' | base64 -d
```
To connect to the database, forward the port:
```bash
kubectl port-forward -n harmony-postgres-example svc/harmony-postgres-example-rw 5432:5432
psql -h localhost -p 5432 -U postgres
```
To clean up, delete the K3D cluster:
```bash
k3d cluster delete harmony-postgres-example
cargo build --release # builds the CLI, TUI and libraries
```
---
## Environment Variables
## 5 · Learning More
`K8sAnywhereTopology::from_env()` reads the following environment variables to determine where and how to connect:
- **Architectural Decision Records** – dive into the rationale
- [ADR-001 · Why Rust](adr/001-rust.md)
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
- [ADR-006 · Secret Management](adr/006-secret-management.md)
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
| Variable | Default | Description |
|----------|---------|-------------|
| `KUBECONFIG` | `~/.kube/config` | Path to your kubeconfig file |
| `HARMONY_AUTOINSTALL` | `true` | Auto-provision a local K3D cluster if none found |
| `HARMONY_USE_LOCAL_K3D` | `true` | Always prefer local K3D over remote clusters |
| `HARMONY_PROFILE` | `dev` | Deployment profile: `dev`, `staging`, or `prod` |
| `HARMONY_K8S_CONTEXT` | _none_ | Use a specific kubeconfig context |
| `HARMONY_PUBLIC_DOMAIN` | _none_ | Public domain for ingress endpoints |
- **Extending Harmony** – write new Scores / Interprets, add hardware like OPNsense firewalls, or embed Harmony in your own tooling (`/docs`).
To connect to an existing Kubernetes cluster instead of provisioning K3D:
```bash
# Point to your kubeconfig
export KUBECONFIG=/path/to/your/kubeconfig
export HARMONY_USE_LOCAL_K3D=false
export HARMONY_AUTOINSTALL=false
# Then run
cargo run -p example-postgresql
```
- **Community** – discussions and roadmap live in [Gitea issues](https://git.nationtech.io/nationtech/harmony/issues). PRs, ideas, and feedback are welcome!
---
## Documentation
| I want to... | Start here |
|--------------|------------|
| Understand the core concepts | [Core Concepts](./docs/concepts.md) |
| Deploy my first application | [Getting Started Guide](./docs/guides/getting-started.md) |
| Explore available components | [Scores Catalog](./docs/catalogs/scores.md) · [Topologies Catalog](./docs/catalogs/topologies.md) |
| See a complete bare-metal deployment | [OKD on Bare Metal](./docs/use-cases/okd-on-bare-metal.md) |
| Build my own Score or Topology | [Developer Guide](./docs/guides/developer-guide.md) |
---
## Why Rust?
We chose Rust for the same reason you might: **reliability through type safety**.
Infrastructure code runs in production. It needs to be correct. Rust's ownership model and type system let us build a framework where:
- Invalid configurations fail at compile time, not at 3 AM
- Refactoring infrastructure is as safe as refactoring application code
- The compiler verifies that your platform can actually fulfill your requirements
See [ADR-001 · Why Rust](./adr/001-rust.md) for our full rationale.
---
## Architecture Decisions
Harmony's design is documented through Architecture Decision Records (ADRs):
- [ADR-001 · Why Rust](./adr/001-rust.md)
- [ADR-003 · Infrastructure Abstractions](./adr/003-infrastructure-abstractions.md)
- [ADR-006 · Secret Management](./adr/006-secret-management.md)
- [ADR-011 · Multi-Tenant Cluster](./adr/011-multi-tenant-cluster.md)
---
## License
## 6 · License
Harmony is released under the **GNU AGPL v3**.
> We choose a strong copyleft license to ensure the project—and every improvement to it—remains open and benefits the entire community.
> We choose a strong copyleft license to ensure the project—and every improvement to it—remains open and benefits the entire community. Fork it, enhance it, even out-innovate us; just keep it open.
See [LICENSE](LICENSE) for the full text.
---
_Made with ❤️ & 🦀 by NationTech and the Harmony community_
_Made with ❤️ & 🦀 by the NationTech and the Harmony community_

View File

@@ -1,29 +0,0 @@
# Harmony Roadmap
Six phases to take Harmony from working prototype to production-ready open-source project.
| # | Phase | Status | Depends On | Detail |
|---|-------|--------|------------|--------|
| 1 | [Harden `harmony_config`](ROADMAP/01-config-crate.md) | Not started | — | Test every source, add SQLite backend, wire Zitadel + OpenBao, validate zero-setup UX |
| 2 | [Migrate to `harmony_config`](ROADMAP/02-refactor-harmony-config.md) | Not started | 1 | Replace all 19 `SecretManager` call sites, deprecate direct `harmony_secret` usage |
| 3 | [Complete `harmony_assets`](ROADMAP/03-assets-crate.md) | Not started | 1, 2 | Test, refactor k3d and OKD to use it, implement `Url::Url`, remove LFS |
| 4 | [Publish to GitHub](ROADMAP/04-publish-github.md) | Not started | 3 | Clean history, set up GitHub as community hub, CI on self-hosted runners |
| 5 | [E2E tests: PostgreSQL & RustFS](ROADMAP/05-e2e-tests-simple.md) | Not started | 1 | k3d-based test harness, two passing E2E tests, CI job |
| 6 | [E2E tests: OKD HA on KVM](ROADMAP/06-e2e-tests-kvm.md) | Not started | 5 | KVM test infrastructure, full OKD installation test, nightly CI |
## Current State (as of branch `feature/kvm-module`)
- `harmony_config` crate exists with `EnvSource`, `LocalFileSource`, `PromptSource`, `StoreSource`. 12 unit tests. **Zero consumers** in workspace — everything still uses `harmony_secret::SecretManager` directly (19 call sites).
- `harmony_assets` crate exists with `Asset`, `LocalCache`, `LocalStore`, `S3Store`. **No tests. Zero consumers.** The `k3d` crate has its own `DownloadableAsset` with identical functionality and full test coverage.
- `harmony_secret` has `LocalFileSecretStore`, `OpenbaoSecretStore` (token/userpass only), `InfisicalSecretStore`. Works but no Zitadel OIDC integration.
- KVM module exists on this branch with `KvmExecutor`, VM lifecycle, ISO download, two examples (`example_linux_vm`, `kvm_okd_ha_cluster`).
- RustFS module exists on `feat/rustfs` branch (2 commits ahead of master).
- 39 example crates, **zero E2E tests**. Unit tests pass across workspace (~240 tests).
- CI runs `cargo check`, `fmt`, `clippy`, `test` on Gitea. No E2E job.
## Guiding Principles
- **Zero-setup first**: A new user clones, runs `cargo run`, gets prompted for config, values persist to local SQLite. No env vars, no external services required.
- **Progressive disclosure**: Local SQLite → OpenBao → Zitadel SSO. Each layer is opt-in.
- **Test what ships**: Every example that works should have an E2E test proving it works.
- **Community over infrastructure**: GitHub for engagement, self-hosted runners for CI.

View File

@@ -1,623 +0,0 @@
# Phase 1: Harden `harmony_config`, Validate UX, Zero-Setup Starting Point
## Goal
Make `harmony_config` production-ready with a seamless first-run experience: clone, run, get prompted, values persist locally. Then progressively add team-scale backends (OpenBao, Zitadel SSO) without changing any calling code.
## Current State
`harmony_config` now has:
- `Config` trait + `#[derive(Config)]` macro
- `ConfigManager` with ordered source chain
- Five `ConfigSource` implementations:
- `EnvSource` — reads `HARMONY_CONFIG_{KEY}` env vars
- `LocalFileSource` — reads/writes `{key}.json` files from a directory
- `SqliteSource` — **NEW** — reads/writes to a SQLite database
- `PromptSource` — returns `None` / no-op on set (placeholder for TUI integration)
- `StoreSource<S: SecretStore>` — wraps any `harmony_secret::SecretStore` backend
- 26 unit tests (mock source, env, local file, sqlite, prompt, integration, store graceful fallback)
- Global `CONFIG_MANAGER` static with `init()`, `get()`, `get_or_prompt()`, `set()`
- Two examples: `basic` and `prompting` in `harmony_config/examples/`
- **Zero workspace consumers** — nothing calls `harmony_config` yet
## Tasks
### 1.1 Add `SqliteSource` as the default zero-setup backend ✅
**Status**: Implemented
**Implementation Details**:
- Database location: `~/.local/share/harmony/config/config.db` (directory is auto-created)
- Schema: `config(key TEXT PRIMARY KEY, value TEXT NOT NULL, updated_at TEXT NOT NULL DEFAULT (datetime('now')))`
- Uses `sqlx` with SQLite runtime
- `SqliteSource::open(path)` - opens/creates database at given path
- `SqliteSource::default()` - uses default Harmony data directory
**Files**:
- `harmony_config/src/source/sqlite.rs` - new file
- `harmony_config/Cargo.toml` - added `sqlx = { workspace = true, features = ["runtime-tokio", "sqlite"] }`
- `Cargo.toml` - added `anyhow = "1.0"` to workspace dependencies
**Tests** (all passing):
- `test_sqlite_set_and_get` — round-trip a `TestConfig` struct
- `test_sqlite_get_returns_none_when_missing` — key not in DB
- `test_sqlite_overwrites_on_set` — set twice, get returns latest
- `test_sqlite_concurrent_access` — two tasks writing different keys simultaneously
### 1.1.1 Add Config example to show exact DX and confirm functionality ✅
**Status**: Implemented
**Examples created**:
1. `harmony_config/examples/basic.rs` - demonstrates:
- Zero-setup SQLite backend (auto-creates directory)
- Using the `#[derive(Config)]` macro
- Environment variable override (`HARMONY_CONFIG_TestConfig` overrides SQLite)
- Direct set/get operations
- Persistence verification
2. `harmony_config/examples/prompting.rs` - demonstrates:
- Config with no defaults (requires user input via `inquire`)
- `get()` flow: env > sqlite > prompt fallback
- `get_or_prompt()` for interactive configuration
- Full resolution chain
- Persistence of prompted values
### 1.2 Make `PromptSource` functional ✅
**Status**: Implemented with design improvement
**Key Finding - Bug Fixed During Implementation**:
The original design had a critical bug in `get_or_prompt()`:
```rust
// OLD (BUGGY) - breaks on first source where set() returns Ok(())
for source in &self.sources {
if source.set(T::KEY, &value).await.is_ok() {
break;
}
}
```
Since `EnvSource.set()` returns `Ok(())` (successfully sets env var), the loop would break immediately and never write to `SqliteSource`. Prompted values were never persisted!
**Solution - Added `should_persist()` method to ConfigSource trait**:
```rust
#[async_trait]
pub trait ConfigSource: Send + Sync {
async fn get(&self, key: &str) -> Result<Option<serde_json::Value>, ConfigError>;
async fn set(&self, key: &str, value: &serde_json::Value) -> Result<(), ConfigError>;
fn should_persist(&self) -> bool {
true
}
}
```
- `EnvSource::should_persist()` returns `false` - shouldn't persist prompted values to env vars
- `PromptSource::should_persist()` returns `false` - doesn't persist anyway
- `get_or_prompt()` now skips sources where `should_persist()` is `false`
**Updated `get_or_prompt()`**:
```rust
for source in &self.sources {
if !source.should_persist() {
continue;
}
if source.set(T::KEY, &value).await.is_ok() {
break;
}
}
```
**Tests**:
- `test_prompt_source_always_returns_none`
- `test_prompt_source_set_is_noop`
- `test_prompt_source_does_not_persist`
- `test_full_chain_with_prompt_source_falls_through_to_prompt`
### 1.3 Integration test: full resolution chain ✅
**Status**: Implemented
**Tests**:
- `test_full_resolution_chain_sqlite_fallback` — env not set, sqlite has value, get() returns sqlite
- `test_full_resolution_chain_env_overrides_sqlite` — env set, sqlite has value, get() returns env
- `test_branch_switching_scenario_deserialization_error` — old struct shape in sqlite returns Deserialization error
### 1.4 Validate Zitadel + OpenBao integration path ⏳
**Status**: Planning phase - detailed execution plan below
**Background**: ADR 020-1 documents the target architecture for Zitadel OIDC + OpenBao integration. This task validates the full chain by deploying Zitadel and OpenBao on a local k3d cluster and demonstrating an end-to-end example.
**Architecture Overview**:
```
┌─────────────────────────────────────────────────────────────────────┐
│ Harmony CLI / App │
│ │
│ ConfigManager: │
│ 1. EnvSource ← HARMONY_CONFIG_* env vars (highest priority) │
│ 2. SqliteSource ← ~/.local/share/harmony/config/config.db │
│ 3. StoreSource ← OpenBao (team-scale, via Zitadel OIDC) │
│ │
│ When StoreSource fails (OpenBao unreachable): │
│ → returns Ok(None), chain falls through to SqliteSource │
└─────────────────────────────────────────────────────────────────────┘
┌──────────────────┐ ┌──────────────────┐
│ Zitadel │ │ OpenBao │
│ (IdP + OIDC) │ │ (Secret Store) │
│ │ │ │
│ Device Auth │────JWT──▶│ JWT Auth │
│ Flow (RFC 8628)│ │ Method │
└──────────────────┘ └──────────────────┘
```
**Prerequisites**:
- Docker running (for k3d)
- Rust toolchain (edition 2024)
- Network access to download Helm charts
- `kubectl` (installed automatically with k3d, or pre-installed)
**Step-by-Step Execution Plan**:
#### Step 1: Create k3d cluster for local development
When you run `cargo run -p example-zitadel` (or any example using `K8sAnywhereTopology::from_env()`), Harmony automatically provisions a k3d cluster if one does not exist. By default:
- `use_local_k3d = true` (env: `HARMONY_USE_LOCAL_K3D`, default `true`)
- `autoinstall = true` (env: `HARMONY_AUTOINSTALL`, default `true`)
- Cluster name: **`harmony`** (hardcoded in `K3DInstallationScore::default()`)
- k3d binary is downloaded to `~/.local/share/harmony/k3d/`
- Kubeconfig is merged into `~/.kube/config`, context set to `k3d-harmony`
No manual `k3d cluster create` is needed. If you want to create the cluster manually first:
```bash
# Install k3d (requires sudo or install to user path)
curl -s https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash
# Create the cluster with the same name Harmony expects
k3d cluster create harmony
kubectl cluster-info --context k3d-harmony
```
**Validation**: `kubectl get nodes --context k3d-harmony` shows 1 server node (k3d default)
**Note**: The existing examples use hardcoded external hostnames (e.g., `sso.sto1.nationtech.io`) for ingress. On a local k3d cluster, these hostnames are not routable. For local development you must either:
- Use `kubectl port-forward` to access services directly
- Configure `/etc/hosts` entries pointing to `127.0.0.1`
- Use a k3d loadbalancer with `--port` mappings
#### Step 2: Deploy Zitadel
Zitadel requires the topology to implement `Topology + K8sclient + HelmCommand + PostgreSQL`. The `K8sAnywhereTopology` satisfies all four.
```bash
cargo run -p example-zitadel
```
**What happens internally** (see `harmony/src/modules/zitadel/mod.rs`):
1. Creates `zitadel` namespace via `K8sResourceScore`
2. Deploys a CNPG PostgreSQL cluster:
- Name: `zitadel-pg`
- Instances: **2** (not 1)
- Storage: 10Gi
- Namespace: `zitadel`
3. Resolves the internal DB endpoint (`host:port`) from the CNPG cluster
4. Generates a 32-byte alphanumeric masterkey, stores it as Kubernetes Secret `zitadel-masterkey` (idempotent: skips if it already exists)
5. Generates a 16-char admin password (guaranteed 1+ uppercase, lowercase, digit, symbol)
6. Deploys Zitadel Helm chart (`zitadel/zitadel` from `https://charts.zitadel.com`):
- `chart_version: None` -- **uses latest chart version** (not pinned)
- No `--wait` flag -- returns before pods are ready
- Ingress annotations are **OpenShift-oriented** (`route.openshift.io/termination: edge`, `cert-manager.io/cluster-issuer: letsencrypt-prod`). On k3d these annotations are silently ignored.
- Ingress includes TLS config with `secretName: "{host}-tls"`, which requires cert-manager. Without cert-manager, TLS termination does not happen at the ingress level.
**Key Helm values set by ZitadelScore**:
- `zitadel.configmapConfig.ExternalDomain`: the `host` field (e.g., `sso.sto1.nationtech.io`)
- `zitadel.configmapConfig.ExternalSecure: true`
- `zitadel.configmapConfig.TLS.Enabled: false` (TLS at ingress, not in Zitadel)
- Admin user: `UserName: "admin"`, Email: **`admin@zitadel.example.com`** (hardcoded, not derived from host)
- Database credentials: injected via `env[].valueFrom.secretKeyRef` from secret `zitadel-pg-superuser` (both user and admin use the same superuser -- there is a TODO to fix this)
**Expected output**:
```
===== ZITADEL DEPLOYMENT COMPLETE =====
Login URL: https://sso.sto1.nationtech.io
Username: admin@zitadel.sso.sto1.nationtech.io
Password: <generated 16-char password>
```
**Note on the success message**: The printed username `admin@zitadel.{host}` does not match the actual configured email `admin@zitadel.example.com`. The actual login username in Zitadel is `admin` (the `UserName` field). This discrepancy exists in the current code.
**Validation on k3d**:
```bash
# Wait for pods to be ready (Helm returns before readiness)
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=zitadel -n zitadel --timeout=300s
# Port-forward to access Zitadel (ingress won't work without proper DNS/TLS on k3d)
kubectl port-forward svc/zitadel -n zitadel 8080:8080
# Access at http://localhost:8080 (note: ExternalSecure=true may cause redirect issues)
```
**Known issues for k3d deployment**:
- `ExternalSecure: true` tells Zitadel to expect HTTPS, but k3d port-forward is HTTP. This may cause redirect loops. Workaround: modify the example to set `ExternalSecure: false` for local dev.
- The CNPG operator must be installed on the cluster. `K8sAnywhereTopology` handles this via the `PostgreSQL` trait implementation, which deploys the operator first.
#### Step 3: Deploy OpenBao
OpenBao requires only `Topology + K8sclient + HelmCommand` (no PostgreSQL dependency).
```bash
cargo run -p example-openbao
```
**What happens internally** (see `harmony/src/modules/openbao/mod.rs`):
1. `OpenbaoScore` directly delegates to `HelmChartScore.create_interpret()` -- there is no custom `execute()` logic, no namespace creation step, no secret generation
2. Deploys OpenBao Helm chart (`openbao/openbao` from `https://openbao.github.io/openbao-helm`):
- `chart_version: None` -- **uses latest chart version** (not pinned)
- `create_namespace: true` -- the `openbao` namespace is created by Helm
- `install_only: false` -- uses `helm upgrade --install`
**Exact Helm values set by OpenbaoScore**:
```yaml
global:
openshift: true # <-- PROBLEM: hardcoded, see below
server:
standalone:
enabled: true
config: |
ui = true
listener "tcp" {
tls_disable = true
address = "[::]:8200"
cluster_address = "[::]:8201"
}
storage "file" {
path = "/openbao/data"
}
service:
enabled: true
ingress:
enabled: true
hosts:
- host: <host field> # e.g., openbao.sebastien.sto1.nationtech.io
dataStorage:
enabled: true
size: 10Gi
storageClass: null # uses cluster default
accessMode: ReadWriteOnce
auditStorage:
enabled: true
size: 10Gi
storageClass: null
accessMode: ReadWriteOnce
ui:
enabled: true
```
**Critical issue: `global.openshift: true` is hardcoded.** The OpenBao Helm chart default is `global.openshift: false`. When set to `true`, the chart adjusts security contexts and may create OpenShift Routes instead of standard Kubernetes Ingress resources. **On k3d (vanilla k8s), this will produce resources that may not work correctly.** Before deploying on k3d, this must be overridden.
**Fix required for k3d**: Either:
1. Modify `OpenbaoScore` to accept an `openshift: bool` field (preferred long-term fix)
2. Or for this example, create a custom example that passes `values_overrides` with `global.openshift=false`
**Post-deployment initialization** (manual -- the TODO in `mod.rs` acknowledges this is not automated):
OpenBao starts in a sealed state. You must initialize and unseal it manually. See https://openbao.org/docs/platform/k8s/helm/run/
```bash
# Initialize OpenBao (generates unseal keys + root token)
kubectl exec -n openbao openbao-0 -- bao operator init
# Save the output! It contains 5 unseal keys and the root token.
# Example output:
# Unseal Key 1: abc123...
# Unseal Key 2: def456...
# ...
# Initial Root Token: hvs.xxxxx
# Unseal (requires 3 of 5 keys by default)
kubectl exec -n openbao openbao-0 -- bao operator unseal <key1>
kubectl exec -n openbao openbao-0 -- bao operator unseal <key2>
kubectl exec -n openbao openbao-0 -- bao operator unseal <key3>
```
**Validation**:
```bash
kubectl exec -n openbao openbao-0 -- bao status
# Should show "Sealed: false"
```
**Note**: The ingress has **no TLS configuration** (unlike Zitadel's ingress). Access is HTTP-only unless you configure TLS separately.
#### Step 4: Configure OpenBao for Harmony
Two paths are available depending on the authentication method:
##### Path A: Userpass auth (simpler, for local dev)
The current `OpenbaoSecretStore` supports **token** and **userpass** authentication. It does NOT yet implement the JWT/OIDC device flow described in ADR 020-1.
```bash
# Port-forward to access OpenBao API
kubectl port-forward svc/openbao -n openbao 8200:8200 &
export BAO_ADDR="http://127.0.0.1:8200"
export BAO_TOKEN="<root token from init>"
# Enable KV v2 secrets engine (default mount "secret")
bao secrets enable -path=secret kv-v2
# Enable userpass auth method
bao auth enable userpass
# Create a user for Harmony (the policy is attached in the final command below)
bao write auth/userpass/users/harmony password="harmony-dev-password"
# Create policy granting read/write on harmony/* paths
cat <<'EOF' | bao policy write harmony-dev -
path "secret/data/harmony/*" {
capabilities = ["create", "read", "update", "delete", "list"]
}
path "secret/metadata/harmony/*" {
capabilities = ["list", "read", "delete"]
}
EOF
# Create the user with the policy attached
bao write auth/userpass/users/harmony \
password="harmony-dev-password" \
policies="harmony-dev"
```
**Bug in `OpenbaoSecretStore::authenticate_userpass()`**: The `kv_mount` parameter (default `"secret"`) is passed to `vaultrs::auth::userpass::login()` as the auth mount path. This means it calls `POST /v1/auth/secret/login/{username}` instead of the correct `POST /v1/auth/userpass/login/{username}`. **The auth mount and KV mount are conflated into one parameter.**
**Workaround**: Set `OPENBAO_KV_MOUNT=userpass` so the auth call hits the correct mount path. But then KV operations would use mount `userpass` instead of `secret`, which is wrong.
**Proper fix needed**: Split `kv_mount` into two separate parameters: one for the KV v2 engine mount (`secret`) and one for the auth mount (`userpass`). This is a bug in `harmony_secret/src/store/openbao.rs:234`.
**For this example**: Use **token auth** instead of userpass to sidestep the bug:
```bash
# Set env vars for the example
export OPENBAO_URL="http://127.0.0.1:8200"
export OPENBAO_TOKEN="<root token from init>"
export OPENBAO_KV_MOUNT="secret"
```
##### Path B: JWT auth with Zitadel (target architecture, per ADR 020-1)
This is the production path described in the ADR. It requires the device flow code that is **not yet implemented** in `OpenbaoSecretStore`. The current code only supports token and userpass.
When implemented, the flow will be:
1. Enable JWT auth method in OpenBao
2. Configure it to trust Zitadel's OIDC discovery URL
3. Create a role that maps Zitadel JWT claims to OpenBao policies
```bash
# Enable JWT auth
bao auth enable jwt
# Configure JWT auth to trust Zitadel
bao write auth/jwt/config \
oidc_discovery_url="https://<zitadel-host>" \
bound_issuer="https://<zitadel-host>"
# Create role for Harmony developers
bao write auth/jwt/role/harmony-developer \
role_type="jwt" \
bound_audiences="<harmony_client_id>" \
user_claim="email" \
groups_claim="urn:zitadel:iam:org:project:roles" \
policies="harmony-dev" \
ttl="4h" \
max_ttl="24h" \
token_type="service"
```
**Zitadel application setup** (in Zitadel console):
1. Create project: `Harmony`
2. Add application: `Harmony CLI` (Native app type)
3. Enable Device Authorization grant type
4. Set scopes: `openid email profile offline_access`
5. Note the `client_id`
This path is deferred until the device flow is implemented in `OpenbaoSecretStore`.
#### Step 5: Write end-to-end example
The example uses `StoreSource<OpenbaoSecretStore>` with token auth to avoid the userpass mount bug.
**Environment variables required** (from `harmony_secret/src/config.rs`):
| Variable | Required | Default | Notes |
|---|---|---|---|
| `OPENBAO_URL` | Yes | None | Falls back to `VAULT_ADDR` |
| `OPENBAO_TOKEN` | For token auth | None | Root or user token |
| `OPENBAO_USERNAME` | For userpass | None | Requires `OPENBAO_PASSWORD` too |
| `OPENBAO_PASSWORD` | For userpass | None | |
| `OPENBAO_KV_MOUNT` | No | `"secret"` | KV v2 engine mount path. **Also used as userpass auth mount -- this is a bug.** |
| `OPENBAO_SKIP_TLS` | No | `false` | Set `"true"` to disable TLS verification |
**Note**: `OpenbaoSecretStore::new()` is `async` and **requires a running OpenBao** at construction time (it validates the token if using cached auth). If OpenBao is unreachable during construction, the call will fail. The graceful fallback only applies to `StoreSource::get()` calls after construction -- the `ConfigManager` must be built with a live store, or the store must be wrapped in a lazy initialization pattern.
```rust
// harmony_config/examples/openbao_chain.rs
use harmony_config::{ConfigManager, EnvSource, SqliteSource, StoreSource};
use harmony_secret::OpenbaoSecretStore;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema, PartialEq)]
struct AppConfig {
host: String,
port: u16,
}
impl harmony_config::Config for AppConfig {
const KEY: &'static str = "AppConfig";
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
env_logger::init();
// Build the source chain
let env_source: Arc<dyn harmony_config::ConfigSource> = Arc::new(EnvSource);
let sqlite = Arc::new(
SqliteSource::default()
.await
.expect("Failed to open SQLite"),
);
// OpenBao store -- requires OPENBAO_URL and OPENBAO_TOKEN env vars
// Falls back gracefully if OpenBao is unreachable at query time
let openbao_url = std::env::var("OPENBAO_URL")
.or(std::env::var("VAULT_ADDR"))
.ok();
let sources: Vec<Arc<dyn harmony_config::ConfigSource>> = if let Some(url) = openbao_url {
let kv_mount = std::env::var("OPENBAO_KV_MOUNT")
.unwrap_or_else(|_| "secret".to_string());
let skip_tls = std::env::var("OPENBAO_SKIP_TLS")
.map(|v| v == "true")
.unwrap_or(false);
match OpenbaoSecretStore::new(
url,
kv_mount,
skip_tls,
std::env::var("OPENBAO_TOKEN").ok(),
std::env::var("OPENBAO_USERNAME").ok(),
std::env::var("OPENBAO_PASSWORD").ok(),
)
.await
{
Ok(store) => {
let store_source = Arc::new(StoreSource::new("harmony".to_string(), store));
vec![env_source, Arc::clone(&sqlite) as _, store_source]
}
Err(e) => {
eprintln!("Warning: OpenBao unavailable ({e}), using local sources only");
vec![env_source, sqlite]
}
}
} else {
println!("No OPENBAO_URL set, using local sources only");
vec![env_source, sqlite]
};
let manager = ConfigManager::new(sources);
// Scenario 1: get() with nothing stored -- returns NotFound
let result = manager.get::<AppConfig>().await;
println!("Get (empty): {:?}", result);
// Scenario 2: set() then get()
let config = AppConfig {
host: "production.example.com".to_string(),
port: 443,
};
manager.set(&config).await?;
println!("Set: {:?}", config);
let retrieved = manager.get::<AppConfig>().await?;
println!("Get (after set): {:?}", retrieved);
assert_eq!(config, retrieved);
println!("End-to-end chain validated!");
Ok(())
}
```
**Key behaviors demonstrated**:
1. **Graceful construction fallback**: If `OPENBAO_URL` is not set or OpenBao is unreachable at startup, the chain is built without it
2. **Graceful query fallback**: `StoreSource::get()` returns `Ok(None)` on any error, so the chain continues to SQLite
3. **Environment override**: `HARMONY_CONFIG_AppConfig='{"host":"env-host","port":9090}'` bypasses all backends
#### Step 6: Validate graceful fallback
Already validated via unit tests (26 tests pass):
- `test_store_source_error_falls_through_to_sqlite` -- `StoreSource` with `AlwaysErrorStore` returns connection error, chain falls through to `SqliteSource`
- `test_store_source_not_found_falls_through_to_sqlite` -- `StoreSource` returns `NotFound`, chain falls through to `SqliteSource`
**Code path (FIXED in `harmony_config/src/source/store.rs`)**:
```rust
// StoreSource::get() -- returns Ok(None) on ANY error, allowing chain to continue
match self.store.get_raw(&self.namespace, key).await {
Ok(bytes) => { /* deserialize and return */ Ok(Some(value)) }
Err(SecretStoreError::NotFound { .. }) => Ok(None),
Err(_) => Ok(None), // Connection errors, timeouts, etc.
}
```
#### Step 7: Known issues and blockers
| Issue | Location | Severity | Status |
|---|---|---|---|
| `global.openshift: true` hardcoded | `harmony/src/modules/openbao/mod.rs:32` | **Blocker for k3d** | ✅ Fixed: Added `openshift: bool` field to `OpenbaoScore` (defaults to `false`) |
| `kv_mount` used as auth mount path | `harmony_secret/src/store/openbao.rs:234` | **Bug** | ✅ Fixed: Added separate `auth_mount` parameter; added `OPENBAO_AUTH_MOUNT` env var |
| Admin email hardcoded `admin@zitadel.example.com` | `harmony/src/modules/zitadel/mod.rs:314` | Minor | Cosmetic mismatch with success message |
| `ExternalSecure: true` hardcoded | `harmony/src/modules/zitadel/mod.rs:306` | **Issue for k3d** | ✅ Fixed: Zitadel now detects Kubernetes distribution and uses appropriate settings (OpenShift = TLS + cert-manager annotations, k3d = plain nginx ingress without TLS) |
| No Helm chart version pinning | Both modules | Risk | Non-deterministic deploys |
| No `--wait` on Helm install | `harmony/src/modules/helm/chart.rs` | UX | Must manually wait for readiness |
| `get_version()`/`get_status()` are `todo!()` | Both modules | Panic risk | Do not call these methods |
| JWT/OIDC device flow not implemented | `harmony_secret/src/store/openbao.rs` | **Gap** | ✅ Implemented: `ZitadelOidcAuth` in `harmony_secret/src/store/zitadel.rs` |
| `HARMONY_SECRET_NAMESPACE` panics if not set | `harmony_secret/src/config.rs:5` | Runtime panic | Only affects `SecretManager`, not `StoreSource` directly |
**Remaining work**:
- [x] `StoreSource<OpenbaoSecretStore>` integration validates compilation
- [x] StoreSource returns `Ok(None)` on connection error (not `Err`)
- [x] Graceful fallback tests pass when OpenBao is unreachable (2 new tests)
- [x] Fix `global.openshift: true` in `OpenbaoScore` for k3d compatibility
- [x] Fix `kv_mount` / auth mount conflation bug in `OpenbaoSecretStore`
- [x] Create and test `harmony_config/examples/openbao_chain.rs` against real k3d deployment
- [x] Implement JWT/OIDC device flow in `OpenbaoSecretStore` (ADR 020-1) — `ZitadelOidcAuth` implemented and wired into `OpenbaoSecretStore::new()` auth chain
- [x] Fix Zitadel distribution detection — Zitadel now uses `k8s_client.get_k8s_distribution()` to detect OpenShift vs k3d and applies appropriate Helm values (TLS + cert-manager for OpenShift, plain nginx for k3d)
### 1.5 UX validation checklist ⏳
**Status**: Partially complete - manual verification needed
- [ ] `cargo run --example postgresql` with no env vars → prompts for nothing
- [ ] An example that uses `SecretManager` today (e.g., `brocade_snmp_server`) → when migrated to `harmony_config`, first run prompts, second run reads from SQLite
- [ ] Setting `HARMONY_CONFIG_BrocadeSwitchAuth='{"host":"...","user":"...","password":"..."}'` → skips prompt, uses env value
- [ ] Deleting `~/.local/share/harmony/config/` directory → re-prompts on next run
## Deliverables
- [x] `SqliteSource` implementation with tests
- [x] Functional `PromptSource` with `should_persist()` design
- [x] Fix `get_or_prompt` to persist to first writable source (via `should_persist()`), not all sources
- [x] Integration tests for full resolution chain
- [x] Branch-switching deserialization failure test
- [x] `StoreSource<OpenbaoSecretStore>` integration validated (compiles, graceful fallback)
- [x] ADR for Zitadel OIDC target architecture
- [ ] Update docs to reflect final implementation and behavior
## Key Implementation Notes
1. **SQLite path**: `~/.local/share/harmony/config/config.db` (not `~/.local/share/harmony/config.db`)
2. **Auto-create directory**: `SqliteSource::open()` creates parent directories if they don't exist
3. **Default path**: `SqliteSource::default()` uses `directories::ProjectDirs` to find the correct data directory
4. **Env var precedence**: Environment variables always take precedence over SQLite in the resolution chain
5. **Testing**: All tests use `tempfile::NamedTempFile` for temporary database paths, ensuring test isolation
6. **Graceful fallback**: `StoreSource::get()` returns `Ok(None)` on any error (connection refused, timeout, etc.), allowing the chain to fall through to the next source. This ensures OpenBao unavailability doesn't break the config chain.
7. **StoreSource errors don't block chain**: When OpenBao is unreachable, `StoreSource::get()` returns `Ok(None)` and the `ConfigManager` continues to the next source (typically `SqliteSource`). This is validated by `test_store_source_error_falls_through_to_sqlite` and `test_store_source_not_found_falls_through_to_sqlite`.

View File

@@ -1,112 +0,0 @@
# Phase 2: Migrate Workspace to `harmony_config`
## Goal
Replace every direct `harmony_secret::SecretManager` call with `harmony_config` equivalents. After this phase, modules and examples depend only on `harmony_config`. `harmony_secret` becomes an internal implementation detail behind `StoreSource`.
## Current State
19 call sites use `SecretManager::get_or_prompt::<T>()` across:
| Location | Secret Types | Call Sites |
|----------|-------------|------------|
| `harmony/src/modules/brocade/brocade_snmp.rs` | `BrocadeSnmpAuth`, `BrocadeSwitchAuth` | 2 |
| `harmony/src/modules/nats/score_nats_k8s.rs` | `NatsAdmin` | 1 |
| `harmony/src/modules/okd/bootstrap_02_bootstrap.rs` | `RedhatSecret`, `SshKeyPair` | 2 |
| `harmony/src/modules/application/features/monitoring.rs` | `NtfyAuth` | 1 |
| `brocade/examples/main.rs` | `BrocadeSwitchAuth` | 1 |
| `examples/okd_installation/src/main.rs` + `topology.rs` | `SshKeyPair`, `BrocadeSwitchAuth`, `OPNSenseFirewallConfig` | 3 |
| `examples/okd_pxe/src/main.rs` + `topology.rs` | `SshKeyPair`, `BrocadeSwitchAuth`, `OPNSenseFirewallCredentials` | 3 |
| `examples/opnsense/src/main.rs` | `OPNSenseFirewallCredentials` | 1 |
| `examples/sttest/src/main.rs` + `topology.rs` | `SshKeyPair`, `OPNSenseFirewallConfig` | 2 |
| `examples/opnsense_node_exporter/` | (has dep but unclear usage) | ~1 |
| `examples/okd_cluster_alerts/` | (has dep but unclear usage) | ~1 |
| `examples/brocade_snmp_server/` | (has dep but unclear usage) | ~1 |
## Tasks
### 2.1 Bootstrap `harmony_config` in CLI and TUI entry points
Add `harmony_config::init()` as the first thing that happens in `harmony_cli::run()` and `harmony_tui::run()`.
```rust
// harmony_cli/src/lib.rs — inside run()
pub async fn run<T: Topology + Send + Sync + 'static>(
inventory: Inventory,
topology: T,
scores: Vec<Box<dyn Score<T>>>,
args_struct: Option<Args>,
) -> Result<(), Box<dyn std::error::Error>> {
// Initialize config system with default source chain
let sqlite = Arc::new(SqliteSource::default().await?);
let env = Arc::new(EnvSource);
harmony_config::init(vec![env, sqlite]).await;
// ... rest of run()
}
```
This replaces the implicit `SecretManager` lazy initialization that currently happens on first `get_or_prompt` call.
### 2.2 Migrate each secret type from `Secret` to `Config`
For each secret struct, change:
```rust
// Before
use harmony_secret::Secret;
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, InteractiveParse, Secret)]
struct BrocadeSwitchAuth { ... }
// After
use harmony_config::Config;
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, InteractiveParse, Config)]
struct BrocadeSwitchAuth { ... }
```
At each call site, change:
```rust
// Before
let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>().await.unwrap();
// After
let config = harmony_config::get_or_prompt::<BrocadeSwitchAuth>().await.unwrap();
```
### 2.3 Migration order (low risk to high risk)
1. **`brocade/examples/main.rs`** — 1 call site, isolated example, easy to test manually
2. **`examples/opnsense/src/main.rs`** — 1 call site, isolated
3. **`harmony/src/modules/brocade/brocade_snmp.rs`** — 2 call sites, core module but straightforward
4. **`harmony/src/modules/nats/score_nats_k8s.rs`** — 1 call site
5. **`harmony/src/modules/application/features/monitoring.rs`** — 1 call site
6. **`examples/sttest/`** — 2 call sites, has both main.rs and topology.rs patterns
7. **`examples/okd_installation/`** — 3 call sites, complex topology setup
8. **`examples/okd_pxe/`** — 3 call sites, similar to okd_installation
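9. **`harmony/src/modules/okd/bootstrap_02_bootstrap.rs`** — 2 call sites, critical OKD bootstrap path
10. **`examples/opnsense_node_exporter/`, `examples/okd_cluster_alerts/`, `examples/brocade_snmp_server/`** — ~3 call sites; the dependency is present but actual usage is unclear, so confirm usage first and either migrate or simply drop the unused dependency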
### 2.4 Remove `harmony_secret` from direct dependencies
After all call sites are migrated:
1. Remove `harmony_secret` from `Cargo.toml` of: `harmony`, `brocade`, and all examples that had it
2. `harmony_config` keeps `harmony_secret` as a dependency (for `StoreSource`)
3. The `Secret` trait and `SecretManager` remain in `harmony_secret` but are not used directly anymore
### 2.5 Backward compatibility for existing local secrets
Users who already have secrets stored via `LocalFileSecretStore` (JSON files in `~/.local/share/harmony/secrets/`) need a migration path:
- On first run after upgrade, if SQLite has no entry for a key but the old JSON file exists, read from JSON and write to SQLite
- Or: add `LocalFileSource` as a fallback source at the end of the chain (read-only) for one release cycle
- Log a deprecation warning when reading from old JSON files
## Deliverables
- [ ] `harmony_config::init()` called in `harmony_cli::run()` and `harmony_tui::run()`
- [ ] All 19 call sites migrated from `SecretManager` to `harmony_config`
- [ ] `harmony_secret` removed from direct dependencies of `harmony`, `brocade`, and all examples
- [ ] Backward compatibility for existing local JSON secrets
- [ ] All existing unit tests still pass
- [ ] Manual verification: one migrated example works end-to-end (prompt → persist → read)

View File

@@ -1,141 +0,0 @@
# Phase 3: Complete `harmony_assets`, Refactor Consumers
## Goal
Make `harmony_assets` the single way to manage downloadable binaries and images across Harmony. Eliminate `k3d::DownloadableAsset` duplication, implement `Url::Url` in OPNsense infra, remove LFS-tracked files from git.
## Current State
- `harmony_assets` exists with `Asset`, `LocalCache`, `LocalStore`, `S3Store` (behind feature flag). CLI with `upload`, `download`, `checksum`, `verify` commands. **No tests. Zero consumers.**
- `k3d/src/downloadable_asset.rs` has the same functionality with full test coverage (httptest mock server, checksum verification, cache hit, 404 handling, checksum failure).
- `Url::Url` variant in `harmony_types/src/net.rs` exists but is `todo!()` in OPNsense TFTP and HTTP infra layers.
- OKD modules hardcode `./data/...` paths (`bootstrap_02_bootstrap.rs:84-88`, `ipxe.rs:73`).
- `data/` directory contains ~3GB of LFS-tracked files (OKD binaries, PXE images, SCOS images).
## Tasks
### 3.1 Port k3d tests to `harmony_assets`
The k3d crate has 5 well-written tests in `downloadable_asset.rs`. Port them to test `harmony_assets::LocalStore`:
```rust
// harmony_assets/tests/local_store.rs (or in src/ as unit tests)
#[tokio::test]
async fn test_fetch_downloads_and_verifies_checksum() {
// Start httptest server serving a known file
// Create Asset with URL pointing to mock server
// Fetch via LocalStore
// Assert file exists at expected cache path
// Assert checksum matches
}
#[tokio::test]
async fn test_fetch_returns_cached_file_when_present() {
// Pre-populate cache with correct file
// Fetch — assert no HTTP request made (mock server not hit)
}
#[tokio::test]
async fn test_fetch_fails_on_404() { ... }
#[tokio::test]
async fn test_fetch_fails_on_checksum_mismatch() { ... }
#[tokio::test]
async fn test_fetch_with_progress_callback() {
// Assert progress callback is called with (bytes_received, total_size)
}
```
Add `httptest` to `[dev-dependencies]` of `harmony_assets`.
### 3.2 Refactor `k3d` to use `harmony_assets`
Replace `k3d/src/downloadable_asset.rs` with calls to `harmony_assets`:
```rust
// k3d/src/lib.rs — in download_latest_release()
use harmony_assets::{Asset, LocalCache, LocalStore, ChecksumAlgo};
let asset = Asset::new(
binary_url,
checksum,
ChecksumAlgo::SHA256,
K3D_BIN_FILE_NAME.to_string(),
);
let cache = LocalCache::new(self.base_dir.clone());
let store = LocalStore::new();
let path = store.fetch(&asset, &cache, None).await
.map_err(|e| format!("Failed to download k3d: {}", e))?;
```
Delete `k3d/src/downloadable_asset.rs`. Update k3d's `Cargo.toml` to depend on `harmony_assets`.
### 3.3 Define asset metadata as config structs
Following `plan.md` Phase 2, create typed config for OKD assets using `harmony_config`:
```rust
// harmony/src/modules/okd/config.rs
#[derive(Config, Serialize, Deserialize, JsonSchema, InteractiveParse)]
struct OkdInstallerConfig {
pub openshift_install_url: String,
pub openshift_install_sha256: String,
pub scos_kernel_url: String,
pub scos_kernel_sha256: String,
pub scos_initramfs_url: String,
pub scos_initramfs_sha256: String,
pub scos_rootfs_url: String,
pub scos_rootfs_sha256: String,
}
```
First run prompts for URLs/checksums (or uses compiled-in defaults). Values persist to SQLite. Can be overridden via env vars or OpenBao.
### 3.4 Implement `Url::Url` in OPNsense infra layer
In `harmony/src/infra/opnsense/http.rs` and `tftp.rs`, implement the `Url::Url(url)` match arm:
```rust
// Instead of SCP-ing files to OPNsense:
// SSH into OPNsense, run: fetch -o /usr/local/http/{path} {url}
// (FreeBSD-native HTTP client, no extra deps on OPNsense)
```
This eliminates the manual `scp` workaround and the `inquire::Confirm` prompts in `ipxe.rs:126` and `bootstrap_02_bootstrap.rs:230`.
### 3.5 Refactor OKD modules to use assets + config
In `bootstrap_02_bootstrap.rs`:
- `openshift-install`: Resolve `OkdInstallerConfig` from `harmony_config`, download via `harmony_assets`, invoke from cache.
- SCOS images: Pass `Url::Url(scos_kernel_url)` etc. to `StaticFilesHttpScore`. OPNsense fetches from S3 directly.
- Remove `oc` and `kubectl` from `data/okd/bin/` (never used by code).
In `ipxe.rs`:
- Replace the folder-to-serve SCP workaround with individual `Url::Url` entries.
- Remove the `inquire::Confirm` SCP prompts.
### 3.6 Upload assets to S3
- Upload all current `data/` binaries to Ceph S3 bucket with path scheme: `harmony-assets/okd/v{version}/openshift-install`, `harmony-assets/pxe/centos-stream-9/install.img`, etc.
- Set public-read ACL or configure presigned URL generation.
- Record S3 URLs and SHA256 checksums as defaults in the config structs.
### 3.7 Remove LFS, clean git
- Remove all LFS-tracked files from the repo.
- Update `.gitattributes` to remove LFS filters.
- Keep `data/` in `.gitignore` (it becomes a local cache directory).
- Optionally use `git filter-repo` or BFG to strip LFS objects from history (required before Phase 4 GitHub publish).
## Deliverables
- [ ] `harmony_assets` has tests ported from k3d pattern (5+ tests with httptest)
- [ ] `k3d::DownloadableAsset` replaced by `harmony_assets` usage
- [ ] `OkdInstallerConfig` struct using `harmony_config`
- [ ] `Url::Url` implemented in OPNsense HTTP and TFTP infra
- [ ] OKD bootstrap refactored to use lazy-download pattern
- [ ] Assets uploaded to S3 with documented URLs/checksums
- [ ] LFS removed, git history cleaned
- [ ] Repo size small enough for GitHub (~code + templates only)

View File

@@ -1,110 +0,0 @@
# Phase 4: Publish to GitHub
## Goal
Make Harmony publicly available on GitHub as the primary community hub for issues, pull requests, and discussions. CI runs on self-hosted runners.
## Prerequisites
- Phase 3 complete: LFS removed, git history cleaned, repo is small
- README polished with quick-start, architecture overview, examples
- All existing tests pass
## Tasks
### 4.1 Clean git history
```bash
# Option A: git filter-repo (preferred)
git filter-repo --strip-blobs-bigger-than 10M
# Option B: BFG Repo Cleaner
bfg --strip-blobs-bigger-than 10M
git reflog expire --expire=now --all
git gc --prune=now --aggressive
```
Verify final repo size is reasonable (target: <50MB including all code, docs, templates).
### 4.2 Create GitHub repository
- Create `NationTech/harmony` (or chosen org/name) on GitHub
- Push cleaned repo as initial commit
- Set default branch to `main` (rename from `master` if desired)
### 4.3 Set up CI on self-hosted runners
GitHub is the community hub, but CI runs on your own infrastructure. Options:
**Option A: GitHub Actions with self-hosted runners**
- Register your Gitea runner machines as GitHub Actions self-hosted runners
- Port `.gitea/workflows/check.yml` to `.github/workflows/check.yml`
- Same Docker image (`hub.nationtech.io/harmony/harmony_composer:latest`), same commands
- Pro: native GitHub PR checks, no external service needed
- Con: runners need outbound access to GitHub API
**Option B: External CI (Woodpecker, Drone, Jenkins)**
- Use any CI that supports webhooks from GitHub
- Report status back to GitHub via commit status API / checks API
- Pro: fully self-hosted, no GitHub dependency for builds
- Con: extra integration work
**Option C: Keep Gitea CI, mirror from GitHub**
- GitHub repo has a webhook that triggers Gitea CI on push
- Gitea reports back to GitHub via commit status API
- Pro: no migration of CI config
- Con: fragile webhook chain
**Recommendation**: Option A. GitHub Actions self-hosted runners are straightforward and give the best contributor UX (native PR checks). The workflow files are nearly identical to Gitea workflows.
```yaml
# .github/workflows/check.yml
name: Check
on: [push, pull_request]
jobs:
check:
runs-on: self-hosted
container:
image: hub.nationtech.io/harmony/harmony_composer:latest
steps:
- uses: actions/checkout@v4
- run: bash build/check.sh
```
### 4.4 Polish documentation
- **README.md**: Quick-start (clone → run → get prompted → see result), architecture diagram (Score → Interpret → Topology), link to docs and examples
- **CONTRIBUTING.md**: Already exists. Review for GitHub-specific guidance (fork workflow, PR template)
- **docs/**: Already comprehensive. Verify links work on GitHub rendering
- **Examples**: Ensure each example has a one-line description in its `Cargo.toml` and a comment block in `main.rs`
### 4.5 License and legal
- Verify workspace `license` field in root `Cargo.toml` is set correctly
- Add `LICENSE` file at repo root if not present
- Scan for any proprietary dependencies or hardcoded internal URLs
### 4.6 GitHub repository configuration
- Branch protection on `main`: require PR review, require CI to pass
- Issue templates: bug report, feature request
- PR template: checklist (tests pass, docs updated, etc.)
- Topics/tags: `rust`, `infrastructure-as-code`, `kubernetes`, `orchestration`, `bare-metal`
- Repository description: "Infrastructure orchestration framework. Declare what you want (Score), describe your infrastructure (Topology), let Harmony figure out how."
### 4.7 Gitea as internal mirror
- Set up Gitea to mirror from GitHub (pull mirror)
- Internal CI can continue running on Gitea for private/experimental branches
- Public contributions flow through GitHub
## Deliverables
- [ ] Git history cleaned, repo size <50MB
- [ ] Public GitHub repository created
- [ ] CI running on self-hosted runners with GitHub Actions
- [ ] Branch protection enabled
- [ ] README polished with quick-start guide
- [ ] Issue and PR templates created
- [ ] LICENSE file present
- [ ] Gitea configured as mirror

View File

@@ -1,255 +0,0 @@
# Phase 5: E2E Tests for PostgreSQL & RustFS
## Goal
Establish an automated E2E test pipeline that proves working examples actually work. Start with the two simplest k8s-based examples: PostgreSQL and RustFS.
## Prerequisites
- Phase 1 complete (config crate works, bootstrap is clean)
- `feat/rustfs` branch merged
## Architecture
### Test harness: `tests/e2e/`
A dedicated workspace member crate at `tests/e2e/` that contains:
1. **Shared k3d utilities** — create/destroy clusters, wait for readiness
2. **Per-example test modules** — each example gets a `#[tokio::test]` function
3. **Assertion helpers** — wait for pods, check CRDs exist, verify services
```
tests/
e2e/
Cargo.toml
src/
lib.rs # Shared test utilities
k3d.rs # k3d cluster lifecycle
k8s_assert.rs # K8s assertion helpers
tests/
postgresql.rs # PostgreSQL E2E test
rustfs.rs # RustFS E2E test
```
### k3d cluster lifecycle
```rust
// tests/e2e/src/k3d.rs
use k3d_rs::K3d;
pub struct TestCluster {
pub name: String,
pub k3d: K3d,
pub client: kube::Client,
reuse: bool,
}
impl TestCluster {
/// Creates a k3d cluster for testing.
/// If HARMONY_E2E_REUSE_CLUSTER=1, the cluster is left in place on drop so the next run can reuse it.
pub async fn ensure(name: &str) -> Result<Self, String> {
let reuse = std::env::var("HARMONY_E2E_REUSE_CLUSTER")
.map(|v| v == "1")
.unwrap_or(false);
let base_dir = PathBuf::from("/tmp/harmony-e2e");
let k3d = K3d::new(base_dir, Some(name.to_string()));
let client = k3d.ensure_installed().await?;
Ok(Self { name: name.to_string(), k3d, client, reuse })
}
/// Returns the kubeconfig path for this cluster.
pub fn kubeconfig_path(&self) -> String { ... }
}
impl Drop for TestCluster {
fn drop(&mut self) {
if !self.reuse {
// Best-effort cleanup
let _ = self.k3d.run_k3d_command(["cluster", "delete", &self.name]);
}
}
}
```
### K8s assertion helpers
```rust
// tests/e2e/src/k8s_assert.rs
/// Wait until a pod matching the label selector is Running in the namespace.
/// Times out after `timeout` duration.
pub async fn wait_for_pod_running(
client: &kube::Client,
namespace: &str,
label_selector: &str,
timeout: Duration,
) -> Result<(), String>
/// Assert a CRD instance exists.
pub async fn assert_resource_exists<K: kube::Resource>(
client: &kube::Client,
name: &str,
namespace: Option<&str>,
) -> Result<(), String>
/// Install a Helm chart. Returns when all pods in the release are running.
pub async fn helm_install(
release_name: &str,
chart: &str,
namespace: &str,
repo_url: Option<&str>,
timeout: Duration,
) -> Result<(), String>
```
## Tasks
### 5.1 Create the `tests/e2e/` crate
Add to workspace `Cargo.toml`:
```toml
[workspace]
members = [
# ... existing members
"tests/e2e",
]
```
`tests/e2e/Cargo.toml`:
```toml
[package]
name = "harmony-e2e-tests"
edition = "2024"
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
k3d_rs = { path = "../../k3d", package = "k3d_rs" }
kube = { workspace = true }
k8s-openapi = { workspace = true }
tokio = { workspace = true }
log = { workspace = true }
env_logger = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }
```
### 5.2 PostgreSQL E2E test
```rust
// tests/e2e/tests/postgresql.rs
use harmony::modules::postgresql::{PostgreSQLScore, capability::PostgreSQLConfig};
use harmony::topology::K8sAnywhereTopology;
use harmony::inventory::Inventory;
use harmony::maestro::Maestro;
#[tokio::test]
async fn test_postgresql_deploys_on_k3d() {
let cluster = TestCluster::ensure("harmony-e2e-pg").await.unwrap();
// Install CNPG operator via Helm
// (K8sAnywhereTopology::ensure_ready() now handles this since
// commit e1183ef "K8s postgresql score now ensures cnpg is installed")
// But we may need the Helm chart for non-OKD:
helm_install(
"cnpg",
"cloudnative-pg",
"cnpg-system",
Some("https://cloudnative-pg.github.io/charts"),
Duration::from_secs(120),
).await.unwrap();
// Configure topology pointing to test cluster
let config = K8sAnywhereConfig {
kubeconfig: Some(cluster.kubeconfig_path()),
use_local_k3d: false,
autoinstall: false,
use_system_kubeconfig: false,
harmony_profile: "dev".to_string(),
k8s_context: None,
};
let topology = K8sAnywhereTopology::with_config(config);
// Create and run the score
let score = PostgreSQLScore {
config: PostgreSQLConfig {
cluster_name: "e2e-test-pg".to_string(),
namespace: "e2e-pg-test".to_string(),
..Default::default()
},
};
let mut maestro = Maestro::initialize(Inventory::autoload(), topology).await.unwrap();
maestro.register_all(vec![Box::new(score)]);
let scores = maestro.scores().read().unwrap().first().unwrap().clone_box();
let result = maestro.interpret(scores).await;
assert!(result.is_ok(), "PostgreSQL score failed: {:?}", result.err());
// Assert: CNPG Cluster resource exists
// (the Cluster CRD is applied — pod readiness may take longer)
let client = cluster.client.clone();
// ... assert Cluster CRD exists in e2e-pg-test namespace
}
```
### 5.3 RustFS E2E test
Similar structure. Details depend on what the RustFS score deploys (likely a Helm chart or k8s resources for MinIO/RustFS).
```rust
#[tokio::test]
async fn test_rustfs_deploys_on_k3d() {
let cluster = TestCluster::ensure("harmony-e2e-rustfs").await.unwrap();
// ... similar pattern: configure topology, create score, interpret, assert
}
```
### 5.4 CI job for E2E tests
New workflow file (Gitea or GitHub Actions):
```yaml
# .gitea/workflows/e2e.yml (or .github/workflows/e2e.yml)
name: E2E Tests
on:
push:
branches: [master, main]
# Don't run on every PR — too slow. Runs on pushes to the default branch or via manual trigger.
workflow_dispatch:
jobs:
e2e:
runs-on: self-hosted # Must have Docker available for k3d
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Install k3d
run: curl -s https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash
- name: Run E2E tests
run: cargo test -p harmony-e2e-tests -- --test-threads=1
env:
RUST_LOG: info
```
Note `--test-threads=1`: E2E tests create k3d clusters and should not run in parallel (port conflicts, resource contention).
## Deliverables
- [ ] `tests/e2e/` crate added to workspace
- [ ] Shared test utilities: `TestCluster`, `wait_for_pod_running`, `helm_install`
- [ ] PostgreSQL E2E test passing
- [ ] RustFS E2E test passing (after `feat/rustfs` merge)
- [ ] CI job running E2E tests on push to main
- [ ] `HARMONY_E2E_REUSE_CLUSTER=1` for fast local iteration

View File

@@ -1,214 +0,0 @@
# Phase 6: E2E Tests for OKD HA Cluster on KVM
## Goal
Prove the full OKD bare-metal installation flow works end-to-end using KVM virtual machines. This is the ultimate validation of Harmony's core value proposition: declare an OKD cluster, point it at infrastructure, watch it materialize.
## Prerequisites
- Phase 5 complete (test harness exists, k3d tests passing)
- `feature/kvm-module` merged to main
- A CI runner with libvirt/KVM access and nested virtualization support
## Architecture
The KVM branch already has a `kvm_okd_ha_cluster` example that creates:
```
                 Host bridge (WAN)
                         |
                +--------------------+
                |      OPNsense      |  192.168.100.1
                |   gateway + PXE    |
                +--------+-----------+
                         |
           harmonylan (192.168.100.0/24)
     +---------+---------+---------+---------+
     |         |         |         |         |
+----+---+ +---+---+ +---+---+ +---+---+ +--+----+
| cp0    | | cp1   | | cp2   | |worker0| |worker1|
| .10    | | .11   | | .12   | | .20   | | .21   |
+--------+ +-------+ +-------+ +-------+ +---+---+
                                             |
                                       +-----+----+
                                       | worker2  |
                                       |   .22    |
                                       +----------+
```
The test needs to orchestrate this entire setup, wait for OKD to converge, and assert the cluster is healthy.
## Tasks
### 6.1 Start with `example_linux_vm` — the simplest KVM test
Before tackling the full OKD stack, validate the KVM module itself with the simplest possible test:
```rust
// tests/e2e/tests/kvm_linux_vm.rs
#[tokio::test]
#[ignore] // Requires libvirt access — run with: cargo test -- --ignored
async fn test_linux_vm_boots_from_iso() {
let executor = KvmExecutor::from_env().unwrap();
// Create isolated network
let network = NetworkConfig {
name: "e2e-test-net".to_string(),
bridge: "virbr200".to_string(),
// ...
};
executor.ensure_network(&network).await.unwrap();
// Define and start VM
let vm_config = VmConfig::builder("e2e-linux-test")
.vcpus(1)
.memory_gb(1)
.disk(5)
.network(NetworkRef::named("e2e-test-net"))
.cdrom("https://releases.ubuntu.com/24.04/ubuntu-24.04-live-server-amd64.iso")
.boot_order([BootDevice::Cdrom, BootDevice::Disk])
.build();
executor.ensure_vm(&vm_config).await.unwrap();
executor.start_vm("e2e-linux-test").await.unwrap();
// Assert VM is running
let status = executor.vm_status("e2e-linux-test").await.unwrap();
assert_eq!(status, VmStatus::Running);
// Cleanup
executor.destroy_vm("e2e-linux-test").await.unwrap();
executor.undefine_vm("e2e-linux-test").await.unwrap();
executor.delete_network("e2e-test-net").await.unwrap();
}
```
This test validates:
- ISO download works (via `harmony_assets` if refactored, or built-in KVM module download)
- libvirt XML generation is correct
- VM lifecycle (define → start → status → destroy → undefine)
- Network creation/deletion
### 6.2 OKD HA Cluster E2E test
The full integration test. This is long-running (30-60 minutes) and should only run nightly or on-demand.
```rust
// tests/e2e/tests/kvm_okd_ha.rs
#[tokio::test]
#[ignore] // Requires KVM + significant resources. Run nightly.
async fn test_okd_ha_cluster_on_kvm() {
// 1. Create virtual infrastructure
// - OPNsense gateway VM
// - 3 control plane VMs
// - 3 worker VMs
// - Virtual network (harmonylan)
// 2. Run OKD installation scores
// (the kvm_okd_ha_cluster example, but as a test)
// 3. Wait for OKD API server to become reachable
// - Poll https://api.okd.harmonylan:6443 until it responds
// - Timeout: 30 minutes
// 4. Assert cluster health
// - All nodes in Ready state
// - ClusterVersion reports Available=True
// - Sample workload (nginx) deploys and pod reaches Running
// 5. Cleanup
// - Destroy all VMs
// - Delete virtual networks
// - Clean up disk images
}
```
### 6.3 CI runner requirements
The KVM E2E test needs a runner with:
- **Hardware**: 32GB+ RAM, 8+ CPU cores, 100GB+ disk
- **Software**: libvirt, QEMU/KVM, `virsh`, nested virtualization enabled
- **Network**: Outbound internet access (to download ISOs, OKD images)
- **Permissions**: User in `libvirt` group, or root access
Options:
- **Dedicated bare-metal machine** registered as a self-hosted GitHub Actions runner
- **Cloud VM with nested virt** (e.g., GCP n2-standard-8 with `--enable-nested-virtualization`)
- **Manual trigger only** — developer runs locally, CI just tracks pass/fail
### 6.4 Nightly CI job
```yaml
# .github/workflows/e2e-kvm.yml
name: E2E KVM Tests
on:
schedule:
- cron: '0 2 * * *' # 2 AM daily
workflow_dispatch: # Manual trigger
jobs:
kvm-tests:
runs-on: [self-hosted, kvm] # Label for KVM-capable runners
timeout-minutes: 90
steps:
- uses: actions/checkout@v4
- name: Run KVM E2E tests
run: cargo test -p harmony-e2e-tests -- --ignored --test-threads=1
env:
RUST_LOG: info
HARMONY_KVM_URI: qemu:///system
- name: Cleanup VMs on failure
if: failure()
run: |
virsh list --all --name | grep e2e | xargs -I {} virsh destroy {} || true
virsh list --all --name | grep e2e | xargs -I {} virsh undefine {} --remove-all-storage || true
```
### 6.5 Test resource management
KVM tests create real resources that must be cleaned up even on failure. Implement a test fixture pattern:
```rust
struct KvmTestFixture {
executor: KvmExecutor,
vms: Vec<String>,
networks: Vec<String>,
}
impl KvmTestFixture {
fn track_vm(&mut self, name: &str) { self.vms.push(name.to_string()); }
fn track_network(&mut self, name: &str) { self.networks.push(name.to_string()); }
}
impl Drop for KvmTestFixture {
fn drop(&mut self) {
// Best-effort cleanup of all tracked resources
for vm in &self.vms {
let _ = std::process::Command::new("virsh")
.args(["destroy", vm]).output();
let _ = std::process::Command::new("virsh")
.args(["undefine", vm, "--remove-all-storage"]).output();
}
for net in &self.networks {
let _ = std::process::Command::new("virsh")
.args(["net-destroy", net]).output();
let _ = std::process::Command::new("virsh")
.args(["net-undefine", net]).output();
}
}
}
```
## Deliverables
- [ ] `test_linux_vm_boots_from_iso` — passing KVM smoke test
- [ ] `test_okd_ha_cluster_on_kvm` — full OKD installation test
- [ ] `KvmTestFixture` with resource cleanup on test failure
- [ ] Nightly CI job on KVM-capable runner
- [ ] Force-cleanup script for leaked VMs/networks
- [ ] Documentation: how to set up a KVM runner for E2E tests

View File

@@ -2,7 +2,7 @@
## Status
Rejected : See ADR 020 ./020-interactive-configuration-crate.md
Proposed
### TODO [#3](https://git.nationtech.io/NationTech/harmony/issues/3):

View File

@@ -1,9 +0,0 @@
[book]
title = "Harmony"
description = "Infrastructure orchestration that treats your platform like first-class code"
src = "docs"
build-dir = "book"
authors = ["NationTech"]
[output.html]
mathjax-support = false

View File

@@ -1,19 +0,0 @@
[package]
name = "brocade"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
async-trait.workspace = true
harmony_types = { path = "../harmony_types" }
russh.workspace = true
russh-keys.workspace = true
tokio.workspace = true
log.workspace = true
env_logger.workspace = true
regex = "1.11.3"
harmony_secret = { path = "../harmony_secret" }
serde.workspace = true
schemars = "0.8"

View File

@@ -1,4 +0,0 @@
export HARMONY_SECRET_NAMESPACE=brocade-example
export HARMONY_SECRET_STORE=file
export HARMONY_DATABASE_URL=sqlite://harmony_brocade_example.sqlite
export RUST_LOG=info

View File

@@ -1,98 +0,0 @@
use std::net::{IpAddr, Ipv4Addr};
use brocade::{BrocadeOptions, Vlan, ssh};
use harmony_secret::{Secret, SecretManager};
use harmony_types::switch::PortLocation;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
struct BrocadeSwitchAuth {
username: String,
password: String,
}
#[tokio::main]
async fn main() {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
// let ip = IpAddr::V4(Ipv4Addr::new(10, 0, 0, 250)); // old brocade @ ianlet
// let ip = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)); // brocade @ sto1
// let ip = IpAddr::V4(Ipv4Addr::new(192, 168, 4, 11)); // brocade @ st
//let switch_addresses = vec![ip];
let ip0 = IpAddr::V4(Ipv4Addr::new(192, 168, 12, 147)); // brocade @ test
let ip1 = IpAddr::V4(Ipv4Addr::new(192, 168, 12, 109)); // brocade @ test
let switch_addresses = vec![ip0, ip1];
let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
.await
.unwrap();
let brocade = brocade::init(
&switch_addresses,
&config.username,
&config.password,
&BrocadeOptions {
dry_run: true,
ssh: ssh::SshOptions {
port: 22,
..Default::default()
},
..Default::default()
},
)
.await
.expect("Brocade client failed to connect");
let entries = brocade.get_stack_topology().await.unwrap();
println!("Stack topology: {entries:#?}");
let entries = brocade.get_interfaces().await.unwrap();
println!("Interfaces: {entries:#?}");
let version = brocade.version().await.unwrap();
println!("Version: {version:?}");
println!("--------------");
let mac_adddresses = brocade.get_mac_address_table().await.unwrap();
println!("VLAN\tMAC\t\t\tPORT");
for mac in mac_adddresses {
println!("{}\t{}\t{}", mac.vlan, mac.mac_address, mac.port);
}
println!("--------------");
println!("Creating VLAN 100 (test-vlan)...");
brocade
.create_vlan(&Vlan {
id: 100,
name: "test-vlan".to_string(),
})
.await
.unwrap();
println!("--------------");
println!("Deleting VLAN 100...");
brocade
.delete_vlan(&Vlan {
id: 100,
name: "test-vlan".to_string(),
})
.await
.unwrap();
println!("--------------");
todo!("STOP!");
let channel_name = "1";
brocade.clear_port_channel(channel_name).await.unwrap();
println!("--------------");
let channel_id = 1;
println!("--------------");
let channel_name = "HARMONY_LAG";
let ports = [PortLocation(2, 0, 35)];
brocade
.create_port_channel(channel_id, channel_name, &ports)
.await
.unwrap();
}

View File

@@ -1,242 +0,0 @@
use std::io::{self, Write};
use brocade::{
BrocadeOptions, InterfaceConfig, InterfaceSpeed, InterfaceType, PortOperatingMode,
SwitchInterface, Vlan, VlanList, ssh,
};
use harmony_secret::{Secret, SecretManager};
use harmony_types::switch::PortLocation;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
struct BrocadeSwitchAuth {
username: String,
password: String,
}
fn wait_for_enter() {
println!("\n--- Press ENTER to continue ---");
io::stdout().flush().unwrap();
io::stdin().read_line(&mut String::new()).unwrap();
}
#[tokio::main]
async fn main() {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
let ip0 = std::net::IpAddr::V4(std::net::Ipv4Addr::new(192, 168, 12, 147));
let ip1 = std::net::IpAddr::V4(std::net::Ipv4Addr::new(192, 168, 12, 109));
let switch_addresses = vec![ip0, ip1];
let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
.await
.unwrap();
let brocade = brocade::init(
&switch_addresses,
&config.username,
&config.password,
&BrocadeOptions {
dry_run: false,
ssh: ssh::SshOptions {
port: 22,
..Default::default()
},
..Default::default()
},
)
.await
.expect("Brocade client failed to connect");
println!("=== Connecting to Brocade switches ===");
let version = brocade.version().await.unwrap();
println!("Version: {version:?}");
let entries = brocade.get_stack_topology().await.unwrap();
println!("Stack topology: {entries:#?}");
println!("\n=== Creating VLANs 100, 200, 300 ===");
brocade
.create_vlan(&Vlan {
id: 100,
name: "vlan100".to_string(),
})
.await
.unwrap();
println!("Created VLAN 100 (vlan100)");
brocade
.create_vlan(&Vlan {
id: 200,
name: "vlan200".to_string(),
})
.await
.unwrap();
println!("Created VLAN 200 (vlan200)");
brocade
.create_vlan(&Vlan {
id: 300,
name: "vlan300".to_string(),
})
.await
.unwrap();
println!("Created VLAN 300 (vlan300)");
println!("\n=== Press ENTER to continue to port configuration tests ---");
wait_for_enter();
println!("\n=== TEST 1: Trunk port (all VLANs, speed 10Gbps) on TenGigabitEthernet 1/0/1 ===");
println!("Configuring port as trunk with all VLANs and speed 10Gbps...");
let configs = vec![InterfaceConfig {
interface: SwitchInterface::Ethernet(
InterfaceType::TenGigabitEthernet,
PortLocation(1, 0, 1),
),
mode: PortOperatingMode::Trunk,
access_vlan: None,
trunk_vlans: Some(VlanList::All),
speed: Some(InterfaceSpeed::Gbps10),
}];
brocade.configure_interfaces(&configs).await.unwrap();
println!("Querying interfaces...");
let interfaces = brocade.get_interfaces().await.unwrap();
for iface in &interfaces {
if iface.name.contains("1/0/1") {
println!(" {iface:?}");
}
}
wait_for_enter();
println!("\n=== TEST 2: Trunk port (specific VLANs) on TenGigabitEthernet 1/0/2 ===");
println!("Configuring port as trunk with VLANs 100, 200...");
let configs = vec![InterfaceConfig {
interface: SwitchInterface::Ethernet(
InterfaceType::TenGigabitEthernet,
PortLocation(1, 0, 2),
),
mode: PortOperatingMode::Trunk,
access_vlan: None,
trunk_vlans: Some(VlanList::Specific(vec![
Vlan {
id: 100,
name: "vlan100".to_string(),
},
Vlan {
id: 200,
name: "vlan200".to_string(),
},
])),
speed: None,
}];
brocade.configure_interfaces(&configs).await.unwrap();
println!("Querying interfaces...");
let interfaces = brocade.get_interfaces().await.unwrap();
for iface in &interfaces {
if iface.name.contains("1/0/2") {
println!(" {iface:?}");
}
}
wait_for_enter();
println!("\n=== TEST 3: Access port (default VLAN 1) on TenGigabitEthernet 1/0/3 ===");
println!("Configuring port as access (default VLAN 1)...");
let configs = vec![InterfaceConfig {
interface: SwitchInterface::Ethernet(
InterfaceType::TenGigabitEthernet,
PortLocation(1, 0, 3),
),
mode: PortOperatingMode::Access,
access_vlan: None,
trunk_vlans: None,
speed: None,
}];
brocade.configure_interfaces(&configs).await.unwrap();
println!("Querying interfaces...");
let interfaces = brocade.get_interfaces().await.unwrap();
for iface in &interfaces {
if iface.name.contains("1/0/3") {
println!(" {iface:?}");
}
}
wait_for_enter();
println!("\n=== TEST 4: Access port (custom VLAN 100) on TenGigabitEthernet 1/0/4 ===");
println!("Configuring port as access with VLAN 100...");
let configs = vec![InterfaceConfig {
interface: SwitchInterface::Ethernet(
InterfaceType::TenGigabitEthernet,
PortLocation(1, 0, 4),
),
mode: PortOperatingMode::Access,
access_vlan: Some(100),
trunk_vlans: None,
speed: None,
}];
brocade.configure_interfaces(&configs).await.unwrap();
println!("Querying interfaces...");
let interfaces = brocade.get_interfaces().await.unwrap();
for iface in &interfaces {
if iface.name.contains("1/0/4") {
println!(" {iface:?}");
}
}
wait_for_enter();
println!("\n=== TEST 5: Port-channel on TenGigabitEthernet 1/0/5 and 1/0/6 ===");
let channel_id = 1;
println!("Using channel ID: {channel_id}");
println!("Creating port-channel with ports 1/0/5 and 1/0/6...");
let ports = [PortLocation(1, 0, 5), PortLocation(1, 0, 6)];
brocade
.create_port_channel(channel_id, "HARMONY_LAG", &ports)
.await
.unwrap();
println!("Port-channel created.");
println!("Querying port-channel summary...");
let interfaces = brocade.get_interfaces().await.unwrap();
for iface in &interfaces {
if iface.name.contains("1/0/5") || iface.name.contains("1/0/6") {
println!(" {iface:?}");
}
}
wait_for_enter();
println!("\n=== TEARDOWN: Clearing port-channels and deleting VLANs ===");
println!("Clearing port-channel {channel_id}...");
brocade
.clear_port_channel(&channel_id.to_string())
.await
.unwrap();
println!("Resetting interfaces...");
for port in 1..=6 {
let interface = format!("TenGigabitEthernet 1/0/{port}");
println!(" Resetting {interface}...");
brocade.reset_interface(&interface).await.unwrap();
}
println!("Deleting VLAN 100...");
brocade
.delete_vlan(&Vlan {
id: 100,
name: "vlan100".to_string(),
})
.await
.unwrap();
println!("Deleting VLAN 200...");
brocade
.delete_vlan(&Vlan {
id: 200,
name: "vlan200".to_string(),
})
.await
.unwrap();
println!("Deleting VLAN 300...");
brocade
.delete_vlan(&Vlan {
id: 300,
name: "vlan300".to_string(),
})
.await
.unwrap();
println!("\n=== DONE ===");
}

View File

@@ -1,253 +0,0 @@
use super::BrocadeClient;
use crate::{
BrocadeInfo, Error, ExecutionMode, InterSwitchLink, InterfaceConfig, InterfaceInfo,
MacAddressEntry, PortChannelId, PortOperatingMode, Vlan, parse_brocade_mac_address,
shell::BrocadeShell,
};
use async_trait::async_trait;
use harmony_types::switch::{PortDeclaration, PortLocation};
use log::{debug, info};
use regex::Regex;
use std::{collections::HashSet, str::FromStr};
#[derive(Debug)]
pub struct FastIronClient {
shell: BrocadeShell,
version: BrocadeInfo,
}
impl FastIronClient {
pub fn init(mut shell: BrocadeShell, version_info: BrocadeInfo) -> Self {
shell.before_all(vec!["skip-page-display".into()]);
shell.after_all(vec!["page".into()]);
Self {
shell,
version: version_info,
}
}
fn parse_mac_entry(&self, line: &str) -> Option<Result<MacAddressEntry, Error>> {
debug!("[Brocade] Parsing mac address entry: {line}");
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() < 3 {
return None;
}
let (vlan, mac_address, port) = match parts.len() {
3 => (
u16::from_str(parts[0]).ok()?,
parse_brocade_mac_address(parts[1]).ok()?,
parts[2].to_string(),
),
_ => (
1,
parse_brocade_mac_address(parts[0]).ok()?,
parts[1].to_string(),
),
};
let port =
PortDeclaration::parse(&port).map_err(|e| Error::UnexpectedError(format!("{e}")));
match port {
Ok(p) => Some(Ok(MacAddressEntry {
vlan,
mac_address,
port: p,
})),
Err(e) => Some(Err(e)),
}
}
fn parse_stack_port_entry(&self, line: &str) -> Option<Result<InterSwitchLink, Error>> {
debug!("[Brocade] Parsing stack port entry: {line}");
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() < 10 {
return None;
}
let local_port = PortLocation::from_str(parts[0]).ok()?;
Some(Ok(InterSwitchLink {
local_port,
remote_port: None,
}))
}
fn build_port_channel_commands(
&self,
channel_id: PortChannelId,
channel_name: &str,
ports: &[PortLocation],
) -> Vec<String> {
let mut commands = vec![
"configure terminal".to_string(),
format!("lag {channel_name} static id {channel_id}"),
];
for port in ports {
commands.push(format!("ports ethernet {port}"));
}
commands.push(format!("primary-port {}", ports[0]));
commands.push("deploy".into());
commands.push("exit".into());
commands.push("write memory".into());
commands.push("exit".into());
commands
}
}
#[async_trait]
impl BrocadeClient for FastIronClient {
async fn version(&self) -> Result<BrocadeInfo, Error> {
Ok(self.version.clone())
}
async fn get_mac_address_table(&self) -> Result<Vec<MacAddressEntry>, Error> {
info!("[Brocade] Showing MAC address table...");
let output = self
.shell
.run_command("show mac-address", ExecutionMode::Regular)
.await?;
output
.lines()
.skip(2)
.filter_map(|line| self.parse_mac_entry(line))
.collect()
}
async fn get_stack_topology(&self) -> Result<Vec<InterSwitchLink>, Error> {
let output = self
.shell
.run_command("show interface stack-ports", crate::ExecutionMode::Regular)
.await?;
output
.lines()
.skip(1)
.filter_map(|line| self.parse_stack_port_entry(line))
.collect()
}
async fn get_interfaces(&self) -> Result<Vec<InterfaceInfo>, Error> {
todo!()
}
async fn configure_interfaces(&self, _interfaces: &Vec<InterfaceConfig>) -> Result<(), Error> {
todo!()
}
async fn create_vlan(&self, _vlan: &Vlan) -> Result<(), Error> {
todo!()
}
async fn delete_vlan(&self, _vlan: &Vlan) -> Result<(), Error> {
todo!()
}
async fn find_available_channel_id(&self) -> Result<PortChannelId, Error> {
info!("[Brocade] Finding next available channel id...");
let output = self
.shell
.run_command("show lag", ExecutionMode::Regular)
.await?;
let re = Regex::new(r"=== LAG .* ID\s+(\d+)").expect("Invalid regex");
let used_ids: HashSet<u8> = output
.lines()
.filter_map(|line| {
re.captures(line)
.and_then(|c| c.get(1))
.and_then(|id_match| id_match.as_str().parse().ok())
})
.collect();
let mut next_id: u8 = 1;
loop {
if !used_ids.contains(&next_id) {
break;
}
next_id += 1;
}
info!("[Brocade] Found channel id: {next_id}");
Ok(next_id)
}
async fn create_port_channel(
&self,
channel_id: PortChannelId,
channel_name: &str,
ports: &[PortLocation],
) -> Result<(), Error> {
info!(
"[Brocade] Configuring port-channel '{channel_name} {channel_id}' with ports: {ports:?}"
);
let commands = self.build_port_channel_commands(channel_id, channel_name, ports);
self.shell
.run_commands(commands, ExecutionMode::Privileged)
.await?;
info!("[Brocade] Port-channel '{channel_name}' configured.");
Ok(())
}
async fn reset_interface(&self, interface: &str) -> Result<(), Error> {
info!("[Brocade] Resetting interface: {interface}");
let commands = vec![
"configure terminal".into(),
format!("interface {interface}"),
"no switchport".into(),
"no speed".into(),
"exit".into(),
];
self.shell
.run_commands(commands, ExecutionMode::Privileged)
.await?;
info!("[Brocade] Interface '{interface}' reset.");
Ok(())
}
async fn clear_port_channel(&self, channel_name: &str) -> Result<(), Error> {
info!("[Brocade] Clearing port-channel: {channel_name}");
let commands = vec![
"configure terminal".to_string(),
format!("no lag {channel_name}"),
"write memory".to_string(),
];
self.shell
.run_commands(commands, ExecutionMode::Privileged)
.await?;
info!("[Brocade] Port-channel '{channel_name}' cleared.");
Ok(())
}
async fn enable_snmp(&self, user_name: &str, auth: &str, des: &str) -> Result<(), Error> {
let commands = vec![
"configure terminal".into(),
"snmp-server view ALL 1 included".into(),
"snmp-server group public v3 priv read ALL".into(),
format!(
"snmp-server user {user_name} groupname public auth md5 auth-password {auth} priv des priv-password {des}"
),
"exit".into(),
];
self.shell
.run_commands(commands, ExecutionMode::Regular)
.await?;
Ok(())
}
}

View File

@@ -1,427 +0,0 @@
use std::net::IpAddr;
use std::{
fmt::{self, Display},
time::Duration,
};
use crate::network_operating_system::NetworkOperatingSystemClient;
use crate::{
fast_iron::FastIronClient,
shell::{BrocadeSession, BrocadeShell},
};
use async_trait::async_trait;
use harmony_types::net::MacAddress;
use harmony_types::switch::{PortDeclaration, PortLocation};
use regex::Regex;
use serde::Serialize;
mod fast_iron;
mod network_operating_system;
mod shell;
pub mod ssh;
#[derive(Default, Clone, Debug)]
pub struct BrocadeOptions {
pub dry_run: bool,
pub ssh: ssh::SshOptions,
pub timeouts: TimeoutConfig,
}
#[derive(Clone, Debug)]
pub struct TimeoutConfig {
pub shell_ready: Duration,
pub command_execution: Duration,
pub command_output: Duration,
pub cleanup: Duration,
pub message_wait: Duration,
}
impl Default for TimeoutConfig {
fn default() -> Self {
Self {
shell_ready: Duration::from_secs(10),
command_execution: Duration::from_secs(60), // Commands like `deploy` (for a LAG) can take a while
command_output: Duration::from_secs(5), // Delay to start logging "waiting for command output"
cleanup: Duration::from_secs(10),
message_wait: Duration::from_millis(500),
}
}
}
enum ExecutionMode {
Regular,
Privileged,
}
#[derive(Clone, Debug)]
pub struct BrocadeInfo {
os: BrocadeOs,
_version: String,
}
#[derive(Clone, Debug)]
pub enum BrocadeOs {
NetworkOperatingSystem,
FastIron,
Unknown,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct MacAddressEntry {
pub vlan: u16,
pub mac_address: MacAddress,
pub port: PortDeclaration,
}
pub type PortChannelId = u8;
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Vlan {
pub id: u16,
pub name: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum VlanList {
All,
Specific(Vec<Vlan>),
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum SwitchInterface {
Ethernet(InterfaceType, PortLocation),
PortChannel(PortChannelId),
}
impl fmt::Display for SwitchInterface {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
SwitchInterface::Ethernet(itype, loc) => write!(f, "{itype} {loc}"),
SwitchInterface::PortChannel(id) => write!(f, "port-channel {id}"),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum InterfaceSpeed {
Mbps100,
Gbps1,
Gbps1Auto,
Gbps10,
Auto,
}
impl fmt::Display for InterfaceSpeed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
InterfaceSpeed::Mbps100 => write!(f, "100"),
InterfaceSpeed::Gbps1 => write!(f, "1000"),
InterfaceSpeed::Gbps1Auto => write!(f, "1000-auto"),
InterfaceSpeed::Gbps10 => write!(f, "10000"),
InterfaceSpeed::Auto => write!(f, "auto"),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct InterfaceConfig {
pub interface: SwitchInterface,
pub mode: PortOperatingMode,
pub access_vlan: Option<u16>,
pub trunk_vlans: Option<VlanList>,
pub speed: Option<InterfaceSpeed>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct PortChannelConfig {
pub id: PortChannelId,
pub name: String,
pub ports: Vec<PortLocation>,
pub mode: PortOperatingMode,
pub access_vlan: Option<Vlan>,
pub trunk_vlans: Option<VlanList>,
pub speed: Option<InterfaceSpeed>,
}
/// Represents a single physical or logical link connecting two switches within a stack or fabric.
///
/// This structure provides a standardized view of the topology regardless of the
/// underlying Brocade OS configuration (stacking vs. fabric).
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct InterSwitchLink {
/// The local port on the switch where the topology command was run.
pub local_port: PortLocation,
/// The port on the directly connected neighboring switch.
pub remote_port: Option<PortLocation>,
}
/// Represents the key running configuration status of a single switch interface.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct InterfaceInfo {
/// The full configuration name (e.g., "TenGigabitEthernet 1/0/1", "FortyGigabitEthernet 2/0/2").
pub name: String,
/// The physical location of the interface.
pub port_location: PortLocation,
/// The parsed type and name prefix of the interface.
pub interface_type: InterfaceType,
/// The primary configuration mode defining the interface's behavior (L2, L3, Fabric).
pub operating_mode: Option<PortOperatingMode>,
/// Indicates the current state of the interface.
pub status: InterfaceStatus,
}
/// Categorizes the functional type of a switch interface.
#[derive(Debug, PartialEq, Eq, Clone, Serialize)]
pub enum InterfaceType {
TenGigabitEthernet,
FortyGigabitEthernet,
}
impl fmt::Display for InterfaceType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
InterfaceType::TenGigabitEthernet => write!(f, "TenGigabitEthernet"),
InterfaceType::FortyGigabitEthernet => write!(f, "FortyGigabitEthernet"),
}
}
}
/// Defines the primary configuration mode of a switch interface, representing mutually exclusive roles.
///
/// An interface is in exactly one of these modes at a time; code that has no
/// mode to report uses `Option<PortOperatingMode>` with `None`.
#[derive(Debug, PartialEq, Eq, Clone, Serialize)]
pub enum PortOperatingMode {
/// The interface is explicitly configured for Brocade fabric roles (ISL or Trunk enabled).
Fabric,
/// The interface is configured for standard Layer 2 switching as Trunk port (`switchport mode trunk`).
Trunk,
/// The interface is configured for standard Layer 2 switching as Access port (`switchport` without trunk mode).
Access,
}
/// Defines the possible status of an interface.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum InterfaceStatus {
/// The interface is connected.
Connected,
/// The interface is not connected and is not expected to be.
NotConnected,
/// The interface is not connected but is expected to be (configured with `no shutdown`).
///
/// NOTE(review): the variant name suggests a missing SFP module rather than an
/// administrative expectation — confirm that this description matches the switch output.
SfpAbsent,
}
pub async fn init(
ip_addresses: &[IpAddr],
username: &str,
password: &str,
options: &BrocadeOptions,
) -> Result<Box<dyn BrocadeClient + Send + Sync>, Error> {
let shell = BrocadeShell::init(ip_addresses, username, password, options).await?;
let version_info = shell
.with_session(ExecutionMode::Regular, |session| {
Box::pin(get_brocade_info(session))
})
.await?;
Ok(match version_info.os {
BrocadeOs::FastIron => Box::new(FastIronClient::init(shell, version_info)),
BrocadeOs::NetworkOperatingSystem => {
Box::new(NetworkOperatingSystemClient::init(shell, version_info))
}
BrocadeOs::Unknown => todo!(),
})
}
/// Operations supported by every Brocade switch client, independent of the
/// operating system family running on the device.
#[async_trait]
pub trait BrocadeClient: std::fmt::Debug {
/// Retrieves the operating system and version details from the connected Brocade switch.
///
/// This is typically the first call made after establishing a connection to determine
/// the switch OS family (e.g., FastIron, NOS) for feature compatibility.
///
/// # Returns
///
/// A `BrocadeInfo` structure containing parsed OS type and version string.
async fn version(&self) -> Result<BrocadeInfo, Error>;
/// Retrieves the dynamically learned MAC address table from the switch.
///
/// This is crucial for discovering where specific network endpoints (MAC addresses)
/// are currently located on the physical ports.
///
/// # Returns
///
/// A vector of `MacAddressEntry`, where each entry typically contains VLAN, MAC address,
/// and the associated port name/index.
async fn get_mac_address_table(&self) -> Result<Vec<MacAddressEntry>, Error>;
/// Derives the physical connections used to link multiple switches together
/// to form a single logical entity (stack, fabric, etc.).
///
/// This abstracts the underlying configuration (e.g., stack ports, fabric ports)
/// to return a standardized view of the topology.
///
/// # Returns
///
/// A vector of `InterSwitchLink` structs detailing which ports are used for stacking/fabric.
/// If the switch is not stacked, returns an empty vector.
async fn get_stack_topology(&self) -> Result<Vec<InterSwitchLink>, Error>;
/// Retrieves the status for all interfaces.
///
/// # Returns
///
/// A vector of `InterfaceInfo` structures.
async fn get_interfaces(&self) -> Result<Vec<InterfaceInfo>, Error>;
/// Configures a set of interfaces to be operated with a specified mode (access ports, ISL, etc.).
///
/// # Parameters
///
/// * `interfaces`: One `InterfaceConfig` per interface to configure, carrying the
///   target mode and the optional VLAN and speed settings.
async fn configure_interfaces(&self, interfaces: &Vec<InterfaceConfig>) -> Result<(), Error>;
/// Creates a new VLAN on the switch.
async fn create_vlan(&self, vlan: &Vlan) -> Result<(), Error>;
/// Deletes a VLAN from the switch.
async fn delete_vlan(&self, vlan: &Vlan) -> Result<(), Error>;
/// Scans the existing configuration to find the next available (unused)
/// Port-Channel ID (`lag` or `trunk`) for assignment.
///
/// # Returns
///
/// The smallest, unassigned `PortChannelId` within the supported range.
async fn find_available_channel_id(&self) -> Result<PortChannelId, Error>;
/// Creates and configures a new Port-Channel (Link Aggregation Group or LAG)
/// using the specified channel ID and ports.
///
/// The resulting configuration must be persistent (saved to startup-config).
/// Assumes a static LAG configuration mode unless specified otherwise by the implementation.
///
/// # Parameters
///
/// * `channel_id`: The ID (e.g., 1-128) for the logical port channel.
/// * `channel_name`: A descriptive name for the LAG (used in configuration context).
/// * `ports`: A slice of `PortLocation` structs defining the physical member ports.
async fn create_port_channel(
&self,
channel_id: PortChannelId,
channel_name: &str,
ports: &[PortLocation],
) -> Result<(), Error>;
/// Enables the Simple Network Management Protocol (SNMP) server on the switch.
///
/// # Parameters
///
/// * `user_name`: The user name for the SNMP server.
/// * `auth`: The password used by the authentication process to verify the identity of a device.
/// * `des`: The Data Encryption Standard algorithm key.
async fn enable_snmp(&self, user_name: &str, auth: &str, des: &str) -> Result<(), Error>;
/// Resets an interface to its default state by removing switchport configuration.
async fn reset_interface(&self, interface: &str) -> Result<(), Error>;
/// Removes all configuration associated with the specified Port-Channel name.
///
/// This operation should be idempotent; attempting to clear a non-existent
/// channel should succeed (or return a benign error).
///
/// # Parameters
///
/// * `channel_name`: The name of the Port-Channel (LAG) to delete.
async fn clear_port_channel(&self, channel_name: &str) -> Result<(), Error>;
}
async fn get_brocade_info(session: &mut BrocadeSession) -> Result<BrocadeInfo, Error> {
let output = session.run_command("show version").await?;
if output.contains("Network Operating System") {
let re = Regex::new(r"Network Operating System Version:\s*(?P<version>[a-zA-Z0-9.\-]+)")
.expect("Invalid regex");
let version = re
.captures(&output)
.and_then(|cap| cap.name("version"))
.map(|m| m.as_str().to_string())
.unwrap_or_default();
return Ok(BrocadeInfo {
os: BrocadeOs::NetworkOperatingSystem,
_version: version,
});
} else if output.contains("ICX") {
let re = Regex::new(r"(?m)^\s*SW: Version\s*(?P<version>[a-zA-Z0-9.\-]+)")
.expect("Invalid regex");
let version = re
.captures(&output)
.and_then(|cap| cap.name("version"))
.map(|m| m.as_str().to_string())
.unwrap_or_default();
return Ok(BrocadeInfo {
os: BrocadeOs::FastIron,
_version: version,
});
}
Err(Error::UnexpectedError("Unknown Brocade OS version".into()))
}
/// Parses a MAC address in Brocade dotted notation (e.g. `0012.3456.789a`).
///
/// Dots are stripped and the remaining text must be exactly 12 ASCII
/// hexadecimal digits, which are decoded pairwise into the 6 address bytes.
///
/// # Errors
///
/// Returns a descriptive message when the cleaned value is not 12 characters
/// long or contains anything other than hexadecimal digits.
fn parse_brocade_mac_address(value: &str) -> Result<MacAddress, String> {
    let cleaned_mac = value.replace('.', "");

    if cleaned_mac.len() != 12 {
        return Err(format!("Invalid MAC address: {value}"));
    }

    // `u8::from_str_radix` tolerates a leading `+`, so a pair such as "+f"
    // would be accepted as 0x0f. Validate every character up front; this also
    // guarantees the value is pure ASCII, making the byte-offset slicing below safe.
    if !cleaned_mac.bytes().all(|b| b.is_ascii_hexdigit()) {
        return Err(format!("Invalid hex in MAC: {value}"));
    }

    let mut bytes = [0u8; 6];
    for (i, byte) in bytes.iter_mut().enumerate() {
        let pair = &cleaned_mac[i * 2..i * 2 + 2];
        *byte =
            u8::from_str_radix(pair, 16).map_err(|_| format!("Invalid hex in MAC: {value}"))?;
    }

    Ok(MacAddress(bytes))
}
/// Security level selector carrying a single string payload.
///
/// NOTE(review): presumably describes the SNMPv3 security level; the meaning of
/// the `String` payload is not visible from this definition — confirm at the call sites.
#[derive(Debug)]
pub enum SecurityLevel {
/// Authentication with privacy.
AuthPriv(String),
}
/// Errors reported by the Brocade client. Every variant carries a
/// human-readable message.
#[derive(Debug)]
pub enum Error {
    /// Transport-level failure.
    NetworkError(String),
    /// The switch rejected the supplied credentials.
    AuthenticationError(String),
    /// The caller supplied an unusable configuration.
    ConfigurationError(String),
    /// An operation did not complete within its allotted time.
    TimeoutError(String),
    /// Anything that does not fit another category.
    UnexpectedError(String),
    /// The switch reported a problem with a command; rendered without a prefix.
    CommandError(String),
}

impl Display for Error {
    /// Renders the message with a category prefix, except for `CommandError`,
    /// which is written verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NetworkError(msg) => write!(f, "Network error: {msg}"),
            Self::AuthenticationError(msg) => write!(f, "Authentication error: {msg}"),
            Self::ConfigurationError(msg) => write!(f, "Configuration error: {msg}"),
            Self::TimeoutError(msg) => write!(f, "Timeout error: {msg}"),
            Self::UnexpectedError(msg) => write!(f, "Unexpected error: {msg}"),
            Self::CommandError(msg) => write!(f, "{msg}"),
        }
    }
}

impl From<Error> for String {
    /// Converts the error into its `Display` representation.
    fn from(val: Error) -> Self {
        val.to_string()
    }
}

impl std::error::Error for Error {}
impl From<russh::Error> for Error {
    /// Wraps any russh failure as a `NetworkError`, keeping the original
    /// message text after a fixed prefix.
    fn from(value: russh::Error) -> Self {
        let message = format!("Russh client error: {value}");
        Self::NetworkError(message)
    }
}

View File

@@ -1,433 +0,0 @@
use std::str::FromStr;
use async_trait::async_trait;
use harmony_types::switch::{PortDeclaration, PortLocation};
use log::{debug, info};
use regex::Regex;
use crate::{
BrocadeClient, BrocadeInfo, Error, ExecutionMode, InterSwitchLink, InterfaceConfig,
InterfaceInfo, InterfaceStatus, InterfaceType, MacAddressEntry, PortChannelId,
PortOperatingMode, SwitchInterface, Vlan, VlanList, parse_brocade_mac_address,
shell::BrocadeShell,
};
/// `BrocadeClient` implementation for switches running Brocade Network Operating System (NOS).
#[derive(Debug)]
pub struct NetworkOperatingSystemClient {
// Shell used to run every command against the switch.
shell: BrocadeShell,
// OS/version information supplied at construction and returned by `version()`.
version: BrocadeInfo,
}
impl NetworkOperatingSystemClient {
/// Builds a NOS client around an already initialized shell.
///
/// Registers `terminal length 0` as the shell's "before all" command list.
pub fn init(mut shell: BrocadeShell, version_info: BrocadeInfo) -> Self {
    let setup_commands: Vec<String> = vec!["terminal length 0".to_string()];
    shell.before_all(setup_commands);

    Self {
        version: version_info,
        shell,
    }
}
fn parse_mac_entry(&self, line: &str) -> Option<Result<MacAddressEntry, Error>> {
debug!("[Brocade] Parsing mac address entry: {line}");
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() < 5 {
return None;
}
let (vlan, mac_address, port) = match parts.len() {
5 => (
u16::from_str(parts[0]).ok()?,
parse_brocade_mac_address(parts[1]).ok()?,
parts[4].to_string(),
),
_ => (
u16::from_str(parts[0]).ok()?,
parse_brocade_mac_address(parts[1]).ok()?,
parts[5].to_string(),
),
};
let port =
PortDeclaration::parse(&port).map_err(|e| Error::UnexpectedError(format!("{e}")));
match port {
Ok(p) => Some(Ok(MacAddressEntry {
vlan,
mac_address,
port: p,
})),
Err(e) => Some(Err(e)),
}
}
/// Parses one row of the `show fabric isl` output.
///
/// Rows with fewer than ten columns, or whose third or sixth column is not a
/// valid port location, are ignored (`None`). This parser never produces
/// `Some(Err(..))`.
fn parse_inter_switch_link_entry(&self, line: &str) -> Option<Result<InterSwitchLink, Error>> {
    debug!("[Brocade] Parsing inter switch link entry: {line}");

    let columns: Vec<&str> = line.split_whitespace().collect();
    if columns.len() < 10 {
        return None;
    }

    // Fields are evaluated in declaration order: local port first, then remote.
    let link = InterSwitchLink {
        local_port: PortLocation::from_str(columns[2]).ok()?,
        remote_port: Some(PortLocation::from_str(columns[5]).ok()?),
    };

    Some(Ok(link))
}
/// Parses one row of the `show interface status` output.
///
/// Expected columns: type prefix (`Te`/`Fo`), port location, status
/// (`connected`/`notconnected`/`sfpAbsent`) and mode (`ISL`/`Trunk`/`Access`/`--`).
/// Any row with fewer than six columns or an unrecognized value is skipped
/// (`None`). This parser never produces `Some(Err(..))`.
fn parse_interface_status_entry(&self, line: &str) -> Option<Result<InterfaceInfo, Error>> {
    debug!("[Brocade] Parsing interface status entry: {line}");

    let columns: Vec<&str> = line.split_whitespace().collect();
    if columns.len() < 6 {
        return None;
    }

    let interface_type = match columns[0] {
        "Te" => Some(InterfaceType::TenGigabitEthernet),
        "Fo" => Some(InterfaceType::FortyGigabitEthernet),
        _ => None,
    }?;

    let port_location = PortLocation::from_str(columns[1]).ok()?;

    let status = match columns[2] {
        "connected" => Some(InterfaceStatus::Connected),
        "notconnected" => Some(InterfaceStatus::NotConnected),
        "sfpAbsent" => Some(InterfaceStatus::SfpAbsent),
        _ => None,
    }?;

    // `--` is a valid value meaning "no mode"; anything unknown rejects the row.
    let operating_mode = match columns[3] {
        "--" => None,
        "ISL" => Some(PortOperatingMode::Fabric),
        "Trunk" => Some(PortOperatingMode::Trunk),
        "Access" => Some(PortOperatingMode::Access),
        _ => return None,
    };

    let name = format!("{interface_type} {port_location}");
    Some(Ok(InterfaceInfo {
        name,
        port_location,
        interface_type,
        operating_mode,
        status,
    }))
}
fn map_configure_interfaces_error(&self, err: Error) -> Error {
debug!("[Brocade] {err}");
if let Error::CommandError(message) = &err {
if message.contains("switchport")
&& message.contains("Cannot configure aggregator member")
{
let re = Regex::new(r"\(conf-if-([a-zA-Z]+)-([\d/]+)\)#").unwrap();
if let Some(caps) = re.captures(message) {
let interface_type = &caps[1];
let port_location = &caps[2];
let interface = format!("{interface_type} {port_location}");
return Error::CommandError(format!(
"Cannot configure interface '{interface}', it is a member of a port-channel (LAG)"
));
}
}
}
err
}
}
#[async_trait]
impl BrocadeClient for NetworkOperatingSystemClient {
/// Returns a copy of the version information captured when the client was created.
async fn version(&self) -> Result<BrocadeInfo, Error> {
    let info = self.version.clone();
    Ok(info)
}
/// Runs `show mac-address-table` and parses every row after the first line.
///
/// Rows that do not look like entries are skipped; the first row whose port
/// cannot be parsed aborts the whole call with that error.
async fn get_mac_address_table(&self) -> Result<Vec<MacAddressEntry>, Error> {
    let table = self
        .shell
        .run_command("show mac-address-table", ExecutionMode::Regular)
        .await?;

    let mut entries = Vec::new();
    for row in table.lines().skip(1) {
        if let Some(parsed) = self.parse_mac_entry(row) {
            entries.push(parsed?);
        }
    }

    Ok(entries)
}
/// Runs `show fabric isl` and parses every row after the first six lines
/// into inter-switch links. Rows that do not parse are skipped.
async fn get_stack_topology(&self) -> Result<Vec<InterSwitchLink>, Error> {
    let isl_report = self
        .shell
        .run_command("show fabric isl", ExecutionMode::Regular)
        .await?;

    let mut links = Vec::new();
    for row in isl_report.lines().skip(6) {
        match self.parse_inter_switch_link_entry(row) {
            Some(Ok(link)) => links.push(link),
            Some(Err(e)) => return Err(e),
            None => {}
        }
    }

    Ok(links)
}
/// Runs `show interface status rbridge-id all` and parses every row after the
/// first two lines. Rows that do not parse are skipped.
async fn get_interfaces(&self) -> Result<Vec<InterfaceInfo>, Error> {
    let status_report = self
        .shell
        .run_command(
            "show interface status rbridge-id all",
            ExecutionMode::Regular,
        )
        .await?;

    let mut interfaces = Vec::new();
    for row in status_report.lines().skip(2) {
        let Some(parsed) = self.parse_interface_status_entry(row) else {
            continue;
        };
        interfaces.push(parsed?);
    }

    Ok(interfaces)
}
/// Configures each given interface for its requested operating mode.
///
/// All commands for all interfaces are accumulated into one list, starting
/// with `configure terminal`, and sent in a single `run_commands` call.
/// Command errors are passed through `map_configure_interfaces_error` so that
/// the "port-channel member" failure gets a clearer message.
async fn configure_interfaces(&self, interfaces: &Vec<InterfaceConfig>) -> Result<(), Error> {
info!("[Brocade] Configuring {} interface(s)...", interfaces.len());
let mut commands = vec!["configure terminal".to_string()];
for interface in interfaces {
debug!(
"[Brocade] Configuring interface {} as {:?}",
interface.interface, interface.mode
);
// Enter the interface's configuration context.
commands.push(format!("interface {}", interface.interface));
match interface.mode {
PortOperatingMode::Fabric => {
commands.push("fabric isl enable".into());
commands.push("fabric trunk enable".into());
}
PortOperatingMode::Trunk => {
commands.push("switchport".into());
commands.push("switchport mode trunk".into());
// With no explicit VLAN list the trunk allows every VLAN.
match &interface.trunk_vlans {
Some(VlanList::All) => {
commands.push("switchport trunk allowed vlan all".into());
}
Some(VlanList::Specific(vlans)) => {
for vlan in vlans {
commands.push(format!("switchport trunk allowed vlan add {}", vlan.id));
}
}
None => {
commands.push("switchport trunk allowed vlan all".into());
}
}
commands.push("no switchport trunk tag native-vlan".into());
// Spanning-tree and fabric settings are only issued for Ethernet interfaces.
if matches!(interface.interface, SwitchInterface::Ethernet(..)) {
commands.push("spanning-tree shutdown".into());
commands.push("no fabric isl enable".into());
commands.push("no fabric trunk enable".into());
}
}
PortOperatingMode::Access => {
commands.push("switchport".into());
commands.push("switchport mode access".into());
// Falls back to VLAN 1 when no access VLAN is given.
// NOTE(review): confirm the type of `access_vlan` supports `unwrap_or(1)` and `Display`.
let access_vlan = interface.access_vlan.unwrap_or(1);
commands.push(format!("switchport access vlan {access_vlan}"));
if matches!(interface.interface, SwitchInterface::Ethernet(..)) {
commands.push("no spanning-tree shutdown".into());
commands.push("no fabric isl enable".into());
commands.push("no fabric trunk enable".into());
}
}
}
if let Some(speed) = &interface.speed {
info!(
"[Brocade] Overriding speed on {} to {speed}",
interface.interface
);
// Port-channels are shut down before the speed change; the
// unconditional `no shutdown` below brings them back up.
if matches!(interface.interface, SwitchInterface::PortChannel(..)) {
commands.push("shutdown".into());
}
commands.push(format!("speed {speed}"));
}
// Every configured interface is left administratively enabled.
commands.push("no shutdown".into());
commands.push("exit".into());
}
self.shell
.run_commands(commands, ExecutionMode::Regular)
.await
.map_err(|err| self.map_configure_interfaces_error(err))?;
info!("[Brocade] Interfaces configured.");
Ok(())
}
/// Creates the VLAN interface for `vlan.id` and assigns it `vlan.name`.
async fn create_vlan(&self, vlan: &Vlan) -> Result<(), Error> {
    info!("[Brocade] Creating VLAN {} ({})", vlan.id, vlan.name);

    let mut commands: Vec<String> = Vec::with_capacity(4);
    commands.push("configure terminal".into());
    commands.push(format!("interface Vlan {}", vlan.id));
    commands.push(format!("name {}", vlan.name));
    commands.push("exit".into());

    self.shell
        .run_commands(commands, ExecutionMode::Regular)
        .await?;

    info!("[Brocade] VLAN {} ({}) created.", vlan.id, vlan.name);
    Ok(())
}
/// Removes the VLAN interface identified by `vlan.id`.
async fn delete_vlan(&self, vlan: &Vlan) -> Result<(), Error> {
    info!("[Brocade] Deleting VLAN {}", vlan.id);

    let remove_vlan = format!("no interface Vlan {}", vlan.id);
    let commands: Vec<String> = vec!["configure terminal".into(), remove_vlan];

    self.shell
        .run_commands(commands, ExecutionMode::Regular)
        .await?;

    info!("[Brocade] VLAN {} deleted.", vlan.id);
    Ok(())
}
async fn find_available_channel_id(&self) -> Result<PortChannelId, Error> {
info!("[Brocade] Finding next available channel id...");
let output = self
.shell
.run_command("show port-channel summary", ExecutionMode::Regular)
.await?;
let used_ids: Vec<u8> = output
.lines()
.skip(6)
.filter_map(|line| {
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() < 8 {
return None;
}
u8::from_str(parts[0]).ok()
})
.collect();
let mut next_id: u8 = 1;
loop {
if !used_ids.contains(&next_id) {
break;
}
next_id += 1;
}
info!("[Brocade] Found channel id: {next_id}");
Ok(next_id)
}
/// Creates port-channel `channel_id`, describes it with `channel_name`, and
/// adds every port in `ports` to it in `active` mode.
///
/// Member ports are always addressed as `TenGigabitEthernet <port>`. Before
/// joining the channel group, each port has its switchport, IP address and
/// fabric ISL/trunk settings removed.
async fn create_port_channel(
    &self,
    channel_id: PortChannelId,
    channel_name: &str,
    ports: &[PortLocation],
) -> Result<(), Error> {
    info!(
        "[Brocade] Configuring port-channel '{channel_id} {channel_name}' with ports: {}",
        ports
            .iter()
            .map(|p| format!("{p}"))
            .collect::<Vec<String>>()
            .join(", ")
    );

    // The logical channel interface is created first...
    let mut commands: Vec<String> = Vec::new();
    commands.push("configure terminal".into());
    commands.push(format!("interface port-channel {}", channel_id));
    commands.push("no shutdown".into());
    commands.push(format!("description {channel_name}"));
    commands.push("exit".into());

    // ...then each physical member is cleaned up and attached to it.
    for port in ports {
        debug!(
            "[Brocade] Adding port TenGigabitEthernet {} to channel-group {}",
            port, channel_id
        );
        commands.extend([
            format!("interface TenGigabitEthernet {}", port),
            "no switchport".to_string(),
            "no ip address".to_string(),
            "no fabric isl enable".to_string(),
            "no fabric trunk enable".to_string(),
            format!("channel-group {channel_id} mode active"),
            "no shutdown".to_string(),
            "exit".to_string(),
        ]);
    }

    self.shell
        .run_commands(commands, ExecutionMode::Regular)
        .await?;

    info!("[Brocade] Port-channel '{channel_name}' configured.");
    Ok(())
}
/// Removes the switchport and speed configuration from `interface`.
async fn reset_interface(&self, interface: &str) -> Result<(), Error> {
    info!("[Brocade] Resetting interface: {interface}");

    let select_interface = format!("interface {interface}");
    let commands: Vec<String> = vec![
        String::from("configure terminal"),
        select_interface,
        String::from("no switchport"),
        String::from("no speed"),
        String::from("exit"),
    ];

    self.shell
        .run_commands(commands, ExecutionMode::Regular)
        .await?;

    info!("[Brocade] Interface '{interface}' reset.");
    Ok(())
}
/// Deletes the port-channel interface designated by `channel_name`.
///
/// The value is inserted verbatim after `no interface port-channel`.
async fn clear_port_channel(&self, channel_name: &str) -> Result<(), Error> {
    info!("[Brocade] Clearing port-channel: {channel_name}");

    let remove_channel = format!("no interface port-channel {}", channel_name);
    let commands: Vec<String> = vec![
        "configure terminal".to_string(),
        remove_channel,
        "exit".to_string(),
    ];

    self.shell
        .run_commands(commands, ExecutionMode::Regular)
        .await?;

    info!("[Brocade] Port-channel '{channel_name}' cleared.");
    Ok(())
}
/// Enables SNMPv3 access: defines the `ALL` view, a read-only `public` group
/// with privacy, and a user authenticated with MD5 and encrypted with DES.
///
/// NOTE(review): MD5 and DES are weak algorithms; confirm whether the target
/// firmware supports stronger options before relying on this for security.
async fn enable_snmp(&self, user_name: &str, auth: &str, des: &str) -> Result<(), Error> {
    let create_user = format!(
        "snmp-server user {user_name} groupname public auth md5 auth-password {auth} priv des priv-password {des}"
    );

    let commands: Vec<String> = vec![
        String::from("configure terminal"),
        String::from("snmp-server view ALL 1 included"),
        String::from("snmp-server group public v3 priv read ALL"),
        create_user,
        String::from("exit"),
    ];

    self.shell
        .run_commands(commands, ExecutionMode::Regular)
        .await?;

    Ok(())
}
}

View File

@@ -1,367 +0,0 @@
use std::net::IpAddr;
use std::time::Duration;
use std::time::Instant;
use crate::BrocadeOptions;
use crate::Error;
use crate::ExecutionMode;
use crate::TimeoutConfig;
use crate::ssh;
use log::debug;
use log::info;
use russh::ChannelMsg;
use tokio::time::timeout;
/// Connection settings for a Brocade switch plus the commands to run around
/// every operation. Each operation opens its own SSH session from these settings.
#[derive(Debug)]
pub struct BrocadeShell {
// Address of the switch (the first address given to `init`).
ip: IpAddr,
// Credentials reused for every session.
username: String,
password: String,
// Options returned by `ssh::try_init_client`, i.e. including the SSH settings that connected.
options: BrocadeOptions,
// Commands run (errors ignored) right after a session opens.
before_all_commands: Vec<String>,
// Commands run (errors ignored) right before a session closes.
after_all_commands: Vec<String>,
}
impl BrocadeShell {
pub async fn init(
ip_addresses: &[IpAddr],
username: &str,
password: &str,
options: &BrocadeOptions,
) -> Result<Self, Error> {
let ip = ip_addresses
.first()
.ok_or_else(|| Error::ConfigurationError("No IP addresses provided".to_string()))?;
let brocade_ssh_client_options =
ssh::try_init_client(username, password, ip, options).await?;
Ok(Self {
ip: *ip,
username: username.to_string(),
password: password.to_string(),
before_all_commands: vec![],
after_all_commands: vec![],
options: brocade_ssh_client_options,
})
}
/// Opens a new session to the switch in the requested execution mode, using
/// the shell's stored address, credentials and options.
pub async fn open_session(&self, mode: ExecutionMode) -> Result<BrocadeSession, Error> {
    let port = self.options.ssh.port;
    let session_options = self.options.clone();

    BrocadeSession::open(
        self.ip,
        port,
        &self.username,
        &self.password,
        session_options,
        mode,
    )
    .await
}
/// Opens a session, runs `callback` inside it and closes the session.
///
/// The "before all" and "after all" command lists are run around the
/// callback on a best-effort basis: their errors are deliberately ignored.
///
/// # Errors
///
/// Returns the callback's error if it failed. A failure to close the session
/// is only reported when the callback itself succeeded, so that the more
/// specific callback error is never masked by a secondary close error.
pub async fn with_session<F, R>(&self, mode: ExecutionMode, callback: F) -> Result<R, Error>
where
    F: FnOnce(
        &mut BrocadeSession,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<R, Error>> + Send + '_>,
    >,
{
    let mut session = self.open_session(mode).await?;

    let _ = session.run_commands(self.before_all_commands.clone()).await;
    let result = callback(&mut session).await;
    let _ = session.run_commands(self.after_all_commands.clone()).await;

    // Always attempt to close, but let the callback's error take priority.
    let close_result = session.close().await;
    let value = result?;
    close_result?;
    Ok(value)
}
/// Runs a single command in a fresh session and returns its output.
///
/// The "before all" and "after all" command lists are run around the command
/// on a best-effort basis: their errors are deliberately ignored.
///
/// # Errors
///
/// Returns the command's error if it failed. A failure to close the session
/// is only reported when the command itself succeeded, so that the command
/// error is never masked by a secondary close error.
pub async fn run_command(&self, command: &str, mode: ExecutionMode) -> Result<String, Error> {
    let mut session = self.open_session(mode).await?;

    let _ = session.run_commands(self.before_all_commands.clone()).await;
    let result = session.run_command(command).await;
    let _ = session.run_commands(self.after_all_commands.clone()).await;

    // Always attempt to close, but let the command's error take priority.
    let close_result = session.close().await;
    let output = result?;
    close_result?;
    Ok(output)
}
/// Runs a list of commands, in order, in a single fresh session.
///
/// The "before all" and "after all" command lists are run around the batch
/// on a best-effort basis: their errors are deliberately ignored.
///
/// # Errors
///
/// Returns the error of the first failing command. A failure to close the
/// session is only reported when every command succeeded, so that the command
/// error is never masked by a secondary close error.
pub async fn run_commands(
    &self,
    commands: Vec<String>,
    mode: ExecutionMode,
) -> Result<(), Error> {
    let mut session = self.open_session(mode).await?;

    let _ = session.run_commands(self.before_all_commands.clone()).await;
    let result = session.run_commands(commands).await;
    let _ = session.run_commands(self.after_all_commands.clone()).await;

    // Always attempt to close, but let the batch's error take priority.
    let close_result = session.close().await;
    result?;
    close_result
}
/// Replaces the list of commands run at the start of every session.
///
/// Any previously registered commands are discarded.
pub fn before_all(&mut self, commands: Vec<String>) {
self.before_all_commands = commands;
}
/// Replaces the list of commands run at the end of every session.
///
/// Any previously registered commands are discarded.
pub fn after_all(&mut self, commands: Vec<String>) {
self.after_all_commands = commands;
}
}
/// An open interactive shell session on a Brocade switch.
pub struct BrocadeSession {
/// The SSH channel carrying the interactive shell.
pub channel: russh::Channel<russh::client::Msg>,
/// The execution mode the session was opened in; `close` sends an extra `exit` for privileged sessions.
pub mode: ExecutionMode,
/// Options (timeouts, dry-run flag, SSH settings) in effect for this session.
pub options: BrocadeOptions,
}
impl BrocadeSession {
/// Connects to the switch, requests a `vt100` PTY with an interactive shell,
/// waits for the prompt and, for `ExecutionMode::Privileged`, elevates the
/// session.
pub async fn open(
    ip: IpAddr,
    port: u16,
    username: &str,
    password: &str,
    options: BrocadeOptions,
    mode: ExecutionMode,
) -> Result<Self, Error> {
    let client = ssh::create_client(ip, port, username, password, &options).await?;

    let mut channel = client.channel_open_session().await?;
    channel
        .request_pty(false, "vt100", 80, 24, 0, 0, &[])
        .await?;
    channel.request_shell(false).await?;

    wait_for_shell_ready(&mut channel, &options.timeouts).await?;

    let needs_elevation = matches!(mode, ExecutionMode::Privileged);
    if needs_elevation {
        try_elevate_session(&mut channel, username, password, &options.timeouts).await?;
    }

    Ok(Self {
        channel,
        mode,
        options,
    })
}
/// Leaves the shell and waits for the channel to close.
///
/// Sends `exit` once (twice for privileged sessions), then drains channel
/// messages until a close message arrives, the channel ends, a single wait
/// times out, or the overall cleanup timeout elapses.
pub async fn close(&mut self) -> Result<(), Error> {
    debug!("[Brocade] Closing session...");

    // A privileged session needs one `exit` to drop privileges and one to log out.
    let exit_count = if matches!(self.mode, ExecutionMode::Privileged) {
        2
    } else {
        1
    };
    for _ in 0..exit_count {
        self.channel.data(&b"exit\n"[..]).await?;
    }

    let started = Instant::now();
    while started.elapsed() < self.options.timeouts.cleanup {
        let next = timeout(self.options.timeouts.message_wait, self.channel.wait()).await;
        match next {
            Ok(Some(ChannelMsg::Close)) | Ok(None) | Err(_) => break,
            Ok(Some(_)) => {}
        }
    }

    debug!("[Brocade] Session closed.");
    Ok(())
}
pub async fn run_command(&mut self, command: &str) -> Result<String, Error> {
if self.should_skip_command(command) {
return Ok(String::new());
}
debug!("[Brocade] Running command: '{command}'...");
self.channel
.data(format!("{}\n", command).as_bytes())
.await?;
tokio::time::sleep(Duration::from_millis(100)).await;
let output = self.collect_command_output().await?;
let output = String::from_utf8(output)
.map_err(|_| Error::UnexpectedError("Invalid UTF-8 in command output".to_string()))?;
self.check_for_command_errors(&output, command)?;
Ok(output)
}
/// Runs the commands in order, stopping at the first failure.
pub async fn run_commands(&mut self, commands: Vec<String>) -> Result<(), Error> {
    for command in &commands {
        self.run_command(command).await?;
    }

    Ok(())
}
/// Tells whether `command` must be skipped because dry-run mode is enabled.
///
/// Only commands starting with `write` or `deploy` are affected.
fn should_skip_command(&self, command: &str) -> bool {
    let is_write_or_deploy = command.starts_with("write") || command.starts_with("deploy");
    if !(is_write_or_deploy && self.options.dry_run) {
        return false;
    }

    info!("[Brocade] Dry-run mode enabled, skipping command: {command}");
    true
}
/// Reads channel data until a shell prompt appears, the channel ends, or the
/// command execution timeout elapses.
///
/// A prompt is detected by the presence of `>` or `#` anywhere in the
/// accumulated output.
/// NOTE(review): any `>` or `#` in the command output itself, not just a
/// trailing prompt, also ends collection — confirm this is acceptable for the
/// commands in use.
///
/// # Errors
///
/// Returns `Error::TimeoutError` when `timeouts.command_execution` is exceeded.
async fn collect_command_output(&mut self) -> Result<Vec<u8>, Error> {
let mut output = Vec::new();
let start = Instant::now();
// Upper bound for a single wait on the channel.
let read_timeout = Duration::from_millis(500);
// Minimum delay between two "still waiting" log lines.
let log_interval = Duration::from_secs(5);
let mut last_log = Instant::now();
loop {
if start.elapsed() > self.options.timeouts.command_execution {
return Err(Error::TimeoutError(
"Timeout waiting for command completion.".into(),
));
}
// Once the command has run longer than `command_output`, log progress periodically.
if start.elapsed() > self.options.timeouts.command_output
&& last_log.elapsed() > log_interval
{
info!("[Brocade] Waiting for command output...");
last_log = Instant::now();
}
match timeout(read_timeout, self.channel.wait()).await {
Ok(Some(ChannelMsg::Data { data } | ChannelMsg::ExtendedData { data, .. })) => {
output.extend_from_slice(&data);
let current_output = String::from_utf8_lossy(&output);
if current_output.contains('>') || current_output.contains('#') {
return Ok(output);
}
}
// The remote side finished: return whatever was collected.
Ok(Some(ChannelMsg::Eof | ChannelMsg::Close)) => return Ok(output),
Ok(Some(ChannelMsg::ExitStatus { exit_status })) => {
debug!("[Brocade] Command exit status: {exit_status}");
}
Ok(Some(_)) => continue,
// Either the channel yielded no message or the read timed out.
Ok(None) | Err(_) => {
if output.is_empty() {
// Nothing received yet: poll once more and stop only if the channel has ended.
if let Ok(None) = timeout(read_timeout, self.channel.wait()).await {
break;
}
continue;
}
tokio::time::sleep(Duration::from_millis(100)).await;
let current_output = String::from_utf8_lossy(&output);
if current_output.contains('>') || current_output.contains('#') {
return Ok(output);
}
}
}
}
Ok(output)
}
/// Inspects command output for signs of failure.
///
/// # Errors
///
/// Returns `Error::CommandError` when the output contains, case-insensitively,
/// one of the known error phrases, or when a command that does not start with
/// `show` produced no output at all.
fn check_for_command_errors(&self, output: &str, command: &str) -> Result<(), Error> {
    const ERROR_PATTERNS: &[&str] = &[
        "invalid input",
        "syntax error",
        "command not found",
        "unknown command",
        "permission denied",
        "access denied",
        "authentication failed",
        "configuration error",
        "failed to",
        "error:",
    ];

    let normalized = output.to_lowercase();
    for pattern in ERROR_PATTERNS {
        if normalized.contains(*pattern) {
            return Err(Error::CommandError(format!(
                "Command error: {}",
                output.trim()
            )));
        }
    }

    let is_silent = output.trim().is_empty();
    if is_silent && !command.starts_with("show") {
        return Err(Error::CommandError(format!(
            "Command '{command}' produced no output"
        )));
    }

    Ok(())
}
}
/// Waits until the remote shell prints a prompt ending in `>` or `#`.
///
/// Incoming data is accumulated and checked after every message. The wait is
/// bounded by `timeouts.shell_ready`.
///
/// NOTE(review): the function also returns `Ok(())` when the timeout elapses
/// or the channel ends without a prompt being seen — confirm that callers are
/// meant to proceed in that case.
async fn wait_for_shell_ready(
channel: &mut russh::Channel<russh::client::Msg>,
timeouts: &TimeoutConfig,
) -> Result<(), Error> {
let mut buffer = Vec::new();
let start = Instant::now();
while start.elapsed() < timeouts.shell_ready {
match timeout(timeouts.message_wait, channel.wait()).await {
Ok(Some(ChannelMsg::Data { data })) => {
buffer.extend_from_slice(&data);
let output = String::from_utf8_lossy(&buffer);
// Trailing whitespace after the prompt is ignored.
let output = output.trim();
if output.ends_with('>') || output.ends_with('#') {
debug!("[Brocade] Shell ready");
return Ok(());
}
}
Ok(Some(_)) => continue,
// Channel ended.
Ok(None) => break,
// A single wait timed out: keep polling until the overall deadline.
Err(_) => continue,
}
}
Ok(())
}
/// Switches the session to privileged mode by sending `enable` and answering
/// the prompts that follow.
///
/// The session is considered privileged once the accumulated output ends with `#`.
///
/// # Errors
///
/// Returns `Error::AuthenticationError` when a `>` prompt comes back instead
/// of `#`, or when the wait ends (timeout or channel end) without the
/// accumulated output ending in `#`.
async fn try_elevate_session(
channel: &mut russh::Channel<russh::client::Msg>,
username: &str,
password: &str,
timeouts: &TimeoutConfig,
) -> Result<(), Error> {
channel.data(&b"enable\n"[..]).await?;
let start = Instant::now();
let mut buffer = Vec::new();
while start.elapsed() < timeouts.shell_ready {
match timeout(timeouts.message_wait, channel.wait()).await {
Ok(Some(ChannelMsg::Data { data })) => {
buffer.extend_from_slice(&data);
let output = String::from_utf8_lossy(&buffer);
if output.ends_with('#') {
debug!("[Brocade] Privileged mode established");
return Ok(());
}
// Answer each prompt, then clear the buffer so the same prompt is not answered twice.
if output.contains("User Name:") {
channel.data(format!("{}\n", username).as_bytes()).await?;
buffer.clear();
} else if output.contains("Password:") {
channel.data(format!("{}\n", password).as_bytes()).await?;
buffer.clear();
} else if output.contains('>') {
// Still at the unprivileged prompt: elevation was refused.
return Err(Error::AuthenticationError(
"Enable authentication failed".into(),
));
}
}
Ok(Some(_)) => continue,
Ok(None) => break,
Err(_) => continue,
}
}
// Final check on whatever was accumulated before the loop ended.
let output = String::from_utf8_lossy(&buffer);
if output.ends_with('#') {
debug!("[Brocade] Privileged mode established");
Ok(())
} else {
Err(Error::AuthenticationError(format!(
"Enable failed. Output:\n{output}"
)))
}
}

View File

@@ -1,131 +0,0 @@
use std::borrow::Cow;
use std::sync::Arc;
use async_trait::async_trait;
use log::debug;
use russh::client::Handler;
use russh::kex::DH_G1_SHA1;
use russh::kex::ECDH_SHA2_NISTP256;
use russh_keys::key::SSH_RSA;
use super::BrocadeOptions;
use super::Error;
/// SSH connection settings for a Brocade switch.
#[derive(Clone, Debug)]
pub struct SshOptions {
/// Algorithm preferences handed to the russh client configuration.
pub preferred_algorithms: russh::Preferred,
/// TCP port of the SSH server (defaults to 22).
pub port: u16,
}
impl Default for SshOptions {
fn default() -> Self {
Self {
preferred_algorithms: Default::default(),
port: 22,
}
}
}
impl SshOptions {
    /// Options restricted to ECDH (NIST P-256) key exchange with `ssh-rsa`
    /// host keys; all other algorithm preferences keep russh's defaults.
    fn ecdhsa_sha2_nistp256(port: u16) -> Self {
        // Both fields are set explicitly, so no `..Default::default()` is
        // needed on the outer struct (clippy: `needless_update`).
        Self {
            preferred_algorithms: russh::Preferred {
                kex: Cow::Borrowed(&[ECDH_SHA2_NISTP256]),
                key: Cow::Borrowed(&[SSH_RSA]),
                ..Default::default()
            },
            port,
        }
    }

    /// Options restricted to the legacy Diffie-Hellman group 1 (SHA-1) key
    /// exchange with `ssh-rsa` host keys; all other algorithm preferences keep
    /// russh's defaults.
    fn legacy(port: u16) -> Self {
        Self {
            preferred_algorithms: russh::Preferred {
                kex: Cow::Borrowed(&[DH_G1_SHA1]),
                key: Cow::Borrowed(&[SSH_RSA]),
                ..Default::default()
            },
            port,
        }
    }
}
/// russh client handler used for every connection to a switch.
pub struct Client;
#[async_trait]
impl Handler for Client {
type Error = Error;
/// Accepts the server's host key unconditionally.
///
/// NOTE(review): returning `Ok(true)` for every key disables host key
/// verification, which leaves connections open to man-in-the-middle
/// attacks — confirm this is an accepted trade-off for the target environment.
async fn check_server_key(
&mut self,
_server_public_key: &russh_keys::key::PublicKey,
) -> Result<bool, Self::Error> {
Ok(true)
}
}
pub async fn try_init_client(
username: &str,
password: &str,
ip: &std::net::IpAddr,
base_options: &BrocadeOptions,
) -> Result<BrocadeOptions, Error> {
let mut default = SshOptions::default();
default.port = base_options.ssh.port;
let ssh_options = vec![
default,
SshOptions::ecdhsa_sha2_nistp256(base_options.ssh.port),
SshOptions::legacy(base_options.ssh.port),
];
for ssh in ssh_options {
let opts = BrocadeOptions {
ssh: ssh.clone(),
..base_options.clone()
};
debug!("Creating client {ip}:{} {username}", ssh.port);
let client = create_client(*ip, ssh.port, username, password, &opts).await;
match client {
Ok(_) => {
return Ok(opts);
}
Err(e) => match e {
Error::NetworkError(e) => {
if e.contains("No common key exchange algorithm") {
continue;
} else {
return Err(Error::NetworkError(e));
}
}
_ => return Err(e),
},
}
}
Err(Error::NetworkError(
"Could not establish ssh connection: wrong key exchange algorithm)".to_string(),
))
}
pub async fn create_client(
ip: std::net::IpAddr,
port: u16,
username: &str,
password: &str,
options: &BrocadeOptions,
) -> Result<russh::client::Handle<Client>, Error> {
let config = russh::client::Config {
preferred: options.ssh.preferred_algorithms.clone(),
..Default::default()
};
let mut client = russh::client::connect(Arc::new(config), (ip, port), Client {}).await?;
if !client.authenticate_password(username, password).await? {
return Err(Error::AuthenticationError(
"ssh authentication failed".to_string(),
));
}
Ok(client)
}

View File

@@ -1,11 +0,0 @@
#!/bin/sh
# Builds the mdBook documentation and verifies that the expected pages exist.
# Runs from the repository root (the parent of this script's directory).
set -e
cd "$(dirname "$0")/.."
cargo install mdbook --locked
mdbook build
# Fail loudly when a page that must exist was not generated. Diagnostics go to
# stderr so they are not mixed with regular build output.
for page in index.html concepts.html guides/getting-started.html; do
    if [ ! -f "book/$page" ]; then
        echo "ERROR: book/$page not found" >&2
        exit 1
    fi
done

View File

@@ -1,16 +0,0 @@
#!/bin/sh
# Local CI entry point: runs the code checks, then builds the book.
# Usage: ci.sh [branch]
set -e
# Run from the repository root (the parent of this script's directory).
cd "$(dirname "$0")/.."
# The branch name defaults to "main"; it is only used in the banner below.
BRANCH="${1:-main}"
echo "=== Running CI for branch: $BRANCH ==="
echo "--- Checking code ---"
./build/check.sh
echo "--- Building book ---"
./build/book.sh
echo "=== CI passed ==="

View File

@@ -1,12 +1,6 @@
#!/bin/sh
# Code checks: compile check, formatting check and clippy.
# Any failing command aborts the script.
set -e
# Run from the repository root (the parent of this script's directory).
cd "$(dirname "$0")/.."
# Make sure git submodules are present before building.
git submodule init
git submodule update
# Print the toolchain version for the build log.
rustc --version
# --keep-going reports as many compile errors as possible in a single run.
cargo check --all-targets --all-features --keep-going
cargo fmt --check
cargo clippy

BIN
data/okd/bin/kubectl (Stored with Git LFS)

Binary file not shown.

BIN
data/okd/bin/oc (Stored with Git LFS)

Binary file not shown.

BIN
data/okd/bin/oc_README.md (Stored with Git LFS)

Binary file not shown.

BIN
data/okd/bin/openshift-install (Stored with Git LFS)

Binary file not shown.

BIN
data/okd/bin/openshift-install_README.md (Stored with Git LFS)

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1 +0,0 @@
scos-9.0.20250510-0-live-initramfs.x86_64.img

View File

@@ -1 +0,0 @@
scos-9.0.20250510-0-live-kernel.x86_64

View File

@@ -1 +0,0 @@
scos-9.0.20250510-0-live-rootfs.x86_64.img

View File

@@ -1,8 +0,0 @@
Here lie all the data files required for an OKD cluster PXE boot setup.
This includes ISO files, binary boot files, iPXE files, etc.
TODO as of August 2025:
- `harmony_inventory_agent` should be downloaded from official releases; this embedded version is practical for now, though
- The cluster SSH key should be generated and handled by harmony, with the private key saved in a secret store

View File

@@ -1,9 +0,0 @@
harmony_inventory_agent filter=lfs diff=lfs merge=lfs -text
os filter=lfs diff=lfs merge=lfs -text
os/centos-stream-9 filter=lfs diff=lfs merge=lfs -text
os/centos-stream-9/images filter=lfs diff=lfs merge=lfs -text
os/centos-stream-9/initrd.img filter=lfs diff=lfs merge=lfs -text
os/centos-stream-9/vmlinuz filter=lfs diff=lfs merge=lfs -text
os/centos-stream-9/images/efiboot.img filter=lfs diff=lfs merge=lfs -text
os/centos-stream-9/images/install.img filter=lfs diff=lfs merge=lfs -text
os/centos-stream-9/images/pxeboot filter=lfs diff=lfs merge=lfs -text

View File

@@ -1 +0,0 @@
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBx6bDylvC68cVpjKfEFtLQJ/dOFi6PVS2vsIOqPDJIc jeangab@liliane2

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,3 +0,0 @@
.terraform
*.tfstate
venv

Binary file not shown.

Before

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 100 KiB

View File

@@ -1,5 +0,0 @@
To build :
```bash
npx @marp-team/marp-cli@latest -w slides.md
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -1,9 +0,0 @@
To run this :
```bash
virtualenv venv
source venv/bin/activate
pip install ansible ansible-dev-tools
ansible-lint download.yml
ansible-playbook -i localhost download.yml
```

View File

@@ -1,8 +0,0 @@
# Playbook with a single task that downloads a web page using `ansible.builtin.get_url`.
# NOTE(review): the URL (`http:/` with a single slash) and the file mode `'0900'`
# look malformed. Given the play name they are presumably deliberate inputs for
# exercising validation/linting — confirm before "fixing" them.
- name: Test Ansible URL Validation
hosts: localhost
tasks:
- name: Download a file
ansible.builtin.get_url:
url: "http:/wikipedia.org/"
dest: "/tmp/ansible-test/wikipedia.html"
mode: '0900'

Some files were not shown because too many files have changed in this diff Show More