Compare commits

...

283 Commits

Author SHA1 Message Date
063a4d4f5c wip: improving inventory discovery 2026-03-04 07:18:20 -05:00
ff7d2fb89e fix: Complete brocade switch config and auth refactoring
Some checks failed
Run Check Script / check (push) Successful in 1m12s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m57s
2026-02-17 10:31:30 -05:00
9bb38b930a Merge pull request 'reafactor: brocade switch slight improvements' (#233) from fix/brocade into master
Some checks failed
Run Check Script / check (push) Failing after -10s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m54s
Reviewed-on: #233
2026-02-17 15:16:42 +00:00
c677487a5e Merge pull request 'feat/drain_k8s_node' (#232) from feat/drain_k8s_node into master
Some checks failed
Run Check Script / check (push) Successful in 1m0s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 3m11s
Reviewed-on: #232
Reviewed-by: stremblay <stremblay@nationtech.io>
2026-02-17 15:01:08 +00:00
c1d46612ac fix: dnsmasq now replaces mac address
All checks were successful
Run Check Script / check (pull_request) Successful in 1m2s
2026-02-17 10:00:11 -05:00
4fba01338d feat: Reboot k8s node works, good logs and tests
All checks were successful
Run Check Script / check (pull_request) Successful in 1m3s
2026-02-17 09:30:04 -05:00
913ed17453 Merge remote-tracking branch 'origin/master' into feat/drain_k8s_node 2026-02-17 09:15:06 -05:00
9e185cbbd5 chore: cleanup comments 2026-02-17 09:15:03 -05:00
752526f831 fix: reboot node now works with correct command
Some checks failed
Run Check Script / check (pull_request) Failing after 54s
2026-02-16 23:04:18 -05:00
f9bd6ad260 reafactor: brocade switch slight improvements
Some checks failed
Run Check Script / check (pull_request) Failing after -10s
2026-02-16 21:08:56 -05:00
111181c300 wip
Some checks failed
Run Check Script / check (pull_request) Failing after 54s
2026-02-16 20:54:46 -05:00
3257cd9569 wip: Reboot node cleanly via k8s api, copy files on node, run remote command with output, orchestrate network configuration, and some more
All checks were successful
Run Check Script / check (pull_request) Successful in 1m13s
2026-02-15 22:17:43 -05:00
4b1915c594 Merge pull request 'feat: improve output related to storage in the discovery process' (#231) from feat/improve-disk-device-display into master
All checks were successful
Run Check Script / check (push) Successful in 1m6s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m17s
Reviewed-on: #231
2026-02-15 18:22:54 +00:00
cf3050ce87 feat(k8s client): K8sClient module now holds the responsibility for the k8s distribution detection, add resource bundle useful for easy create and delete of a bunch of related resources.
First use case is creating a privileged pod allowing writing to nodes on
openshift family clusters. This requires creating the clusterrolebinding
and pod and other resources.
2026-02-15 09:14:49 -05:00
c3e27c60be feat: improve output related to storage in the discovery process
All checks were successful
Run Check Script / check (pull_request) Successful in 1m55s
2026-02-14 15:32:01 -05:00
2d26790c82 wip: K8s copy file on node refactoring to extract helpers and add tests 2026-02-14 10:22:48 -05:00
2e89308b82 wip: Copy files on k8s node via ephemeral pod and configmap 2026-02-14 08:07:03 -05:00
d8936a8307 feat(okd/network_manager): Add get_node_name_for_id and refactor" 2026-02-13 15:49:24 -05:00
e2fa12508f feat: Add k8s client drain node functionality with tests and example 2026-02-13 15:19:58 -05:00
bea2a75882 doc(opnsense): Add note that dnsmasq mac addresses will be droped when
setting static host
2026-02-13 15:18:20 -05:00
a1528665d0 Merge remote-tracking branch 'origin/doc-and-braindump'
All checks were successful
Run Check Script / check (push) Successful in 1m18s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 8m47s
2026-02-12 10:52:38 -05:00
613225a00b chore: push misc formatting and details
Some checks failed
Run Check Script / check (push) Has been cancelled
Compile and package harmony_composer / package_harmony_composer (push) Has been cancelled
2026-02-12 10:50:16 -05:00
dd1c088f0d example: Nats supercluster enable jetstream 2026-02-12 10:42:56 -05:00
b4ef009804 chore: Add node in cargo.toml to replace with serde-saphyr as serde_yaml is deprecated
Some checks failed
Run Check Script / check (push) Successful in 2m17s
Compile and package harmony_composer / package_harmony_composer (push) Has been cancelled
2026-02-12 10:42:16 -05:00
191e92048b feat: git ignore all ignore folders in the project 2026-02-12 10:42:16 -05:00
f4a70d8978 Merge pull request 'feat: integrate-brocade' (#230) from feat/integrate-brocade into master
Some checks failed
Run Check Script / check (push) Has been cancelled
Compile and package harmony_composer / package_harmony_composer (push) Has been cancelled
Reviewed-on: #230
2026-02-12 15:41:15 +00:00
2ddc9c0579 fix:format
All checks were successful
Run Check Script / check (pull_request) Successful in 1m12s
2026-02-12 10:31:06 -05:00
fececc2efd Creating a BrocadeSwitchConfig struct
Some checks failed
Run Check Script / check (pull_request) Failing after 0s
2026-02-09 11:24:29 -05:00
8afcacbd24 feat: integrate brocade 2026-02-09 09:53:16 -05:00
84992b7ada Merge pull request 'fix: use installation_device from host_config in bootstrap_okd_node' (#228) from fix/installation-device-for-nodes-in-byMAC into master
All checks were successful
Run Check Script / check (push) Successful in 1m7s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 8m20s
Reviewed-on: #228
2026-02-05 22:38:00 +00:00
7cd3c8b93d fix: shorthand
All checks were successful
Run Check Script / check (pull_request) Successful in 1m10s
2026-02-05 17:29:10 -05:00
83459eb2a6 fix: satisfy my ocd :)
All checks were successful
Run Check Script / check (pull_request) Successful in 1m8s
2026-02-05 17:04:53 -05:00
d6ddbfa51a fix: use installation_device from host_config in bootstrap_okd_node
All checks were successful
Run Check Script / check (pull_request) Successful in 1m11s
2026-02-05 16:57:14 -05:00
c6ca3d38d1 Merge pull request 'feat/harmony_agent' (#220) from feat/harmony_agent into master
Some checks failed
Run Check Script / check (push) Successful in 1m7s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 9m50s
Reviewed-on: #220
2026-02-04 21:05:34 +00:00
4c79a7628d Merge branch 'master' into feat/harmony_agent
All checks were successful
Run Check Script / check (pull_request) Successful in 1m8s
2026-02-04 16:03:22 -05:00
7ca1a64038 feat: completed harmony_agent implentation for primary and replica agents, fixed a test
All checks were successful
Run Check Script / check (pull_request) Successful in 1m7s
2026-02-04 15:56:40 -05:00
333884a81a Merge pull request 'feat: created decentralized topology, capability nats and nats super cluster' (#221) from feat/nats_capability into master
All checks were successful
Run Check Script / check (push) Successful in 1m8s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 9m48s
Reviewed-on: #221
Reviewed-by: johnride <jg@nationtech.io>
2026-02-04 19:05:50 +00:00
74e6da1a16 Merge branch 'master' into feat/nats_capability
All checks were successful
Run Check Script / check (pull_request) Successful in 1m5s
2026-02-04 14:03:47 -05:00
0372cc3f31 Merge pull request 'fix/nats-isp' (#226) from fix/nats-isp into feat/nats_capability
All checks were successful
Run Check Script / check (pull_request) Successful in 1m5s
Reviewed-on: #226
2026-02-04 18:55:12 +00:00
de14ba6b97 fix(agent): fetch from store returns metadata to allow rebuilding states properly
Some checks failed
Run Check Script / check (pull_request) Failing after 5s
2026-02-04 12:10:33 -05:00
a08c3fb03b wip: save new cluster info state
Some checks failed
Run Check Script / check (pull_request) Failing after 5s
2026-02-04 11:47:11 -05:00
17b3b3b351 test(agent): Wrote first few tests for Primary workflow use cases : initializing to healthy, healthy to failed
Some checks failed
Run Check Script / check (pull_request) Failing after 6s
2026-02-04 09:26:10 -05:00
01a775a01f wip(agent): workflow now return new cluster state when they decide to alter it, primary taking control of current_primary case handled but using wrong ID
Some checks failed
Run Check Script / check (pull_request) Failing after 7s
2026-02-04 07:01:13 -05:00
9c551a0eba fix: Agent can now reload heartbeat info from store
Some checks failed
Run Check Script / check (pull_request) Failing after 10s
2026-02-03 22:12:44 -05:00
a88d67627a chore: Add a note and delete old code
Some checks failed
Run Check Script / check (pull_request) Failing after 6s
2026-02-03 20:46:18 -05:00
5b04cc96d7 wip: we want to initialize to the right seq number after a restart
Some checks failed
Run Check Script / check (pull_request) Failing after 6s
2026-02-03 14:50:03 -05:00
73cda3425f Merge branch 'feat/harmony_agent' of https://git.nationtech.io/NationTech/harmony into feat/harmony_agent 2026-02-03 11:53:27 -05:00
7065e90475 feat: use the role of the agent to define its name 2026-02-03 11:45:03 -05:00
a20919bbda wip: write cluster state to jetstream kv
Some checks failed
Run Check Script / check (pull_request) Failing after 8s
2026-02-03 11:43:22 -05:00
948334b89e wip: cleaning up llm code, pretty close to something comprehensible and robust
Some checks failed
Run Check Script / check (pull_request) Failing after 13s
2026-02-03 06:39:56 -05:00
2a1d489b78 Merge pull request 'feat: support use-swap-file opnsense xml field' (#227) from feat/support-use-swap-file into master
All checks were successful
Run Check Script / check (push) Successful in 1m11s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 9m39s
Reviewed-on: #227
2026-02-02 20:06:33 +00:00
4507504c47 feat: support use-swap-file opnsense xml field
All checks were successful
Run Check Script / check (pull_request) Successful in 1m11s
2026-02-02 14:56:26 -05:00
50aa545bd9 wip(harmony_agent): It compiles, contains most if not all of the required skeleton, now time to review it carefully, complete a few details and battle test it
Some checks failed
Run Check Script / check (pull_request) Failing after 11s
2026-02-01 20:54:11 -05:00
8b200cfe91 chore: removed commented code
All checks were successful
Run Check Script / check (pull_request) Successful in 1m9s
2026-01-30 14:02:52 -05:00
f6ff78a573 Merge branch 'feat/nats_capability' into fix/nats-isp
All checks were successful
Run Check Script / check (pull_request) Successful in 1m8s
2026-01-30 13:50:07 -05:00
329d5d8473 fix: format
All checks were successful
Run Check Script / check (pull_request) Successful in 1m9s
2026-01-30 13:42:01 -05:00
cd81d6584c fix: removed nats implementation details from k8sanywhere, added secret prompt for nats cluster using harmony secret 2026-01-30 13:41:38 -05:00
a0f32bb565 wip: working on separation of concerns 2026-01-30 10:57:22 -05:00
0cff1e0f66 feat: Harmony agent new algorithm based on heartbeat counters basics. Old code will need to be refactored completely
Some checks failed
Run Check Script / check (pull_request) Failing after 11s
2026-01-30 06:58:03 -05:00
29d2d620d1 Merge pull request 'feat: introduced crate tokio-retry to allow multiple attempts to get secret from k8s' (#225) from fix/nats-capability-retry into feat/nats_capability
All checks were successful
Run Check Script / check (pull_request) Successful in 1m14s
Reviewed-on: #225
2026-01-29 20:42:32 +00:00
7df8429181 feat: introduced crate tokio-retry to allow multiple attempts to get secret from k8s
All checks were successful
Run Check Script / check (pull_request) Successful in 1m9s
2026-01-29 15:03:33 -05:00
0358ea5959 Merge pull request 'fix: support DiscoveryStrategy in OKDSetup01InventoryScore' (#224) from fix/support-discovery-strategy-OKDSetup01InventoryScore into master
Some checks failed
Run Check Script / check (push) Successful in 1m14s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m59s
Reviewed-on: #224
2026-01-28 20:48:28 +00:00
eebda0f4aa chore: format
All checks were successful
Run Check Script / check (pull_request) Successful in 1m14s
2026-01-28 15:20:33 -05:00
666a3c0071 fix: modified nats trait and nats supercluster trait to better respect interface segregation
All checks were successful
Run Check Script / check (pull_request) Successful in 1m12s
2026-01-28 15:16:46 -05:00
a8217887f4 Merge branch 'master' into fix/support-discovery-strategy-OKDSetup01InventoryScore
Some checks failed
Run Check Script / check (pull_request) Failing after 13s
2026-01-28 20:16:29 +00:00
edf94554b8 fix: formating and example env variables
All checks were successful
Run Check Script / check (pull_request) Successful in 1m13s
2026-01-28 14:43:50 -05:00
4ea3d7f69c fix: support DiscoveryStrategy in OKDSetup01InventoryScore
Some checks failed
Run Check Script / check (pull_request) Failing after 10s
2026-01-28 14:41:49 -05:00
00d4b9de73 fix: added fullnameOverride to helmchart score values so that the helm deployed cluster name and service name match the cluster name defined in the NatsK8sScore, without this field helm appends -nats to svc and cluster name, breaking the tls chain 2026-01-28 14:25:24 -05:00
cb90788129 Merge pull request 'fix(deps): updating fqdn version as the one currently in use have been yanked' (#223) from fix/update-fqdn-version into master
Some checks failed
Run Check Script / check (push) Successful in 1m35s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 10m47s
Reviewed-on: #223
2026-01-28 19:21:25 +00:00
5ee9643a6c fix(deps): updating fqdn version as the one currently in use have been yanked
All checks were successful
Run Check Script / check (pull_request) Successful in 1m14s
2026-01-28 10:27:10 -05:00
92d4e3488a refactor: modified struct to accept N sites and N clusters 2026-01-28 09:43:43 -05:00
e557270960 Merge pull request 'feat/ask-for-main-disk' (#222) from feat/ask-for-main-disk into master
Some checks failed
Run Check Script / check (push) Successful in 1m59s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 9m32s
Reviewed-on: #222
2026-01-27 17:13:56 +00:00
54320e2ebe chore: refactor PhysicalHost into a tuple with HostConfig, it contains only installation device for now but it saves some mishaps
All checks were successful
Run Check Script / check (pull_request) Successful in 1m13s
2026-01-27 12:11:49 -05:00
fdd5d1b47c in the middle of HostConfig refactor 2026-01-27 12:03:23 -05:00
6ff43f4775 Remove nanodc example 2026-01-27 11:47:25 -05:00
f6b0f321b4 refactor: get_host_for_role -> get_hosts_for_role 2026-01-27 09:46:49 -05:00
619ac99b44 feat: query for installation disk during discovery, store it in host_role_mapping 2026-01-27 09:22:24 -05:00
922dd794d9 feat: run sqlx migrations automatically 2026-01-27 07:47:14 -05:00
8959719375 wip: created decentralized topology, capability nats and nats super cluster
Some checks failed
Run Check Script / check (pull_request) Failing after 17s
2026-01-26 16:21:26 -05:00
9e1095fb9b Merge pull request 'feat/nats' (#207) from feat/nats into master
All checks were successful
Run Check Script / check (push) Successful in 1m16s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m58s
Reviewed-on: #207
2026-01-26 16:32:05 +00:00
4758465b28 Merge remote-tracking branch 'origin/master' into feat/nats
All checks were successful
Run Check Script / check (pull_request) Successful in 1m13s
2026-01-26 11:28:46 -05:00
8ae38399b7 Merge pull request 'feat: use interactive_parse lib to query for secrets attributes values' (#219) from feat/json-attributes-prompt into master
All checks were successful
Run Check Script / check (push) Successful in 1m13s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m33s
Reviewed-on: #219
2026-01-26 15:54:04 +00:00
565bb4afa1 chore: formatting
All checks were successful
Run Check Script / check (pull_request) Successful in 1m15s
2026-01-26 10:50:16 -05:00
25d5aff158 chore: remove useless comments
Some checks failed
Run Check Script / check (pull_request) Failing after 15s
2026-01-26 10:45:48 -05:00
95f860809e chore: remove useless comments
All checks were successful
Run Check Script / check (pull_request) Successful in 1m14s
2026-01-26 10:43:02 -05:00
ce53ae0e04 Merge branch 'master' into feat/json-attributes-prompt
All checks were successful
Run Check Script / check (pull_request) Successful in 1m17s
2026-01-26 15:38:30 +00:00
deca67fd55 feat(backend_app): Deployment now pretty much works to package and deploy an app with an existing Docker image and type-safe helm chart on local k3d, not tested for remote k8s with Argo yet
Some checks failed
Run Check Script / check (pull_request) Failing after 17s
2026-01-25 22:54:14 -05:00
0cc5f505f8 feat(harmony_execution): New crate to contain utils for execution such
as command line
2026-01-25 22:52:29 -05:00
ab68e7309d feat: Use k8s openapi structs as helm chart resources following ADR 018
Some checks failed
Run Check Script / check (pull_request) Failing after 21s
2026-01-23 23:31:37 -05:00
093e0d54c0 Merge pull request 'adr/nats-islands-of-trust' (#209) from adr/nats-islands-of-trust into feat/nats
All checks were successful
Run Check Script / check (pull_request) Successful in 1m16s
Reviewed-on: #209
2026-01-23 20:05:58 +00:00
8657261342 fix: extracted variables, removed uncool side effect
All checks were successful
Run Check Script / check (pull_request) Successful in 1m17s
2026-01-23 15:03:03 -05:00
c20db5b361 doc(adr): New ADR Template hydration for strongly typed workload deployment
Some checks failed
Run Check Script / check (pull_request) Failing after 19s
2026-01-23 11:49:32 -05:00
33476e899e Merge pull request 'fix: modified cert-manager ensure ready to check for existence of pods with labels matching cert-manager in kubernetes env. replaced deprecated olm subscription based install of cert-manager for supported helm-chart' (#218) from fix/cert-manager into master
All checks were successful
Run Check Script / check (push) Successful in 1m17s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m31s
Reviewed-on: #218
2026-01-23 16:28:34 +00:00
d0cd21c322 fix: improved logging and function names for clarity
All checks were successful
Run Check Script / check (pull_request) Successful in 1m17s
2026-01-23 11:26:35 -05:00
b2f0773795 wip: Working on backend app deployment 2026-01-23 09:34:58 -05:00
9c6b780634 feat: use interactive_parse lib to query for secrets attributes values
All checks were successful
Run Check Script / check (pull_request) Successful in 1m20s
2026-01-23 08:55:07 -05:00
3682a0cb5f feat: First draft of harmony_agent project that will synchronize multiple clusters using nats supercluster to communicate 2026-01-23 08:54:40 -05:00
53e1711aef Merge pull request 'feat: Create st-test example, fix a couple new missing xml fields for opnsense, fix bad HostRole' (#217) from feat/st_test into master
All checks were successful
Run Check Script / check (push) Successful in 1m20s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m50s
Reviewed-on: #217
Reviewed-by: wjro <wrolleman@nationtech.io>
2026-01-22 20:59:23 +00:00
f37a8e373a fix: modified cert-manager ensure ready to check for existence of pods with labels matching cert-manager in kubernetes env. replaced deprecated olm subscription based install of cert-manager for supported helm-chart
All checks were successful
Run Check Script / check (pull_request) Successful in 1m20s
2026-01-22 15:56:17 -05:00
c631b3aef9 fix more opnsense stuff, remove installation notes
All checks were successful
Run Check Script / check (pull_request) Successful in 1m19s
2026-01-22 15:54:19 -05:00
3e2d94cff0 adding Cargo.lock
All checks were successful
Run Check Script / check (pull_request) Successful in 1m19s
2026-01-22 14:54:39 -05:00
c9e39d11ad fix: fix opnsense stuff for opnsense 25.1 test file
Some checks failed
Run Check Script / check (pull_request) Failing after 12s
2026-01-22 14:52:29 -05:00
740b5500f2 feat: added poc for deploying nats supercluster with certificates, issuers, and okd routes
All checks were successful
Run Check Script / check (pull_request) Successful in 1m20s
2026-01-22 11:37:10 -05:00
52bff9b6be fix: mod.rs 2026-01-22 11:37:10 -05:00
bc962be31f fix: moved cert management ensure ready to k8sanywhere 2026-01-22 11:37:10 -05:00
f6a20832cf lint: Remove useless variable assignment 2026-01-22 11:37:10 -05:00
a4515d34ae fix: modified score names for better clarity 2026-01-22 11:37:10 -05:00
2b324d7962 fix: modified trait to use other return types, modified trait function name to be ensure ready, use rust CRD definitions rather than constructing gvk for certificateManagement trait function in k8sanywhere 2026-01-22 11:37:10 -05:00
779444699f fix: modified k8sanywhere implentation of get_ca_cert to use the kubernetes certificate name to find its respective secret and ca.crt 2026-01-22 11:37:10 -05:00
865dab2fc1 feat: added fn get_ca_cert to trait certificateManagement 2026-01-22 11:37:10 -05:00
502e544cd3 cargo fmt 2026-01-22 11:37:10 -05:00
4f2a7050f5 feat: added working examples to add self signed issuer and self signed certificate. modified get_resource_json_value to be able to get cluster scoped operators 2026-01-22 11:37:10 -05:00
26256d9945 fix: added create_issuer fn to trait and its implementation is k8sanywhere 2026-01-22 11:37:10 -05:00
947733b240 wip: added scores and basic implentation to create certs and issuers 2026-01-22 11:37:10 -05:00
d3a8171e3c feat(cert-manager): added crds for cert-manager 2026-01-22 11:37:10 -05:00
043cd561e9 feat: added cert manager capability as well as scores to install openshift subscription to community cert-manager operator 2026-01-22 11:37:10 -05:00
5ed14b75ed chore: fix formatting 2026-01-22 08:47:56 -05:00
25a45096f8 doc: adding installation notes file 2026-01-21 16:22:59 -05:00
74252ded5c chore: remove useless brocade stuff
Some checks failed
Run Check Script / check (pull_request) Failing after 24s
2026-01-21 15:12:23 -05:00
0ecadbfb97 chore: remove unused import
Some checks failed
Run Check Script / check (pull_request) Failing after 23s
2026-01-21 15:07:35 -05:00
eb492f3ca9 fix: remove double definition of RUST_LOG in env.sh
Some checks failed
Run Check Script / check (pull_request) Failing after 19m39s
2026-01-21 14:02:58 -05:00
de3c8e9a41 adding data symlink
Some checks failed
Run Check Script / check (pull_request) Failing after 31s
2026-01-21 13:59:22 -05:00
2ef2d9f064 Fix HostRole (ControlPlane -> Worker) in workers score, fix main, add topology.rs 2026-01-21 13:56:28 -05:00
d2d18205e9 fix deps in Cargo.toml, create env.sh file 2026-01-21 13:09:26 -05:00
0b55a6fb53 fix: add new xml fields after updating opnsense 2026-01-20 14:27:28 -05:00
2dc65531c3 Merge pull request 'feat/cert_manager_crds' (#211) from feat/cert_manager_crds into master
Some checks failed
Compile and package harmony_composer / package_harmony_composer (push) Failing after 11m21s
Run Check Script / check (push) Failing after 11m50s
Reviewed-on: #211
2026-01-20 18:46:20 +00:00
1e98100ed4 fix: mod.rs
All checks were successful
Run Check Script / check (pull_request) Successful in 1m21s
2026-01-20 13:43:52 -05:00
ab33aba776 fix: moved cert management ensure ready to k8sanywhere
Some checks failed
Run Check Script / check (pull_request) Failing after 26s
2026-01-20 13:21:24 -05:00
c3ac0bafad lint: Remove useless variable assignment
All checks were successful
Run Check Script / check (pull_request) Successful in 1m27s
2026-01-20 12:11:48 -05:00
54a53fa982 fix: modified score names for better clarity
All checks were successful
Run Check Script / check (pull_request) Successful in 1m29s
2026-01-19 12:48:47 -05:00
731d59c8b0 fix: modified trait to use other return types, modified trait function name to be ensure ready, use rust CRD definitions rather than constructing gvk for certificateManagement trait function in k8sanywhere 2026-01-19 11:37:47 -05:00
001dd5269c add (now commented) line to init env_logger 2026-01-18 10:07:28 -05:00
9978acf16d feat: change staticroutes->route to Option<RawXml> instead of MaybeString 2026-01-18 10:06:15 -05:00
c6642db6fb fix: modified k8sanywhere implentation of get_ca_cert to use the kubernetes certificate name to find its respective secret and ca.crt
All checks were successful
Run Check Script / check (pull_request) Successful in 1m46s
2026-01-16 13:39:10 -05:00
8f111bcb8b feat: added fn get_ca_cert to trait certificateManagement
All checks were successful
Run Check Script / check (pull_request) Successful in 1m40s
2026-01-16 13:16:06 -05:00
ced371ca43 feat: Nats supercluster example working
Some checks failed
Run Check Script / check (pull_request) Failing after 42s
2026-01-16 09:45:59 -05:00
f319f74edf cargo fmt
All checks were successful
Run Check Script / check (pull_request) Successful in 1m46s
2026-01-14 16:19:56 -05:00
f576effeca feat: added working examples to add self signed issuer and self signed certificate. modified get_resource_json_value to be able to get cluster scoped operators 2026-01-14 16:18:59 -05:00
25c5cd84fe fix: added create_issuer fn to trait and its implementation is k8sanywhere
All checks were successful
Run Check Script / check (pull_request) Successful in 1m43s
2026-01-14 14:39:05 -05:00
dc421fa099 wip: added scores and basic implentation to create certs and issuers
Some checks failed
Run Check Script / check (pull_request) Failing after 50s
2026-01-13 15:43:58 -05:00
2153edc68c feat(cert-manager): added crds for cert-manager 2026-01-13 14:05:10 -05:00
949c9a40be feat: added cert manager capability as well as scores to install openshift subscription to community cert-manager operator
All checks were successful
Run Check Script / check (pull_request) Successful in 1m41s
2026-01-13 12:09:56 -05:00
1837623394 adr: 17-1 nats clusters interconnection using islands of trust. mTLS via shared ca-bundle with each cluster distributing its own CA.
Some checks failed
Run Check Script / check (pull_request) Failing after 51s
2026-01-13 10:40:30 -05:00
270b6b87df wip nats supercluster
Some checks failed
Run Check Script / check (pull_request) Failing after 51s
2026-01-09 17:30:51 -05:00
6933280575 feat(helm): refactor helm execution to use topology-specific commands
Some checks failed
Run Check Script / check (pull_request) Failing after 8s
Refactors the `HelmChartInterpret` to move away from the `helm-wrapper-rs` crate in favor of a custom command builder pattern. This allows the `HelmCommand` trait to provide topology-specific configurations, such as `kubeconfig` and `kube-context`, directly to the `helm` CLI.

- Implements `get_helm_command` for `K8sAnywhereTopology` to inject configuration flags.
- Replaces `DefaultHelmExecutor` with a manual `Command` construction in `run_helm_command`.
- Updates `HelmChartInterpret` to pass the topology through to repository and installation logic.
- Cleans up unused imports and removes the temporary `HelmCommand` implementation for `LocalhostTopology`.
2026-01-08 23:42:54 -05:00
77583a1ad1 wip: nats multi cluster, fixing helm command to follow multiple k8s config by providing the helm command from the topology itself, fix cli_logger that can now be initialized multiple times, some more stuff 2026-01-08 16:03:15 -05:00
f7404bed36 wip: initial setup for installing nats helm chart score 2026-01-07 16:14:58 -05:00
9a1aad62c9 Merge pull request 'fix: kubeconfig falls back to .kube if KUBECONFIG env variable is not set' (#205) from fix/kubeconfig into master
All checks were successful
Run Check Script / check (push) Successful in 1m9s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m49s
Reviewed-on: #205
2026-01-07 21:05:49 +00:00
0f9a53c8f6 cargo fmt
All checks were successful
Run Check Script / check (pull_request) Successful in 1m4s
2026-01-07 15:57:56 -05:00
b21829470d Merge pull request 'fix: modified nats box to use image tag non root for use in openshift environment' (#204) from fix/nats_non_root into master
All checks were successful
Run Check Script / check (push) Successful in 1m5s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m36s
Reviewed-on: #204
2026-01-07 20:52:42 +00:00
15f5e14c70 fix: modified nats box to use image tag non root for use in openshift environment
All checks were successful
Run Check Script / check (pull_request) Successful in 1m12s
2026-01-07 15:48:37 -05:00
4dcaf55dc5 fix: kubeconfig falls back to .kube if KUBECONFIG env variable is not set
Some checks failed
Run Check Script / check (pull_request) Failing after 2s
2026-01-07 15:47:08 -05:00
05c6398875 Merge pull request 'fix: added missing functions to impl SwitchClient for unmanagedSwitch' (#203) from fix/brocade_unmaged_switch into master
All checks were successful
Run Check Script / check (push) Successful in 1m3s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m17s
Reviewed-on: #203
2026-01-07 19:22:23 +00:00
ad5abe1748 fix(test): Use a mutex to prevent race conditions between tests relying on KUBECONFIG env var
All checks were successful
Run Check Script / check (pull_request) Successful in 1m0s
2026-01-07 14:21:29 -05:00
4e5a24b07a fix: added missing functions to impl SwitchClient for unmanagedSwitch
Some checks failed
Run Check Script / check (pull_request) Failing after 53s
2026-01-07 13:22:41 -05:00
7f0b77969c Merge pull request 'feat: PostgreSQLScore happy path using cnpg operator' (#200) from feat/postgresqlScore into master
Some checks failed
Run Check Script / check (push) Failing after 4s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 26s
Reviewed-on: #200
Reviewed-by: wjro <wrolleman@nationtech.io>
2026-01-06 23:58:17 +00:00
166af498a0 Merge pull request 'Unmanaged switch client' (#187) from jvtrudel/harmony:feat/unmanaged-switch into master
Some checks failed
Run Check Script / check (push) Failing after 0s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 28s
Reviewed-on: #187
2026-01-06 21:43:18 +00:00
4144633098 Merge pull request 'feat: OPNSense Topology useful to interact with only an opnsense instance.' (#184) from feat/opnsenseTopology into master
Some checks failed
Run Check Script / check (push) Successful in 56s
Compile and package harmony_composer / package_harmony_composer (push) Has been cancelled
Reviewed-on: #184
Reviewed-by: Ian Letourneau <ian@noma.to>
2026-01-06 21:37:33 +00:00
59253a65da Merge remote-tracking branch 'origin/master' into feat/opnsenseTopology
All checks were successful
Run Check Script / check (pull_request) Successful in 56s
2026-01-06 16:37:11 -05:00
16f65efe4f Merge remote-tracking branch 'origin/master' into feat/postgresqlScore
All checks were successful
Run Check Script / check (pull_request) Successful in 56s
2026-01-06 15:54:34 -05:00
07bc59d414 Merge pull request 'feat/cluster_monitoring' (#179) from feat/cluster_monitoring into master
All checks were successful
Run Check Script / check (push) Successful in 1m4s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m16s
Reviewed-on: #179
2026-01-06 20:47:06 +00:00
d5137d5ebc Merge remote-tracking branch 'origin/master' into feat/cluster_monitoring
Some checks failed
Run Check Script / check (pull_request) Failing after 10m33s
2026-01-06 15:43:34 -05:00
f2ca97b3bf Merge pull request 'feat(application): Webapp feature with production dns' (#167) from feat/webappdns into master
All checks were successful
Run Check Script / check (push) Successful in 54s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 7m31s
Reviewed-on: #167
2026-01-06 20:15:28 +00:00
dbfae8539f Merge remote-tracking branch 'origin/master' into feat/webappdns
All checks were successful
Run Check Script / check (pull_request) Successful in 58s
2026-01-06 15:14:19 -05:00
ed61ed1d93 Merge remote-tracking branch 'origin/master' into feat/postgresqlScore
All checks were successful
Run Check Script / check (pull_request) Successful in 55s
2026-01-06 15:10:48 -05:00
9359d43fe1 chore: Fix pr comments, documentation, slight refactor for better apis
All checks were successful
Run Check Script / check (pull_request) Successful in 49s
2026-01-06 15:09:17 -05:00
5935d66407 removed serial_test crate to keep tests running un parallel
All checks were successful
Run Check Script / check (pull_request) Successful in 58s
2026-01-06 15:02:30 -05:00
e026ad4d69 Merge pull request 'adr: draft ADR proposing harmony agent and nats-jetstram for decentralized workload management' (#202) from adr/decentralized-workload-management into master
All checks were successful
Run Check Script / check (push) Successful in 58s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 6m41s
Reviewed-on: #202
Reviewed-by: wjro <wrolleman@nationtech.io>
2026-01-06 19:45:54 +00:00
98f098ffa4 Merge pull request 'feat: implementation for opnsense os-node_exporter' (#173) from feat/install_opnsense_node_exporter into master
All checks were successful
Run Check Script / check (push) Successful in 59s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 6m58s
Reviewed-on: #173
2026-01-06 19:19:34 +00:00
fdf1dfaa30 fix: leave implementers to define their Debug, so removed impl Debug for dyn NodeExporter
All checks were successful
Run Check Script / check (pull_request) Successful in 55s
2026-01-06 14:17:04 -05:00
4f8cd0c1cb Merge remote-tracking branch 'origin/master' into feat/install_opnsense_node_exporter
All checks were successful
Run Check Script / check (pull_request) Successful in 55s
2026-01-06 13:56:48 -05:00
028161000e Merge remote-tracking branch 'origin/master' into feat/postgresqlScore
Some checks failed
Run Check Script / check (pull_request) Failing after 1s
2026-01-06 13:44:50 -05:00
457d3d4546 fix tests, cargo fmt, introduced crate serial_test to allow sequential testing env sensitive tests
All checks were successful
Run Check Script / check (pull_request) Successful in 56s
2026-01-06 13:06:59 -05:00
004b35f08e Merge pull request 'feat/brocade_snmp' (#193) from feat/brocade_snmp into master
Some checks failed
Run Check Script / check (push) Failing after 2s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 27s
Reviewed-on: #193
2026-01-06 16:22:25 +00:00
2b19d8c3e8 fix: changed name to switch_ips for more clarity
All checks were successful
Run Check Script / check (pull_request) Successful in 54s
2026-01-06 10:51:53 -05:00
745479c667 Merge pull request 'doc for removing worker flag from cp on UPI' (#165) from doc/worker-flag into master
All checks were successful
Run Check Script / check (push) Successful in 53s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 9m24s
Reviewed-on: #165
2026-01-06 15:46:13 +00:00
2d89e08877 Merge pull request 'doc to clone and transfer a coreos disk' (#166) from doc/clone into master
Some checks failed
Run Check Script / check (push) Successful in 54s
Compile and package harmony_composer / package_harmony_composer (push) Has been cancelled
Reviewed-on: #166
2026-01-06 15:42:56 +00:00
e5bd866c09 Merge pull request 'feat: cnpg operator score' (#199) from feat/cnpgOperator into master
Some checks failed
Run Check Script / check (push) Has been cancelled
Compile and package harmony_composer / package_harmony_composer (push) Has been cancelled
Reviewed-on: #199
Reviewed-by: wjro <wrolleman@nationtech.io>
2026-01-06 15:41:55 +00:00
0973f76701 Merge pull request 'feat: Introducing FailoverTopology and OperatorHub Catalog Subscription with example' (#196) from feat/multisitePostgreSQL into master
Some checks failed
Run Check Script / check (push) Has been cancelled
Compile and package harmony_composer / package_harmony_composer (push) Has been cancelled
Reviewed-on: #196
Reviewed-by: wjro <wrolleman@nationtech.io>
2026-01-06 15:41:12 +00:00
fd69a2d101 Merge pull request 'feat/rebuild_inventory' (#201) from feat/rebuild_inventory into master
Reviewed-on: #201
Reviewed-by: wjro <wrolleman@nationtech.io>
2026-01-05 20:30:33 +00:00
4d535e192d feat: Add new nats example
Some checks failed
Run Check Script / check (pull_request) Failing after -5s
2025-12-22 16:02:10 -05:00
ef307081d8 chore: slight refactor of postgres public score
Some checks failed
Run Check Script / check (pull_request) Has been cancelled
2025-12-22 09:54:27 -05:00
5cce9f8e74 adr: draft ADR proposing harmony agent and nats-jetstram for decentralized workload management
All checks were successful
Run Check Script / check (pull_request) Successful in 1m31s
2025-12-19 10:12:44 -05:00
07e610c54a fix git merge conflict
All checks were successful
Run Check Script / check (pull_request) Successful in 1m24s
2025-12-17 17:09:32 -05:00
204795a74f feat(failoverPostgres): Its alive! We can now deploy a multisite postgres instance. The public hostname is still hardcoded, we will have to fix that but the rest is good enough
Some checks failed
Run Check Script / check (pull_request) Failing after 36s
2025-12-17 16:43:37 -05:00
03e98a51e3 Merge pull request 'fix: added fields missing for haproxy after most recent update' (#191) from fix/opnsense_update into master
Some checks failed
Run Check Script / check (push) Failing after 12m40s
Reviewed-on: #191
2025-12-17 20:03:49 +00:00
22875fe8f3 fix: updated test xml structures to match with new fields added to opnsense
All checks were successful
Run Check Script / check (pull_request) Successful in 1m32s
2025-12-17 15:00:48 -05:00
66a9a76a6b feat(postgres): Failover postgres example maybe working!? Added FailoverTopology implementations for required capabilities, documented a bit, some more tests, and quite a few utility functions
Some checks failed
Run Check Script / check (pull_request) Failing after 1m49s
2025-12-17 14:35:10 -05:00
440e684b35 feat: Postgresql score based on the postgres capability now. true infrastructure abstraction!
Some checks failed
Run Check Script / check (pull_request) Failing after 33s
2025-12-16 23:35:52 -05:00
b0383454f0 feat(types): Add utility initialization functions for StorageSize such as StorageSize::kb(324)
Some checks failed
Run Check Script / check (pull_request) Failing after 41s
2025-12-16 16:24:53 -05:00
9e8f3ce52f feat(postgres): Postgres Connection Test score now has a script that provides more insight. Not quite working properly but easy to improve at this point.
Some checks failed
Run Check Script / check (pull_request) Failing after 43s
2025-12-16 15:53:54 -05:00
c6f859f973 fix(OPNSense): update fields for haproxyy and opnsense following most recent update and upgrade to opnsense
Some checks failed
Run Check Script / check (pull_request) Failing after 1m25s
2025-12-16 15:31:35 -05:00
bbf28a1a28 Merge branch 'master' into fix/opnsense_update
Some checks failed
Run Check Script / check (pull_request) Failing after 1m21s
2025-12-16 20:00:54 +00:00
c3ec7070ec feat: PostgreSQL public and Connection test score, also moved k8s_anywhere in a folder
Some checks failed
Run Check Script / check (pull_request) Failing after 40s
2025-12-16 14:57:02 -05:00
29821d5e9f feat: TlsPassthroughScore works, improved logging, fixed CRD
Some checks failed
Run Check Script / check (pull_request) Failing after 35s
2025-12-15 19:09:10 -05:00
446e079595 wip: public postgres many fixes and refactoring to have a more cohesive routing management
Some checks failed
Run Check Script / check (pull_request) Failing after 41s
2025-12-15 17:04:30 -05:00
e0da5764fb feat(types): Added Rfc1123 String type, useful for k8s names
Some checks failed
Run Check Script / check (pull_request) Failing after 38s
2025-12-15 12:57:52 -05:00
e9cab92585 feat: Impl TlsRoute for K8sAnywhereTopology 2025-12-14 22:22:09 -05:00
d06bd4dac6 feat: OKD route CRD and OKD specific route score
All checks were successful
Run Check Script / check (pull_request) Successful in 1m30s
2025-12-14 17:05:26 -05:00
142300802d wip: TlsRoute score first version
Some checks failed
Run Check Script / check (pull_request) Failing after 1m11s
2025-12-14 06:19:33 -05:00
2254641f3d fix: Tests, doctests, formatting
All checks were successful
Run Check Script / check (pull_request) Successful in 1m38s
2025-12-13 17:56:53 -05:00
b61e4f9a96 wip: Expose postgres publicly. Created tlsroute capability and postgres implementations
Some checks failed
Run Check Script / check (pull_request) Failing after 41s
2025-12-13 09:47:59 -05:00
2e367d88d4 feat: PostgreSQL score works, added postgresql example, tested on OKD 4.19, added note about incompatible default namespace settings
Some checks failed
Run Check Script / check (pull_request) Failing after 2m37s
2025-12-11 22:54:57 -05:00
9edc42a665 feat: PostgreSQLScore happy path using cnpg operator
Some checks failed
Run Check Script / check (pull_request) Failing after 37s
2025-12-11 14:36:39 -05:00
f242aafebb feat: Subscription for cnpg-operator fixed default values, tested and added to operatorhub example.
All checks were successful
Run Check Script / check (pull_request) Successful in 1m31s
2025-12-11 12:18:28 -05:00
3e14ebd62c feat: cnpg operator score
All checks were successful
Run Check Script / check (pull_request) Successful in 1m36s
2025-12-10 22:55:08 -05:00
1b19638df4 wip(failover): Started implementation of the FailoverTopology with PostgreSQL capability
All checks were successful
Run Check Script / check (pull_request) Successful in 1m32s
This is our first Higher Order Topology (see ADR-015)
2025-12-10 21:15:51 -05:00
d39b1957cd feat(k8s_app): OperatorhubCatalogSourceScore can now install the operatorhub catalogsource on a cluster that already has operator lifecycle manager installed 2025-12-10 16:58:58 -05:00
bfdb11b217 Merge pull request 'feat(OKDInstallation): Implemented bootstrap of okd worker node, added features to allow both control plane and worker node to use the same bootstrap_okd_node score' (#198) from feat/okd-nodes into master
Some checks failed
Run Check Script / check (push) Successful in 1m57s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m59s
Reviewed-on: #198
Reviewed-by: johnride <jg@nationtech.io>
2025-12-10 19:27:51 +00:00
d5fadf4f44 fix: deleted storage node role, fixed erroneous comment, modified score name to be in line with clean code naming conventions, fixed how the OKDNodeInstallationScore is called via OKDSetup03ControlPlaneScore and OKDSetup04WorkersScore
All checks were successful
Run Check Script / check (pull_request) Successful in 1m45s
2025-12-10 14:20:24 -05:00
357ca93d90 wip: FailoverTopology implementation for PostgreSQL on the way! 2025-12-10 13:12:53 -05:00
8103932f23 doc: Initial documentation for the MultisitePostgreSQL module 2025-12-10 13:12:53 -05:00
9617e1cfde Merge pull request 'adr: Higher order topologies' (#197) from adr/015-higher-order-topologies into master
Some checks failed
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m54s
Run Check Script / check (push) Successful in 1m47s
Reviewed-on: #197
2025-12-10 18:12:23 +00:00
50bd5c5bba feat(OKDInstallation): Implemented bootstrap of okd worker node, added features to allow both control plane and worker node to use the same bootstrap_okd_node score
All checks were successful
Run Check Script / check (pull_request) Successful in 1m46s
2025-12-10 12:15:07 -05:00
a953284386 doc: Add note about counter-intuitive behavior of nmstate
Some checks failed
Run Check Script / check (push) Successful in 1m33s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m28s
2025-12-09 23:04:15 -05:00
bfde5f58ed adr: Higher order topologies
All checks were successful
Run Check Script / check (pull_request) Successful in 1m33s
These types of Topologies will orchestrate behavior in regular Topologies.

For example, a FailoverTopology is a Higher Order, it will orchestrate its capabilities between a primary and a replica topology. A great use case for this is a database deployment. The FailoverTopology will deploy both instances, connect them, and the able to execute the appropriate actions to promote de replica to primary and revert back to original state.

Other use cases are ShardedTopology, DecentralizedTopology, etc.
2025-12-09 11:23:30 -05:00
9fbdc72cd0 fix: git ignore
All checks were successful
Run Check Script / check (pull_request) Successful in 1m29s
2025-11-18 08:41:09 -05:00
78e595e696 feat: added alert manager routes to openshift cluster monitoring
All checks were successful
Run Check Script / check (pull_request) Successful in 1m37s
2025-11-17 15:22:43 -05:00
90b89224d8 fix: added K8sName type for strict naming of Kubernetes resources 2025-11-17 15:20:51 -05:00
43a17811cc fix formatting
Some checks failed
Run Check Script / check (pull_request) Failing after 1m49s
2025-11-14 12:53:43 -05:00
93ac89157a feat: added score to enable snmp_server on brocade switch and a working example
All checks were successful
Run Check Script / check (pull_request) Successful in 2m4s
2025-11-14 12:49:00 -05:00
b885c35706 Merge branch 'master' into doc-and-braindump 2025-11-13 23:46:39 +00:00
Ian Letourneau
bb6b4b7f88 docs: New docs structure & rustdoc for HostNetworkConfigScore
All checks were successful
Run Check Script / check (pull_request) Successful in 1m43s
2025-11-13 18:42:26 -05:00
29c82db70d fix: added fields missing for haproxy after most recent update
Some checks failed
Run Check Script / check (pull_request) Failing after 49s
2025-11-12 13:21:55 -05:00
734c9704ab feat: provide an unmanaged switch
Some checks failed
Run Check Script / check (pull_request) Failing after 40s
2025-11-11 13:30:03 -05:00
8ee3f8a4ad chore: Update harmony-inventory-agent binary as some fixes were introduced : port is 25000 now and nbd devices wont make the inventory crash
Some checks failed
Run Check Script / check (pull_request) Failing after 40s
2025-11-11 11:32:42 -05:00
d3634a6313 fix(types): Switch port location failed on port channel interfaces 2025-11-11 09:53:59 -05:00
83c1cc82b6 fix(host_network): remove extra fields from bond config to prevent clashes (#186)
All checks were successful
Run Check Script / check (push) Successful in 1m36s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 8m16s
Also alias `port` to support both `port` and `ports` as per the nmstate spec.

Reviewed-on: #186
2025-11-11 14:12:56 +00:00
a0a8d5277c fix: opnsense definitions more accurate for various resources such as ProxyGeneral, System, StaticMap, Job, etc. Also fixed brocade crate export and some warnings 2025-11-11 09:06:36 -05:00
43b04edbae feat(brocade): Add feature and example to remove port channel and configure switchport 2025-11-10 22:59:37 -05:00
755a4b7749 feat(inventory-agent): Discover algorithm by scanning a subnet of ips, slower than mdns but more reliable and versatile 2025-11-10 22:15:31 -05:00
5953bc58f4 feat: added function to enable snmp-server for brocade switches 2025-11-10 14:57:22 -05:00
51a5afbb6d fix: added some extra details
All checks were successful
Run Check Script / check (pull_request) Successful in 1m4s
2025-11-07 09:04:27 -05:00
66d346a10c fix(host_network): skip configuration for host with only 1 interface/port (#185)
All checks were successful
Run Check Script / check (push) Successful in 1m11s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 8m11s
Reviewed-on: #185
Reviewed-by: johnride <jg@nationtech.io>
2025-11-06 00:07:20 +00:00
06a004a65d refactor(host_network): extract NetworkManager as a reusable component (#183)
Some checks failed
Run Check Script / check (push) Successful in 1m12s
Compile and package harmony_composer / package_harmony_composer (push) Has been cancelled
The NetworkManager logic was implemented directly into the `HaClusterTopology`, which wasn't directly its concern and prevented us from being able to reuse that NetworkManaager implementations in the future for a different Topology.

* Extract a `NetworkManager` trait
* Implement a `OpenShiftNmStateNetworkManager` for `NetworkManager`
* Dynamically instantiate the NetworkManager in the Topology to delegate calls to it

Reviewed-on: #183
Reviewed-by: johnride <jg@nationtech.io>
2025-11-06 00:02:52 +00:00
9d4e6acac0 fix(host_network): retrieve proper hostname and next available bond id (#182)
Some checks failed
Run Check Script / check (push) Successful in 1m9s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m24s
In order to query the current network state `NodeNetworkState` and to apply a `NodeNetworkConfigurationPolicy` for a given node, we first needed to find its hostname. As all we had was the UUID of a node.

We had different options available (e.g. updating the Harmony Inventory Agent to retrieve it, store it in the OKD installation pipeline on assignation, etc.). But for the sake of simplicity and for better flexibility (e.g. being able to run this score on a cluster that wasn't setup with Harmony), the `hostname` was retrieved directly in the cluster by running the equivalent of `kubectl get nodes -o yaml` and matching the nodes with the system UUID.

### Other changes
* Find the next available bond id for a node
* Apply a network config policy for a node (configuring a bond in our case)
* Adjust the CRDs for NMState

Note: to see a quick demo, watch the recording in #183
Reviewed-on: #182
Reviewed-by: johnride <jg@nationtech.io>
2025-11-05 23:38:24 +00:00
759a9287d3 Merge remote-tracking branch 'origin/master' into feat/cluster_monitoring
Some checks failed
Run Check Script / check (pull_request) Failing after 19s
2025-11-05 17:02:10 -05:00
24922321b1 fix: webhook name must be k8s field compliant, add a FIXME note 2025-11-05 16:59:48 -05:00
4ff57062ae Merge pull request 'feat(kube): Convert kube_openapi Resource to DynamicObject' (#180) from feat/kube_convert_dynamic_resource into master
Some checks failed
Run Check Script / check (push) Successful in 1m19s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m23s
Reviewed-on: #180
Reviewed-by: Ian Letourneau <ian@noma.to>
2025-11-05 21:48:32 +00:00
50ce54ea66 Merge pull request 'fix(opnsense-config): mark Interface::enable as optional' (#181) from fix-opnsense-config into master
Some checks failed
Run Check Script / check (push) Successful in 1m12s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m27s
Reviewed-on: #181
2025-11-05 17:13:29 +00:00
7b542c9865 feat: OPNSense Topology useful to interact with only an opnsense instance.
All checks were successful
Run Check Script / check (pull_request) Successful in 1m11s
With this work, no need to initialize a full HAClusterTopology to run
opnsense scores.

Also added an example showing how to use it and perform basic
operations.

Made a video out of it, might publish it at some point!
2025-11-05 10:02:45 -05:00
Ian Letourneau
827a49e56b fix(opnsense-config): mark Interface::enable as optional
All checks were successful
Run Check Script / check (pull_request) Successful in 1m11s
2025-11-04 17:25:30 -05:00
cf84f2cce8 wip: cluster_monitoring almost there, a kink to fix in the yaml handling
All checks were successful
Run Check Script / check (pull_request) Successful in 1m15s
2025-10-29 23:12:34 -04:00
a12d12aa4f feat: example OpenshiftClusterAlertScore
All checks were successful
Run Check Script / check (pull_request) Successful in 1m17s
2025-10-29 17:29:28 -04:00
cefb65933a wip: cluster monitoring score coming along, this simply edits OKD builtin alertmanager instance and adds a receiver 2025-10-29 17:26:21 -04:00
95cfc03518 feat(kube): Utility function to convert kube_openapi Resource to DynamicObject. This will allow initializing resources strongly typed and then bundle various types into a list of DynamicObject
All checks were successful
Run Check Script / check (pull_request) Successful in 1m18s
2025-10-29 17:24:35 -04:00
c2fa4f1869 fix:cargo fmt
All checks were successful
Run Check Script / check (pull_request) Successful in 1m21s
2025-10-29 13:53:58 -04:00
ee278ac817 Merge remote-tracking branch 'origin/master' into feat/install_opnsense_node_exporter
Some checks failed
Run Check Script / check (pull_request) Failing after 25s
2025-10-29 13:49:56 -04:00
09a06f136e Merge remote-tracking branch 'origin/master' into feat/install_opnsense_node_exporter
All checks were successful
Run Check Script / check (pull_request) Successful in 1m21s
2025-10-29 13:42:12 -04:00
5f147fa672 fix: opnsense-config reload_config() returns live config.xml rather than dropping it, allows function is_package_installed() to read live state after package installation rather than old config before installation
All checks were successful
Run Check Script / check (pull_request) Successful in 1m17s
2025-10-29 13:25:37 -04:00
c80ede706b fix(host_network): adjust bond & port-channel configuration (partial) (#175)
Some checks failed
Run Check Script / check (push) Successful in 1m20s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m21s
## Description
* Replace the CatalogSource approach to install the OperatorHub.io catalog by a more simple & straightforward way to install NMState
* Improve logging
* Add report summarizing the host network configuration that was applied (which host, bonds, port-channels)
* Fix command to find next available port channel id

## Extra info
Using the `apply_url` approach to install the NMState operator isn't the best approach: it's harder to maintain and upgrade. But it helps us achieve waht we wanted for now: install the NMState Operator to configure bonds on a host.

The preferred approach, installing an operator from the OperatorHub.io catalog, didn't work for now. We had a timeout error with DeadlineExceeded probably caused by an insufficient CPU/Memory allocation to query such a big catalog, even though we tweaked the RAM allocation (we couldn't find a way to do it for CPU).

Spent too much time on this so we stopped these efforts for now. It would be good to get back to it when we need to install something else from a custom catalog.

Reviewed-on: #175
2025-10-29 17:09:16 +00:00
9ba939bde1 wip: cargo fmt
All checks were successful
Run Check Script / check (pull_request) Successful in 1m16s
2025-10-28 15:45:02 -04:00
44bf21718c wip: example score with impl topolgy for opnsense topology 2025-10-28 14:41:15 -04:00
b2825ec1ef Merge pull request 'feat/impl_installable_crd_prometheus' (#170) from feat/impl_installable_crd_prometheus into master
Some checks failed
Run Check Script / check (push) Successful in 1m25s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m20s
Reviewed-on: #170
2025-10-24 16:42:54 +00:00
609d7acb5d feat: impl clone_box for ScrapeTarget<CRDPrometheus>
All checks were successful
Run Check Script / check (pull_request) Successful in 1m25s
2025-10-24 12:05:54 -04:00
de761cf538 Merge branch 'master' into feat/impl_installable_crd_prometheus 2025-10-24 11:23:56 -04:00
5e1580e5c1 Merge branch 'master' into doc/clone 2025-10-23 19:32:26 +00:00
1802b10ddf fix:translated documentaion notes into English 2025-10-23 15:31:45 -04:00
008b03f979 fix: changed documentation language to english 2025-10-23 14:56:07 -04:00
9f7b90d182 feat(argocd): Can now detect argocd instance when already installed and write crd accordingly. One major caveat though is that crd versions are not managed properly yet
Some checks failed
Run Check Script / check (pull_request) Failing after 39s
2025-10-23 13:12:38 -04:00
dc70266b5a wip: install argocd app depending on how argocd is already installed in the cluster 2025-10-23 13:11:39 -04:00
8fb755cda1 wip: argocd discovery 2025-10-23 13:10:35 -04:00
cb7a64b160 feat: Support tls enabled by default on rust web app 2025-10-23 13:10:35 -04:00
afdd511a6d feat(application): Webapp feature with production dns 2025-10-23 13:10:35 -04:00
c069207f12 Merge pull request 'refactor(ha_cluster): inject switch client for better testability' (#174) from switch-client into master
Some checks failed
Run Check Script / check (push) Successful in 1m44s
Compile and package harmony_composer / package_harmony_composer (push) Failing after 2m43s
Reviewed-on: #174
2025-10-23 15:05:17 +00:00
Ian Letourneau
7368184917 fix(ha_cluster): inject switch client for better testability
All checks were successful
Run Check Script / check (pull_request) Successful in 1m30s
2025-10-22 15:12:53 -04:00
5ab58f0253 fix: added impl node exporter for hacluster topology and dummy infra
All checks were successful
Run Check Script / check (pull_request) Successful in 1m26s
2025-10-22 14:39:12 -04:00
5af13800b7 fix: removed unimplemnted marco and returned Err instead
Some checks failed
Run Check Script / check (pull_request) Failing after 29s
some formatting error
2025-10-22 11:51:22 -04:00
05205f4ac1 Merge pull request 'feat: scrape targets to be able to get snmp alerts from machines to prometheus' (#171) from feat/scrape_target into master
Some checks are pending
Run Check Script / check (push) Waiting to run
Compile and package harmony_composer / package_harmony_composer (push) Waiting to run
Reviewed-on: #171
2025-10-22 15:33:24 +00:00
3174645c97 Merge branch 'master' into feat/scrape_target
All checks were successful
Run Check Script / check (pull_request) Successful in 1m32s
2025-10-22 15:33:01 +00:00
8126b233d8 feat: implementation for opnsense os-node_exporter
Some checks failed
Run Check Script / check (pull_request) Failing after 41s
2025-10-22 11:27:28 -04:00
ce91ee0168 fix: removed dead code, mapped error from grafana operator to preparation error rather than ignoring it, modified k8sprometheus score to unwrap_or_default() service monitors
Some checks failed
Run Check Script / check (pull_request) Has been cancelled
2025-10-20 15:31:06 -04:00
cb66b7592e fix: made targets plural and changed scrape targets to option in AlertingInterpret
Some checks failed
Run Check Script / check (pull_request) Has been cancelled
2025-10-20 14:44:37 -04:00
a815f6ac9c feat: scrape targets to be able to get snmp alerts from machines to prometheus
Some checks failed
Run Check Script / check (pull_request) Has been cancelled
2025-10-20 11:44:11 -04:00
c0d54a4466 Merge remote-tracking branch 'origin/master' into feat/impl_installable_crd_prometheus
Some checks failed
Run Check Script / check (pull_request) Has been cancelled
2025-10-16 14:17:32 -04:00
fc384599a1 feat: implementation of Installable for CRDPrometheusIntroduction of Grafana trait and its impl for k8sanywhereallows for CRDPrometheus to be installed via AlertingInterpret which standardizes the installation of alert receivers, alerting rules, and alert senders 2025-10-16 14:07:23 -04:00
7dff70edcf wip: fixed token expiration and configured grafana dashboard 2025-10-15 15:26:36 -04:00
06a0c44c3c wip: connected the thanos-datasource to grafana, need to complete connecting the openshift-userworkload-monitoring as well 2025-10-14 15:53:42 -04:00
85bec66e58 wip: fixing grafana datasource for openshift which requires creating a token, sa, secret and inserting them into the grafanadatasource 2025-10-10 12:09:26 -04:00
e5eb7fde9f doc to clone and transfer a coreos disk
All checks were successful
Run Check Script / check (pull_request) Successful in 1m11s
2025-10-09 15:29:09 -04:00
dd3f07e5b7 doc for removing worker flag from cp on UPI
All checks were successful
Run Check Script / check (pull_request) Successful in 1m13s
2025-10-09 15:28:42 -04:00
1f3796f503 refactor(prometheus): modified crd prometheus to impl the installable trait 2025-10-09 12:26:05 -04:00
58b6268989 wip: moving the install steps for grafana and prometheus into the trait installable<T> 2025-09-29 10:46:29 -04:00
320 changed files with 25639 additions and 4843 deletions

View File

@@ -1,2 +1,6 @@
target/
Dockerfile
Dockerfile
.git
data
target
demos

5
.gitignore vendored
View File

@@ -24,3 +24,8 @@ Cargo.lock
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
.harmony_generated
# Useful to create ignore folders for temp files and notes
ignore

View File

@@ -0,0 +1,26 @@
{
"db_name": "SQLite",
"query": "SELECT host_id, installation_device FROM host_role_mapping WHERE role = ?",
"describe": {
"columns": [
{
"name": "host_id",
"ordinal": 0,
"type_info": "Text"
},
{
"name": "installation_device",
"ordinal": 1,
"type_info": "Text"
}
],
"parameters": {
"Right": 1
},
"nullable": [
false,
true
]
},
"hash": "24f719d57144ecf4daa55f0aa5836c165872d70164401c0388e8d625f1b72d7b"
}

View File

@@ -1,20 +0,0 @@
{
"db_name": "SQLite",
"query": "SELECT host_id FROM host_role_mapping WHERE role = ?",
"describe": {
"columns": [
{
"name": "host_id",
"ordinal": 0,
"type_info": "Text"
}
],
"parameters": {
"Right": 1
},
"nullable": [
false
]
},
"hash": "2ea29df2326f7c84bd4100ad510a3fd4878dc2e217dc83f9bf45a402dfd62a91"
}

View File

@@ -1,12 +1,12 @@
{
"db_name": "SQLite",
"query": "\n INSERT INTO host_role_mapping (host_id, role)\n VALUES (?, ?)\n ",
"query": "\n INSERT INTO host_role_mapping (host_id, role, installation_device)\n VALUES (?, ?, ?)\n ",
"describe": {
"columns": [],
"parameters": {
"Right": 2
"Right": 3
},
"nullable": []
},
"hash": "df7a7c9cfdd0972e2e0ce7ea444ba8bc9d708a4fb89d5593a0be2bbebde62aff"
"hash": "6fcc29cfdbdf3b2cee94a4844e227f09b245dd8f079832a9a7b774151cb03af6"
}

599
Cargo.lock generated
View File

@@ -243,7 +243,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
"const-random",
"getrandom 0.3.3",
"getrandom 0.3.4",
"once_cell",
"version_check",
"zerocopy",
@@ -450,6 +450,43 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "async-nats"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86dde77d8a733a9dbaf865a9eb65c72e09c88f3d14d3dd0d2aecf511920ee4fe"
dependencies = [
"base64 0.22.1",
"bytes",
"futures-util",
"memchr",
"nkeys",
"nuid",
"once_cell",
"pin-project",
"portable-atomic",
"rand 0.8.5",
"regex",
"ring",
"rustls-native-certs 0.7.3",
"rustls-pemfile 2.2.0",
"rustls-webpki 0.102.8",
"serde",
"serde_json",
"serde_nanos",
"serde_repr",
"thiserror 1.0.69",
"time",
"tokio",
"tokio-rustls 0.26.2",
"tokio-stream",
"tokio-util",
"tokio-websockets",
"tracing",
"tryhard",
"url",
]
[[package]]
name = "async-stream"
version = "0.3.6"
@@ -686,10 +723,46 @@ dependencies = [
"regex",
"russh",
"russh-keys",
"schemars 0.8.22",
"serde",
"tokio",
]
[[package]]
name = "brocade-snmp-server"
version = "0.1.0"
dependencies = [
"base64 0.22.1",
"brocade",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_secret",
"harmony_types",
"log",
"serde",
"tokio",
"url",
]
[[package]]
name = "brocade-switch"
version = "0.1.0"
dependencies = [
"async-trait",
"brocade",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"log",
"serde",
"tokio",
"url",
]
[[package]]
name = "brotli"
version = "8.0.2"
@@ -739,6 +812,9 @@ name = "bytes"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
dependencies = [
"serde",
]
[[package]]
name = "bytestring"
@@ -840,6 +916,22 @@ dependencies = [
"shlex",
]
[[package]]
name = "cert_manager"
version = "0.1.0"
dependencies = [
"assert_cmd",
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"log",
"tokio",
"url",
]
[[package]]
name = "cfg-if"
version = "1.0.3"
@@ -1170,6 +1262,22 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "crossterm"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13"
dependencies = [
"bitflags 1.3.2",
"crossterm_winapi",
"libc",
"mio 0.8.11",
"parking_lot",
"signal-hook",
"signal-hook-mio",
"winapi 0.3.9",
]
[[package]]
name = "crossterm"
version = "0.28.1"
@@ -1515,6 +1623,7 @@ dependencies = [
"rand_core 0.6.4",
"serde",
"sha2",
"signature",
"subtle",
"zeroize",
]
@@ -1719,6 +1828,40 @@ dependencies = [
"url",
]
[[package]]
name = "example-k8s-drain-node"
version = "0.1.0"
dependencies = [
"assert_cmd",
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"inquire 0.7.5",
"log",
"tokio",
"url",
]
[[package]]
name = "example-k8s-write-file-on-node"
version = "0.1.0"
dependencies = [
"assert_cmd",
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"inquire 0.7.5",
"log",
"tokio",
"url",
]
[[package]]
name = "example-kube-rs"
version = "0.1.0"
@@ -1728,7 +1871,7 @@ dependencies = [
"harmony",
"harmony_macros",
"http 1.3.1",
"inquire",
"inquire 0.7.5",
"k8s-openapi",
"kube",
"log",
@@ -1777,21 +1920,67 @@ dependencies = [
]
[[package]]
name = "example-nanodc"
name = "example-multisite-postgres"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_secret",
"harmony_tui",
"harmony_types",
"log",
"tokio",
"url",
]
[[package]]
name = "example-nats"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"log",
"tokio",
"url",
]
[[package]]
name = "example-nats-module-supercluster"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"k8s-openapi",
"log",
"tokio",
"url",
]
[[package]]
name = "example-nats-supercluster"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"k8s-openapi",
"log",
"tokio",
"url",
]
[[package]]
name = "example-ntfy"
version = "0.1.0"
@@ -1803,9 +1992,10 @@ dependencies = [
]
[[package]]
name = "example-okd-install"
name = "example-okd-cluster-alerts"
version = "0.1.0"
dependencies = [
"brocade",
"cidr",
"env_logger",
"harmony",
@@ -1820,6 +2010,26 @@ dependencies = [
"url",
]
[[package]]
name = "example-okd-install"
version = "0.1.0"
dependencies = [
"brocade",
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_secret",
"harmony_secret_derive",
"harmony_types",
"log",
"schemars 0.8.22",
"serde",
"tokio",
"url",
]
[[package]]
name = "example-openbao"
version = "0.1.0"
@@ -1833,14 +2043,14 @@ dependencies = [
]
[[package]]
name = "example-opnsense"
name = "example-operatorhub-catalogsource"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_tui",
"harmony_types",
"log",
"tokio",
@@ -1848,9 +2058,29 @@ dependencies = [
]
[[package]]
name = "example-pxe"
name = "example-opnsense"
version = "0.1.0"
dependencies = [
"brocade",
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_secret",
"harmony_types",
"log",
"schemars 0.8.22",
"serde",
"tokio",
"url",
]
[[package]]
name = "example-opnsense-node-exporter"
version = "0.1.0"
dependencies = [
"async-trait",
"cidr",
"env_logger",
"harmony",
@@ -1865,6 +2095,65 @@ dependencies = [
"url",
]
[[package]]
name = "example-postgresql"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"log",
"tokio",
"url",
]
[[package]]
name = "example-public-postgres"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"log",
"tokio",
"url",
]
[[package]]
name = "example-pxe"
version = "0.1.0"
dependencies = [
"brocade",
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_secret",
"harmony_secret_derive",
"harmony_types",
"log",
"schemars 0.8.22",
"serde",
"tokio",
"url",
]
[[package]]
name = "example-remove-rook-osd"
version = "0.1.0"
dependencies = [
"harmony",
"harmony_cli",
"tokio",
]
[[package]]
name = "example-rust"
version = "0.1.0"
@@ -1918,8 +2207,6 @@ dependencies = [
"env_logger",
"harmony",
"harmony_macros",
"harmony_secret",
"harmony_secret_derive",
"harmony_tui",
"harmony_types",
"log",
@@ -2064,9 +2351,9 @@ dependencies = [
[[package]]
name = "fqdn"
version = "0.4.6"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0f5d7f7b3eed2f771fc7f6fcb651f9560d7b0c483d75876082acb4649d266b3"
checksum = "886ac788f62d16d6b0f26b2fa762b34ef16ebfb4b624c2c15fbcadc9173c0f72"
dependencies = [
"punycode",
"serde",
@@ -2244,21 +2531,21 @@ dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi 0.11.1+wasi-snapshot-preview1",
"wasi",
"wasm-bindgen",
]
[[package]]
name = "getrandom"
version = "0.3.3"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"r-efi",
"wasi 0.14.3+wasi-0.2.4",
"wasip2",
"wasm-bindgen",
]
@@ -2360,6 +2647,7 @@ dependencies = [
"env_logger",
"fqdn",
"futures-util",
"harmony_execution",
"harmony_inventory_agent",
"harmony_macros",
"harmony_secret",
@@ -2368,7 +2656,7 @@ dependencies = [
"helm-wrapper-rs",
"hex",
"http 1.3.1",
"inquire",
"inquire 0.7.5",
"k3d-rs",
"k8s-openapi",
"kube",
@@ -2400,12 +2688,50 @@ dependencies = [
"tempfile",
"thiserror 2.0.16",
"tokio",
"tokio-retry",
"tokio-util",
"url",
"uuid",
"walkdir",
]
[[package]]
name = "harmony_agent"
version = "0.1.0"
dependencies = [
"async-nats",
"async-trait",
"cidr",
"env_logger",
"getrandom 0.3.4",
"harmony",
"harmony_macros",
"harmony_types",
"log",
"pretty_assertions",
"serde",
"serde_json",
"thiserror 2.0.16",
"tokio",
]
[[package]]
name = "harmony_agent_deploy"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"log",
"serde",
"serde_json",
"tokio",
"url",
]
[[package]]
name = "harmony_cli"
version = "0.1.0"
@@ -2419,7 +2745,7 @@ dependencies = [
"harmony_tui",
"indicatif",
"indicatif-log-bridge",
"inquire",
"inquire 0.7.5",
"lazy_static",
"log",
"tokio",
@@ -2446,6 +2772,16 @@ dependencies = [
"tokio",
]
[[package]]
name = "harmony_execution"
version = "0.1.0"
dependencies = [
"directories",
"lazy_static",
"log",
"thiserror 2.0.16",
]
[[package]]
name = "harmony_inventory_agent"
version = "0.1.0"
@@ -2465,6 +2801,19 @@ dependencies = [
"tokio",
]
[[package]]
name = "harmony_inventory_builder"
version = "0.1.0"
dependencies = [
"cidr",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"tokio",
"url",
]
[[package]]
name = "harmony_macros"
version = "0.1.0"
@@ -2487,10 +2836,12 @@ dependencies = [
"harmony_secret_derive",
"http 1.3.1",
"infisical",
"inquire",
"inquire 0.7.5",
"interactive-parse",
"lazy_static",
"log",
"pretty_assertions",
"schemars 0.8.22",
"serde",
"serde_json",
"tempfile",
@@ -2530,8 +2881,10 @@ dependencies = [
name = "harmony_types"
version = "0.1.0"
dependencies = [
"log",
"rand 0.9.2",
"serde",
"serde_json",
"url",
]
@@ -3153,6 +3506,22 @@ dependencies = [
"generic-array",
]
[[package]]
name = "inquire"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c33e7c1ddeb15c9abcbfef6029d8e29f69b52b6d6c891031b88ed91b5065803b"
dependencies = [
"bitflags 1.3.2",
"crossterm 0.25.0",
"dyn-clone",
"lazy_static",
"newline-converter 0.2.2",
"thiserror 1.0.69",
"unicode-segmentation",
"unicode-width 0.1.14",
]
[[package]]
name = "inquire"
version = "0.7.5"
@@ -3164,7 +3533,7 @@ dependencies = [
"dyn-clone",
"fuzzy-matcher",
"fxhash",
"newline-converter",
"newline-converter 0.3.0",
"once_cell",
"unicode-segmentation",
"unicode-width 0.1.14",
@@ -3183,6 +3552,22 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "interactive-parse"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82c6684d66c9fd6b51cafbf2a9105583d5046dd4c6363f31745686f503a285e8"
dependencies = [
"crossterm 0.26.1",
"inquire 0.6.2",
"lazy_static",
"log",
"schemars 0.8.22",
"serde",
"serde_json",
"thiserror 1.0.69",
]
[[package]]
name = "io-uring"
version = "0.7.10"
@@ -3261,7 +3646,7 @@ version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom 0.3.3",
"getrandom 0.3.4",
"libc",
]
@@ -3701,7 +4086,7 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"log",
"wasi 0.11.1+wasi-snapshot-preview1",
"wasi",
"windows-sys 0.48.0",
]
@@ -3713,7 +4098,7 @@ checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
dependencies = [
"libc",
"log",
"wasi 0.11.1+wasi-snapshot-preview1",
"wasi",
"windows-sys 0.59.0",
]
@@ -3742,6 +4127,15 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "newline-converter"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f71d09d5c87634207f894c6b31b6a2b2c64ea3bdcf71bd5599fdbbe1600c00f"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "newline-converter"
version = "0.3.0"
@@ -3751,6 +4145,21 @@ dependencies = [
"unicode-segmentation",
]
[[package]]
name = "nkeys"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879011babc47a1c7fdf5a935ae3cfe94f34645ca0cac1c7f6424b36fc743d1bf"
dependencies = [
"data-encoding",
"ed25519",
"ed25519-dalek",
"getrandom 0.2.16",
"log",
"rand 0.8.5",
"signatory",
]
[[package]]
name = "non-blank-string-rs"
version = "1.0.4"
@@ -3769,6 +4178,15 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "nuid"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc895af95856f929163a0aa20c26a78d26bfdc839f51b9d5aa7a5b79e52b7e83"
dependencies = [
"rand 0.8.5",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
@@ -4389,7 +4807,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
dependencies = [
"bytes",
"getrandom 0.3.3",
"getrandom 0.3.4",
"lru-slab",
"rand 0.9.2",
"ring",
@@ -4494,7 +4912,7 @@ version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
dependencies = [
"getrandom 0.3.3",
"getrandom 0.3.4",
]
[[package]]
@@ -4613,15 +5031,6 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]]
name = "remove_rook_osd"
version = "0.1.0"
dependencies = [
"harmony",
"harmony_cli",
"tokio",
]
[[package]]
name = "reqwest"
version = "0.11.27"
@@ -5039,6 +5448,16 @@ dependencies = [
"untrusted",
]
[[package]]
name = "rustls-webpki"
version = "0.102.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9"
dependencies = [
"rustls-pki-types",
"untrusted",
]
[[package]]
name = "rustls-webpki"
version = "0.103.4"
@@ -5302,6 +5721,15 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_nanos"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a93142f0367a4cc53ae0fead1bcda39e85beccfad3dcd717656cacab94b12985"
dependencies = [
"serde",
]
[[package]]
name = "serde_path_to_error"
version = "0.1.17"
@@ -5469,6 +5897,18 @@ dependencies = [
"libc",
]
[[package]]
name = "signatory"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1e303f8205714074f6068773f0e29527e0453937fe837c9717d066635b65f31"
dependencies = [
"pkcs8",
"rand_core 0.6.4",
"signature",
"zeroize",
]
[[package]]
name = "signature"
version = "2.2.0"
@@ -5894,6 +6334,26 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "sttest"
version = "0.1.0"
dependencies = [
"brocade",
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_secret",
"harmony_secret_derive",
"harmony_types",
"log",
"schemars 0.8.22",
"serde",
"tokio",
"url",
]
[[package]]
name = "subtle"
version = "2.6.1"
@@ -6032,7 +6492,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e"
dependencies = [
"fastrand",
"getrandom 0.3.3",
"getrandom 0.3.4",
"once_cell",
"rustix 1.0.8",
"windows-sys 0.60.2",
@@ -6188,6 +6648,17 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "tokio-retry"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f"
dependencies = [
"pin-project",
"rand 0.8.5",
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.24.1"
@@ -6245,6 +6716,27 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-websockets"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d"
dependencies = [
"base64 0.22.1",
"bytes",
"futures-core",
"futures-sink",
"http 1.3.1",
"httparse",
"rand 0.8.5",
"ring",
"rustls-pki-types",
"tokio",
"tokio-rustls 0.26.2",
"tokio-util",
"webpki-roots 0.26.11",
]
[[package]]
name = "toml"
version = "0.8.23"
@@ -6396,6 +6888,16 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "tryhard"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fe58ebd5edd976e0fe0f8a14d2a04b7c81ef153ea9a54eebc42e67c2c23b4e5"
dependencies = [
"pin-project-lite",
"tokio",
]
[[package]]
name = "tui-logger"
version = "0.14.5"
@@ -6572,7 +7074,7 @@ version = "1.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
dependencies = [
"getrandom 0.3.3",
"getrandom 0.3.4",
"js-sys",
"rand 0.9.2",
"uuid-macro-internal",
@@ -6643,10 +7145,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasi"
version = "0.14.3+wasi-0.2.4"
name = "wasip2"
version = "1.0.2+wasi-0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a51ae83037bdd272a9e28ce236db8c07016dd0d50c27038b3f407533c030c95"
checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
dependencies = [
"wit-bindgen",
]
@@ -6768,6 +7270,15 @@ version = "0.25.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1"
[[package]]
name = "webpki-roots"
version = "0.26.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
dependencies = [
"webpki-roots 1.0.2",
]
[[package]]
name = "webpki-roots"
version = "1.0.2"
@@ -7145,9 +7656,9 @@ dependencies = [
[[package]]
name = "wit-bindgen"
version = "0.45.0"
version = "0.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052283831dbae3d879dc7f51f3d92703a316ca49f91540417d38591826127814"
checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
[[package]]
name = "writeable"
@@ -7189,7 +7700,7 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "yaserde"
version = "0.12.0"
source = "git+https://github.com/jggc/yaserde.git#adfdb1c5f4d054f114e5bd0ea7bda9c07a369def"
source = "git+https://github.com/jggc/yaserde.git#2eacb304113beee7270a10b81046d40ed3a99550"
dependencies = [
"log",
"xml-rs",
@@ -7198,7 +7709,7 @@ dependencies = [
[[package]]
name = "yaserde_derive"
version = "0.12.0"
source = "git+https://github.com/jggc/yaserde.git#adfdb1c5f4d054f114e5bd0ea7bda9c07a369def"
source = "git+https://github.com/jggc/yaserde.git#2eacb304113beee7270a10b81046d40ed3a99550"
dependencies = [
"heck",
"log",

View File

@@ -7,6 +7,7 @@ members = [
"harmony_types",
"harmony_macros",
"harmony_tui",
"harmony_execution",
"opnsense-config",
"opnsense-config-xml",
"harmony_cli",
@@ -17,6 +18,8 @@ members = [
"harmony_secret",
"adr/agent_discovery/mdns",
"brocade",
"harmony_agent",
"harmony_agent/deploy",
]
[workspace.package]
@@ -49,6 +52,7 @@ kube = { version = "1.1.0", features = [
"jsonpatch",
] }
k8s-openapi = { version = "0.25", features = ["v1_30"] }
# TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated https://github.com/sebastienrousseau/serde_yml
serde_yaml = "0.9"
serde-value = "0.7"
http = "1.2"

View File

@@ -1,4 +1,8 @@
# Harmony : Open-source infrastructure orchestration that treats your platform like first-class code
# Harmony
Open-source infrastructure orchestration that treats your platform like first-class code.
In other words, Harmony is a **next-generation platform engineering framework**.
_By [NationTech](https://nationtech.io)_
@@ -18,9 +22,7 @@ All in **one strongly-typed Rust codebase**.
From a **developer laptop** to a **global production cluster**, a single **source of truth** drives the **full software lifecycle.**
---
## 1 · The Harmony Philosophy
## The Harmony Philosophy
Infrastructure is essential, but it shouldnt be your core business. Harmony is built on three guiding principles that make modern platforms reliable, repeatable, and easy to reason about.
@@ -32,9 +34,18 @@ Infrastructure is essential, but it shouldnt be your core business. Harmony i
These principles surface as simple, ergonomic Rust APIs that let teams focus on their product while trusting the platform underneath.
---
## Where to Start
## 2 · Quick Start
We have a comprehensive set of documentation right here in the repository.
| I want to... | Start Here |
| ----------------- | ------------------------------------------------------------------ |
| Get Started | [Getting Started Guide](./docs/guides/getting-started.md) |
| See an Example | [Use Case: Deploy a Rust Web App](./docs/use-cases/rust-webapp.md) |
| Explore | [Documentation Hub](./docs/README.md) |
| See Core Concepts | [Core Concepts Explained](./docs/concepts.md) |
## Quick Look: Deploy a Rust Webapp
The snippet below spins up a complete **production-grade Rust + Leptos Webapp** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.
@@ -92,63 +103,33 @@ async fn main() {
}
```
Run it:
To run this:
```bash
cargo run
```
- Clone the repository: `git clone https://git.nationtech.io/nationtech/harmony`
- Install dependencies: `cargo build --release`
- Run the example: `cargo run --example try_rust_webapp`
Harmony analyses the code, shows an execution plan in a TUI, and applies it once you confirm. Same code, same binary—every environment.
## Documentation
---
All documentation is in the `/docs` directory.
## 3 · Core Concepts
- [Documentation Hub](./docs/README.md): The main entry point for all documentation.
- [Core Concepts](./docs/concepts.md): A detailed look at Score, Topology, Capability, Inventory, and Interpret.
- [Component Catalogs](./docs/catalogs/README.md): Discover all available Scores, Topologies, and Capabilities.
- [Developer Guide](./docs/guides/developer-guide.md): Learn how to write your own Scores and Topologies.
| Term | One-liner |
| ---------------- | ---------------------------------------------------------------------------------------------------- |
| **Score<T>** | Declarative description of the desired state (e.g., `LAMPScore`). |
| **Interpret<T>** | Imperative logic that realises a `Score` on a specific environment. |
| **Topology** | An environment (local k3d, AWS, bare-metal) exposing verified _Capabilities_ (Kubernetes, DNS, …). |
| **Maestro** | Orchestrator that compiles Scores + Topology, ensuring all capabilities line up **at compile-time**. |
| **Inventory** | Optional catalogue of physical assets for bare-metal and edge deployments. |
## Architectural Decision Records
A visual overview is in the diagram below.
- [ADR-001 · Why Rust](adr/001-rust.md)
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
- [ADR-006 · Secret Management](adr/006-secret-management.md)
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
[Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)
## Contribute
---
Discussions and roadmap live in [Issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
## 4 · Install
Prerequisites:
- Rust
- Docker (if you deploy locally)
- `kubectl` / `helm` for Kubernetes-based topologies
```bash
git clone https://git.nationtech.io/nationtech/harmony
cd harmony
cargo build --release # builds the CLI, TUI and libraries
```
---
## 5 · Learning More
- **Architectural Decision Records** dive into the rationale
- [ADR-001 · Why Rust](adr/001-rust.md)
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
- [ADR-006 · Secret Management](adr/006-secret-management.md)
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
- **Extending Harmony** write new Scores / Interprets, add hardware like OPNsense firewalls, or embed Harmony in your own tooling (`/docs`).
- **Community** discussions and roadmap live in [GitLab issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
---
## 6 · License
## License
Harmony is released under the **GNU AGPL v3**.

View File

@@ -0,0 +1,114 @@
# Architecture Decision Record: Higher-Order Topologies
**Initial Author:** Jean-Gabriel Gill-Couture
**Initial Date:** 2025-12-08
**Last Updated Date:** 2025-12-08
## Status
Implemented
## Context
Harmony models infrastructure as **Topologies** (deployment targets like `K8sAnywhereTopology`, `LinuxHostTopology`) implementing **Capabilities** (tech traits like `PostgreSQL`, `Docker`).
**Higher-Order Topologies** (e.g., `FailoverTopology<T>`) compose/orchestrate capabilities *across* multiple underlying topologies (e.g., primary+replica `T`).
Naive design requires manual `impl Capability for HigherOrderTopology<T>` *per T per capability*, causing:
- **Impl explosion**: N topologies × M capabilities = N×M boilerplate.
- **ISP violation**: Topologies forced to impl unrelated capabilities.
- **Maintenance hell**: New topology needs impls for *all* orchestrated capabilities; new capability needs impls for *all* topologies/higher-order.
- **Barrier to extension**: Users can't easily add topologies without todos/panics.
This makes scaling Harmony impractical as ecosystem grows.
## Decision
Use **blanket trait impls** on higher-order topologies to *automatically* derive orchestration:
````rust
/// Higher-Order Topology: Orchestrates capabilities across sub-topologies.
pub struct FailoverTopology<T> {
/// Primary sub-topology.
primary: T,
/// Replica sub-topology.
replica: T,
}
/// Automatically provides PostgreSQL failover for *any* `T: PostgreSQL`.
/// Delegates to primary for queries; orchestrates deploy across both.
#[async_trait]
impl<T: PostgreSQL> PostgreSQL for FailoverTopology<T> {
async fn deploy(&self, config: &PostgreSQLConfig) -> Result<String, String> {
// Deploy primary; extract certs/endpoint;
// deploy replica with pg_basebackup + TLS passthrough.
// (Full impl logged/elaborated.)
}
// Delegate queries to primary.
async fn get_replication_certs(&self, cluster_name: &str) -> Result<ReplicationCerts, String> {
self.primary.get_replication_certs(cluster_name).await
}
// ...
}
/// Similarly for other capabilities.
#[async_trait]
impl<T: Docker> Docker for FailoverTopology<T> {
// Failover Docker orchestration.
}
````
**Key properties:**
- **Auto-derivation**: `Failover<K8sAnywhere>` gets `PostgreSQL` iff `K8sAnywhere: PostgreSQL`.
- **No boilerplate**: One blanket impl per capability *per higher-order type*.
## Rationale
- **Composition via generics**: Rust trait solver auto-selects impls; zero runtime cost.
- **Compile-time safety**: Missing `T: Capability` → compile error (no panics).
- **Scalable**: O(capabilities) impls per higher-order; new `T` auto-works.
- **ISP-respecting**: Capabilities only surface if sub-topology provides.
- **Centralized logic**: Orchestration (e.g., cert propagation) in one place.
**Example usage:**
````rust
// ✅ Works: K8sAnywhere: PostgreSQL → Failover provides failover PG
let pg_failover: FailoverTopology<K8sAnywhereTopology> = ...;
pg_failover.deploy_pg(config).await;
// ✅ Works: LinuxHost: Docker → Failover provides failover Docker
let docker_failover: FailoverTopology<LinuxHostTopology> = ...;
docker_failover.deploy_docker(...).await;
// ❌ Compile fail: K8sAnywhere !: Docker
let invalid: FailoverTopology<K8sAnywhereTopology>;
invalid.deploy_docker(...); // `T: Docker` bound unsatisfied
````
## Consequences
**Pros:**
- **Extensible**: New topology `AWSTopology: PostgreSQL` → instant `Failover<AWSTopology>: PostgreSQL`.
- **Lean**: No useless impls (e.g., no `K8sAnywhere: Docker`).
- **Observable**: Logs trace every step.
**Cons:**
- **Monomorphization**: Generics generate code per T (mitigated: few Ts).
- **Delegation opacity**: Relies on rustdoc/logs for internals.
## Alternatives considered
| Approach | Pros | Cons |
|----------|------|------|
| **Manual per-T impls**<br>`impl PG for Failover<K8s> {..}`<br>`impl PG for Failover<Linux> {..}` | Explicit control | N×M explosion; violates ISP; hard to extend. |
| **Dynamic trait objects**<br>`Box<dyn AnyCapability>` | Runtime flex | Perf hit; type erasure; error-prone dispatch. |
| **Mega-topology trait**<br>All-in-one `OrchestratedTopology` | Simple wiring | Monolithic; poor composition. |
| **Registry dispatch**<br>Runtime capability lookup | Decoupled | Complex; no compile safety; perf/debug overhead. |
**Selected**: Blanket impls leverage Rust generics for safe, zero-cost composition.
## Additional Notes
- Applies to `MultisiteTopology<T>`, `ShardedTopology<T>`, etc.
- `FailoverTopology` in `failover.rs` is first implementation.

View File

@@ -0,0 +1,153 @@
//! Example of Higher-Order Topologies in Harmony.
//! Demonstrates how `FailoverTopology<T>` automatically provides failover for *any* capability
//! supported by a sub-topology `T` via blanket trait impls.
//!
//! Key insight: No manual impls per T or capability -- scales effortlessly.
//! Users can:
//! - Write new `Topology` (impl capabilities on a struct).
//! - Compose with `FailoverTopology` (gets capabilities if T has them).
//! - Compile fails if capability missing (safety).
use async_trait::async_trait;
use tokio;
/// Capability trait: Deploy and manage PostgreSQL.
#[async_trait]
pub trait PostgreSQL {
async fn deploy(&self, config: &PostgreSQLConfig) -> Result<String, String>;
async fn get_replication_certs(&self, cluster_name: &str) -> Result<ReplicationCerts, String>;
}
/// Capability trait: Deploy Docker.
#[async_trait]
pub trait Docker {
async fn deploy_docker(&self) -> Result<String, String>;
}
/// Configuration for PostgreSQL deployments.
#[derive(Clone)]
pub struct PostgreSQLConfig;
/// Replication certificates.
#[derive(Clone)]
pub struct ReplicationCerts;
/// Concrete topology: Kubernetes Anywhere (supports PostgreSQL).
#[derive(Clone)]
pub struct K8sAnywhereTopology;
#[async_trait]
impl PostgreSQL for K8sAnywhereTopology {
async fn deploy(&self, _config: &PostgreSQLConfig) -> Result<String, String> {
// Real impl: Use k8s helm chart, operator, etc.
Ok("K8sAnywhere PostgreSQL deployed".to_string())
}
async fn get_replication_certs(&self, _cluster_name: &str) -> Result<ReplicationCerts, String> {
Ok(ReplicationCerts)
}
}
/// Concrete topology: Linux Host (supports Docker).
#[derive(Clone)]
pub struct LinuxHostTopology;
#[async_trait]
impl Docker for LinuxHostTopology {
async fn deploy_docker(&self) -> Result<String, String> {
// Real impl: Install/configure Docker on host.
Ok("LinuxHost Docker deployed".to_string())
}
}
/// Higher-Order Topology: Composes multiple sub-topologies (primary + replica).
/// Automatically derives *all* capabilities of `T` with failover orchestration.
///
/// - If `T: PostgreSQL`, then `FailoverTopology<T>: PostgreSQL` (blanket impl).
/// - Same for `Docker`, etc. No boilerplate!
/// - Compile-time safe: Missing `T: Capability` → error.
#[derive(Clone)]
pub struct FailoverTopology<T> {
/// Primary sub-topology.
pub primary: T,
/// Replica sub-topology.
pub replica: T,
}
/// Blanket impl: Failover PostgreSQL if T provides PostgreSQL.
/// Delegates reads to primary; deploys to both.
#[async_trait]
impl<T: PostgreSQL + Send + Sync + Clone> PostgreSQL for FailoverTopology<T> {
async fn deploy(&self, config: &PostgreSQLConfig) -> Result<String, String> {
// Orchestrate: Deploy primary first, then replica (e.g., via pg_basebackup).
let primary_result = self.primary.deploy(config).await?;
let replica_result = self.replica.deploy(config).await?;
Ok(format!("Failover PG deployed: {} | {}", primary_result, replica_result))
}
async fn get_replication_certs(&self, cluster_name: &str) -> Result<ReplicationCerts, String> {
// Delegate to primary (replica follows).
self.primary.get_replication_certs(cluster_name).await
}
}
/// Blanket impl: Failover Docker if T provides Docker.
#[async_trait]
impl<T: Docker + Send + Sync + Clone> Docker for FailoverTopology<T> {
async fn deploy_docker(&self) -> Result<String, String> {
// Orchestrate across primary + replica.
let primary_result = self.primary.deploy_docker().await?;
let replica_result = self.replica.deploy_docker().await?;
Ok(format!("Failover Docker deployed: {} | {}", primary_result, replica_result))
}
}
#[tokio::main]
async fn main() {
let config = PostgreSQLConfig;
println!("=== ✅ PostgreSQL Failover (K8sAnywhere supports PG) ===");
let pg_failover = FailoverTopology {
primary: K8sAnywhereTopology,
replica: K8sAnywhereTopology,
};
let result = pg_failover.deploy(&config).await.unwrap();
println!("Result: {}", result);
println!("\n=== ✅ Docker Failover (LinuxHost supports Docker) ===");
let docker_failover = FailoverTopology {
primary: LinuxHostTopology,
replica: LinuxHostTopology,
};
let result = docker_failover.deploy_docker().await.unwrap();
println!("Result: {}", result);
println!("\n=== ❌ Would fail to compile (K8sAnywhere !: Docker) ===");
// let invalid = FailoverTopology {
// primary: K8sAnywhereTopology,
// replica: K8sAnywhereTopology,
// };
// invalid.deploy_docker().await.unwrap(); // Error: `K8sAnywhereTopology: Docker` not satisfied!
// Very clear error message :
// error[E0599]: the method `deploy_docker` exists for struct `FailoverTopology<K8sAnywhereTopology>`, but its trait bounds were not satisfied
// --> src/main.rs:90:9
// |
// 4 | pub struct FailoverTopology<T> {
// | ------------------------------ method `deploy_docker` not found for this struct because it doesn't satisfy `FailoverTopology<K8sAnywhereTopology>: Docker`
// ...
// 37 | struct K8sAnywhereTopology;
// | -------------------------- doesn't satisfy `K8sAnywhereTopology: Docker`
// ...
// 90 | invalid.deploy_docker(); // `T: Docker` bound unsatisfied
// | ^^^^^^^^^^^^^ method cannot be called on `FailoverTopology<K8sAnywhereTopology>` due to unsatisfied trait bounds
// |
// note: trait bound `K8sAnywhereTopology: Docker` was not satisfied
// --> src/main.rs:61:9
// |
// 61 | impl<T: Docker + Send + Sync> Docker for FailoverTopology<T> {
// | ^^^^^^ ------ -------------------
// | |
// | unsatisfied trait bound introduced here
// note: the trait `Docker` must be implemented
}

View File

@@ -0,0 +1,90 @@
# Architecture Decision Record: Global Orchestration Mesh & The Harmony Agent
**Status:** Proposed
**Date:** 2025-12-19
## Context
Harmony is designed to enable a truly decentralized infrastructure where independent clusters—owned by different organizations or running on diverse hardware—can collaborate reliably. This vision combines the decentralization of Web3 with the performance and capabilities of Web2.
Currently, Harmony operates as a stateless CLI tool, invoked manually or via CI runners. While effective for deployment, this model presents a critical limitation: **a CLI cannot react to real-time events.**
To achieve automated failover and dynamic workload management, we need a system that is "always on." Relying on manual intervention or scheduled CI jobs to recover from a cluster failure creates unacceptable latency and prevents us from scaling to thousands of nodes.
Furthermore, we face a challenge in serving diverse workloads:
* **Financial workloads** require absolute consistency (CP - Consistency/Partition Tolerance).
* **AI/Inference workloads** require maximum availability (AP - Availability/Partition Tolerance).
There are many more use cases, but those are the two extremes.
We need a unified architecture that automates cluster coordination and supports both consistency models without requiring a complete re-architecture in the future.
## Decision
We propose a fundamental architectural evolution. It has been clear since the start of Harmony that it would be necessary to transition Harmony from a purely ephemeral CLI tool to a system that includes a persistent **Harmony Agent**. This Agent will connect to a **Global Orchestration Mesh** based on a strongly consistent protocol.
The proposal consists of four key pillars:
### 1. The Harmony Agent (New Component)
We will develop a long-running process (Daemon/Agent) to be deployed alongside workloads.
* **Shift from CLI:** Unlike the CLI, which applies configuration and exits, the Agent maintains a persistent connection to the mesh.
* **Responsibility:** It actively monitors cluster health, participates in consensus, and executes lifecycle commands (start/stop/fence) instantly when the mesh dictates a state change.
### 2. The Technology: NATS JetStream
We will utilize **NATS JetStream** as the underlying transport and consensus layer for the Agent and the Mesh.
* **Why not raw Raft?** Implementing a raw Raft library requires building and maintaining the transport layer, log compaction, snapshotting, and peer discovery manually. NATS JetStream provides a battle-tested, distributed log and Key-Value store (based on Raft) out of the box, along with a high-performance pub/sub system for event propagation.
* **Role:** It will act as the "source of truth" for the cluster state.
### 3. Strong Consistency at the Mesh Layer
The mesh will operate with **Strong Consistency** by default.
* All critical cluster state changes (topology updates, lease acquisitions, leadership elections) will require consensus among the Agents.
* This ensures that in the event of a network partition, we have a mathematical guarantee of which side holds the valid state, preventing data corruption.
### 4. Public UX: The `FailoverStrategy` Abstraction
To keep the user experience stable and simple, we will expose the complexity of the mesh through a high-level configuration API, tentatively called `FailoverStrategy`.
The user defines the *intent* in their config, and the Harmony Agent automates the *execution*:
* **`FailoverStrategy::AbsoluteConsistency`**:
* *Use Case:* Banking, Transactional DBs.
* *Behavior:* If the mesh detects a partition, the Agent on the minority side immediately halts workloads. No split-brain is ever allowed.
* **`FailoverStrategy::SplitBrainAllowed`**:
* *Use Case:* LLM Inference, Stateless Web Servers.
* *Behavior:* If a partition occurs, the Agent keeps workloads running to maximize uptime. State is reconciled when connectivity returns.
## Rationale
**The Necessity of an Agent**
You cannot automate what you do not monitor. Moving to an Agent-based model is the only way to achieve sub-second reaction times to infrastructure failures. It transforms Harmony from a deployment tool into a self-healing platform.
**Scaling & Decentralization**
To allow independent clusters to collaborate, they need a shared language. A strongly consistent mesh allows Cluster A (Organization X) and Cluster B (Organization Y) to agree on workload placement without a central authority.
**Why Strong Consistency First?**
It is technically feasible to relax a strongly consistent system to allow for "Split Brain" behavior (AP) when the user requests it. However, it is nearly impossible to take an eventually consistent system and force it to be strongly consistent (CP) later. By starting with strict constraints, we cover the hardest use cases (Finance) immediately.
**Future Topologies**
While our immediate need is `FailoverTopology` (Multi-site), this architecture supports any future topology logic:
* **`CostTopology`**: Agents negotiate to route workloads to the cluster with the cheapest spot instances.
* **`HorizontalTopology`**: Spreading a single workload across 100 clusters for massive scale.
* **`GeoTopology`**: Ensuring data stays within specific legal jurisdictions.
The mesh provides the *capability* (consensus and messaging); the topology provides the *logic*.
## Consequences
**Positive**
* **Automation:** Eliminates manual failover, enabling massive scale.
* **Reliability:** Guarantees data safety for critical workloads by default.
* **Flexibility:** A single codebase serves both high-frequency trading and AI inference.
* **Stability:** The public API remains abstract, allowing us to optimize the mesh internals without breaking user code.
**Negative**
* **Deployment Complexity:** Users must now deploy and maintain a running service (the Agent) rather than just downloading a binary.
* **Engineering Complexity:** Integrating NATS JetStream and handling distributed state machines is significantly more complex than the current CLI logic.
## Implementation Plan (Short Term)
1. **Agent Bootstrap:** Create the initial scaffold for the Harmony Agent (daemon).
2. **Mesh Integration:** Prototype NATS JetStream embedding within the Agent.
3. **Strategy Implementation:** Add `FailoverStrategy` to the configuration schema and implement the logic in the Agent to read and act on it.
4. **Migration:** Transition the current manual failover scripts into event-driven logic handled by the Agent.

View File

@@ -0,0 +1,189 @@
### 1. ADR 017-1: NATS Cluster Interconnection & Trust Topology
# Architecture Decision Record: NATS Cluster Interconnection & Trust Topology
**Status:** Proposed
**Date:** 2026-01-12
**Precedes:** [017-Staleness-Detection-for-Failover.md]
## Context
In ADR 017, we defined the failover mechanisms for the Harmony mesh. However, for a Primary (Site A) and a Replica (Site B) to communicate securely—or for the Global Mesh to function across disparate locations—we must establish a robust Transport Layer Security (TLS) strategy.
Our primary deployment platform is OKD (Kubernetes). While OKD provides an internal `service-ca`, it is designed primarily for intra-cluster service-to-service communication. It lacks the flexibility required for:
1. **Public/External Gateway Identities:** NATS Gateways need to identify themselves via public DNS names or external IPs, not just internal `.svc` cluster domains.
2. **Cross-Cluster Trust:** We need a mechanism to allow Cluster A to trust Cluster B without sharing a single private root key.
## Decision
We will implement an **"Islands of Trust"** topology using **cert-manager** on OKD.
### 1. Per-Cluster Certificate Authorities (CA)
* We explicitly **reject** the use of a single "Supercluster CA" shared across all sites.
* Instead, every Harmony Cluster (Site A, Site B, etc.) will generate its own unique Self-Signed Root CA managed by `cert-manager` inside that cluster.
* **Lifecycle:** Root CAs will have a long duration (e.g., 10 years) to minimize rotation friction, while Leaf Certificates (NATS servers) will remain short-lived (e.g., 90 days) and rotate automatically.
> Note : The decision to have a single CA for various workloads managed by Harmony on each deployment, or to have multiple CA for each service that requires interconnection is not made yet. This ADR leans towards one CA per service. This allows for maximum flexibility. But the direction might change and no clear decision has been made yet. The alternative of establishing that each cluster/harmony deployment has a single identity could make mTLS very simple between tenants.
### 2. Trust Federation via Bundle Exchange
To enable secure communication (mTLS) between clusters (e.g., for NATS Gateways or Leaf Nodes):
* **No Private Keys are shared.**
* We will aggregate the **Public CA Certificates** of all trusted clusters into a shared `ca-bundle.pem`.
* This bundle is distributed to the NATS configuration of every node.
* **Verification Logic:** When Site A connects to Site B, Site A verifies Site B's certificate against the bundle. Since Site B's CA public key is in the bundle, the connection is accepted.
### 3. Tooling
* We will use **cert-manager** (deployed via Operator on OKD) rather than OKD's built-in `service-ca`. This provides us with standard CRDs (`Issuer`, `Certificate`) to manage the lifecycle, rotation, and complex SANs (Subject Alternative Names) required for external connectivity.
* Harmony will manage installation, configuration and bundle creation across all sites
## Rationale
**Security Blast Radius (The "Key Leak" Scenario)**
If we used a single global CA and the private key for Site A was compromised (e.g., physical theft of a server from a basement), the attacker could impersonate *any* site in the global mesh.
By using Per-Cluster CAs:
* If Site A is compromised, only Site A's identity is stolen.
* We can "evict" Site A from the mesh simply by removing Site A's Public CA from the `ca-bundle.pem` on the remaining healthy clusters and reloading. The attacker can no longer authenticate.
**Decentralized Autonomy**
This aligns with the "Humane Computing" vision. A local cluster owns its identity. It does not depend on a central authority to issue its certificates. It can function in isolation (offline) indefinitely without needing to "phone home" to renew credentials.
## Consequences
**Positive**
* **High Security:** Compromise of one node does not compromise the global mesh.
* **Flexibility:** Easier to integrate with third-party clusters or partners by simply adding their public CA to the bundle.
* **Standardization:** `cert-manager` is the industry standard, making the configuration portable to non-OKD K8s clusters if needed.
**Negative**
* **Configuration Complexity:** We must manage a mechanism to distribute the `ca-bundle.pem` containing public keys to all sites. This should be automated (e.g., via a Harmony Agent) to ensure timely updates and revocation.
* **Revocation Latency:** Revoking a compromised cluster requires updating and reloading the bundle on all other clusters. This is slower than OCSP/CRL but acceptable for infrastructure-level trust if automation is in place.
---
# 2. Concrete overview of the process, how it can be implemented manually across multiple OKD clusters
All of this will be automated via Harmony, but to understand correctly the process it is outlined in details here :
## 1. Deploying and Configuring cert-manager on OKD
While OKD has a built-in `service-ca` controller, it is "opinionated" and primarily signs certs for internal services (like `my-svc.my-namespace.svc`). It is **not suitable** for the Harmony Global Mesh because you cannot easily control the Subject Alternative Names (SANs) for external routes (e.g., `nats.site-a.nationtech.io`), nor can you easily export its CA to other clusters.
**The Solution:** Use the **cert-manager Operator for Red Hat OpenShift**.
### Step 1: Install the Operator
1. Log in to the OKD Web Console.
2. Navigate to **Operators** -> **OperatorHub**.
3. Search for **"cert-manager"**.
4. Choose the **"cert-manager Operator for Red Hat OpenShift"** (Red Hat provided) or the community version.
5. Click **Install**. Use the default settings (Namespace: `cert-manager-operator`).
### Step 2: Create the "Island" CA (The Issuer)
Once installed, you define your cluster's unique identity. Apply this YAML to your NATS namespace.
```yaml
# filepath: k8s/01-issuer.yaml
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
name: harmony-selfsigned-issuer
namespace: harmony-nats
spec:
selfSigned: {}
---
# This generates the unique Root CA for THIS specific cluster
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: harmony-root-ca
namespace: harmony-nats
spec:
isCA: true
commonName: "harmony-site-a-ca" # CHANGE THIS per cluster (e.g., site-b-ca)
duration: 87600h # 10 years
renewBefore: 2160h # 3 months before expiry
secretName: harmony-root-ca-secret
privateKey:
algorithm: ECDSA
size: 256
issuerRef:
name: harmony-selfsigned-issuer
kind: Issuer
group: cert-manager.io
---
# This Issuer uses the Root CA generated above to sign NATS certs
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
name: harmony-ca-issuer
namespace: harmony-nats
spec:
ca:
secretName: harmony-root-ca-secret
```
### Step 3: Generate the NATS Server Certificate
This certificate will be used by the NATS server. It includes both internal DNS names (for local clients) and external DNS names (for the global mesh).
```yaml
# filepath: k8s/02-nats-cert.yaml
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: nats-server-cert
namespace: harmony-nats
spec:
secretName: nats-server-tls
duration: 2160h # 90 days
renewBefore: 360h # 15 days
issuerRef:
name: harmony-ca-issuer
kind: Issuer
# CRITICAL: Define all names this server can be reached by
dnsNames:
- "nats"
- "nats.harmony-nats.svc"
- "nats.harmony-nats.svc.cluster.local"
- "*.nats.harmony-nats.svc.cluster.local"
- "nats-gateway.site-a.nationtech.io" # External Route for Mesh
```
## 2. Implementing the "Islands of Trust" (Trust Bundle)
To make Site A and Site B talk, you need to exchange **Public Keys**.
1. **Extract Public CA from Site A:**
```bash
oc get secret harmony-root-ca-secret -n harmony-nats -o jsonpath='{.data.ca\.crt}' | base64 -d > site-a.crt
```
2. **Extract Public CA from Site B:**
```bash
oc get secret harmony-root-ca-secret -n harmony-nats -o jsonpath='{.data.ca\.crt}' | base64 -d > site-b.crt
```
3. **Create the Bundle:**
Combine them into one file.
```bash
cat site-a.crt site-b.crt > ca-bundle.crt
```
4. **Upload Bundle to Both Clusters:**
Create a ConfigMap or Secret in *both* clusters containing this combined bundle.
```bash
oc create configmap nats-trust-bundle --from-file=ca.crt=ca-bundle.crt -n harmony-nats
```
5. **Configure NATS:**
Mount this ConfigMap and point NATS to it.
```conf
# nats.conf snippet
tls {
cert_file: "/etc/nats-certs/tls.crt"
key_file: "/etc/nats-certs/tls.key"
# Point to the bundle containing BOTH Site A and Site B public CAs
ca_file: "/etc/nats-trust/ca.crt"
}
```
This setup ensures that Site A can verify Site B's certificate (signed by `harmony-site-b-ca`) because Site B's CA is in Site A's trust store, and vice versa, without ever sharing the private keys that generated them.

View File

@@ -0,0 +1,141 @@
# Architecture Decision Record: Template Hydration for Kubernetes Manifest Generation
Initial Author: Jean-Gabriel Gill-Couture & Sylvain Tremblay
Initial Date: 2025-01-23
Last Updated Date: 2025-01-23
## Status
Implemented
## Context
Harmony's philosophy is built on three guiding principles: Infrastructure as Resilient Code, Prove It Works — Before You Deploy, and One Unified Model. Our goal is to shift validation and verification as left as possible—ideally to compile time—rather than discovering errors at deploy time.
After investigating a few approaches such as compile-checked Askama templates to generate Kubernetes manifests for Helm charts, we found again that this approach suffered from several fundamental limitations:
* **Late Validation:** Typos in template syntax or field names are only discovered at deployment time, not during compilation. A mistyped `metadata.name` won't surface until Helm attempts to render the template.
* **Brittle Maintenance:** Templates are string-based with limited IDE support. Refactoring requires grep-and-replace across YAML-like template files, risking subtle breakage.
* **Hard-to-Test Logic:** Testing template output requires mocking the template engine and comparing serialized strings rather than asserting against typed data structures.
* **No Type Safety:** There is no guarantee that the generated YAML will be valid Kubernetes resources without runtime validation.
We also faced a strategic choice around Helm: use it as both *templating engine* and *packaging mechanism*, or decouple these concerns. While Helm's ecosystem integration (Harbor, ArgoCD, OCI registry support) is valuable, the Jinja-like templating is at odds with Harmony's "code-first" ethos.
## Decision
We will adopt the **Template Hydration Pattern**—constructing Kubernetes manifests programmatically using strongly-typed `kube-rs` objects, then serializing them to YAML files for packaging into Helm charts.
Specifically:
* **Write strongly typed `k8s_openapi` Structs:** All Kubernetes resources (Deployment, Service, ConfigMap, etc.) will be constructed using the typed structs generated by `k8s_openapi`.
* **Direct Serialization to YAML:** Rather than rendering templates, we use `serde_yaml::to_string()` to serialize typed objects directly into YAML manifests. This way, YAML is only used as a data-transfer format and not a templating/programming language - which it is not.
* **Helm as Packaging-Only:** Helm's role is reduced to packaging pre-rendered templates into a tarball and pushing to OCI registries. No template rendering logic resides within Helm.
* **Ecosystem Preservation:** The generated Helm charts remain fully compatible with Harbor, ArgoCD, and any Helm-compatible tool—the only difference is that the `templates/` directory contains static YAML files.
The implementation in `backend_app.rs` demonstrates this pattern:
```rust
let deployment = Deployment {
metadata: ObjectMeta {
name: Some(self.name.clone()),
labels: Some([("app.kubernetes.io/name".to_string(), self.name.clone())].into()),
..Default::default()
},
spec: Some(DeploymentSpec { /* ... */ }),
..Default::default()
};
let deployment_yaml = serde_yaml::to_string(&deployment)?;
fs::write(templates_dir.join("deployment.yaml"), deployment_yaml)?;
```
## Rationale
**Aligns with "Infrastructure as Resilient Code"**
Harmony's first principle states that infrastructure should be treated like application code. By expressing Kubernetes manifests as Rust structs, we gain:
* **Refactorability:** Rename a label and the compiler catches all usages.
* **IDE Support:** Autocomplete for all Kubernetes API fields; documentation inline.
* **Code Navigation:** Jump to definition shows exactly where a value comes from.
**Achieves "Prove It Works — Before You Deploy"**
The compiler now validates that:
* All required fields are populated (Rust's `Option` type prevents missing fields).
* Field types match expectations (ports are integers, not strings).
* Enums contain valid values (e.g., `ServiceType::ClusterIP`).
This moves what was runtime validation into compile-time checks, fulfilling the "shift left" promise.
**Enables True Unit Testing**
Developers can now write unit tests that assert directly against typed objects:
```rust
let deployment = create_deployment(&app);
assert_eq!(deployment.spec.unwrap().replicas.unwrap(), 3);
assert_eq!(deployment.metadata.name.unwrap(), "my-app");
```
No string parsing, no YAML serialization, no fragile assertions against rendered output.
**Preserves Ecosystem Benefits**
By generating standard Helm chart structures, Harmony retains compatibility with:
* **OCI Registries (Harbor, GHCR):** `helm push` works exactly as before.
* **ArgoCD:** Syncs and manages releases using the generated charts.
* **Existing Workflows:** Teams already consuming Helm charts see no change.
The Helm tarball becomes a "dumb pipe" for transport, which is arguably its ideal role.
## Consequences
### Positive
* **Compile-Time Safety:** A broad class of errors (typos, missing fields, type mismatches) is now caught at build time.
* **Better Developer Experience:** IDE autocomplete, inline documentation, and refactor support significantly reduce the learning curve for Kubernetes manifests.
* **Testability:** Unit tests can validate manifest structure without integration or runtime checks.
* **Auditability:** The source-of-truth for manifests is now pure Rust—easier to review in pull requests than template logic scattered across files.
* **Future-Extensibility:** CustomResources (CRDs) can be supported via `kopium`-generated Rust types, maintaining the same strong typing.
### Negative
* **API Schema Drift:** Kubernetes API changes require regenerating `k8s_openapi` types and updating code. A change in a struct field will cause the build to fail—intentionally, but still requiring the pipeline to be updated.
* **Verbosity:** Typed construction is more verbose than the equivalent template. Builder patterns or helper functions will be needed to keep code readable.
* **Learning Curve:** Contributors must understand both the Kubernetes resource spec *and* the Rust type system, rather than just YAML.
* **Debugging Shift:** When debugging generated YAML, you now trace through Rust code rather than template files—more precise but different mental model.
## Alternatives Considered
### 1. Enhance Askama with Compile-Time Validation
*Pros:* Stay within familiar templating paradigm; minimal code changes.
*Cons:* Rust's type system cannot fully express Kubernetes schema validation without significant macro boilerplate. Errors would still surface at template evaluation time, not compilation.
### 2. Use Helm SDK Programmatically (Go)
*Pros:* Direct access to Helm's template engine; no YAML serialization step.
*Cons:* Would introduce a second language (Go) into a Rust codebase, increasing cognitive load and compilation complexity. No improvement in compile-time safety.
### 3. Raw YAML String Templating (Manual)
*Pros:* Maximum control; no external dependencies.
*Cons:* Even more error-prone than Askama; no structure validation; string concatenation errors abound.
### 4. Use Kustomize for All Manifests
*Pros:* Declarative overlays; standard tool.
*Cons:* Kustomize is itself a layer over YAML templates with its own DSL. It does not provide compile-time type safety and would require externalizing manifest management outside Harmony's codebase.
__Note that this template hydration architecture still allows to override templates with tools like kustomize when required__
## Additional Notes
**Scalability to Future Topologies**
The Template Hydration pattern enables future Harmony architectures to generate manifests dynamically based on topology context. For example, a `CostTopology` might adjust resource requests based on cluster pricing, manipulating the typed `Deployment::spec` directly before serialization.
**Implementation Status**
As of this writing, the pattern is implemented for `BackendApp` deployments (`backend_app.rs`). The next phase is to extend this pattern across all application modules (`webapp.rs`, etc.) and to standardize on this approach for any new implementations.

View File

@@ -0,0 +1,65 @@
# Architecture Decision Record: Network Bonding Configuration via External Automation
Initial Author: Jean-Gabriel Gill-Couture & Sylvain Tremblay
Initial Date: 2026-02-13
Last Updated Date: 2026-02-13
## Status
Accepted
## Context
We need to configure LACP bonds on 10GbE interfaces across all worker nodes in the OpenShift cluster. A significant challenge is that interface names (e.g., `enp1s0f0` vs `ens1f0`) vary across different hardware nodes.
The standard OpenShift mechanism (MachineConfig) applies identical configurations to all nodes in a MachineConfigPool. Since the interface names differ, a single static MachineConfig cannot target specific physical devices across the entire cluster without complex workarounds.
## Decision
We will use the existing "Harmony" automation tool to generate and apply host-specific NetworkManager configuration files directly to the nodes.
1. Harmony will generate the specific `.nmconnection` files for the bond and slaves based on its inventory of interface names.
2. Files will be pushed to `/etc/NetworkManager/system-connections/` on each node.
3. Configuration will be applied via `nmcli` reload or a node reboot.
## Rationale
* **Inventory Awareness:** Harmony already possesses the specific interface mapping data for each host.
* **Persistence:** Fedora CoreOS/SCOS allows writing to `/etc`, and these files persist across reboots and OS upgrades (rpm-ostree updates).
* **Avoids Complexity:** This approach avoids the operational overhead of creating unique MachineConfigPools for every single host or hardware variant.
* **Safety:** Unlike wildcard matching, this ensures explicit interface selection, preventing accidental bonding of reserved interfaces (e.g., future separation of Ceph storage traffic).
## Consequences
**Pros:**
* Precise, per-host configuration without polluting the Kubernetes API with hundreds of MachineConfigs.
* Standard Linux networking behavior; easy to debug locally.
* Prevents accidental interface capture (unlike wildcards).
**Cons:**
* **Loss of Declarative K8s State:** The network config is not managed by the Machine Config Operator (MCO).
* **Node Replacement Friction:** Newly provisioned nodes (replacements) will boot with default config. Harmony must be run against new nodes manually or via a hook before they can fully join the cluster workload.
## Alternatives considered
1. **Wildcard Matching in NetworkManager (e.g., `interface-name=enp*`):**
* *Pros:* Single MachineConfig for the whole cluster.
* *Cons:* Rejected because it is too broad. It risks capturing interfaces intended for other purposes (e.g., splitting storage and cluster networks later).
2. **"Kitchen Sink" Configuration:**
* *Pros:* Single file listing every possible interface name as a slave.
* *Cons:* "Dirty" configuration; results in many inactive connections on every host; brittle if new naming schemes appear.
3. **Per-Host MachineConfig:**
* *Pros:* Fully declarative within OpenShift.
* *Cons:* Requires a unique `MachineConfigPool` per host, which is an anti-pattern and unmaintainable at scale.
4. **On-boot Generation Script:**
* *Pros:* Dynamic detection.
* *Cons:* Increases boot complexity; harder to debug if the script fails during startup.
## Additional Notes
While `/etc` is writable and persistent on CoreOS, this configuration falls outside the "Day 1" Ignition process. Operational runbooks must be updated to ensure Harmony runs on any node replacement events.

View File

@@ -16,3 +16,4 @@ env_logger.workspace = true
regex = "1.11.3"
harmony_secret = { path = "../harmony_secret" }
serde.workspace = true
schemars = "0.8"

View File

@@ -1,11 +1,12 @@
use std::net::{IpAddr, Ipv4Addr};
use brocade::BrocadeOptions;
use brocade::{BrocadeOptions, ssh};
use harmony_secret::{Secret, SecretManager};
use harmony_types::switch::PortLocation;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
#[derive(Secret, Clone, Debug, Serialize, Deserialize)]
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
struct BrocadeSwitchAuth {
username: String,
password: String,
@@ -16,7 +17,7 @@ async fn main() {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
// let ip = IpAddr::V4(Ipv4Addr::new(10, 0, 0, 250)); // old brocade @ ianlet
let ip = IpAddr::V4(Ipv4Addr::new(192, 168, 55, 101)); // brocade @ sto1
let ip = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)); // brocade @ sto1
// let ip = IpAddr::V4(Ipv4Addr::new(192, 168, 4, 11)); // brocade @ st
let switch_addresses = vec![ip];
@@ -26,13 +27,16 @@ async fn main() {
let brocade = brocade::init(
&switch_addresses,
22,
&config.username,
&config.password,
Some(BrocadeOptions {
&BrocadeOptions {
dry_run: true,
ssh: ssh::SshOptions {
port: 2222,
..Default::default()
},
..Default::default()
}),
},
)
.await
.expect("Brocade client failed to connect");
@@ -54,6 +58,7 @@ async fn main() {
}
println!("--------------");
todo!();
let channel_name = "1";
brocade.clear_port_channel(channel_name).await.unwrap();

View File

@@ -1,7 +1,8 @@
use super::BrocadeClient;
use crate::{
BrocadeInfo, Error, ExecutionMode, InterSwitchLink, InterfaceInfo, MacAddressEntry,
PortChannelId, PortOperatingMode, parse_brocade_mac_address, shell::BrocadeShell,
PortChannelId, PortOperatingMode, SecurityLevel, parse_brocade_mac_address,
shell::BrocadeShell,
};
use async_trait::async_trait;
@@ -10,6 +11,7 @@ use log::{debug, info};
use regex::Regex;
use std::{collections::HashSet, str::FromStr};
#[derive(Debug)]
pub struct FastIronClient {
shell: BrocadeShell,
version: BrocadeInfo,
@@ -139,7 +141,7 @@ impl BrocadeClient for FastIronClient {
async fn configure_interfaces(
&self,
_interfaces: Vec<(String, PortOperatingMode)>,
_interfaces: &Vec<(String, PortOperatingMode)>,
) -> Result<(), Error> {
todo!()
}
@@ -208,4 +210,20 @@ impl BrocadeClient for FastIronClient {
info!("[Brocade] Port-channel '{channel_name}' cleared.");
Ok(())
}
async fn enable_snmp(&self, user_name: &str, auth: &str, des: &str) -> Result<(), Error> {
let commands = vec![
"configure terminal".into(),
"snmp-server view ALL 1 included".into(),
"snmp-server group public v3 priv read ALL".into(),
format!(
"snmp-server user {user_name} groupname public auth md5 auth-password {auth} priv des priv-password {des}"
),
"exit".into(),
];
self.shell
.run_commands(commands, ExecutionMode::Regular)
.await?;
Ok(())
}
}

View File

@@ -14,11 +14,12 @@ use async_trait::async_trait;
use harmony_types::net::MacAddress;
use harmony_types::switch::{PortDeclaration, PortLocation};
use regex::Regex;
use serde::Serialize;
mod fast_iron;
mod network_operating_system;
mod shell;
mod ssh;
pub mod ssh;
#[derive(Default, Clone, Debug)]
pub struct BrocadeOptions {
@@ -31,6 +32,7 @@ pub struct BrocadeOptions {
pub struct TimeoutConfig {
pub shell_ready: Duration,
pub command_execution: Duration,
pub command_output: Duration,
pub cleanup: Duration,
pub message_wait: Duration,
}
@@ -40,6 +42,7 @@ impl Default for TimeoutConfig {
Self {
shell_ready: Duration::from_secs(10),
command_execution: Duration::from_secs(60), // Commands like `deploy` (for a LAG) can take a while
command_output: Duration::from_secs(5), // Delay to start logging "waiting for command output"
cleanup: Duration::from_secs(10),
message_wait: Duration::from_millis(500),
}
@@ -54,7 +57,7 @@ enum ExecutionMode {
#[derive(Clone, Debug)]
pub struct BrocadeInfo {
os: BrocadeOs,
version: String,
_version: String,
}
#[derive(Clone, Debug)]
@@ -116,7 +119,7 @@ impl fmt::Display for InterfaceType {
}
/// Defines the primary configuration mode of a switch interface, representing mutually exclusive roles.
#[derive(Debug, PartialEq, Eq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone, Serialize)]
pub enum PortOperatingMode {
/// The interface is explicitly configured for Brocade fabric roles (ISL or Trunk enabled).
Fabric,
@@ -139,12 +142,11 @@ pub enum InterfaceStatus {
pub async fn init(
ip_addresses: &[IpAddr],
port: u16,
username: &str,
password: &str,
options: Option<BrocadeOptions>,
options: &BrocadeOptions,
) -> Result<Box<dyn BrocadeClient + Send + Sync>, Error> {
let shell = BrocadeShell::init(ip_addresses, port, username, password, options).await?;
let shell = BrocadeShell::init(ip_addresses, username, password, options).await?;
let version_info = shell
.with_session(ExecutionMode::Regular, |session| {
@@ -162,7 +164,7 @@ pub async fn init(
}
#[async_trait]
pub trait BrocadeClient {
pub trait BrocadeClient: std::fmt::Debug {
/// Retrieves the operating system and version details from the connected Brocade switch.
///
/// This is typically the first call made after establishing a connection to determine
@@ -206,7 +208,7 @@ pub trait BrocadeClient {
/// Configures a set of interfaces to be operated with a specified mode (access ports, ISL, etc.).
async fn configure_interfaces(
&self,
interfaces: Vec<(String, PortOperatingMode)>,
interfaces: &Vec<(String, PortOperatingMode)>,
) -> Result<(), Error>;
/// Scans the existing configuration to find the next available (unused)
@@ -235,6 +237,15 @@ pub trait BrocadeClient {
ports: &[PortLocation],
) -> Result<(), Error>;
/// Enables Simple Network Management Protocol (SNMP) server for switch
///
/// # Parameters
///
/// * `user_name`: The user name for the snmp server
/// * `auth`: The password for authentication process for verifying the identity of a device
/// * `des`: The Data Encryption Standard algorithm key
async fn enable_snmp(&self, user_name: &str, auth: &str, des: &str) -> Result<(), Error>;
/// Removes all configuration associated with the specified Port-Channel name.
///
/// This operation should be idempotent; attempting to clear a non-existent
@@ -261,7 +272,7 @@ async fn get_brocade_info(session: &mut BrocadeSession) -> Result<BrocadeInfo, E
return Ok(BrocadeInfo {
os: BrocadeOs::NetworkOperatingSystem,
version,
_version: version,
});
} else if output.contains("ICX") {
let re = Regex::new(r"(?m)^\s*SW: Version\s*(?P<version>[a-zA-Z0-9.\-]+)")
@@ -274,7 +285,7 @@ async fn get_brocade_info(session: &mut BrocadeSession) -> Result<BrocadeInfo, E
return Ok(BrocadeInfo {
os: BrocadeOs::FastIron,
version,
_version: version,
});
}
@@ -298,6 +309,11 @@ fn parse_brocade_mac_address(value: &str) -> Result<MacAddress, String> {
Ok(MacAddress(bytes))
}
#[derive(Debug)]
pub enum SecurityLevel {
AuthPriv(String),
}
#[derive(Debug)]
pub enum Error {
NetworkError(String),

View File

@@ -3,13 +3,15 @@ use std::str::FromStr;
use async_trait::async_trait;
use harmony_types::switch::{PortDeclaration, PortLocation};
use log::{debug, info};
use regex::Regex;
use crate::{
BrocadeClient, BrocadeInfo, Error, ExecutionMode, InterSwitchLink, InterfaceInfo,
InterfaceStatus, InterfaceType, MacAddressEntry, PortChannelId, PortOperatingMode,
parse_brocade_mac_address, shell::BrocadeShell,
SecurityLevel, parse_brocade_mac_address, shell::BrocadeShell,
};
#[derive(Debug)]
pub struct NetworkOperatingSystemClient {
shell: BrocadeShell,
version: BrocadeInfo,
@@ -102,13 +104,37 @@ impl NetworkOperatingSystemClient {
};
Some(Ok(InterfaceInfo {
name: format!("{} {}", interface_type, port_location),
name: format!("{interface_type} {port_location}"),
port_location,
interface_type,
operating_mode,
status,
}))
}
fn map_configure_interfaces_error(&self, err: Error) -> Error {
debug!("[Brocade] {err}");
if let Error::CommandError(message) = &err {
if message.contains("switchport")
&& message.contains("Cannot configure aggregator member")
{
let re = Regex::new(r"\(conf-if-([a-zA-Z]+)-([\d/]+)\)#").unwrap();
if let Some(caps) = re.captures(message) {
let interface_type = &caps[1];
let port_location = &caps[2];
let interface = format!("{interface_type} {port_location}");
return Error::CommandError(format!(
"Cannot configure interface '{interface}', it is a member of a port-channel (LAG)"
));
}
}
}
err
}
}
#[async_trait]
@@ -161,7 +187,7 @@ impl BrocadeClient for NetworkOperatingSystemClient {
async fn configure_interfaces(
&self,
interfaces: Vec<(String, PortOperatingMode)>,
interfaces: &Vec<(String, PortOperatingMode)>,
) -> Result<(), Error> {
info!("[Brocade] Configuring {} interface(s)...", interfaces.len());
@@ -178,9 +204,12 @@ impl BrocadeClient for NetworkOperatingSystemClient {
PortOperatingMode::Trunk => {
commands.push("switchport".into());
commands.push("switchport mode trunk".into());
commands.push("no spanning-tree shutdown".into());
commands.push("switchport trunk allowed vlan all".into());
commands.push("no switchport trunk tag native-vlan".into());
commands.push("spanning-tree shutdown".into());
commands.push("no fabric isl enable".into());
commands.push("no fabric trunk enable".into());
commands.push("no shutdown".into());
}
PortOperatingMode::Access => {
commands.push("switchport".into());
@@ -196,11 +225,10 @@ impl BrocadeClient for NetworkOperatingSystemClient {
commands.push("exit".into());
}
commands.push("write memory".into());
self.shell
.run_commands(commands, ExecutionMode::Regular)
.await?;
.await
.map_err(|err| self.map_configure_interfaces_error(err))?;
info!("[Brocade] Interfaces configured.");
@@ -212,7 +240,7 @@ impl BrocadeClient for NetworkOperatingSystemClient {
let output = self
.shell
.run_command("show port-channel", ExecutionMode::Regular)
.run_command("show port-channel summary", ExecutionMode::Regular)
.await?;
let used_ids: Vec<u8> = output
@@ -247,7 +275,12 @@ impl BrocadeClient for NetworkOperatingSystemClient {
ports: &[PortLocation],
) -> Result<(), Error> {
info!(
"[Brocade] Configuring port-channel '{channel_name} {channel_id}' with ports: {ports:?}"
"[Brocade] Configuring port-channel '{channel_id} {channel_name}' with ports: {}",
ports
.iter()
.map(|p| format!("{p}"))
.collect::<Vec<String>>()
.join(", ")
);
let interfaces = self.get_interfaces().await?;
@@ -275,8 +308,6 @@ impl BrocadeClient for NetworkOperatingSystemClient {
commands.push("exit".into());
}
commands.push("write memory".into());
self.shell
.run_commands(commands, ExecutionMode::Regular)
.await?;
@@ -293,7 +324,6 @@ impl BrocadeClient for NetworkOperatingSystemClient {
"configure terminal".into(),
format!("no interface port-channel {}", channel_name),
"exit".into(),
"write memory".into(),
];
self.shell
@@ -303,4 +333,20 @@ impl BrocadeClient for NetworkOperatingSystemClient {
info!("[Brocade] Port-channel '{channel_name}' cleared.");
Ok(())
}
async fn enable_snmp(&self, user_name: &str, auth: &str, des: &str) -> Result<(), Error> {
let commands = vec![
"configure terminal".into(),
"snmp-server view ALL 1 included".into(),
"snmp-server group public v3 priv read ALL".into(),
format!(
"snmp-server user {user_name} groupname public auth md5 auth-password {auth} priv des priv-password {des}"
),
"exit".into(),
];
self.shell
.run_commands(commands, ExecutionMode::Regular)
.await?;
Ok(())
}
}

View File

@@ -13,9 +13,9 @@ use log::info;
use russh::ChannelMsg;
use tokio::time::timeout;
#[derive(Debug)]
pub struct BrocadeShell {
ip: IpAddr,
port: u16,
username: String,
password: String,
options: BrocadeOptions,
@@ -26,33 +26,31 @@ pub struct BrocadeShell {
impl BrocadeShell {
pub async fn init(
ip_addresses: &[IpAddr],
port: u16,
username: &str,
password: &str,
options: Option<BrocadeOptions>,
options: &BrocadeOptions,
) -> Result<Self, Error> {
let ip = ip_addresses
.first()
.ok_or_else(|| Error::ConfigurationError("No IP addresses provided".to_string()))?;
let base_options = options.unwrap_or_default();
let options = ssh::try_init_client(username, password, ip, base_options).await?;
let brocade_ssh_client_options =
ssh::try_init_client(username, password, ip, options).await?;
Ok(Self {
ip: *ip,
port,
username: username.to_string(),
password: password.to_string(),
before_all_commands: vec![],
after_all_commands: vec![],
options,
options: brocade_ssh_client_options,
})
}
pub async fn open_session(&self, mode: ExecutionMode) -> Result<BrocadeSession, Error> {
BrocadeSession::open(
self.ip,
self.port,
self.options.ssh.port,
&self.username,
&self.password,
self.options.clone(),
@@ -210,7 +208,7 @@ impl BrocadeSession {
let mut output = Vec::new();
let start = Instant::now();
let read_timeout = Duration::from_millis(500);
let log_interval = Duration::from_secs(3);
let log_interval = Duration::from_secs(5);
let mut last_log = Instant::now();
loop {
@@ -220,7 +218,9 @@ impl BrocadeSession {
));
}
if start.elapsed() > Duration::from_secs(5) && last_log.elapsed() > log_interval {
if start.elapsed() > self.options.timeouts.command_output
&& last_log.elapsed() > log_interval
{
info!("[Brocade] Waiting for command output...");
last_log = Instant::now();
}
@@ -275,7 +275,7 @@ impl BrocadeSession {
let output_lower = output.to_lowercase();
if ERROR_PATTERNS.iter().any(|&p| output_lower.contains(p)) {
return Err(Error::CommandError(format!(
"Command '{command}' failed: {}",
"Command error: {}",
output.trim()
)));
}

View File

@@ -2,6 +2,7 @@ use std::borrow::Cow;
use std::sync::Arc;
use async_trait::async_trait;
use log::debug;
use russh::client::Handler;
use russh::kex::DH_G1_SHA1;
use russh::kex::ECDH_SHA2_NISTP256;
@@ -10,29 +11,43 @@ use russh_keys::key::SSH_RSA;
use super::BrocadeOptions;
use super::Error;
#[derive(Default, Clone, Debug)]
#[derive(Clone, Debug)]
pub struct SshOptions {
pub preferred_algorithms: russh::Preferred,
pub port: u16,
}
impl Default for SshOptions {
fn default() -> Self {
Self {
preferred_algorithms: Default::default(),
port: 22,
}
}
}
impl SshOptions {
fn ecdhsa_sha2_nistp256() -> Self {
fn ecdhsa_sha2_nistp256(port: u16) -> Self {
Self {
preferred_algorithms: russh::Preferred {
kex: Cow::Borrowed(&[ECDH_SHA2_NISTP256]),
key: Cow::Borrowed(&[SSH_RSA]),
..Default::default()
},
port,
..Default::default()
}
}
fn legacy() -> Self {
fn legacy(port: u16) -> Self {
Self {
preferred_algorithms: russh::Preferred {
kex: Cow::Borrowed(&[DH_G1_SHA1]),
key: Cow::Borrowed(&[SSH_RSA]),
..Default::default()
},
port,
..Default::default()
}
}
}
@@ -55,20 +70,23 @@ pub async fn try_init_client(
username: &str,
password: &str,
ip: &std::net::IpAddr,
base_options: BrocadeOptions,
base_options: &BrocadeOptions,
) -> Result<BrocadeOptions, Error> {
let mut default = SshOptions::default();
default.port = base_options.ssh.port;
let ssh_options = vec![
SshOptions::default(),
SshOptions::ecdhsa_sha2_nistp256(),
SshOptions::legacy(),
default,
SshOptions::ecdhsa_sha2_nistp256(base_options.ssh.port),
SshOptions::legacy(base_options.ssh.port),
];
for ssh in ssh_options {
let opts = BrocadeOptions {
ssh,
ssh: ssh.clone(),
..base_options.clone()
};
let client = create_client(*ip, 22, username, password, &opts).await;
debug!("Creating client {ip}:{} {username}", ssh.port);
let client = create_client(*ip, ssh.port, username, password, &opts).await;
match client {
Ok(_) => {

Binary file not shown.

View File

@@ -1 +1,33 @@
Not much here yet, see the `adr` folder for now. More to come in time!
# Harmony Documentation Hub
Welcome to the Harmony documentation. This is the main entry point for learning everything from core concepts to building your own Score, Topologies, and Capabilities.
## 1. Getting Started
If you're new to Harmony, start here:
- [**Getting Started Guide**](./guides/getting-started.md): A step-by-step tutorial that takes you from an empty project to deploying your first application.
- [**Core Concepts**](./concepts.md): A high-level overview of the key concepts in Harmony: `Score`, `Topology`, `Capability`, `Inventory`, `Interpret`, ...
## 2. Use Cases & Examples
See how to use Harmony to solve real-world problems.
- [**OKD on Bare Metal**](./use-cases/okd-on-bare-metal.md): A detailed walkthrough of bootstrapping a high-availability OKD cluster from physical hardware.
- [**Deploy a Rust Web App**](./use-cases/deploy-rust-webapp.md): A quick guide to deploying a monitored, containerized web application to a Kubernetes cluster.
## 3. Component Catalogs
Discover existing, reusable components you can use in your Harmony projects.
- [**Scores Catalog**](./catalogs/scores.md): A categorized list of all available `Scores` (the "what").
- [**Topologies Catalog**](./catalogs/topologies.md): A list of all available `Topologies` (the "where").
- [**Capabilities Catalog**](./catalogs/capabilities.md): A list of all available `Capabilities` (the "how").
## 4. Developer Guides
Ready to build your own components? These guides show you how.
- [**Writing a Score**](./guides/writing-a-score.md): Learn how to create your own `Score` and `Interpret` logic to define a new desired state.
- [**Writing a Topology**](./guides/writing-a-topology.md): Learn how to model a new environment (like AWS, GCP, or custom hardware) as a `Topology`.
- [**Adding Capabilities**](./guides/adding-capabilities.md): See how to add a `Capability` to your custom `Topology`.

7
docs/catalogs/README.md Normal file
View File

@@ -0,0 +1,7 @@
# Component Catalogs
This section is the "dictionary" for Harmony. It lists all the reusable components available out-of-the-box.
- [**Scores Catalog**](./scores.md): Discover all available `Scores` (the "what").
- [**Topologies Catalog**](./topologies.md): A list of all available `Topologies` (the "where").
- [**Capabilities Catalog**](./capabilities.md): A list of all available `Capabilities` (the "how").

View File

@@ -0,0 +1,40 @@
# Capabilities Catalog
A `Capability` is a specific feature or API that a `Topology` offers. `Interpret` logic uses these capabilities to execute a `Score`.
This list is primarily for developers **writing new Topologies or Scores**. As a user, you just need to know that the `Topology` you pick (like `K8sAnywhereTopology`) provides the capabilities your `Scores` (like `ApplicationScore`) need.
<!--toc:start-->
- [Capabilities Catalog](#capabilities-catalog)
- [Kubernetes & Application](#kubernetes-application)
- [Monitoring & Observability](#monitoring-observability)
- [Networking (Core Services)](#networking-core-services)
- [Networking (Hardware & Host)](#networking-hardware-host)
<!--toc:end-->
## Kubernetes & Application
- **K8sClient**: Provides an authenticated client to interact with a Kubernetes API (create/read/update/delete resources).
- **HelmCommand**: Provides the ability to execute Helm commands (install, upgrade, template).
- **TenantManager**: Provides methods for managing tenants in a multi-tenant cluster.
- **Ingress**: Provides an interface for managing ingress controllers and resources.
## Monitoring & Observability
- **Grafana**: Provides an API for configuring Grafana (datasources, dashboards).
- **Monitoring**: A general capability for configuring monitoring (e.g., creating Prometheus rules).
## Networking (Core Services)
- **DnsServer**: Provides an interface for creating and managing DNS records.
- **LoadBalancer**: Provides an interface for configuring a load balancer (e.g., OPNsense, MetalLB).
- **DhcpServer**: Provides an interface for managing DHCP leases and host bindings.
- **TftpServer**: Provides an interface for managing files on a TFTP server (e.g., iPXE boot files).
## Networking (Hardware & Host)
- **Router**: Provides an interface for configuring routing rules, typically on a firewall like OPNsense.
- **Switch**: Provides an interface for configuring a physical network switch (e.g., managing VLANs and port channels).
- **NetworkManager**: Provides an interface for configuring host-level networking (e.g., creating bonds and bridges on a node).

102
docs/catalogs/scores.md Normal file
View File

@@ -0,0 +1,102 @@
# Scores Catalog
A `Score` is a declarative description of a desired state. Find the Score you need and add it to your `harmony!` block's `scores` array.
<!--toc:start-->
- [Scores Catalog](#scores-catalog)
- [Application Deployment](#application-deployment)
- [OKD / Kubernetes Cluster Setup](#okd-kubernetes-cluster-setup)
- [Cluster Services & Management](#cluster-services-management)
- [Monitoring & Alerting](#monitoring-alerting)
- [Infrastructure & Networking (Bare Metal)](#infrastructure-networking-bare-metal)
- [Infrastructure & Networking (Cluster)](#infrastructure-networking-cluster)
- [Tenant Management](#tenant-management)
- [Utility](#utility)
<!--toc:end-->
## Application Deployment
Scores for deploying and managing end-user applications.
- **ApplicationScore**: The primary score for deploying a web application. Describes the application, its framework, and the features it requires (e.g., monitoring, CI/CD).
- **HelmChartScore**: Deploys a generic Helm chart to a Kubernetes cluster.
- **ArgoHelmScore**: Deploys an application using an ArgoCD Helm chart.
- **LAMPScore**: A specialized score for deploying a classic LAMP (Linux, Apache, MySQL, PHP) stack.
## OKD / Kubernetes Cluster Setup
This collection of Scores is used to provision an entire OKD cluster from bare metal. They are typically used in order.
- **OKDSetup01InventoryScore**: Discovers and catalogs the physical hardware.
- **OKDSetup02BootstrapScore**: Configures the bootstrap node, renders iPXE files, and kicks off the SCOS installation.
- **OKDSetup03ControlPlaneScore**: Renders iPXE configurations for the control plane nodes.
- **OKDSetupPersistNetworkBondScore**: Configures network bonds on the nodes and port channels on the switches.
- **OKDSetup04WorkersScore**: Renders iPXE configurations for the worker nodes.
- **OKDSetup06InstallationReportScore**: Runs post-installation checks and generates a report.
- **OKDUpgradeScore**: Manages the upgrade process for an existing OKD cluster.
## Cluster Services & Management
Scores for installing and managing services _inside_ a Kubernetes cluster.
- **K3DInstallationScore**: Installs and configes a local K3D (k3s-in-docker) cluster. Used by `K8sAnywhereTopology`.
- **CertManagerHelmScore**: Deploys the `cert-manager` Helm chart.
- **ClusterIssuerScore**: Configures a `ClusterIssuer` for `cert-manager`, (e.g., for Let's Encrypt).
- **K8sNamespaceScore**: Ensures a Kubernetes namespace exists.
- **K8sDeploymentScore**: Deploys a generic `Deployment` resource to Kubernetes.
- **K8sIngressScore**: Configures an `Ingress` resource for a service.
## Monitoring & Alerting
Scores for configuring observability, dashboards, and alerts.
- **ApplicationMonitoringScore**: A generic score to set up monitoring for an application.
- **ApplicationRHOBMonitoringScore**: A specialized score for setting up monitoring via the Red Hat Observability stack.
- **HelmPrometheusAlertingScore**: Configures Prometheus alerts via a Helm chart.
- **K8sPrometheusCRDAlertingScore**: Configures Prometheus alerts using the `PrometheusRule` CRD.
- **PrometheusAlertScore**: A generic score for creating a Prometheus alert.
- **RHOBAlertingScore**: Configures alerts specifically for the Red Hat Observability stack.
- **NtfyScore**: Configures alerts to be sent to a `ntfy.sh` server.
## Infrastructure & Networking (Bare Metal)
Low-level scores for managing physical hardware and network services.
- **DhcpScore**: Configures a DHCP server.
- **OKDDhcpScore**: A specialized DHCP configuration for the OKD bootstrap process.
- **OKDBootstrapDhcpScore**: Configures DHCP specifically for the bootstrap node.
- **DhcpHostBindingScore**: Creates a specific MAC-to-IP binding in the DHCP server.
- **DnsScore**: Configures a DNS server.
- **OKDDnsScore**: A specialized DNS configuration for the OKD cluster (e.g., `api.*`, `*.apps.*`).
- **StaticFilesHttpScore**: Serves a directory of static files (e.g., a documentation site) over HTTP.
- **TftpScore**: Configures a TFTP server, typically for serving iPXE boot files.
- **IPxeMacBootFileScore**: Assigns a specific iPXE boot file to a MAC address in the TFTP server.
- **OKDIpxeScore**: A specialized score for generating the iPXE boot scripts for OKD.
- **OPNsenseShellCommandScore**: Executes a shell command on an OPNsense firewall.
## Infrastructure & Networking (Cluster)
Network services that run inside the cluster or as part of the topology.
- **LoadBalancerScore**: Configures a general-purpose load balancer.
- **OKDLoadBalancerScore**: Configures the high-availability load balancers for the OKD API and ingress.
- **OKDBootstrapLoadBalancerScore**: Configures the load balancer specifically for the bootstrap-time API endpoint.
- **K8sIngressScore**: Configures an Ingress controller or resource.
- [HighAvailabilityHostNetworkScore](../../harmony/src/modules/okd/host_network.rs): Configures network bonds on a host and the corresponding port-channels on the switch stack for high-availability.
## Tenant Management
Scores for managing multi-tenancy within a cluster.
- **TenantScore**: Creates a new tenant (e.g., a namespace, quotas, network policies).
- **TenantCredentialScore**: Generates and provisions credentials for a new tenant.
## Utility
Helper scores for discovery and inspection.
- **LaunchDiscoverInventoryAgentScore**: Launches the agent responsible for the `OKDSetup01InventoryScore`.
- **DiscoverHostForRoleScore**: A utility score to find a host matching a specific role in the inventory.
- **InspectInventoryScore**: Dumps the discovered inventory for inspection.

View File

@@ -0,0 +1,59 @@
# Topologies Catalog
A `Topology` is the logical representation of your infrastructure and its `Capabilities`. You select a `Topology` in your Harmony project to define _where_ your `Scores` will be applied.
<!--toc:start-->
- [Topologies Catalog](#topologies-catalog)
- [HAClusterTopology](#haclustertopology)
- [K8sAnywhereTopology](#k8sanywheretopology)
<!--toc:end-->
### HAClusterTopology
- **`HAClusterTopology::autoload()`**
This `Topology` represents a high-availability, bare-metal cluster. It is designed for production-grade deployments like OKD.
It models an environment consisting of:
- At least 3 cluster nodes (for control plane/workers)
- 2 redundant firewalls (e.g., OPNsense)
- 2 redundant network switches
**Provided Capabilities:**
This topology provides a rich set of capabilities required for bare-metal provisioning and cluster management, including:
- `K8sClient` (once the cluster is bootstrapped)
- `DnsServer`
- `LoadBalancer`
- `DhcpServer`
- `TftpServer`
- `Router` (via the firewalls)
- `Switch`
- `NetworkManager` (for host-level network config)
---
### K8sAnywhereTopology
- **`K8sAnywhereTopology::from_env()`**
This `Topology` is designed for development and application deployment. It provides a simple, abstract way to deploy to _any_ Kubernetes cluster.
**How it works:**
1. By default (`from_env()` with no env vars), it automatically provisions a **local K3D (k3s-in-docker) cluster** on your machine. This is perfect for local development and testing.
2. If you provide a `KUBECONFIG` environment variable, it will instead connect to that **existing Kubernetes cluster** (e.g., your staging or production OKD cluster).
This allows you to use the _exact same code_ to deploy your application locally as you do to deploy it to production.
**Provided Capabilities:**
- `K8sClient`
- `HelmCommand`
- `TenantManager`
- `Ingress`
- `Monitoring`
- ...and more.

40
docs/concepts.md Normal file
View File

@@ -0,0 +1,40 @@
# Core Concepts
Harmony's design is based on a few key concepts. Understanding them is the key to unlocking the framework's power.
### 1. Score
- **What it is:** A **Score** is a declarative description of a desired state. It's a "resource" that defines _what_ you want to achieve, not _how_ to do it.
- **Example:** `ApplicationScore` declares "I want this web application to be running and monitored."
### 2. Topology
- **What it is:** A **Topology** is the logical representation of your infrastructure and its abilities. It's the "where" your Scores will be applied.
- **Key Job:** A Topology's most important job is to expose which `Capabilities` it supports.
- **Example:** `HAClusterTopology` represents a bare-metal cluster and exposes `Capabilities` like `NetworkManager` and `Switch`. `K8sAnywhereTopology` represents a Kubernetes cluster and exposes the `K8sClient` `Capability`.
### 3. Capability
- **What it is:** A **Capability** is a specific feature or API that a `Topology` offers. It's the "how" a `Topology` can fulfill a `Score`'s request.
- **Example:** The `K8sClient` capability offers a way to interact with a Kubernetes API. The `Switch` capability offers a way to configure a physical network switch.
### 4. Interpret
- **What it is:** An **Interpret** is the execution logic that makes a `Score` a reality. It's the "glue" that connects the _desired state_ (`Score`) to the _environment's abilities_ (`Topology`'s `Capabilities`).
- **How it works:** When you apply a `Score`, Harmony finds the matching `Interpret` for your `Topology`. This `Interpret` then uses the `Capabilities` provided by the `Topology` to execute the necessary steps.
### 5. Inventory
- **What it is:** An **Inventory** is the physical material (the "what") used in a cluster. This is most relevant for bare-metal or on-premise topologies.
- **Example:** A list of nodes with their roles (control plane, worker), CPU, RAM, and network interfaces. For the `K8sAnywhereTopology`, the inventory might be empty or autoloaded, as the infrastructure is more abstract.
---
### How They Work Together (The Compile-Time Check)
1. You **write a `Score`** (e.g., `ApplicationScore`).
2. Your `Score`'s `Interpret` logic requires certain **`Capabilities`** (e.g., `K8sClient` and `Ingress`).
3. You choose a **`Topology`** to run it on (e.g., `HAClusterTopology`).
4. **At compile-time**, Harmony checks: "Does `HAClusterTopology` provide the `K8sClient` and `Ingress` capabilities that `ApplicationScore` needs?"
- **If Yes:** Your code compiles. You can be confident it will run.
- **If No:** The compiler gives you an error. You've just prevented a "config-is-valid-but-platform-is-wrong" runtime error before you even deployed.

View File

@@ -0,0 +1,133 @@
## Working procedure to clone and restore CoreOS disk from OKD Cluster
### **Step 1 - take a backup**
```
sudo dd if=/dev/old of=/dev/backup status=progress
```
### **Step 2 - clone beginning of old disk to new**
```
sudo dd if=/dev/old of=/dev/backup status=progress count=1000 bs=1M
```
### **Step 3 - verify and modify disk partitions**
list disk partitions
```
sgdisk -p /dev/new
```
if new disk is smaller than old disk and there is space on the xfs partition of the old disk, modify partitions of new disk
```
gdisk /dev/new
```
inside of gdisk commands
```
-v -> verify table
-p -> print table
-d -> select partition to delete partition
-n -> recreate partition with same partition number as deleted partition
```
For end sector, either specify the new end or just press Enter for maximum available
When asked about partition type, enter the same type code (it will show the old one)
```
p - >to verify
w -> to write
```
make xfs file system for new partition <new4>
```
sudo mkfs.xfs -f /dev/new4
```
### **Step 4 - copy old PARTUUID **
**careful here**
get old patuuid:
```
sgdisk -i <partition_number> /dev/old_disk # Note the "Partition unique GUID"
```
get labels
```
sgdisk -p /dev/old_disk # Shows partition names in the table
blkid /dev/old_disk* # Shows PARTUUIDs and labels for all partitions
```
set it on new disk
```
sgdisk -u <partition_number>:<old_partuuid> /dev/sdc
```
partition name:
```
sgdisk -c <partition_number>:"<old_name>" /dev/sdc
```
verify all:
```
lsblk -o NAME,SIZE,PARTUUID,PARTLABEL /dev/old_disk
```
### **Step 5 - Mount disks and copy files from old to new disk**
mount files before copy:
```
mkdir -p /mnt/new
mkdir -p /mnt/old
mount /dev/old4 /mnt/old
mount /dev/new4 /mnt/new
```
copy:
with -n flag can run as dry-run
```
rsync -aAXHvn --numeric-ids /source/ /destination/
```
```
rsync -aAXHv --numeric-ids /source/ /destination/
```
### **Step 6 - Set correct UUID for new partition 4**
to set uuid with xfs_admin you must unmount first
unmount old devices
```
umount /mnt/new
umount /mnt/old
```
to set correct uuid for partition 4
```
blkid /dev/old4
```
```
xfs_admin -U <old_uuid> /dev/new_partition
```
to set labels
get it
```
sgdisk -i 4 /dev/sda | grep "Partition name"
```
set it
```
sgdisk -c 4:"<label_name>" /dev/sdc
or
(check existing with xfs_admin -l /dev/old_partition)
Use xfs_admin -L <label> /dev/new_partition
```
### **Step 7 - Verify**
verify everything:
```
sgdisk -p /dev/sda # Old disk
sgdisk -p /dev/sdc # New disk
```
```
lsblk -o NAME,SIZE,PARTUUID,PARTLABEL /dev/sda
lsblk -o NAME,SIZE,PARTUUID,PARTLABEL /dev/sdc
```
```
blkid /dev/sda* | grep UUID=
blkid /dev/sdc* | grep UUID=
```

View File

@@ -0,0 +1,56 @@
## **Remove Worker flag from OKD Control Planes**
### **Context**
On OKD user provisioned infrastructure the control plane nodes can have the flag node-role.kubernetes.io/worker which allows non critical workloads to be scheduled on the control-planes
### **Observed Symptoms**
- After adding HAProxy servers to the backend each back end appears down
- Traffic is redirected to the control planes instead of workers
- The pods router-default are incorrectly applied on the control planes rather than on the workers
- Pods are being scheduled on the control planes causing cluster instability
```
ss -tlnp | grep 80
```
- shows process haproxy is listening at 0.0.0.0:80 on cps
- same problem for port 443
- In namespace rook-ceph certain pods are deploted on cps rather than on worker nodes
### **Cause**
- when intalling UPI, the roles (master, worker) are not managed by the Machine Config operator and the cps are made schedulable by default.
### **Diagnostic**
check node labels:
```
oc get nodes --show-labels | grep control-plane
```
Inspecter kubelet configuration:
```
cat /etc/systemd/system/kubelet.service
```
find the line:
```
--node-labels=node-role.kubernetes.io/control-plane,node-role.kubernetes.io/master,node-role.kubernetes.io/worker
```
→ presence of label worker confirms the problem.
Verify the flag doesnt come from MCO
```
oc get machineconfig | grep rendered-master
```
**Solution:**
To make the control planes non schedulable you must patch the cluster scheduler resource
```
oc patch scheduler cluster --type merge -p '{"spec":{"mastersSchedulable":false}}'
```
after the patch is applied the workloads can be deplaced by draining the nodes
```
oc adm cordon <cp-node>
oc adm drain <cp-node> --ignore-daemonsets delete-emptydir-data
```

View File

@@ -0,0 +1,42 @@
# Getting Started Guide
Welcome to Harmony! This guide will walk you through installing the Harmony framework, setting up a new project, and deploying your first application.
We will build and deploy the "Rust Web App" example, which automatically:
1. Provisions a local K3D (Kubernetes in Docker) cluster.
2. Deploys a sample Rust web application.
3. Sets up monitoring for the application.
## Prerequisites
Before you begin, you'll need a few tools installed on your system:
- **Rust & Cargo:** [Install Rust](https://www.rust-lang.org/tools/install)
- **Docker:** [Install Docker](https://docs.docker.com/get-docker/) (Required for the K3D local cluster)
- **kubectl:** [Install kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) (For inspecting the cluster)
## 1. Install Harmony
First, clone the Harmony repository and build the project. This gives you the `harmony` CLI and all the core libraries.
```bash
# Clone the main repository
git clone https://git.nationtech.io/nationtech/harmony
cd harmony
# Build the project (this may take a few minutes)
cargo build --release
```
...
## Next Steps
Congratulations, you've just deployed an application using true infrastructure-as-code!
From here, you can:
- [Explore the Catalogs](../catalogs/README.md): See what other [Scores](../catalogs/scores.md) and [Topologies](../catalogs/topologies.md) are available.
- [Read the Use Cases](../use-cases/README.md): Check out the [OKD on Bare Metal](./use-cases/okd-on-bare-metal.md) guide for a more advanced scenario.
- [Write your own Score](../guides/writing-a-score.md): Dive into the [Developer Guide](./guides/developer-guide.md) to start building your own components.

View File

@@ -0,0 +1,105 @@
# Design Document: Harmony PostgreSQL Module
**Status:** Draft
**Last Updated:** 2025-12-01
**Context:** Multi-site Data Replication & Orchestration
## 1. Overview
The Harmony PostgreSQL Module provides a high-level abstraction for deploying and managing high-availability PostgreSQL clusters across geographically distributed Kubernetes/OKD sites.
Instead of manually configuring complex replication slots, firewalls, and operator settings on each cluster, users define a single intent (a **Score**), and Harmony orchestrates the underlying infrastructure (the **Arrangement**) to establish a Primary-Replica architecture.
Currently, the implementation relies on the **CloudNativePG (CNPG)** operator as the backing engine.
## 2. Architecture
### 2.1 The Abstraction Model
Following **ADR 003 (Infrastructure Abstraction)**, Harmony separates the *intent* from the *implementation*.
1. **The Score (Intent):** The user defines a `MultisitePostgreSQL` resource. This describes *what* is needed (e.g., "A Postgres 15 cluster with 10GB storage, Primary on Site A, Replica on Site B").
2. **The Interpret (Action):** Harmony MultisitePostgreSQLInterpret processes this Score and orchestrates the deployment on both sites to reach the state defined in the Score.
3. **The Capability (Implementation):** The PostgreSQL Capability is implemented by the K8sTopology and the interpret can deploy it, configure it and fetch information about it. The concrete implementation will rely on the mature CloudnativePG operator to manage all the Kubernetes resources required.
### 2.2 Network Connectivity (TLS Passthrough)
One of the critical challenges in multi-site orchestration is secure connectivity between clusters that may have dynamic IPs or strict firewalls.
To solve this, we utilize **OKD/OpenShift Routes with TLS Passthrough**.
* **Mechanism:** The Primary site exposes a `Route` configured for `termination: passthrough`.
* **Routing:** The OpenShift HAProxy router inspects the **SNI (Server Name Indication)** header of the incoming TCP connection to route traffic to the correct PostgreSQL Pod.
* **Security:** SSL is **not** terminated at the ingress router. The encrypted stream is passed directly to the PostgreSQL instance. Mutual TLS (mTLS) authentication is handled natively by CNPG between the Primary and Replica instances.
* **Dynamic IPs:** Because connections are established via DNS hostnames (the Route URL), this architecture is resilient to dynamic IP changes at the Primary site.
#### Traffic Flow Diagram
```text
[ Site B: Replica ] [ Site A: Primary ]
| |
(CNPG Instance) --[Encrypted TCP]--> (OKD HAProxy Router)
| (Port 443) |
| |
| [SNI Inspection]
| |
| v
| (PostgreSQL Primary Pod)
| (Port 5432)
```
## 3. Design Decisions
### Why CloudNativePG?
We selected CloudNativePG because it relies exclusively on standard Kubernetes primitives and uses the native PostgreSQL replication protocol (WAL shipping/Streaming). This aligns with Harmony's goal of being "K8s Native."
### Why TLS Passthrough instead of VPN/NodePort?
* **NodePort:** Requires static IPs and opening non-standard ports on the firewall, which violates our security constraints.
* **VPN (e.g., Wireguard/Tailscale):** While secure, it introduces significant complexity (sidecars, key management) and external dependencies.
* **TLS Passthrough:** Leverages the existing Ingress/Router infrastructure already present in OKD. It requires zero additional software and respects multi-tenancy (Routes are namespaced).
### Configuration Philosophy (YAGNI)
The current design exposes a **generic configuration surface**. Users can configure standard parameters (Storage size, CPU/Memory requests, Postgres version).
**We explicitly do not expose advanced CNPG or PostgreSQL configurations at this stage.**
* **Reasoning:** We aim to keep the API surface small and manageable.
* **Future Path:** We plan to implement a "pass-through" mechanism to allow sending raw config maps or custom parameters to the underlying engine (CNPG) *only when a concrete use case arises*. Until then, we adhere to the **YAGNI (You Ain't Gonna Need It)** principle to avoid premature optimization and API bloat.
## 4. Usage Guide
To deploy a multi-site cluster, apply the `MultisitePostgreSQL` resource to the Harmony Control Plane.
### Example Manifest
```yaml
apiVersion: harmony.io/v1alpha1
kind: MultisitePostgreSQL
metadata:
name: finance-db
namespace: tenant-a
spec:
version: "15"
storage: "10Gi"
resources:
requests:
cpu: "500m"
memory: "1Gi"
# Topology Definition
topology:
primary:
site: "site-paris" # The name of the cluster in Harmony
replicas:
- site: "site-newyork"
```
### What happens next?
1. Harmony detects the CR.
2. **On Site Paris:** It deploys a CNPG Cluster (Primary) and creates a Passthrough Route `postgres-finance-db.apps.site-paris.example.com`.
3. **On Site New York:** It deploys a CNPG Cluster (Replica) configured with `externalClusters` pointing to the Paris Route.
4. Data begins replicating immediately over the encrypted channel.
## 5. Troubleshooting
* **Connection Refused:** Ensure the Primary site's Route is successfully admitted by the Ingress Controller.
* **Certificate Errors:** CNPG manages mTLS automatically. If errors persist, ensure the CA secrets were correctly propagated by Harmony from Primary to Replica namespaces.

BIN
empty_database.sqlite Normal file

Binary file not shown.

View File

@@ -27,6 +27,7 @@ async fn main() {
};
let application = Arc::new(RustWebapp {
name: "example-monitoring".to_string(),
dns: "example-monitoring.harmony.mcd".to_string(),
project_root: PathBuf::from("./examples/rust/webapp"),
framework: Some(RustWebFramework::Leptos),
service_port: 3000,

View File

@@ -0,0 +1,20 @@
[package]
name = "brocade-snmp-server"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { path = "../../harmony" }
brocade = { path = "../../brocade" }
harmony_secret = { path = "../../harmony_secret" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
harmony_macros = { path = "../../harmony_macros" }
tokio = { workspace = true }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
base64.workspace = true
serde.workspace = true

View File

@@ -0,0 +1,22 @@
use std::net::{IpAddr, Ipv4Addr};
use harmony::{
inventory::Inventory, modules::brocade::BrocadeEnableSnmpScore, topology::K8sAnywhereTopology,
};
#[tokio::main]
async fn main() {
let brocade_snmp_server = BrocadeEnableSnmpScore {
switch_ips: vec![IpAddr::V4(Ipv4Addr::new(192, 168, 1, 111))],
dry_run: true,
};
harmony_cli::run(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
vec![Box::new(brocade_snmp_server)],
None,
)
.await
.unwrap();
}

View File

@@ -0,0 +1,19 @@
[package]
name = "brocade-switch"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_macros = { path = "../../harmony_macros" }
harmony_types = { path = "../../harmony_types" }
tokio.workspace = true
url.workspace = true
async-trait.workspace = true
serde.workspace = true
log.workspace = true
env_logger.workspace = true
brocade = { path = "../../brocade" }

View File

@@ -0,0 +1,50 @@
use std::str::FromStr;
use brocade::{BrocadeOptions, PortOperatingMode};
use harmony::{
infra::brocade::BrocadeSwitchConfig,
inventory::Inventory,
modules::brocade::{BrocadeSwitchAuth, BrocadeSwitchScore, SwitchTopology},
};
use harmony_macros::ip;
use harmony_types::{id::Id, switch::PortLocation};
fn get_switch_config() -> BrocadeSwitchConfig {
let mut options = BrocadeOptions::default();
options.ssh.port = 2222;
let auth = BrocadeSwitchAuth {
username: "admin".to_string(),
password: "password".to_string(),
};
BrocadeSwitchConfig {
ips: vec![ip!("127.0.0.1")],
auth,
options,
}
}
#[tokio::main]
async fn main() {
let switch_score = BrocadeSwitchScore {
port_channels_to_clear: vec![
Id::from_str("17").unwrap(),
Id::from_str("19").unwrap(),
Id::from_str("18").unwrap(),
],
ports_to_configure: vec![
(PortLocation(2, 0, 17), PortOperatingMode::Trunk),
(PortLocation(2, 0, 19), PortOperatingMode::Trunk),
(PortLocation(1, 0, 18), PortOperatingMode::Trunk),
],
};
harmony_cli::run(
Inventory::autoload(),
SwitchTopology::new(get_switch_config()).await,
vec![Box::new(switch_score)],
None,
)
.await
.unwrap();
}

View File

@@ -1,5 +1,5 @@
[package]
name = "example-nanodc"
name = "cert_manager"
edition = "2024"
version.workspace = true
readme.workspace = true
@@ -8,12 +8,12 @@ publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_tui = { path = "../../harmony_tui" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
harmony_secret = { path = "../../harmony_secret" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
assert_cmd = "2.0.16"

View File

@@ -0,0 +1,42 @@
use harmony::{
inventory::Inventory,
modules::cert_manager::{
capability::CertificateManagementConfig, score_cert_management::CertificateManagementScore,
score_certificate::CertificateScore, score_issuer::CertificateIssuerScore,
},
topology::K8sAnywhereTopology,
};
#[tokio::main]
async fn main() {
let config = CertificateManagementConfig {
namespace: Some("test".to_string()),
acme_issuer: None,
ca_issuer: None,
self_signed: true,
};
let issuer_name = "test-self-signed-issuer".to_string();
let issuer = CertificateIssuerScore {
issuer_name: issuer_name.clone(),
config: config.clone(),
};
let cert = CertificateScore {
config: config.clone(),
issuer_name,
cert_name: "test-self-signed-cert".to_string(),
common_name: None,
dns_names: Some(vec!["test.dns.name".to_string()]),
is_ca: Some(false),
};
harmony_cli::run(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
vec![Box::new(issuer), Box::new(cert)],
None,
)
.await
.unwrap();
}

View File

@@ -2,7 +2,7 @@ use harmony::{
inventory::Inventory,
modules::{
dummy::{ErrorScore, PanicScore, SuccessScore},
inventory::LaunchDiscoverInventoryAgentScore,
inventory::{HarmonyDiscoveryStrategy, LaunchDiscoverInventoryAgentScore},
},
topology::LocalhostTopology,
};
@@ -18,6 +18,7 @@ async fn main() {
Box::new(PanicScore {}),
Box::new(LaunchDiscoverInventoryAgentScore {
discovery_timeout: Some(10),
discovery_strategy: HarmonyDiscoveryStrategy::MDNS,
}),
],
None,

View File

@@ -0,0 +1,15 @@
[package]
name = "harmony_inventory_builder"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_macros = { path = "../../harmony_macros" }
harmony_types = { path = "../../harmony_types" }
tokio.workspace = true
url.workspace = true
cidr.workspace = true

View File

@@ -0,0 +1,11 @@
cargo build -p harmony_inventory_builder --release --target x86_64-unknown-linux-musl
SCRIPT_DIR="$(dirname ${0})"
cd "${SCRIPT_DIR}/docker/"
cp ../../../target/x86_64-unknown-linux-musl/release/harmony_inventory_builder .
docker build . -t hub.nationtech.io/harmony/harmony_inventory_builder
docker push hub.nationtech.io/harmony/harmony_inventory_builder

View File

@@ -0,0 +1,10 @@
FROM debian:12-slim
RUN mkdir /app
WORKDIR /app/
COPY harmony_inventory_builder /app/
ENV RUST_LOG=info
CMD ["sleep", "infinity"]

View File

@@ -0,0 +1,37 @@
use harmony::{
inventory::{HostRole, Inventory},
modules::inventory::{DiscoverHostForRoleScore, HarmonyDiscoveryStrategy},
topology::LocalhostTopology,
};
use harmony_macros::cidrv4;
#[tokio::main]
async fn main() {
let discover_worker = DiscoverHostForRoleScore {
role: HostRole::Worker,
number_desired_hosts: 3,
discovery_strategy: HarmonyDiscoveryStrategy::SUBNET {
cidr: cidrv4!("192.168.2.0/24"),
port: 25000,
},
};
let discover_control_plane = DiscoverHostForRoleScore {
role: HostRole::ControlPlane,
number_desired_hosts: 3,
discovery_strategy: HarmonyDiscoveryStrategy::SUBNET {
cidr: cidrv4!("192.168.2.0/24"),
port: 25000,
},
};
harmony_cli::run(
Inventory::autoload(),
LocalhostTopology::new(),
vec![Box::new(discover_worker)],
//vec![Box::new(discover_worker), Box::new(discover_control_plane)],
None,
)
.await
.unwrap();
}

View File

@@ -0,0 +1,20 @@
[package]
name = "example-k8s-drain-node"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr.workspace = true
tokio.workspace = true
harmony_macros = { path = "../../harmony_macros" }
log.workspace = true
env_logger.workspace = true
url.workspace = true
assert_cmd = "2.0.16"
inquire.workspace = true

View File

@@ -0,0 +1,61 @@
use std::time::Duration;
use harmony::topology::k8s::{DrainOptions, K8sClient};
use log::{info, trace};
#[tokio::main]
async fn main() {
env_logger::init();
let k8s = K8sClient::try_default().await.unwrap();
let nodes = k8s.get_nodes(None).await.unwrap();
trace!("Got nodes : {nodes:#?}");
let node_names = nodes
.iter()
.map(|n| n.metadata.name.as_ref().unwrap())
.collect::<Vec<&String>>();
info!("Got nodes : {:?}", node_names);
let node_name = inquire::Select::new("What node do you want to operate on?", node_names)
.prompt()
.unwrap();
let drain = inquire::Confirm::new("Do you wish to drain the node now ?")
.prompt()
.unwrap();
if drain {
let mut options = DrainOptions::default_ignore_daemonset_delete_emptydir_data();
options.timeout = Duration::from_secs(1);
k8s.drain_node(&node_name, &options).await.unwrap();
info!("Node {node_name} successfully drained");
}
let uncordon =
inquire::Confirm::new("Do you wish to uncordon node to resume scheduling workloads now?")
.prompt()
.unwrap();
if uncordon {
info!("Uncordoning node {node_name}");
k8s.uncordon_node(node_name).await.unwrap();
info!("Node {node_name} uncordoned");
}
let reboot = inquire::Confirm::new("Do you wish to reboot node now?")
.prompt()
.unwrap();
if reboot {
k8s.reboot_node(
&node_name,
&DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
Duration::from_secs(3600),
)
.await
.unwrap();
}
info!("All done playing with nodes, happy harmonizing!");
}

View File

@@ -0,0 +1,20 @@
[package]
name = "example-k8s-write-file-on-node"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr.workspace = true
tokio.workspace = true
harmony_macros = { path = "../../harmony_macros" }
log.workspace = true
env_logger.workspace = true
url.workspace = true
assert_cmd = "2.0.16"
inquire.workspace = true

View File

@@ -0,0 +1,45 @@
use harmony::topology::k8s::{DrainOptions, K8sClient, NodeFile};
use log::{info, trace};
#[tokio::main]
async fn main() {
env_logger::init();
let k8s = K8sClient::try_default().await.unwrap();
let nodes = k8s.get_nodes(None).await.unwrap();
trace!("Got nodes : {nodes:#?}");
let node_names = nodes
.iter()
.map(|n| n.metadata.name.as_ref().unwrap())
.collect::<Vec<&String>>();
info!("Got nodes : {:?}", node_names);
let node = inquire::Select::new("What node do you want to write file to?", node_names)
.prompt()
.unwrap();
let path = inquire::Text::new("File path on node").prompt().unwrap();
let content = inquire::Text::new("File content").prompt().unwrap();
let node_file = NodeFile {
path: path,
content: content,
mode: 0o600,
};
k8s.write_files_to_node(&node, &vec![node_file.clone()])
.await
.unwrap();
let cmd = inquire::Text::new("Command to run on node")
.prompt()
.unwrap();
k8s.run_privileged_command_on_node(&node, &cmd)
.await
.unwrap();
info!(
"File {} mode {} written in node {node}",
node_file.path, node_file.mode
);
}

View File

@@ -24,13 +24,14 @@ use harmony::{
},
topology::K8sAnywhereTopology,
};
use harmony_types::net::Url;
use harmony_types::{k8s_name::K8sName, net::Url};
#[tokio::main]
async fn main() {
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
name: K8sName("test-discord".to_string()),
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
selectors: vec![],
};
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();

View File

@@ -22,8 +22,8 @@ use harmony::{
tenant::{ResourceLimits, TenantConfig, TenantNetworkPolicy},
},
};
use harmony_types::id::Id;
use harmony_types::net::Url;
use harmony_types::{id::Id, k8s_name::K8sName};
#[tokio::main]
async fn main() {
@@ -43,8 +43,9 @@ async fn main() {
};
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
name: K8sName("test-discord".to_string()),
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
selectors: vec![],
};
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();

View File

@@ -0,0 +1,18 @@
[package]
name = "example-multisite-postgres"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

View File

@@ -0,0 +1,3 @@
export HARMONY_FAILOVER_TOPOLOGY_K8S_PRIMARY="context=default/api-your-openshift-cluster:6443/kube:admin"
export HARMONY_FAILOVER_TOPOLOGY_K8S_REPLICA="context=someuser/somecluster"
export RUST_LOG="harmony=debug"

View File

@@ -0,0 +1,28 @@
use harmony::{
inventory::Inventory,
modules::postgresql::{PublicPostgreSQLScore, capability::PostgreSQLConfig},
topology::{FailoverTopology, K8sAnywhereTopology},
};
#[tokio::main]
async fn main() {
// env_logger::init();
let postgres = PublicPostgreSQLScore {
config: PostgreSQLConfig {
cluster_name: "harmony-postgres-example".to_string(), // Override default name
namespace: "harmony-public-postgres".to_string(),
..Default::default() // Use harmony defaults, they are based on CNPG's default values :
// "default" namespace, 1 instance, 1Gi storage
},
hostname: "postgrestest.sto1.nationtech.io".to_string(),
};
harmony_cli::run(
Inventory::autoload(),
FailoverTopology::<K8sAnywhereTopology>::from_env(),
vec![Box::new(postgres)],
None,
)
.await
.unwrap();
}

View File

@@ -1,4 +0,0 @@
#!/bin/bash
helm install --create-namespace --namespace rook-ceph rook-ceph-cluster \
--set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster -f values.yaml

View File

@@ -1,721 +0,0 @@
# Default values for a single rook-ceph cluster
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# -- Namespace of the main rook operator
operatorNamespace: rook-ceph
# -- The metadata.name of the CephCluster CR
# @default -- The same as the namespace
clusterName:
# -- Optional override of the target kubernetes version
kubeVersion:
# -- Cluster ceph.conf override
configOverride:
# configOverride: |
# [global]
# mon_allow_pool_delete = true
# osd_pool_default_size = 3
# osd_pool_default_min_size = 2
# Installs a debugging toolbox deployment
toolbox:
# -- Enable Ceph debugging pod deployment. See [toolbox](../Troubleshooting/ceph-toolbox.md)
enabled: true
# -- Toolbox image, defaults to the image used by the Ceph cluster
image: #quay.io/ceph/ceph:v19.2.2
# -- Toolbox tolerations
tolerations: []
# -- Toolbox affinity
affinity: {}
# -- Toolbox container security context
containerSecurityContext:
runAsNonRoot: true
runAsUser: 2016
runAsGroup: 2016
capabilities:
drop: ["ALL"]
# -- Toolbox resources
resources:
limits:
memory: "1Gi"
requests:
cpu: "100m"
memory: "128Mi"
# -- Set the priority class for the toolbox if desired
priorityClassName:
monitoring:
# -- Enable Prometheus integration, will also create necessary RBAC rules to allow Operator to create ServiceMonitors.
# Monitoring requires Prometheus to be pre-installed
enabled: false
# -- Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled
metricsDisabled: false
# -- Whether to create the Prometheus rules for Ceph alerts
createPrometheusRules: false
# -- The namespace in which to create the prometheus rules, if different from the rook cluster namespace.
# If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus
# deployed) to set rulesNamespaceOverride for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions.
rulesNamespaceOverride:
# Monitoring settings for external clusters:
# externalMgrEndpoints: <list of endpoints>
# externalMgrPrometheusPort: <port>
# Scrape interval for prometheus
# interval: 10s
# allow adding custom labels and annotations to the prometheus rule
prometheusRule:
# -- Labels applied to PrometheusRule
labels: {}
# -- Annotations applied to PrometheusRule
annotations: {}
# -- Create & use PSP resources. Set this to the same value as the rook-ceph chart.
pspEnable: false
# imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts.
# imagePullSecrets:
# - name: my-registry-secret
# All values below are taken from the CephCluster CRD
# -- Cluster configuration.
# @default -- See [below](#ceph-cluster-spec)
cephClusterSpec:
# This cluster spec example is for a converged cluster where all the Ceph daemons are running locally,
# as in the host-based example (cluster.yaml). For a different configuration such as a
# PVC-based cluster (cluster-on-pvc.yaml), external cluster (cluster-external.yaml),
# or stretch cluster (cluster-stretched.yaml), replace this entire `cephClusterSpec`
# with the specs from those examples.
# For more details, check https://rook.io/docs/rook/v1.10/CRDs/Cluster/ceph-cluster-crd/
cephVersion:
# The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
# v18 is Reef, v19 is Squid
# RECOMMENDATION: In production, use a specific version tag instead of the general v18 flag, which pulls the latest release and could result in different
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
# If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v19.2.2-20250409
# This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
image: quay.io/ceph/ceph:v19.2.2
# Whether to allow unsupported versions of Ceph. Currently Reef and Squid are supported.
# Future versions such as Tentacle (v20) would require this to be set to `true`.
# Do not set to true in production.
allowUnsupported: false
# The path on the host where configuration files will be persisted. Must be specified. If there are multiple clusters, the directory must be unique for each cluster.
# Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
# In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment.
dataDirHostPath: /var/lib/rook
# Whether or not upgrade should continue even if a check fails
# This means Ceph's status could be degraded and we don't recommend upgrading but you might decide otherwise
# Use at your OWN risk
# To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/v1.10/Upgrade/ceph-upgrade/
skipUpgradeChecks: false
# Whether or not continue if PGs are not clean during an upgrade
continueUpgradeAfterChecksEvenIfNotHealthy: false
# WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart.
# If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one
# if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then operator would
# continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`.
# The default wait timeout is 10 minutes.
waitTimeoutForHealthyOSDInMinutes: 10
# Whether or not requires PGs are clean before an OSD upgrade. If set to `true` OSD upgrade process won't start until PGs are healthy.
# This configuration will be ignored if `skipUpgradeChecks` is `true`.
# Default is false.
upgradeOSDRequiresHealthyPGs: false
mon:
# Set the number of mons to be started. Generally recommended to be 3.
# For highest availability, an odd number of mons should be specified.
count: 3
# The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
# Mons should only be allowed on the same node for test environments where data loss is acceptable.
allowMultiplePerNode: false
mgr:
# When higher availability of the mgr is needed, increase the count to 2.
# In that case, one mgr will be active and one in standby. When Ceph updates which
# mgr is active, Rook will update the mgr services to match the active mgr.
count: 2
allowMultiplePerNode: false
modules:
# List of modules to optionally enable or disable.
# Note the "dashboard" and "monitoring" modules are already configured by other settings in the cluster CR.
# - name: rook
# enabled: true
# enable the ceph dashboard for viewing cluster status
dashboard:
enabled: true
# serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy)
# urlPrefix: /ceph-dashboard
# serve the dashboard at the given port.
# port: 8443
# Serve the dashboard using SSL (if using ingress to expose the dashboard and `ssl: true` you need to set
# the corresponding "backend protocol" annotation(s) for your ingress controller of choice)
ssl: true
# Network configuration, see: https://github.com/rook/rook/blob/master/Documentation/CRDs/Cluster/ceph-cluster-crd.md#network-configuration-settings
network:
connections:
# Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network.
# The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted.
# When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check.
# IMPORTANT: Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only,
# you can set the "mounter: rbd-nbd" in the rbd storage class, or "mounter: fuse" in the cephfs storage class.
# The nbd and fuse drivers are *not* recommended in production since restarting the csi driver pod will disconnect the volumes.
encryption:
enabled: false
# Whether to compress the data in transit across the wire. The default is false.
# The kernel requirements above for encryption also apply to compression.
compression:
enabled: false
# Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled
# and clients will be required to connect to the Ceph cluster with the v2 port (3300).
# Requires a kernel that supports msgr v2 (kernel 5.11 or CentOS 8.4 or newer).
requireMsgr2: false
# # enable host networking
# provider: host
# # EXPERIMENTAL: enable the Multus network provider
# provider: multus
# selectors:
# # The selector keys are required to be `public` and `cluster`.
# # Based on the configuration, the operator will do the following:
# # 1. if only the `public` selector key is specified both public_network and cluster_network Ceph settings will listen on that interface
# # 2. if both `public` and `cluster` selector keys are specified the first one will point to 'public_network' flag and the second one to 'cluster_network'
# #
# # In order to work, each selector value must match a NetworkAttachmentDefinition object in Multus
# #
# # public: public-conf --> NetworkAttachmentDefinition object name in Multus
# # cluster: cluster-conf --> NetworkAttachmentDefinition object name in Multus
# # Provide internet protocol version. IPv6, IPv4 or empty string are valid options. Empty string would mean IPv4
# ipFamily: "IPv6"
# # Ceph daemons to listen on both IPv4 and Ipv6 networks
# dualStack: false
# enable the crash collector for ceph daemon crash collection
crashCollector:
disable: false
# Uncomment daysToRetain to prune ceph crash entries older than the
# specified number of days.
# daysToRetain: 30
# enable log collector, daemons will log on files and rotate
logCollector:
enabled: true
periodicity: daily # one of: hourly, daily, weekly, monthly
maxLogSize: 500M # SUFFIX may be 'M' or 'G'. Must be at least 1M.
# automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction.
cleanupPolicy:
# Since cluster cleanup is destructive to data, confirmation is required.
# To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data".
# This value should only be set when the cluster is about to be deleted. After the confirmation is set,
# Rook will immediately stop configuring the cluster and only wait for the delete command.
# If the empty string is set, Rook will not destroy any data on hosts during uninstall.
confirmation: ""
# sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion
sanitizeDisks:
# method indicates if the entire disk should be sanitized or simply ceph's metadata
# in both case, re-install is possible
# possible choices are 'complete' or 'quick' (default)
method: quick
# dataSource indicate where to get random bytes from to write on the disk
# possible choices are 'zero' (default) or 'random'
# using random sources will consume entropy from the system and will take much more time then the zero source
dataSource: zero
# iteration overwrite N times instead of the default (1)
# takes an integer value
iteration: 1
# allowUninstallWithVolumes defines how the uninstall should be performed
# If set to true, cephCluster deletion does not wait for the PVs to be deleted.
allowUninstallWithVolumes: false
# To control where various services will be scheduled by kubernetes, use the placement configuration sections below.
# The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and
# tolerate taints with a key of 'storage-node'.
# placement:
# all:
# nodeAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# - key: role
# operator: In
# values:
# - storage-node
# podAffinity:
# podAntiAffinity:
# topologySpreadConstraints:
# tolerations:
# - key: storage-node
# operator: Exists
# # The above placement information can also be specified for mon, osd, and mgr components
# mon:
# # Monitor deployments may contain an anti-affinity rule for avoiding monitor
# # collocation on the same node. This is a required rule when host network is used
# # or when AllowMultiplePerNode is false. Otherwise this anti-affinity rule is a
# # preferred rule with weight: 50.
# osd:
# mgr:
# cleanup:
# annotations:
# all:
# mon:
# osd:
# cleanup:
# prepareosd:
# # If no mgr annotations are set, prometheus scrape annotations will be set by default.
# mgr:
# dashboard:
# labels:
# all:
# mon:
# osd:
# cleanup:
# mgr:
# prepareosd:
# # monitoring is a list of key-value pairs. It is injected into all the monitoring resources created by operator.
# # These labels can be passed as LabelSelector to Prometheus
# monitoring:
# dashboard:
resources:
mgr:
limits:
memory: "1Gi"
requests:
cpu: "500m"
memory: "512Mi"
mon:
limits:
memory: "2Gi"
requests:
cpu: "1000m"
memory: "1Gi"
osd:
limits:
memory: "4Gi"
requests:
cpu: "1000m"
memory: "4Gi"
prepareosd:
# limits: It is not recommended to set limits on the OSD prepare job
# since it's a one-time burst for memory that must be allowed to
# complete without an OOM kill. Note however that if a k8s
# limitRange guardrail is defined external to Rook, the lack of
# a limit here may result in a sync failure, in which case a
# limit should be added. 1200Mi may suffice for up to 15Ti
# OSDs ; for larger devices 2Gi may be required.
# cf. https://github.com/rook/rook/pull/11103
requests:
cpu: "500m"
memory: "50Mi"
mgr-sidecar:
limits:
memory: "100Mi"
requests:
cpu: "100m"
memory: "40Mi"
crashcollector:
limits:
memory: "60Mi"
requests:
cpu: "100m"
memory: "60Mi"
logcollector:
limits:
memory: "1Gi"
requests:
cpu: "100m"
memory: "100Mi"
cleanup:
limits:
memory: "1Gi"
requests:
cpu: "500m"
memory: "100Mi"
exporter:
limits:
memory: "128Mi"
requests:
cpu: "50m"
memory: "50Mi"
# The option to automatically remove OSDs that are out and are safe to destroy.
removeOSDsIfOutAndSafeToRemove: false
# priority classes to apply to ceph resources
priorityClassNames:
mon: system-node-critical
osd: system-node-critical
mgr: system-cluster-critical
storage: # cluster level storage configuration and selection
useAllNodes: true
useAllDevices: true
# deviceFilter:
# config:
# crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
# metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
# databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
# osdsPerDevice: "1" # this value can be overridden at the node or device level
# encryptedDevice: "true" # the default value for this option is "false"
# # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
# # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
# nodes:
# - name: "172.17.4.201"
# devices: # specific devices to use for storage can be specified for each node
# - name: "sdb"
# - name: "nvme01" # multiple osds can be created on high performance devices
# config:
# osdsPerDevice: "5"
# - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
# config: # configuration can be specified at the node level which overrides the cluster level config
# - name: "172.17.4.301"
# deviceFilter: "^sd."
# The section for configuring management of daemon disruptions during upgrade or fencing.
disruptionManagement:
# If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically
# via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will
# block eviction of OSDs by default and unblock them safely when drains are detected.
managePodBudgets: true
# A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the
# default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes.
osdMaintenanceTimeout: 30
# Configure the healthcheck and liveness probes for ceph pods.
# Valid values for daemons are 'mon', 'osd', 'status'
healthCheck:
daemonHealth:
mon:
disabled: false
interval: 45s
osd:
disabled: false
interval: 60s
status:
disabled: false
interval: 60s
# Change pod liveness probe, it works for all mon, mgr, and osd pods.
livenessProbe:
mon:
disabled: false
mgr:
disabled: false
osd:
disabled: false
ingress:
# -- Enable an ingress for the ceph-dashboard
dashboard:
# {}
# labels:
# external-dns/private: "true"
annotations:
"route.openshift.io/termination": "passthrough"
# external-dns.alpha.kubernetes.io/hostname: dashboard.example.com
# nginx.ingress.kubernetes.io/rewrite-target: /ceph-dashboard/$2
# If the dashboard has ssl: true the following will make sure the NGINX Ingress controller can expose the dashboard correctly
# nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
# nginx.ingress.kubernetes.io/server-snippet: |
# proxy_ssl_verify off;
host:
name: ceph.apps.ncd0.harmony.mcd
path: null # TODO the chart does not allow removing the path, and it causes openshift to fail creating a route, because path is not supported with termination mode passthrough
pathType: ImplementationSpecific
tls:
- {}
# secretName: testsecret-tls
# Note: Only one of ingress class annotation or the `ingressClassName:` can be used at a time
# to set the ingress class
# ingressClassName: openshift-default
# labels:
# external-dns/private: "true"
# annotations:
# external-dns.alpha.kubernetes.io/hostname: dashboard.example.com
# nginx.ingress.kubernetes.io/rewrite-target: /ceph-dashboard/$2
# If the dashboard has ssl: true the following will make sure the NGINX Ingress controller can expose the dashboard correctly
# nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
# nginx.ingress.kubernetes.io/server-snippet: |
# proxy_ssl_verify off;
# host:
# name: dashboard.example.com
# path: "/ceph-dashboard(/|$)(.*)"
# pathType: Prefix
# tls:
# - hosts:
# - dashboard.example.com
# secretName: testsecret-tls
## Note: Only one of ingress class annotation or the `ingressClassName:` can be used at a time
## to set the ingress class
# ingressClassName: nginx
# -- A list of CephBlockPool configurations to deploy
# @default -- See [below](#ceph-block-pools)
cephBlockPools:
- name: ceph-blockpool
# see https://github.com/rook/rook/blob/master/Documentation/CRDs/Block-Storage/ceph-block-pool-crd.md#spec for available configuration
spec:
failureDomain: host
replicated:
size: 3
# Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false.
# For reference: https://docs.ceph.com/docs/latest/mgr/prometheus/#rbd-io-statistics
# enableRBDStats: true
storageClass:
enabled: true
name: ceph-block
annotations: {}
labels: {}
isDefault: true
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
mountOptions: []
# see https://kubernetes.io/docs/concepts/storage/storage-classes/#allowed-topologies
allowedTopologies: []
# - matchLabelExpressions:
# - key: rook-ceph-role
# values:
# - storage-node
# see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Block-Storage-RBD/block-storage.md#provision-storage for available configuration
parameters:
# (optional) mapOptions is a comma-separated list of map options.
# For krbd options refer
# https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options
# For nbd options refer
# https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options
# mapOptions: lock_on_read,queue_depth=1024
# (optional) unmapOptions is a comma-separated list of unmap options.
# For krbd options refer
# https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options
# For nbd options refer
# https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options
# unmapOptions: force
# RBD image format. Defaults to "2".
imageFormat: "2"
# RBD image features, equivalent to OR'd bitfield value: 63
# Available for imageFormat: "2". Older releases of CSI RBD
# support only the `layering` feature. The Linux kernel (KRBD) supports the
# full feature complement as of 5.4
imageFeatures: layering
# These secrets contain Ceph admin credentials.
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}"
# Specify the filesystem type of the volume. If not specified, csi-provisioner
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
# in hyperconverged settings where the volume is mounted on the same node as the osds.
csi.storage.k8s.io/fstype: ext4
# -- A list of CephFileSystem configurations to deploy
# @default -- See [below](#ceph-file-systems)
cephFileSystems:
- name: ceph-filesystem
# see https://github.com/rook/rook/blob/master/Documentation/CRDs/Shared-Filesystem/ceph-filesystem-crd.md#filesystem-settings for available configuration
spec:
metadataPool:
replicated:
size: 3
dataPools:
- failureDomain: host
replicated:
size: 3
# Optional and highly recommended, 'data0' by default, see https://github.com/rook/rook/blob/master/Documentation/CRDs/Shared-Filesystem/ceph-filesystem-crd.md#pools
name: data0
metadataServer:
activeCount: 1
activeStandby: true
resources:
limits:
memory: "4Gi"
requests:
cpu: "1000m"
memory: "4Gi"
priorityClassName: system-cluster-critical
storageClass:
enabled: true
isDefault: false
name: ceph-filesystem
# (Optional) specify a data pool to use, must be the name of one of the data pools above, 'data0' by default
pool: data0
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
annotations: {}
labels: {}
mountOptions: []
# see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage.md#provision-storage for available configuration
parameters:
# The secrets contain Ceph admin credentials.
csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}"
# Specify the filesystem type of the volume. If not specified, csi-provisioner
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
# in hyperconverged settings where the volume is mounted on the same node as the osds.
csi.storage.k8s.io/fstype: ext4
# -- Settings for the filesystem snapshot class
# @default -- See [CephFS Snapshots](../Storage-Configuration/Ceph-CSI/ceph-csi-snapshot.md#cephfs-snapshots)
cephFileSystemVolumeSnapshotClass:
enabled: false
name: ceph-filesystem
isDefault: true
deletionPolicy: Delete
annotations: {}
labels: {}
# see https://rook.io/docs/rook/v1.10/Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#cephfs-snapshots for available configuration
parameters: {}
# -- Settings for the block pool snapshot class
# @default -- See [RBD Snapshots](../Storage-Configuration/Ceph-CSI/ceph-csi-snapshot.md#rbd-snapshots)
cephBlockPoolsVolumeSnapshotClass:
enabled: false
name: ceph-block
isDefault: false
deletionPolicy: Delete
annotations: {}
labels: {}
# see https://rook.io/docs/rook/v1.10/Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#rbd-snapshots for available configuration
parameters: {}
# -- A list of CephObjectStore configurations to deploy
# @default -- See [below](#ceph-object-stores)
cephObjectStores:
- name: ceph-objectstore
# see https://github.com/rook/rook/blob/master/Documentation/CRDs/Object-Storage/ceph-object-store-crd.md#object-store-settings for available configuration
spec:
metadataPool:
failureDomain: host
replicated:
size: 3
dataPool:
failureDomain: host
erasureCoded:
dataChunks: 2
codingChunks: 1
parameters:
bulk: "true"
preservePoolsOnDelete: true
gateway:
port: 80
resources:
limits:
memory: "2Gi"
requests:
cpu: "1000m"
memory: "1Gi"
# securePort: 443
# sslCertificateRef:
instances: 1
priorityClassName: system-cluster-critical
# opsLogSidecar:
# resources:
# limits:
# memory: "100Mi"
# requests:
# cpu: "100m"
# memory: "40Mi"
storageClass:
enabled: true
name: ceph-bucket
reclaimPolicy: Delete
volumeBindingMode: "Immediate"
annotations: {}
labels: {}
# see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim.md#storageclass for available configuration
parameters:
# note: objectStoreNamespace and objectStoreName are configured by the chart
region: us-east-1
ingress:
# Enable an ingress for the ceph-objectstore
enabled: true
# The ingress port by default will be the object store's "securePort" (if set), or the gateway "port".
# To override those defaults, set this ingress port to the desired port.
# port: 80
# annotations: {}
host:
name: objectstore.apps.ncd0.harmony.mcd
path: /
pathType: Prefix
# tls:
# - hosts:
# - objectstore.example.com
# secretName: ceph-objectstore-tls
# ingressClassName: nginx
## cephECBlockPools are disabled by default, please remove the comments and set desired values to enable it
## For erasure coded a replicated metadata pool is required.
## https://rook.io/docs/rook/latest/CRDs/Shared-Filesystem/ceph-filesystem-crd/#erasure-coded
#cephECBlockPools:
# - name: ec-pool
# spec:
# metadataPool:
# replicated:
# size: 2
# dataPool:
# failureDomain: osd
# erasureCoded:
# dataChunks: 2
# codingChunks: 1
# deviceClass: hdd
#
# parameters:
# # clusterID is the namespace where the rook cluster is running
# # If you change this namespace, also change the namespace below where the secret namespaces are defined
# clusterID: rook-ceph # namespace:cluster
# # (optional) mapOptions is a comma-separated list of map options.
# # For krbd options refer
# # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options
# # For nbd options refer
# # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options
# # mapOptions: lock_on_read,queue_depth=1024
#
# # (optional) unmapOptions is a comma-separated list of unmap options.
# # For krbd options refer
# # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options
# # For nbd options refer
# # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options
# # unmapOptions: force
#
# # RBD image format. Defaults to "2".
# imageFormat: "2"
#
# # RBD image features, equivalent to OR'd bitfield value: 63
# # Available for imageFormat: "2". Older releases of CSI RBD
# # support only the `layering` feature. The Linux kernel (KRBD) supports the
# # full feature complement as of 5.4
# # imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock
# imageFeatures: layering
#
# storageClass:
# provisioner: rook-ceph.rbd.csi.ceph.com # csi-provisioner-name
# enabled: true
# name: rook-ceph-block
# isDefault: false
# annotations: { }
# labels: { }
# allowVolumeExpansion: true
# reclaimPolicy: Delete
# -- CSI driver name prefix for cephfs, rbd and nfs.
# @default -- `namespace name where rook-ceph operator is deployed`
csiDriverNamePrefix:

View File

@@ -1,3 +0,0 @@
#!/bin/bash
helm repo add rook-release https://charts.rook.io/release
helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph -f values.yaml

View File

@@ -1,674 +0,0 @@
# Default values for rook-ceph-operator
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
image:
# -- Image
repository: docker.io/rook/ceph
# -- Image tag
# @default -- `master`
tag: v1.17.1
# -- Image pull policy
pullPolicy: IfNotPresent
crds:
# -- Whether the helm chart should create and update the CRDs. If false, the CRDs must be
# managed independently with deploy/examples/crds.yaml.
# **WARNING** Only set during first deployment. If later disabled the cluster may be DESTROYED.
# If the CRDs are deleted in this case, see
# [the disaster recovery guide](https://rook.io/docs/rook/latest/Troubleshooting/disaster-recovery/#restoring-crds-after-deletion)
# to restore them.
enabled: true
# -- Pod resource requests & limits
resources:
limits:
memory: 512Mi
requests:
cpu: 200m
memory: 128Mi
# -- Kubernetes [`nodeSelector`](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) to add to the Deployment.
nodeSelector: {}
# Constraint rook-ceph-operator Deployment to nodes with label `disktype: ssd`.
# For more info, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
# disktype: ssd
# -- List of Kubernetes [`tolerations`](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) to add to the Deployment.
tolerations: []
# -- Delay to use for the `node.kubernetes.io/unreachable` pod failure toleration to override
# the Kubernetes default of 5 minutes
unreachableNodeTolerationSeconds: 5
# -- Whether the operator should watch cluster CRD in its own namespace or not
currentNamespaceOnly: false
# -- Custom pod labels for the operator
operatorPodLabels: {}
# -- Pod annotations
annotations: {}
# -- Global log level for the operator.
# Options: `ERROR`, `WARNING`, `INFO`, `DEBUG`
logLevel: INFO
# -- If true, create & use RBAC resources
rbacEnable: true
rbacAggregate:
# -- If true, create a ClusterRole aggregated to [user facing roles](https://kubernetes.io/docs/reference/access-authn-authz/rbac/#user-facing-roles) for objectbucketclaims
enableOBCs: false
# -- If true, create & use PSP resources
pspEnable: false
# -- Set the priority class for the rook operator deployment if desired
priorityClassName:
# -- Set the container security context for the operator
containerSecurityContext:
runAsNonRoot: true
runAsUser: 2016
runAsGroup: 2016
capabilities:
drop: ["ALL"]
# -- If true, loop devices are allowed to be used for osds in test clusters
allowLoopDevices: false
# Settings for whether to disable the drivers or other daemons if they are not
# needed
csi:
# -- Enable Ceph CSI RBD driver
enableRbdDriver: true
# -- Enable Ceph CSI CephFS driver
enableCephfsDriver: true
# -- Disable the CSI driver.
disableCsiDriver: "false"
# -- Enable host networking for CSI CephFS and RBD nodeplugins. This may be necessary
# in some network configurations where the SDN does not provide access to an external cluster or
# there is significant drop in read/write performance
enableCSIHostNetwork: true
# -- Enable Snapshotter in CephFS provisioner pod
enableCephfsSnapshotter: true
# -- Enable Snapshotter in NFS provisioner pod
enableNFSSnapshotter: true
# -- Enable Snapshotter in RBD provisioner pod
enableRBDSnapshotter: true
# -- Enable Host mount for `/etc/selinux` directory for Ceph CSI nodeplugins
enablePluginSelinuxHostMount: false
# -- Enable Ceph CSI PVC encryption support
enableCSIEncryption: false
# -- Enable volume group snapshot feature. This feature is
# enabled by default as long as the necessary CRDs are available in the cluster.
enableVolumeGroupSnapshot: true
# -- PriorityClassName to be set on csi driver plugin pods
pluginPriorityClassName: system-node-critical
# -- PriorityClassName to be set on csi driver provisioner pods
provisionerPriorityClassName: system-cluster-critical
# -- Policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted.
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
rbdFSGroupPolicy: "File"
# -- Policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted.
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
cephFSFSGroupPolicy: "File"
# -- Policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted.
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
nfsFSGroupPolicy: "File"
# -- OMAP generator generates the omap mapping between the PV name and the RBD image
# which helps CSI to identify the rbd images for CSI operations.
# `CSI_ENABLE_OMAP_GENERATOR` needs to be enabled when we are using rbd mirroring feature.
# By default OMAP generator is disabled and when enabled, it will be deployed as a
# sidecar with CSI provisioner pod, to enable set it to true.
enableOMAPGenerator: false
# -- Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options.
# Set to "ms_mode=secure" when connections.encrypted is enabled in CephCluster CR
cephFSKernelMountOptions:
# -- Enable adding volume metadata on the CephFS subvolumes and RBD images.
# Not all users might be interested in getting volume/snapshot details as metadata on CephFS subvolume and RBD images.
# Hence enable metadata is false by default
enableMetadata: false
# -- Set replicas for csi provisioner deployment
provisionerReplicas: 2
# -- Cluster name identifier to set as metadata on the CephFS subvolume and RBD images. This will be useful
# in cases like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster
clusterName:
# -- Set logging level for cephCSI containers maintained by the cephCSI.
# Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity.
logLevel: 0
# -- Set logging level for Kubernetes-csi sidecar containers.
# Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity.
# @default -- `0`
sidecarLogLevel:
# -- CSI driver name prefix for cephfs, rbd and nfs.
# @default -- `namespace name where rook-ceph operator is deployed`
csiDriverNamePrefix:
# -- CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate
# @default -- `RollingUpdate`
rbdPluginUpdateStrategy:
# -- A maxUnavailable parameter of CSI RBD plugin daemonset update strategy.
# @default -- `1`
rbdPluginUpdateStrategyMaxUnavailable:
# -- CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate
# @default -- `RollingUpdate`
cephFSPluginUpdateStrategy:
# -- A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy.
# @default -- `1`
cephFSPluginUpdateStrategyMaxUnavailable:
# -- CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate
# @default -- `RollingUpdate`
nfsPluginUpdateStrategy:
# -- Set GRPC timeout for csi containers (in seconds). It should be >= 120. If this value is not set or is invalid, it defaults to 150
grpcTimeoutInSeconds: 150
# -- Burst to use while communicating with the kubernetes apiserver.
kubeApiBurst:
# -- QPS to use while communicating with the kubernetes apiserver.
kubeApiQPS:
# -- The volume of the CephCSI RBD plugin DaemonSet
csiRBDPluginVolume:
# - name: lib-modules
# hostPath:
# path: /run/booted-system/kernel-modules/lib/modules/
# - name: host-nix
# hostPath:
# path: /nix
# -- The volume mounts of the CephCSI RBD plugin DaemonSet
csiRBDPluginVolumeMount:
# - name: host-nix
# mountPath: /nix
# readOnly: true
# -- The volume of the CephCSI CephFS plugin DaemonSet
csiCephFSPluginVolume:
# - name: lib-modules
# hostPath:
# path: /run/booted-system/kernel-modules/lib/modules/
# - name: host-nix
# hostPath:
# path: /nix
# -- The volume mounts of the CephCSI CephFS plugin DaemonSet
csiCephFSPluginVolumeMount:
# - name: host-nix
# mountPath: /nix
# readOnly: true
# -- CEPH CSI RBD provisioner resource requirement list
# csi-omap-generator resources will be applied only if `enableOMAPGenerator` is set to `true`
# @default -- see values.yaml
csiRBDProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-resizer
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-attacher
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-snapshotter
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-rbdplugin
resource:
requests:
memory: 512Mi
limits:
memory: 1Gi
- name : csi-omap-generator
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
# -- CEPH CSI RBD plugin resource requirement list
# @default -- see values.yaml
csiRBDPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
- name : csi-rbdplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
# -- CEPH CSI CephFS provisioner resource requirement list
# @default -- see values.yaml
csiCephFSProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-resizer
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-attacher
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-snapshotter
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-cephfsplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
# -- CEPH CSI CephFS plugin resource requirement list
# @default -- see values.yaml
csiCephFSPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
- name : csi-cephfsplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
# -- CEPH CSI NFS provisioner resource requirement list
# @default -- see values.yaml
csiNFSProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-nfsplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : csi-attacher
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
# -- CEPH CSI NFS plugin resource requirement list
# @default -- see values.yaml
csiNFSPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
- name : csi-nfsplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
# Set provisionerTolerations and provisionerNodeAffinity for provisioner pod.
# The CSI provisioner would be best to start on the same nodes as other ceph daemons.
# -- Array of tolerations in YAML format which will be added to CSI provisioner deployment
provisionerTolerations:
# - key: key
# operator: Exists
# effect: NoSchedule
# -- The node labels for affinity of the CSI provisioner deployment [^1]
provisionerNodeAffinity: #key1=value1,value2; key2=value3
# Set pluginTolerations and pluginNodeAffinity for plugin daemonset pods.
# The CSI plugins need to be started on all the nodes where the clients need to mount the storage.
# -- Array of tolerations in YAML format which will be added to CephCSI plugin DaemonSet
pluginTolerations:
# - key: key
# operator: Exists
# effect: NoSchedule
# -- The node labels for affinity of the CephCSI RBD plugin DaemonSet [^1]
pluginNodeAffinity: # key1=value1,value2; key2=value3
# -- Enable Ceph CSI Liveness sidecar deployment
enableLiveness: false
# -- CSI CephFS driver metrics port
# @default -- `9081`
cephfsLivenessMetricsPort:
# -- CSI Addons server port
# @default -- `9070`
csiAddonsPort:
# -- CSI Addons server port for the RBD provisioner
# @default -- `9070`
csiAddonsRBDProvisionerPort:
# -- CSI Addons server port for the Ceph FS provisioner
# @default -- `9070`
csiAddonsCephFSProvisionerPort:
# -- Enable Ceph Kernel clients on kernel < 4.17. If your kernel does not support quotas for CephFS
# you may want to disable this setting. However, this will cause an issue during upgrades
# with the FUSE client. See the [upgrade guide](https://rook.io/docs/rook/v1.2/ceph-upgrade.html)
forceCephFSKernelClient: true
# -- Ceph CSI RBD driver metrics port
# @default -- `8080`
rbdLivenessMetricsPort:
serviceMonitor:
# -- Enable ServiceMonitor for Ceph CSI drivers
enabled: false
# -- Service monitor scrape interval
interval: 10s
# -- ServiceMonitor additional labels
labels: {}
# -- Use a different namespace for the ServiceMonitor
namespace:
# -- Kubelet root directory path (if the Kubelet uses a different path for the `--root-dir` flag)
# @default -- `/var/lib/kubelet`
kubeletDirPath:
# -- Duration in seconds that non-leader candidates will wait to force acquire leadership.
# @default -- `137s`
csiLeaderElectionLeaseDuration:
# -- Deadline in seconds that the acting leader will retry refreshing leadership before giving up.
# @default -- `107s`
csiLeaderElectionRenewDeadline:
# -- Retry period in seconds the LeaderElector clients should wait between tries of actions.
# @default -- `26s`
csiLeaderElectionRetryPeriod:
cephcsi:
# -- Ceph CSI image repository
repository: quay.io/cephcsi/cephcsi
# -- Ceph CSI image tag
tag: v3.14.0
registrar:
# -- Kubernetes CSI registrar image repository
repository: registry.k8s.io/sig-storage/csi-node-driver-registrar
# -- Registrar image tag
tag: v2.13.0
provisioner:
# -- Kubernetes CSI provisioner image repository
repository: registry.k8s.io/sig-storage/csi-provisioner
# -- Provisioner image tag
tag: v5.1.0
snapshotter:
# -- Kubernetes CSI snapshotter image repository
repository: registry.k8s.io/sig-storage/csi-snapshotter
# -- Snapshotter image tag
tag: v8.2.0
attacher:
# -- Kubernetes CSI Attacher image repository
repository: registry.k8s.io/sig-storage/csi-attacher
# -- Attacher image tag
tag: v4.8.0
resizer:
# -- Kubernetes CSI resizer image repository
repository: registry.k8s.io/sig-storage/csi-resizer
# -- Resizer image tag
tag: v1.13.1
# -- Image pull policy
imagePullPolicy: IfNotPresent
# -- Labels to add to the CSI CephFS Deployments and DaemonSets Pods
cephfsPodLabels: #"key1=value1,key2=value2"
# -- Labels to add to the CSI NFS Deployments and DaemonSets Pods
nfsPodLabels: #"key1=value1,key2=value2"
# -- Labels to add to the CSI RBD Deployments and DaemonSets Pods
rbdPodLabels: #"key1=value1,key2=value2"
csiAddons:
# -- Enable CSIAddons
enabled: false
# -- CSIAddons sidecar image repository
repository: quay.io/csiaddons/k8s-sidecar
# -- CSIAddons sidecar image tag
tag: v0.12.0
nfs:
# -- Enable the nfs csi driver
enabled: false
topology:
# -- Enable topology based provisioning
enabled: false
# NOTE: the value here serves as an example and needs to be
# updated with node labels that define domains of interest
# -- domainLabels define which node labels to use as domains
# for CSI nodeplugins to advertise their domains
domainLabels:
# - kubernetes.io/hostname
# - topology.kubernetes.io/zone
# - topology.rook.io/rack
# -- Whether to skip any attach operation altogether for CephFS PVCs. See more details
# [here](https://kubernetes-csi.github.io/docs/skip-attach.html#skip-attach-with-csi-driver-object).
# If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation
# of pods using the CephFS PVC fast. **WARNING** It's highly discouraged to use this for
# CephFS RWO volumes. Refer to this [issue](https://github.com/kubernetes/kubernetes/issues/103305) for more details.
cephFSAttachRequired: true
# -- Whether to skip any attach operation altogether for RBD PVCs. See more details
# [here](https://kubernetes-csi.github.io/docs/skip-attach.html#skip-attach-with-csi-driver-object).
# If set to false it skips the volume attachments and makes the creation of pods using the RBD PVC fast.
# **WARNING** It's highly discouraged to use this for RWO volumes as it can cause data corruption.
# csi-addons operations like Reclaimspace and PVC Keyrotation will also not be supported if set
# to false since we'll have no VolumeAttachments to determine which node the PVC is mounted on.
# Refer to this [issue](https://github.com/kubernetes/kubernetes/issues/103305) for more details.
rbdAttachRequired: true
# -- Whether to skip any attach operation altogether for NFS PVCs. See more details
# [here](https://kubernetes-csi.github.io/docs/skip-attach.html#skip-attach-with-csi-driver-object).
# If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation
# of pods using the NFS PVC fast. **WARNING** It's highly discouraged to use this for
# NFS RWO volumes. Refer to this [issue](https://github.com/kubernetes/kubernetes/issues/103305) for more details.
nfsAttachRequired: true
# -- Enable discovery daemon
enableDiscoveryDaemon: false
# -- Set the discovery daemon device discovery interval (default to 60m)
discoveryDaemonInterval: 60m
# -- The timeout for ceph commands in seconds
cephCommandsTimeoutSeconds: "15"
# -- If true, run rook operator on the host network
useOperatorHostNetwork:
# -- If true, scale down the rook operator.
# This is useful for administrative actions where the rook operator must be scaled down, while using gitops style tooling
# to deploy your helm charts.
scaleDownOperator: false
## Rook Discover configuration
## toleration: NoSchedule, PreferNoSchedule or NoExecute
## tolerationKey: Set this to the specific key of the taint to tolerate
## tolerations: Array of tolerations in YAML format which will be added to agent deployment
## nodeAffinity: Set to labels of the node to match
discover:
# -- Toleration for the discover pods.
# Options: `NoSchedule`, `PreferNoSchedule` or `NoExecute`
toleration:
# -- The specific key of the taint to tolerate
tolerationKey:
# -- Array of tolerations in YAML format which will be added to discover deployment
tolerations:
# - key: key
# operator: Exists
# effect: NoSchedule
# -- The node labels for affinity of `discover-agent` [^1]
nodeAffinity:
# key1=value1,value2; key2=value3
#
# or
#
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# - key: storage-node
# operator: Exists
# -- Labels to add to the discover pods
podLabels: # "key1=value1,key2=value2"
# -- Add resources to discover daemon pods
resources:
# - limits:
# memory: 512Mi
# - requests:
# cpu: 100m
# memory: 128Mi
# -- Custom label to identify node hostname. If not set `kubernetes.io/hostname` will be used
customHostnameLabel:
# -- Runs Ceph Pods as privileged to be able to write to `hostPaths` in OpenShift with SELinux restrictions.
hostpathRequiresPrivileged: false
# -- Whether to create all Rook pods to run on the host network, for example in environments where a CNI is not enabled
enforceHostNetwork: false
# -- Disable automatic orchestration when new devices are discovered.
disableDeviceHotplug: false
# -- The revision history limit for all pods created by Rook. If blank, the K8s default is 10.
revisionHistoryLimit:
# -- Blacklist certain disks according to the regex provided.
discoverDaemonUdev:
# -- imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts.
imagePullSecrets:
# - name: my-registry-secret
# -- Whether the OBC provisioner should watch on the operator namespace or not, if not the namespace of the cluster will be used
enableOBCWatchOperatorNamespace: true
# -- Specify the prefix for the OBC provisioner in place of the cluster namespace
# @default -- `ceph cluster namespace`
obcProvisionerNamePrefix:
# -- Many OBC additional config fields may be risky for administrators to allow users control over.
# The safe and default-allowed fields are 'maxObjects' and 'maxSize'.
# Other fields should be considered risky. To allow all additional configs, use this value:
# "maxObjects,maxSize,bucketMaxObjects,bucketMaxSize,bucketPolicy,bucketLifecycle,bucketOwner"
# @default -- "maxObjects,maxSize"
obcAllowAdditionalConfigFields: "maxObjects,maxSize"
monitoring:
# -- Enable monitoring. Requires Prometheus to be pre-installed.
# Enabling will also create RBAC rules to allow Operator to create ServiceMonitors
enabled: false

View File

@@ -1,168 +0,0 @@
use std::{
net::{IpAddr, Ipv4Addr},
sync::Arc,
};
use cidr::Ipv4Cidr;
use harmony::{
config::secret::SshKeyPair,
data::{FileContent, FilePath},
hardware::{HostCategory, Location, PhysicalHost, SwitchGroup},
infra::opnsense::OPNSenseManagementInterface,
inventory::Inventory,
modules::{
http::StaticFilesHttpScore,
okd::{
bootstrap_dhcp::OKDBootstrapDhcpScore,
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore, dhcp::OKDDhcpScore,
dns::OKDDnsScore, ipxe::OKDIpxeScore,
},
tftp::TftpScore,
},
topology::{LogicalHost, UnmanagedRouter},
};
use harmony_macros::{ip, mac_address};
use harmony_secret::SecretManager;
use harmony_types::net::Url;
#[tokio::main]
async fn main() {
let firewall = harmony::topology::LogicalHost {
ip: ip!("192.168.33.1"),
name: String::from("fw0"),
};
let opnsense = Arc::new(
harmony::infra::opnsense::OPNSenseFirewall::new(firewall, None, "root", "opnsense").await,
);
let lan_subnet = Ipv4Addr::new(192, 168, 33, 0);
let gateway_ipv4 = Ipv4Addr::new(192, 168, 33, 1);
let gateway_ip = IpAddr::V4(gateway_ipv4);
let topology = harmony::topology::HAClusterTopology {
domain_name: "ncd0.harmony.mcd".to_string(), // TODO this must be set manually correctly
// when setting up the opnsense firewall
router: Arc::new(UnmanagedRouter::new(
gateway_ip,
Ipv4Cidr::new(lan_subnet, 24).unwrap(),
)),
load_balancer: opnsense.clone(),
firewall: opnsense.clone(),
tftp_server: opnsense.clone(),
http_server: opnsense.clone(),
dhcp_server: opnsense.clone(),
dns_server: opnsense.clone(),
control_plane: vec![
LogicalHost {
ip: ip!("192.168.33.20"),
name: "cp0".to_string(),
},
LogicalHost {
ip: ip!("192.168.33.21"),
name: "cp1".to_string(),
},
LogicalHost {
ip: ip!("192.168.33.22"),
name: "cp2".to_string(),
},
],
bootstrap_host: LogicalHost {
ip: ip!("192.168.33.66"),
name: "bootstrap".to_string(),
},
workers: vec![
LogicalHost {
ip: ip!("192.168.33.30"),
name: "wk0".to_string(),
},
LogicalHost {
ip: ip!("192.168.33.31"),
name: "wk1".to_string(),
},
LogicalHost {
ip: ip!("192.168.33.32"),
name: "wk2".to_string(),
},
],
switch: vec![],
};
let inventory = Inventory {
location: Location::new("I am mobile".to_string(), "earth".to_string()),
switch: SwitchGroup::from([]),
firewall_mgmt: Box::new(OPNSenseManagementInterface::new()),
storage_host: vec![],
worker_host: vec![
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:61:0F")),
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:61:26")),
// thisone
// Then create the ipxe file
// set the dns static leases
// bootstrap nodes
// start ceph cluster
// try installation of lampscore
// bingo?
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:61:70")),
],
control_plane_host: vec![
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:60:FA")),
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:61:1A")),
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:01:BC:68")),
],
};
// TODO regroup smaller scores in a larger one such as this
// let okd_boostrap_preparation();
let bootstrap_dhcp_score = OKDBootstrapDhcpScore::new(&topology, &inventory);
let bootstrap_load_balancer_score = OKDBootstrapLoadBalancerScore::new(&topology);
let dhcp_score = OKDDhcpScore::new(&topology, &inventory);
let dns_score = OKDDnsScore::new(&topology);
let load_balancer_score =
harmony::modules::okd::load_balancer::OKDLoadBalancerScore::new(&topology);
let ssh_key = SecretManager::get_or_prompt::<SshKeyPair>().await.unwrap();
let tftp_score = TftpScore::new(Url::LocalFolder("./data/watchguard/tftpboot".to_string()));
let http_score = StaticFilesHttpScore {
folder_to_serve: Some(Url::LocalFolder(
"./data/watchguard/pxe-http-files".to_string(),
)),
files: vec![],
remote_path: None,
};
let kickstart_filename = "inventory.kickstart".to_string();
let harmony_inventory_agent = "harmony_inventory_agent".to_string();
let ipxe_score = OKDIpxeScore {
kickstart_filename,
harmony_inventory_agent,
cluster_pubkey: FileContent {
path: FilePath::Relative("cluster_ssh_key.pub".to_string()),
content: ssh_key.public,
},
};
harmony_tui::run(
inventory,
topology,
vec![
Box::new(dns_score),
Box::new(bootstrap_dhcp_score),
Box::new(bootstrap_load_balancer_score),
Box::new(load_balancer_score),
Box::new(tftp_score),
Box::new(http_score),
Box::new(ipxe_score),
Box::new(dhcp_score),
],
)
.await
.unwrap();
}

View File

@@ -0,0 +1,19 @@
[package]
name = "example-nats-module-supercluster"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
k8s-openapi.workspace = true

View File

@@ -0,0 +1,9 @@
# Cluster 1
export HARMONY_DECENTRALIZED_TOPOLOGY_K8S_SITE_1="kubeconfig=$HOME/.kube/config,context=cluster-context"
export HARMONY_NATS_SITE_1_DOMAIN="your.domain.1"
# Cluster 2
export HARMONY_DECENTRALIZED_TOPOLOGY_K8S_SITE_2="kubeconfig=$HOME/.kube/config,context=cluster-context"
export HARMONY_NATS_SITE_2_DOMAIN="your.domain.2"
# Cluster 3
export HARMONY_DECENTRALIZED_TOPOLOGY_K8S_SITE_3="kubeconfig=$HOME/.kube/config,context=cluster-context"
export HARMONY_NATS_SITE_3_DOMAIN="your.domain.3"

View File

@@ -0,0 +1,77 @@
use harmony::{
inventory::Inventory,
modules::nats::{capability::NatsCluster, score_nats_supercluster::NatsSuperclusterScore},
topology::{K8sAnywhereTopology, decentralized::DecentralizedTopology},
};
#[tokio::main]
async fn main() {
let supercluster_ca_secret_name = "nats-supercluster-ca-bundle";
let tls_cert_name = "nats-gateway";
let jetstream_enabled = "false";
let nats_namespace = "nats-example".to_string();
let site_1_name = "site-1".to_string();
let site_1_domain =
std::env::var("HARMONY_NATS_SITE_1_DOMAIN").expect("missing domain in env for site_1");
let nats_site_1 = NatsCluster {
namespace: nats_namespace.clone(),
domain: site_1_domain.clone(),
replicas: 1,
name: site_1_name.clone(),
gateway_advertise: format!("{site_1_name}-gw.{site_1_domain}:443"),
dns_name: format!("{site_1_name}-gw.{site_1_domain}"),
supercluster_ca_secret_name: supercluster_ca_secret_name,
tls_cert_name: tls_cert_name,
jetstream_enabled: jetstream_enabled,
};
let site_2_name = "site-2".to_string();
let site_2_domain =
std::env::var("HARMONY_NATS_SITE_2_DOMAIN").expect("missing domain in env for site_2");
let nats_site_2 = NatsCluster {
namespace: nats_namespace.clone(),
domain: site_2_domain.clone(),
replicas: 1,
name: site_2_name.clone(),
gateway_advertise: format!("{site_2_name}-gw.{site_2_domain}:443"),
dns_name: format!("{site_2_name}-gw.{site_2_domain}"),
supercluster_ca_secret_name: supercluster_ca_secret_name,
tls_cert_name: tls_cert_name,
jetstream_enabled: jetstream_enabled,
};
let site_3_name = "site-3".to_string();
let site_3_domain =
std::env::var("HARMONY_NATS_SITE_3_DOMAIN").expect("missing domain in env for site_3");
let nats_site_3 = NatsCluster {
namespace: nats_namespace.clone(),
domain: site_3_domain.clone(),
replicas: 1,
name: site_3_name.clone(),
gateway_advertise: format!("{site_3_name}-gw.{site_3_domain}:443"),
dns_name: format!("{site_3_name}-gw.{site_3_domain}"),
supercluster_ca_secret_name: supercluster_ca_secret_name,
tls_cert_name: tls_cert_name,
jetstream_enabled: jetstream_enabled,
};
let clusters = vec![nats_site_1, nats_site_2, nats_site_3];
let nats_supercluster = NatsSuperclusterScore {
nats_cluster: clusters,
ca_certs: None,
};
harmony_cli::run(
Inventory::autoload(),
DecentralizedTopology::<K8sAnywhereTopology>::from_env(),
vec![Box::new(nats_supercluster)],
None,
)
.await
.unwrap();
}

View File

@@ -0,0 +1,19 @@
[package]
name = "example-nats-supercluster"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
k8s-openapi.workspace = true

View File

@@ -0,0 +1,6 @@
# Cluster 1
export HARMONY_NATS_SITE_1="kubeconfig=$HOME/.config/nt/kube/config,context=your_cluster_1_kube_context_name"
export HARMONY_NATS_SITE_1_DOMAIN="your_cluster_1_public_domain"
# Cluster 2
export HARMONY_NATS_SITE_2="kubeconfig=$HOME/.config/nt/kube/config,context=your_cluster_2_kube_context_name"
export HARMONY_NATS_SITE_2_DOMAIN="your_cluster_2_public_domain"

View File

@@ -0,0 +1,481 @@
use std::{collections::BTreeMap, str::FromStr};
use harmony::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
modules::{
cert_manager::{
capability::{CertificateManagement, CertificateManagementConfig},
crd::CaIssuer,
},
helm::chart::{HelmChartScore, HelmRepository, NonBlankString},
k8s::resource::K8sResourceScore,
okd::{
crd::route::{RoutePort, RouteSpec, RouteTargetReference, TLSConfig},
route::OKDRouteScore,
},
},
score::Score,
topology::{
HelmCommand, K8sAnywhereConfig, K8sAnywhereTopology, K8sclient, TlsRouter, Topology,
},
};
use harmony_macros::hurl;
use k8s_openapi::{
ByteString, api::core::v1::Secret, apimachinery::pkg::apis::meta::v1::ObjectMeta,
};
use log::{debug, info};
#[tokio::main]
async fn main() -> Result<(), InterpretError> {
let namespace = "nats-supercluster-test";
let self_signed_issuer_name = "harmony-self-signed-issuer";
let ca_issuer_name = "harmony-ca-issuer";
let root_ca_cert_name = "harmony-root-ca";
log::info!("starting nats supercluster bootstrap");
// --------------------------------------------------
// 1. Build site contexts
// --------------------------------------------------
let site1 = site(
"HARMONY_NATS_SITE_1",
"HARMONY_NATS_SITE_1_DOMAIN",
"nats-sto1-cert-test1",
);
let site2 = site(
"HARMONY_NATS_SITE_2",
"HARMONY_NATS_SITE_2_DOMAIN",
"nats-cb1-cert-test2",
);
// --------------------------------------------------
// 2. Ensure clusters are reachable
// --------------------------------------------------
log::info!("ensuring both topologies are ready");
tokio::try_join!(site1.topology.ensure_ready(), site2.topology.ensure_ready(),)?;
// --------------------------------------------------
// 3. Create certificates
// --------------------------------------------------
log::info!("creating certificates");
let root_ca_config = CertificateManagementConfig {
namespace: Some(namespace.into()),
acme_issuer: None,
ca_issuer: Some(CaIssuer {
secret_name: format!("{}-tls", root_ca_cert_name),
}),
self_signed: false,
};
let self_signed_config = CertificateManagementConfig {
namespace: Some(namespace.to_string().clone()),
acme_issuer: None,
ca_issuer: None,
self_signed: true,
};
tokio::try_join!(
create_nats_certs(
site1.topology.clone(),
&site1.cluster,
ca_issuer_name,
&root_ca_config,
self_signed_issuer_name,
&self_signed_config,
root_ca_cert_name
),
create_nats_certs(
site2.topology.clone(),
&site2.cluster,
ca_issuer_name,
&root_ca_config,
self_signed_issuer_name,
&self_signed_config,
root_ca_cert_name
),
)?;
// --------------------------------------------------
// 4. Build CA bundle
// --------------------------------------------------
log::info!("building supercluster CA bundle");
let mut ca_bundle = Vec::new();
ca_bundle.push(
site1
.topology
.get_ca_certificate(root_ca_cert_name.to_string(), &root_ca_config)
.await?,
);
ca_bundle.push(
site2
.topology
.get_ca_certificate(root_ca_cert_name.to_string(), &root_ca_config)
.await?,
);
// --------------------------------------------------
// 5. Build Scores
// --------------------------------------------------
log::info!("building scores");
let site1_scores = vec![
build_ca_bundle_secret_score(
site1.topology.clone(),
&site1.cluster,
&ca_bundle,
namespace.into(),
)
.await,
build_route_score(site1.topology.clone(), &site1.cluster, namespace.into()).await,
build_deploy_nats_score(
site1.topology.clone(),
&site1.cluster,
vec![&site2.cluster],
namespace.into(),
)
.await,
];
let site2_scores = vec![
build_ca_bundle_secret_score(
site2.topology.clone(),
&site2.cluster,
&ca_bundle,
namespace.into(),
)
.await,
build_route_score(site2.topology.clone(), &site2.cluster, namespace.into()).await,
build_deploy_nats_score(
site2.topology.clone(),
&site2.cluster,
vec![&site1.cluster],
namespace.into(),
)
.await,
];
// --------------------------------------------------
// 6. Apply Scores
// --------------------------------------------------
log::info!("applying scores");
tokio::try_join!(
apply_scores(site1.topology.clone(), site1_scores),
apply_scores(site2.topology.clone(), site2_scores),
)?;
log::info!("supercluster bootstrap complete");
log::info!(
"Enjoy! You can test your nats cluster by running : `kubectl exec -n {namespace} -it deployment/nats-box -- nats pub test hi`"
);
Ok(())
}
async fn apply_scores<T: Topology + 'static>(
topology: T,
scores: Vec<Box<dyn Score<T>>>,
) -> Result<(), InterpretError> {
info!("applying {} scores", scores.len());
harmony_cli::run(Inventory::autoload(), topology, scores, None)
.await
.map_err(|e| InterpretError::new(e.to_string()))?;
Ok(())
}
fn site(
topo_env: &str,
domain_env: &str,
cluster_name: &'static str,
) -> SiteContext<K8sAnywhereTopology> {
let domain = std::env::var(domain_env).expect("missing domain env");
let topology =
K8sAnywhereTopology::with_config(K8sAnywhereConfig::remote_k8s_from_env_var(topo_env));
SiteContext {
topology,
cluster: NatsCluster {
replicas: 1,
name: cluster_name,
gateway_advertise: format!("{cluster_name}-gw.{domain}:443"),
dns_name: format!("{cluster_name}-gw.{domain}"),
supercluster_ca_secret_name: "nats-supercluster-ca-bundle",
tls_cert_name: "nats-gateway",
jetstream_enabled: "true",
},
}
}
struct SiteContext<T> {
topology: T,
cluster: NatsCluster,
}
struct NatsCluster {
replicas: usize,
name: &'static str,
gateway_advertise: String,
dns_name: String,
supercluster_ca_secret_name: &'static str,
tls_cert_name: &'static str,
jetstream_enabled: &'static str,
}
async fn create_nats_certs<T: Topology + CertificateManagement>(
topology: T,
cluster: &NatsCluster,
ca_issuer_name: &str,
ca_cert_mgmt_config: &CertificateManagementConfig,
self_signed_issuer_name: &str,
self_signed_cert_config: &CertificateManagementConfig,
root_ca_cert_name: &str,
) -> Result<Outcome, InterpretError> {
//the order is pretty important
debug!(
"Applying certs to ns {:#?}",
ca_cert_mgmt_config.namespace.clone()
);
debug!("creating issuer '{}'", self_signed_issuer_name);
topology
.create_issuer(
self_signed_issuer_name.to_string(),
&self_signed_cert_config,
)
.await?;
debug!("creating certificate {root_ca_cert_name}");
topology
.create_certificate(
root_ca_cert_name.to_string(),
self_signed_issuer_name.to_string(),
Some(format!("harmony-{}-ca", cluster.name)),
None,
Some(true),
ca_cert_mgmt_config,
)
.await?;
debug!("creating issuer '{}'", ca_issuer_name);
topology
.create_issuer(ca_issuer_name.to_string(), ca_cert_mgmt_config)
.await?;
debug!("creating certificate {}", cluster.tls_cert_name);
topology
.create_certificate(
cluster.tls_cert_name.to_string(),
ca_issuer_name.to_string(),
None,
Some(vec![cluster.dns_name.clone()]),
Some(true),
ca_cert_mgmt_config,
)
.await?;
Ok(Outcome::success("success".to_string()))
}
async fn build_ca_bundle_secret(
namespace: &str,
nats_cluster: &NatsCluster,
bundle: &Vec<String>,
) -> Secret {
Secret {
metadata: ObjectMeta {
name: Some(nats_cluster.supercluster_ca_secret_name.to_string()),
namespace: Some(namespace.to_string()),
..Default::default()
},
data: Some(build_secret_data(bundle).await),
immutable: Some(false),
type_: Some("Opaque".to_string()),
string_data: None,
}
}
async fn build_secret_data(bundle: &Vec<String>) -> BTreeMap<String, ByteString> {
let mut data = BTreeMap::new();
data.insert(
"ca.crt".to_string(),
ByteString(bundle.join("\n").into_bytes()),
);
data
}
async fn build_ca_bundle_secret_score<T: Topology + K8sclient + 'static>(
_topology: T,
nats_cluster: &NatsCluster,
ca_bundle: &Vec<String>,
namespace: String,
) -> Box<dyn Score<T>> {
let bundle_secret = build_ca_bundle_secret(&namespace, nats_cluster, ca_bundle).await;
debug!(
"deploying secret to ns: {} \nsecret: {:#?}",
namespace, bundle_secret
);
let k8ssecret = K8sResourceScore::single(bundle_secret, Some(namespace));
Box::new(k8ssecret)
}
async fn build_route_score<T: Topology + K8sclient + 'static>(
_topology: T,
cluster: &NatsCluster,
namespace: String,
) -> Box<dyn Score<T>> {
let route = OKDRouteScore {
name: cluster.name.to_string(),
namespace,
spec: RouteSpec {
to: RouteTargetReference {
kind: "Service".to_string(),
name: cluster.name.to_string(),
weight: Some(100),
},
host: Some(cluster.dns_name.clone()),
port: Some(RoutePort { target_port: 7222 }),
tls: Some(TLSConfig {
insecure_edge_termination_policy: None,
termination: "passthrough".to_string(),
..Default::default()
}),
wildcard_policy: None,
..Default::default()
},
};
Box::new(route)
}
async fn build_deploy_nats_score<T: Topology + HelmCommand + TlsRouter + 'static>(
topology: T,
cluster: &NatsCluster,
peers: Vec<&NatsCluster>,
namespace: String,
) -> Box<dyn Score<T>> {
let mut gateway_gateways = String::new();
for peer in peers {
// Construct wss:// URLs on port 443 for the remote gateways
gateway_gateways.push_str(&format!(
r#"
- name: {}
urls:
- nats://{}"#,
peer.name, peer.gateway_advertise
));
}
let domain = topology.get_internal_domain().await.unwrap().unwrap();
// Inject gateway config into the 'merge' block to comply with chart structure
let values_yaml = Some(format!(
r#"config:
merge:
authorization:
default_permissions:
publish: ["TEST.*"]
subscribe: ["PUBLIC.>"]
users:
# - user: "admin"
# password: "admin_1"
# permissions:
# publish: ">"
# subscribe: ">"
- password: "enGk0cgZUabM6bN6FXHT"
user: "testUser"
accounts:
system:
users:
- user: "admin"
password: "admin_2"
logtime: true
debug: true
trace: true
system_account: system
cluster:
name: {cluster_name}
enabled: true
replicas: {replicas}
jetstream:
enabled: {jetstream_enabled}
fileStorage:
enabled: true
size: 10Gi
storageDirectory: /data/jetstream
leafnodes:
enabled: false
websocket:
enabled: false
ingress:
enabled: true
className: openshift-default
pathType: Prefix
hosts:
- nats-ws.{domain}
gateway:
enabled: true
port: 7222
name: {cluster_name}
merge:
advertise: {gateway_advertise}
gateways: {gateway_gateways}
tls:
enabled: true
secretName: {tls_secret_name}
# merge:
# ca_file: "/etc/nats-certs/gateway/ca.crt"
service:
ports:
gateway:
enabled: true
tlsCA:
enabled: true
secretName: {supercluster_ca_secret_name}
natsBox:
container:
image:
tag: nonroot"#,
cluster_name = cluster.name,
replicas = cluster.replicas,
domain = domain,
gateway_gateways = gateway_gateways,
gateway_advertise = cluster.gateway_advertise,
tls_secret_name = format!("{}-tls", cluster.tls_cert_name),
jetstream_enabled = cluster.jetstream_enabled,
supercluster_ca_secret_name = cluster.supercluster_ca_secret_name,
));
debug!("Prepared Helm Chart values : \n{values_yaml:#?}");
let nats = HelmChartScore {
namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
release_name: NonBlankString::from_str(&cluster.name).unwrap(),
chart_name: NonBlankString::from_str("nats/nats").unwrap(),
chart_version: None,
values_overrides: None,
values_yaml,
create_namespace: true,
install_only: false,
repository: Some(HelmRepository::new(
"nats".to_string(),
hurl!("https://nats-io.github.io/k8s/helm/charts/"),
true,
)),
};
Box::new(nats)
}

18
examples/nats/Cargo.toml Normal file
View File

@@ -0,0 +1,18 @@
[package]
name = "example-nats"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

120
examples/nats/src/main.rs Normal file
View File

@@ -0,0 +1,120 @@
use std::str::FromStr;
use harmony::{
inventory::Inventory,
modules::helm::chart::{HelmChartScore, HelmRepository, NonBlankString},
topology::{HelmCommand, K8sAnywhereConfig, K8sAnywhereTopology, TlsRouter, Topology},
};
use harmony_macros::hurl;
use log::info;
#[tokio::main]
async fn main() {
let site1_topo = K8sAnywhereTopology::with_config(K8sAnywhereConfig::remote_k8s_from_env_var(
"HARMONY_NATS_SITE_1",
));
let site2_topo = K8sAnywhereTopology::with_config(K8sAnywhereConfig::remote_k8s_from_env_var(
"HARMONY_NATS_SITE_2",
));
let site1_domain = site1_topo.get_internal_domain().await.unwrap().unwrap();
let site2_domain = site2_topo.get_internal_domain().await.unwrap().unwrap();
let site1_gateway = format!("nats-gateway.{}", site1_domain);
let site2_gateway = format!("nats-gateway.{}", site2_domain);
tokio::join!(
deploy_nats(
site1_topo,
"site-1",
vec![("site-2".to_string(), site2_gateway)]
),
deploy_nats(
site2_topo,
"site-2",
vec![("site-1".to_string(), site1_gateway)]
),
);
}
async fn deploy_nats<T: Topology + HelmCommand + TlsRouter + 'static>(
topology: T,
cluster_name: &str,
remote_gateways: Vec<(String, String)>,
) {
topology.ensure_ready().await.unwrap();
let mut gateway_gateways = String::new();
for (name, url) in remote_gateways {
gateway_gateways.push_str(&format!(
r#"
- name: {name}
urls:
- nats://{url}:7222"#
));
}
let values_yaml = Some(format!(
r#"config:
cluster:
enabled: true
replicas: 3
jetstream:
enabled: true
fileStorage:
enabled: true
size: 10Gi
storageDirectory: /data/jetstream
leafnodes:
enabled: false
# port: 7422
websocket:
enabled: true
ingress:
enabled: true
className: openshift-default
pathType: Prefix
hosts:
- nats-ws.{}
gateway:
enabled: true
name: {}
port: 7222
gateways: {}
service:
ports:
gateway:
enabled: true
natsBox:
container:
image:
tag: nonroot"#,
topology.get_internal_domain().await.unwrap().unwrap(),
cluster_name,
gateway_gateways,
));
let namespace = "nats";
let nats = HelmChartScore {
namespace: Some(NonBlankString::from_str(namespace).unwrap()),
release_name: NonBlankString::from_str("nats").unwrap(),
chart_name: NonBlankString::from_str("nats/nats").unwrap(),
chart_version: None,
values_overrides: None,
values_yaml,
create_namespace: true,
install_only: false,
repository: Some(HelmRepository::new(
"nats".to_string(),
hurl!("https://nats-io.github.io/k8s/helm/charts/"),
true,
)),
};
harmony_cli::run(Inventory::autoload(), topology, vec![Box::new(nats)], None)
.await
.unwrap();
info!(
"Enjoy! You can test your nats cluster by running : `kubectl exec -n {namespace} -it deployment/nats-box -- nats pub test hi`"
);
}

View File

@@ -0,0 +1,22 @@
[package]
name = "example-okd-cluster-alerts"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
harmony_secret = { path = "../../harmony_secret" }
harmony_secret_derive = { path = "../../harmony_secret_derive" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
serde.workspace = true
brocade = { path = "../../brocade" }

View File

@@ -0,0 +1,38 @@
use std::collections::HashMap;
use harmony::{
inventory::Inventory,
modules::monitoring::{
alert_channel::discord_alert_channel::DiscordWebhook,
okd::cluster_monitoring::OpenshiftClusterAlertScore,
},
topology::K8sAnywhereTopology,
};
use harmony_macros::hurl;
use harmony_types::k8s_name::K8sName;
#[tokio::main]
async fn main() {
let mut sel = HashMap::new();
sel.insert(
"openshift_io_alert_source".to_string(),
"platform".to_string(),
);
let mut sel2 = HashMap::new();
sel2.insert("openshift_io_alert_source".to_string(), "".to_string());
let selectors = vec![sel, sel2];
harmony_cli::run(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
vec![Box::new(OpenshiftClusterAlertScore {
receivers: vec![Box::new(DiscordWebhook {
name: K8sName("wills-discord-webhook-example".to_string()),
url: hurl!("https://something.io"),
selectors: selectors,
})],
})],
None,
)
.await
.unwrap();
}

View File

@@ -19,3 +19,5 @@ log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
serde.workspace = true
brocade = { path = "../../brocade" }
schemars = "0.8"

View File

@@ -4,7 +4,10 @@ use crate::topology::{get_inventory, get_topology};
use harmony::{
config::secret::SshKeyPair,
data::{FileContent, FilePath},
modules::okd::{installation::OKDInstallationPipeline, ipxe::OKDIpxeScore},
modules::{
inventory::HarmonyDiscoveryStrategy,
okd::{installation::OKDInstallationPipeline, ipxe::OKDIpxeScore},
},
score::Score,
topology::HAClusterTopology,
};
@@ -26,7 +29,8 @@ async fn main() {
},
})];
scores.append(&mut OKDInstallationPipeline::get_all_scores().await);
scores
.append(&mut OKDInstallationPipeline::get_all_scores(HarmonyDiscoveryStrategy::MDNS).await);
harmony_cli::run(inventory, topology, scores, None)
.await

View File

@@ -1,16 +1,25 @@
use brocade::BrocadeOptions;
use cidr::Ipv4Cidr;
use harmony::{
hardware::{Location, SwitchGroup},
infra::opnsense::OPNSenseManagementInterface,
infra::{
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
opnsense::OPNSenseManagementInterface,
},
inventory::Inventory,
modules::brocade::BrocadeSwitchAuth,
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
};
use harmony_macros::{ip, ipv4};
use harmony_secret::{Secret, SecretManager};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::{net::IpAddr, sync::Arc};
use std::{
net::IpAddr,
sync::{Arc, OnceLock},
};
#[derive(Secret, Serialize, Deserialize, Debug, PartialEq)]
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug, PartialEq)]
struct OPNSenseFirewallConfig {
username: String,
password: String,
@@ -22,6 +31,25 @@ pub async fn get_topology() -> HAClusterTopology {
name: String::from("opnsense-1"),
};
let switch_auth = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
.await
.expect("Failed to get credentials");
let switches: Vec<IpAddr> = vec![ip!("192.168.1.101")]; // TODO: Adjust me
let brocade_options = BrocadeOptions {
dry_run: *harmony::config::DRY_RUN,
..Default::default()
};
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
ips: switches,
auth: switch_auth,
options: brocade_options,
})
.await
.expect("Failed to connect to switch");
let switch_client = Arc::new(switch_client);
let config = SecretManager::get_or_prompt::<OPNSenseFirewallConfig>().await;
let config = config.unwrap();
@@ -38,6 +66,7 @@ pub async fn get_topology() -> HAClusterTopology {
let gateway_ipv4 = ipv4!("192.168.1.1");
let gateway_ip = IpAddr::V4(gateway_ipv4);
harmony::topology::HAClusterTopology {
kubeconfig: None,
domain_name: "demo.harmony.mcd".to_string(),
router: Arc::new(UnmanagedRouter::new(
gateway_ip,
@@ -58,7 +87,9 @@ pub async fn get_topology() -> HAClusterTopology {
name: "bootstrap".to_string(),
},
workers: vec![],
switch: vec![],
node_exporter: opnsense.clone(),
switch_client: switch_client.clone(),
network_manager: OnceLock::new(),
}
}

View File

@@ -19,3 +19,5 @@ log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
serde.workspace = true
brocade = { path = "../../brocade" }
schemars = "0.8"

View File

@@ -1,14 +1,22 @@
use brocade::BrocadeOptions;
use cidr::Ipv4Cidr;
use harmony::{
config::secret::OPNSenseFirewallCredentials,
hardware::{Location, SwitchGroup},
infra::opnsense::OPNSenseManagementInterface,
infra::{
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
opnsense::OPNSenseManagementInterface,
},
inventory::Inventory,
modules::brocade::BrocadeSwitchAuth,
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
};
use harmony_macros::{ip, ipv4};
use harmony_secret::SecretManager;
use std::{net::IpAddr, sync::Arc};
use std::{
net::IpAddr,
sync::{Arc, OnceLock},
};
pub async fn get_topology() -> HAClusterTopology {
let firewall = harmony::topology::LogicalHost {
@@ -16,6 +24,25 @@ pub async fn get_topology() -> HAClusterTopology {
name: String::from("opnsense-1"),
};
let switch_auth = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
.await
.expect("Failed to get credentials");
let switches: Vec<IpAddr> = vec![ip!("192.168.1.101")]; // TODO: Adjust me
let brocade_options = BrocadeOptions {
dry_run: *harmony::config::DRY_RUN,
..Default::default()
};
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
ips: switches,
auth: switch_auth,
options: brocade_options,
})
.await
.expect("Failed to connect to switch");
let switch_client = Arc::new(switch_client);
let config = SecretManager::get_or_prompt::<OPNSenseFirewallCredentials>().await;
let config = config.unwrap();
@@ -32,6 +59,7 @@ pub async fn get_topology() -> HAClusterTopology {
let gateway_ipv4 = ipv4!("192.168.1.1");
let gateway_ip = IpAddr::V4(gateway_ipv4);
harmony::topology::HAClusterTopology {
kubeconfig: None,
domain_name: "demo.harmony.mcd".to_string(),
router: Arc::new(UnmanagedRouter::new(
gateway_ip,
@@ -52,7 +80,9 @@ pub async fn get_topology() -> HAClusterTopology {
name: "cp0".to_string(),
},
workers: vec![],
switch: vec![],
node_exporter: opnsense.clone(),
switch_client: switch_client.clone(),
network_manager: OnceLock::new(),
}
}

View File

@@ -1,4 +1,4 @@
use std::{collections::HashMap, str::FromStr};
use std::str::FromStr;
use harmony::{
inventory::Inventory,
@@ -56,6 +56,8 @@ async fn main() {
)),
};
// TODO exec pod commands to initialize secret store if not already done
harmony_cli::run(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),

View File

@@ -0,0 +1,18 @@
[package]
name = "example-operatorhub-catalogsource"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

View File

@@ -0,0 +1,22 @@
use std::str::FromStr;
use harmony::{
inventory::Inventory,
modules::{k8s::apps::OperatorHubCatalogSourceScore, postgresql::CloudNativePgOperatorScore},
topology::K8sAnywhereTopology,
};
#[tokio::main]
async fn main() {
let operatorhub_catalog = OperatorHubCatalogSourceScore::default();
let cnpg_operator = CloudNativePgOperatorScore::default();
harmony_cli::run(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
vec![Box::new(operatorhub_catalog), Box::new(cnpg_operator)],
None,
)
.await
.unwrap();
}

View File

@@ -8,7 +8,7 @@ publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_tui = { path = "../../harmony_tui" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
@@ -16,3 +16,7 @@ harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
harmony_secret = { path = "../../harmony_secret" }
brocade = { path = "../../brocade" }
serde = { workspace = true }
schemars = "0.8"

3
examples/opnsense/env.sh Normal file
View File

@@ -0,0 +1,3 @@
export HARMONY_SECRET_NAMESPACE=example-opnsense
export HARMONY_SECRET_STORE=file
export RUST_LOG=info

View File

@@ -1,111 +1,78 @@
use std::{
net::{IpAddr, Ipv4Addr},
sync::Arc,
};
use cidr::Ipv4Cidr;
use harmony::{
hardware::{HostCategory, Location, PhysicalHost, SwitchGroup},
infra::opnsense::OPNSenseManagementInterface,
config::secret::OPNSenseFirewallCredentials,
infra::opnsense::OPNSenseFirewall,
inventory::Inventory,
modules::{
dummy::{ErrorScore, PanicScore, SuccessScore},
http::StaticFilesHttpScore,
okd::{dhcp::OKDDhcpScore, dns::OKDDnsScore, load_balancer::OKDLoadBalancerScore},
opnsense::OPNsenseShellCommandScore,
tftp::TftpScore,
},
topology::{LogicalHost, UnmanagedRouter},
modules::{dhcp::DhcpScore, opnsense::OPNsenseShellCommandScore},
topology::LogicalHost,
};
use harmony_macros::{ip, mac_address};
use harmony_types::net::Url;
use harmony_macros::{ip, ipv4};
use harmony_secret::{Secret, SecretManager};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
#[tokio::main]
async fn main() {
let firewall = harmony::topology::LogicalHost {
ip: ip!("192.168.5.229"),
let firewall = LogicalHost {
ip: ip!("192.168.55.1"),
name: String::from("opnsense-1"),
};
let opnsense = Arc::new(
harmony::infra::opnsense::OPNSenseFirewall::new(firewall, None, "root", "opnsense").await,
);
let lan_subnet = Ipv4Addr::new(10, 100, 8, 0);
let gateway_ipv4 = Ipv4Addr::new(10, 100, 8, 1);
let gateway_ip = IpAddr::V4(gateway_ipv4);
let topology = harmony::topology::HAClusterTopology {
domain_name: "demo.harmony.mcd".to_string(),
router: Arc::new(UnmanagedRouter::new(
gateway_ip,
Ipv4Cidr::new(lan_subnet, 24).unwrap(),
)),
load_balancer: opnsense.clone(),
firewall: opnsense.clone(),
tftp_server: opnsense.clone(),
http_server: opnsense.clone(),
dhcp_server: opnsense.clone(),
dns_server: opnsense.clone(),
control_plane: vec![LogicalHost {
ip: ip!("10.100.8.20"),
name: "cp0".to_string(),
}],
bootstrap_host: LogicalHost {
ip: ip!("10.100.8.20"),
name: "cp0".to_string(),
},
workers: vec![],
switch: vec![],
};
let opnsense_auth = SecretManager::get_or_prompt::<OPNSenseFirewallCredentials>()
.await
.expect("Failed to get credentials");
let inventory = Inventory {
location: Location::new(
"232 des Éperviers, Wendake, Qc, G0A 4V0".to_string(),
"wk".to_string(),
let opnsense = OPNSenseFirewall::new(
firewall,
None,
&opnsense_auth.username,
&opnsense_auth.password,
)
.await;
let dhcp_score = DhcpScore {
dhcp_range: (
ipv4!("192.168.55.100").into(),
ipv4!("192.168.55.150").into(),
),
switch: SwitchGroup::from([]),
firewall_mgmt: Box::new(OPNSenseManagementInterface::new()),
storage_host: vec![],
worker_host: vec![],
control_plane_host: vec![
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("08:00:27:62:EC:C3")),
],
host_binding: vec![],
next_server: None,
boot_filename: None,
filename: None,
filename64: None,
filenameipxe: Some("filename.ipxe".to_string()),
domain: None,
};
// let dns_score = OKDDnsScore::new(&topology);
// let load_balancer_score = OKDLoadBalancerScore::new(&topology);
//
// let tftp_score = TftpScore::new(Url::LocalFolder("./data/watchguard/tftpboot".to_string()));
// let http_score = StaticFilesHttpScore {
// folder_to_serve: Some(Url::LocalFolder(
// "./data/watchguard/pxe-http-files".to_string(),
// )),
// files: vec![],
// remote_path: None,
// };
let opnsense_config = opnsense.get_opnsense_config();
// TODO regroup smaller scores in a larger one such as this
// let okd_boostrap_preparation();
let dhcp_score = OKDDhcpScore::new(&topology, &inventory);
let dns_score = OKDDnsScore::new(&topology);
let load_balancer_score = OKDLoadBalancerScore::new(&topology);
let tftp_score = TftpScore::new(Url::LocalFolder("./data/watchguard/tftpboot".to_string()));
let http_score = StaticFilesHttpScore {
folder_to_serve: Some(Url::LocalFolder(
"./data/watchguard/pxe-http-files".to_string(),
)),
files: vec![],
remote_path: None,
};
harmony_tui::run(
inventory,
topology,
harmony_cli::run(
Inventory::autoload(),
opnsense,
vec![
Box::new(dns_score),
Box::new(dhcp_score),
Box::new(load_balancer_score),
Box::new(tftp_score),
Box::new(http_score),
Box::new(OPNsenseShellCommandScore {
opnsense: opnsense.get_opnsense_config(),
command: "touch /tmp/helloharmonytouching".to_string(),
opnsense: opnsense_config,
command: "touch /tmp/helloharmonytouching_2".to_string(),
}),
Box::new(SuccessScore {}),
Box::new(ErrorScore {}),
Box::new(PanicScore {}),
],
None,
)
.await
.unwrap();
}
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
pub struct BrocadeSwitchAuth {
pub username: String,
pub password: String,
}

View File

@@ -0,0 +1,21 @@
[package]
name = "example-opnsense-node-exporter"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
harmony_secret = { path = "../../harmony_secret" }
harmony_secret_derive = { path = "../../harmony_secret_derive" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
serde.workspace = true
async-trait.workspace = true

View File

@@ -0,0 +1,80 @@
use std::{
net::{IpAddr, Ipv4Addr},
sync::Arc,
};
use async_trait::async_trait;
use cidr::Ipv4Cidr;
use harmony::{
executors::ExecutorError,
hardware::{HostCategory, Location, PhysicalHost, SwitchGroup},
infra::opnsense::OPNSenseManagementInterface,
inventory::Inventory,
modules::opnsense::node_exporter::NodeExporterScore,
topology::{
HAClusterTopology, LogicalHost, PreparationError, PreparationOutcome, Topology,
UnmanagedRouter, node_exporter::NodeExporter,
},
};
use harmony_macros::{ip, ipv4, mac_address};
#[derive(Debug)]
struct OpnSenseTopology {
node_exporter: Arc<dyn NodeExporter>,
}
#[async_trait]
impl Topology for OpnSenseTopology {
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
Ok(PreparationOutcome::Success {
details: "Success".to_string(),
})
}
fn name(&self) -> &str {
"OpnsenseTopology"
}
}
#[async_trait]
impl NodeExporter for OpnSenseTopology {
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
self.node_exporter.ensure_initialized().await
}
async fn commit_config(&self) -> Result<(), ExecutorError> {
self.node_exporter.commit_config().await
}
async fn reload_restart(&self) -> Result<(), ExecutorError> {
self.node_exporter.reload_restart().await
}
}
#[tokio::main]
async fn main() {
let firewall = harmony::topology::LogicalHost {
ip: ip!("192.168.1.1"),
name: String::from("fw0"),
};
let opnsense = Arc::new(
harmony::infra::opnsense::OPNSenseFirewall::new(firewall, None, "root", "opnsense").await,
);
let topology = OpnSenseTopology {
node_exporter: opnsense.clone(),
};
let inventory = Inventory::empty();
let node_exporter_score = NodeExporterScore {};
harmony_cli::run(
inventory,
topology,
vec![Box::new(node_exporter_score)],
None,
)
.await
.unwrap();
}

View File

@@ -0,0 +1,18 @@
[package]
name = "example-postgresql"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

View File

@@ -0,0 +1,26 @@
use harmony::{
inventory::Inventory,
modules::postgresql::{PostgreSQLScore, capability::PostgreSQLConfig},
topology::K8sAnywhereTopology,
};
#[tokio::main]
async fn main() {
let postgresql = PostgreSQLScore {
config: PostgreSQLConfig {
cluster_name: "harmony-postgres-example".to_string(), // Override default name
namespace: "harmony-postgres-example".to_string(),
..Default::default() // Use harmony defaults, they are based on CNPG's default values :
// "default" namespace, 1 instance, 1Gi storage
},
};
harmony_cli::run(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
vec![Box::new(postgresql)],
None,
)
.await
.unwrap();
}

View File

@@ -0,0 +1,18 @@
[package]
name = "example-public-postgres"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

View File

@@ -0,0 +1,38 @@
use harmony::{
inventory::Inventory,
modules::postgresql::{
K8sPostgreSQLScore, PostgreSQLConnectionScore, PublicPostgreSQLScore,
capability::PostgreSQLConfig,
},
topology::K8sAnywhereTopology,
};
#[tokio::main]
async fn main() {
let postgres = PublicPostgreSQLScore {
config: PostgreSQLConfig {
cluster_name: "harmony-postgres-example".to_string(), // Override default name
namespace: "harmony-public-postgres".to_string(),
..Default::default() // Use harmony defaults, they are based on CNPG's default values :
// 1 instance, 1Gi storage
},
hostname: "postgrestest.sto1.nationtech.io".to_string(),
};
let test_connection = PostgreSQLConnectionScore {
name: "harmony-postgres-example".to_string(),
namespace: "harmony-public-postgres".to_string(),
cluster_name: "harmony-postgres-example".to_string(),
hostname: Some("postgrestest.sto1.nationtech.io".to_string()),
port_override: Some(443),
};
harmony_cli::run(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
vec![Box::new(postgres), Box::new(test_connection)],
None,
)
.await
.unwrap();
}

View File

@@ -1,4 +1,4 @@
use std::{path::PathBuf, sync::Arc};
use std::{collections::HashMap, path::PathBuf, sync::Arc};
use harmony::{
inventory::Inventory,
@@ -10,20 +10,22 @@ use harmony::{
},
topology::K8sAnywhereTopology,
};
use harmony_types::net::Url;
use harmony_types::{k8s_name::K8sName, net::Url};
#[tokio::main]
async fn main() {
let application = Arc::new(RustWebapp {
name: "test-rhob-monitoring".to_string(),
dns: "test-rhob-monitoring.harmony.mcd".to_string(),
project_root: PathBuf::from("./webapp"), // Relative from 'harmony-path' param
framework: Some(RustWebFramework::Leptos),
service_port: 3000,
});
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
name: K8sName("test-discord".to_string()),
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
selectors: vec![],
};
let app = ApplicationScore {

View File

@@ -1,3 +1,4 @@
Dockerfile.harmony
.harmony_generated
harmony
webapp

View File

@@ -1,4 +1,4 @@
use std::{path::PathBuf, sync::Arc};
use std::{collections::HashMap, path::PathBuf, sync::Arc};
use harmony::{
inventory::Inventory,
@@ -14,19 +14,22 @@ use harmony::{
topology::K8sAnywhereTopology,
};
use harmony_macros::hurl;
use harmony_types::k8s_name::K8sName;
#[tokio::main]
async fn main() {
let application = Arc::new(RustWebapp {
name: "harmony-example-rust-webapp".to_string(),
dns: "harmony-example-rust-webapp.harmony.mcd".to_string(),
project_root: PathBuf::from("./webapp"),
framework: Some(RustWebFramework::Leptos),
service_port: 3000,
});
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
name: K8sName("test-discord".to_string()),
url: hurl!("https://discord.doesnt.exist.com"),
selectors: vec![],
};
let webhook_receiver = WebhookReceiver {

View File

@@ -0,0 +1,7 @@
apiVersion: v2
name: harmony-example-rust-webapp-chart
description: A Helm chart for the harmony-example-rust-webapp web application.
type: application
version: 0.1.0
appVersion: "latest"

View File

@@ -0,0 +1,16 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "chart.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
*/}}
{{- define "chart.fullname" -}}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}

View File

@@ -0,0 +1,23 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "chart.fullname" . }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: {{ include "chart.name" . }}
template:
metadata:
labels:
app: {{ include "chart.name" . }}
spec:
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
containerPort: 3000
protocol: TCP

View File

@@ -0,0 +1,35 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "chart.fullname" . }}
annotations:
{{- toYaml .Values.ingress.annotations | nindent 4 }}
spec:
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
pathType: {{ .pathType }}
backend:
service:
name: {{ include "chart.fullname" $ }}
port:
number: 3000
{{- end }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,14 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "chart.fullname" . }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.port }}
targetPort: 3000
protocol: TCP
name: http
selector:
app: {{ include "chart.name" . }}

View File

@@ -0,0 +1,34 @@
# Default values for harmony-example-rust-webapp-chart.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 1
image:
repository: hub.nationtech.io/harmony/harmony-example-rust-webapp
pullPolicy: IfNotPresent
# Overridden by the chart's appVersion
tag: "latest"
service:
type: ClusterIP
port: 3000
ingress:
enabled: true
# Annotations for cert-manager to handle SSL.
annotations:
cert-manager.io/cluster-issuer: "letsencrypt-prod"
# Add other annotations like nginx ingress class if needed
# kubernetes.io/ingress.class: nginx
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
tls:
- secretName: harmony-example-rust-webapp-tls
hosts:
- chart-example.local

View File

@@ -0,0 +1,23 @@
[package]
name = "sttest"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
harmony_secret = { path = "../../harmony_secret" }
harmony_secret_derive = { path = "../../harmony_secret_derive" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
serde = { workspace = true }
brocade = { path = "../../brocade" }
schemars = "0.8"

1
examples/sttest/data Symbolic link
View File

@@ -0,0 +1 @@
../../data/

4
examples/sttest/env.sh Normal file
View File

@@ -0,0 +1,4 @@
export HARMONY_SECRET_NAMESPACE=sttest0
export HARMONY_SECRET_STORE=file
export HARMONY_DATABASE_URL=sqlite://harmony_sttest0.sqlite
export RUST_LOG=info

View File

@@ -0,0 +1,41 @@
mod topology;
use crate::topology::{get_inventory, get_topology};
use harmony::{
config::secret::SshKeyPair,
data::{FileContent, FilePath},
modules::{
inventory::HarmonyDiscoveryStrategy,
okd::{installation::OKDInstallationPipeline, ipxe::OKDIpxeScore},
},
score::Score,
topology::HAClusterTopology,
};
use harmony_secret::SecretManager;
#[tokio::main]
async fn main() {
// env_logger::init();
let inventory = get_inventory();
let topology = get_topology().await;
let ssh_key = SecretManager::get_or_prompt::<SshKeyPair>().await.unwrap();
let mut scores: Vec<Box<dyn Score<HAClusterTopology>>> = vec![Box::new(OKDIpxeScore {
kickstart_filename: "inventory.kickstart".to_string(),
harmony_inventory_agent: "harmony_inventory_agent".to_string(),
cluster_pubkey: FileContent {
path: FilePath::Relative("cluster_ssh_key.pub".to_string()),
content: ssh_key.public,
},
})];
// let mut scores: Vec<Box<dyn Score<HAClusterTopology>>> = vec![];
scores
.append(&mut OKDInstallationPipeline::get_all_scores(HarmonyDiscoveryStrategy::MDNS).await);
harmony_cli::run(inventory, topology, scores, None)
.await
.unwrap();
}

Some files were not shown because too many files have changed in this diff Show More