Compare commits

..

3 Commits

Author SHA1 Message Date
613def5e0b feat: deploys cluster monitoring stack from monitoring score on k8sanywhere topology
All checks were successful
Run Check Script / check (push) Successful in 1m46s
Run Check Script / check (pull_request) Successful in 1m47s
2025-06-11 15:06:39 -04:00
238d1f85e2 wip: impl k8sMonitor
Some checks failed
Run Check Script / check (push) Failing after 45s
Run Check Script / check (pull_request) Failing after 42s
2025-06-11 13:35:07 -04:00
dbc66f3d0c feat: set up basic structure for the concrete implementation of kube prometheus monitor, removed discord webhook receiver trait as the dependency is no longer required for prometheus to interact with discord
All checks were successful
Run Check Script / check (push) Successful in 1m47s
Run Check Script / check (pull_request) Successful in 1m49s
2025-06-06 16:41:17 -04:00
112 changed files with 1250 additions and 5737 deletions

View File

@@ -1,2 +0,0 @@
target/
Dockerfile

View File

@@ -1,15 +1,11 @@
name: Run Check Script
on:
push:
branches:
- master
pull_request:
jobs:
check:
runs-on: docker
container:
image: hub.nationtech.io/harmony/harmony_composer:latest@sha256:eb0406fcb95c63df9b7c4b19bc50ad7914dd8232ce98e9c9abef628e07c69386
runs-on: rust-cargo
steps:
- name: Checkout code
uses: actions/checkout@v4

View File

@@ -1,95 +0,0 @@
name: Compile and package harmony_composer
on:
push:
branches:
- master
jobs:
package_harmony_composer:
container:
image: hub.nationtech.io/harmony/harmony_composer:latest@sha256:eb0406fcb95c63df9b7c4b19bc50ad7914dd8232ce98e9c9abef628e07c69386
runs-on: dind
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Build for Linux x86_64
run: cargo build --release --bin harmony_composer --target x86_64-unknown-linux-gnu
- name: Build for Windows x86_64 GNU
run: cargo build --release --bin harmony_composer --target x86_64-pc-windows-gnu
- name: Setup log into hub.nationtech.io
uses: docker/login-action@v3
with:
registry: hub.nationtech.io
username: ${{ secrets.HUB_BOT_USER }}
password: ${{ secrets.HUB_BOT_PASSWORD }}
# TODO: build ARM images and MacOS binaries (or other targets) too
- name: Update snapshot-latest tag
run: |
git config user.name "Gitea CI"
git config user.email "ci@nationtech.io"
git tag -f snapshot-latest
git push origin snapshot-latest --force
- name: Install jq
run: apt install -y jq # The current image includes apt lists so we don't have to apt update and rm /var/lib/apt... every time. But if the image is optimized it won't work anymore
- name: Create or update release
run: |
# First, check if release exists and delete it if it does
RELEASE_ID=$(curl -s -X GET \
-H "Authorization: token ${{ secrets.GITEATOKEN }}" \
"https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/tags/snapshot-latest" \
| jq -r '.id // empty')
if [ -n "$RELEASE_ID" ]; then
# Delete existing release
curl -X DELETE \
-H "Authorization: token ${{ secrets.GITEATOKEN }}" \
"https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/$RELEASE_ID"
fi
# Create new release
RESPONSE=$(curl -X POST \
-H "Authorization: token ${{ secrets.GITEATOKEN }}" \
-H "Content-Type: application/json" \
-d '{
"tag_name": "snapshot-latest",
"name": "Latest Snapshot",
"body": "Automated snapshot build from master branch",
"draft": false,
"prerelease": true
}' \
"https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases")
echo "RELEASE_ID=$(echo $RESPONSE | jq -r '.id')" >> $GITHUB_ENV
- name: Upload Linux binary
run: |
curl -X POST \
-H "Authorization: token ${{ secrets.GITEATOKEN }}" \
-H "Content-Type: application/octet-stream" \
--data-binary "@target/x86_64-unknown-linux-gnu/release/harmony_composer" \
"https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/${{ env.RELEASE_ID }}/assets?name=harmony_composer"
- name: Upload Windows binary
run: |
curl -X POST \
-H "Authorization: token ${{ secrets.GITEATOKEN }}" \
-H "Content-Type: application/octet-stream" \
--data-binary "@target/x86_64-pc-windows-gnu/release/harmony_composer.exe" \
"https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/${{ env.RELEASE_ID }}/assets?name=harmony_composer.exe"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build and push
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: hub.nationtech.io/harmony/harmony_composer:latest

1126
Cargo.lock generated

File diff suppressed because it is too large

View File

@@ -11,7 +11,6 @@ members = [
"opnsense-config-xml",
"harmony_cli",
"k3d",
"harmony_composer",
]
[workspace.package]
@@ -20,35 +19,28 @@ readme = "README.md"
license = "GNU AGPL v3"
[workspace.dependencies]
log = "0.4"
env_logger = "0.11"
derive-new = "0.7"
async-trait = "0.1"
tokio = { version = "1.40", features = [
"io-std",
"fs",
"macros",
"rt-multi-thread",
] }
cidr = { features = ["serde"], version = "0.2" }
russh = "0.45"
russh-keys = "0.45"
rand = "0.8"
url = "2.5"
kube = { version = "1.1.0", features = [
"config",
"client",
"runtime",
"rustls-tls",
"ws",
"jsonpatch",
] }
k8s-openapi = { version = "0.25", features = ["v1_30"] }
serde_yaml = "0.9"
serde-value = "0.7"
http = "1.2"
inquire = "0.7"
convert_case = "0.8"
chrono = "0.4"
similar = "2"
uuid = { version = "1.11", features = ["v4", "fast-rng", "macro-diagnostics"] }
log = "0.4.22"
env_logger = "0.11.5"
derive-new = "0.7.0"
async-trait = "0.1.82"
tokio = { version = "1.40.0", features = ["io-std", "fs", "macros", "rt-multi-thread"] }
cidr = "0.2.3"
russh = "0.45.0"
russh-keys = "0.45.0"
rand = "0.8.5"
url = "2.5.4"
kube = "0.98.0"
k8s-openapi = { version = "0.24.0", features = ["v1_30"] }
serde_yaml = "0.9.34"
serde-value = "0.7.0"
http = "1.2.0"
inquire = "0.7.5"
convert_case = "0.8.0"
[workspace.dependencies.uuid]
version = "1.11.0"
features = [
"v4", # Lets you generate random UUIDs
"fast-rng", # Use a faster (but still sufficiently random) RNG
"macro-diagnostics", # Enable better diagnostics for compile-time UUIDs
]

View File

@@ -1,25 +0,0 @@
FROM docker.io/rust:1.87.0 AS build
WORKDIR /app
COPY . .
RUN cargo build --release --bin harmony_composer
FROM docker.io/rust:1.87.0
WORKDIR /app
RUN rustup target add x86_64-pc-windows-gnu
RUN rustup target add x86_64-unknown-linux-gnu
RUN rustup component add rustfmt
RUN apt update
# TODO: Consider adding more supported targets
# nodejs for checkout action, docker for building containers, mingw for cross-compiling for windows
RUN apt install -y nodejs docker.io mingw-w64
COPY --from=build /app/target/release/harmony_composer .
ENTRYPOINT ["/app/harmony_composer"]

252
README.md
View File

@@ -1,151 +1,171 @@
# Harmony : Open-source infrastructure orchestration that treats your platform like first-class code.
*By [NationTech](https://nationtech.io)*
# Harmony : Open Infrastructure Orchestration
[![Build](https://git.nationtech.io/NationTech/harmony/actions/workflows/check.yml/badge.svg)](https://git.nationtech.io/nationtech/harmony)
[![License](https://img.shields.io/badge/license-AGPLv3-blue?style=flat-square)](LICENSE)
## Quick demo
### Unify
`cargo run -p example-tui`
- **Project Scaffolding**
- **Infrastructure Provisioning**
- **Application Deployment**
- **Day-2 operations**
This will launch Harmony's minimalist terminal UI, which embeds a few demo scores.
All in **one strongly-typed Rust codebase**.
Usage instructions will be displayed at the bottom of the TUI.
### Deploy anywhere
`cargo run --bin example-cli -- --help`
From a **developer laptop** to a **global production cluster**, a single **source of truth** drives the **full software lifecycle.**
This is the Harmony CLI, a minimal implementation.
---
The current help text:
## 1 · The Harmony Philosophy
````
Usage: example-cli [OPTIONS]
Infrastructure is essential, but it shouldn't be your core business. Harmony is built on three guiding principles that make modern platforms reliable, repeatable, and easy to reason about.
Options:
-y, --yes Run score(s) or not
-f, --filter <FILTER> Filter query
-i, --interactive Run interactive TUI or not
-a, --all Run all or nth, defaults to all
-n, --number <NUMBER> Run nth matching, zero indexed [default: 0]
-l, --list list scores, will also be affected by run filter
-h, --help Print help
-V, --version Print version
| Principle | What it means for you |
|-----------|-----------------------|
| **Infrastructure as Resilient Code** | Replace sprawling YAML and bash scripts with type-safe Rust. Test, refactor, and version your platform just like application code. |
| **Prove It Works — Before You Deploy** | Harmony uses the compiler to verify that your application's needs match the target environment's capabilities at **compile-time**, eliminating an entire class of runtime outages. |
| **One Unified Model** | Software and infrastructure are a single system. Harmony models them together, enabling deep automation—from bare-metal servers to Kubernetes workloads—with zero context switching. |
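The compile-time guarantee described above is the kind of thing Rust's trait bounds make possible. A minimal, hypothetical sketch follows; the trait and function names are illustrative, not Harmony's actual API:

```rust
// Hypothetical sketch: capabilities as traits, so a deployment that needs
// Kubernetes + DNS can only target a topology providing both.
trait KubernetesCapability {}
trait DnsCapability {}

struct LocalK3d;
impl KubernetesCapability for LocalK3d {}

// Requiring both capabilities turns a mismatch into a type error,
// caught at compile time rather than surfacing as a runtime outage.
fn deploy<T: KubernetesCapability + DnsCapability>(_topology: &T) {
    println!("deploying...");
}

fn main() {
    let _topo = LocalK3d;
    // deploy(&_topo); // compile error: LocalK3d does not implement DnsCapability
}
```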
## Core architecture
These principles surface as simple, ergonomic Rust APIs that let teams focus on their product while trusting the platform underneath.
![Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)
````
## Supporting a new field in OPNSense `config.xml`
---
Two steps:
- Supporting the field in `opnsense-config-xml`
- Enabling Harmony to control the field
## 2 · Quick Start
We'll use the `filename` field in the `dhcpcd` section of the file as an example.
The snippet below spins up a complete **production-grade LAMP stack** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.
### Supporting the field
```rust
use harmony::{
data::Version,
inventory::Inventory,
maestro::Maestro,
modules::{
lamp::{LAMPConfig, LAMPScore},
monitoring::monitoring_alerting::MonitoringAlertingStackScore,
},
topology::{K8sAnywhereTopology, Url},
};
As type checking is enforced, every field in `config.xml` must be known to the code. Each subsection of `config.xml` has its own `.rs` file. For the `dhcpcd` section, we'll modify `opnsense-config-xml/src/data/dhcpd.rs`.
#[tokio::main]
async fn main() {
// 1. Describe what you want
let lamp_stack = LAMPScore {
name: "harmony-lamp-demo".into(),
domain: Url::Url(url::Url::parse("https://lampdemo.example.com").unwrap()),
php_version: Version::from("8.3.0").unwrap(),
config: LAMPConfig {
project_root: "./php".into(),
database_size: "4Gi".into(),
..Default::default()
},
};
When a new field appears in the XML file, an error like this is thrown and Harmony panics:
```
Running `/home/stremblay/nt/dir/harmony/target/debug/example-nanodc`
Found unauthorized element filename
thread 'main' panicked at opnsense-config-xml/src/data/opnsense.rs:54:14:
OPNSense received invalid string, should be full XML: ()
// 2. Pick where it should run
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(), // auto-detect hardware / kube-config
K8sAnywhereTopology::from_env(), // local k3d, CI, staging, prod…
)
.await
.unwrap();
// 3. Enhance with extra scores (monitoring, CI/CD, …)
let mut monitoring = MonitoringAlertingStackScore::new();
monitoring.namespace = Some(lamp_stack.config.namespace.clone());
maestro.register_all(vec![Box::new(lamp_stack), Box::new(monitoring)]);
// 4. Launch an interactive CLI / TUI
harmony_cli::init(maestro, None).await.unwrap();
}
```
Run it:
```bash
cargo run
Define the missing field (`filename`) in the `DhcpInterface` struct of `opnsense-config-xml/src/data/dhcpd.rs`:
```
pub struct DhcpInterface {
...
pub filename: Option<String>,
```
Harmony analyses the code, shows an execution plan in a TUI, and applies it once you confirm. Same code, same binary—every environment.
Harmony should now be fixed; build and run it.
---
### Controlling the field
## 3 · Core Concepts
| Term | One-liner |
|------|-----------|
| **Score<T>** | Declarative description of the desired state (e.g., `LAMPScore`). |
| **Interpret<T>** | Imperative logic that realises a `Score` on a specific environment. |
| **Topology** | An environment (local k3d, AWS, bare-metal) exposing verified *Capabilities* (Kubernetes, DNS, …). |
| **Maestro** | Orchestrator that compiles Scores + Topology, ensuring all capabilities line up **at compile-time**. |
| **Inventory** | Optional catalogue of physical assets for bare-metal and edge deployments. |
A visual overview is in the diagram below.
[Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)
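To make the table concrete, here is a minimal, hypothetical Score/Interpret pair; the shapes are simplified and Harmony's real trait signatures differ:

```rust
// Hypothetical sketch of the Score -> Interpret -> Topology flow.
trait Topology {
    fn name(&self) -> &str;
}

struct LocalTopology;
impl Topology for LocalTopology {
    fn name(&self) -> &str { "local" }
}

// A Score declares desired state; its interpret() realises it on a Topology.
struct HelloScore { message: String }

impl HelloScore {
    fn interpret<T: Topology>(&self, topology: &T) {
        println!("[{}] ensuring: {}", topology.name(), self.message);
    }
}

fn main() {
    let score = HelloScore { message: "hello, deployed".into() };
    score.interpret(&LocalTopology);
}
```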
---
## 4 · Install
Prerequisites:
* Rust
* Docker (if you deploy locally)
* `kubectl` / `helm` for Kubernetes-based topologies
```bash
git clone https://git.nationtech.io/nationtech/harmony
cd harmony
cargo build --release # builds the CLI, TUI and libraries
Define the `xml field setter` in `opnsense-config/src/modules/dhcpd.rs`.
```
impl<'a> DhcpConfig<'a> {
...
pub fn set_filename(&mut self, filename: &str) {
self.enable_netboot();
self.get_lan_dhcpd().filename = Some(filename.to_string());
}
...
```
---
Define the `value setter` in the `DhcpServer` trait in `domain/topology/network.rs`:
```
#[async_trait]
pub trait DhcpServer: Send + Sync {
...
async fn set_filename(&self, filename: &str) -> Result<(), ExecutorError>;
...
```
## 5 · Learning More
Implement the `value setter` in each `DhcpServer` implementation.
`infra/opnsense/dhcp.rs`:
```
#[async_trait]
impl DhcpServer for OPNSenseFirewall {
...
async fn set_filename(&self, filename: &str) -> Result<(), ExecutorError> {
{
let mut writable_opnsense = self.opnsense_config.write().await;
writable_opnsense.dhcp().set_filename(filename);
debug!("OPNsense dhcp server set filename {filename}");
}
* **Architectural Decision Records** dive into the rationale
- [ADR-001 · Why Rust](adr/001-rust.md)
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
- [ADR-006 · Secret Management](adr/006-secret-management.md)
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
Ok(())
}
...
```
* **Extending Harmony**: write new Scores / Interprets, add hardware like OPNsense firewalls, or embed Harmony in your own tooling (`/docs`).
`domain/topology/ha_cluster.rs`
```
#[async_trait]
impl DhcpServer for DummyInfra {
...
async fn set_filename(&self, _filename: &str) -> Result<(), ExecutorError> {
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
}
...
```
* **Community** discussions and roadmap live in [GitLab issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
Add the new field to the `DhcpScore` in `modules/dhcp.rs`:
```
pub struct DhcpScore {
...
pub filename: Option<String>,
```
---
Define it in its implementation in `modules/okd/dhcp.rs`
```
impl OKDDhcpScore {
...
Self {
dhcp_score: DhcpScore {
...
filename: Some("undionly.kpxe".to_string()),
```
## 6 · License
Define it in its implementation in `modules/okd/bootstrap_dhcp.rs`
```
impl OKDDhcpScore {
...
Self {
dhcp_score: DhcpScore::new(
...
Some("undionly.kpxe".to_string()),
```
Harmony is released under the **GNU AGPL v3**.
Update the interpret (the function called by the `execute` fn of the interpret) so it also updates the `filename` field value in `modules/dhcp.rs`:
```
impl DhcpInterpret {
...
let filename_outcome = match &self.score.filename {
Some(filename) => {
let dhcp_server = Arc::new(topology.dhcp_server.clone());
dhcp_server.set_filename(&filename).await?;
Outcome::new(
InterpretStatus::SUCCESS,
format!("Dhcp Interpret Set filename to {filename}"),
)
}
None => Outcome::noop(),
};
> We choose a strong copyleft license to ensure the project—and every improvement to it—remains open and benefits the entire community. Fork it, enhance it, even out-innovate us; just keep it open.
if next_server_outcome.status == InterpretStatus::NOOP
&& boot_filename_outcome.status == InterpretStatus::NOOP
&& filename_outcome.status == InterpretStatus::NOOP
See [LICENSE](LICENSE) for the full text.
...
---
*Made with ❤️ & 🦀 by NationTech and the Harmony community*
Ok(Outcome::new(
InterpretStatus::SUCCESS,
format!(
"Dhcp Interpret Set next boot to [{:?}], boot_filename to [{:?}], filename to [{:?}]",
self.score.next_server, self.score.boot_filename, self.score.filename
)
...
```

View File

@@ -1,73 +0,0 @@
pub trait MonitoringSystem {}
// 1. Modified AlertReceiver trait:
// - Removed the problematic `clone` method.
// - Added `box_clone` which returns a Box<dyn AlertReceiver>.
pub trait AlertReceiver {
type M: MonitoringSystem;
fn install(&self, sender: &Self::M) -> Result<(), String>;
// This method allows concrete types to clone themselves into a Box<dyn AlertReceiver>
fn box_clone(&self) -> Box<dyn AlertReceiver<M = Self::M>>;
}
#[derive(Clone)]
struct Prometheus{}
impl MonitoringSystem for Prometheus {}
#[derive(Clone)] // Keep derive(Clone) for DiscordWebhook itself
struct DiscordWebhook{}
impl AlertReceiver for DiscordWebhook {
type M = Prometheus;
fn install(&self, sender: &Self::M) -> Result<(), String> {
// Placeholder for actual installation logic
println!("DiscordWebhook installed for Prometheus monitoring.");
Ok(())
}
// 2. Implement `box_clone` for DiscordWebhook:
// This uses the derived `Clone` for DiscordWebhook to create a new boxed instance.
fn box_clone(&self) -> Box<dyn AlertReceiver<M = Self::M>> {
Box::new(self.clone())
}
}
// 3. Implement `std::clone::Clone` for `Box<dyn AlertReceiver<M= M>>`:
// This allows `Box<dyn AlertReceiver>` to be cloned.
// The `+ 'static` lifetime bound is often necessary for trait objects stored in collections,
// ensuring they live long enough.
impl<M: MonitoringSystem + 'static> Clone for Box<dyn AlertReceiver<M= M>> {
fn clone(&self) -> Self {
self.box_clone() // Call the custom `box_clone` method
}
}
// MonitoringConfig can now derive Clone because its `receivers` field
// (Vec<Box<dyn AlertReceiver<M = M>>>) is now cloneable.
#[derive(Clone)]
struct MonitoringConfig <M: MonitoringSystem + 'static>{
receivers: Vec<Box<dyn AlertReceiver<M = M>>>
}
// Example usage to demonstrate compilation and functionality
fn main() {
let prometheus_instance = Prometheus{};
let discord_webhook_instance = DiscordWebhook{};
let mut config = MonitoringConfig {
receivers: Vec::new()
};
// Create a boxed alert receiver
let boxed_receiver: Box<dyn AlertReceiver<M = Prometheus>> = Box::new(discord_webhook_instance);
config.receivers.push(boxed_receiver);
// Clone the config, which will now correctly clone the boxed receiver
let cloned_config = config.clone();
println!("Original config has {} receivers.", config.receivers.len());
println!("Cloned config has {} receivers.", cloned_config.receivers.len());
// Example of using the installed receiver
if let Some(receiver) = config.receivers.get(0) {
let _ = receiver.install(&prometheus_instance);
}
}

View File

@@ -137,9 +137,8 @@ Our approach addresses both customer and team multi-tenancy requirements:
### Implementation Roadmap
1. **Phase 1**: Implement VPN access and manual tenant provisioning
2. **Phase 2**: Deploy TenantScore automation for namespace, RBAC, and NetworkPolicy management
3. **Phase 3**: Work on privilege escalation from pods, audit for weaknesses, enforce security policies on pod runtimes
4. **Phase 4**: Integrate Keycloak for centralized identity management
5. **Phase 5**: Add advanced monitoring and per-tenant observability
3. **Phase 3**: Integrate Keycloak for centralized identity management
4. **Phase 4**: Add advanced monitoring and per-tenant observability
### TenantScore Structure Preview
```rust

View File

@@ -1,41 +0,0 @@
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: tenant-isolation-policy
namespace: testtenant
spec:
podSelector: {} # Selects all pods in the namespace
policyTypes:
- Ingress
- Egress
ingress:
- from:
- podSelector: {} # Allow from all pods in the same namespace
egress:
- to:
- podSelector: {} # Allow to all pods in the same namespace
- to:
- podSelector: {}
namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: openshift-dns # Target the openshift-dns namespace
# Note, only opening port 53 is not enough, will have to dig deeper into this one eventually
# ports:
# - protocol: UDP
# port: 53
# - protocol: TCP
# port: 53
# Allow egress to public internet only
- to:
- ipBlock:
cidr: 0.0.0.0/0
except:
- 10.0.0.0/8 # RFC1918
- 172.16.0.0/12 # RFC1918
- 192.168.0.0/16 # RFC1918
- 169.254.0.0/16 # Link-local
- 127.0.0.0/8 # Loopback
- 224.0.0.0/4 # Multicast
- 240.0.0.0/4 # Reserved
- 100.64.0.0/10 # Carrier-grade NAT
- 0.0.0.0/8 # Reserved

View File

@@ -1,95 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: testtenant
---
apiVersion: v1
kind: Namespace
metadata:
name: testtenant2
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: test-web
namespace: testtenant
spec:
replicas: 1
selector:
matchLabels:
app: test-web
template:
metadata:
labels:
app: test-web
spec:
containers:
- name: nginx
image: nginxinc/nginx-unprivileged
ports:
- containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
name: test-web
namespace: testtenant
spec:
selector:
app: test-web
ports:
- port: 80
targetPort: 8080
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: test-client
namespace: testtenant
spec:
replicas: 1
selector:
matchLabels:
app: test-client
template:
metadata:
labels:
app: test-client
spec:
containers:
- name: curl
image: curlimages/curl:latest
command: ["/bin/sh", "-c", "sleep 3600"]
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: test-web
namespace: testtenant2
spec:
replicas: 1
selector:
matchLabels:
app: test-web
template:
metadata:
labels:
app: test-web
spec:
containers:
- name: nginx
image: nginxinc/nginx-unprivileged
ports:
- containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
name: test-web
namespace: testtenant2
spec:
selector:
app: test-web
ports:
- port: 80
targetPort: 8080

View File

@@ -1,63 +0,0 @@
# Architecture Decision Record: \<Title\>
Initial Author: Jean-Gabriel Gill-Couture
Initial Date: 2025-06-04
Last Updated Date: 2025-06-04
## Status
Proposed
## Context
As Harmony's goal is to make software delivery easier, we must provide an easy way for developers to express their app's semantics and dependencies with great abstractions, in a similar fashion to what the score.dev project is doing.
Thus, we started working on ways to package common types of applications such as LAMP, which we started working on with `LAMPScore`.
Now it is time for the next step: we want to pave the way towards complete lifecycle automation. To do this, we will start with a way to execute Harmony's modules easily from anywhere, starting locally and in CI environments.
## Decision
To achieve easy, portable execution of Harmony, we will follow this architecture:
- Host a basic Harmony release that is compiled with the CLI by our Gitea/GitHub server
- This binary will do the following : check if there is a `harmony` folder in the current path
- If yes
- Check if cargo is available locally and compile the harmony binary, or compile the harmony binary using a Rust docker container; if neither cargo nor a container runtime is available, output a message explaining the situation
- Run the newly compiled binary. (Ideally using PID handoff like `exec` does, but some research should be done here. Handing off the process helps with OS interaction such as terminal apps, signals, exit codes, and process handling, but there might be side effects.)
- If not
- Suggest initializing a project by auto detecting what the project looks like
- When the project type cannot be auto-detected, provide links to Harmony's documentation on how to set up a project, a link to the examples folder, and ask the user if they want to initialize an empty Harmony project in the current folder
- harmony/Cargo.toml with dependencies set
- harmony/src/main.rs with an example LAMPScore setup and ready to run
- This same binary can be used in a CI environment to run the target project's Harmony module. By default, we provide these opinionated steps:
1. **An empty check step.** The purpose of this step is to run all tests and checks against the codebase. For complex projects this could involve a very complex pipeline of test environment setup and execution, but that is out of scope for now; it is not handled by Harmony. For projects with automatic setup, we can fill this step with something like `cargo fmt --check; cargo test; cargo build`, but Harmony is not directly involved in executing it.
2. **Package and publish.** Once all checks have passed, the production ready container is built and pushed to a registry. This is done by Harmony.
3. **Deploy to staging automatically.**
4. **Run a sanity check on staging.** As Harmony is responsible for deploying, Harmony should have all the knowledge of how to perform a sanity check on the staging environment. This will, most of the time, be a simple verification of the kubernetes health of all deployed components, and a poke on the public endpoint when there is one.
5. **Deploy to production automatically.** Many projects will require manual approval here, which can easily be set up in the CI afterwards, but our opinion is that automatic deployment is the right default.
6. **Run a sanity check on production.** Same check as staging, but on production.
*Note on providing a base pipeline:* Having a complete pipeline set up automatically will encourage development teams to build upon it by adding tests where they belong. The goal here is to provide an opinionated solution that works for most small and large projects. Of course, many organizations will need to add steps such as deploying to sandbox environments, requiring more advanced approvals, or more complex publication and coordination with other projects. But this encompasses the basics required to build and deploy software reliably at any scale. A rough sketch of the detection logic described above appears after this note.
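As an illustration only, the bootstrap binary's first branch might look like the following; the paths, commands, and messages are assumptions, not the final design:

```rust
use std::{path::Path, process::Command};

fn main() {
    if Path::new("harmony").is_dir() {
        // Prefer a locally available cargo; Command::output() errs if the
        // binary cannot be spawned, which doubles as a presence check.
        let cargo_available = Command::new("cargo").arg("--version").output().is_ok();
        if cargo_available {
            // Compile and run the project's Harmony module (ideally this
            // would hand off the process, as discussed above).
            Command::new("cargo")
                .args(["run", "--manifest-path", "harmony/Cargo.toml"])
                .status()
                .expect("failed to run the harmony module");
        } else {
            // A container-runtime fallback (e.g. a rust image) would go here.
            eprintln!("Neither cargo nor a container runtime is available; see the docs.");
        }
    } else {
        eprintln!("No `harmony` folder found; Harmony can suggest initializing a project here.");
    }
}
```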
### Environment setup
TBD: For now, environments (tenants) will be set up and configured manually. Harmony will rely on the kubeconfig provided in the environment where it is running to deploy into the namespace.
CD tools such as Argo or Flux will be activated by default by Harmony when using application-level Scores such as LAMPScore, in a similar way to how the container is automatically built. CI deployment steps will then notify the CD tool of the new release to deploy through its API.
## Rationale
Reasoning behind the decision
## Consequences
Pros/Cons of chosen solution
## Alternatives considered
Pros/Cons of various proposed solutions considered
## Additional Notes

View File

@@ -1,78 +0,0 @@
# Architecture Decision Record: Monitoring Notifications
Initial Author: Taha Hawa
Initial Date: 2025-06-26
Last Updated Date: 2025-06-26
## Status
Proposed
## Context
We need to send notifications (typically from AlertManager/Prometheus) and receive them on mobile devices in some way, whether push messages, SMS, phone calls, email, or all of the above.
## Decision
We should go with https://ntfy.sh, but host it ourselves.
`ntfy` is an open source solution written in Go that has the features we need.
## Rationale
`ntfy` has pretty much everything we need (push notifications, email forwarding, webhook ingestion) and little we don't. A good, lightweight fit.
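Since ntfy operates over plain HTTP, pushing a message is a one-request affair. A sketch using the blocking `reqwest` client (the host and topic below are hypothetical):

```rust
// Sketch: publish a test notification to a self-hosted ntfy topic.
// ntfy treats the request body as the message; the Title header sets the title.
fn main() -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::new();
    client
        .post("https://ntfy.example.internal/harmony-alerts") // hypothetical host/topic
        .header("Title", "AlertManager test")
        .body("Disk pressure detected on node-1")
        .send()?
        .error_for_status()?;
    Ok(())
}
```

AlertManager can target the same kind of endpoint through its webhook mechanism, which is the "easily usable via AlertManager" point above.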
## Consequences
Pros:
- topics, with ACLs
- lightweight
- reliable
- easy to configure
- mobile app
- the mobile app can listen via websocket, poll, or receive via Firebase/GCM on Android, or similar on iOS.
- Forward to email
- Text-to-Speech phone call messages using Twilio integration
- Operates based on simple HTTP requests/Webhooks, easily usable via AlertManager
Cons:
- No SMS pushes
- SQLite DB makes it harder to run HA / scale out
## Alternatives considered
[AWS SNS](https://aws.amazon.com/sns/):
Pros:
- highly reliable
- no hosting needed
Cons:
- no control, not self hosted
- costs (per usage)
[Apprise](https://github.com/caronc/apprise):
Pros:
- Way more ways of sending notifications
- Can use ntfy as one of the backends/ways of sending
Cons:
- Way too overkill for what we need in terms of features
[Gotify](https://github.com/gotify/server):
Pros:
- simple, lightweight, golang, etc
Cons:
- Pushes topics are per-user
## Additional Notes

View File

@@ -1 +0,0 @@
Not much here yet, see the `adr` folder for now. More to come in time!

View File

@@ -1,13 +0,0 @@
## Conceptual metaphor : The Cyborg and the Central Nervous System
At the heart of Harmony lies a core belief: in modern, decentralized systems, **software and infrastructure are not separate entities.** They are a single, symbiotic organism—a cyborg.
The software is the electronics, the "mind"; the infrastructure is the biological host, the "body". They live or die, thrive or sink together.
Traditional approaches attempt to manage this complex organism with fragmented tools: static YAML for configuration, brittle scripts for automation, and separate Infrastructure as Code (IaC) for provisioning. This creates a disjointed system that struggles to scale or heal itself, making it inadequate for the demands of fully automated, enterprise-grade clusters.
Harmony's goal is to provide the **central nervous system for this cyborg**. We aim to achieve the full automation of complex, decentralized clouds by managing this integrated entity holistically.
To achieve this, a tool must be both robust and powerful. It must manage the entire lifecycle—deployment, upgrades, failure recovery, and decommissioning—with precision. This requires full control over application packaging and a deep, intrinsic integration between the software and the infrastructure it inhabits.
This is why Harmony uses a powerful, living language like Rust. It replaces static, lifeless configuration files with a dynamic, breathing codebase. It allows us to express the complex relationships and behaviors of a modern distributed system, enabling the creation of truly automated, resilient, and powerful platforms that can thrive.

View File

@@ -14,8 +14,8 @@ harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
kube = "1.1.0"
k8s-openapi = { version = "0.25.0", features = ["v1_30"] }
kube = "0.98.0"
k8s-openapi = { version = "0.24.0", features = [ "v1_30" ] }
http = "1.2.0"
serde_yaml = "0.9.34"
inquire.workspace = true

View File

@@ -29,23 +29,13 @@ async fn main() {
},
};
//let monitoring = MonitoringAlertingScore {
// alert_receivers: vec![Box::new(DiscordWebhook {
// url: Url::Url(url::Url::parse("https://discord.idonotexist.com").unwrap()),
// // TODO write url macro
// // url: url!("https://discord.idonotexist.com"),
// })],
// alert_rules: vec![],
// scrape_targets: vec![],
//};
// You can choose the type of Topology you want, we suggest starting with the
// K8sAnywhereTopology as it is the most automatic one that enables you to easily deploy
// locally, to a development environment from CI, to staging, and to production with settings
// that automatically adapt to each environment grade.
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
K8sAnywhereTopology::new(),
)
.await
.unwrap();

View File

@@ -1,5 +1,5 @@
[package]
name = "example-monitoring"
name = "webhook_sender"
edition = "2024"
version.workspace = true
readme.workspace = true
@@ -8,6 +8,5 @@ license.workspace = true
[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
harmony_macros = { version = "0.1.0", path = "../../harmony_macros" }
tokio.workspace = true
url.workspace = true

View File

@@ -1,86 +1,23 @@
use std::collections::HashMap;
use harmony::{
inventory::Inventory,
maestro::Maestro,
modules::{
monitoring::{
alert_channel::discord_alert_channel::DiscordWebhook,
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
kube_prometheus::{
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
types::{
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
ServiceMonitorEndpoint,
},
},
},
prometheus::alerts::{
infra::dell_server::{
alert_global_storage_status_critical, alert_global_storage_status_non_recoverable,
global_storage_status_degraded_non_critical,
},
k8s::pvc::high_pvc_fill_rate_over_two_days,
},
},
topology::{K8sAnywhereTopology, Url},
modules::monitoring::monitoring_alerting::MonitoringAlertingScore,
topology::{K8sAnywhereTopology, oberservability::K8sMonitorConfig},
};
#[tokio::main]
async fn main() {
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
};
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
let dell_system_storage_degraded = global_storage_status_degraded_non_critical();
let alert_global_storage_status_critical = alert_global_storage_status_critical();
let alert_global_storage_status_non_recoverable = alert_global_storage_status_non_recoverable();
let additional_rules =
AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]);
let additional_rules2 = AlertManagerRuleGroup::new(
"dell-server-alerts",
vec![
dell_system_storage_degraded,
alert_global_storage_status_critical,
alert_global_storage_status_non_recoverable,
],
);
let service_monitor_endpoint = ServiceMonitorEndpoint {
port: Some("80".to_string()),
path: "/metrics".to_string(),
scheme: HTTPScheme::HTTP,
..Default::default()
};
let service_monitor = ServiceMonitor {
name: "test-service-monitor".to_string(),
selector: Selector {
match_labels: HashMap::new(),
match_expressions: vec![MatchExpression {
key: "test".to_string(),
operator: Operator::In,
values: vec!["test-service".to_string()],
}],
},
endpoints: vec![service_monitor_endpoint],
..Default::default()
};
let alerting_score = HelmPrometheusAlertingScore {
receivers: vec![Box::new(discord_receiver)],
rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
service_monitors: vec![service_monitor],
};
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
K8sAnywhereTopology::new(),
)
.await
.unwrap();
maestro.register_all(vec![Box::new(alerting_score)]);
let monitoring = MonitoringAlertingScore {
alert_channel_configs: None,
};
maestro.register_all(vec![Box::new(monitoring)]);
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -1,13 +0,0 @@
[package]
name = "example-monitoring-with-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
cidr.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true

View File

@@ -1,90 +0,0 @@
use std::collections::HashMap;
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::{
monitoring::{
alert_channel::discord_alert_channel::DiscordWebhook,
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
kube_prometheus::{
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
types::{
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
ServiceMonitorEndpoint,
},
},
},
prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
tenant::TenantScore,
},
topology::{
K8sAnywhereTopology, Url,
tenant::{ResourceLimits, TenantConfig, TenantNetworkPolicy},
},
};
#[tokio::main]
async fn main() {
let tenant = TenantScore {
config: TenantConfig {
id: Id::from_string("1234".to_string()),
name: "test-tenant".to_string(),
resource_limits: ResourceLimits {
cpu_request_cores: 6.0,
cpu_limit_cores: 4.0,
memory_request_gb: 4.0,
memory_limit_gb: 4.0,
storage_total_gb: 10.0,
},
network_policy: TenantNetworkPolicy::default(),
},
};
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
};
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
let additional_rules =
AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]);
let service_monitor_endpoint = ServiceMonitorEndpoint {
port: Some("80".to_string()),
path: "/metrics".to_string(),
scheme: HTTPScheme::HTTP,
..Default::default()
};
let service_monitor = ServiceMonitor {
name: "test-service-monitor".to_string(),
selector: Selector {
match_labels: HashMap::new(),
match_expressions: vec![MatchExpression {
key: "test".to_string(),
operator: Operator::In,
values: vec!["test-service".to_string()],
}],
},
endpoints: vec![service_monitor_endpoint],
..Default::default()
};
let alerting_score = HelmPrometheusAlertingScore {
receivers: vec![Box::new(discord_receiver)],
rules: vec![Box::new(additional_rules)],
service_monitors: vec![service_monitor],
};
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
)
.await
.unwrap();
maestro.register_all(vec![Box::new(tenant), Box::new(alerting_score)]);
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -1,12 +0,0 @@
[package]
name = "example-ntfy"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true

View File

@@ -1,19 +0,0 @@
use harmony::{
inventory::Inventory, maestro::Maestro, modules::monitoring::ntfy::ntfy::NtfyScore,
topology::K8sAnywhereTopology,
};
#[tokio::main]
async fn main() {
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
)
.await
.unwrap();
maestro.register_all(vec![Box::new(NtfyScore {
namespace: "monitoring".to_string(),
})]);
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -1,10 +0,0 @@
[package]
name = "example-postgres"
version = "0.1.0"
edition = "2021"
[dependencies]
harmony = { path = "../../harmony" }
tokio = { version = "1", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
async-trait = "0.1.80"

View File

@@ -1,84 +0,0 @@
use async_trait::async_trait;
use harmony::{
data::{PostgresDatabase, PostgresUser},
interpret::InterpretError,
inventory::Inventory,
maestro::Maestro,
modules::postgres::PostgresScore,
topology::{PostgresServer, Topology},
};
use std::error::Error;
#[derive(Debug, Clone)]
struct MockTopology;
#[async_trait]
impl Topology for MockTopology {
fn name(&self) -> &str {
"MockTopology"
}
async fn ensure_ready(&self) -> Result<harmony::interpret::Outcome, InterpretError> {
Ok(harmony::interpret::Outcome::new(
harmony::interpret::InterpretStatus::SUCCESS,
"Mock topology is always ready".to_string(),
))
}
}
#[async_trait]
impl PostgresServer for MockTopology {
async fn ensure_users_exist(&self, users: Vec<PostgresUser>) -> Result<(), InterpretError> {
println!("Ensuring users exist:");
for user in users {
println!(" - {}: {}", user.name, user.password);
}
Ok(())
}
async fn ensure_databases_exist(
&self,
databases: Vec<PostgresDatabase>,
) -> Result<(), InterpretError> {
println!("Ensuring databases exist:");
for db in databases {
println!(" - {}: owner={}", db.name, db.owner);
}
Ok(())
}
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let users = vec![
PostgresUser {
name: "admin".to_string(),
password: "password".to_string(),
},
PostgresUser {
name: "user".to_string(),
password: "password".to_string(),
},
];
let databases = vec![
PostgresDatabase {
name: "app_db".to_string(),
owner: "admin".to_string(),
},
PostgresDatabase {
name: "user_db".to_string(),
owner: "user".to_string(),
},
];
let postgres_score = PostgresScore::new(users, databases);
let inventory = Inventory::empty();
let topology = MockTopology;
let maestro = Maestro::new(inventory, topology);
maestro.interpret(Box::new(postgres_score)).await?;
Ok(())
}

View File

@@ -1,14 +0,0 @@
[package]
name = "example-rust"
version = "0.1.0"
edition = "2024"
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
harmony_macros = { path = "../../harmony_macros" }
tokio = { workspace = true }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

View File

@@ -1,20 +0,0 @@
use harmony::{
inventory::Inventory,
maestro::Maestro,
modules::application::{RustWebappScore, features::ContinuousDelivery},
topology::{K8sAnywhereTopology, Url},
};
#[tokio::main]
async fn main() {
let app = RustWebappScore {
name: "Example Rust Webapp".to_string(),
domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
features: vec![Box::new(ContinuousDelivery {})],
};
let topology = K8sAnywhereTopology::from_env();
let mut maestro = Maestro::new(Inventory::autoload(), topology);
maestro.register_all(vec![Box::new(app)]);
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -1,18 +0,0 @@
[package]
name = "example-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

View File

@@ -1,41 +0,0 @@
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::tenant::TenantScore,
topology::{K8sAnywhereTopology, tenant::TenantConfig},
};
#[tokio::main]
async fn main() {
let tenant = TenantScore {
config: TenantConfig {
id: Id::from_str("test-tenant-id"),
name: "testtenant".to_string(),
..Default::default()
},
};
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
)
.await
.unwrap();
maestro.register_all(vec![Box::new(tenant)]);
harmony_cli::init(maestro, None).await.unwrap();
}
// TODO write tests
// - Create Tenant with default config mostly, make sure namespace is created
// - deploy sample client/server app with nginx unprivileged and a service
// - exec in the client pod and validate the following
// - can reach internet
// - can reach server pod
// - can resolve dns queries to internet
// - can resolve dns queries to services
// - cannot reach services and pods in other namespaces
// - Create Tenant with specific cpu/ram/storage requests / limits and make sure they are enforced by trying to
// deploy a pod with lower requests/limits (accepted) and higher requests/limits (rejected)
// - Create TenantCredentials and make sure they give only access to the correct tenant

View File

@@ -10,9 +10,9 @@ publish = false
harmony = { path = "../../harmony" }
harmony_tui = { path = "../../harmony_tui" }
harmony_types = { path = "../../harmony_types" }
harmony_macros = { path = "../../harmony_macros" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

View File

@@ -6,8 +6,6 @@ readme.workspace = true
license.workspace = true
[dependencies]
rand = "0.9"
hex = "0.4"
libredfish = "0.1.1"
reqwest = { version = "0.11", features = ["blocking", "json"] }
russh = "0.45.0"
@@ -42,7 +40,6 @@ dockerfile_builder = "0.1.5"
temp-file = "0.1.9"
convert_case.workspace = true
email_address = "0.2.9"
chrono.workspace = true
fqdn = { version = "0.4.6", features = [
"domain-label-cannot-start-or-end-with-hyphen",
"domain-label-length-limited-to-63",
@@ -53,7 +50,3 @@ fqdn = { version = "0.4.6", features = [
] }
temp-dir = "0.1.14"
dyn-clone = "1.0.19"
similar.workspace = true
futures-util = "0.3.31"
tokio-util = "0.7.15"
strum = { version = "0.27.1", features = ["derive"] }

View File

@@ -10,6 +10,4 @@ lazy_static! {
std::env::var("HARMONY_REGISTRY_URL").unwrap_or_else(|_| "hub.nationtech.io".to_string());
pub static ref REGISTRY_PROJECT: String =
std::env::var("HARMONY_REGISTRY_PROJECT").unwrap_or_else(|_| "harmony".to_string());
pub static ref DRY_RUN: bool =
std::env::var("HARMONY_DRY_RUN").map_or(true, |value| value.parse().unwrap_or(true));
}

View File

@@ -1,23 +1,5 @@
use rand::distr::Alphanumeric;
use rand::distr::SampleString;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
use serde::{Deserialize, Serialize};
/// A unique identifier designed for ease of use.
///
/// You can pass it any String to use as an Id, or you can use the default format with `Id::default()`
///
/// The default format looks like this
///
/// `462d4c_g2COgai`
///
/// The first part is the unix timestamp in hexadecimal, which makes Ids easily sortable by creation time.
/// The second part is a series of 7 random characters.
///
/// **It is not meant to be very secure or unique**; it is suitable for generating up to 10 000 items per
/// second with a reasonable collision rate of 0.000014 %, as calculated by this calculator: https://kevingal.com/apps/collision.html
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Id {
value: String,
@@ -27,10 +9,6 @@ impl Id {
pub fn from_string(value: String) -> Self {
Self { value }
}
pub fn from_str(value: &str) -> Self {
Self::from_string(value.to_string())
}
}
impl std::fmt::Display for Id {
@@ -38,20 +16,3 @@ impl std::fmt::Display for Id {
f.write_str(&self.value)
}
}
impl Default for Id {
fn default() -> Self {
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let timestamp = since_the_epoch.as_secs();
let hex_timestamp = format!("{:x}", timestamp & 0xffffff);
let random_part: String = Alphanumeric.sample_string(&mut rand::rng(), 7);
let value = format!("{}_{}", hex_timestamp, random_part);
Self { value }
}
}

View File

@@ -2,6 +2,3 @@ mod id;
mod version;
pub use id::*;
pub use version::*;
mod postgres;
pub use postgres::*;

View File

@@ -1,13 +0,0 @@
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PostgresUser {
pub name: String,
pub password: String, // In a real scenario, this should be a secret type
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PostgresDatabase {
pub name: String,
pub owner: String,
}

View File

@@ -21,8 +21,6 @@ pub enum InterpretName {
OPNSense,
K3dInstallation,
TenantInterpret,
Application,
Postgres,
}
impl std::fmt::Display for InterpretName {
@@ -39,8 +37,6 @@ impl std::fmt::Display for InterpretName {
InterpretName::OPNSense => f.write_str("OPNSense"),
InterpretName::K3dInstallation => f.write_str("K3dInstallation"),
InterpretName::TenantInterpret => f.write_str("Tenant"),
InterpretName::Application => f.write_str("Application"),
InterpretName::Postgres => f.write_str("Postgres"),
}
}
}
@@ -128,11 +124,3 @@ impl From<kube::Error> for InterpretError {
}
}
}
impl From<String> for InterpretError {
fn from(value: String) -> Self {
Self {
msg: format!("InterpretError : {value}"),
}
}
}

View File

@@ -34,17 +34,6 @@ pub struct Inventory {
}
impl Inventory {
pub fn empty() -> Self {
Self {
location: Location::new("Empty".to_string(), "location".to_string()),
switch: vec![],
firewall: vec![],
worker_host: vec![],
storage_host: vec![],
control_plane_host: vec![],
}
}
pub fn autoload() -> Self {
Self {
location: Location::test_building(),

View File

@@ -46,7 +46,7 @@ pub struct HAClusterTopology {
#[async_trait]
impl Topology for HAClusterTopology {
fn name(&self) -> &str {
"HAClusterTopology"
todo!()
}
async fn ensure_ready(&self) -> Result<Outcome, InterpretError> {
todo!(

View File

@@ -1,14 +0,0 @@
use async_trait::async_trait;
use crate::{interpret::InterpretError, inventory::Inventory};
#[async_trait]
pub trait Installable<T>: Send + Sync {
async fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), InterpretError>;
async fn ensure_installed(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<(), InterpretError>;
}

View File

@@ -1,38 +1,18 @@
use derive_new::new;
use futures_util::StreamExt;
use k8s_openapi::{
ClusterResourceScope, NamespaceResourceScope,
api::{apps::v1::Deployment, core::v1::Pod},
};
use kube::runtime::conditions;
use kube::runtime::wait::await_condition;
use k8s_openapi::NamespaceResourceScope;
use kube::{
Client, Config, Error, Resource,
api::{Api, AttachParams, ListParams, Patch, PatchParams, ResourceExt},
Api, Client, Config, Error, Resource,
api::PostParams,
config::{KubeConfigOptions, Kubeconfig},
core::ErrorResponse,
runtime::reflector::Lookup,
};
use log::{debug, error, trace};
use log::error;
use serde::de::DeserializeOwned;
use similar::{DiffableStr, TextDiff};
#[derive(new, Clone)]
#[derive(new)]
pub struct K8sClient {
client: Client,
}
impl std::fmt::Debug for K8sClient {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// This is a poor man's debug implementation for now as kube::Client does not provide much
// useful information
f.write_fmt(format_args!(
"K8sClient {{ kube client using default namespace {} }}",
self.client.default_namespace()
))
}
}
impl K8sClient {
pub async fn try_default() -> Result<Self, Error> {
Ok(Self {
@@ -40,205 +20,54 @@ impl K8sClient {
})
}
pub async fn wait_until_deployment_ready(
pub async fn apply_all<
K: Resource<Scope = NamespaceResourceScope>
+ std::fmt::Debug
+ Sync
+ DeserializeOwned
+ Default
+ serde::Serialize
+ Clone,
>(
&self,
name: String,
namespace: Option<&str>,
timeout: Option<u64>,
) -> Result<(), String> {
let api: Api<Deployment>;
if let Some(ns) = namespace {
api = Api::namespaced(self.client.clone(), ns);
} else {
api = Api::default_namespaced(self.client.clone());
}
let establish = await_condition(api, name.as_str(), conditions::is_deployment_completed());
let t = if let Some(t) = timeout { t } else { 300 };
let res = tokio::time::timeout(std::time::Duration::from_secs(t), establish).await;
if let Ok(r) = res {
return Ok(());
} else {
return Err("timed out while waiting for deployment".to_string());
}
}
/// Will execute a command in the first pod found that matches the label `app.kubernetes.io/name={name}`
pub async fn exec_app(
&self,
name: String,
namespace: Option<&str>,
command: Vec<&str>,
) -> Result<(), String> {
let api: Api<Pod>;
if let Some(ns) = namespace {
api = Api::namespaced(self.client.clone(), ns);
} else {
api = Api::default_namespaced(self.client.clone());
}
let pod_list = api
.list(&ListParams::default().labels(format!("app.kubernetes.io/name={name}").as_str()))
.await
.expect("couldn't get list of pods");
let res = api
.exec(
pod_list
.items
.first()
.expect("couldn't get pod")
.name()
.expect("couldn't get pod name")
.into_owned()
.as_str(),
command,
&AttachParams::default(),
)
.await;
match res {
Err(e) => return Err(e.to_string()),
Ok(mut process) => {
let status = process
.take_status()
.expect("Couldn't get status")
.await
.expect("Couldn't unwrap status");
if let Some(s) = status.status {
debug!("Status: {}", s);
if s == "Success" {
return Ok(());
} else {
return Err(s);
}
} else {
return Err("Couldn't get inner status of pod exec".to_string());
}
}
}
}
/// Apply a resource in namespace
///
/// See `kubectl apply` for more information on the expected behavior of this function
pub async fn apply<K>(&self, resource: &K, namespace: Option<&str>) -> Result<K, Error>
resource: &Vec<K>,
) -> Result<Vec<K>, kube::Error>
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as kube::Resource>::DynamicType: Default,
{
debug!(
"Applying resource {:?} with ns {:?}",
resource.meta().name,
namespace
);
trace!(
"{:#}",
serde_json::to_value(resource).unwrap_or(serde_json::Value::Null)
);
let api: Api<K> =
<<K as Resource>::Scope as ApplyStrategy<K>>::get_api(&self.client, namespace);
// api.create(&PostParams::default(), &resource).await
let patch_params = PatchParams::apply("harmony");
let name = resource
.meta()
.name
.as_ref()
.expect("K8s Resource should have a name");
if *crate::config::DRY_RUN {
match api.get(name).await {
Ok(current) => {
trace!("Received current value {current:#?}");
// The resource exists, so we calculate and display a diff.
println!("\nPerforming dry-run for resource: '{}'", name);
let mut current_yaml = serde_yaml::to_value(&current)
.expect(&format!("Could not serialize current value : {current:#?}"));
if current_yaml.is_mapping() && current_yaml.get("status").is_some() {
let map = current_yaml.as_mapping_mut().unwrap();
let removed = map.remove_entry("status");
trace!("Removed status {:?}", removed);
} else {
trace!(
"Did not find status entry for current object {}/{}",
current.meta().namespace.as_ref().unwrap_or(&"".to_string()),
current.meta().name.as_ref().unwrap_or(&"".to_string())
);
}
let current_yaml = serde_yaml::to_string(&current_yaml)
.unwrap_or_else(|_| "Failed to serialize current resource".to_string());
let new_yaml = serde_yaml::to_string(resource)
.unwrap_or_else(|_| "Failed to serialize new resource".to_string());
if current_yaml == new_yaml {
println!("No changes detected.");
// Return the current resource state as there are no changes.
return Ok(current);
}
println!("Changes detected:");
let diff = TextDiff::from_lines(&current_yaml, &new_yaml);
// Iterate over the changes and print them in a git-like diff format.
for change in diff.iter_all_changes() {
let sign = match change.tag() {
similar::ChangeTag::Delete => "-",
similar::ChangeTag::Insert => "+",
similar::ChangeTag::Equal => " ",
};
print!("{}{}", sign, change);
}
// In a dry run, we return the new resource state that would have been applied.
Ok(resource.clone())
}
Err(Error::Api(ErrorResponse { code: 404, .. })) => {
// The resource does not exist, so the "diff" is the entire new resource.
println!("\nPerforming dry-run for new resource: '{}'", name);
println!(
"Resource does not exist. It would be created with the following content:"
);
let new_yaml = serde_yaml::to_string(resource)
.unwrap_or_else(|_| "Failed to serialize new resource".to_string());
// Print each line of the new resource with a '+' prefix.
for line in new_yaml.lines() {
println!("+{}", line);
}
// In a dry run, we return the new resource state that would have been created.
Ok(resource.clone())
}
Err(e) => {
// Another API error occurred.
error!("Failed to get resource '{}': {}", name, e);
Err(e)
}
}
} else {
return api
.patch(name, &patch_params, &Patch::Apply(resource))
.await;
}
}
pub async fn apply_many<K>(&self, resource: &Vec<K>, ns: Option<&str>) -> Result<Vec<K>, Error>
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as kube::Resource>::DynamicType: Default,
{
let mut result = Vec::new();
let mut result = vec![];
for r in resource.iter() {
result.push(self.apply(r, ns).await?);
let api: Api<K> = Api::all(self.client.clone());
result.push(api.create(&PostParams::default(), &r).await?);
}
Ok(result)
}
pub async fn apply_namespaced<K>(
&self,
resource: &Vec<K>,
ns: Option<&str>,
) -> Result<Vec<K>, Error>
where
K: Resource<Scope = NamespaceResourceScope>
+ Clone
+ std::fmt::Debug
+ DeserializeOwned
+ serde::Serialize
+ Default,
<K as kube::Resource>::DynamicType: Default,
{
let mut resources = Vec::new();
for r in resource.iter() {
let api: Api<K> = match ns {
Some(ns) => Api::namespaced(self.client.clone(), ns),
None => Api::default_namespaced(self.client.clone()),
};
resources.push(api.create(&PostParams::default(), &r).await?);
}
Ok(resources)
}
pub(crate) async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
let k = match Kubeconfig::read_from(path) {
Ok(k) => k,
@@ -257,35 +86,3 @@ impl K8sClient {
))
}
}
pub trait ApplyStrategy<K: Resource> {
fn get_api(client: &Client, ns: Option<&str>) -> Api<K>;
}
/// Implementation for all resources that are cluster-scoped.
/// It will always use `Api::all` and ignore the namespace parameter.
impl<K> ApplyStrategy<K> for ClusterResourceScope
where
K: Resource<Scope = ClusterResourceScope>,
<K as kube::Resource>::DynamicType: Default,
{
fn get_api(client: &Client, _ns: Option<&str>) -> Api<K> {
Api::all(client.clone())
}
}
/// Implementation for all resources that are namespace-scoped.
/// It will use `Api::namespaced` if a namespace is provided, otherwise
/// it falls back to the default namespace configured in your kubeconfig.
impl<K> ApplyStrategy<K> for NamespaceResourceScope
where
K: Resource<Scope = NamespaceResourceScope>,
<K as kube::Resource>::DynamicType: Default,
{
fn get_api(client: &Client, ns: Option<&str>) -> Api<K> {
match ns {
Some(ns) => Api::namespaced(client.clone(), ns),
None => Api::default_namespaced(client.clone()),
}
}
}

View File

@@ -2,8 +2,7 @@ use std::{process::Command, sync::Arc};
use async_trait::async_trait;
use inquire::Confirm;
use log::{debug, info, warn};
use serde::Serialize;
use log::{info, warn};
use tokio::sync::OnceCell;
use crate::{
@@ -11,34 +10,42 @@ use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
maestro::Maestro,
modules::k3d::K3DInstallationScore,
modules::{
k3d::K3DInstallationScore,
monitoring::kube_prometheus::kube_prometheus_helm_chart_score::kube_prometheus_helm_chart_score,
},
topology::LocalhostTopology,
};
use super::{
HelmCommand, K8sclient, Topology,
k8s::K8sClient,
tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager},
oberservability::{
K8sMonitorConfig,
k8s::K8sMonitor,
monitoring::{AlertChannel, AlertChannelConfig, Monitor},
},
tenant::{
ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy, k8s::K8sTenantManager,
},
};
#[derive(Clone, Debug)]
struct K8sState {
client: Arc<K8sClient>,
_source: K8sSource,
source: K8sSource,
message: String,
}
#[derive(Debug, Clone)]
#[derive(Debug)]
enum K8sSource {
LocalK3d,
Kubeconfig,
}
#[derive(Clone, Debug)]
pub struct K8sAnywhereTopology {
k8s_state: Arc<OnceCell<Option<K8sState>>>,
tenant_manager: Arc<OnceCell<K8sTenantManager>>,
config: Arc<K8sAnywhereConfig>,
k8s_state: OnceCell<Option<K8sState>>,
tenant_manager: OnceCell<K8sTenantManager>,
k8s_monitor: OnceCell<K8sMonitor>,
}
#[async_trait]
@@ -58,29 +65,12 @@ impl K8sclient for K8sAnywhereTopology {
}
}
impl Serialize for K8sAnywhereTopology {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl K8sAnywhereTopology {
pub fn from_env() -> Self {
pub fn new() -> Self {
Self {
k8s_state: Arc::new(OnceCell::new()),
tenant_manager: Arc::new(OnceCell::new()),
config: Arc::new(K8sAnywhereConfig::from_env()),
}
}
pub fn with_config(config: K8sAnywhereConfig) -> Self {
Self {
k8s_state: Arc::new(OnceCell::new()),
tenant_manager: Arc::new(OnceCell::new()),
config: Arc::new(config),
k8s_state: OnceCell::new(),
tenant_manager: OnceCell::new(),
k8s_monitor: OnceCell::new(),
}
}
@@ -121,15 +111,27 @@ impl K8sAnywhereTopology {
}
async fn try_get_or_install_k8s_client(&self) -> Result<Option<K8sState>, InterpretError> {
let k8s_anywhere_config = &self.config;
let k8s_anywhere_config = K8sAnywhereConfig {
kubeconfig: std::env::var("KUBECONFIG").ok().map(|v| v.to_string()),
use_system_kubeconfig: std::env::var("HARMONY_USE_SYSTEM_KUBECONFIG")
.map_or_else(|_| false, |v| v.parse().ok().unwrap_or(false)),
autoinstall: std::env::var("HARMONY_AUTOINSTALL")
.map_or_else(|_| false, |v| v.parse().ok().unwrap_or(false)),
};
if let Some(kubeconfig) = &k8s_anywhere_config.kubeconfig {
debug!("Loading kubeconfig {kubeconfig}");
if k8s_anywhere_config.use_system_kubeconfig {
match self.try_load_system_kubeconfig().await {
Some(_client) => todo!(),
None => todo!(),
}
}
if let Some(kubeconfig) = k8s_anywhere_config.kubeconfig {
match self.try_load_kubeconfig(&kubeconfig).await {
Some(client) => {
return Ok(Some(K8sState {
client: Arc::new(client),
_source: K8sSource::Kubeconfig,
source: K8sSource::Kubeconfig,
message: format!("Loaded k8s client from kubeconfig {kubeconfig}"),
}));
}
@@ -141,14 +143,6 @@ impl K8sAnywhereTopology {
}
}
if k8s_anywhere_config.use_system_kubeconfig {
debug!("Loading system kubeconfig");
match self.try_load_system_kubeconfig().await {
Some(_client) => todo!(),
None => todo!(),
}
}
info!("No kubernetes configuration found");
if !k8s_anywhere_config.autoinstall {
@@ -177,7 +171,7 @@ impl K8sAnywhereTopology {
let state = match k3d.get_client().await {
Ok(client) => K8sState {
client: Arc::new(K8sClient::new(client)),
_source: K8sSource::LocalK3d,
source: K8sSource::LocalK3d,
message: "Successfully installed K3D cluster and acquired client".to_string(),
},
Err(_) => todo!(),
@@ -186,21 +180,6 @@ impl K8sAnywhereTopology {
Ok(Some(state))
}
async fn ensure_k8s_tenant_manager(&self) -> Result<(), String> {
if let Some(_) = self.tenant_manager.get() {
return Ok(());
}
self.tenant_manager
.get_or_try_init(async || -> Result<K8sTenantManager, String> {
let k8s_client = self.k8s_client().await?;
Ok(K8sTenantManager::new(k8s_client))
})
.await?;
Ok(())
}
fn get_k8s_tenant_manager(&self) -> Result<&K8sTenantManager, ExecutorError> {
match self.tenant_manager.get() {
Some(t) => Ok(t),
@@ -209,40 +188,51 @@ impl K8sAnywhereTopology {
)),
}
}
async fn ensure_k8s_monitor(&self) -> Result<(), String> {
if let Some(_) = self.k8s_monitor.get() {
return Ok(());
}
self.k8s_monitor
.get_or_try_init(async || -> Result<K8sMonitor, String> {
let config = K8sMonitorConfig::cluster_monitor();
Ok(K8sMonitor { config })
})
.await
.unwrap();
Ok(())
}
fn get_k8s_monitor(&self) -> Result<&K8sMonitor, ExecutorError> {
match self.k8s_monitor.get() {
Some(k) => Ok(k),
None => Err(ExecutorError::UnexpectedError(
"K8sMonitor not available".to_string(),
)),
}
}
}
#[derive(Clone, Debug)]
pub struct K8sAnywhereConfig {
struct K8sAnywhereConfig {
/// The path of the KUBECONFIG file that Harmony should use to interact with the Kubernetes
/// cluster
///
/// Default : None
pub kubeconfig: Option<String>,
kubeconfig: Option<String>,
/// Whether to use the system KUBECONFIG, either the environment variable or the file in the
/// default or configured location
///
/// Default : false
pub use_system_kubeconfig: bool,
use_system_kubeconfig: bool,
/// Whether to install automatically a kubernetes cluster
///
/// When enabled, autoinstall will setup a K3D cluster on the localhost. https://k3d.io/stable/
///
/// Default: true
pub autoinstall: bool,
}
impl K8sAnywhereConfig {
fn from_env() -> Self {
Self {
kubeconfig: std::env::var("KUBECONFIG").ok().map(|v| v.to_string()),
use_system_kubeconfig: std::env::var("HARMONY_USE_SYSTEM_KUBECONFIG")
.map_or_else(|_| false, |v| v.parse().ok().unwrap_or(false)),
autoinstall: std::env::var("HARMONY_AUTOINSTALL")
.map_or_else(|_| false, |v| v.parse().ok().unwrap_or(false)),
}
}
autoinstall: bool,
}
#[async_trait]
@@ -261,7 +251,7 @@ impl Topology for K8sAnywhereTopology {
"No K8s client could be found or installed".to_string(),
))?;
self.ensure_k8s_tenant_manager()
self.ensure_k8s_monitor()
.await
.map_err(|e| InterpretError::new(e))?;
@@ -285,10 +275,46 @@ impl TenantManager for K8sAnywhereTopology {
.await
}
async fn get_tenant_config(&self) -> Option<TenantConfig> {
self.get_k8s_tenant_manager()
.ok()?
.get_tenant_config()
async fn update_tenant_resource_limits(
&self,
tenant_name: &str,
new_limits: &ResourceLimits,
) -> Result<(), ExecutorError> {
self.get_k8s_tenant_manager()?
.update_tenant_resource_limits(tenant_name, new_limits)
.await
}
async fn update_tenant_network_policy(
&self,
tenant_name: &str,
new_policy: &TenantNetworkPolicy,
) -> Result<(), ExecutorError> {
self.get_k8s_tenant_manager()?
.update_tenant_network_policy(tenant_name, new_policy)
.await
}
async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError> {
self.get_k8s_tenant_manager()?
.deprovision_tenant(tenant_name)
.await
}
}
#[async_trait]
impl Monitor for K8sAnywhereTopology {
async fn provision_monitor<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
alert_receivers: Option<Vec<Box<dyn AlertChannelConfig>>>,
) -> Result<Outcome, InterpretError> {
self.get_k8s_monitor()?
.provision_monitor(inventory, topology, alert_receivers)
.await
}
fn delete_monitor(&self) -> Result<Outcome, InterpretError> {
todo!()
}
}

View File

@@ -1,7 +1,6 @@
mod ha_cluster;
mod host_binding;
mod http;
pub mod installable;
mod k8s_anywhere;
mod localhost;
pub mod oberservability;
@@ -23,9 +22,6 @@ pub use network::*;
use serde::Serialize;
pub use tftp::*;
mod postgres;
pub use postgres::*;
mod helm_command;
pub use helm_command::*;

View File

@@ -0,0 +1,71 @@
use std::sync::Arc;
use async_trait::async_trait;
use serde::Serialize;
use crate::score::Score;
use crate::topology::HelmCommand;
use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
topology::Topology,
};
use super::{
K8sMonitorConfig,
monitoring::{AlertChannel, AlertChannelConfig, Monitor},
};
#[derive(Debug, Clone, Serialize)]
pub struct K8sMonitor {
pub config: K8sMonitorConfig,
}
#[async_trait]
impl Monitor for K8sMonitor {
async fn provision_monitor<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
alert_channels: Option<Vec<Box<dyn AlertChannelConfig>>>,
) -> Result<Outcome, InterpretError> {
if let Some(channels) = alert_channels {
let alert_channels = self.build_alert_channels(channels).await?;
for channel in alert_channels {
channel.register_alert_channel().await?;
}
}
let chart = self.config.chart.clone();
chart
.create_interpret()
.execute(inventory, topology)
.await?;
Ok(Outcome::success("installed monitor".to_string()))
}
fn delete_monitor(&self) -> Result<Outcome, InterpretError> {
todo!()
}
}
#[async_trait]
impl AlertChannelConfig for K8sMonitor {
async fn build_alert_channel(&self) -> Result<Box<dyn AlertChannel>, InterpretError> {
todo!()
}
}
impl K8sMonitor {
pub async fn build_alert_channels(
&self,
alert_channel_configs: Vec<Box<dyn AlertChannelConfig>>,
) -> Result<Vec<Box<dyn AlertChannel>>, InterpretError> {
let mut alert_channels = Vec::new();
for config in alert_channel_configs {
let channel = config.build_alert_channel().await?;
alert_channels.push(channel)
}
Ok(alert_channels)
}
}
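// Hedged usage sketch (assumed wiring, not part of the change set): build the
// opinionated cluster monitor and provision it with no extra alert channels.
async fn install_cluster_monitor<T: Topology + HelmCommand>(
    inventory: &Inventory,
    topology: &T,
) -> Result<Outcome, InterpretError> {
    let monitor = K8sMonitor {
        config: K8sMonitorConfig::cluster_monitor(),
    };
    // Passing None skips alert channel registration entirely.
    monitor.provision_monitor(inventory, topology, None).await
}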

View File

@@ -1 +1,23 @@
use serde::Serialize;
use crate::modules::{
helm::chart::HelmChartScore,
monitoring::kube_prometheus::kube_prometheus_helm_chart_score::kube_prometheus_helm_chart_score,
};
pub mod k8s;
pub mod monitoring;
#[derive(Debug, Clone, Serialize)]
pub struct K8sMonitorConfig {
//probably need to do something better here
pub chart: HelmChartScore,
}
impl K8sMonitorConfig {
pub fn cluster_monitor() -> Self {
Self {
chart: kube_prometheus_helm_chart_score(),
}
}
}

View File

@@ -1,77 +1,39 @@
use async_trait::async_trait;
use log::debug;
use dyn_clone::DynClone;
use std::fmt::Debug;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
topology::{Topology, installable::Installable},
};
use crate::executors::ExecutorError;
use crate::interpret::InterpretError;
use crate::inventory::Inventory;
use crate::topology::HelmCommand;
use crate::{interpret::Outcome, topology::Topology};
/// Represents an entity responsible for collecting and organizing observability data
/// from various telemetry sources such as Prometheus or Datadog.
/// A `Monitor` abstracts the logic required to scrape, aggregate, and structure
/// monitoring data, enabling consistent processing regardless of the underlying data source.
#[async_trait]
pub trait AlertSender: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
}
#[derive(Debug)]
pub struct AlertingInterpret<S: AlertSender> {
pub sender: S,
pub receivers: Vec<Box<dyn AlertReceiver<S>>>,
pub rules: Vec<Box<dyn AlertRule<S>>>,
}
#[async_trait]
impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInterpret<S> {
async fn execute(
pub trait Monitor {
async fn provision_monitor<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
self.sender.configure(inventory, topology).await?;
for receiver in self.receivers.iter() {
receiver.install(&self.sender).await?;
}
for rule in self.rules.iter() {
debug!("installing rule: {:#?}", rule);
rule.install(&self.sender).await?;
}
self.sender.ensure_installed(inventory, topology).await?;
Ok(Outcome::success(format!(
"successfully installed alert sender {}",
self.sender.name()
)))
}
alert_receivers: Option<Vec<Box<dyn AlertChannelConfig>>>,
) -> Result<Outcome, InterpretError>;
fn get_name(&self) -> InterpretName {
todo!()
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
fn delete_monitor(&self) -> Result<Outcome, InterpretError>;
}
#[async_trait]
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
pub trait AlertChannel: Debug + Send + Sync {
async fn register_alert_channel(&self) -> Result<Outcome, ExecutorError>;
//async fn get_channel_id(&self) -> String;
}
#[async_trait]
pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
fn clone_box(&self) -> Box<dyn AlertRule<S>>;
pub trait AlertChannelConfig: Debug + Send + Sync + DynClone {
async fn build_alert_channel(&self) -> Result<Box<dyn AlertChannel>, InterpretError>;
}
#[async_trait]
pub trait ScrapeTarger<S: AlertSender> {
async fn install(&self, sender: &S) -> Result<(), InterpretError>;
}
dyn_clone::clone_trait_object!(AlertChannelConfig);
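// Hedged illustration (assumed helper): the DynClone bound is what makes a
// boxed AlertChannelConfig cloneable, so one set of channel configs can be
// fanned out to several monitors.
fn duplicate_configs(
    configs: &[Box<dyn AlertChannelConfig>],
) -> Vec<Box<dyn AlertChannelConfig>> {
    // Works because clone_trait_object! implements Clone for the boxed trait.
    configs.to_vec()
}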

View File

@@ -1,14 +0,0 @@
use crate::{
data::{PostgresDatabase, PostgresUser},
interpret::InterpretError,
};
use async_trait::async_trait;
#[async_trait]
pub trait PostgresServer {
async fn ensure_users_exist(&self, users: Vec<PostgresUser>) -> Result<(), InterpretError>;
async fn ensure_databases_exist(
&self,
databases: Vec<PostgresDatabase>,
) -> Result<(), InterpretError>;
}

View File

@@ -1,441 +1,95 @@
use std::sync::Arc;
use crate::{
executors::ExecutorError,
topology::k8s::{ApplyStrategy, K8sClient},
};
use crate::{executors::ExecutorError, topology::k8s::K8sClient};
use async_trait::async_trait;
use k8s_openapi::{
api::{
core::v1::{LimitRange, Namespace, ResourceQuota},
networking::v1::{
NetworkPolicy, NetworkPolicyEgressRule, NetworkPolicyIngressRule, NetworkPolicyPort,
},
},
apimachinery::pkg::util::intstr::IntOrString,
};
use kube::Resource;
use log::{debug, info, warn};
use serde::de::DeserializeOwned;
use derive_new::new;
use k8s_openapi::api::core::v1::Namespace;
use serde_json::json;
use tokio::sync::OnceCell;
use super::{TenantConfig, TenantManager};
use super::{ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy};
#[derive(Clone, Debug)]
#[derive(new)]
pub struct K8sTenantManager {
k8s_client: Arc<K8sClient>,
k8s_tenant_config: Arc<OnceCell<TenantConfig>>,
}
impl K8sTenantManager {
pub fn new(client: Arc<K8sClient>) -> Self {
Self {
k8s_client: client,
k8s_tenant_config: Arc::new(OnceCell::new()),
}
}
}
impl K8sTenantManager {
fn get_namespace_name(&self, config: &TenantConfig) -> String {
config.name.clone()
}
fn ensure_constraints(&self, _namespace: &Namespace) -> Result<(), ExecutorError> {
warn!("Validate that when tenant already exists (by id) that name has not changed");
warn!("Make sure other Tenant constraints are respected by this k8s implementation");
Ok(())
}
async fn apply_resource<
K: Resource + std::fmt::Debug + Sync + DeserializeOwned + Default + serde::Serialize + Clone,
>(
&self,
mut resource: K,
config: &TenantConfig,
) -> Result<K, ExecutorError>
where
<K as kube::Resource>::DynamicType: Default,
<K as kube::Resource>::Scope: ApplyStrategy<K>,
{
self.apply_labels(&mut resource, config);
self.k8s_client
.apply(&resource, Some(&self.get_namespace_name(config)))
.await
.map_err(|e| {
ExecutorError::UnexpectedError(format!("Could not create Tenant resource : {e}"))
})
}
fn apply_labels<K: Resource>(&self, resource: &mut K, config: &TenantConfig) {
let labels = resource.meta_mut().labels.get_or_insert_default();
labels.insert(
"app.kubernetes.io/managed-by".to_string(),
"harmony".to_string(),
);
labels.insert("harmony/tenant-id".to_string(), config.id.to_string());
labels.insert("harmony/tenant-name".to_string(), config.name.clone());
}
fn build_namespace(&self, config: &TenantConfig) -> Result<Namespace, ExecutorError> {
#[async_trait]
impl TenantManager for K8sTenantManager {
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
let namespace = json!(
{
"apiVersion": "v1",
"kind": "Namespace",
"metadata": {
"labels": {
"harmony.nationtech.io/tenant.id": config.id.to_string(),
"harmony.nationtech.io/tenant.id": config.id,
"harmony.nationtech.io/tenant.name": config.name,
},
"name": self.get_namespace_name(config),
"name": config.name,
},
}
);
serde_json::from_value(namespace).map_err(|e| {
ExecutorError::ConfigurationError(format!(
"Could not build TenantManager Namespace. {}",
e
))
})
}
todo!("Validate that when tenant already exists (by id) that name has not changed");
let namespace: Namespace = serde_json::from_value(namespace).unwrap();
fn build_resource_quota(&self, config: &TenantConfig) -> Result<ResourceQuota, ExecutorError> {
let resource_quota = json!(
{
"apiVersion": "v1",
"kind": "ResourceQuota",
"metadata": {
"name": format!("{}-quota", config.name),
"labels": {
"harmony.nationtech.io/tenant.id": config.id.to_string(),
"harmony.nationtech.io/tenant.name": config.name,
},
"namespace": self.get_namespace_name(config),
},
"spec": {
"hard": {
"limits.cpu": format!("{:.0}",config.resource_limits.cpu_limit_cores),
"limits.memory": format!("{:.3}Gi", config.resource_limits.memory_limit_gb),
"requests.cpu": format!("{:.0}",config.resource_limits.cpu_request_cores),
"requests.memory": format!("{:.3}Gi", config.resource_limits.memory_request_gb),
"requests.storage": format!("{:.3}Gi", config.resource_limits.storage_total_gb),
"pods": "20",
"services": "10",
"configmaps": "60",
"secrets": "60",
"persistentvolumeclaims": "15",
"services.loadbalancers": "2",
"services.nodeports": "5",
"limits.ephemeral-storage": "10Gi",
"apiVersion": "v1",
"kind": "List",
"items": [
{
"apiVersion": "v1",
"kind": "ResourceQuota",
"metadata": {
"name": config.name,
"labels": {
"harmony.nationtech.io/tenant.id": config.id,
"harmony.nationtech.io/tenant.name": config.name,
},
"namespace": config.name,
},
"spec": {
"hard": {
"limits.cpu": format!("{:.0}",config.resource_limits.cpu_limit_cores),
"limits.memory": format!("{:.3}Gi", config.resource_limits.memory_limit_gb),
"requests.cpu": format!("{:.0}",config.resource_limits.cpu_request_cores),
"requests.memory": format!("{:.3}Gi", config.resource_limits.memory_request_gb),
"requests.storage": format!("{:.3}", config.resource_limits.storage_total_gb),
"pods": "20",
"services": "10",
"configmaps": "30",
"secrets": "30",
"persistentvolumeclaims": "15",
"services.loadbalancers": "2",
"services.nodeports": "5",
}
}
}
}
}
}
]
}
);
serde_json::from_value(resource_quota).map_err(|e| {
ExecutorError::ConfigurationError(format!(
"Could not build TenantManager ResourceQuota. {}",
e
))
})
}
fn build_limit_range(&self, config: &TenantConfig) -> Result<LimitRange, ExecutorError> {
let limit_range = json!({
"apiVersion": "v1",
"kind": "LimitRange",
"metadata": {
"name": format!("{}-defaults", config.name),
"labels": {
"harmony.nationtech.io/tenant.id": config.id.to_string(),
"harmony.nationtech.io/tenant.name": config.name,
},
"namespace": self.get_namespace_name(config),
},
"spec": {
"limits": [
{
"type": "Container",
"default": {
"cpu": "500m",
"memory": "500Mi"
},
"defaultRequest": {
"cpu": "100m",
"memory": "100Mi"
},
}
]
}
});
serde_json::from_value(limit_range).map_err(|e| {
ExecutorError::ConfigurationError(format!(
"Could not build TenantManager LimitRange. {}",
e
))
})
async fn update_tenant_resource_limits(
&self,
tenant_name: &str,
new_limits: &ResourceLimits,
) -> Result<(), ExecutorError> {
todo!()
}
fn build_network_policy(&self, config: &TenantConfig) -> Result<NetworkPolicy, ExecutorError> {
let network_policy = json!({
"apiVersion": "networking.k8s.io/v1",
"kind": "NetworkPolicy",
"metadata": {
"name": format!("{}-network-policy", config.name),
"namespace": self.get_namespace_name(config),
},
"spec": {
"podSelector": {},
"egress": [
{
"to": [
{ "podSelector": {} }
]
},
{
"to": [
{
"podSelector": {},
"namespaceSelector": {
"matchLabels": {
"kubernetes.io/metadata.name": "kube-system"
}
}
}
]
},
{
"to": [
{
"podSelector": {},
"namespaceSelector": {
"matchLabels": {
"kubernetes.io/metadata.name": "openshift-dns"
}
}
}
]
},
{
"to": [
{
"ipBlock": {
"cidr": "10.43.0.1/32",
}
}
]
},
{
"to": [
{
"ipBlock": {
"cidr": "172.23.0.0/16",
}
}
]
},
{
"to": [
{
"ipBlock": {
"cidr": "0.0.0.0/0",
"except": [
"10.0.0.0/8",
"172.16.0.0/12",
"192.168.0.0/16",
"192.0.0.0/24",
"192.0.2.0/24",
"192.88.99.0/24",
"192.18.0.0/15",
"198.51.100.0/24",
"169.254.0.0/16",
"203.0.113.0/24",
"127.0.0.0/8",
"224.0.0.0/4",
"240.0.0.0/4",
"100.64.0.0/10",
"233.252.0.0/24",
"0.0.0.0/8"
]
}
}
]
}
],
"ingress": [
{
"from": [
{ "podSelector": {} }
]
}
],
"policyTypes": [
"Ingress",
"Egress"
]
}
});
let mut network_policy: NetworkPolicy =
serde_json::from_value(network_policy).map_err(|e| {
ExecutorError::ConfigurationError(format!(
"Could not build TenantManager NetworkPolicy. {}",
e
))
})?;
config
.network_policy
.additional_allowed_cidr_ingress
.iter()
.try_for_each(|c| -> Result<(), ExecutorError> {
let cidr_list: Vec<serde_json::Value> =
c.0.iter()
.map(|ci| {
json!({
"ipBlock": {
"cidr": ci.to_string(),
}
})
})
.collect();
let ports: Option<Vec<NetworkPolicyPort>> =
c.1.as_ref().map(|spec| match &spec.data {
super::PortSpecData::SinglePort(port) => vec![NetworkPolicyPort {
port: Some(IntOrString::Int(port.clone().into())),
..Default::default()
}],
super::PortSpecData::PortRange(start, end) => vec![NetworkPolicyPort {
port: Some(IntOrString::Int(start.clone().into())),
end_port: Some(end.clone().into()),
protocol: None, // Not currently supported by Harmony
}],
super::PortSpecData::ListOfPorts(items) => items
.iter()
.map(|i| NetworkPolicyPort {
port: Some(IntOrString::Int(i.clone().into())),
..Default::default()
})
.collect(),
});
let rule = serde_json::from_value::<NetworkPolicyIngressRule>(json!({
"from": cidr_list,
"ports": ports,
}))
.map_err(|e| {
ExecutorError::ConfigurationError(format!(
"Could not build TenantManager NetworkPolicyIngressRule. {}",
e
))
})?;
network_policy
.spec
.as_mut()
.unwrap()
.ingress
.as_mut()
.unwrap()
.push(rule);
Ok(())
})?;
config
.network_policy
.additional_allowed_cidr_egress
.iter()
.try_for_each(|c| -> Result<(), ExecutorError> {
let cidr_list: Vec<serde_json::Value> =
c.0.iter()
.map(|ci| {
json!({
"ipBlock": {
"cidr": ci.to_string(),
}
})
})
.collect();
let ports: Option<Vec<NetworkPolicyPort>> =
c.1.as_ref().map(|spec| match &spec.data {
super::PortSpecData::SinglePort(port) => vec![NetworkPolicyPort {
port: Some(IntOrString::Int(port.clone().into())),
..Default::default()
}],
super::PortSpecData::PortRange(start, end) => vec![NetworkPolicyPort {
port: Some(IntOrString::Int(start.clone().into())),
end_port: Some(end.clone().into()),
protocol: None, // Not currently supported by Harmony
}],
super::PortSpecData::ListOfPorts(items) => items
.iter()
.map(|i| NetworkPolicyPort {
port: Some(IntOrString::Int(i.clone().into())),
..Default::default()
})
.collect(),
});
let rule = serde_json::from_value::<NetworkPolicyEgressRule>(json!({
"to": cidr_list,
"ports": ports,
}))
.map_err(|e| {
ExecutorError::ConfigurationError(format!(
"Could not build TenantManager NetworkPolicyEgressRule. {}",
e
))
})?;
network_policy
.spec
.as_mut()
.unwrap()
.egress
.as_mut()
.unwrap()
.push(rule);
Ok(())
})?;
Ok(network_policy)
async fn update_tenant_network_policy(
&self,
tenant_name: &str,
new_policy: &TenantNetworkPolicy,
) -> Result<(), ExecutorError> {
todo!()
}
fn store_config(&self, config: &TenantConfig) {
let _ = self.k8s_tenant_config.set(config.clone());
}
}
#[async_trait]
impl TenantManager for K8sTenantManager {
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
let namespace = self.build_namespace(config)?;
let resource_quota = self.build_resource_quota(config)?;
let network_policy = self.build_network_policy(config)?;
let resource_limit_range = self.build_limit_range(config)?;
self.ensure_constraints(&namespace)?;
debug!("Creating namespace for tenant {}", config.name);
self.apply_resource(namespace, config).await?;
debug!("Creating resource_quota for tenant {}", config.name);
self.apply_resource(resource_quota, config).await?;
debug!("Creating limit_range for tenant {}", config.name);
self.apply_resource(resource_limit_range, config).await?;
debug!("Creating network_policy for tenant {}", config.name);
self.apply_resource(network_policy, config).await?;
info!(
"Success provisionning K8s tenant id {} name {}",
config.id, config.name
);
self.store_config(config);
Ok(())
}
async fn get_tenant_config(&self) -> Option<TenantConfig> {
self.k8s_tenant_config.get().cloned()
async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError> {
todo!()
}
}

View File

@@ -5,10 +5,9 @@ use crate::executors::ExecutorError;
#[async_trait]
pub trait TenantManager {
/// Creates or updates a tenant based on the provided configuration.
/// This operation should be idempotent; if a tenant with the same `config.id`
/// Provisions a new tenant based on the provided configuration.
/// This operation should be idempotent; if a tenant with the same `config.name`
/// already exists and matches the config, it will succeed without changes.
///
/// If it exists but differs, it will be updated, or return an error if the update
/// action is not supported
///
@@ -16,5 +15,32 @@ pub trait TenantManager {
/// * `config`: The desired configuration for the new tenant.
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError>;
async fn get_tenant_config(&self) -> Option<TenantConfig>;
/// Updates the resource limits for an existing tenant.
///
/// # Arguments
/// * `tenant_name`: The logical name of the tenant to update.
/// * `new_limits`: The new set of resource limits to apply.
async fn update_tenant_resource_limits(
&self,
tenant_name: &str,
new_limits: &ResourceLimits,
) -> Result<(), ExecutorError>;
/// Updates the high-level network isolation policy for an existing tenant.
///
/// # Arguments
/// * `tenant_name`: The logical name of the tenant to update.
/// * `new_policy`: The new network policy to apply.
async fn update_tenant_network_policy(
&self,
tenant_name: &str,
new_policy: &TenantNetworkPolicy,
) -> Result<(), ExecutorError>;
/// Decommissions an existing tenant, removing its isolated context and associated resources.
/// This operation should be idempotent.
///
/// # Arguments
/// * `tenant_name`: The logical name of the tenant to deprovision.
async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError>;
}
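// Hedged end-to-end sketch (an assumed free function, not part of the change
// set): exercising the trait against any implementation, such as
// K8sTenantManager, using the config and limit types already in scope here.
async fn tenant_lifecycle(
    manager: &impl TenantManager,
    config: &TenantConfig,
) -> Result<(), ExecutorError> {
    manager.provision_tenant(config).await?;
    // Adjust limits later without reprovisioning.
    manager
        .update_tenant_resource_limits(&config.name, &ResourceLimits::default())
        .await?;
    // Teardown is expected to be idempotent.
    manager.deprovision_tenant(&config.name).await
}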

View File

@@ -1,10 +1,10 @@
pub mod k8s;
mod manager;
use std::str::FromStr;
pub use manager::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::data::Id;
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] // Assuming serde for Scores
@@ -21,26 +21,13 @@ pub struct TenantConfig {
/// High-level network isolation policies for the tenant.
pub network_policy: TenantNetworkPolicy,
/// Key-value pairs for provider-specific tagging, labeling, or metadata.
/// Useful for billing, organization, or filtering within the provider's console.
pub labels_or_tags: HashMap<String, String>,
}
impl Default for TenantConfig {
fn default() -> Self {
let id = Id::default();
Self {
name: format!("tenant_{id}"),
id,
resource_limits: ResourceLimits::default(),
network_policy: TenantNetworkPolicy {
default_inter_tenant_ingress: InterTenantIngressPolicy::DenyAll,
default_internet_egress: InternetEgressPolicy::AllowAll,
additional_allowed_cidr_ingress: vec![],
additional_allowed_cidr_egress: vec![],
},
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct ResourceLimits {
/// Requested/guaranteed CPU cores (e.g., 2.0).
pub cpu_request_cores: f32,
@@ -56,18 +43,6 @@ pub struct ResourceLimits {
pub storage_total_gb: f32,
}
impl Default for ResourceLimits {
fn default() -> Self {
Self {
cpu_request_cores: 4.0,
cpu_limit_cores: 4.0,
memory_request_gb: 4.0,
memory_limit_gb: 4.0,
storage_total_gb: 20.0,
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TenantNetworkPolicy {
/// Policy for ingress traffic originating from other tenants within the same Harmony-managed environment.
@@ -75,20 +50,6 @@ pub struct TenantNetworkPolicy {
/// Policy for egress traffic destined for the public internet.
pub default_internet_egress: InternetEgressPolicy,
pub additional_allowed_cidr_ingress: Vec<(Vec<cidr::Ipv4Cidr>, Option<PortSpec>)>,
pub additional_allowed_cidr_egress: Vec<(Vec<cidr::Ipv4Cidr>, Option<PortSpec>)>,
}
impl Default for TenantNetworkPolicy {
fn default() -> Self {
TenantNetworkPolicy {
default_inter_tenant_ingress: InterTenantIngressPolicy::DenyAll,
default_internet_egress: InternetEgressPolicy::DenyAll,
additional_allowed_cidr_ingress: vec![],
additional_allowed_cidr_egress: vec![],
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
@@ -104,121 +65,3 @@ pub enum InternetEgressPolicy {
/// Deny all outbound traffic to the internet by default.
DenyAll,
}
/// Represents a port specification that can be either a single port, a comma-separated list of ports,
/// or a range separated by a dash.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PortSpec {
    /// The parsed representation of the ports, kept for serialization/deserialization purposes.
pub data: PortSpecData,
}
impl PortSpec {
    /// Parses a port specification string. Supported inputs, as exercised by
    /// the tests below: a single port (`"2144"`), a dash-separated range
    /// (`"80-90"`), or a comma-separated list of ports (`"2144,3424"`).
fn parse_from_str(spec: &str) -> Result<PortSpec, String> {
// Check for single port
if let Ok(port) = spec.parse::<u16>() {
let spec = PortSpecData::SinglePort(port);
return Ok(Self { data: spec });
}
if let Some(range) = spec.find('-') {
let start_str = &spec[..range];
let end_str = &spec[(range + 1)..];
if let (Ok(start), Ok(end)) = (start_str.parse::<u16>(), end_str.parse::<u16>()) {
let spec = PortSpecData::PortRange(start, end);
return Ok(Self { data: spec });
}
}
let ports: Vec<&str> = spec.split(',').collect();
if !ports.is_empty() && ports.iter().all(|p| p.parse::<u16>().is_ok()) {
let maybe_ports = ports.iter().try_fold(vec![], |mut list, &p| {
if let Ok(p) = p.parse::<u16>() {
list.push(p);
return Ok(list);
}
Err(())
});
if let Ok(ports) = maybe_ports {
let spec = PortSpecData::ListOfPorts(ports);
return Ok(Self { data: spec });
}
}
Err(format!("Invalid port spec format {spec}"))
}
}
impl FromStr for PortSpec {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse_from_str(s)
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum PortSpecData {
SinglePort(u16),
PortRange(u16, u16),
ListOfPorts(Vec<u16>),
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_single_port() {
let port_spec = "2144".parse::<PortSpec>().unwrap();
match port_spec.data {
PortSpecData::SinglePort(port) => assert_eq!(port, 2144),
_ => panic!("Expected SinglePort"),
}
}
#[test]
fn test_port_range() {
let port_spec = "80-90".parse::<PortSpec>().unwrap();
match port_spec.data {
PortSpecData::PortRange(start, end) => {
assert_eq!(start, 80);
assert_eq!(end, 90);
}
_ => panic!("Expected PortRange"),
}
}
#[test]
fn test_list_of_ports() {
let port_spec = "2144,3424".parse::<PortSpec>().unwrap();
match port_spec.data {
PortSpecData::ListOfPorts(ports) => {
assert_eq!(ports[0], 2144);
assert_eq!(ports[1], 3424);
}
_ => panic!("Expected ListOfPorts"),
}
}
#[test]
fn test_invalid_port_spec() {
let result = "invalid".parse::<PortSpec>();
assert!(result.is_err());
}
#[test]
fn test_empty_input() {
let result = "".parse::<PortSpec>();
assert!(result.is_err());
}
#[test]
fn test_only_coma() {
let result = ",".parse::<PortSpec>();
assert!(result.is_err());
}
}

View File

@@ -1,41 +0,0 @@
use async_trait::async_trait;
use serde::Serialize;
use crate::topology::Topology;
/// An ApplicationFeature provided by harmony, such as Backups, Monitoring, MultisiteAvailability,
/// ContinuousIntegration, ContinuousDelivery
#[async_trait]
pub trait ApplicationFeature<T: Topology>:
std::fmt::Debug + Send + Sync + ApplicationFeatureClone<T>
{
async fn ensure_installed(&self, topology: &T) -> Result<(), String>;
fn name(&self) -> String;
}
trait ApplicationFeatureClone<T: Topology> {
fn clone_box(&self) -> Box<dyn ApplicationFeature<T>>;
}
impl<A, T: Topology> ApplicationFeatureClone<T> for A
where
A: ApplicationFeature<T> + Clone + 'static,
{
fn clone_box(&self) -> Box<dyn ApplicationFeature<T>> {
Box::new(self.clone())
}
}
impl<T: Topology> Serialize for Box<dyn ApplicationFeature<T>> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl<T: Topology> Clone for Box<dyn ApplicationFeature<T>> {
fn clone(&self) -> Self {
self.clone_box()
}
}
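// Hedged sketch (assumes Monitoring, ContinuousDelivery and the HelmCommand
// bound are imported from this crate): the clone_box plumbing above is what
// lets boxed features sit in a Vec and be cloned along with the score that
// owns them.
fn default_features<T: Topology + HelmCommand + 'static>() -> Vec<Box<dyn ApplicationFeature<T>>> {
    vec![
        Box::new(Monitoring::default()),
        Box::new(ContinuousDelivery::default()),
    ]
}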

View File

@@ -1,84 +0,0 @@
use async_trait::async_trait;
use log::info;
use serde_json::Value;
use crate::{
data::Version,
inventory::Inventory,
modules::{application::ApplicationFeature, helm::chart::HelmChartScore},
score::Score,
topology::{HelmCommand, Topology, Url},
};
/// ContinuousDelivery in Harmony provides this functionality :
///
/// - **Package** the application
/// - **Push** to an artifact registry
/// - **Deploy** to a testing environment
/// - **Deploy** to a production environment
///
/// It is intended to be used as an application feature passed down to an ApplicationInterpret. For
/// example :
///
/// ```rust,ignore
/// let app = RustApplicationScore {
/// name: "My Rust App".to_string(),
/// features: vec![ContinuousDelivery::default()],
/// };
/// ```
///
/// *Note :*
///
/// By default, the Harmony Opinionated Pipeline is built using these technologies :
///
/// - Gitea Action (executes pipeline steps)
/// - Docker to build an OCI container image
/// - Helm chart to package Kubernetes resources
/// - Harbor as artifact registry
/// - ArgoCD to install/upgrade/rollback/inspect k8s resources
/// - Kubernetes for runtime orchestration
#[derive(Debug, Default, Clone)]
pub struct ContinuousDelivery {}
#[async_trait]
impl<T: Topology + HelmCommand + 'static> ApplicationFeature<T> for ContinuousDelivery {
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
info!("Installing ContinuousDelivery feature");
let cd_server = HelmChartScore {
namespace: todo!(
"ArgoCD Helm chart with proper understanding of Tenant, see how Will did it for Monitoring for now"
),
release_name: todo!("argocd helm chart whatever"),
chart_name: todo!(),
chart_version: todo!(),
values_overrides: todo!(),
values_yaml: todo!(),
create_namespace: todo!(),
install_only: todo!(),
repository: todo!(),
};
let interpret = cd_server.create_interpret();
interpret.execute(&Inventory::empty(), topology);
todo!("1. Create ArgoCD score that installs argo using helm chart, see if Taha's already done it
2. Package app (docker image, helm chart)
3. Push to registry if staging or prod
4. Poke Argo
5. Ensure app is up")
}
fn name(&self) -> String {
"ContinuousDelivery".to_string()
}
}
/// For now this is entirely bound to K8s / ArgoCD, will have to be revisited when we support
/// more CD systems
pub struct CDApplicationConfig {
version: Version,
helm_chart_url: Url,
values_overrides: Value,
}
pub trait ContinuousDeliveryApplication {
fn get_config(&self) -> CDApplicationConfig;
}

View File

@@ -1,42 +0,0 @@
use async_trait::async_trait;
use log::info;
use crate::{
modules::application::ApplicationFeature,
topology::{K8sclient, Topology},
};
#[derive(Debug, Clone)]
pub struct PublicEndpoint {
application_port: u16,
}
/// Use port 3000 as the default port. Harmony wants to provide "sane defaults" in general, and in this
/// particular context, using port 80 goes against our philosophy to provide production grade
/// defaults out of the box. Using an unprivileged port is a good security practice and will allow
/// for unprivileged containers to work with this out of the box.
///
/// Now, why 3000 specifically? Many popular web/network frameworks use it by default, there is no
/// perfect answer for this but many Rust and Python libraries tend to use 3000.
impl Default for PublicEndpoint {
fn default() -> Self {
Self {
application_port: 3000,
}
}
}
/// For now we only support K8s ingress, but we will support more stuff at some point
#[async_trait]
impl<T: Topology + K8sclient + 'static> ApplicationFeature<T> for PublicEndpoint {
async fn ensure_installed(&self, _topology: &T) -> Result<(), String> {
info!(
"Making sure public endpoint is installed for port {}",
self.application_port
);
todo!()
}
fn name(&self) -> String {
"PublicEndpoint".to_string()
}
}

View File

@@ -1,8 +0,0 @@
mod endpoint;
pub use endpoint::*;
mod monitoring;
pub use monitoring::*;
mod continuous_delivery;
pub use continuous_delivery::*;

View File

@@ -1,21 +0,0 @@
use async_trait::async_trait;
use log::info;
use crate::{
modules::application::ApplicationFeature,
topology::{HelmCommand, Topology},
};
#[derive(Debug, Default, Clone)]
pub struct Monitoring {}
#[async_trait]
impl<T: Topology + HelmCommand + 'static> ApplicationFeature<T> for Monitoring {
async fn ensure_installed(&self, _topology: &T) -> Result<(), String> {
info!("Ensuring monitoring is available for application");
todo!("create and execute k8s prometheus score, depends on Will's work")
}
fn name(&self) -> String {
"Monitoring".to_string()
}
}

View File

@@ -1,78 +0,0 @@
mod feature;
pub mod features;
mod rust;
pub use feature::*;
use log::info;
pub use rust::*;
use async_trait::async_trait;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
topology::Topology,
};
pub trait Application: std::fmt::Debug + Send + Sync {
fn name(&self) -> String;
}
#[derive(Debug)]
pub struct ApplicationInterpret<T: Topology + std::fmt::Debug> {
features: Vec<Box<dyn ApplicationFeature<T>>>,
application: Box<dyn Application>,
}
#[async_trait]
impl<T: Topology + std::fmt::Debug> Interpret<T> for ApplicationInterpret<T> {
async fn execute(
&self,
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let app_name = self.application.name();
info!(
"Preparing {} features [{}] for application {app_name}",
self.features.len(),
self.features
.iter()
.map(|f| f.name())
.collect::<Vec<String>>()
.join(", ")
);
for feature in self.features.iter() {
info!(
"Installing feature {} for application {app_name}",
feature.name()
);
let _ = match feature.ensure_installed(topology).await {
Ok(()) => (),
Err(msg) => {
return Err(InterpretError::new(format!(
"Application Interpret failed to install feature : {msg}"
)));
}
};
}
        todo!(
            "Do I need to do anything more than this here?? I feel like the Application trait itself should expose something like ensure_ready but it's becoming redundant. We'll see as this evolves."
        )
}
fn get_name(&self) -> InterpretName {
InterpretName::Application
}
fn get_version(&self) -> Version {
Version::from("1.0.0").unwrap()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}

View File

@@ -1,41 +0,0 @@
use serde::Serialize;
use crate::{
score::Score,
topology::{Topology, Url},
};
use super::{Application, ApplicationFeature, ApplicationInterpret};
#[derive(Debug, Serialize, Clone)]
pub struct RustWebappScore<T: Topology + Clone + Serialize> {
pub name: String,
pub domain: Url,
pub features: Vec<Box<dyn ApplicationFeature<T>>>,
}
impl<T: Topology + std::fmt::Debug + Clone + Serialize + 'static> Score<T> for RustWebappScore<T> {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
Box::new(ApplicationInterpret {
features: self.features.clone(),
application: Box::new(RustWebapp {
name: self.name.clone(),
}),
})
}
fn name(&self) -> String {
format!("{}-RustWebapp", self.name)
}
}
#[derive(Debug)]
struct RustWebapp {
name: String,
}
impl Application for RustWebapp {
fn name(&self) -> String {
self.name.clone()
}
}

View File

@@ -311,7 +311,7 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for HelmChartInterpretV
_inventory: &Inventory,
_topology: &T,
) -> Result<Outcome, InterpretError> {
let _ns = self
let ns = self
.score
.chart
.namespace
@@ -339,7 +339,7 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for HelmChartInterpretV
let res = helm_executor.generate();
let _output = match res {
let output = match res {
Ok(output) => output,
Err(err) => return Err(InterpretError::new(err.to_string())),
};

View File

@@ -1,6 +1,5 @@
use harmony_macros::ingress_path;
use k8s_openapi::api::networking::v1::Ingress;
use log::{debug, trace};
use serde::Serialize;
use serde_json::json;
@@ -57,24 +56,22 @@ impl<T: Topology + K8sclient> Score<T> for K8sIngressScore {
let ingress = json!(
{
"metadata": {
"name": self.name.to_string(),
"name": self.name
},
"spec": {
"rules": [
{ "host": self.host.to_string(),
{ "host": self.host,
"http": {
"paths": [
{
"path": path,
"pathType": path_type.as_str(),
"backend": {
"service": {
"name": self.backend_service.to_string(),
"port": {
"number": self.port,
}
"backend": [
{
"service": self.backend_service,
"port": self.port
}
}
]
}
]
}
@@ -84,16 +81,13 @@ impl<T: Topology + K8sclient> Score<T> for K8sIngressScore {
}
);
trace!("Building ingresss object from Value {ingress:#}");
let ingress: Ingress = serde_json::from_value(ingress).unwrap();
debug!(
"Successfully built Ingress for host {:?}",
ingress.metadata.name
);
Box::new(K8sResourceInterpret {
score: K8sResourceScore::single(
ingress.clone(),
self.namespace.clone().map(|f| f.to_string()),
self.namespace
.clone()
.map(|f| f.as_c_str().to_str().unwrap().to_string()),
),
})
}

View File

@@ -1,7 +1,6 @@
use async_trait::async_trait;
use k8s_openapi::NamespaceResourceScope;
use kube::Resource;
use log::info;
use serde::{Serialize, de::DeserializeOwned};
use crate::{
@@ -76,12 +75,11 @@ where
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
info!("Applying {} resources", self.score.resource.len());
topology
.k8s_client()
.await
.expect("Environment should provide enough information to instanciate a client")
.apply_many(&self.score.resource, self.score.namespace.as_deref())
.apply_namespaced(&self.score.resource, self.score.namespace.as_deref())
.await?;
Ok(Outcome::success(

View File

@@ -1,4 +1,3 @@
pub mod application;
pub mod cert_manager;
pub mod dhcp;
pub mod dns;
@@ -13,7 +12,5 @@ pub mod load_balancer;
pub mod monitoring;
pub mod okd;
pub mod opnsense;
pub mod prometheus;
pub mod tenant;
pub mod tftp;
pub mod postgres;

View File

@@ -1,125 +1,42 @@
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Value};
use url::Url;
use crate::{
interpret::{InterpretError, Outcome},
modules::monitoring::kube_prometheus::{
prometheus::{Prometheus, PrometheusReceiver},
types::{AlertChannelConfig, AlertManagerChannelConfig},
},
topology::{Url, oberservability::monitoring::AlertReceiver},
};
#[derive(Debug, Clone, Serialize)]
pub struct DiscordWebhook {
#[derive(Debug, Clone)]
pub struct DiscordWebhookAlertChannel {
pub webhook_url: Url,
pub name: String,
pub url: Url,
pub send_resolved_notifications: bool,
}
#[async_trait]
impl AlertReceiver<Prometheus> for DiscordWebhook {
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
sender.install_receiver(self).await
}
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
Box::new(self.clone())
}
}
#[async_trait]
impl PrometheusReceiver for DiscordWebhook {
fn name(&self) -> String {
self.name.clone()
}
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
self.get_config().await
}
}
#[async_trait]
impl AlertChannelConfig for DiscordWebhook {
async fn get_config(&self) -> AlertManagerChannelConfig {
let channel_global_config = None;
let channel_receiver = self.alert_channel_receiver().await;
let channel_route = self.alert_channel_route().await;
AlertManagerChannelConfig {
channel_global_config,
channel_receiver,
channel_route,
}
}
}
impl DiscordWebhook {
async fn alert_channel_route(&self) -> serde_yaml::Value {
let mut route = Mapping::new();
route.insert(
Value::String("receiver".to_string()),
Value::String(self.name.clone()),
);
route.insert(
Value::String("matchers".to_string()),
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
);
route.insert(Value::String("continue".to_string()), Value::Bool(true));
Value::Mapping(route)
}
async fn alert_channel_receiver(&self) -> serde_yaml::Value {
let mut receiver = Mapping::new();
receiver.insert(
Value::String("name".to_string()),
Value::String(self.name.clone()),
);
let mut discord_config = Mapping::new();
discord_config.insert(
Value::String("webhook_url".to_string()),
Value::String(self.url.to_string()),
);
receiver.insert(
Value::String("discord_configs".to_string()),
Value::Sequence(vec![Value::Mapping(discord_config)]),
);
Value::Mapping(receiver)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn discord_serialize_should_match() {
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
url: Url::Url(url::Url::parse("https://discord.i.dont.exist.com").unwrap()),
};
let discord_receiver_receiver =
serde_yaml::to_string(&discord_receiver.alert_channel_receiver().await).unwrap();
println!("receiver \n{:#}", discord_receiver_receiver);
let discord_receiver_receiver_yaml = r#"name: test-discord
discord_configs:
- webhook_url: https://discord.i.dont.exist.com/
"#
.to_string();
let discord_receiver_route =
serde_yaml::to_string(&discord_receiver.alert_channel_route().await).unwrap();
println!("route \n{:#}", discord_receiver_route);
let discord_receiver_route_yaml = r#"receiver: test-discord
matchers:
- alertname!=Watchdog
continue: true
"#
.to_string();
assert_eq!(discord_receiver_receiver, discord_receiver_receiver_yaml);
assert_eq!(discord_receiver_route, discord_receiver_route_yaml);
}
}
//impl AlertChannelConfig for DiscordWebhookAlertChannel {
// fn build_alert_channel(&self) -> Box<dyn AlertChannel> {
// Box::new(DiscordWebhookAlertChannel {
// webhook_url: self.webhook_url.clone(),
// name: self.name.clone(),
// send_resolved_notifications: self.send_resolved_notifications.clone(),
// })
// }
// fn channel_type(&self) -> String {
// "discord".to_string()
// }
//}
//
//#[async_trait]
//impl AlertChannel for DiscordWebhookAlertChannel {
// async fn get_channel_id(&self) -> String {
// self.name.clone()
// }
//}
//
//impl PrometheusAlertChannel for DiscordWebhookAlertChannel {
// fn get_alert_channel_global_settings(&self) -> Option<AlertManagerChannelGlobalConfigs> {
// None
// }
//
// fn get_alert_channel_route(&self) -> AlertManagerChannelRoute {
// todo!()
// }
//
// fn get_alert_channel_receiver(&self) -> AlertManagerChannelReceiver {
// todo!()
// }
//}

View File

@@ -1,2 +1 @@
pub mod discord_alert_channel;
pub mod webhook_receiver;

View File

@@ -1,124 +0,0 @@
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Value};
use crate::{
interpret::{InterpretError, Outcome},
modules::monitoring::kube_prometheus::{
prometheus::{Prometheus, PrometheusReceiver},
types::{AlertChannelConfig, AlertManagerChannelConfig},
},
topology::{Url, oberservability::monitoring::AlertReceiver},
};
#[derive(Debug, Clone, Serialize)]
pub struct WebhookReceiver {
pub name: String,
pub url: Url,
}
#[async_trait]
impl AlertReceiver<Prometheus> for WebhookReceiver {
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
sender.install_receiver(self).await
}
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
Box::new(self.clone())
}
}
#[async_trait]
impl PrometheusReceiver for WebhookReceiver {
fn name(&self) -> String {
self.name.clone()
}
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
self.get_config().await
}
}
#[async_trait]
impl AlertChannelConfig for WebhookReceiver {
async fn get_config(&self) -> AlertManagerChannelConfig {
let channel_global_config = None;
let channel_receiver = self.alert_channel_receiver().await;
let channel_route = self.alert_channel_route().await;
AlertManagerChannelConfig {
channel_global_config,
channel_receiver,
channel_route,
}
}
}
impl WebhookReceiver {
async fn alert_channel_route(&self) -> serde_yaml::Value {
let mut route = Mapping::new();
route.insert(
Value::String("receiver".to_string()),
Value::String(self.name.clone()),
);
route.insert(
Value::String("matchers".to_string()),
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
);
route.insert(Value::String("continue".to_string()), Value::Bool(true));
Value::Mapping(route)
}
async fn alert_channel_receiver(&self) -> serde_yaml::Value {
let mut receiver = Mapping::new();
receiver.insert(
Value::String("name".to_string()),
Value::String(self.name.clone()),
);
let mut webhook_config = Mapping::new();
webhook_config.insert(
Value::String("url".to_string()),
Value::String(self.url.to_string()),
);
receiver.insert(
Value::String("webhook_configs".to_string()),
Value::Sequence(vec![Value::Mapping(webhook_config)]),
);
Value::Mapping(receiver)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn webhook_serialize_should_match() {
let webhook_receiver = WebhookReceiver {
name: "test-webhook".to_string(),
url: Url::Url(url::Url::parse("https://webhook.i.dont.exist.com").unwrap()),
};
let webhook_receiver_receiver =
serde_yaml::to_string(&webhook_receiver.alert_channel_receiver().await).unwrap();
println!("receiver \n{:#}", webhook_receiver_receiver);
let webhook_receiver_receiver_yaml = r#"name: test-webhook
webhook_configs:
- url: https://webhook.i.dont.exist.com/
"#
.to_string();
let webhook_receiver_route =
serde_yaml::to_string(&webhook_receiver.alert_channel_route().await).unwrap();
println!("route \n{:#}", webhook_receiver_route);
let webhook_receiver_route_yaml = r#"receiver: test-webhook
matchers:
- alertname!=Watchdog
continue: true
"#
.to_string();
assert_eq!(webhook_receiver_receiver, webhook_receiver_receiver_yaml);
assert_eq!(webhook_receiver_route, webhook_receiver_route_yaml);
}
}

View File

@@ -1 +0,0 @@
pub mod prometheus_alert_rule;

View File

@@ -1,99 +0,0 @@
use std::collections::{BTreeMap, HashMap};
use async_trait::async_trait;
use serde::Serialize;
use crate::{
interpret::{InterpretError, Outcome},
modules::monitoring::kube_prometheus::{
prometheus::{Prometheus, PrometheusRule},
types::{AlertGroup, AlertManagerAdditionalPromRules},
},
topology::oberservability::monitoring::AlertRule,
};
#[async_trait]
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
sender.install_rule(&self).await
}
fn clone_box(&self) -> Box<dyn AlertRule<Prometheus>> {
Box::new(self.clone())
}
}
#[async_trait]
impl PrometheusRule for AlertManagerRuleGroup {
fn name(&self) -> String {
self.name.clone()
}
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
let mut additional_prom_rules = BTreeMap::new();
additional_prom_rules.insert(
self.name.clone(),
AlertGroup {
groups: vec![self.clone()],
},
);
AlertManagerAdditionalPromRules {
rules: additional_prom_rules,
}
}
}
impl AlertManagerRuleGroup {
pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {
AlertManagerRuleGroup {
name: name.to_string().to_lowercase(),
rules,
}
}
}
#[derive(Debug, Clone, Serialize)]
/// Logical group of alert rules.
/// Evaluates to:
/// name:
///   groups:
///     - name: name
///       rules: PrometheusAlertRule
pub struct AlertManagerRuleGroup {
pub name: String,
pub rules: Vec<PrometheusAlertRule>,
}
#[derive(Debug, Clone, Serialize)]
pub struct PrometheusAlertRule {
pub alert: String,
pub expr: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub r#for: Option<String>,
pub labels: HashMap<String, String>,
pub annotations: HashMap<String, String>,
}
impl PrometheusAlertRule {
pub fn new(alert_name: &str, expr: &str) -> Self {
Self {
alert: alert_name.into(),
expr: expr.into(),
r#for: Some("1m".into()),
labels: HashMap::new(),
annotations: HashMap::new(),
}
}
pub fn for_duration(mut self, duration: &str) -> Self {
self.r#for = Some(duration.into());
self
}
pub fn label(mut self, key: &str, value: &str) -> Self {
self.labels.insert(key.into(), value.into());
self
}
pub fn annotation(mut self, key: &str, value: &str) -> Self {
self.annotations.insert(key.into(), value.into());
self
}
}
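// Hedged builder example (the alert name and PromQL expression are made up):
// the fluent methods above compose into a complete rule.
fn high_cpu_rule() -> PrometheusAlertRule {
    PrometheusAlertRule::new(
        "HighCpuUsage",
        "sum(rate(container_cpu_usage_seconds_total[5m])) > 0.9",
    )
    .for_duration("5m")
    .label("severity", "warning")
    .annotation("summary", "Container CPU usage above 90% for 5 minutes")
}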

View File

@@ -1,12 +1,10 @@
use serde::Serialize;
use crate::modules::monitoring::kube_prometheus::types::{
AlertManagerAdditionalPromRules, AlertManagerChannelConfig, ServiceMonitor,
};
use super::types::AlertManagerChannelConfig;
#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {
pub namespace: Option<String>,
pub namespace: String,
pub default_rules: bool,
pub windows_monitoring: bool,
pub alert_manager: bool,
@@ -23,14 +21,12 @@ pub struct KubePrometheusConfig {
pub kube_proxy: bool,
pub kube_state_metrics: bool,
pub prometheus_operator: bool,
pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
pub additional_service_monitors: Vec<ServiceMonitor>,
pub alert_channels: Vec<AlertManagerChannelConfig>,
}
impl KubePrometheusConfig {
pub fn new() -> Self {
Self {
namespace: None,
namespace: "monitoring".into(),
default_rules: true,
windows_monitoring: false,
alert_manager: true,
@@ -39,7 +35,7 @@ impl KubePrometheusConfig {
prometheus: true,
kubernetes_service_monitors: true,
kubernetes_api_server: false,
kubelet: true,
kubelet: false,
kube_controller_manager: false,
kube_etcd: false,
kube_proxy: false,
@@ -47,9 +43,7 @@ impl KubePrometheusConfig {
prometheus_operator: true,
core_dns: false,
kube_scheduler: false,
alert_receiver_configs: vec![],
alert_rules: vec![],
additional_service_monitors: vec![],
alert_channels: Vec::new(),
}
}
}

View File

@@ -1,413 +0,0 @@
use super::config::KubePrometheusConfig;
use log::debug;
use non_blank_string_rs::NonBlankString;
use serde_yaml::{Mapping, Value};
use std::{
collections::BTreeMap,
str::FromStr,
sync::{Arc, Mutex},
};
use crate::modules::{
helm::chart::HelmChartScore,
monitoring::kube_prometheus::types::{
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits,
PrometheusConfig, Requests, Resources,
},
};
pub fn kube_prometheus_helm_chart_score(
config: Arc<Mutex<KubePrometheusConfig>>,
) -> HelmChartScore {
let config = config.lock().unwrap();
let default_rules = config.default_rules.to_string();
let windows_monitoring = config.windows_monitoring.to_string();
let grafana = config.grafana.to_string();
let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
let kubernetes_api_server = config.kubernetes_api_server.to_string();
let kubelet = config.kubelet.to_string();
let kube_controller_manager = config.kube_controller_manager.to_string();
let core_dns = config.core_dns.to_string();
let kube_etcd = config.kube_etcd.to_string();
let kube_scheduler = config.kube_scheduler.to_string();
let kube_proxy = config.kube_proxy.to_string();
let kube_state_metrics = config.kube_state_metrics.to_string();
let node_exporter = config.node_exporter.to_string();
let prometheus_operator = config.prometheus_operator.to_string();
let prometheus = config.prometheus.to_string();
let resource_limit = Resources {
limits: Limits {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
requests: Requests {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
};
fn indent_lines(s: &str, spaces: usize) -> String {
let pad = " ".repeat(spaces);
s.lines()
.map(|line| format!("{pad}{line}"))
.collect::<Vec<_>>()
.join("\n")
}
fn resource_block(resource: &Resources, indent_level: usize) -> String {
let yaml = serde_yaml::to_string(resource).unwrap();
format!(
"{}resources:\n{}",
" ".repeat(indent_level),
indent_lines(&yaml, indent_level + 2)
)
}
let resource_section = resource_block(&resource_limit, 2);
let mut values = format!(
r#"
prometheus:
enabled: {prometheus}
prometheusSpec:
resources:
requests:
cpu: 100m
memory: 500Mi
limits:
cpu: 200m
memory: 1000Mi
defaultRules:
create: {default_rules}
rules:
alertmanager: true
etcd: true
configReloaders: true
general: true
k8sContainerCpuUsageSecondsTotal: true
k8sContainerMemoryCache: true
k8sContainerMemoryRss: true
k8sContainerMemorySwap: true
k8sContainerResource: true
k8sContainerMemoryWorkingSetBytes: true
k8sPodOwner: true
kubeApiserverAvailability: true
kubeApiserverBurnrate: true
kubeApiserverHistogram: true
kubeApiserverSlos: true
kubeControllerManager: true
kubelet: true
kubeProxy: true
kubePrometheusGeneral: true
kubePrometheusNodeRecording: true
kubernetesApps: true
kubernetesResources: true
kubernetesStorage: true
kubernetesSystem: true
kubeSchedulerAlerting: true
kubeSchedulerRecording: true
kubeStateMetrics: true
network: true
node: true
nodeExporterAlerting: true
nodeExporterRecording: true
prometheus: true
prometheusOperator: true
windows: true
windowsMonitoring:
enabled: {windows_monitoring}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
grafana:
enabled: {grafana}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
initChownData:
resources:
requests:
cpu: 10m
memory: 50Mi
limits:
cpu: 50m
memory: 100Mi
sidecar:
resources:
requests:
cpu: 10m
memory: 50Mi
limits:
cpu: 50m
memory: 100Mi
kubernetesServiceMonitors:
enabled: {kubernetes_service_monitors}
kubeApiServer:
enabled: {kubernetes_api_server}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubelet:
enabled: {kubelet}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeControllerManager:
enabled: {kube_controller_manager}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
coreDns:
enabled: {core_dns}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeEtcd:
enabled: {kube_etcd}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeScheduler:
enabled: {kube_scheduler}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeProxy:
enabled: {kube_proxy}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeStateMetrics:
enabled: {kube_state_metrics}
kube-state-metrics:
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
nodeExporter:
enabled: {node_exporter}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
prometheus-node-exporter:
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
prometheusOperator:
enabled: {prometheus_operator}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 100m
memory: 200Mi
prometheusConfigReloader:
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 100m
memory: 200Mi
admissionWebhooks:
deployment:
resources:
limits:
cpu: 10m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
patch:
resources:
limits:
cpu: 10m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
"#,
);
let prometheus_config =
crate::modules::monitoring::kube_prometheus::types::PrometheusConfigValues {
prometheus: PrometheusConfig {
                prometheus: config.prometheus,
additional_service_monitors: config.additional_service_monitors.clone(),
},
};
let prometheus_config_yaml =
serde_yaml::to_string(&prometheus_config).expect("Failed to serialize YAML");
debug!(
"serialized prometheus config: \n {:#}",
prometheus_config_yaml
);
values.push_str(&prometheus_config_yaml);
// add required null receiver for prometheus alert manager
let mut null_receiver = Mapping::new();
null_receiver.insert(
Value::String("receiver".to_string()),
Value::String("null".to_string()),
);
null_receiver.insert(
Value::String("matchers".to_string()),
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
);
null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true));
//add alert channels
let mut alert_manager_channel_config = AlertManagerConfig {
global: Mapping::new(),
route: AlertManagerRoute {
routes: vec![Value::Mapping(null_receiver)],
},
receivers: vec![serde_yaml::from_str("name: 'null'").unwrap()],
};
for receiver in config.alert_receiver_configs.iter() {
if let Some(global) = receiver.channel_global_config.clone() {
alert_manager_channel_config
.global
.insert(global.0, global.1);
}
alert_manager_channel_config
.route
.routes
.push(receiver.channel_route.clone());
alert_manager_channel_config
.receivers
.push(receiver.channel_receiver.clone());
}
let alert_manager_values = AlertManagerValues {
alertmanager: AlertManager {
enabled: config.alert_manager,
config: alert_manager_channel_config,
alertmanager_spec: AlertManagerSpec {
resources: Resources {
limits: Limits {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
requests: Requests {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
},
},
init_config_reloader: ConfigReloader {
resources: Resources {
limits: Limits {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
requests: Requests {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
},
},
},
};
let alert_manager_yaml =
serde_yaml::to_string(&alert_manager_values).expect("Failed to serialize YAML");
debug!("serialized alert manager: \n {:#}", alert_manager_yaml);
values.push_str(&alert_manager_yaml);
//format alert manager additional rules for helm chart
let mut merged_rules: BTreeMap<String, AlertGroup> = BTreeMap::new();
for additional_rule in config.alert_rules.clone() {
for (key, group) in additional_rule.rules {
merged_rules.insert(key, group);
}
}
let merged_rules = AlertManagerAdditionalPromRules {
rules: merged_rules,
};
let mut alert_manager_additional_rules = serde_yaml::Mapping::new();
let rules_value = serde_yaml::to_value(merged_rules).unwrap();
alert_manager_additional_rules.insert(
serde_yaml::Value::String("additionalPrometheusRulesMap".to_string()),
rules_value,
);
let alert_manager_additional_rules_yaml =
serde_yaml::to_string(&alert_manager_additional_rules).expect("Failed to serialize YAML");
debug!(
"alert_rules_yaml:\n{:#}",
alert_manager_additional_rules_yaml
);
values.push_str(&alert_manager_additional_rules_yaml);
debug!("full values.yaml: \n {:#}", values);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
chart_name: NonBlankString::from_str(
"oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack",
)
.unwrap(),
chart_version: None,
values_overrides: None,
        values_yaml: Some(values),
create_namespace: true,
install_only: true,
repository: None,
}
}
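A minimal usage sketch for the builder above, assuming `KubePrometheusConfig::new()` yields usable defaults and reusing this file's `Arc`/`Mutex` imports; the namespace is seeded first because the builder unwraps it:

fn example_kube_prometheus_score() {
    let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
    // The builder unwraps `namespace`, so seed it before rendering.
    config.lock().unwrap().namespace = Some("monitoring".to_string());
    let score = kube_prometheus_helm_chart_score(config);
    // Release name, chart, and namespace creation are fixed by the builder.
    assert!(score.create_namespace);
}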

View File

@@ -1,2 +0,0 @@
pub mod config;
pub mod kube_prometheus_helm_chart;

View File

@@ -1,39 +0,0 @@
use std::sync::{Arc, Mutex};
use serde::Serialize;
use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
use crate::{
modules::monitoring::kube_prometheus::types::ServiceMonitor,
score::Score,
topology::{
HelmCommand, Topology,
oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret},
tenant::TenantManager,
},
};
#[derive(Clone, Debug, Serialize)]
pub struct HelmPrometheusAlertingScore {
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
pub service_monitors: Vec<ServiceMonitor>,
}
impl<T: Topology + HelmCommand + TenantManager> Score<T> for HelmPrometheusAlertingScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
config
.try_lock()
.expect("couldn't lock config")
.additional_service_monitors = self.service_monitors.clone();
        Box::new(AlertingInterpret {
            // Hand the pre-seeded config to the sender so the service monitors
            // configured above are not dropped.
            sender: Prometheus { config },
receivers: self.receivers.clone(),
rules: self.rules.clone(),
})
}
fn name(&self) -> String {
"HelmPrometheusAlertingScore".to_string()
}
}
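A minimal sketch of declaring the score with no receivers, rules, or service monitors, which deploys the stack with its defaults:

fn example_alerting_score() -> HelmPrometheusAlertingScore {
    HelmPrometheusAlertingScore {
        receivers: vec![],
        rules: vec![],
        service_monitors: vec![],
    }
}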

View File

@@ -0,0 +1,261 @@
use super::config::KubePrometheusConfig;
use log::info;
use non_blank_string_rs::NonBlankString;
use std::str::FromStr;
use crate::modules::helm::chart::HelmChartScore;
pub fn kube_prometheus_helm_chart_score() -> HelmChartScore {
let config = KubePrometheusConfig::new();
    //TODO: this should be made into a rule with default formatting that can be passed
    //to the overrides as a vec; leaving the user to deal with formatting here seems bad
let default_rules = config.default_rules.to_string();
let windows_monitoring = config.windows_monitoring.to_string();
let alert_manager = config.alert_manager.to_string();
let grafana = config.grafana.to_string();
let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
let kubernetes_api_server = config.kubernetes_api_server.to_string();
let kubelet = config.kubelet.to_string();
let kube_controller_manager = config.kube_controller_manager.to_string();
let core_dns = config.core_dns.to_string();
let kube_etcd = config.kube_etcd.to_string();
let kube_scheduler = config.kube_scheduler.to_string();
let kube_proxy = config.kube_proxy.to_string();
let kube_state_metrics = config.kube_state_metrics.to_string();
let node_exporter = config.node_exporter.to_string();
let prometheus_operator = config.prometheus_operator.to_string();
let prometheus = config.prometheus.to_string();
    let values = format!(
r#"
additionalPrometheusRulesMap:
pods-status-alerts:
groups:
- name: pods
rules:
- alert: "[CRIT] POD not healthy"
expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{{phase=~"Pending|Unknown|Failed"}})[15m:1m]) > 0
for: 0m
labels:
severity: critical
annotations:
title: "[CRIT] POD not healthy : {{{{ $labels.pod }}}}"
description: |
A POD is in a non-ready state!
- **Pod**: {{{{ $labels.pod }}}}
- **Namespace**: {{{{ $labels.namespace }}}}
- alert: "[CRIT] POD crash looping"
expr: increase(kube_pod_container_status_restarts_total[5m]) > 3
for: 0m
labels:
severity: critical
annotations:
title: "[CRIT] POD crash looping : {{{{ $labels.pod }}}}"
description: |
A POD is drowning in a crash loop!
- **Pod**: {{{{ $labels.pod }}}}
- **Namespace**: {{{{ $labels.namespace }}}}
- **Instance**: {{{{ $labels.instance }}}}
pvc-alerts:
groups:
- name: pvc-alerts
rules:
- alert: 'PVC Fill Over 95 Percent In 2 Days'
expr: |
(
kubelet_volume_stats_used_bytes
/
kubelet_volume_stats_capacity_bytes
) > 0.95
AND
predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)
/
kubelet_volume_stats_capacity_bytes
> 0.95
for: 1m
labels:
severity: warning
annotations:
description: The PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} is predicted to fill over 95% in less than 2 days.
title: PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} will fill over 95% in less than 2 days
defaultRules:
create: {default_rules}
rules:
alertmanager: true
etcd: true
configReloaders: true
general: true
k8sContainerCpuUsageSecondsTotal: true
k8sContainerMemoryCache: true
k8sContainerMemoryRss: true
k8sContainerMemorySwap: true
k8sContainerResource: true
k8sContainerMemoryWorkingSetBytes: true
k8sPodOwner: true
kubeApiserverAvailability: true
kubeApiserverBurnrate: true
kubeApiserverHistogram: true
kubeApiserverSlos: true
kubeControllerManager: true
kubelet: true
kubeProxy: true
kubePrometheusGeneral: true
kubePrometheusNodeRecording: true
kubernetesApps: true
kubernetesResources: true
kubernetesStorage: true
kubernetesSystem: true
kubeSchedulerAlerting: true
kubeSchedulerRecording: true
kubeStateMetrics: true
network: true
node: true
nodeExporterAlerting: true
nodeExporterRecording: true
prometheus: true
prometheusOperator: true
windows: true
windowsMonitoring:
enabled: {windows_monitoring}
grafana:
enabled: {grafana}
kubernetesServiceMonitors:
enabled: {kubernetes_service_monitors}
kubeApiServer:
enabled: {kubernetes_api_server}
kubelet:
enabled: {kubelet}
kubeControllerManager:
enabled: {kube_controller_manager}
coreDns:
enabled: {core_dns}
kubeEtcd:
enabled: {kube_etcd}
kubeScheduler:
enabled: {kube_scheduler}
kubeProxy:
enabled: {kube_proxy}
kubeStateMetrics:
enabled: {kube_state_metrics}
nodeExporter:
enabled: {node_exporter}
prometheusOperator:
enabled: {prometheus_operator}
prometheus:
enabled: {prometheus}
"#,
);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
chart_name: NonBlankString::from_str(
"oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack",
)
.unwrap(),
chart_version: None,
values_overrides: None,
        values_yaml: Some(values),
create_namespace: true,
install_only: true,
repository: None,
}
}
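One easy-to-miss detail in the template above: the quadruple braces are Rust `format!` escapes, not Alertmanager syntax. Each `{{{{` renders as a literal `{{`, so the final values.yaml carries Go-template expressions such as `{{ $labels.pod }}`. A quick sketch:

fn example_brace_escaping() {
    // `{{{{` and `}}}}` are format! escapes for literal `{{` and `}}`.
    let rendered = format!("title: {{{{ $labels.pod }}}}");
    assert_eq!(rendered, "title: {{ $labels.pod }}");
}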
// let alertmanager_config = alert_manager_yaml_builder(&config);
// values.push_str(&alertmanager_config);
//
// fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String {
// let mut receivers = String::new();
// let mut routes = String::new();
// let mut global_configs = String::new();
// let alert_manager = config.alert_manager;
// for alert_channel in &config.alert_channel {
// match alert_channel {
// AlertChannel::Discord { name, .. } => {
// let (receiver, route) = discord_alert_builder(name);
// info!("discord receiver: {} \nroute: {}", receiver, route);
// receivers.push_str(&receiver);
// routes.push_str(&route);
// }
// AlertChannel::Slack {
// slack_channel,
// webhook_url,
// } => {
// let (receiver, route) = slack_alert_builder(slack_channel);
// info!("slack receiver: {} \nroute: {}", receiver, route);
// receivers.push_str(&receiver);
//
// routes.push_str(&route);
// let global_config = format!(
// r#"
// global:
// slack_api_url: {webhook_url}"#
// );
//
// global_configs.push_str(&global_config);
// }
// AlertChannel::Smpt { .. } => todo!(),
// }
// }
// info!("after alert receiver: {}", receivers);
// info!("after alert routes: {}", routes);
//
// let alertmanager_config = format!(
// r#"
//alertmanager:
// enabled: {alert_manager}
// config: {global_configs}
// route:
// group_by: ['job']
// group_wait: 30s
// group_interval: 5m
// repeat_interval: 12h
// routes:
//{routes}
// receivers:
// - name: 'null'
//{receivers}"#
// );
//
// info!("alert manager config: {}", alertmanager_config);
// alertmanager_config
// }
//fn discord_alert_builder(release_name: &String) -> (String, String) {
// let discord_receiver_name = format!("Discord-{}", release_name);
// let receiver = format!(
// r#"
// - name: '{discord_receiver_name}'
// webhook_configs:
// - url: 'http://{release_name}-alertmanager-discord:9094'
// send_resolved: true"#,
// );
// let route = format!(
// r#"
// - receiver: '{discord_receiver_name}'
// matchers:
// - alertname!=Watchdog
// continue: true"#,
// );
// (receiver, route)
//}
//
//fn slack_alert_builder(slack_channel: &String) -> (String, String) {
// let slack_receiver_name = format!("Slack-{}", slack_channel);
// let receiver = format!(
// r#"
// - name: '{slack_receiver_name}'
// slack_configs:
// - channel: '{slack_channel}'
// send_resolved: true
// title: '{{{{ .CommonAnnotations.title }}}}'
// text: '{{{{ .CommonAnnotations.description }}}}'"#,
// );
// let route = format!(
// r#"
// - receiver: '{slack_receiver_name}'
// matchers:
// - alertname!=Watchdog
// continue: true"#,
// );
// (receiver, route)
//}

View File

@@ -0,0 +1,85 @@
//#[derive(Debug, Clone, Serialize)]
//pub struct KubePrometheusMonitorScore {
// pub kube_prometheus_config: KubePrometheusConfig,
// pub alert_channel_configs: Vec<dyn AlertChannelConfig>,
//}
//impl<T: Topology + Debug + HelmCommand + Monitor<T>> MonitorConfig<T>
// for KubePrometheusMonitorScore
//{
// fn build_monitor(&self) -> Box<dyn Monitor<T>> {
// Box::new(self.clone())
// }
//}
//impl<T: Topology + HelmCommand + Debug + Clone + 'static + Monitor<T>> Score<T>
// for KubePrometheusMonitorScore
//{
// fn create_interpret(&self) -> Box<dyn Interpret<T>> {
// Box::new(KubePrometheusMonitorInterpret {
// score: self.clone(),
// })
// }
//
// fn name(&self) -> String {
// "KubePrometheusMonitorScore".to_string()
// }
//}
//#[derive(Debug, Clone)]
//pub struct KubePrometheusMonitorInterpret {
// score: KubePrometheusMonitorScore,
//}
//#[async_trait]
//impl AlertChannelConfig for KubePrometheusMonitorInterpret {
// async fn build_alert_channel(
// &self,
// ) -> Box<dyn AlertChannel> {
// todo!()
// }
//}
//#[async_trait]
//impl<T: Topology + HelmCommand + Debug + Monitor<T>> Interpret<T>
// for KubePrometheusMonitorInterpret
//{
// async fn execute(
// &self,
// inventory: &Inventory,
// topology: &T,
// ) -> Result<Outcome, InterpretError> {
// let monitor = self.score.build_monitor();
//
// let mut alert_channels = Vec::new();
// //for config in self.score.alert_channel_configs {
// // alert_channels.push(self.build_alert_channel());
// //}
//
// monitor
// .deploy_monitor(inventory, topology, alert_channels)
// .await
// }
//
// fn get_name(&self) -> InterpretName {
// todo!()
// }
//
// fn get_version(&self) -> Version {
// todo!()
// }
//
// fn get_status(&self) -> InterpretStatus {
// todo!()
// }
//
// fn get_children(&self) -> Vec<Id> {
// todo!()
// }
//}
//#[async_trait]
//pub trait PrometheusAlertChannel {
// fn get_alert_channel_global_settings(&self) -> Option<AlertManagerChannelGlobalConfigs>;
// fn get_alert_channel_route(&self) -> AlertManagerChannelRoute;
// fn get_alert_channel_receiver(&self) -> AlertManagerChannelReceiver;
//}

View File

@@ -1,4 +1,4 @@
pub mod helm;
pub mod helm_prometheus_alert_score;
pub mod prometheus;
pub mod config;
pub mod kube_prometheus_helm_chart_score;
pub mod kube_prometheus_monitor;
pub mod types;

View File

@@ -1,162 +0,0 @@
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use log::{debug, error};
use serde::Serialize;
use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
score,
topology::{
HelmCommand, K8sAnywhereTopology, Topology,
installable::Installable,
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
tenant::TenantManager,
},
};
use score::Score;
use super::{
helm::{
config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
},
types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
};
#[async_trait]
impl AlertSender for Prometheus {
fn name(&self) -> String {
"HelmKubePrometheus".to_string()
}
}
#[async_trait]
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
self.configure_with_topology(topology).await;
Ok(())
}
async fn ensure_installed(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<(), InterpretError> {
self.install_prometheus(inventory, topology).await?;
Ok(())
}
}
#[derive(Debug)]
pub struct Prometheus {
pub config: Arc<Mutex<KubePrometheusConfig>>,
}
impl Prometheus {
pub fn new() -> Self {
Self {
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
}
}
pub async fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
let ns = topology
.get_tenant_config()
.await
.map(|cfg| cfg.name.clone())
.unwrap_or_else(|| "monitoring".to_string());
error!("This must be refactored, see comments in pr #74");
debug!("NS: {}", ns);
self.config.lock().unwrap().namespace = Some(ns);
}
pub async fn install_receiver(
&self,
prometheus_receiver: &dyn PrometheusReceiver,
) -> Result<Outcome, InterpretError> {
let prom_receiver = prometheus_receiver.configure_receiver().await;
debug!(
"adding alert receiver to prometheus config: {:#?}",
&prom_receiver
);
let mut config = self.config.lock().unwrap();
config.alert_receiver_configs.push(prom_receiver);
let prom_receiver_name = prometheus_receiver.name();
debug!("installed alert receiver {}", &prom_receiver_name);
Ok(Outcome::success(format!(
"Sucessfully installed receiver {}",
prom_receiver_name
)))
}
pub async fn install_rule(
&self,
prometheus_rule: &AlertManagerRuleGroup,
) -> Result<Outcome, InterpretError> {
let prometheus_rule = prometheus_rule.configure_rule().await;
let mut config = self.config.lock().unwrap();
config.alert_rules.push(prometheus_rule.clone());
Ok(Outcome::success(format!(
"Successfully installed alert rule: {:#?},",
prometheus_rule
)))
}
pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
kube_prometheus_helm_chart_score(self.config.clone())
.create_interpret()
.execute(inventory, topology)
.await
}
}
#[async_trait]
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
}
impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
fn clone(&self) -> Self {
self.clone_box()
}
}
#[async_trait]
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
}
impl Serialize for Box<dyn AlertRule<Prometheus>> {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl Clone for Box<dyn AlertRule<Prometheus>> {
fn clone(&self) -> Self {
self.clone_box()
}
}
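A minimal sketch of the intended call order for the `Installable` implementation above, assuming a topology that satisfies the same bounds:

async fn example_install<T: Topology + HelmCommand + TenantManager + Send + Sync>(
    inventory: &Inventory,
    topology: &T,
) -> Result<(), InterpretError> {
    let prometheus = Prometheus::new();
    // `configure` resolves the tenant namespace into the shared config,
    prometheus.configure(inventory, topology).await?;
    // then `ensure_installed` renders the chart score and executes it.
    prometheus.ensure_installed(inventory, topology).await?;
    Ok(())
}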

View File

@@ -1,286 +1,14 @@
use std::collections::{BTreeMap, HashMap};
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Sequence, Value};
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup;
#[async_trait]
pub trait AlertChannelConfig {
async fn get_config(&self) -> AlertManagerChannelConfig;
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerValues {
pub alertmanager: AlertManager,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManager {
pub enabled: bool,
pub config: AlertManagerConfig,
pub alertmanager_spec: AlertManagerSpec,
pub init_config_reloader: ConfigReloader,
}
#[derive(Debug, Clone, Serialize)]
pub struct ConfigReloader {
pub resources: Resources,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerConfig {
pub global: Mapping,
pub route: AlertManagerRoute,
pub receivers: Sequence,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerRoute {
pub routes: Sequence,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelConfig {
    /// An optional key/value pair for the channel's global configuration,
    /// set only when the alert channel requires one,
    /// e.g. `[ jira_api_url: <string> ]`
pub channel_global_config: Option<(Value, Value)>,
pub channel_route: Value,
pub channel_receiver: Value,
pub global_configs: AlertManagerChannelGlobalConfigs,
pub route: AlertManagerChannelRoute,
pub receiver: AlertManagerChannelReceiver,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManagerSpec {
pub(crate) resources: Resources,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelGlobalConfigs {}
#[derive(Debug, Clone, Serialize)]
pub struct Resources {
pub limits: Limits,
pub requests: Requests,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelReceiver {}
#[derive(Debug, Clone, Serialize)]
pub struct Limits {
pub memory: String,
pub cpu: String,
}
#[derive(Debug, Clone, Serialize)]
pub struct Requests {
pub memory: String,
pub cpu: String,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerAdditionalPromRules {
#[serde(flatten)]
pub rules: BTreeMap<String, AlertGroup>,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertGroup {
pub groups: Vec<AlertManagerRuleGroup>,
}
#[derive(Debug, Clone, Serialize)]
pub enum HTTPScheme {
#[serde(rename = "http")]
HTTP,
#[serde(rename = "https")]
HTTPS,
}
#[derive(Debug, Clone, Serialize)]
pub enum Operator {
In,
NotIn,
Exists,
DoesNotExist,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfigValues {
pub prometheus: PrometheusConfig,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfig {
pub prometheus: bool,
pub additional_service_monitors: Vec<ServiceMonitor>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorTLSConfig {
// ## Path to the CA file
// ##
pub ca_file: Option<String>,
// ## Path to client certificate file
// ##
pub cert_file: Option<String>,
// ## Skip certificate verification
// ##
pub insecure_skip_verify: Option<bool>,
// ## Path to client key file
// ##
pub key_file: Option<String>,
// ## Server name used to verify host name
// ##
pub server_name: Option<String>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorEndpoint {
// ## Name of the endpoint's service port
// ## Mutually exclusive with targetPort
pub port: Option<String>,
// ## Name or number of the endpoint's target port
// ## Mutually exclusive with port
pub target_port: Option<String>,
// ## File containing bearer token to be used when scraping targets
// ##
pub bearer_token_file: Option<String>,
// ## Interval at which metrics should be scraped
// ##
pub interval: Option<String>,
// ## HTTP path to scrape for metrics
// ##
pub path: String,
// ## HTTP scheme to use for scraping
// ##
pub scheme: HTTPScheme,
// ## TLS configuration to use when scraping the endpoint
// ##
pub tls_config: Option<ServiceMonitorTLSConfig>,
// ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
// ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
// ##
// # - action: keep
// # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
// # sourceLabels: [__name__]
pub metric_relabelings: Vec<Mapping>,
// ## RelabelConfigs to apply to samples before scraping
// ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
// ##
// # - sourceLabels: [__meta_kubernetes_pod_node_name]
// # separator: ;
// # regex: ^(.*)$
// # targetLabel: nodename
// # replacement: $1
// # action: replace
pub relabelings: Vec<Mapping>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct MatchExpression {
pub key: String,
pub operator: Operator,
pub values: Vec<String>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Selector {
// # label selector for services
pub match_labels: HashMap<String, String>,
pub match_expressions: Vec<MatchExpression>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitor {
pub name: String,
// # Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from the chart
pub additional_labels: Option<Mapping>,
// # Service label for use in assembling a job name of the form <label value>-<port>
// # If no label is specified, the service name is used.
pub job_label: Option<String>,
// # labels to transfer from the kubernetes service to the target
pub target_labels: Vec<String>,
// # labels to transfer from the kubernetes pods to the target
pub pod_target_labels: Vec<String>,
// # Label selector for services to which this ServiceMonitor applies
// # Example which selects all services to be monitored
// # with label "monitoredby" with values any of "example-service-1" or "example-service-2"
// matchExpressions:
// - key: "monitoredby"
// operator: In
// values:
// - example-service-1
// - example-service-2
pub selector: Selector,
// # Namespaces from which services are selected
// # Match any namespace
// any: bool,
// # Explicit list of namespace names to select
// matchNames: Vec,
pub namespace_selector: Option<Mapping>,
// # Endpoints of the selected service to be monitored
pub endpoints: Vec<ServiceMonitorEndpoint>,
// # Fallback scrape protocol used by Prometheus for scraping metrics
// # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.ScrapeProtocol
pub fallback_scrape_protocol: Option<String>,
}
impl Default for ServiceMonitor {
fn default() -> Self {
Self {
name: Default::default(),
additional_labels: Default::default(),
job_label: Default::default(),
target_labels: Default::default(),
pod_target_labels: Default::default(),
selector: Selector {
match_labels: HashMap::new(),
match_expressions: vec![],
},
namespace_selector: Default::default(),
endpoints: Default::default(),
fallback_scrape_protocol: Default::default(),
}
}
}
impl Default for ServiceMonitorEndpoint {
fn default() -> Self {
Self {
port: Some("80".to_string()),
target_port: Default::default(),
bearer_token_file: Default::default(),
interval: Default::default(),
path: "/metrics".to_string(),
scheme: HTTPScheme::HTTP,
tls_config: Default::default(),
metric_relabelings: Default::default(),
relabelings: Default::default(),
}
}
}
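A minimal sketch of assembling a `ServiceMonitor` on top of the `Default` impls above, selecting services labeled `app=my-api` on their `http` port; the label and port names are illustrative:

fn example_service_monitor() -> ServiceMonitor {
    let mut monitor = ServiceMonitor::default();
    monitor.name = "my-api".to_string();
    monitor
        .selector
        .match_labels
        .insert("app".to_string(), "my-api".to_string());
    monitor.endpoints = vec![ServiceMonitorEndpoint {
        port: Some("http".to_string()),
        // Keep the default path (/metrics), scheme (http), and relabelings.
        ..ServiceMonitorEndpoint::default()
    }];
    monitor
}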
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelRoute {}

View File

@@ -1,4 +1,3 @@
pub mod alert_channel;
pub mod alert_rule;
pub mod kube_prometheus;
pub mod ntfy;
pub mod monitoring_alerting;

View File

@@ -0,0 +1,69 @@
use async_trait::async_trait;
use serde::Serialize;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{
HelmCommand, Topology,
oberservability::monitoring::{AlertChannelConfig, Monitor},
},
};
#[derive(Debug, Clone, Serialize)]
pub struct MonitoringAlertingScore {
#[serde(skip)]
pub alert_channel_configs: Option<Vec<Box<dyn AlertChannelConfig>>>,
}
impl<T: Topology + HelmCommand + Monitor> Score<T> for MonitoringAlertingScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(MonitoringAlertingInterpret {
score: self.clone(),
})
}
fn name(&self) -> String {
"MonitoringAlertingScore".to_string()
}
}
#[derive(Debug)]
struct MonitoringAlertingInterpret {
score: MonitoringAlertingScore,
}
#[async_trait]
impl<T: Topology + HelmCommand + Monitor> Interpret<T> for MonitoringAlertingInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
topology
.provision_monitor(
inventory,
topology,
self.score.alert_channel_configs.clone(),
)
.await
}
fn get_name(&self) -> InterpretName {
todo!()
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}
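A minimal sketch of the score with channel configuration omitted, which provisions the topology's monitor with its defaults:

fn example_monitoring_alerting_score() -> MonitoringAlertingScore {
    MonitoringAlertingScore {
        alert_channel_configs: None,
    }
}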

View File

@@ -1 +0,0 @@
pub mod ntfy_helm_chart;

View File

@@ -1,83 +0,0 @@
use non_blank_string_rs::NonBlankString;
use std::str::FromStr;
use crate::modules::helm::chart::{HelmChartScore, HelmRepository};
pub fn ntfy_helm_chart_score(namespace: String) -> HelmChartScore {
let values = format!(
r#"
replicaCount: 1
image:
repository: binwiederhier/ntfy
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "v2.12.0"
serviceAccount:
# Specifies whether a service account should be created
create: true
# Annotations to add to the service account
# annotations:
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
# name: ""
service:
type: ClusterIP
port: 80
ingress:
enabled: false
# annotations:
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: ntfy.host.com
paths:
- path: /
pathType: ImplementationSpecific
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
autoscaling:
enabled: false
config:
enabled: true
data:
# base-url: "https://ntfy.something.com"
auth-file: "/var/cache/ntfy/user.db"
auth-default-access: "deny-all"
cache-file: "/var/cache/ntfy/cache.db"
attachment-cache-dir: "/var/cache/ntfy/attachments"
behind-proxy: true
# web-root: "disable"
enable-signup: false
enable-login: "true"
persistence:
enabled: true
size: 200Mi
"#,
);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
release_name: NonBlankString::from_str("ntfy").unwrap(),
chart_name: NonBlankString::from_str("sarab97/ntfy").unwrap(),
chart_version: Some(NonBlankString::from_str("0.1.7").unwrap()),
values_overrides: None,
values_yaml: Some(values.to_string()),
create_namespace: true,
install_only: false,
repository: Some(HelmRepository::new(
"sarab97".to_string(),
url::Url::parse("https://charts.sarabsingh.com").unwrap(),
true,
)),
}
}
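A minimal usage sketch; only the namespace is caller-supplied, everything else is pinned in the values above:

fn example_ntfy_chart() -> HelmChartScore {
    ntfy_helm_chart_score("monitoring".to_string())
}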

View File

@@ -1,2 +0,0 @@
pub mod helm;
pub mod ntfy;

View File

@@ -1,169 +0,0 @@
use std::sync::Arc;
use async_trait::async_trait;
use log::debug;
use serde::Serialize;
use strum::{Display, EnumString};
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
modules::monitoring::ntfy::helm::ntfy_helm_chart::ntfy_helm_chart_score,
score::Score,
topology::{HelmCommand, K8sclient, Topology, k8s::K8sClient},
};
#[derive(Debug, Clone, Serialize)]
pub struct NtfyScore {
pub namespace: String,
}
impl<T: Topology + HelmCommand + K8sclient> Score<T> for NtfyScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(NtfyInterpret {
score: self.clone(),
})
}
fn name(&self) -> String {
format!("Ntfy")
}
}
#[derive(Debug, Serialize)]
pub struct NtfyInterpret {
pub score: NtfyScore,
}
#[derive(Debug, EnumString, Display)]
enum NtfyAccessMode {
#[strum(serialize = "read-write", serialize = "rw", to_string = "read-write")]
ReadWrite,
#[strum(
serialize = "read-only",
serialize = "ro",
serialize = "read",
to_string = "read-only"
)]
ReadOnly,
#[strum(
serialize = "write-only",
serialize = "wo",
serialize = "write",
to_string = "write-only"
)]
WriteOnly,
#[strum(serialize = "none", to_string = "deny")]
Deny,
}
#[derive(Debug, EnumString, Display)]
enum NtfyRole {
#[strum(serialize = "user", to_string = "user")]
User,
#[strum(serialize = "admin", to_string = "admin")]
Admin,
}
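With the strum derives above, `FromStr` accepts every `serialize` alias while `Display` emits the `to_string` form; a quick sketch:

fn example_access_mode_aliases() {
    use std::str::FromStr;
    let mode = NtfyAccessMode::from_str("rw").expect("alias should parse");
    // Display renders the canonical form passed to the `ntfy access` CLI.
    assert_eq!(mode.to_string(), "read-write");
}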
impl NtfyInterpret {
async fn add_user(
&self,
k8s_client: Arc<K8sClient>,
username: &str,
password: &str,
role: Option<NtfyRole>,
) -> Result<(), String> {
let role = match role {
Some(r) => r,
None => NtfyRole::User,
};
k8s_client
.exec_app(
"ntfy".to_string(),
Some(&self.score.namespace),
vec![
"sh",
"-c",
format!("NTFY_PASSWORD={password} ntfy user add --role={role} {username}")
.as_str(),
],
)
.await?;
Ok(())
}
async fn set_access(
&self,
k8s_client: Arc<K8sClient>,
username: &str,
topic: &str,
mode: NtfyAccessMode,
) -> Result<(), String> {
k8s_client
.exec_app(
"ntfy".to_string(),
Some(&self.score.namespace),
vec![
"sh",
"-c",
format!("ntfy access {username} {topic} {mode}").as_str(),
],
)
.await?;
Ok(())
}
}
/// We need an ntfy interpret to wrap the HelmChartScore so we can run the score and then bootstrap the configuration inside ntfy
#[async_trait]
impl<T: Topology + HelmCommand + K8sclient> Interpret<T> for NtfyInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
ntfy_helm_chart_score(self.score.namespace.clone())
.create_interpret()
.execute(inventory, topology)
.await?;
debug!("installed ntfy helm chart");
let client = topology
.k8s_client()
.await
.expect("couldn't get k8s client");
client
.wait_until_deployment_ready(
"ntfy".to_string(),
                Some(self.score.namespace.as_str()),
None,
)
.await?;
debug!("created k8s client");
self.add_user(client, "harmony", "harmony", Some(NtfyRole::Admin))
.await?;
debug!("exec into pod done");
Ok(Outcome::success("installed ntfy".to_string()))
}
fn get_name(&self) -> InterpretName {
todo!()
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}

View File

@@ -1,119 +0,0 @@
use async_trait::async_trait;
use derive_new::new;
use log::info;
use serde::{Deserialize, Serialize};
use crate::{
data::{PostgresDatabase, PostgresUser, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{PostgresServer, Topology},
};
#[derive(Debug, new, Clone, Serialize, Deserialize)]
pub struct PostgresScore {
users: Vec<PostgresUser>,
databases: Vec<PostgresDatabase>,
}
impl<T: Topology + PostgresServer> Score<T> for PostgresScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(PostgresInterpret::new(self.clone()))
}
fn name(&self) -> String {
"PostgresScore".to_string()
}
}
#[derive(Debug, Clone)]
pub struct PostgresInterpret {
score: PostgresScore,
version: Version,
status: InterpretStatus,
}
impl PostgresInterpret {
pub fn new(score: PostgresScore) -> Self {
let version = Version::from("1.0.0").expect("Version should be valid");
Self {
version,
score,
status: InterpretStatus::QUEUED,
}
}
async fn ensure_users_exist<P: PostgresServer>(
&self,
postgres_server: &P,
) -> Result<Outcome, InterpretError> {
let users = &self.score.users;
postgres_server.ensure_users_exist(users.clone()).await?;
Ok(Outcome::new(
InterpretStatus::SUCCESS,
format!(
"PostgresInterpret ensured {} users exist successfully",
users.len()
),
))
}
async fn ensure_databases_exist<P: PostgresServer>(
&self,
postgres_server: &P,
) -> Result<Outcome, InterpretError> {
let databases = &self.score.databases;
postgres_server
.ensure_databases_exist(databases.clone())
.await?;
Ok(Outcome::new(
InterpretStatus::SUCCESS,
format!(
"PostgresInterpret ensured {} databases exist successfully",
databases.len()
),
))
}
}
#[async_trait]
impl<T: Topology + PostgresServer> Interpret<T> for PostgresInterpret {
fn get_name(&self) -> InterpretName {
InterpretName::Postgres
}
fn get_version(&self) -> crate::domain::data::Version {
self.version.clone()
}
fn get_status(&self) -> InterpretStatus {
self.status.clone()
}
fn get_children(&self) -> Vec<crate::domain::data::Id> {
todo!()
}
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
info!(
"Executing {} on inventory {inventory:?})",
<PostgresInterpret as Interpret<T>>::get_name(self)
);
self.ensure_users_exist(topology).await?;
self.ensure_databases_exist(topology).await?;
Ok(Outcome::new(
InterpretStatus::SUCCESS,
"Postgres Interpret execution successful".to_string(),
))
}
}
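A minimal sketch showing that `derive_new` gives `PostgresScore` a positional constructor, so a score can be declared straight from user and database lists:

fn example_postgres_score(
    users: Vec<PostgresUser>,
    databases: Vec<PostgresDatabase>,
) -> PostgresScore {
    // `new` is generated by the `derive_new` attribute on PostgresScore.
    PostgresScore::new(users, databases)
}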

View File

@@ -1,40 +0,0 @@
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
pub fn global_storage_status_degraded_non_critical() -> PrometheusAlertRule {
PrometheusAlertRule::new("GlobalStorageStatusNonCritical", "globalStorageStatus == 4")
.for_duration("5m")
.label("severity", "warning")
.annotation(
"description",
"- **system**: {{ $labels.instance }}\n- **Status**: nonCritical\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
)
.annotation("title", " System storage status is in degraded state")
}
pub fn alert_global_storage_status_critical() -> PrometheusAlertRule {
PrometheusAlertRule::new(
"GlobalStorageStatus critical",
"globalStorageStatus == 5",
)
.for_duration("5m")
.label("severity", "warning")
.annotation("title", "System storage status is critical at {{ $labels.instance }}")
.annotation(
"description",
"- **System**: {{ $labels.instance }}\n- **Status**: Critical\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
)
}
pub fn alert_global_storage_status_non_recoverable() -> PrometheusAlertRule {
PrometheusAlertRule::new(
"GlobalStorageStatus nonRecoverable",
"globalStorageStatus == 6",
)
.for_duration("5m")
.label("severity", "warning")
.annotation("title", "System storage status is nonRecoverable at {{ $labels.instance }}")
.annotation(
"description",
"- **System**: {{ $labels.instance }}\n- **Status**: nonRecoverable\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
)
}
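The three rules above share one chainable builder shape; a sketch of a fourth threshold following the same pattern (the status value and severity are illustrative):

fn example_global_storage_status_unknown() -> PrometheusAlertRule {
    PrometheusAlertRule::new("GlobalStorageStatusUnknown", "globalStorageStatus == 0")
        .for_duration("5m")
        .label("severity", "info")
        .annotation("title", "System storage status is unknown at {{ $labels.instance }}")
}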

View File

@@ -1 +0,0 @@
pub mod dell_server;

View File

@@ -1 +0,0 @@
pub mod pvc;

View File

@@ -1,11 +0,0 @@
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
pub fn high_pvc_fill_rate_over_two_days() -> PrometheusAlertRule {
PrometheusAlertRule::new(
"PVC Fill Over 95 Percent In 2 Days",
"(kubelet_volume_stats_used_bytes/kubelet_volume_stats_capacity_bytes) > 0.95 AND predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)/kubelet_volume_stats_capacity_bytes > 0.95",)
.for_duration("1m")
.label("severity", "warning")
.annotation("summary", "The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.")
.annotation("description", "PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days",)
}
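For context on the PromQL: `predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)` fits a linear regression over the last two days of samples and projects usage two days ahead; dividing by capacity turns both readings into fill fractions, so the rule fires only when the volume is already above 95% and the projection stays above it. The horizon argument is plain seconds:

fn example_two_day_horizon_seconds() -> u32 {
    // predict_linear's second argument is a horizon in seconds.
    2 * 24 * 60 * 60 // = 172_800
}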

Some files were not shown because too many files have changed in this diff