Compare commits
68 commits: feat/disco...dev/postgr
| SHA1 |
|---|
| 0b8525fe05 |
| 6bf10b093c |
| 3eecc2f590 |
| 3959c07261 |
| e50c01c0b3 |
| 286460d59e |
| 4baa3ae707 |
| 82119076cf |
| f2a350fae6 |
| 197770a603 |
| ab69a2c264 |
| e857efa92f |
| 2ff3f4afa9 |
| 2f6a11ead7 |
| 7de9860dcf |
| 6e884cff3a |
| c74c51090a |
| 8ae0d6b548 |
| ee02906ce9 |
| 284cc6afd7 |
| 9bf6aac82e |
| 460c8b59e1 |
| 8e857bc72a |
| e8d55d27e4 |
| fea7e9ddb9 |
| 7ec89cdac5 |
| 55143dcad4 |
| 17ad92402d |
| 29e74a2712 |
| e16f8fa82e |
| c21f3084dc |
| 2c706225a1 |
| acfb93f1a2 |
| f437c40428 |
| e06548ac44 |
| 155e9bac28 |
| 7bebc58615 |
| 246d6718c3 |
| d776042e20 |
| 86c681be70 |
| b94dd1e595 |
| ef5ec4a131 |
| a8eb06f686 |
| d1678b529e |
| 1451260d4d |
| 415488ba39 |
| bf7a6d590c |
| 8d8120bbfd |
| 6cf61ae67c |
| 8c65aef127 |
| 00e71b97f6 |
| ee2bba5623 |
| 118d34db55 |
| 24e466fadd |
| 14fc4345c1 |
| 8e472e4c65 |
| ec17ccc246 |
| 5127f44ab3 |
| 2ff70db0b1 |
| e17ac1af83 |
| 31e59937dc |
| 60f2f31d6c |
| 045954f8d3 |
| 7c809bf18a |
| 6490e5e82a |
| 5e51f7490c |
| 97fba07f4e |
| 624e4330bb |
.dockerignore (new file, 2 lines)
@@ -0,0 +1,2 @@
target/
Dockerfile
@@ -1,14 +1,18 @@
 name: Run Check Script

 on:
   push:
     branches:
       - master
   pull_request:

 jobs:
   check:
-    runs-on: rust-cargo
+    runs-on: docker
+    container:
+      image: hub.nationtech.io/harmony/harmony_composer:latest@sha256:eb0406fcb95c63df9b7c4b19bc50ad7914dd8232ce98e9c9abef628e07c69386
     steps:
       - name: Checkout code
         uses: actions/checkout@v4

       - name: Run check script
         run: bash check.sh
.gitea/workflows/harmony_composer.yaml (new file, 95 lines)
@@ -0,0 +1,95 @@
name: Compile and package harmony_composer

on:
  push:
    branches:
      - master

jobs:
  package_harmony_composer:
    container:
      image: hub.nationtech.io/harmony/harmony_composer:latest@sha256:eb0406fcb95c63df9b7c4b19bc50ad7914dd8232ce98e9c9abef628e07c69386
    runs-on: dind
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Build for Linux x86_64
        run: cargo build --release --bin harmony_composer --target x86_64-unknown-linux-gnu

      - name: Build for Windows x86_64 GNU
        run: cargo build --release --bin harmony_composer --target x86_64-pc-windows-gnu

      - name: Log into hub.nationtech.io
        uses: docker/login-action@v3
        with:
          registry: hub.nationtech.io
          username: ${{ secrets.HUB_BOT_USER }}
          password: ${{ secrets.HUB_BOT_PASSWORD }}

      # TODO: build ARM images and macOS binaries (or other targets) too

      - name: Update snapshot-latest tag
        run: |
          git config user.name "Gitea CI"
          git config user.email "ci@nationtech.io"
          git tag -f snapshot-latest
          git push origin snapshot-latest --force

      - name: Install jq
        run: apt install -y jq # The current image includes apt lists so we don't have to apt update and rm /var/lib/apt... every time. But if the image is optimized this won't work anymore.

      - name: Create or update release
        run: |
          # First, check if the release exists and delete it if it does
          RELEASE_ID=$(curl -s -X GET \
            -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
            "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/tags/snapshot-latest" \
            | jq -r '.id // empty')

          if [ -n "$RELEASE_ID" ]; then
            # Delete the existing release
            curl -X DELETE \
              -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
              "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/$RELEASE_ID"
          fi

          # Create a new release
          RESPONSE=$(curl -X POST \
            -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
            -H "Content-Type: application/json" \
            -d '{
              "tag_name": "snapshot-latest",
              "name": "Latest Snapshot",
              "body": "Automated snapshot build from master branch",
              "draft": false,
              "prerelease": true
            }' \
            "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases")

          echo "RELEASE_ID=$(echo $RESPONSE | jq -r '.id')" >> $GITHUB_ENV

      - name: Upload Linux binary
        run: |
          curl -X POST \
            -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
            -H "Content-Type: application/octet-stream" \
            --data-binary "@target/x86_64-unknown-linux-gnu/release/harmony_composer" \
            "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/${{ env.RELEASE_ID }}/assets?name=harmony_composer"

      - name: Upload Windows binary
        run: |
          curl -X POST \
            -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
            -H "Content-Type: application/octet-stream" \
            --data-binary "@target/x86_64-pc-windows-gnu/release/harmony_composer.exe" \
            "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/${{ env.RELEASE_ID }}/assets?name=harmony_composer.exe"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          tags: hub.nationtech.io/harmony/harmony_composer:latest
Cargo.lock (generated, 1132 lines)
File diff suppressed because it is too large.
Cargo.toml (58 lines)
@@ -11,6 +11,7 @@ members = [
     "opnsense-config-xml",
     "harmony_cli",
     "k3d",
+    "harmony_composer",
 ]

 [workspace.package]
@@ -19,28 +20,35 @@ readme = "README.md"
 license = "GNU AGPL v3"

 [workspace.dependencies]
-log = "0.4.22"
-env_logger = "0.11.5"
-derive-new = "0.7.0"
-async-trait = "0.1.82"
-tokio = { version = "1.40.0", features = ["io-std", "fs", "macros", "rt-multi-thread"] }
-cidr = "0.2.3"
-russh = "0.45.0"
-russh-keys = "0.45.0"
-rand = "0.8.5"
-url = "2.5.4"
-kube = "0.98.0"
-k8s-openapi = { version = "0.24.0", features = ["v1_30"] }
-serde_yaml = "0.9.34"
-serde-value = "0.7.0"
-http = "1.2.0"
-inquire = "0.7.5"
-convert_case = "0.8.0"
-
-[workspace.dependencies.uuid]
-version = "1.11.0"
-features = [
-    "v4",                # Lets you generate random UUIDs
-    "fast-rng",          # Use a faster (but still sufficiently random) RNG
-    "macro-diagnostics", # Enable better diagnostics for compile-time UUIDs
-]
+log = "0.4"
+env_logger = "0.11"
+derive-new = "0.7"
+async-trait = "0.1"
+tokio = { version = "1.40", features = [
+    "io-std",
+    "fs",
+    "macros",
+    "rt-multi-thread",
+] }
+cidr = { features = ["serde"], version = "0.2" }
+russh = "0.45"
+russh-keys = "0.45"
+rand = "0.8"
+url = "2.5"
+kube = { version = "1.1.0", features = [
+    "config",
+    "client",
+    "runtime",
+    "rustls-tls",
+    "ws",
+    "jsonpatch",
+] }
+k8s-openapi = { version = "0.25", features = ["v1_30"] }
+serde_yaml = "0.9"
+serde-value = "0.7"
+http = "1.2"
+inquire = "0.7"
+convert_case = "0.8"
+chrono = "0.4"
+similar = "2"
+uuid = { version = "1.11", features = ["v4", "fast-rng", "macro-diagnostics"] }
Dockerfile (new file, 25 lines)
@@ -0,0 +1,25 @@
FROM docker.io/rust:1.87.0 AS build

WORKDIR /app

COPY . .

RUN cargo build --release --bin harmony_composer

FROM docker.io/rust:1.87.0

WORKDIR /app

RUN rustup target add x86_64-pc-windows-gnu
RUN rustup target add x86_64-unknown-linux-gnu
RUN rustup component add rustfmt

RUN apt update

# TODO: Consider adding more supported targets
# nodejs for the checkout action, docker for building containers, mingw for cross-compiling for Windows
RUN apt install -y nodejs docker.io mingw-w64

COPY --from=build /app/target/release/harmony_composer .

ENTRYPOINT ["/app/harmony_composer"]
README.md (252 lines)
@@ -1,171 +1,151 @@
-# Harmony : Open Infrastructure Orchestration
+# Harmony : Open-source infrastructure orchestration that treats your platform like first-class code.
 *By [NationTech](https://nationtech.io)*

-## Quick demo
+[](https://git.nationtech.io/nationtech/harmony)
+[](LICENSE)

-`cargo run -p example-tui`
+### Unify

-This will launch Harmony's minimalist terminal UI, which embeds a few demo scores.
+- **Project Scaffolding**
+- **Infrastructure Provisioning**
+- **Application Deployment**
+- **Day-2 operations**

-Usage instructions will be displayed at the bottom of the TUI.
+All in **one strongly-typed Rust codebase**.

-`cargo run --bin example-cli -- --help`
+### Deploy anywhere

-This is the Harmony CLI, a minimal implementation.
+From a **developer laptop** to a **global production cluster**, a single **source of truth** drives the **full software lifecycle.**

-The current help text:
+---

-````
-Usage: example-cli [OPTIONS]
+## 1 · The Harmony Philosophy

-Options:
-  -y, --yes                Run score(s) or not
-  -f, --filter <FILTER>    Filter query
-  -i, --interactive        Run interactive TUI or not
-  -a, --all                Run all or nth, defaults to all
-  -n, --number <NUMBER>    Run nth matching, zero indexed [default: 0]
-  -l, --list               list scores, will also be affected by run filter
-  -h, --help               Print help
-  -V, --version            Print version
-````
+Infrastructure is essential, but it shouldn’t be your core business. Harmony is built on three guiding principles that make modern platforms reliable, repeatable, and easy to reason about.

-## Core architecture
+| Principle | What it means for you |
+|-----------|-----------------------|
+| **Infrastructure as Resilient Code** | Replace sprawling YAML and bash scripts with type-safe Rust. Test, refactor, and version your platform just like application code. |
+| **Prove It Works — Before You Deploy** | Harmony uses the compiler to verify that your application’s needs match the target environment’s capabilities at **compile-time**, eliminating an entire class of runtime outages. |
+| **One Unified Model** | Software and infrastructure are a single system. Harmony models them together, enabling deep automation—from bare-metal servers to Kubernetes workloads—with zero context switching. |

-## Supporting a new field in OPNSense `config.xml`
+These principles surface as simple, ergonomic Rust APIs that let teams focus on their product while trusting the platform underneath.
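The compile-time claim in the table above reduces to ordinary trait bounds. Here is a minimal illustrative sketch (the trait and type names are invented for illustration, not Harmony's actual API): a deployment that requires a capability can only be invoked against a topology that implements it, so a mismatch is a type error rather than a runtime outage.

```rust
// Hypothetical sketch of compile-time capability checking via trait bounds.
trait Kubernetes {
    fn apply(&self, manifest: &str);
}

struct K3dTopology;
impl Kubernetes for K3dTopology {
    fn apply(&self, manifest: &str) {
        println!("applying to local k3d: {manifest}");
    }
}

struct BareMetalTopology; // does NOT implement Kubernetes

// A score that needs Kubernetes can only be interpreted on a topology providing it.
fn deploy_webapp<T: Kubernetes>(topology: &T) {
    topology.apply("Deployment: webapp");
}

fn main() {
    deploy_webapp(&K3dTopology); // compiles
    // deploy_webapp(&BareMetalTopology); // compile error: `Kubernetes` not implemented
}
```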
-Two steps:
-- Supporting the field in `opnsense-config-xml`
-- Enabling Harmony to control the field
+---

-We'll use the `filename` field in the `dhcpd` section of the file as an example.
+## 2 · Quick Start

-### Supporting the field
+The snippet below spins up a complete **production-grade LAMP stack** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.

-As type checking is enforced, every field from `config.xml` must be known to the code. Each subsection of `config.xml` has its own `.rs` file. For the `dhcpd` section, we'll modify `opnsense-config-xml/src/data/dhcpd.rs`.
+```rust
+use harmony::{
+    data::Version,
+    inventory::Inventory,
+    maestro::Maestro,
+    modules::{
+        lamp::{LAMPConfig, LAMPScore},
+        monitoring::monitoring_alerting::MonitoringAlertingStackScore,
+    },
+    topology::{K8sAnywhereTopology, Url},
+};

-When a new field appears in the XML file, an error like this is thrown and Harmony panics:
-```
-     Running `/home/stremblay/nt/dir/harmony/target/debug/example-nanodc`
-Found unauthorized element filename
-thread 'main' panicked at opnsense-config-xml/src/data/opnsense.rs:54:14:
-OPNSense received invalid string, should be full XML: ()
-```
+#[tokio::main]
+async fn main() {
+    // 1. Describe what you want
+    let lamp_stack = LAMPScore {
+        name: "harmony-lamp-demo".into(),
+        domain: Url::Url(url::Url::parse("https://lampdemo.example.com").unwrap()),
+        php_version: Version::from("8.3.0").unwrap(),
+        config: LAMPConfig {
+            project_root: "./php".into(),
+            database_size: "4Gi".into(),
+            ..Default::default()
+        },
+    };
+
+    // 2. Pick where it should run
+    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
+        Inventory::autoload(),           // auto-detect hardware / kube-config
+        K8sAnywhereTopology::from_env(), // local k3d, CI, staging, prod…
+    )
+    .await
+    .unwrap();
+
+    // 3. Enhance with extra scores (monitoring, CI/CD, …)
+    let mut monitoring = MonitoringAlertingStackScore::new();
+    monitoring.namespace = Some(lamp_stack.config.namespace.clone());
+
+    maestro.register_all(vec![Box::new(lamp_stack), Box::new(monitoring)]);
+
+    // 4. Launch an interactive CLI / TUI
+    harmony_cli::init(maestro, None).await.unwrap();
+}
+```

-Define the missing field (`filename`) in the `DhcpInterface` struct of `opnsense-config-xml/src/data/dhcpd.rs`:
-```rust
-pub struct DhcpInterface {
-    ...
-    pub filename: Option<String>,
-```
+Run it:

+```bash
+cargo run
+```

-Harmony should now compile and run.
+Harmony analyses the code, shows an execution plan in a TUI, and applies it once you confirm. Same code, same binary—every environment.

-### Controlling the field
+---

-Define the XML field setter in `opnsense-config/src/modules/dhcpd.rs`:
-```rust
-impl<'a> DhcpConfig<'a> {
-    ...
-    pub fn set_filename(&mut self, filename: &str) {
-        self.enable_netboot();
-        self.get_lan_dhcpd().filename = Some(filename.to_string());
-    }
-    ...
-```
+## 3 · Core Concepts

+| Term | One-liner |
+|------|-----------|
+| **Score<T>** | Declarative description of the desired state (e.g., `LAMPScore`). |
+| **Interpret<T>** | Imperative logic that realises a `Score` on a specific environment. |
+| **Topology** | An environment (local k3d, AWS, bare-metal) exposing verified *Capabilities* (Kubernetes, DNS, …). |
+| **Maestro** | Orchestrator that compiles Scores + Topology, ensuring all capabilities line up **at compile-time**. |
+| **Inventory** | Optional catalogue of physical assets for bare-metal and edge deployments. |
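To make the table concrete, here is a minimal hedged sketch of how a Score/Interpret pair hangs together. The shapes below are simplified stand-ins for illustration, not Harmony's exact signatures:

```rust
// Simplified sketch: a Score declares desired state; an Interpret realises it
// against a Topology. Names and signatures are illustrative only.
struct Topology {
    kube_context: String,
}

struct NginxScore {
    replicas: u32,
}

trait Interpret {
    fn execute(&self, topology: &Topology) -> Result<String, String>;
}

impl Interpret for NginxScore {
    fn execute(&self, topology: &Topology) -> Result<String, String> {
        // A real Interpret would call the Kubernetes API here.
        Ok(format!(
            "would ensure {} nginx replicas on context {}",
            self.replicas, topology.kube_context
        ))
    }
}

fn main() {
    let topology = Topology { kube_context: "k3d-local".into() };
    let score = NginxScore { replicas: 2 };
    println!("{}", score.execute(&topology).unwrap());
}
```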
+A visual overview is in the diagram below.

+[Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)

+---

+## 4 · Install

+Prerequisites:

+* Rust
+* Docker (if you deploy locally)
+* `kubectl` / `helm` for Kubernetes-based topologies

+```bash
+git clone https://git.nationtech.io/nationtech/harmony
+cd harmony
+cargo build --release   # builds the CLI, TUI and libraries
+```

-Define the value setter in the `DhcpServer` trait in `domain/topology/network.rs`:
-```rust
-#[async_trait]
-pub trait DhcpServer: Send + Sync {
-    ...
-    async fn set_filename(&self, filename: &str) -> Result<(), ExecutorError>;
-    ...
-```
+---

-Implement the value setter in each `DhcpServer` implementation.
-`infra/opnsense/dhcp.rs`:
-```rust
-#[async_trait]
-impl DhcpServer for OPNSenseFirewall {
-    ...
-    async fn set_filename(&self, filename: &str) -> Result<(), ExecutorError> {
-        {
-            let mut writable_opnsense = self.opnsense_config.write().await;
-            writable_opnsense.dhcp().set_filename(filename);
-            debug!("OPNsense dhcp server set filename {filename}");
-        }
+## 5 · Learning More

-        Ok(())
-    }
-    ...
-```
+* **Architectural Decision Records** – dive into the rationale
+  - [ADR-001 · Why Rust](adr/001-rust.md)
+  - [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
+  - [ADR-006 · Secret Management](adr/006-secret-management.md)
+  - [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)

-`domain/topology/ha_cluster.rs`:
-```rust
-#[async_trait]
-impl DhcpServer for DummyInfra {
-    ...
-    async fn set_filename(&self, _filename: &str) -> Result<(), ExecutorError> {
-        unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
-    }
-    ...
-```
+* **Extending Harmony** – write new Scores / Interprets, add hardware like OPNsense firewalls, or embed Harmony in your own tooling (`/docs`).

-Add the new field to the `DhcpScore` in `modules/dhcp.rs`:
-```rust
-pub struct DhcpScore {
-    ...
-    pub filename: Option<String>,
-```
+* **Community** – discussions and roadmap live in [GitLab issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!

-Set it in its implementation in `modules/okd/dhcp.rs`:
-```rust
-impl OKDDhcpScore {
-    ...
-    Self {
-        dhcp_score: DhcpScore {
-            ...
-            filename: Some("undionly.kpxe".to_string()),
-```
+---

-Set it in its implementation in `modules/okd/bootstrap_dhcp.rs`:
-```rust
-impl OKDDhcpScore {
-    ...
-    Self {
-        dhcp_score: DhcpScore::new(
-            ...
-            Some("undionly.kpxe".to_string()),
-```
+## 6 · License

-Update the interpret (the function called by the `execute` fn of the interpret) so it now applies the `filename` field value, in `modules/dhcp.rs`:
-```rust
-impl DhcpInterpret {
-    ...
-    let filename_outcome = match &self.score.filename {
-        Some(filename) => {
-            let dhcp_server = Arc::new(topology.dhcp_server.clone());
-            dhcp_server.set_filename(&filename).await?;
-            Outcome::new(
-                InterpretStatus::SUCCESS,
-                format!("Dhcp Interpret Set filename to {filename}"),
-            )
-        }
-        None => Outcome::noop(),
-    };
+Harmony is released under the **GNU AGPL v3**.

-    if next_server_outcome.status == InterpretStatus::NOOP
-        && boot_filename_outcome.status == InterpretStatus::NOOP
-        && filename_outcome.status == InterpretStatus::NOOP
+> We choose a strong copyleft license to ensure the project—and every improvement to it—remains open and benefits the entire community. Fork it, enhance it, even out-innovate us; just keep it open.

-    ...
+See [LICENSE](LICENSE) for the full text.

-    Ok(Outcome::new(
-        InterpretStatus::SUCCESS,
-        format!(
-            "Dhcp Interpret Set next boot to [{:?}], boot_filename to [{:?}], filename to [{:?}]",
-            self.score.next_server, self.score.boot_filename, self.score.filename
-        ),
-    ...
-```
+---

+*Made with ❤️ & 🦀 by NationTech and the Harmony community*
adr/010-monitoring-alerting/architecture.rs (new file, 73 lines)
@@ -0,0 +1,73 @@
pub trait MonitoringSystem {}

// 1. Modified AlertReceiver trait:
//    - Removed the problematic `clone` method.
//    - Added `box_clone`, which returns a Box<dyn AlertReceiver>.
pub trait AlertReceiver {
    type M: MonitoringSystem;
    fn install(&self, sender: &Self::M) -> Result<(), String>;
    // This method allows concrete types to clone themselves into a Box<dyn AlertReceiver>
    fn box_clone(&self) -> Box<dyn AlertReceiver<M = Self::M>>;
}

#[derive(Clone)]
struct Prometheus {}
impl MonitoringSystem for Prometheus {}

#[derive(Clone)] // Keep derive(Clone) for DiscordWebhook itself
struct DiscordWebhook {}

impl AlertReceiver for DiscordWebhook {
    type M = Prometheus;
    fn install(&self, sender: &Self::M) -> Result<(), String> {
        // Placeholder for actual installation logic
        println!("DiscordWebhook installed for Prometheus monitoring.");
        Ok(())
    }
    // 2. Implement `box_clone` for DiscordWebhook:
    //    This uses the derived `Clone` for DiscordWebhook to create a new boxed instance.
    fn box_clone(&self) -> Box<dyn AlertReceiver<M = Self::M>> {
        Box::new(self.clone())
    }
}

// 3. Implement `std::clone::Clone` for `Box<dyn AlertReceiver<M = M>>`:
//    This allows `Box<dyn AlertReceiver>` to be cloned.
//    The `+ 'static` lifetime bound is often necessary for trait objects stored in collections,
//    ensuring they live long enough.
impl<M: MonitoringSystem + 'static> Clone for Box<dyn AlertReceiver<M = M>> {
    fn clone(&self) -> Self {
        self.box_clone() // Call the custom `box_clone` method
    }
}

// MonitoringConfig can now derive Clone because its `receivers` field
// (Vec<Box<dyn AlertReceiver<M = M>>>) is now cloneable.
#[derive(Clone)]
struct MonitoringConfig<M: MonitoringSystem + 'static> {
    receivers: Vec<Box<dyn AlertReceiver<M = M>>>,
}

// Example usage to demonstrate compilation and functionality
fn main() {
    let prometheus_instance = Prometheus {};
    let discord_webhook_instance = DiscordWebhook {};

    let mut config = MonitoringConfig {
        receivers: Vec::new(),
    };

    // Create a boxed alert receiver
    let boxed_receiver: Box<dyn AlertReceiver<M = Prometheus>> = Box::new(discord_webhook_instance);
    config.receivers.push(boxed_receiver);

    // Clone the config, which will now correctly clone the boxed receiver
    let cloned_config = config.clone();

    println!("Original config has {} receivers.", config.receivers.len());
    println!("Cloned config has {} receivers.", cloned_config.receivers.len());

    // Example of using the installed receiver
    if let Some(receiver) = config.receivers.get(0) {
        let _ = receiver.install(&prometheus_instance);
    }
}
@@ -137,8 +137,9 @@ Our approach addresses both customer and team multi-tenancy requirements:

 ### Implementation Roadmap
 1. **Phase 1**: Implement VPN access and manual tenant provisioning
 2. **Phase 2**: Deploy TenantScore automation for namespace, RBAC, and NetworkPolicy management
-3. **Phase 3**: Integrate Keycloak for centralized identity management
-4. **Phase 4**: Add advanced monitoring and per-tenant observability
+3. **Phase 3**: Work on privilege escalation from pods, audit for weaknesses, enforce security policies on pod runtimes
+4. **Phase 4**: Integrate Keycloak for centralized identity management
+5. **Phase 5**: Add advanced monitoring and per-tenant observability

 ### TenantScore Structure Preview
 ```rust
adr/011-tenant/NetworkPolicy.yaml (new file, 41 lines)
@@ -0,0 +1,41 @@
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: tenant-isolation-policy
  namespace: testtenant
spec:
  podSelector: {} # Selects all pods in the namespace
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - podSelector: {} # Allow from all pods in the same namespace
  egress:
    - to:
        - podSelector: {} # Allow to all pods in the same namespace
    - to:
        - podSelector: {}
          namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: openshift-dns # Target the openshift-dns namespace
      # Note: only opening port 53 is not enough; we will have to dig deeper into this one eventually
      # ports:
      #   - protocol: UDP
      #     port: 53
      #   - protocol: TCP
      #     port: 53
    # Allow egress to the public internet only
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.0.0.0/8     # RFC1918
              - 172.16.0.0/12  # RFC1918
              - 192.168.0.0/16 # RFC1918
              - 169.254.0.0/16 # Link-local
              - 127.0.0.0/8    # Loopback
              - 224.0.0.0/4    # Multicast
              - 240.0.0.0/4    # Reserved
              - 100.64.0.0/10  # Carrier-grade NAT
              - 0.0.0.0/8      # Reserved
adr/011-tenant/TestDeployment.yaml (new file, 95 lines)
@@ -0,0 +1,95 @@
apiVersion: v1
kind: Namespace
metadata:
  name: testtenant
---
apiVersion: v1
kind: Namespace
metadata:
  name: testtenant2
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-web
  namespace: testtenant
spec:
  replicas: 1
  selector:
    matchLabels:
      app: test-web
  template:
    metadata:
      labels:
        app: test-web
    spec:
      containers:
        - name: nginx
          image: nginxinc/nginx-unprivileged
          ports:
            - containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: test-web
  namespace: testtenant
spec:
  selector:
    app: test-web
  ports:
    - port: 80
      targetPort: 8080
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-client
  namespace: testtenant
spec:
  replicas: 1
  selector:
    matchLabels:
      app: test-client
  template:
    metadata:
      labels:
        app: test-client
    spec:
      containers:
        - name: curl
          image: curlimages/curl:latest
          command: ["/bin/sh", "-c", "sleep 3600"]
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-web
  namespace: testtenant2
spec:
  replicas: 1
  selector:
    matchLabels:
      app: test-web
  template:
    metadata:
      labels:
        app: test-web
    spec:
      containers:
        - name: nginx
          image: nginxinc/nginx-unprivileged
          ports:
            - containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: test-web
  namespace: testtenant2
spec:
  selector:
    app: test-web
  ports:
    - port: 80
      targetPort: 8080
adr/012-project-delivery-automation.md (new file, 63 lines)
@@ -0,0 +1,63 @@
# Architecture Decision Record: Project Delivery Automation

Initial Author: Jean-Gabriel Gill-Couture

Initial Date: 2025-06-04

Last Updated Date: 2025-06-04

## Status

Proposed

## Context

As Harmony's goal is to make software delivery easier, we must provide an easy way for developers to express their app's semantics and dependencies with great abstractions, in a similar fashion to what the score.dev project is doing.

Thus, we started working on ways to package common types of applications, such as LAMP, which we began with `LAMPScore`.

Now it is time for the next step: we want to pave the way towards complete lifecycle automation. To do this, we will start with a way to execute Harmony's modules easily from anywhere, starting locally and in CI environments.

## Decision

To achieve easy, portable execution of Harmony, we will follow this architecture (a sketch of the dispatch logic follows this list):

- Host a basic Harmony release, compiled with the CLI, on our Gitea/GitHub server
- This binary will do the following: check whether there is a `harmony` folder in the current path
  - If yes:
    - Check if cargo is available locally and compile the harmony binary, or compile it using a Rust Docker container; if neither cargo nor a container runtime is available, output a message explaining the situation
    - Run the newly compiled binary. (Ideally using PID handoff like `exec` does, but some research around this should be done. Handing off the process helps with OS interaction such as terminal apps, signals, exit codes, and process handling, but there might be side effects.)
  - If not:
    - Suggest initializing a project by auto-detecting what the project looks like
    - When the project type cannot be auto-detected, provide links to Harmony's documentation on how to set up a project and a link to the examples folder, and ask the user whether to initialize an empty Harmony project in the current folder:
      - `harmony/Cargo.toml` with dependencies set
      - `harmony/src/main.rs` with an example `LAMPScore` set up and ready to run
- This same binary can be used in a CI environment to run the target project's Harmony module. By default, we provide these opinionated steps:
  1. **An empty check step.** The purpose of this step is to run all tests and checks against the codebase. For complex projects this could involve a very complex pipeline of test-environment setup and execution, but that is out of scope for now and is not handled by Harmony. For projects with automatic setup, we can fill this step with something like `cargo fmt --check; cargo test; cargo build`, but Harmony is not directly involved in executing it.
  2. **Package and publish.** Once all checks have passed, the production-ready container is built and pushed to a registry. This is done by Harmony.
  3. **Deploy to staging automatically.**
  4. **Run a sanity check on staging.** As Harmony is responsible for deploying, it has all the knowledge needed to perform a sanity check on the staging environment. Most of the time this is a simple verification of the Kubernetes health of all deployed components, plus a poke at the public endpoint when there is one.
  5. **Deploy to production automatically.** Many projects will require manual approval here; this can easily be set up in the CI afterwards, but our opinion is that automation should be the default.
  6. **Run a sanity check on production.** Same check as staging, but on production.
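A minimal sketch of the local dispatch logic described in the list above, under stated assumptions: the folder and binary paths are taken literally from the decision, the cargo/docker fallback is reduced to simple command-availability checks, and error handling is elided. This is illustrative, not harmony_composer's actual implementation.

```rust
// Hedged sketch of the "find harmony folder, compile, hand off" flow.
use std::path::Path;
use std::process::Command;

// Crude availability probe: spawning `<tool> --version` succeeds only if the tool exists.
fn tool_available(name: &str) -> bool {
    Command::new(name).arg("--version").output().is_ok()
}

fn main() {
    if Path::new("harmony").is_dir() {
        if tool_available("cargo") {
            Command::new("cargo")
                .args(["build", "--release"])
                .current_dir("harmony")
                .status()
                .expect("cargo build failed to start");
        } else if tool_available("docker") {
            // Fall back to compiling inside a Rust container (image version assumed).
            let host_dir = std::env::current_dir().unwrap().join("harmony");
            Command::new("docker")
                .args(["run", "--rm", "-v"])
                .arg(format!("{}:/app", host_dir.display()))
                .args(["-w", "/app", "docker.io/rust:1.87.0", "cargo", "build", "--release"])
                .status()
                .expect("docker build failed to start");
        } else {
            eprintln!("Neither cargo nor a container runtime is available; cannot build the harmony module.");
            return;
        }
        // Hand off to the freshly built binary (exec-style PID handoff is a later refinement).
        Command::new("harmony/target/release/harmony")
            .status()
            .expect("failed to run compiled harmony binary");
    } else {
        println!("No `harmony` folder found; consider initializing a project (see docs and examples).");
    }
}
```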

*Note on providing a base pipeline:* Having a complete pipeline set up automatically will encourage development teams to build upon it by adding tests where they belong. The goal here is to provide an opinionated solution that works for most small and large projects. Of course, many organizations will need to add steps such as deploying to sandbox environments, requiring more advanced approvals, or more complex publication and coordination with other projects. But this encompasses the basics required to build and deploy software reliably at any scale.

### Environment setup

TBD: For now, environments (tenants) will be set up and configured manually. Harmony will rely on the kubeconfig provided in the environment where it is running to deploy into the namespace.

A CD tool such as Argo or Flux will be activated by default by Harmony when using application-level Scores such as `LAMPScore`, in a similar way to how the container is automatically built. CI deployment steps will then notify the CD tool of the new release to deploy via its API.

## Rationale

Reasoning behind the decision

## Consequences

Pros/Cons of chosen solution

## Alternatives considered

Pros/Cons of various proposed solutions considered

## Additional Notes
adr/013-monitoring-notifications.md (new file, 78 lines)
@@ -0,0 +1,78 @@
# Architecture Decision Record: Monitoring Notifications

Initial Author: Taha Hawa

Initial Date: 2025-06-26

Last Updated Date: 2025-06-26

## Status

Proposed

## Context

We need to send notifications (typically from AlertManager/Prometheus) and receive them on mobile devices in some form: push messages, SMS, phone calls, email, or all of the above.

## Decision

We should go with https://ntfy.sh, except host it ourselves.

`ntfy` is an open-source solution written in Go that has the features we need.

## Rationale

`ntfy` has pretty much everything we need (push notifications, email forwarding, receiving via webhook) and little we don't. Good fit, lightweight.
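Since ntfy is driven by plain HTTP, publishing is a one-request affair. A minimal sketch of posting an alert to a self-hosted topic (the server URL and topic name below are hypothetical; `reqwest` with the `blocking` feature is already a dependency elsewhere in the workspace):

```rust
// Hedged sketch: publish a message to a self-hosted ntfy topic over HTTP.
// The ntfy host and topic are placeholders, not a real deployment.
fn notify(message: &str) -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::new();
    client
        .post("https://ntfy.example.internal/harmony-alerts") // hypothetical host/topic
        .header("Title", "AlertManager notification")
        .header("Priority", "high")
        .body(message.to_string())
        .send()?
        .error_for_status()?;
    Ok(())
}

fn main() {
    if let Err(e) = notify("PVC fill rate projected to exceed capacity within 2 days") {
        eprintln!("failed to publish notification: {e}");
    }
}
```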

## Consequences

Pros:

- Topics, with ACLs
- Lightweight
- Reliable
- Easy to configure
- Mobile app
  - The mobile app can listen via websocket, poll, or receive via Firebase/GCM on Android (or similar on iOS)
- Forwarding to email
- Text-to-speech phone call messages using the Twilio integration
- Operates on simple HTTP requests/webhooks, easily usable from AlertManager

Cons:

- No SMS pushes
- SQLite DB, which makes it harder to run HA / scale

## Alternatives considered

[AWS SNS](https://aws.amazon.com/sns/):

Pros:

- Highly reliable
- No hosting needed

Cons:

- No control, not self-hosted
- Costs (per usage)

[Apprise](https://github.com/caronc/apprise):

Pros:

- Many more ways of sending notifications
- Can use ntfy as one of the backends

Cons:

- Overkill for the features we need

[Gotify](https://github.com/gotify/server):

Pros:

- Simple, lightweight, written in Go

Cons:

- Push topics are per-user

## Additional Notes
docs/README.md (new file, 1 line)
@@ -0,0 +1 @@
Not much here yet; see the `adr` folder for now. More to come in time!
docs/cyborg-metaphor.md (new file, 13 lines)
@@ -0,0 +1,13 @@
## Conceptual metaphor: the cyborg and the central nervous system

At the heart of Harmony lies a core belief: in modern, decentralized systems, **software and infrastructure are not separate entities.** They are a single, symbiotic organism—a cyborg.

The software is the electronics, the "mind"; the infrastructure is the biological host, the "body". They live or die, thrive or sink together.

Traditional approaches attempt to manage this complex organism with fragmented tools: static YAML for configuration, brittle scripts for automation, and separate Infrastructure as Code (IaC) for provisioning. This creates a disjointed system that struggles to scale or heal itself, making it inadequate for the demands of fully automated, enterprise-grade clusters.

Harmony's goal is to provide the **central nervous system for this cyborg**. We aim to achieve the full automation of complex, decentralized clouds by managing this integrated entity holistically.

To achieve this, a tool must be both robust and powerful. It must manage the entire lifecycle—deployment, upgrades, failure recovery, and decommissioning—with precision. This requires full control over application packaging and a deep, intrinsic integration between the software and the infrastructure it inhabits.

This is why Harmony uses a powerful, living language like Rust. It replaces static, lifeless configuration files with a dynamic, breathing codebase. It allows us to express the complex relationships and behaviors of a modern distributed system, enabling the creation of truly automated, resilient, and powerful platforms that can thrive.
@@ -14,8 +14,8 @@ harmony_macros = { path = "../../harmony_macros" }
 log = { workspace = true }
 env_logger = { workspace = true }
 url = { workspace = true }
-kube = "0.98.0"
-k8s-openapi = { version = "0.24.0", features = [ "v1_30" ] }
+kube = "1.1.0"
+k8s-openapi = { version = "0.25.0", features = ["v1_30"] }
 http = "1.2.0"
 serde_yaml = "0.9.34"
 inquire.workspace = true
@@ -2,10 +2,7 @@ use harmony::{
     data::Version,
     inventory::Inventory,
     maestro::Maestro,
-    modules::{
-        lamp::{LAMPConfig, LAMPScore},
-        monitoring::monitoring_alerting::{AlertChannel, MonitoringAlertingStackScore},
-    },
+    modules::lamp::{LAMPConfig, LAMPScore},
     topology::{K8sAnywhereTopology, Url},
 };

@@ -32,24 +29,28 @@ async fn main() {
         },
     };

-    //let monitoring = MonitoringAlertingScore {
-    //    alert_receivers: vec![Box::new(DiscordWebhook {
-    //        url: Url::Url(url::Url::parse("https://discord.idonotexist.com").unwrap()),
-    //        // TODO write url macro
-    //        // url: url!("https://discord.idonotexist.com"),
-    //    })],
-    //    alert_rules: vec![],
-    //    scrape_targets: vec![],
-    //};
-
+    // You can choose the type of Topology you want; we suggest starting with the
+    // K8sAnywhereTopology as it is the most automatic one and lets you easily deploy
+    // locally, to a development environment from CI, to staging, and to production,
+    // with settings that automatically adapt to each environment grade.
     let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
         Inventory::autoload(),
-        K8sAnywhereTopology::new(),
+        K8sAnywhereTopology::from_env(),
     )
     .await
     .unwrap();

-    let url = url::Url::parse("https://discord.com/api/webhooks/dummy_channel/dummy_token")
-        .expect("invalid URL");
-
-    let mut monitoring_stack_score = MonitoringAlertingStackScore::new();
-    monitoring_stack_score.namespace = Some(lamp_stack.config.namespace.clone());
-
-    maestro.register_all(vec![Box::new(lamp_stack), Box::new(monitoring_stack_score)]);
+    maestro.register_all(vec![Box::new(lamp_stack)]);
     // Here we bootstrap the CLI; this gives some nice features if you need them
     harmony_cli::init(maestro, None).await.unwrap();
 }
examples/monitoring/Cargo.toml (new file, 13 lines)
@@ -0,0 +1,13 @@
[package]
name = "example-monitoring"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true

[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
harmony_macros = { version = "0.1.0", path = "../../harmony_macros" }
tokio.workspace = true
url.workspace = true
examples/monitoring/src/main.rs (new file, 86 lines)
@@ -0,0 +1,86 @@
use std::collections::HashMap;

use harmony::{
    inventory::Inventory,
    maestro::Maestro,
    modules::{
        monitoring::{
            alert_channel::discord_alert_channel::DiscordWebhook,
            alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
            kube_prometheus::{
                helm_prometheus_alert_score::HelmPrometheusAlertingScore,
                types::{
                    HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
                    ServiceMonitorEndpoint,
                },
            },
        },
        prometheus::alerts::{
            infra::dell_server::{
                alert_global_storage_status_critical, alert_global_storage_status_non_recoverable,
                global_storage_status_degraded_non_critical,
            },
            k8s::pvc::high_pvc_fill_rate_over_two_days,
        },
    },
    topology::{K8sAnywhereTopology, Url},
};

#[tokio::main]
async fn main() {
    let discord_receiver = DiscordWebhook {
        name: "test-discord".to_string(),
        url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
    };

    let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
    let dell_system_storage_degraded = global_storage_status_degraded_non_critical();
    let alert_global_storage_status_critical = alert_global_storage_status_critical();
    let alert_global_storage_status_non_recoverable = alert_global_storage_status_non_recoverable();

    let additional_rules =
        AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]);
    let additional_rules2 = AlertManagerRuleGroup::new(
        "dell-server-alerts",
        vec![
            dell_system_storage_degraded,
            alert_global_storage_status_critical,
            alert_global_storage_status_non_recoverable,
        ],
    );

    let service_monitor_endpoint = ServiceMonitorEndpoint {
        port: Some("80".to_string()),
        path: "/metrics".to_string(),
        scheme: HTTPScheme::HTTP,
        ..Default::default()
    };

    let service_monitor = ServiceMonitor {
        name: "test-service-monitor".to_string(),
        selector: Selector {
            match_labels: HashMap::new(),
            match_expressions: vec![MatchExpression {
                key: "test".to_string(),
                operator: Operator::In,
                values: vec!["test-service".to_string()],
            }],
        },
        endpoints: vec![service_monitor_endpoint],
        ..Default::default()
    };
    let alerting_score = HelmPrometheusAlertingScore {
        receivers: vec![Box::new(discord_receiver)],
        rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
        service_monitors: vec![service_monitor],
    };
    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(),
    )
    .await
    .unwrap();

    maestro.register_all(vec![Box::new(alerting_score)]);
    harmony_cli::init(maestro, None).await.unwrap();
}
examples/monitoring_with_tenant/Cargo.toml (new file, 13 lines)
@@ -0,0 +1,13 @@
[package]
name = "example-monitoring-with-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true

[dependencies]
cidr.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true
examples/monitoring_with_tenant/src/main.rs (new file, 90 lines)
@@ -0,0 +1,90 @@
use std::collections::HashMap;

use harmony::{
    data::Id,
    inventory::Inventory,
    maestro::Maestro,
    modules::{
        monitoring::{
            alert_channel::discord_alert_channel::DiscordWebhook,
            alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
            kube_prometheus::{
                helm_prometheus_alert_score::HelmPrometheusAlertingScore,
                types::{
                    HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
                    ServiceMonitorEndpoint,
                },
            },
        },
        prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
        tenant::TenantScore,
    },
    topology::{
        K8sAnywhereTopology, Url,
        tenant::{ResourceLimits, TenantConfig, TenantNetworkPolicy},
    },
};

#[tokio::main]
async fn main() {
    let tenant = TenantScore {
        config: TenantConfig {
            id: Id::from_string("1234".to_string()),
            name: "test-tenant".to_string(),
            resource_limits: ResourceLimits {
                cpu_request_cores: 6.0,
                cpu_limit_cores: 4.0,
                memory_request_gb: 4.0,
                memory_limit_gb: 4.0,
                storage_total_gb: 10.0,
            },
            network_policy: TenantNetworkPolicy::default(),
        },
    };

    let discord_receiver = DiscordWebhook {
        name: "test-discord".to_string(),
        url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
    };

    let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();

    let additional_rules =
        AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]);

    let service_monitor_endpoint = ServiceMonitorEndpoint {
        port: Some("80".to_string()),
        path: "/metrics".to_string(),
        scheme: HTTPScheme::HTTP,
        ..Default::default()
    };

    let service_monitor = ServiceMonitor {
        name: "test-service-monitor".to_string(),
        selector: Selector {
            match_labels: HashMap::new(),
            match_expressions: vec![MatchExpression {
                key: "test".to_string(),
                operator: Operator::In,
                values: vec!["test-service".to_string()],
            }],
        },
        endpoints: vec![service_monitor_endpoint],
        ..Default::default()
    };

    let alerting_score = HelmPrometheusAlertingScore {
        receivers: vec![Box::new(discord_receiver)],
        rules: vec![Box::new(additional_rules)],
        service_monitors: vec![service_monitor],
    };
    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(),
    )
    .await
    .unwrap();

    maestro.register_all(vec![Box::new(tenant), Box::new(alerting_score)]);
    harmony_cli::init(maestro, None).await.unwrap();
}
examples/ntfy/Cargo.toml (new file, 12 lines)
@@ -0,0 +1,12 @@
[package]
name = "example-ntfy"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true

[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true
examples/ntfy/src/main.rs (new file, 19 lines)
@@ -0,0 +1,19 @@
use harmony::{
    inventory::Inventory, maestro::Maestro, modules::monitoring::ntfy::ntfy::NtfyScore,
    topology::K8sAnywhereTopology,
};

#[tokio::main]
async fn main() {
    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(),
    )
    .await
    .unwrap();

    maestro.register_all(vec![Box::new(NtfyScore {
        namespace: "monitoring".to_string(),
    })]);
    harmony_cli::init(maestro, None).await.unwrap();
}
examples/postgres/Cargo.toml (new file, 10 lines)
@@ -0,0 +1,10 @@
[package]
name = "example-postgres"
version = "0.1.0"
edition = "2021"

[dependencies]
harmony = { path = "../../harmony" }
tokio = { version = "1", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
async-trait = "0.1.80"
examples/postgres/src/main.rs (new file, 84 lines)
@@ -0,0 +1,84 @@
use async_trait::async_trait;
use harmony::{
    data::{PostgresDatabase, PostgresUser},
    interpret::InterpretError,
    inventory::Inventory,
    maestro::Maestro,
    modules::postgres::PostgresScore,
    topology::{PostgresServer, Topology},
};
use std::error::Error;

#[derive(Debug, Clone)]
struct MockTopology;

#[async_trait]
impl Topology for MockTopology {
    fn name(&self) -> &str {
        "MockTopology"
    }

    async fn ensure_ready(&self) -> Result<harmony::interpret::Outcome, InterpretError> {
        Ok(harmony::interpret::Outcome::new(
            harmony::interpret::InterpretStatus::SUCCESS,
            "Mock topology is always ready".to_string(),
        ))
    }
}

#[async_trait]
impl PostgresServer for MockTopology {
    async fn ensure_users_exist(&self, users: Vec<PostgresUser>) -> Result<(), InterpretError> {
        println!("Ensuring users exist:");
        for user in users {
            println!("  - {}: {}", user.name, user.password);
        }
        Ok(())
    }

    async fn ensure_databases_exist(
        &self,
        databases: Vec<PostgresDatabase>,
    ) -> Result<(), InterpretError> {
        println!("Ensuring databases exist:");
        for db in databases {
            println!("  - {}: owner={}", db.name, db.owner);
        }
        Ok(())
    }
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let users = vec![
        PostgresUser {
            name: "admin".to_string(),
            password: "password".to_string(),
        },
        PostgresUser {
            name: "user".to_string(),
            password: "password".to_string(),
        },
    ];

    let databases = vec![
        PostgresDatabase {
            name: "app_db".to_string(),
            owner: "admin".to_string(),
        },
        PostgresDatabase {
            name: "user_db".to_string(),
            owner: "user".to_string(),
        },
    ];

    let postgres_score = PostgresScore::new(users, databases);

    let inventory = Inventory::empty();
    let topology = MockTopology;
    let maestro = Maestro::new(inventory, topology);

    maestro.interpret(Box::new(postgres_score)).await?;

    Ok(())
}
examples/rust/Cargo.toml (new file, 14 lines)
@@ -0,0 +1,14 @@
[package]
name = "example-rust"
version = "0.1.0"
edition = "2024"

[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
harmony_macros = { path = "../../harmony_macros" }
tokio = { workspace = true }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
examples/rust/src/main.rs (new file, 20 lines)
@@ -0,0 +1,20 @@
use harmony::{
    inventory::Inventory,
    maestro::Maestro,
    modules::application::{RustWebappScore, features::ContinuousDelivery},
    topology::{K8sAnywhereTopology, Url},
};

#[tokio::main]
async fn main() {
    let app = RustWebappScore {
        name: "Example Rust Webapp".to_string(),
        domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
        features: vec![Box::new(ContinuousDelivery {})],
    };

    let topology = K8sAnywhereTopology::from_env();
    let mut maestro = Maestro::new(Inventory::autoload(), topology);
    maestro.register_all(vec![Box::new(app)]);
    harmony_cli::init(maestro, None).await.unwrap();
}
examples/tenant/Cargo.toml (new file, 18 lines)
@@ -0,0 +1,18 @@
[package]
name = "example-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false

[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
examples/tenant/src/main.rs (new file, 41 lines)
@@ -0,0 +1,41 @@
use harmony::{
    data::Id,
    inventory::Inventory,
    maestro::Maestro,
    modules::tenant::TenantScore,
    topology::{K8sAnywhereTopology, tenant::TenantConfig},
};

#[tokio::main]
async fn main() {
    let tenant = TenantScore {
        config: TenantConfig {
            id: Id::from_str("test-tenant-id"),
            name: "testtenant".to_string(),
            ..Default::default()
        },
    };

    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(),
    )
    .await
    .unwrap();

    maestro.register_all(vec![Box::new(tenant)]);
    harmony_cli::init(maestro, None).await.unwrap();
}

// TODO write tests
// - Create a Tenant with a mostly-default config and make sure the namespace is created
//   - Deploy a sample client/server app with nginx-unprivileged and a service
//   - Exec into the client pod and validate the following:
//     - can reach the internet
//     - can reach the server pod
//     - can resolve DNS queries to the internet
//     - can resolve DNS queries to services
//     - cannot reach services and pods in other namespaces
// - Create a Tenant with specific cpu/ram/storage requests/limits and make sure they are enforced by trying to
//   deploy a pod with lower requests/limits (accepted) and higher requests/limits (rejected)
// - Create TenantCredentials and make sure they give access only to the correct tenant
@@ -10,9 +10,9 @@ publish = false
 harmony = { path = "../../harmony" }
 harmony_tui = { path = "../../harmony_tui" }
 harmony_types = { path = "../../harmony_types" }
+harmony_macros = { path = "../../harmony_macros" }
 cidr = { workspace = true }
 tokio = { workspace = true }
-harmony_macros = { path = "../../harmony_macros" }
 log = { workspace = true }
 env_logger = { workspace = true }
 url = { workspace = true }
||||
@@ -6,6 +6,8 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
rand = "0.9"
|
||||
hex = "0.4"
|
||||
libredfish = "0.1.1"
|
||||
reqwest = { version = "0.11", features = ["blocking", "json"] }
|
||||
russh = "0.45.0"
|
||||
@@ -40,6 +42,7 @@ dockerfile_builder = "0.1.5"
|
||||
temp-file = "0.1.9"
|
||||
convert_case.workspace = true
|
||||
email_address = "0.2.9"
|
||||
chrono.workspace = true
|
||||
fqdn = { version = "0.4.6", features = [
|
||||
"domain-label-cannot-start-or-end-with-hyphen",
|
||||
"domain-label-length-limited-to-63",
|
||||
@@ -50,3 +53,7 @@ fqdn = { version = "0.4.6", features = [
|
||||
] }
|
||||
temp-dir = "0.1.14"
|
||||
dyn-clone = "1.0.19"
|
||||
similar.workspace = true
|
||||
futures-util = "0.3.31"
|
||||
tokio-util = "0.7.15"
|
||||
strum = { version = "0.27.1", features = ["derive"] }
|
||||
|
||||
@@ -10,4 +10,6 @@ lazy_static! {
         std::env::var("HARMONY_REGISTRY_URL").unwrap_or_else(|_| "hub.nationtech.io".to_string());
     pub static ref REGISTRY_PROJECT: String =
         std::env::var("HARMONY_REGISTRY_PROJECT").unwrap_or_else(|_| "harmony".to_string());
+    pub static ref DRY_RUN: bool =
+        std::env::var("HARMONY_DRY_RUN").map_or(true, |value| value.parse().unwrap_or(true));
 }
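Note the defaulting behavior in the added flag: it is `true` (dry-run) unless `HARMONY_DRY_RUN` is set to a value that parses as `false`. A hypothetical call site, assuming the static is exposed roughly as shown (the `harmony::config::DRY_RUN` path and `apply_changes` helper are invented for illustration):

```rust
// Hedged sketch of consuming a DRY_RUN flag of this shape.
fn apply_changes() {
    println!("applying changes for real");
}

fn main() {
    // In Harmony this would be `*harmony::config::DRY_RUN` (path assumed);
    // the expression below mirrors the lazy_static definition.
    let dry_run: bool = std::env::var("HARMONY_DRY_RUN")
        .map_or(true, |value| value.parse().unwrap_or(true));

    if dry_run {
        println!("dry-run: would apply changes");
    } else {
        apply_changes();
    }
}
```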
@@ -1,5 +1,23 @@
+use rand::distr::Alphanumeric;
+use rand::distr::SampleString;
+use std::time::SystemTime;
+use std::time::UNIX_EPOCH;

 use serde::{Deserialize, Serialize};

+/// A unique identifier designed for ease of use.
+///
+/// You can pass it any String to use as an Id, or you can use the default format with `Id::default()`.
+///
+/// The default format looks like this:
+///
+/// `462d4c_g2COgai`
+///
+/// The first part is the unix timestamp in hexadecimal, which makes Ids easily sorted by creation time.
+/// The second part is a series of 7 random characters.
+///
+/// **It is not meant to be very secure or unique**; it is suitable for generating up to 10,000 items per
+/// second with a reasonable collision probability of about 0.000014 (1.4 × 10⁻⁵), as calculated by this calculator: https://kevingal.com/apps/collision.html
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct Id {
     value: String,

@@ -9,4 +27,31 @@ impl Id {
     pub fn from_string(value: String) -> Self {
         Self { value }
     }
+
+    pub fn from_str(value: &str) -> Self {
+        Self::from_string(value.to_string())
+    }
 }
+
+impl std::fmt::Display for Id {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(&self.value)
+    }
+}
+
+impl Default for Id {
+    fn default() -> Self {
+        let start = SystemTime::now();
+        let since_the_epoch = start
+            .duration_since(UNIX_EPOCH)
+            .expect("Time went backwards");
+        let timestamp = since_the_epoch.as_secs();

+        let hex_timestamp = format!("{:x}", timestamp & 0xffffff);
+
+        let random_part: String = Alphanumeric.sample_string(&mut rand::rng(), 7);
+
+        let value = format!("{}_{}", hex_timestamp, random_part);
+        Self { value }
+    }
+}
|
||||
|
||||
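// Editor's example — a minimal sketch (not part of the diff) of the two Id
// construction paths documented above; "my-fixed-id" is a hypothetical value.
fn id_demo() {
    let fixed = Id::from_str("my-fixed-id"); // caller-supplied identifier
    let generated = Id::default();           // e.g. "462d4c_g2COgai"
    println!("{fixed} / {generated}");       // Display writes the raw value
}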
@@ -2,3 +2,6 @@ mod id;
mod version;
pub use id::*;
pub use version::*;

mod postgres;
pub use postgres::*;

harmony/src/domain/data/postgres.rs (new file, 13 lines)
@@ -0,0 +1,13 @@
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PostgresUser {
    pub name: String,
    pub password: String, // In a real scenario, this should be a secret type
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PostgresDatabase {
    pub name: String,
    pub owner: String,
}

@@ -20,6 +20,9 @@ pub enum InterpretName {
    Panic,
    OPNSense,
    K3dInstallation,
    TenantInterpret,
    Application,
    Postgres,
}

impl std::fmt::Display for InterpretName {
@@ -35,6 +38,9 @@ impl std::fmt::Display for InterpretName {
            InterpretName::Panic => f.write_str("Panic"),
            InterpretName::OPNSense => f.write_str("OPNSense"),
            InterpretName::K3dInstallation => f.write_str("K3dInstallation"),
            InterpretName::TenantInterpret => f.write_str("Tenant"),
            InterpretName::Application => f.write_str("Application"),
            InterpretName::Postgres => f.write_str("Postgres"),
        }
    }
}
@@ -122,3 +128,11 @@ impl From<kube::Error> for InterpretError {
        }
    }
}

impl From<String> for InterpretError {
    fn from(value: String) -> Self {
        Self {
            msg: format!("InterpretError : {value}"),
        }
    }
}

@@ -34,6 +34,17 @@ pub struct Inventory {
}

impl Inventory {
    pub fn empty() -> Self {
        Self {
            location: Location::new("Empty".to_string(), "location".to_string()),
            switch: vec![],
            firewall: vec![],
            worker_host: vec![],
            storage_host: vec![],
            control_plane_host: vec![],
        }
    }

    pub fn autoload() -> Self {
        Self {
            location: Location::test_building(),

@@ -46,7 +46,7 @@ pub struct HAClusterTopology {
#[async_trait]
impl Topology for HAClusterTopology {
    fn name(&self) -> &str {
        todo!()
        "HAClusterTopology"
    }
    async fn ensure_ready(&self) -> Result<Outcome, InterpretError> {
        todo!(
harmony/src/domain/topology/installable.rs (new file, 14 lines)
@@ -0,0 +1,14 @@
use async_trait::async_trait;

use crate::{interpret::InterpretError, inventory::Inventory};

#[async_trait]
pub trait Installable<T>: Send + Sync {
    async fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), InterpretError>;

    async fn ensure_installed(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<(), InterpretError>;
}
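// Editor's example — a minimal sketch (not part of the diff) of what an
// Installable implementor could look like; `NullInstallable` is hypothetical.
#[derive(Debug)]
struct NullInstallable;

#[async_trait]
impl<T: Send + Sync> Installable<T> for NullInstallable {
    async fn configure(&self, _inventory: &Inventory, _topology: &T) -> Result<(), InterpretError> {
        Ok(()) // nothing to configure
    }

    async fn ensure_installed(
        &self,
        _inventory: &Inventory,
        _topology: &T,
    ) -> Result<(), InterpretError> {
        Ok(()) // idempotent no-op install
    }
}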
@@ -1,18 +1,38 @@
use derive_new::new;
use k8s_openapi::NamespaceResourceScope;
use kube::{
    Api, Client, Config, Error, Resource,
    api::PostParams,
    config::{KubeConfigOptions, Kubeconfig},
use futures_util::StreamExt;
use k8s_openapi::{
    ClusterResourceScope, NamespaceResourceScope,
    api::{apps::v1::Deployment, core::v1::Pod},
};
use log::error;
use kube::runtime::conditions;
use kube::runtime::wait::await_condition;
use kube::{
    Client, Config, Error, Resource,
    api::{Api, AttachParams, ListParams, Patch, PatchParams, ResourceExt},
    config::{KubeConfigOptions, Kubeconfig},
    core::ErrorResponse,
    runtime::reflector::Lookup,
};
use log::{debug, error, trace};
use serde::de::DeserializeOwned;
use similar::{DiffableStr, TextDiff};

#[derive(new)]
#[derive(new, Clone)]
pub struct K8sClient {
    client: Client,
}

impl std::fmt::Debug for K8sClient {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // This is a poor man's debug implementation for now as kube::Client does not provide much
        // useful information
        f.write_fmt(format_args!(
            "K8sClient {{ kube client using default namespace {} }}",
            self.client.default_namespace()
        ))
    }
}

impl K8sClient {
    pub async fn try_default() -> Result<Self, Error> {
        Ok(Self {
@@ -20,52 +40,203 @@ impl K8sClient {
        })
    }

    pub async fn apply_all<
        K: Resource<Scope = NamespaceResourceScope>
            + std::fmt::Debug
            + Sync
            + DeserializeOwned
            + Default
            + serde::Serialize
            + Clone,
    >(
    pub async fn wait_until_deployment_ready(
        &self,
        resource: &Vec<K>,
    ) -> Result<Vec<K>, kube::Error>
    where
        <K as kube::Resource>::DynamicType: Default,
    {
        let mut result = vec![];
        for r in resource.iter() {
            let api: Api<K> = Api::all(self.client.clone());
            result.push(api.create(&PostParams::default(), &r).await?);
        name: String,
        namespace: Option<&str>,
        timeout: Option<u64>,
    ) -> Result<(), String> {
        let api: Api<Deployment>;

        if let Some(ns) = namespace {
            api = Api::namespaced(self.client.clone(), ns);
        } else {
            api = Api::default_namespaced(self.client.clone());
        }

        let establish = await_condition(api, name.as_str(), conditions::is_deployment_completed());
        let t = if let Some(t) = timeout { t } else { 300 };
        let res = tokio::time::timeout(std::time::Duration::from_secs(t), establish).await;

        if let Ok(r) = res {
            return Ok(());
        } else {
            return Err("timed out while waiting for deployment".to_string());
        }
        Ok(result)
    }

    pub async fn apply_namespaced<K>(
    /// Will execute a command in the first pod found that matches the label `app.kubernetes.io/name={name}`
    pub async fn exec_app(
        &self,
        resource: &Vec<K>,
        ns: Option<&str>,
    ) -> Result<Vec<K>, Error>
        name: String,
        namespace: Option<&str>,
        command: Vec<&str>,
    ) -> Result<(), String> {
        let api: Api<Pod>;

        if let Some(ns) = namespace {
            api = Api::namespaced(self.client.clone(), ns);
        } else {
            api = Api::default_namespaced(self.client.clone());
        }
        let pod_list = api
            .list(&ListParams::default().labels(format!("app.kubernetes.io/name={name}").as_str()))
            .await
            .expect("couldn't get list of pods");

        let res = api
            .exec(
                pod_list
                    .items
                    .first()
                    .expect("couldn't get pod")
                    .name()
                    .expect("couldn't get pod name")
                    .into_owned()
                    .as_str(),
                command,
                &AttachParams::default(),
            )
            .await;

        match res {
            Err(e) => return Err(e.to_string()),
            Ok(mut process) => {
                let status = process
                    .take_status()
                    .expect("Couldn't get status")
                    .await
                    .expect("Couldn't unwrap status");

                if let Some(s) = status.status {
                    debug!("Status: {}", s);
                    if s == "Success" {
                        return Ok(());
                    } else {
                        return Err(s);
                    }
                } else {
                    return Err("Couldn't get inner status of pod exec".to_string());
                }
            }
        }
    }

    /// Apply a resource in namespace
    ///
    /// See `kubectl apply` for more information on the expected behavior of this function
    pub async fn apply<K>(&self, resource: &K, namespace: Option<&str>) -> Result<K, Error>
    where
        K: Resource<Scope = NamespaceResourceScope>
            + Clone
            + std::fmt::Debug
            + DeserializeOwned
            + serde::Serialize
            + Default,
        K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
        <K as Resource>::Scope: ApplyStrategy<K>,
        <K as kube::Resource>::DynamicType: Default,
    {
        let mut resources = Vec::new();
        for r in resource.iter() {
            let api: Api<K> = match ns {
                Some(ns) => Api::namespaced(self.client.clone(), ns),
                None => Api::default_namespaced(self.client.clone()),
            };
            resources.push(api.create(&PostParams::default(), &r).await?);
        debug!(
            "Applying resource {:?} with ns {:?}",
            resource.meta().name,
            namespace
        );
        trace!(
            "{:#}",
            serde_json::to_value(resource).unwrap_or(serde_json::Value::Null)
        );

        let api: Api<K> =
            <<K as Resource>::Scope as ApplyStrategy<K>>::get_api(&self.client, namespace);
        // api.create(&PostParams::default(), &resource).await
        let patch_params = PatchParams::apply("harmony");
        let name = resource
            .meta()
            .name
            .as_ref()
            .expect("K8s Resource should have a name");

        if *crate::config::DRY_RUN {
            match api.get(name).await {
                Ok(current) => {
                    trace!("Received current value {current:#?}");
                    // The resource exists, so we calculate and display a diff.
                    println!("\nPerforming dry-run for resource: '{}'", name);
                    let mut current_yaml = serde_yaml::to_value(&current)
                        .expect(&format!("Could not serialize current value : {current:#?}"));
                    if current_yaml.is_mapping() && current_yaml.get("status").is_some() {
                        let map = current_yaml.as_mapping_mut().unwrap();
                        let removed = map.remove_entry("status");
                        trace!("Removed status {:?}", removed);
                    } else {
                        trace!(
                            "Did not find status entry for current object {}/{}",
                            current.meta().namespace.as_ref().unwrap_or(&"".to_string()),
                            current.meta().name.as_ref().unwrap_or(&"".to_string())
                        );
                    }
                    let current_yaml = serde_yaml::to_string(&current_yaml)
                        .unwrap_or_else(|_| "Failed to serialize current resource".to_string());
                    let new_yaml = serde_yaml::to_string(resource)
                        .unwrap_or_else(|_| "Failed to serialize new resource".to_string());

                    if current_yaml == new_yaml {
                        println!("No changes detected.");
                        // Return the current resource state as there are no changes.
                        return Ok(current);
                    }

                    println!("Changes detected:");
                    let diff = TextDiff::from_lines(&current_yaml, &new_yaml);

                    // Iterate over the changes and print them in a git-like diff format.
                    for change in diff.iter_all_changes() {
                        let sign = match change.tag() {
                            similar::ChangeTag::Delete => "-",
                            similar::ChangeTag::Insert => "+",
                            similar::ChangeTag::Equal => " ",
                        };
                        print!("{}{}", sign, change);
                    }
                    // In a dry run, we return the new resource state that would have been applied.
                    Ok(resource.clone())
                }
                Err(Error::Api(ErrorResponse { code: 404, .. })) => {
                    // The resource does not exist, so the "diff" is the entire new resource.
                    println!("\nPerforming dry-run for new resource: '{}'", name);
                    println!(
                        "Resource does not exist. It would be created with the following content:"
                    );
                    let new_yaml = serde_yaml::to_string(resource)
                        .unwrap_or_else(|_| "Failed to serialize new resource".to_string());

                    // Print each line of the new resource with a '+' prefix.
                    for line in new_yaml.lines() {
                        println!("+{}", line);
                    }
                    // In a dry run, we return the new resource state that would have been created.
                    Ok(resource.clone())
                }
                Err(e) => {
                    // Another API error occurred.
                    error!("Failed to get resource '{}': {}", name, e);
                    Err(e)
                }
            }
        } else {
            return api
                .patch(name, &patch_params, &Patch::Apply(resource))
                .await;
        }
        Ok(resources)
    }

    pub async fn apply_many<K>(&self, resource: &Vec<K>, ns: Option<&str>) -> Result<Vec<K>, Error>
    where
        K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
        <K as Resource>::Scope: ApplyStrategy<K>,
        <K as kube::Resource>::DynamicType: Default,
    {
        let mut result = Vec::new();
        for r in resource.iter() {
            result.push(self.apply(r, ns).await?);
        }

        Ok(result)
    }

    pub(crate) async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
@@ -86,3 +257,35 @@ impl K8sClient {
        ))
    }
}

pub trait ApplyStrategy<K: Resource> {
    fn get_api(client: &Client, ns: Option<&str>) -> Api<K>;
}

/// Implementation for all resources that are cluster-scoped.
/// It will always use `Api::all` and ignore the namespace parameter.
impl<K> ApplyStrategy<K> for ClusterResourceScope
where
    K: Resource<Scope = ClusterResourceScope>,
    <K as kube::Resource>::DynamicType: Default,
{
    fn get_api(client: &Client, _ns: Option<&str>) -> Api<K> {
        Api::all(client.clone())
    }
}

/// Implementation for all resources that are namespace-scoped.
/// It will use `Api::namespaced` if a namespace is provided, otherwise
/// it falls back to the default namespace configured in your kubeconfig.
impl<K> ApplyStrategy<K> for NamespaceResourceScope
where
    K: Resource<Scope = NamespaceResourceScope>,
    <K as kube::Resource>::DynamicType: Default,
{
    fn get_api(client: &Client, ns: Option<&str>) -> Api<K> {
        match ns {
            Some(ns) => Api::namespaced(client.clone(), ns),
            None => Api::default_namespaced(client.clone()),
        }
    }
}
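// Editor's example — a minimal sketch (not part of the diff) tying the new
// K8sClient methods together; the names, namespace and ConfigMap are hypothetical.
use k8s_openapi::api::core::v1::ConfigMap;

async fn k8s_client_demo(client: &K8sClient) -> Result<(), String> {
    // Server-side apply (or a printed diff when HARMONY_DRY_RUN is active):
    let mut cm = ConfigMap::default();
    cm.metadata.name = Some("demo-config".to_string());
    client.apply(&cm, Some("demo-ns")).await.map_err(|e| e.to_string())?;

    // Block until the deployment reports completion (default timeout 300s):
    client
        .wait_until_deployment_ready("demo-app".to_string(), Some("demo-ns"), None)
        .await?;

    // Run a command in the first pod labelled app.kubernetes.io/name=demo-app:
    client
        .exec_app("demo-app".to_string(), Some("demo-ns"), vec!["ls", "/"])
        .await
}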
@@ -2,7 +2,8 @@ use std::{process::Command, sync::Arc};

use async_trait::async_trait;
use inquire::Confirm;
use log::{info, warn};
use log::{debug, info, warn};
use serde::Serialize;
use tokio::sync::OnceCell;

use crate::{
@@ -17,26 +18,27 @@ use crate::{
use super::{
    HelmCommand, K8sclient, Topology,
    k8s::K8sClient,
    tenant::{
        ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy, k8s::K8sTenantManager,
    },
    tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager},
};

#[derive(Clone, Debug)]
struct K8sState {
    client: Arc<K8sClient>,
    source: K8sSource,
    _source: K8sSource,
    message: String,
}

#[derive(Debug)]
#[derive(Debug, Clone)]
enum K8sSource {
    LocalK3d,
    Kubeconfig,
}

#[derive(Clone, Debug)]
pub struct K8sAnywhereTopology {
    k8s_state: OnceCell<Option<K8sState>>,
    tenant_manager: OnceCell<K8sTenantManager>,
    k8s_state: Arc<OnceCell<Option<K8sState>>>,
    tenant_manager: Arc<OnceCell<K8sTenantManager>>,
    config: Arc<K8sAnywhereConfig>,
}

#[async_trait]
@@ -56,11 +58,29 @@ impl K8sclient for K8sAnywhereTopology {
    }
}

impl Serialize for K8sAnywhereTopology {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}

impl K8sAnywhereTopology {
    pub fn new() -> Self {
    pub fn from_env() -> Self {
        Self {
            k8s_state: OnceCell::new(),
            tenant_manager: OnceCell::new(),
            k8s_state: Arc::new(OnceCell::new()),
            tenant_manager: Arc::new(OnceCell::new()),
            config: Arc::new(K8sAnywhereConfig::from_env()),
        }
    }

    pub fn with_config(config: K8sAnywhereConfig) -> Self {
        Self {
            k8s_state: Arc::new(OnceCell::new()),
            tenant_manager: Arc::new(OnceCell::new()),
            config: Arc::new(config),
        }
    }

@@ -101,27 +121,15 @@ impl K8sAnywhereTopology {
    }

    async fn try_get_or_install_k8s_client(&self) -> Result<Option<K8sState>, InterpretError> {
        let k8s_anywhere_config = K8sAnywhereConfig {
            kubeconfig: std::env::var("KUBECONFIG").ok().map(|v| v.to_string()),
            use_system_kubeconfig: std::env::var("HARMONY_USE_SYSTEM_KUBECONFIG")
                .map_or_else(|_| false, |v| v.parse().ok().unwrap_or(false)),
            autoinstall: std::env::var("HARMONY_AUTOINSTALL")
                .map_or_else(|_| false, |v| v.parse().ok().unwrap_or(false)),
        };
        let k8s_anywhere_config = &self.config;

        if k8s_anywhere_config.use_system_kubeconfig {
            match self.try_load_system_kubeconfig().await {
                Some(_client) => todo!(),
                None => todo!(),
            }
        }

        if let Some(kubeconfig) = k8s_anywhere_config.kubeconfig {
        if let Some(kubeconfig) = &k8s_anywhere_config.kubeconfig {
            debug!("Loading kubeconfig {kubeconfig}");
            match self.try_load_kubeconfig(&kubeconfig).await {
                Some(client) => {
                    return Ok(Some(K8sState {
                        client: Arc::new(client),
                        source: K8sSource::Kubeconfig,
                        _source: K8sSource::Kubeconfig,
                        message: format!("Loaded k8s client from kubeconfig {kubeconfig}"),
                    }));
                }
@@ -133,6 +141,14 @@ impl K8sAnywhereTopology {
            }
        }

        if k8s_anywhere_config.use_system_kubeconfig {
            debug!("Loading system kubeconfig");
            match self.try_load_system_kubeconfig().await {
                Some(_client) => todo!(),
                None => todo!(),
            }
        }

        info!("No kubernetes configuration found");

        if !k8s_anywhere_config.autoinstall {
@@ -161,7 +177,7 @@ impl K8sAnywhereTopology {
        let state = match k3d.get_client().await {
            Ok(client) => K8sState {
                client: Arc::new(K8sClient::new(client)),
                source: K8sSource::LocalK3d,
                _source: K8sSource::LocalK3d,
                message: "Successfully installed K3D cluster and acquired client".to_string(),
            },
            Err(_) => todo!(),
@@ -170,6 +186,21 @@ impl K8sAnywhereTopology {
        Ok(Some(state))
    }

    async fn ensure_k8s_tenant_manager(&self) -> Result<(), String> {
        if let Some(_) = self.tenant_manager.get() {
            return Ok(());
        }

        self.tenant_manager
            .get_or_try_init(async || -> Result<K8sTenantManager, String> {
                let k8s_client = self.k8s_client().await?;
                Ok(K8sTenantManager::new(k8s_client))
            })
            .await?;

        Ok(())
    }

    fn get_k8s_tenant_manager(&self) -> Result<&K8sTenantManager, ExecutorError> {
        match self.tenant_manager.get() {
            Some(t) => Ok(t),
@@ -180,25 +211,38 @@ impl K8sAnywhereTopology {
    }
}

struct K8sAnywhereConfig {
#[derive(Clone, Debug)]
pub struct K8sAnywhereConfig {
    /// The path of the KUBECONFIG file that Harmony should use to interact with the Kubernetes
    /// cluster
    ///
    /// Default : None
    kubeconfig: Option<String>,
    pub kubeconfig: Option<String>,

    /// Whether to use the system KUBECONFIG, either the environment variable or the file in the
    /// default or configured location
    ///
    /// Default : false
    use_system_kubeconfig: bool,
    pub use_system_kubeconfig: bool,

    /// Whether to install automatically a kubernetes cluster
    ///
    /// When enabled, autoinstall will setup a K3D cluster on the localhost. https://k3d.io/stable/
    ///
    /// Default: true
    autoinstall: bool,
    pub autoinstall: bool,
}

impl K8sAnywhereConfig {
    fn from_env() -> Self {
        Self {
            kubeconfig: std::env::var("KUBECONFIG").ok().map(|v| v.to_string()),
            use_system_kubeconfig: std::env::var("HARMONY_USE_SYSTEM_KUBECONFIG")
                .map_or_else(|_| false, |v| v.parse().ok().unwrap_or(false)),
            autoinstall: std::env::var("HARMONY_AUTOINSTALL")
                .map_or_else(|_| false, |v| v.parse().ok().unwrap_or(false)),
        }
    }
}
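// Editor's example — a minimal sketch (not part of the diff) of the two ways to
// obtain a topology with the config above; the values shown are hypothetical.
fn topology_demo() -> (K8sAnywhereTopology, K8sAnywhereTopology) {
    // Reads KUBECONFIG, HARMONY_USE_SYSTEM_KUBECONFIG and HARMONY_AUTOINSTALL:
    let from_env = K8sAnywhereTopology::from_env();
    // Or bypass the environment entirely:
    let explicit = K8sAnywhereTopology::with_config(K8sAnywhereConfig {
        kubeconfig: Some("/tmp/kc".to_string()),
        use_system_kubeconfig: false,
        autoinstall: true,
    });
    (from_env, explicit)
}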
#[async_trait]
@@ -217,6 +261,10 @@ impl Topology for K8sAnywhereTopology {
            "No K8s client could be found or installed".to_string(),
        ))?;

        self.ensure_k8s_tenant_manager()
            .await
            .map_err(|e| InterpretError::new(e))?;

        match self.is_helm_available() {
            Ok(()) => Ok(Outcome::success(format!(
                "{} + helm available",
@@ -237,29 +285,10 @@ impl TenantManager for K8sAnywhereTopology {
            .await
    }

    async fn update_tenant_resource_limits(
        &self,
        tenant_name: &str,
        new_limits: &ResourceLimits,
    ) -> Result<(), ExecutorError> {
        self.get_k8s_tenant_manager()?
            .update_tenant_resource_limits(tenant_name, new_limits)
            .await
    }

    async fn update_tenant_network_policy(
        &self,
        tenant_name: &str,
        new_policy: &TenantNetworkPolicy,
    ) -> Result<(), ExecutorError> {
        self.get_k8s_tenant_manager()?
            .update_tenant_network_policy(tenant_name, new_policy)
            .await
    }

    async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError> {
        self.get_k8s_tenant_manager()?
            .deprovision_tenant(tenant_name)
    async fn get_tenant_config(&self) -> Option<TenantConfig> {
        self.get_k8s_tenant_manager()
            .ok()?
            .get_tenant_config()
            .await
    }
}
@@ -1,6 +1,7 @@
mod ha_cluster;
mod host_binding;
mod http;
pub mod installable;
mod k8s_anywhere;
mod localhost;
pub mod oberservability;
@@ -22,6 +23,9 @@ pub use network::*;
use serde::Serialize;
pub use tftp::*;

mod postgres;
pub use postgres::*;

mod helm_command;
pub use helm_command::*;
@@ -1,31 +1,77 @@
use async_trait::async_trait;
use log::debug;

use std::fmt::Debug;
use url::Url;
use crate::{
    data::{Id, Version},
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
    inventory::Inventory,
    topology::{Topology, installable::Installable},
};

use crate::interpret::InterpretError;

use crate::{interpret::Outcome, topology::Topology};

/// Represents an entity responsible for collecting and organizing observability data
/// from various telemetry sources.
/// A `Monitor` abstracts the logic required to scrape, aggregate, and structure
/// monitoring data, enabling consistent processing regardless of the underlying data source.
#[async_trait]
pub trait Monitor<T: Topology>: Debug + Send + Sync {
    async fn deploy_monitor(
pub trait AlertSender: Send + Sync + std::fmt::Debug {
    fn name(&self) -> String;
}

#[derive(Debug)]
pub struct AlertingInterpret<S: AlertSender> {
    pub sender: S,
    pub receivers: Vec<Box<dyn AlertReceiver<S>>>,
    pub rules: Vec<Box<dyn AlertRule<S>>>,
}

#[async_trait]
impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInterpret<S> {
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
        alert_receivers: Vec<AlertReceiver>,
    ) -> Result<Outcome, InterpretError>;
    ) -> Result<Outcome, InterpretError> {
        self.sender.configure(inventory, topology).await?;
        for receiver in self.receivers.iter() {
            receiver.install(&self.sender).await?;
        }
        for rule in self.rules.iter() {
            debug!("installing rule: {:#?}", rule);
            rule.install(&self.sender).await?;
        }
        self.sender.ensure_installed(inventory, topology).await?;
        Ok(Outcome::success(format!(
            "successfully installed alert sender {}",
            self.sender.name()
        )))
    }

    async fn delete_monitor(
        &self,
        topolgy: &T,
        alert_receivers: Vec<AlertReceiver>,
    ) -> Result<Outcome, InterpretError>;
    fn get_name(&self) -> InterpretName {
        todo!()
    }

    fn get_version(&self) -> Version {
        todo!()
    }

    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}

pub struct AlertReceiver {
    pub receiver_id: String,
#[async_trait]
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
    async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
    fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
}

#[async_trait]
pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
    async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
    fn clone_box(&self) -> Box<dyn AlertRule<S>>;
}

#[async_trait]
pub trait ScrapeTarger<S: AlertSender> {
    async fn install(&self, sender: &S) -> Result<(), InterpretError>;
}
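// Editor's example — a minimal sketch (not part of the diff) of an AlertReceiver
// implementation; `LogOnlyReceiver` is hypothetical and works with any AlertSender.
#[derive(Debug, Clone)]
struct LogOnlyReceiver;

#[async_trait]
impl<S: AlertSender> AlertReceiver<S> for LogOnlyReceiver {
    async fn install(&self, sender: &S) -> Result<Outcome, InterpretError> {
        // A real receiver would register itself with the sender's backend here;
        // AlertingInterpret::execute calls this for every receiver before rules.
        Ok(Outcome::success(format!("receiver wired to {}", sender.name())))
    }

    fn clone_box(&self) -> Box<dyn AlertReceiver<S>> {
        Box::new(self.clone())
    }
}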
harmony/src/domain/topology/postgres.rs (new file, 14 lines)
@@ -0,0 +1,14 @@
use crate::{
    data::{PostgresDatabase, PostgresUser},
    interpret::InterpretError,
};
use async_trait::async_trait;

#[async_trait]
pub trait PostgresServer {
    async fn ensure_users_exist(&self, users: Vec<PostgresUser>) -> Result<(), InterpretError>;
    async fn ensure_databases_exist(
        &self,
        databases: Vec<PostgresDatabase>,
    ) -> Result<(), InterpretError>;
}
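// Editor's example — a minimal sketch (not part of the diff) of driving the
// PostgresServer trait above; `server` is any hypothetical implementor.
async fn seed(server: &impl PostgresServer) -> Result<(), InterpretError> {
    let owner = PostgresUser {
        name: "app".to_string(),
        password: "change-me".to_string(), // see the note above: should be a secret type
    };
    server.ensure_users_exist(vec![owner.clone()]).await?;
    server
        .ensure_databases_exist(vec![PostgresDatabase {
            name: "app_db".to_string(),
            owner: owner.name,
        }])
        .await
}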
@@ -1,95 +1,441 @@
use std::sync::Arc;

use crate::{executors::ExecutorError, topology::k8s::K8sClient};
use crate::{
    executors::ExecutorError,
    topology::k8s::{ApplyStrategy, K8sClient},
};
use async_trait::async_trait;
use derive_new::new;
use k8s_openapi::api::core::v1::Namespace;
use k8s_openapi::{
    api::{
        core::v1::{LimitRange, Namespace, ResourceQuota},
        networking::v1::{
            NetworkPolicy, NetworkPolicyEgressRule, NetworkPolicyIngressRule, NetworkPolicyPort,
        },
    },
    apimachinery::pkg::util::intstr::IntOrString,
};
use kube::Resource;
use log::{debug, info, warn};
use serde::de::DeserializeOwned;
use serde_json::json;
use tokio::sync::OnceCell;

use super::{ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy};
use super::{TenantConfig, TenantManager};

#[derive(new)]
#[derive(Clone, Debug)]
pub struct K8sTenantManager {
    k8s_client: Arc<K8sClient>,
    k8s_tenant_config: Arc<OnceCell<TenantConfig>>,
}

#[async_trait]
impl TenantManager for K8sTenantManager {
    async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
impl K8sTenantManager {
    pub fn new(client: Arc<K8sClient>) -> Self {
        Self {
            k8s_client: client,
            k8s_tenant_config: Arc::new(OnceCell::new()),
        }
    }
}

impl K8sTenantManager {
    fn get_namespace_name(&self, config: &TenantConfig) -> String {
        config.name.clone()
    }

    fn ensure_constraints(&self, _namespace: &Namespace) -> Result<(), ExecutorError> {
        warn!("Validate that when tenant already exists (by id) that name has not changed");
        warn!("Make sure other Tenant constraints are respected by this k8s implementation");
        Ok(())
    }

    async fn apply_resource<
        K: Resource + std::fmt::Debug + Sync + DeserializeOwned + Default + serde::Serialize + Clone,
    >(
        &self,
        mut resource: K,
        config: &TenantConfig,
    ) -> Result<K, ExecutorError>
    where
        <K as kube::Resource>::DynamicType: Default,
        <K as kube::Resource>::Scope: ApplyStrategy<K>,
    {
        self.apply_labels(&mut resource, config);
        self.k8s_client
            .apply(&resource, Some(&self.get_namespace_name(config)))
            .await
            .map_err(|e| {
                ExecutorError::UnexpectedError(format!("Could not create Tenant resource : {e}"))
            })
    }

    fn apply_labels<K: Resource>(&self, resource: &mut K, config: &TenantConfig) {
        let labels = resource.meta_mut().labels.get_or_insert_default();
        labels.insert(
            "app.kubernetes.io/managed-by".to_string(),
            "harmony".to_string(),
        );
        labels.insert("harmony/tenant-id".to_string(), config.id.to_string());
        labels.insert("harmony/tenant-name".to_string(), config.name.clone());
    }

    fn build_namespace(&self, config: &TenantConfig) -> Result<Namespace, ExecutorError> {
        let namespace = json!(
            {
                "apiVersion": "v1",
                "kind": "Namespace",
                "metadata": {
                    "labels": {
                        "harmony.nationtech.io/tenant.id": config.id,
                        "harmony.nationtech.io/tenant.id": config.id.to_string(),
                        "harmony.nationtech.io/tenant.name": config.name,
                    },
                    "name": config.name,
                    "name": self.get_namespace_name(config),
                },
            }
        );
        todo!("Validate that when tenant already exists (by id) that name has not changed");

        let namespace: Namespace = serde_json::from_value(namespace).unwrap();
        serde_json::from_value(namespace).map_err(|e| {
            ExecutorError::ConfigurationError(format!(
                "Could not build TenantManager Namespace. {}",
                e
            ))
        })
    }

    fn build_resource_quota(&self, config: &TenantConfig) -> Result<ResourceQuota, ExecutorError> {
        let resource_quota = json!(
            {
                "apiVersion": "v1",
                "kind": "List",
                "items": [
                    {
                        "apiVersion": "v1",
                        "kind": "ResourceQuota",
                        "metadata": {
                            "name": config.name,
                            "labels": {
                                "harmony.nationtech.io/tenant.id": config.id,
                                "harmony.nationtech.io/tenant.name": config.name,
                            },
                            "namespace": config.name,
                        },
                        "spec": {
                            "hard": {
                                "limits.cpu": format!("{:.0}", config.resource_limits.cpu_limit_cores),
                                "limits.memory": format!("{:.3}Gi", config.resource_limits.memory_limit_gb),
                                "requests.cpu": format!("{:.0}", config.resource_limits.cpu_request_cores),
                                "requests.memory": format!("{:.3}Gi", config.resource_limits.memory_request_gb),
                                "requests.storage": format!("{:.3}", config.resource_limits.storage_total_gb),
                                "pods": "20",
                                "services": "10",
                                "configmaps": "30",
                                "secrets": "30",
                                "persistentvolumeclaims": "15",
                                "services.loadbalancers": "2",
                                "services.nodeports": "5",
                "apiVersion": "v1",
                "kind": "ResourceQuota",
                "metadata": {
                    "name": format!("{}-quota", config.name),
                    "labels": {
                        "harmony.nationtech.io/tenant.id": config.id.to_string(),
                        "harmony.nationtech.io/tenant.name": config.name,
                    },
                    "namespace": self.get_namespace_name(config),
                },
                "spec": {
                    "hard": {
                        "limits.cpu": format!("{:.0}", config.resource_limits.cpu_limit_cores),
                        "limits.memory": format!("{:.3}Gi", config.resource_limits.memory_limit_gb),
                        "requests.cpu": format!("{:.0}", config.resource_limits.cpu_request_cores),
                        "requests.memory": format!("{:.3}Gi", config.resource_limits.memory_request_gb),
                        "requests.storage": format!("{:.3}Gi", config.resource_limits.storage_total_gb),
                        "pods": "20",
                        "services": "10",
                        "configmaps": "60",
                        "secrets": "60",
                        "persistentvolumeclaims": "15",
                        "services.loadbalancers": "2",
                        "services.nodeports": "5",
                        "limits.ephemeral-storage": "10Gi",

                            }
                        }
                    }
                ]
            }
                    }
                }
            }
        );
        serde_json::from_value(resource_quota).map_err(|e| {
            ExecutorError::ConfigurationError(format!(
                "Could not build TenantManager ResourceQuota. {}",
                e
            ))
        })
    }

    async fn update_tenant_resource_limits(
        &self,
        tenant_name: &str,
        new_limits: &ResourceLimits,
    ) -> Result<(), ExecutorError> {
        todo!()
    fn build_limit_range(&self, config: &TenantConfig) -> Result<LimitRange, ExecutorError> {
        let limit_range = json!({
            "apiVersion": "v1",
            "kind": "LimitRange",
            "metadata": {
                "name": format!("{}-defaults", config.name),
                "labels": {
                    "harmony.nationtech.io/tenant.id": config.id.to_string(),
                    "harmony.nationtech.io/tenant.name": config.name,
                },
                "namespace": self.get_namespace_name(config),
            },
            "spec": {
                "limits": [
                    {
                        "type": "Container",
                        "default": {
                            "cpu": "500m",
                            "memory": "500Mi"
                        },
                        "defaultRequest": {
                            "cpu": "100m",
                            "memory": "100Mi"
                        },
                    }
                ]
            }
        });

        serde_json::from_value(limit_range).map_err(|e| {
            ExecutorError::ConfigurationError(format!(
                "Could not build TenantManager LimitRange. {}",
                e
            ))
        })
    }

    async fn update_tenant_network_policy(
        &self,
        tenant_name: &str,
        new_policy: &TenantNetworkPolicy,
    ) -> Result<(), ExecutorError> {
        todo!()
    }
    fn build_network_policy(&self, config: &TenantConfig) -> Result<NetworkPolicy, ExecutorError> {
        let network_policy = json!({
            "apiVersion": "networking.k8s.io/v1",
            "kind": "NetworkPolicy",
            "metadata": {
                "name": format!("{}-network-policy", config.name),
                "namespace": self.get_namespace_name(config),
            },
            "spec": {
                "podSelector": {},
                "egress": [
                    {
                        "to": [
                            { "podSelector": {} }
                        ]
                    },
                    {
                        "to": [
                            {
                                "podSelector": {},
                                "namespaceSelector": {
                                    "matchLabels": {
                                        "kubernetes.io/metadata.name": "kube-system"
                                    }
                                }
                            }
                        ]
                    },
                    {
                        "to": [
                            {
                                "podSelector": {},
                                "namespaceSelector": {
                                    "matchLabels": {
                                        "kubernetes.io/metadata.name": "openshift-dns"
                                    }
                                }
                            }
                        ]
                    },
                    {
                        "to": [
                            {
                                "ipBlock": {
                                    "cidr": "10.43.0.1/32",
                                }
                            }
                        ]
                    },
                    {
                        "to": [
                            {
                                "ipBlock": {
                                    "cidr": "172.23.0.0/16",
                                }
                            }
                        ]
                    },
                    {
                        "to": [
                            {
                                "ipBlock": {
                                    "cidr": "0.0.0.0/0",
                                    "except": [
                                        "10.0.0.0/8",
                                        "172.16.0.0/12",
                                        "192.168.0.0/16",
                                        "192.0.0.0/24",
                                        "192.0.2.0/24",
                                        "192.88.99.0/24",
                                        "192.18.0.0/15",
                                        "198.51.100.0/24",
                                        "169.254.0.0/16",
                                        "203.0.113.0/24",
                                        "127.0.0.0/8",
                                        "224.0.0.0/4",
                                        "240.0.0.0/4",
                                        "100.64.0.0/10",
                                        "233.252.0.0/24",
                                        "0.0.0.0/8"
                                    ]
                                }
                            }
                        ]
                    }
                ],
                "ingress": [
                    {
                        "from": [
                            { "podSelector": {} }
                        ]
                    }
                ],
                "policyTypes": [
                    "Ingress",
                    "Egress"
                ]
            }
        });
        let mut network_policy: NetworkPolicy =
            serde_json::from_value(network_policy).map_err(|e| {
                ExecutorError::ConfigurationError(format!(
                    "Could not build TenantManager NetworkPolicy. {}",
                    e
                ))
            })?;

    async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError> {
        todo!()
        config
            .network_policy
            .additional_allowed_cidr_ingress
            .iter()
            .try_for_each(|c| -> Result<(), ExecutorError> {
                let cidr_list: Vec<serde_json::Value> =
                    c.0.iter()
                        .map(|ci| {
                            json!({
                                "ipBlock": {
                                    "cidr": ci.to_string(),
                                }
                            })
                        })
                        .collect();
                let ports: Option<Vec<NetworkPolicyPort>> =
                    c.1.as_ref().map(|spec| match &spec.data {
                        super::PortSpecData::SinglePort(port) => vec![NetworkPolicyPort {
                            port: Some(IntOrString::Int(port.clone().into())),
                            ..Default::default()
                        }],
                        super::PortSpecData::PortRange(start, end) => vec![NetworkPolicyPort {
                            port: Some(IntOrString::Int(start.clone().into())),
                            end_port: Some(end.clone().into()),
                            protocol: None, // Not currently supported by Harmony
                        }],

                        super::PortSpecData::ListOfPorts(items) => items
                            .iter()
                            .map(|i| NetworkPolicyPort {
                                port: Some(IntOrString::Int(i.clone().into())),
                                ..Default::default()
                            })
                            .collect(),
                    });
                let rule = serde_json::from_value::<NetworkPolicyIngressRule>(json!({
                    "from": cidr_list,
                    "ports": ports,
                }))
                .map_err(|e| {
                    ExecutorError::ConfigurationError(format!(
                        "Could not build TenantManager NetworkPolicyIngressRule. {}",
                        e
                    ))
                })?;

                network_policy
                    .spec
                    .as_mut()
                    .unwrap()
                    .ingress
                    .as_mut()
                    .unwrap()
                    .push(rule);
                Ok(())
            })?;

        config
            .network_policy
            .additional_allowed_cidr_egress
            .iter()
            .try_for_each(|c| -> Result<(), ExecutorError> {
                let cidr_list: Vec<serde_json::Value> =
                    c.0.iter()
                        .map(|ci| {
                            json!({
                                "ipBlock": {
                                    "cidr": ci.to_string(),
                                }
                            })
                        })
                        .collect();
                let ports: Option<Vec<NetworkPolicyPort>> =
                    c.1.as_ref().map(|spec| match &spec.data {
                        super::PortSpecData::SinglePort(port) => vec![NetworkPolicyPort {
                            port: Some(IntOrString::Int(port.clone().into())),
                            ..Default::default()
                        }],
                        super::PortSpecData::PortRange(start, end) => vec![NetworkPolicyPort {
                            port: Some(IntOrString::Int(start.clone().into())),
                            end_port: Some(end.clone().into()),
                            protocol: None, // Not currently supported by Harmony
                        }],

                        super::PortSpecData::ListOfPorts(items) => items
                            .iter()
                            .map(|i| NetworkPolicyPort {
                                port: Some(IntOrString::Int(i.clone().into())),
                                ..Default::default()
                            })
                            .collect(),
                    });
                let rule = serde_json::from_value::<NetworkPolicyEgressRule>(json!({
                    "to": cidr_list,
                    "ports": ports,
                }))
                .map_err(|e| {
                    ExecutorError::ConfigurationError(format!(
                        "Could not build TenantManager NetworkPolicyEgressRule. {}",
                        e
                    ))
                })?;
                network_policy
                    .spec
                    .as_mut()
                    .unwrap()
                    .egress
                    .as_mut()
                    .unwrap()
                    .push(rule);
                Ok(())
            })?;

        Ok(network_policy)
    }
    fn store_config(&self, config: &TenantConfig) {
        let _ = self.k8s_tenant_config.set(config.clone());
    }
}

#[async_trait]
impl TenantManager for K8sTenantManager {
    async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
        let namespace = self.build_namespace(config)?;
        let resource_quota = self.build_resource_quota(config)?;
        let network_policy = self.build_network_policy(config)?;
        let resource_limit_range = self.build_limit_range(config)?;

        self.ensure_constraints(&namespace)?;

        debug!("Creating namespace for tenant {}", config.name);
        self.apply_resource(namespace, config).await?;

        debug!("Creating resource_quota for tenant {}", config.name);
        self.apply_resource(resource_quota, config).await?;

        debug!("Creating limit_range for tenant {}", config.name);
        self.apply_resource(resource_limit_range, config).await?;

        debug!("Creating network_policy for tenant {}", config.name);
        self.apply_resource(network_policy, config).await?;

        info!(
            "Success provisioning K8s tenant id {} name {}",
            config.id, config.name
        );
        self.store_config(config);
        Ok(())
    }
    async fn get_tenant_config(&self) -> Option<TenantConfig> {
        self.k8s_tenant_config.get().cloned()
    }
}

@@ -5,9 +5,10 @@ use crate::executors::ExecutorError;

#[async_trait]
pub trait TenantManager {
    /// Provisions a new tenant based on the provided configuration.
    /// This operation should be idempotent; if a tenant with the same `config.name`
    /// Creates or updates a tenant based on the provided configuration.
    /// This operation should be idempotent; if a tenant with the same `config.id`
    /// already exists and matches the config, it will succeed without changes.
    ///
    /// If it exists but differs, it will be updated, or return an error if the update
    /// action is not supported
    ///
@@ -15,32 +16,5 @@ pub trait TenantManager {
    /// * `config`: The desired configuration for the new tenant.
    async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError>;

    /// Updates the resource limits for an existing tenant.
    ///
    /// # Arguments
    /// * `tenant_name`: The logical name of the tenant to update.
    /// * `new_limits`: The new set of resource limits to apply.
    async fn update_tenant_resource_limits(
        &self,
        tenant_name: &str,
        new_limits: &ResourceLimits,
    ) -> Result<(), ExecutorError>;

    /// Updates the high-level network isolation policy for an existing tenant.
    ///
    /// # Arguments
    /// * `tenant_name`: The logical name of the tenant to update.
    /// * `new_policy`: The new network policy to apply.
    async fn update_tenant_network_policy(
        &self,
        tenant_name: &str,
        new_policy: &TenantNetworkPolicy,
    ) -> Result<(), ExecutorError>;

    /// Decommissions an existing tenant, removing its isolated context and associated resources.
    /// This operation should be idempotent.
    ///
    /// # Arguments
    /// * `tenant_name`: The logical name of the tenant to deprovision.
    async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError>;
    async fn get_tenant_config(&self) -> Option<TenantConfig>;
}
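// Editor's example — a minimal sketch (not part of the diff) of the updated
// TenantManager contract above: provision is idempotent by `config.id`, and the
// active config can be read back afterwards; `manager` is any hypothetical implementor.
async fn tenant_demo(manager: &impl TenantManager) -> Result<(), ExecutorError> {
    let config = TenantConfig::default();
    manager.provision_tenant(&config).await?; // creates the tenant
    manager.provision_tenant(&config).await?; // same id + same config: succeeds without changes
    let current = manager.get_tenant_config().await;
    assert_eq!(current.map(|c| c.id), Some(config.id));
    Ok(())
}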
@@ -1,10 +1,10 @@
pub mod k8s;
mod manager;
use std::str::FromStr;

pub use manager::*;
use serde::{Deserialize, Serialize};

use std::collections::HashMap;

use crate::data::Id;

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] // Assuming serde for Scores
@@ -21,13 +21,26 @@ pub struct TenantConfig {

    /// High-level network isolation policies for the tenant.
    pub network_policy: TenantNetworkPolicy,

    /// Key-value pairs for provider-specific tagging, labeling, or metadata.
    /// Useful for billing, organization, or filtering within the provider's console.
    pub labels_or_tags: HashMap<String, String>,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
impl Default for TenantConfig {
    fn default() -> Self {
        let id = Id::default();
        Self {
            name: format!("tenant_{id}"),
            id,
            resource_limits: ResourceLimits::default(),
            network_policy: TenantNetworkPolicy {
                default_inter_tenant_ingress: InterTenantIngressPolicy::DenyAll,
                default_internet_egress: InternetEgressPolicy::AllowAll,
                additional_allowed_cidr_ingress: vec![],
                additional_allowed_cidr_egress: vec![],
            },
        }
    }
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ResourceLimits {
    /// Requested/guaranteed CPU cores (e.g., 2.0).
    pub cpu_request_cores: f32,
@@ -43,6 +56,18 @@ pub struct ResourceLimits {
    pub storage_total_gb: f32,
}

impl Default for ResourceLimits {
    fn default() -> Self {
        Self {
            cpu_request_cores: 4.0,
            cpu_limit_cores: 4.0,
            memory_request_gb: 4.0,
            memory_limit_gb: 4.0,
            storage_total_gb: 20.0,
        }
    }
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TenantNetworkPolicy {
    /// Policy for ingress traffic originating from other tenants within the same Harmony-managed environment.
@@ -50,6 +75,20 @@ pub struct TenantNetworkPolicy {

    /// Policy for egress traffic destined for the public internet.
    pub default_internet_egress: InternetEgressPolicy,

    pub additional_allowed_cidr_ingress: Vec<(Vec<cidr::Ipv4Cidr>, Option<PortSpec>)>,
    pub additional_allowed_cidr_egress: Vec<(Vec<cidr::Ipv4Cidr>, Option<PortSpec>)>,
}

impl Default for TenantNetworkPolicy {
    fn default() -> Self {
        TenantNetworkPolicy {
            default_inter_tenant_ingress: InterTenantIngressPolicy::DenyAll,
            default_internet_egress: InternetEgressPolicy::DenyAll,
            additional_allowed_cidr_ingress: vec![],
            additional_allowed_cidr_egress: vec![],
        }
    }
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
@@ -65,3 +104,121 @@ pub enum InternetEgressPolicy {
    /// Deny all outbound traffic to the internet by default.
    DenyAll,
}

/// Represents a port specification that can be either a single port, a comma-separated list of ports,
/// or a range separated by a dash.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PortSpec {
    /// The actual representation of the ports as strings for serialization/deserialization purposes.
    pub data: PortSpecData,
}

impl PortSpec {
    /// TODO write short rust doc that shows what types of input are supported
    fn parse_from_str(spec: &str) -> Result<PortSpec, String> {
        // Check for single port
        if let Ok(port) = spec.parse::<u16>() {
            let spec = PortSpecData::SinglePort(port);
            return Ok(Self { data: spec });
        }

        if let Some(range) = spec.find('-') {
            let start_str = &spec[..range];
            let end_str = &spec[(range + 1)..];

            if let (Ok(start), Ok(end)) = (start_str.parse::<u16>(), end_str.parse::<u16>()) {
                let spec = PortSpecData::PortRange(start, end);
                return Ok(Self { data: spec });
            }
        }

        let ports: Vec<&str> = spec.split(',').collect();
        if !ports.is_empty() && ports.iter().all(|p| p.parse::<u16>().is_ok()) {
            let maybe_ports = ports.iter().try_fold(vec![], |mut list, &p| {
                if let Ok(p) = p.parse::<u16>() {
                    list.push(p);
                    return Ok(list);
                }
                Err(())
            });

            if let Ok(ports) = maybe_ports {
                let spec = PortSpecData::ListOfPorts(ports);
                return Ok(Self { data: spec });
            }
        }

        Err(format!("Invalid port spec format {spec}"))
    }
}

impl FromStr for PortSpec {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::parse_from_str(s)
    }
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum PortSpecData {
    SinglePort(u16),
    PortRange(u16, u16),
    ListOfPorts(Vec<u16>),
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_single_port() {
        let port_spec = "2144".parse::<PortSpec>().unwrap();
        match port_spec.data {
            PortSpecData::SinglePort(port) => assert_eq!(port, 2144),
            _ => panic!("Expected SinglePort"),
        }
    }

    #[test]
    fn test_port_range() {
        let port_spec = "80-90".parse::<PortSpec>().unwrap();
        match port_spec.data {
            PortSpecData::PortRange(start, end) => {
                assert_eq!(start, 80);
                assert_eq!(end, 90);
            }
            _ => panic!("Expected PortRange"),
        }
    }

    #[test]
    fn test_list_of_ports() {
        let port_spec = "2144,3424".parse::<PortSpec>().unwrap();
        match port_spec.data {
            PortSpecData::ListOfPorts(ports) => {
                assert_eq!(ports[0], 2144);
                assert_eq!(ports[1], 3424);
            }
            _ => panic!("Expected ListOfPorts"),
        }
    }

    #[test]
    fn test_invalid_port_spec() {
        let result = "invalid".parse::<PortSpec>();
        assert!(result.is_err());
    }

    #[test]
    fn test_empty_input() {
        let result = "".parse::<PortSpec>();
        assert!(result.is_err());
    }

    #[test]
    fn test_only_coma() {
        let result = ",".parse::<PortSpec>();
        assert!(result.is_err());
    }
}
harmony/src/modules/application/feature.rs (new file, 41 lines)
@@ -0,0 +1,41 @@
use async_trait::async_trait;
use serde::Serialize;

use crate::topology::Topology;
/// An ApplicationFeature provided by harmony, such as Backups, Monitoring, MultisiteAvailability,
/// ContinuousIntegration, ContinuousDelivery
#[async_trait]
pub trait ApplicationFeature<T: Topology>:
    std::fmt::Debug + Send + Sync + ApplicationFeatureClone<T>
{
    async fn ensure_installed(&self, topology: &T) -> Result<(), String>;
    fn name(&self) -> String;
}

trait ApplicationFeatureClone<T: Topology> {
    fn clone_box(&self) -> Box<dyn ApplicationFeature<T>>;
}

impl<A, T: Topology> ApplicationFeatureClone<T> for A
where
    A: ApplicationFeature<T> + Clone + 'static,
{
    fn clone_box(&self) -> Box<dyn ApplicationFeature<T>> {
        Box::new(self.clone())
    }
}

impl<T: Topology> Serialize for Box<dyn ApplicationFeature<T>> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}

impl<T: Topology> Clone for Box<dyn ApplicationFeature<T>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}
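// Editor's example — a minimal sketch (not part of the diff) showing why the
// ApplicationFeatureClone indirection above exists: it lets boxed trait objects
// be cloned even though `Clone` itself is not object-safe; `NoopFeature` is hypothetical.
#[derive(Debug, Clone)]
struct NoopFeature;

#[async_trait]
impl<T: Topology> ApplicationFeature<T> for NoopFeature {
    async fn ensure_installed(&self, _topology: &T) -> Result<(), String> {
        Ok(()) // nothing to install
    }
    fn name(&self) -> String {
        "Noop".to_string()
    }
}

// let features: Vec<Box<dyn ApplicationFeature<MyTopology>>> = vec![Box::new(NoopFeature)];
// let copy = features.clone(); // works via clone_box()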
@@ -0,0 +1,84 @@
|
||||
use async_trait::async_trait;
|
||||
use log::info;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
inventory::Inventory,
|
||||
modules::{application::ApplicationFeature, helm::chart::HelmChartScore},
|
||||
score::Score,
|
||||
topology::{HelmCommand, Topology, Url},
|
||||
};
|
||||
|
||||
/// ContinuousDelivery in Harmony provides this functionality :
|
||||
///
|
||||
/// - **Package** the application
|
||||
/// - **Push** to an artifact registry
|
||||
/// - **Deploy** to a testing environment
|
||||
/// - **Deploy** to a production environment
|
||||
///
|
||||
/// It is intended to be used as an application feature passed down to an ApplicationInterpret. For
|
||||
/// example :
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// let app = RustApplicationScore {
|
||||
/// name: "My Rust App".to_string(),
|
||||
/// features: vec![ContinuousDelivery::default()],
|
||||
/// };
|
||||
/// ```
|
||||
///
|
||||
/// *Note :*
|
||||
///
|
||||
/// By default, the Harmony Opinionated Pipeline is built using these technologies :
|
||||
///
|
||||
/// - Gitea Action (executes pipeline steps)
|
||||
/// - Docker to build an OCI container image
|
||||
/// - Helm chart to package Kubernetes resources
|
||||
/// - Harbor as artifact registru
|
||||
/// - ArgoCD to install/upgrade/rollback/inspect k8s resources
|
||||
/// - Kubernetes for runtime orchestration
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct ContinuousDelivery {}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + HelmCommand + 'static> ApplicationFeature<T> for ContinuousDelivery {
|
||||
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
|
||||
info!("Installing ContinuousDelivery feature");
|
||||
let cd_server = HelmChartScore {
|
||||
namespace: todo!(
|
||||
"ArgoCD Helm chart with proper understanding of Tenant, see how Will did it for Monitoring for now"
|
||||
),
|
||||
release_name: todo!("argocd helm chart whatever"),
|
||||
chart_name: todo!(),
|
||||
chart_version: todo!(),
|
||||
values_overrides: todo!(),
|
||||
values_yaml: todo!(),
|
||||
create_namespace: todo!(),
|
||||
install_only: todo!(),
|
||||
repository: todo!(),
|
||||
};
|
||||
let interpret = cd_server.create_interpret();
|
||||
interpret.execute(&Inventory::empty(), topology);
|
||||
|
||||
todo!("1. Create ArgoCD score that installs argo using helm chart, see if Taha's already done it
|
||||
2. Package app (docker image, helm chart)
|
||||
3. Push to registry if staging or prod
|
||||
4. Poke Argo
|
||||
5. Ensure app is up")
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"ContinuousDelivery".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// For now this is entirely bound to K8s / ArgoCD, will have to be revisited when we support
|
||||
/// more CD systems
|
||||
pub struct CDApplicationConfig {
|
||||
version: Version,
|
||||
helm_chart_url: Url,
|
||||
values_overrides: Value,
|
||||
}
|
||||
|
||||
pub trait ContinuousDeliveryApplication {
|
||||
fn get_config(&self) -> CDApplicationConfig;
|
||||
}
|
||||
42
harmony/src/modules/application/features/endpoint.rs
Normal file
@@ -0,0 +1,42 @@
use async_trait::async_trait;
use log::info;

use crate::{
    modules::application::ApplicationFeature,
    topology::{K8sclient, Topology},
};

#[derive(Debug, Clone)]
pub struct PublicEndpoint {
    application_port: u16,
}

/// Use port 3000 as the default port. Harmony wants to provide "sane defaults" in general, and in
/// this particular context, using port 80 goes against our philosophy of providing production
/// grade defaults out of the box. Using an unprivileged port is a good security practice and
/// allows unprivileged containers to work with this out of the box.
///
/// Now, why 3000 specifically? There is no perfect answer, but many popular web/network
/// frameworks, including many Rust and Python libraries, use it by default.
impl Default for PublicEndpoint {
    fn default() -> Self {
        Self {
            application_port: 3000,
        }
    }
}

/// For now we only support K8s Ingress, but we will support more backends at some point
#[async_trait]
impl<T: Topology + K8sclient + 'static> ApplicationFeature<T> for PublicEndpoint {
    async fn ensure_installed(&self, _topology: &T) -> Result<(), String> {
        info!(
            "Making sure public endpoint is installed for port {}",
            self.application_port
        );
        todo!()
    }
    fn name(&self) -> String {
        "PublicEndpoint".to_string()
    }
}
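Since `application_port` is private, consumers currently obtain the struct only through `Default`; a tiny usage sketch:

```rust
// The default endpoint targets port 3000, per the rationale above.
let endpoint = PublicEndpoint::default();
// A `PublicEndpoint::new(port: u16)` constructor would be a natural (hypothetical)
// addition for applications listening on a different port.
let _ = endpoint;
```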
8
harmony/src/modules/application/features/mod.rs
Normal file
@@ -0,0 +1,8 @@
mod endpoint;
pub use endpoint::*;

mod monitoring;
pub use monitoring::*;

mod continuous_delivery;
pub use continuous_delivery::*;
21
harmony/src/modules/application/features/monitoring.rs
Normal file
@@ -0,0 +1,21 @@
use async_trait::async_trait;
use log::info;

use crate::{
    modules::application::ApplicationFeature,
    topology::{HelmCommand, Topology},
};

#[derive(Debug, Default, Clone)]
pub struct Monitoring {}

#[async_trait]
impl<T: Topology + HelmCommand + 'static> ApplicationFeature<T> for Monitoring {
    async fn ensure_installed(&self, _topology: &T) -> Result<(), String> {
        info!("Ensuring monitoring is available for application");
        todo!("create and execute k8s prometheus score, depends on Will's work")
    }
    fn name(&self) -> String {
        "Monitoring".to_string()
    }
}
78
harmony/src/modules/application/mod.rs
Normal file
@@ -0,0 +1,78 @@
mod feature;
pub mod features;
mod rust;
pub use feature::*;
use log::info;
pub use rust::*;

use async_trait::async_trait;

use crate::{
    data::{Id, Version},
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
    inventory::Inventory,
    topology::Topology,
};

pub trait Application: std::fmt::Debug + Send + Sync {
    fn name(&self) -> String;
}

#[derive(Debug)]
pub struct ApplicationInterpret<T: Topology + std::fmt::Debug> {
    features: Vec<Box<dyn ApplicationFeature<T>>>,
    application: Box<dyn Application>,
}

#[async_trait]
impl<T: Topology + std::fmt::Debug> Interpret<T> for ApplicationInterpret<T> {
    async fn execute(
        &self,
        _inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let app_name = self.application.name();
        info!(
            "Preparing {} features [{}] for application {app_name}",
            self.features.len(),
            self.features
                .iter()
                .map(|f| f.name())
                .collect::<Vec<String>>()
                .join(", ")
        );
        for feature in self.features.iter() {
            info!(
                "Installing feature {} for application {app_name}",
                feature.name()
            );
            if let Err(msg) = feature.ensure_installed(topology).await {
                return Err(InterpretError::new(format!(
                    "Application Interpret failed to install feature : {msg}"
                )));
            }
        }
        todo!(
            "Do I need to do anything more than this here?? I feel like the Application trait itself should expose something like ensure_ready but it's becoming redundant. We'll see as this evolves."
        )
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Application
    }

    fn get_version(&self) -> Version {
        Version::from("1.0.0").unwrap()
    }

    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}
41
harmony/src/modules/application/rust.rs
Normal file
@@ -0,0 +1,41 @@
use serde::Serialize;

use crate::{
    score::Score,
    topology::{Topology, Url},
};

use super::{Application, ApplicationFeature, ApplicationInterpret};

#[derive(Debug, Serialize, Clone)]
pub struct RustWebappScore<T: Topology + Clone + Serialize> {
    pub name: String,
    pub domain: Url,
    pub features: Vec<Box<dyn ApplicationFeature<T>>>,
}

impl<T: Topology + std::fmt::Debug + Clone + Serialize + 'static> Score<T> for RustWebappScore<T> {
    fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
        Box::new(ApplicationInterpret {
            features: self.features.clone(),
            application: Box::new(RustWebapp {
                name: self.name.clone(),
            }),
        })
    }

    fn name(&self) -> String {
        format!("{}-RustWebapp", self.name)
    }
}

#[derive(Debug)]
struct RustWebapp {
    name: String,
}

impl Application for RustWebapp {
    fn name(&self) -> String {
        self.name.clone()
    }
}
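For orientation, a hedged end-to-end sketch of driving this score; `SomeTopology` is a placeholder for a real topology type that must satisfy the feature bounds (K8sclient for PublicEndpoint, HelmCommand for Monitoring) plus Clone and Serialize:

```rust
// Hypothetical wiring; `SomeTopology` and the domain are placeholders.
let score = RustWebappScore::<SomeTopology> {
    name: "my-webapp".to_string(),
    domain: Url::Url(url::Url::parse("https://my-webapp.example.com").unwrap()),
    features: vec![
        Box::new(PublicEndpoint::default()),
        Box::new(Monitoring::default()),
    ],
};
let _interpret = score.create_interpret();
// _interpret.execute(&Inventory::empty(), &topology).await?;
```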
@@ -311,7 +311,7 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for HelmChartInterpretV
        _inventory: &Inventory,
        _topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let ns = self
        let _ns = self
            .score
            .chart
            .namespace
@@ -339,7 +339,7 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for HelmChartInterpretV

        let res = helm_executor.generate();

        let output = match res {
        let _output = match res {
            Ok(output) => output,
            Err(err) => return Err(InterpretError::new(err.to_string())),
        };

@@ -1,5 +1,6 @@
use harmony_macros::ingress_path;
use k8s_openapi::api::networking::v1::Ingress;
use log::{debug, trace};
use serde::Serialize;
use serde_json::json;

@@ -56,22 +57,24 @@ impl<T: Topology + K8sclient> Score<T> for K8sIngressScore {
        let ingress = json!(
            {
                "metadata": {
                    "name": self.name
                    "name": self.name.to_string(),
                },
                "spec": {
                    "rules": [
                        { "host": self.host,
                        { "host": self.host.to_string(),
                          "http": {
                            "paths": [
                                {
                                    "path": path,
                                    "pathType": path_type.as_str(),
                                    "backend": [
                                        {
                                            "service": self.backend_service,
                                            "port": self.port
                                    "backend": {
                                        "service": {
                                            "name": self.backend_service.to_string(),
                                            "port": {
                                                "number": self.port,
                                            }
                                        }
                                    ]
                                }
                            }
                        ]
                    }
@@ -81,13 +84,16 @@ impl<T: Topology + K8sclient> Score<T> for K8sIngressScore {
                }
        );

        trace!("Building ingress object from Value {ingress:#}");
        let ingress: Ingress = serde_json::from_value(ingress).unwrap();
        debug!(
            "Successfully built Ingress for host {:?}",
            ingress.metadata.name
        );
        Box::new(K8sResourceInterpret {
            score: K8sResourceScore::single(
                ingress.clone(),
                self.namespace
                    .clone()
                    .map(|f| f.as_c_str().to_str().unwrap().to_string()),
                self.namespace.clone().map(|f| f.to_string()),
            ),
        })
    }

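The rewritten `backend` now follows the networking.k8s.io/v1 schema, where `backend` is an object holding a `service` with a `name` and a `port.number`, rather than an array; a minimal standalone fragment (service name and port are illustrative):

```rust
// Correct networking.k8s.io/v1 backend shape (illustrative values).
let backend = serde_json::json!({
    "service": {
        "name": "my-service",
        "port": { "number": 8080 }
    }
});
```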
@@ -1,6 +1,7 @@
use async_trait::async_trait;
use k8s_openapi::NamespaceResourceScope;
use kube::Resource;
use log::info;
use serde::{Serialize, de::DeserializeOwned};

use crate::{
@@ -75,11 +76,12 @@ where
        _inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        info!("Applying {} resources", self.score.resource.len());
        topology
            .k8s_client()
            .await
            .expect("Environment should provide enough information to instantiate a client")
            .apply_namespaced(&self.score.resource, self.score.namespace.as_deref())
            .apply_many(&self.score.resource, self.score.namespace.as_deref())
            .await?;

        Ok(Outcome::success(

@@ -1,3 +1,4 @@
pub mod application;
pub mod cert_manager;
pub mod dhcp;
pub mod dns;
@@ -12,4 +13,7 @@ pub mod load_balancer;
pub mod monitoring;
pub mod okd;
pub mod opnsense;
pub mod prometheus;
pub mod tenant;
pub mod tftp;
pub mod postgres;

@@ -0,0 +1,125 @@
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Value};

use crate::{
    interpret::{InterpretError, Outcome},
    modules::monitoring::kube_prometheus::{
        prometheus::{Prometheus, PrometheusReceiver},
        types::{AlertChannelConfig, AlertManagerChannelConfig},
    },
    topology::{Url, oberservability::monitoring::AlertReceiver},
};

#[derive(Debug, Clone, Serialize)]
pub struct DiscordWebhook {
    pub name: String,
    pub url: Url,
}

#[async_trait]
impl AlertReceiver<Prometheus> for DiscordWebhook {
    async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
        sender.install_receiver(self).await
    }
    fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
        Box::new(self.clone())
    }
}

#[async_trait]
impl PrometheusReceiver for DiscordWebhook {
    fn name(&self) -> String {
        self.name.clone()
    }
    async fn configure_receiver(&self) -> AlertManagerChannelConfig {
        self.get_config().await
    }
}

#[async_trait]
impl AlertChannelConfig for DiscordWebhook {
    async fn get_config(&self) -> AlertManagerChannelConfig {
        let channel_global_config = None;
        let channel_receiver = self.alert_channel_receiver().await;
        let channel_route = self.alert_channel_route().await;

        AlertManagerChannelConfig {
            channel_global_config,
            channel_receiver,
            channel_route,
        }
    }
}

impl DiscordWebhook {
    async fn alert_channel_route(&self) -> serde_yaml::Value {
        let mut route = Mapping::new();
        route.insert(
            Value::String("receiver".to_string()),
            Value::String(self.name.clone()),
        );
        route.insert(
            Value::String("matchers".to_string()),
            Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
        );
        route.insert(Value::String("continue".to_string()), Value::Bool(true));
        Value::Mapping(route)
    }

    async fn alert_channel_receiver(&self) -> serde_yaml::Value {
        let mut receiver = Mapping::new();
        receiver.insert(
            Value::String("name".to_string()),
            Value::String(self.name.clone()),
        );

        let mut discord_config = Mapping::new();
        discord_config.insert(
            Value::String("webhook_url".to_string()),
            Value::String(self.url.to_string()),
        );

        receiver.insert(
            Value::String("discord_configs".to_string()),
            Value::Sequence(vec![Value::Mapping(discord_config)]),
        );

        Value::Mapping(receiver)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn discord_serialize_should_match() {
        let discord_receiver = DiscordWebhook {
            name: "test-discord".to_string(),
            url: Url::Url(url::Url::parse("https://discord.i.dont.exist.com").unwrap()),
        };

        let discord_receiver_receiver =
            serde_yaml::to_string(&discord_receiver.alert_channel_receiver().await).unwrap();
        println!("receiver \n{:#}", discord_receiver_receiver);
        let discord_receiver_receiver_yaml = r#"name: test-discord
discord_configs:
- webhook_url: https://discord.i.dont.exist.com/
"#
        .to_string();

        let discord_receiver_route =
            serde_yaml::to_string(&discord_receiver.alert_channel_route().await).unwrap();
        println!("route \n{:#}", discord_receiver_route);
        let discord_receiver_route_yaml = r#"receiver: test-discord
matchers:
- alertname!=Watchdog
continue: true
"#
        .to_string();

        assert_eq!(discord_receiver_receiver, discord_receiver_receiver_yaml);
        assert_eq!(discord_receiver_route, discord_receiver_route_yaml);
    }
}
2
harmony/src/modules/monitoring/alert_channel/mod.rs
Normal file
@@ -0,0 +1,2 @@
pub mod discord_alert_channel;
pub mod webhook_receiver;
124
harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs
Normal file
@@ -0,0 +1,124 @@
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Value};

use crate::{
    interpret::{InterpretError, Outcome},
    modules::monitoring::kube_prometheus::{
        prometheus::{Prometheus, PrometheusReceiver},
        types::{AlertChannelConfig, AlertManagerChannelConfig},
    },
    topology::{Url, oberservability::monitoring::AlertReceiver},
};

#[derive(Debug, Clone, Serialize)]
pub struct WebhookReceiver {
    pub name: String,
    pub url: Url,
}

#[async_trait]
impl AlertReceiver<Prometheus> for WebhookReceiver {
    async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
        sender.install_receiver(self).await
    }
    fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
        Box::new(self.clone())
    }
}

#[async_trait]
impl PrometheusReceiver for WebhookReceiver {
    fn name(&self) -> String {
        self.name.clone()
    }
    async fn configure_receiver(&self) -> AlertManagerChannelConfig {
        self.get_config().await
    }
}

#[async_trait]
impl AlertChannelConfig for WebhookReceiver {
    async fn get_config(&self) -> AlertManagerChannelConfig {
        let channel_global_config = None;
        let channel_receiver = self.alert_channel_receiver().await;
        let channel_route = self.alert_channel_route().await;

        AlertManagerChannelConfig {
            channel_global_config,
            channel_receiver,
            channel_route,
        }
    }
}

impl WebhookReceiver {
    async fn alert_channel_route(&self) -> serde_yaml::Value {
        let mut route = Mapping::new();
        route.insert(
            Value::String("receiver".to_string()),
            Value::String(self.name.clone()),
        );
        route.insert(
            Value::String("matchers".to_string()),
            Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
        );
        route.insert(Value::String("continue".to_string()), Value::Bool(true));
        Value::Mapping(route)
    }

    async fn alert_channel_receiver(&self) -> serde_yaml::Value {
        let mut receiver = Mapping::new();
        receiver.insert(
            Value::String("name".to_string()),
            Value::String(self.name.clone()),
        );

        let mut webhook_config = Mapping::new();
        webhook_config.insert(
            Value::String("url".to_string()),
            Value::String(self.url.to_string()),
        );

        receiver.insert(
            Value::String("webhook_configs".to_string()),
            Value::Sequence(vec![Value::Mapping(webhook_config)]),
        );

        Value::Mapping(receiver)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    #[tokio::test]
    async fn webhook_serialize_should_match() {
        let webhook_receiver = WebhookReceiver {
            name: "test-webhook".to_string(),
            url: Url::Url(url::Url::parse("https://webhook.i.dont.exist.com").unwrap()),
        };

        let webhook_receiver_receiver =
            serde_yaml::to_string(&webhook_receiver.alert_channel_receiver().await).unwrap();
        println!("receiver \n{:#}", webhook_receiver_receiver);
        let webhook_receiver_receiver_yaml = r#"name: test-webhook
webhook_configs:
- url: https://webhook.i.dont.exist.com/
"#
        .to_string();

        let webhook_receiver_route =
            serde_yaml::to_string(&webhook_receiver.alert_channel_route().await).unwrap();
        println!("route \n{:#}", webhook_receiver_route);
        let webhook_receiver_route_yaml = r#"receiver: test-webhook
matchers:
- alertname!=Watchdog
continue: true
"#
        .to_string();

        assert_eq!(webhook_receiver_receiver, webhook_receiver_receiver_yaml);
        assert_eq!(webhook_receiver_route, webhook_receiver_route_yaml);
    }
}
1
harmony/src/modules/monitoring/alert_rule/mod.rs
Normal file
@@ -0,0 +1 @@
pub mod prometheus_alert_rule;
@@ -0,0 +1,99 @@
use std::collections::{BTreeMap, HashMap};

use async_trait::async_trait;
use serde::Serialize;

use crate::{
    interpret::{InterpretError, Outcome},
    modules::monitoring::kube_prometheus::{
        prometheus::{Prometheus, PrometheusRule},
        types::{AlertGroup, AlertManagerAdditionalPromRules},
    },
    topology::oberservability::monitoring::AlertRule,
};

#[async_trait]
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
    async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
        sender.install_rule(self).await
    }
    fn clone_box(&self) -> Box<dyn AlertRule<Prometheus>> {
        Box::new(self.clone())
    }
}

#[async_trait]
impl PrometheusRule for AlertManagerRuleGroup {
    fn name(&self) -> String {
        self.name.clone()
    }
    async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
        let mut additional_prom_rules = BTreeMap::new();

        additional_prom_rules.insert(
            self.name.clone(),
            AlertGroup {
                groups: vec![self.clone()],
            },
        );
        AlertManagerAdditionalPromRules {
            rules: additional_prom_rules,
        }
    }
}

impl AlertManagerRuleGroup {
    pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {
        AlertManagerRuleGroup {
            name: name.to_string().to_lowercase(),
            rules,
        }
    }
}

/// Logical group of alert rules.
///
/// Evaluates to:
///
/// name:
///   groups:
///     - name: name
///       rules: PrometheusAlertRule
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerRuleGroup {
    pub name: String,
    pub rules: Vec<PrometheusAlertRule>,
}

#[derive(Debug, Clone, Serialize)]
pub struct PrometheusAlertRule {
    pub alert: String,
    pub expr: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#for: Option<String>,
    pub labels: HashMap<String, String>,
    pub annotations: HashMap<String, String>,
}

impl PrometheusAlertRule {
    pub fn new(alert_name: &str, expr: &str) -> Self {
        Self {
            alert: alert_name.into(),
            expr: expr.into(),
            r#for: Some("1m".into()),
            labels: HashMap::new(),
            annotations: HashMap::new(),
        }
    }
    pub fn for_duration(mut self, duration: &str) -> Self {
        self.r#for = Some(duration.into());
        self
    }
    pub fn label(mut self, key: &str, value: &str) -> Self {
        self.labels.insert(key.into(), value.into());
        self
    }

    pub fn annotation(mut self, key: &str, value: &str) -> Self {
        self.annotations.insert(key.into(), value.into());
        self
    }
}
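For reference, the builder above composes like this (alert name and expression are made up):

```rust
// Hypothetical usage of PrometheusAlertRule and AlertManagerRuleGroup.
let rule = PrometheusAlertRule::new(
    "HighPodRestarts",
    "increase(kube_pod_container_status_restarts_total[5m]) > 3",
)
.for_duration("5m")
.label("severity", "warning")
.annotation("summary", "Pod is restarting frequently");

let group = AlertManagerRuleGroup::new("Pod-Health", vec![rule]);
assert_eq!(group.name, "pod-health"); // `new` lowercases the group name
```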
@@ -1,35 +0,0 @@
use std::str::FromStr;

use non_blank_string_rs::NonBlankString;
use url::Url;

use crate::modules::helm::chart::HelmChartScore;

pub fn discord_alert_manager_score(
    webhook_url: Url,
    namespace: String,
    name: String,
) -> HelmChartScore {
    let values = format!(
        r#"
environment:
  - name: "DISCORD_WEBHOOK"
    value: "{webhook_url}"
"#,
    );

    HelmChartScore {
        namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
        release_name: NonBlankString::from_str(&name).unwrap(),
        chart_name: NonBlankString::from_str(
            "oci://hub.nationtech.io/library/alertmanager-discord",
        )
        .unwrap(),
        chart_version: None,
        values_overrides: None,
        values_yaml: Some(values.to_string()),
        create_namespace: true,
        install_only: true,
        repository: None,
    }
}
@@ -1,55 +0,0 @@
use async_trait::async_trait;
use serde_json::Value;
use url::Url;

use crate::{
    interpret::{InterpretError, Outcome},
    topology::K8sAnywhereTopology,
};

#[derive(Debug, Clone)]
pub struct DiscordWebhookConfig {
    pub webhook_url: Url,
    pub name: String,
    pub send_resolved_notifications: bool,
}

pub trait DiscordWebhookReceiver {
    fn deploy_discord_webhook_receiver(
        &self,
        _notification_adapter_id: &str,
    ) -> Result<Outcome, InterpretError>;

    fn delete_discord_webhook_receiver(
        &self,
        _notification_adapter_id: &str,
    ) -> Result<Outcome, InterpretError>;
}

// Trait used to generate alert manager config values, e.g. impl<T: Topology + AlertManagerConfig> Monitor for KubePrometheus
pub trait AlertManagerConfig<T> {
    fn get_alert_manager_config(&self) -> Result<Value, InterpretError>;
}

#[async_trait]
impl<T: DiscordWebhookReceiver> AlertManagerConfig<T> for DiscordWebhookConfig {
    fn get_alert_manager_config(&self) -> Result<Value, InterpretError> {
        todo!()
    }
}

#[async_trait]
impl DiscordWebhookReceiver for K8sAnywhereTopology {
    fn deploy_discord_webhook_receiver(
        &self,
        _notification_adapter_id: &str,
    ) -> Result<Outcome, InterpretError> {
        todo!()
    }
    fn delete_discord_webhook_receiver(
        &self,
        _notification_adapter_id: &str,
    ) -> Result<Outcome, InterpretError> {
        todo!()
    }
}
@@ -1,262 +0,0 @@
use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel};
use log::info;
use non_blank_string_rs::NonBlankString;
use std::{collections::HashMap, str::FromStr};
use url::Url;

use crate::modules::helm::chart::HelmChartScore;

pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmChartScore {
    // TODO: this should be made into a rule with default formatting that can easily be passed as a
    // vec to the overrides or something; leaving the user to deal with formatting here seems bad.
    let default_rules = config.default_rules.to_string();
    let windows_monitoring = config.windows_monitoring.to_string();
    let alert_manager = config.alert_manager.to_string();
    let grafana = config.grafana.to_string();
    let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
    let kubernetes_api_server = config.kubernetes_api_server.to_string();
    let kubelet = config.kubelet.to_string();
    let kube_controller_manager = config.kube_controller_manager.to_string();
    let core_dns = config.core_dns.to_string();
    let kube_etcd = config.kube_etcd.to_string();
    let kube_scheduler = config.kube_scheduler.to_string();
    let kube_proxy = config.kube_proxy.to_string();
    let kube_state_metrics = config.kube_state_metrics.to_string();
    let node_exporter = config.node_exporter.to_string();
    let prometheus_operator = config.prometheus_operator.to_string();
    let prometheus = config.prometheus.to_string();
    let mut values = format!(
        r#"
additionalPrometheusRulesMap:
  pods-status-alerts:
    groups:
      - name: pods
        rules:
          - alert: "[CRIT] POD not healthy"
            expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{{phase=~"Pending|Unknown|Failed"}})[15m:1m]) > 0
            for: 0m
            labels:
              severity: critical
            annotations:
              title: "[CRIT] POD not healthy : {{{{ $labels.pod }}}}"
              description: |
                A POD is in a non-ready state!
                - **Pod**: {{{{ $labels.pod }}}}
                - **Namespace**: {{{{ $labels.namespace }}}}
          - alert: "[CRIT] POD crash looping"
            expr: increase(kube_pod_container_status_restarts_total[5m]) > 3
            for: 0m
            labels:
              severity: critical
            annotations:
              title: "[CRIT] POD crash looping : {{{{ $labels.pod }}}}"
              description: |
                A POD is drowning in a crash loop!
                - **Pod**: {{{{ $labels.pod }}}}
                - **Namespace**: {{{{ $labels.namespace }}}}
                - **Instance**: {{{{ $labels.instance }}}}
  pvc-alerts:
    groups:
      - name: pvc-alerts
        rules:
          - alert: 'PVC Fill Over 95 Percent In 2 Days'
            expr: |
              (
                kubelet_volume_stats_used_bytes
                /
                kubelet_volume_stats_capacity_bytes
              ) > 0.95
              AND
              predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)
              /
              kubelet_volume_stats_capacity_bytes
              > 0.95
            for: 1m
            labels:
              severity: warning
            annotations:
              description: The PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} is predicted to fill over 95% in less than 2 days.
              title: PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} will fill over 95% in less than 2 days
defaultRules:
  create: {default_rules}
  rules:
    alertmanager: true
    etcd: true
    configReloaders: true
    general: true
    k8sContainerCpuUsageSecondsTotal: true
    k8sContainerMemoryCache: true
    k8sContainerMemoryRss: true
    k8sContainerMemorySwap: true
    k8sContainerResource: true
    k8sContainerMemoryWorkingSetBytes: true
    k8sPodOwner: true
    kubeApiserverAvailability: true
    kubeApiserverBurnrate: true
    kubeApiserverHistogram: true
    kubeApiserverSlos: true
    kubeControllerManager: true
    kubelet: true
    kubeProxy: true
    kubePrometheusGeneral: true
    kubePrometheusNodeRecording: true
    kubernetesApps: true
    kubernetesResources: true
    kubernetesStorage: true
    kubernetesSystem: true
    kubeSchedulerAlerting: true
    kubeSchedulerRecording: true
    kubeStateMetrics: true
    network: true
    node: true
    nodeExporterAlerting: true
    nodeExporterRecording: true
    prometheus: true
    prometheusOperator: true
    windows: true
windowsMonitoring:
  enabled: {windows_monitoring}
grafana:
  enabled: {grafana}
kubernetesServiceMonitors:
  enabled: {kubernetes_service_monitors}
kubeApiServer:
  enabled: {kubernetes_api_server}
kubelet:
  enabled: {kubelet}
kubeControllerManager:
  enabled: {kube_controller_manager}
coreDns:
  enabled: {core_dns}
kubeEtcd:
  enabled: {kube_etcd}
kubeScheduler:
  enabled: {kube_scheduler}
kubeProxy:
  enabled: {kube_proxy}
kubeStateMetrics:
  enabled: {kube_state_metrics}
nodeExporter:
  enabled: {node_exporter}
prometheusOperator:
  enabled: {prometheus_operator}
prometheus:
  enabled: {prometheus}
"#,
    );

    let alertmanager_config = alert_manager_yaml_builder(&config);
    values.push_str(&alertmanager_config);

    fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String {
        let mut receivers = String::new();
        let mut routes = String::new();
        let mut global_configs = String::new();
        let alert_manager = config.alert_manager;
        for alert_channel in &config.alert_channel {
            match alert_channel {
                AlertChannel::Discord { name, .. } => {
                    let (receiver, route) = discord_alert_builder(name);
                    info!("discord receiver: {} \nroute: {}", receiver, route);
                    receivers.push_str(&receiver);
                    routes.push_str(&route);
                }
                AlertChannel::Slack {
                    slack_channel,
                    webhook_url,
                } => {
                    let (receiver, route) = slack_alert_builder(slack_channel);
                    info!("slack receiver: {} \nroute: {}", receiver, route);
                    receivers.push_str(&receiver);

                    routes.push_str(&route);
                    let global_config = format!(
                        r#"
    global:
      slack_api_url: {webhook_url}"#
                    );

                    global_configs.push_str(&global_config);
                }
                AlertChannel::Smpt { .. } => todo!(),
            }
        }
        info!("after alert receiver: {}", receivers);
        info!("after alert routes: {}", routes);

        let alertmanager_config = format!(
            r#"
alertmanager:
  enabled: {alert_manager}
  config: {global_configs}
    route:
      group_by: ['job']
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 12h
      routes:
{routes}
    receivers:
      - name: 'null'
{receivers}"#
        );

        info!("alert manager config: {}", alertmanager_config);
        alertmanager_config
    }

    HelmChartScore {
        namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
        release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
        chart_name: NonBlankString::from_str(
            "oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack",
        )
        .unwrap(),
        chart_version: None,
        values_overrides: None,
        values_yaml: Some(values.to_string()),
        create_namespace: true,
        install_only: true,
        repository: None,
    }
}

fn discord_alert_builder(release_name: &String) -> (String, String) {
    let discord_receiver_name = format!("Discord-{}", release_name);
    let receiver = format!(
        r#"
      - name: '{discord_receiver_name}'
        webhook_configs:
          - url: 'http://{release_name}-alertmanager-discord:9094'
            send_resolved: true"#,
    );
    let route = format!(
        r#"
        - receiver: '{discord_receiver_name}'
          matchers:
            - alertname!=Watchdog
          continue: true"#,
    );
    (receiver, route)
}

fn slack_alert_builder(slack_channel: &String) -> (String, String) {
    let slack_receiver_name = format!("Slack-{}", slack_channel);
    let receiver = format!(
        r#"
      - name: '{slack_receiver_name}'
        slack_configs:
          - channel: '{slack_channel}'
            send_resolved: true
            title: '{{{{ .CommonAnnotations.title }}}}'
            text: '{{{{ .CommonAnnotations.description }}}}'"#,
    );
    let route = format!(
        r#"
        - receiver: '{slack_receiver_name}'
          matchers:
            - alertname!=Watchdog
          continue: true"#,
    );
    (receiver, route)
}
@@ -1,10 +1,12 @@
use serde::Serialize;

use super::monitoring_alerting::AlertChannel;
use crate::modules::monitoring::kube_prometheus::types::{
    AlertManagerAdditionalPromRules, AlertManagerChannelConfig, ServiceMonitor,
};

#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {
    pub namespace: String,
    pub namespace: Option<String>,
    pub default_rules: bool,
    pub windows_monitoring: bool,
    pub alert_manager: bool,
@@ -21,22 +23,23 @@ pub struct KubePrometheusConfig {
    pub kube_proxy: bool,
    pub kube_state_metrics: bool,
    pub prometheus_operator: bool,
    pub alert_channel: Vec<AlertChannel>,
    pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
    pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
    pub additional_service_monitors: Vec<ServiceMonitor>,
}
impl KubePrometheusConfig {
    pub fn new() -> Self {
        Self {
            namespace: "monitoring".into(),
            namespace: None,
            default_rules: true,
            windows_monitoring: false,
            alert_manager: true,
            alert_channel: Vec::new(),
            grafana: true,
            node_exporter: false,
            prometheus: true,
            kubernetes_service_monitors: true,
            kubernetes_api_server: false,
            kubelet: false,
            kubelet: true,
            kube_controller_manager: false,
            kube_etcd: false,
            kube_proxy: false,
@@ -44,6 +47,9 @@ impl KubePrometheusConfig {
            prometheus_operator: true,
            core_dns: false,
            kube_scheduler: false,
            alert_receiver_configs: vec![],
            alert_rules: vec![],
            additional_service_monitors: vec![],
        }
    }
}
@@ -0,0 +1,413 @@
use super::config::KubePrometheusConfig;
use log::debug;
use non_blank_string_rs::NonBlankString;
use serde_yaml::{Mapping, Value};
use std::{
    collections::BTreeMap,
    str::FromStr,
    sync::{Arc, Mutex},
};

use crate::modules::{
    helm::chart::HelmChartScore,
    monitoring::kube_prometheus::types::{
        AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
        AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits,
        PrometheusConfig, Requests, Resources,
    },
};

pub fn kube_prometheus_helm_chart_score(
    config: Arc<Mutex<KubePrometheusConfig>>,
) -> HelmChartScore {
    let config = config.lock().unwrap();

    let default_rules = config.default_rules.to_string();
    let windows_monitoring = config.windows_monitoring.to_string();
    let grafana = config.grafana.to_string();
    let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
    let kubernetes_api_server = config.kubernetes_api_server.to_string();
    let kubelet = config.kubelet.to_string();
    let kube_controller_manager = config.kube_controller_manager.to_string();
    let core_dns = config.core_dns.to_string();
    let kube_etcd = config.kube_etcd.to_string();
    let kube_scheduler = config.kube_scheduler.to_string();
    let kube_proxy = config.kube_proxy.to_string();
    let kube_state_metrics = config.kube_state_metrics.to_string();
    let node_exporter = config.node_exporter.to_string();
    let prometheus_operator = config.prometheus_operator.to_string();
    let prometheus = config.prometheus.to_string();
    let resource_limit = Resources {
        limits: Limits {
            memory: "100Mi".to_string(),
            cpu: "100m".to_string(),
        },
        requests: Requests {
            memory: "100Mi".to_string(),
            cpu: "100m".to_string(),
        },
    };

    fn indent_lines(s: &str, spaces: usize) -> String {
        let pad = " ".repeat(spaces);
        s.lines()
            .map(|line| format!("{pad}{line}"))
            .collect::<Vec<_>>()
            .join("\n")
    }

    fn resource_block(resource: &Resources, indent_level: usize) -> String {
        let yaml = serde_yaml::to_string(resource).unwrap();
        format!(
            "{}resources:\n{}",
            " ".repeat(indent_level),
            indent_lines(&yaml, indent_level + 2)
        )
    }
    let resource_section = resource_block(&resource_limit, 2);

    let mut values = format!(
        r#"
prometheus:
  enabled: {prometheus}
  prometheusSpec:
    resources:
      requests:
        cpu: 100m
        memory: 500Mi
      limits:
        cpu: 200m
        memory: 1000Mi
defaultRules:
  create: {default_rules}
  rules:
    alertmanager: true
    etcd: true
    configReloaders: true
    general: true
    k8sContainerCpuUsageSecondsTotal: true
    k8sContainerMemoryCache: true
    k8sContainerMemoryRss: true
    k8sContainerMemorySwap: true
    k8sContainerResource: true
    k8sContainerMemoryWorkingSetBytes: true
    k8sPodOwner: true
    kubeApiserverAvailability: true
    kubeApiserverBurnrate: true
    kubeApiserverHistogram: true
    kubeApiserverSlos: true
    kubeControllerManager: true
    kubelet: true
    kubeProxy: true
    kubePrometheusGeneral: true
    kubePrometheusNodeRecording: true
    kubernetesApps: true
    kubernetesResources: true
    kubernetesStorage: true
    kubernetesSystem: true
    kubeSchedulerAlerting: true
    kubeSchedulerRecording: true
    kubeStateMetrics: true
    network: true
    node: true
    nodeExporterAlerting: true
    nodeExporterRecording: true
    prometheus: true
    prometheusOperator: true
    windows: true
windowsMonitoring:
  enabled: {windows_monitoring}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
grafana:
  enabled: {grafana}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
  initChownData:
    resources:
      requests:
        cpu: 10m
        memory: 50Mi
      limits:
        cpu: 50m
        memory: 100Mi
  sidecar:
    resources:
      requests:
        cpu: 10m
        memory: 50Mi
      limits:
        cpu: 50m
        memory: 100Mi
kubernetesServiceMonitors:
  enabled: {kubernetes_service_monitors}
kubeApiServer:
  enabled: {kubernetes_api_server}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
kubelet:
  enabled: {kubelet}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
kubeControllerManager:
  enabled: {kube_controller_manager}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
coreDns:
  enabled: {core_dns}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
kubeEtcd:
  enabled: {kube_etcd}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
kubeScheduler:
  enabled: {kube_scheduler}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
kubeProxy:
  enabled: {kube_proxy}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
kubeStateMetrics:
  enabled: {kube_state_metrics}
kube-state-metrics:
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
nodeExporter:
  enabled: {node_exporter}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
prometheus-node-exporter:
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 200m
      memory: 250Mi
prometheusOperator:
  enabled: {prometheus_operator}
  resources:
    requests:
      cpu: 100m
      memory: 150Mi
    limits:
      cpu: 100m
      memory: 200Mi
  prometheusConfigReloader:
    resources:
      requests:
        cpu: 100m
        memory: 150Mi
      limits:
        cpu: 100m
        memory: 200Mi
  admissionWebhooks:
    deployment:
      resources:
        limits:
          cpu: 10m
          memory: 100Mi
        requests:
          cpu: 10m
          memory: 100Mi
    patch:
      resources:
        limits:
          cpu: 10m
          memory: 100Mi
        requests:
          cpu: 10m
          memory: 100Mi
"#,
    );

    let prometheus_config =
        crate::modules::monitoring::kube_prometheus::types::PrometheusConfigValues {
            prometheus: PrometheusConfig {
                prometheus: bool::from_str(prometheus.as_str()).expect("couldn't parse bool"),
                additional_service_monitors: config.additional_service_monitors.clone(),
            },
        };
    let prometheus_config_yaml =
        serde_yaml::to_string(&prometheus_config).expect("Failed to serialize YAML");

    debug!(
        "serialized prometheus config: \n {:#}",
        prometheus_config_yaml
    );
    values.push_str(&prometheus_config_yaml);

    // Add the required null receiver for the prometheus alert manager.
    let mut null_receiver = Mapping::new();
    null_receiver.insert(
        Value::String("receiver".to_string()),
        Value::String("null".to_string()),
    );
    null_receiver.insert(
        Value::String("matchers".to_string()),
        Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
    );
    null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true));

    // Add alert channels.
    let mut alert_manager_channel_config = AlertManagerConfig {
        global: Mapping::new(),
        route: AlertManagerRoute {
            routes: vec![Value::Mapping(null_receiver)],
        },
        receivers: vec![serde_yaml::from_str("name: 'null'").unwrap()],
    };
    for receiver in config.alert_receiver_configs.iter() {
        if let Some(global) = receiver.channel_global_config.clone() {
            alert_manager_channel_config
                .global
                .insert(global.0, global.1);
        }
        alert_manager_channel_config
            .route
            .routes
            .push(receiver.channel_route.clone());
        alert_manager_channel_config
            .receivers
            .push(receiver.channel_receiver.clone());
    }

    let alert_manager_values = AlertManagerValues {
        alertmanager: AlertManager {
            enabled: config.alert_manager,
            config: alert_manager_channel_config,
            alertmanager_spec: AlertManagerSpec {
                resources: Resources {
                    limits: Limits {
                        memory: "100Mi".to_string(),
                        cpu: "100m".to_string(),
                    },
                    requests: Requests {
                        memory: "100Mi".to_string(),
                        cpu: "100m".to_string(),
                    },
                },
            },
            init_config_reloader: ConfigReloader {
                resources: Resources {
                    limits: Limits {
                        memory: "100Mi".to_string(),
                        cpu: "100m".to_string(),
                    },
                    requests: Requests {
                        memory: "100Mi".to_string(),
                        cpu: "100m".to_string(),
                    },
                },
            },
        },
    };

    let alert_manager_yaml =
        serde_yaml::to_string(&alert_manager_values).expect("Failed to serialize YAML");
    debug!("serialized alert manager: \n {:#}", alert_manager_yaml);
    values.push_str(&alert_manager_yaml);

    // Format alert manager additional rules for the helm chart.
    let mut merged_rules: BTreeMap<String, AlertGroup> = BTreeMap::new();

    for additional_rule in config.alert_rules.clone() {
        for (key, group) in additional_rule.rules {
            merged_rules.insert(key, group);
        }
    }

    let merged_rules = AlertManagerAdditionalPromRules {
        rules: merged_rules,
    };

    let mut alert_manager_additional_rules = serde_yaml::Mapping::new();
    let rules_value = serde_yaml::to_value(merged_rules).unwrap();

    alert_manager_additional_rules.insert(
        serde_yaml::Value::String("additionalPrometheusRulesMap".to_string()),
        rules_value,
    );

    let alert_manager_additional_rules_yaml =
        serde_yaml::to_string(&alert_manager_additional_rules).expect("Failed to serialize YAML");
    debug!(
        "alert_rules_yaml:\n{:#}",
        alert_manager_additional_rules_yaml
    );

    values.push_str(&alert_manager_additional_rules_yaml);
    debug!("full values.yaml: \n {:#}", values);

    HelmChartScore {
        namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()),
        release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
        chart_name: NonBlankString::from_str(
            "oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack",
        )
        .unwrap(),
        chart_version: None,
        values_overrides: None,
        values_yaml: Some(values.to_string()),
        create_namespace: true,
        install_only: true,
        repository: None,
    }
}
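A standalone sketch of the local indentation helper above (re-declared here because in the file it is private to the score function):

```rust
fn indent_lines(s: &str, spaces: usize) -> String {
    let pad = " ".repeat(spaces);
    s.lines()
        .map(|line| format!("{pad}{line}"))
        .collect::<Vec<_>>()
        .join("\n")
}

fn main() {
    let yaml = "limits:\n  memory: 100Mi\n  cpu: 100m";
    // Prints each line shifted right by four spaces, ready to nest
    // under a parent key such as `resources:`.
    println!("{}", indent_lines(yaml, 4));
}
```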
@@ -0,0 +1,2 @@
pub mod config;
pub mod kube_prometheus_helm_chart;
@@ -0,0 +1,39 @@
use std::sync::{Arc, Mutex};

use serde::Serialize;

use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
use crate::{
    modules::monitoring::kube_prometheus::types::ServiceMonitor,
    score::Score,
    topology::{
        HelmCommand, Topology,
        oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret},
        tenant::TenantManager,
    },
};

#[derive(Clone, Debug, Serialize)]
pub struct HelmPrometheusAlertingScore {
    pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
    pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
    pub service_monitors: Vec<ServiceMonitor>,
}

impl<T: Topology + HelmCommand + TenantManager> Score<T> for HelmPrometheusAlertingScore {
    fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
        let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
        config
            .try_lock()
            .expect("couldn't lock config")
            .additional_service_monitors = self.service_monitors.clone();
        Box::new(AlertingInterpret {
            sender: Prometheus::new(),
            receivers: self.receivers.clone(),
            rules: self.rules.clone(),
        })
    }
    fn name(&self) -> String {
        "HelmPrometheusAlertingScore".to_string()
    }
}
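Putting the pieces of this changeset together, a hedged composition sketch (names and webhook URL are invented):

```rust
// Hypothetical composition of the alerting score with the receiver and rule
// types introduced in this compare.
let alerting = HelmPrometheusAlertingScore {
    receivers: vec![Box::new(DiscordWebhook {
        name: "ops-discord".to_string(),
        url: Url::Url(url::Url::parse("https://discord.example.com/api/webhooks/1/x").unwrap()),
    })],
    rules: vec![Box::new(AlertManagerRuleGroup::new(
        "pod-health",
        vec![PrometheusAlertRule::new(
            "PodCrashLooping",
            "increase(kube_pod_container_status_restarts_total[5m]) > 3",
        )],
    ))],
    service_monitors: vec![],
};
// The score is then executed through its interpret against a topology that
// implements Topology + HelmCommand + TenantManager.
```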
4
harmony/src/modules/monitoring/kube_prometheus/mod.rs
Normal file
@@ -0,0 +1,4 @@
pub mod helm;
pub mod helm_prometheus_alert_score;
pub mod prometheus;
pub mod types;
162
harmony/src/modules/monitoring/kube_prometheus/prometheus.rs
Normal file
@@ -0,0 +1,162 @@
use std::sync::{Arc, Mutex};

use async_trait::async_trait;
use log::{debug, error};
use serde::Serialize;

use crate::{
    interpret::{InterpretError, Outcome},
    inventory::Inventory,
    modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
    score,
    topology::{
        HelmCommand, K8sAnywhereTopology, Topology,
        installable::Installable,
        oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
        tenant::TenantManager,
    },
};

use score::Score;

use super::{
    helm::{
        config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
    },
    types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
};

#[async_trait]
impl AlertSender for Prometheus {
    fn name(&self) -> String {
        "HelmKubePrometheus".to_string()
    }
}

#[async_trait]
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
    async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
        self.configure_with_topology(topology).await;
        Ok(())
    }

    async fn ensure_installed(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<(), InterpretError> {
        self.install_prometheus(inventory, topology).await?;
        Ok(())
    }
}

#[derive(Debug)]
pub struct Prometheus {
    pub config: Arc<Mutex<KubePrometheusConfig>>,
}

impl Prometheus {
    pub fn new() -> Self {
        Self {
            config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
        }
    }

    pub async fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
        let ns = topology
            .get_tenant_config()
            .await
            .map(|cfg| cfg.name.clone())
            .unwrap_or_else(|| "monitoring".to_string());
        error!("This must be refactored, see comments in pr #74");
        debug!("NS: {}", ns);
        self.config.lock().unwrap().namespace = Some(ns);
    }

    pub async fn install_receiver(
        &self,
        prometheus_receiver: &dyn PrometheusReceiver,
    ) -> Result<Outcome, InterpretError> {
        let prom_receiver = prometheus_receiver.configure_receiver().await;
        debug!(
            "adding alert receiver to prometheus config: {:#?}",
            &prom_receiver
        );
        let mut config = self.config.lock().unwrap();

        config.alert_receiver_configs.push(prom_receiver);
        let prom_receiver_name = prometheus_receiver.name();
        debug!("installed alert receiver {}", &prom_receiver_name);
        Ok(Outcome::success(format!(
            "Successfully installed receiver {}",
            prom_receiver_name
        )))
    }

    pub async fn install_rule(
        &self,
        prometheus_rule: &AlertManagerRuleGroup,
    ) -> Result<Outcome, InterpretError> {
        let prometheus_rule = prometheus_rule.configure_rule().await;
        let mut config = self.config.lock().unwrap();

        config.alert_rules.push(prometheus_rule.clone());
        Ok(Outcome::success(format!(
            "Successfully installed alert rule: {:#?},",
            prometheus_rule
        )))
    }

    pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        kube_prometheus_helm_chart_score(self.config.clone())
            .create_interpret()
            .execute(inventory, topology)
            .await
    }
}

#[async_trait]
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
    fn name(&self) -> String;
    async fn configure_receiver(&self) -> AlertManagerChannelConfig;
}

impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}

impl Clone for Box<dyn AlertReceiver<Prometheus>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}

#[async_trait]
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
    fn name(&self) -> String;
    async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
}

impl Serialize for Box<dyn AlertRule<Prometheus>> {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}

impl Clone for Box<dyn AlertRule<Prometheus>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}
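The receiver installation flow then looks roughly like this (sketch only; assumes an async context, and the webhook URL is invented):

```rust
// Sketch: receivers register themselves on the Prometheus sender, which
// accumulates them into its shared KubePrometheusConfig.
let prometheus = Prometheus::new();
let receiver = DiscordWebhook {
    name: "ops-discord".to_string(),
    url: Url::Url(url::Url::parse("https://discord.example.com/api/webhooks/1/x").unwrap()),
};
let _outcome = receiver.install(&prometheus).await?;
// install_prometheus(...) later renders the accumulated config into the
// kube-prometheus-stack values.yaml and applies the helm chart.
```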
286
harmony/src/modules/monitoring/kube_prometheus/types.rs
Normal file
286
harmony/src/modules/monitoring/kube_prometheus/types.rs
Normal file
@@ -0,0 +1,286 @@
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
use serde_yaml::{Mapping, Sequence, Value};
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup;
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertChannelConfig {
|
||||
async fn get_config(&self) -> AlertManagerChannelConfig;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct AlertManagerValues {
|
||||
pub alertmanager: AlertManager,
|
||||
}
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct AlertManager {
|
||||
pub enabled: bool,
|
||||
pub config: AlertManagerConfig,
|
||||
pub alertmanager_spec: AlertManagerSpec,
|
||||
pub init_config_reloader: ConfigReloader,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ConfigReloader {
|
||||
pub resources: Resources,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct AlertManagerConfig {
|
||||
pub global: Mapping,
|
||||
pub route: AlertManagerRoute,
|
||||
pub receivers: Sequence,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct AlertManagerRoute {
|
||||
pub routes: Sequence,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct AlertManagerChannelConfig {
|
||||
///expecting an option that contains two values
|
||||
///if necessary for the alertchannel
|
||||
///[ jira_api_url: <string> ]
|
||||
pub channel_global_config: Option<(Value, Value)>,
|
||||
pub channel_route: Value,
|
||||
pub channel_receiver: Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManagerSpec {
    pub(crate) resources: Resources,
}

#[derive(Debug, Clone, Serialize)]
pub struct Resources {
    pub limits: Limits,
    pub requests: Requests,
}

#[derive(Debug, Clone, Serialize)]
pub struct Limits {
    pub memory: String,
    pub cpu: String,
}

#[derive(Debug, Clone, Serialize)]
pub struct Requests {
    pub memory: String,
    pub cpu: String,
}

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerAdditionalPromRules {
    #[serde(flatten)]
    pub rules: BTreeMap<String, AlertGroup>,
}

#[derive(Debug, Clone, Serialize)]
pub struct AlertGroup {
    pub groups: Vec<AlertManagerRuleGroup>,
}

#[derive(Debug, Clone, Serialize)]
pub enum HTTPScheme {
    #[serde(rename = "http")]
    HTTP,
    #[serde(rename = "https")]
    HTTPS,
}

#[derive(Debug, Clone, Serialize)]
pub enum Operator {
    In,
    NotIn,
    Exists,
    DoesNotExist,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfigValues {
    pub prometheus: PrometheusConfig,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusConfig {
    pub prometheus: bool,
    pub additional_service_monitors: Vec<ServiceMonitor>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorTLSConfig {
    /// Path to the CA file.
    pub ca_file: Option<String>,
    /// Path to the client certificate file.
    pub cert_file: Option<String>,
    /// Skip certificate verification.
    pub insecure_skip_verify: Option<bool>,
    /// Path to the client key file.
    pub key_file: Option<String>,
    /// Server name used to verify the host name.
    pub server_name: Option<String>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorEndpoint {
    /// Name of the endpoint's service port. Mutually exclusive with `target_port`.
    pub port: Option<String>,
    /// Name or number of the endpoint's target port. Mutually exclusive with `port`.
    pub target_port: Option<String>,
    /// File containing the bearer token to be used when scraping targets.
    pub bearer_token_file: Option<String>,
    /// Interval at which metrics should be scraped.
    pub interval: Option<String>,
    /// HTTP path to scrape for metrics.
    pub path: String,
    /// HTTP scheme to use for scraping.
    pub scheme: HTTPScheme,
    /// TLS configuration to use when scraping the endpoint.
    pub tls_config: Option<ServiceMonitorTLSConfig>,
    /// MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
    /// Ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    /// Example:
    ///   - action: keep
    ///     regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    ///     sourceLabels: [__name__]
    pub metric_relabelings: Vec<Mapping>,
    /// RelabelConfigs to apply to samples before scraping.
    /// Ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    /// Example:
    ///   - sourceLabels: [__meta_kubernetes_pod_node_name]
    ///     separator: ;
    ///     regex: ^(.*)$
    ///     targetLabel: nodename
    ///     replacement: $1
    ///     action: replace
    pub relabelings: Vec<Mapping>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct MatchExpression {
    pub key: String,
    pub operator: Operator,
    pub values: Vec<String>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Selector {
    /// Label selector for services.
    pub match_labels: HashMap<String, String>,
    pub match_expressions: Vec<MatchExpression>,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitor {
    pub name: String,

    /// Additional labels used for the ServiceMonitorSelector, together with the standard labels from the chart.
    pub additional_labels: Option<Mapping>,

    /// Service label used to assemble a job name of the form <label value>-<port>.
    /// If no label is specified, the service name is used.
    pub job_label: Option<String>,

    /// Labels to transfer from the Kubernetes service to the target.
    pub target_labels: Vec<String>,

    /// Labels to transfer from the Kubernetes pods to the target.
    pub pod_target_labels: Vec<String>,

    /// Label selector for the services to which this ServiceMonitor applies.
    /// Example selecting all services labeled "monitoredby" with a value of
    /// either "example-service-1" or "example-service-2":
    ///   matchExpressions:
    ///     - key: "monitoredby"
    ///       operator: In
    ///       values:
    ///         - example-service-1
    ///         - example-service-2
    pub selector: Selector,

    /// Namespaces from which services are selected: either match any
    /// namespace (`any: bool`) or give an explicit list of namespace names
    /// (`matchNames: Vec`).
    pub namespace_selector: Option<Mapping>,

    /// Endpoints of the selected service to be monitored.
    pub endpoints: Vec<ServiceMonitorEndpoint>,

    /// Fallback scrape protocol used by Prometheus for scraping metrics.
    /// Ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.ScrapeProtocol
    pub fallback_scrape_protocol: Option<String>,
}

impl Default for ServiceMonitor {
    fn default() -> Self {
        Self {
            name: Default::default(),
            additional_labels: Default::default(),
            job_label: Default::default(),
            target_labels: Default::default(),
            pod_target_labels: Default::default(),
            selector: Selector {
                match_labels: HashMap::new(),
                match_expressions: vec![],
            },
            namespace_selector: Default::default(),
            endpoints: Default::default(),
            fallback_scrape_protocol: Default::default(),
        }
    }
}

impl Default for ServiceMonitorEndpoint {
    fn default() -> Self {
        Self {
            port: Some("80".to_string()),
            target_port: Default::default(),
            bearer_token_file: Default::default(),
            interval: Default::default(),
            path: "/metrics".to_string(),
            scheme: HTTPScheme::HTTP,
            tls_config: Default::default(),
            metric_relabelings: Default::default(),
            relabelings: Default::default(),
        }
    }
}
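To make the intended composition concrete, here is a minimal sketch of building one of these values with the Default impls above; the service name, label, and port name are illustrative only:

    use std::collections::HashMap;

    // Monitor a hypothetical "my-app" service on its "http-metrics" port;
    // everything not set explicitly falls back to the Default impls above
    // (path "/metrics", plain HTTP, no TLS).
    let monitor = ServiceMonitor {
        name: "my-app".to_string(),
        selector: Selector {
            match_labels: HashMap::from([("app".to_string(), "my-app".to_string())]),
            match_expressions: vec![],
        },
        endpoints: vec![ServiceMonitorEndpoint {
            port: Some("http-metrics".to_string()),
            ..Default::default()
        }],
        ..Default::default()
    };
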
@@ -1,5 +1,4 @@
-mod config;
-mod discord_alert_manager;
-pub mod discord_webhook_sender;
-mod kube_prometheus;
-pub mod monitoring_alerting;
+pub mod alert_channel;
+pub mod alert_rule;
+pub mod kube_prometheus;
+pub mod ntfy;
@@ -1,161 +0,0 @@
use async_trait::async_trait;
use email_address::EmailAddress;

use log::info;
use serde::Serialize;
use url::Url;

use crate::{
    data::{Id, Version},
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
    inventory::Inventory,
    score::Score,
    topology::{HelmCommand, Topology},
};

use super::{
    config::KubePrometheusConfig, discord_alert_manager::discord_alert_manager_score,
    kube_prometheus::kube_prometheus_helm_chart_score,
};

#[derive(Debug, Clone, Serialize)]
pub enum AlertChannel {
    Discord {
        name: String,
        webhook_url: Url,
    },
    Slack {
        slack_channel: String,
        webhook_url: Url,
    },
    // TODO: test and implement in the helm chart; currently does not work.
    Smpt {
        email_address: EmailAddress,
        service_name: String,
    },
}

#[derive(Debug, Clone, Serialize)]
pub struct MonitoringAlertingStackScore {
    pub alert_channel: Vec<AlertChannel>,
    pub namespace: Option<String>,
}

impl MonitoringAlertingStackScore {
    pub fn new() -> Self {
        Self {
            alert_channel: Vec::new(),
            namespace: None,
        }
    }
}

impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        Box::new(MonitoringAlertingStackInterpret {
            score: self.clone(),
        })
    }
    fn name(&self) -> String {
        "MonitoringAlertingStackScore".to_string()
    }
}

#[derive(Debug, Clone, Serialize)]
struct MonitoringAlertingStackInterpret {
    score: MonitoringAlertingStackScore,
}

impl MonitoringAlertingStackInterpret {
    async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig {
        let mut config = KubePrometheusConfig::new();
        if let Some(ns) = &self.score.namespace {
            config.namespace = ns.clone();
        }
        config.alert_channel = self.score.alert_channel.clone();
        config
    }

    async fn deploy_kube_prometheus_helm_chart_score<T: Topology + HelmCommand>(
        &self,
        inventory: &Inventory,
        topology: &T,
        config: &KubePrometheusConfig,
    ) -> Result<Outcome, InterpretError> {
        let helm_chart = kube_prometheus_helm_chart_score(config);
        helm_chart
            .create_interpret()
            .execute(inventory, topology)
            .await
    }

    async fn deploy_alert_channel_service<T: Topology + HelmCommand>(
        &self,
        inventory: &Inventory,
        topology: &T,
        config: &KubePrometheusConfig,
    ) -> Result<Outcome, InterpretError> {
        //let mut outcomes = vec![];

        //for channel in &self.score.alert_channel {
        //    let outcome = match channel {
        //        AlertChannel::Discord { .. } => {
        //            discord_alert_manager_score(config)
        //                .create_interpret()
        //                .execute(inventory, topology)
        //                .await
        //        }
        //        AlertChannel::Slack { .. } => Ok(Outcome::success(
        //            "No extra configs for slack alerting".to_string(),
        //        )),
        //        AlertChannel::Smpt { .. } => {
        //            todo!()
        //        }
        //    };
        //    outcomes.push(outcome);
        //}
        //for result in outcomes {
        //    result?;
        //}

        Ok(Outcome::success("All alert channels deployed".to_string()))
    }
}

#[async_trait]
impl<T: Topology + HelmCommand> Interpret<T> for MonitoringAlertingStackInterpret {
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let config = self.build_kube_prometheus_helm_chart_config().await;
        info!("Built kube prometheus config");
        info!("Installing kube prometheus chart");
        self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config)
            .await?;
        info!("Installing alert channel service");
        self.deploy_alert_channel_service(inventory, topology, &config)
            .await?;
        Ok(Outcome::success(
            "Successfully deployed monitoring and alerting stack".to_string(),
        ))
    }

    fn get_name(&self) -> InterpretName {
        todo!()
    }

    fn get_version(&self) -> Version {
        todo!()
    }

    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}
1
harmony/src/modules/monitoring/ntfy/helm/mod.rs
Normal file
@@ -0,0 +1 @@
pub mod ntfy_helm_chart;
83
harmony/src/modules/monitoring/ntfy/helm/ntfy_helm_chart.rs
Normal file
@@ -0,0 +1,83 @@
use non_blank_string_rs::NonBlankString;
use std::str::FromStr;

use crate::modules::helm::chart::{HelmChartScore, HelmRepository};

pub fn ntfy_helm_chart_score(namespace: String) -> HelmChartScore {
    let values = r#"
replicaCount: 1

image:
  repository: binwiederhier/ntfy
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "v2.12.0"

serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Annotations to add to the service account
  # annotations:
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  # name: ""

service:
  type: ClusterIP
  port: 80

ingress:
  enabled: false
  # annotations:
  #   kubernetes.io/ingress.class: nginx
  #   kubernetes.io/tls-acme: "true"
  hosts:
    - host: ntfy.host.com
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

autoscaling:
  enabled: false

config:
  enabled: true
  data:
    # base-url: "https://ntfy.something.com"
    auth-file: "/var/cache/ntfy/user.db"
    auth-default-access: "deny-all"
    cache-file: "/var/cache/ntfy/cache.db"
    attachment-cache-dir: "/var/cache/ntfy/attachments"
    behind-proxy: true
    # web-root: "disable"
    enable-signup: false
    enable-login: "true"

persistence:
  enabled: true
  size: 200Mi
"#
    .to_string();

    HelmChartScore {
        namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
        release_name: NonBlankString::from_str("ntfy").unwrap(),
        chart_name: NonBlankString::from_str("sarab97/ntfy").unwrap(),
        chart_version: Some(NonBlankString::from_str("0.1.7").unwrap()),
        values_overrides: None,
        values_yaml: Some(values),
        create_namespace: true,
        install_only: false,
        repository: Some(HelmRepository::new(
            "sarab97".to_string(),
            url::Url::parse("https://charts.sarabsingh.com").unwrap(),
            true,
        )),
    }
}
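As a usage sketch, the chart score plugs into the same interpret flow as the other helm-backed scores in this diff; the "monitoring" namespace and the helper function are illustrative assumptions:

    async fn deploy_ntfy_chart<T: Topology + HelmCommand>(
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        // Build the score for an assumed "monitoring" namespace and run its
        // interpret, exactly as deploy_kube_prometheus_helm_chart_score does above.
        ntfy_helm_chart_score("monitoring".to_string())
            .create_interpret()
            .execute(inventory, topology)
            .await
    }
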
2
harmony/src/modules/monitoring/ntfy/mod.rs
Normal file
@@ -0,0 +1,2 @@
pub mod helm;
pub mod ntfy;
169
harmony/src/modules/monitoring/ntfy/ntfy.rs
Normal file
@@ -0,0 +1,169 @@
use std::sync::Arc;

use async_trait::async_trait;
use log::debug;
use serde::Serialize;
use strum::{Display, EnumString};

use crate::{
    data::{Id, Version},
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
    inventory::Inventory,
    modules::monitoring::ntfy::helm::ntfy_helm_chart::ntfy_helm_chart_score,
    score::Score,
    topology::{HelmCommand, K8sclient, Topology, k8s::K8sClient},
};

#[derive(Debug, Clone, Serialize)]
pub struct NtfyScore {
    pub namespace: String,
}

impl<T: Topology + HelmCommand + K8sclient> Score<T> for NtfyScore {
    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        Box::new(NtfyInterpret {
            score: self.clone(),
        })
    }

    fn name(&self) -> String {
        "Ntfy".to_string()
    }
}

#[derive(Debug, Serialize)]
pub struct NtfyInterpret {
    pub score: NtfyScore,
}

#[derive(Debug, EnumString, Display)]
enum NtfyAccessMode {
    #[strum(serialize = "read-write", serialize = "rw", to_string = "read-write")]
    ReadWrite,
    #[strum(
        serialize = "read-only",
        serialize = "ro",
        serialize = "read",
        to_string = "read-only"
    )]
    ReadOnly,
    #[strum(
        serialize = "write-only",
        serialize = "wo",
        serialize = "write",
        to_string = "write-only"
    )]
    WriteOnly,
    #[strum(serialize = "none", to_string = "deny")]
    Deny,
}

#[derive(Debug, EnumString, Display)]
enum NtfyRole {
    #[strum(serialize = "user", to_string = "user")]
    User,
    #[strum(serialize = "admin", to_string = "admin")]
    Admin,
}

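The strum attributes above accept several spellings on input while rendering one canonical form on output; a small sketch of that behavior:

    use std::str::FromStr;

    // EnumString parses any of the `serialize` spellings; Display renders the
    // `to_string` form, which is what gets interpolated into the ntfy CLI
    // commands below.
    let mode = NtfyAccessMode::from_str("rw").expect("known access mode");
    assert_eq!(mode.to_string(), "read-write");
    assert_eq!(NtfyAccessMode::from_str("none").unwrap().to_string(), "deny");
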
impl NtfyInterpret {
    async fn add_user(
        &self,
        k8s_client: Arc<K8sClient>,
        username: &str,
        password: &str,
        role: Option<NtfyRole>,
    ) -> Result<(), String> {
        let role = role.unwrap_or(NtfyRole::User);

        k8s_client
            .exec_app(
                "ntfy".to_string(),
                Some(&self.score.namespace),
                vec![
                    "sh",
                    "-c",
                    format!("NTFY_PASSWORD={password} ntfy user add --role={role} {username}")
                        .as_str(),
                ],
            )
            .await?;

        Ok(())
    }

    async fn set_access(
        &self,
        k8s_client: Arc<K8sClient>,
        username: &str,
        topic: &str,
        mode: NtfyAccessMode,
    ) -> Result<(), String> {
        k8s_client
            .exec_app(
                "ntfy".to_string(),
                Some(&self.score.namespace),
                vec![
                    "sh",
                    "-c",
                    format!("ntfy access {username} {topic} {mode}").as_str(),
                ],
            )
            .await?;

        Ok(())
    }
}

/// We need an ntfy interpret to wrap the HelmChartScore: it runs the chart score,
/// then bootstraps the configuration inside ntfy itself.
#[async_trait]
impl<T: Topology + HelmCommand + K8sclient> Interpret<T> for NtfyInterpret {
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        ntfy_helm_chart_score(self.score.namespace.clone())
            .create_interpret()
            .execute(inventory, topology)
            .await?;

        debug!("installed ntfy helm chart");
        let client = topology
            .k8s_client()
            .await
            .expect("couldn't get k8s client");
        debug!("created k8s client");

        client
            .wait_until_deployment_ready(
                "ntfy".to_string(),
                Some(self.score.namespace.as_str()),
                None,
            )
            .await?;
        debug!("ntfy deployment ready");

        self.add_user(client, "harmony", "harmony", Some(NtfyRole::Admin))
            .await?;

        debug!("exec into pod done");

        Ok(Outcome::success("installed ntfy".to_string()))
    }

    fn get_name(&self) -> InterpretName {
        todo!()
    }
    fn get_version(&self) -> Version {
        todo!()
    }
    fn get_status(&self) -> InterpretStatus {
        todo!()
    }
    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}
119
harmony/src/modules/postgres.rs
Normal file
@@ -0,0 +1,119 @@
use async_trait::async_trait;
use derive_new::new;
use log::info;
use serde::{Deserialize, Serialize};

use crate::{
    data::{PostgresDatabase, PostgresUser, Version},
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
    inventory::Inventory,
    score::Score,
    topology::{PostgresServer, Topology},
};

#[derive(Debug, new, Clone, Serialize, Deserialize)]
pub struct PostgresScore {
    users: Vec<PostgresUser>,
    databases: Vec<PostgresDatabase>,
}

impl<T: Topology + PostgresServer> Score<T> for PostgresScore {
    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        Box::new(PostgresInterpret::new(self.clone()))
    }

    fn name(&self) -> String {
        "PostgresScore".to_string()
    }
}

#[derive(Debug, Clone)]
pub struct PostgresInterpret {
    score: PostgresScore,
    version: Version,
    status: InterpretStatus,
}

impl PostgresInterpret {
    pub fn new(score: PostgresScore) -> Self {
        let version = Version::from("1.0.0").expect("Version should be valid");

        Self {
            version,
            score,
            status: InterpretStatus::QUEUED,
        }
    }

    async fn ensure_users_exist<P: PostgresServer>(
        &self,
        postgres_server: &P,
    ) -> Result<Outcome, InterpretError> {
        let users = &self.score.users;
        postgres_server.ensure_users_exist(users.clone()).await?;

        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            format!(
                "PostgresInterpret ensured {} users exist successfully",
                users.len()
            ),
        ))
    }

    async fn ensure_databases_exist<P: PostgresServer>(
        &self,
        postgres_server: &P,
    ) -> Result<Outcome, InterpretError> {
        let databases = &self.score.databases;
        postgres_server
            .ensure_databases_exist(databases.clone())
            .await?;

        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            format!(
                "PostgresInterpret ensured {} databases exist successfully",
                databases.len()
            ),
        ))
    }
}

#[async_trait]
impl<T: Topology + PostgresServer> Interpret<T> for PostgresInterpret {
    fn get_name(&self) -> InterpretName {
        InterpretName::Postgres
    }

    fn get_version(&self) -> crate::domain::data::Version {
        self.version.clone()
    }

    fn get_status(&self) -> InterpretStatus {
        self.status.clone()
    }

    fn get_children(&self) -> Vec<crate::domain::data::Id> {
        todo!()
    }

    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        info!(
            "Executing {} on inventory {inventory:?}",
            <PostgresInterpret as Interpret<T>>::get_name(self)
        );

        self.ensure_users_exist(topology).await?;
        self.ensure_databases_exist(topology).await?;

        Ok(Outcome::new(
            InterpretStatus::SUCCESS,
            "Postgres Interpret execution successful".to_string(),
        ))
    }
}
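For orientation, a minimal sketch of driving this score end to end; the helper function is hypothetical glue, and only the types and trait bounds shown above are taken from this diff:

    async fn provision_postgres<T: Topology + PostgresServer>(
        inventory: &Inventory,
        topology: &T,
        users: Vec<PostgresUser>,
        databases: Vec<PostgresDatabase>,
    ) -> Result<Outcome, InterpretError> {
        // `new` is generated by derive_new from the two struct fields.
        let score = PostgresScore::new(users, databases);
        score.create_interpret().execute(inventory, topology).await
    }
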
40
harmony/src/modules/prometheus/alerts/infra/dell_server.rs
Normal file
@@ -0,0 +1,40 @@
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;

pub fn global_storage_status_degraded_non_critical() -> PrometheusAlertRule {
    PrometheusAlertRule::new("GlobalStorageStatusNonCritical", "globalStorageStatus == 4")
        .for_duration("5m")
        .label("severity", "warning")
        .annotation(
            "description",
            "- **System**: {{ $labels.instance }}\n- **Status**: nonCritical\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
        )
        .annotation("title", "System storage status is in degraded state")
}

pub fn alert_global_storage_status_critical() -> PrometheusAlertRule {
    PrometheusAlertRule::new(
        "GlobalStorageStatusCritical",
        "globalStorageStatus == 5",
    )
    .for_duration("5m")
    .label("severity", "warning")
    .annotation("title", "System storage status is critical at {{ $labels.instance }}")
    .annotation(
        "description",
        "- **System**: {{ $labels.instance }}\n- **Status**: Critical\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
    )
}

pub fn alert_global_storage_status_non_recoverable() -> PrometheusAlertRule {
    PrometheusAlertRule::new(
        "GlobalStorageStatusNonRecoverable",
        "globalStorageStatus == 6",
    )
    .for_duration("5m")
    .label("severity", "warning")
    .annotation("title", "System storage status is nonRecoverable at {{ $labels.instance }}")
    .annotation(
        "description",
        "- **System**: {{ $labels.instance }}\n- **Status**: nonRecoverable\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
    )
}
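The same fluent builder extends naturally to other hardware metrics; a sketch of an additional rule, where the psuStatus metric name and threshold are illustrative assumptions rather than something defined in this diff:

    pub fn alert_power_supply_degraded() -> PrometheusAlertRule {
        // Hypothetical metric and threshold, following the same pattern as
        // the globalStorageStatus rules above.
        PrometheusAlertRule::new("PowerSupplyDegraded", "psuStatus == 4")
            .for_duration("5m")
            .label("severity", "warning")
            .annotation("title", "Power supply degraded at {{ $labels.instance }}")
            .annotation(
                "description",
                "- **System**: {{ $labels.instance }}\n- **Value**: {{ $value }}",
            )
    }
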
1
harmony/src/modules/prometheus/alerts/infra/mod.rs
Normal file
@@ -0,0 +1 @@
pub mod dell_server;
1
harmony/src/modules/prometheus/alerts/k8s/mod.rs
Normal file
@@ -0,0 +1 @@
pub mod pvc;
11
harmony/src/modules/prometheus/alerts/k8s/pvc.rs
Normal file
@@ -0,0 +1,11 @@
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;

pub fn high_pvc_fill_rate_over_two_days() -> PrometheusAlertRule {
    PrometheusAlertRule::new(
        "PVCFillOver95PercentIn2Days",
        "(kubelet_volume_stats_used_bytes/kubelet_volume_stats_capacity_bytes) > 0.95 AND predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)/kubelet_volume_stats_capacity_bytes > 0.95",
    )
    .for_duration("1m")
    .label("severity", "warning")
    .annotation("summary", "The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.")
    .annotation("description", "PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days")
}
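For reference, predict_linear(v[2d], t) fits a simple linear regression over the samples in the two-day window and extrapolates the series t seconds ahead, so the second clause of the expression asks whether the fitted fill trend is above 95% of capacity at the prediction horizon spelled out below:

    // The prediction horizon passed to predict_linear above:
    // 2 days * 24 h * 60 min * 60 s.
    const TWO_DAY_HORIZON_SECS: u64 = 2 * 24 * 60 * 60; // 172_800 seconds
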
2
harmony/src/modules/prometheus/alerts/mod.rs
Normal file
@@ -0,0 +1,2 @@
pub mod infra;
pub mod k8s;
Some files were not shown because too many files have changed in this diff.