Compare commits
45 Commits
adr-nats-c
...
fix/monito
| Author | SHA1 | Date | |
|---|---|---|---|
| c4dd0b0cf2 | |||
| b14b41d172 | |||
| 5e861cfc6d | |||
| 4fad077eb4 | |||
| d80561e326 | |||
| 621aed4903 | |||
| e68426cc3d | |||
| 0c1c8daf13 | |||
| 4b5e3a52a1 | |||
| c54936d19f | |||
| 699822af74 | |||
| 554c94f5a9 | |||
| 836db9e6b1 | |||
| bc6a41d40c | |||
| 8d446ec2e4 | |||
| ff7d2fb89e | |||
| 9bb38b930a | |||
| c677487a5e | |||
| c1d46612ac | |||
| 4fba01338d | |||
| 913ed17453 | |||
| 9e185cbbd5 | |||
| 752526f831 | |||
| f9bd6ad260 | |||
| 111181c300 | |||
| 3257cd9569 | |||
| 4b1915c594 | |||
| cf3050ce87 | |||
| c3e27c60be | |||
| 2d26790c82 | |||
| 2e89308b82 | |||
| d8936a8307 | |||
| e2fa12508f | |||
| bea2a75882 | |||
| a1528665d0 | |||
| 613225a00b | |||
| dd1c088f0d | |||
| b4ef009804 | |||
| 191e92048b | |||
| f4a70d8978 | |||
| 2ddc9c0579 | |||
| fececc2efd | |||
| 8afcacbd24 | |||
| b885c35706 | |||
|
|
bb6b4b7f88 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -26,3 +26,6 @@ Cargo.lock
|
||||
*.pdb
|
||||
|
||||
.harmony_generated
|
||||
|
||||
# Useful to create ignore folders for temp files and notes
|
||||
ignore
|
||||
|
||||
54
Cargo.lock
generated
54
Cargo.lock
generated
@@ -1828,6 +1828,40 @@ dependencies = [
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-k8s-drain-node"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert_cmd",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_types",
|
||||
"inquire 0.7.5",
|
||||
"log",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-k8s-write-file-on-node"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert_cmd",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_types",
|
||||
"inquire 0.7.5",
|
||||
"log",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-kube-rs"
|
||||
version = "0.1.0"
|
||||
@@ -3638,26 +3672,6 @@ dependencies = [
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json-prompt"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"brocade",
|
||||
"cidr",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_secret",
|
||||
"harmony_secret_derive",
|
||||
"harmony_types",
|
||||
"log",
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonpath-rust"
|
||||
version = "0.7.5"
|
||||
|
||||
@@ -52,6 +52,7 @@ kube = { version = "1.1.0", features = [
|
||||
"jsonpatch",
|
||||
] }
|
||||
k8s-openapi = { version = "0.25", features = ["v1_30"] }
|
||||
# TODO replace with https://github.com/bourumir-wyngs/serde-saphyr as serde_yaml is deprecated https://github.com/sebastienrousseau/serde_yml
|
||||
serde_yaml = "0.9"
|
||||
serde-value = "0.7"
|
||||
http = "1.2"
|
||||
|
||||
87
README.md
87
README.md
@@ -1,4 +1,6 @@
|
||||
# Harmony : Open-source infrastructure orchestration that treats your platform like first-class code
|
||||
# Harmony
|
||||
|
||||
Open-source infrastructure orchestration that treats your platform like first-class code.
|
||||
|
||||
In other words, Harmony is a **next-generation platform engineering framework**.
|
||||
|
||||
@@ -20,9 +22,7 @@ All in **one strongly-typed Rust codebase**.
|
||||
|
||||
From a **developer laptop** to a **global production cluster**, a single **source of truth** drives the **full software lifecycle.**
|
||||
|
||||
---
|
||||
|
||||
## 1 · The Harmony Philosophy
|
||||
## The Harmony Philosophy
|
||||
|
||||
Infrastructure is essential, but it shouldn’t be your core business. Harmony is built on three guiding principles that make modern platforms reliable, repeatable, and easy to reason about.
|
||||
|
||||
@@ -34,9 +34,18 @@ Infrastructure is essential, but it shouldn’t be your core business. Harmony i
|
||||
|
||||
These principles surface as simple, ergonomic Rust APIs that let teams focus on their product while trusting the platform underneath.
|
||||
|
||||
---
|
||||
## Where to Start
|
||||
|
||||
## 2 · Quick Start
|
||||
We have a comprehensive set of documentation right here in the repository.
|
||||
|
||||
| I want to... | Start Here |
|
||||
| ----------------- | ------------------------------------------------------------------ |
|
||||
| Get Started | [Getting Started Guide](./docs/guides/getting-started.md) |
|
||||
| See an Example | [Use Case: Deploy a Rust Web App](./docs/use-cases/rust-webapp.md) |
|
||||
| Explore | [Documentation Hub](./docs/README.md) |
|
||||
| See Core Concepts | [Core Concepts Explained](./docs/concepts.md) |
|
||||
|
||||
## Quick Look: Deploy a Rust Webapp
|
||||
|
||||
The snippet below spins up a complete **production-grade Rust + Leptos Webapp** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.
|
||||
|
||||
@@ -94,63 +103,33 @@ async fn main() {
|
||||
}
|
||||
```
|
||||
|
||||
Run it:
|
||||
To run this:
|
||||
|
||||
```bash
|
||||
cargo run
|
||||
```
|
||||
- Clone the repository: `git clone https://git.nationtech.io/nationtech/harmony`
|
||||
- Install dependencies: `cargo build --release`
|
||||
- Run the example: `cargo run --example try_rust_webapp`
|
||||
|
||||
Harmony analyses the code, shows an execution plan in a TUI, and applies it once you confirm. Same code, same binary—every environment.
|
||||
## Documentation
|
||||
|
||||
---
|
||||
All documentation is in the `/docs` directory.
|
||||
|
||||
## 3 · Core Concepts
|
||||
- [Documentation Hub](./docs/README.md): The main entry point for all documentation.
|
||||
- [Core Concepts](./docs/concepts.md): A detailed look at Score, Topology, Capability, Inventory, and Interpret.
|
||||
- [Component Catalogs](./docs/catalogs/README.md): Discover all available Scores, Topologies, and Capabilities.
|
||||
- [Developer Guide](./docs/guides/developer-guide.md): Learn how to write your own Scores and Topologies.
|
||||
|
||||
| Term | One-liner |
|
||||
| ---------------- | ---------------------------------------------------------------------------------------------------- |
|
||||
| **Score<T>** | Declarative description of the desired state (e.g., `LAMPScore`). |
|
||||
| **Interpret<T>** | Imperative logic that realises a `Score` on a specific environment. |
|
||||
| **Topology** | An environment (local k3d, AWS, bare-metal) exposing verified _Capabilities_ (Kubernetes, DNS, …). |
|
||||
| **Maestro** | Orchestrator that compiles Scores + Topology, ensuring all capabilities line up **at compile-time**. |
|
||||
| **Inventory** | Optional catalogue of physical assets for bare-metal and edge deployments. |
|
||||
## Architectural Decision Records
|
||||
|
||||
A visual overview is in the diagram below.
|
||||
- [ADR-001 · Why Rust](adr/001-rust.md)
|
||||
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
|
||||
- [ADR-006 · Secret Management](adr/006-secret-management.md)
|
||||
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
|
||||
|
||||
[Harmony Core Architecture](docs/diagrams/Harmony_Core_Architecture.drawio.svg)
|
||||
## Contribute
|
||||
|
||||
---
|
||||
Discussions and roadmap live in [Issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
|
||||
|
||||
## 4 · Install
|
||||
|
||||
Prerequisites:
|
||||
|
||||
- Rust
|
||||
- Docker (if you deploy locally)
|
||||
- `kubectl` / `helm` for Kubernetes-based topologies
|
||||
|
||||
```bash
|
||||
git clone https://git.nationtech.io/nationtech/harmony
|
||||
cd harmony
|
||||
cargo build --release # builds the CLI, TUI and libraries
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5 · Learning More
|
||||
|
||||
- **Architectural Decision Records** – dive into the rationale
|
||||
- [ADR-001 · Why Rust](adr/001-rust.md)
|
||||
- [ADR-003 · Infrastructure Abstractions](adr/003-infrastructure-abstractions.md)
|
||||
- [ADR-006 · Secret Management](adr/006-secret-management.md)
|
||||
- [ADR-011 · Multi-Tenant Cluster](adr/011-multi-tenant-cluster.md)
|
||||
|
||||
- **Extending Harmony** – write new Scores / Interprets, add hardware like OPNsense firewalls, or embed Harmony in your own tooling (`/docs`).
|
||||
|
||||
- **Community** – discussions and roadmap live in [GitLab issues](https://git.nationtech.io/nationtech/harmony/-/issues). PRs, ideas, and feedback are welcome!
|
||||
|
||||
---
|
||||
|
||||
## 6 · License
|
||||
## License
|
||||
|
||||
Harmony is released under the **GNU AGPL v3**.
|
||||
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
Initial Date: 2025-02-06
|
||||
|
||||
## Status
|
||||
|
||||
Proposed
|
||||
|
||||
## Context
|
||||
|
||||
The Harmony Agent requires a persistent connection to the NATS Supercluster to perform Key-Value (KV) operations (Read/Write/Watch).
|
||||
|
||||
Service Requirements: The agent must authenticate with sufficient privileges to manage KV buckets and interact with the JetStream API.
|
||||
|
||||
Infrastructure: NATS is deployed as a multi-site Supercluster. Authentication must be consistent across sites to allow for agent failover and data replication.
|
||||
|
||||
https://docs.nats.io/running-a-nats-service/configuration/securing_nats/auth_intro
|
||||
|
||||
Technical Constraint: In NATS, JetStream functionality is not global by default; it must be explicitly enabled and capped at the Account level to allow KV bucket creation and persistence.
|
||||
|
||||
## Issues
|
||||
|
||||
1. The "System Account" Trap
|
||||
|
||||
The Hole: Using the system account for the Harmony Agent.
|
||||
|
||||
The Risk: The NATS System Account is for server heartbeat and monitoring. It cannot (and should not) own JetStream KV buckets.
|
||||
|
||||
2. Multi-Site Authorization Sync
|
||||
|
||||
The Hole: Defining users in local nats.conf files via Helm.
|
||||
|
||||
The Risk: If an agent at Site-2 fails over to Site-3, but Site-3’s local configuration doesn't have the testUser credentials, the agent will be locked out during an outage.
|
||||
|
||||
3. KV Replication Factor
|
||||
|
||||
The Hole: Not specifying the Replicas count for the KV bucket.
|
||||
|
||||
The Risk: If you create a KV bucket with the default (1 replica), it only exists at the site where it was created. If that site goes down, the data is lost despite having a Supercluster.
|
||||
|
||||
4. Subject-Level Permissions
|
||||
|
||||
The Hole: Only granting TEST.* permissions.
|
||||
|
||||
The Risk: NATS KV uses internal subjects (e.g., $KV.<bucket_name>.>). Without access to these, the agent will get an "Authorization Violation" even if it's logged in.
|
||||
|
||||
|
||||
## Proposed Solution
|
||||
|
||||
To enable reliable, secure communication between the Harmony Agent and the NATS Supercluster, we will implement Account-isolated JetStream using NKey Authentication (or mTLS).
|
||||
1. Dedicated Account Architecture
|
||||
|
||||
We will move away from the "Global/Default" account. A dedicated HARMONY account will be defined identically across all sites in the Supercluster. This ensures that the metadata for the KV bucket can replicate across the gateways.
|
||||
|
||||
System Account: Reserved for NATS internal health and Supercluster routing.
|
||||
|
||||
Harmony Account: Dedicated to Harmony Agent data, with JetStream explicitly enabled.
|
||||
|
||||
2. Authentication: Use harmony secret store mounted into nats container
|
||||
|
||||
Take advantage of currently implemented solution
|
||||
|
||||
3. JetStream & KV Configuration
|
||||
|
||||
To ensure the KV bucket is available across the Supercluster, the following configuration must be applied:
|
||||
|
||||
Replication Factor (R=3): KV buckets will be created with a replication factor of 3 to ensure data persists across Site-1, Site-2, and Site-3.
|
||||
|
||||
Permissions: The agent will be granted scoped access to:
|
||||
|
||||
$KV.HARMONY.> (Data operations)
|
||||
|
||||
$JS.API.CONSUMER.> and $JS.API.STREAM.> (Management operations)
|
||||
|
||||
## Consequence of Decision
|
||||
Pros
|
||||
|
||||
Resilience: Agents can fail over to any site in the Supercluster and find their credentials and data.
|
||||
|
||||
Security: By using a dedicated account, the Harmony Agent cannot see or interfere with NATS system traffic.
|
||||
|
||||
Scalability: We can add Site-4 or Site-5 simply by copying the HARMONY account definition.
|
||||
|
||||
Cons / Risks
|
||||
|
||||
Configuration Drift: If one site's ConfigMap is updated without the others, authentication will fail during a site failover.
|
||||
|
||||
Complexity: Requires a "Management" step to ensure the account exists on all NATS instances before the agent attempts to connect.
|
||||
65
adr/019-Network-bond-setup.md
Normal file
65
adr/019-Network-bond-setup.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# Architecture Decision Record: Network Bonding Configuration via External Automation
|
||||
|
||||
Initial Author: Jean-Gabriel Gill-Couture & Sylvain Tremblay
|
||||
|
||||
Initial Date: 2026-02-13
|
||||
|
||||
Last Updated Date: 2026-02-13
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
We need to configure LACP bonds on 10GbE interfaces across all worker nodes in the OpenShift cluster. A significant challenge is that interface names (e.g., `enp1s0f0` vs `ens1f0`) vary across different hardware nodes.
|
||||
|
||||
The standard OpenShift mechanism (MachineConfig) applies identical configurations to all nodes in a MachineConfigPool. Since the interface names differ, a single static MachineConfig cannot target specific physical devices across the entire cluster without complex workarounds.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use the existing "Harmony" automation tool to generate and apply host-specific NetworkManager configuration files directly to the nodes.
|
||||
|
||||
1. Harmony will generate the specific `.nmconnection` files for the bond and slaves based on its inventory of interface names.
|
||||
2. Files will be pushed to `/etc/NetworkManager/system-connections/` on each node.
|
||||
3. Configuration will be applied via `nmcli` reload or a node reboot.
|
||||
|
||||
## Rationale
|
||||
|
||||
* **Inventory Awareness:** Harmony already possesses the specific interface mapping data for each host.
|
||||
* **Persistence:** Fedora CoreOS/SCOS allows writing to `/etc`, and these files persist across reboots and OS upgrades (rpm-ostree updates).
|
||||
* **Avoids Complexity:** This approach avoids the operational overhead of creating unique MachineConfigPools for every single host or hardware variant.
|
||||
* **Safety:** Unlike wildcard matching, this ensures explicit interface selection, preventing accidental bonding of reserved interfaces (e.g., future separation of Ceph storage traffic).
|
||||
|
||||
## Consequences
|
||||
|
||||
**Pros:**
|
||||
* Precise, per-host configuration without polluting the Kubernetes API with hundreds of MachineConfigs.
|
||||
* Standard Linux networking behavior; easy to debug locally.
|
||||
* Prevents accidental interface capture (unlike wildcards).
|
||||
|
||||
**Cons:**
|
||||
* **Loss of Declarative K8s State:** The network config is not managed by the Machine Config Operator (MCO).
|
||||
* **Node Replacement Friction:** Newly provisioned nodes (replacements) will boot with default config. Harmony must be run against new nodes manually or via a hook before they can fully join the cluster workload.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
1. **Wildcard Matching in NetworkManager (e.g., `interface-name=enp*`):**
|
||||
* *Pros:* Single MachineConfig for the whole cluster.
|
||||
* *Cons:* Rejected because it is too broad. It risks capturing interfaces intended for other purposes (e.g., splitting storage and cluster networks later).
|
||||
|
||||
2. **"Kitchen Sink" Configuration:**
|
||||
* *Pros:* Single file listing every possible interface name as a slave.
|
||||
* *Cons:* "Dirty" configuration; results in many inactive connections on every host; brittle if new naming schemes appear.
|
||||
|
||||
3. **Per-Host MachineConfig:**
|
||||
* *Pros:* Fully declarative within OpenShift.
|
||||
* *Cons:* Requires a unique `MachineConfigPool` per host, which is an anti-pattern and unmaintainable at scale.
|
||||
|
||||
4. **On-boot Generation Script:**
|
||||
* *Pros:* Dynamic detection.
|
||||
* *Cons:* Increases boot complexity; harder to debug if the script fails during startup.
|
||||
|
||||
## Additional Notes
|
||||
|
||||
While `/etc` is writable and persistent on CoreOS, this configuration falls outside the "Day 1" Ignition process. Operational runbooks must be updated to ensure Harmony runs on any node replacement events.
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::net::{IpAddr, Ipv4Addr};
|
||||
|
||||
use brocade::{BrocadeOptions, ssh};
|
||||
use harmony_secret::Secret;
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use harmony_types::switch::PortLocation;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -21,17 +21,15 @@ async fn main() {
|
||||
// let ip = IpAddr::V4(Ipv4Addr::new(192, 168, 4, 11)); // brocade @ st
|
||||
let switch_addresses = vec![ip];
|
||||
|
||||
// let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
|
||||
// .await
|
||||
// .unwrap();
|
||||
let config = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let brocade = brocade::init(
|
||||
&switch_addresses,
|
||||
// &config.username,
|
||||
// &config.password,
|
||||
"admin",
|
||||
"password",
|
||||
BrocadeOptions {
|
||||
&config.username,
|
||||
&config.password,
|
||||
&BrocadeOptions {
|
||||
dry_run: true,
|
||||
ssh: ssh::SshOptions {
|
||||
port: 2222,
|
||||
|
||||
@@ -144,7 +144,7 @@ pub async fn init(
|
||||
ip_addresses: &[IpAddr],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
options: &BrocadeOptions,
|
||||
) -> Result<Box<dyn BrocadeClient + Send + Sync>, Error> {
|
||||
let shell = BrocadeShell::init(ip_addresses, username, password, options).await?;
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ impl BrocadeShell {
|
||||
ip_addresses: &[IpAddr],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
options: &BrocadeOptions,
|
||||
) -> Result<Self, Error> {
|
||||
let ip = ip_addresses
|
||||
.first()
|
||||
|
||||
@@ -70,7 +70,7 @@ pub async fn try_init_client(
|
||||
username: &str,
|
||||
password: &str,
|
||||
ip: &std::net::IpAddr,
|
||||
base_options: BrocadeOptions,
|
||||
base_options: &BrocadeOptions,
|
||||
) -> Result<BrocadeOptions, Error> {
|
||||
let mut default = SshOptions::default();
|
||||
default.port = base_options.ssh.port;
|
||||
|
||||
@@ -1 +1,33 @@
|
||||
Not much here yet, see the `adr` folder for now. More to come in time!
|
||||
# Harmony Documentation Hub
|
||||
|
||||
Welcome to the Harmony documentation. This is the main entry point for learning everything from core concepts to building your own Score, Topologies, and Capabilities.
|
||||
|
||||
## 1. Getting Started
|
||||
|
||||
If you're new to Harmony, start here:
|
||||
|
||||
- [**Getting Started Guide**](./guides/getting-started.md): A step-by-step tutorial that takes you from an empty project to deploying your first application.
|
||||
- [**Core Concepts**](./concepts.md): A high-level overview of the key concepts in Harmony: `Score`, `Topology`, `Capability`, `Inventory`, `Interpret`, ...
|
||||
|
||||
## 2. Use Cases & Examples
|
||||
|
||||
See how to use Harmony to solve real-world problems.
|
||||
|
||||
- [**OKD on Bare Metal**](./use-cases/okd-on-bare-metal.md): A detailed walkthrough of bootstrapping a high-availability OKD cluster from physical hardware.
|
||||
- [**Deploy a Rust Web App**](./use-cases/deploy-rust-webapp.md): A quick guide to deploying a monitored, containerized web application to a Kubernetes cluster.
|
||||
|
||||
## 3. Component Catalogs
|
||||
|
||||
Discover existing, reusable components you can use in your Harmony projects.
|
||||
|
||||
- [**Scores Catalog**](./catalogs/scores.md): A categorized list of all available `Scores` (the "what").
|
||||
- [**Topologies Catalog**](./catalogs/topologies.md): A list of all available `Topologies` (the "where").
|
||||
- [**Capabilities Catalog**](./catalogs/capabilities.md): A list of all available `Capabilities` (the "how").
|
||||
|
||||
## 4. Developer Guides
|
||||
|
||||
Ready to build your own components? These guides show you how.
|
||||
|
||||
- [**Writing a Score**](./guides/writing-a-score.md): Learn how to create your own `Score` and `Interpret` logic to define a new desired state.
|
||||
- [**Writing a Topology**](./guides/writing-a-topology.md): Learn how to model a new environment (like AWS, GCP, or custom hardware) as a `Topology`.
|
||||
- [**Adding Capabilities**](./guides/adding-capabilities.md): See how to add a `Capability` to your custom `Topology`.
|
||||
|
||||
7
docs/catalogs/README.md
Normal file
7
docs/catalogs/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Component Catalogs
|
||||
|
||||
This section is the "dictionary" for Harmony. It lists all the reusable components available out-of-the-box.
|
||||
|
||||
- [**Scores Catalog**](./scores.md): Discover all available `Scores` (the "what").
|
||||
- [**Topologies Catalog**](./topologies.md): A list of all available `Topologies` (the "where").
|
||||
- [**Capabilities Catalog**](./capabilities.md): A list of all available `Capabilities` (the "how").
|
||||
40
docs/catalogs/capabilities.md
Normal file
40
docs/catalogs/capabilities.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Capabilities Catalog
|
||||
|
||||
A `Capability` is a specific feature or API that a `Topology` offers. `Interpret` logic uses these capabilities to execute a `Score`.
|
||||
|
||||
This list is primarily for developers **writing new Topologies or Scores**. As a user, you just need to know that the `Topology` you pick (like `K8sAnywhereTopology`) provides the capabilities your `Scores` (like `ApplicationScore`) need.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Capabilities Catalog](#capabilities-catalog)
|
||||
- [Kubernetes & Application](#kubernetes-application)
|
||||
- [Monitoring & Observability](#monitoring-observability)
|
||||
- [Networking (Core Services)](#networking-core-services)
|
||||
- [Networking (Hardware & Host)](#networking-hardware-host)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
## Kubernetes & Application
|
||||
|
||||
- **K8sClient**: Provides an authenticated client to interact with a Kubernetes API (create/read/update/delete resources).
|
||||
- **HelmCommand**: Provides the ability to execute Helm commands (install, upgrade, template).
|
||||
- **TenantManager**: Provides methods for managing tenants in a multi-tenant cluster.
|
||||
- **Ingress**: Provides an interface for managing ingress controllers and resources.
|
||||
|
||||
## Monitoring & Observability
|
||||
|
||||
- **Grafana**: Provides an API for configuring Grafana (datasources, dashboards).
|
||||
- **Monitoring**: A general capability for configuring monitoring (e.g., creating Prometheus rules).
|
||||
|
||||
## Networking (Core Services)
|
||||
|
||||
- **DnsServer**: Provides an interface for creating and managing DNS records.
|
||||
- **LoadBalancer**: Provides an interface for configuring a load balancer (e.g., OPNsense, MetalLB).
|
||||
- **DhcpServer**: Provides an interface for managing DHCP leases and host bindings.
|
||||
- **TftpServer**: Provides an interface for managing files on a TFTP server (e.g., iPXE boot files).
|
||||
|
||||
## Networking (Hardware & Host)
|
||||
|
||||
- **Router**: Provides an interface for configuring routing rules, typically on a firewall like OPNsense.
|
||||
- **Switch**: Provides an interface for configuring a physical network switch (e.g., managing VLANs and port channels).
|
||||
- **NetworkManager**: Provides an interface for configuring host-level networking (e.g., creating bonds and bridges on a node).
|
||||
102
docs/catalogs/scores.md
Normal file
102
docs/catalogs/scores.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# Scores Catalog
|
||||
|
||||
A `Score` is a declarative description of a desired state. Find the Score you need and add it to your `harmony!` block's `scores` array.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Scores Catalog](#scores-catalog)
|
||||
- [Application Deployment](#application-deployment)
|
||||
- [OKD / Kubernetes Cluster Setup](#okd-kubernetes-cluster-setup)
|
||||
- [Cluster Services & Management](#cluster-services-management)
|
||||
- [Monitoring & Alerting](#monitoring-alerting)
|
||||
- [Infrastructure & Networking (Bare Metal)](#infrastructure-networking-bare-metal)
|
||||
- [Infrastructure & Networking (Cluster)](#infrastructure-networking-cluster)
|
||||
- [Tenant Management](#tenant-management)
|
||||
- [Utility](#utility)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
## Application Deployment
|
||||
|
||||
Scores for deploying and managing end-user applications.
|
||||
|
||||
- **ApplicationScore**: The primary score for deploying a web application. Describes the application, its framework, and the features it requires (e.g., monitoring, CI/CD).
|
||||
- **HelmChartScore**: Deploys a generic Helm chart to a Kubernetes cluster.
|
||||
- **ArgoHelmScore**: Deploys an application using an ArgoCD Helm chart.
|
||||
- **LAMPScore**: A specialized score for deploying a classic LAMP (Linux, Apache, MySQL, PHP) stack.
|
||||
|
||||
## OKD / Kubernetes Cluster Setup
|
||||
|
||||
This collection of Scores is used to provision an entire OKD cluster from bare metal. They are typically used in order.
|
||||
|
||||
- **OKDSetup01InventoryScore**: Discovers and catalogs the physical hardware.
|
||||
- **OKDSetup02BootstrapScore**: Configures the bootstrap node, renders iPXE files, and kicks off the SCOS installation.
|
||||
- **OKDSetup03ControlPlaneScore**: Renders iPXE configurations for the control plane nodes.
|
||||
- **OKDSetupPersistNetworkBondScore**: Configures network bonds on the nodes and port channels on the switches.
|
||||
- **OKDSetup04WorkersScore**: Renders iPXE configurations for the worker nodes.
|
||||
- **OKDSetup06InstallationReportScore**: Runs post-installation checks and generates a report.
|
||||
- **OKDUpgradeScore**: Manages the upgrade process for an existing OKD cluster.
|
||||
|
||||
## Cluster Services & Management
|
||||
|
||||
Scores for installing and managing services _inside_ a Kubernetes cluster.
|
||||
|
||||
- **K3DInstallationScore**: Installs and configures a local K3D (k3s-in-docker) cluster. Used by `K8sAnywhereTopology`.
|
||||
- **CertManagerHelmScore**: Deploys the `cert-manager` Helm chart.
|
||||
- **ClusterIssuerScore**: Configures a `ClusterIssuer` for `cert-manager` (e.g., for Let's Encrypt).
|
||||
- **K8sNamespaceScore**: Ensures a Kubernetes namespace exists.
|
||||
- **K8sDeploymentScore**: Deploys a generic `Deployment` resource to Kubernetes.
|
||||
- **K8sIngressScore**: Configures an `Ingress` resource for a service.
|
||||
|
||||
## Monitoring & Alerting
|
||||
|
||||
Scores for configuring observability, dashboards, and alerts.
|
||||
|
||||
- **ApplicationMonitoringScore**: A generic score to set up monitoring for an application.
|
||||
- **ApplicationRHOBMonitoringScore**: A specialized score for setting up monitoring via the Red Hat Observability stack.
|
||||
- **HelmPrometheusAlertingScore**: Configures Prometheus alerts via a Helm chart.
|
||||
- **K8sPrometheusCRDAlertingScore**: Configures Prometheus alerts using the `PrometheusRule` CRD.
|
||||
- **PrometheusAlertScore**: A generic score for creating a Prometheus alert.
|
||||
- **RHOBAlertingScore**: Configures alerts specifically for the Red Hat Observability stack.
|
||||
- **NtfyScore**: Configures alerts to be sent to a `ntfy.sh` server.
|
||||
|
||||
## Infrastructure & Networking (Bare Metal)
|
||||
|
||||
Low-level scores for managing physical hardware and network services.
|
||||
|
||||
- **DhcpScore**: Configures a DHCP server.
|
||||
- **OKDDhcpScore**: A specialized DHCP configuration for the OKD bootstrap process.
|
||||
- **OKDBootstrapDhcpScore**: Configures DHCP specifically for the bootstrap node.
|
||||
- **DhcpHostBindingScore**: Creates a specific MAC-to-IP binding in the DHCP server.
|
||||
- **DnsScore**: Configures a DNS server.
|
||||
- **OKDDnsScore**: A specialized DNS configuration for the OKD cluster (e.g., `api.*`, `*.apps.*`).
|
||||
- **StaticFilesHttpScore**: Serves a directory of static files (e.g., a documentation site) over HTTP.
|
||||
- **TftpScore**: Configures a TFTP server, typically for serving iPXE boot files.
|
||||
- **IPxeMacBootFileScore**: Assigns a specific iPXE boot file to a MAC address in the TFTP server.
|
||||
- **OKDIpxeScore**: A specialized score for generating the iPXE boot scripts for OKD.
|
||||
- **OPNsenseShellCommandScore**: Executes a shell command on an OPNsense firewall.
|
||||
|
||||
## Infrastructure & Networking (Cluster)
|
||||
|
||||
Network services that run inside the cluster or as part of the topology.
|
||||
|
||||
- **LoadBalancerScore**: Configures a general-purpose load balancer.
|
||||
- **OKDLoadBalancerScore**: Configures the high-availability load balancers for the OKD API and ingress.
|
||||
- **OKDBootstrapLoadBalancerScore**: Configures the load balancer specifically for the bootstrap-time API endpoint.
|
||||
- **K8sIngressScore**: Configures an Ingress controller or resource.
|
||||
- [HighAvailabilityHostNetworkScore](../../harmony/src/modules/okd/host_network.rs): Configures network bonds on a host and the corresponding port-channels on the switch stack for high-availability.
|
||||
|
||||
## Tenant Management
|
||||
|
||||
Scores for managing multi-tenancy within a cluster.
|
||||
|
||||
- **TenantScore**: Creates a new tenant (e.g., a namespace, quotas, network policies).
|
||||
- **TenantCredentialScore**: Generates and provisions credentials for a new tenant.
|
||||
|
||||
## Utility
|
||||
|
||||
Helper scores for discovery and inspection.
|
||||
|
||||
- **LaunchDiscoverInventoryAgentScore**: Launches the agent responsible for the `OKDSetup01InventoryScore`.
|
||||
- **DiscoverHostForRoleScore**: A utility score to find a host matching a specific role in the inventory.
|
||||
- **InspectInventoryScore**: Dumps the discovered inventory for inspection.
|
||||
59
docs/catalogs/topologies.md
Normal file
59
docs/catalogs/topologies.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Topologies Catalog
|
||||
|
||||
A `Topology` is the logical representation of your infrastructure and its `Capabilities`. You select a `Topology` in your Harmony project to define _where_ your `Scores` will be applied.
|
||||
|
||||
<!--toc:start-->
|
||||
|
||||
- [Topologies Catalog](#topologies-catalog)
|
||||
- [HAClusterTopology](#haclustertopology)
|
||||
- [K8sAnywhereTopology](#k8sanywheretopology)
|
||||
|
||||
<!--toc:end-->
|
||||
|
||||
### HAClusterTopology
|
||||
|
||||
- **`HAClusterTopology::autoload()`**
|
||||
|
||||
This `Topology` represents a high-availability, bare-metal cluster. It is designed for production-grade deployments like OKD.
|
||||
|
||||
It models an environment consisting of:
|
||||
|
||||
- At least 3 cluster nodes (for control plane/workers)
|
||||
- 2 redundant firewalls (e.g., OPNsense)
|
||||
- 2 redundant network switches
|
||||
|
||||
**Provided Capabilities:**
|
||||
This topology provides a rich set of capabilities required for bare-metal provisioning and cluster management, including:
|
||||
|
||||
- `K8sClient` (once the cluster is bootstrapped)
|
||||
- `DnsServer`
|
||||
- `LoadBalancer`
|
||||
- `DhcpServer`
|
||||
- `TftpServer`
|
||||
- `Router` (via the firewalls)
|
||||
- `Switch`
|
||||
- `NetworkManager` (for host-level network config)
|
||||
|
||||
---
|
||||
|
||||
### K8sAnywhereTopology
|
||||
|
||||
- **`K8sAnywhereTopology::from_env()`**
|
||||
|
||||
This `Topology` is designed for development and application deployment. It provides a simple, abstract way to deploy to _any_ Kubernetes cluster.
|
||||
|
||||
**How it works:**
|
||||
|
||||
1. By default (`from_env()` with no env vars), it automatically provisions a **local K3D (k3s-in-docker) cluster** on your machine. This is perfect for local development and testing.
|
||||
2. If you provide a `KUBECONFIG` environment variable, it will instead connect to that **existing Kubernetes cluster** (e.g., your staging or production OKD cluster).
|
||||
|
||||
This allows you to use the _exact same code_ to deploy your application locally as you do to deploy it to production.
|
||||
|
||||
**Provided Capabilities:**
|
||||
|
||||
- `K8sClient`
|
||||
- `HelmCommand`
|
||||
- `TenantManager`
|
||||
- `Ingress`
|
||||
- `Monitoring`
|
||||
- ...and more.
|
||||
40
docs/concepts.md
Normal file
40
docs/concepts.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Core Concepts
|
||||
|
||||
Harmony's design is based on a few key concepts. Understanding them is the key to unlocking the framework's power.
|
||||
|
||||
### 1. Score
|
||||
|
||||
- **What it is:** A **Score** is a declarative description of a desired state. It's a "resource" that defines _what_ you want to achieve, not _how_ to do it.
|
||||
- **Example:** `ApplicationScore` declares "I want this web application to be running and monitored."
|
||||
|
||||
### 2. Topology
|
||||
|
||||
- **What it is:** A **Topology** is the logical representation of your infrastructure and its abilities. It's the "where" your Scores will be applied.
|
||||
- **Key Job:** A Topology's most important job is to expose which `Capabilities` it supports.
|
||||
- **Example:** `HAClusterTopology` represents a bare-metal cluster and exposes `Capabilities` like `NetworkManager` and `Switch`. `K8sAnywhereTopology` represents a Kubernetes cluster and exposes the `K8sClient` `Capability`.
|
||||
|
||||
### 3. Capability
|
||||
|
||||
- **What it is:** A **Capability** is a specific feature or API that a `Topology` offers. It's the "how" a `Topology` can fulfill a `Score`'s request.
|
||||
- **Example:** The `K8sClient` capability offers a way to interact with a Kubernetes API. The `Switch` capability offers a way to configure a physical network switch.
|
||||
|
||||
### 4. Interpret
|
||||
|
||||
- **What it is:** An **Interpret** is the execution logic that makes a `Score` a reality. It's the "glue" that connects the _desired state_ (`Score`) to the _environment's abilities_ (`Topology`'s `Capabilities`).
|
||||
- **How it works:** When you apply a `Score`, Harmony finds the matching `Interpret` for your `Topology`. This `Interpret` then uses the `Capabilities` provided by the `Topology` to execute the necessary steps.
|
||||
|
||||
### 5. Inventory
|
||||
|
||||
- **What it is:** An **Inventory** is the physical material (the "what") used in a cluster. This is most relevant for bare-metal or on-premise topologies.
|
||||
- **Example:** A list of nodes with their roles (control plane, worker), CPU, RAM, and network interfaces. For the `K8sAnywhereTopology`, the inventory might be empty or autoloaded, as the infrastructure is more abstract.
|
||||
|
||||
---
|
||||
|
||||
### How They Work Together (The Compile-Time Check)
|
||||
|
||||
1. You **write a `Score`** (e.g., `ApplicationScore`).
|
||||
2. Your `Score`'s `Interpret` logic requires certain **`Capabilities`** (e.g., `K8sClient` and `Ingress`).
|
||||
3. You choose a **`Topology`** to run it on (e.g., `HAClusterTopology`).
|
||||
4. **At compile-time**, Harmony checks: "Does `HAClusterTopology` provide the `K8sClient` and `Ingress` capabilities that `ApplicationScore` needs?"
|
||||
- **If Yes:** Your code compiles. You can be confident it will run.
|
||||
- **If No:** The compiler gives you an error. You've just prevented a "config-is-valid-but-platform-is-wrong" runtime error before you even deployed.
|
||||
42
docs/guides/getting-started.md
Normal file
42
docs/guides/getting-started.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Getting Started Guide
|
||||
|
||||
Welcome to Harmony! This guide will walk you through installing the Harmony framework, setting up a new project, and deploying your first application.
|
||||
|
||||
We will build and deploy the "Rust Web App" example, which automatically:
|
||||
|
||||
1. Provisions a local K3D (Kubernetes in Docker) cluster.
|
||||
2. Deploys a sample Rust web application.
|
||||
3. Sets up monitoring for the application.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you begin, you'll need a few tools installed on your system:
|
||||
|
||||
- **Rust & Cargo:** [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
- **Docker:** [Install Docker](https://docs.docker.com/get-docker/) (Required for the K3D local cluster)
|
||||
- **kubectl:** [Install kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) (For inspecting the cluster)
|
||||
|
||||
## 1. Install Harmony
|
||||
|
||||
First, clone the Harmony repository and build the project. This gives you the `harmony` CLI and all the core libraries.
|
||||
|
||||
```bash
|
||||
# Clone the main repository
|
||||
git clone https://git.nationtech.io/nationtech/harmony
|
||||
cd harmony
|
||||
|
||||
# Build the project (this may take a few minutes)
|
||||
cargo build --release
|
||||
```
|
||||
|
||||
...
|
||||
|
||||
## Next Steps
|
||||
|
||||
Congratulations, you've just deployed an application using true infrastructure-as-code!
|
||||
|
||||
From here, you can:
|
||||
|
||||
- [Explore the Catalogs](../catalogs/README.md): See what other [Scores](../catalogs/scores.md) and [Topologies](../catalogs/topologies.md) are available.
|
||||
- [Read the Use Cases](../use-cases/README.md): Check out the [OKD on Bare Metal](./use-cases/okd-on-bare-metal.md) guide for a more advanced scenario.
|
||||
- [Write your own Score](../guides/writing-a-score.md): Dive into the [Developer Guide](./guides/developer-guide.md) to start building your own components.
|
||||
@@ -1,22 +1,28 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use brocade::{BrocadeOptions, PortOperatingMode};
|
||||
use harmony::{
|
||||
data::Version,
|
||||
infra::brocade::BrocadeSwitchClient,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
infra::brocade::BrocadeSwitchConfig,
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HostNetworkConfig, PortConfig, PreparationError, PreparationOutcome, Switch, SwitchClient,
|
||||
SwitchError, Topology,
|
||||
},
|
||||
modules::brocade::{BrocadeSwitchAuth, BrocadeSwitchScore, SwitchTopology},
|
||||
};
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{id::Id, net::MacAddress, switch::PortLocation};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
use harmony_types::{id::Id, switch::PortLocation};
|
||||
|
||||
fn get_switch_config() -> BrocadeSwitchConfig {
|
||||
let mut options = BrocadeOptions::default();
|
||||
options.ssh.port = 2222;
|
||||
let auth = BrocadeSwitchAuth {
|
||||
username: "admin".to_string(),
|
||||
password: "password".to_string(),
|
||||
};
|
||||
|
||||
BrocadeSwitchConfig {
|
||||
ips: vec![ip!("127.0.0.1")],
|
||||
auth,
|
||||
options,
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
@@ -32,126 +38,13 @@ async fn main() {
|
||||
(PortLocation(1, 0, 18), PortOperatingMode::Trunk),
|
||||
],
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
SwitchTopology::new().await,
|
||||
SwitchTopology::new(get_switch_config()).await,
|
||||
vec![Box::new(switch_score)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
struct BrocadeSwitchScore {
|
||||
port_channels_to_clear: Vec<Id>,
|
||||
ports_to_configure: Vec<PortConfig>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Switch> Score<T> for BrocadeSwitchScore {
|
||||
fn name(&self) -> String {
|
||||
"BrocadeSwitchScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(BrocadeSwitchInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct BrocadeSwitchInterpret {
|
||||
score: BrocadeSwitchScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + Switch> Interpret<T> for BrocadeSwitchInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Applying switch configuration {:?}", self.score);
|
||||
debug!(
|
||||
"Clearing port channel {:?}",
|
||||
self.score.port_channels_to_clear
|
||||
);
|
||||
topology
|
||||
.clear_port_channel(&self.score.port_channels_to_clear)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
debug!("Configuring interfaces {:?}", self.score.ports_to_configure);
|
||||
topology
|
||||
.configure_interface(&self.score.ports_to_configure)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success("switch configured".to_string()))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("BrocadeSwitchInterpret")
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
struct SwitchTopology {
|
||||
client: Box<dyn SwitchClient>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Topology for SwitchTopology {
|
||||
fn name(&self) -> &str {
|
||||
"SwitchTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
impl SwitchTopology {
|
||||
async fn new() -> Self {
|
||||
let mut options = BrocadeOptions::default();
|
||||
options.ssh.port = 2222;
|
||||
let client =
|
||||
BrocadeSwitchClient::init(&vec![ip!("127.0.0.1")], &"admin", &"password", options)
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
let client = Box::new(client);
|
||||
Self { client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Switch for SwitchTopology {
|
||||
async fn setup_switch(&self) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_port_for_mac_address(
|
||||
&self,
|
||||
_mac_address: &MacAddress,
|
||||
) -> Result<Option<PortLocation>, SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn configure_port_channel(&self, _config: &HostNetworkConfig) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
self.client.clear_port_channel(ids).await
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
self.client.configure_interface(ports).await
|
||||
}
|
||||
}
|
||||
|
||||
20
examples/k8s_drain_node/Cargo.toml
Normal file
20
examples/k8s_drain_node/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "example-k8s-drain-node"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
cidr.workspace = true
|
||||
tokio.workspace = true
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
url.workspace = true
|
||||
assert_cmd = "2.0.16"
|
||||
inquire.workspace = true
|
||||
61
examples/k8s_drain_node/src/main.rs
Normal file
61
examples/k8s_drain_node/src/main.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use harmony::topology::k8s::{DrainOptions, K8sClient};
|
||||
use log::{info, trace};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
env_logger::init();
|
||||
let k8s = K8sClient::try_default().await.unwrap();
|
||||
let nodes = k8s.get_nodes(None).await.unwrap();
|
||||
trace!("Got nodes : {nodes:#?}");
|
||||
let node_names = nodes
|
||||
.iter()
|
||||
.map(|n| n.metadata.name.as_ref().unwrap())
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
info!("Got nodes : {:?}", node_names);
|
||||
|
||||
let node_name = inquire::Select::new("What node do you want to operate on?", node_names)
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
let drain = inquire::Confirm::new("Do you wish to drain the node now ?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if drain {
|
||||
let mut options = DrainOptions::default_ignore_daemonset_delete_emptydir_data();
|
||||
options.timeout = Duration::from_secs(1);
|
||||
k8s.drain_node(&node_name, &options).await.unwrap();
|
||||
|
||||
info!("Node {node_name} successfully drained");
|
||||
}
|
||||
|
||||
let uncordon =
|
||||
inquire::Confirm::new("Do you wish to uncordon node to resume scheduling workloads now?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if uncordon {
|
||||
info!("Uncordoning node {node_name}");
|
||||
k8s.uncordon_node(node_name).await.unwrap();
|
||||
info!("Node {node_name} uncordoned");
|
||||
}
|
||||
|
||||
let reboot = inquire::Confirm::new("Do you wish to reboot node now?")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
if reboot {
|
||||
k8s.reboot_node(
|
||||
&node_name,
|
||||
&DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
|
||||
Duration::from_secs(3600),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
info!("All done playing with nodes, happy harmonizing!");
|
||||
}
|
||||
20
examples/k8s_write_file_on_node/Cargo.toml
Normal file
20
examples/k8s_write_file_on_node/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "example-k8s-write-file-on-node"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
cidr.workspace = true
|
||||
tokio.workspace = true
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
log.workspace = true
|
||||
env_logger.workspace = true
|
||||
url.workspace = true
|
||||
assert_cmd = "2.0.16"
|
||||
inquire.workspace = true
|
||||
45
examples/k8s_write_file_on_node/src/main.rs
Normal file
45
examples/k8s_write_file_on_node/src/main.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use harmony::topology::k8s::{DrainOptions, K8sClient, NodeFile};
|
||||
use log::{info, trace};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
env_logger::init();
|
||||
let k8s = K8sClient::try_default().await.unwrap();
|
||||
let nodes = k8s.get_nodes(None).await.unwrap();
|
||||
trace!("Got nodes : {nodes:#?}");
|
||||
let node_names = nodes
|
||||
.iter()
|
||||
.map(|n| n.metadata.name.as_ref().unwrap())
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
info!("Got nodes : {:?}", node_names);
|
||||
|
||||
let node = inquire::Select::new("What node do you want to write file to?", node_names)
|
||||
.prompt()
|
||||
.unwrap();
|
||||
|
||||
let path = inquire::Text::new("File path on node").prompt().unwrap();
|
||||
let content = inquire::Text::new("File content").prompt().unwrap();
|
||||
|
||||
let node_file = NodeFile {
|
||||
path: path,
|
||||
content: content,
|
||||
mode: 0o600,
|
||||
};
|
||||
|
||||
k8s.write_files_to_node(&node, &vec![node_file.clone()])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let cmd = inquire::Text::new("Command to run on node")
|
||||
.prompt()
|
||||
.unwrap();
|
||||
k8s.run_privileged_command_on_node(&node, &cmd)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
info!(
|
||||
"File {} mode {} written in node {node}",
|
||||
node_file.path, node_file.mode
|
||||
);
|
||||
}
|
||||
@@ -1,37 +1,45 @@
|
||||
use std::collections::HashMap;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
monitoring::{
|
||||
alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
kube_prometheus::{
|
||||
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
|
||||
types::{
|
||||
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
|
||||
ServiceMonitorEndpoint,
|
||||
modules::monitoring::{
|
||||
alert_channel::discord_alert_channel::DiscordReceiver,
|
||||
alert_rule::{
|
||||
alerts::{
|
||||
infra::dell_server::{
|
||||
alert_global_storage_status_critical,
|
||||
alert_global_storage_status_non_recoverable,
|
||||
global_storage_status_degraded_non_critical,
|
||||
},
|
||||
k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
},
|
||||
prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
},
|
||||
prometheus::alerts::{
|
||||
infra::dell_server::{
|
||||
alert_global_storage_status_critical, alert_global_storage_status_non_recoverable,
|
||||
global_storage_status_degraded_non_critical,
|
||||
kube_prometheus::{
|
||||
helm::config::KubePrometheusConfig,
|
||||
kube_prometheus_alerting_score::KubePrometheusAlertingScore,
|
||||
types::{
|
||||
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
|
||||
ServiceMonitorEndpoint,
|
||||
},
|
||||
k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
topology::{K8sAnywhereTopology, monitoring::AlertRoute},
|
||||
};
|
||||
use harmony_types::{k8s_name::K8sName, net::Url};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
let receiver_name = "test-discord".to_string();
|
||||
let discord_receiver = DiscordReceiver {
|
||||
name: receiver_name.clone(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
selectors: vec![],
|
||||
route: AlertRoute {
|
||||
..AlertRoute::default(receiver_name)
|
||||
},
|
||||
};
|
||||
|
||||
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
|
||||
@@ -70,10 +78,15 @@ async fn main() {
|
||||
endpoints: vec![service_monitor_endpoint],
|
||||
..Default::default()
|
||||
};
|
||||
let alerting_score = HelmPrometheusAlertingScore {
|
||||
|
||||
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
|
||||
|
||||
let alerting_score = KubePrometheusAlertingScore {
|
||||
receivers: vec![Box::new(discord_receiver)],
|
||||
rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
|
||||
service_monitors: vec![service_monitor],
|
||||
scrape_targets: None,
|
||||
config,
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
|
||||
@@ -1,24 +1,32 @@
|
||||
use std::{collections::HashMap, str::FromStr};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
str::FromStr,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
monitoring::{
|
||||
alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
alert_channel::discord_alert_channel::DiscordReceiver,
|
||||
alert_rule::{
|
||||
alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
},
|
||||
kube_prometheus::{
|
||||
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
|
||||
helm::config::KubePrometheusConfig,
|
||||
kube_prometheus_alerting_score::KubePrometheusAlertingScore,
|
||||
types::{
|
||||
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
|
||||
ServiceMonitorEndpoint,
|
||||
},
|
||||
},
|
||||
},
|
||||
prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
tenant::TenantScore,
|
||||
},
|
||||
topology::{
|
||||
K8sAnywhereTopology,
|
||||
monitoring::AlertRoute,
|
||||
tenant::{ResourceLimits, TenantConfig, TenantNetworkPolicy},
|
||||
},
|
||||
};
|
||||
@@ -42,10 +50,13 @@ async fn main() {
|
||||
},
|
||||
};
|
||||
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
let receiver_name = "test-discord".to_string();
|
||||
let discord_receiver = DiscordReceiver {
|
||||
name: receiver_name.clone(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
selectors: vec![],
|
||||
route: AlertRoute {
|
||||
..AlertRoute::default(receiver_name)
|
||||
},
|
||||
};
|
||||
|
||||
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
|
||||
@@ -74,10 +85,14 @@ async fn main() {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let alerting_score = HelmPrometheusAlertingScore {
|
||||
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
|
||||
|
||||
let alerting_score = KubePrometheusAlertingScore {
|
||||
receivers: vec![Box::new(discord_receiver)],
|
||||
rules: vec![Box::new(additional_rules)],
|
||||
service_monitors: vec![service_monitor],
|
||||
scrape_targets: None,
|
||||
config,
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
|
||||
@@ -215,7 +215,7 @@ fn site(
|
||||
dns_name: format!("{cluster_name}-gw.{domain}"),
|
||||
supercluster_ca_secret_name: "nats-supercluster-ca-bundle",
|
||||
tls_cert_name: "nats-gateway",
|
||||
jetstream_enabled: "false",
|
||||
jetstream_enabled: "true",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,35 +1,64 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::{
|
||||
alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
okd::cluster_monitoring::OpenshiftClusterAlertScore,
|
||||
alert_channel::discord_alert_channel::DiscordReceiver,
|
||||
alert_rule::{
|
||||
alerts::{
|
||||
infra::opnsense::high_http_error_rate, k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
},
|
||||
prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
},
|
||||
okd::openshift_cluster_alerting_score::OpenshiftClusterAlertScore,
|
||||
scrape_target::prometheus_node_exporter::PrometheusNodeExporter,
|
||||
},
|
||||
topology::{
|
||||
K8sAnywhereTopology,
|
||||
monitoring::{AlertMatcher, AlertRoute, MatchOp},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
};
|
||||
use harmony_macros::hurl;
|
||||
use harmony_types::k8s_name::K8sName;
|
||||
|
||||
use harmony_macros::{hurl, ip};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let mut sel = HashMap::new();
|
||||
sel.insert(
|
||||
"openshift_io_alert_source".to_string(),
|
||||
"platform".to_string(),
|
||||
);
|
||||
let mut sel2 = HashMap::new();
|
||||
sel2.insert("openshift_io_alert_source".to_string(), "".to_string());
|
||||
let selectors = vec![sel, sel2];
|
||||
let platform_matcher = AlertMatcher {
|
||||
label: "prometheus".to_string(),
|
||||
operator: MatchOp::Eq,
|
||||
value: "openshift-monitoring/k8s".to_string(),
|
||||
};
|
||||
let severity = AlertMatcher {
|
||||
label: "severity".to_string(),
|
||||
operator: MatchOp::Eq,
|
||||
value: "critical".to_string(),
|
||||
};
|
||||
|
||||
let high_http_error_rate = high_http_error_rate();
|
||||
|
||||
let additional_rules = AlertManagerRuleGroup::new("", vec![high_http_error_rate]);
|
||||
|
||||
let scrape_target = PrometheusNodeExporter {
|
||||
job_name: "firewall".to_string(),
|
||||
metrics_path: "/metrics".to_string(),
|
||||
listen_address: ip!("127.0.0.1"),
|
||||
port: 9100,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
vec![Box::new(OpenshiftClusterAlertScore {
|
||||
receivers: vec![Box::new(DiscordWebhook {
|
||||
name: K8sName("wills-discord-webhook-example".to_string()),
|
||||
url: hurl!("https://something.io"),
|
||||
selectors: selectors,
|
||||
receivers: vec![Box::new(DiscordReceiver {
|
||||
name: "crit-wills-discord-channel-example".to_string(),
|
||||
url: hurl!("https://test.io"),
|
||||
route: AlertRoute {
|
||||
matchers: vec![severity],
|
||||
..AlertRoute::default("crit-wills-discord-channel-example".to_string())
|
||||
},
|
||||
})],
|
||||
sender: harmony::modules::monitoring::okd::OpenshiftClusterAlertSender,
|
||||
rules: vec![Box::new(additional_rules)],
|
||||
scrape_targets: Some(vec![Box::new(scrape_target)]),
|
||||
})],
|
||||
None,
|
||||
)
|
||||
|
||||
@@ -2,8 +2,12 @@ use brocade::BrocadeOptions;
|
||||
use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
hardware::{Location, SwitchGroup},
|
||||
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
|
||||
infra::{
|
||||
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
opnsense::OPNSenseManagementInterface,
|
||||
},
|
||||
inventory::Inventory,
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4};
|
||||
@@ -36,12 +40,11 @@ pub async fn get_topology() -> HAClusterTopology {
|
||||
dry_run: *harmony::config::DRY_RUN,
|
||||
..Default::default()
|
||||
};
|
||||
let switch_client = BrocadeSwitchClient::init(
|
||||
&switches,
|
||||
&switch_auth.username,
|
||||
&switch_auth.password,
|
||||
brocade_options,
|
||||
)
|
||||
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
|
||||
ips: switches,
|
||||
auth: switch_auth,
|
||||
options: brocade_options,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
@@ -103,9 +106,3 @@ pub fn get_inventory() -> Inventory {
|
||||
control_plane_host: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
@@ -3,14 +3,16 @@ use cidr::Ipv4Cidr;
|
||||
use harmony::{
|
||||
config::secret::OPNSenseFirewallCredentials,
|
||||
hardware::{Location, SwitchGroup},
|
||||
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
|
||||
infra::{
|
||||
brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
opnsense::OPNSenseManagementInterface,
|
||||
},
|
||||
inventory::Inventory,
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{HAClusterTopology, LogicalHost, UnmanagedRouter},
|
||||
};
|
||||
use harmony_macros::{ip, ipv4};
|
||||
use harmony_secret::{Secret, SecretManager};
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use harmony_secret::SecretManager;
|
||||
use std::{
|
||||
net::IpAddr,
|
||||
sync::{Arc, OnceLock},
|
||||
@@ -31,12 +33,11 @@ pub async fn get_topology() -> HAClusterTopology {
|
||||
dry_run: *harmony::config::DRY_RUN,
|
||||
..Default::default()
|
||||
};
|
||||
let switch_client = BrocadeSwitchClient::init(
|
||||
&switches,
|
||||
&switch_auth.username,
|
||||
&switch_auth.password,
|
||||
brocade_options,
|
||||
)
|
||||
let switch_client = BrocadeSwitchClient::init(BrocadeSwitchConfig {
|
||||
ips: switches,
|
||||
auth: switch_auth,
|
||||
options: brocade_options,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
@@ -98,9 +99,3 @@ pub fn get_inventory() -> Inventory {
|
||||
control_plane_host: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@ use harmony::{
|
||||
application::{
|
||||
ApplicationScore, RustWebFramework, RustWebapp, features::rhob_monitoring::Monitoring,
|
||||
},
|
||||
monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
monitoring::alert_channel::discord_alert_channel::DiscordReceiver,
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
topology::{K8sAnywhereTopology, monitoring::AlertRoute},
|
||||
};
|
||||
use harmony_types::{k8s_name::K8sName, net::Url};
|
||||
|
||||
@@ -22,18 +22,21 @@ async fn main() {
|
||||
service_port: 3000,
|
||||
});
|
||||
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
let receiver_name = "test-discord".to_string();
|
||||
let discord_receiver = DiscordReceiver {
|
||||
name: receiver_name.clone(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
selectors: vec![],
|
||||
route: AlertRoute {
|
||||
..AlertRoute::default(receiver_name)
|
||||
},
|
||||
};
|
||||
|
||||
let app = ApplicationScore {
|
||||
features: vec![
|
||||
Box::new(Monitoring {
|
||||
application: application.clone(),
|
||||
alert_receiver: vec![Box::new(discord_receiver)],
|
||||
}),
|
||||
// Box::new(Monitoring {
|
||||
// application: application.clone(),
|
||||
// alert_receiver: vec![Box::new(discord_receiver)],
|
||||
// }),
|
||||
// TODO add backups, multisite ha, etc
|
||||
],
|
||||
application,
|
||||
|
||||
@@ -8,13 +8,13 @@ use harmony::{
|
||||
features::{Monitoring, PackagingDeployment},
|
||||
},
|
||||
monitoring::alert_channel::{
|
||||
discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver,
|
||||
discord_alert_channel::DiscordReceiver, webhook_receiver::WebhookReceiver,
|
||||
},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
topology::{K8sAnywhereTopology, monitoring::AlertRoute},
|
||||
};
|
||||
use harmony_macros::hurl;
|
||||
use harmony_types::k8s_name::K8sName;
|
||||
use harmony_types::{k8s_name::K8sName, net::Url};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
@@ -26,10 +26,13 @@ async fn main() {
|
||||
service_port: 3000,
|
||||
});
|
||||
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
url: hurl!("https://discord.doesnt.exist.com"),
|
||||
selectors: vec![],
|
||||
let receiver_name = "test-discord".to_string();
|
||||
let discord_receiver = DiscordReceiver {
|
||||
name: receiver_name.clone(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
route: AlertRoute {
|
||||
..AlertRoute::default(receiver_name)
|
||||
},
|
||||
};
|
||||
|
||||
let webhook_receiver = WebhookReceiver {
|
||||
@@ -42,10 +45,10 @@ async fn main() {
|
||||
Box::new(PackagingDeployment {
|
||||
application: application.clone(),
|
||||
}),
|
||||
Box::new(Monitoring {
|
||||
application: application.clone(),
|
||||
alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)],
|
||||
}),
|
||||
// Box::new(Monitoring {
|
||||
// application: application.clone(),
|
||||
// alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)],
|
||||
// }),
|
||||
// TODO add backups, multisite ha, etc
|
||||
],
|
||||
application,
|
||||
|
||||
@@ -5,6 +5,10 @@ version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[[example]]
|
||||
name = "try_rust_webapp"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::{
|
||||
ApplicationScore, RustWebFramework, RustWebapp,
|
||||
features::{Monitoring, PackagingDeployment},
|
||||
},
|
||||
monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
modules::application::{
|
||||
ApplicationScore, RustWebFramework, RustWebapp,
|
||||
features::{Monitoring, PackagingDeployment},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
};
|
||||
@@ -30,14 +27,14 @@ async fn main() {
|
||||
Box::new(PackagingDeployment {
|
||||
application: application.clone(),
|
||||
}),
|
||||
Box::new(Monitoring {
|
||||
application: application.clone(),
|
||||
alert_receiver: vec![Box::new(DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
url: hurl!("https://discord.doesnt.exist.com"),
|
||||
selectors: vec![],
|
||||
})],
|
||||
}),
|
||||
// Box::new(Monitoring {
|
||||
// application: application.clone(),
|
||||
// alert_receiver: vec![Box::new(DiscordWebhook {
|
||||
// name: K8sName("test-discord".to_string()),
|
||||
// url: hurl!("https://discord.doesnt.exist.com"),
|
||||
// selectors: vec![],
|
||||
// })],
|
||||
// }),
|
||||
],
|
||||
application,
|
||||
};
|
||||
|
||||
@@ -108,11 +108,18 @@ impl PhysicalHost {
|
||||
};
|
||||
|
||||
let storage_summary = if drive_count > 1 {
|
||||
let drive_sizes = self
|
||||
.storage
|
||||
.iter()
|
||||
.map(|d| format_storage(d.size_bytes))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
format!(
|
||||
"{} Storage ({}x {})",
|
||||
"{} Storage ({} Disks [{}])",
|
||||
format_storage(total_storage_bytes),
|
||||
drive_count,
|
||||
first_drive_model
|
||||
drive_sizes
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use async_trait::async_trait;
|
||||
use brocade::PortOperatingMode;
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{
|
||||
id::Id,
|
||||
@@ -9,9 +8,9 @@ use harmony_types::{
|
||||
use log::debug;
|
||||
use log::info;
|
||||
|
||||
use crate::topology::PxeOptions;
|
||||
use crate::{data::FileContent, executors::ExecutorError, topology::node_exporter::NodeExporter};
|
||||
use crate::{infra::network_manager::OpenShiftNmStateNetworkManager, topology::PortConfig};
|
||||
use crate::{modules::inventory::HarmonyDiscoveryStrategy, topology::PxeOptions};
|
||||
|
||||
use super::{
|
||||
DHCPStaticEntry, DhcpServer, DnsRecord, DnsRecordType, DnsServer, Firewall, HostNetworkConfig,
|
||||
@@ -301,10 +300,10 @@ impl Switch for HAClusterTopology {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
async fn clear_port_channel(&self, _ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
async fn configure_interface(&self, _ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -322,7 +321,15 @@ impl NetworkManager for HAClusterTopology {
|
||||
self.network_manager().await.configure_bond(config).await
|
||||
}
|
||||
|
||||
//TODO add snmp here
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError> {
|
||||
self.network_manager()
|
||||
.await
|
||||
.configure_bond_on_primary_interface(config)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -562,10 +569,10 @@ impl SwitchClient for DummyInfra {
|
||||
) -> Result<u8, SwitchError> {
|
||||
unimplemented!("{}", UNIMPLEMENTED_DUMMY_INFRA)
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
async fn clear_port_channel(&self, _ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
async fn configure_interface(&self, _ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
133
harmony/src/domain/topology/k8s/bundle.rs
Normal file
133
harmony/src/domain/topology/k8s/bundle.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Resource Bundle Pattern Implementation
|
||||
//!
|
||||
//! This module implements the Resource Bundle pattern for managing groups of
|
||||
//! Kubernetes resources that form a logical unit of work.
|
||||
//!
|
||||
//! ## Purpose
|
||||
//!
|
||||
//! The ResourceBundle pattern addresses the need to manage ephemeral privileged
|
||||
//! pods along with their platform-specific security requirements (e.g., OpenShift
|
||||
//! Security Context Constraints).
|
||||
//!
|
||||
//! ## Use Cases
|
||||
//!
|
||||
//! - Writing files to node filesystems (e.g., NetworkManager configurations for
|
||||
//! network bonding as described in ADR-019)
|
||||
//! - Running privileged commands on nodes (e.g., reboots, system configuration)
|
||||
//!
|
||||
//! ## Benefits
|
||||
//!
|
||||
//! - **Separation of Concerns**: Client code doesn't need to know about
|
||||
//! platform-specific RBAC requirements
|
||||
//! - **Atomic Operations**: Resources are applied and deleted as a unit
|
||||
//! - **Clean Abstractions**: Privileged operations are encapsulated in bundles
|
||||
//! rather than scattered throughout client methods
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```rust,no_run
|
||||
//! use harmony::topology::k8s::{K8sClient, helper};
|
||||
//! use harmony::topology::KubernetesDistribution;
|
||||
//!
|
||||
//! async fn write_network_config(client: &K8sClient, node: &str) {
|
||||
//! // Create a bundle with platform-specific RBAC
|
||||
//! let bundle = helper::build_privileged_bundle(
|
||||
//! helper::PrivilegedPodConfig {
|
||||
//! name: "network-config".to_string(),
|
||||
//! namespace: "default".to_string(),
|
||||
//! node_name: node.to_string(),
|
||||
//! // ... other config
|
||||
//! ..Default::default()
|
||||
//! },
|
||||
//! &KubernetesDistribution::OpenshiftFamily,
|
||||
//! );
|
||||
//!
|
||||
//! // Apply all resources (RBAC + Pod) atomically
|
||||
//! bundle.apply(client).await.unwrap();
|
||||
//!
|
||||
//! // ... wait for completion ...
|
||||
//!
|
||||
//! // Cleanup all resources
|
||||
//! bundle.delete(client).await.unwrap();
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use kube::{Error, Resource, ResourceExt, api::DynamicObject};
|
||||
use serde::Serialize;
|
||||
use serde_json;
|
||||
|
||||
use crate::domain::topology::k8s::K8sClient;
|
||||
|
||||
/// A ResourceBundle represents a logical unit of work consisting of multiple
|
||||
/// Kubernetes resources that should be applied or deleted together.
|
||||
///
|
||||
/// This pattern is useful for managing ephemeral privileged pods along with
|
||||
/// their required RBAC bindings (e.g., OpenShift SCC bindings).
|
||||
#[derive(Debug)]
|
||||
pub struct ResourceBundle {
|
||||
pub resources: Vec<DynamicObject>,
|
||||
}
|
||||
|
||||
impl ResourceBundle {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
resources: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a Kubernetes resource to this bundle.
|
||||
/// The resource is converted to a DynamicObject for generic handling.
|
||||
pub fn add<K>(&mut self, resource: K)
|
||||
where
|
||||
K: Resource + Serialize,
|
||||
<K as Resource>::DynamicType: Default,
|
||||
{
|
||||
// Convert the typed resource to JSON, then to DynamicObject
|
||||
let json = serde_json::to_value(&resource).expect("Failed to serialize resource");
|
||||
let mut obj: DynamicObject =
|
||||
serde_json::from_value(json).expect("Failed to convert to DynamicObject");
|
||||
|
||||
// Ensure type metadata is set
|
||||
if obj.types.is_none() {
|
||||
let api_version = Default::default();
|
||||
let kind = Default::default();
|
||||
let gvk = K::api_version(&api_version);
|
||||
let kind = K::kind(&kind);
|
||||
obj.types = Some(kube::api::TypeMeta {
|
||||
api_version: gvk.to_string(),
|
||||
kind: kind.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
self.resources.push(obj);
|
||||
}
|
||||
|
||||
/// Apply all resources in this bundle to the cluster.
|
||||
/// Resources are applied in the order they were added.
|
||||
pub async fn apply(&self, client: &K8sClient) -> Result<(), Error> {
|
||||
for res in &self.resources {
|
||||
let namespace = res.namespace();
|
||||
client
|
||||
.apply_dynamic(res, namespace.as_deref(), true)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete all resources in this bundle from the cluster.
|
||||
/// Resources are deleted in reverse order to respect dependencies.
|
||||
pub async fn delete(&self, client: &K8sClient) -> Result<(), Error> {
|
||||
// FIXME delete all in parallel and retry using kube::client::retry::RetryPolicy
|
||||
for res in self.resources.iter().rev() {
|
||||
let api = client.get_api_for_dynamic_object(res, res.namespace().as_deref())?;
|
||||
let name = res.name_any();
|
||||
// FIXME this swallows all errors. Swallowing a 404 is ok but other errors must be
|
||||
// handled properly (such as retrying). A normal error case is when we delete a
|
||||
// resource bundle with dependencies between various resources. Such as a pod with a
|
||||
// dependency on a ClusterRoleBinding. Trying to delete the ClusterRoleBinding first
|
||||
// is expected to fail
|
||||
let _ = api.delete(&name, &kube::api::DeleteParams::default()).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
1
harmony/src/domain/topology/k8s/config.rs
Normal file
1
harmony/src/domain/topology/k8s/config.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub const PRIVILEGED_POD_IMAGE: &str = "hub.nationtech.io/redhat/ubi10:latest";
|
||||
601
harmony/src/domain/topology/k8s/helper.rs
Normal file
601
harmony/src/domain/topology/k8s/helper.rs
Normal file
@@ -0,0 +1,601 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::topology::KubernetesDistribution;
|
||||
|
||||
use super::bundle::ResourceBundle;
|
||||
use super::config::PRIVILEGED_POD_IMAGE;
|
||||
use k8s_openapi::api::core::v1::{
|
||||
Container, HostPathVolumeSource, Pod, PodSpec, SecurityContext, Volume, VolumeMount,
|
||||
};
|
||||
use k8s_openapi::api::rbac::v1::{ClusterRoleBinding, RoleRef, Subject};
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||
use kube::error::DiscoveryError;
|
||||
use log::{debug, error, info, warn};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PrivilegedPodConfig {
|
||||
pub name: String,
|
||||
pub namespace: String,
|
||||
pub node_name: String,
|
||||
pub container_name: String,
|
||||
pub command: Vec<String>,
|
||||
pub volumes: Vec<Volume>,
|
||||
pub volume_mounts: Vec<VolumeMount>,
|
||||
pub host_pid: bool,
|
||||
pub host_network: bool,
|
||||
}
|
||||
|
||||
impl Default for PrivilegedPodConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: "privileged-pod".to_string(),
|
||||
namespace: "harmony".to_string(),
|
||||
node_name: "".to_string(),
|
||||
container_name: "privileged-container".to_string(),
|
||||
command: vec![],
|
||||
volumes: vec![],
|
||||
volume_mounts: vec![],
|
||||
host_pid: false,
|
||||
host_network: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_privileged_pod(
|
||||
config: PrivilegedPodConfig,
|
||||
k8s_distribution: &KubernetesDistribution,
|
||||
) -> Pod {
|
||||
let annotations = match k8s_distribution {
|
||||
KubernetesDistribution::OpenshiftFamily => Some(BTreeMap::from([
|
||||
("openshift.io/scc".to_string(), "privileged".to_string()),
|
||||
(
|
||||
"openshift.io/required-scc".to_string(),
|
||||
"privileged".to_string(),
|
||||
),
|
||||
])),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
Pod {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(config.name),
|
||||
namespace: Some(config.namespace),
|
||||
annotations,
|
||||
..Default::default()
|
||||
},
|
||||
spec: Some(PodSpec {
|
||||
node_name: Some(config.node_name),
|
||||
restart_policy: Some("Never".to_string()),
|
||||
host_pid: Some(config.host_pid),
|
||||
host_network: Some(config.host_network),
|
||||
containers: vec![Container {
|
||||
name: config.container_name,
|
||||
image: Some(PRIVILEGED_POD_IMAGE.to_string()),
|
||||
command: Some(config.command),
|
||||
security_context: Some(SecurityContext {
|
||||
privileged: Some(true),
|
||||
..Default::default()
|
||||
}),
|
||||
volume_mounts: Some(config.volume_mounts),
|
||||
..Default::default()
|
||||
}],
|
||||
volumes: Some(config.volumes),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn host_root_volume() -> (Volume, VolumeMount) {
|
||||
(
|
||||
Volume {
|
||||
name: "host".to_string(),
|
||||
host_path: Some(HostPathVolumeSource {
|
||||
path: "/".to_string(),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
VolumeMount {
|
||||
name: "host".to_string(),
|
||||
mount_path: "/host".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Build a ResourceBundle containing a privileged pod and any required RBAC.
|
||||
///
|
||||
/// This function implements the Resource Bundle pattern to encapsulate platform-specific
|
||||
/// security requirements for running privileged operations on nodes.
|
||||
///
|
||||
/// # Platform-Specific Behavior
|
||||
///
|
||||
/// - **OpenShift**: Creates a ClusterRoleBinding to grant the default ServiceAccount
|
||||
/// access to the `system:openshift:scc:privileged` ClusterRole, which allows the pod
|
||||
/// to use the privileged Security Context Constraint (SCC).
|
||||
/// - **Standard Kubernetes/K3s**: Only creates the Pod resource, as these distributions
|
||||
/// use standard PodSecurityPolicy or don't enforce additional security constraints.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `config` - Configuration for the privileged pod (name, namespace, command, etc.)
|
||||
/// * `k8s_distribution` - The detected Kubernetes distribution to determine RBAC requirements
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A `ResourceBundle` containing 1-2 resources:
|
||||
/// - ClusterRoleBinding (OpenShift only)
|
||||
/// - Pod (all distributions)
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust,no_run
|
||||
/// # use harmony::topology::k8s::helper::{build_privileged_bundle, PrivilegedPodConfig};
|
||||
/// # use harmony::topology::KubernetesDistribution;
|
||||
/// let bundle = build_privileged_bundle(
|
||||
/// PrivilegedPodConfig {
|
||||
/// name: "network-setup".to_string(),
|
||||
/// namespace: "default".to_string(),
|
||||
/// node_name: "worker-01".to_string(),
|
||||
/// container_name: "setup".to_string(),
|
||||
/// command: vec!["nmcli".to_string(), "connection".to_string(), "reload".to_string()],
|
||||
/// ..Default::default()
|
||||
/// },
|
||||
/// &KubernetesDistribution::OpenshiftFamily,
|
||||
/// );
|
||||
/// // Bundle now contains ClusterRoleBinding + Pod
|
||||
/// ```
|
||||
pub fn build_privileged_bundle(
|
||||
config: PrivilegedPodConfig,
|
||||
k8s_distribution: &KubernetesDistribution,
|
||||
) -> ResourceBundle {
|
||||
debug!(
|
||||
"Building privileged bundle for config {config:#?} on distribution {k8s_distribution:?}"
|
||||
);
|
||||
let mut bundle = ResourceBundle::new();
|
||||
let pod_name = config.name.clone();
|
||||
let namespace = config.namespace.clone();
|
||||
|
||||
// 1. On OpenShift, create RBAC binding to privileged SCC
|
||||
if let KubernetesDistribution::OpenshiftFamily = k8s_distribution {
|
||||
// The default ServiceAccount needs to be bound to the privileged SCC
|
||||
// via the system:openshift:scc:privileged ClusterRole
|
||||
let crb = ClusterRoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("{}-scc-binding", pod_name)),
|
||||
..Default::default()
|
||||
},
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".to_string(),
|
||||
kind: "ClusterRole".to_string(),
|
||||
name: "system:openshift:scc:privileged".to_string(),
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".to_string(),
|
||||
name: "default".to_string(),
|
||||
namespace: Some(namespace.clone()),
|
||||
api_group: None,
|
||||
..Default::default()
|
||||
}]),
|
||||
};
|
||||
bundle.add(crb);
|
||||
}
|
||||
|
||||
// 2. Build the privileged pod
|
||||
let pod = build_privileged_pod(config, k8s_distribution);
|
||||
bundle.add(pod);
|
||||
|
||||
bundle
|
||||
}
|
||||
|
||||
/// Action to take when a drain operation times out.
|
||||
pub enum DrainTimeoutAction {
|
||||
/// Accept the partial drain and continue
|
||||
Accept,
|
||||
/// Retry the drain for another timeout period
|
||||
Retry,
|
||||
/// Abort the drain operation
|
||||
Abort,
|
||||
}
|
||||
|
||||
/// Prompts the user to confirm acceptance of a partial drain.
|
||||
///
|
||||
/// Returns `Ok(true)` if the user confirms acceptance, `Ok(false)` if the user
|
||||
/// chooses to retry or abort, and `Err` if the prompt system fails entirely.
|
||||
pub fn prompt_drain_timeout_action(
|
||||
node_name: &str,
|
||||
pending_count: usize,
|
||||
timeout_duration: Duration,
|
||||
) -> Result<DrainTimeoutAction, kube::Error> {
|
||||
let prompt_msg = format!(
|
||||
"Drain operation timed out on node '{}' with {} pod(s) remaining. What would you like to do?",
|
||||
node_name, pending_count
|
||||
);
|
||||
|
||||
loop {
|
||||
let choices = vec![
|
||||
"Accept drain failure (requires confirmation)".to_string(),
|
||||
format!("Retry drain for another {:?}", timeout_duration),
|
||||
"Abort operation".to_string(),
|
||||
];
|
||||
|
||||
let selection = inquire::Select::new(&prompt_msg, choices)
|
||||
.with_help_message("Use arrow keys to navigate, Enter to select")
|
||||
.prompt()
|
||||
.map_err(|e| {
|
||||
kube::Error::Discovery(DiscoveryError::MissingResource(format!(
|
||||
"Prompt failed: {}",
|
||||
e
|
||||
)))
|
||||
})?;
|
||||
|
||||
if selection.starts_with("Accept") {
|
||||
// Require typed confirmation - retry until correct or user cancels
|
||||
let required_confirmation = format!("yes-accept-drain:{}={}", node_name, pending_count);
|
||||
|
||||
let confirmation_prompt = format!(
|
||||
"To accept this partial drain, type exactly: {}",
|
||||
required_confirmation
|
||||
);
|
||||
|
||||
match inquire::Text::new(&confirmation_prompt)
|
||||
.with_help_message(&format!(
|
||||
"This action acknowledges {} pods will remain on the node",
|
||||
pending_count
|
||||
))
|
||||
.prompt()
|
||||
{
|
||||
Ok(input) if input == required_confirmation => {
|
||||
warn!(
|
||||
"User accepted partial drain of node '{}' with {} pods remaining (confirmation: {})",
|
||||
node_name, pending_count, required_confirmation
|
||||
);
|
||||
return Ok(DrainTimeoutAction::Accept);
|
||||
}
|
||||
Ok(input) => {
|
||||
warn!(
|
||||
"Confirmation failed. Expected '{}', got '{}'. Please try again.",
|
||||
required_confirmation, input
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
// User cancelled (Ctrl+C) or prompt system failed
|
||||
error!("Confirmation prompt cancelled or failed: {}", e);
|
||||
return Ok(DrainTimeoutAction::Abort);
|
||||
}
|
||||
}
|
||||
} else if selection.starts_with("Retry") {
|
||||
info!(
|
||||
"User chose to retry drain operation for another {:?}",
|
||||
timeout_duration
|
||||
);
|
||||
return Ok(DrainTimeoutAction::Retry);
|
||||
} else {
|
||||
error!("Drain operation aborted by user");
|
||||
return Ok(DrainTimeoutAction::Abort);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn test_host_root_volume() {
|
||||
let (volume, mount) = host_root_volume();
|
||||
|
||||
assert_eq!(volume.name, "host");
|
||||
assert_eq!(volume.host_path.as_ref().unwrap().path, "/");
|
||||
|
||||
assert_eq!(mount.name, "host");
|
||||
assert_eq!(mount.mount_path, "/host");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_pod_minimal() {
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "minimal-pod".to_string(),
|
||||
namespace: "kube-system".to_string(),
|
||||
node_name: "node-123".to_string(),
|
||||
container_name: "debug-container".to_string(),
|
||||
command: vec!["sleep".to_string(), "3600".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
assert_eq!(pod.metadata.name, Some("minimal-pod".to_string()));
|
||||
assert_eq!(pod.metadata.namespace, Some("kube-system".to_string()));
|
||||
|
||||
let spec = pod.spec.as_ref().expect("Pod spec should be present");
|
||||
assert_eq!(spec.node_name, Some("node-123".to_string()));
|
||||
assert_eq!(spec.restart_policy, Some("Never".to_string()));
|
||||
assert_eq!(spec.host_pid, Some(false));
|
||||
assert_eq!(spec.host_network, Some(false));
|
||||
|
||||
assert_eq!(spec.containers.len(), 1);
|
||||
let container = &spec.containers[0];
|
||||
assert_eq!(container.name, "debug-container");
|
||||
assert_eq!(container.image, Some(PRIVILEGED_POD_IMAGE.to_string()));
|
||||
assert_eq!(
|
||||
container.command,
|
||||
Some(vec!["sleep".to_string(), "3600".to_string()])
|
||||
);
|
||||
|
||||
// Security context check
|
||||
let sec_ctx = container
|
||||
.security_context
|
||||
.as_ref()
|
||||
.expect("Security context missing");
|
||||
assert_eq!(sec_ctx.privileged, Some(true));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_pod_with_volumes_and_host_access() {
|
||||
let (host_vol, host_mount) = host_root_volume();
|
||||
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "full-pod".to_string(),
|
||||
namespace: "default".to_string(),
|
||||
node_name: "node-1".to_string(),
|
||||
container_name: "runner".to_string(),
|
||||
command: vec!["/bin/sh".to_string()],
|
||||
volumes: vec![host_vol.clone()],
|
||||
volume_mounts: vec![host_mount.clone()],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
let spec = pod.spec.as_ref().expect("Pod spec should be present");
|
||||
assert_eq!(spec.host_pid, Some(true));
|
||||
assert_eq!(spec.host_network, Some(true));
|
||||
|
||||
// Check volumes in Spec
|
||||
let volumes = spec.volumes.as_ref().expect("Volumes should be present");
|
||||
assert_eq!(volumes.len(), 1);
|
||||
assert_eq!(volumes[0].name, "host");
|
||||
|
||||
// Check mounts in Container
|
||||
let container = &spec.containers[0];
|
||||
let mounts = container
|
||||
.volume_mounts
|
||||
.as_ref()
|
||||
.expect("Mounts should be present");
|
||||
assert_eq!(mounts.len(), 1);
|
||||
assert_eq!(mounts[0].name, "host");
|
||||
assert_eq!(mounts[0].mount_path, "/host");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_pod_structure_correctness() {
|
||||
// This test validates that the construction logic puts things in the right places
|
||||
// effectively validating the "template".
|
||||
|
||||
let custom_vol = Volume {
|
||||
name: "custom-vol".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
let custom_mount = VolumeMount {
|
||||
name: "custom-vol".to_string(),
|
||||
mount_path: "/custom".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "structure-test".to_string(),
|
||||
namespace: "test-ns".to_string(),
|
||||
node_name: "test-node".to_string(),
|
||||
container_name: "test-container".to_string(),
|
||||
command: vec!["cmd".to_string()],
|
||||
volumes: vec![custom_vol],
|
||||
volume_mounts: vec![custom_mount],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
// Validate structure depth
|
||||
let spec = pod.spec.as_ref().unwrap();
|
||||
|
||||
// 1. Spec level fields
|
||||
assert!(spec.node_name.is_some());
|
||||
assert!(spec.volumes.is_some());
|
||||
|
||||
// 2. Container level fields
|
||||
let container = &spec.containers[0];
|
||||
assert!(container.security_context.is_some());
|
||||
assert!(container.volume_mounts.is_some());
|
||||
|
||||
// 3. Nested fields
|
||||
assert!(
|
||||
container
|
||||
.security_context
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.privileged
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(spec.volumes.as_ref().unwrap()[0].name, "custom-vol");
|
||||
assert_eq!(
|
||||
container.volume_mounts.as_ref().unwrap()[0].mount_path,
|
||||
"/custom"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_bundle_default_distribution() {
|
||||
let bundle = build_privileged_bundle(
|
||||
PrivilegedPodConfig {
|
||||
name: "test-bundle".to_string(),
|
||||
namespace: "test-ns".to_string(),
|
||||
node_name: "node-1".to_string(),
|
||||
container_name: "test-container".to_string(),
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
// For Default distribution, only the Pod should be in the bundle
|
||||
assert_eq!(bundle.resources.len(), 1);
|
||||
|
||||
let pod_obj = &bundle.resources[0];
|
||||
assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle"));
|
||||
assert_eq!(pod_obj.metadata.namespace.as_deref(), Some("test-ns"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_bundle_openshift_distribution() {
|
||||
let bundle = build_privileged_bundle(
|
||||
PrivilegedPodConfig {
|
||||
name: "test-bundle-ocp".to_string(),
|
||||
namespace: "test-ns".to_string(),
|
||||
node_name: "node-1".to_string(),
|
||||
container_name: "test-container".to_string(),
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::OpenshiftFamily,
|
||||
);
|
||||
|
||||
// For OpenShift, both ClusterRoleBinding and Pod should be in the bundle
|
||||
assert_eq!(bundle.resources.len(), 2);
|
||||
|
||||
// First resource should be the ClusterRoleBinding
|
||||
let crb_obj = &bundle.resources[0];
|
||||
assert_eq!(
|
||||
crb_obj.metadata.name.as_deref(),
|
||||
Some("test-bundle-ocp-scc-binding")
|
||||
);
|
||||
|
||||
// Verify it's targeting the privileged SCC
|
||||
if let Some(role_ref) = crb_obj.data.get("roleRef") {
|
||||
assert_eq!(
|
||||
role_ref.get("name").and_then(|v| v.as_str()),
|
||||
Some("system:openshift:scc:privileged")
|
||||
);
|
||||
}
|
||||
|
||||
// Second resource should be the Pod
|
||||
let pod_obj = &bundle.resources[1];
|
||||
assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle-ocp"));
|
||||
assert_eq!(pod_obj.metadata.namespace.as_deref(), Some("test-ns"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_privileged_bundle_k3s_distribution() {
|
||||
let bundle = build_privileged_bundle(
|
||||
PrivilegedPodConfig {
|
||||
name: "test-bundle-k3s".to_string(),
|
||||
namespace: "test-ns".to_string(),
|
||||
node_name: "node-1".to_string(),
|
||||
container_name: "test-container".to_string(),
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::K3sFamily,
|
||||
);
|
||||
|
||||
// For K3s, only the Pod should be in the bundle (no special SCC)
|
||||
assert_eq!(bundle.resources.len(), 1);
|
||||
|
||||
let pod_obj = &bundle.resources[0];
|
||||
assert_eq!(pod_obj.metadata.name.as_deref(), Some("test-bundle-k3s"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pod_yaml_rendering_expected() {
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "pod_name".to_string(),
|
||||
namespace: "pod_namespace".to_string(),
|
||||
node_name: "node name".to_string(),
|
||||
container_name: "container name".to_string(),
|
||||
command: vec!["command".to_string(), "argument".to_string()],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::Default,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&serde_yaml::to_string(&pod).unwrap(),
|
||||
"apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: pod_name
|
||||
namespace: pod_namespace
|
||||
spec:
|
||||
containers:
|
||||
- command:
|
||||
- command
|
||||
- argument
|
||||
image: hub.nationtech.io/redhat/ubi10:latest
|
||||
name: container name
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts: []
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeName: node name
|
||||
restartPolicy: Never
|
||||
volumes: []
|
||||
"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pod_yaml_rendering_openshift() {
|
||||
let pod = build_privileged_pod(
|
||||
PrivilegedPodConfig {
|
||||
name: "pod_name".to_string(),
|
||||
namespace: "pod_namespace".to_string(),
|
||||
node_name: "node name".to_string(),
|
||||
container_name: "container name".to_string(),
|
||||
command: vec!["command".to_string(), "argument".to_string()],
|
||||
host_pid: true,
|
||||
host_network: true,
|
||||
..Default::default()
|
||||
},
|
||||
&KubernetesDistribution::OpenshiftFamily,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&serde_yaml::to_string(&pod).unwrap(),
|
||||
"apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
annotations:
|
||||
openshift.io/required-scc: privileged
|
||||
openshift.io/scc: privileged
|
||||
name: pod_name
|
||||
namespace: pod_namespace
|
||||
spec:
|
||||
containers:
|
||||
- command:
|
||||
- command
|
||||
- argument
|
||||
image: hub.nationtech.io/redhat/ubi10:latest
|
||||
name: container name
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts: []
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeName: node name
|
||||
restartPolicy: Never
|
||||
volumes: []
|
||||
"
|
||||
);
|
||||
}
|
||||
}
|
||||
2586
harmony/src/domain/topology/k8s/mod.rs
Normal file
2586
harmony/src/domain/topology/k8s/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,19 +1,12 @@
|
||||
use std::{collections::BTreeMap, process::Command, sync::Arc, time::Duration};
|
||||
use std::{collections::BTreeMap, process::Command, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine, engine::general_purpose};
|
||||
use harmony_types::rfc1123::Rfc1123Name;
|
||||
use k8s_openapi::{
|
||||
ByteString,
|
||||
api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
},
|
||||
};
|
||||
use kube::{
|
||||
api::{DynamicObject, GroupVersionKind, ObjectMeta},
|
||||
runtime::conditions,
|
||||
use k8s_openapi::api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use kube::api::{GroupVersionKind, ObjectMeta};
|
||||
use log::{debug, info, trace, warn};
|
||||
use serde::Serialize;
|
||||
use tokio::sync::OnceCell;
|
||||
@@ -34,32 +27,7 @@ use crate::{
|
||||
score_cert_management::CertificateManagementScore,
|
||||
},
|
||||
k3d::K3DInstallationScore,
|
||||
k8s::{
|
||||
ingress::{K8sIngressScore, PathType},
|
||||
resource::K8sResourceScore,
|
||||
},
|
||||
monitoring::{
|
||||
grafana::{grafana::Grafana, helm::helm_grafana::grafana_helm_chart_score},
|
||||
kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus,
|
||||
crd_grafana::{
|
||||
Grafana as GrafanaCRD, GrafanaCom, GrafanaDashboard,
|
||||
GrafanaDashboardDatasource, GrafanaDashboardSpec, GrafanaDatasource,
|
||||
GrafanaDatasourceConfig, GrafanaDatasourceJsonData,
|
||||
GrafanaDatasourceSecureJsonData, GrafanaDatasourceSpec, GrafanaSpec,
|
||||
},
|
||||
crd_prometheuses::LabelSelector,
|
||||
prometheus_operator::prometheus_operator_helm_chart_score,
|
||||
rhob_alertmanager_config::RHOBObservability,
|
||||
service_monitor::ServiceMonitor,
|
||||
},
|
||||
},
|
||||
nats::capability::NatsCluster,
|
||||
okd::{crd::ingresses_config::Ingress as IngressResource, route::OKDTlsPassthroughScore},
|
||||
prometheus::{
|
||||
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
||||
prometheus::PrometheusMonitoring, rhob_alerting_score::RHOBAlertingScore,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{TlsRoute, TlsRouter, ingress::Ingress},
|
||||
@@ -69,7 +37,6 @@ use super::super::{
|
||||
DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, PreparationError,
|
||||
PreparationOutcome, Topology,
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::AlertReceiver,
|
||||
tenant::{
|
||||
TenantConfig, TenantManager,
|
||||
k8s::K8sTenantManager,
|
||||
@@ -103,7 +70,6 @@ enum K8sSource {
|
||||
pub struct K8sAnywhereTopology {
|
||||
k8s_state: Arc<OnceCell<Option<K8sState>>>,
|
||||
tenant_manager: Arc<OnceCell<K8sTenantManager>>,
|
||||
k8s_distribution: Arc<OnceCell<KubernetesDistribution>>,
|
||||
config: Arc<K8sAnywhereConfig>,
|
||||
}
|
||||
|
||||
@@ -184,216 +150,6 @@ impl TlsRouter for K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Grafana for K8sAnywhereTopology {
|
||||
async fn ensure_grafana_operator(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
debug!("ensure grafana operator");
|
||||
let client = self.k8s_client().await.unwrap();
|
||||
let grafana_gvk = GroupVersionKind {
|
||||
group: "grafana.integreatly.org".to_string(),
|
||||
version: "v1beta1".to_string(),
|
||||
kind: "Grafana".to_string(),
|
||||
};
|
||||
let name = "grafanas.grafana.integreatly.org";
|
||||
let ns = "grafana";
|
||||
|
||||
let grafana_crd = client
|
||||
.get_resource_json_value(name, Some(ns), &grafana_gvk)
|
||||
.await;
|
||||
match grafana_crd {
|
||||
Ok(_) => {
|
||||
return Ok(PreparationOutcome::Success {
|
||||
details: "Found grafana CRDs in cluster".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Err(_) => {
|
||||
return self
|
||||
.install_grafana_operator(inventory, Some("grafana"))
|
||||
.await;
|
||||
}
|
||||
};
|
||||
}
|
||||
async fn install_grafana(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
let ns = "grafana";
|
||||
|
||||
let mut label = BTreeMap::new();
|
||||
|
||||
label.insert("dashboards".to_string(), "grafana".to_string());
|
||||
|
||||
let label_selector = LabelSelector {
|
||||
match_labels: label.clone(),
|
||||
match_expressions: vec![],
|
||||
};
|
||||
|
||||
let client = self.k8s_client().await?;
|
||||
|
||||
let grafana = self.build_grafana(ns, &label);
|
||||
|
||||
client.apply(&grafana, Some(ns)).await?;
|
||||
//TODO change this to a ensure ready or something better than just a timeout
|
||||
client
|
||||
.wait_until_deployment_ready(
|
||||
"grafana-grafana-deployment",
|
||||
Some("grafana"),
|
||||
Some(Duration::from_secs(30)),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let sa_name = "grafana-grafana-sa";
|
||||
let token_secret_name = "grafana-sa-token-secret";
|
||||
|
||||
let sa_token_secret = self.build_sa_token_secret(token_secret_name, sa_name, ns);
|
||||
|
||||
client.apply(&sa_token_secret, Some(ns)).await?;
|
||||
let secret_gvk = GroupVersionKind {
|
||||
group: "".to_string(),
|
||||
version: "v1".to_string(),
|
||||
kind: "Secret".to_string(),
|
||||
};
|
||||
|
||||
let secret = client
|
||||
.get_resource_json_value(token_secret_name, Some(ns), &secret_gvk)
|
||||
.await?;
|
||||
|
||||
let token = format!(
|
||||
"Bearer {}",
|
||||
self.extract_and_normalize_token(&secret).unwrap()
|
||||
);
|
||||
|
||||
debug!("creating grafana clusterrole binding");
|
||||
|
||||
let clusterrolebinding =
|
||||
self.build_cluster_rolebinding(sa_name, "cluster-monitoring-view", ns);
|
||||
|
||||
client.apply(&clusterrolebinding, Some(ns)).await?;
|
||||
|
||||
debug!("creating grafana datasource crd");
|
||||
|
||||
let thanos_url = format!(
|
||||
"https://{}",
|
||||
self.get_domain("thanos-querier-openshift-monitoring")
|
||||
.await
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
let thanos_openshift_datasource = self.build_grafana_datasource(
|
||||
"thanos-openshift-monitoring",
|
||||
ns,
|
||||
&label_selector,
|
||||
&thanos_url,
|
||||
&token,
|
||||
);
|
||||
|
||||
client.apply(&thanos_openshift_datasource, Some(ns)).await?;
|
||||
|
||||
debug!("creating grafana dashboard crd");
|
||||
let dashboard = self.build_grafana_dashboard(ns, &label_selector);
|
||||
|
||||
client.apply(&dashboard, Some(ns)).await?;
|
||||
debug!("creating grafana ingress");
|
||||
let grafana_ingress = self.build_grafana_ingress(ns).await;
|
||||
|
||||
grafana_ingress
|
||||
.interpret(&Inventory::empty(), self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Installed grafana composants".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusMonitoring<CRDPrometheus> for K8sAnywhereTopology {
|
||||
async fn install_prometheus(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
_inventory: &Inventory,
|
||||
_receivers: Option<Vec<Box<dyn AlertReceiver<CRDPrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let client = self.k8s_client().await?;
|
||||
|
||||
for monitor in sender.service_monitor.iter() {
|
||||
client
|
||||
.apply(monitor, Some(&sender.namespace))
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "successfuly installed prometheus components".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_prometheus_operator(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
_inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let po_result = self.ensure_prometheus_operator(sender).await?;
|
||||
|
||||
match po_result {
|
||||
PreparationOutcome::Success { details: _ } => {
|
||||
debug!("Detected prometheus crds operator present in cluster.");
|
||||
return Ok(po_result);
|
||||
}
|
||||
PreparationOutcome::Noop => {
|
||||
debug!("Skipping Prometheus CR installation due to missing operator.");
|
||||
return Ok(po_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusMonitoring<RHOBObservability> for K8sAnywhereTopology {
|
||||
async fn install_prometheus(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<RHOBObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let po_result = self.ensure_cluster_observability_operator(sender).await?;
|
||||
|
||||
if po_result == PreparationOutcome::Noop {
|
||||
debug!("Skipping Prometheus CR installation due to missing operator.");
|
||||
return Ok(po_result);
|
||||
}
|
||||
|
||||
let result = self
|
||||
.get_cluster_observability_operator_prometheus_application_score(
|
||||
sender.clone(),
|
||||
receivers,
|
||||
)
|
||||
.await
|
||||
.interpret(inventory, self)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: outcome.message,
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(outcome.message)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_prometheus_operator(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for K8sAnywhereTopology {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
@@ -554,7 +310,6 @@ impl K8sAnywhereTopology {
|
||||
Self {
|
||||
k8s_state: Arc::new(OnceCell::new()),
|
||||
tenant_manager: Arc::new(OnceCell::new()),
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
config: Arc::new(K8sAnywhereConfig::from_env()),
|
||||
}
|
||||
}
|
||||
@@ -563,7 +318,6 @@ impl K8sAnywhereTopology {
|
||||
Self {
|
||||
k8s_state: Arc::new(OnceCell::new()),
|
||||
tenant_manager: Arc::new(OnceCell::new()),
|
||||
k8s_distribution: Arc::new(OnceCell::new()),
|
||||
config: Arc::new(config),
|
||||
}
|
||||
}
|
||||
@@ -600,56 +354,14 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_k8s_distribution(&self) -> Result<&KubernetesDistribution, PreparationError> {
|
||||
self.k8s_distribution
|
||||
.get_or_try_init(async || {
|
||||
debug!("Trying to detect k8s distribution");
|
||||
let client = self.k8s_client().await.unwrap();
|
||||
|
||||
let discovery = client.discovery().await.map_err(|e| {
|
||||
PreparationError::new(format!("Could not discover API groups: {}", e))
|
||||
})?;
|
||||
|
||||
let version = client.get_apiserver_version().await.map_err(|e| {
|
||||
PreparationError::new(format!("Could not get server version: {}", e))
|
||||
})?;
|
||||
|
||||
// OpenShift / OKD
|
||||
if discovery
|
||||
.groups()
|
||||
.any(|g| g.name() == "project.openshift.io")
|
||||
{
|
||||
info!("Found KubernetesDistribution OpenshiftFamily");
|
||||
return Ok(KubernetesDistribution::OpenshiftFamily);
|
||||
}
|
||||
|
||||
// K3d / K3s
|
||||
if version.git_version.contains("k3s") {
|
||||
info!("Found KubernetesDistribution K3sFamily");
|
||||
return Ok(KubernetesDistribution::K3sFamily);
|
||||
}
|
||||
|
||||
info!("Could not identify KubernetesDistribution, using Default");
|
||||
return Ok(KubernetesDistribution::Default);
|
||||
})
|
||||
pub async fn get_k8s_distribution(&self) -> Result<KubernetesDistribution, PreparationError> {
|
||||
self.k8s_client()
|
||||
.await?
|
||||
.get_k8s_distribution()
|
||||
.await
|
||||
}
|
||||
|
||||
fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option<String> {
|
||||
let token_b64 = secret
|
||||
.data
|
||||
.get("token")
|
||||
.or_else(|| secret.data.get("data").and_then(|d| d.get("token")))
|
||||
.and_then(|v| v.as_str())?;
|
||||
|
||||
let bytes = general_purpose::STANDARD.decode(token_b64).ok()?;
|
||||
|
||||
let s = String::from_utf8(bytes).ok()?;
|
||||
|
||||
let cleaned = s
|
||||
.trim_matches(|c: char| c.is_whitespace() || c == '\0')
|
||||
.to_string();
|
||||
Some(cleaned)
|
||||
.map_err(|e| {
|
||||
PreparationError::new(format!("Failed to get k8s distribution from client : {e}"))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build_cluster_rolebinding(
|
||||
@@ -701,141 +413,6 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_grafana_datasource(
|
||||
&self,
|
||||
name: &str,
|
||||
ns: &str,
|
||||
label_selector: &LabelSelector,
|
||||
url: &str,
|
||||
token: &str,
|
||||
) -> GrafanaDatasource {
|
||||
let mut json_data = BTreeMap::new();
|
||||
json_data.insert("timeInterval".to_string(), "5s".to_string());
|
||||
|
||||
GrafanaDatasource {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.to_string()),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaDatasourceSpec {
|
||||
instance_selector: label_selector.clone(),
|
||||
allow_cross_namespace_import: Some(true),
|
||||
values_from: None,
|
||||
datasource: GrafanaDatasourceConfig {
|
||||
access: "proxy".to_string(),
|
||||
name: name.to_string(),
|
||||
r#type: "prometheus".to_string(),
|
||||
url: url.to_string(),
|
||||
database: None,
|
||||
json_data: Some(GrafanaDatasourceJsonData {
|
||||
time_interval: Some("60s".to_string()),
|
||||
http_header_name1: Some("Authorization".to_string()),
|
||||
tls_skip_verify: Some(true),
|
||||
oauth_pass_thru: Some(true),
|
||||
}),
|
||||
secure_json_data: Some(GrafanaDatasourceSecureJsonData {
|
||||
http_header_value1: Some(format!("Bearer {token}")),
|
||||
}),
|
||||
is_default: Some(false),
|
||||
editable: Some(true),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn build_grafana_dashboard(
|
||||
&self,
|
||||
ns: &str,
|
||||
label_selector: &LabelSelector,
|
||||
) -> GrafanaDashboard {
|
||||
let graf_dashboard = GrafanaDashboard {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("grafana-dashboard-{}", ns)),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaDashboardSpec {
|
||||
resync_period: Some("30s".to_string()),
|
||||
instance_selector: label_selector.clone(),
|
||||
datasources: Some(vec![GrafanaDashboardDatasource {
|
||||
input_name: "DS_PROMETHEUS".to_string(),
|
||||
datasource_name: "thanos-openshift-monitoring".to_string(),
|
||||
}]),
|
||||
json: None,
|
||||
grafana_com: Some(GrafanaCom {
|
||||
id: 17406,
|
||||
revision: None,
|
||||
}),
|
||||
},
|
||||
};
|
||||
graf_dashboard
|
||||
}
|
||||
|
||||
fn build_grafana(&self, ns: &str, labels: &BTreeMap<String, String>) -> GrafanaCRD {
|
||||
let grafana = GrafanaCRD {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("grafana-{}", ns)),
|
||||
namespace: Some(ns.to_string()),
|
||||
labels: Some(labels.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaSpec {
|
||||
config: None,
|
||||
admin_user: None,
|
||||
admin_password: None,
|
||||
ingress: None,
|
||||
persistence: None,
|
||||
resources: None,
|
||||
},
|
||||
};
|
||||
grafana
|
||||
}
|
||||
|
||||
async fn build_grafana_ingress(&self, ns: &str) -> K8sIngressScore {
|
||||
let domain = self.get_domain(&format!("grafana-{}", ns)).await.unwrap();
|
||||
let name = format!("{}-grafana", ns);
|
||||
let backend_service = format!("grafana-{}-service", ns);
|
||||
|
||||
K8sIngressScore {
|
||||
name: fqdn::fqdn!(&name),
|
||||
host: fqdn::fqdn!(&domain),
|
||||
backend_service: fqdn::fqdn!(&backend_service),
|
||||
port: 3000,
|
||||
path: Some("/".to_string()),
|
||||
path_type: Some(PathType::Prefix),
|
||||
namespace: Some(fqdn::fqdn!(&ns)),
|
||||
ingress_class_name: Some("openshift-default".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_cluster_observability_operator_prometheus_application_score(
|
||||
&self,
|
||||
sender: RHOBObservability,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<RHOBObservability>>>>,
|
||||
) -> RHOBAlertingScore {
|
||||
RHOBAlertingScore {
|
||||
sender,
|
||||
receivers: receivers.unwrap_or_default(),
|
||||
service_monitors: vec![],
|
||||
prometheus_rules: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_k8s_prometheus_application_score(
|
||||
&self,
|
||||
sender: CRDPrometheus,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<CRDPrometheus>>>>,
|
||||
service_monitors: Option<Vec<ServiceMonitor>>,
|
||||
) -> K8sPrometheusCRDAlertingScore {
|
||||
return K8sPrometheusCRDAlertingScore {
|
||||
sender,
|
||||
receivers: receivers.unwrap_or_default(),
|
||||
service_monitors: service_monitors.unwrap_or_default(),
|
||||
prometheus_rules: vec![],
|
||||
};
|
||||
}
|
||||
|
||||
async fn openshift_ingress_operator_available(&self) -> Result<(), PreparationError> {
|
||||
let client = self.k8s_client().await?;
|
||||
let gvk = GroupVersionKind {
|
||||
@@ -1001,137 +578,6 @@ impl K8sAnywhereTopology {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_cluster_observability_operator(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let status = Command::new("sh")
|
||||
.args(["-c", "kubectl get crd -A | grep -i rhobs"])
|
||||
.status()
|
||||
.map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?;
|
||||
|
||||
if !status.success() {
|
||||
if let Some(Some(k8s_state)) = self.k8s_state.get() {
|
||||
match k8s_state.source {
|
||||
K8sSource::LocalK3d => {
|
||||
warn!(
|
||||
"Installing observability operator is not supported on LocalK3d source"
|
||||
);
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
debug!("installing cluster observability operator");
|
||||
todo!();
|
||||
let op_score =
|
||||
prometheus_operator_helm_chart_score(sender.namespace.clone());
|
||||
let result = op_score.interpret(&Inventory::empty(), self).await;
|
||||
|
||||
return match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: "installed cluster observability operator".into(),
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(
|
||||
"failed to install cluster observability operator (unknown error)".into(),
|
||||
)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
};
|
||||
}
|
||||
K8sSource::Kubeconfig => {
|
||||
debug!(
|
||||
"unable to install cluster observability operator, contact cluster admin"
|
||||
);
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!(
|
||||
"Unable to detect k8s_state. Skipping Cluster Observability Operator install."
|
||||
);
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Cluster Observability Operator is already present, skipping install");
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "cluster observability operator present in cluster".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_prometheus_operator(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let status = Command::new("sh")
|
||||
.args(["-c", "kubectl get crd -A | grep -i prometheuses"])
|
||||
.status()
|
||||
.map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?;
|
||||
|
||||
if !status.success() {
|
||||
if let Some(Some(k8s_state)) = self.k8s_state.get() {
|
||||
match k8s_state.source {
|
||||
K8sSource::LocalK3d => {
|
||||
debug!("installing prometheus operator");
|
||||
let op_score =
|
||||
prometheus_operator_helm_chart_score(sender.namespace.clone());
|
||||
let result = op_score.interpret(&Inventory::empty(), self).await;
|
||||
|
||||
return match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: "installed prometheus operator".into(),
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(
|
||||
"failed to install prometheus operator (unknown error)".into(),
|
||||
)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
};
|
||||
}
|
||||
K8sSource::Kubeconfig => {
|
||||
debug!("unable to install prometheus operator, contact cluster admin");
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!("Unable to detect k8s_state. Skipping Prometheus Operator install.");
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Prometheus operator is already present, skipping install");
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "prometheus operator present in cluster".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_grafana_operator(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
ns: Option<&str>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let namespace = ns.unwrap_or("grafana");
|
||||
info!("installing grafana operator in ns {namespace}");
|
||||
let tenant = self.get_k8s_tenant_manager()?.get_tenant_config().await;
|
||||
let mut namespace_scope = false;
|
||||
if tenant.is_some() {
|
||||
namespace_scope = true;
|
||||
}
|
||||
let _grafana_operator_score = grafana_helm_chart_score(namespace, namespace_scope)
|
||||
.interpret(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()));
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: format!(
|
||||
"Successfully installed grafana operator in ns {}",
|
||||
ns.unwrap()
|
||||
),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
mod k8s_anywhere;
|
||||
pub mod nats;
|
||||
pub mod observability;
|
||||
mod postgres;
|
||||
pub use k8s_anywhere::*;
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::grafana::{
|
||||
grafana::Grafana,
|
||||
k8s::{
|
||||
score_ensure_grafana_ready::GrafanaK8sEnsureReadyScore,
|
||||
score_grafana_alert_receiver::GrafanaK8sReceiverScore,
|
||||
score_grafana_datasource::GrafanaK8sDatasourceScore,
|
||||
score_grafana_rule::GrafanaK8sRuleScore, score_install_grafana::GrafanaK8sInstallScore,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<Grafana> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = GrafanaK8sInstallScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Grafana not installed {}", e)))?;
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed grafana alert sender".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<Grafana>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let receivers = match receivers {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for receiver in receivers {
|
||||
let score = GrafanaK8sReceiverScore {
|
||||
receiver,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert receivers installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<Grafana>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let rules = match rules {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for rule in rules {
|
||||
let score = GrafanaK8sRuleScore {
|
||||
sender: sender.clone(),
|
||||
rule,
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert rules installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<Grafana>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let scrape_targets = match scrape_targets {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for scrape_target in scrape_targets {
|
||||
let score = GrafanaK8sDatasourceScore {
|
||||
scrape_target,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to add DataSource: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All datasources installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = GrafanaK8sEnsureReadyScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Grafana not ready {}", e)))?;
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Grafana Ready".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::kube_prometheus::{
|
||||
KubePrometheus, helm::kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
|
||||
score_kube_prometheus_alert_receivers::KubePrometheusReceiverScore,
|
||||
score_kube_prometheus_ensure_ready::KubePrometheusEnsureReadyScore,
|
||||
score_kube_prometheus_rule::KubePrometheusRuleScore,
|
||||
score_kube_prometheus_scrape_target::KubePrometheusScrapeTargetScore,
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<KubePrometheus> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
kube_prometheus_helm_chart_score(sender.config.clone())
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed kubeprometheus alert sender".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<KubePrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let receivers = match receivers {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for receiver in receivers {
|
||||
let score = KubePrometheusReceiverScore {
|
||||
receiver,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert receivers installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<KubePrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let rules = match rules {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for rule in rules {
|
||||
let score = KubePrometheusRuleScore {
|
||||
sender: sender.clone(),
|
||||
rule,
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert rules installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<KubePrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let scrape_targets = match scrape_targets {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for scrape_target in scrape_targets {
|
||||
let score = KubePrometheusScrapeTargetScore {
|
||||
scrape_target,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All scrap targets installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = KubePrometheusEnsureReadyScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("KubePrometheus not ready {}", e)))?;
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "KubePrometheus Ready".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
pub mod grafana;
|
||||
pub mod kube_prometheus;
|
||||
pub mod openshift_monitoring;
|
||||
pub mod prometheus;
|
||||
pub mod redhat_cluster_observability;
|
||||
@@ -0,0 +1,142 @@
|
||||
use async_trait::async_trait;
|
||||
use log::info;
|
||||
|
||||
use crate::score::Score;
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::okd::{
|
||||
OpenshiftClusterAlertSender,
|
||||
score_enable_cluster_monitoring::OpenshiftEnableClusterMonitoringScore,
|
||||
score_openshift_alert_rule::OpenshiftAlertRuleScore,
|
||||
score_openshift_receiver::OpenshiftReceiverScore,
|
||||
score_openshift_scrape_target::OpenshiftScrapeTargetScore,
|
||||
score_user_workload::OpenshiftUserWorkloadMonitoring,
|
||||
score_verify_user_workload_monitoring::VerifyUserWorkload,
|
||||
},
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<OpenshiftClusterAlertSender> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
info!("enabling cluster monitoring");
|
||||
let cluster_monitoring_score = OpenshiftEnableClusterMonitoringScore {};
|
||||
cluster_monitoring_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
|
||||
info!("enabling user workload monitoring");
|
||||
let user_workload_score = OpenshiftUserWorkloadMonitoring {};
|
||||
user_workload_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully configured cluster monitoring".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<OpenshiftClusterAlertSender>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
if let Some(receivers) = receivers {
|
||||
for receiver in receivers {
|
||||
info!("Installing receiver {}", receiver.name());
|
||||
let receiver_score = OpenshiftReceiverScore { receiver };
|
||||
receiver_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed receivers for OpenshiftClusterMonitoring"
|
||||
.to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<OpenshiftClusterAlertSender>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
if let Some(rules) = rules {
|
||||
for rule in rules {
|
||||
info!("Installing rule ");
|
||||
let rule_score = OpenshiftAlertRuleScore { rule: rule };
|
||||
rule_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed rules for OpenshiftClusterMonitoring".to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<OpenshiftClusterAlertSender>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
if let Some(scrape_targets) = scrape_targets {
|
||||
for scrape_target in scrape_targets {
|
||||
info!("Installing scrape target");
|
||||
let scrape_target_score = OpenshiftScrapeTargetScore {
|
||||
scrape_target: scrape_target,
|
||||
};
|
||||
scrape_target_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully added scrape targets for OpenshiftClusterMonitoring"
|
||||
.to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let verify_monitoring_score = VerifyUserWorkload {};
|
||||
info!("Verifying user workload and cluster monitoring installed");
|
||||
verify_monitoring_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "OpenshiftClusterMonitoring ready".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,147 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::prometheus::{
|
||||
Prometheus, score_prometheus_alert_receivers::PrometheusReceiverScore,
|
||||
score_prometheus_ensure_ready::PrometheusEnsureReadyScore,
|
||||
score_prometheus_install::PrometheusInstallScore,
|
||||
score_prometheus_rule::PrometheusRuleScore,
|
||||
score_prometheus_scrape_target::PrometheusScrapeTargetScore,
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<Prometheus> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = PrometheusInstallScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Prometheus not installed {}", e)))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed kubeprometheus alert sender".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<Prometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let receivers = match receivers {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for receiver in receivers {
|
||||
let score = PrometheusReceiverScore {
|
||||
receiver,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert receivers installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<Prometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let rules = match rules {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for rule in rules {
|
||||
let score = PrometheusRuleScore {
|
||||
sender: sender.clone(),
|
||||
rule,
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert rules installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<Prometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let scrape_targets = match scrape_targets {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for scrape_target in scrape_targets {
|
||||
let score = PrometheusScrapeTargetScore {
|
||||
scrape_target,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All scrap targets installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = PrometheusEnsureReadyScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Prometheus not ready {}", e)))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Prometheus Ready".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
use crate::{
|
||||
modules::monitoring::red_hat_cluster_observability::{
|
||||
score_alert_receiver::RedHatClusterObservabilityReceiverScore,
|
||||
score_coo_monitoring_stack::RedHatClusterObservabilityMonitoringStackScore,
|
||||
},
|
||||
score::Score,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use log::info;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::red_hat_cluster_observability::{
|
||||
RedHatClusterObservability,
|
||||
score_redhat_cluster_observability_operator::RedHatClusterObservabilityOperatorScore,
|
||||
},
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<RedHatClusterObservability> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &RedHatClusterObservability,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
info!("Verifying Redhat Cluster Observability Operator");
|
||||
|
||||
let coo_score = RedHatClusterObservabilityOperatorScore::default();
|
||||
|
||||
coo_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
|
||||
info!(
|
||||
"Installing Cluster Observability Operator Monitoring Stack in ns {}",
|
||||
sender.namespace.clone()
|
||||
);
|
||||
|
||||
let coo_monitoring_stack_score = RedHatClusterObservabilityMonitoringStackScore {
|
||||
namespace: sender.namespace.clone(),
|
||||
resource_selector: sender.resource_selector.clone(),
|
||||
};
|
||||
|
||||
coo_monitoring_stack_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed RedHatClusterObservability Operator".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &RedHatClusterObservability,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<RedHatClusterObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
if let Some(receivers) = receivers {
|
||||
for receiver in receivers {
|
||||
info!("Installing receiver {}", receiver.name());
|
||||
let receiver_score = RedHatClusterObservabilityReceiverScore {
|
||||
receiver,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
receiver_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed receivers for OpenshiftClusterMonitoring"
|
||||
.to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
_sender: &RedHatClusterObservability,
|
||||
_inventory: &Inventory,
|
||||
_rules: Option<Vec<Box<dyn AlertRule<RedHatClusterObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
_sender: &RedHatClusterObservability,
|
||||
_inventory: &Inventory,
|
||||
_scrape_targets: Option<Vec<Box<dyn ScrapeTarget<RedHatClusterObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
_sender: &RedHatClusterObservability,
|
||||
_inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::{net::SocketAddr, str::FromStr};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
|
||||
use super::LogicalHost;
|
||||
|
||||
@@ -2,6 +2,7 @@ pub mod decentralized;
|
||||
mod failover;
|
||||
mod ha_cluster;
|
||||
pub mod ingress;
|
||||
pub mod monitoring;
|
||||
pub mod node_exporter;
|
||||
pub mod opnsense;
|
||||
pub use failover::*;
|
||||
@@ -11,7 +12,6 @@ mod http;
|
||||
pub mod installable;
|
||||
mod k8s_anywhere;
|
||||
mod localhost;
|
||||
pub mod oberservability;
|
||||
pub mod tenant;
|
||||
use derive_new::new;
|
||||
pub use k8s_anywhere::*;
|
||||
|
||||
234
harmony/src/domain/topology/monitoring.rs
Normal file
234
harmony/src/domain/topology/monitoring.rs
Normal file
@@ -0,0 +1,234 @@
|
||||
use std::{
|
||||
any::Any,
|
||||
collections::{BTreeMap, HashMap},
|
||||
net::IpAddr,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::DynamicObject;
|
||||
use log::{debug, info};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
topology::{PreparationError, PreparationOutcome, Topology, installable::Installable},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
/// Defines the application that sends alerts to receivers,
/// for example Prometheus.
#[async_trait]
pub trait AlertSender: Send + Sync + std::fmt::Debug {
    /// Human-readable name of the sender, used for logging and status messages.
    fn name(&self) -> String;
}
|
||||
|
||||
/// Trait which defines how an alert sender is implemented for a specific topology.
///
/// Each method returns `PreparationOutcome::Noop` when there is nothing to do,
/// or `Success` with a human-readable detail string.
#[async_trait]
pub trait Observability<S: AlertSender> {
    /// Install the alert sender `S` itself (e.g. deploy Prometheus) on this topology.
    async fn install_alert_sender(
        &self,
        sender: &S,
        inventory: &Inventory,
    ) -> Result<PreparationOutcome, PreparationError>;

    /// Install the given alert receivers; `None` means no receivers to install.
    async fn install_receivers(
        &self,
        sender: &S,
        inventory: &Inventory,
        receivers: Option<Vec<Box<dyn AlertReceiver<S>>>>,
    ) -> Result<PreparationOutcome, PreparationError>;

    /// Install the given alerting rules; `None` means no rules to install.
    async fn install_rules(
        &self,
        sender: &S,
        inventory: &Inventory,
        rules: Option<Vec<Box<dyn AlertRule<S>>>>,
    ) -> Result<PreparationOutcome, PreparationError>;

    /// Register additional scrape targets; `None` means no targets to add.
    async fn add_scrape_targets(
        &self,
        sender: &S,
        inventory: &Inventory,
        scrape_targets: Option<Vec<Box<dyn ScrapeTarget<S>>>>,
    ) -> Result<PreparationOutcome, PreparationError>;

    /// Verify the monitoring stack is installed and ready on this topology.
    async fn ensure_monitoring_installed(
        &self,
        sender: &S,
        inventory: &Inventory,
    ) -> Result<PreparationOutcome, PreparationError>;
}
|
||||
|
||||
/// Defines the entity that receives the alerts from a sender,
/// for example Discord, Slack, etc.
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
    /// Builds the routing entry that directs alerts to this receiver.
    fn build_route(&self) -> Result<serde_yaml::Value, InterpretError>;
    /// Builds the receiver configuration itself (endpoint, credentials, etc.).
    fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError>;
    /// Human-readable name of the receiver, used for logging and routing.
    fn name(&self) -> String;
    /// Clones the boxed trait object; enables `Clone` for `Box<dyn AlertReceiver<S>>`.
    fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
}
|
||||
|
||||
/// Defines a generic rule that can be applied to a sender, such as a Prometheus alert rule.
pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
    /// Builds the backend-specific rule definition.
    fn build_rule(&self) -> Result<serde_json::Value, InterpretError>;
    /// Human-readable name of the rule, used for logging.
    fn name(&self) -> String;
    /// Clones the boxed trait object; enables `Clone` for `Box<dyn AlertRule<S>>`.
    fn clone_box(&self) -> Box<dyn AlertRule<S>>;
}
|
||||
|
||||
/// A generic scrape target that can be added to a sender to scrape metrics from, for example a
/// server outside of the cluster.
pub trait ScrapeTarget<S: AlertSender>: std::fmt::Debug + Send + Sync {
    /// Builds the concrete target description (address, port, path, labels).
    fn build_scrape_target(&self) -> Result<ExternalScrapeTarget, InterpretError>;
    /// Human-readable name of the target, used for logging.
    fn name(&self) -> String;
    /// Clones the boxed trait object; enables `Clone` for `Box<dyn ScrapeTarget<S>>`.
    fn clone_box(&self) -> Box<dyn ScrapeTarget<S>>;
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExternalScrapeTarget {
    // Address of the host exposing metrics.
    pub ip: IpAddr,
    // Port the metrics endpoint listens on.
    pub port: i32,
    // Scrape interval; presumably a Prometheus duration string like "30s" — TODO confirm.
    pub interval: Option<String>,
    // Metrics HTTP path; backend default applies when `None` — TODO confirm default.
    pub path: Option<String>,
    // Extra labels attached to scraped series.
    pub labels: Option<BTreeMap<String, String>>,
}
|
||||
|
||||
/// Alerting interpret to install an alert sender on a given topology,
/// together with its receivers, rules, and optional extra scrape targets.
#[derive(Debug)]
pub struct AlertingInterpret<S: AlertSender> {
    // The alert sender (e.g. a Prometheus instance) to install.
    pub sender: S,
    // Receivers to wire up to the sender.
    pub receivers: Vec<Box<dyn AlertReceiver<S>>>,
    // Alerting rules to install alongside the sender.
    pub rules: Vec<Box<dyn AlertRule<S>>>,
    // Extra scrape targets; `None` means none to add.
    pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<S>>>>,
}
|
||||
|
||||
#[async_trait]
impl<S: AlertSender, T: Topology + Observability<S>> Interpret<T> for AlertingInterpret<S> {
    /// Installs the full alerting stack in order: sender, receivers, rules,
    /// scrape targets, then a final readiness check. Each step delegates to
    /// the topology's `Observability<S>` implementation and short-circuits
    /// on the first failure via `?`.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        info!("Configuring alert sender {}", self.sender.name());
        topology
            .install_alert_sender(&self.sender, inventory)
            .await?;

        info!("Installing receivers");
        topology
            .install_receivers(&self.sender, inventory, Some(self.receivers.clone()))
            .await?;

        info!("Installing rules");
        topology
            .install_rules(&self.sender, inventory, Some(self.rules.clone()))
            .await?;

        info!("Adding extra scrape targets");
        topology
            .add_scrape_targets(&self.sender, inventory, self.scrape_targets.clone())
            .await?;

        info!("Ensuring alert sender {} is ready", self.sender.name());
        topology
            .ensure_monitoring_installed(&self.sender, inventory)
            .await?;

        Ok(Outcome::success(format!(
            "successfully installed alert sender {}",
            self.sender.name()
        )))
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Alerting
    }

    // NOTE(review): the remaining accessors are unimplemented stubs; calling
    // them panics. Confirm they are not reached in production paths.
    fn get_version(&self) -> Version {
        todo!()
    }

    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}
|
||||
|
||||
// Manual `Clone` impls for boxed trait objects: `#[derive(Clone)]` cannot be
// used on `Box<dyn Trait>`, so each trait exposes `clone_box` and these impls
// forward to it. This is what lets `AlertingInterpret` clone its collections.
impl<S: AlertSender> Clone for Box<dyn AlertReceiver<S>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}

impl<S: AlertSender> Clone for Box<dyn AlertRule<S>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}

impl<S: AlertSender> Clone for Box<dyn ScrapeTarget<S>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}
|
||||
|
||||
/// Generic routing that can map to various alert sender backends.
#[derive(Debug, Clone, Serialize)]
pub struct AlertRoute {
    // Name of the receiver this route delivers to.
    pub receiver: String,
    // Label matchers selecting which alerts take this route; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub matchers: Vec<AlertMatcher>,
    // Labels to group alerts by; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub group_by: Vec<String>,
    // How often to re-send a still-firing alert; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repeat_interval: Option<String>,
    // Serialized as `continue` (a Rust keyword): whether sibling routes
    // are still evaluated after this one matches.
    #[serde(rename = "continue")]
    pub continue_matching: bool,
    // Nested child routes; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub children: Vec<AlertRoute>,
}
|
||||
|
||||
impl AlertRoute {
    /// Builds a catch-all route for the given receiver name: no matchers,
    /// no grouping, a 30s repeat interval, and `continue: true` so sibling
    /// routes are still evaluated.
    ///
    /// NOTE(review): this is an inherent `default(name)` constructor, not
    /// the `Default` trait (it requires the receiver name).
    pub fn default(name: String) -> Self {
        Self {
            receiver: name,
            matchers: vec![],
            group_by: vec![],
            repeat_interval: Some("30s".to_string()),
            continue_matching: true,
            children: vec![],
        }
    }
}
|
||||
|
||||
/// A single label matcher (`label <op> value`) used by `AlertRoute`.
#[derive(Debug, Clone, Serialize)]
pub struct AlertMatcher {
    // Label name to match on.
    pub label: String,
    // Comparison operator; serialized as its symbol (see `MatchOp`).
    pub operator: MatchOp,
    // Value (or regex, for `MatchOp::Regex`) to compare against.
    pub value: String,
}
|
||||
|
||||
/// Matcher comparison operator; has a custom `Serialize` impl that emits
/// the operator symbol (`=`, `!=`, `=~`) instead of the variant name.
#[derive(Debug, Clone)]
pub enum MatchOp {
    Eq,
    NotEq,
    Regex,
}
|
||||
|
||||
impl Serialize for MatchOp {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
let op = match self {
|
||||
MatchOp::Eq => "=",
|
||||
MatchOp::NotEq => "!=",
|
||||
MatchOp::Regex => "=~",
|
||||
};
|
||||
serializer.serialize_str(op)
|
||||
}
|
||||
}
|
||||
@@ -188,6 +188,10 @@ impl FromStr for DnsRecordType {
|
||||
pub trait NetworkManager: Debug + Send + Sync {
|
||||
async fn ensure_network_manager_installed(&self) -> Result<(), NetworkError>;
|
||||
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), NetworkError>;
|
||||
async fn configure_bond_on_primary_interface(
|
||||
&self,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<(), NetworkError>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, new)]
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
pub mod monitoring;
|
||||
@@ -1,101 +0,0 @@
|
||||
use std::{any::Any, collections::HashMap};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::DynamicObject;
|
||||
use log::debug;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
topology::{Topology, installable::Installable},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertSender: Send + Sync + std::fmt::Debug {
|
||||
fn name(&self) -> String;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AlertingInterpret<S: AlertSender> {
|
||||
pub sender: S,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<S>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<S>>>,
|
||||
pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<S>>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInterpret<S> {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
debug!("hit sender configure for AlertingInterpret");
|
||||
self.sender.configure(inventory, topology).await?;
|
||||
for receiver in self.receivers.iter() {
|
||||
receiver.install(&self.sender).await?;
|
||||
}
|
||||
for rule in self.rules.iter() {
|
||||
debug!("installing rule: {:#?}", rule);
|
||||
rule.install(&self.sender).await?;
|
||||
}
|
||||
if let Some(targets) = &self.scrape_targets {
|
||||
for target in targets.iter() {
|
||||
debug!("installing scrape_target: {:#?}", target);
|
||||
target.install(&self.sender).await?;
|
||||
}
|
||||
}
|
||||
self.sender.ensure_installed(inventory, topology).await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully installed alert sender {}",
|
||||
self.sender.name()
|
||||
)))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Alerting
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
|
||||
fn name(&self) -> String;
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AlertManagerReceiver {
|
||||
pub receiver_config: serde_json::Value,
|
||||
// FIXME we should not leak k8s here. DynamicObject is k8s specific
|
||||
pub additional_ressources: Vec<DynamicObject>,
|
||||
pub route_config: serde_json::Value,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<S>>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait ScrapeTarget<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
|
||||
fn clone_box(&self) -> Box<dyn ScrapeTarget<S>>;
|
||||
}
|
||||
@@ -5,9 +5,20 @@ use harmony_types::{
|
||||
net::{IpAddress, MacAddress},
|
||||
switch::{PortDeclaration, PortLocation},
|
||||
};
|
||||
use log::info;
|
||||
use option_ext::OptionExt;
|
||||
|
||||
use crate::topology::{PortConfig, SwitchClient, SwitchError};
|
||||
use crate::{
|
||||
modules::brocade::BrocadeSwitchAuth,
|
||||
topology::{PortConfig, SwitchClient, SwitchError},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BrocadeSwitchConfig {
|
||||
pub ips: Vec<IpAddress>,
|
||||
pub auth: BrocadeSwitchAuth,
|
||||
pub options: BrocadeOptions,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BrocadeSwitchClient {
|
||||
@@ -15,13 +26,11 @@ pub struct BrocadeSwitchClient {
|
||||
}
|
||||
|
||||
impl BrocadeSwitchClient {
|
||||
pub async fn init(
|
||||
ip_addresses: &[IpAddress],
|
||||
username: &str,
|
||||
password: &str,
|
||||
options: BrocadeOptions,
|
||||
) -> Result<Self, brocade::Error> {
|
||||
let brocade = brocade::init(ip_addresses, username, password, options).await?;
|
||||
pub async fn init(config: BrocadeSwitchConfig) -> Result<Self, brocade::Error> {
|
||||
let auth = &config.auth;
|
||||
let options = &config.options;
|
||||
|
||||
let brocade = brocade::init(&config.ips, &auth.username, &auth.password, options).await?;
|
||||
Ok(Self { brocade })
|
||||
}
|
||||
}
|
||||
@@ -52,13 +61,18 @@ impl SwitchClient for BrocadeSwitchClient {
|
||||
|| link.remote_port.contains(&interface.port_location)
|
||||
})
|
||||
})
|
||||
.map(|interface| (interface.name.clone(), PortOperatingMode::Access))
|
||||
.map(|interface| (interface.name.clone(), PortOperatingMode::Trunk))
|
||||
.collect();
|
||||
|
||||
if interfaces.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!("About to configure interfaces {interfaces:?}");
|
||||
// inquire::Confirm::new("Do you wish to configures interfaces now?")
|
||||
// .prompt()
|
||||
// .map_err(|e| SwitchError::new(e.to_string()))?;
|
||||
|
||||
self.brocade
|
||||
.configure_interfaces(&interfaces)
|
||||
.await
|
||||
@@ -208,8 +222,8 @@ mod tests {
|
||||
//TODO not sure about this
|
||||
let configured_interfaces = brocade.configured_interfaces.lock().unwrap();
|
||||
assert_that!(*configured_interfaces).contains_exactly(vec![
|
||||
(first_interface.name.clone(), PortOperatingMode::Access),
|
||||
(second_interface.name.clone(), PortOperatingMode::Access),
|
||||
(first_interface.name.clone(), PortOperatingMode::Trunk),
|
||||
(second_interface.name.clone(), PortOperatingMode::Trunk),
|
||||
]);
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use askama::Template;
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::core::v1::Node;
|
||||
@@ -10,13 +11,71 @@ use kube::{
|
||||
ResourceExt,
|
||||
api::{ObjectList, ObjectMeta},
|
||||
};
|
||||
use log::{debug, info};
|
||||
use log::{debug, info, warn};
|
||||
|
||||
use crate::{
|
||||
modules::okd::crd::nmstate,
|
||||
topology::{HostNetworkConfig, NetworkError, NetworkManager, k8s::K8sClient},
|
||||
topology::{
|
||||
HostNetworkConfig, NetworkError, NetworkManager,
|
||||
k8s::{DrainOptions, K8sClient, NodeFile},
|
||||
},
|
||||
};
|
||||
|
||||
/// NetworkManager bond configuration template.
///
/// Rendered into the bond master's `.nmconnection` keyfile: 802.3ad (LACP)
/// mode with fast LACP rate and layer2 transmit hash policy; IPv4/IPv6
/// addressing is left to auto (DHCP/SLAAC).
#[derive(Template)]
#[template(
    source = r#"[connection]
id={{ bond_name }}
uuid={{ bond_uuid }}
type=bond
autoconnect-slaves=1
interface-name={{ bond_name }}

[bond]
lacp_rate=fast
mode=802.3ad
xmit_hash_policy=layer2

[ipv4]
method=auto

[ipv6]
addr-gen-mode=default
method=auto

[proxy]
"#,
    ext = "txt"
)]
struct BondConfigTemplate {
    // Bond device name (e.g. "bond0"); used as both connection id and interface-name.
    bond_name: String,
    // NetworkManager connection UUID for the bond master.
    bond_uuid: String,
}
|
||||
|
||||
/// NetworkManager bond slave configuration template.
///
/// Rendered into one `.nmconnection` keyfile per enslaved ethernet
/// interface, pointing it at its bond master.
#[derive(Template)]
#[template(
    source = r#"[connection]
id={{ slave_id }}
uuid={{ slave_uuid }}
type=ethernet
interface-name={{ interface_name }}
master={{ bond_name }}
slave-type=bond

[ethernet]

[bond-port]
"#,
    ext = "txt"
)]
struct BondSlaveConfigTemplate {
    // Connection id for this slave (e.g. "bond0-eth0").
    slave_id: String,
    // NetworkManager connection UUID for this slave.
    slave_uuid: String,
    // Physical interface to enslave.
    interface_name: String,
    // Name of the bond master this interface joins.
    bond_name: String,
}
|
||||
|
||||
/// TODO document properly the non-intuitive behavior or "roll forward only" of nmstate in general
|
||||
/// It is documented in nmstate official doc, but worth mentionning here :
|
||||
///
|
||||
@@ -87,6 +146,117 @@ impl NetworkManager for OpenShiftNmStateNetworkManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Configures bonding on the primary network interface of a node.
    ///
    /// Changing the *primary* network interface (making it a bond
    /// slave) will disrupt node connectivity mid-change, so the
    /// procedure is:
    ///
    /// 1. Generate NetworkManager .nmconnection files
    /// 2. Write configuration files to `/etc/NetworkManager/system-connections/`
    ///    (done while the node is still running, before any drain, for speed)
    /// 3. Attempt to reload NetworkManager (optional, best-effort)
    /// 4. Reboot the node with full verification (drain, boot_id check, uncordon)
    ///
    /// The reboot procedure (handled inside `reboot_node`) includes:
    /// - Draining the node (includes cordon)
    /// - Recording boot_id before reboot
    /// - Fire-and-forget reboot command
    /// - Waiting for NotReady status
    /// - Waiting for Ready status
    /// - Verifying boot_id changed
    /// - Uncordoning the node
    ///
    /// See ADR-019 for context and rationale.
    async fn configure_bond_on_primary_interface(
        &self,
        config: &HostNetworkConfig,
    ) -> Result<(), NetworkError> {
        use std::time::Duration;

        let node_name = self.get_node_name_for_id(&config.host_id).await?;
        let hostname = self.get_hostname(&config.host_id).await?;

        info!(
            "Configuring bond on primary interface for host '{}' (node '{}')",
            config.host_id, node_name
        );

        // 1. Generate .nmconnection files
        let files = self.generate_nmconnection_files(&hostname, config)?;
        debug!(
            "Generated {} NetworkManager configuration files",
            files.len()
        );

        // 2. Write configuration files to the node.
        // Done while the node is still running (drain happens later, inside
        // reboot_node) so the write is fast and the files are in place
        // before the reboot picks them up.
        info!(
            "Writing NetworkManager configuration files to node '{}'...",
            node_name
        );
        self.k8s_client
            .write_files_to_node(&node_name, &files)
            .await
            .map_err(|e| {
                NetworkError::new(format!(
                    "Failed to write configuration files to node '{}': {}",
                    node_name, e
                ))
            })?;

        // 3. Reload NetworkManager configuration (best-effort)
        // This won't activate the bond yet since the primary interface would lose connectivity,
        // but it validates the configuration files are correct
        info!(
            "Reloading NetworkManager configuration on node '{}'...",
            node_name
        );
        match self
            .k8s_client
            .run_privileged_command_on_node(&node_name, "chroot /host nmcli connection reload")
            .await
        {
            Ok(output) => {
                debug!("NetworkManager reload output: {}", output.trim());
            }
            Err(e) => {
                warn!(
                    "Failed to reload NetworkManager configuration: {}. Proceeding with reboot.",
                    e
                );
                // Don't fail here - reboot will pick up the config anyway
            }
        }

        // 4. Reboot the node with full verification
        // The reboot_node function handles: drain, boot_id capture, reboot, NotReady wait,
        // Ready wait, boot_id verification, and uncordon
        // 60 minutes timeout for bare-metal environments (drain can take 20-30 mins)
        let reboot_timeout = Duration::from_secs(3600);
        info!(
            "Rebooting node '{}' to apply network configuration (timeout: {:?})...",
            node_name, reboot_timeout
        );

        self.k8s_client
            .reboot_node(
                &node_name,
                &DrainOptions::default_ignore_daemonset_delete_emptydir_data(),
                reboot_timeout,
            )
            .await
            .map_err(|e| {
                NetworkError::new(format!("Failed to reboot node '{}': {}", node_name, e))
            })?;

        info!(
            "Successfully configured bond on primary interface for host '{}' (node '{}')",
            config.host_id, node_name
        );

        Ok(())
    }
|
||||
|
||||
async fn configure_bond(&self, config: &HostNetworkConfig) -> Result<(), NetworkError> {
|
||||
let hostname = self.get_hostname(&config.host_id).await.map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
@@ -208,14 +378,14 @@ impl OpenShiftNmStateNetworkManager {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
|
||||
async fn get_node_for_id(&self, host_id: &Id) -> Result<Node, String> {
|
||||
let nodes: ObjectList<Node> = self
|
||||
.k8s_client
|
||||
.list_resources(None, None)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to list nodes: {e}"))?;
|
||||
|
||||
let Some(node) = nodes.iter().find(|n| {
|
||||
let Some(node) = nodes.into_iter().find(|n| {
|
||||
n.status
|
||||
.as_ref()
|
||||
.and_then(|s| s.node_info.as_ref())
|
||||
@@ -225,6 +395,20 @@ impl OpenShiftNmStateNetworkManager {
|
||||
return Err(format!("No node found for host '{host_id}'"));
|
||||
};
|
||||
|
||||
Ok(node)
|
||||
}
|
||||
|
||||
async fn get_node_name_for_id(&self, host_id: &Id) -> Result<String, String> {
|
||||
let node = self.get_node_for_id(host_id).await?;
|
||||
|
||||
node.metadata.name.ok_or(format!(
|
||||
"A node should always have a name, node for host_id {host_id} has no name"
|
||||
))
|
||||
}
|
||||
|
||||
async fn get_hostname(&self, host_id: &Id) -> Result<String, String> {
|
||||
let node = self.get_node_for_id(host_id).await?;
|
||||
|
||||
node.labels()
|
||||
.get("kubernetes.io/hostname")
|
||||
.ok_or(format!(
|
||||
@@ -261,4 +445,82 @@ impl OpenShiftNmStateNetworkManager {
|
||||
let next_id = (0..).find(|id| !used_ids.contains(id)).unwrap();
|
||||
Ok(format!("bond{next_id}"))
|
||||
}
|
||||
|
||||
/// Generates NetworkManager .nmconnection files for bonding configuration.
|
||||
///
|
||||
/// Creates:
|
||||
/// - One bond master configuration file (bond0.nmconnection)
|
||||
/// - One slave configuration file per interface (bond0-<iface>.nmconnection)
|
||||
///
|
||||
/// All files are placed in `/etc/NetworkManager/system-connections/` with
|
||||
/// mode 0o600 (required by NetworkManager).
|
||||
fn generate_nmconnection_files(
|
||||
&self,
|
||||
hostname: &str,
|
||||
config: &HostNetworkConfig,
|
||||
) -> Result<Vec<NodeFile>, NetworkError> {
|
||||
let mut files = Vec::new();
|
||||
let bond_name = "bond0";
|
||||
let bond_uuid = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
// Generate bond master configuration
|
||||
let bond_template = BondConfigTemplate {
|
||||
bond_name: bond_name.to_string(),
|
||||
bond_uuid: bond_uuid.clone(),
|
||||
};
|
||||
|
||||
let bond_content = bond_template.render().map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
"Failed to render bond configuration template: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
files.push(NodeFile {
|
||||
path: format!(
|
||||
"/etc/NetworkManager/system-connections/{}.nmconnection",
|
||||
bond_name
|
||||
),
|
||||
content: bond_content,
|
||||
mode: 0o600,
|
||||
});
|
||||
|
||||
// Generate slave configurations for each interface
|
||||
for switch_port in &config.switch_ports {
|
||||
let interface_name = &switch_port.interface.name;
|
||||
let slave_id = format!("{}-{}", bond_name, interface_name);
|
||||
let slave_uuid = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
let slave_template = BondSlaveConfigTemplate {
|
||||
slave_id: slave_id.clone(),
|
||||
slave_uuid,
|
||||
interface_name: interface_name.clone(),
|
||||
bond_name: bond_name.to_string(),
|
||||
};
|
||||
|
||||
let slave_content = slave_template.render().map_err(|e| {
|
||||
NetworkError::new(format!(
|
||||
"Failed to render slave configuration template for interface '{}': {}",
|
||||
interface_name, e
|
||||
))
|
||||
})?;
|
||||
|
||||
files.push(NodeFile {
|
||||
path: format!(
|
||||
"/etc/NetworkManager/system-connections/{}.nmconnection",
|
||||
slave_id
|
||||
),
|
||||
content: slave_content,
|
||||
mode: 0o600,
|
||||
});
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Generated {} NetworkManager configuration files for host '{}'",
|
||||
files.len(),
|
||||
hostname
|
||||
);
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,13 +2,15 @@ use crate::modules::application::{
|
||||
Application, ApplicationFeature, InstallationError, InstallationOutcome,
|
||||
};
|
||||
use crate::modules::monitoring::application_monitoring::application_monitoring_score::ApplicationMonitoringScore;
|
||||
use crate::modules::monitoring::grafana::grafana::Grafana;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
|
||||
ServiceMonitor, ServiceMonitorSpec,
|
||||
};
|
||||
use crate::modules::monitoring::prometheus::Prometheus;
|
||||
use crate::modules::monitoring::prometheus::helm::prometheus_config::PrometheusConfig;
|
||||
use crate::topology::MultiTargetTopology;
|
||||
use crate::topology::ingress::Ingress;
|
||||
use crate::topology::monitoring::AlertReceiver;
|
||||
use crate::topology::monitoring::Observability;
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::{
|
||||
@@ -17,10 +19,6 @@ use crate::{
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology, tenant::TenantManager},
|
||||
};
|
||||
use crate::{
|
||||
modules::prometheus::prometheus::PrometheusMonitoring,
|
||||
topology::oberservability::monitoring::AlertReceiver,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use harmony_secret::SecretManager;
|
||||
@@ -30,12 +28,12 @@ use kube::api::ObjectMeta;
|
||||
use log::{debug, info};
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Monitoring {
|
||||
pub application: Arc<dyn Application>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -46,8 +44,7 @@ impl<
|
||||
+ TenantManager
|
||||
+ K8sclient
|
||||
+ MultiTargetTopology
|
||||
+ PrometheusMonitoring<CRDPrometheus>
|
||||
+ Grafana
|
||||
+ Observability<Prometheus>
|
||||
+ Ingress
|
||||
+ std::fmt::Debug,
|
||||
> ApplicationFeature<T> for Monitoring
|
||||
@@ -74,10 +71,8 @@ impl<
|
||||
};
|
||||
|
||||
let mut alerting_score = ApplicationMonitoringScore {
|
||||
sender: CRDPrometheus {
|
||||
namespace: namespace.clone(),
|
||||
client: topology.k8s_client().await.unwrap(),
|
||||
service_monitor: vec![app_service_monitor],
|
||||
sender: Prometheus {
|
||||
config: Arc::new(Mutex::new(PrometheusConfig::new())),
|
||||
},
|
||||
application: self.application.clone(),
|
||||
receivers: self.alert_receiver.clone(),
|
||||
@@ -119,11 +114,12 @@ impl<
|
||||
),
|
||||
};
|
||||
|
||||
alerting_score.receivers.push(Box::new(ntfy_receiver));
|
||||
alerting_score
|
||||
.interpret(&Inventory::empty(), topology)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
todo!();
|
||||
// alerting_score.receivers.push(Box::new(ntfy_receiver));
|
||||
// alerting_score
|
||||
// .interpret(&Inventory::empty(), topology)
|
||||
// .await
|
||||
// .map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(InstallationOutcome::success())
|
||||
}
|
||||
|
||||
@@ -3,11 +3,13 @@ use std::sync::Arc;
|
||||
use crate::modules::application::{
|
||||
Application, ApplicationFeature, InstallationError, InstallationOutcome,
|
||||
};
|
||||
use crate::modules::monitoring::application_monitoring::rhobs_application_monitoring_score::ApplicationRHOBMonitoringScore;
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||
use crate::modules::monitoring::red_hat_cluster_observability::RedHatClusterObservability;
|
||||
use crate::modules::monitoring::red_hat_cluster_observability::redhat_cluster_observability::RedHatClusterObservabilityScore;
|
||||
use crate::topology::MultiTargetTopology;
|
||||
use crate::topology::ingress::Ingress;
|
||||
use crate::topology::monitoring::AlertReceiver;
|
||||
use crate::topology::monitoring::Observability;
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::{
|
||||
@@ -16,10 +18,6 @@ use crate::{
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology, tenant::TenantManager},
|
||||
};
|
||||
use crate::{
|
||||
modules::prometheus::prometheus::PrometheusMonitoring,
|
||||
topology::oberservability::monitoring::AlertReceiver,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use harmony_types::net::Url;
|
||||
@@ -28,9 +26,10 @@ use log::{debug, info};
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Monitoring {
|
||||
pub application: Arc<dyn Application>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<RedHatClusterObservability>>>,
|
||||
}
|
||||
|
||||
///TODO TEST this
|
||||
#[async_trait]
|
||||
impl<
|
||||
T: Topology
|
||||
@@ -41,7 +40,7 @@ impl<
|
||||
+ MultiTargetTopology
|
||||
+ Ingress
|
||||
+ std::fmt::Debug
|
||||
+ PrometheusMonitoring<RHOBObservability>,
|
||||
+ Observability<RedHatClusterObservability>,
|
||||
> ApplicationFeature<T> for Monitoring
|
||||
{
|
||||
async fn ensure_installed(
|
||||
@@ -55,13 +54,14 @@ impl<
|
||||
.map(|ns| ns.name.clone())
|
||||
.unwrap_or_else(|| self.application.name());
|
||||
|
||||
let mut alerting_score = ApplicationRHOBMonitoringScore {
|
||||
sender: RHOBObservability {
|
||||
let mut alerting_score = RedHatClusterObservabilityScore {
|
||||
sender: RedHatClusterObservability {
|
||||
namespace: namespace.clone(),
|
||||
client: topology.k8s_client().await.unwrap(),
|
||||
resource_selector: todo!(),
|
||||
},
|
||||
application: self.application.clone(),
|
||||
receivers: self.alert_receiver.clone(),
|
||||
rules: vec![],
|
||||
scrape_targets: None,
|
||||
};
|
||||
let domain = topology
|
||||
.get_domain("ntfy")
|
||||
|
||||
138
harmony/src/modules/brocade/brocade.rs
Normal file
138
harmony/src/modules/brocade/brocade.rs
Normal file
@@ -0,0 +1,138 @@
|
||||
use async_trait::async_trait;
|
||||
use brocade::{BrocadeOptions, PortOperatingMode};
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
infra::brocade::{BrocadeSwitchClient, BrocadeSwitchConfig},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{
|
||||
HostNetworkConfig, PortConfig, PreparationError, PreparationOutcome, Switch, SwitchClient,
|
||||
SwitchError, Topology,
|
||||
},
|
||||
};
|
||||
use harmony_macros::ip;
|
||||
use harmony_types::{id::Id, net::MacAddress, switch::PortLocation};
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct BrocadeSwitchScore {
|
||||
pub port_channels_to_clear: Vec<Id>,
|
||||
pub ports_to_configure: Vec<PortConfig>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Switch> Score<T> for BrocadeSwitchScore {
|
||||
fn name(&self) -> String {
|
||||
"BrocadeSwitchScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(BrocadeSwitchInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BrocadeSwitchInterpret {
|
||||
score: BrocadeSwitchScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + Switch> Interpret<T> for BrocadeSwitchInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Applying switch configuration {:?}", self.score);
|
||||
debug!(
|
||||
"Clearing port channel {:?}",
|
||||
self.score.port_channels_to_clear
|
||||
);
|
||||
topology
|
||||
.clear_port_channel(&self.score.port_channels_to_clear)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
debug!("Configuring interfaces {:?}", self.score.ports_to_configure);
|
||||
topology
|
||||
.configure_interface(&self.score.ports_to_configure)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success("switch configured".to_string()))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("BrocadeSwitchInterpret")
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
pub struct BrocadeSwitchConfig {
|
||||
pub ips: Vec<harmony_types::net::IpAddress>,
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
pub options: BrocadeOptions,
|
||||
}
|
||||
*/
|
||||
|
||||
pub struct SwitchTopology {
|
||||
client: Box<dyn SwitchClient>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Topology for SwitchTopology {
|
||||
fn name(&self) -> &str {
|
||||
"SwitchTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
impl SwitchTopology {
|
||||
pub async fn new(config: BrocadeSwitchConfig) -> Self {
|
||||
let client = BrocadeSwitchClient::init(config)
|
||||
.await
|
||||
.expect("Failed to connect to switch");
|
||||
|
||||
let client = Box::new(client);
|
||||
Self { client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Switch for SwitchTopology {
|
||||
async fn setup_switch(&self) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn get_port_for_mac_address(
|
||||
&self,
|
||||
_mac_address: &MacAddress,
|
||||
) -> Result<Option<PortLocation>, SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn configure_port_channel(&self, _config: &HostNetworkConfig) -> Result<(), SwitchError> {
|
||||
todo!()
|
||||
}
|
||||
async fn clear_port_channel(&self, ids: &Vec<Id>) -> Result<(), SwitchError> {
|
||||
self.client.clear_port_channel(ids).await
|
||||
}
|
||||
async fn configure_interface(&self, ports: &Vec<PortConfig>) -> Result<(), SwitchError> {
|
||||
self.client.configure_interface(ports).await
|
||||
}
|
||||
}
|
||||
@@ -39,16 +39,16 @@ pub struct BrocadeEnableSnmpInterpret {
|
||||
}
|
||||
|
||||
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
|
||||
struct BrocadeSwitchAuth {
|
||||
username: String,
|
||||
password: String,
|
||||
pub struct BrocadeSwitchAuth {
|
||||
pub username: String,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
#[derive(Secret, Clone, Debug, JsonSchema, Serialize, Deserialize)]
|
||||
struct BrocadeSnmpAuth {
|
||||
username: String,
|
||||
auth_password: String,
|
||||
des_password: String,
|
||||
pub struct BrocadeSnmpAuth {
|
||||
pub username: String,
|
||||
pub auth_password: String,
|
||||
pub des_password: String,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -72,7 +72,7 @@ impl<T: Topology> Interpret<T> for BrocadeEnableSnmpInterpret {
|
||||
&switch_addresses,
|
||||
&config.username,
|
||||
&config.password,
|
||||
BrocadeOptions {
|
||||
&BrocadeOptions {
|
||||
dry_run: self.score.dry_run,
|
||||
..Default::default()
|
||||
},
|
||||
5
harmony/src/modules/brocade/mod.rs
Normal file
5
harmony/src/modules/brocade/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
pub mod brocade;
|
||||
pub use brocade::*;
|
||||
|
||||
pub mod brocade_snmp;
|
||||
pub use brocade_snmp::*;
|
||||
@@ -82,17 +82,40 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
|
||||
self.score.role,
|
||||
choice.summary()
|
||||
);
|
||||
let disk_names: Vec<String> =
|
||||
choice.storage.iter().map(|s| s.name.clone()).collect();
|
||||
let mut disk_choices: Vec<(String, String)> = vec![];
|
||||
|
||||
for s in choice.storage.iter() {
|
||||
let size_gb: f64 = s.size_bytes as f64 / 1_000_000_000.0;
|
||||
let (size, unit) = if size_gb >= 1000.0 {
|
||||
(size_gb / 1000.0, "TB")
|
||||
} else {
|
||||
(size_gb, "GB")
|
||||
};
|
||||
let drive_type = if s.rotational { "rotational" } else { "SSD" };
|
||||
let smart_str = s.smart_status.as_deref().unwrap_or("N/A");
|
||||
let display = format!(
|
||||
"{} : [{}] - {:.0} {} ({}) - {} - Smart: {}",
|
||||
s.name, s.model, size, unit, drive_type, s.interface_type, smart_str
|
||||
);
|
||||
disk_choices.push((display, s.name.clone()));
|
||||
}
|
||||
|
||||
let display_refs: Vec<&str> =
|
||||
disk_choices.iter().map(|(d, _)| d.as_str()).collect();
|
||||
|
||||
let disk_choice = inquire::Select::new(
|
||||
&format!("Select the disk to use on host {}:", choice.summary()),
|
||||
disk_names,
|
||||
display_refs,
|
||||
)
|
||||
.prompt();
|
||||
|
||||
match disk_choice {
|
||||
Ok(disk_name) => {
|
||||
Ok(selected_display) => {
|
||||
let disk_name = disk_choices
|
||||
.iter()
|
||||
.find(|(d, _)| d.as_str() == selected_display)
|
||||
.map(|(_, name)| name.clone())
|
||||
.unwrap();
|
||||
info!("Selected disk {} for node {}", disk_name, choice.summary());
|
||||
host_repo
|
||||
.save_role_mapping(&self.score.role, &choice, &disk_name)
|
||||
|
||||
@@ -9,7 +9,7 @@ use crate::{
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
topology::{K8sclient, Topology, k8s::ApplyStrategy},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
@@ -29,7 +29,7 @@ impl<K: Resource + std::fmt::Debug> K8sResourceScore<K> {
|
||||
}
|
||||
|
||||
impl<
|
||||
K: Resource<Scope = NamespaceResourceScope>
|
||||
K: Resource
|
||||
+ std::fmt::Debug
|
||||
+ Sync
|
||||
+ DeserializeOwned
|
||||
@@ -42,6 +42,7 @@ impl<
|
||||
> Score<T> for K8sResourceScore<K>
|
||||
where
|
||||
<K as kube::Resource>::DynamicType: Default,
|
||||
<K as kube::Resource>::Scope: ApplyStrategy<K>,
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(K8sResourceInterpret {
|
||||
@@ -61,7 +62,7 @@ pub struct K8sResourceInterpret<K: Resource + std::fmt::Debug + Sync + Send> {
|
||||
|
||||
#[async_trait]
|
||||
impl<
|
||||
K: Resource<Scope = NamespaceResourceScope>
|
||||
K: Resource
|
||||
+ Clone
|
||||
+ std::fmt::Debug
|
||||
+ DeserializeOwned
|
||||
@@ -73,6 +74,7 @@ impl<
|
||||
> Interpret<T> for K8sResourceInterpret<K>
|
||||
where
|
||||
<K as kube::Resource>::DynamicType: Default,
|
||||
<K as kube::Resource>::Scope: ApplyStrategy<K>,
|
||||
{
|
||||
async fn execute(
|
||||
&self,
|
||||
|
||||
@@ -18,7 +18,6 @@ pub mod network;
|
||||
pub mod okd;
|
||||
pub mod opnsense;
|
||||
pub mod postgresql;
|
||||
pub mod prometheus;
|
||||
pub mod storage;
|
||||
pub mod tenant;
|
||||
pub mod tftp;
|
||||
|
||||
@@ -1,98 +1,54 @@
|
||||
use std::any::Any;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::k8s_name::K8sName;
|
||||
use crate::modules::monitoring::kube_prometheus::KubePrometheus;
|
||||
use crate::modules::monitoring::okd::OpenshiftClusterAlertSender;
|
||||
use crate::modules::monitoring::red_hat_cluster_observability::RedHatClusterObservability;
|
||||
use crate::topology::monitoring::{AlertRoute, MatchOp};
|
||||
use crate::{interpret::InterpretError, topology::monitoring::AlertReceiver};
|
||||
use harmony_types::net::Url;
|
||||
use k8s_openapi::api::core::v1::Secret;
|
||||
use kube::Resource;
|
||||
use kube::api::{DynamicObject, ObjectMeta};
|
||||
use log::{debug, trace};
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use serde_yaml::{Mapping, Value};
|
||||
|
||||
use crate::infra::kube::kube_resource_to_dynamic;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{
|
||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||
use crate::modules::monitoring::okd::OpenshiftClusterAlertSender;
|
||||
use crate::topology::oberservability::monitoring::AlertManagerReceiver;
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
modules::monitoring::{
|
||||
kube_prometheus::{
|
||||
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||
},
|
||||
prometheus::prometheus::{Prometheus, PrometheusReceiver},
|
||||
},
|
||||
topology::oberservability::monitoring::AlertReceiver,
|
||||
};
|
||||
use harmony_types::net::Url;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct DiscordWebhook {
|
||||
pub name: K8sName,
|
||||
pub struct DiscordReceiver {
|
||||
pub name: String,
|
||||
pub url: Url,
|
||||
pub selectors: Vec<HashMap<String, String>>,
|
||||
pub route: AlertRoute,
|
||||
}
|
||||
|
||||
impl DiscordWebhook {
|
||||
fn get_receiver_config(&self) -> Result<AlertManagerReceiver, String> {
|
||||
let secret_name = format!("{}-secret", self.name.clone());
|
||||
let webhook_key = format!("{}", self.url.clone());
|
||||
impl AlertReceiver<OpenshiftClusterAlertSender> for DiscordReceiver {
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let matchers: Vec<String> = self
|
||||
.route
|
||||
.matchers
|
||||
.iter()
|
||||
.map(|m| match m.operator {
|
||||
MatchOp::Eq => format!("{} = {}", m.label, m.value),
|
||||
MatchOp::NotEq => format!("{} != {}", m.label, m.value),
|
||||
MatchOp::Regex => format!("{} =~ {}", m.label, m.value),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut string_data = BTreeMap::new();
|
||||
string_data.insert("webhook-url".to_string(), webhook_key.clone());
|
||||
|
||||
let secret = Secret {
|
||||
metadata: kube::core::ObjectMeta {
|
||||
name: Some(secret_name.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
string_data: Some(string_data),
|
||||
type_: Some("Opaque".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut matchers: Vec<String> = Vec::new();
|
||||
for selector in &self.selectors {
|
||||
trace!("selector: {:#?}", selector);
|
||||
for (k, v) in selector {
|
||||
matchers.push(format!("{} = {}", k, v));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(AlertManagerReceiver {
|
||||
additional_ressources: vec![kube_resource_to_dynamic(&secret)?],
|
||||
|
||||
receiver_config: json!({
|
||||
"name": self.name,
|
||||
"discord_configs": [
|
||||
{
|
||||
"webhook_url": self.url.clone(),
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}
|
||||
]
|
||||
}),
|
||||
route_config: json!({
|
||||
"receiver": self.name,
|
||||
"matchers": matchers,
|
||||
|
||||
}),
|
||||
})
|
||||
let route_block = serde_yaml::to_value(json!({
|
||||
"receiver": self.name,
|
||||
"matchers": matchers,
|
||||
}))
|
||||
.unwrap();
|
||||
Ok(route_block)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<OpenshiftClusterAlertSender> for DiscordWebhook {
|
||||
async fn install(
|
||||
&self,
|
||||
sender: &OpenshiftClusterAlertSender,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
todo!()
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver_block = serde_yaml::to_value(json!({
|
||||
"name": self.name,
|
||||
"discord_configs": [{
|
||||
"webhook_url": format!("{}", self.url),
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}]
|
||||
}))
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(receiver_block)
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
@@ -102,93 +58,16 @@ impl AlertReceiver<OpenshiftClusterAlertSender> for DiscordWebhook {
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
self.get_receiver_config()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<RHOBObservability> for DiscordWebhook {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
impl AlertReceiver<RedHatClusterObservability> for DiscordReceiver {
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
serde_yaml::to_value(&self.route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
async fn install(&self, sender: &RHOBObservability) -> Result<Outcome, InterpretError> {
|
||||
let ns = sender.namespace.clone();
|
||||
|
||||
let config = self.get_receiver_config()?;
|
||||
for resource in config.additional_ressources.iter() {
|
||||
todo!("can I apply a dynamicresource");
|
||||
// sender.client.apply(resource, Some(&ns)).await;
|
||||
}
|
||||
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
config.receiver_config
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone().to_string()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alertmanager_configs yaml:\n{:#?}",
|
||||
serde_yaml::to_string(&alertmanager_configs)
|
||||
);
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed rhob-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for DiscordWebhook {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
let ns = sender.namespace.clone();
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
//FIXME this secret needs to be applied so that the discord Configs for RedHatCO
|
||||
//CRD AlertmanagerConfigs can access the URL
|
||||
let secret_name = format!("{}-secret", self.name.clone());
|
||||
let webhook_key = format!("{}", self.url.clone());
|
||||
|
||||
@@ -205,206 +84,54 @@ impl AlertReceiver<CRDPrometheus> for DiscordWebhook {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let _ = sender.client.apply(&secret, Some(&ns)).await;
|
||||
|
||||
let spec = AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"discordConfigs": [
|
||||
{
|
||||
"apiURL": {
|
||||
"name": secret_name,
|
||||
"key": "webhook-url",
|
||||
},
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone().to_string()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(ns),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed crd-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
let receiver_config = json!({
|
||||
"name": self.name,
|
||||
"discordConfigs": [
|
||||
{
|
||||
"apiURL": {
|
||||
"key": "webhook-url",
|
||||
"name": format!("{}-secret", self.name)
|
||||
},
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}
|
||||
]
|
||||
});
|
||||
serde_yaml::to_value(receiver_config).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
self.name.clone()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RedHatClusterObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<Prometheus> for DiscordWebhook {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
impl AlertReceiver<KubePrometheus> for DiscordReceiver {
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
serde_yaml::to_value(self.route.clone()).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver_block = serde_yaml::to_value(json!({
|
||||
"name": self.name,
|
||||
"discord_configs": [{
|
||||
"webhook_url": format!("{}", self.url),
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}]
|
||||
}))
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(receiver_block)
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
self.name.clone()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusReceiver for DiscordWebhook {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone().to_string()
|
||||
}
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||
self.get_config().await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<KubePrometheus> for DiscordWebhook {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KubePrometheusReceiver for DiscordWebhook {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone().to_string()
|
||||
}
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||
self.get_config().await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertChannelConfig for DiscordWebhook {
|
||||
async fn get_config(&self) -> AlertManagerChannelConfig {
|
||||
let channel_global_config = None;
|
||||
let channel_receiver = self.alert_channel_receiver().await;
|
||||
let channel_route = self.alert_channel_route().await;
|
||||
|
||||
AlertManagerChannelConfig {
|
||||
channel_global_config,
|
||||
channel_receiver,
|
||||
channel_route,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DiscordWebhook {
|
||||
async fn alert_channel_route(&self) -> serde_yaml::Value {
|
||||
let mut route = Mapping::new();
|
||||
route.insert(
|
||||
Value::String("receiver".to_string()),
|
||||
Value::String(self.name.clone().to_string()),
|
||||
);
|
||||
route.insert(
|
||||
Value::String("matchers".to_string()),
|
||||
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
|
||||
);
|
||||
route.insert(Value::String("continue".to_string()), Value::Bool(true));
|
||||
Value::Mapping(route)
|
||||
}
|
||||
|
||||
async fn alert_channel_receiver(&self) -> serde_yaml::Value {
|
||||
let mut receiver = Mapping::new();
|
||||
receiver.insert(
|
||||
Value::String("name".to_string()),
|
||||
Value::String(self.name.clone().to_string()),
|
||||
);
|
||||
|
||||
let mut discord_config = Mapping::new();
|
||||
discord_config.insert(
|
||||
Value::String("webhook_url".to_string()),
|
||||
Value::String(self.url.to_string()),
|
||||
);
|
||||
|
||||
receiver.insert(
|
||||
Value::String("discord_configs".to_string()),
|
||||
Value::Sequence(vec![Value::Mapping(discord_config)]),
|
||||
);
|
||||
|
||||
Value::Mapping(receiver)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn discord_serialize_should_match() {
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
url: Url::Url(url::Url::parse("https://discord.i.dont.exist.com").unwrap()),
|
||||
selectors: vec![],
|
||||
};
|
||||
|
||||
let discord_receiver_receiver =
|
||||
serde_yaml::to_string(&discord_receiver.alert_channel_receiver().await).unwrap();
|
||||
println!("receiver \n{:#}", discord_receiver_receiver);
|
||||
let discord_receiver_receiver_yaml = r#"name: test-discord
|
||||
discord_configs:
|
||||
- webhook_url: https://discord.i.dont.exist.com/
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
let discord_receiver_route =
|
||||
serde_yaml::to_string(&discord_receiver.alert_channel_route().await).unwrap();
|
||||
println!("route \n{:#}", discord_receiver_route);
|
||||
let discord_receiver_route_yaml = r#"receiver: test-discord
|
||||
matchers:
|
||||
- alertname!=Watchdog
|
||||
continue: true
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
assert_eq!(discord_receiver_receiver, discord_receiver_receiver_yaml);
|
||||
assert_eq!(discord_receiver_route, discord_receiver_route_yaml);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,25 +1,13 @@
|
||||
use std::any::Any;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::ObjectMeta;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use serde_yaml::{Mapping, Value};
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
interpret::InterpretError,
|
||||
modules::monitoring::{
|
||||
kube_prometheus::{
|
||||
crd::{
|
||||
crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability,
|
||||
},
|
||||
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||
},
|
||||
prometheus::prometheus::{Prometheus, PrometheusReceiver},
|
||||
kube_prometheus::KubePrometheus, okd::OpenshiftClusterAlertSender, prometheus::Prometheus,
|
||||
red_hat_cluster_observability::RedHatClusterObservability,
|
||||
},
|
||||
topology::oberservability::monitoring::{AlertManagerReceiver, AlertReceiver},
|
||||
topology::monitoring::AlertReceiver,
|
||||
};
|
||||
use harmony_types::net::Url;
|
||||
|
||||
@@ -29,279 +17,104 @@ pub struct WebhookReceiver {
|
||||
pub url: Url,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<RHOBObservability> for WebhookReceiver {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &RHOBObservability) -> Result<Outcome, InterpretError> {
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
"httpConfig": {
|
||||
"tlsConfig": {
|
||||
"insecureSkipVerify": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
impl WebhookReceiver {
|
||||
fn build_receiver(&self) -> serde_json::Value {
|
||||
json!({
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
"httpConfig": {
|
||||
"tlsConfig": {
|
||||
"insecureSkipVerify": true
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed rhob-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
}
|
||||
}
|
||||
]})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
fn build_route(&self) -> serde_json::Value {
|
||||
json!({
|
||||
"name": self.name})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed crd-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
impl AlertReceiver<OpenshiftClusterAlertSender> for WebhookReceiver {
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver = self.build_receiver();
|
||||
serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let route = self.build_route();
|
||||
serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<Prometheus> for WebhookReceiver {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusReceiver for WebhookReceiver {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||
self.get_config().await
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
impl AlertReceiver<RedHatClusterObservability> for WebhookReceiver {
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver = self.build_receiver();
|
||||
serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let route = self.build_route();
|
||||
serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RedHatClusterObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver = self.build_receiver();
|
||||
serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let route = self.build_route();
|
||||
serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KubePrometheusReceiver for WebhookReceiver {
|
||||
impl AlertReceiver<Prometheus> for WebhookReceiver {
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver = self.build_receiver();
|
||||
serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let route = self.build_route();
|
||||
serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||
self.get_config().await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertChannelConfig for WebhookReceiver {
|
||||
async fn get_config(&self) -> AlertManagerChannelConfig {
|
||||
let channel_global_config = None;
|
||||
let channel_receiver = self.alert_channel_receiver().await;
|
||||
let channel_route = self.alert_channel_route().await;
|
||||
|
||||
AlertManagerChannelConfig {
|
||||
channel_global_config,
|
||||
channel_receiver,
|
||||
channel_route,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WebhookReceiver {
|
||||
async fn alert_channel_route(&self) -> serde_yaml::Value {
|
||||
let mut route = Mapping::new();
|
||||
route.insert(
|
||||
Value::String("receiver".to_string()),
|
||||
Value::String(self.name.clone()),
|
||||
);
|
||||
route.insert(
|
||||
Value::String("matchers".to_string()),
|
||||
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
|
||||
);
|
||||
route.insert(Value::String("continue".to_string()), Value::Bool(true));
|
||||
Value::Mapping(route)
|
||||
}
|
||||
|
||||
async fn alert_channel_receiver(&self) -> serde_yaml::Value {
|
||||
let mut receiver = Mapping::new();
|
||||
receiver.insert(
|
||||
Value::String("name".to_string()),
|
||||
Value::String(self.name.clone()),
|
||||
);
|
||||
|
||||
let mut webhook_config = Mapping::new();
|
||||
webhook_config.insert(
|
||||
Value::String("url".to_string()),
|
||||
Value::String(self.url.to_string()),
|
||||
);
|
||||
|
||||
receiver.insert(
|
||||
Value::String("webhook_configs".to_string()),
|
||||
Value::Sequence(vec![Value::Mapping(webhook_config)]),
|
||||
);
|
||||
|
||||
Value::Mapping(receiver)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
#[tokio::test]
|
||||
async fn webhook_serialize_should_match() {
|
||||
let webhook_receiver = WebhookReceiver {
|
||||
name: "test-webhook".to_string(),
|
||||
url: Url::Url(url::Url::parse("https://webhook.i.dont.exist.com").unwrap()),
|
||||
};
|
||||
|
||||
let webhook_receiver_receiver =
|
||||
serde_yaml::to_string(&webhook_receiver.alert_channel_receiver().await).unwrap();
|
||||
println!("receiver \n{:#}", webhook_receiver_receiver);
|
||||
let webhook_receiver_receiver_yaml = r#"name: test-webhook
|
||||
webhook_configs:
|
||||
- url: https://webhook.i.dont.exist.com/
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
let webhook_receiver_route =
|
||||
serde_yaml::to_string(&webhook_receiver.alert_channel_route().await).unwrap();
|
||||
println!("route \n{:#}", webhook_receiver_route);
|
||||
let webhook_receiver_route_yaml = r#"receiver: test-webhook
|
||||
matchers:
|
||||
- alertname!=Watchdog
|
||||
continue: true
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
assert_eq!(webhook_receiver_receiver, webhook_receiver_receiver_yaml);
|
||||
assert_eq!(webhook_receiver_route, webhook_receiver_route_yaml);
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
pub mod dell_server;
|
||||
pub mod opnsense;
|
||||
@@ -0,0 +1,15 @@
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
pub fn high_http_error_rate() -> PrometheusAlertRule {
|
||||
let expression = r#"(
|
||||
sum(rate(http_requests_total{status=~"5.."}[5m])) by (job, route, service)
|
||||
/
|
||||
sum(rate(http_requests_total[5m])) by (job, route, service)
|
||||
) > 0.05 and sum(rate(http_requests_total[5m])) by (job, route, service) > 10"#;
|
||||
|
||||
PrometheusAlertRule::new("HighApplicationErrorRate", expression)
|
||||
.for_duration("10m")
|
||||
.label("severity", "warning")
|
||||
.annotation("summary", "High HTTP error rate on {{ $labels.job }}")
|
||||
.annotation("description", "Job {{ $labels.job }} (route {{ $labels.route }}) has an error rate > 5% over the last 10m.")
|
||||
}
|
||||
@@ -1 +1,2 @@
|
||||
pub mod alerts;
|
||||
pub mod prometheus_alert_rule;
|
||||
|
||||
@@ -1,79 +1,13 @@
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
modules::monitoring::{
|
||||
kube_prometheus::{
|
||||
prometheus::{KubePrometheus, KubePrometheusRule},
|
||||
types::{AlertGroup, AlertManagerAdditionalPromRules},
|
||||
},
|
||||
prometheus::prometheus::{Prometheus, PrometheusRule},
|
||||
},
|
||||
topology::oberservability::monitoring::AlertRule,
|
||||
interpret::InterpretError,
|
||||
modules::monitoring::{kube_prometheus::KubePrometheus, okd::OpenshiftClusterAlertSender},
|
||||
topology::monitoring::AlertRule,
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_rule(self).await
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_rule(self).await
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusRule for AlertManagerRuleGroup {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
|
||||
let mut additional_prom_rules = BTreeMap::new();
|
||||
|
||||
additional_prom_rules.insert(
|
||||
self.name.clone(),
|
||||
AlertGroup {
|
||||
groups: vec![self.clone()],
|
||||
},
|
||||
);
|
||||
AlertManagerAdditionalPromRules {
|
||||
rules: additional_prom_rules,
|
||||
}
|
||||
}
|
||||
}
|
||||
#[async_trait]
|
||||
impl KubePrometheusRule for AlertManagerRuleGroup {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
|
||||
let mut additional_prom_rules = BTreeMap::new();
|
||||
|
||||
additional_prom_rules.insert(
|
||||
self.name.clone(),
|
||||
AlertGroup {
|
||||
groups: vec![self.clone()],
|
||||
},
|
||||
);
|
||||
AlertManagerAdditionalPromRules {
|
||||
rules: additional_prom_rules,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertManagerRuleGroup {
|
||||
pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {
|
||||
AlertManagerRuleGroup {
|
||||
@@ -129,3 +63,55 @@ impl PrometheusAlertRule {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertRule<OpenshiftClusterAlertSender> for AlertManagerRuleGroup {
|
||||
fn build_rule(&self) -> Result<serde_json::Value, InterpretError> {
|
||||
let name = self.name.clone();
|
||||
let mut rules: Vec<crate::modules::monitoring::okd::crd::alerting_rules::Rule> = vec![];
|
||||
for rule in self.rules.clone() {
|
||||
rules.push(rule.into())
|
||||
}
|
||||
|
||||
let rule_groups =
|
||||
vec![crate::modules::monitoring::okd::crd::alerting_rules::RuleGroup { name, rules }];
|
||||
|
||||
Ok(serde_json::to_value(rule_groups).map_err(|e| InterpretError::new(e.to_string()))?)
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<OpenshiftClusterAlertSender>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
|
||||
fn build_rule(&self) -> Result<serde_json::Value, InterpretError> {
|
||||
let name = self.name.clone();
|
||||
let mut rules: Vec<
|
||||
crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::Rule,
|
||||
> = vec![];
|
||||
for rule in self.rules.clone() {
|
||||
rules.push(rule.into())
|
||||
}
|
||||
|
||||
let rule_groups = vec![
|
||||
crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::RuleGroup {
|
||||
name,
|
||||
rules,
|
||||
},
|
||||
];
|
||||
|
||||
Ok(serde_json::to_value(rule_groups).map_err(|e| InterpretError::new(e.to_string()))?)
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,32 +5,26 @@ use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
application::Application,
|
||||
monitoring::{
|
||||
grafana::grafana::Grafana, kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus,
|
||||
},
|
||||
prometheus::prometheus::PrometheusMonitoring,
|
||||
},
|
||||
modules::{application::Application, monitoring::prometheus::Prometheus},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sclient, Topology,
|
||||
oberservability::monitoring::{AlertReceiver, AlertingInterpret, ScrapeTarget},
|
||||
monitoring::{AlertReceiver, AlertingInterpret, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ApplicationMonitoringScore {
|
||||
pub sender: CRDPrometheus,
|
||||
pub sender: Prometheus,
|
||||
pub application: Arc<dyn Application>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + PrometheusMonitoring<CRDPrometheus> + K8sclient + Grafana> Score<T>
|
||||
for ApplicationMonitoringScore
|
||||
{
|
||||
impl<T: Topology + Observability<Prometheus> + K8sclient> Score<T> for ApplicationMonitoringScore {
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
debug!("creating alerting interpret");
|
||||
//TODO will need to use k8sclient to apply service monitors or find a way to pass
|
||||
//them to the AlertingInterpret potentially via Sender Prometheus
|
||||
Box::new(AlertingInterpret {
|
||||
sender: self.sender.clone(),
|
||||
receivers: self.receivers.clone(),
|
||||
|
||||
@@ -9,28 +9,27 @@ use crate::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::Application,
|
||||
monitoring::kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability,
|
||||
},
|
||||
prometheus::prometheus::PrometheusMonitoring,
|
||||
monitoring::red_hat_cluster_observability::RedHatClusterObservability,
|
||||
},
|
||||
score::Score,
|
||||
topology::{PreparationOutcome, Topology, oberservability::monitoring::AlertReceiver},
|
||||
topology::{
|
||||
Topology,
|
||||
monitoring::{AlertReceiver, AlertingInterpret, Observability},
|
||||
},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ApplicationRHOBMonitoringScore {
|
||||
pub sender: RHOBObservability,
|
||||
pub struct ApplicationRedHatClusterMonitoringScore {
|
||||
pub sender: RedHatClusterObservability,
|
||||
pub application: Arc<dyn Application>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<RedHatClusterObservability>>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + PrometheusMonitoring<RHOBObservability>> Score<T>
|
||||
for ApplicationRHOBMonitoringScore
|
||||
impl<T: Topology + Observability<RedHatClusterObservability>> Score<T>
|
||||
for ApplicationRedHatClusterMonitoringScore
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(ApplicationRHOBMonitoringInterpret {
|
||||
Box::new(ApplicationRedHatClusterMonitoringInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
@@ -44,38 +43,28 @@ impl<T: Topology + PrometheusMonitoring<RHOBObservability>> Score<T>
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ApplicationRHOBMonitoringInterpret {
|
||||
score: ApplicationRHOBMonitoringScore,
|
||||
pub struct ApplicationRedHatClusterMonitoringInterpret {
|
||||
score: ApplicationRedHatClusterMonitoringScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + PrometheusMonitoring<RHOBObservability>> Interpret<T>
|
||||
for ApplicationRHOBMonitoringInterpret
|
||||
impl<T: Topology + Observability<RedHatClusterObservability>> Interpret<T>
|
||||
for ApplicationRedHatClusterMonitoringInterpret
|
||||
{
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let result = topology
|
||||
.install_prometheus(
|
||||
&self.score.sender,
|
||||
inventory,
|
||||
Some(self.score.receivers.clone()),
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome {
|
||||
PreparationOutcome::Success { details: _ } => {
|
||||
Ok(Outcome::success("Prometheus installed".into()))
|
||||
}
|
||||
PreparationOutcome::Noop => {
|
||||
Ok(Outcome::noop("Prometheus installation skipped".into()))
|
||||
}
|
||||
},
|
||||
Err(err) => Err(InterpretError::from(err)),
|
||||
}
|
||||
//TODO will need to use k8sclient to apply crd ServiceMonitor or find a way to pass
|
||||
//them to the AlertingInterpret potentially via Sender RedHatClusterObservability
|
||||
let alerting_interpret = AlertingInterpret {
|
||||
sender: self.score.sender.clone(),
|
||||
receivers: self.score.receivers.clone(),
|
||||
rules: vec![],
|
||||
scrape_targets: None,
|
||||
};
|
||||
alerting_interpret.execute(inventory, topology).await
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
|
||||
@@ -1,17 +1,48 @@
|
||||
use async_trait::async_trait;
|
||||
use k8s_openapi::Resource;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
topology::{PreparationError, PreparationOutcome},
|
||||
topology::{
|
||||
PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
pub trait Grafana {
|
||||
async fn ensure_grafana_operator(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError>;
|
||||
|
||||
async fn install_grafana(&self) -> Result<PreparationOutcome, PreparationError>;
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct Grafana {
|
||||
pub namespace: String,
|
||||
}
|
||||
|
||||
impl AlertSender for Grafana {
|
||||
fn name(&self) -> String {
|
||||
"grafana".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<Grafana>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertRule<Grafana>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn ScrapeTarget<Grafana>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{
|
||||
HelmCommand, Topology,
|
||||
monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct GrafanaAlertingScore {
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<Grafana>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<Grafana>>>,
|
||||
pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<Grafana>>>>,
|
||||
pub sender: Grafana,
|
||||
}
|
||||
|
||||
impl<T: Topology + Observability<Grafana>> Score<T> for GrafanaAlertingScore {
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
Box::new(AlertingInterpret {
|
||||
sender: self.sender.clone(),
|
||||
receivers: self.receivers.clone(),
|
||||
rules: self.rules.clone(),
|
||||
scrape_targets: self.scrape_targets.clone(),
|
||||
})
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"HelmPrometheusAlertingScore".to_string()
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
use harmony_macros::hurl;
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use std::{collections::HashMap, str::FromStr};
|
||||
|
||||
use crate::modules::helm::chart::{HelmChartScore, HelmRepository};
|
||||
|
||||
pub fn grafana_helm_chart_score(ns: &str, namespace_scope: bool) -> HelmChartScore {
|
||||
let mut values_overrides = HashMap::new();
|
||||
values_overrides.insert(
|
||||
NonBlankString::from_str("namespaceScope").unwrap(),
|
||||
namespace_scope.to_string(),
|
||||
);
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(ns).unwrap()),
|
||||
release_name: NonBlankString::from_str("grafana-operator").unwrap(),
|
||||
chart_name: NonBlankString::from_str("grafana/grafana-operator").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: Some(values_overrides),
|
||||
values_yaml: None,
|
||||
create_namespace: true,
|
||||
install_only: true,
|
||||
repository: Some(HelmRepository::new(
|
||||
"grafana".to_string(),
|
||||
hurl!("https://grafana.github.io/helm-charts"),
|
||||
true,
|
||||
)),
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
pub mod helm_grafana;
|
||||
@@ -4,7 +4,7 @@ use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::crd_prometheuses::LabelSelector;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheuses::LabelSelector;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
3
harmony/src/modules/monitoring/grafana/k8s/crd/mod.rs
Normal file
3
harmony/src/modules/monitoring/grafana/k8s/crd/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub mod crd_grafana;
|
||||
pub mod grafana_default_dashboard;
|
||||
pub mod rhob_grafana;
|
||||
@@ -4,7 +4,7 @@ use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector;
|
||||
use crate::modules::monitoring::red_hat_cluster_observability::crd::rhob_prometheuses::LabelSelector;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
1
harmony/src/modules/monitoring/grafana/k8s/helm/mod.rs
Normal file
1
harmony/src/modules/monitoring/grafana/k8s/helm/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod grafana_operator;
|
||||
7
harmony/src/modules/monitoring/grafana/k8s/mod.rs
Normal file
7
harmony/src/modules/monitoring/grafana/k8s/mod.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
pub mod crd;
|
||||
pub mod helm;
|
||||
pub mod score_ensure_grafana_ready;
|
||||
pub mod score_grafana_alert_receiver;
|
||||
pub mod score_grafana_datasource;
|
||||
pub mod score_grafana_rule;
|
||||
pub mod score_install_grafana;
|
||||
@@ -0,0 +1,54 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sEnsureReadyScore {
|
||||
pub sender: Grafana,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sEnsureReadyScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sEnsureReadyScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
// async fn ensure_ready(
|
||||
// &self,
|
||||
// inventory: &Inventory,
|
||||
// ) -> Result<PreparationOutcome, PreparationError> {
|
||||
// debug!("ensure grafana operator");
|
||||
// let client = self.k8s_client().await.unwrap();
|
||||
// let grafana_gvk = GroupVersionKind {
|
||||
// group: "grafana.integreatly.org".to_string(),
|
||||
// version: "v1beta1".to_string(),
|
||||
// kind: "Grafana".to_string(),
|
||||
// };
|
||||
// let name = "grafanas.grafana.integreatly.org";
|
||||
// let ns = "grafana";
|
||||
//
|
||||
// let grafana_crd = client
|
||||
// .get_resource_json_value(name, Some(ns), &grafana_gvk)
|
||||
// .await;
|
||||
// match grafana_crd {
|
||||
// Ok(_) => {
|
||||
// return Ok(PreparationOutcome::Success {
|
||||
// details: "Found grafana CRDs in cluster".to_string(),
|
||||
// });
|
||||
// }
|
||||
//
|
||||
// Err(_) => {
|
||||
// return self
|
||||
// .install_grafana_operator(inventory, Some("grafana"))
|
||||
// .await;
|
||||
// }
|
||||
// };
|
||||
// }
|
||||
@@ -0,0 +1,24 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertReceiver},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sReceiverScore {
|
||||
pub sender: Grafana,
|
||||
pub receiver: Box<dyn AlertReceiver<Grafana>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sReceiverScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sReceiverScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::ScrapeTarget},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sDatasourceScore {
|
||||
pub sender: Grafana,
|
||||
pub scrape_target: Box<dyn ScrapeTarget<Grafana>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sDatasourceScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sDatasourceScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
// fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option<String> {
|
||||
// let token_b64 = secret
|
||||
// .data
|
||||
// .get("token")
|
||||
// .or_else(|| secret.data.get("data").and_then(|d| d.get("token")))
|
||||
// .and_then(|v| v.as_str())?;
|
||||
//
|
||||
// let bytes = general_purpose::STANDARD.decode(token_b64).ok()?;
|
||||
//
|
||||
// let s = String::from_utf8(bytes).ok()?;
|
||||
//
|
||||
// let cleaned = s
|
||||
// .trim_matches(|c: char| c.is_whitespace() || c == '\0')
|
||||
// .to_string();
|
||||
// Some(cleaned)
|
||||
// }
|
||||
// fn build_grafana_datasource(
|
||||
// &self,
|
||||
// name: &str,
|
||||
// ns: &str,
|
||||
// label_selector: &LabelSelector,
|
||||
// url: &str,
|
||||
// token: &str,
|
||||
// ) -> GrafanaDatasource {
|
||||
// let mut json_data = BTreeMap::new();
|
||||
// json_data.insert("timeInterval".to_string(), "5s".to_string());
|
||||
//
|
||||
// GrafanaDatasource {
|
||||
// metadata: ObjectMeta {
|
||||
// name: Some(name.to_string()),
|
||||
// namespace: Some(ns.to_string()),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// spec: GrafanaDatasourceSpec {
|
||||
// instance_selector: label_selector.clone(),
|
||||
// allow_cross_namespace_import: Some(true),
|
||||
// values_from: None,
|
||||
// datasource: GrafanaDatasourceConfig {
|
||||
// access: "proxy".to_string(),
|
||||
// name: name.to_string(),
|
||||
// rype: "prometheus".to_string(),
|
||||
// url: url.to_string(),
|
||||
// database: None,
|
||||
// json_data: Some(GrafanaDatasourceJsonData {
|
||||
// time_interval: Some("60s".to_string()),
|
||||
// http_header_name1: Some("Authorization".to_string()),
|
||||
// tls_skip_verify: Some(true),
|
||||
// oauth_pass_thru: Some(true),
|
||||
// }),
|
||||
// secure_json_data: Some(GrafanaDatasourceSecureJsonData {
|
||||
// http_header_value1: Some(format!("Bearer {token}")),
|
||||
// }),
|
||||
// is_default: Some(false),
|
||||
// editable: Some(true),
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
// }
|
||||
@@ -0,0 +1,67 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertRule},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sRuleScore {
|
||||
pub sender: Grafana,
|
||||
pub rule: Box<dyn AlertRule<Grafana>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sRuleScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sRuleScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
// kind: Secret
|
||||
// apiVersion: v1
|
||||
// metadata:
|
||||
// name: credentials
|
||||
// namespace: grafana
|
||||
// stringData:
|
||||
// PROMETHEUS_USERNAME: root
|
||||
// PROMETHEUS_PASSWORD: secret
|
||||
// type: Opaque
|
||||
// ---
|
||||
// apiVersion: grafana.integreatly.org/v1beta1
|
||||
// kind: GrafanaDatasource
|
||||
// metadata:
|
||||
// name: grafanadatasource-sample
|
||||
// spec:
|
||||
// valuesFrom:
|
||||
// - targetPath: "basicAuthUser"
|
||||
// valueFrom:
|
||||
// secretKeyRef:
|
||||
// name: "credentials"
|
||||
// key: "PROMETHEUS_USERNAME"
|
||||
// - targetPath: "secureJsonData.basicAuthPassword"
|
||||
// valueFrom:
|
||||
// secretKeyRef:
|
||||
// name: "credentials"
|
||||
// key: "PROMETHEUS_PASSWORD"
|
||||
// instanceSelector:
|
||||
// matchLabels:
|
||||
// dashboards: "grafana"
|
||||
// datasource:
|
||||
// name: prometheus
|
||||
// type: prometheus
|
||||
// access: proxy
|
||||
// basicAuth: true
|
||||
// url: http://prometheus-service:9090
|
||||
// isDefault: true
|
||||
// basicAuthUser: ${PROMETHEUS_USERNAME}
|
||||
// jsonData:
|
||||
// "tlsSkipVerify": true
|
||||
// "timeInterval": "5s"
|
||||
// secureJsonData:
|
||||
// "basicAuthPassword": ${PROMETHEUS_PASSWORD} #
|
||||
@@ -0,0 +1,189 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sInstallScore {
|
||||
pub sender: Grafana,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sInstallScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sEnsureReadyScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
// let score = grafana_operator_helm_chart_score(sender.namespace.clone());
|
||||
//
|
||||
// score
|
||||
// .create_interpret()
|
||||
// .execute(inventory, self)
|
||||
// .await
|
||||
// .map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
//
|
||||
|
||||
//
|
||||
// fn build_grafana_dashboard(
|
||||
// &self,
|
||||
// ns: &str,
|
||||
// label_selector: &LabelSelector,
|
||||
// ) -> GrafanaDashboard {
|
||||
// let graf_dashboard = GrafanaDashboard {
|
||||
// metadata: ObjectMeta {
|
||||
// name: Some(format!("grafana-dashboard-{}", ns)),
|
||||
// namespace: Some(ns.to_string()),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// spec: GrafanaDashboardSpec {
|
||||
// resync_period: Some("30s".to_string()),
|
||||
// instance_selector: label_selector.clone(),
|
||||
// datasources: Some(vec![GrafanaDashboardDatasource {
|
||||
// input_name: "DS_PROMETHEUS".to_string(),
|
||||
// datasource_name: "thanos-openshift-monitoring".to_string(),
|
||||
// }]),
|
||||
// json: None,
|
||||
// grafana_com: Some(GrafanaCom {
|
||||
// id: 17406,
|
||||
// revision: None,
|
||||
// }),
|
||||
// },
|
||||
// };
|
||||
// graf_dashboard
|
||||
// }
|
||||
//
|
||||
// fn build_grafana(&self, ns: &str, labels: &BTreeMap<String, String>) -> GrafanaCRD {
|
||||
// let grafana = GrafanaCRD {
|
||||
// metadata: ObjectMeta {
|
||||
// name: Some(format!("grafana-{}", ns)),
|
||||
// namespace: Some(ns.to_string()),
|
||||
// labels: Some(labels.clone()),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// spec: GrafanaSpec {
|
||||
// config: None,
|
||||
// admin_user: None,
|
||||
// admin_password: None,
|
||||
// ingress: None,
|
||||
// persistence: None,
|
||||
// resources: None,
|
||||
// },
|
||||
// };
|
||||
// grafana
|
||||
// }
|
||||
//
|
||||
// async fn build_grafana_ingress(&self, ns: &str) -> K8sIngressScore {
|
||||
// let domain = self.get_domain(&format!("grafana-{}", ns)).await.unwrap();
|
||||
// let name = format!("{}-grafana", ns);
|
||||
// let backend_service = format!("grafana-{}-service", ns);
|
||||
//
|
||||
// K8sIngressScore {
|
||||
// name: fqdn::fqdn!(&name),
|
||||
// host: fqdn::fqdn!(&domain),
|
||||
// backend_service: fqdn::fqdn!(&backend_service),
|
||||
// port: 3000,
|
||||
// path: Some("/".to_string()),
|
||||
// path_type: Some(PathType::Prefix),
|
||||
// namespace: Some(fqdn::fqdn!(&ns)),
|
||||
// ingress_class_name: Some("openshift-default".to_string()),
|
||||
// }
|
||||
// }
|
||||
// #[async_trait]
|
||||
// impl Grafana for K8sAnywhereTopology {
|
||||
// async fn install_grafana(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
// let ns = "grafana";
|
||||
//
|
||||
// let mut label = BTreeMap::new();
|
||||
//
|
||||
// label.insert("dashboards".to_string(), "grafana".to_string());
|
||||
//
|
||||
// let label_selector = LabelSelector {
|
||||
// match_labels: label.clone(),
|
||||
// match_expressions: vec![],
|
||||
// };
|
||||
//
|
||||
// let client = self.k8s_client().await?;
|
||||
//
|
||||
// let grafana = self.build_grafana(ns, &label);
|
||||
//
|
||||
// client.apply(&grafana, Some(ns)).await?;
|
||||
// //TODO change this to a ensure ready or something better than just a timeout
|
||||
// client
|
||||
// .wait_until_deployment_ready(
|
||||
// "grafana-grafana-deployment",
|
||||
// Some("grafana"),
|
||||
// Some(Duration::from_secs(30)),
|
||||
// )
|
||||
// .await?;
|
||||
//
|
||||
// let sa_name = "grafana-grafana-sa";
|
||||
// let token_secret_name = "grafana-sa-token-secret";
|
||||
//
|
||||
// let sa_token_secret = self.build_sa_token_secret(token_secret_name, sa_name, ns);
|
||||
//
|
||||
// client.apply(&sa_token_secret, Some(ns)).await?;
|
||||
// let secret_gvk = GroupVersionKind {
|
||||
// group: "".to_string(),
|
||||
// version: "v1".to_string(),
|
||||
// kind: "Secret".to_string(),
|
||||
// };
|
||||
//
|
||||
// let secret = client
|
||||
// .get_resource_json_value(token_secret_name, Some(ns), &secret_gvk)
|
||||
// .await?;
|
||||
//
|
||||
// let token = format!(
|
||||
// "Bearer {}",
|
||||
// self.extract_and_normalize_token(&secret).unwrap()
|
||||
// );
|
||||
//
|
||||
// debug!("creating grafana clusterrole binding");
|
||||
//
|
||||
// let clusterrolebinding =
|
||||
// self.build_cluster_rolebinding(sa_name, "cluster-monitoring-view", ns);
|
||||
//
|
||||
// client.apply(&clusterrolebinding, Some(ns)).await?;
|
||||
//
|
||||
// debug!("creating grafana datasource crd");
|
||||
//
|
||||
// let thanos_url = format!(
|
||||
// "https://{}",
|
||||
// self.get_domain("thanos-querier-openshift-monitoring")
|
||||
// .await
|
||||
// .unwrap()
|
||||
// );
|
||||
//
|
||||
// let thanos_openshift_datasource = self.build_grafana_datasource(
|
||||
// "thanos-openshift-monitoring",
|
||||
// ns,
|
||||
// &label_selector,
|
||||
// &thanos_url,
|
||||
// &token,
|
||||
// );
|
||||
//
|
||||
// client.apply(&thanos_openshift_datasource, Some(ns)).await?;
|
||||
//
|
||||
// debug!("creating grafana dashboard crd");
|
||||
// let dashboard = self.build_grafana_dashboard(ns, &label_selector);
|
||||
//
|
||||
// client.apply(&dashboard, Some(ns)).await?;
|
||||
// debug!("creating grafana ingress");
|
||||
// let grafana_ingress = self.build_grafana_ingress(ns).await;
|
||||
//
|
||||
// grafana_ingress
|
||||
// .interpret(&Inventory::empty(), self)
|
||||
// .await
|
||||
// .map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
//
|
||||
// Ok(PreparationOutcome::Success {
|
||||
// details: "Installed grafana composants".to_string(),
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
@@ -1,2 +1,3 @@
|
||||
pub mod grafana;
|
||||
pub mod helm;
|
||||
pub mod grafana_alerting_score;
|
||||
pub mod k8s;
|
||||
|
||||
@@ -1,91 +1,17 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
monitoring::{
|
||||
grafana::grafana::Grafana, kube_prometheus::crd::service_monitor::ServiceMonitor,
|
||||
},
|
||||
prometheus::prometheus::PrometheusMonitoring,
|
||||
},
|
||||
topology::{
|
||||
K8sclient, Topology,
|
||||
installable::Installable,
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::{AlertReceiver, AlertSender, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[derive(CustomResource, Serialize, Deserialize, Default, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1alpha1",
|
||||
version = "v1",
|
||||
kind = "AlertmanagerConfig",
|
||||
plural = "alertmanagerconfigs",
|
||||
namespaced
|
||||
namespaced,
|
||||
derive = "Default"
|
||||
)]
|
||||
pub struct AlertmanagerConfigSpec {
|
||||
#[serde(flatten)]
|
||||
pub data: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct CRDPrometheus {
|
||||
pub namespace: String,
|
||||
pub client: Arc<K8sClient>,
|
||||
pub service_monitor: Vec<ServiceMonitor>,
|
||||
}
|
||||
|
||||
impl AlertSender for CRDPrometheus {
|
||||
fn name(&self) -> String {
|
||||
"CRDAlertManager".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn ScrapeTarget<CRDPrometheus>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient + PrometheusMonitoring<CRDPrometheus> + Grafana> Installable<T>
|
||||
for CRDPrometheus
|
||||
{
|
||||
async fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
|
||||
topology.ensure_grafana_operator(inventory).await?;
|
||||
topology.ensure_prometheus_operator(self, inventory).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn ensure_installed(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<(), InterpretError> {
|
||||
topology.install_grafana().await?;
|
||||
topology.install_prometheus(&self, inventory, None).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::modules::prometheus::alerts::k8s::{
|
||||
use crate::modules::monitoring::alert_rule::alerts::k8s::{
|
||||
deployment::alert_deployment_unavailable,
|
||||
pod::{alert_container_restarting, alert_pod_not_ready, pod_failed},
|
||||
pvc::high_pvc_fill_rate_over_two_days,
|
||||
|
||||
@@ -6,13 +6,14 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)]
|
||||
#[derive(CustomResource, Default, Debug, Serialize, Deserialize, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1",
|
||||
kind = "PrometheusRule",
|
||||
plural = "prometheusrules",
|
||||
namespaced
|
||||
namespaced,
|
||||
derive = "Default"
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PrometheusRuleSpec {
|
||||
|
||||
@@ -1,23 +1,18 @@
|
||||
use std::net::IpAddr;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
modules::monitoring::kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus, crd_prometheuses::LabelSelector,
|
||||
},
|
||||
topology::oberservability::monitoring::ScrapeTarget,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheuses::LabelSelector;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[derive(CustomResource, Default, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1alpha1",
|
||||
kind = "ScrapeConfig",
|
||||
plural = "scrapeconfigs",
|
||||
derive = "Default",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -70,8 +65,8 @@ pub struct ScrapeConfigSpec {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct StaticConfig {
|
||||
pub targets: Vec<String>,
|
||||
|
||||
pub labels: Option<LabelSelector>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub labels: Option<BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
/// Relabeling configuration for target or metric relabeling.
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user